Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,19 @@ jobs:
docker version
docker info

- name: Install Docker Buildx
# The windows-2025 runner ships Docker Engine but not the buildx CLI plugin, which
# test_gen_dockerfile_syntax needs for `docker buildx build --check`. Install the
# plugin binary so it uses the implicit 'default' builder (the host docker daemon);
# we deliberately do NOT create a docker-container builder (that driver is Linux-only).
run: |
$version = "v0.34.1"
$url = "https://github.com/docker/buildx/releases/download/$version/buildx-$version.windows-amd64.exe"
$dest = Join-Path $env:ProgramData "Docker\cli-plugins"
New-Item -ItemType Directory -Force -Path $dest | Out-Null
Invoke-WebRequest -Uri $url -OutFile (Join-Path $dest "docker-buildx.exe")
docker buildx version

- name: Install test dependencies
run: |
python -m pip install --upgrade pip
Expand Down
4 changes: 2 additions & 2 deletions docs/Development.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,11 @@ python -m launch.scripts.upload_docker\
--clear_after_push 0 # 0 for false and 1 for true
```

### Re-assemble Dockerfile (Beta / Preview)
### Re-assemble Dockerfile

Reconstruct the Dockerfile of a committed image from instance["docker_image_layers"].

Note this script is still under development. If you find any bugs of this script, welcome GitHub issues and pull requests.
The Dockerfile behavior strictly aligns with that of RepoLaunch-created images. It produces two layers (the setup layer and the organize layer), and failing commands are silently bypassed instead of interrupting the build.

```bash
python -m launch.scripts.gen_dockerfile \
Expand Down
125 changes: 104 additions & 21 deletions launch/scripts/gen_dockerfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
import json
import os
from typing import Any, Literal, Optional, TypedDict
import logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(name)s: %(message)s",
)

class LayerInfo(TypedDict):
base_image: str
Expand All @@ -29,9 +34,22 @@ class LayerInfo(TypedDict):
# Working directories of the generated images. WINDOWS_WORKDIR is emitted as the
# WORKDIR instruction of the windows Dockerfile; LINUX_WORKDIR is presumably its linux
# counterpart (its use is outside this view -- TODO confirm).
LINUX_WORKDIR = "/testbed"
WINDOWS_WORKDIR = r"C:\testbed"

# Heredoc / here-string delimiters chosen to be unlikely to appear in any real command.
# Heredoc delimiter for the linux generator (BuildKit heredoc works on linux).
LINUX_HEREDOC_TAG = "RL_CMD_EOF"
WINDOWS_HEREDOC_TAG = "RL_CMD_EOF"

# Windows sentinels. The windows generator must work on the *legacy* Docker builder
# (Docker Desktop in Windows-container mode does not use BuildKit, and no
# `docker/dockerfile` frontend tag publishes a Windows manifest, so heredocs are
# unavailable for windows containers). The legacy builder also strips literal double
# quotes from shell-form RUN instructions. So every command is carried as a
# single-quoted PowerShell string with these substitutions, then reconstituted into a
# .ps1 at build time:
# " -> WINDOWS_DQ_SENTINEL (decoded to [char]34 ; avoids the builder eating quotes)
# \n -> WINDOWS_NL_SENTINEL (decoded to [char]10 ; avoids RUN line-splitting)
# ' -> '' (PowerShell single-quoted-string escaping)
# Chosen to be extremely unlikely to appear verbatim in any real command.
WINDOWS_DQ_SENTINEL = "~~RLDQ~~"
WINDOWS_NL_SENTINEL = "~~RLNL~~"


def _render_linux_layer(commands: list[str], comment: str) -> list[str]:
Expand Down Expand Up @@ -75,40 +93,102 @@ def gen_linux_dockerfile(layers: LayerInfo) -> str:
return "\n".join(lines)


def _render_windows_layer(commands: list[str], comment: str) -> list[str]:
def _windows_layer_script_path(comment: str) -> str:
"""Stable, unlikely-to-collide path for the per-layer script staged into the image."""
slug = comment.strip().lower().replace(" ", "_")
return rf"C:\rl_{slug}.ps1"


def _encode_windows_command(cmd: str) -> str:
    """
    Encode one command for transport inside a single-quoted PowerShell string on one
    physical (backtick-continued) Dockerfile line. See WINDOWS_*_SENTINEL above.

    Line endings are normalized (CRLF -> LF) *before* trailing newlines are trimmed,
    so a command that ends in CRLF does not leave a stray carriage return behind.

    Substitution order is significant: hide double quotes first (plain swap), then
    escape single quotes for the surrounding single-quoted string, then hide newlines.
    The two sentinels are disjoint from `'`-doubling so decode order at build time does
    not matter.

    Args:
        cmd: The raw command text; may span multiple lines and contain quotes.

    Returns:
        The command as a single line with no literal double quotes or newlines,
        safe to embed between single quotes in a PowerShell statement.
    """
    # Normalize first: stripping "\n" before this would turn a trailing "\r\n" into
    # a dangling "\r" that the later CRLF replacement can no longer match.
    s = cmd.replace("\r\n", "\n").rstrip("\n")
    s = s.replace('"', WINDOWS_DQ_SENTINEL)
    s = s.replace("'", "''")
    s = s.replace("\n", WINDOWS_NL_SENTINEL)
    return s


def _render_windows_layer(commands: list[str], comment: str) -> list[str]:
"""
Render one windows layer as a single RUN instruction (one layer per setup/organize,
note 3/4) that assembles a .ps1 inside the image and executes it.

The RUN does, in order (each step its own backtick-continued physical line):
- Set-Content an empty .ps1, then one Add-Content per command appending its
encoded `try { <cmd> } catch { ... }` block (note 5: a failure -- PowerShell
error -- is swallowed; a nonzero native exit code does not throw, so execution
simply falls through to the next command);
- decode the two sentinels back to real `"` and newlines, rewriting the .ps1 as a
normal multi-line script (note 6: multi-line commands are preserved verbatim);
- execute the .ps1.

Why not a heredoc: Docker Desktop builds windows containers with the *legacy*
builder (no BuildKit), and no `docker/dockerfile` frontend image is published for
windows, so `RUN <<EOF` / `COPY <<EOF` are unavailable. Why the sentinels: the
legacy builder strips literal double quotes from shell-form RUN, and a literal
newline would split the RUN into separate (invalid) instructions. Encoding both
sidesteps the builder entirely and is plaintext (no base64).
"""
if not commands:
return []

out: list[str] = ["", f"# ---- {comment} ----", f"RUN <<'{WINDOWS_HEREDOC_TAG}'"]
out.append("$ErrorActionPreference = 'Continue'")
script_path: str = _windows_layer_script_path(comment)

# Each entry becomes one physical line in the RUN, joined by " ; `" continuations.
statements: list[str] = [
f"Set-Content -LiteralPath {script_path} -Value '' -Encoding UTF8",
f"Add-Content -LiteralPath {script_path} -Value '$ErrorActionPreference = ''Continue'''",
]
for cmd in commands:
body = cmd.rstrip("\n").replace("'", "''")
out.append("try {")
out.append(" $cmd = @'")
for line in body.splitlines() or [""]:
out.append(line)
out.append("'@")
out.append(" Invoke-Expression $cmd")
out.append("} catch { Write-Host $_.Exception.Message }")
out.append(WINDOWS_HEREDOC_TAG)
return out
# try/catch swallows PowerShell-thrown errors; `finally { $global:LASTEXITCODE = 0 }`
# neutralizes a *native* command's nonzero exit (e.g. `dotnet test` failing does
# not throw, it only sets $LASTEXITCODE). Together: every command exits clean and
# the next one always runs (note 5), no matter how it failed.
body = (
"try {"
+ WINDOWS_NL_SENTINEL
+ _encode_windows_command(cmd)
+ WINDOWS_NL_SENTINEL
+ "} catch { Write-Host $_.Exception.Message } finally { $global:LASTEXITCODE = 0 }"
)
statements.append(f"Add-Content -LiteralPath {script_path} -Value '{body}'")
statements.append(
f"(Get-Content -LiteralPath {script_path} -Raw)"
f" -replace '{WINDOWS_DQ_SENTINEL}',[char]34"
f" -replace '{WINDOWS_NL_SENTINEL}',[char]10"
f" | Set-Content -LiteralPath {script_path} -Encoding UTF8"
)
statements.append(f"& {script_path}")
# Remove the script in the SAME RUN so it never gets committed into the layer
# (deleting it in a later RUN would whiteout-mask it but keep it on disk, growing
# the image). SilentlyContinue so cleanup never fails the RUN.
statements.append(f"Remove-Item -Force -ErrorAction SilentlyContinue {script_path}")

# Backtick line-continuation: every physical line but the last ends with " ; `".
run_lines = [("RUN " if i == 0 else " ") + stmt for i, stmt in enumerate(statements)]
run_block = " ; `\n".join(run_lines)

return ["", f"# ---- {comment} ----", run_block]


def gen_windows_dockerfile(layers: LayerInfo) -> str:
base_image: str = layers["base_image"]
setup_cmds: list[str] = list(layers.get("setup_layer") or [])
organize_cmds: list[str] = list(layers.get("organize_layer") or [])

# `# escape=`` switches the line-continuation char to a backtick so each layer's RUN
# can span multiple physical lines (one Add-Content per line). No `# syntax`
# directive: it would force pulling the dockerfile frontend image, which is not
# published for windows and fails to resolve.
lines: list[str] = [
"# syntax=docker/dockerfile:1.4",
"# escape=`",
f"FROM {base_image}",
f"WORKDIR {WINDOWS_WORKDIR}",
Expand All @@ -121,6 +201,9 @@ def gen_windows_dockerfile(layers: LayerInfo) -> str:


def main(instances: list[dict[str, Any]], output_dir: Path, platform: Literal["linux", "windows"]) -> None:
logging.info(("The gen_dockerfile script produces a Dockerfile from the command sequence of RepoLaunch. ",
"The Dockerfile behavior strictly aligns with that of RepoLaunch-created images: "
"it produces two layers (the setup layer and the organize layer) with error commands silenty bypassed instead of interuptting the build.\n"))
for instance in instances:
filename: str = "Dockerfile_" + instance["instance_id"].strip().replace("/", "_") + "_" + platform
filepath: Path = (output_dir / filename)
Expand Down
81 changes: 0 additions & 81 deletions launch/scripts/recollect.py

This file was deleted.

Loading
Loading