diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml index 0676be051..7a9716a11 100644 --- a/.github/workflows/release-vm-dev.yml +++ b/.github/workflows/release-vm-dev.yml @@ -7,21 +7,34 @@ name: Release VM Dev # # Prerequisites: the vm-dev release must already contain kernel runtime # tarballs. Run the "Release VM Kernel" workflow first if they are missing. +# +# OS-49: this workflow uses shared runners for VM build/package jobs. It does +# not validate VFIO GPU passthrough; that requires a dedicated VFIO-capable host. on: push: branches: [main] workflow_dispatch: + inputs: + release_tag: + description: GitHub Release tag to update + required: false + default: vm-dev + type: string permissions: contents: write packages: read -# Serialize with release-vm-kernel.yml — both update the vm-dev release. +# Serialize with release-vm-kernel.yml when both workflows update the same +# release tag. concurrency: - group: vm-dev-release + group: vm-release-${{ inputs.release_tag || 'vm-dev' }} cancel-in-progress: false +env: + VM_RELEASE_TAG: ${{ inputs.release_tag || 'vm-dev' }} + defaults: run: shell: bash @@ -32,7 +45,7 @@ jobs: # --------------------------------------------------------------------------- compute-versions: name: Compute Versions - runs-on: build-amd64 + runs-on: linux-amd64-cpu8 timeout-minutes: 5 container: image: ghcr.io/nvidia/openshell/ci:latest @@ -56,14 +69,21 @@ jobs: id: v run: | set -euo pipefail - echo "cargo=$(uv run python tasks/scripts/release.py get-version --cargo)" >> "$GITHUB_OUTPUT" + # Scratch VM tags can look numeric to setuptools_scm, for example + # vm-dev-os49-1177 -> 1177. Only patch Cargo.toml with valid SemVer. + cargo_version="$(uv run python tasks/scripts/release.py get-version --cargo)" + if [[ ! "$cargo_version" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[0-9A-Za-z][0-9A-Za-z.-]*)?(\+[0-9A-Za-z][0-9A-Za-z.-]*)?$ ]]; then + echo "::warning::Computed cargo version '${cargo_version}' is not valid SemVer; leaving Cargo.toml version unchanged." + cargo_version="" + fi + printf 'cargo=%s\n' "$cargo_version" >> "$GITHUB_OUTPUT" # --------------------------------------------------------------------------- - # Download kernel runtime tarballs from the vm-dev release + # Download kernel runtime tarballs from the configured VM release # --------------------------------------------------------------------------- download-kernel-runtime: name: Download Kernel Runtime - runs-on: build-amd64 + runs-on: linux-amd64-cpu8 timeout-minutes: 10 container: image: ghcr.io/nvidia/openshell/ci:latest @@ -82,7 +102,7 @@ jobs: for platform in linux-aarch64 linux-x86_64 darwin-aarch64; do echo "Downloading vm-runtime-${platform}.tar.zst..." - gh release download vm-dev \ + gh release download "${VM_RELEASE_TAG}" \ --repo "${GITHUB_REPOSITORY}" \ --pattern "vm-runtime-${platform}.tar.zst" \ --dir runtime-artifacts \ @@ -100,9 +120,9 @@ jobs: if [ ! -f "$file" ]; then echo "ERROR: Missing ${file}" >&2 echo "" >&2 - echo "The vm-dev release does not have kernel runtime artifacts." >&2 + echo "The ${VM_RELEASE_TAG} release does not have kernel runtime artifacts." >&2 echo "Run the 'Release VM Kernel' workflow first:" >&2 - echo " gh workflow run release-vm-kernel.yml" >&2 + echo " gh workflow run release-vm-kernel.yml -f release_tag=${VM_RELEASE_TAG}" >&2 exit 1 fi echo "OK: ${file} ($(du -sh "$file" | cut -f1))" @@ -125,10 +145,10 @@ jobs: matrix: include: - arch: arm64 - runner: build-arm64 + runner: linux-arm64-cpu8 guest_arch: aarch64 - arch: amd64 - runner: build-amd64 + runner: linux-amd64-cpu8 guest_arch: x86_64 runs-on: ${{ matrix.runner }} timeout-minutes: 30 @@ -189,12 +209,12 @@ jobs: matrix: include: - arch: arm64 - runner: build-arm64 + runner: linux-arm64-cpu8 target: aarch64-unknown-linux-gnu platform: linux-aarch64 guest_arch: aarch64 - arch: amd64 - runner: build-amd64 + runner: linux-amd64-cpu8 target: x86_64-unknown-linux-gnu platform: linux-x86_64 guest_arch: x86_64 @@ -208,7 +228,6 @@ jobs: options: --privileged env: MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }} OPENSHELL_IMAGE_TAG: dev steps: - uses: actions/checkout@v6 @@ -315,7 +334,7 @@ jobs: build-vm-macos: name: Build VM (macOS) needs: [compute-versions, download-kernel-runtime, build-rootfs] - runs-on: build-amd64 + runs-on: linux-amd64-cpu8 timeout-minutes: 60 container: image: ghcr.io/nvidia/openshell/ci:latest @@ -327,7 +346,6 @@ jobs: - /var/run/docker.sock:/var/run/docker.sock env: MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }} steps: - uses: actions/checkout@v6 with: @@ -344,6 +362,8 @@ jobs: - name: Set up Docker Buildx uses: ./.github/actions/setup-buildx + with: + driver: local - name: Install zstd run: apt-get update && apt-get install -y --no-install-recommends zstd && rm -rf /var/lib/apt/lists/* @@ -426,12 +446,12 @@ jobs: matrix: include: - arch: arm64 - runner: build-arm64 + runner: linux-arm64-cpu8 target: aarch64-unknown-linux-gnu platform: linux-aarch64 guest_arch: aarch64 - arch: amd64 - runner: build-amd64 + runner: linux-amd64-cpu8 target: x86_64-unknown-linux-gnu platform: linux-x86_64 guest_arch: x86_64 @@ -445,7 +465,6 @@ jobs: options: --privileged env: MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }} OPENSHELL_IMAGE_TAG: dev steps: - uses: actions/checkout@v6 @@ -543,13 +562,69 @@ jobs: path: artifacts/*.tar.gz retention-days: 5 + # --------------------------------------------------------------------------- + # Build Linux ARM64 supervisor bundle for embedding into the macOS VM driver + # --------------------------------------------------------------------------- + build-supervisor-arm64: + name: Build Supervisor Bundle (arm64) + runs-on: linux-arm64-cpu8 + timeout-minutes: 30 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OPENSHELL_IMAGE_TAG: dev + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Install tools + run: mise install --locked + + - name: Cache Rust target and registry + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2 + with: + shared-key: driver-vm-supervisor-arm64 + cache-directories: .cache/sccache + cache-targets: "true" + + - name: Install zstd + run: apt-get update && apt-get install -y --no-install-recommends zstd && rm -rf /var/lib/apt/lists/* + + - name: Build bundled supervisor + run: | + set -euo pipefail + tasks/scripts/vm/build-supervisor-bundle.sh --arch aarch64 + + - name: sccache stats + if: always() + run: mise x -- sccache --show-stats + + - name: Upload supervisor bundle + uses: actions/upload-artifact@v4 + with: + name: driver-vm-supervisor-arm64 + path: target/vm-runtime-compressed/openshell-sandbox.zst + retention-days: 1 + if-no-files-found: error + # --------------------------------------------------------------------------- # Build openshell-driver-vm binary (macOS ARM64 via osxcross) # --------------------------------------------------------------------------- build-driver-vm-macos: name: Build Driver VM (macOS) - needs: [compute-versions, download-kernel-runtime] - runs-on: build-amd64 + needs: [compute-versions, download-kernel-runtime, build-supervisor-arm64] + runs-on: linux-amd64-cpu8 timeout-minutes: 60 container: image: ghcr.io/nvidia/openshell/ci:latest @@ -561,7 +636,6 @@ jobs: - /var/run/docker.sock:/var/run/docker.sock env: MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }} steps: - uses: actions/checkout@v6 with: @@ -578,6 +652,8 @@ jobs: - name: Set up Docker Buildx uses: ./.github/actions/setup-buildx + with: + driver: local - name: Install zstd run: apt-get update && apt-get install -y --no-install-recommends zstd && rm -rf /var/lib/apt/lists/* @@ -613,20 +689,17 @@ jobs: echo "Staged macOS compressed artifacts:" ls -lah "$COMPRESSED_DIR" - - name: Build bundled supervisor + - name: Download bundled supervisor + uses: actions/download-artifact@v4 + with: + name: driver-vm-supervisor-arm64 + path: target/vm-runtime-compressed-macos/ + + - name: Verify bundled supervisor run: | set -euo pipefail - docker buildx build \ - --file deploy/docker/Dockerfile.images \ - --platform linux/arm64 \ - --build-arg OPENSHELL_CARGO_VERSION="${{ needs.compute-versions.outputs.cargo_version }}" \ - --build-arg OPENSHELL_IMAGE_TAG=dev \ - --target supervisor-output \ - --output type=local,dest=supervisor-out/ \ - . - - zstd -19 -T0 -f supervisor-out/openshell-sandbox \ - -o "${PWD}/target/vm-runtime-compressed-macos/openshell-sandbox.zst" + test -f target/vm-runtime-compressed-macos/openshell-sandbox.zst + ls -lh target/vm-runtime-compressed-macos/openshell-sandbox.zst - name: Build macOS binary via Docker (osxcross) run: | @@ -657,7 +730,7 @@ jobs: retention-days: 5 # --------------------------------------------------------------------------- - # Upload all VM binaries to the vm-dev rolling release + # Upload all VM binaries to the configured VM release # --------------------------------------------------------------------------- release-vm-dev: name: Release VM Dev @@ -666,7 +739,7 @@ jobs: - build-vm-macos - build-driver-vm-linux - build-driver-vm-macos - runs-on: build-amd64 + runs-on: linux-amd64-cpu8 timeout-minutes: 10 steps: - uses: actions/checkout@v6 @@ -708,24 +781,25 @@ jobs: > vm-binary-checksums-sha256.txt cat vm-binary-checksums-sha256.txt - - name: Ensure vm-dev tag exists + - name: Ensure VM release tag exists run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git tag -fa vm-dev -m "VM Development Build" "${GITHUB_SHA}" - git push --force origin vm-dev + git tag -fa "${VM_RELEASE_TAG}" -m "VM Development Build" "${GITHUB_SHA}" + git push --force origin "refs/tags/${VM_RELEASE_TAG}" - - name: Prune stale VM binary assets from vm-dev release + - name: Prune stale VM binary assets from VM release uses: actions/github-script@v7 with: script: | const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/'); + const tag = process.env.VM_RELEASE_TAG; let release; try { - release = await github.rest.repos.getReleaseByTag({ owner, repo, tag: 'vm-dev' }); + release = await github.rest.repos.getReleaseByTag({ owner, repo, tag }); } catch (err) { if (err.status === 404) { - core.info('No existing vm-dev release; will create fresh.'); + core.info(`No existing ${tag} release; will create fresh.`); return; } throw err; @@ -742,12 +816,12 @@ jobs: } } - - name: Upload to vm-dev GitHub Release + - name: Upload to VM GitHub Release uses: softprops/action-gh-release@v2 with: name: OpenShell VM Development Build prerelease: true - tag_name: vm-dev + tag_name: ${{ env.VM_RELEASE_TAG }} target_commitish: ${{ github.sha }} body: | Rolling development build of **openshell-vm** — the MicroVM runtime for OpenShell. diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml index 4f63a5a4b..bb3f0b465 100644 --- a/.github/workflows/release-vm-kernel.yml +++ b/.github/workflows/release-vm-kernel.yml @@ -13,19 +13,32 @@ name: Release VM Kernel # This workflow runs on-demand (or when kernel config / pins change). It is # intentionally decoupled from the per-commit VM binary build because the # kernel rarely changes and takes 15-45 minutes to compile. +# +# OS-49: this workflow uses shared runners for VM runtime build/package jobs. +# It does not validate VFIO GPU passthrough. on: workflow_dispatch: + inputs: + release_tag: + description: GitHub Release tag to update + required: false + default: vm-dev + type: string permissions: contents: write packages: read -# Serialize with release-vm-dev.yml — both update the vm-dev release. +# Serialize with release-vm-dev.yml when both workflows update the same +# release tag. concurrency: - group: vm-dev-release + group: vm-release-${{ inputs.release_tag || 'vm-dev' }} cancel-in-progress: false +env: + VM_RELEASE_TAG: ${{ inputs.release_tag || 'vm-dev' }} + defaults: run: shell: bash @@ -36,7 +49,7 @@ jobs: # --------------------------------------------------------------------------- build-runtime-linux-arm64: name: Build Runtime (Linux ARM64) - runs-on: build-arm64 + runs-on: linux-arm64-cpu8 timeout-minutes: 60 container: image: ghcr.io/nvidia/openshell/ci:latest @@ -79,6 +92,7 @@ jobs: path: | target/libkrun-build/kernel.c target/libkrun-build/ABI_VERSION + if-no-files-found: error retention-days: 1 # --------------------------------------------------------------------------- @@ -86,7 +100,7 @@ jobs: # --------------------------------------------------------------------------- build-runtime-linux-amd64: name: Build Runtime (Linux AMD64) - runs-on: build-amd64 + runs-on: linux-amd64-cpu8 timeout-minutes: 60 container: image: ghcr.io/nvidia/openshell/ci:latest @@ -163,12 +177,12 @@ jobs: retention-days: 5 # --------------------------------------------------------------------------- - # Upload all runtime tarballs to the vm-dev rolling release + # Upload all runtime tarballs to the configured VM release # --------------------------------------------------------------------------- release-kernel: name: Release Kernel Runtime needs: [build-runtime-linux-arm64, build-runtime-linux-amd64, build-runtime-macos-arm64] - runs-on: build-amd64 + runs-on: linux-amd64-cpu8 timeout-minutes: 10 steps: - uses: actions/checkout@v6 @@ -187,24 +201,25 @@ jobs: sha256sum vm-runtime-*.tar.zst > vm-runtime-checksums-sha256.txt cat vm-runtime-checksums-sha256.txt - - name: Ensure vm-dev tag exists + - name: Ensure VM release tag exists run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git tag -fa vm-dev -m "VM Development Build" "${GITHUB_SHA}" - git push --force origin vm-dev + git tag -fa "${VM_RELEASE_TAG}" -m "VM Development Build" "${GITHUB_SHA}" + git push --force origin "refs/tags/${VM_RELEASE_TAG}" - - name: Prune stale runtime assets from vm-dev release + - name: Prune stale runtime assets from VM release uses: actions/github-script@v7 with: script: | const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/'); + const tag = process.env.VM_RELEASE_TAG; let release; try { - release = await github.rest.repos.getReleaseByTag({ owner, repo, tag: 'vm-dev' }); + release = await github.rest.repos.getReleaseByTag({ owner, repo, tag }); } catch (err) { if (err.status === 404) { - core.info('No existing vm-dev release; will create fresh.'); + core.info(`No existing ${tag} release; will create fresh.`); return; } throw err; @@ -217,12 +232,12 @@ jobs: } } - - name: Create / update vm-dev GitHub Release + - name: Create / update VM GitHub Release uses: softprops/action-gh-release@v2 with: name: OpenShell VM Development Build prerelease: true - tag_name: vm-dev + tag_name: ${{ env.VM_RELEASE_TAG }} target_commitish: ${{ github.sha }} body: | Rolling development build of **openshell-vm** — the MicroVM runtime for OpenShell. diff --git a/architecture/ci-e2e.md b/architecture/ci-e2e.md index 0041f981b..5b57a9cb5 100644 --- a/architecture/ci-e2e.md +++ b/architecture/ci-e2e.md @@ -34,7 +34,7 @@ OS-49 Phase 5 added non-required shadow workflows for the non-release workflows The `mise-lockfile` job regenerates `mise.lock` with the CI image's pinned mise version and requires the checked-in file to match exactly. This intentionally includes generated metadata so contributors catch toolchain-version drift instead of letting different mise versions churn the lockfile. -OS-49 Phase 7 moves the release-facing CPU jobs in `release-canary.yml`, `release-dev.yml`, and `release-tag.yml` to the same shared CPU labels. The release workflows also call `driver-vm-linux.yml` and `deb-package.yml`, so those reusable workers use the same labels to avoid retaining a hidden ARC dependency in the release path. `release-vm-dev.yml` and `release-vm-kernel.yml` remain on the old labels until the VM runtime decision is recorded for OS-131. +OS-49 Phase 7 moves the release-facing CPU jobs in `release-canary.yml`, `release-dev.yml`, and `release-tag.yml` to the same shared CPU labels. The release workflows also call `driver-vm-linux.yml` and `deb-package.yml`, so those reusable workers use the same labels to avoid retaining a hidden ARC dependency in the release path. `release-vm-dev.yml` and `release-vm-kernel.yml` use shared runners for VM build/package work; actual VFIO GPU passthrough remains a separate dedicated-host validation path. ## Trigger taxonomy diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md index 4f6bffa34..f9ba11be2 100644 --- a/architecture/custom-vm-runtime.md +++ b/architecture/custom-vm-runtime.md @@ -299,8 +299,8 @@ versions change. | Platform | Runner | Build Method | |----------|--------|-------------| -| Linux ARM64 | `build-arm64` (self-hosted) | Native `build-libkrun.sh` | -| Linux x86_64 | `build-amd64` (self-hosted) | Native `build-libkrun.sh` | +| Linux ARM64 | `linux-arm64-cpu8` (shared runner) | Native `build-libkrun.sh` | +| Linux x86_64 | `linux-amd64-cpu8` (shared runner) | Native `build-libkrun.sh` | | macOS ARM64 | `macos-latest-xlarge` (GitHub-hosted) | `build-libkrun-macos.sh` | Artifacts: `vm-runtime-{platform}.tar.zst` containing libkrun, libkrunfw, @@ -321,6 +321,9 @@ run `cargo build --release -p openshell-driver-vm`. The macOS driver is cross-compiled via osxcross (no macOS runner needed for the binary build — only for the kernel build). +These release workflows build and package VM artifacts. They do not validate +VFIO GPU passthrough; passthrough requires a dedicated VFIO-capable host. + macOS driver binaries produced via osxcross are not codesigned. Development builds are signed automatically by `tasks/scripts/gateway-vm.sh` (registered as `mise run gateway:vm`); a packaged release needs signing in diff --git a/tasks/scripts/vm/build-libkrun.sh b/tasks/scripts/vm/build-libkrun.sh index ec636f2a3..d0e33e6ce 100755 --- a/tasks/scripts/vm/build-libkrun.sh +++ b/tasks/scripts/vm/build-libkrun.sh @@ -66,7 +66,7 @@ install_deps() { if command -v apt-get &>/dev/null; then # Debian/Ubuntu - DEPS="build-essential git python3 python3-pip python3-pyelftools flex bison libelf-dev libssl-dev bc curl libclang-dev cpio zstd jq" + DEPS="build-essential git python3 python3-pip python3-pyelftools flex bison libelf-dev libssl-dev libcap-ng-dev bc curl libclang-dev cpio zstd jq" MISSING="" for dep in $DEPS; do if ! dpkg -s "$dep" &>/dev/null; then @@ -83,14 +83,14 @@ install_deps() { elif command -v dnf &>/dev/null; then # Fedora/RHEL - DEPS="make git python3 python3-pyelftools gcc flex bison elfutils-libelf-devel openssl-devel bc glibc-static curl clang-devel cpio zstd jq" + DEPS="make git python3 python3-pyelftools gcc flex bison elfutils-libelf-devel openssl-devel libcap-ng-devel bc glibc-static curl clang-devel cpio zstd jq" echo " Installing dependencies via dnf..." $SUDO dnf install -y $DEPS else echo "Warning: Unknown package manager. Please install manually:" >&2 echo " build-essential git python3 python3-pyelftools flex bison" >&2 - echo " libelf-dev libssl-dev bc curl cpio" >&2 + echo " libelf-dev libssl-dev libcap-ng-dev bc curl cpio" >&2 fi } @@ -239,6 +239,32 @@ make -j"$(nproc)" cp libkrunfw.so* "$OUTPUT_DIR/" echo " Built: $(ls "$OUTPUT_DIR"/libkrunfw.so* | xargs -n1 basename | tr '\n' ' ')" +# Export the generated kernel bundle for the macOS job. The generated file is +# produced by libkrunfw and may move if upstream adjusts its build layout, so +# prefer the expected path and fail loudly if it is absent. +KERNEL_C_SOURCE="kernel.c" +if [ ! -f "$KERNEL_C_SOURCE" ]; then + KERNEL_C_SOURCE="$(find . -maxdepth 3 -name kernel.c -print -quit)" +fi +if [ -z "$KERNEL_C_SOURCE" ] || [ ! -f "$KERNEL_C_SOURCE" ]; then + echo "ERROR: libkrunfw build did not produce kernel.c" >&2 + exit 1 +fi +cp "$KERNEL_C_SOURCE" "$OUTPUT_DIR/kernel.c" + +if [ -f ABI_VERSION ]; then + cp ABI_VERSION "$OUTPUT_DIR/ABI_VERSION" +else + LIBKRUNFW_VERSIONED="$(ls "$OUTPUT_DIR"/libkrunfw.so.* 2>/dev/null | sort -V | tail -n1 || true)" + ABI_VERSION="$(basename "$LIBKRUNFW_VERSIONED" | sed -n 's/^libkrunfw\.so\.\([0-9][0-9]*\).*/\1/p')" + if [ -z "$ABI_VERSION" ]; then + echo "ERROR: libkrunfw build did not produce ABI_VERSION and ABI could not be inferred" >&2 + exit 1 + fi + printf '%s\n' "$ABI_VERSION" > "$OUTPUT_DIR/ABI_VERSION" +fi +echo " Exported: kernel.c ABI_VERSION=$(cat "$OUTPUT_DIR/ABI_VERSION")" + cd "$BUILD_DIR" # ── Build libkrun (VMM) ─────────────────────────────────────────────────