diff --git a/.github/workflows/branch-e2e.yml b/.github/workflows/branch-e2e.yml index cb2f5440e..3d8dd5928 100644 --- a/.github/workflows/branch-e2e.yml +++ b/.github/workflows/branch-e2e.yml @@ -35,8 +35,19 @@ jobs: component: gateway platform: linux/arm64 + build-supervisor: + needs: [pr_metadata] + if: needs.pr_metadata.outputs.should_run == 'true' + permissions: + contents: read + packages: write + uses: ./.github/workflows/docker-build.yml + with: + component: supervisor + platform: linux/arm64 + e2e: - needs: [pr_metadata, build-gateway] + needs: [pr_metadata, build-gateway, build-supervisor] if: needs.pr_metadata.outputs.should_run == 'true' permissions: contents: read diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 536cfb2a5..e8e03799e 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -28,8 +28,13 @@ jobs: include: - suite: python cmd: "mise run --no-deps --skip-deps e2e:python" - - suite: rust + apt_packages: "" + - suite: rust-docker cmd: "mise run --no-deps --skip-deps e2e:rust" + apt_packages: "openssh-client" + - suite: rust-podman + cmd: "mise run --no-deps --skip-deps e2e:podman" + apt_packages: "openssh-client podman" container: image: ghcr.io/nvidia/openshell/ci:latest credentials: @@ -50,16 +55,24 @@ jobs: steps: - uses: actions/checkout@v6 - - name: Log in to GHCR + - name: Install OS test dependencies + if: matrix.apt_packages != '' + env: + APT_PACKAGES: ${{ matrix.apt_packages }} + run: apt-get update && apt-get install -y ${APT_PACKAGES} && rm -rf /var/lib/apt/lists/* + + - name: Log in to GHCR with Docker run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + - name: Log in to GHCR with Podman + if: matrix.suite == 'rust-podman' + run: echo "${{ secrets.GITHUB_TOKEN }}" | podman login ghcr.io -u "${{ github.actor }}" --password-stdin + - name: Install Python dependencies and generate protobuf stubs if: matrix.suite == 
'python' run: uv sync --frozen && mise run --no-deps python:proto - - name: Install SSH client - if: matrix.suite != 'python' - run: apt-get update && apt-get install -y --no-install-recommends openssh-client && rm -rf /var/lib/apt/lists/* - - name: Run tests + env: + OPENSHELL_SUPERVISOR_IMAGE: ${{ format('ghcr.io/nvidia/openshell/supervisor:{0}', inputs.image-tag) }} run: ${{ matrix.cmd }} diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index 0165b132c..5086ac7cd 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -71,7 +71,7 @@ jobs: cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} e2e: - needs: [build-gateway, build-cluster] + needs: [build-gateway, build-supervisor, build-cluster] uses: ./.github/workflows/e2e-test.yml with: image-tag: ${{ github.sha }} diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml index 639188571..1794beb25 100644 --- a/.github/workflows/release-tag.yml +++ b/.github/workflows/release-tag.yml @@ -86,7 +86,7 @@ jobs: cargo-version: ${{ needs.compute-versions.outputs.cargo_version }} e2e: - needs: [build-gateway, build-cluster] + needs: [build-gateway, build-supervisor, build-cluster] uses: ./.github/workflows/e2e-test.yml with: image-tag: ${{ github.sha }} diff --git a/TESTING.md b/TESTING.md index c356b4a62..49c9b781a 100644 --- a/TESTING.md +++ b/TESTING.md @@ -145,29 +145,43 @@ Rust-based e2e tests that exercise the `openshell` CLI binary as a subprocess. They live in the `openshell-e2e` crate and use a shared harness for sandbox lifecycle management, output parsing, and cleanup. 
-Tests: +Suites: -- `tests/custom_image.rs` — custom Docker image build and sandbox run -- `tests/sync.rs` — bidirectional file sync round-trip (including large files) -- `tests/port_forward.rs` — TCP port forwarding through a sandbox +- Common suite (`--features e2e`) - driver-neutral CLI behavior, sandbox lifecycle, sync, port forwarding, policy, and provider tests. +- Docker suite (`--features e2e-docker`) - common suite plus Docker-only coverage such as Dockerfile image builds, Docker preflight checks, and managed Docker gateway resume. +- Docker GPU suite (`--features e2e-docker-gpu`) - Docker suite plus GPU sandbox smoke coverage. -Run all CLI e2e tests: +Run the Docker-backed Rust CLI e2e suite: -```bash +```shell mise run e2e:rust ``` +Run the Podman-backed Rust CLI e2e suite: + +```shell +mise run e2e:podman +``` + Run a single test directly with cargo: -```bash +```shell cargo test --manifest-path e2e/rust/Cargo.toml --features e2e --test sync ``` +Run a single Docker-only test directly with cargo: + +```shell +cargo test --manifest-path e2e/rust/Cargo.toml --features e2e-docker --test custom_image +``` + The harness (`e2e/rust/src/harness/`) provides: | Module | Purpose | |---|---| | `binary` | Builds and resolves the `openshell` binary from the workspace | +| `container` | Container-engine selection and support containers for proxy tests | +| `gateway` | Managed gateway restart controls for gateway-owned e2e runs | | `sandbox` | `SandboxGuard` RAII type — creates sandboxes and deletes them on drop | | `output` | ANSI stripping and field extraction from CLI output | | `port` | `wait_for_port()` and `find_free_port()` for TCP testing | @@ -178,3 +192,4 @@ The harness (`e2e/rust/src/harness/`) provides: |---|---| | `OPENSHELL_GATEWAY` | Override active gateway name for E2E tests | | `OPENSHELL_GATEWAY_ENDPOINT` | Run E2E tests against an existing plaintext HTTP gateway endpoint | +| `OPENSHELL_E2E_DRIVER` | Driver name exported by the e2e gateway wrapper 
(`docker`, `podman`, or `vm`) | diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index ff0b68a46..3c5df292f 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -33,6 +33,8 @@ fn is_selinux_enabled() -> bool { pub const LABEL_SANDBOX_ID: &str = "openshell.sandbox-id"; /// Label key for the sandbox name. pub const LABEL_SANDBOX_NAME: &str = "openshell.sandbox-name"; +/// Label key for the sandbox namespace. +pub const LABEL_SANDBOX_NAMESPACE: &str = "openshell.sandbox-namespace"; /// Label applied to all managed containers. pub const LABEL_MANAGED: &str = "openshell.managed"; /// Label filter string for list/event queries. @@ -308,6 +310,7 @@ fn build_labels(sandbox: &DriverSandbox) -> BTreeMap { // Managed labels (highest priority -- always overwrite). labels.insert(LABEL_SANDBOX_ID.into(), sandbox.id.clone()); labels.insert(LABEL_SANDBOX_NAME.into(), sandbox.name.clone()); + labels.insert(LABEL_SANDBOX_NAMESPACE.into(), sandbox.namespace.clone()); labels.insert(LABEL_MANAGED.into(), "true".into()); labels @@ -499,12 +502,13 @@ pub fn build_container_spec(sandbox: &DriverSandbox, config: &PodmanComputeConfi secret_name(&sandbox.id), )]), stop_timeout: config.stop_timeout_secs, - // Inject host.containers.internal into /etc/hosts so sandbox - // containers can reach the gateway server on the host. The - // "host-gateway" magic value tells Podman to resolve to the - // host's actual IP (pasta uses 169.254.1.2 in rootless mode). - // This is the Podman equivalent of Docker's host.docker.internal. - hostadd: vec!["host.containers.internal:host-gateway".into()], + // Inject stable host aliases into /etc/hosts so sandbox containers can + // reach services on the host. `host.openshell.internal` is the driver- + // neutral alias used by policies and e2e tests. 
+ hostadd: vec![ + "host.containers.internal:host-gateway".into(), + "host.openshell.internal:host-gateway".into(), + ], netns: NetNS { nsmode: "bridge".to_string(), }, @@ -856,12 +860,17 @@ mod tests { use openshell_core::proto::compute::v1::{DriverSandboxSpec, DriverSandboxTemplate}; let mut sandbox = test_sandbox("real-id", "real-name"); + sandbox.namespace = "real-namespace".to_string(); let mut label_overrides = std::collections::HashMap::new(); label_overrides.insert("openshell.sandbox-id".to_string(), "spoofed-id".to_string()); label_overrides.insert( "openshell.sandbox-name".to_string(), "spoofed-name".to_string(), ); + label_overrides.insert( + "openshell.sandbox-namespace".to_string(), + "spoofed-namespace".to_string(), + ); sandbox.spec = Some(DriverSandboxSpec { template: Some(DriverSandboxTemplate { labels: label_overrides, @@ -888,6 +897,40 @@ mod tests { Some("real-name"), "openshell.sandbox-name must not be overridden by template labels" ); + assert_eq!( + labels + .get("openshell.sandbox-namespace") + .and_then(|v| v.as_str()), + Some("real-namespace"), + "openshell.sandbox-namespace must not be overridden by template labels" + ); + } + + #[test] + fn container_spec_injects_host_aliases() { + let sandbox = test_sandbox("test-id", "test-name"); + let config = test_config(); + let spec = build_container_spec(&sandbox, &config); + + let hostadd: Vec<&str> = spec["hostadd"] + .as_array() + .expect("hostadd should be an array") + .iter() + .filter_map(|v| v.as_str()) + .collect(); + + assert!( + hostadd.contains(&"host.containers.internal:host-gateway"), + "missing Podman host alias" + ); + assert!( + hostadd.contains(&"host.openshell.internal:host-gateway"), + "missing OpenShell stable host alias" + ); + assert!( + !hostadd.contains(&"host.docker.internal:host-gateway"), + "Podman should not inject Docker's host alias" + ); } #[test] diff --git a/e2e/rust/Cargo.toml b/e2e/rust/Cargo.toml index fb45c27e1..57bc1ff68 100644 --- a/e2e/rust/Cargo.toml +++ 
b/e2e/rust/Cargo.toml @@ -17,6 +17,28 @@ publish = false [features] e2e = [] +e2e-docker = ["e2e"] +e2e-docker-gpu = ["e2e-docker"] + +[[test]] +name = "custom_image" +path = "tests/custom_image.rs" +required-features = ["e2e-docker"] + +[[test]] +name = "docker_gpu" +path = "tests/docker_gpu.rs" +required-features = ["e2e-docker-gpu"] + +[[test]] +name = "docker_preflight" +path = "tests/docker_preflight.rs" +required-features = ["e2e-docker"] + +[[test]] +name = "gateway_resume" +path = "tests/gateway_resume.rs" +required-features = ["e2e-docker"] [dependencies] tokio = { version = "1.43", features = ["full"] } diff --git a/e2e/rust/e2e-docker.sh b/e2e/rust/e2e-docker.sh index ebdf631bb..a020f87c8 100755 --- a/e2e/rust/e2e-docker.sh +++ b/e2e/rust/e2e-docker.sh @@ -10,11 +10,12 @@ set -euo pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" E2E_TEST="${OPENSHELL_E2E_DOCKER_TEST:-smoke}" +E2E_FEATURES="${OPENSHELL_E2E_DOCKER_FEATURES:-e2e,e2e-docker}" cargo build -p openshell-cli --features openshell-core/dev-settings exec "${ROOT}/e2e/with-docker-gateway.sh" \ cargo test --manifest-path "${ROOT}/e2e/rust/Cargo.toml" \ - --features e2e \ + --features "${E2E_FEATURES}" \ --test "${E2E_TEST}" \ -- --nocapture diff --git a/e2e/rust/e2e-podman.sh b/e2e/rust/e2e-podman.sh index aa7b0ad48..44c2eaeb4 100755 --- a/e2e/rust/e2e-podman.sh +++ b/e2e/rust/e2e-podman.sh @@ -2,171 +2,17 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# Run the Rust e2e smoke test against a Podman-backed gateway. -# -# Usage: -# mise run e2e:podman # start a gateway with Podman driver -# mise run e2e:podman -- --port=9090 # use a specific port -# -# Options: -# --port=PORT Gateway listen port (default: random free port). -# -# The script: -# 1. Verifies Podman is available and the socket is reachable -# 2. Starts openshell-gateway with --drivers podman --disable-tls -# 3. 
Waits for the gateway to become healthy -# 4. Runs the Rust smoke test -# 5. Cleans up the gateway process and any leftover sandbox containers -# -# Prerequisites: -# - Rootless Podman service running (systemctl --user start podman.socket) -# - Supervisor image built (mise run build:docker:supervisor) -# - Sandbox base image available locally +# Run the Rust e2e suite against a standalone gateway running the bundled Podman +# compute driver. Set OPENSHELL_GATEWAY_ENDPOINT=http://host:port to reuse an +# existing plaintext gateway instead of starting an ephemeral one. set -euo pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" -GATEWAY_BIN="${ROOT}/target/debug/openshell-gateway" -TIMEOUT=120 -# ── Parse arguments ────────────────────────────────────────────────── -PORT="" -for arg in "$@"; do - case "$arg" in - --port=*) PORT="${arg#--port=}" ;; - *) echo "Unknown argument: $arg"; exit 1 ;; - esac -done - -if [ -z "${PORT}" ]; then - PORT=$(python3 -c 'import socket; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()') -fi - -# Allocate a separate port for the unauthenticated health endpoint. -HEALTH_PORT=$(python3 -c 'import socket; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()') - -# ── Pre-flight checks ─────────────────────────────────────────────── - -if ! command -v podman &>/dev/null; then - echo "ERROR: podman is not installed or not in PATH" - exit 1 -fi - -if ! podman info &>/dev/null; then - echo "ERROR: podman service is not reachable. Start it with:" - echo " systemctl --user start podman.socket" - exit 1 -fi - -if [ ! -f "${GATEWAY_BIN}" ]; then - echo "Building openshell-gateway..." - cargo build -p openshell-server --features openshell-core/dev-settings -fi - -# ── Resolve images ─────────────────────────────────────────────────── -# Use the same image defaults as the driver, allowing env overrides. 
-SUPERVISOR_IMAGE="${OPENSHELL_SUPERVISOR_IMAGE:-openshell/supervisor:dev}" -SANDBOX_IMAGE="${OPENSHELL_SANDBOX_IMAGE:-}" - -# Verify the supervisor image exists locally. -if ! podman image exists "${SUPERVISOR_IMAGE}" 2>/dev/null; then - echo "ERROR: supervisor image '${SUPERVISOR_IMAGE}' not found locally." - echo "Build it with: mise run build:docker:supervisor" - exit 1 -fi - -# ── Generate a unique handshake secret ─────────────────────────────── -HANDSHAKE_SECRET="e2e-podman-$(python3 -c 'import secrets; print(secrets.token_hex(16))')" - -# ── Start the gateway ──────────────────────────────────────────────── -GW_LOG=$(mktemp /tmp/openshell-gw-podman-e2e.XXXXXX) -GW_PID="" - -cleanup() { - local exit_code=$? - - # Kill the gateway process. - if [ -n "${GW_PID:-}" ] && kill -0 "${GW_PID}" 2>/dev/null; then - echo "Stopping gateway (pid ${GW_PID})..." - kill "${GW_PID}" 2>/dev/null || true - wait "${GW_PID}" 2>/dev/null || true - fi - - # Clean up any leftover sandbox containers, volumes, and secrets. - echo "Cleaning up Podman resources..." - for cid in $(podman ps -a --filter label=openshell.managed=true --format '{{.ID}}' 2>/dev/null); do - podman rm -f "${cid}" 2>/dev/null || true - done - for vid in $(podman volume ls --filter label=openshell.managed=true --format '{{.Name}}' 2>/dev/null); do - podman volume rm -f "${vid}" 2>/dev/null || true - done - # Secrets created by the driver use the openshell-handshake- prefix. - for sid in $(podman secret ls --format '{{.Name}}' 2>/dev/null | grep '^openshell-handshake-'); do - podman secret rm "${sid}" 2>/dev/null || true - done - - if [ "${exit_code}" -ne 0 ] && [ -f "${GW_LOG}" ]; then - echo "=== Gateway log (preserved for debugging) ===" - cat "${GW_LOG}" - echo "=== end gateway log ===" - fi - - rm -f "${GW_LOG}" 2>/dev/null || true -} -trap cleanup EXIT - -echo "Starting openshell-gateway on port ${PORT} with Podman driver..." 
- -OPENSHELL_SSH_HANDSHAKE_SECRET="${HANDSHAKE_SECRET}" \ -OPENSHELL_SUPERVISOR_IMAGE="${SUPERVISOR_IMAGE}" \ - "${GATEWAY_BIN}" \ - --port "${PORT}" \ - --health-port "${HEALTH_PORT}" \ - --ssh-gateway-port "${PORT}" \ - --drivers podman \ - --disable-tls \ - --db-url "sqlite::memory:" \ - ${SANDBOX_IMAGE:+--sandbox-image "${SANDBOX_IMAGE}"} \ - --log-level info \ - >"${GW_LOG}" 2>&1 & -GW_PID=$! - -# ── Wait for health ───────────────────────────────────────────────── -echo "Waiting for gateway to become healthy (timeout ${TIMEOUT}s)..." -elapsed=0 -healthy=false -while [ "${elapsed}" -lt "${TIMEOUT}" ]; do - if ! kill -0 "${GW_PID}" 2>/dev/null; then - echo "ERROR: gateway exited before becoming ready" - cat "${GW_LOG}" - exit 1 - fi - - # Poll the dedicated health port (--health-port). - if curl -sf "http://127.0.0.1:${HEALTH_PORT}/healthz" >/dev/null 2>&1; then - healthy=true - break - fi - - sleep 2 - elapsed=$((elapsed + 2)) -done - -if [ "${healthy}" != "true" ]; then - echo "ERROR: gateway did not become healthy after ${TIMEOUT}s" - cat "${GW_LOG}" - exit 1 -fi -echo "Gateway is ready (${elapsed}s)." - -# ── Run the smoke test ─────────────────────────────────────────────── -export OPENSHELL_GATEWAY_ENDPOINT="http://127.0.0.1:${PORT}" -# Use a synthetic gateway name so the CLI does not require stored mTLS creds. -export OPENSHELL_GATEWAY="e2e-podman" -export OPENSHELL_PROVISION_TIMEOUT=300 - -echo "Running e2e smoke test (gateway: ${OPENSHELL_GATEWAY}, endpoint: ${OPENSHELL_GATEWAY_ENDPOINT})..." cargo build -p openshell-cli --features openshell-core/dev-settings -cargo test --manifest-path e2e/rust/Cargo.toml --features e2e --test smoke -- --nocapture -echo "Smoke test passed." 
+exec "${ROOT}/e2e/with-podman-gateway.sh" \ + cargo test --manifest-path "${ROOT}/e2e/rust/Cargo.toml" \ + --features e2e \ + -- --nocapture diff --git a/e2e/rust/src/harness/container.rs b/e2e/rust/src/harness/container.rs new file mode 100644 index 000000000..8d9f03fb3 --- /dev/null +++ b/e2e/rust/src/harness/container.rs @@ -0,0 +1,198 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Container-engine helpers for Rust e2e tests. +//! +//! Most e2e tests should exercise the `OpenShell` gateway contract rather than a +//! specific local container runtime. This module keeps small support containers +//! and container-engine selection aligned between Docker- and Podman-backed +//! gateway runs. + +use std::process::Command; +use std::time::Duration; + +use tokio::time::{interval, timeout}; + +use super::port::find_free_port; + +const DEFAULT_TEST_SERVER_IMAGE: &str = + "ghcr.io/nvidia/openshell-community/sandboxes/base:latest"; + +/// Returns the trimmed, lowercased `OPENSHELL_E2E_DRIVER` value, or `None` when it is unset or blank. +#[must_use] +pub fn e2e_driver() -> Option<String> { + std::env::var("OPENSHELL_E2E_DRIVER") + .ok() + .map(|value| value.trim().to_ascii_lowercase()) + .filter(|value| !value.is_empty()) +} + +#[must_use] +pub fn is_e2e_driver(driver: &str) -> bool { + e2e_driver().as_deref() == Some(driver) +} + +#[derive(Clone, Debug)] +pub struct ContainerEngine { + binary: String, +} + +impl ContainerEngine { + /// Picks the engine binary: a non-blank `OPENSHELL_E2E_CONTAINER_ENGINE` (trimmed) wins, + /// otherwise `podman` for the Podman driver and `docker` for every other driver. + #[must_use] + pub fn from_env() -> Self { + let binary = std::env::var("OPENSHELL_E2E_CONTAINER_ENGINE") + .ok() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) + .unwrap_or_else(|| match e2e_driver().as_deref() { + Some("podman") => "podman".to_string(), + _ => "docker".to_string(), + }); + + Self { binary } + } + + #[must_use] + pub fn command(&self) -> Command { + let mut command = Command::new(&self.binary); + if let Ok(value) = 
std::env::var("OPENSHELL_E2E_CONTAINER_ENGINE_XDG_CONFIG_HOME") { + command.env("XDG_CONFIG_HOME", value); + } else if std::env::var_os("OPENSHELL_E2E_CONTAINER_ENGINE_UNSET_XDG_CONFIG_HOME").is_some() + { + command.env_remove("XDG_CONFIG_HOME"); + } + command + } + + #[must_use] + pub fn name(&self) -> &str { + &self.binary + } +} + +/// Returns the first non-blank value, trimmed, of `OPENSHELL_E2E_NETWORK_NAME` and the +/// Docker-specific `OPENSHELL_E2E_DOCKER_NETWORK_NAME`; a blank primary value falls through to the fallback. +#[must_use] +pub fn e2e_network_name() -> Option<String> { + ["OPENSHELL_E2E_NETWORK_NAME", "OPENSHELL_E2E_DOCKER_NETWORK_NAME"] + .into_iter() + .filter_map(|key| std::env::var(key).ok()) + .map(|value| value.trim().to_string()) + .find(|value| !value.is_empty()) +} + +pub struct ContainerHttpServer { + pub host: String, + pub port: u16, + container_id: String, + engine: ContainerEngine, +} + +impl ContainerHttpServer { + /// Runs `script` with `python3 -c` in a detached container and waits for port 8000 to answer. + /// With an e2e network set, the server is addressed as `alias:8000` on that network; otherwise + /// container port 8000 is published on a free host port and addressed via `host.openshell.internal`. + pub async fn start_python(alias: &str, script: &str) -> Result<Self, String> { + let engine = ContainerEngine::from_env(); + let host_port = find_free_port(); + let network = e2e_network_name(); + let host = network.as_ref().map_or_else( + || "host.openshell.internal".to_string(), + |_| alias.to_string(), + ); + let port = if network.is_some() { 8000 } else { host_port }; + + let mut args = vec![ + "run".to_string(), + "--detach".to_string(), + "--rm".to_string(), + "--entrypoint".to_string(), + "python3".to_string(), + ]; + if let Some(network) = network.as_deref() { + args.extend([ + "--network".to_string(), + network.to_string(), + "--network-alias".to_string(), + alias.to_string(), + ]); + } else { + args.extend(["-p".to_string(), format!("{host_port}:8000")]); + } + args.extend([ + DEFAULT_TEST_SERVER_IMAGE.to_string(), + "-c".to_string(), + script.to_string(), + ]); + + let output = engine + .command() + .args(&args) + .output() + .map_err(|e| format!("start {} test server: {e}", engine.name()))?; + + let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + + if !output.status.success() { + return Err(format!( + "{} run failed (exit 
{:?}):\n{stderr}", + engine.name(), + output.status.code() + )); + } + + let server = Self { + host, + port, + container_id: stdout, + engine, + }; + server.wait_until_ready().await?; + Ok(server) + } + + async fn wait_until_ready(&self) -> Result<(), String> { + let container_id = self.container_id.clone(); + let engine = self.engine.clone(); + timeout(Duration::from_secs(60), async move { + let mut tick = interval(Duration::from_millis(500)); + loop { + tick.tick().await; + let output = engine + .command() + .args([ + "exec", + &container_id, + "python3", + "-c", + "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000', timeout=1).read()", + ]) + .output() + .ok(); + if output.is_some_and(|o| o.status.success()) { + return; + } + } + }) + .await + .map_err(|_| { + format!( + "{} test server did not become ready within 60s", + self.engine.name() + ) + }) + } +} + +impl Drop for ContainerHttpServer { + fn drop(&mut self) { + let _ = self + .engine + .command() + .args(["rm", "-f", &self.container_id]) + .output(); + } +} diff --git a/e2e/rust/src/harness/mod.rs b/e2e/rust/src/harness/mod.rs index 33105a4c7..5feb21c70 100644 --- a/e2e/rust/src/harness/mod.rs +++ b/e2e/rust/src/harness/mod.rs @@ -4,6 +4,7 @@ //! Shared test harness modules for CLI e2e tests. pub mod binary; +pub mod container; pub mod gateway; pub mod output; pub mod port; diff --git a/e2e/rust/tests/custom_image.rs b/e2e/rust/tests/custom_image.rs index 66b9d34b8..ff3cda86a 100644 --- a/e2e/rust/tests/custom_image.rs +++ b/e2e/rust/tests/custom_image.rs @@ -41,7 +41,7 @@ const MARKER: &str = "custom-image-e2e-marker"; /// created from it contains the expected marker file. #[tokio::test] async fn sandbox_from_custom_dockerfile() { - // Step 1 — Write a temporary Dockerfile. + // Step 1: Write a temporary Dockerfile. 
let tmpdir = tempfile::tempdir().expect("create tmpdir"); let dockerfile_path = tmpdir.path().join("Dockerfile"); { @@ -50,7 +50,7 @@ async fn sandbox_from_custom_dockerfile() { .expect("write Dockerfile"); } - // Step 2 — Create a sandbox from the Dockerfile. + // Step 2: Create a sandbox from the Dockerfile. let dockerfile_str = dockerfile_path.to_str().expect("Dockerfile path is UTF-8"); let mut guard = SandboxGuard::create(&[ "--from", @@ -62,7 +62,7 @@ async fn sandbox_from_custom_dockerfile() { .await .expect("sandbox create from Dockerfile"); - // Step 3 — Verify the marker file content appears in the output. + // Step 3: Verify the marker file content appears in the output. let clean_output = strip_ansi(&guard.create_output); assert!( clean_output.contains(MARKER), diff --git a/e2e/rust/tests/forward_proxy_graphql_l7.rs b/e2e/rust/tests/forward_proxy_graphql_l7.rs index a0a14c00d..aeb3648b0 100644 --- a/e2e/rust/tests/forward_proxy_graphql_l7.rs +++ b/e2e/rust/tests/forward_proxy_graphql_l7.rs @@ -11,27 +11,15 @@ #![cfg(feature = "e2e")] use std::io::Write; -use std::process::Command; -use std::time::Duration; -use openshell_e2e::harness::port::find_free_port; +use openshell_e2e::harness::container::ContainerHttpServer; use openshell_e2e::harness::sandbox::SandboxGuard; use tempfile::NamedTempFile; -use tokio::time::{interval, timeout}; -const TEST_SERVER_IMAGE: &str = "public.ecr.aws/docker/library/python:3.13-alpine"; const TEST_SERVER_ALIAS: &str = "graphql-l7.openshell.test"; -struct DockerServer { - host: String, - port: u16, - container_id: String, -} - -impl DockerServer { - async fn start() -> Result { - let port = find_free_port(); - let script = r#"from http.server import BaseHTTPRequestHandler, HTTPServer +async fn start_test_server() -> Result { + let script = r#"from http.server import BaseHTTPRequestHandler, HTTPServer class Handler(BaseHTTPRequestHandler): def read_chunked(self): @@ -68,80 +56,7 @@ class Handler(BaseHTTPRequestHandler): 
HTTPServer(("0.0.0.0", 8000), Handler).serve_forever() "#; - let e2e_network = std::env::var("OPENSHELL_E2E_DOCKER_NETWORK_NAME") - .ok() - .filter(|network| !network.trim().is_empty()); - let host = e2e_network.as_ref().map_or_else( - || "host.openshell.internal".to_string(), - |_| TEST_SERVER_ALIAS.to_string(), - ); - let port = if e2e_network.is_some() { 8000 } else { port }; - - let mut args = vec!["run", "--detach", "--rm"]; - let published_port = format!("{port}:8000"); - if let Some(network) = e2e_network.as_deref() { - args.extend(["--network", network, "--network-alias", TEST_SERVER_ALIAS]); - } else { - args.extend(["-p", &published_port]); - } - args.extend([TEST_SERVER_IMAGE, "python3", "-c", script]); - - let output = Command::new("docker") - .args(args) - .output() - .map_err(|e| format!("start docker test server: {e}"))?; - - let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); - let stderr = String::from_utf8_lossy(&output.stderr).to_string(); - - if !output.status.success() { - return Err(format!( - "docker run failed (exit {:?}):\n{stderr}", - output.status.code() - )); - } - - let server = Self { - host, - port, - container_id: stdout, - }; - server.wait_until_ready().await?; - Ok(server) - } - - async fn wait_until_ready(&self) -> Result<(), String> { - let container_id = self.container_id.clone(); - timeout(Duration::from_secs(60), async move { - let mut tick = interval(Duration::from_millis(500)); - loop { - tick.tick().await; - let output = Command::new("docker") - .args([ - "exec", - &container_id, - "python3", - "-c", - "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000', timeout=1).read()", - ]) - .output() - .ok(); - if output.is_some_and(|o| o.status.success()) { - return; - } - } - }) - .await - .map_err(|_| "docker test server did not become ready within 60s".to_string()) - } -} - -impl Drop for DockerServer { - fn drop(&mut self) { - let _ = Command::new("docker") - .args(["rm", "-f", 
&self.container_id]) - .output(); - } + ContainerHttpServer::start_python(TEST_SERVER_ALIAS, script).await } fn write_graphql_policy(host: &str, port: u16) -> Result { @@ -216,9 +131,7 @@ network_policies: #[tokio::test] #[allow(clippy::too_many_lines)] async fn graphql_l7_enforces_allow_and_deny_rules_on_forward_and_connect_paths() { - let server = DockerServer::start() - .await - .expect("start docker test server"); + let server = start_test_server().await.expect("start test server"); let policy = write_graphql_policy(&server.host, server.port).expect("write custom policy"); let policy_path = policy .path() diff --git a/e2e/rust/tests/forward_proxy_l7_bypass.rs b/e2e/rust/tests/forward_proxy_l7_bypass.rs index 4a4c65c59..6cbaca1eb 100644 --- a/e2e/rust/tests/forward_proxy_l7_bypass.rs +++ b/e2e/rust/tests/forward_proxy_l7_bypass.rs @@ -9,27 +9,15 @@ #![cfg(feature = "e2e")] use std::io::Write; -use std::process::Command; -use std::time::Duration; -use openshell_e2e::harness::port::find_free_port; +use openshell_e2e::harness::container::ContainerHttpServer; use openshell_e2e::harness::sandbox::SandboxGuard; use tempfile::NamedTempFile; -use tokio::time::{interval, timeout}; -const TEST_SERVER_IMAGE: &str = "public.ecr.aws/docker/library/python:3.13-alpine"; const TEST_SERVER_ALIAS: &str = "rest-l7.openshell.test"; -struct DockerServer { - host: String, - port: u16, - container_id: String, -} - -impl DockerServer { - async fn start() -> Result { - let port = find_free_port(); - let script = r#"from http.server import BaseHTTPRequestHandler, HTTPServer +async fn start_test_server() -> Result { + let script = r#"from http.server import BaseHTTPRequestHandler, HTTPServer class Handler(BaseHTTPRequestHandler): def do_GET(self): @@ -46,80 +34,7 @@ class Handler(BaseHTTPRequestHandler): HTTPServer(("0.0.0.0", 8000), Handler).serve_forever() "#; - let e2e_network = std::env::var("OPENSHELL_E2E_DOCKER_NETWORK_NAME") - .ok() - .filter(|network| !network.trim().is_empty()); 
- let host = e2e_network.as_ref().map_or_else( - || "host.openshell.internal".to_string(), - |_| TEST_SERVER_ALIAS.to_string(), - ); - let port = if e2e_network.is_some() { 8000 } else { port }; - - let mut args = vec!["run", "--detach", "--rm"]; - let published_port = format!("{port}:8000"); - if let Some(network) = e2e_network.as_deref() { - args.extend(["--network", network, "--network-alias", TEST_SERVER_ALIAS]); - } else { - args.extend(["-p", &published_port]); - } - args.extend([TEST_SERVER_IMAGE, "python3", "-c", script]); - - let output = Command::new("docker") - .args(args) - .output() - .map_err(|e| format!("start docker test server: {e}"))?; - - let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); - let stderr = String::from_utf8_lossy(&output.stderr).to_string(); - - if !output.status.success() { - return Err(format!( - "docker run failed (exit {:?}):\n{stderr}", - output.status.code() - )); - } - - let server = Self { - host, - port, - container_id: stdout, - }; - server.wait_until_ready().await?; - Ok(server) - } - - async fn wait_until_ready(&self) -> Result<(), String> { - let container_id = self.container_id.clone(); - timeout(Duration::from_secs(60), async move { - let mut tick = interval(Duration::from_millis(500)); - loop { - tick.tick().await; - let output = Command::new("docker") - .args([ - "exec", - &container_id, - "python3", - "-c", - "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000', timeout=1).read()", - ]) - .output() - .ok(); - if output.is_some_and(|o| o.status.success()) { - return; - } - } - }) - .await - .map_err(|_| "docker test server did not become ready within 60s".to_string()) - } -} - -impl Drop for DockerServer { - fn drop(&mut self) { - let _ = Command::new("docker") - .args(["rm", "-f", &self.container_id]) - .output(); - } + ContainerHttpServer::start_python(TEST_SERVER_ALIAS, script).await } fn write_policy_with_l7_rules(host: &str, port: u16) -> Result { @@ -183,9 +98,7 @@ 
network_policies: /// GET /allowed should succeed — the L7 policy explicitly allows it. #[tokio::test] async fn forward_proxy_allows_l7_permitted_request() { - let server = DockerServer::start() - .await - .expect("start docker test server"); + let server = start_test_server().await.expect("start test server"); let policy = write_policy_with_l7_rules(&server.host, server.port).expect("write custom policy"); let policy_path = policy @@ -225,9 +138,7 @@ except Exception as e: /// POST /allowed should be denied — the L7 policy only allows GET. #[tokio::test] async fn forward_proxy_denies_l7_blocked_request() { - let server = DockerServer::start() - .await - .expect("start docker test server"); + let server = start_test_server().await.expect("start test server"); let policy = write_policy_with_l7_rules(&server.host, server.port).expect("write custom policy"); let policy_path = policy diff --git a/e2e/support/gateway-common.sh b/e2e/support/gateway-common.sh new file mode 100644 index 000000000..d8acbd191 --- /dev/null +++ b/e2e/support/gateway-common.sh @@ -0,0 +1,163 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Shared helpers for local gateway-backed e2e wrappers. Driver-specific setup, +# cleanup, and runtime behavior stay in the Docker/Podman wrapper scripts. 
+ +e2e_cargo_target_dir() { + local root=$1 + + if [ -n "${CARGO_TARGET_DIR:-}" ]; then + case "${CARGO_TARGET_DIR}" in + /*) printf '%s\n' "${CARGO_TARGET_DIR}" ;; + *) printf '%s\n' "${root}/${CARGO_TARGET_DIR}" ;; + esac + return 0 + fi + + cargo metadata --format-version=1 --no-deps \ + | python3 -c 'import json, sys; print(json.load(sys.stdin)["target_directory"])' +} + +e2e_endpoint_port() { + python3 - "$1" <<'PY' +import sys +from urllib.parse import urlparse + +parsed = urlparse(sys.argv[1]) +print(parsed.port or (443 if parsed.scheme == "https" else 80)) +PY +} + +e2e_pick_port() { + python3 -c 'import socket; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()' +} + +e2e_register_plaintext_gateway() { + local config_home=$1 + local name=$2 + local endpoint=$3 + local port=$4 + local gateway_config_dir="${config_home}/openshell/gateways/${name}" + + mkdir -p "${gateway_config_dir}" + cat >"${gateway_config_dir}/metadata.json" <"${config_home}/openshell/active_gateway" +} + +e2e_register_mtls_gateway() { + local config_home=$1 + local name=$2 + local endpoint=$3 + local port=$4 + local pki_dir=$5 + local gateway_config_dir="${config_home}/openshell/gateways/${name}" + + mkdir -p "${gateway_config_dir}/mtls" + cp "${pki_dir}/ca.crt" "${gateway_config_dir}/mtls/ca.crt" + cp "${pki_dir}/client.crt" "${gateway_config_dir}/mtls/tls.crt" + cp "${pki_dir}/client.key" "${gateway_config_dir}/mtls/tls.key" + cat >"${gateway_config_dir}/metadata.json" <"${config_home}/openshell/active_gateway" +} + +e2e_build_gateway_binaries() { + local root=$1 + local target_var=$2 + local gateway_var=$3 + local cli_var=$4 + local target_dir + local jobs=() + + if [ -n "${CARGO_BUILD_JOBS:-}" ]; then + jobs=(-j "${CARGO_BUILD_JOBS}") + fi + + target_dir="$(e2e_cargo_target_dir "${root}")" + printf -v "${target_var}" '%s' "${target_dir}" + printf -v "${gateway_var}" '%s' "${target_dir}/debug/openshell-gateway" + printf -v "${cli_var}" '%s' 
"${target_dir}/debug/openshell" + + echo "Building openshell-gateway..." + cargo build "${jobs[@]}" \ + -p openshell-server --bin openshell-gateway \ + --features openshell-core/dev-settings + + echo "Building openshell-cli..." + cargo build "${jobs[@]}" \ + -p openshell-cli --bin openshell \ + --features openshell-core/dev-settings + + if [ ! -x "${target_dir}/debug/openshell-gateway" ]; then + echo "ERROR: expected openshell-gateway binary at ${target_dir}/debug/openshell-gateway" >&2 + exit 1 + fi + if [ ! -x "${target_dir}/debug/openshell" ]; then + echo "ERROR: expected openshell CLI binary at ${target_dir}/debug/openshell" >&2 + exit 1 + fi +} + +e2e_write_gateway_args_file() { + local args_file=$1 + shift + + : >"${args_file}" + for arg in "$@"; do + printf '%s\0' "${arg}" >>"${args_file}" + done +} + +e2e_export_gateway_restart_metadata() { + local gateway_bin=$1 + local args_file=$2 + local log_file=$3 + local pid_file=$4 + + export OPENSHELL_E2E_GATEWAY_BIN="${gateway_bin}" + export OPENSHELL_E2E_GATEWAY_ARGS_FILE="${args_file}" + export OPENSHELL_E2E_GATEWAY_LOG="${log_file}" + export OPENSHELL_E2E_GATEWAY_PID_FILE="${pid_file}" +} + +e2e_stop_gateway() { + local gateway_pid=$1 + local gateway_pid_file=$2 + + if [ -f "${gateway_pid_file}" ]; then + gateway_pid="$(cat "${gateway_pid_file}" 2>/dev/null || true)" + fi + if [ -n "${gateway_pid}" ] && kill -0 "${gateway_pid}" 2>/dev/null; then + echo "Stopping openshell-gateway (pid ${gateway_pid})..." 
+ kill "${gateway_pid}" 2>/dev/null || true + wait "${gateway_pid}" 2>/dev/null || true + fi +} + +e2e_print_gateway_log_on_failure() { + local exit_code=$1 + local gateway_log=$2 + + if [ "${exit_code}" -ne 0 ] && [ -f "${gateway_log}" ]; then + echo "=== gateway log (preserved for debugging) ===" + cat "${gateway_log}" + echo "=== end gateway log ===" + fi +} + diff --git a/e2e/with-docker-gateway.sh b/e2e/with-docker-gateway.sh index f1edcf05b..449e257c4 100755 --- a/e2e/with-docker-gateway.sh +++ b/e2e/with-docker-gateway.sh @@ -22,19 +22,8 @@ if [ "$#" -eq 0 ]; then fi ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" - -cargo_target_dir() { - if [ -n "${CARGO_TARGET_DIR:-}" ]; then - case "${CARGO_TARGET_DIR}" in - /*) printf '%s\n' "${CARGO_TARGET_DIR}" ;; - *) printf '%s\n' "${ROOT}/${CARGO_TARGET_DIR}" ;; - esac - return 0 - fi - - cargo metadata --format-version=1 --no-deps \ - | python3 -c 'import json, sys; print(json.load(sys.stdin)["target_directory"])' -} +# shellcheck source=e2e/support/gateway-common.sh +source "${ROOT}/e2e/support/gateway-common.sh" github_actions_host_docker_tmpdir() { if [ "${GITHUB_ACTIONS:-}" != "true" ] \ @@ -74,28 +63,21 @@ GATEWAY_PID="" GATEWAY_LOG="${WORKDIR}/gateway.log" GATEWAY_PID_FILE="${WORKDIR}/gateway.pid" GATEWAY_ARGS_FILE="${WORKDIR}/gateway.args" -GATEWAY_CONFIG_DIR="" E2E_NAMESPACE="" DOCKER_NETWORK_NAME="" DOCKER_NETWORK_CONNECTED_CONTAINER="" DOCKER_NETWORK_MANAGED=0 GPU_MODE="${OPENSHELL_E2E_DOCKER_GPU:-0}" +DOCKER_SUPERVISOR_ARGS=() # Isolate CLI/SDK gateway metadata from the developer's real config. export XDG_CONFIG_HOME="${WORKDIR}/config" +export XDG_DATA_HOME="${WORKDIR}/data" cleanup() { local exit_code=$? - local gateway_pid="${GATEWAY_PID}" - if [ -f "${GATEWAY_PID_FILE}" ]; then - gateway_pid="$(cat "${GATEWAY_PID_FILE}" 2>/dev/null || true)" - fi - if [ -n "${gateway_pid}" ] && kill -0 "${gateway_pid}" 2>/dev/null; then - echo "Stopping openshell-gateway (pid ${gateway_pid})..." 
- kill "${gateway_pid}" 2>/dev/null || true - wait "${gateway_pid}" 2>/dev/null || true - fi + e2e_stop_gateway "${GATEWAY_PID}" "${GATEWAY_PID_FILE}" if [ "${exit_code}" -ne 0 ] \ && [ -n "${E2E_NAMESPACE}" ] \ @@ -143,71 +125,12 @@ cleanup() { docker network rm "${DOCKER_NETWORK_NAME}" >/dev/null 2>&1 || true fi - if [ "${exit_code}" -ne 0 ] && [ -f "${GATEWAY_LOG}" ]; then - echo "=== gateway log (preserved for debugging) ===" - cat "${GATEWAY_LOG}" - echo "=== end gateway log ===" - fi + e2e_print_gateway_log_on_failure "${exit_code}" "${GATEWAY_LOG}" rm -rf "${WORKDIR}" 2>/dev/null || true } trap cleanup EXIT -register_plaintext_gateway() { - local name=$1 - local endpoint=$2 - local port=$3 - - GATEWAY_CONFIG_DIR="${XDG_CONFIG_HOME}/openshell/gateways/${name}" - mkdir -p "${GATEWAY_CONFIG_DIR}" - cat >"${GATEWAY_CONFIG_DIR}/metadata.json" <"${XDG_CONFIG_HOME}/openshell/active_gateway" -} - -register_mtls_gateway() { - local name=$1 - local endpoint=$2 - local port=$3 - local pki_dir=$4 - - GATEWAY_CONFIG_DIR="${XDG_CONFIG_HOME}/openshell/gateways/${name}" - mkdir -p "${GATEWAY_CONFIG_DIR}/mtls" - cp "${pki_dir}/ca.crt" "${GATEWAY_CONFIG_DIR}/mtls/ca.crt" - cp "${pki_dir}/client.crt" "${GATEWAY_CONFIG_DIR}/mtls/tls.crt" - cp "${pki_dir}/client.key" "${GATEWAY_CONFIG_DIR}/mtls/tls.key" - cat >"${GATEWAY_CONFIG_DIR}/metadata.json" <"${XDG_CONFIG_HOME}/openshell/active_gateway" -} - -endpoint_port() { - python3 - "$1" <<'PY' -import sys -from urllib.parse import urlparse - -parsed = urlparse(sys.argv[1]) -print(parsed.port or (443 if parsed.scheme == "https" else 80)) -PY -} - -pick_port() { - python3 -c 'import socket; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()' -} - ensure_e2e_docker_network() { local network=$1 @@ -287,9 +210,15 @@ if [ -n "${OPENSHELL_GATEWAY_ENDPOINT:-}" ]; then esac GATEWAY_NAME="${OPENSHELL_GATEWAY:-openshell-e2e-endpoint}" - register_plaintext_gateway "${GATEWAY_NAME}" "${OPENSHELL_GATEWAY_ENDPOINT}" 
"$(endpoint_port "${OPENSHELL_GATEWAY_ENDPOINT}")" + e2e_register_plaintext_gateway \ + "${XDG_CONFIG_HOME}" \ + "${GATEWAY_NAME}" \ + "${OPENSHELL_GATEWAY_ENDPOINT}" \ + "$(e2e_endpoint_port "${OPENSHELL_GATEWAY_ENDPOINT}")" export OPENSHELL_GATEWAY="${GATEWAY_NAME}" export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-180}" + export OPENSHELL_E2E_DRIVER="${OPENSHELL_E2E_DRIVER:-docker}" + export OPENSHELL_E2E_CONTAINER_ENGINE="${OPENSHELL_E2E_CONTAINER_ENGINE:-docker}" echo "Using existing e2e gateway endpoint: ${OPENSHELL_GATEWAY_ENDPOINT}" "$@" @@ -340,6 +269,48 @@ linux_target_triple() { esac } +resolve_docker_supervisor_image() { + if [ -n "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE:-}" ]; then + printf '%s\n' "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE}" + return 0 + fi + + if [ -n "${OPENSHELL_SUPERVISOR_IMAGE:-}" ]; then + printf '%s\n' "${OPENSHELL_SUPERVISOR_IMAGE}" + return 0 + fi + + if [ -n "${CI:-}" ]; then + if [ -z "${IMAGE_TAG:-}" ]; then + echo "ERROR: IMAGE_TAG must be set in CI when no Docker supervisor image override is provided." >&2 + exit 2 + fi + + local registry="${OPENSHELL_REGISTRY:-ghcr.io/nvidia/openshell}" + printf '%s/supervisor:%s\n' "${registry%/}" "${IMAGE_TAG}" + return 0 + fi + + printf '%s\n' "" +} + +ensure_docker_supervisor_image() { + local image=$1 + + if docker image inspect "${image}" >/dev/null 2>&1; then + return 0 + fi + + echo "Pulling Docker supervisor image ${image}..." + if docker pull "${image}"; then + return 0 + fi + + echo "ERROR: supervisor image '${image}' is not available." >&2 + echo " Build it, push it, or set OPENSHELL_SUPERVISOR_IMAGE to a pullable image." 
>&2 + exit 2 +} + DAEMON_ARCH="$(normalize_arch "$(docker info --format '{{.Architecture}}' 2>/dev/null || true)")" SUPERVISOR_TARGET="$(linux_target_triple "${DAEMON_ARCH}")" HOST_OS="$(uname -s)" @@ -352,48 +323,35 @@ if [ -n "${CARGO_BUILD_JOBS:-}" ]; then CARGO_BUILD_JOBS_ARG=(-j "${CARGO_BUILD_JOBS}") fi -TARGET_DIR="$(cargo_target_dir)" -GATEWAY_BIN="${TARGET_DIR}/debug/openshell-gateway" -CLI_BIN="${TARGET_DIR}/debug/openshell" +e2e_build_gateway_binaries "${ROOT}" TARGET_DIR GATEWAY_BIN CLI_BIN -echo "Building openshell-gateway..." -cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ - -p openshell-server --bin openshell-gateway \ - --features openshell-core/dev-settings - -echo "Building openshell-cli..." -cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ - -p openshell-cli --bin openshell \ - --features openshell-core/dev-settings - -if [ ! -x "${GATEWAY_BIN}" ]; then - echo "ERROR: expected openshell-gateway binary at ${GATEWAY_BIN}" >&2 - exit 1 -fi -if [ ! -x "${CLI_BIN}" ]; then - echo "ERROR: expected openshell CLI binary at ${CLI_BIN}" >&2 - exit 1 -fi - -echo "Building openshell-sandbox for ${SUPERVISOR_TARGET}..." 
-mkdir -p "${SUPERVISOR_OUT_DIR}" -if [ "${HOST_OS}" = "Linux" ] && [ "${HOST_ARCH}" = "${DAEMON_ARCH}" ]; then - rustup target add "${SUPERVISOR_TARGET}" >/dev/null 2>&1 || true - cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ - --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" - cp "${TARGET_DIR}/${SUPERVISOR_TARGET}/release/openshell-sandbox" "${SUPERVISOR_BIN}" +SUPERVISOR_IMAGE="$(resolve_docker_supervisor_image)" +if [ -n "${SUPERVISOR_IMAGE}" ]; then + ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}" + echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}" + DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-image "${SUPERVISOR_IMAGE}") else - CONTAINER_ENGINE=docker \ - DOCKER_PLATFORM="linux/${DAEMON_ARCH}" \ - DOCKER_OUTPUT="type=local,dest=${SUPERVISOR_OUT_DIR}" \ - bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor-output -fi + echo "Building openshell-sandbox for ${SUPERVISOR_TARGET}..." + mkdir -p "${SUPERVISOR_OUT_DIR}" + if [ "${HOST_OS}" = "Linux" ] && [ "${HOST_ARCH}" = "${DAEMON_ARCH}" ]; then + rustup target add "${SUPERVISOR_TARGET}" >/dev/null 2>&1 || true + cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ + --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" + cp "${TARGET_DIR}/${SUPERVISOR_TARGET}/release/openshell-sandbox" "${SUPERVISOR_BIN}" + else + CONTAINER_ENGINE=docker \ + DOCKER_PLATFORM="linux/${DAEMON_ARCH}" \ + DOCKER_OUTPUT="type=local,dest=${SUPERVISOR_OUT_DIR}" \ + bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor-output + fi -if [ ! -f "${SUPERVISOR_BIN}" ]; then - echo "ERROR: expected supervisor binary at ${SUPERVISOR_BIN}" >&2 - exit 1 + if [ ! 
-f "${SUPERVISOR_BIN}" ]; then + echo "ERROR: expected supervisor binary at ${SUPERVISOR_BIN}" >&2 + exit 1 + fi + chmod +x "${SUPERVISOR_BIN}" + DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-bin "${SUPERVISOR_BIN}") fi -chmod +x "${SUPERVISOR_BIN}" DEFAULT_SANDBOX_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/base:latest" SANDBOX_IMAGE="${OPENSHELL_E2E_DOCKER_SANDBOX_IMAGE:-${OPENSHELL_SANDBOX_IMAGE:-${DEFAULT_SANDBOX_IMAGE}}}" @@ -439,7 +397,7 @@ openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial \ cd "${ROOT}" -HOST_PORT=$(pick_port) +HOST_PORT=$(e2e_pick_port) STATE_DIR="${WORKDIR}/state" mkdir -p "${STATE_DIR}" @@ -450,6 +408,10 @@ GATEWAY_HOST_ALIAS_IP="" ensure_e2e_docker_network "${DOCKER_NETWORK_NAME}" export OPENSHELL_E2E_DOCKER_NETWORK_NAME="${DOCKER_NETWORK_NAME}" +export OPENSHELL_E2E_NETWORK_NAME="${DOCKER_NETWORK_NAME}" +export OPENSHELL_E2E_SANDBOX_NAMESPACE="${E2E_NAMESPACE}" +export OPENSHELL_E2E_DRIVER="docker" +export OPENSHELL_E2E_CONTAINER_ENGINE="docker" if connect_current_container_to_docker_network "${DOCKER_NETWORK_NAME}"; then echo "Connected CI job container to Docker network ${DOCKER_NETWORK_NAME} (${GATEWAY_HOST_ALIAS_IP})." 
else @@ -468,7 +430,7 @@ GATEWAY_ARGS=( --tls-client-ca "${PKI_DIR}/ca.crt" \ --db-url "sqlite:${STATE_DIR}/gateway.db?mode=rwc" \ --grpc-endpoint "${GATEWAY_ENDPOINT}" \ - --docker-supervisor-bin "${SUPERVISOR_BIN}" \ + "${DOCKER_SUPERVISOR_ARGS[@]}" \ --docker-tls-ca "${PKI_DIR}/ca.crt" \ --docker-tls-cert "${PKI_DIR}/client.crt" \ --docker-tls-key "${PKI_DIR}/client.key" \ @@ -479,14 +441,12 @@ if [ -n "${GATEWAY_HOST_ALIAS_IP}" ]; then GATEWAY_ARGS+=(--host-gateway-ip "${GATEWAY_HOST_ALIAS_IP}") fi -: >"${GATEWAY_ARGS_FILE}" -for arg in "${GATEWAY_ARGS[@]}"; do - printf '%s\0' "${arg}" >>"${GATEWAY_ARGS_FILE}" -done -export OPENSHELL_E2E_GATEWAY_BIN="${GATEWAY_BIN}" -export OPENSHELL_E2E_GATEWAY_ARGS_FILE="${GATEWAY_ARGS_FILE}" -export OPENSHELL_E2E_GATEWAY_LOG="${GATEWAY_LOG}" -export OPENSHELL_E2E_GATEWAY_PID_FILE="${GATEWAY_PID_FILE}" +e2e_write_gateway_args_file "${GATEWAY_ARGS_FILE}" "${GATEWAY_ARGS[@]}" +e2e_export_gateway_restart_metadata \ + "${GATEWAY_BIN}" \ + "${GATEWAY_ARGS_FILE}" \ + "${GATEWAY_LOG}" \ + "${GATEWAY_PID_FILE}" "${GATEWAY_BIN}" "${GATEWAY_ARGS[@]}" >"${GATEWAY_LOG}" 2>&1 & GATEWAY_PID=$! @@ -494,7 +454,12 @@ printf '%s\n' "${GATEWAY_PID}" >"${GATEWAY_PID_FILE}" GATEWAY_NAME="openshell-e2e-docker-${HOST_PORT}" CLI_GATEWAY_ENDPOINT="https://127.0.0.1:${HOST_PORT}" -register_mtls_gateway "${GATEWAY_NAME}" "${CLI_GATEWAY_ENDPOINT}" "${HOST_PORT}" "${PKI_DIR}" +e2e_register_mtls_gateway \ + "${XDG_CONFIG_HOME}" \ + "${GATEWAY_NAME}" \ + "${CLI_GATEWAY_ENDPOINT}" \ + "${HOST_PORT}" \ + "${PKI_DIR}" export OPENSHELL_GATEWAY="${GATEWAY_NAME}" export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-180}" diff --git a/e2e/with-podman-gateway.sh b/e2e/with-podman-gateway.sh new file mode 100755 index 000000000..ee8073f2b --- /dev/null +++ b/e2e/with-podman-gateway.sh @@ -0,0 +1,389 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +# Run an e2e command against a Podman-backed OpenShell gateway. +# +# Modes: +# - OPENSHELL_GATEWAY_ENDPOINT unset: +# Build and start an ephemeral standalone gateway with the Podman compute +# driver, then run the command against that gateway. +# - OPENSHELL_GATEWAY_ENDPOINT=http://host:port: +# Use the existing plaintext gateway endpoint and run the command. +# +# Podman e2e currently uses plaintext gateway traffic. The Podman driver does +# not yet inject gateway mTLS client materials into sandbox containers. + +set -euo pipefail + +if [ "$#" -eq 0 ]; then + echo "Usage: e2e/with-podman-gateway.sh [args...]" >&2 + exit 2 +fi + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +# shellcheck source=e2e/support/gateway-common.sh +source "${ROOT}/e2e/support/gateway-common.sh" + +PODMAN_XDG_CONFIG_HOME_WAS_SET=0 +PODMAN_XDG_CONFIG_HOME="" +if [ "${XDG_CONFIG_HOME+x}" = x ]; then + PODMAN_XDG_CONFIG_HOME_WAS_SET=1 + PODMAN_XDG_CONFIG_HOME="${XDG_CONFIG_HOME}" + export OPENSHELL_E2E_CONTAINER_ENGINE_XDG_CONFIG_HOME="${PODMAN_XDG_CONFIG_HOME}" + unset OPENSHELL_E2E_CONTAINER_ENGINE_UNSET_XDG_CONFIG_HOME +else + export OPENSHELL_E2E_CONTAINER_ENGINE_UNSET_XDG_CONFIG_HOME=1 + unset OPENSHELL_E2E_CONTAINER_ENGINE_XDG_CONFIG_HOME +fi + +with_podman_config() { + if [ "${PODMAN_XDG_CONFIG_HOME_WAS_SET}" = "1" ]; then + XDG_CONFIG_HOME="${PODMAN_XDG_CONFIG_HOME}" "$@" + else + env -u XDG_CONFIG_HOME "$@" + fi +} + +podman_cmd() { + with_podman_config podman "$@" +} + +WORKDIR_PARENT="${TMPDIR:-/tmp}" +WORKDIR_PARENT="${WORKDIR_PARENT%/}" +WORKDIR="$(mktemp -d "${WORKDIR_PARENT}/openshell-e2e-podman.XXXXXX")" +GATEWAY_BIN="" +CLI_BIN="" +GATEWAY_PID="" +GATEWAY_LOG="${WORKDIR}/gateway.log" +GATEWAY_PID_FILE="${WORKDIR}/gateway.pid" +GATEWAY_ARGS_FILE="${WORKDIR}/gateway.args" +E2E_NAMESPACE="" +PODMAN_NETWORK_NAME="" +PODMAN_NETWORK_MANAGED=0 +PODMAN_SERVICE_PID="" +PODMAN_SERVICE_LOG="${WORKDIR}/podman-service.log" 
+PODMAN_SOCKET="" + +# Isolate CLI/SDK gateway metadata from the developer's real config. +export XDG_CONFIG_HOME="${WORKDIR}/config" + +cleanup() { + local exit_code=$? + + e2e_stop_gateway "${GATEWAY_PID}" "${GATEWAY_PID_FILE}" + + local sandbox_ids="" + if [ -n "${E2E_NAMESPACE}" ] && command -v podman >/dev/null 2>&1; then + sandbox_ids="$(podman_cmd ps -aq \ + --filter "label=openshell.managed=true" \ + --filter "label=openshell.sandbox-namespace=${E2E_NAMESPACE}" \ + 2>/dev/null || true)" + fi + + if [ "${exit_code}" -ne 0 ] && [ -n "${sandbox_ids}" ]; then + echo "=== sandbox container logs (preserved for debugging) ===" + for id in ${sandbox_ids}; do + echo "--- container ${id} (inspect) ---" + podman_cmd inspect --format '{{.Name}} state={{.State.Status}} exit={{.State.ExitCode}} error={{.State.Error}}' "${id}" 2>/dev/null || true + echo "--- container ${id} (last 80 log lines) ---" + podman_cmd logs --tail 80 "${id}" 2>&1 || true + done + echo "=== end sandbox container logs ===" + fi + + if [ -n "${sandbox_ids}" ]; then + for id in ${sandbox_ids}; do + local sandbox_id + sandbox_id="$(podman_cmd inspect --format '{{ index .Config.Labels "openshell.sandbox-id" }}' "${id}" 2>/dev/null || true)" + podman_cmd rm -f "${id}" >/dev/null 2>&1 || true + if [ -n "${sandbox_id}" ] && [ "${sandbox_id}" != "" ]; then + podman_cmd volume rm -f "openshell-sandbox-${sandbox_id}-workspace" >/dev/null 2>&1 || true + podman_cmd secret rm "openshell-handshake-${sandbox_id}" >/dev/null 2>&1 || true + fi + done + fi + + if [ "${PODMAN_NETWORK_MANAGED}" = "1" ] \ + && [ -n "${PODMAN_NETWORK_NAME}" ] \ + && command -v podman >/dev/null 2>&1; then + podman_cmd network rm "${PODMAN_NETWORK_NAME}" >/dev/null 2>&1 || true + fi + + e2e_print_gateway_log_on_failure "${exit_code}" "${GATEWAY_LOG}" + if [ "${exit_code}" -ne 0 ] && [ -f "${PODMAN_SERVICE_LOG}" ]; then + echo "=== podman service log (preserved for debugging) ===" + cat "${PODMAN_SERVICE_LOG}" || true + echo "=== end 
podman service log ===" + fi + + if [ -n "${PODMAN_SERVICE_PID}" ]; then + kill "${PODMAN_SERVICE_PID}" >/dev/null 2>&1 || true + wait "${PODMAN_SERVICE_PID}" >/dev/null 2>&1 || true + fi + + rm -rf "${WORKDIR}" 2>/dev/null || true +} +trap cleanup EXIT + +ensure_e2e_podman_network() { + local network=$1 + + if podman_cmd network inspect "${network}" >/dev/null 2>&1; then + return 0 + fi + + podman_cmd network create \ + --driver bridge \ + --label openshell.managed=true \ + --label "openshell.sandbox-namespace=${E2E_NAMESPACE}" \ + "${network}" >/dev/null + PODMAN_NETWORK_MANAGED=1 +} + +default_podman_socket_path() { + case "$(uname -s)" in + Darwin) + printf '%s\n' "${HOME}/.local/share/containers/podman/machine/podman.sock" + ;; + Linux) + if [ -n "${XDG_RUNTIME_DIR:-}" ]; then + printf '%s\n' "${XDG_RUNTIME_DIR}/podman/podman.sock" + else + printf '%s\n' "/run/user/$(id -u)/podman/podman.sock" + fi + ;; + *) + return 1 + ;; + esac +} + +ensure_podman_api_socket() { + if [ -n "${OPENSHELL_PODMAN_SOCKET:-}" ]; then + return 0 + fi + + local default_socket + if default_socket="$(default_podman_socket_path)" \ + && [ -S "${default_socket}" ] \ + && podman_cmd --url "unix://${default_socket}" info >/dev/null 2>&1; then + export OPENSHELL_PODMAN_SOCKET="${default_socket}" + return 0 + fi + + PODMAN_SOCKET="${WORKDIR}/podman/podman.sock" + mkdir -p "$(dirname "${PODMAN_SOCKET}")" + + echo "Starting temporary Podman API service at ${PODMAN_SOCKET}..." + with_podman_config podman system service --time=0 "unix://${PODMAN_SOCKET}" \ + >"${PODMAN_SERVICE_LOG}" 2>&1 & + PODMAN_SERVICE_PID=$! + export OPENSHELL_PODMAN_SOCKET="${PODMAN_SOCKET}" + + local elapsed=0 + local timeout=30 + while [ "${elapsed}" -lt "${timeout}" ]; do + if [ -S "${PODMAN_SOCKET}" ] \ + && podman_cmd --url "unix://${PODMAN_SOCKET}" info >/dev/null 2>&1; then + return 0 + fi + + if ! 
kill -0 "${PODMAN_SERVICE_PID}" 2>/dev/null; then + echo "ERROR: Podman API service exited before becoming reachable" >&2 + cat "${PODMAN_SERVICE_LOG}" >&2 || true + exit 2 + fi + + sleep 1 + elapsed=$((elapsed + 1)) + done + + echo "ERROR: Podman API service did not become reachable within ${timeout}s" >&2 + cat "${PODMAN_SERVICE_LOG}" >&2 || true + exit 2 +} + +resolve_podman_supervisor_image() { + if [ -n "${OPENSHELL_SUPERVISOR_IMAGE:-}" ]; then + printf '%s\n' "${OPENSHELL_SUPERVISOR_IMAGE}" + return 0 + fi + + if [ -n "${CI:-}" ]; then + if [ -z "${IMAGE_TAG:-}" ]; then + echo "ERROR: IMAGE_TAG must be set in CI when no Podman supervisor image override is provided." >&2 + exit 2 + fi + + local registry="${OPENSHELL_REGISTRY:-ghcr.io/nvidia/openshell}" + printf '%s/supervisor:%s\n' "${registry%/}" "${IMAGE_TAG}" + return 0 + fi + + printf '%s\n' "openshell/supervisor:dev" +} + +ensure_podman_supervisor_image() { + local image=$1 + + if podman_cmd image exists "${image}" 2>/dev/null; then + return 0 + fi + + if [ "${image}" = "openshell/supervisor:dev" ] \ + && [ -z "${OPENSHELL_SUPERVISOR_IMAGE:-}" ] \ + && [ -z "${CI:-}" ]; then + echo "Building local Podman supervisor image ${image}..." + with_podman_config env CONTAINER_ENGINE=podman IMAGE_TAG=dev \ + bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor + if podman_cmd image exists "${image}" 2>/dev/null; then + return 0 + fi + + echo "ERROR: expected supervisor image '${image}' after local build." >&2 + exit 2 + fi + + echo "Pulling Podman supervisor image ${image}..." + if podman_cmd pull "${image}"; then + return 0 + fi + + echo "ERROR: supervisor image '${image}' is not available." >&2 + echo " Build it, push it, or set OPENSHELL_SUPERVISOR_IMAGE to a pullable image." >&2 + exit 2 +} + +if [ -n "${OPENSHELL_GATEWAY_ENDPOINT:-}" ]; then + case "${OPENSHELL_GATEWAY_ENDPOINT}" in + http://*) ;; + https://*) + echo "ERROR: OPENSHELL_GATEWAY_ENDPOINT endpoint mode is HTTP-only for Podman e2e." 
>&2 + echo " Podman e2e does not yet support sandbox mTLS client material injection." >&2 + exit 2 + ;; + *) + echo "ERROR: OPENSHELL_GATEWAY_ENDPOINT must start with http:// for Podman e2e endpoint mode." >&2 + exit 2 + ;; + esac + + GATEWAY_NAME="${OPENSHELL_GATEWAY:-openshell-e2e-podman-endpoint}" + e2e_register_plaintext_gateway \ + "${XDG_CONFIG_HOME}" \ + "${GATEWAY_NAME}" \ + "${OPENSHELL_GATEWAY_ENDPOINT}" \ + "$(e2e_endpoint_port "${OPENSHELL_GATEWAY_ENDPOINT}")" + export OPENSHELL_GATEWAY="${GATEWAY_NAME}" + export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-300}" + export OPENSHELL_E2E_DRIVER="${OPENSHELL_E2E_DRIVER:-podman}" + export OPENSHELL_E2E_CONTAINER_ENGINE="${OPENSHELL_E2E_CONTAINER_ENGINE:-podman}" + + echo "Using existing Podman e2e gateway endpoint: ${OPENSHELL_GATEWAY_ENDPOINT}" + "$@" + exit $? +fi + +# Preflight for managed Podman gateway mode. +if ! command -v podman >/dev/null 2>&1; then + echo "ERROR: podman CLI is required to run Podman-backed e2e tests" >&2 + exit 2 +fi +if ! podman_cmd info >/dev/null 2>&1; then + echo "ERROR: podman service is not reachable (podman info failed)" >&2 + echo " Start it with 'podman machine start' on macOS, or the user service on Linux." >&2 + exit 2 +fi +ensure_podman_api_socket + +e2e_build_gateway_binaries "${ROOT}" TARGET_DIR GATEWAY_BIN CLI_BIN + +SUPERVISOR_IMAGE="$(resolve_podman_supervisor_image)" +ensure_podman_supervisor_image "${SUPERVISOR_IMAGE}" +echo "Using Podman supervisor image: ${SUPERVISOR_IMAGE}" + +DEFAULT_SANDBOX_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/base:latest" +SANDBOX_IMAGE="${OPENSHELL_E2E_PODMAN_SANDBOX_IMAGE:-${OPENSHELL_SANDBOX_IMAGE:-${DEFAULT_SANDBOX_IMAGE}}}" +if ! podman_cmd image exists "${SANDBOX_IMAGE}" 2>/dev/null; then + echo "Pulling ${SANDBOX_IMAGE}..." 
+ podman_cmd pull "${SANDBOX_IMAGE}" +fi + +HOST_PORT=$(e2e_pick_port) +HEALTH_PORT=$(e2e_pick_port) +STATE_DIR="${WORKDIR}/state" +mkdir -p "${STATE_DIR}" + +HANDSHAKE_SECRET="e2e-podman-$(python3 -c 'import secrets; print(secrets.token_hex(16))')" +E2E_NAMESPACE="e2e-podman-$$-${HOST_PORT}" +PODMAN_NETWORK_NAME="${E2E_NAMESPACE}" +ensure_e2e_podman_network "${PODMAN_NETWORK_NAME}" + +export OPENSHELL_E2E_DRIVER="podman" +export OPENSHELL_E2E_CONTAINER_ENGINE="podman" +export OPENSHELL_E2E_NETWORK_NAME="${PODMAN_NETWORK_NAME}" +export OPENSHELL_E2E_SANDBOX_NAMESPACE="${E2E_NAMESPACE}" + +echo "Starting openshell-gateway on port ${HOST_PORT} (namespace: ${E2E_NAMESPACE})..." +GATEWAY_ARGS=( + --bind-address 0.0.0.0 + --port "${HOST_PORT}" + --health-port "${HEALTH_PORT}" + --ssh-gateway-port "${HOST_PORT}" + --drivers podman + --disable-tls + --db-url "sqlite:${STATE_DIR}/gateway.db?mode=rwc" + --sandbox-namespace "${E2E_NAMESPACE}" + --sandbox-image "${SANDBOX_IMAGE}" + --sandbox-image-pull-policy missing + --log-level info +) + +e2e_write_gateway_args_file "${GATEWAY_ARGS_FILE}" "${GATEWAY_ARGS[@]}" +e2e_export_gateway_restart_metadata \ + "${GATEWAY_BIN}" \ + "${GATEWAY_ARGS_FILE}" \ + "${GATEWAY_LOG}" \ + "${GATEWAY_PID_FILE}" + +OPENSHELL_SSH_HANDSHAKE_SECRET="${HANDSHAKE_SECRET}" \ +OPENSHELL_SUPERVISOR_IMAGE="${SUPERVISOR_IMAGE}" \ +OPENSHELL_NETWORK_NAME="${PODMAN_NETWORK_NAME}" \ + "${GATEWAY_BIN}" "${GATEWAY_ARGS[@]}" >"${GATEWAY_LOG}" 2>&1 & +GATEWAY_PID=$! +printf '%s\n' "${GATEWAY_PID}" >"${GATEWAY_PID_FILE}" + +GATEWAY_NAME="openshell-e2e-podman-${HOST_PORT}" +CLI_GATEWAY_ENDPOINT="http://127.0.0.1:${HOST_PORT}" +e2e_register_plaintext_gateway \ + "${XDG_CONFIG_HOME}" \ + "${GATEWAY_NAME}" \ + "${CLI_GATEWAY_ENDPOINT}" \ + "${HOST_PORT}" + +export OPENSHELL_GATEWAY="${GATEWAY_NAME}" +export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-300}" + +echo "Waiting for gateway to become healthy..." 
+elapsed=0 +timeout=120 +while [ "${elapsed}" -lt "${timeout}" ]; do + if ! kill -0 "${GATEWAY_PID}" 2>/dev/null; then + echo "ERROR: openshell-gateway exited before becoming healthy" + exit 1 + fi + if curl -sf "http://127.0.0.1:${HEALTH_PORT}/healthz" >/dev/null 2>&1; then + echo "Gateway healthy after ${elapsed}s." + break + fi + sleep 2 + elapsed=$((elapsed + 2)) +done +if [ "${elapsed}" -ge "${timeout}" ]; then + echo "ERROR: gateway did not become healthy within ${timeout}s" + exit 1 +fi + +echo "Running e2e command against ${CLI_GATEWAY_ENDPOINT}: $*" +"$@" diff --git a/mise.lock b/mise.lock index 4cff5b4a1..6ab204f6e 100644 --- a/mise.lock +++ b/mise.lock @@ -1,4 +1,4 @@ -# @generated - this file is auto-generated by `mise lock` https://mise.jdx.dev/dev-tools/mise-lock.html +# @generated - this file is auto-generated by `mise lock` https://mise.en.dev/dev-tools/mise-lock.html [[tools."cargo:cargo-zigbuild"]] version = "0.22.3" diff --git a/tasks/test.toml b/tasks/test.toml index da0fe8cc0..152e34d37 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -31,7 +31,7 @@ hide = true description = "Run Rust CLI e2e tests against a Docker-backed gateway" run = [ "cargo build -p openshell-cli --features openshell-core/dev-settings", - "e2e/with-docker-gateway.sh cargo test --manifest-path e2e/rust/Cargo.toml --features e2e -- --skip docker_gpu_sandbox_runs_nvidia_smi", + "e2e/with-docker-gateway.sh cargo test --manifest-path e2e/rust/Cargo.toml --features e2e-docker", ] ["e2e:python"] @@ -47,8 +47,7 @@ env = { UV_NO_SYNC = "1", PYTHONPATH = "python" } run = "uv run pytest -o python_files='test_*.py' -m gpu -n ${E2E_PARALLEL:-1} e2e/python" ["e2e:podman"] -description = "Start a Podman-backed gateway and run smoke e2e (requires rootless Podman; pass -- --port=N to override)" -depends = ["build:docker:supervisor"] +description = "Run Rust CLI e2e tests against a Podman-backed gateway" run = "e2e/rust/e2e-podman.sh" ["e2e:vm"] @@ -61,5 +60,5 @@ run = 
"e2e/rust/e2e-docker.sh" ["e2e:docker:gpu"] description = "Run GPU e2e against a standalone gateway with the Docker compute driver" -env = { OPENSHELL_E2E_DOCKER_GPU = "1", OPENSHELL_E2E_DOCKER_TEST = "docker_gpu" } +env = { OPENSHELL_E2E_DOCKER_GPU = "1", OPENSHELL_E2E_DOCKER_TEST = "docker_gpu", OPENSHELL_E2E_DOCKER_FEATURES = "e2e-docker-gpu" } run = "e2e/rust/e2e-docker.sh"