Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions .agents/skills/helm-dev-environment/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ The gateway Service uses ClusterIP. Access is via Envoy Gateway (port `8080`) or

### TLS behaviour

`values-skaffold.yaml` sets `server.disableTls: true`, so Skaffold-based deploys run
`ci/values-skaffold.yaml` sets `server.disableTls: true`, so Skaffold-based deploys run
plaintext by default. To test with TLS enabled, comment out that line and redeploy.

| Mode | `server.disableTls` | Gateway scheme |
Expand Down Expand Up @@ -160,7 +160,7 @@ imports the openshell realm from `scripts/keycloak-realm.json`, and prints a por
command for acquiring tokens from the CLI.

Then activate OIDC in the OpenShell Helm chart:
1. Uncomment `#- values-keycloak.yaml` in `skaffold.yaml`
1. Uncomment `#- ci/values-keycloak.yaml` in `skaffold.yaml`
2. Redeploy: `mise run helm:skaffold:run`

To remove Keycloak:
Expand Down Expand Up @@ -191,10 +191,12 @@ mise run helm:k3s:status
|------|---------|
| `deploy/helm/openshell/skaffold.yaml` | Skaffold config — images, Helm releases, values overlays |
| `deploy/helm/openshell/values.yaml` | Default Helm values |
| `deploy/helm/openshell/values-skaffold.yaml` | Dev overrides (image pull policy, local image names) |
| `deploy/helm/openshell/values-cert-manager.yaml` | cert-manager TLS overlay (opt-in; disables pkiInitJob) |
| `deploy/helm/openshell/values-gateway.yaml` | Envoy Gateway GRPCRoute + Gateway overlay |
| `deploy/helm/openshell/values-keycloak.yaml` | Keycloak OIDC overlay |
| `deploy/helm/openshell/ci/values-skaffold.yaml` | Dev overrides (image pull policy, TLS disabled for local Skaffold) |
| `deploy/helm/openshell/ci/values-cert-manager.yaml` | cert-manager PKI overlay (opt-in; disables pkiInitJob) |
| `deploy/helm/openshell/ci/values-gateway.yaml` | Envoy Gateway GRPCRoute + Gateway overlay |
| `deploy/helm/openshell/ci/values-keycloak.yaml` | Keycloak OIDC overlay |
| `deploy/helm/openshell/ci/values-tls-disabled.yaml` | Lint-only: TLS + auth disabled (reverse-proxy edge termination) |
| `deploy/kube/manifests/envoy-gateway-openshell.yaml` | GatewayClass for Envoy Gateway (`mise run helm:gateway:apply`) |
| `tasks/scripts/helm-k3s-local.sh` | k3d cluster create/delete/start/stop/status |
| `tasks/scripts/helm-e2e.sh` | Bootstrap k3d cluster and run Rust + Python e2e via Helm |
| `tasks/scripts/keycloak-k8s-setup.sh` | Keycloak deploy + realm import |
132 changes: 132 additions & 0 deletions .github/workflows/branch-helm-e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

name: Branch Helm E2E

# Runs for pushes to branches named `pull-request/<number>` and on manual
# dispatch.
on:
  push:
    branches:
      - "pull-request/[0-9]+"
  workflow_dispatch: {}

# No default token permissions; every job below opts in to what it needs.
permissions: {}

jobs:
  # Resolves whether the suite should run at all. The local pr-gate action is
  # given the `test:e2e-helm` label and exposes a `should_run` output
  # (presumably 'true' only when the PR carries that label — confirm against
  # .github/actions/pr-gate).
  pr_metadata:
    name: Resolve PR metadata
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
    outputs:
      should_run: ${{ steps.gate.outputs.should_run }}
    steps:
      - uses: actions/checkout@v6

      - id: gate
        uses: ./.github/actions/pr-gate
        with:
          required_label: test:e2e-helm

  # Builds the gateway image through the reusable docker-build workflow.
  build-gateway:
    needs: [pr_metadata]
    if: needs.pr_metadata.outputs.should_run == 'true'
    permissions:
      contents: read
      packages: write
    uses: ./.github/workflows/docker-build.yml
    with:
      component: gateway
      platform: linux/amd64

  # Builds the supervisor image through the reusable docker-build workflow.
  build-supervisor:
    needs: [pr_metadata]
    if: needs.pr_metadata.outputs.should_run == 'true'
    permissions:
      contents: read
      packages: write
    uses: ./.github/workflows/docker-build.yml
    with:
      component: supervisor
      platform: linux/amd64

  # Rust e2e suite against a kind cluster created by helm/kind-action.
  helm-e2e-rust:
    name: Helm E2E (rust)
    needs: [pr_metadata, build-gateway, build-supervisor]
    if: needs.pr_metadata.outputs.should_run == 'true'
    runs-on: linux-amd64-cpu8
    timeout-minutes: 60
    permissions:
      contents: read
      packages: read
    env:
      MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      HELM_E2E_CLUSTER_NAME: helm-e2e-${{ github.run_id }}-rust
      HELM_E2E_IMAGE_TAG: ${{ github.sha }}
      # NOTE(review): presumably tells the e2e script to reuse the cluster
      # created by the kind-action step below instead of bootstrapping its
      # own — confirm in tasks/scripts/helm-e2e.sh.
      HELM_E2E_SKIP_CLUSTER: "1"
      HELM_E2E_IMAGE_LOADER: kind
    steps:
      - uses: actions/checkout@v6

      - name: Log in to GHCR
        # Credentials are handed over as environment variables so that no
        # expression is expanded inside the shell script text itself.
        env:
          GHCR_USER: ${{ github.actor }}
          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: echo "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USER" --password-stdin

      - name: Install mise
        # An explicit `shell: bash` makes the runner use `-eo pipefail`, so a
        # failed download fails the step instead of piping nothing into sh.
        shell: bash
        run: |
          curl -fsSL https://mise.run | sh
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
          echo "$HOME/.local/share/mise/shims" >> "$GITHUB_PATH"

      - name: Install tools
        run: mise install --locked

      - name: Create kind cluster
        uses: helm/kind-action@v1
        with:
          cluster_name: ${{ env.HELM_E2E_CLUSTER_NAME }}
          wait: 120s

      - name: Run Helm E2E (Rust)
        run: mise run e2e:helm:rust

  # Python e2e suite; same shape as the Rust job plus a dependency sync step.
  helm-e2e-python:
    name: Helm E2E (python)
    needs: [pr_metadata, build-gateway, build-supervisor]
    if: needs.pr_metadata.outputs.should_run == 'true'
    runs-on: linux-amd64-cpu8
    timeout-minutes: 60
    permissions:
      contents: read
      packages: read
    env:
      MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      HELM_E2E_CLUSTER_NAME: helm-e2e-${{ github.run_id }}-python
      HELM_E2E_IMAGE_TAG: ${{ github.sha }}
      # NOTE(review): presumably tells the e2e script to reuse the cluster
      # created by the kind-action step below instead of bootstrapping its
      # own — confirm in tasks/scripts/helm-e2e.sh.
      HELM_E2E_SKIP_CLUSTER: "1"
      HELM_E2E_IMAGE_LOADER: kind
    steps:
      - uses: actions/checkout@v6

      - name: Log in to GHCR
        # Credentials are handed over as environment variables so that no
        # expression is expanded inside the shell script text itself.
        env:
          GHCR_USER: ${{ github.actor }}
          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: echo "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USER" --password-stdin

      - name: Install mise
        # An explicit `shell: bash` makes the runner use `-eo pipefail`, so a
        # failed download fails the step instead of piping nothing into sh.
        shell: bash
        run: |
          curl -fsSL https://mise.run | sh
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
          echo "$HOME/.local/share/mise/shims" >> "$GITHUB_PATH"

      - name: Install tools
        run: mise install --locked

      - name: Install Python dependencies
        run: uv sync --frozen && mise run --no-deps python:proto

      - name: Create kind cluster
        uses: helm/kind-action@v1
        with:
          cluster_name: ${{ env.HELM_E2E_CLUSTER_NAME }}
          wait: 120s

      - name: Run Helm E2E (Python)
        run: mise run e2e:helm:python
14 changes: 13 additions & 1 deletion .github/workflows/e2e-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
pull_request:
types: [opened, synchronize, reopened, labeled, unlabeled, ready_for_review]
workflow_run:
workflows: ["Branch E2E Checks", "GPU Test"]
workflows: ["Branch E2E Checks", "GPU Test", "Branch Helm E2E"]
types: [completed]

permissions: {}
Expand Down Expand Up @@ -36,6 +36,18 @@ jobs:
required_label: test:e2e-gpu
workflow_file: test-gpu.yml

# Gate job for the Helm E2E suite. It is only evaluated for `pull_request`
# events and calls the reusable e2e-gate-check workflow, passing the label and
# the workflow file name that belong to this suite.
helm-e2e:
name: Helm E2E
if: github.event_name == 'pull_request'
permissions:
contents: read
pull-requests: read
actions: read
uses: ./.github/workflows/e2e-gate-check.yml
with:
required_label: test:e2e-helm
workflow_file: branch-helm-e2e.yml

# When the guarded workflow finishes, GitHub fires `workflow_run` in the
# default-branch context — any check posted from here would land on `main`,
# not on the PR. Instead, find the latest `pull_request`-triggered gate run
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/e2e-label-help.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ permissions: {}
jobs:
hint:
name: Post next-step hint for E2E label
if: github.event.label.name == 'test:e2e' || github.event.label.name == 'test:e2e-gpu'
if: |
github.event.label.name == 'test:e2e' ||
github.event.label.name == 'test:e2e-gpu' ||
github.event.label.name == 'test:e2e-helm'
runs-on: ubuntu-latest
permissions:
pull-requests: write
Expand All @@ -40,6 +43,7 @@ jobs:
case "$LABEL_NAME" in
test:e2e) workflow_file=branch-e2e.yml; workflow_name="Branch E2E Checks" ;;
test:e2e-gpu) workflow_file=test-gpu.yml; workflow_name="GPU Test" ;;
test:e2e-helm) workflow_file=branch-helm-e2e.yml; workflow_name="Branch Helm E2E" ;;
*) echo "Unrecognized label $LABEL_NAME"; exit 1 ;;
esac

Expand Down
57 changes: 57 additions & 0 deletions .github/workflows/helm-lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

name: Helm Lint

# Runs for pushes to `pull-request/<number>` branches that touch files under
# deploy/helm/, and on manual dispatch.
on:
  push:
    branches:
      - "pull-request/[0-9]+"
    paths:
      - "deploy/helm/**"
  # Explicit empty mapping rather than a bare (null) value.
  workflow_dispatch: {}

env:
  MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

permissions:
  contents: read
  packages: read

# A newer run of this workflow on the same ref cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Runs the local pr-gate action and republishes its `should_run` output so
  # the lint job can be skipped.
  pr_metadata:
    name: Resolve PR metadata
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
    outputs:
      should_run: ${{ steps.gate.outputs.should_run }}
    steps:
      - uses: actions/checkout@v6

      - id: gate
        uses: ./.github/actions/pr-gate

  # Lints the chart inside the project CI image, pulled from GHCR with the
  # workflow token.
  helm-lint:
    name: Helm Lint
    needs: pr_metadata
    if: needs.pr_metadata.outputs.should_run == 'true'
    runs-on: linux-amd64-cpu8
    container:
      image: ghcr.io/nvidia/openshell/ci:latest
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - uses: actions/checkout@v6

      - name: Install tools
        run: mise install --locked

      - name: Lint Helm chart
        run: mise run helm:lint
1 change: 1 addition & 0 deletions deploy/docker/Dockerfile.images
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ WORKDIR /build
COPY Cargo.toml Cargo.lock ./
COPY crates/ crates/
COPY proto/ proto/
COPY providers/ providers/

RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/build/target \
Expand Down
6 changes: 1 addition & 5 deletions deploy/helm/openshell/.helmignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,4 @@

# Ignore development files
skaffold.yaml
values-keycloak.yaml
values-ingress.yaml
values-gateway.yaml
values-cert-manager.yaml
values-skaffold.yaml
ci/
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

# Merge after values.yaml when cert-manager CRDs are installed, e.g.:
# helm install ... -f values.yaml -f values-cert-manager.yaml
# helm install ... -f values.yaml -f ci/values-cert-manager.yaml
# Or add this file to skaffold manifests.helm.releases[].valuesFiles.
server:
disableTls: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# Requires Envoy Gateway in the cluster (installed via skaffold.yaml).
# Add this file to the openshell release valuesFiles to activate:
# uncomment values-gateway.yaml in deploy/helm/openshell/skaffold.yaml
# uncomment ci/values-gateway.yaml in deploy/helm/openshell/skaffold.yaml
#
# Envoy Gateway will create an Envoy proxy Deployment and a LoadBalancer
# Service (named envoy-<namespace>-<gateway-name>-*) in the openshell namespace.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#
# Then layer this file on top of values.yaml when deploying:
# helm upgrade --install openshell . \
# -f values.yaml -f values-skaffold.yaml -f values-keycloak.yaml
# -f values.yaml -f ci/values-skaffold.yaml -f ci/values-keycloak.yaml
#
# Or add this file to skaffold.yaml valuesFiles for iterative dev.
#
Expand Down
10 changes: 10 additions & 0 deletions deploy/helm/openshell/ci/values-tls-disabled.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# CI lint target: TLS disabled (plaintext HTTP, no client cert requirement).
# Typical when a reverse proxy or tunnel terminates TLS at the edge.
# Serve plaintext and turn gateway authentication off for this lint variant.
server:
disableTls: true
disableGatewayAuth: true
# Disable the PKI init job — presumably no certificates need to be generated
# once TLS is off; TODO confirm against the chart templates.
pkiInitJob:
enabled: false
10 changes: 5 additions & 5 deletions deploy/helm/openshell/skaffold.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,16 @@ deploy:
createNamespace: true
valuesFiles:
- values.yaml
- values-skaffold.yaml
# Add values-cert-manager.yaml here (and uncomment the cert-manager
- ci/values-skaffold.yaml
# Add ci/values-cert-manager.yaml here (and uncomment the cert-manager
# release above) to switch from pkiInitJob to cert-manager for PKI.
#- values-cert-manager.yaml
#- ci/values-cert-manager.yaml
# To enable OIDC with a local Keycloak instance, run the one-time
# setup task first, then uncomment the line below:
# mise run keycloak:k8s:setup
#- values-keycloak.yaml
#- ci/values-keycloak.yaml
# To enable the Gateway API GRPCRoute (requires Envoy Gateway above):
#- values-gateway.yaml
#- ci/values-gateway.yaml
setValueTemplates:
image.repository: '{{.IMAGE_REPO_openshell_gateway}}'
image.tag: '{{.IMAGE_TAG_openshell_gateway}}'
Expand Down
36 changes: 33 additions & 3 deletions tasks/helm.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,18 @@
# Helm chart tasks

["helm:lint"]
description = "Lint the openshell helm chart"
run = "helm lint deploy/helm/openshell"
hide = true
# Lints the chart once with its default values, then once more for every
# ci/values-*.yaml overlay passed via `-f`. `set -e` aborts the task at the
# first variant that fails, so the final message is only printed when every
# lint run succeeded.
description = "Lint the openshell Helm chart (defaults + all CI configuration variants)"
run = """
set -e
echo "--- helm lint: defaults ---"
helm lint deploy/helm/openshell
for f in deploy/helm/openshell/ci/values-*.yaml; do
variant=$(basename "$f" .yaml | sed 's/values-//')
echo "--- helm lint: $variant ---"
helm lint deploy/helm/openshell -f "$f"
done
echo "All variants passed."
"""

["helm:skaffold:dev"]
description = "Run skaffold dev for deploy/helm/openshell (iterative deploy)"
Expand Down Expand Up @@ -59,3 +68,24 @@ hide = true
["helm:gateway:apply"]
description = "Apply the Envoy GatewayClass manifest (run after helm:skaffold:run when gateway routing is enabled)"
run = "kubectl apply -f deploy/kube/manifests/envoy-gateway-openshell.yaml"

# Helm e2e — boots a k3d cluster via the Helm path and runs the Rust + Python suites

# Full run: invokes the Helm e2e script with no extra environment set here.
["e2e:helm"]
run = "tasks/scripts/helm-e2e.sh"
description = "Bootstrap Helm k3d cluster and run Rust + Python e2e suites"

# Same script with HELM_E2E_SUITE=rust in its environment.
["e2e:helm:rust"]
run = "tasks/scripts/helm-e2e.sh"
description = "Bootstrap Helm k3d cluster and run Rust e2e only"
env.HELM_E2E_SUITE = "rust"

# Same script with HELM_E2E_SUITE=python in its environment.
["e2e:helm:python"]
run = "tasks/scripts/helm-e2e.sh"
description = "Bootstrap Helm k3d cluster and run Python e2e only"
env.HELM_E2E_SUITE = "python"

# Same script with HELM_E2E_PKI=cert-manager in its environment.
["e2e:helm:cert-manager"]
run = "tasks/scripts/helm-e2e.sh"
description = "Bootstrap Helm k3d cluster with cert-manager PKI and run full e2e"
env.HELM_E2E_PKI = "cert-manager"
Loading
Loading