Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/scripts/dispatch_publication_pipeline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Dispatch the publication pipeline workflow for one run and one commit.
#
# Required environment:
#   RUN_ID      passed to the workflow as the `run_id` field
#   SOURCE_SHA  passed to the workflow as the `source_sha` field
# Optional environment:
#   PIPELINE_WORKFLOW_FILE  workflow to dispatch (default: pipeline.yaml)
#   PIPELINE_WORKFLOW_REF   ref to dispatch on (default: main)
#   GITHUB_STEP_SUMMARY     when set, a Markdown summary table is appended to it
set -euo pipefail

workflow_file="${PIPELINE_WORKFLOW_FILE:-pipeline.yaml}"
workflow_ref="${PIPELINE_WORKFLOW_REF:-main}"

# Abort with a message on stderr when the named variable is unset or empty.
require_env() {
  local name="$1"
  if [[ -z "${!name:-}" ]]; then
    echo "${name} is required" >&2
    exit 1
  fi
}

require_env RUN_ID
require_env SOURCE_SHA

dispatch_args=(
  --ref "${workflow_ref}"
  -f "run_id=${RUN_ID}"
  -f "source_sha=${SOURCE_SHA}"
)
gh workflow run "${workflow_file}" "${dispatch_args[@]}"

summary_file="${GITHUB_STEP_SUMMARY:-}"
if [[ -n "${summary_file}" ]]; then
  {
    printf '%s\n' "## Pipeline Dispatched" ""
    printf '%s\n' "| Field | Value |" "|-------|-------|"
    # printf reuses the format for each label/value pair, one table row each.
    printf '| %s | `%s` |\n' \
      "Run ID" "${RUN_ID}" \
      "Source SHA" "${SOURCE_SHA}" \
      "Workflow" "${workflow_file}" \
      "Workflow ref" "${workflow_ref}"
  } >> "${summary_file}"
fi
86 changes: 86 additions & 0 deletions .github/scripts/resolve_run_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Resolve run context for GitHub Actions workflows."""

from __future__ import annotations

import os
import sys
from pathlib import Path

_REPO_ROOT = Path(__file__).resolve().parents[2]
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))

from policyengine_us_data.utils.run_context import ( # noqa: E402
DEFAULT_MODAL_APP_PREFIX,
RunContext,
build_modal_resource_name,
)


def _append_key_values(path_env: str, values: dict[str, str]) -> None:
    """Append name/value records to the file named by an environment variable.

    The file path is read from the environment variable ``path_env`` (for
    example ``GITHUB_OUTPUT`` or ``GITHUB_ENV``). When that variable is unset
    or empty nothing is written, so the script also runs outside GitHub
    Actions.

    Single-line values are written as ``key=value``. A value containing a line
    break is written with the ``key<<DELIMITER`` multiline form instead, so it
    cannot be parsed as additional, unintended records.

    Args:
        path_env: Name of the environment variable that holds the file path.
        values: Names and values to append, in iteration order.
    """
    output_path = os.environ.get(path_env)
    if not output_path:
        return

    records = []
    for key, value in values.items():
        text = str(value)
        if "\n" in text or "\r" in text:
            # Grow the delimiter until it cannot collide with the payload.
            delimiter = "EOF"
            while delimiter in text:
                delimiter += "_"
            records.append(f"{key}<<{delimiter}\n{text}\n{delimiter}\n")
        else:
            records.append(f"{key}={text}\n")

    with Path(output_path).open("a", encoding="utf-8") as handle:
        handle.writelines(records)


def _resolve_volume_name(env_var: str, run_id: str, prefix: str) -> str:
    """Return the volume name from ``env_var`` or a run-scoped default.

    An unset or empty variable falls back to
    ``build_modal_resource_name(run_id, prefix=prefix)``.
    """
    return os.environ.get(env_var) or build_modal_resource_name(
        run_id,
        prefix=prefix,
    )


def main() -> None:
    """Resolve the run context and publish it to the GitHub Actions files.

    Builds a ``RunContext`` from the environment, derives the pipeline,
    staging and checkpoint volume names (each overridable through its own
    ``US_DATA_*_VOLUME_NAME`` variable), then appends the step outputs to the
    file named by ``GITHUB_OUTPUT``, appends ``context.export_env()`` to the
    file named by ``GITHUB_ENV``, and prints the context as JSON.

    Raises:
        RuntimeError: If the resolved context has no run ID.
    """
    # Workflow expressions pass unset inputs as empty strings, so an empty
    # value is treated the same as a missing one rather than as an override.
    app_prefix = os.environ.get("US_DATA_MODAL_APP_PREFIX") or DEFAULT_MODAL_APP_PREFIX
    context = RunContext.from_env(modal_app_prefix=app_prefix)
    if not context.run_id:
        raise RuntimeError(
            "Could not resolve run ID. Set US_DATA_RUN_ID or run "
            "inside GitHub Actions with GITHUB_RUN_ID."
        )

    volume_names = {
        "pipeline_volume_name": _resolve_volume_name(
            "US_DATA_PIPELINE_VOLUME_NAME",
            context.run_id,
            "pipeline-artifacts",
        ),
        "staging_volume_name": _resolve_volume_name(
            "US_DATA_STAGING_VOLUME_NAME",
            context.run_id,
            "local-area-staging",
        ),
        "checkpoint_volume_name": _resolve_volume_name(
            "US_DATA_CHECKPOINT_VOLUME_NAME",
            context.run_id,
            "data-build-checkpoints",
        ),
    }
    # Rebuild the context so the volume names travel with it.
    context = RunContext.from_mapping(
        {**context.to_dict(), **volume_names},
        modal_app_name=context.modal_app_name,
        modal_environment=context.modal_environment,
    )

    outputs = {
        "run_id": context.run_id,
        "modal_app_name": context.modal_app_name,
        "modal_environment": context.modal_environment,
        "hf_staging_prefix": context.hf_staging_prefix,
        "github_run_url": context.github_run_url,
        "pipeline_volume_name": context.pipeline_volume_name,
        "staging_volume_name": context.staging_volume_name,
        "checkpoint_volume_name": context.checkpoint_volume_name,
    }
    _append_key_values("GITHUB_OUTPUT", outputs)
    _append_key_values("GITHUB_ENV", context.export_env())
    print(context.to_json())


if __name__ == "__main__":
    main()
33 changes: 29 additions & 4 deletions .github/scripts/spawn_modal_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
import os
import sys
from pathlib import Path

import modal

_REPO_ROOT = Path(__file__).resolve().parents[2]
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))

from policyengine_us_data.utils.run_context import RunContext # noqa: E402


def _as_bool(value: str) -> bool:
    """Return True only when ``value`` spells "true", ignoring letter case."""
    lowered = value.lower()
    return lowered == "true"


def _append_summary(function_call_id: str) -> None:
def _append_summary(function_call_id: str, context: RunContext) -> None:
summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if not summary_path:
return
Expand All @@ -23,13 +30,20 @@ def _append_summary(function_call_id: str) -> None:
f"`{os.environ['EPOCHS']}` / "
f"`{os.environ['NATIONAL_EPOCHS']}` |\n"
)
handle.write(f"| Run ID | `{context.run_id}` |\n")
handle.write(f"| Modal app | `{context.modal_app_name}` |\n")
handle.write(f"| Modal environment | `{context.modal_environment}` |\n")
handle.write(f"| HF staging | `{context.hf_staging_prefix}` |\n")
if os.environ.get("SOURCE_SHA"):
handle.write(f"| Source SHA | `{os.environ['SOURCE_SHA']}` |\n")
handle.write(f"| Function call ID | `{function_call_id}` |\n\n")
handle.write("**[Monitor on Modal Dashboard](https://modal.com/apps)**\n")


def main() -> None:
app_name = os.environ.get("MODAL_APP_NAME", "policyengine-us-data-pipeline")
environment_name = os.environ.get("MODAL_ENVIRONMENT")
context = RunContext.from_env()
app_name = context.modal_app_name or "policyengine-us-data-pipeline"
environment_name = context.modal_environment or os.environ.get("MODAL_ENVIRONMENT")
kwargs = {
"branch": os.environ.get("PIPELINE_BRANCH", "main"),
"gpu": os.environ["GPU"],
Expand All @@ -39,6 +53,11 @@ def main() -> None:
"skip_national": _as_bool(os.environ["SKIP_NATIONAL"]),
"resume_run_id": os.environ.get("RESUME_RUN_ID") or None,
"version_override": os.environ.get("VERSION_OVERRIDE", ""),
"sha_override": os.environ.get("SOURCE_SHA", ""),
"run_id": context.run_id,
"run_context": context.to_dict(),
"modal_app_name": context.modal_app_name,
"modal_environment": context.modal_environment,
}
if environment_name:
run_pipeline = modal.Function.from_name(
Expand All @@ -50,8 +69,14 @@ def main() -> None:
run_pipeline = modal.Function.from_name(app_name, "run_pipeline")
function_call = run_pipeline.spawn(**kwargs)
print("Pipeline spawned.")
print(f"Run ID: {context.run_id}")
print(f"Modal app: {app_name}")
print(f"Modal environment: {environment_name}")
print(f"HF staging prefix: {context.hf_staging_prefix}")
if os.environ.get("SOURCE_SHA"):
print(f"Source SHA: {os.environ['SOURCE_SHA']}")
print(f"Function call ID: {function_call.object_id}")
_append_summary(function_call.object_id)
_append_summary(function_call.object_id, context)


if __name__ == "__main__":
Expand Down
28 changes: 21 additions & 7 deletions .github/workflows/pipeline.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
name: Run Pipeline

on:
push:
branches: [main]
workflow_dispatch:
inputs:
gpu:
Expand Down Expand Up @@ -33,19 +31,30 @@ on:
description: "Override version (default: read from pyproject.toml)"
default: ""
type: string
run_id:
description: "Run ID to use across GitHub, Modal, and HF staging"
default: ""
type: string
source_sha:
description: "Exact policyengine-us-data commit SHA to deploy"
default: ""
type: string

concurrency:
group: pipeline-main
group: pipeline-${{ github.run_id }}-${{ github.run_attempt }}
cancel-in-progress: false

jobs:
pipeline:
runs-on: ubuntu-latest
if: >-
github.event_name == 'workflow_dispatch' ||
github.event.head_commit.message == 'Update package version'
env:
MODAL_ENVIRONMENT: main
US_DATA_MODAL_APP_PREFIX: policyengine-us-data-pub
US_DATA_RUN_ID: ${{ inputs.run_id || '' }}
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.source_sha || github.sha }}

- uses: actions/setup-python@v5
with:
Expand All @@ -54,6 +63,10 @@ jobs:
- name: Install Modal Runner Deps
run: pip install modal pandas

- name: Resolve run context
id: run-context
run: python .github/scripts/resolve_run_context.py

- name: Deploy and launch pipeline on Modal
env:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
Expand All @@ -66,6 +79,7 @@ jobs:
SKIP_NATIONAL: ${{ inputs.skip_national || 'false' }}
RESUME_RUN_ID: ${{ inputs.resume_run_id || '' }}
VERSION_OVERRIDE: ${{ inputs.version_override || '' }}
SOURCE_SHA: ${{ inputs.source_sha || github.sha }}
run: |
modal deploy modal_app/pipeline.py
modal deploy --env="${MODAL_ENVIRONMENT}" --name="${MODAL_APP_NAME}" --tag="${RUN_ID}" modal_app/pipeline.py
python .github/scripts/spawn_modal_pipeline.py
58 changes: 56 additions & 2 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,56 @@ jobs:
- run: pip install ruff>=0.9.0
- run: ruff format --check .

# Resolves the run context once per push by running resolve_run_context.py.
# The step's `run_id` and `github_run_url` outputs are re-exported as job
# outputs so that dependent jobs can read them through `needs.run-context`.
# Skipped for the automated 'Update package version' commit.
run-context:
name: Run context
runs-on: ubuntu-latest
if: github.event.head_commit.message != 'Update package version'
outputs:
run_id: ${{ steps.run-context.outputs.run_id }}
github_run_url: ${{ steps.run-context.outputs.github_run_url }}
env:
MODAL_ENVIRONMENT: main
US_DATA_MODAL_APP_PREFIX: policyengine-us-data-pub
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.14"
- name: Resolve run context
id: run-context
run: python .github/scripts/resolve_run_context.py

# ── Dataset build ───────────────────────────────────────────
build-datasets:
name: Build datasets
runs-on: ubuntu-latest
needs: lint
needs:
- lint
- run-context
if: github.event.head_commit.message != 'Update package version'
env:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
MODAL_ENVIRONMENT: main
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
US_DATA_RUN_ID: ${{ needs.run-context.outputs.run_id }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.14"
- run: pip install modal
- name: Resolve run context
id: run-context
env:
US_DATA_MODAL_APP_PREFIX: policyengine-us-data-build
run: python .github/scripts/resolve_run_context.py
- name: Build datasets on Modal
run: |
modal run --env="${MODAL_ENVIRONMENT}" modal_app/data_build.py \
--upload \
--branch=${{ github.ref_name }}
--branch=${{ github.ref_name }} \
--run-id="${RUN_ID}"

# ── Documentation ──────────────────────────────────────────
docs:
Expand Down Expand Up @@ -67,7 +95,10 @@ jobs:
versioning:
name: Versioning
runs-on: ubuntu-latest
needs: run-context
if: github.event.head_commit.message != 'Update package version'
outputs:
version_sha: ${{ steps.version-commit.outputs.sha }}
steps:
- name: Generate GitHub App token
id: app-token
Expand Down Expand Up @@ -95,6 +126,29 @@ jobs:
with:
add: "."
message: Update package version
- name: Capture version commit
id: version-commit
run: echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"

# ── Full publication pipeline ───────────────────────────────
launch-pipeline:
  name: Launch publication pipeline
  runs-on: ubuntu-latest
  needs:
    - run-context
    - build-datasets
    - versioning
  if: github.event.head_commit.message != 'Update package version'
  permissions:
    actions: write
    contents: read
  steps:
    # The dispatch script lives in the repository, so it must be checked out
    # before it can be executed; the checkout also gives `gh` the repository
    # context it uses to locate the workflow being dispatched.
    - uses: actions/checkout@v4
    - name: Dispatch pipeline workflow
      env:
        GH_TOKEN: ${{ github.token }}
        RUN_ID: ${{ needs.run-context.outputs.run_id }}
        SOURCE_SHA: ${{ needs.versioning.outputs.version_sha }}
      run: bash .github/scripts/dispatch_publication_pipeline.sh

# ── PyPI publish (version bump commits only) ────────────────
publish:
Expand Down
1 change: 1 addition & 0 deletions changelog.d/phase-3c-publication-context.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add run-scoped publication identity for GitHub, Modal, and Hugging Face staging.
Loading
Loading