def sync_release_manifest_versions(manifest_dir: Path, new_version: str):
    """Stamp every bundled release manifest with the new package version.

    For each ``*.json`` file directly inside ``manifest_dir`` the first
    ``"bundle_id"`` value becomes ``<country_id>-<new_version>`` and the first
    ``"policyengine_version"`` value becomes ``new_version``. The edit is a
    textual substitution, so every other byte of the manifest is preserved.

    ``country_id`` is taken from the manifest's own top-level ``country_id``
    field when the file parses as a JSON object with a non-empty string there,
    and from the file stem otherwise.

    All manifests are checked before any of them is written: if one or more
    lack a required field, each problem is reported on stderr and the process
    exits with status 1 without modifying any file.

    Args:
        manifest_dir: Directory holding the release manifests. A directory
            that does not exist is silently ignored.
        new_version: Version string to write into the manifests.

    Raises:
        SystemExit: With code 1 when any manifest lacks ``bundle_id`` or
            ``policyengine_version``.
    """
    import json  # function-scope import; only needed to read country_id

    if not manifest_dir.exists():
        return

    planned = []  # (path, rewritten text) for manifests that actually change
    problems = []  # (path, names of the fields that could not be found)
    for manifest_path in sorted(manifest_dir.glob("*.json")):
        text = manifest_path.read_text(encoding="utf-8")

        country_id = manifest_path.stem
        try:
            declared = json.loads(text).get("country_id")
        except (ValueError, AttributeError):
            # Unparseable or non-object JSON: fall back to the file stem.
            declared = None
        if isinstance(declared, str) and declared:
            country_id = declared

        replacements = {
            "bundle_id": f"{country_id}-{new_version}",
            "policyengine_version": new_version,
        }
        updated = text
        missing_fields = []
        for field, value in replacements.items():
            # A callable replacement inserts ``value`` literally, so a
            # backslash in it is never interpreted as a regex template escape.
            updated, hits = re.subn(
                rf'("{field}"\s*:\s*")[^"]+(")',
                lambda match, value=value: match.group(1) + value + match.group(2),
                updated,
                count=1,
            )
            if hits == 0:
                missing_fields.append(field)

        if missing_fields:
            problems.append((manifest_path, missing_fields))
        elif updated != text:
            planned.append((manifest_path, updated))

    if problems:
        for manifest_path, missing_fields in problems:
            print(
                f"Could not update {manifest_path}: missing fields "
                f"{', '.join(missing_fields)}",
                file=sys.stderr,
            )
        sys.exit(1)

    # Only reached when every manifest validated, so a failure can no longer
    # leave some manifests bumped and others stale.
    for manifest_path, updated in planned:
        manifest_path.write_text(updated, encoding="utf-8")
        print(f" Updated {manifest_path}")
b/changelog.d/release-manifest-version-sync.fixed.md @@ -0,0 +1 @@ +Sync bundled release manifest bundle IDs and `policyengine_version` fields during release version bumps. diff --git a/src/policyengine/data/release_manifests/uk.json b/src/policyengine/data/release_manifests/uk.json index 961defbd..10c0eec3 100644 --- a/src/policyengine/data/release_manifests/uk.json +++ b/src/policyengine/data/release_manifests/uk.json @@ -1,8 +1,8 @@ { "schema_version": 1, - "bundle_id": "uk-4.0.0", + "bundle_id": "uk-4.3.1", "country_id": "uk", - "policyengine_version": "4.0.0", + "policyengine_version": "4.3.1", "model_package": { "name": "policyengine-uk", "version": "2.88.0", diff --git a/src/policyengine/data/release_manifests/us.json b/src/policyengine/data/release_manifests/us.json index fa58fcfa..ef45ee11 100644 --- a/src/policyengine/data/release_manifests/us.json +++ b/src/policyengine/data/release_manifests/us.json @@ -1,8 +1,8 @@ { "schema_version": 1, - "bundle_id": "us-4.0.0", + "bundle_id": "us-4.3.1", "country_id": "us", - "policyengine_version": "4.0.0", + "policyengine_version": "4.3.1", "model_package": { "name": "policyengine-us", "version": "1.667.1", diff --git a/src/policyengine/provenance/__init__.py b/src/policyengine/provenance/__init__.py index b17361c1..a2b37ed1 100644 --- a/src/policyengine/provenance/__init__.py +++ b/src/policyengine/provenance/__init__.py @@ -18,6 +18,9 @@ from .bundle import RefreshResult as RefreshResult from .bundle import refresh_release_bundle as refresh_release_bundle from .bundle import regenerate_trace_tro as regenerate_trace_tro +from .bundle import ( + sync_release_manifest_policyengine_version as sync_release_manifest_policyengine_version, +) from .manifest import ( CertifiedDataArtifact as CertifiedDataArtifact, ) diff --git a/src/policyengine/provenance/bundle.py b/src/policyengine/provenance/bundle.py index fa432913..50ebf173 100644 --- a/src/policyengine/provenance/bundle.py +++ 
REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
MANIFEST_DIR = REPO_ROOT / "src" / "policyengine" / "data" / "release_manifests"
PYPROJECT = REPO_ROOT / "pyproject.toml"
SEMVER_PATTERN = re.compile(r"^(\d+)\.(\d+)\.(\d+)$")
# A ``version = "X.Y.Z"`` assignment starting at column 0 of a line.
_VERSION_LINE = re.compile(r'^version\s*=\s*"(\d+\.\d+\.\d+)"', re.MULTILINE)


# ---------------------------------------------------------------------------
# policyengine.py bundle identity
# ---------------------------------------------------------------------------


def _pyproject_version(pyproject_path: Path) -> str:
    """Return the ``X.Y.Z`` package version declared in ``pyproject_path``.

    The ``version`` key of the ``[project]`` table is preferred, so that a
    ``version = "..."`` line in some other table appearing earlier in the file
    is not mistaken for the package version. If no such key is found there,
    the first matching ``version = "X.Y.Z"`` line anywhere in the file is used.

    Raises:
        ValueError: If the file contains no ``version = "X.Y.Z"`` line.
    """
    text = pyproject_path.read_text(encoding="utf-8")

    in_project_table = False
    for line in text.splitlines():
        if line.startswith("["):
            # Table header; drop any trailing comment before comparing.
            in_project_table = line.split("#", 1)[0].strip() == "[project]"
            continue
        if in_project_table:
            match = _VERSION_LINE.match(line)
            if match is not None:
                return match.group(1)

    match = _VERSION_LINE.search(text)
    if match is None:
        raise ValueError(f"Could not find project version in {pyproject_path}")
    return match.group(1)


def sync_release_manifest_policyengine_version(
    *,
    policyengine_version: Optional[str] = None,
    manifest_dir: Path = MANIFEST_DIR,
    pyproject_path: Path = PYPROJECT,
) -> list[Path]:
    """Sync bundled release manifests to the current ``policyengine.py`` version.

    Country model/data refreshes and package release bumps move through
    different automation paths. This helper keeps the top-level bundle identity
    tied to the package release regardless of which path writes the manifest.

    Every ``*.json`` file in ``manifest_dir`` gets ``policyengine_version`` set
    to the resolved version and ``bundle_id`` set to
    ``<country_id>-<version>``, where ``country_id`` is the manifest's own
    ``country_id`` value or, when that is missing or empty, the file stem.
    Manifests that already carry both values are left untouched.

    All manifests are parsed before any is rewritten, so an unreadable or
    malformed manifest raises without leaving the directory partly updated.

    Args:
        policyengine_version: Version to stamp. When omitted (or empty) it is
            read from ``pyproject_path``.
        manifest_dir: Directory containing the release manifests.
        pyproject_path: ``pyproject.toml`` consulted when no explicit version
            is given.

    Returns:
        The manifests that were rewritten, in sorted path order.

    Raises:
        ValueError: If the version is not exactly ``X.Y.Z``, if it cannot be
            found in ``pyproject_path``, or if a manifest is not valid JSON or
            not a JSON object.
    """
    resolved_version = policyengine_version or _pyproject_version(pyproject_path)
    # fullmatch, not match: ``$`` also matches just before a trailing newline,
    # which would let a value such as "4.3.1\n" through into the manifests.
    if not SEMVER_PATTERN.fullmatch(resolved_version):
        raise ValueError(f"Invalid policyengine version: {resolved_version}")

    pending: list[tuple[Path, dict]] = []
    for manifest_path in sorted(manifest_dir.glob("*.json")):
        manifest_json = json.loads(manifest_path.read_text(encoding="utf-8"))
        if not isinstance(manifest_json, dict):
            raise ValueError(
                f"Release manifest {manifest_path} is not a JSON object"
            )
        country_id = manifest_json.get("country_id") or manifest_path.stem
        expected_bundle_id = f"{country_id}-{resolved_version}"
        if (
            manifest_json.get("policyengine_version") == resolved_version
            and manifest_json.get("bundle_id") == expected_bundle_id
        ):
            continue

        # Assigning to existing keys keeps their position in the JSON object.
        manifest_json["policyengine_version"] = resolved_version
        manifest_json["bundle_id"] = expected_bundle_id
        pending.append((manifest_path, manifest_json))

    for manifest_path, manifest_json in pending:
        manifest_path.write_text(
            json.dumps(manifest_json, indent=2, sort_keys=False) + "\n",
            encoding="utf-8",
        )

    return [manifest_path for manifest_path, _ in pending]
def test_sync_release_manifest_versions_rewrites_bundle_identity(tmp_path):
    """A stale manifest picks up the new bundle id and package version."""
    release_dir = tmp_path / "release_manifests"
    release_dir.mkdir()
    uk_manifest = release_dir / "uk.json"
    stale_lines = (
        "{\n",
        ' "schema_version": 1,\n',
        ' "bundle_id": "uk-4.0.0",\n',
        ' "country_id": "uk",\n',
        ' "policyengine_version": "4.0.0"\n',
        "}\n",
    )
    uk_manifest.write_text("".join(stale_lines))

    bump_version.sync_release_manifest_versions(release_dir, "4.3.2")

    rewritten = uk_manifest.read_text()
    assert '"bundle_id": "uk-4.3.2"' in rewritten
    assert '"policyengine_version": "4.3.2"' in rewritten


def test_sync_release_manifest_versions_fails_when_required_field_missing(tmp_path):
    """A manifest without ``policyengine_version`` aborts and stays unchanged."""
    release_dir = tmp_path / "release_manifests"
    release_dir.mkdir()
    uk_manifest = release_dir / "uk.json"
    incomplete_lines = (
        "{\n",
        ' "schema_version": 1,\n',
        ' "bundle_id": "uk-4.0.0",\n',
        ' "country_id": "uk"\n',
        "}\n",
    )
    uk_manifest.write_text("".join(incomplete_lines))
    snapshot = uk_manifest.read_text()

    with pytest.raises(SystemExit):
        bump_version.sync_release_manifest_versions(release_dir, "4.3.2")

    # The failing manifest must not have been touched on disk.
    assert snapshot == uk_manifest.read_text()
+1,8 @@ """Tests for bundled compatibility manifests and data release manifests.""" import json +import re +from pathlib import Path from unittest.mock import MagicMock, patch from requests import Timeout @@ -25,6 +27,13 @@ ) from policyengine.tax_benefit_models.us import us_latest +PYPROJECT = Path(__file__).resolve().parents[1] / "pyproject.toml" +POLICYENGINE_VERSION = re.search( + r'^version\s*=\s*"([^"]+)"', + PYPROJECT.read_text(), + re.MULTILINE, +).group(1) + def _response_with_json(payload: dict) -> MagicMock: response = MagicMock() @@ -45,9 +54,9 @@ def test__given_us_manifest__then_has_pinned_model_and_data_packages(self): manifest = get_release_manifest("us") assert manifest.schema_version == 1 - assert manifest.bundle_id == "us-4.0.0" + assert manifest.bundle_id == f"us-{POLICYENGINE_VERSION}" assert manifest.country_id == "us" - assert manifest.policyengine_version == "4.0.0" + assert manifest.policyengine_version == POLICYENGINE_VERSION assert manifest.model_package.name == "policyengine-us" assert manifest.model_package.version == "1.667.1" assert manifest.data_package.name == "policyengine-us-data" @@ -67,9 +76,9 @@ def test__given_uk_manifest__then_has_pinned_model_and_data_packages(self): manifest = get_release_manifest("uk") assert manifest.schema_version == 1 - assert manifest.bundle_id == "uk-4.0.0" + assert manifest.bundle_id == f"uk-{POLICYENGINE_VERSION}" assert manifest.country_id == "uk" - assert manifest.policyengine_version == "4.0.0" + assert manifest.policyengine_version == POLICYENGINE_VERSION assert manifest.model_package.name == "policyengine-uk" assert manifest.model_package.version == "2.88.0" assert manifest.data_package.name == "policyengine-uk-data" @@ -408,7 +417,7 @@ def test__given_manifest_certification__then_release_bundle_exposes_it(self): bundle = model_version.release_bundle - assert bundle["bundle_id"] == "uk-4.0.0" + assert bundle["bundle_id"] == f"uk-{POLICYENGINE_VERSION}" assert bundle["default_dataset"] == 
"enhanced_frs_2023_24" assert bundle["default_dataset_uri"] == manifest.default_dataset_uri assert bundle["certified_data_build_id"] == "policyengine-uk-data-1.40.4" @@ -455,7 +464,9 @@ def test__given_us_managed_microsimulation__then_passes_certified_dataset_and_bu dataset = mock_microsimulation.call_args.kwargs["dataset"] assert dataset == microsim.policyengine_bundle["runtime_dataset_source"] - assert microsim.policyengine_bundle["policyengine_version"] == "4.0.0" + assert ( + microsim.policyengine_bundle["policyengine_version"] == POLICYENGINE_VERSION + ) assert microsim.policyengine_bundle["runtime_dataset"] == "enhanced_cps_2024" assert ( microsim.policyengine_bundle["runtime_dataset_uri"] @@ -493,7 +504,9 @@ def test__given_uk_managed_dataset_name__then_resolves_within_bundle(self): "hf://policyengine/policyengine-uk-data-private/" "enhanced_frs_2023_24.h5@1.40.4" ) - assert microsim.policyengine_bundle["policyengine_version"] == "4.0.0" + assert ( + microsim.policyengine_bundle["policyengine_version"] == POLICYENGINE_VERSION + ) assert microsim.policyengine_bundle["runtime_dataset"] == "enhanced_frs_2023_24" assert microsim.policyengine_bundle["runtime_dataset_uri"] == ( "hf://policyengine/policyengine-uk-data-private/enhanced_frs_2023_24.h5@1.40.4"