diff --git a/src/microplex_us/pipelines/pe_us_data_rebuild.py b/src/microplex_us/pipelines/pe_us_data_rebuild.py index 435724c..791cb61 100644 --- a/src/microplex_us/pipelines/pe_us_data_rebuild.py +++ b/src/microplex_us/pipelines/pe_us_data_rebuild.py @@ -103,6 +103,9 @@ def default_policyengine_us_data_rebuild_source_providers( puf_demographics_path: str | Path | None = None, puf_expand_persons: bool = True, include_donor_surveys: bool = True, + include_acs: bool | None = None, + include_sipp: bool | None = None, + include_scf: bool | None = None, acs_year: int = 2022, sipp_year: int = 2023, scf_year: int = 2022, @@ -150,14 +153,22 @@ def default_policyengine_us_data_rebuild_source_providers( social_security_split_strategy=SOCIAL_SECURITY_SPLIT_STRATEGY_PE_QRF, ), ] - if include_donor_surveys: + resolved_include_acs = include_donor_surveys if include_acs is None else include_acs + resolved_include_sipp = ( + include_donor_surveys if include_sipp is None else include_sipp + ) + resolved_include_scf = include_donor_surveys if include_scf is None else include_scf + if resolved_include_acs: + providers.append( + ACSSourceProvider( + year=int(acs_year), + policyengine_us_data_repo=policyengine_us_data_repo, + policyengine_us_data_python=policyengine_us_data_python, + ) + ) + if resolved_include_sipp: providers.extend( [ - ACSSourceProvider( - year=int(acs_year), - policyengine_us_data_repo=policyengine_us_data_repo, - policyengine_us_data_python=policyengine_us_data_python, - ), SIPPSourceProvider( block="tips", year=int(sipp_year), @@ -168,13 +179,16 @@ def default_policyengine_us_data_rebuild_source_providers( year=int(sipp_year), cache_dir=donor_cache, ), - SCFSourceProvider( - year=int(scf_year), - policyengine_us_data_repo=policyengine_us_data_repo, - policyengine_us_data_python=policyengine_us_data_python, - ), ] ) + if resolved_include_scf: + providers.append( + SCFSourceProvider( + year=int(scf_year), + policyengine_us_data_repo=policyengine_us_data_repo, + policyengine_us_data_python=policyengine_us_data_python, + ) + ) return tuple(providers) diff --git a/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py b/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py index 58e5d99..1b5e437 100644 --- a/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py +++ b/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py @@ -1806,6 +1806,9 @@ def run_policyengine_us_data_rebuild_checkpoint( puf_demographics_path: str | Path | None = None, puf_expand_persons: bool = True, include_donor_surveys: bool = True, + include_acs: bool | None = None, + include_sipp: bool | None = None, + include_scf: bool | None = None, acs_year: int = 2022, sipp_year: int = 2023, scf_year: int = 2022, @@ -1888,6 +1891,9 @@ def run_policyengine_us_data_rebuild_checkpoint( puf_demographics_path=puf_demographics_path, puf_expand_persons=puf_expand_persons, include_donor_surveys=include_donor_surveys, + include_acs=include_acs, + include_sipp=include_sipp, + include_scf=include_scf, acs_year=acs_year, sipp_year=sipp_year, scf_year=scf_year, @@ -2091,6 +2097,27 @@ def main(argv: list[str] | None = None) -> None: action=argparse.BooleanOptionalAction, default=True, ) + parser.add_argument( + "--include-acs", + action=argparse.BooleanOptionalAction, + default=None, + help=( + "Include the ACS donor provider. Defaults to --include-donor-surveys; " + "use --no-include-acs for an eCPS-shaped run that keeps SIPP/SCF." + ), + ) + parser.add_argument( + "--include-sipp", + action=argparse.BooleanOptionalAction, + default=None, + help="Include SIPP donor providers. Defaults to --include-donor-surveys.", + ) + parser.add_argument( + "--include-scf", + action=argparse.BooleanOptionalAction, + default=None, + help="Include the SCF donor provider. Defaults to --include-donor-surveys.", + ) parser.add_argument("--no-cps-download", action="store_true") parser.add_argument("--no-puf-expand-persons", action="store_true") parser.add_argument("--defer-policyengine-harness", action="store_true") @@ -2235,6 +2262,9 @@ def main(argv: list[str] | None = None) -> None: puf_demographics_path=args.puf_demographics_path, puf_expand_persons=not args.no_puf_expand_persons, include_donor_surveys=args.include_donor_surveys, + include_acs=args.include_acs, + include_sipp=args.include_sipp, + include_scf=args.include_scf, acs_year=args.acs_year, sipp_year=args.sipp_year, scf_year=args.scf_year, diff --git a/tests/pipelines/test_pe_us_data_rebuild.py b/tests/pipelines/test_pe_us_data_rebuild.py index 16acadf..01cf675 100644 --- a/tests/pipelines/test_pe_us_data_rebuild.py +++ b/tests/pipelines/test_pe_us_data_rebuild.py @@ -166,6 +166,24 @@ def test_default_policyengine_us_data_rebuild_source_providers_can_include_donor assert isinstance(providers[5], SCFSourceProvider) +def test_default_policyengine_us_data_rebuild_source_providers_can_disable_only_acs() -> None: + providers = default_policyengine_us_data_rebuild_source_providers( + include_donor_surveys=True, + include_acs=False, + cps_download=False, + ) + + assert len(providers) == 5 + assert isinstance(providers[0], CPSASECSourceProvider) + assert isinstance(providers[1], PUFSourceProvider) + assert isinstance(providers[2], SIPPSourceProvider) + assert providers[2].block == "tips" + assert isinstance(providers[3], SIPPSourceProvider) + assert providers[3].block == "assets" + assert isinstance(providers[4], SCFSourceProvider) + assert not any(isinstance(provider, ACSSourceProvider) for provider in providers) + + def test_build_policyengine_us_data_rebuild_pipeline_returns_configured_pipeline() -> None: pipeline = build_policyengine_us_data_rebuild_pipeline( random_seed=321,