Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 25 additions & 11 deletions src/microplex_us/pipelines/pe_us_data_rebuild.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ def default_policyengine_us_data_rebuild_source_providers(
puf_demographics_path: str | Path | None = None,
puf_expand_persons: bool = True,
include_donor_surveys: bool = True,
include_acs: bool | None = None,
include_sipp: bool | None = None,
include_scf: bool | None = None,
acs_year: int = 2022,
sipp_year: int = 2023,
scf_year: int = 2022,
Expand Down Expand Up @@ -150,14 +153,22 @@ def default_policyengine_us_data_rebuild_source_providers(
social_security_split_strategy=SOCIAL_SECURITY_SPLIT_STRATEGY_PE_QRF,
),
]
if include_donor_surveys:
resolved_include_acs = include_donor_surveys if include_acs is None else include_acs
resolved_include_sipp = (
include_donor_surveys if include_sipp is None else include_sipp
)
resolved_include_scf = include_donor_surveys if include_scf is None else include_scf
if resolved_include_acs:
providers.append(
ACSSourceProvider(
year=int(acs_year),
policyengine_us_data_repo=policyengine_us_data_repo,
policyengine_us_data_python=policyengine_us_data_python,
)
)
if resolved_include_sipp:
providers.extend(
[
ACSSourceProvider(
year=int(acs_year),
policyengine_us_data_repo=policyengine_us_data_repo,
policyengine_us_data_python=policyengine_us_data_python,
),
SIPPSourceProvider(
block="tips",
year=int(sipp_year),
Expand All @@ -168,13 +179,16 @@ def default_policyengine_us_data_rebuild_source_providers(
year=int(sipp_year),
cache_dir=donor_cache,
),
SCFSourceProvider(
year=int(scf_year),
policyengine_us_data_repo=policyengine_us_data_repo,
policyengine_us_data_python=policyengine_us_data_python,
),
]
)
if resolved_include_scf:
providers.append(
SCFSourceProvider(
year=int(scf_year),
policyengine_us_data_repo=policyengine_us_data_repo,
policyengine_us_data_python=policyengine_us_data_python,
)
)
return tuple(providers)


Expand Down
30 changes: 30 additions & 0 deletions src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -1806,6 +1806,9 @@ def run_policyengine_us_data_rebuild_checkpoint(
puf_demographics_path: str | Path | None = None,
puf_expand_persons: bool = True,
include_donor_surveys: bool = True,
include_acs: bool | None = None,
include_sipp: bool | None = None,
include_scf: bool | None = None,
acs_year: int = 2022,
sipp_year: int = 2023,
scf_year: int = 2022,
Expand Down Expand Up @@ -1888,6 +1891,9 @@ def run_policyengine_us_data_rebuild_checkpoint(
puf_demographics_path=puf_demographics_path,
puf_expand_persons=puf_expand_persons,
include_donor_surveys=include_donor_surveys,
include_acs=include_acs,
include_sipp=include_sipp,
include_scf=include_scf,
acs_year=acs_year,
sipp_year=sipp_year,
scf_year=scf_year,
Expand Down Expand Up @@ -2091,6 +2097,27 @@ def main(argv: list[str] | None = None) -> None:
action=argparse.BooleanOptionalAction,
default=True,
)
parser.add_argument(
"--include-acs",
action=argparse.BooleanOptionalAction,
default=None,
help=(
"Include the ACS donor provider. Defaults to --include-donor-surveys; "
"use --no-include-acs for an eCPS-shaped run that keeps SIPP/SCF."
),
)
parser.add_argument(
"--include-sipp",
action=argparse.BooleanOptionalAction,
default=None,
help="Include SIPP donor providers. Defaults to --include-donor-surveys.",
)
parser.add_argument(
"--include-scf",
action=argparse.BooleanOptionalAction,
default=None,
help="Include the SCF donor provider. Defaults to --include-donor-surveys.",
)
parser.add_argument("--no-cps-download", action="store_true")
parser.add_argument("--no-puf-expand-persons", action="store_true")
parser.add_argument("--defer-policyengine-harness", action="store_true")
Expand Down Expand Up @@ -2235,6 +2262,9 @@ def main(argv: list[str] | None = None) -> None:
puf_demographics_path=args.puf_demographics_path,
puf_expand_persons=not args.no_puf_expand_persons,
include_donor_surveys=args.include_donor_surveys,
include_acs=args.include_acs,
include_sipp=args.include_sipp,
include_scf=args.include_scf,
acs_year=args.acs_year,
sipp_year=args.sipp_year,
scf_year=args.scf_year,
Expand Down
18 changes: 18 additions & 0 deletions tests/pipelines/test_pe_us_data_rebuild.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,24 @@ def test_default_policyengine_us_data_rebuild_source_providers_can_include_donor
assert isinstance(providers[5], SCFSourceProvider)


def test_default_policyengine_us_data_rebuild_source_providers_can_disable_only_acs() -> None:
    """Check that ``include_acs=False`` drops only the ACS donor provider.

    With ``include_donor_surveys=True`` the remaining donor providers must
    still be present, in order: CPS ASEC, PUF, SIPP (tips), SIPP (assets), SCF.
    """
    built = default_policyengine_us_data_rebuild_source_providers(
        include_donor_surveys=True,
        include_acs=False,
        cps_download=False,
    )

    # Expected provider class at each position of the returned sequence.
    expected_classes = (
        CPSASECSourceProvider,
        PUFSourceProvider,
        SIPPSourceProvider,
        SIPPSourceProvider,
        SCFSourceProvider,
    )

    assert len(built) == 5
    for position, expected_class in enumerate(expected_classes):
        assert isinstance(built[position], expected_class)

    # The two SIPP providers are distinguished by their block name.
    tips_provider, assets_provider = built[2], built[3]
    assert tips_provider.block == "tips"
    assert assets_provider.block == "assets"

    # ACS must be absent everywhere, not just at its usual position.
    assert all(not isinstance(candidate, ACSSourceProvider) for candidate in built)


def test_build_policyengine_us_data_rebuild_pipeline_returns_configured_pipeline() -> None:
pipeline = build_policyengine_us_data_rebuild_pipeline(
random_seed=321,
Expand Down
Loading