diff --git a/changelog.d/764.fixed.md b/changelog.d/764.fixed.md new file mode 100644 index 000000000..e50c1f38b --- /dev/null +++ b/changelog.d/764.fixed.md @@ -0,0 +1 @@ +Make the 2025 ACA take-up override state-aware and spending-aware, allowing the final vector to add or remove tax-unit take-up by state against checked-in APTC enrollment and spending targets. diff --git a/policyengine_us_data/calibration/diagnose_aca_state_targets.py b/policyengine_us_data/calibration/diagnose_aca_state_targets.py new file mode 100644 index 000000000..7cb20c43a --- /dev/null +++ b/policyengine_us_data/calibration/diagnose_aca_state_targets.py @@ -0,0 +1,484 @@ +"""Diagnose state ACA target fit for local-area H5 files. + +Usage: + python -m policyengine_us_data.calibration.diagnose_aca_state_targets + python -m policyengine_us_data.calibration.diagnose_aca_state_targets \ + --run-id 1.89.1_a2f3bb36_20260430_205113 + python -m policyengine_us_data.calibration.diagnose_aca_state_targets \ + --h5-prefix /tmp/staging/states --states NY,MN,FL,TX +""" + +import argparse +import gc +from pathlib import Path + +import numpy as np +import pandas as pd + +from policyengine_us_data.calibration.calibration_utils import STATE_CODES +from policyengine_us_data.datasets.cps.enhanced_cps import ( + _get_base_aca_takeup, + _get_period_array, + create_aca_2025_takeup_override, +) +from policyengine_us_data.storage import STORAGE_FOLDER + +DEFAULT_HF_PREFIX = "hf://policyengine/policyengine-us-data/staging/states" +STATE_ABBRS = sorted(STATE_CODES.values()) +OPTIONAL_BLOCKERS = { + "marketplace_people": "has_marketplace_health_coverage", + "premium_people": "pays_aca_premium", + "medicaid_blocked_people": "is_medicaid_eligible", + "chip_blocked_people": "is_chip_eligible", + "eshi_blocked_people": "is_aca_eshi_eligible", + "medicare_blocked_people": "is_medicare_eligible", + "bhp_blocked_people": "is_basic_health_program_eligible", + "immigration_blocked_people": "is_aca_ptc_immigration_status_eligible", + "taxpayer_tin_blocked_people": "taxpayer_has_tin", +} + + +def _array(values, dtype=None) -> np.ndarray: + if hasattr(values, "values"): + values = values.values + return np.asarray(values, dtype=dtype) + + +def _state_code_array(values) -> np.ndarray: + return np.asarray( + [ + value.decode("utf-8") if isinstance(value, bytes) else str(value) + for value in values + ] + ) + + +def _weighted_count(mask: np.ndarray, weights: np.ndarray) -> float: + return float(np.dot(np.asarray(mask, dtype=np.float64), weights)) + + +def _weighted_sum(values: np.ndarray, weights: np.ndarray, mask: np.ndarray) -> float: + return float(np.dot(np.where(mask, values, 0), weights)) + + +def _percent_error(value: float, target: float) -> float: + if target == 0: + return np.nan + return 100 * (value - target) / target + + +def _target_path(period: int) -> Path: + return ( + STORAGE_FOLDER + / "calibration_targets" + / f"aca_spending_and_enrollment_{period}.csv" + ) + + +def _load_targets(period: int) -> pd.DataFrame: + targets = pd.read_csv(_target_path(period)) + targets["annual_spending"] = targets["spending"] * 12 + return targets + + +def _resolve_h5_path(prefix: str, state: str) -> str: + if prefix.startswith("hf://"): + return f"{prefix.rstrip('/')}/{state}.h5" + return str(Path(prefix) / f"{state}.h5") + + +def _parse_states(states: str) -> list[str]: + if states == "": + return STATE_ABBRS + return [state.strip().upper() for state in states.split(",") if state.strip()] + + +def _person_tax_unit_indices( + person_tax_unit_ids: np.ndarray, + tax_unit_ids: np.ndarray, +) -> np.ndarray: + tax_unit_id_to_idx = { + int(tax_unit_id): idx for idx, tax_unit_id in enumerate(tax_unit_ids) + } + return np.array( + [tax_unit_id_to_idx[int(tax_unit_id)] for tax_unit_id in person_tax_unit_ids], + dtype=np.int64, + ) + + +def _optional_person_bool(sim, variable: str, period: int) -> np.ndarray | None: + try: + return _array( + sim.calculate( + variable, + map_to="person", + period=period, + use_weights=False, + ), + dtype=bool, + ) + except Exception: + return None + + +def _delete_if_cached(sim, variable: str) -> None: + try: + sim.delete_arrays(variable) + except Exception: + pass + + +def _assigned_aca_spending( + sim, + period: int, + takeup: np.ndarray, + household_weights: np.ndarray, + household_in_state: np.ndarray, +) -> float: + sim.set_input("takes_up_aca_if_eligible", period, takeup.astype(bool, copy=False)) + _delete_if_cached(sim, "assigned_aca_ptc") + assigned_aca_ptc = _array( + sim.calculate( + "assigned_aca_ptc", + map_to="household", + period=period, + use_weights=False, + ), + dtype=np.float64, + ) + return _weighted_sum(assigned_aca_ptc, household_weights, household_in_state) + + +def _diagnose_state( + state: str, + h5_path: str, + targets_by_state: pd.DataFrame, + period: int, +): + from policyengine_us import Microsimulation + + target_row = targets_by_state.loc[state] + target_enrollment = float(target_row.enrollment) + target_annual_spending = float(target_row.annual_spending) + + sim = Microsimulation(dataset=h5_path) + data = sim.dataset.load_dataset() + base_year = int(str(sim.default_calculation_period)) + + tax_unit_ids = _get_period_array(data["tax_unit_id"], base_year) + person_tax_unit_ids = _get_period_array(data["person_tax_unit_id"], base_year) + person_tax_unit_idx = _person_tax_unit_indices( + person_tax_unit_ids=person_tax_unit_ids, + tax_unit_ids=tax_unit_ids, + ) + base_takeup = _get_base_aca_takeup( + data=data, + base_year=base_year, + tax_unit_count=len(tax_unit_ids), + ) + + if "household_weight" in data: + sim.set_input( + "household_weight", + base_year, + _get_period_array(data["household_weight"], base_year).astype(np.float32), + ) + + sim.set_input( + "takes_up_aca_if_eligible", + period, + np.ones(len(tax_unit_ids), dtype=bool), + ) + _delete_if_cached(sim, "aca_ptc") + _delete_if_cached(sim, "assigned_aca_ptc") + + person_weights = _array( + sim.calculate("person_weight", period=period, use_weights=False), + dtype=np.float64, + ) + household_weights = _array( + sim.calculate( + "household_weight", + map_to="household", + period=period, + use_weights=False, + ), + dtype=np.float64, + ) + person_state = _state_code_array( + _array( + sim.calculate( + "state_code", + map_to="person", + period=period, + use_weights=False, + ) + ) + ) + household_state = _state_code_array( + _array( + sim.calculate( + "state_code", + map_to="household", + period=period, + use_weights=False, + ) + ) + ) + person_in_state = person_state == state + household_in_state = household_state == state + + aca_ptc_person = _array( + sim.calculate( + "aca_ptc", + map_to="person", + period=period, + use_weights=False, + ), + dtype=np.float64, + ) + aca_ptc_household = _array( + sim.calculate( + "aca_ptc", + map_to="household", + period=period, + use_weights=False, + ), + dtype=np.float64, + ) + aca_ptc_tax_unit = _array( + sim.calculate( + "aca_ptc", + period=period, + use_weights=False, + ), + dtype=np.float64, + ) + tax_unit_weights = _array( + sim.calculate( + "tax_unit_weight", + period=period, + use_weights=False, + ), + dtype=np.float64, + ) + is_aca_eligible = _array( + sim.calculate( + "is_aca_ptc_eligible", + map_to="person", + period=period, + use_weights=False, + ), + dtype=bool, + ) + + potential = person_in_state & (aca_ptc_person > 0) + loss_concept = potential & is_aca_eligible + base_selected = potential & base_takeup[person_tax_unit_idx] + adjusted_takeup = create_aca_2025_takeup_override( + base_takeup=base_takeup, + person_enrolled_if_takeup=aca_ptc_person > 0, + person_weights=person_weights, + person_tax_unit_ids=person_tax_unit_ids, + tax_unit_ids=tax_unit_ids, + person_state_codes=person_state, + target_people_by_state={state: target_enrollment}, + tax_unit_aca_ptc=aca_ptc_tax_unit, + tax_unit_weights=tax_unit_weights, + target_spending_by_state={state: target_annual_spending}, + ) + adjusted_selected = potential & adjusted_takeup[person_tax_unit_idx] + potential_people = _weighted_count(potential, person_weights) + base_selected_people = _weighted_count(base_selected, person_weights) + adjusted_selected_people = _weighted_count(adjusted_selected, person_weights) + loss_concept_people = _weighted_count(loss_concept, person_weights) + + row = { + "state": state, + "status": "ok", + "h5_path": h5_path, + "target_enrollment": target_enrollment, + "target_annual_spending": target_annual_spending, + "potential_people": potential_people, + "potential_gap": potential_people - target_enrollment, + "potential_error_pct": _percent_error(potential_people, target_enrollment), + "loss_concept_people": loss_concept_people, + "loss_concept_error_pct": _percent_error( + loss_concept_people, + target_enrollment, + ), + "base_selected_people": base_selected_people, + "base_selected_error_pct": _percent_error( + base_selected_people, + target_enrollment, + ), + "adjusted_selected_people": adjusted_selected_people, + "adjusted_selected_error_pct": _percent_error( + adjusted_selected_people, + target_enrollment, + ), + "aca_ptc_spending": _weighted_sum( + aca_ptc_household, + household_weights, + household_in_state, + ), + "base_assigned_aca_ptc_spending": _assigned_aca_spending( + sim, + period, + base_takeup, + household_weights, + household_in_state, + ), + "adjusted_assigned_aca_ptc_spending": _assigned_aca_spending( + sim, + period, + adjusted_takeup, + household_weights, + household_in_state, + ), + } + row["aca_ptc_spending_error_pct"] = _percent_error( + row["aca_ptc_spending"], + target_annual_spending, + ) + row["adjusted_assigned_spending_error_pct"] = _percent_error( + row["adjusted_assigned_aca_ptc_spending"], + target_annual_spending, + ) + + for column, variable in OPTIONAL_BLOCKERS.items(): + values = _optional_person_bool(sim, variable, period) + if values is None: + row[column] = np.nan + row[f"marketplace_{column}"] = np.nan + continue + if variable in { + "is_aca_ptc_immigration_status_eligible", + "taxpayer_has_tin", + }: + values = ~values + row[column] = _weighted_count(person_in_state & values, person_weights) + marketplace = _optional_person_bool( + sim, + "has_marketplace_health_coverage", + period, + ) + if marketplace is None: + row[f"marketplace_{column}"] = np.nan + else: + row[f"marketplace_{column}"] = _weighted_count( + person_in_state & marketplace & values, + person_weights, + ) + + return row + + +def main(argv=None) -> int: + parser = argparse.ArgumentParser( + description="Diagnose state ACA enrollment and spending target fit." + ) + parser.add_argument( + "--h5-prefix", + "--hf-prefix", + default=DEFAULT_HF_PREFIX, + help=f"Path prefix for state H5 files (default: {DEFAULT_HF_PREFIX})", + ) + parser.add_argument( + "--run-id", + default="", + help="Run ID to scope HF staging prefix (e.g. staging/{run_id}/states/...)", + ) + parser.add_argument( + "--states", + default="", + help="Comma-separated states to diagnose. Defaults to all states.", + ) + parser.add_argument("--period", type=int, default=2025) + parser.add_argument( + "--output", + default="aca_state_diagnostics.csv", + help="Output CSV path.", + ) + args = parser.parse_args(argv) + + if args.run_id and args.h5_prefix == DEFAULT_HF_PREFIX: + args.h5_prefix = ( + f"hf://policyengine/policyengine-us-data/staging/{args.run_id}/states" + ) + + targets = _load_targets(args.period).set_index("state") + states = _parse_states(args.states) + rows = [] + for index, state in enumerate(states, start=1): + h5_path = _resolve_h5_path(args.h5_prefix, state) + print(f"[{index}/{len(states)}] {state}...", end=" ", flush=True) + try: + rows.append( + _diagnose_state( + state=state, + h5_path=h5_path, + targets_by_state=targets, + period=args.period, + ) + ) + print("OK") + except Exception as exc: + print(f"FAILED: {exc}") + rows.append( + { + "state": state, + "status": "failed", + "h5_path": h5_path, + "error": str(exc), + } + ) + gc.collect() + + df = pd.DataFrame(rows) + output_path = Path(args.output) + df.to_csv(output_path, index=False) + ok = df[df["status"] == "ok"].copy() + + if not ok.empty: + summary_columns = [ + "state", + "target_enrollment", + "potential_people", + "adjusted_selected_people", + "adjusted_selected_error_pct", + "aca_ptc_spending_error_pct", + "adjusted_assigned_spending_error_pct", + ] + print("\nACA state diagnostics:") + print( + ok[summary_columns] + .sort_values("adjusted_selected_error_pct", key=np.abs, ascending=False) + .to_string(index=False) + ) + + shortages = ok.sort_values("potential_gap").head(10) + print("\nLargest potential shortfalls:") + print( + shortages[ + [ + "state", + "target_enrollment", + "potential_people", + "potential_gap", + "potential_error_pct", + ] + ].to_string(index=False) + ) + + failures = df[df["status"] != "ok"] + if not failures.empty: + print("\nFailures:") + print(failures[["state", "error"]].to_string(index=False)) + + print(f"\nSaved diagnostics to {output_path}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py index 57455617a..25d320afc 100644 --- a/policyengine_us_data/datasets/cps/enhanced_cps.py +++ b/policyengine_us_data/datasets/cps/enhanced_cps.py @@ -18,6 +18,7 @@ from policyengine_us_data.utils.randomness import seeded_rng from policyengine_us_data.utils.takeup import ( ACA_POST_CALIBRATION_PERSON_TARGETS, + adjust_aca_takeup_to_state_targets, extend_aca_takeup_to_match_target, ) import logging @@ -68,6 +69,53 @@ def _set_period_array( period_values[period] = values +def _load_aca_enrollment_targets(period: int) -> dict[str, float] | None: + path = ( + STORAGE_FOLDER + / "calibration_targets" + / f"aca_spending_and_enrollment_{period}.csv" + ) + if not path.exists(): + return None + targets = pd.read_csv(path) + return { + str(row.state): float(row.enrollment) for row in targets.itertuples(index=False) + } + + +def _load_aca_spending_targets(period: int) -> dict[str, float] | None: + path = ( + STORAGE_FOLDER + / "calibration_targets" + / f"aca_spending_and_enrollment_{period}.csv" + ) + if not path.exists(): + return None + targets = pd.read_csv(path) + return { + str(row.state): float(row.spending) * 12 + for row in targets.itertuples(index=False) + } + + +def _normalise_state_code(value) -> str: + if isinstance(value, bytes): + return value.decode("utf-8") + return str(value) + + +def _tax_unit_state_codes( + person_state_codes: np.ndarray, + person_tax_unit_idx: np.ndarray, + tax_unit_count: int, +) -> np.ndarray: + state_codes = np.full(tax_unit_count, "", dtype=object) + for state_code, tax_unit_idx in zip(person_state_codes, person_tax_unit_idx): + if state_codes[tax_unit_idx] == "": + state_codes[tax_unit_idx] = _normalise_state_code(state_code) + return state_codes + + def create_aca_2025_takeup_override( base_takeup: np.ndarray, person_enrolled_if_takeup: np.ndarray, @@ -75,8 +123,13 @@ def create_aca_2025_takeup_override( person_tax_unit_ids: np.ndarray, tax_unit_ids: np.ndarray, target_people: float = ACA_POST_CALIBRATION_PERSON_TARGETS[2025], + person_state_codes: np.ndarray | None = None, + target_people_by_state: dict[str, float] | None = None, + tax_unit_aca_ptc: np.ndarray | None = None, + tax_unit_weights: np.ndarray | None = None, + target_spending_by_state: dict[str, float] | None = None, ) -> np.ndarray: - """Add 2025 ACA takers until weighted APTC enrollment hits target.""" + """Set 2025 ACA take-up to match APTC enrollment targets.""" tax_unit_id_to_idx = { int(tax_unit_id): idx for idx, tax_unit_id in enumerate(tax_unit_ids) } @@ -92,6 +145,35 @@ def create_aca_2025_takeup_override( ) draws = seeded_rng("takes_up_aca_if_eligible").random(len(tax_unit_ids)) + if target_people_by_state is not None: + if person_state_codes is None: + raise ValueError( + "person_state_codes are required for state-level ACA targets" + ) + assigned_spending_weights = None + if target_spending_by_state is not None: + if tax_unit_aca_ptc is None or tax_unit_weights is None: + raise ValueError( + "tax_unit_aca_ptc and tax_unit_weights are required for " + "state-level ACA spending targets" + ) + assigned_spending_weights = np.asarray( + tax_unit_aca_ptc, dtype=np.float64 + ) * np.asarray(tax_unit_weights, dtype=np.float64) + return adjust_aca_takeup_to_state_targets( + base_takeup=np.asarray(base_takeup, dtype=bool), + entity_draws=draws, + enrolled_person_weights=enrolled_person_weights, + entity_state_codes=_tax_unit_state_codes( + person_state_codes=person_state_codes, + person_tax_unit_idx=person_tax_unit_idx, + tax_unit_count=len(tax_unit_ids), + ), + target_people_by_state=target_people_by_state, + assigned_spending_weights=assigned_spending_weights, + target_spending_by_state=target_spending_by_state, + ) + return extend_aca_takeup_to_match_target( base_takeup=np.asarray(base_takeup, dtype=bool), entity_draws=draws, @@ -342,6 +424,30 @@ def generate(self): base_year, ), tax_unit_ids=_get_period_array(data["tax_unit_id"], base_year), + person_state_codes=np.asarray( + sim.calculate( + "state_code", + map_to="person", + period=2025, + use_weights=False, + ) + ), + target_people_by_state=_load_aca_enrollment_targets(2025), + tax_unit_aca_ptc=np.asarray( + sim.calculate( + "aca_ptc", + period=2025, + use_weights=False, + ) + ), + tax_unit_weights=np.asarray( + sim.calculate( + "tax_unit_weight", + period=2025, + use_weights=False, + ) + ), + target_spending_by_state=_load_aca_spending_targets(2025), ), ) diff --git a/policyengine_us_data/utils/takeup.py b/policyengine_us_data/utils/takeup.py index 84ee94bc7..cb19ffdcf 100644 --- a/policyengine_us_data/utils/takeup.py +++ b/policyengine_us_data/utils/takeup.py @@ -84,8 +84,11 @@ if spec.get("target") is not None } -# CMS 2025 Marketplace OEP State-Level Public Use File, Total / All row. -# This is the number of consumers receiving APTC in plan year 2025. +# Fallback national CMS 2025 Marketplace OEP State-Level Public Use File, +# Total / All row. This is the number of consumers receiving APTC in plan +# year 2025. Prefer state-level targets when available so the final ACA +# take-up vector does not reallocate enrollment across states after +# calibration. ACA_POST_CALIBRATION_PERSON_TARGETS = { 2025: 22_380_137, } @@ -367,6 +370,168 @@ def extend_aca_takeup_to_match_target( return result +def _closest_prefix_length( + current_people: float, + cumulative_people: np.ndarray, + target_people: float, +) -> int: + estimates = np.concatenate( + [np.array([current_people], dtype=np.float64), cumulative_people] + ) + return int(np.argmin(np.abs(estimates - target_people))) + + +def adjust_aca_takeup_to_match_target( + base_takeup: np.ndarray, + entity_draws: np.ndarray, + enrolled_person_weights: np.ndarray, + target_people: float, +) -> np.ndarray: + """Add or remove ACA takers to get closest to an enrollment target.""" + result = base_takeup.copy() + enrolled_person_weights = np.asarray(enrolled_person_weights, dtype=np.float64) + entity_draws = np.asarray(entity_draws, dtype=np.float64) + current_people = float(enrolled_person_weights[result].sum()) + if np.isclose(current_people, target_people): + return result + + if current_people < target_people: + candidate_mask = (~result) & (enrolled_person_weights > 0) + if not candidate_mask.any(): + return result + candidate_idx = np.flatnonzero(candidate_mask) + ordered_idx = candidate_idx[ + np.argsort(entity_draws[candidate_idx], kind="stable") + ] + cumulative_people = current_people + np.cumsum( + enrolled_person_weights[ordered_idx] + ) + n_to_add = _closest_prefix_length( + current_people, + cumulative_people, + target_people, + ) + result[ordered_idx[:n_to_add]] = True + return result + + candidate_mask = result & (enrolled_person_weights > 0) + if not candidate_mask.any(): + return result + candidate_idx = np.flatnonzero(candidate_mask) + ordered_idx = candidate_idx[np.argsort(-entity_draws[candidate_idx], kind="stable")] + cumulative_people = current_people - np.cumsum(enrolled_person_weights[ordered_idx]) + n_to_remove = _closest_prefix_length( + current_people, + cumulative_people, + target_people, + ) + result[ordered_idx[:n_to_remove]] = False + return result + + +def adjust_aca_takeup_to_match_enrollment_and_spending_targets( + base_takeup: np.ndarray, + entity_draws: np.ndarray, + enrolled_person_weights: np.ndarray, + assigned_spending_weights: np.ndarray, + target_people: float, + target_spending: float, +) -> np.ndarray: + """Set ACA takers to match enrollment and target average PTC.""" + enrolled_person_weights = np.asarray(enrolled_person_weights, dtype=np.float64) + assigned_spending_weights = np.asarray(assigned_spending_weights, dtype=np.float64) + if len(assigned_spending_weights) != len(enrolled_person_weights): + raise ValueError("spending weights and person weights must align") + + result = np.zeros(len(base_takeup), dtype=bool) + candidate_mask = enrolled_person_weights > 0 + if not candidate_mask.any() or target_people <= 0: + return result + + potential_people = float(enrolled_person_weights[candidate_mask].sum()) + if potential_people <= target_people: + result[candidate_mask] = True + return result + + target_average_spending = target_spending / target_people + candidate_idx = np.flatnonzero(candidate_mask) + average_spending = ( + assigned_spending_weights[candidate_idx] + / enrolled_person_weights[candidate_idx] + ) + ordering_keys = [ + np.abs(average_spending - target_average_spending), + -average_spending, + average_spending, + ] + best_score = np.inf + best_result = result + + for key in ordering_keys: + ordered_idx = candidate_idx[np.lexsort((entity_draws[candidate_idx], key))] + cumulative_people = np.cumsum(enrolled_person_weights[ordered_idx]) + n_to_add = _closest_prefix_length( + 0, + cumulative_people, + target_people, + ) + candidate_result = np.zeros(len(base_takeup), dtype=bool) + candidate_result[ordered_idx[:n_to_add]] = True + candidate_people = enrolled_person_weights[candidate_result].sum() + candidate_spending = assigned_spending_weights[candidate_result].sum() + score = abs(candidate_people - target_people) / (target_people + 1) + abs( + candidate_spending - target_spending + ) / (target_spending + 1) + if score < best_score: + best_score = score + best_result = candidate_result + + return best_result + + +def adjust_aca_takeup_to_state_targets( + base_takeup: np.ndarray, + entity_draws: np.ndarray, + enrolled_person_weights: np.ndarray, + entity_state_codes: np.ndarray, + target_people_by_state: Dict[str, float], + assigned_spending_weights: np.ndarray | None = None, + target_spending_by_state: Dict[str, float] | None = None, +) -> np.ndarray: + """Match ACA take-up to state-level APTC enrollment targets.""" + result = base_takeup.copy() + entity_state_codes = np.asarray(entity_state_codes).astype(str) + + for state, target_people in sorted(target_people_by_state.items()): + state_mask = entity_state_codes == str(state) + if not state_mask.any(): + continue + if ( + assigned_spending_weights is not None + and target_spending_by_state is not None + and state in target_spending_by_state + ): + result[state_mask] = ( + adjust_aca_takeup_to_match_enrollment_and_spending_targets( + base_takeup=result[state_mask], + entity_draws=entity_draws[state_mask], + enrolled_person_weights=enrolled_person_weights[state_mask], + assigned_spending_weights=assigned_spending_weights[state_mask], + target_people=float(target_people), + target_spending=float(target_spending_by_state[state]), + ) + ) + continue + result[state_mask] = adjust_aca_takeup_to_match_target( + base_takeup=result[state_mask], + entity_draws=entity_draws[state_mask], + enrolled_person_weights=enrolled_person_weights[state_mask], + target_people=float(target_people), + ) + + return result + + def apply_block_takeup_to_arrays( hh_blocks: np.ndarray, hh_state_fips: np.ndarray, diff --git a/pyproject.toml b/pyproject.toml index dfc38069c..5829ef4e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "policyengine-us>=1.674.1", + "policyengine-us>=1.680.0", # policyengine-core 3.25.4 fixes PolicyEngine/policyengine-core#482 # (user-set ETERNITY inputs lost after _invalidate_all_caches). "policyengine-core>=3.25.4,<3.26", diff --git a/tests/unit/calibration/test_unified_calibration.py b/tests/unit/calibration/test_unified_calibration.py index e50b3d64d..ef9467386 100644 --- a/tests/unit/calibration/test_unified_calibration.py +++ b/tests/unit/calibration/test_unified_calibration.py @@ -27,6 +27,9 @@ from policyengine_us_data.utils.takeup import ( SIMPLE_TAKEUP_VARS, TAKEUP_AFFECTED_TARGETS, + adjust_aca_takeup_to_match_enrollment_and_spending_targets, + adjust_aca_takeup_to_match_target, + adjust_aca_takeup_to_state_targets, apply_block_takeup_to_arrays, compute_block_takeup_draws_for_entities, compute_block_takeup_for_entities, @@ -279,6 +282,87 @@ def test_extend_only_adds_true_values_until_target(self): np.array([True, False, True, True], dtype=bool), ) + def test_adjust_removes_high_draw_takers_when_above_target(self): + base_takeup = np.array([True, True, True, False], dtype=bool) + entity_draws = np.array([0.10, 0.90, 0.20, 0.30], dtype=np.float64) + enrolled_person_weights = np.array([2.0, 5.0, 3.0, 4.0], dtype=np.float64) + + result = adjust_aca_takeup_to_match_target( + base_takeup, + entity_draws, + enrolled_person_weights, + target_people=5.0, + ) + + np.testing.assert_array_equal( + result, + np.array([True, False, True, False], dtype=bool), + ) + + def test_adjust_state_targets_adds_and_removes_independently(self): + base_takeup = np.array([True, True, False, False], dtype=bool) + entity_draws = np.array([0.90, 0.10, 0.20, 0.30], dtype=np.float64) + enrolled_person_weights = np.array([5.0, 4.0, 7.0, 3.0], dtype=np.float64) + state_codes = np.array(["NY", "NY", "FL", "FL"]) + + result = adjust_aca_takeup_to_state_targets( + base_takeup, + entity_draws, + enrolled_person_weights, + entity_state_codes=state_codes, + target_people_by_state={"NY": 4.0, "FL": 10.0}, + ) + + np.testing.assert_array_equal( + result, + np.array([False, True, True, True], dtype=bool), + ) + + def test_adjust_targets_spending_per_person_when_provided(self): + base_takeup = np.array([True, True, True], dtype=bool) + entity_draws = np.array([0.30, 0.10, 0.20], dtype=np.float64) + enrolled_person_weights = np.array([100.0, 100.0, 100.0], dtype=np.float64) + assigned_spending_weights = np.array( + [100.0, 500.0, 1_000.0], + dtype=np.float64, + ) + + result = adjust_aca_takeup_to_match_enrollment_and_spending_targets( + base_takeup, + entity_draws, + enrolled_person_weights, + assigned_spending_weights, + target_people=100.0, + target_spending=1_000.0, + ) + + np.testing.assert_array_equal( + result, + np.array([False, False, True], dtype=bool), + ) + + def test_state_targets_use_spending_when_available(self): + base_takeup = np.array([False, False, False, False], dtype=bool) + entity_draws = np.array([0.10, 0.20, 0.30, 0.40], dtype=np.float64) + enrolled_person_weights = np.array([100.0, 100.0, 100.0, 100.0]) + assigned_spending_weights = np.array([100.0, 1_000.0, 500.0, 100.0]) + state_codes = np.array(["NY", "NY", "FL", "FL"]) + + result = adjust_aca_takeup_to_state_targets( + base_takeup, + entity_draws, + enrolled_person_weights, + entity_state_codes=state_codes, + target_people_by_state={"NY": 100.0, "FL": 100.0}, + assigned_spending_weights=assigned_spending_weights, + target_spending_by_state={"NY": 1_000.0, "FL": 100.0}, + ) + + np.testing.assert_array_equal( + result, + np.array([False, True, False, True], dtype=bool), + ) + class TestResolveRate: """Verify _resolve_rate handles scalar and dict rates.""" diff --git a/tests/unit/test_enhanced_cps.py b/tests/unit/test_enhanced_cps.py index 6c139f055..e49832601 100644 --- a/tests/unit/test_enhanced_cps.py +++ b/tests/unit/test_enhanced_cps.py @@ -3,6 +3,7 @@ from policyengine_us_data.datasets.cps.enhanced_cps import ( _get_base_aca_takeup, _set_period_array, + create_aca_2025_takeup_override, ) @@ -34,3 +35,43 @@ def test_set_period_array_creates_missing_variable_entry(): _set_period_array(data, "takes_up_aca_if_eligible", 2025, values) np.testing.assert_array_equal(data["takes_up_aca_if_eligible"][2025], values) + + +def test_create_aca_2025_takeup_override_matches_state_targets(): + result = create_aca_2025_takeup_override( + base_takeup=np.array([True, True, False, False], dtype=bool), + person_enrolled_if_takeup=np.ones(4, dtype=bool), + person_weights=np.array([5.0, 5.0, 7.0, 3.0], dtype=np.float64), + person_tax_unit_ids=np.array([10, 11, 12, 13], dtype=np.int64), + tax_unit_ids=np.array([10, 11, 12, 13], dtype=np.int64), + person_state_codes=np.array(["NY", "NY", "FL", "FL"]), + target_people_by_state={"NY": 5.0, "FL": 10.0}, + ) + + np.testing.assert_allclose( + [ + np.array([5.0, 5.0])[result[:2]].sum(), + np.array([7.0, 3.0])[result[2:]].sum(), + ], + [5.0, 10.0], + ) + + +def test_create_aca_2025_takeup_override_uses_state_spending_targets(): + result = create_aca_2025_takeup_override( + base_takeup=np.array([True, True, False], dtype=bool), + person_enrolled_if_takeup=np.ones(3, dtype=bool), + person_weights=np.array([5.0, 5.0, 5.0], dtype=np.float64), + person_tax_unit_ids=np.array([10, 11, 12], dtype=np.int64), + tax_unit_ids=np.array([10, 11, 12], dtype=np.int64), + person_state_codes=np.array(["NY", "NY", "NY"]), + target_people_by_state={"NY": 5.0}, + tax_unit_aca_ptc=np.array([20.0, 100.0, 60.0], dtype=np.float64), + tax_unit_weights=np.array([5.0, 5.0, 5.0], dtype=np.float64), + target_spending_by_state={"NY": 500.0}, + ) + + np.testing.assert_array_equal( + result, + np.array([False, True, False], dtype=bool), + ) diff --git a/uv.lock b/uv.lock index 460ad88d4..796c7b6d2 100644 --- a/uv.lock +++ b/uv.lock @@ -2122,7 +2122,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.678.0" +version = "1.680.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -2132,9 +2132,9 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fb/41/04c9365e1d912b0648de09a1429589b0b20886eb30d8b1c79d1577f73aec/policyengine_us-1.678.0.tar.gz", hash = "sha256:2050115b812228867d80d214fe701be33056a5b257cc1e699c4f033f10626755", size = 9401199, upload-time = "2026-04-30T17:37:05.032Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/a3/ce5617d68630be953c347aeef122ff10d054a584c3c01c707fec48c044e2/policyengine_us-1.680.0.tar.gz", hash = "sha256:6638b991864efcb4d10131156f03bd761afd362d4dab30d0110b1d45fd55110f", size = 9418371, upload-time = "2026-05-01T01:15:37.188Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/71/42/3b0c20c039d6f8a8f4a78f387cbf00be31d4c0ffb44902bf976c151bd38f/policyengine_us-1.678.0-py3-none-any.whl", hash = "sha256:6130f1cedb5bde7c190ed0e1e781e387a4ab0815b23e0bc8f1dc7ee52d830a0c", size = 9815750, upload-time = "2026-04-30T17:37:01.521Z" }, + { url = "https://files.pythonhosted.org/packages/ea/e9/3a7939901c1a394b6a68ebd29a5424545b183d357d812e0b7462209a37ef/policyengine_us-1.680.0-py3-none-any.whl", hash = "sha256:9fec872b361bc018824f30b26f7d80022caeb6cf60a605d2021e177f2aa527ba", size = 9853536, upload-time = "2026-05-01T01:15:32.508Z" }, ] [[package]] @@ -2203,7 +2203,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.1" }, { name = "pip-system-certs", specifier = ">=3.0" }, { name = "policyengine-core", specifier = ">=3.25.4,<3.26" }, - { name = "policyengine-us", specifier = ">=1.674.1" }, + { name = "policyengine-us", specifier = ">=1.680.0" }, { name = "requests", specifier = ">=2.25.0" }, { name = "samplics", marker = "extra == 'calibration'" }, { name = "scipy", specifier = ">=1.15.3" },