diff --git a/arch/source_package.py b/arch/source_package.py index 3d48786..eb49e5c 100644 --- a/arch/source_package.py +++ b/arch/source_package.py @@ -101,6 +101,7 @@ ), "cms-aca-oep-state-level": Path("cms_aca/oep_state_level"), "cms-aca-oep-state-level-2022": Path("cms_aca/oep_state_level_2022"), + "cms-aca-oep-state-level-2025": Path("cms_aca/oep_state_level_2025"), "cms-aca-effectuated-enrollment-2022": Path( "cms_aca/effectuated_enrollment_2022" ), diff --git a/db/data/cms_aca/oep_state_level_2025/2025-oep-state-level-public-use-file.zip b/db/data/cms_aca/oep_state_level_2025/2025-oep-state-level-public-use-file.zip new file mode 100644 index 0000000..e82c0ab Binary files /dev/null and b/db/data/cms_aca/oep_state_level_2025/2025-oep-state-level-public-use-file.zip differ diff --git a/db/data/cms_aca/oep_state_level_2025/manifest.yaml b/db/data/cms_aca/oep_state_level_2025/manifest.yaml new file mode 100644 index 0000000..6e4f5ff --- /dev/null +++ b/db/data/cms_aca/oep_state_level_2025/manifest.yaml @@ -0,0 +1,18 @@ +source_id: cms_aca +package_id: cms-aca-oep-state-level-2025 +dataset: cms_marketplace_oep_state_level_2025_puf +source_page: https://www.cms.gov/data-research/statistics-trends-reports/marketplace-products/2025-marketplace-open-enrollment-period-public-use-files +table: 2025 OEP State-Level Public Use File +files: + 2025: + filename: 2025-oep-state-level-public-use-file.zip + source_url: https://www.cms.gov/files/zip/2025-oep-state-level-public-use-file.zip + sha256: 4538f7f8797b1957fe50fa8834fc5eb5c6816ea5ea5d0acf74e84bdbf563c5c3 + size_bytes: 99029 + fetched_at: '2026-05-11T22:35:24+00:00' + storage: + r2: + provider: r2 + bucket: arch-raw + key: raw/cms_aca/cms-aca-oep-state-level-2025/2025/4538f7f8797b1957fe50fa8834fc5eb5c6816ea5ea5d0acf74e84bdbf563c5c3/2025-oep-state-level-public-use-file.zip + uri: r2://arch-raw/raw/cms_aca/cms-aca-oep-state-level-2025/2025/4538f7f8797b1957fe50fa8834fc5eb5c6816ea5ea5d0acf74e84bdbf563c5c3/2025-oep-state-level-public-use-file.zip diff --git a/packages/cms_aca/oep_state_level_2025/source_package.yaml b/packages/cms_aca/oep_state_level_2025/source_package.yaml new file mode 100644 index 0000000..c938a4c --- /dev/null +++ b/packages/cms_aca/oep_state_level_2025/source_package.yaml @@ -0,0 +1,635 @@ +schema_version: arch.source_package.v1 +package_id: cms-aca-oep-state-level-2025 +label: CMS Marketplace 2025 OEP state-level public use file +artifact: + source_name: cms_aca + source_table: 2025 OEP State-Level Public Use File + resource_package: db + resource_directory: data/cms_aca/oep_state_level_2025 + manifest: manifest.yaml + vintage: 2025_open_enrollment_period + extracted_at: '2026-05-11' + extraction_method: zip archive CSV member full-row parse with source-row cells + parser: zip_delimited_text_full_rows + archive_member: 2025 OEP State-Level Public Use File.csv + sheet_name: 2025 OEP State-Level Public Use File.csv +record_sets: +- record_set_id: cms_aca.oep{year}.state_marketplace + record_set_spec_id: cms_aca.oep_state_marketplace.v1 + source_record_id_prefix: cms_aca.oep{year}.state_marketplace + sheet_name: 2025 OEP State-Level Public Use File.csv + period_type: calendar_year + period: '{year}' + geography_id: 0100000US + geography_level: country + geography_name: United States + geography_vintage: current + entity: person + entity_role: aca_marketplace_consumer + domain: aca_marketplace_qhp_selections + groupby_dimension: cms_aca.state_abrvtn + rows: + - value_id: ak + label: Alaska + ordinal: 0 + row_number: 2 + geography_id: 0400000US02 + geography_level: state + geography_name: Alaska + geography_vintage: current + expected_row_header: AK + expected_row_header_column: A + table_record_kind: total + - value_id: al + label: Alabama + ordinal: 1 + row_number: 3 + geography_id: 0400000US01 + geography_level: state + geography_name: Alabama + geography_vintage: current + expected_row_header: AL + expected_row_header_column: A + table_record_kind: total + - value_id: ar + label: Arkansas + ordinal: 2 + row_number: 4 + geography_id: 0400000US05 + geography_level: state + geography_name: Arkansas + geography_vintage: current + expected_row_header: AR + expected_row_header_column: A + table_record_kind: total + - value_id: az + label: Arizona + ordinal: 3 + row_number: 5 + geography_id: 0400000US04 + geography_level: state + geography_name: Arizona + geography_vintage: current + expected_row_header: AZ + expected_row_header_column: A + table_record_kind: total + - value_id: ca + label: California + ordinal: 4 + row_number: 6 + geography_id: 0400000US06 + geography_level: state + geography_name: California + geography_vintage: current + expected_row_header: CA + expected_row_header_column: A + table_record_kind: total + - value_id: co + label: Colorado + ordinal: 5 + row_number: 7 + geography_id: 0400000US08 + geography_level: state + geography_name: Colorado + geography_vintage: current + expected_row_header: CO + expected_row_header_column: A + table_record_kind: total + - value_id: ct + label: Connecticut + ordinal: 6 + row_number: 8 + geography_id: 0400000US09 + geography_level: state + geography_name: Connecticut + geography_vintage: current + expected_row_header: CT + expected_row_header_column: A + table_record_kind: total + - value_id: dc + label: District of Columbia + ordinal: 7 + row_number: 9 + geography_id: 0400000US11 + geography_level: state + geography_name: District of Columbia + geography_vintage: current + expected_row_header: DC + expected_row_header_column: A + table_record_kind: total + - value_id: de + label: Delaware + ordinal: 8 + row_number: 10 + geography_id: 0400000US10 + geography_level: state + geography_name: Delaware + geography_vintage: current + expected_row_header: DE + expected_row_header_column: A + table_record_kind: total + - value_id: fl + label: Florida + ordinal: 9 + row_number: 11 + geography_id: 0400000US12 + geography_level: state + geography_name: Florida + geography_vintage: current + expected_row_header: FL + expected_row_header_column: A + table_record_kind: total + - value_id: ga + label: Georgia + ordinal: 10 + row_number: 12 + geography_id: 0400000US13 + geography_level: state + geography_name: Georgia + geography_vintage: current + expected_row_header: GA + expected_row_header_column: A + table_record_kind: total + - value_id: hi + label: Hawaii + ordinal: 11 + row_number: 13 + geography_id: 0400000US15 + geography_level: state + geography_name: Hawaii + geography_vintage: current + expected_row_header: HI + expected_row_header_column: A + table_record_kind: total + - value_id: ia + label: Iowa + ordinal: 12 + row_number: 14 + geography_id: 0400000US19 + geography_level: state + geography_name: Iowa + geography_vintage: current + expected_row_header: IA + expected_row_header_column: A + table_record_kind: total + - value_id: id + label: Idaho + ordinal: 13 + row_number: 15 + geography_id: 0400000US16 + geography_level: state + geography_name: Idaho + geography_vintage: current + expected_row_header: ID + expected_row_header_column: A + table_record_kind: total + - value_id: il + label: Illinois + ordinal: 14 + row_number: 16 + geography_id: 0400000US17 + geography_level: state + geography_name: Illinois + geography_vintage: current + expected_row_header: IL + expected_row_header_column: A + table_record_kind: total + - value_id: in + label: Indiana + ordinal: 15 + row_number: 17 + geography_id: 0400000US18 + geography_level: state + geography_name: Indiana + geography_vintage: current + expected_row_header: IN + expected_row_header_column: A + table_record_kind: total + - value_id: ks + label: Kansas + ordinal: 16 + row_number: 18 + geography_id: 0400000US20 + geography_level: state + geography_name: Kansas + geography_vintage: current + expected_row_header: KS + expected_row_header_column: A + table_record_kind: total + - value_id: ky + label: Kentucky + ordinal: 17 + row_number: 19 + geography_id: 0400000US21 + geography_level: state + geography_name: Kentucky + geography_vintage: current + expected_row_header: KY + expected_row_header_column: A + table_record_kind: total + - value_id: la + label: Louisiana + ordinal: 18 + row_number: 20 + geography_id: 0400000US22 + geography_level: state + geography_name: Louisiana + geography_vintage: current + expected_row_header: LA + expected_row_header_column: A + table_record_kind: total + - value_id: ma + label: Massachusetts + ordinal: 19 + row_number: 21 + geography_id: 0400000US25 + geography_level: state + geography_name: Massachusetts + geography_vintage: current + expected_row_header: MA + expected_row_header_column: A + table_record_kind: total + - value_id: md + label: Maryland + ordinal: 20 + row_number: 22 + geography_id: 0400000US24 + geography_level: state + geography_name: Maryland + geography_vintage: current + expected_row_header: MD + expected_row_header_column: A + table_record_kind: total + - value_id: me + label: Maine + ordinal: 21 + row_number: 23 + geography_id: 0400000US23 + geography_level: state + geography_name: Maine + geography_vintage: current + expected_row_header: ME + expected_row_header_column: A + table_record_kind: total + - value_id: mi + label: Michigan + ordinal: 22 + row_number: 24 + geography_id: 0400000US26 + geography_level: state + geography_name: Michigan + geography_vintage: current + expected_row_header: MI + expected_row_header_column: A + table_record_kind: total + - value_id: mn + label: Minnesota + ordinal: 23 + row_number: 25 + geography_id: 0400000US27 + geography_level: state + geography_name: Minnesota + geography_vintage: current + expected_row_header: MN + expected_row_header_column: A + table_record_kind: total + - value_id: mo + label: Missouri + ordinal: 24 + row_number: 26 + geography_id: 0400000US29 + geography_level: state + geography_name: Missouri + geography_vintage: current + expected_row_header: MO + expected_row_header_column: A + table_record_kind: total + - value_id: ms + label: Mississippi + ordinal: 25 + row_number: 27 + geography_id: 0400000US28 + geography_level: state + geography_name: Mississippi + geography_vintage: current + expected_row_header: MS + expected_row_header_column: A + table_record_kind: total + - value_id: mt + label: Montana + ordinal: 26 + row_number: 28 + geography_id: 0400000US30 + geography_level: state + geography_name: Montana + geography_vintage: current + expected_row_header: MT + expected_row_header_column: A + table_record_kind: total + - value_id: nc + label: North Carolina + ordinal: 27 + row_number: 29 + geography_id: 0400000US37 + geography_level: state + geography_name: North Carolina + geography_vintage: current + expected_row_header: NC + expected_row_header_column: A + table_record_kind: total + - value_id: nd + label: North Dakota + ordinal: 28 + row_number: 30 + geography_id: 0400000US38 + geography_level: state + geography_name: North Dakota + geography_vintage: current + expected_row_header: ND + expected_row_header_column: A + table_record_kind: total + - value_id: ne + label: Nebraska + ordinal: 29 + row_number: 31 + geography_id: 0400000US31 + geography_level: state + geography_name: Nebraska + geography_vintage: current + expected_row_header: NE + expected_row_header_column: A + table_record_kind: total + - value_id: nh + label: New Hampshire + ordinal: 30 + row_number: 32 + geography_id: 0400000US33 + geography_level: state + geography_name: New Hampshire + geography_vintage: current + expected_row_header: NH + expected_row_header_column: A + table_record_kind: total + - value_id: nj + label: New Jersey + ordinal: 31 + row_number: 33 + geography_id: 0400000US34 + geography_level: state + geography_name: New Jersey + geography_vintage: current + expected_row_header: NJ + expected_row_header_column: A + table_record_kind: total + - value_id: nm + label: New Mexico + ordinal: 32 + row_number: 34 + geography_id: 0400000US35 + geography_level: state + geography_name: New Mexico + geography_vintage: current + expected_row_header: NM + expected_row_header_column: A + table_record_kind: total + - value_id: nv + label: Nevada + ordinal: 33 + row_number: 35 + geography_id: 0400000US32 + geography_level: state + geography_name: Nevada + geography_vintage: current + expected_row_header: NV + expected_row_header_column: A + table_record_kind: total + - value_id: ny + label: New York + ordinal: 34 + row_number: 36 + geography_id: 0400000US36 + geography_level: state + geography_name: New York + geography_vintage: current + expected_row_header: NY + expected_row_header_column: A + table_record_kind: total + - value_id: oh + label: Ohio + ordinal: 35 + row_number: 37 + geography_id: 0400000US39 + geography_level: state + geography_name: Ohio + geography_vintage: current + expected_row_header: OH + expected_row_header_column: A + table_record_kind: total + - value_id: ok + label: Oklahoma + ordinal: 36 + row_number: 38 + geography_id: 0400000US40 + geography_level: state + geography_name: Oklahoma + geography_vintage: current + expected_row_header: OK + expected_row_header_column: A + table_record_kind: total + - value_id: or + label: Oregon + ordinal: 37 + row_number: 39 + geography_id: 0400000US41 + geography_level: state + geography_name: Oregon + geography_vintage: current + expected_row_header: OR + expected_row_header_column: A + table_record_kind: total + - value_id: pa + label: Pennsylvania + ordinal: 38 + row_number: 40 + geography_id: 0400000US42 + geography_level: state + geography_name: Pennsylvania + geography_vintage: current + expected_row_header: PA + expected_row_header_column: A + table_record_kind: total + - value_id: ri + label: Rhode Island + ordinal: 39 + row_number: 41 + geography_id: 0400000US44 + geography_level: state + geography_name: Rhode Island + geography_vintage: current + expected_row_header: RI + expected_row_header_column: A + table_record_kind: total + - value_id: sc + label: South Carolina + ordinal: 40 + row_number: 42 + geography_id: 0400000US45 + geography_level: state + geography_name: South Carolina + geography_vintage: current + expected_row_header: SC + expected_row_header_column: A + table_record_kind: total + - value_id: sd + label: South Dakota + ordinal: 41 + row_number: 43 + geography_id: 0400000US46 + geography_level: state + geography_name: South Dakota + geography_vintage: current + expected_row_header: SD + expected_row_header_column: A + table_record_kind: total + - value_id: tn + label: Tennessee + ordinal: 42 + row_number: 44 + geography_id: 0400000US47 + geography_level: state + geography_name: Tennessee + geography_vintage: current + expected_row_header: TN + expected_row_header_column: A + table_record_kind: total + - value_id: tx + label: Texas + ordinal: 43 + row_number: 45 + geography_id: 0400000US48 + geography_level: state + geography_name: Texas + geography_vintage: current + expected_row_header: TX + expected_row_header_column: A + table_record_kind: total + - value_id: ut + label: Utah + ordinal: 44 + row_number: 46 + geography_id: 0400000US49 + geography_level: state + geography_name: Utah + geography_vintage: current + expected_row_header: UT + expected_row_header_column: A + table_record_kind: total + - value_id: va + label: Virginia + ordinal: 45 + row_number: 47 + geography_id: 0400000US51 + geography_level: state + geography_name: Virginia + geography_vintage: current + expected_row_header: VA + expected_row_header_column: A + table_record_kind: total + - value_id: vt + label: Vermont + ordinal: 46 + row_number: 48 + geography_id: 0400000US50 + geography_level: state + geography_name: Vermont + geography_vintage: current + expected_row_header: VT + expected_row_header_column: A + table_record_kind: total + - value_id: wa + label: Washington + ordinal: 47 + row_number: 49 + geography_id: 0400000US53 + geography_level: state + geography_name: Washington + geography_vintage: current + expected_row_header: WA + expected_row_header_column: A + table_record_kind: total + - value_id: wi + label: Wisconsin + ordinal: 48 + row_number: 50 + geography_id: 0400000US55 + geography_level: state + geography_name: Wisconsin + geography_vintage: current + expected_row_header: WI + expected_row_header_column: A + table_record_kind: total + - value_id: wv + label: West Virginia + ordinal: 49 + row_number: 51 + geography_id: 0400000US54 + geography_level: state + geography_name: West Virginia + geography_vintage: current + expected_row_header: WV + expected_row_header_column: A + table_record_kind: total + - value_id: wy + label: Wyoming + ordinal: 50 + row_number: 52 + geography_id: 0400000US56 + geography_level: state + geography_name: Wyoming + geography_vintage: current + expected_row_header: WY + expected_row_header_column: A + table_record_kind: total + measures: + - measure_id: marketplace_enrollment + label: Marketplace plan selections + ordinal: 0 + column: H + source_column_id: Cnsmr + expected_column_header_row: 1 + expected_column_header: Cnsmr + concept: cms_aca.marketplace_plan_selections + source_concept: cms_aca.Cnsmr + concept_relation: source_label + concept_authority: cms + unit: count + aggregation: count + expected_cell_type: number + - measure_id: aptc_recipients + label: APTC consumers + ordinal: 1 + column: AJ + source_column_id: APTC_Cnsmr + expected_column_header_row: 1 + expected_column_header: APTC_Cnsmr + concept: cms_aca.aptc_consumers + source_concept: cms_aca.APTC_Cnsmr + concept_relation: source_label + concept_authority: cms + unit: count + aggregation: count + expected_cell_type: number + - measure_id: average_monthly_aptc + label: Average monthly APTC + ordinal: 2 + column: AK + source_column_id: APTC_Cnsmr_Avg_APTC + expected_column_header_row: 1 + expected_column_header: APTC_Cnsmr_Avg_APTC + concept: cms_aca.average_monthly_aptc + source_concept: cms_aca.APTC_Cnsmr_Avg_APTC + concept_relation: source_label + concept_authority: cms + unit: usd + aggregation: mean + expected_cell_type: number diff --git a/tests/test_arch_source_package.py b/tests/test_arch_source_package.py index 6777815..068ecc4 100644 --- a/tests/test_arch_source_package.py +++ b/tests/test_arch_source_package.py @@ -840,6 +840,46 @@ def test_source_package_alias_builds_cms_aca_oep_state_level_2022_facts(): ) +def test_source_package_alias_builds_cms_aca_oep_state_level_2025_facts(): + package = load_source_package("cms-aca-oep-state-level-2025") + rows = package.build_source_rows(2025) + cells = package.build_source_cells(2025, source_rows=rows) + records = package.build_source_records(2025, cells=cells, source_rows=rows) + facts = package.build_facts(2025, cells=cells, source_rows=rows) + records_by_id = {record.source_record_id: record for record in records} + values_by_record = {fact.source_record_id: fact for fact in facts} + + assert package.package_id == "cms-aca-oep-state-level-2025" + assert len(rows) == 54 + assert validate_source_rows(rows).valid + assert rows[0].values["State_Abrvtn"] == "AK" + assert rows[0].values["APTC_Cnsmr_Avg_APTC"] == 1_008 + assert validate_source_cells(cells).valid + assert len(cells) == 5_610 + assert len(facts) == 153 + assert validate_facts(facts).valid + assert all(fact.source_row_keys for fact in facts) + assert all(fact.source.source_name == "cms_aca" for fact in facts) + assert all(fact.source.source_file.endswith(".zip") for fact in facts) + assert all(fact.source.raw_r2_uri for fact in facts) + assert records_by_id[ + "cms_aca.oep2025.state_marketplace.ca.average_monthly_aptc" + ].source_cell_addresses == ("AK6", "AK1") + assert ( + values_by_record[ + "cms_aca.oep2025.state_marketplace.ca.marketplace_enrollment" + ].value + == 1_979_504 + ) + ca_average_aptc = values_by_record[ + "cms_aca.oep2025.state_marketplace.ca.average_monthly_aptc" + ] + assert ca_average_aptc.value == 562 + assert ca_average_aptc.geography.id == "0400000US06" + assert ca_average_aptc.geography.level == "state" + assert not ca_average_aptc.constraints + + def test_source_package_alias_builds_cms_aca_effectuated_enrollment_2022_facts(): package = load_source_package("cms-aca-effectuated-enrollment-2022") rows = package.build_source_rows(2022) @@ -1281,6 +1321,19 @@ def test_validate_source_package_reports_cms_aca_oep_2022_counts(): } +def test_validate_source_package_reports_cms_aca_oep_2025_counts(): + report = validate_source_package("cms-aca-oep-state-level-2025", year=2025) + + assert report.valid + assert report.counts == { + "record_set_count": 1, + "row_count": 51, + "measure_count": 3, + "source_record_count": 153, + "source_region_count": 1, + } + + def test_validate_source_package_reports_cms_aca_effectuated_enrollment_2022_counts(): report = validate_source_package( "cms-aca-effectuated-enrollment-2022",