Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions arch/source_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@
"federal-reserve-z1-household-net-worth": Path(
"federal_reserve/z1_household_net_worth"
),
"hhs-acf-liheap-fy2023-national-profile": Path(
"hhs_acf_liheap/fy2023_national_profile"
),
"hhs-acf-liheap-fy2024-national-profile": Path(
"hhs_acf_liheap/fy2024_national_profile"
),
Expand Down
Binary file not shown.
22 changes: 22 additions & 0 deletions db/data/hhs_acf_liheap/fy2023_national_profile/manifest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
source: HHS ACF Office of Community Services
dataset: LIHEAP FY2023 National Profile (All States)
vintage: FY 2023
retrieved: 2026-05-11
notes: >-
Full publisher PDF downloaded from the HHS ACF LIHEAP Performance Management
site and parsed with the Arch PDF document-number parser.
files:
2023:
filename: acf_liheap_fy2023_all_states_national_profile.pdf
sha256: 91def56c2a2621d6899180e05a82e9d42419b64f535e1bc610145aac9ee3895f
source_url: https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2023/FY2023AllStates%28National%29Profile-508Compliant.pdf
source_table: LIHEAP FY2023 National Profile (All States)
size_bytes: 125199
storage:
r2:
provider: r2
bucket: arch-raw
key: raw/hhs_acf_liheap/hhs-acf-liheap-fy2023-national-profile/2023/91def56c2a2621d6899180e05a82e9d42419b64f535e1bc610145aac9ee3895f/acf_liheap_fy2023_all_states_national_profile.pdf
uri: r2://arch-raw/raw/hhs_acf_liheap/hhs-acf-liheap-fy2023-national-profile/2023/91def56c2a2621d6899180e05a82e9d42419b64f535e1bc610145aac9ee3895f/acf_liheap_fy2023_all_states_national_profile.pdf
source_id: hhs_acf_liheap
package_id: hhs-acf-liheap-fy2023-national-profile
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
schema_version: arch.source_package.v1
package_id: hhs-acf-liheap-fy2023-national-profile
label: HHS ACF LIHEAP FY2023 National Profile
artifact:
source_name: hhs_acf_liheap
source_table: LIHEAP FY2023 National Profile (All States)
resource_package: db
resource_directory: data/hhs_acf_liheap/fy2023_national_profile
manifest: manifest.yaml
vintage: fiscal_year_2023
extracted_at: "2026-05-11"
extraction_method: pypdf text-line number extraction from full publisher PDF
parser: pdf_text_numbers
record_sets:
- record_set_id: hhs_acf_liheap.fy2023.national_profile
record_set_spec_id: hhs_acf_liheap.national_profile.v1
source_record_id_prefix: hhs_acf_liheap.fy2023.national_profile
sheet_name: document_numbers
period_type: fiscal_year
period: 2023
geography_id: 0100000US
geography_level: country
geography_name: United States
geography_vintage: 2020_census
entity: household
entity_role: recipient_household
domain: liheap_state_programs
groupby_dimension: program
rows:
- value_id: state_programs
label: State programs
ordinal: 0
row_number: 5
expected_row_header_column: C
expected_row_header: "5,939,605"
filters:
program: liheap
administering_entity: state_programs
constraints:
- variable: program
operator: "=="
value: liheap
label: LIHEAP
- variable: administering_entity
operator: "=="
value: state_programs
label: State programs
guard_cells:
- column: A
expected_value: 1
label: PDF page number
- column: B
expected_value: 13
label: PDF extracted line number
- column: D
expected_value: "5,939,605"
label: source number text
- column: F
expected_value: Total Households Served by State Programs 5,939,605 State Maximum Income Eligibility Requirements for
label: nearby PDF text context
table_record_kind: total
measures:
- measure_id: households_served
label: Total households served by state programs
ordinal: 0
column: E
source_column_id: numeric_value
expected_column_header_row: 1
expected_column_header: numeric_value
concept: hhs_acf_liheap.households_served_by_state_programs
unit: count
aggregation: count
expected_cell_type: number
62 changes: 40 additions & 22 deletions tests/test_arch_source_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,43 +973,61 @@ def test_cms_medicare_trustees_package_builds_part_b_premium_fact():
assert values_by_record[record_id].measure.unit == "usd"


def test_hhs_acf_liheap_package_builds_household_count_fact():
package = load_source_package("hhs-acf-liheap-fy2024-national-profile")
cells = package.build_source_cells(2024)
records = package.build_source_records(2024, cells=cells)
facts = package.build_facts(2024, cells=cells)
@pytest.mark.parametrize(
("source", "year", "cell_count", "households", "addresses", "source_file"),
[
(
"hhs-acf-liheap-fy2023-national-profile",
2023,
360,
5_939_605,
("E5", "E1", "A5", "B5", "D5", "F5"),
"acf_liheap_fy2023_all_states_national_profile.pdf",
),
(
"hhs-acf-liheap-fy2024-national-profile",
2024,
348,
5_876_646,
("E3", "E1", "A3", "B3", "D3", "F3"),
"acf_liheap_fy2024_all_states_national_profile.pdf",
),
],
)
def test_hhs_acf_liheap_package_builds_household_count_fact(
source,
year,
cell_count,
households,
addresses,
source_file,
):
package = load_source_package(source)
cells = package.build_source_cells(year)
records = package.build_source_records(year, cells=cells)
facts = package.build_facts(year, cells=cells)
values_by_record = {fact.source_record_id: fact for fact in facts}

assert package.package_id == "hhs-acf-liheap-fy2024-national-profile"
assert package.package_id == source
assert validate_source_cells(cells).valid
assert validate_facts(facts).valid
assert len(cells) == 348
assert len(cells) == cell_count
assert len(facts) == 1
assert all(fact.source.raw_r2_uri for fact in facts)

record_id = (
"hhs_acf_liheap.fy2024.national_profile."
f"hhs_acf_liheap.fy{year}.national_profile."
"state_programs.households_served"
)
assert records[0].source_cell_addresses == (
"E3",
"E1",
"A3",
"B3",
"D3",
"F3",
)
assert values_by_record[record_id].value == 5_876_646
assert records[0].source_cell_addresses == addresses
assert values_by_record[record_id].value == households
assert values_by_record[record_id].measure.concept == (
"hhs_acf_liheap.households_served_by_state_programs"
)
assert values_by_record[record_id].period.value == 2024
assert values_by_record[record_id].period.value == year
assert values_by_record[record_id].geography.id == "0100000US"
assert values_by_record[record_id].entity.name == "household"
assert (
values_by_record[record_id].source.source_file
== "acf_liheap_fy2024_all_states_national_profile.pdf"
)
assert values_by_record[record_id].source.source_file == source_file
assert values_by_record[record_id].constraints


Expand Down
Loading