From c0a1a97e14d7f1e3b2811910c8f8d42c3ace0fbc Mon Sep 17 00:00:00 2001 From: aaronx2-illinois Date: Sat, 4 Apr 2026 09:05:48 -0600 Subject: [PATCH 1/7] CommitName ;EOL mistrust Commit Detail Add EOL mistrust workflow tasks and target helpers - Implemented task definitions for the EOL mistrust , including: - Left AMA prediction - In-hospital mortality prediction - Code status prediction - Added helper functions for data normalization, age calculation, length of stay calculation, and mapping of ethnicity and insurance. - Created a base task class for downstream predictions and specific task wrappers for each target. - Included necessary validation for input data and defined schemas for input and output. --- examples/eol_mistrust.py | 318 ++++ pyhealth/datasets/configs/eol_mistrust.yaml | 161 ++ pyhealth/datasets/eol_mistrust.py | 1522 +++++++++++++++++++ pyhealth/models/eol_mistrust.py | 1235 +++++++++++++++ pyhealth/tasks/eol_mistrust.py | 351 +++++ 5 files changed, 3587 insertions(+) create mode 100644 examples/eol_mistrust.py create mode 100644 pyhealth/datasets/configs/eol_mistrust.yaml create mode 100644 pyhealth/datasets/eol_mistrust.py create mode 100644 pyhealth/models/eol_mistrust.py create mode 100644 pyhealth/tasks/eol_mistrust.py diff --git a/examples/eol_mistrust.py b/examples/eol_mistrust.py new file mode 100644 index 000000000..09c0e068b --- /dev/null +++ b/examples/eol_mistrust.py @@ -0,0 +1,318 @@ +"""Example workflow for the EOL mistrust study pipeline. + +This script assumes you have already exported and combined the required MIMIC-III +tables into a local directory such as: + + downloads/eol_mistrust_required_combined/ + mimiciii_clinical/ + mimiciii_notes/ + mimiciii_derived/ + +It demonstrates two related flows: + +1. the study-style preprocessing + modeling pipeline built on pandas tables +2. 
an optional PyHealth task demo using the custom EOL mistrust YAML config +""" + +from __future__ import annotations + +import argparse +import tempfile +from pathlib import Path + +import pandas as pd + +from pyhealth.datasets import MIMIC3Dataset +from pyhealth.datasets.eol_mistrust import ( + build_acuity_scores, + build_all_cohort, + build_base_admissions, + build_chartevent_artifacts_from_csv, + build_demographics_table, + build_eol_cohort, + build_final_model_table_from_code_status_targets, + build_note_artifacts_from_csv, + build_treatment_totals, + write_minimal_deliverables, +) +from pyhealth.models.eol_mistrust import EOLMistrustModel +from pyhealth.tasks.eol_mistrust import EOLMistrustMortalityPredictionMIMIC3 + + +REPO_ROOT = Path(__file__).resolve().parents[1] +DEFAULT_DATA_ROOT = REPO_ROOT / "downloads" / "eol_mistrust_required_combined" +DEFAULT_CONFIG_PATH = REPO_ROOT / "pyhealth" / "datasets" / "configs" / "eol_mistrust.yaml" + +RAW_TABLE_PATHS = { + "admissions": "mimiciii_clinical/admissions.csv", + "patients": "mimiciii_clinical/patients.csv", + "icustays": "mimiciii_clinical/icustays.csv", + "d_items": "mimiciii_clinical/d_items.csv", +} + +EVENT_TABLE_PATHS = { + "noteevents": "mimiciii_notes/noteevents.csv", + "chartevents": "mimiciii_clinical/chartevents.csv", +} + +MATERIALIZED_VIEW_PATHS = { + "ventdurations": "mimiciii_derived/ventdurations.csv", + "vasopressordurations": "mimiciii_derived/vasopressordurations.csv", + "oasis": "mimiciii_derived/oasis.csv", + "sapsii": "mimiciii_derived/sapsii.csv", +} + + +def _read_csvs(root: Path, path_map: dict[str, str]) -> dict[str, pd.DataFrame]: + tables: dict[str, pd.DataFrame] = {} + for name, relative_path in path_map.items(): + csv_path = root / relative_path + if not csv_path.exists(): + raise FileNotFoundError(f"Missing required table for EOL example: {csv_path}") + table = pd.read_csv(csv_path, low_memory=False) + table.columns = [str(column).lower() for column in table.columns] + 
tables[name] = table + return tables + + +def load_eol_mistrust_tables( + root: Path, +) -> tuple[dict[str, pd.DataFrame], dict[str, pd.DataFrame]]: + """Load the raw tables and materialized views required by the pipeline.""" + + raw_tables = _read_csvs(root, RAW_TABLE_PATHS) + materialized_views = _read_csvs(root, MATERIALIZED_VIEW_PATHS) + return raw_tables, materialized_views + + +def build_eol_mistrust_outputs( + root: Path, + repetitions: int = 100, + include_downstream_weight_summary: bool = False, + include_cdf_plot_data: bool = False, + output_dir: Path | None = None, + note_chunksize: int = 100_000, + chartevent_chunksize: int = 500_000, +) -> dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]]: + """Run the local end-to-end EOL mistrust workflow over downloaded CSV files.""" + + raw_tables, materialized_views = load_eol_mistrust_tables(root) + validation = { + "schema_name": "mimiciii", + "database_flavor": "postgresql", + "raw_tables": sorted(raw_tables.keys()), + "materialized_views": sorted(materialized_views.keys()), + } + + admissions = raw_tables["admissions"] + patients = raw_tables["patients"] + icustays = raw_tables["icustays"] + d_items = raw_tables["d_items"] + noteevents_csv_path = root / EVENT_TABLE_PATHS["noteevents"] + chartevents_csv_path = root / EVENT_TABLE_PATHS["chartevents"] + + base_admissions = build_base_admissions(admissions, patients) + demographics = build_demographics_table(base_admissions) + all_cohort = build_all_cohort(base_admissions, icustays) + eol_cohort = build_eol_cohort(base_admissions, demographics) + treatment_totals = build_treatment_totals( + icustays=icustays, + ventdurations=materialized_views["ventdurations"], + vasopressordurations=materialized_views["vasopressordurations"], + ) + note_corpus, note_labels = build_note_artifacts_from_csv( + noteevents_csv_path=noteevents_csv_path, + all_hadm_ids=all_cohort["hadm_id"], + chunksize=note_chunksize, + ) + feature_matrix, code_status_targets = 
build_chartevent_artifacts_from_csv( + chartevents_csv_path=chartevents_csv_path, + d_items=d_items, + all_hadm_ids=all_cohort["hadm_id"], + chunksize=chartevent_chunksize, + ) + acuity_scores = build_acuity_scores( + materialized_views["oasis"], + materialized_views["sapsii"], + ) + + model = EOLMistrustModel(repetitions=repetitions) + mistrust_scores = model.build_mistrust_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + ) + final_model_table = build_final_model_table_from_code_status_targets( + demographics=demographics, + all_cohort=all_cohort, + admissions=admissions, + code_status_targets=code_status_targets, + mistrust_scores=mistrust_scores, + ) + validation["base_admissions_rows"] = int(len(base_admissions)) + validation["all_cohort_rows"] = int(len(all_cohort)) + validation["eol_cohort_rows"] = int(len(eol_cohort)) + model_outputs = model.run( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + demographics=demographics, + eol_cohort=eol_cohort, + treatment_totals=treatment_totals, + acuity_scores=acuity_scores, + final_model_table=final_model_table, + include_downstream_weight_summary=include_downstream_weight_summary, + include_cdf_plot_data=include_cdf_plot_data, + ) + + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]] = { + "validation_summary": validation, + "base_admissions": base_admissions, + "demographics": demographics, + "all_cohort": all_cohort, + "eol_cohort": eol_cohort, + "treatment_totals": treatment_totals, + "note_corpus": note_corpus, + "note_labels": note_labels, + "chartevent_feature_matrix": feature_matrix, + "acuity_scores": acuity_scores, + "mistrust_scores": mistrust_scores, + "final_model_table": final_model_table, + } + artifacts.update(model_outputs) + + if output_dir is not None: + write_minimal_deliverables( + { + "base_admissions": base_admissions, + "eol_cohort": eol_cohort, + "all_cohort": all_cohort, + 
"treatment_totals": treatment_totals, + "chartevent_feature_matrix": feature_matrix, + "note_labels": note_labels, + "mistrust_scores": mistrust_scores, + "acuity_scores": acuity_scores, + "final_model_table": final_model_table, + }, + output_dir=output_dir, + ) + + return artifacts + + +def run_task_demo(root: Path, config_path: Path) -> None: + """Build a PyHealth sample dataset with the custom EOL mistrust YAML config.""" + + base_dataset = MIMIC3Dataset( + root=str(root), + tables=["chartevents", "noteevents", "d_items"], + dataset_name="eol_mistrust_mimic3", + config_path=str(config_path), + cache_dir=tempfile.TemporaryDirectory().name, + dev=True, + ) + base_dataset.stats() + + task = EOLMistrustMortalityPredictionMIMIC3(include_notes=True) + sample_dataset = base_dataset.set_task(task, num_workers=1) + sample_dataset.stats() + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run the EOL mistrust example workflow.") + parser.add_argument( + "--root", + type=Path, + default=DEFAULT_DATA_ROOT, + help="Root directory containing the combined EOL mistrust CSV exports.", + ) + parser.add_argument( + "--config-path", + type=Path, + default=DEFAULT_CONFIG_PATH, + help="Path to the EOL mistrust dataset YAML config.", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=None, + help="Optional directory for writing the required CSV deliverables.", + ) + parser.add_argument( + "--repetitions", + type=int, + default=100, + help="Number of downstream 60/40 evaluation repetitions.", + ) + parser.add_argument( + "--include-downstream-weight-summary", + action="store_true", + help="Also compute average downstream regularized weights across repetitions.", + ) + parser.add_argument( + "--include-cdf-plot-data", + action="store_true", + help="Also build empirical CDF data for race-based and trust-based treatment plots.", + ) + parser.add_argument( + "--task-demo", + action="store_true", + help="Also build a PyHealth sample 
dataset with the custom EOL mistrust task.", + ) + parser.add_argument( + "--note-chunksize", + type=int, + default=100_000, + help="Chunk size for streamed noteevents processing.", + ) + parser.add_argument( + "--chartevent-chunksize", + type=int, + default=500_000, + help="Chunk size for streamed chartevents processing.", + ) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + + artifacts = build_eol_mistrust_outputs( + root=args.root, + repetitions=args.repetitions, + include_downstream_weight_summary=args.include_downstream_weight_summary, + include_cdf_plot_data=args.include_cdf_plot_data, + output_dir=args.output_dir, + note_chunksize=args.note_chunksize, + chartevent_chunksize=args.chartevent_chunksize, + ) + + print("Validation summary:") + print(artifacts["validation_summary"]) + print() + print("Core artifact shapes:") + for key in ( + "base_admissions", + "all_cohort", + "eol_cohort", + "chartevent_feature_matrix", + "note_labels", + "mistrust_scores", + "final_model_table", + ): + df = artifacts[key] + if isinstance(df, pd.DataFrame): + print(f" {key}: {df.shape}") + + if args.output_dir is not None: + print() + print(f"Wrote required deliverables to: {args.output_dir}") + + if args.task_demo: + print() + print("Running PyHealth task demo...") + run_task_demo(root=args.root, config_path=args.config_path) + + +if __name__ == "__main__": + main() diff --git a/pyhealth/datasets/configs/eol_mistrust.yaml b/pyhealth/datasets/configs/eol_mistrust.yaml new file mode 100644 index 000000000..8872e010d --- /dev/null +++ b/pyhealth/datasets/configs/eol_mistrust.yaml @@ -0,0 +1,161 @@ +version: "1.0" + +tables: + patients: + file_path: "mimiciii_clinical/patients.csv" + patient_id: "subject_id" + timestamp: null + attributes: + - "gender" + - "dob" + - "dod" + - "dod_hosp" + - "dod_ssn" + - "expire_flag" + + admissions: + file_path: "mimiciii_clinical/admissions.csv" + patient_id: "subject_id" + timestamp: "admittime" + attributes: + - 
"hadm_id" + - "dischtime" + - "deathtime" + - "admission_type" + - "admission_location" + - "discharge_location" + - "insurance" + - "language" + - "religion" + - "marital_status" + - "ethnicity" + - "edregtime" + - "edouttime" + - "diagnosis" + - "hospital_expire_flag" + - "has_chartevents_data" + + icustays: + file_path: "mimiciii_clinical/icustays.csv" + patient_id: "subject_id" + timestamp: "intime" + attributes: + - "hadm_id" + - "icustay_id" + - "dbsource" + - "first_careunit" + - "last_careunit" + - "outtime" + - "los" + + noteevents: + file_path: "mimiciii_notes/noteevents.csv" + patient_id: "subject_id" + timestamp: "charttime" + attributes: + - "row_id" + - "hadm_id" + - "chartdate" + - "text" + - "category" + - "description" + - "storetime" + - "iserror" + + d_items: + file_path: "mimiciii_clinical/d_items.csv" + patient_id: null + timestamp: null + attributes: + - "itemid" + - "label" + - "abbreviation" + - "dbsource" + - "linksto" + - "category" + - "unitname" + - "param_type" + - "conceptid" + + chartevents: + file_path: "mimiciii_clinical/chartevents.csv" + patient_id: "subject_id" + timestamp: "charttime" + join: + - file_path: "mimiciii_clinical/d_items.csv" + "on": "itemid" + how: "left" + columns: + - "label" + - "dbsource" + - "category" + attributes: + - "hadm_id" + - "icustay_id" + - "itemid" + - "storetime" + - "cgid" + - "value" + - "valuenum" + - "valueuom" + - "warning" + - "error" + - "resultstatus" + - "stopped" + - "label" + - "dbsource" + - "category" + + ventdurations: + file_path: "mimiciii_derived/ventdurations.csv" + patient_id: "subject_id" + timestamp: "starttime" + join: + - file_path: "mimiciii_clinical/icustays.csv" + "on": "icustay_id" + how: "left" + columns: + - "subject_id" + - "hadm_id" + attributes: + - "hadm_id" + - "icustay_id" + - "ventnum" + - "endtime" + - "duration_hours" + + vasopressordurations: + file_path: "mimiciii_derived/vasopressordurations.csv" + patient_id: "subject_id" + timestamp: "starttime" + join: + 
- file_path: "mimiciii_clinical/icustays.csv" + "on": "icustay_id" + how: "left" + columns: + - "subject_id" + - "hadm_id" + attributes: + - "hadm_id" + - "icustay_id" + - "vasonum" + - "endtime" + - "duration_hours" + + oasis: + file_path: "mimiciii_derived/oasis.csv" + patient_id: "subject_id" + timestamp: null + attributes: + - "hadm_id" + - "icustay_id" + - "oasis" + + sapsii: + file_path: "mimiciii_derived/sapsii.csv" + patient_id: "subject_id" + timestamp: null + attributes: + - "hadm_id" + - "icustay_id" + - "sapsii" diff --git a/pyhealth/datasets/eol_mistrust.py b/pyhealth/datasets/eol_mistrust.py new file mode 100644 index 000000000..040e825c4 --- /dev/null +++ b/pyhealth/datasets/eol_mistrust.py @@ -0,0 +1,1522 @@ +"""Utilities for reproducing the EOL mistrust preprocessing and modeling tables.""" +# pylint: disable=too-many-lines + +import importlib +import re +from collections import defaultdict +from pathlib import Path +from typing import Callable, Iterable, Mapping, Sequence + +import pandas as pd # pylint: disable=import-error + +from pyhealth.tasks.eol_mistrust import ( + build_code_status_target as _build_task_code_status_target, + build_in_hospital_mortality_target as _build_task_in_hospital_mortality_target, + build_left_ama_target as _build_task_left_ama_target, +) + +_SENTIMENT_BACKEND: Callable[[str], tuple[float, float]] | None = None + + +def _load_transformers_sentiment() -> Callable[[str], tuple[float, float]]: + """Load a transformers sentiment pipeline, preferring GPU when available.""" + + transformers_module = importlib.import_module("transformers") + torch_module = importlib.import_module("torch") + + pipeline_factory = getattr(transformers_module, "pipeline", None) + if not callable(pipeline_factory): + raise ModuleNotFoundError("transformers.pipeline is unavailable in the current environment.") + + try: # pragma: no cover - logging surface depends on transformers version + transformers_logging = 
importlib.import_module("transformers.utils.logging") + set_verbosity_error = getattr(transformers_logging, "set_verbosity_error", None) + if callable(set_verbosity_error): + set_verbosity_error() + except Exception: + pass + + use_cuda = bool(getattr(torch_module, "cuda", None) and torch_module.cuda.is_available()) + device = 0 if use_cuda else -1 + classifier = pipeline_factory( + "sentiment-analysis", + model="distilbert/distilbert-base-uncased-finetuned-sst-2-english", + device=device, + ) + + def _transformers_sentiment(text: str) -> tuple[float, float]: + cleaned = " ".join(str(text).split()) + if not cleaned: + return (0.0, 0.0) + result = classifier(cleaned[:2048], truncation=True)[0] + label = str(result.get("label", "")).upper() + score = float(result.get("score", 0.0)) + polarity = score if "POS" in label else -score + return (polarity, 0.0) + + return _transformers_sentiment + + +def _default_sentiment_backend(text: str) -> tuple[float, float]: + """Resolve and cache the default transformers sentiment backend lazily.""" + + global _SENTIMENT_BACKEND + if _SENTIMENT_BACKEND is None: + _SENTIMENT_BACKEND = _load_transformers_sentiment() + return _SENTIMENT_BACKEND(text) + + +pattern_sentiment = _default_sentiment_backend + +try: + from sklearn.linear_model import LogisticRegression # pylint: disable=import-error +except ModuleNotFoundError: # pragma: no cover - lightweight test env fallback + class LogisticRegression: # type: ignore[no-redef] + """Fallback estimator that preserves the expected interface in test envs.""" + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + def fit(self, features, labels): + """Raise when scikit-learn is unavailable for model fitting.""" + + del features, labels + raise ModuleNotFoundError( + "scikit-learn is required for the default logistic regression estimator." 
+ ) + + def predict_proba(self, features): + """Raise when scikit-learn is unavailable for probability scoring.""" + + del features + raise ModuleNotFoundError( + "scikit-learn is required for the default logistic regression estimator." + ) + + +RACE_WHITE = "WHITE" +RACE_BLACK = "BLACK" +RACE_ASIAN = "ASIAN" +RACE_HISPANIC = "HISPANIC" +RACE_NATIVE_AMERICAN = "NATIVE AMERICAN" +RACE_OTHER = "OTHER" + +INSURANCE_PUBLIC = "Public" +INSURANCE_PRIVATE = "Private" +INSURANCE_SELF_PAY = "Self-Pay" + +RACE_CATEGORIES = [ + RACE_WHITE, + RACE_BLACK, + RACE_ASIAN, + RACE_HISPANIC, + RACE_NATIVE_AMERICAN, + RACE_OTHER, +] + +INSURANCE_CATEGORIES = [ + INSURANCE_PRIVATE, + INSURANCE_PUBLIC, + INSURANCE_SELF_PAY, +] + +TABLE2_LABELS = { + "1_1_sitter", + "bath", + "behavioral_interventions", + "education_barrier", + "education_learner", + "education_method", + "education_readiness", + "education_topic", + "family_communication_method", + "family_meeting", + "follows_commands", + "gcs_verbal_response", + "goal", + "hair_washed", + "harm_by_partner", + "healthcare_proxy", + "informed", + "judgment", + "non_violent_restraints", + "orientation", + "pain_assessment_method", + "pain_level", + "pain_management", + "reason_for_restraint", + "restraint_device", + "richmond_ras_scale", + "riker_sas_scale", + "safety_measures", + "security", + "side_rails", + "sitter", + "skin_care", + "social_work_consult", + "spiritual_support", + "stress", + "support_systems", + "understand_agree_with_plan", + "verbal_response", + "violent_restraints", + "wrist_restraints", +} + +CODE_STATUS_ITEMIDS = {128, 223758} + +REQUIRED_RAW_TABLE_COLUMNS = { + "admissions": [ + "hadm_id", + "subject_id", + "admittime", + "dischtime", + "ethnicity", + "insurance", + "discharge_location", + "hospital_expire_flag", + "has_chartevents_data", + ], + "patients": ["subject_id", "gender", "dob"], + "icustays": ["hadm_id", "icustay_id", "intime", "outtime"], + "noteevents": ["hadm_id", "category", "text", "iserror"], + 
"chartevents": ["hadm_id", "itemid", "value", "icustay_id"], + "d_items": ["itemid", "label", "dbsource"], +} + +REQUIRED_MATERIALIZED_VIEW_COLUMNS = { + "ventdurations": ["icustay_id", "ventnum", "starttime", "endtime", "duration_hours"], + "vasopressordurations": ["icustay_id", "vasonum", "starttime", "endtime", "duration_hours"], + "oasis": ["hadm_id", "icustay_id", "oasis"], + "sapsii": ["hadm_id", "icustay_id", "sapsii"], +} + +REQUIRED_JOIN_KEYS = { + "subject_id", + "hadm_id", + "icustay_id", + "itemid", +} + +NONCOMPLIANCE_PATTERNS = [ + "noncomplian", + "non-complian", + "nonadher", + "non-adher", + "noncompliance", + "noncompliant", + "refuses treatment", + "refused treatment", + "refused medication", + "refuses medication", +] + + +def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> None: + missing = [column for column in required if column not in df.columns] + if missing: + missing_str = ", ".join(missing) + raise ValueError(f"{df_name} is missing required columns: {missing_str}") + + +def _filter_non_error_notes(noteevents: pd.DataFrame) -> pd.DataFrame: + """Keep notes where iserror is NULL or not equal to 1.""" + + iserror_numeric = pd.to_numeric(noteevents["iserror"], errors="coerce") + keep_mask = noteevents["iserror"].isna() | iserror_numeric.ne(1) + return noteevents.loc[keep_mask].copy() + + +def _extract_positive_class_probabilities(probabilities) -> list[float]: + """Validate predict_proba output and return the positive-class column.""" + + probability_frame = pd.DataFrame(probabilities) + if probability_frame.shape[1] < 2: + raise ValueError( + "Estimator `predict_proba` output must have shape (n_samples, n_classes>=2)." 
+ ) + return probability_frame.iloc[:, 1].astype(float).tolist() + + +def _to_datetime(series: pd.Series) -> pd.Series: + return pd.to_datetime(series, errors="coerce") + + +def _normalize_hadm_ids(all_hadm_ids: Iterable[int] | None) -> list[int] | None: + """Normalize an optional hadm_id iterable to a sorted unique integer list.""" + + if all_hadm_ids is None: + return None + hadm_series = pd.Series(list(all_hadm_ids)) + hadm_numeric = pd.to_numeric(hadm_series, errors="coerce").dropna().astype(int) + return sorted(set(hadm_numeric.tolist())) + + +def _read_csv_columns( + csv_path: Path | str, + required_columns: Sequence[str], +) -> pd.DataFrame: + """Read only the requested CSV columns and normalize headers to lowercase.""" + + required = {column.lower() for column in required_columns} + df = pd.read_csv( + csv_path, + usecols=lambda column: str(column).lower() in required, + low_memory=False, + ) + df.columns = [str(column).lower() for column in df.columns] + return df + + +def _iter_csv_chunks( + csv_path: Path | str, + required_columns: Sequence[str], + chunksize: int, +): + """Yield CSV chunks with lowercase column names and only required columns.""" + + required = {column.lower() for column in required_columns} + reader = pd.read_csv( + csv_path, + usecols=lambda column: str(column).lower() in required, + chunksize=chunksize, + low_memory=False, + ) + for chunk in reader: + chunk.columns = [str(column).lower() for column in chunk.columns] + yield chunk + + +def _calculate_age_years(admittime: pd.Series, dob: pd.Series) -> pd.Series: + admittime = _to_datetime(admittime) + dob = _to_datetime(dob) + seconds_per_year = 365.25 * 24 * 3600 + + ages: list[float] = [] + for admit, birth in zip(admittime, dob): + if pd.isna(admit) or pd.isna(birth): + ages.append(float("nan")) + continue + age = (admit.to_pydatetime() - birth.to_pydatetime()).total_seconds() / seconds_per_year + ages.append(90.0 if age > 200 else float(age)) + return pd.Series(ages, 
index=admittime.index, dtype=float) + + +def _normalize_token(value) -> str: + if value is None or (isinstance(value, float) and pd.isna(value)): + return "" + value = str(value).strip().lower() + if not value: + return "" + value = re.sub(r"[^a-z0-9]+", "_", value) + value = re.sub(r"_+", "_", value) + return value.strip("_") + + +def _clean_feature_text(value) -> str: + """Normalize display text for feature labels and values without lowercasing.""" + + if value is None or (isinstance(value, float) and pd.isna(value)): + return "" + return re.sub(r"\s+", " ", str(value).strip()) + + +def _matches_table2_concept(label: str) -> bool: + """Return True when a d_items label matches a Table 2 concept by partial match.""" + + normalized_label = _normalize_token(label) + if normalized_label == "": + return False + return any( + (concept in normalized_label) or (normalized_label in concept) + for concept in TABLE2_LABELS + ) + + +def _collect_required_join_keys(raw_tables: Mapping[str, pd.DataFrame]) -> set[str]: + """Collect all join keys exposed by the core raw tables.""" + + required_tables = ["admissions", "patients", "icustays", "chartevents", "d_items"] + return set().union(*(set(raw_tables[name].columns) for name in required_tables)) + + +def _validate_database_identity( + schema_name: str | None, + database_flavor: str | None, +) -> tuple[str, str]: + """Validate and normalize the declared database flavor and schema.""" + + resolved_schema = "mimiciii" if schema_name is None else str(schema_name).lower() + resolved_flavor = ( + "postgresql" if database_flavor is None else str(database_flavor).lower() + ) + if resolved_schema != "mimiciii": + raise ValueError("Database schema must be mimiciii.") + if resolved_flavor not in {"postgresql", "postgres"}: + raise ValueError("Database flavor must be PostgreSQL.") + return resolved_schema, resolved_flavor + + +def _validate_required_inputs( + raw_tables: Mapping[str, pd.DataFrame], + materialized_views: Mapping[str, 
pd.DataFrame], +) -> None: + """Ensure all required raw tables, views, and columns are present.""" + + missing_raw = sorted(set(REQUIRED_RAW_TABLE_COLUMNS) - set(raw_tables)) + if missing_raw: + raise ValueError("Missing required raw tables: " + ", ".join(missing_raw)) + + missing_views = sorted( + set(REQUIRED_MATERIALIZED_VIEW_COLUMNS) - set(materialized_views) + ) + if missing_views: + raise ValueError( + "Missing required materialized views: " + ", ".join(missing_views) + ) + + for table_name, required_columns in REQUIRED_RAW_TABLE_COLUMNS.items(): + _require_columns(raw_tables[table_name], required_columns, table_name) + for view_name, required_columns in REQUIRED_MATERIALIZED_VIEW_COLUMNS.items(): + _require_columns(materialized_views[view_name], required_columns, view_name) + + +def _validate_text_access(noteevents: pd.DataFrame, chartevents: pd.DataFrame) -> None: + """Ensure the text fields required for NLP and string matching are present.""" + + if noteevents["text"].isna().all(): + raise ValueError("noteevents.text must be accessible for NLP steps.") + if chartevents["value"].isna().all(): + raise ValueError( + "chartevents.value must be accessible for string matching and feature extraction." 
+ ) + + +def _validate_bridge_join( + source_df: pd.DataFrame, + bridge_df: pd.DataFrame, + join_column: str, + error_message: str, +) -> None: + """Ensure a required bridge join yields at least one row when source rows exist.""" + + merged = source_df.merge(bridge_df, on=join_column, how="inner") + if source_df.shape[0] > 0 and merged.empty: + raise ValueError(error_message) + + +def map_ethnicity(ethnicity) -> str: + """Map raw MIMIC ethnicity strings to the paper's coarse race groups.""" + + text = str(ethnicity or "").upper() + if "BLACK" in text or "AFRICAN" in text: + return RACE_BLACK + if "WHITE" in text or "EUROPEAN" in text or "PORTUGUESE" in text: + return RACE_WHITE + if "ASIAN" in text: + return RACE_ASIAN + if "HISPANIC" in text or "LATINO" in text or "SOUTH AMERICAN" in text: + return RACE_HISPANIC + if ( + "NATIVE" in text + or "AMERICAN INDIAN" in text + or "ALASKA NATIVE" in text + ): + return RACE_NATIVE_AMERICAN + return RACE_OTHER + + +def map_insurance(insurance) -> str: + """Collapse raw MIMIC insurance values into the required three groups.""" + + text = str(insurance or "").strip().lower() + normalized = re.sub(r"\s+", " ", text) + if normalized in {"medicare", "medicaid", "government", "public"}: + return INSURANCE_PUBLIC + if normalized in {"private"}: + return INSURANCE_PRIVATE + if normalized in {"self pay", "self-pay", "self_pay"}: + return INSURANCE_SELF_PAY + raise ValueError(f"Unexpected insurance value: {insurance}") + + +def prepare_note_text_for_sentiment(text) -> str: + """Normalize note text using whitespace tokenization and rejoining only.""" + + if text is None or (isinstance(text, float) and pd.isna(text)): + return "" + tokens = str(text).split() + return " ".join(tokens) + + +def build_base_admissions(admissions: pd.DataFrame, patients: pd.DataFrame) -> pd.DataFrame: + """Join admissions to patients and keep only rows with chart events available.""" + + _require_columns( + admissions, + [ + "hadm_id", + "subject_id", + 
"admittime", + "dischtime", + "ethnicity", + "insurance", + "discharge_location", + "hospital_expire_flag", + "has_chartevents_data", + ], + "admissions", + ) + _require_columns(patients, ["subject_id", "gender", "dob"], "patients") + + admissions_df = admissions.copy() + patients_df = patients.copy() + admissions_df["admittime"] = _to_datetime(admissions_df["admittime"]) + admissions_df["dischtime"] = _to_datetime(admissions_df["dischtime"]) + patients_df["dob"] = _to_datetime(patients_df["dob"]) + + merged = admissions_df.merge( + patients_df[["subject_id", "gender", "dob"]], + on="subject_id", + how="left", + validate="many_to_one", + ) + merged = merged.loc[merged["has_chartevents_data"] == 1].copy() + merged = merged.sort_values("hadm_id").drop_duplicates("hadm_id") + return merged.reset_index(drop=True) + + +def build_demographics_table(base_admissions: pd.DataFrame) -> pd.DataFrame: + """Derive race, age, LOS, and insurance-group fields for each admission.""" + + _require_columns( + base_admissions, + [ + "hadm_id", + "subject_id", + "admittime", + "dischtime", + "ethnicity", + "insurance", + "gender", + "dob", + ], + "base_admissions", + ) + + df = base_admissions.copy() + df["admittime"] = _to_datetime(df["admittime"]) + df["dischtime"] = _to_datetime(df["dischtime"]) + df["dob"] = _to_datetime(df["dob"]) + + age_years = _calculate_age_years(df["admittime"], df["dob"]) + los_hours = (df["dischtime"] - df["admittime"]).dt.total_seconds() / 3600.0 + los_days = los_hours / 24.0 + insurance_group = df["insurance"].map(map_insurance) + + demographics = pd.DataFrame( + { + "hadm_id": df["hadm_id"], + "subject_id": df["subject_id"], + "gender": df["gender"], + "admittime": df["admittime"], + "dischtime": df["dischtime"], + "ethnicity": df["ethnicity"], + "insurance_raw": df["insurance"], + "race": df["ethnicity"].map(map_ethnicity), + "age": age_years.astype(float), + "los_hours": los_hours.astype(float), + "los_days": los_days.astype(float), + "insurance": 
insurance_group, + "insurance_group": insurance_group, + } + ) + demographics = demographics.sort_values("hadm_id").drop_duplicates("hadm_id") + return demographics.reset_index(drop=True) + + +def build_eol_cohort(base_admissions: pd.DataFrame, demographics: pd.DataFrame) -> pd.DataFrame: + """Build the end-of-life cohort used for treatment-disparity analysis.""" + + _require_columns( + base_admissions, + ["hadm_id", "discharge_location", "hospital_expire_flag"], + "base_admissions", + ) + _require_columns(demographics, ["hadm_id", "los_hours"], "demographics") + + df = demographics.merge( + base_admissions[["hadm_id", "discharge_location", "hospital_expire_flag"]], + on="hadm_id", + how="inner", + validate="one_to_one", + ) + discharge_location = df["discharge_location"].fillna("").str.upper() + is_deceased = df["hospital_expire_flag"].fillna(0).astype(int) == 1 + is_hospice = discharge_location.str.contains("HOSPICE", na=False) + is_snf = discharge_location.str.contains(r"SKILLED NURSING|\bSNF\b", na=False, regex=True) + + include = (df["los_hours"] >= 6) & (is_deceased | is_hospice | is_snf) + df = df.loc[include].copy() + df["discharge_category"] = "Skilled Nursing Facility" + df.loc[is_hospice.loc[df.index], "discharge_category"] = "Hospice" + df.loc[is_deceased.loc[df.index], "discharge_category"] = "Deceased" + df = df.sort_values("hadm_id").drop_duplicates("hadm_id") + return df.reset_index(drop=True) + + +def build_all_cohort(base_admissions: pd.DataFrame, icustays: pd.DataFrame) -> pd.DataFrame: + """Build the admission-level cohort with at least one ICU stay of 12 hours.""" + + _require_columns(base_admissions, ["hadm_id"], "base_admissions") + _require_columns(icustays, ["hadm_id", "icustay_id", "intime", "outtime"], "icustays") + + icu = icustays.copy() + icu["intime"] = _to_datetime(icu["intime"]) + icu["outtime"] = _to_datetime(icu["outtime"]) + icu["icu_los_hours"] = (icu["outtime"] - icu["intime"]).dt.total_seconds() / 3600.0 + + qualifying = 
icu.loc[icu["icu_los_hours"] >= 12, "hadm_id"].drop_duplicates()
+    df = base_admissions.loc[base_admissions["hadm_id"].isin(set(qualifying))].copy()
+    df = df.sort_values("hadm_id").drop_duplicates("hadm_id")
+    return df.reset_index(drop=True)
+
+
+def _merge_spans_for_hadm(spans: pd.DataFrame) -> float:
+    """Merge overlapping/nearby treatment spans and return the total minutes.
+
+    Spans are sorted by start time; consecutive spans whose gap is at most
+    600 minutes are coalesced into one continuous interval before durations
+    are summed, so briefly interrupted treatment is not counted as separate
+    episodes. Rows with a missing start or end timestamp are skipped.
+
+    Args:
+        spans: Frame with datetime ``starttime`` and ``endtime`` columns.
+
+    Returns:
+        Total merged duration in minutes (0.0 for an empty frame).
+    """
+    if spans.empty:
+        return 0.0
+
+    spans = spans.sort_values("starttime")
+    merged = []
+    current_start = None
+    current_end = None
+
+    for row in spans.itertuples(index=False):
+        start = row.starttime
+        end = row.endtime
+        if pd.isna(start) or pd.isna(end):
+            continue
+        if current_start is None:
+            # First valid span opens the initial interval.
+            current_start = start
+            current_end = end
+            continue
+
+        # NOTE(review): the 600-minute (10 h) merge gap looks like a study
+        # constant; confirm it matches the intended episode definition.
+        gap_minutes = (start - current_end).total_seconds() / 60.0
+        if gap_minutes <= 600:
+            # Extend the open interval; max() handles fully-contained spans.
+            current_end = max(current_end, end)
+        else:
+            merged.append((current_start, current_end))
+            current_start = start
+            current_end = end
+
+    if current_start is not None:
+        merged.append((current_start, current_end))
+
+    total_minutes = 0.0
+    for start, end in merged:
+        total_minutes += (end - start).total_seconds() / 60.0
+    return total_minutes
+
+
+def _duration_totals_by_hadm(
+    durations: pd.DataFrame,
+    icustays: pd.DataFrame,
+    number_col: str,
+    output_col: str,
+) -> pd.DataFrame:
+    """Aggregate per-icustay duration spans to one total (minutes) per hadm_id.
+
+    Args:
+        durations: Span table keyed by ``icustay_id`` (e.g. ventdurations).
+        icustays: Bridge table mapping ``icustay_id`` to ``hadm_id``.
+        number_col: Episode-number column that must exist in ``durations``.
+        output_col: Name of the output total column (also reused as the table
+            name in validation error messages).
+
+    Returns:
+        Frame with ``hadm_id`` and ``output_col`` columns.
+    """
+    _require_columns(
+        durations,
+        ["icustay_id", number_col, "starttime", "endtime", "duration_hours"],
+        output_col,
+    )
+    if durations.empty:
+        return pd.DataFrame(columns=["hadm_id", output_col])
+
+    bridge = icustays[["icustay_id", "hadm_id"]].drop_duplicates()
+    df = durations.copy()
+    # Drop any pre-existing hadm_id so the icustays bridge is the sole source.
+    if "hadm_id" in df.columns:
+        df = df.drop(columns=["hadm_id"])
+    df["starttime"] = _to_datetime(df["starttime"])
+    df["endtime"] = _to_datetime(df["endtime"])
+    df = df.merge(bridge, on="icustay_id", how="inner", validate="many_to_one")
+
+    # NOTE(review): include_groups=False requires pandas >= 2.2; confirm the
+    # project's minimum supported pandas version.
+    totals = (
+        df.groupby("hadm_id", sort=True)
+        .apply(_merge_spans_for_hadm, include_groups=False)
+        .rename(output_col)
+        .reset_index()
+    )
+    return totals
+
+
+def build_treatment_totals(
+    icustays: pd.DataFrame,
+    
ventdurations: pd.DataFrame, + vasopressordurations: pd.DataFrame, +) -> pd.DataFrame: + """Compute admission-level ventilation and vasopressor totals in minutes.""" + + _require_columns(icustays, ["hadm_id", "icustay_id", "intime", "outtime"], "icustays") + + vent_totals = _duration_totals_by_hadm( + ventdurations, + icustays, + number_col="ventnum", + output_col="total_vent_min", + ) + vaso_totals = _duration_totals_by_hadm( + vasopressordurations, + icustays, + number_col="vasonum", + output_col="total_vaso_min", + ) + + if vent_totals.empty and vaso_totals.empty: + return pd.DataFrame(columns=["hadm_id", "total_vent_min", "total_vaso_min"]) + + totals = pd.merge(vent_totals, vaso_totals, on="hadm_id", how="outer") + totals = totals.sort_values("hadm_id").drop_duplicates("hadm_id") + return totals.reset_index(drop=True) + + +def build_note_corpus( + noteevents: pd.DataFrame, + all_hadm_ids: Iterable[int] | None = None, +) -> pd.DataFrame: + """Aggregate non-error notes into one concatenated note per admission.""" + + _require_columns(noteevents, ["hadm_id", "text", "iserror"], "noteevents") + + notes = noteevents.copy() + notes = _filter_non_error_notes(notes) + notes["text"] = notes["text"].map(prepare_note_text_for_sentiment) + + grouped = ( + notes.groupby("hadm_id", sort=True)["text"] + .apply(lambda series: prepare_note_text_for_sentiment(" ".join(t for t in series if t))) + .reset_index(name="note_text") + ) + + if all_hadm_ids is not None: + hadm_frame = pd.DataFrame({"hadm_id": list(all_hadm_ids)}) + grouped = hadm_frame.merge(grouped, on="hadm_id", how="left") + + grouped["note_text"] = grouped["note_text"].fillna("") + grouped = grouped.sort_values("hadm_id").drop_duplicates("hadm_id") + return grouped.reset_index(drop=True) + + +def _build_note_labels_from_corpus(note_corpus: pd.DataFrame) -> pd.DataFrame: + """Create the two note-derived labels from an admission-level note corpus.""" + + _require_columns(note_corpus, ["hadm_id", "note_text"], 
"note_corpus") + lowered = note_corpus["note_text"].fillna("").astype(str).str.lower() + noncompliance = lowered.apply( + lambda text: int(any(pattern in text for pattern in NONCOMPLIANCE_PATTERNS)) + ) + autopsy = lowered.apply(lambda text: int("autopsy" in text)) + + labels = pd.DataFrame( + { + "hadm_id": note_corpus["hadm_id"], + "noncompliance_label": noncompliance.astype(int), + "autopsy_label": autopsy.astype(int), + } + ) + labels = labels.sort_values("hadm_id").drop_duplicates("hadm_id") + return labels.reset_index(drop=True) + + +def build_note_labels( + noteevents: pd.DataFrame, + all_hadm_ids: Iterable[int] | None = None, +) -> pd.DataFrame: + """Create admission-level noncompliance and autopsy labels from notes.""" + + _require_columns(noteevents, ["hadm_id", "text", "iserror"], "noteevents") + corpus = build_note_corpus(noteevents, all_hadm_ids=all_hadm_ids) + return _build_note_labels_from_corpus(corpus) + + +def build_note_artifacts_from_csv( + noteevents_csv_path: Path | str, + all_hadm_ids: Iterable[int] | None = None, + chunksize: int = 100_000, +) -> tuple[pd.DataFrame, pd.DataFrame]: + """Build the note corpus and note-derived labels from a large CSV in chunks.""" + + normalized_hadm_ids = _normalize_hadm_ids(all_hadm_ids) + hadm_filter = set(normalized_hadm_ids) if normalized_hadm_ids is not None else None + note_fragments: dict[int, list[str]] = defaultdict(list) + + for chunk in _iter_csv_chunks( + noteevents_csv_path, + required_columns=["hadm_id", "text", "iserror"], + chunksize=chunksize, + ): + chunk["hadm_id"] = pd.to_numeric(chunk["hadm_id"], errors="coerce") + chunk = chunk.dropna(subset=["hadm_id"]).copy() + chunk["hadm_id"] = chunk["hadm_id"].astype(int) + + if hadm_filter is not None: + chunk = chunk.loc[chunk["hadm_id"].isin(hadm_filter)] + if chunk.empty: + continue + + chunk = _filter_non_error_notes(chunk) + if chunk.empty: + continue + + chunk["text"] = chunk["text"].map(prepare_note_text_for_sentiment) + chunk = 
chunk.loc[chunk["text"] != ""] + if chunk.empty: + continue + + grouped = ( + chunk.groupby("hadm_id", sort=False)["text"] + .apply(lambda series: prepare_note_text_for_sentiment(" ".join(series))) + ) + for hadm_id, text in grouped.items(): + if text: + note_fragments[int(hadm_id)].append(text) + + if normalized_hadm_ids is not None: + hadm_ids = normalized_hadm_ids + else: + hadm_ids = sorted(note_fragments) + + corpus = pd.DataFrame( + { + "hadm_id": hadm_ids, + "note_text": [ + prepare_note_text_for_sentiment(" ".join(note_fragments.get(hadm_id, []))) + for hadm_id in hadm_ids + ], + } + ) + corpus = corpus.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True) + labels = _build_note_labels_from_corpus(corpus) + return corpus, labels + + +def build_note_corpus_from_csv( + noteevents_csv_path: Path | str, + all_hadm_ids: Iterable[int] | None = None, + chunksize: int = 100_000, +) -> pd.DataFrame: + """Build the admission-level note corpus from a large CSV in chunks.""" + + corpus, _ = build_note_artifacts_from_csv( + noteevents_csv_path=noteevents_csv_path, + all_hadm_ids=all_hadm_ids, + chunksize=chunksize, + ) + return corpus + + +def build_note_labels_from_csv( + noteevents_csv_path: Path | str, + all_hadm_ids: Iterable[int] | None = None, + chunksize: int = 100_000, +) -> pd.DataFrame: + """Build note-derived labels from a large CSV in chunks.""" + + _, labels = build_note_artifacts_from_csv( + noteevents_csv_path=noteevents_csv_path, + all_hadm_ids=all_hadm_ids, + chunksize=chunksize, + ) + return labels + + +def identify_table2_itemids(d_items: pd.DataFrame) -> set[int]: + """Identify chart itemids that match the paper's Table 2 concepts.""" + + _require_columns(d_items, ["itemid", "label", "dbsource"], "d_items") + matches = d_items["label"].map(_matches_table2_concept) + return set(d_items.loc[matches, "itemid"].tolist()) + + +def build_chartevent_artifacts_from_csv( + chartevents_csv_path: Path | str, + d_items: pd.DataFrame, + 
allowed_labels: Iterable[str] | None = None, + all_hadm_ids: Iterable[int] | None = None, + chunksize: int = 500_000, +) -> tuple[pd.DataFrame, pd.DataFrame]: + """Build the feature matrix and code-status targets from a large CSV in chunks.""" + + _require_columns(d_items, ["itemid", "label", "dbsource"], "d_items") + + items = d_items.copy() + items["normalized_label"] = items["label"].map(_normalize_token) + if allowed_labels is not None: + allowed = {_normalize_token(label) for label in allowed_labels} + items = items.loc[items["normalized_label"].isin(allowed)].copy() + else: + allowed_itemids = identify_table2_itemids(items) + items = items.loc[items["itemid"].isin(allowed_itemids)].copy() + + items["itemid"] = pd.to_numeric(items["itemid"], errors="coerce") + items = items.dropna(subset=["itemid"]).copy() + items["itemid"] = items["itemid"].astype(int) + + feature_lookup = ( + items[["itemid", "label"]] + .drop_duplicates("itemid") + .set_index("itemid")["label"] + .to_dict() + ) + feature_itemids = set(feature_lookup) + relevant_itemids = feature_itemids | set(CODE_STATUS_ITEMIDS) + normalized_hadm_ids = _normalize_hadm_ids(all_hadm_ids) + hadm_filter = set(normalized_hadm_ids) if normalized_hadm_ids is not None else None + + feature_to_hadm: dict[str, set[int]] = defaultdict(set) + code_status_positive: dict[int, int] = {} + + for chunk in _iter_csv_chunks( + chartevents_csv_path, + required_columns=["hadm_id", "itemid", "value", "icustay_id"], + chunksize=chunksize, + ): + chunk["hadm_id"] = pd.to_numeric(chunk["hadm_id"], errors="coerce") + chunk["itemid"] = pd.to_numeric(chunk["itemid"], errors="coerce") + chunk = chunk.dropna(subset=["hadm_id", "itemid"]).copy() + if chunk.empty: + continue + + chunk["hadm_id"] = chunk["hadm_id"].astype(int) + chunk["itemid"] = chunk["itemid"].astype(int) + + if hadm_filter is not None: + chunk = chunk.loc[chunk["hadm_id"].isin(hadm_filter)] + if chunk.empty: + continue + + chunk = 
chunk.loc[chunk["itemid"].isin(relevant_itemids)].copy() + if chunk.empty: + continue + + feature_chunk = chunk.loc[chunk["itemid"].isin(feature_itemids)].copy() + if not feature_chunk.empty: + feature_chunk["label"] = feature_chunk["itemid"].map(feature_lookup) + feature_chunk["normalized_value"] = feature_chunk["value"].map(_normalize_token) + feature_chunk["display_label"] = feature_chunk["label"].map(_clean_feature_text) + feature_chunk["display_value"] = feature_chunk["value"].map(_clean_feature_text) + feature_chunk = feature_chunk.loc[ + (feature_chunk["normalized_value"] != "") + & (feature_chunk["display_label"] != "") + ].copy() + if not feature_chunk.empty: + feature_chunk["feature_name"] = ( + feature_chunk["display_label"] + ": " + feature_chunk["display_value"] + ) + unique_pairs = feature_chunk[["hadm_id", "feature_name"]].drop_duplicates() + for feature_name, group in unique_pairs.groupby("feature_name", sort=False): + feature_to_hadm[str(feature_name)].update(group["hadm_id"].astype(int).tolist()) + + code_chunk = chunk.loc[chunk["itemid"].isin(CODE_STATUS_ITEMIDS)].copy() + if not code_chunk.empty: + normalized_value = code_chunk["value"].map(_normalize_token) + positives = normalized_value.apply( + lambda value: int( + ("dnr" in value) + or ("dni" in value) + or ("comfort" in value) + or ("cmo" in value) + ) + ) + for hadm_id, is_positive in zip(code_chunk["hadm_id"].astype(int), positives): + code_status_positive[hadm_id] = max( + code_status_positive.get(hadm_id, 0), + int(is_positive), + ) + + if normalized_hadm_ids is not None: + hadm_ids = normalized_hadm_ids + else: + hadm_ids = sorted(set().union(*feature_to_hadm.values())) if feature_to_hadm else [] + + feature_names = sorted(feature_to_hadm) + feature_data: dict[str, object] = {"hadm_id": hadm_ids} + hadm_index = pd.Index(hadm_ids) + for feature_name in feature_names: + feature_data[feature_name] = hadm_index.isin(feature_to_hadm[feature_name]).astype(int) + feature_matrix = 
pd.DataFrame(feature_data) + if "hadm_id" not in feature_matrix.columns: + feature_matrix = pd.DataFrame(columns=["hadm_id"]) + feature_matrix = ( + feature_matrix.sort_values("hadm_id") + .drop_duplicates("hadm_id") + .reset_index(drop=True) + ) + + code_status_targets = pd.DataFrame( + { + "hadm_id": sorted(code_status_positive), + "code_status_dnr_dni_cmo": [ + int(code_status_positive[hadm_id]) for hadm_id in sorted(code_status_positive) + ], + } + ) + code_status_targets = ( + code_status_targets.sort_values("hadm_id") + .drop_duplicates("hadm_id") + .reset_index(drop=True) + ) + return feature_matrix, code_status_targets + + +def build_chartevent_feature_matrix( + chartevents: pd.DataFrame, + d_items: pd.DataFrame, + allowed_labels: Iterable[str] | None = None, + all_hadm_ids: Iterable[int] | None = None, +) -> pd.DataFrame: + """Build a binary admission-by-feature matrix from selected chart events.""" + + _require_columns(chartevents, ["hadm_id", "itemid", "value", "icustay_id"], "chartevents") + _require_columns(d_items, ["itemid", "label", "dbsource"], "d_items") + + events = chartevents.copy() + items = d_items.copy() + items["normalized_label"] = items["label"].map(_normalize_token) + + if allowed_labels is not None: + allowed = {_normalize_token(label) for label in allowed_labels} + items = items.loc[items["normalized_label"].isin(allowed)].copy() + else: + allowed_itemids = identify_table2_itemids(items) + items = items.loc[items["itemid"].isin(allowed_itemids)].copy() + + merged = events.merge( + items[["itemid", "label", "normalized_label"]], + on="itemid", + how="inner", + validate="many_to_one", + ) + merged["normalized_value"] = merged["value"].map(_normalize_token) + merged["display_label"] = merged["label"].map(_clean_feature_text) + merged["display_value"] = merged["value"].map(_clean_feature_text) + merged = merged.loc[ + (merged["normalized_value"] != "") & (merged["display_label"] != "") + ].copy() + + if merged.empty: + result = 
pd.DataFrame(columns=["hadm_id"]) + else: + merged["feature_name"] = merged["display_label"] + ": " + merged["display_value"] + pivot = ( + merged.assign(feature_value=1) + .pivot_table( + index="hadm_id", + columns="feature_name", + values="feature_value", + aggfunc="max", + fill_value=0, + ) + .reset_index() + ) + pivot.columns.name = None + result = pivot + + if all_hadm_ids is not None: + hadm_frame = pd.DataFrame({"hadm_id": list(all_hadm_ids)}) + result = hadm_frame.merge(result, on="hadm_id", how="left") + + if "hadm_id" not in result.columns: + result = pd.DataFrame(columns=["hadm_id"]) + + feature_cols = [col for col in result.columns if col != "hadm_id"] + if feature_cols: + result[feature_cols] = result[feature_cols].fillna(0).astype(int) + result = result.sort_values("hadm_id").drop_duplicates("hadm_id") + return result.reset_index(drop=True) + + +def build_chartevent_feature_matrix_from_csv( + chartevents_csv_path: Path | str, + d_items: pd.DataFrame, + allowed_labels: Iterable[str] | None = None, + all_hadm_ids: Iterable[int] | None = None, + chunksize: int = 500_000, +) -> pd.DataFrame: + """Build the binary feature matrix from a large chartevents CSV in chunks.""" + + feature_matrix, _ = build_chartevent_artifacts_from_csv( + chartevents_csv_path=chartevents_csv_path, + d_items=d_items, + allowed_labels=allowed_labels, + all_hadm_ids=all_hadm_ids, + chunksize=chunksize, + ) + return feature_matrix + + +def z_normalize_scores(df: pd.DataFrame, columns: Sequence[str]) -> pd.DataFrame: + """Apply independent z-score normalization to the requested score columns.""" + + normalized = df.copy() + for column in columns: + _require_columns(normalized, [column], "score_table") + values = normalized[column].astype(float) + mean = values.mean() + std = values.std(ddof=0) + if pd.isna(std) or std == 0: + normalized[column] = 0.0 + else: + normalized[column] = (values - mean) / std + return normalized + + +def build_acuity_scores(oasis: pd.DataFrame, sapsii: 
pd.DataFrame) -> pd.DataFrame:
+    """Aggregate OASIS and SAPS II to one admission-level row per hadm_id.
+
+    Scores arrive per ICU stay; the max across an admission's stays is kept
+    for each score. The outer merge preserves admissions that have only one
+    of the two scores (the missing one is left as NaN).
+    """
+
+    _require_columns(oasis, ["hadm_id", "icustay_id", "oasis"], "oasis")
+    _require_columns(sapsii, ["hadm_id", "icustay_id", "sapsii"], "sapsii")
+
+    oasis_agg = oasis.groupby("hadm_id", as_index=False)["oasis"].max()
+    sapsii_agg = sapsii.groupby("hadm_id", as_index=False)["sapsii"].max()
+    acuity = oasis_agg.merge(sapsii_agg, on="hadm_id", how="outer")
+    acuity = acuity.sort_values("hadm_id").drop_duplicates("hadm_id")
+    return acuity.reset_index(drop=True)
+
+
+def build_proxy_probability_scores(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    label_column: str,
+    estimator_factory: Callable[[], object] | None = None,
+) -> pd.DataFrame:
+    """Fit the proxy label model and return positive-class probabilities.
+
+    By default an L1-regularized logistic regression is fit on the binary
+    chart-event features to predict the note-derived proxy label; the fitted
+    model's positive-class probability for each admission becomes its score.
+    The probabilities are in-sample — the model is scored on the same rows it
+    was fit on — which is intentional for proxy scoring but means they must
+    not be read as held-out performance.
+
+    Args:
+        feature_matrix: ``hadm_id`` plus binary feature columns.
+        note_labels: ``hadm_id`` plus the requested ``label_column``.
+        label_column: Proxy label to fit (e.g. ``"noncompliance_label"``).
+        estimator_factory: Optional zero-arg factory returning an estimator
+            with sklearn-style ``fit``/``predict_proba``.
+
+    Returns:
+        Frame with ``hadm_id`` and a score column: a trailing ``_label``
+        suffix is replaced by ``_score``, otherwise ``_score`` is appended.
+    """
+
+    _require_columns(feature_matrix, ["hadm_id"], "feature_matrix")
+    _require_columns(note_labels, ["hadm_id", label_column], "note_labels")
+
+    feature_columns = [column for column in feature_matrix.columns if column != "hadm_id"]
+    # Inner join keeps only admissions present in both tables; one_to_one
+    # validation raises on duplicate hadm_id rows on either side.
+    merged = feature_matrix.merge(
+        note_labels[["hadm_id", label_column]],
+        on="hadm_id",
+        how="inner",
+        validate="one_to_one",
+    ).sort_values("hadm_id")
+
+    feature_values = merged[feature_columns]
+    y = merged[label_column].astype(int)
+
+    if estimator_factory is None:
+        estimator = LogisticRegression(penalty="l1", solver="liblinear", max_iter=1000)
+    else:
+        estimator = estimator_factory()
+
+    estimator.fit(feature_values, y)
+    probabilities = estimator.predict_proba(feature_values)
+    score_column = (
+        f"{label_column[:-6]}_score" if label_column.endswith("_label") else f"{label_column}_score"
+    )
+
+    scores = pd.DataFrame(
+        {
+            "hadm_id": merged["hadm_id"].tolist(),
+            score_column: _extract_positive_class_probabilities(probabilities),
+        }
+    )
+    scores = scores.sort_values("hadm_id").drop_duplicates("hadm_id")
+    return scores.reset_index(drop=True)
+
+
+def build_negative_sentiment_scores(
+    
note_corpus: pd.DataFrame, + sentiment_fn: Callable[[str], tuple[float, float]] | None = None, +) -> pd.DataFrame: + """Convert note sentiment polarity into an admission-level mistrust score.""" + + _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus") + + if sentiment_fn is None: + sentiment_fn = pattern_sentiment + + rows = [] + for row in note_corpus.sort_values("hadm_id").itertuples(index=False): + text = prepare_note_text_for_sentiment(row.note_text) + if text == "": + score = 0.0 + else: + polarity, _ = sentiment_fn(text) + score = -1.0 * float(polarity) + rows.append({"hadm_id": row.hadm_id, "negative_sentiment_score": score}) + + scores = pd.DataFrame(rows).sort_values("hadm_id").drop_duplicates("hadm_id") + return scores.reset_index(drop=True) + + +def build_mistrust_score_table( + feature_matrix: pd.DataFrame, + note_labels: pd.DataFrame, + note_corpus: pd.DataFrame, + estimator_factory: Callable[[], object] | None = None, + sentiment_fn: Callable[[str], tuple[float, float]] | None = None, +) -> pd.DataFrame: + """Build and normalize the three admission-level mistrust score vectors.""" + + _require_columns(feature_matrix, ["hadm_id"], "feature_matrix") + _require_columns( + note_labels, + ["hadm_id", "noncompliance_label", "autopsy_label"], + "note_labels", + ) + _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus") + + noncompliance_scores = build_proxy_probability_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column="noncompliance_label", + estimator_factory=estimator_factory, + ) + autopsy_scores = build_proxy_probability_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column="autopsy_label", + estimator_factory=estimator_factory, + ) + negative_sentiment_scores = build_negative_sentiment_scores( + note_corpus, + sentiment_fn=sentiment_fn, + ) + + merged = ( + noncompliance_scores.merge(autopsy_scores, on="hadm_id", how="inner") + 
.merge(negative_sentiment_scores, on="hadm_id", how="inner")
+        .sort_values("hadm_id")
+    )
+    # Each score is z-normalized independently, then suffixed with _z so the
+    # final table distinguishes normalized columns from the raw scores.
+    normalized = z_normalize_scores(
+        merged,
+        columns=[
+            "noncompliance_score",
+            "autopsy_score",
+            "negative_sentiment_score",
+        ],
+    )
+    normalized = normalized.rename(
+        columns={
+            "noncompliance_score": "noncompliance_score_z",
+            "autopsy_score": "autopsy_score_z",
+            "negative_sentiment_score": "negative_sentiment_score_z",
+        }
+    )
+    normalized = normalized.sort_values("hadm_id").drop_duplicates("hadm_id")
+    return normalized.reset_index(drop=True)
+
+
+def _build_gender_one_hot(df: pd.DataFrame) -> pd.DataFrame:
+    """One-hot encode gender into gender_f/gender_m.
+
+    Missing or unrecognized values yield 0 in both indicator columns.
+    """
+    output = pd.DataFrame({"hadm_id": df["hadm_id"]})
+    gender = df["gender"].fillna("").str.upper()
+    output["gender_f"] = (gender == "F").astype(int)
+    output["gender_m"] = (gender == "M").astype(int)
+    return output
+
+
+def _build_insurance_one_hot(df: pd.DataFrame) -> pd.DataFrame:
+    """One-hot encode the insurance group into private/public/self-pay columns.
+
+    Prefers the ``insurance_group`` column when present, falling back to
+    ``insurance``; values outside the three known groups yield all zeros.
+    """
+    output = pd.DataFrame({"hadm_id": df["hadm_id"]})
+    insurance_column = "insurance_group" if "insurance_group" in df.columns else "insurance"
+    insurance = df[insurance_column].fillna("")
+    output["insurance_private"] = (insurance == INSURANCE_PRIVATE).astype(int)
+    output["insurance_public"] = (insurance == INSURANCE_PUBLIC).astype(int)
+    output["insurance_self_pay"] = (insurance == INSURANCE_SELF_PAY).astype(int)
+    return output
+
+
+def _build_race_one_hot(df: pd.DataFrame) -> pd.DataFrame:
+    """One-hot encode the mapped race category into six indicator columns.
+
+    Missing values (and anything outside the six constants) yield all zeros.
+    """
+    output = pd.DataFrame({"hadm_id": df["hadm_id"]})
+    race = df["race"].fillna("")
+    output["race_white"] = (race == RACE_WHITE).astype(int)
+    output["race_black"] = (race == RACE_BLACK).astype(int)
+    output["race_asian"] = (race == RACE_ASIAN).astype(int)
+    output["race_hispanic"] = (race == RACE_HISPANIC).astype(int)
+    output["race_native_american"] = (race == RACE_NATIVE_AMERICAN).astype(int)
+    output["race_other"] = (race == RACE_OTHER).astype(int)
+    return output
+
+
+def _build_code_status_target(chartevents: pd.DataFrame, d_items: pd.DataFrame) -> pd.DataFrame:
+    
_require_columns(chartevents, ["hadm_id", "itemid", "value", "icustay_id"], "chartevents") + _require_columns(d_items, ["itemid", "label", "dbsource"], "d_items") + return _build_task_code_status_target(chartevents, itemids=CODE_STATUS_ITEMIDS) + + +def build_code_status_target_from_csv( + chartevents_csv_path: Path | str, + chunksize: int = 500_000, +) -> pd.DataFrame: + """Build the code-status target from a large chartevents CSV in chunks.""" + + _, code_status_targets = build_chartevent_artifacts_from_csv( + chartevents_csv_path=chartevents_csv_path, + d_items=pd.DataFrame(columns=["itemid", "label", "dbsource"]), + all_hadm_ids=None, + chunksize=chunksize, + ) + return code_status_targets + + +def _assemble_final_model_table( + demographics: pd.DataFrame, + all_cohort: pd.DataFrame, + admissions: pd.DataFrame, + code_status: pd.DataFrame, + mistrust_scores: pd.DataFrame, + include_race: bool = True, + include_mistrust: bool = True, +) -> pd.DataFrame: + """Shared implementation for final model table assembly.""" + + _require_columns( + demographics, + ["hadm_id", "age", "los_days", "gender", "insurance", "race"], + "demographics", + ) + _require_columns(all_cohort, ["hadm_id"], "all_cohort") + _require_columns( + admissions, + ["hadm_id", "discharge_location", "hospital_expire_flag"], + "admissions", + ) + _require_columns( + mistrust_scores, + [ + "hadm_id", + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + ], + "mistrust_scores", + ) + _require_columns(code_status, ["hadm_id", "code_status_dnr_dni_cmo"], "code_status") + + cohort_hadm = pd.DataFrame( + {"hadm_id": sorted(pd.to_numeric(all_cohort["hadm_id"], errors="coerce").dropna().astype(int).unique())} + ) + demo = cohort_hadm.merge(demographics, on="hadm_id", how="left") + + final = cohort_hadm.copy() + final = final.merge( + demo[["hadm_id", "age", "los_days"]], + on="hadm_id", + how="left", + ) + final = final.merge(_build_gender_one_hot(demo), on="hadm_id", how="left") 
+ final = final.merge(_build_insurance_one_hot(demo), on="hadm_id", how="left") + + if include_race: + final = final.merge(_build_race_one_hot(demo), on="hadm_id", how="left") + + if include_mistrust: + final = final.merge(mistrust_scores, on="hadm_id", how="left") + + admissions_targets = admissions[ + ["hadm_id", "discharge_location", "hospital_expire_flag"] + ].drop_duplicates("hadm_id") + left_ama = _build_task_left_ama_target(admissions_targets) + mortality = _build_task_in_hospital_mortality_target(admissions_targets) + final = final.merge( + left_ama.merge(mortality, on="hadm_id", how="outer"), + on="hadm_id", + how="left", + ) + final = final.merge(code_status, on="hadm_id", how="left") + final["code_status_dnr_dni_cmo"] = pd.to_numeric( + final["code_status_dnr_dni_cmo"], + errors="coerce", + ).fillna(0).astype(int) + + fill_zero_columns = [ + "gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", + "left_ama", + "code_status_dnr_dni_cmo", + "in_hospital_mortality", + ] + if include_race: + fill_zero_columns.extend( + [ + "race_white", + "race_black", + "race_asian", + "race_hispanic", + "race_native_american", + "race_other", + ] + ) + for column in fill_zero_columns: + if column in final.columns: + final[column] = final[column].fillna(0).astype(int) + + final = final.sort_values("hadm_id").drop_duplicates("hadm_id") + return final.reset_index(drop=True) + + +def build_final_model_table( # pylint: disable=too-many-arguments,too-many-positional-arguments + demographics: pd.DataFrame, + all_cohort: pd.DataFrame, + admissions: pd.DataFrame, + chartevents: pd.DataFrame, + d_items: pd.DataFrame, + mistrust_scores: pd.DataFrame, + include_race: bool = True, + include_mistrust: bool = True, +) -> pd.DataFrame: + """Assemble baseline, optional race, mistrust, and target columns.""" + code_status = _build_code_status_target(chartevents, d_items) + return _assemble_final_model_table( + demographics=demographics, + 
all_cohort=all_cohort, + admissions=admissions, + code_status=code_status, + mistrust_scores=mistrust_scores, + include_race=include_race, + include_mistrust=include_mistrust, + ) + + +def build_final_model_table_from_code_status_targets( # pylint: disable=too-many-arguments + demographics: pd.DataFrame, + all_cohort: pd.DataFrame, + admissions: pd.DataFrame, + code_status_targets: pd.DataFrame, + mistrust_scores: pd.DataFrame, + include_race: bool = True, + include_mistrust: bool = True, +) -> pd.DataFrame: + """Assemble the final model table using precomputed code-status targets.""" + + return _assemble_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=admissions, + code_status=code_status_targets, + mistrust_scores=mistrust_scores, + include_race=include_race, + include_mistrust=include_mistrust, + ) + + +def write_minimal_deliverables(artifacts: dict[str, pd.DataFrame], output_dir: Path | str) -> None: + """Write the required CSV deliverables to disk without index columns.""" + + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + filenames = { + "base_admissions": "base_admissions.csv", + "eol_cohort": "eol_cohort.csv", + "all_cohort": "all_cohort.csv", + "treatment_totals": "treatment_totals.csv", + "chartevent_feature_matrix": "chartevent_feature_matrix.csv", + "note_labels": "note_labels.csv", + "mistrust_scores": "mistrust_scores.csv", + "acuity_scores": "acuity_scores.csv", + "final_model_table": "final_model_table.csv", + } + + for key, filename in filenames.items(): + df = artifacts[key].copy() + if "hadm_id" in df.columns: + df = df.sort_values("hadm_id") + df.to_csv(output_path / filename, index=False) + + +def validate_database_environment( # pylint: disable=too-many-locals + raw_tables: Mapping[str, pd.DataFrame], + materialized_views: Mapping[str, pd.DataFrame], + schema_name: str | None = None, + database_flavor: str | None = None, +) -> dict[str, object]: + """Validate that the 
loaded MIMIC environment supports the full pipeline.""" + + resolved_schema, resolved_flavor = _validate_database_identity( + schema_name=schema_name, + database_flavor=database_flavor, + ) + _validate_required_inputs(raw_tables, materialized_views) + + admissions = raw_tables["admissions"] + patients = raw_tables["patients"] + icustays = raw_tables["icustays"] + noteevents = raw_tables["noteevents"] + chartevents = raw_tables["chartevents"] + d_items = raw_tables["d_items"] + ventdurations = materialized_views["ventdurations"] + vasopressordurations = materialized_views["vasopressordurations"] + + available_join_keys = _collect_required_join_keys(raw_tables) + missing_keys = sorted(REQUIRED_JOIN_KEYS - available_join_keys) + if missing_keys: + raise ValueError("Missing required join keys: " + ", ".join(missing_keys)) + + base = build_base_admissions(admissions, patients) + if len(base) <= 50000: + raise ValueError( + "Base admissions after admissions-patients join and " + "has_chartevents_data filter must exceed 50,000 rows." + ) + + if base["subject_id"].isna().any(): + raise ValueError( + "Base admissions contains null subject_id values after " + "admissions-patients join." + ) + if base["hadm_id"].isna().any(): + raise ValueError("Base admissions contains null hadm_id values.") + if icustays["hadm_id"].isna().any() or icustays["icustay_id"].isna().any(): + raise ValueError( + "icustays must provide non-null hadm_id and icustay_id for ICU bridging." 
+ ) + _validate_text_access(noteevents, chartevents) + + icu_bridge = icustays[["icustay_id", "hadm_id"]].drop_duplicates() + _validate_bridge_join( + ventdurations, + icu_bridge, + "icustay_id", + "ventdurations must join to icustays through icustay_id.", + ) + _validate_bridge_join( + vasopressordurations, + icu_bridge, + "icustay_id", + "vasopressordurations must join to icustays through icustay_id.", + ) + _validate_bridge_join( + chartevents, + d_items[["itemid"]].drop_duplicates(), + "itemid", + "chartevents must join to d_items through itemid.", + ) + + acuity = build_acuity_scores( + materialized_views["oasis"], + materialized_views["sapsii"], + ) + if acuity.empty: + raise ValueError( + "oasis and sapsii must join back to admissions on hadm_id " + "and yield admission-level acuity rows." + ) + + supports_multiple_icustays = bool( + icustays.groupby("hadm_id")["icustay_id"].nunique().gt(1).any() + ) + + return { + "database_flavor": resolved_flavor, + "schema_name": resolved_schema, + "base_admissions_rows": int(len(base)), + "raw_tables": sorted(raw_tables.keys()), + "materialized_views": sorted(materialized_views.keys()), + "supports_multiple_icustays_per_hadm": supports_multiple_icustays, + } diff --git a/pyhealth/models/eol_mistrust.py b/pyhealth/models/eol_mistrust.py new file mode 100644 index 000000000..6ba40bba8 --- /dev/null +++ b/pyhealth/models/eol_mistrust.py @@ -0,0 +1,1235 @@ +"""Modeling utilities for the EOL mistrust study pipeline. + +This module implements the model-facing pieces of the EOL mistrust workflow: + +1. three admission-level mistrust metrics +2. feature-weight summaries for the two proxy logistic models +3. race-gap, treatment-disparity, and acuity-control analyses +4. 
downstream repeated-split prediction experiments +""" + +from __future__ import annotations + +import importlib +from collections import OrderedDict +from itertools import combinations +from typing import Callable, Iterable, Mapping, Sequence + +import numpy as np +import pandas as pd +from pyhealth.tasks.eol_mistrust import get_eol_mistrust_task_map + +try: + from scipy.stats import mannwhitneyu, pearsonr # type: ignore +except ModuleNotFoundError: # pragma: no cover + mannwhitneyu = None + pearsonr = None + +try: + from sklearn.linear_model import LogisticRegression # pylint: disable=import-error + from sklearn.metrics import roc_auc_score # pylint: disable=import-error + from sklearn.model_selection import train_test_split # pylint: disable=import-error +except ModuleNotFoundError: # pragma: no cover + class LogisticRegression: # type: ignore[no-redef] + """Fallback estimator preserving the sklearn constructor surface.""" + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + def fit(self, features, labels): + del features, labels + raise ModuleNotFoundError( + "scikit-learn is required for EOL mistrust model fitting." + ) + + def predict_proba(self, features): + del features + raise ModuleNotFoundError( + "scikit-learn is required for EOL mistrust model inference." + ) + + def train_test_split(*args, **kwargs): # type: ignore[no-redef] + del args, kwargs + raise ModuleNotFoundError( + "scikit-learn is required for downstream evaluation splits." + ) + + def roc_auc_score(*args, **kwargs): # type: ignore[no-redef] + del args, kwargs + raise ModuleNotFoundError( + "scikit-learn is required for downstream AUC evaluation." 
+ ) + + +RACE_WHITE = "WHITE" +RACE_BLACK = "BLACK" + +MISTRUST_SCORE_COLUMNS = [ + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", +] + +BASELINE_FEATURE_COLUMNS = [ + "age", + "los_days", + "gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", +] + +RACE_FEATURE_COLUMNS = [ + "race_white", + "race_black", + "race_asian", + "race_hispanic", + "race_native_american", + "race_other", +] + +DOWNSTREAM_TASK_MAP = get_eol_mistrust_task_map() + +DOWNSTREAM_FEATURE_CONFIGS = OrderedDict( + [ + ("Baseline", list(BASELINE_FEATURE_COLUMNS)), + ("Baseline + Race", list(BASELINE_FEATURE_COLUMNS + RACE_FEATURE_COLUMNS)), + ("Baseline + Noncompliant", list(BASELINE_FEATURE_COLUMNS + ["noncompliance_score_z"])), + ("Baseline + Autopsy", list(BASELINE_FEATURE_COLUMNS + ["autopsy_score_z"])), + ( + "Baseline + Neg-Sentiment", + list(BASELINE_FEATURE_COLUMNS + ["negative_sentiment_score_z"]), + ), + ( + "Baseline + ALL", + list(BASELINE_FEATURE_COLUMNS + RACE_FEATURE_COLUMNS + MISTRUST_SCORE_COLUMNS), + ), + ] +) + + +_SENTIMENT_BACKEND: Callable[[str], tuple[float, float]] | None = None + + +def _load_transformers_sentiment() -> Callable[[str], tuple[float, float]]: + """Load a transformers sentiment pipeline, preferring GPU when available.""" + + transformers_module = importlib.import_module("transformers") + torch_module = importlib.import_module("torch") + + pipeline_factory = getattr(transformers_module, "pipeline", None) + if not callable(pipeline_factory): + raise ModuleNotFoundError("transformers.pipeline is unavailable in the current environment.") + + try: # pragma: no cover - logging surface depends on transformers version + transformers_logging = importlib.import_module("transformers.utils.logging") + set_verbosity_error = getattr(transformers_logging, "set_verbosity_error", None) + if callable(set_verbosity_error): + set_verbosity_error() + except Exception: + pass + + use_cuda = 
bool(getattr(torch_module, "cuda", None) and torch_module.cuda.is_available())
+    device = 0 if use_cuda else -1
+    classifier = pipeline_factory(
+        "sentiment-analysis",
+        model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
+        device=device,
+    )
+
+    def _transformers_sentiment(text: str) -> tuple[float, float]:
+        # Collapse all whitespace so the classifier sees a single-line string;
+        # input is capped at 2048 characters before tokenizer truncation.
+        cleaned = " ".join(str(text).split())
+        if not cleaned:
+            return (0.0, 0.0)
+        result = classifier(cleaned[:2048], truncation=True)[0]
+        label = str(result.get("label", "")).upper()
+        score = float(result.get("score", 0.0))
+        # Signed polarity: positive label keeps the score, anything else negates it.
+        polarity = score if "POS" in label else -score
+        return (polarity, 0.0)
+
+    return _transformers_sentiment
+
+
+def _default_sentiment_backend(text: str) -> tuple[float, float]:
+    """Resolve and cache the default transformers sentiment backend lazily."""
+
+    global _SENTIMENT_BACKEND
+    if _SENTIMENT_BACKEND is None:
+        _SENTIMENT_BACKEND = _load_transformers_sentiment()
+    return _SENTIMENT_BACKEND(text)
+
+
+# Module-level alias used as the default scorer; the second tuple slot is always 0.0 here.
+pattern_sentiment = _default_sentiment_backend
+
+
+def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> None:
+    # Raise a single ValueError naming every missing column at once.
+    missing = [column for column in required if column not in df.columns]
+    if missing:
+        missing_str = ", ".join(missing)
+        raise ValueError(f"{df_name} is missing required columns: {missing_str}")
+
+
+def _prepare_note_text_for_sentiment(text) -> str:
+    # None / float-NaN cells become empty strings; other values are whitespace-normalized.
+    if text is None or (isinstance(text, float) and pd.isna(text)):
+        return ""
+    return " ".join(str(text).split())
+
+
+def _default_estimator_factory() -> object:
+    # L1-regularized logistic regression; liblinear is the solver that supports penalty="l1".
+    return LogisticRegression(penalty="l1", solver="liblinear", max_iter=1000)
+
+
+def _extract_positive_class_probabilities(probabilities) -> np.ndarray:
+    """Validate predict_proba output and return the positive-class column."""
+
+    probability_array = np.asarray(probabilities, dtype=float)
+    if probability_array.ndim != 2 or probability_array.shape[1] < 2:
+        raise IndexError(
+            "Estimator `predict_proba` output must have shape (n_samples, n_classes>=2)."
+        )
+    return probability_array[:, 1]
+
+
+def _score_column_name(label_column: str) -> str:
+    # "foo_label" -> "foo_score"; the slice drops len("_label") == 6 characters.
+    if label_column.endswith("_label"):
+        return f"{label_column[:-6]}_score"
+    return f"{label_column}_score"
+
+
+def _iter_downstream_jobs(
+    final_model_table: pd.DataFrame,
+    feature_configurations: Mapping[str, Sequence[str]] | None = None,
+    task_map: Mapping[str, str] | None = None,
+):
+    """Yield prepared downstream task/config jobs in stable order."""
+
+    _require_columns(final_model_table, ["hadm_id"], "final_model_table")
+    if feature_configurations is None:
+        configs = get_downstream_feature_configurations()
+    else:
+        configs = OrderedDict(
+            (name, list(columns)) for name, columns in feature_configurations.items()
+        )
+    tasks = get_downstream_task_map() if task_map is None else OrderedDict(task_map)
+
+    for task_name, target_column in tasks.items():
+        _require_columns(final_model_table, [target_column], "final_model_table")
+        for config_name, feature_columns in configs.items():
+            _require_columns(final_model_table, feature_columns, "final_model_table")
+            # Listwise-complete rows only; sorting by hadm_id keeps splits deterministic.
+            usable = final_model_table[["hadm_id", target_column, *feature_columns]].dropna().copy()
+            usable = usable.sort_values("hadm_id").reset_index(drop=True)
+            y = pd.to_numeric(usable[target_column], errors="coerce")
+            X = usable[feature_columns]
+            yield task_name, target_column, config_name, feature_columns, usable, X, y
+
+
+def _prepare_proxy_training_frame(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    label_column: str,
+) -> tuple[pd.DataFrame, list[str]]:
+    # Every non-key column of the feature matrix is treated as a model feature.
+    _require_columns(feature_matrix, ["hadm_id"], "feature_matrix")
+    _require_columns(note_labels, ["hadm_id", label_column], "note_labels")
+
+    feature_columns = [column for column in feature_matrix.columns if column != "hadm_id"]
+    merged = feature_matrix.merge(
+        note_labels[["hadm_id", label_column]],
+        on="hadm_id",
+        how="inner",
+        validate="one_to_one",
+    ).sort_values("hadm_id")
+    return merged.reset_index(drop=True), feature_columns
+
+
+def _make_metric_result(
+    left: pd.Series,
+    right: pd.Series,
+) -> tuple[float, float, float, float, int, int]:
+    # Returns (statistic, pvalue, left_median, right_median, n_left, n_right);
+    # statistic/pvalue are NaN when either side is empty or scipy is unavailable.
+    left = pd.to_numeric(left, errors="coerce").dropna().astype(float)
+    right = pd.to_numeric(right, errors="coerce").dropna().astype(float)
+    if left.empty or right.empty:
+        return float("nan"), float("nan"), float("nan"), float("nan"), len(left), len(right)
+
+    left_median = float(left.median())
+    right_median = float(right.median())
+
+    if mannwhitneyu is None:  # pragma: no cover
+        statistic = float("nan")
+        pvalue = float("nan")
+    else:
+        result = mannwhitneyu(left, right, alternative="two-sided")
+        statistic = float(result.statistic)
+        pvalue = float(result.pvalue)
+
+    return statistic, pvalue, left_median, right_median, len(left), len(right)
+
+
+def _pearson_with_pvalue(left: pd.Series, right: pd.Series) -> tuple[float, float, int]:
+    # Pairwise-complete correlation; falls back to pandas (no p-value) without scipy.
+    frame = pd.DataFrame({"left": left, "right": right}).dropna()
+    if len(frame) < 2:
+        return float("nan"), float("nan"), len(frame)
+
+    if pearsonr is not None:  # pragma: no branch
+        corr, pvalue = pearsonr(frame["left"], frame["right"])
+        return float(corr), float(pvalue), len(frame)
+
+    corr = float(frame["left"].corr(frame["right"], method="pearson"))
+    return corr, float("nan"), len(frame)
+
+
+def build_empirical_cdf_curve(values: Iterable[float]) -> pd.DataFrame:
+    """Build a plot-ready empirical CDF curve from numeric values."""
+
+    series = pd.to_numeric(pd.Series(list(values)), errors="coerce").dropna().astype(float)
+    series = series.sort_values().reset_index(drop=True)
+    if series.empty:
+        return pd.DataFrame(columns=["x", "cdf"])
+    # Step heights i/n for the i-th order statistic; last point is exactly 1.0.
+    cdf = (np.arange(1, len(series) + 1) / len(series)).astype(float)
+    return pd.DataFrame({"x": series, "cdf": cdf})
+
+
+def get_downstream_feature_configurations() -> OrderedDict[str, list[str]]:
+    """Return the six required downstream feature configurations."""
+
+    # Defensive copy so callers cannot mutate the module-level configuration.
+    return OrderedDict((name, list(columns)) for name, columns in DOWNSTREAM_FEATURE_CONFIGS.items())
+
+
+def 
get_downstream_task_map() -> OrderedDict[str, str]:
+    """Return the three required downstream prediction targets."""
+
+    return OrderedDict(DOWNSTREAM_TASK_MAP)
+
+
+def fit_proxy_mistrust_model(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    label_column: str,
+    estimator_factory: Callable[[], object] | None = None,
+):
+    """Fit the L1 logistic proxy model on the full ALL cohort."""
+
+    merged, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column)
+    estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory()
+    estimator.fit(merged[feature_columns], merged[label_column].astype(int))
+    return estimator
+
+
+def build_proxy_probability_scores(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    label_column: str,
+    estimator_factory: Callable[[], object] | None = None,
+) -> pd.DataFrame:
+    """Fit a proxy logistic model and return positive-class probabilities."""
+
+    # In-sample scores by design: the model is fit and scored on the same admissions.
+    merged, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column)
+    estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory()
+    estimator.fit(merged[feature_columns], merged[label_column].astype(int))
+    probabilities = estimator.predict_proba(merged[feature_columns])
+    positive_class = _extract_positive_class_probabilities(probabilities)
+
+    scores = pd.DataFrame(
+        {
+            "hadm_id": merged["hadm_id"],
+            _score_column_name(label_column): positive_class.astype(float),
+        }
+    )
+    return scores.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True)
+
+
+def build_noncompliance_mistrust_scores(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    estimator_factory: Callable[[], object] | None = None,
+) -> pd.DataFrame:
+    # Thin wrapper binding the proxy pipeline to the noncompliance label.
+    return build_proxy_probability_scores(
+        feature_matrix=feature_matrix,
+        note_labels=note_labels,
+        label_column="noncompliance_label",
+        estimator_factory=estimator_factory,
+    )
+
+
+def 
build_autopsy_mistrust_scores(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    estimator_factory: Callable[[], object] | None = None,
+) -> pd.DataFrame:
+    # Thin wrapper binding the proxy pipeline to the autopsy label.
+    return build_proxy_probability_scores(
+        feature_matrix=feature_matrix,
+        note_labels=note_labels,
+        label_column="autopsy_label",
+        estimator_factory=estimator_factory,
+    )
+
+
+def build_negative_sentiment_mistrust_scores(
+    note_corpus: pd.DataFrame,
+    sentiment_fn: Callable[[str], tuple[float, float]] | None = None,
+) -> pd.DataFrame:
+    """Compute negative-sentiment mistrust from whitespace-tokenized note text."""
+
+    _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus")
+    scorer = pattern_sentiment if sentiment_fn is None else sentiment_fn
+
+    cleaned = note_corpus.copy()
+    cleaned["note_text"] = cleaned["note_text"].map(_prepare_note_text_for_sentiment)
+    # Sign flip on polarity so that a higher score means MORE negative sentiment.
+    cleaned["negative_sentiment_score"] = cleaned["note_text"].map(
+        lambda text: float(-1.0 * scorer(text)[0])
+    )
+    return cleaned[["hadm_id", "negative_sentiment_score"]].sort_values("hadm_id").reset_index(drop=True)
+
+
+def z_normalize_scores(
+    score_table: pd.DataFrame,
+    columns: Sequence[str] | None = None,
+) -> pd.DataFrame:
+    """Apply independent z-score normalization to the requested score columns."""
+
+    _require_columns(score_table, ["hadm_id"], "score_table")
+    normalized = score_table.copy()
+    if columns is None:
+        score_columns = [column for column in normalized.columns if column != "hadm_id"]
+    else:
+        score_columns = list(columns)
+
+    for column in score_columns:
+        _require_columns(normalized, [column], "score_table")
+        values = pd.to_numeric(normalized[column], errors="coerce").astype(float)
+        mean = float(values.mean())
+        # Population std (ddof=0); a constant (or all-NaN) column collapses to 0.0.
+        std = float(values.std(ddof=0))
+        if pd.isna(std) or std == 0:
+            normalized[column] = 0.0
+        else:
+            normalized[column] = (values - mean) / std
+    return normalized
+
+
+def build_mistrust_score_table(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    note_corpus: 
pd.DataFrame,
+    estimator_factory: Callable[[], object] | None = None,
+    sentiment_fn: Callable[[str], tuple[float, float]] | None = None,
+) -> pd.DataFrame:
+    """Build the three normalized mistrust metrics."""
+
+    _require_columns(note_labels, ["hadm_id", "noncompliance_label", "autopsy_label"], "note_labels")
+    _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus")
+
+    noncompliance = build_noncompliance_mistrust_scores(
+        feature_matrix=feature_matrix,
+        note_labels=note_labels,
+        estimator_factory=estimator_factory,
+    )
+    autopsy = build_autopsy_mistrust_scores(
+        feature_matrix=feature_matrix,
+        note_labels=note_labels,
+        estimator_factory=estimator_factory,
+    )
+    sentiment = build_negative_sentiment_mistrust_scores(
+        note_corpus=note_corpus,
+        sentiment_fn=sentiment_fn,
+    )
+
+    # Inner joins (validated one-to-one) mean only admissions present in all
+    # three score tables survive into the final metric table.
+    merged = (
+        noncompliance.merge(autopsy, on="hadm_id", how="inner", validate="one_to_one")
+        .merge(sentiment, on="hadm_id", how="inner", validate="one_to_one")
+        .sort_values("hadm_id")
+    )
+
+    normalized = z_normalize_scores(
+        merged,
+        columns=["noncompliance_score", "autopsy_score", "negative_sentiment_score"],
+    )
+    normalized = normalized.rename(
+        columns={
+            "noncompliance_score": "noncompliance_score_z",
+            "autopsy_score": "autopsy_score_z",
+            "negative_sentiment_score": "negative_sentiment_score_z",
+        }
+    )
+    return normalized.reset_index(drop=True)
+
+
+def summarize_feature_weights(
+    estimator,
+    feature_columns: Sequence[str],
+    top_n: int = 10,
+) -> dict[str, pd.DataFrame]:
+    """Summarize model coefficients into positive and negative rankings."""
+
+    if not hasattr(estimator, "coef_"):
+        raise ValueError("Estimator must expose `coef_` for weight summarization.")
+    coefficients = np.asarray(estimator.coef_)
+    if coefficients.ndim != 2 or coefficients.shape[0] == 0:
+        raise ValueError("Estimator `coef_` must have shape (n_classes, n_features).")
+    # Only the first coefficient row is summarized (the binary proxy models).
+    weights = coefficients[0]
+    if len(weights) != len(feature_columns):
+        raise ValueError("Feature columns 
must align with estimator coefficients.")
+
+    # Ties in weight are broken alphabetically by feature name for stable output.
+    summary = pd.DataFrame({"feature": list(feature_columns), "weight": weights.astype(float)})
+    summary = summary.sort_values(["weight", "feature"], ascending=[False, True]).reset_index(drop=True)
+    positive = summary.head(top_n).reset_index(drop=True)
+    negative = summary.sort_values(["weight", "feature"], ascending=[True, True]).head(top_n).reset_index(drop=True)
+    return {"all": summary, "positive": positive, "negative": negative}
+
+
+def build_proxy_feature_weight_summary(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    label_column: str,
+    estimator_factory: Callable[[], object] | None = None,
+    top_n: int = 10,
+) -> dict[str, pd.DataFrame]:
+    """Fit a proxy model and summarize the learned coefficient weights."""
+
+    merged, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column)
+    estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory()
+    estimator.fit(merged[feature_columns], merged[label_column].astype(int))
+    return summarize_feature_weights(estimator, feature_columns, top_n=top_n)
+
+
+def build_noncompliance_feature_weight_summary(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    estimator_factory: Callable[[], object] | None = None,
+    top_n: int = 10,
+) -> dict[str, pd.DataFrame]:
+    # Label-bound wrapper mirroring build_noncompliance_mistrust_scores.
+    return build_proxy_feature_weight_summary(
+        feature_matrix=feature_matrix,
+        note_labels=note_labels,
+        label_column="noncompliance_label",
+        estimator_factory=estimator_factory,
+        top_n=top_n,
+    )
+
+
+def build_autopsy_feature_weight_summary(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    estimator_factory: Callable[[], object] | None = None,
+    top_n: int = 10,
+) -> dict[str, pd.DataFrame]:
+    # Label-bound wrapper mirroring build_autopsy_mistrust_scores.
+    return build_proxy_feature_weight_summary(
+        feature_matrix=feature_matrix,
+        note_labels=note_labels,
+        label_column="autopsy_label",
+        estimator_factory=estimator_factory,
+        top_n=top_n,
+    )
+
+
+def 
run_race_gap_analysis(
+    mistrust_scores: pd.DataFrame,
+    demographics: pd.DataFrame,
+    score_columns: Sequence[str] | None = None,
+    race_column: str = "race",
+) -> pd.DataFrame:
+    """Compare White and Black mistrust score distributions by Mann-Whitney U."""
+
+    _require_columns(mistrust_scores, ["hadm_id"], "mistrust_scores")
+    _require_columns(demographics, ["hadm_id", race_column], "demographics")
+
+    columns = list(MISTRUST_SCORE_COLUMNS if score_columns is None else score_columns)
+    _require_columns(mistrust_scores, columns, "mistrust_scores")
+
+    merged = mistrust_scores.merge(
+        demographics[["hadm_id", race_column]],
+        on="hadm_id",
+        how="inner",
+        validate="one_to_one",
+    )
+    # Restrict to the two comparison groups before computing per-metric stats.
+    merged = merged.loc[merged[race_column].isin({RACE_WHITE, RACE_BLACK})].copy()
+
+    rows: list[dict[str, float | int | str | bool]] = []
+    for column in columns:
+        black = merged.loc[merged[race_column] == RACE_BLACK, column]
+        white = merged.loc[merged[race_column] == RACE_WHITE, column]
+        statistic, pvalue, median_black, median_white, n_black, n_white = _make_metric_result(
+            black, white
+        )
+        rows.append(
+            {
+                "metric": column,
+                "n_black": n_black,
+                "n_white": n_white,
+                "median_black": median_black,
+                "median_white": median_white,
+                "median_gap_black_minus_white": median_black - median_white
+                if not (pd.isna(median_black) or pd.isna(median_white))
+                else float("nan"),
+                "statistic": statistic,
+                "pvalue": pvalue,
+                "black_median_higher": bool(
+                    not (pd.isna(median_black) or pd.isna(median_white))
+                    and median_black > median_white
+                ),
+            }
+        )
+    return pd.DataFrame(rows)
+
+
+def run_race_based_treatment_analysis(
+    eol_cohort: pd.DataFrame,
+    treatment_totals: pd.DataFrame,
+    race_column: str = "race",
+    treatment_columns: Sequence[str] = ("total_vent_min", "total_vaso_min"),
+) -> pd.DataFrame:
+    """Compare Black and White treatment durations within the EOL cohort."""
+
+    _require_columns(eol_cohort, ["hadm_id", race_column], "eol_cohort")
+    
_require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals")
+
+    # Left join keeps cohort admissions with no recorded treatment (NaN totals).
+    merged = eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one")
+    merged = merged.loc[merged[race_column].isin({RACE_WHITE, RACE_BLACK})].copy()
+
+    rows: list[dict[str, float | int | str]] = []
+    for column in treatment_columns:
+        usable = merged.loc[merged[column].notna()].copy()
+        black = usable.loc[usable[race_column] == RACE_BLACK, column]
+        white = usable.loc[usable[race_column] == RACE_WHITE, column]
+        statistic, pvalue, median_black, median_white, n_black, n_white = _make_metric_result(
+            black, white
+        )
+        rows.append(
+            {
+                "treatment": column,
+                "n_black": n_black,
+                "n_white": n_white,
+                "median_black": median_black,
+                "median_white": median_white,
+                "median_gap_black_minus_white": median_black - median_white
+                if not (pd.isna(median_black) or pd.isna(median_white))
+                else float("nan"),
+                "statistic": statistic,
+                "pvalue": pvalue,
+            }
+        )
+    return pd.DataFrame(rows)
+
+
+def build_race_based_treatment_cdf_plot_data(
+    eol_cohort: pd.DataFrame,
+    treatment_totals: pd.DataFrame,
+    race_column: str = "race",
+    treatment_columns: Sequence[str] = ("total_vent_min", "total_vaso_min"),
+) -> dict[str, pd.DataFrame]:
+    """Build plot-ready CDF curves and median markers for race-based treatment analysis."""
+
+    _require_columns(eol_cohort, ["hadm_id", race_column], "eol_cohort")
+    _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals")
+
+    merged = eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one")
+    merged = merged.loc[merged[race_column].isin({RACE_WHITE, RACE_BLACK})].copy()
+
+    curves: list[dict[str, float | str]] = []
+    medians: list[dict[str, float | str]] = []
+    for treatment in treatment_columns:
+        usable = merged.loc[merged[treatment].notna()].copy()
+        for race_value, label in ((RACE_WHITE, "White"), (RACE_BLACK, "Black")):
+            values = usable.loc[usable[race_column] == 
race_value, treatment]
+            cdf = build_empirical_cdf_curve(values)
+            for row in cdf.itertuples(index=False):
+                curves.append(
+                    {
+                        "treatment": treatment,
+                        "group": label,
+                        "x": float(row.x),
+                        "cdf": float(row.cdf),
+                    }
+                )
+            # NaN median (empty group) is preserved so the plot layer can decide how to render it.
+            median = pd.to_numeric(values, errors="coerce").dropna().astype(float).median()
+            medians.append(
+                {
+                    "treatment": treatment,
+                    "group": label,
+                    "median": float(median) if not pd.isna(median) else float("nan"),
+                    "line_style": "dotted",
+                }
+            )
+    return {"curves": pd.DataFrame(curves), "medians": pd.DataFrame(medians)}
+
+
+def run_trust_based_treatment_analysis(
+    eol_cohort: pd.DataFrame,
+    mistrust_scores: pd.DataFrame,
+    treatment_totals: pd.DataFrame,
+    score_columns: Sequence[str] | None = None,
+    treatment_columns: Sequence[str] = ("total_vent_min", "total_vaso_min"),
+    group_sizes: Mapping[str, int] | None = None,
+    race_column: str = "race",
+) -> pd.DataFrame:
+    """Compare high-vs-low mistrust groups on treatment duration within EOL."""
+
+    _require_columns(eol_cohort, ["hadm_id"], "eol_cohort")
+    _require_columns(mistrust_scores, ["hadm_id"], "mistrust_scores")
+    _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals")
+
+    columns = list(MISTRUST_SCORE_COLUMNS if score_columns is None else score_columns)
+    _require_columns(mistrust_scores, columns, "mistrust_scores")
+
+    merged = (
+        eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one")
+        .merge(mistrust_scores[["hadm_id", *columns]], on="hadm_id", how="inner", validate="one_to_one")
+    )
+    groups = dict(group_sizes or {})
+
+    if race_column in merged.columns:
+        # Default stratification size per treatment = the Black-admission count
+        # from the race-based analysis; explicit group_sizes take precedence.
+        race_based = run_race_based_treatment_analysis(
+            eol_cohort=eol_cohort,
+            treatment_totals=treatment_totals,
+            race_column=race_column,
+            treatment_columns=treatment_columns,
+        )
+        for row in race_based.itertuples(index=False):
+            groups.setdefault(str(row.treatment), int(row.n_black))
+
+    rows: list[dict[str, float | int | str]] = []
+    for treatment in 
treatment_columns:
+        for metric in columns:
+            usable = merged.loc[merged[treatment].notna() & merged[metric].notna()].copy()
+            # Rank by mistrust (descending), hadm_id as a deterministic tie-break.
+            usable = usable.sort_values([metric, "hadm_id"], ascending=[False, True]).reset_index(drop=True)
+            group_size = int(groups.get(treatment, 0))
+
+            if group_size <= 0 or group_size >= len(usable):
+                # Degenerate split (empty or whole-cohort "high" group): emit a NaN row instead of testing.
+                rows.append(
+                    {
+                        "metric": metric,
+                        "treatment": treatment,
+                        "stratification_n": group_size,
+                        "n_high": min(group_size, len(usable)),
+                        "n_low": max(len(usable) - group_size, 0),
+                        "median_high": float("nan"),
+                        "median_low": float("nan"),
+                        "median_gap": float("nan"),
+                        "statistic": float("nan"),
+                        "pvalue": float("nan"),
+                    }
+                )
+                continue
+
+            high = usable.iloc[:group_size][treatment]
+            low = usable.iloc[group_size:][treatment]
+            statistic, pvalue, median_high, median_low, n_high, n_low = _make_metric_result(
+                high, low
+            )
+            rows.append(
+                {
+                    "metric": metric,
+                    "treatment": treatment,
+                    "stratification_n": group_size,
+                    "n_high": n_high,
+                    "n_low": n_low,
+                    "median_high": median_high,
+                    "median_low": median_low,
+                    "median_gap": median_high - median_low
+                    if not (pd.isna(median_high) or pd.isna(median_low))
+                    else float("nan"),
+                    "statistic": statistic,
+                    "pvalue": pvalue,
+                }
+            )
+    return pd.DataFrame(rows)
+
+
+def build_trust_based_treatment_cdf_plot_data(
+    eol_cohort: pd.DataFrame,
+    mistrust_scores: pd.DataFrame,
+    treatment_totals: pd.DataFrame,
+    score_columns: Sequence[str] | None = None,
+    treatment_columns: Sequence[str] = ("total_vent_min", "total_vaso_min"),
+    group_sizes: Mapping[str, int] | None = None,
+    race_column: str = "race",
+) -> dict[str, pd.DataFrame]:
+    """Build plot-ready CDF curves and median markers for trust-based treatment analysis."""
+
+    _require_columns(eol_cohort, ["hadm_id"], "eol_cohort")
+    _require_columns(mistrust_scores, ["hadm_id"], "mistrust_scores")
+    _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals")
+
+    columns = list(MISTRUST_SCORE_COLUMNS if 
score_columns is None else score_columns)
+    _require_columns(mistrust_scores, columns, "mistrust_scores")
+
+    merged = (
+        eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one")
+        .merge(
+            mistrust_scores[["hadm_id", *columns]],
+            on="hadm_id",
+            how="inner",
+            validate="one_to_one",
+        )
+    )
+    groups = dict(group_sizes or {})
+    if race_column in merged.columns:
+        # Same group-size defaulting rule as run_trust_based_treatment_analysis.
+        race_based = run_race_based_treatment_analysis(
+            eol_cohort=eol_cohort,
+            treatment_totals=treatment_totals,
+            race_column=race_column,
+            treatment_columns=treatment_columns,
+        )
+        for row in race_based.itertuples(index=False):
+            groups.setdefault(str(row.treatment), int(row.n_black))
+
+    curves: list[dict[str, float | str]] = []
+    medians: list[dict[str, float | str]] = []
+    for treatment in treatment_columns:
+        for metric in columns:
+            usable = merged.loc[merged[treatment].notna() & merged[metric].notna()].copy()
+            usable = usable.sort_values([metric, "hadm_id"], ascending=[False, True]).reset_index(drop=True)
+            group_size = int(groups.get(treatment, 0))
+            if group_size <= 0 or group_size >= len(usable):
+                # Degenerate splits are silently skipped here (no NaN plot rows).
+                continue
+
+            grouped_values = {
+                "High Mistrust": usable.iloc[:group_size][treatment],
+                "Low Mistrust": usable.iloc[group_size:][treatment],
+            }
+            for label, values in grouped_values.items():
+                cdf = build_empirical_cdf_curve(values)
+                for row in cdf.itertuples(index=False):
+                    curves.append(
+                        {
+                            "metric": metric,
+                            "treatment": treatment,
+                            "group": label,
+                            "x": float(row.x),
+                            "cdf": float(row.cdf),
+                        }
+                    )
+                median = pd.to_numeric(values, errors="coerce").dropna().astype(float).median()
+                medians.append(
+                    {
+                        "metric": metric,
+                        "treatment": treatment,
+                        "group": label,
+                        "median": float(median) if not pd.isna(median) else float("nan"),
+                        "line_style": "dotted",
+                    }
+                )
+    return {"curves": pd.DataFrame(curves), "medians": pd.DataFrame(medians)}
+
+
+def run_acuity_control_analysis(
+    mistrust_scores: pd.DataFrame,
+    acuity_scores: pd.DataFrame,
+    score_columns: 
Sequence[str] | None = None,
+    acuity_columns: Sequence[str] = ("oasis", "sapsii"),
+) -> pd.DataFrame:
+    """Compute pairwise Pearson correlations across mistrust and acuity scores."""
+
+    _require_columns(mistrust_scores, ["hadm_id"], "mistrust_scores")
+    _require_columns(acuity_scores, ["hadm_id", *acuity_columns], "acuity_scores")
+
+    columns = list(MISTRUST_SCORE_COLUMNS if score_columns is None else score_columns)
+    _require_columns(mistrust_scores, columns, "mistrust_scores")
+
+    merged = mistrust_scores.merge(
+        acuity_scores[["hadm_id", *acuity_columns]],
+        on="hadm_id",
+        how="inner",
+        validate="one_to_one",
+    )
+
+    # All unordered pairs over mistrust metrics AND acuity scores combined.
+    analysis_columns = columns + list(acuity_columns)
+    rows: list[dict[str, float | int | str]] = []
+    for left, right in combinations(analysis_columns, 2):
+        correlation, pvalue, n = _pearson_with_pvalue(merged[left], merged[right])
+        rows.append(
+            {
+                "feature_a": left,
+                "feature_b": right,
+                "correlation": correlation,
+                "pvalue": pvalue,
+                "n": n,
+            }
+        )
+    return pd.DataFrame(rows)
+
+
+def evaluate_downstream_average_weights(
+    final_model_table: pd.DataFrame,
+    feature_configurations: Mapping[str, Sequence[str]] | None = None,
+    task_map: Mapping[str, str] | None = None,
+    estimator_factory: Callable[[], object] | None = None,
+    split_fn: Callable[..., tuple] | None = None,
+    repetitions: int = 100,
+    test_size: float = 0.4,
+) -> pd.DataFrame:
+    """Average downstream regularized model weights across repeated 60/40 splits."""
+
+    splitter = train_test_split if split_fn is None else split_fn
+    rows: list[dict[str, float | int | str]] = []
+
+    for task_name, target_column, config_name, feature_columns, usable, X, y in _iter_downstream_jobs(
+        final_model_table,
+        feature_configurations=feature_configurations,
+        task_map=task_map,
+    ):
+        collected_weights: list[np.ndarray] = []
+
+        for random_state in range(repetitions):
+            if usable.empty or y.nunique(dropna=True) < 2:
+                continue
+
+            # One split per repetition, seeded by the repetition index for reproducibility.
+            X_train, X_test, y_train, y_test = splitter(
+                X,
+                y,
+                
test_size=test_size,
+                random_state=random_state,
+            )
+            del X_test  # coefficients come from the fitted train-side model only
+            y_train = pd.Series(y_train)
+            y_test = pd.Series(y_test)
+            if y_train.nunique(dropna=True) < 2 or y_test.nunique(dropna=True) < 2:
+                continue
+
+            estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory()
+            estimator.fit(X_train, y_train.astype(int))
+            coefficients = np.asarray(getattr(estimator, "coef_", None), dtype=float)
+            if coefficients.ndim != 2 or coefficients.shape[0] == 0:
+                raise ValueError(
+                    "Downstream estimator must expose `coef_` with shape (n_classes, n_features)."
+                )
+            weights = coefficients[0]
+            if len(weights) != len(feature_columns):
+                raise ValueError("Downstream feature columns must align with estimator coefficients.")
+            collected_weights.append(weights.astype(float))
+
+        if collected_weights:
+            weight_matrix = np.vstack(collected_weights)
+            weight_mean = weight_matrix.mean(axis=0)
+            weight_std = weight_matrix.std(axis=0, ddof=0)
+            n_valid = weight_matrix.shape[0]
+        else:
+            # No usable split produced coefficients: report all-NaN summaries.
+            weight_mean = np.full(len(feature_columns), np.nan, dtype=float)
+            weight_std = np.full(len(feature_columns), np.nan, dtype=float)
+            n_valid = 0
+
+        for index, feature in enumerate(feature_columns):
+            rows.append(
+                {
+                    "task": task_name,
+                    "configuration": config_name,
+                    "target_column": target_column,
+                    "feature": feature,
+                    "n_repeats": int(repetitions),
+                    "n_valid_weights": int(n_valid),
+                    "weight_mean": float(weight_mean[index]) if not np.isnan(weight_mean[index]) else float("nan"),
+                    "weight_std": float(weight_std[index]) if not np.isnan(weight_std[index]) else float("nan"),
+                }
+            )
+
+    return pd.DataFrame(rows)
+
+
+def evaluate_downstream_predictions(
+    final_model_table: pd.DataFrame,
+    feature_configurations: Mapping[str, Sequence[str]] | None = None,
+    task_map: Mapping[str, str] | None = None,
+    estimator_factory: Callable[[], object] | None = None,
+    split_fn: Callable[..., tuple] | None = None,
+    
auc_fn: Callable[[Iterable[int], Iterable[float]], float] | None = None,
+    repetitions: int = 100,
+    test_size: float = 0.4,
+) -> pd.DataFrame:
+    """Run repeated 60/40 downstream AUC evaluation across all tasks/configs."""
+
+    splitter = train_test_split if split_fn is None else split_fn
+    metric = roc_auc_score if auc_fn is None else auc_fn
+
+    rows: list[dict[str, float | int | str]] = []
+    for task_name, target_column, config_name, feature_columns, usable, X, y in _iter_downstream_jobs(
+        final_model_table,
+        feature_configurations=feature_configurations,
+        task_map=task_map,
+    ):
+        auc_values: list[float] = []
+
+        for random_state in range(repetitions):
+            if usable.empty or y.nunique(dropna=True) < 2:
+                # Keep the repetition slot: NaN marks an unevaluable split.
+                auc_values.append(float("nan"))
+                continue
+
+            X_train, X_test, y_train, y_test = splitter(
+                X,
+                y,
+                test_size=test_size,
+                random_state=random_state,
+            )
+            y_train = pd.Series(y_train)
+            y_test = pd.Series(y_test)
+
+            if y_train.nunique(dropna=True) < 2 or y_test.nunique(dropna=True) < 2:
+                auc_values.append(float("nan"))
+                continue
+
+            estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory()
+            estimator.fit(X_train, y_train.astype(int))
+            probabilities = estimator.predict_proba(X_test)
+            positive_class = _extract_positive_class_probabilities(probabilities)
+            auc_values.append(float(metric(y_test.astype(int), positive_class)))
+
+        auc_series = pd.Series(auc_values, dtype=float)
+        rows.append(
+            {
+                "task": task_name,
+                "configuration": config_name,
+                "target_column": target_column,
+                "n_rows": int(len(usable)),
+                "n_features": int(len(feature_columns)),
+                "n_repeats": int(repetitions),
+                "n_valid_auc": int(auc_series.notna().sum()),
+                "auc_mean": float(auc_series.mean()) if auc_series.notna().any() else float("nan"),
+                "auc_std": float(auc_series.std(ddof=0)) if auc_series.notna().any() else float("nan"),
+            }
+        )
+    return pd.DataFrame(rows)
+
+
+def plot_grouped_treatment_cdf(
+    curves: pd.DataFrame,
+    medians: 
pd.DataFrame,
+    group_column: str = "group",
+    x_column: str = "x",
+    y_column: str = "cdf",
+    median_column: str = "median",
+    ax=None,
+):
+    """Plot grouped empirical CDF curves with dotted median lines."""
+
+    try:
+        import matplotlib.pyplot as plt  # type: ignore
+    except ModuleNotFoundError as exc:  # pragma: no cover
+        raise ModuleNotFoundError("matplotlib is required for EOL mistrust CDF plotting.") from exc
+
+    if ax is None:
+        _, ax = plt.subplots()
+
+    ordered_curves = curves.copy()
+    if not ordered_curves.empty:
+        ordered_curves = ordered_curves.sort_values([group_column, x_column]).reset_index(drop=True)
+        for group_value, group_df in ordered_curves.groupby(group_column, sort=False):
+            ax.plot(group_df[x_column], group_df[y_column], label=str(group_value))
+
+    # NOTE(review): a NaN median still reaches axvline — presumably a no-op line;
+    # confirm rows with NaN medians should not be skipped here.
+    for row in medians.itertuples(index=False):
+        ax.axvline(
+            getattr(row, median_column),
+            linestyle=getattr(row, "line_style", "dotted"),
+            label=f"{getattr(row, group_column)} median",
+        )
+    return ax
+
+
+def run_full_eol_mistrust_modeling(
+    feature_matrix: pd.DataFrame,
+    note_labels: pd.DataFrame,
+    note_corpus: pd.DataFrame,
+    demographics: pd.DataFrame | None = None,
+    eol_cohort: pd.DataFrame | None = None,
+    treatment_totals: pd.DataFrame | None = None,
+    acuity_scores: pd.DataFrame | None = None,
+    final_model_table: pd.DataFrame | None = None,
+    estimator_factory: Callable[[], object] | None = None,
+    sentiment_fn: Callable[[str], tuple[float, float]] | None = None,
+    split_fn: Callable[..., tuple] | None = None,
+    auc_fn: Callable[[Iterable[int], Iterable[float]], float] | None = None,
+    repetitions: int = 100,
+    include_downstream_weight_summary: bool = False,
+    include_cdf_plot_data: bool = False,
+) -> dict[str, pd.DataFrame | dict[str, pd.DataFrame]]:
+    """Run the end-to-end model-stage workflow and collect its outputs."""
+
+    mistrust_scores = build_mistrust_score_table(
+        feature_matrix=feature_matrix,
+        note_labels=note_labels,
+        note_corpus=note_corpus,
+        
estimator_factory=estimator_factory, + sentiment_fn=sentiment_fn, + ) + outputs: dict[str, pd.DataFrame | dict[str, pd.DataFrame]] = { + "mistrust_scores": mistrust_scores, + "feature_weight_summaries": { + "noncompliance": build_noncompliance_feature_weight_summary( + feature_matrix=feature_matrix, + note_labels=note_labels, + estimator_factory=estimator_factory, + ), + "autopsy": build_autopsy_feature_weight_summary( + feature_matrix=feature_matrix, + note_labels=note_labels, + estimator_factory=estimator_factory, + ), + }, + } + + if demographics is not None: + outputs["race_gap_results"] = run_race_gap_analysis(mistrust_scores, demographics) + + if eol_cohort is not None and treatment_totals is not None: + outputs["race_treatment_results"] = run_race_based_treatment_analysis( + eol_cohort=eol_cohort, + treatment_totals=treatment_totals, + ) + outputs["trust_treatment_results"] = run_trust_based_treatment_analysis( + eol_cohort=eol_cohort, + mistrust_scores=mistrust_scores, + treatment_totals=treatment_totals, + ) + if include_cdf_plot_data: + outputs["race_treatment_cdf_plot_data"] = build_race_based_treatment_cdf_plot_data( + eol_cohort=eol_cohort, + treatment_totals=treatment_totals, + ) + outputs["trust_treatment_cdf_plot_data"] = build_trust_based_treatment_cdf_plot_data( + eol_cohort=eol_cohort, + mistrust_scores=mistrust_scores, + treatment_totals=treatment_totals, + ) + + if acuity_scores is not None: + outputs["acuity_correlations"] = run_acuity_control_analysis( + mistrust_scores=mistrust_scores, + acuity_scores=acuity_scores, + ) + + if final_model_table is not None: + downstream = final_model_table.copy() + if not set(MISTRUST_SCORE_COLUMNS).issubset(downstream.columns): + downstream = downstream.merge(mistrust_scores, on="hadm_id", how="left") + outputs["downstream_auc_results"] = evaluate_downstream_predictions( + final_model_table=downstream, + estimator_factory=estimator_factory, + split_fn=split_fn, + auc_fn=auc_fn, + repetitions=repetitions, + 
        )
        if include_downstream_weight_summary:
            outputs["downstream_weight_results"] = evaluate_downstream_average_weights(
                final_model_table=downstream,
                estimator_factory=estimator_factory,
                split_fn=split_fn,
                repetitions=repetitions,
            )

    return outputs


class EOLMistrustModel:
    """Thin object wrapper around the functional EOL mistrust model pipeline.

    Stores the injectable collaborators (estimator factory, sentiment
    function, split function, AUC metric, repetition count) once and forwards
    them to the corresponding module-level functions.
    """

    def __init__(
        self,
        estimator_factory: Callable[[], object] | None = None,
        sentiment_fn: Callable[[str], tuple[float, float]] | None = None,
        split_fn: Callable[..., tuple] | None = None,
        auc_fn: Callable[[Iterable[int], Iterable[float]], float] | None = None,
        repetitions: int = 100,
    ) -> None:
        # None means "use the module defaults" in the wrapped functions.
        self.estimator_factory = estimator_factory
        self.sentiment_fn = sentiment_fn
        self.split_fn = split_fn
        self.auc_fn = auc_fn
        self.repetitions = repetitions

    def build_mistrust_scores(
        self,
        feature_matrix: pd.DataFrame,
        note_labels: pd.DataFrame,
        note_corpus: pd.DataFrame,
    ) -> pd.DataFrame:
        """Delegate to :func:`build_mistrust_score_table` with stored settings."""
        return build_mistrust_score_table(
            feature_matrix=feature_matrix,
            note_labels=note_labels,
            note_corpus=note_corpus,
            estimator_factory=self.estimator_factory,
            sentiment_fn=self.sentiment_fn,
        )

    def evaluate_downstream(self, final_model_table: pd.DataFrame) -> pd.DataFrame:
        """Delegate to :func:`evaluate_downstream_predictions` with stored settings."""
        return evaluate_downstream_predictions(
            final_model_table=final_model_table,
            estimator_factory=self.estimator_factory,
            split_fn=self.split_fn,
            auc_fn=self.auc_fn,
            repetitions=self.repetitions,
        )

    def run(
        self,
        feature_matrix: pd.DataFrame,
        note_labels: pd.DataFrame,
        note_corpus: pd.DataFrame,
        demographics: pd.DataFrame | None = None,
        eol_cohort: pd.DataFrame | None = None,
        treatment_totals: pd.DataFrame | None = None,
        acuity_scores: pd.DataFrame | None = None,
        final_model_table: pd.DataFrame | None = None,
        include_downstream_weight_summary: bool = False,
        include_cdf_plot_data: bool = False,
    ) -> dict[str, pd.DataFrame | dict[str, pd.DataFrame]]:
        """Delegate to :func:`run_full_eol_mistrust_modeling` with stored settings."""
        return run_full_eol_mistrust_modeling(
            feature_matrix=feature_matrix,
            note_labels=note_labels,
            note_corpus=note_corpus,
            demographics=demographics,
            eol_cohort=eol_cohort,
            treatment_totals=treatment_totals,
            acuity_scores=acuity_scores,
            final_model_table=final_model_table,
            estimator_factory=self.estimator_factory,
            sentiment_fn=self.sentiment_fn,
            split_fn=self.split_fn,
            auc_fn=self.auc_fn,
            repetitions=self.repetitions,
            include_downstream_weight_summary=include_downstream_weight_summary,
            include_cdf_plot_data=include_cdf_plot_data,
        )


# Backwards-compatible aliases for earlier names of the same functions.
normalize_mistrust_scores = z_normalize_scores
run_racial_gap_validation = run_race_gap_analysis
run_acuity_correlation_analysis = run_acuity_control_analysis
run_downstream_prediction_experiments = evaluate_downstream_predictions
build_mistrust_metrics = build_mistrust_score_table


__all__ = [
    "BASELINE_FEATURE_COLUMNS",
    "DOWNSTREAM_FEATURE_CONFIGS",
    "DOWNSTREAM_TASK_MAP",
    "EOLMistrustModel",
    "MISTRUST_SCORE_COLUMNS",
    "RACE_FEATURE_COLUMNS",
    "build_autopsy_feature_weight_summary",
    "build_autopsy_mistrust_scores",
    "build_empirical_cdf_curve",
    "build_mistrust_metrics",
    "build_mistrust_score_table",
    "build_negative_sentiment_mistrust_scores",
    "build_noncompliance_feature_weight_summary",
    "build_noncompliance_mistrust_scores",
    "build_proxy_feature_weight_summary",
    "build_proxy_probability_scores",
    "build_race_based_treatment_cdf_plot_data",
    "build_trust_based_treatment_cdf_plot_data",
    "evaluate_downstream_average_weights",
    "evaluate_downstream_predictions",
    "fit_proxy_mistrust_model",
    "get_downstream_feature_configurations",
    "get_downstream_task_map",
    "normalize_mistrust_scores",
    "plot_grouped_treatment_cdf",
    "run_acuity_control_analysis",
    "run_acuity_correlation_analysis",
    "run_downstream_prediction_experiments",
    "run_full_eol_mistrust_modeling",
    "run_race_based_treatment_analysis",
    "run_race_gap_analysis",
    "run_racial_gap_validation",
"run_trust_based_treatment_analysis",
    "summarize_feature_weights",
    "z_normalize_scores",
]
diff --git a/pyhealth/tasks/eol_mistrust.py b/pyhealth/tasks/eol_mistrust.py
new file mode 100644
index 000000000..9451fd9e4
--- /dev/null
+++ b/pyhealth/tasks/eol_mistrust.py
@@ -0,0 +1,351 @@
"""Task definitions and target helpers for the EOL mistrust workflow."""

from __future__ import annotations

import re
from collections import OrderedDict
# NOTE(review): `datetime` and `Mapping` are not used in this module's
# visible code -- confirm before removing.
from datetime import datetime
from typing import Any, Dict, Iterable, List, Mapping, Sequence

import pandas as pd

from .base_task import BaseTask

# Chart itemids that record a patient's code status. 128 is the CareVue
# "Code Status" item; 223758 is presumably the MetaVision equivalent --
# confirm against D_ITEMS.
CODE_STATUS_ITEMIDS = {128, 223758}

# Human-readable task name -> target column built by the helpers below.
# An OrderedDict keeps iteration order deterministic for reporting.
EOL_MISTRUST_TASK_MAP = OrderedDict(
    [
        ("Left AMA", "left_ama"),
        ("Code Status", "code_status_dnr_dni_cmo"),
        ("In-hospital mortality", "in_hospital_mortality"),
    ]
)


def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> None:
    """Raise ValueError naming every column of ``required`` missing from ``df``."""
    missing = [column for column in required if column not in df.columns]
    if missing:
        missing_str = ", ".join(missing)
        raise ValueError(f"{df_name} is missing required columns: {missing_str}")


def _normalize_token(value) -> str:
    """Lower-case ``value`` and collapse non-alphanumeric runs to single ``_``.

    None / NaN inputs normalize to the empty string, e.g. "DNR/DNI" -> "dnr_dni".
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return ""
    value = str(value).strip().lower()
    if not value:
        return ""
    value = re.sub(r"[^a-z0-9]+", "_", value)
    value = re.sub(r"_+", "_", value)
    return value.strip("_")


def _to_datetime(value) -> pd.Timestamp:
    """Parse ``value`` leniently; unparseable input becomes NaT."""
    return pd.to_datetime(value, errors="coerce")


def _calculate_age_years(admittime, dob) -> float:
    """Return age in years at admission, or NaN when a timestamp is missing.

    Ages above 200 are capped to 90.0 -- presumably to neutralize
    de-identified DOB shifting for elderly MIMIC patients (confirm).
    """
    admit_time = _to_datetime(admittime)
    birth_time = _to_datetime(dob)
    if pd.isna(admit_time) or pd.isna(birth_time):
        return float("nan")

    seconds_per_year = 365.25 * 24 * 3600
    age = (admit_time.to_pydatetime() - birth_time.to_pydatetime()).total_seconds() / seconds_per_year
    return 90.0 if age > 200 else float(age)


def _calculate_los_days(admittime, dischtime) -> float:
    """Return hospital length of stay in days (NaN when a timestamp is missing)."""
    admit_time = _to_datetime(admittime)
discharge_time = _to_datetime(dischtime) + if pd.isna(admit_time) or pd.isna(discharge_time): + return float("nan") + return float((discharge_time - admit_time).total_seconds() / 86400.0) + + +def map_ethnicity_to_race(ethnicity) -> str: + """Collapse raw MIMIC ethnicity strings into the study race groups.""" + + text = str(ethnicity or "").upper() + if "BLACK" in text or "AFRICAN" in text: + return "BLACK" + if "WHITE" in text or "EUROPEAN" in text or "PORTUGUESE" in text: + return "WHITE" + if "ASIAN" in text: + return "ASIAN" + if "HISPANIC" in text or "LATINO" in text or "SOUTH AMERICAN" in text: + return "HISPANIC" + if "NATIVE" in text or "AMERICAN INDIAN" in text or "ALASKA NATIVE" in text: + return "NATIVE AMERICAN" + return "OTHER" + + +def map_insurance_to_group(insurance) -> str: + """Collapse raw insurance text into the three study groups.""" + + text = str(insurance or "").strip().lower() + normalized = re.sub(r"\s+", " ", text) + if normalized in {"medicare", "medicaid", "government", "public"}: + return "Public" + if normalized in {"private"}: + return "Private" + if normalized in {"self pay", "self-pay", "self_pay"}: + return "Self-Pay" + return str(insurance or "") + + +def prepare_note_text(text) -> str: + """Normalize note text by whitespace tokenization and rejoining only.""" + + if text is None or (isinstance(text, float) and pd.isna(text)): + return "" + return " ".join(str(text).split()) + + +def build_left_ama_target(admissions: pd.DataFrame) -> pd.DataFrame: + """Build the exact-match Left AMA target from admissions.""" + + _require_columns(admissions, ["hadm_id", "discharge_location"], "admissions") + targets = admissions[["hadm_id", "discharge_location"]].drop_duplicates("hadm_id").copy() + targets["left_ama"] = ( + targets["discharge_location"] + .fillna("") + .astype(str) + .str.strip() + .str.upper() + .eq("LEFT AGAINST MEDICAL ADVICE") + .astype(int) + ) + return targets[["hadm_id", 
"left_ama"]].sort_values("hadm_id").reset_index(drop=True)


def build_in_hospital_mortality_target(admissions: pd.DataFrame) -> pd.DataFrame:
    """Build the in-hospital mortality target from admissions."""

    _require_columns(admissions, ["hadm_id", "hospital_expire_flag"], "admissions")
    targets = admissions[["hadm_id", "hospital_expire_flag"]].drop_duplicates("hadm_id").copy()
    # Non-numeric or missing expire flags count as survival (0).
    targets["in_hospital_mortality"] = (
        pd.to_numeric(targets["hospital_expire_flag"], errors="coerce").fillna(0).astype(int)
    )
    return (
        targets[["hadm_id", "in_hospital_mortality"]]
        .sort_values("hadm_id")
        .reset_index(drop=True)
    )


def build_code_status_target(
    chartevents: pd.DataFrame,
    itemids: Iterable[int] | None = None,
) -> pd.DataFrame:
    """Build the code-status target using the required itemids only.

    Returns one row per ``hadm_id`` with ``code_status_dnr_dni_cmo`` set to 1
    when any code-status chart value for the admission mentions DNR, DNI, CMO
    or comfort measures.
    """

    _require_columns(chartevents, ["hadm_id", "itemid", "value"], "chartevents")
    # Callers may restrict/extend the itemids; default to the module constant.
    allowed_itemids = set(CODE_STATUS_ITEMIDS if itemids is None else itemids)

    if chartevents.empty:
        return pd.DataFrame(columns=["hadm_id", "code_status_dnr_dni_cmo"])

    codes = chartevents.loc[chartevents["itemid"].isin(allowed_itemids)].copy()
    if codes.empty:
        return pd.DataFrame(columns=["hadm_id", "code_status_dnr_dni_cmo"])

    # _normalize_token lower-cases and squashes punctuation, so e.g.
    # "DNR/DNI" becomes "dnr_dni" and matches both substrings below.
    normalized_value = codes["value"].map(_normalize_token)
    positive = normalized_value.apply(
        lambda value: int(
            ("dnr" in value)
            or ("dni" in value)
            or ("comfort" in value)
            or ("cmo" in value)
        )
    )
    # max() over an admission's events: a single positive event labels the
    # whole admission positive.
    target = (
        pd.DataFrame({"hadm_id": codes["hadm_id"], "code_status_dnr_dni_cmo": positive})
        .groupby("hadm_id", as_index=False)["code_status_dnr_dni_cmo"]
        .max()
        .sort_values("hadm_id")
    )
    return target.reset_index(drop=True)


def get_eol_mistrust_task_map() -> OrderedDict[str, str]:
    """Return the three downstream target names used by the study."""

    # Return a copy so callers cannot mutate the module-level mapping.
    return OrderedDict(EOL_MISTRUST_TASK_MAP)


class EOLMistrustDownstreamMIMIC3(BaseTask):
    """Admission-level downstream prediction task for the EOL mistrust 
study."""

    task_name = "EOLMistrustDownstreamMIMIC3"

    def __init__(
        self,
        target: str = "in_hospital_mortality",
        include_notes: bool = False,
    ) -> None:
        # ``target`` must be one of the columns in EOL_MISTRUST_TASK_MAP.
        if target not in set(EOL_MISTRUST_TASK_MAP.values()):
            raise ValueError(f"Unsupported EOL mistrust target: {target}")

        self.target = target
        self.include_notes = include_notes
        # Feature schema consumed by the PyHealth sample-processing pipeline.
        self.input_schema: Dict[str, str] = {
            "conditions": "sequence",
            "procedures": "sequence",
            "drugs": "sequence",
            "age": "float",
            "los_days": "float",
            "gender": "text",
            "insurance": "text",
            "race": "text",
        }
        if include_notes:
            self.input_schema["clinical_notes"] = "text"
        # Single binary label named after the chosen target column.
        self.output_schema: Dict[str, str] = {target: "binary"}

    def _get_single_patient_event(self, patient: Any, event_type: str):
        """Return the first event of ``event_type`` for ``patient``, or None."""
        events = patient.get_events(event_type=event_type)
        if not events:
            return None
        return events[0]

    def _get_codes_for_admission(self, patient: Any, event_type: str, hadm_id) -> List[str]:
        """Collect one code string per event of ``event_type`` within ``hadm_id``.

        The first non-empty attribute among icd9_code, icd_code, drug and ndc
        is taken per event, so the same helper serves diagnoses, procedures
        and prescriptions.
        """
        events = patient.get_events(
            event_type=event_type,
            filters=[("hadm_id", "==", hadm_id)],
        )
        values: List[str] = []
        for event in events:
            for attribute in ("icd9_code", "icd_code", "drug", "ndc"):
                value = getattr(event, attribute, None)
                if value is not None and str(value).strip():
                    values.append(str(value))
                    break
        return values

    def _get_note_text(self, patient: Any, hadm_id) -> str:
        """Concatenate and whitespace-normalize all note texts for ``hadm_id``."""
        notes = patient.get_events(
            event_type="noteevents",
            filters=[("hadm_id", "==", hadm_id)],
        )
        return prepare_note_text(" ".join(str(getattr(note, "text", "")) for note in notes))

    def _get_code_status_label(self, patient: Any, hadm_id) -> int:
        """Derive the code-status label from the admission's chartevents."""
        events = patient.get_events(
            event_type="chartevents",
            filters=[("hadm_id", "==", hadm_id)],
        )
        rows = []
        for event in events:
            rows.append(
                {
                    "hadm_id": getattr(event, "hadm_id", hadm_id),
                    "itemid": getattr(event, "itemid", None),
                    "value": getattr(event, "value", None),
                }
            )
        if not rows:
            return 0
        # Reuse the table-level builder so event-based and table-based
        # labelling stay consistent.
        target = 
build_code_status_target(pd.DataFrame(rows))
        if target.empty:
            return 0
        return int(target["code_status_dnr_dni_cmo"].max())

    def _get_target_value(self, patient: Any, admission: Any) -> int:
        """Compute the binary label for ``self.target`` on one admission."""
        if self.target == "left_ama":
            # Exact (case/whitespace-insensitive) match against the study phrase.
            discharge_location = str(getattr(admission, "discharge_location", "") or "")
            return int(discharge_location.strip().upper() == "LEFT AGAINST MEDICAL ADVICE")
        if self.target == "in_hospital_mortality":
            expire_flag = getattr(admission, "hospital_expire_flag", 0)
            try:
                return int(expire_flag)
            except (TypeError, ValueError):
                # Malformed flags count as survival.
                return 0
        if self.target == "code_status_dnr_dni_cmo":
            return self._get_code_status_label(patient, admission.hadm_id)
        raise ValueError(f"Unsupported EOL mistrust target: {self.target}")

    def __call__(self, patient: Any) -> List[Dict[str, Any]]:
        """Build one labelled sample per admission of ``patient``."""
        admissions = patient.get_events(event_type="admissions")
        patient_event = self._get_single_patient_event(patient, "patients")
        if not admissions:
            return []

        samples: List[Dict[str, Any]] = []
        for admission in admissions:
            hadm_id = getattr(admission, "hadm_id", None)
            if hadm_id is None:
                # Per-admission events cannot be joined without an hadm_id.
                continue

            conditions = self._get_codes_for_admission(patient, "diagnoses_icd", hadm_id)
            procedures = self._get_codes_for_admission(patient, "procedures_icd", hadm_id)
            drugs = self._get_codes_for_admission(patient, "prescriptions", hadm_id)

            sample: Dict[str, Any] = {
                "visit_id": hadm_id,
                "hadm_id": hadm_id,
                "patient_id": patient.patient_id,
                "conditions": conditions,
                "procedures": procedures,
                "drugs": drugs,
                # NOTE(review): the admission event's ``timestamp`` is used as
                # the admit time for both age and LOS -- confirm it maps to
                # ADMITTIME in the dataset config.
                "age": _calculate_age_years(
                    getattr(admission, "timestamp", None),
                    getattr(patient_event, "dob", None) if patient_event is not None else None,
                ),
                "los_days": _calculate_los_days(
                    getattr(admission, "timestamp", None),
                    getattr(admission, "dischtime", None),
                ),
                "gender": getattr(patient_event, "gender", None) if patient_event is not None else None,
                "insurance": map_insurance_to_group(getattr(admission, "insurance", 
Subject: [PATCH 2/7] Check in EOL mistrust test files

Refactor code structure for improved readability and maintainability
tests/core/test_eol_mistrust_Integration.py | 1874 +++++++++++ ...test_eol_mistrust_TrainingAndEvaluation.py | 1375 ++++++++ tests/core/test_eol_mistrust_dataset.py | 2861 +++++++++++++++++ tests/core/test_eol_mistrust_model.py | 1679 ++++++++++ tests/core/test_eol_mistrust_module.py | 1523 +++++++++ 5 files changed, 9312 insertions(+) create mode 100644 tests/core/test_eol_mistrust_Integration.py create mode 100644 tests/core/test_eol_mistrust_TrainingAndEvaluation.py create mode 100644 tests/core/test_eol_mistrust_dataset.py create mode 100644 tests/core/test_eol_mistrust_model.py create mode 100644 tests/core/test_eol_mistrust_module.py diff --git a/tests/core/test_eol_mistrust_Integration.py b/tests/core/test_eol_mistrust_Integration.py new file mode 100644 index 000000000..f8ca42422 --- /dev/null +++ b/tests/core/test_eol_mistrust_Integration.py @@ -0,0 +1,1874 @@ +import importlib +import importlib.util +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +import pandas as pd + + +def _load_dataset_module(): + module_path = ( + Path(__file__).resolve().parents[2] / "pyhealth" / "datasets" / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.datasets.eol_mistrust_integration_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +def _load_model_module(): + module_path = ( + Path(__file__).resolve().parents[2] / "pyhealth" / "models" / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.models.eol_mistrust_integration_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +class _FakeProbEstimator: + def __init__(self, probabilities): + self.probabilities = list(probabilities) + self.coef_ = None + 
self.fit_X = None + self.fit_y = None + + def fit(self, X, y): + self.fit_X = X.copy() + self.fit_y = y.copy() + self.coef_ = [[0.1] * X.shape[1]] + return self + + def predict_proba(self, X): + probs = self.probabilities[: len(X)] + return [[1.0 - prob, prob] for prob in probs] + + +class _SplitRecorder: + def __init__(self): + self.calls = [] + + def __call__(self, X, y, test_size, random_state): + features = X.reset_index(drop=True) + labels = pd.Series(y).reset_index(drop=True) + self.calls.append( + { + "test_size": test_size, + "random_state": random_state, + "n_rows": len(features), + } + ) + train_idx = [0, 1, 2, 3] + test_idx = [4, 5] + return ( + features.iloc[train_idx].copy(), + features.iloc[test_idx].copy(), + labels.iloc[train_idx].copy(), + labels.iloc[test_idx].copy(), + ) + + +class _AUCRecorder: + def __init__(self, value=0.75): + self.value = float(value) + self.calls = [] + + def __call__(self, y_true, y_prob): + self.calls.append( + { + "y_true": list(pd.Series(y_true)), + "y_prob": list(pd.Series(y_prob)), + } + ) + return self.value + + +class TestEOLMistrustIntegration(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.dataset = _load_dataset_module() + cls.model = _load_model_module() + + def setUp(self): + self.admissions = pd.DataFrame( + [ + { + "hadm_id": 101, + "subject_id": 201, + "admittime": "2100-01-01 00:00:00", + "dischtime": "2100-01-03 00:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 102, + "subject_id": 202, + "admittime": "2100-01-02 00:00:00", + "dischtime": "2100-01-04 00:00:00", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "insurance": "Private", + "discharge_location": "LEFT AGAINST MEDICAL ADVICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 103, + "subject_id": 203, + "admittime": "2100-01-03 00:00:00", + "dischtime": "2100-01-05 00:00:00", + 
"ethnicity": "ASIAN", + "insurance": "Medicaid", + "discharge_location": "SNF", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 104, + "subject_id": 204, + "admittime": "2100-01-04 00:00:00", + "dischtime": "2100-01-06 00:00:00", + "ethnicity": "HISPANIC OR LATINO", + "insurance": "Private", + "discharge_location": "HOME", + "hospital_expire_flag": 1, + "has_chartevents_data": 1, + }, + { + "hadm_id": 105, + "subject_id": 205, + "admittime": "2100-01-05 00:00:00", + "dischtime": "2100-01-07 00:00:00", + "ethnicity": "AMERICAN INDIAN/ALASKA NATIVE", + "insurance": "Self Pay", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 106, + "subject_id": 206, + "admittime": "2100-01-06 00:00:00", + "dischtime": "2100-01-08 00:00:00", + "ethnicity": "UNKNOWN/NOT SPECIFIED", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + ] + ) + self.patients = pd.DataFrame( + [ + {"subject_id": 201, "gender": "M", "dob": "1800-01-01 00:00:00"}, + {"subject_id": 202, "gender": "F", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 203, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 204, "gender": "F", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 205, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 206, "gender": "F", "dob": "2070-01-01 00:00:00"}, + ] + ) + self.icustays = pd.DataFrame( + [ + {"hadm_id": 101, "icustay_id": 1001, "intime": "2100-01-01 00:00:00", "outtime": "2100-01-01 13:00:00"}, + {"hadm_id": 101, "icustay_id": 1002, "intime": "2100-01-01 14:00:00", "outtime": "2100-01-01 18:00:00"}, + {"hadm_id": 102, "icustay_id": 1003, "intime": "2100-01-02 00:00:00", "outtime": "2100-01-02 13:00:00"}, + {"hadm_id": 103, "icustay_id": 1004, "intime": "2100-01-03 00:00:00", "outtime": "2100-01-03 13:00:00"}, + {"hadm_id": 104, "icustay_id": 1005, "intime": "2100-01-04 00:00:00", "outtime": 
"2100-01-04 13:00:00"}, + {"hadm_id": 105, "icustay_id": 1006, "intime": "2100-01-05 00:00:00", "outtime": "2100-01-05 13:00:00"}, + {"hadm_id": 106, "icustay_id": 1007, "intime": "2100-01-06 00:00:00", "outtime": "2100-01-06 13:00:00"}, + ] + ) + self.d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + {"itemid": 2, "label": "Pain Level", "dbsource": "carevue"}, + {"itemid": 3, "label": "Richmond-RAS Scale Assessment", "dbsource": "metavision"}, + {"itemid": 128, "label": "Code Status", "dbsource": "carevue"}, + ] + ) + self.chartevents = pd.DataFrame( + [ + {"hadm_id": 101, "itemid": 1, "value": "No", "icustay_id": 1001}, + {"hadm_id": 101, "itemid": 1, "value": "No", "icustay_id": 1001}, + {"hadm_id": 101, "itemid": 2, "value": "7-Mod to Severe", "icustay_id": 1001}, + {"hadm_id": 101, "itemid": 128, "value": "Full Code", "icustay_id": 1001}, + {"hadm_id": 102, "itemid": 3, "value": "0 Alert and Calm", "icustay_id": 1003}, + {"hadm_id": 102, "itemid": 128, "value": "DNR/DNI", "icustay_id": 1003}, + {"hadm_id": 103, "itemid": 2, "value": "None", "icustay_id": 1004}, + {"hadm_id": 104, "itemid": 128, "value": "Comfort Measures", "icustay_id": 1005}, + {"hadm_id": 105, "itemid": 1, "value": "Yes", "icustay_id": 1006}, + {"hadm_id": 106, "itemid": 2, "value": "None", "icustay_id": 1007}, + ] + ) + self.noteevents = pd.DataFrame( + [ + {"hadm_id": 101, "category": "Nursing", "text": "Patient is non-complian and refused medication.", "iserror": None}, + {"hadm_id": 102, "category": "Nursing", "text": "Patient is calm. 
Autopsy discussed with family.", "iserror": None}, + {"hadm_id": 103, "category": "Nursing", "text": "Patient is non-adher to the follow up plan.", "iserror": None}, + {"hadm_id": 104, "category": "Nursing", "text": "Date:[**5-1-18**] patient has good rapport.", "iserror": None}, + {"hadm_id": 105, "category": "Nursing", "text": "this note should be dropped", "iserror": 1}, + {"hadm_id": 106, "category": "Nursing", "text": "", "iserror": None}, + ] + ) + self.ventdurations = pd.DataFrame( + [ + {"icustay_id": 1004, "ventnum": 1, "starttime": "2100-01-03 00:00:00", "endtime": "2100-01-03 01:00:00", "duration_hours": 1.0}, + {"icustay_id": 1004, "ventnum": 2, "starttime": "2100-01-03 11:00:00", "endtime": "2100-01-03 13:00:00", "duration_hours": 2.0}, + {"icustay_id": 1005, "ventnum": 1, "starttime": "2100-01-04 00:00:00", "endtime": "2100-01-04 02:00:00", "duration_hours": 2.0}, + ] + ) + self.vasopressordurations = pd.DataFrame( + [ + {"icustay_id": 1004, "vasonum": 1, "starttime": "2100-01-03 03:00:00", "endtime": "2100-01-03 04:00:00", "duration_hours": 1.0}, + {"icustay_id": 1005, "vasonum": 1, "starttime": "2100-01-04 05:00:00", "endtime": "2100-01-04 07:00:00", "duration_hours": 2.0}, + ] + ) + self.oasis = pd.DataFrame( + [ + {"hadm_id": 101, "icustay_id": 1001, "oasis": 10}, + {"hadm_id": 102, "icustay_id": 1003, "oasis": 12}, + {"hadm_id": 103, "icustay_id": 1004, "oasis": 20}, + {"hadm_id": 104, "icustay_id": 1005, "oasis": 25}, + {"hadm_id": 105, "icustay_id": 1006, "oasis": 8}, + {"hadm_id": 106, "icustay_id": 1007, "oasis": 9}, + ] + ) + self.sapsii = pd.DataFrame( + [ + {"hadm_id": 101, "icustay_id": 1001, "sapsii": 30}, + {"hadm_id": 102, "icustay_id": 1003, "sapsii": 35}, + {"hadm_id": 103, "icustay_id": 1004, "sapsii": 50}, + {"hadm_id": 104, "icustay_id": 1005, "sapsii": 55}, + {"hadm_id": 105, "icustay_id": 1006, "sapsii": 20}, + {"hadm_id": 106, "icustay_id": 1007, "sapsii": 22}, + ] + ) + + def _sentiment_fn(self, text): + if "non" in text or 
"refused" in text: + return (-0.6, 0.0) + return (0.2, 0.0) + + def _build_core_artifacts(self): + base = self.dataset.build_base_admissions(self.admissions, self.patients) + demographics = self.dataset.build_demographics_table(base) + all_cohort = self.dataset.build_all_cohort(base, self.icustays) + eol_cohort = self.dataset.build_eol_cohort(base, demographics) + feature_matrix = self.dataset.build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + all_hadm_ids=all_cohort["hadm_id"].tolist(), + ) + note_labels = self.dataset.build_note_labels(self.noteevents, all_hadm_ids=all_cohort["hadm_id"].tolist()) + note_corpus = self.dataset.build_note_corpus(self.noteevents, all_hadm_ids=all_cohort["hadm_id"].tolist()) + treatment_totals = self.dataset.build_treatment_totals(self.icustays, self.ventdurations, self.vasopressordurations) + acuity_scores = self.dataset.build_acuity_scores(self.oasis, self.sapsii) + mistrust_scores = self.model.build_mistrust_score_table( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + ) + final_model_table = self.dataset.build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=mistrust_scores, + include_race=True, + include_mistrust=True, + ) + return { + "base": base, + "demographics": demographics, + "all_cohort": all_cohort, + "eol_cohort": eol_cohort, + "feature_matrix": feature_matrix, + "note_labels": note_labels, + "note_corpus": note_corpus, + "treatment_totals": treatment_totals, + "acuity_scores": acuity_scores, + "mistrust_scores": mistrust_scores, + "final_model_table": final_model_table, + } + + def _build_valid_environment(self): + hadm_ids = list(range(1, 50003)) + subject_ids = list(range(100001, 150003)) + admissions = pd.DataFrame( + { + 
"hadm_id": hadm_ids, + "subject_id": subject_ids, + "admittime": ["2100-01-01 00:00:00"] * len(hadm_ids), + "dischtime": ["2100-01-02 00:00:00"] * len(hadm_ids), + "ethnicity": ["WHITE"] * len(hadm_ids), + "insurance": ["Medicare"] * len(hadm_ids), + "discharge_location": ["HOME"] * len(hadm_ids), + "hospital_expire_flag": [0] * len(hadm_ids), + "has_chartevents_data": [1] * len(hadm_ids), + } + ) + patients = pd.DataFrame( + { + "subject_id": subject_ids, + "gender": ["M"] * len(subject_ids), + "dob": ["2070-01-01 00:00:00"] * len(subject_ids), + } + ) + icustays = pd.DataFrame( + { + "hadm_id": hadm_ids, + "icustay_id": list(range(700001, 750003)), + "intime": ["2100-01-01 00:00:00"] * len(hadm_ids), + "outtime": ["2100-01-01 13:00:00"] * len(hadm_ids), + } + ) + noteevents = pd.DataFrame([{"hadm_id": 1, "category": "Nursing", "text": "ok", "iserror": None}]) + chartevents = pd.DataFrame([{"hadm_id": 1, "itemid": 1, "value": "No", "icustay_id": 700001}]) + d_items = pd.DataFrame([{"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}]) + ventdurations = pd.DataFrame([{"icustay_id": 700001, "ventnum": 1, "starttime": "2100-01-01 00:00:00", "endtime": "2100-01-01 01:00:00", "duration_hours": 1.0}]) + vasopressordurations = pd.DataFrame([{"icustay_id": 700001, "vasonum": 1, "starttime": "2100-01-01 02:00:00", "endtime": "2100-01-01 03:00:00", "duration_hours": 1.0}]) + oasis = pd.DataFrame([{"hadm_id": 1, "icustay_id": 700001, "oasis": 10}]) + sapsii = pd.DataFrame([{"hadm_id": 1, "icustay_id": 700001, "sapsii": 30}]) + return ( + { + "admissions": admissions, + "patients": patients, + "icustays": icustays, + "noteevents": noteevents, + "chartevents": chartevents, + "d_items": d_items, + }, + { + "ventdurations": ventdurations, + "vasopressordurations": vasopressordurations, + "oasis": oasis, + "sapsii": sapsii, + }, + ) + + def _build_deliverable_artifacts(self): + artifacts = self._build_core_artifacts() + return { + "base_admissions": 
artifacts["base"], + "eol_cohort": artifacts["eol_cohort"], + "all_cohort": artifacts["all_cohort"], + "treatment_totals": artifacts["treatment_totals"], + "chartevent_feature_matrix": artifacts["feature_matrix"], + "note_labels": artifacts["note_labels"], + "mistrust_scores": artifacts["mistrust_scores"], + "acuity_scores": artifacts["acuity_scores"], + "final_model_table": artifacts["final_model_table"], + } + + def test_dataset_public_api_exposes_expected_function_contracts(self): + expected = { + "build_base_admissions", + "build_demographics_table", + "build_all_cohort", + "build_eol_cohort", + "build_chartevent_feature_matrix", + "build_note_corpus", + "build_note_labels", + "build_treatment_totals", + "build_acuity_scores", + "build_final_model_table", + "validate_database_environment", + } + for name in expected: + self.assertTrue(hasattr(self.dataset, name), msg=name) + self.assertTrue(callable(getattr(self.dataset, name)), msg=name) + + def test_model_public_api_exposes_expected_function_contracts(self): + expected = { + "fit_proxy_mistrust_model", + "build_proxy_probability_scores", + "build_negative_sentiment_mistrust_scores", + "z_normalize_scores", + "build_mistrust_score_table", + "summarize_feature_weights", + "run_race_gap_analysis", + "run_race_based_treatment_analysis", + "run_trust_based_treatment_analysis", + "run_acuity_control_analysis", + "evaluate_downstream_predictions", + "run_full_eol_mistrust_modeling", + } + for name in expected: + self.assertTrue(hasattr(self.model, name), msg=name) + self.assertTrue(callable(getattr(self.model, name)), msg=name) + + def test_dataset_helper_unit_rules_cover_mapping_and_whitespace_cleanup(self): + self.assertEqual(self.dataset.map_ethnicity("BLACK/AFRICAN AMERICAN"), "BLACK") + self.assertEqual(self.dataset.map_ethnicity("HISPANIC OR LATINO"), "HISPANIC") + self.assertEqual( + self.dataset.map_ethnicity("AMERICAN INDIAN/ALASKA NATIVE"), + "NATIVE AMERICAN", + ) + 
self.assertEqual(self.dataset.map_insurance("Medicare"), "Public") + self.assertEqual(self.dataset.map_insurance("Private"), "Private") + self.assertEqual(self.dataset.map_insurance("Self Pay"), "Self-Pay") + self.assertEqual( + self.dataset.prepare_note_text_for_sentiment(" Date:[**5-1-18**] calm rapport "), + "Date:[**5-1-18**] calm rapport", + ) + + def test_dataset_build_demographics_table_caps_age_and_computes_expected_columns(self): + base = self.dataset.build_base_admissions(self.admissions, self.patients) + demographics = self.dataset.build_demographics_table(base) + self.assertEqual( + demographics.columns.tolist(), + [ + "hadm_id", + "subject_id", + "gender", + "admittime", + "dischtime", + "ethnicity", + "insurance_raw", + "race", + "age", + "los_hours", + "los_days", + "insurance", + "insurance_group", + ], + ) + by_hadm = demographics.set_index("hadm_id") + self.assertEqual(float(by_hadm.loc[101, "age"]), 90.0) + self.assertAlmostEqual(float(by_hadm.loc[101, "los_hours"]), 48.0, places=7) + self.assertEqual(by_hadm.loc[105, "race"], "NATIVE AMERICAN") + + def test_dataset_build_base_admissions_filters_has_chartevents_and_enforces_unique_hadm(self): + admissions = pd.concat( + [ + self.admissions, + pd.DataFrame( + [ + { + "hadm_id": 107, + "subject_id": 207, + "admittime": "2100-01-07 00:00:00", + "dischtime": "2100-01-08 00:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 0, + } + ] + ), + ], + ignore_index=True, + ) + patients = pd.concat( + [ + self.patients, + pd.DataFrame([{"subject_id": 207, "gender": "M", "dob": "2070-01-01 00:00:00"}]), + ], + ignore_index=True, + ) + base = self.dataset.build_base_admissions(admissions, patients) + self.assertNotIn(107, base["hadm_id"].tolist()) + self.assertEqual(len(base), len(set(base["hadm_id"]))) + + def test_dataset_build_all_and_eol_cohorts_respect_duration_boundaries(self): + base = 
self.dataset.build_base_admissions(self.admissions, self.patients) + demographics = self.dataset.build_demographics_table(base) + + boundary_demo = pd.DataFrame( + [ + {"hadm_id": 1, "los_hours": 5.99}, + {"hadm_id": 2, "los_hours": 6.0}, + ] + ) + boundary_base = pd.DataFrame( + [ + {"hadm_id": 1, "discharge_location": "SNF", "hospital_expire_flag": 0}, + {"hadm_id": 2, "discharge_location": "SNF", "hospital_expire_flag": 0}, + ] + ) + eol = self.dataset.build_eol_cohort(boundary_base, boundary_demo) + self.assertEqual(eol["hadm_id"].tolist(), [2]) + + all_cohort = self.dataset.build_all_cohort(base, self.icustays) + self.assertEqual(all_cohort["hadm_id"].tolist(), [101, 102, 103, 104, 105, 106]) + self.assertEqual(len(all_cohort), len(set(all_cohort["hadm_id"]))) + full_eol = self.dataset.build_eol_cohort(base, demographics) + self.assertEqual(full_eol["hadm_id"].tolist(), [103, 104]) + + def test_dataset_build_all_cohort_includes_exact_12_hours_and_excludes_eleven_fifty_nine(self): + base = pd.DataFrame([{"hadm_id": 1}, {"hadm_id": 2}]) + icustays = pd.DataFrame( + [ + { + "hadm_id": 1, + "icustay_id": 1, + "intime": "2100-01-01 00:00:00", + "outtime": "2100-01-01 12:00:00", + }, + { + "hadm_id": 2, + "icustay_id": 2, + "intime": "2100-01-01 00:00:00", + "outtime": "2100-01-01 11:59:00", + }, + ] + ) + cohort = self.dataset.build_all_cohort(base, icustays) + self.assertEqual(cohort["hadm_id"].tolist(), [1]) + + def test_dataset_note_corpus_and_labels_filter_errors_and_capture_required_phrases(self): + all_hadm_ids = [101, 102, 103, 104, 105, 106] + note_corpus = self.dataset.build_note_corpus(self.noteevents, all_hadm_ids=all_hadm_ids) + note_labels = self.dataset.build_note_labels(self.noteevents, all_hadm_ids=all_hadm_ids) + + self.assertEqual(note_corpus.columns.tolist(), ["hadm_id", "note_text"]) + self.assertEqual(note_corpus["hadm_id"].tolist(), all_hadm_ids) + self.assertEqual(note_corpus.set_index("hadm_id").loc[105, "note_text"], "") + by_hadm = 
note_labels.set_index("hadm_id") + self.assertEqual(int(by_hadm.loc[101, "noncompliance_label"]), 1) + self.assertEqual(int(by_hadm.loc[102, "autopsy_label"]), 1) + self.assertEqual(int(by_hadm.loc[103, "noncompliance_label"]), 1) + + def test_dataset_build_note_corpus_concatenates_with_single_spaces_and_drops_only_iserror_one(self): + notes = pd.DataFrame( + [ + {"hadm_id": 1, "category": "Nursing", "text": "first", "iserror": None}, + {"hadm_id": 1, "category": "Nursing", "text": "second", "iserror": 0}, + {"hadm_id": 1, "category": "Nursing", "text": "third", "iserror": 1}, + {"hadm_id": 2, "category": "Nursing", "text": " lone ", "iserror": None}, + ] + ) + corpus = self.dataset.build_note_corpus(notes, all_hadm_ids=[1, 2, 3]) + by_hadm = corpus.set_index("hadm_id") + self.assertEqual(by_hadm.loc[1, "note_text"], "first second") + self.assertEqual(by_hadm.loc[2, "note_text"], "lone") + self.assertEqual(by_hadm.loc[3, "note_text"], "") + + def test_dataset_identify_table2_itemids_and_feature_matrix_support_partial_matching_binary_rows_and_zero_rows(self): + itemids = self.dataset.identify_table2_itemids(self.d_items) + self.assertEqual(itemids, {1, 2, 3}) + + feature_matrix = self.dataset.build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + all_hadm_ids=[101, 102, 103, 104], + ) + self.assertEqual(feature_matrix["hadm_id"].tolist(), [101, 102, 103, 104]) + self.assertEqual( + sorted([column for column in feature_matrix.columns if column != "hadm_id"]), + [ + "Education Readiness: No", + "Education Readiness: Yes", + "Pain Level: 7-Mod to Severe", + "Pain Level: None", + "Richmond-RAS Scale Assessment: 0 Alert and Calm", + ], + ) + row_101 = feature_matrix.set_index("hadm_id").loc[101] + self.assertEqual(int(row_101["Education Readiness: No"]), 1) + self.assertEqual(int(row_101["Pain Level: 7-Mod to Severe"]), 1) + row_104 = feature_matrix.set_index("hadm_id").loc[104] + self.assertTrue((row_104.fillna(0).astype(int) == 0).all()) + + def 
test_dataset_identify_table2_itemids_does_not_overmatch_unrelated_labels(self): + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + {"itemid": 2, "label": "Readiness Scoreboard", "dbsource": "carevue"}, + {"itemid": 3, "label": "Orientation", "dbsource": "carevue"}, + {"itemid": 4, "label": "Random Unrelated Label", "dbsource": "carevue"}, + ] + ) + itemids = self.dataset.identify_table2_itemids(d_items) + self.assertEqual(itemids, {1, 3}) + + def test_dataset_feature_matrix_is_binary_integer_typed_and_sorted_by_hadm(self): + feature_matrix = self.dataset.build_chartevent_feature_matrix( + self.chartevents.sample(frac=1.0, random_state=0), + self.d_items, + all_hadm_ids=[106, 103, 102, 101], + ) + self.assertEqual(feature_matrix["hadm_id"].tolist(), [101, 102, 103, 106]) + for column in feature_matrix.columns: + if column == "hadm_id": + continue + self.assertTrue(pd.api.types.is_integer_dtype(feature_matrix[column])) + self.assertTrue(set(feature_matrix[column].dropna().unique()).issubset({0, 1})) + + def test_dataset_build_treatment_totals_merges_gap_boundary_and_outputs_sorted_schema(self): + boundary_icu = pd.DataFrame( + [{"hadm_id": 1, "icustay_id": 99, "intime": "2100-01-01", "outtime": "2100-01-02"}] + ) + boundary_vent = pd.DataFrame( + [ + {"icustay_id": 99, "ventnum": 1, "starttime": "2100-01-01 00:00:00", "endtime": "2100-01-01 01:00:00", "duration_hours": 1.0}, + {"icustay_id": 99, "ventnum": 2, "starttime": "2100-01-01 11:00:00", "endtime": "2100-01-01 12:00:00", "duration_hours": 1.0}, + {"icustay_id": 99, "ventnum": 3, "starttime": "2100-01-01 22:01:00", "endtime": "2100-01-01 23:01:00", "duration_hours": 1.0}, + ] + ) + empty_vaso = pd.DataFrame(columns=["icustay_id", "vasonum", "starttime", "endtime", "duration_hours"]) + totals = self.dataset.build_treatment_totals(boundary_icu, boundary_vent, empty_vaso) + self.assertEqual(totals.columns.tolist(), ["hadm_id", "total_vent_min", 
"total_vaso_min"]) + row = totals.fillna(0).set_index("hadm_id").loc[1] + self.assertEqual(float(row["total_vent_min"]), 780.0) + + def test_dataset_build_final_model_table_returns_exact_full_schema_order(self): + artifacts = self._build_core_artifacts() + final_model_table = artifacts["final_model_table"] + self.assertEqual( + final_model_table.columns.tolist(), + [ + "hadm_id", + "age", + "los_days", + "gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", + "race_white", + "race_black", + "race_asian", + "race_hispanic", + "race_native_american", + "race_other", + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + "left_ama", + "in_hospital_mortality", + "code_status_dnr_dni_cmo", + ], + ) + self.assertEqual(final_model_table["hadm_id"].tolist(), [101, 102, 103, 104, 105, 106]) + self.assertEqual(int(final_model_table.set_index("hadm_id").loc[102, "left_ama"]), 1) + self.assertEqual( + int(final_model_table.set_index("hadm_id").loc[102, "code_status_dnr_dni_cmo"]), + 1, + ) + + def test_dataset_validate_database_environment_accepts_valid_minimal_environment(self): + raw_tables, materialized_views = self._build_valid_environment() + summary = self.dataset.validate_database_environment(raw_tables, materialized_views) + self.assertEqual(summary["database_flavor"], "postgresql") + self.assertEqual(summary["schema_name"], "mimiciii") + self.assertGreater(summary["base_admissions_rows"], 50000) + self.assertIn("admissions", summary["raw_tables"]) + self.assertIn("oasis", summary["materialized_views"]) + + def test_dataset_z_normalize_scores_turns_constant_columns_to_zero(self): + score_table = pd.DataFrame( + [ + {"hadm_id": 1, "score_a": 1.0, "score_b": 5.0}, + {"hadm_id": 2, "score_a": 2.0, "score_b": 5.0}, + {"hadm_id": 3, "score_a": 3.0, "score_b": 5.0}, + ] + ) + normalized = self.dataset.z_normalize_scores(score_table, columns=["score_a", "score_b"]) + 
self.assertEqual(normalized["hadm_id"].tolist(), [1, 2, 3]) + self.assertAlmostEqual(float(normalized["score_a"].mean()), 0.0, places=7) + self.assertTrue((normalized["score_b"] == 0.0).all()) + + def test_model_helper_units_cover_score_column_and_note_cleanup_rules(self): + self.assertEqual(self.model._score_column_name("noncompliance_label"), "noncompliance_score") + self.assertEqual(self.model._score_column_name("custom_target"), "custom_target_score") + self.assertEqual( + self.model._prepare_note_text_for_sentiment(" Date:[**5-1-18**] calm rapport "), + "Date:[**5-1-18**] calm rapport", + ) + self.assertEqual(self.model._prepare_note_text_for_sentiment(None), "") + + def test_model_fit_and_proxy_probability_functions_use_full_input_and_sorted_positive_scores(self): + artifacts = self._build_core_artifacts() + created = [] + + class _RecordingLogisticRegression: + def __init__(self, *args, **kwargs): + del args + self.kwargs = kwargs + self.coef_ = None + self.fit_X = None + self.fit_y = None + created.append(self) + + def fit(self, X, y): + self.fit_X = X.copy() + self.fit_y = y.copy() + self.coef_ = [[0.1] * X.shape[1]] + return self + + def predict_proba(self, X): + return [[0.2, 0.8] for _ in range(len(X))] + + with patch.object(self.model, "LogisticRegression", _RecordingLogisticRegression): + self.model.fit_proxy_mistrust_model( + artifacts["feature_matrix"], + artifacts["note_labels"], + "noncompliance_label", + ) + + self.assertEqual(created[0].kwargs["penalty"], "l1") + self.assertEqual(created[0].kwargs["solver"], "liblinear") + self.assertEqual(created[0].kwargs["max_iter"], 1000) + self.assertEqual(len(created[0].fit_X), len(artifacts["feature_matrix"])) + + scores = self.model.build_proxy_probability_scores( + artifacts["feature_matrix"], + artifacts["note_labels"], + "autopsy_label", + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + ) + self.assertEqual(scores.columns.tolist(), ["hadm_id", "autopsy_score"]) + 
self.assertEqual(scores["hadm_id"].tolist(), sorted(scores["hadm_id"].tolist())) + + def test_model_build_proxy_probability_scores_supports_nonstandard_label_names_and_bad_probability_shapes(self): + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 1, "feature_a": 1}, + {"hadm_id": 2, "feature_a": 0}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 1, "custom_target": 1}, + {"hadm_id": 2, "custom_target": 0}, + ] + ) + scores = self.model.build_proxy_probability_scores( + feature_matrix, + note_labels, + "custom_target", + estimator_factory=lambda: _FakeProbEstimator([0.2, 0.8]), + ) + self.assertEqual(scores.columns.tolist(), ["hadm_id", "custom_target_score"]) + + class _MalformedProbEstimator: + def fit(self, X, y): + del X, y + self.coef_ = [[0.1]] + return self + + def predict_proba(self, X): + return [[1.0] for _ in range(len(X))] + + with self.assertRaises(IndexError): + self.model.build_proxy_probability_scores( + feature_matrix, + note_labels, + "custom_target", + estimator_factory=lambda: _MalformedProbEstimator(), + ) + + def test_model_negative_sentiment_and_normalization_functions_return_stable_schemas(self): + artifacts = self._build_core_artifacts() + sentiment_scores = self.model.build_negative_sentiment_mistrust_scores( + artifacts["note_corpus"], + sentiment_fn=self._sentiment_fn, + ) + self.assertEqual(sentiment_scores.columns.tolist(), ["hadm_id", "negative_sentiment_score"]) + self.assertEqual(sentiment_scores["hadm_id"].tolist(), sorted(sentiment_scores["hadm_id"].tolist())) + + normalized = self.model.z_normalize_scores( + pd.DataFrame( + [ + {"hadm_id": 1, "score_a": 1.0, "score_b": 10.0}, + {"hadm_id": 2, "score_a": 2.0, "score_b": 10.0}, + {"hadm_id": 3, "score_a": 3.0, "score_b": 10.0}, + ] + ), + columns=["score_a", "score_b"], + ) + self.assertEqual(normalized["hadm_id"].tolist(), [1, 2, 3]) + self.assertTrue((normalized["score_b"] == 0.0).all()) + + def test_model_negative_sentiment_handles_none_and_whitespace_only_notes(self): 
+ note_corpus = pd.DataFrame( + [ + {"hadm_id": 2, "note_text": " "}, + {"hadm_id": 1, "note_text": None}, + ] + ) + seen = [] + + def _sentiment(text): + seen.append(text) + return (0.25, 0.0) + + scores = self.model.build_negative_sentiment_mistrust_scores(note_corpus, sentiment_fn=_sentiment) + self.assertEqual(seen, ["", ""]) + self.assertEqual(scores["hadm_id"].tolist(), [1, 2]) + self.assertEqual(scores["negative_sentiment_score"].tolist(), [-0.25, -0.25]) + + def test_model_z_normalize_scores_handles_all_nan_columns(self): + score_table = pd.DataFrame( + [ + {"hadm_id": 1, "score_a": float("nan")}, + {"hadm_id": 2, "score_a": float("nan")}, + ] + ) + normalized = self.model.z_normalize_scores(score_table, columns=["score_a"]) + self.assertEqual(normalized["hadm_id"].tolist(), [1, 2]) + self.assertTrue((normalized["score_a"] == 0.0).all()) + + def test_model_build_mistrust_score_table_returns_required_schema_sorted_unique_and_float_scores(self): + artifacts = self._build_core_artifacts() + mistrust_scores = artifacts["mistrust_scores"] + self.assertEqual( + mistrust_scores.columns.tolist(), + [ + "hadm_id", + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + ], + ) + self.assertEqual(mistrust_scores["hadm_id"].tolist(), sorted(mistrust_scores["hadm_id"].tolist())) + self.assertEqual(len(mistrust_scores), len(set(mistrust_scores["hadm_id"]))) + for column in mistrust_scores.columns[1:]: + self.assertTrue(pd.api.types.is_float_dtype(mistrust_scores[column])) + + def test_model_summarize_feature_weights_returns_sorted_positive_and_negative_rankings(self): + estimator = _FakeProbEstimator([0.1, 0.9]) + estimator.coef_ = [[0.7, -0.2, 0.1]] + summary = self.model.summarize_feature_weights( + estimator, + ["Education Readiness: No", "Pain Level: None", "State: Alert"], + top_n=2, + ) + self.assertEqual(set(summary.keys()), {"all", "positive", "negative"}) + self.assertEqual(summary["positive"]["feature"].tolist(), ["Education Readiness: 
No", "State: Alert"]) + self.assertEqual(summary["negative"]["feature"].tolist(), ["Pain Level: None", "State: Alert"]) + + def test_model_race_gap_treatment_and_acuity_functions_return_required_schemas(self): + artifacts = self._build_core_artifacts() + race_gap = self.model.run_race_gap_analysis( + artifacts["mistrust_scores"], + artifacts["demographics"], + ) + self.assertEqual( + race_gap.columns.tolist(), + [ + "metric", + "n_black", + "n_white", + "median_black", + "median_white", + "median_gap_black_minus_white", + "statistic", + "pvalue", + "black_median_higher", + ], + ) + + race_treatment = self.model.run_race_based_treatment_analysis( + artifacts["eol_cohort"], + artifacts["treatment_totals"], + ) + self.assertEqual( + race_treatment.columns.tolist(), + [ + "treatment", + "n_black", + "n_white", + "median_black", + "median_white", + "median_gap_black_minus_white", + "statistic", + "pvalue", + ], + ) + + trust_treatment = self.model.run_trust_based_treatment_analysis( + artifacts["eol_cohort"], + artifacts["mistrust_scores"], + artifacts["treatment_totals"], + group_sizes={"total_vent_min": 1, "total_vaso_min": 1}, + ) + self.assertEqual( + trust_treatment.columns.tolist(), + [ + "metric", + "treatment", + "stratification_n", + "n_high", + "n_low", + "median_high", + "median_low", + "median_gap", + "statistic", + "pvalue", + ], + ) + + acuity = self.model.run_acuity_control_analysis( + artifacts["mistrust_scores"], + artifacts["acuity_scores"], + ) + self.assertEqual( + acuity.columns.tolist(), + ["feature_a", "feature_b", "correlation", "pvalue", "n"], + ) + self.assertTrue( + ((acuity["feature_a"] == "oasis") & (acuity["feature_b"] == "sapsii")).any() + or ((acuity["feature_a"] == "sapsii") & (acuity["feature_b"] == "oasis")).any() + ) + + def test_model_run_trust_based_treatment_analysis_handles_invalid_n_and_exact_median_gap(self): + eol = pd.DataFrame( + [ + {"hadm_id": 1, "race": "WHITE"}, + {"hadm_id": 2, "race": "BLACK"}, + {"hadm_id": 3, "race": 
"BLACK"}, + ] + ) + scores = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.1}, + {"hadm_id": 2, "noncompliance_score_z": 0.9}, + {"hadm_id": 3, "noncompliance_score_z": 0.5}, + ] + ) + treatments = pd.DataFrame( + [ + {"hadm_id": 1, "total_vent_min": 10.0}, + {"hadm_id": 2, "total_vent_min": 40.0}, + {"hadm_id": 3, "total_vent_min": 20.0}, + ] + ) + invalid = self.model.run_trust_based_treatment_analysis( + eol, + scores, + treatments, + score_columns=["noncompliance_score_z"], + treatment_columns=["total_vent_min"], + group_sizes={"total_vent_min": 0}, + ) + self.assertTrue(pd.isna(invalid.loc[0, "median_gap"])) + + valid = self.model.run_trust_based_treatment_analysis( + eol, + scores, + treatments, + score_columns=["noncompliance_score_z"], + treatment_columns=["total_vent_min"], + group_sizes={"total_vent_min": 1}, + ) + self.assertEqual(float(valid.loc[0, "median_high"]), 40.0) + self.assertEqual(float(valid.loc[0, "median_low"]), 15.0) + self.assertEqual(float(valid.loc[0, "median_gap"]), 25.0) + + def test_model_evaluate_downstream_predictions_returns_18_rows_and_respects_split_contract(self): + artifacts = self._build_core_artifacts() + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table["code_status_dnr_dni_cmo"] = [1, 0, 1, 0, 1, 0] + final_model_table["in_hospital_mortality"] = [0, 1, 0, 1, 0, 1] + splitter = _SplitRecorder() + auc_recorder = _AUCRecorder(0.8) + results = self.model.evaluate_downstream_predictions( + final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=splitter, + auc_fn=auc_recorder, + repetitions=3, + ) + self.assertEqual(len(results), 18) + self.assertEqual(set(results["task"]), set(self.model.DOWNSTREAM_TASK_MAP.keys())) + self.assertEqual(set(results["configuration"]), set(self.model.DOWNSTREAM_FEATURE_CONFIGS.keys())) + self.assertTrue((results["n_repeats"] == 3).all()) + 
self.assertEqual(splitter.calls[0]["test_size"], 0.4) + self.assertEqual(splitter.calls[0]["random_state"], 0) + self.assertTrue(all(abs(float(value) - 0.8) < 1e-9 for value in results["auc_mean"])) + + def test_model_evaluate_downstream_predictions_drops_null_rows_before_splitting(self): + artifacts = self._build_core_artifacts() + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table.loc[0, "age"] = float("nan") + calls = [] + + def _splitter(X, y, test_size, random_state): + features = X.reset_index(drop=True) + labels = pd.Series(y).reset_index(drop=True) + calls.append( + { + "n_rows": len(features), + "test_size": test_size, + "random_state": random_state, + } + ) + return ( + features.iloc[:3].copy(), + features.iloc[3:].copy(), + labels.iloc[:3].copy(), + labels.iloc[3:].copy(), + ) + + self.model.evaluate_downstream_predictions( + final_model_table, + task_map={"Left AMA": "left_ama"}, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_splitter, + auc_fn=_AUCRecorder(0.7), + repetitions=1, + ) + self.assertEqual(calls[0]["n_rows"], 5) + + def test_model_run_full_eol_mistrust_modeling_returns_expected_sections_and_aligned_outputs(self): + artifacts = self._build_core_artifacts() + outputs = self.model.run_full_eol_mistrust_modeling( + feature_matrix=artifacts["feature_matrix"], + note_labels=artifacts["note_labels"], + note_corpus=artifacts["note_corpus"], + demographics=artifacts["demographics"], + eol_cohort=artifacts["eol_cohort"], + treatment_totals=artifacts["treatment_totals"], + acuity_scores=artifacts["acuity_scores"], + final_model_table=artifacts["final_model_table"], + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=1, + ) + self.assertEqual( 
+ set(outputs.keys()), + { + "mistrust_scores", + "feature_weight_summaries", + "race_gap_results", + "race_treatment_results", + "trust_treatment_results", + "acuity_correlations", + "downstream_auc_results", + }, + ) + self.assertEqual( + outputs["mistrust_scores"]["hadm_id"].tolist(), + artifacts["final_model_table"]["hadm_id"].tolist(), + ) + self.assertEqual(len(outputs["downstream_auc_results"]), 18) + + def test_model_run_full_eol_mistrust_modeling_returns_only_base_outputs_when_optional_inputs_absent(self): + artifacts = self._build_core_artifacts() + outputs = self.model.run_full_eol_mistrust_modeling( + feature_matrix=artifacts["feature_matrix"], + note_labels=artifacts["note_labels"], + note_corpus=artifacts["note_corpus"], + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + repetitions=1, + ) + self.assertEqual(set(outputs.keys()), {"mistrust_scores", "feature_weight_summaries"}) + + def test_dataset_public_functions_raise_clear_errors_for_missing_required_columns(self): + with self.assertRaisesRegex(ValueError, "subject_id"): + self.dataset.build_base_admissions( + self.admissions, + self.patients.drop(columns=["subject_id"]), + ) + + with self.assertRaisesRegex(ValueError, "value"): + self.dataset.build_chartevent_feature_matrix( + self.chartevents.drop(columns=["value"]), + self.d_items, + ) + + artifacts = self._build_core_artifacts() + with self.assertRaisesRegex(ValueError, "noncompliance_score_z"): + self.dataset.build_final_model_table( + demographics=artifacts["demographics"], + all_cohort=artifacts["all_cohort"], + admissions=artifacts["base"], + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=artifacts["mistrust_scores"].drop(columns=["noncompliance_score_z"]), + ) + + raw_tables, materialized_views = self._build_valid_environment() + del raw_tables["admissions"] + with self.assertRaisesRegex(ValueError, "Missing required raw tables"): + 
self.dataset.validate_database_environment(raw_tables, materialized_views) + + def test_dataset_empty_input_contracts_return_stable_schemas(self): + empty_notes = pd.DataFrame(columns=["hadm_id", "category", "text", "iserror"]) + note_corpus = self.dataset.build_note_corpus(empty_notes, all_hadm_ids=[1, 2]) + self.assertEqual(note_corpus.columns.tolist(), ["hadm_id", "note_text"]) + self.assertEqual(note_corpus["hadm_id"].tolist(), [1, 2]) + self.assertEqual(note_corpus["note_text"].tolist(), ["", ""]) + + note_labels = self.dataset.build_note_labels(empty_notes, all_hadm_ids=[1, 2]) + self.assertEqual( + note_labels.columns.tolist(), + ["hadm_id", "noncompliance_label", "autopsy_label"], + ) + self.assertTrue((note_labels["noncompliance_label"] == 0).all()) + self.assertTrue((note_labels["autopsy_label"] == 0).all()) + + empty_treatments = self.dataset.build_treatment_totals( + self.icustays, + pd.DataFrame(columns=["icustay_id", "ventnum", "starttime", "endtime", "duration_hours"]), + pd.DataFrame(columns=["icustay_id", "vasonum", "starttime", "endtime", "duration_hours"]), + ) + self.assertEqual( + empty_treatments.columns.tolist(), + ["hadm_id", "total_vent_min", "total_vaso_min"], + ) + self.assertTrue(empty_treatments.empty) + + empty_acuity = self.dataset.build_acuity_scores( + pd.DataFrame(columns=["hadm_id", "icustay_id", "oasis"]), + pd.DataFrame(columns=["hadm_id", "icustay_id", "sapsii"]), + ) + self.assertEqual(empty_acuity.columns.tolist(), ["hadm_id", "oasis", "sapsii"]) + self.assertTrue(empty_acuity.empty) + + def test_dataset_private_span_merge_helper_covers_empty_single_overlap_and_gap_boundaries(self): + empty = pd.DataFrame(columns=["starttime", "endtime"]) + self.assertEqual(self.dataset._merge_spans_for_hadm(empty), 0.0) + + single = pd.DataFrame( + [{"starttime": pd.Timestamp("2100-01-01 00:00:00"), "endtime": pd.Timestamp("2100-01-01 01:00:00")}] + ) + self.assertEqual(self.dataset._merge_spans_for_hadm(single), 60.0) + + overlap = 
pd.DataFrame( + [ + {"starttime": pd.Timestamp("2100-01-01 00:00:00"), "endtime": pd.Timestamp("2100-01-01 01:00:00")}, + {"starttime": pd.Timestamp("2100-01-01 00:30:00"), "endtime": pd.Timestamp("2100-01-01 01:30:00")}, + ] + ) + self.assertEqual(self.dataset._merge_spans_for_hadm(overlap), 90.0) + + boundary = pd.DataFrame( + [ + {"starttime": pd.Timestamp("2100-01-01 00:00:00"), "endtime": pd.Timestamp("2100-01-01 01:00:00")}, + {"starttime": pd.Timestamp("2100-01-01 11:00:00"), "endtime": pd.Timestamp("2100-01-01 12:00:00")}, + {"starttime": pd.Timestamp("2100-01-01 22:01:00"), "endtime": pd.Timestamp("2100-01-01 23:01:00")}, + ] + ) + self.assertEqual(self.dataset._merge_spans_for_hadm(boundary), 780.0) + + def test_dataset_build_final_model_table_schema_toggles_include_race_and_mistrust(self): + artifacts = self._build_core_artifacts() + baseline_only = self.dataset.build_final_model_table( + demographics=artifacts["demographics"], + all_cohort=artifacts["all_cohort"], + admissions=artifacts["base"], + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=artifacts["mistrust_scores"], + include_race=False, + include_mistrust=False, + ) + self.assertEqual( + baseline_only.columns.tolist(), + [ + "hadm_id", + "age", + "los_days", + "gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", + "left_ama", + "in_hospital_mortality", + "code_status_dnr_dni_cmo", + ], + ) + + race_only = self.dataset.build_final_model_table( + demographics=artifacts["demographics"], + all_cohort=artifacts["all_cohort"], + admissions=artifacts["base"], + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=artifacts["mistrust_scores"], + include_race=True, + include_mistrust=False, + ) + self.assertTrue(all(column in race_only.columns for column in self.model.RACE_FEATURE_COLUMNS)) + self.assertFalse(any(column in race_only.columns for column in self.model.MISTRUST_SCORE_COLUMNS)) + + def 
test_dataset_function_outputs_are_deterministic_across_repeated_runs(self): + base_one = self.dataset.build_base_admissions(self.admissions, self.patients) + base_two = self.dataset.build_base_admissions(self.admissions, self.patients) + pd.testing.assert_frame_equal(base_one, base_two) + + labels_one = self.dataset.build_note_labels(self.noteevents, all_hadm_ids=[101, 102, 103, 104, 105, 106]) + labels_two = self.dataset.build_note_labels(self.noteevents, all_hadm_ids=[101, 102, 103, 104, 105, 106]) + pd.testing.assert_frame_equal(labels_one, labels_two) + + matrix_one = self.dataset.build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + all_hadm_ids=[101, 102, 103, 104, 105, 106], + ) + matrix_two = self.dataset.build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + all_hadm_ids=[101, 102, 103, 104, 105, 106], + ) + pd.testing.assert_frame_equal(matrix_one, matrix_two) + + def test_model_private_metric_helpers_cover_empty_nan_and_small_sample_edges(self): + statistic, pvalue, med_left, med_right, n_left, n_right = self.model._make_metric_result( + pd.Series([], dtype=float), + pd.Series([1.0, 2.0], dtype=float), + ) + self.assertTrue(pd.isna(statistic)) + self.assertTrue(pd.isna(pvalue)) + self.assertTrue(pd.isna(med_left)) + self.assertTrue(pd.isna(med_right)) + self.assertEqual((n_left, n_right), (0, 2)) + + corr, corr_pvalue, n = self.model._pearson_with_pvalue( + pd.Series([1.0]), + pd.Series([2.0]), + ) + self.assertTrue(pd.isna(corr)) + self.assertTrue(pd.isna(corr_pvalue)) + self.assertEqual(n, 1) + + corr_nan, corr_nan_pvalue, n_nan = self.model._pearson_with_pvalue( + pd.Series([float("nan"), float("nan")]), + pd.Series([1.0, 2.0]), + ) + self.assertTrue(pd.isna(corr_nan)) + self.assertTrue(pd.isna(corr_nan_pvalue)) + self.assertEqual(n_nan, 0) + + def test_model_public_functions_raise_clear_errors_for_missing_required_columns(self): + with self.assertRaisesRegex(ValueError, "note_text"): + 
self.model.build_negative_sentiment_mistrust_scores( + pd.DataFrame([{"hadm_id": 1, "text": "oops"}]), + sentiment_fn=self._sentiment_fn, + ) + + with self.assertRaisesRegex(ValueError, "race"): + self.model.run_race_gap_analysis( + pd.DataFrame([{"hadm_id": 1, "noncompliance_score_z": 0.1}]), + pd.DataFrame([{"hadm_id": 1}]), + score_columns=["noncompliance_score_z"], + ) + + with self.assertRaisesRegex(ValueError, "left_ama"): + self.model.evaluate_downstream_predictions( + self._build_core_artifacts()["final_model_table"].drop(columns=["left_ama"]), + task_map={"Left AMA": "left_ama"}, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + repetitions=1, + ) + + def test_model_public_functions_propagate_estimator_errors(self): + artifacts = self._build_core_artifacts() + + class _FitFailureEstimator: + def fit(self, X, y): + del X, y + raise RuntimeError("fit failed") + + with self.assertRaisesRegex(RuntimeError, "fit failed"): + self.model.build_proxy_probability_scores( + artifacts["feature_matrix"], + artifacts["note_labels"], + "noncompliance_label", + estimator_factory=lambda: _FitFailureEstimator(), + ) + + n_features = len(self.model.BASELINE_FEATURE_COLUMNS) + + class _PredictFailureEstimator: + def fit(self, X, y): + del X, y + self.coef_ = [[0.1] * n_features] + return self + + def predict_proba(self, X): + del X + raise RuntimeError("predict failed") + + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + with self.assertRaisesRegex(RuntimeError, "predict failed"): + self.model.evaluate_downstream_predictions( + final_model_table, + task_map={"Left AMA": "left_ama"}, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + estimator_factory=lambda: _PredictFailureEstimator(), + split_fn=_SplitRecorder(), + repetitions=1, + ) + + def test_duplicate_key_contracts_are_enforced_or_deduplicated_as_expected(self): + duplicate_patients = 
pd.concat([self.patients, self.patients.iloc[[0]]], ignore_index=True) + with self.assertRaises(Exception): + self.dataset.build_base_admissions(self.admissions, duplicate_patients) + + artifacts = self._build_core_artifacts() + duplicate_features = pd.concat( + [artifacts["feature_matrix"], artifacts["feature_matrix"].iloc[[0]]], + ignore_index=True, + ) + with self.assertRaises(Exception): + self.model.build_proxy_probability_scores( + duplicate_features, + artifacts["note_labels"], + "noncompliance_label", + estimator_factory=lambda: _FakeProbEstimator([0.1] * (len(duplicate_features))), + ) + + def test_model_function_outputs_are_deterministic_across_repeated_runs(self): + artifacts = self._build_core_artifacts() + scores_one = self.model.build_mistrust_score_table( + artifacts["feature_matrix"], + artifacts["note_labels"], + artifacts["note_corpus"], + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + ) + scores_two = self.model.build_mistrust_score_table( + artifacts["feature_matrix"], + artifacts["note_labels"], + artifacts["note_corpus"], + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + ) + pd.testing.assert_frame_equal(scores_one, scores_two) + + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table["code_status_dnr_dni_cmo"] = [1, 0, 1, 0, 1, 0] + final_model_table["in_hospital_mortality"] = [0, 1, 0, 1, 0, 1] + downstream_one = self.model.evaluate_downstream_predictions( + final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.77), + repetitions=2, + ) + downstream_two = self.model.evaluate_downstream_predictions( + final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.77), + repetitions=2, + ) + 
pd.testing.assert_frame_equal(downstream_one, downstream_two) + + def test_integration_end_to_end_pipeline_runs_from_dataset_sources_to_model_outputs(self): + artifacts = self._build_core_artifacts() + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table["code_status_dnr_dni_cmo"] = [1, 0, 1, 0, 1, 0] + final_model_table["in_hospital_mortality"] = [0, 1, 0, 1, 0, 1] + outputs = self.model.run_full_eol_mistrust_modeling( + feature_matrix=artifacts["feature_matrix"], + note_labels=artifacts["note_labels"], + note_corpus=artifacts["note_corpus"], + demographics=artifacts["demographics"], + eol_cohort=artifacts["eol_cohort"], + treatment_totals=artifacts["treatment_totals"], + acuity_scores=artifacts["acuity_scores"], + final_model_table=final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=1, + ) + self.assertEqual(outputs["mistrust_scores"]["hadm_id"].tolist(), [101, 102, 103, 104, 105, 106]) + self.assertEqual(outputs["downstream_auc_results"].shape[0], 18) + + def test_integration_stage_to_stage_contracts_accept_upstream_outputs_without_translation(self): + base = self.dataset.build_base_admissions(self.admissions, self.patients) + demographics = self.dataset.build_demographics_table(base) + all_cohort = self.dataset.build_all_cohort(base, self.icustays) + feature_matrix = self.dataset.build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + all_hadm_ids=all_cohort["hadm_id"].tolist(), + ) + note_labels = self.dataset.build_note_labels(self.noteevents, all_hadm_ids=all_cohort["hadm_id"].tolist()) + note_corpus = self.dataset.build_note_corpus(self.noteevents, all_hadm_ids=all_cohort["hadm_id"].tolist()) + mistrust_scores = self.model.build_mistrust_score_table( + feature_matrix, + note_labels, + note_corpus, + 
estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + ) + final_model_table = self.dataset.build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=mistrust_scores, + include_race=True, + include_mistrust=True, + ) + results = self.model.evaluate_downstream_predictions( + final_model_table.assign( + left_ama=[0, 1, 0, 1, 0, 1], + code_status_dnr_dni_cmo=[1, 0, 1, 0, 1, 0], + in_hospital_mortality=[0, 1, 0, 1, 0, 1], + ), + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.6), + repetitions=1, + ) + self.assertEqual(results.shape[0], 18) + + def test_integration_optional_input_permutations_return_expected_output_sections(self): + artifacts = self._build_core_artifacts() + base_kwargs = { + "feature_matrix": artifacts["feature_matrix"], + "note_labels": artifacts["note_labels"], + "note_corpus": artifacts["note_corpus"], + "estimator_factory": lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + "sentiment_fn": self._sentiment_fn, + "repetitions": 1, + } + only_required = self.model.run_full_eol_mistrust_modeling(**base_kwargs) + self.assertEqual(set(only_required.keys()), {"mistrust_scores", "feature_weight_summaries"}) + + with_demo = self.model.run_full_eol_mistrust_modeling( + **base_kwargs, + demographics=artifacts["demographics"], + ) + self.assertIn("race_gap_results", with_demo) + + with_treatment = self.model.run_full_eol_mistrust_modeling( + **base_kwargs, + eol_cohort=artifacts["eol_cohort"], + treatment_totals=artifacts["treatment_totals"], + ) + self.assertTrue({"race_treatment_results", "trust_treatment_results"}.issubset(with_treatment)) + + with_acuity = self.model.run_full_eol_mistrust_modeling( + **base_kwargs, + acuity_scores=artifacts["acuity_scores"], + ) + self.assertIn("acuity_correlations", 
with_acuity) + + def test_integration_data_alignment_preserves_shared_hadm_ids_across_artifacts(self): + artifacts = self._build_core_artifacts() + all_ids = set(artifacts["all_cohort"]["hadm_id"]) + self.assertEqual(set(artifacts["feature_matrix"]["hadm_id"]), all_ids) + self.assertEqual(set(artifacts["note_labels"]["hadm_id"]), all_ids) + self.assertEqual(set(artifacts["note_corpus"]["hadm_id"]), all_ids) + self.assertEqual(set(artifacts["mistrust_scores"]["hadm_id"]), all_ids) + self.assertEqual(set(artifacts["final_model_table"]["hadm_id"]), all_ids) + self.assertTrue(set(artifacts["eol_cohort"]["hadm_id"]).issubset(all_ids)) + + def test_integration_ordering_shuffle_invariance_produces_identical_outputs(self): + original = self._build_core_artifacts() + self.admissions = self.admissions.sample(frac=1.0, random_state=1).reset_index(drop=True) + self.patients = self.patients.sample(frac=1.0, random_state=2).reset_index(drop=True) + self.icustays = self.icustays.sample(frac=1.0, random_state=3).reset_index(drop=True) + self.chartevents = self.chartevents.sample(frac=1.0, random_state=4).reset_index(drop=True) + self.noteevents = self.noteevents.sample(frac=1.0, random_state=5).reset_index(drop=True) + shuffled = self._build_core_artifacts() + for key in ("base", "demographics", "all_cohort", "eol_cohort", "feature_matrix", "note_labels", "note_corpus", "treatment_totals", "acuity_scores", "mistrust_scores", "final_model_table"): + pd.testing.assert_frame_equal(original[key], shuffled[key]) + + def test_integration_controlled_nulls_only_reduce_affected_stage_rows(self): + artifacts = self._build_core_artifacts() + note_corpus = artifacts["note_corpus"].copy() + note_corpus.loc[note_corpus["hadm_id"] == 101, "note_text"] = None + scores = self.model.build_mistrust_score_table( + artifacts["feature_matrix"], + artifacts["note_labels"], + note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + 
) + self.assertEqual(len(scores), 6) + + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table.loc[0, "age"] = float("nan") + results = self.model.evaluate_downstream_predictions( + final_model_table, + task_map={"Left AMA": "left_ama"}, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=(lambda X, y, test_size, random_state: (X.iloc[:3], X.iloc[3:], pd.Series(y).iloc[:3], pd.Series(y).iloc[3:])), + auc_fn=_AUCRecorder(0.5), + repetitions=1, + ) + self.assertEqual(int(results.loc[0, "n_rows"]), 5) + + def test_integration_duplicate_cardinality_violation_fails_at_join_boundary(self): + artifacts = self._build_core_artifacts() + duplicate_labels = pd.concat( + [artifacts["note_labels"], artifacts["note_labels"].iloc[[0]]], + ignore_index=True, + ) + with self.assertRaises(Exception): + self.model.build_mistrust_score_table( + artifacts["feature_matrix"], + duplicate_labels, + artifacts["note_corpus"], + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + ) + + def test_integration_write_read_round_trip_artifacts_remain_consumable(self): + deliverables = self._build_deliverable_artifacts() + with tempfile.TemporaryDirectory() as tmpdir: + self.dataset.write_minimal_deliverables(deliverables, tmpdir) + final_model_table = pd.read_csv(Path(tmpdir) / "final_model_table.csv") + mistrust_scores = pd.read_csv(Path(tmpdir) / "mistrust_scores.csv") + acuity_scores = pd.read_csv(Path(tmpdir) / "acuity_scores.csv") + + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table["code_status_dnr_dni_cmo"] = [1, 0, 1, 0, 1, 0] + final_model_table["in_hospital_mortality"] = [0, 1, 0, 1, 0, 1] + + downstream = self.model.evaluate_downstream_predictions( + final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + 
split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.55), + repetitions=1, + ) + acuity = self.model.run_acuity_control_analysis(mistrust_scores, acuity_scores) + self.assertEqual(downstream.shape[0], 18) + self.assertEqual(acuity.shape[1], 5) + + def test_integration_cross_component_dataset_outputs_feed_model_without_translation(self): + artifacts = self._build_core_artifacts() + race_gap = self.model.run_race_gap_analysis(artifacts["mistrust_scores"], artifacts["demographics"]) + acuity = self.model.run_acuity_control_analysis(artifacts["mistrust_scores"], artifacts["acuity_scores"]) + self.assertEqual(set(race_gap["metric"]), set(self.model.MISTRUST_SCORE_COLUMNS)) + self.assertTrue((acuity["n"] >= 2).all()) + + def test_integration_configuration_variants_are_consumable_with_matching_feature_sets(self): + artifacts = self._build_core_artifacts() + base = artifacts["base"] + demographics = artifacts["demographics"] + all_cohort = artifacts["all_cohort"] + mistrust_scores = artifacts["mistrust_scores"] + + tables = { + "baseline": ( + self.dataset.build_final_model_table( + demographics, all_cohort, base, self.chartevents, self.d_items, mistrust_scores, include_race=False, include_mistrust=False + ), + {"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + ), + "race": ( + self.dataset.build_final_model_table( + demographics, all_cohort, base, self.chartevents, self.d_items, mistrust_scores, include_race=True, include_mistrust=False + ), + {"Baseline + Race": self.model.BASELINE_FEATURE_COLUMNS + self.model.RACE_FEATURE_COLUMNS}, + ), + "mistrust": ( + self.dataset.build_final_model_table( + demographics, all_cohort, base, self.chartevents, self.d_items, mistrust_scores, include_race=False, include_mistrust=True + ), + {"Baseline + ALL": self.model.BASELINE_FEATURE_COLUMNS + self.model.MISTRUST_SCORE_COLUMNS}, + ), + } + for table, configs in tables.values(): + table = table.copy() + table["left_ama"] = [0, 1, 0, 1, 0, 1] + results = 
self.model.evaluate_downstream_predictions( + table, + feature_configurations=configs, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.66), + repetitions=1, + ) + self.assertEqual(results.shape[0], 1) + + def test_integration_broken_intermediate_artifact_propagates_clear_error(self): + artifacts = self._build_core_artifacts() + broken_feature_matrix = artifacts["feature_matrix"].drop(columns=["hadm_id"]) + with self.assertRaisesRegex(ValueError, "hadm_id"): + self.model.run_full_eol_mistrust_modeling( + feature_matrix=broken_feature_matrix, + note_labels=artifacts["note_labels"], + note_corpus=artifacts["note_corpus"], + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + repetitions=1, + ) + + def test_integration_full_pipeline_is_reproducible_across_repeated_runs(self): + artifacts = self._build_core_artifacts() + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table["code_status_dnr_dni_cmo"] = [1, 0, 1, 0, 1, 0] + final_model_table["in_hospital_mortality"] = [0, 1, 0, 1, 0, 1] + kwargs = { + "feature_matrix": artifacts["feature_matrix"], + "note_labels": artifacts["note_labels"], + "note_corpus": artifacts["note_corpus"], + "demographics": artifacts["demographics"], + "eol_cohort": artifacts["eol_cohort"], + "treatment_totals": artifacts["treatment_totals"], + "acuity_scores": artifacts["acuity_scores"], + "final_model_table": final_model_table, + "estimator_factory": lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + "sentiment_fn": self._sentiment_fn, + "split_fn": _SplitRecorder(), + "auc_fn": _AUCRecorder(0.61), + "repetitions": 1, + } + first = self.model.run_full_eol_mistrust_modeling(**kwargs) + second = self.model.run_full_eol_mistrust_modeling(**kwargs) + for key in first: + if isinstance(first[key], 
dict): + for inner_key in first[key]: + if isinstance(first[key][inner_key], dict): + for leaf_key in first[key][inner_key]: + pd.testing.assert_frame_equal( + first[key][inner_key][leaf_key], + second[key][inner_key][leaf_key], + ) + else: + pd.testing.assert_frame_equal(first[key][inner_key], second[key][inner_key]) + else: + pd.testing.assert_frame_equal(first[key], second[key]) + + def test_integration_extra_nonbreaking_columns_do_not_change_results(self): + original = self._build_core_artifacts() + self.admissions["unused_admissions_col"] = "x" + self.patients["unused_patients_col"] = "y" + self.icustays["unused_icu_col"] = "z" + self.chartevents["unused_event_col"] = "q" + self.noteevents["unused_note_col"] = "r" + with_extra = self._build_core_artifacts() + pd.testing.assert_frame_equal( + original["demographics"], + with_extra["demographics"], + ) + self.assertTrue(set(original["all_cohort"].columns).issubset(with_extra["all_cohort"].columns)) + pd.testing.assert_frame_equal( + original["all_cohort"][original["all_cohort"].columns], + with_extra["all_cohort"][original["all_cohort"].columns], + ) + pd.testing.assert_frame_equal(original["feature_matrix"], with_extra["feature_matrix"]) + pd.testing.assert_frame_equal(original["note_labels"], with_extra["note_labels"]) + pd.testing.assert_frame_equal(original["note_corpus"], with_extra["note_corpus"]) + pd.testing.assert_frame_equal(original["mistrust_scores"], with_extra["mistrust_scores"]) + self.assertTrue(set(original["base"].columns).issubset(with_extra["base"].columns)) + pd.testing.assert_frame_equal( + original["base"][original["base"].columns], + with_extra["base"][original["base"].columns], + ) + pd.testing.assert_frame_equal( + original["final_model_table"], + with_extra["final_model_table"], + ) + + def test_integration_package_import_and_direct_load_modules_are_compatible(self): + dataset_pkg = importlib.import_module("pyhealth.datasets.eol_mistrust") + model_pkg = 
importlib.import_module("pyhealth.models.eol_mistrust") + self.assertTrue(callable(dataset_pkg.build_final_model_table)) + self.assertTrue(callable(model_pkg.run_full_eol_mistrust_modeling)) + self.assertEqual(model_pkg.MISTRUST_SCORE_COLUMNS, self.model.MISTRUST_SCORE_COLUMNS) + + def test_integration_minimal_boundary_scale_pipeline_runs_with_two_admissions(self): + admissions = pd.DataFrame( + [ + {"hadm_id": 1, "subject_id": 10, "admittime": "2100-01-01 00:00:00", "dischtime": "2100-01-02 00:00:00", "ethnicity": "WHITE", "insurance": "Medicare", "discharge_location": "HOME", "hospital_expire_flag": 0, "has_chartevents_data": 1}, + {"hadm_id": 2, "subject_id": 11, "admittime": "2100-01-02 00:00:00", "dischtime": "2100-01-03 00:00:00", "ethnicity": "BLACK/AFRICAN AMERICAN", "insurance": "Private", "discharge_location": "SNF", "hospital_expire_flag": 0, "has_chartevents_data": 1}, + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 10, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 11, "gender": "F", "dob": "2070-01-01 00:00:00"}, + ] + ) + icustays = pd.DataFrame( + [ + {"hadm_id": 1, "icustay_id": 1, "intime": "2100-01-01 00:00:00", "outtime": "2100-01-01 12:00:00"}, + {"hadm_id": 2, "icustay_id": 2, "intime": "2100-01-02 00:00:00", "outtime": "2100-01-02 12:00:00"}, + ] + ) + d_items = pd.DataFrame([{"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}]) + chartevents = pd.DataFrame( + [ + {"hadm_id": 1, "itemid": 1, "value": "No", "icustay_id": 1}, + {"hadm_id": 2, "itemid": 1, "value": "Yes", "icustay_id": 2}, + ] + ) + noteevents = pd.DataFrame( + [ + {"hadm_id": 1, "category": "Nursing", "text": "noncompliant", "iserror": None}, + {"hadm_id": 2, "category": "Nursing", "text": "autopsy", "iserror": None}, + ] + ) + base = self.dataset.build_base_admissions(admissions, patients) + all_cohort = self.dataset.build_all_cohort(base, icustays) + feature_matrix = self.dataset.build_chartevent_feature_matrix(chartevents, d_items, 
all_hadm_ids=all_cohort["hadm_id"].tolist()) + note_labels = self.dataset.build_note_labels(noteevents, all_hadm_ids=all_cohort["hadm_id"].tolist()) + note_corpus = self.dataset.build_note_corpus(noteevents, all_hadm_ids=all_cohort["hadm_id"].tolist()) + scores = self.model.build_mistrust_score_table( + feature_matrix, + note_labels, + note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.2, 0.8]), + sentiment_fn=self._sentiment_fn, + ) + self.assertEqual(scores["hadm_id"].tolist(), [1, 2]) + + def test_integration_outputs_are_consumable_by_simple_consumer_operations(self): + artifacts = self._build_core_artifacts() + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table["code_status_dnr_dni_cmo"] = [1, 0, 1, 0, 1, 0] + final_model_table["in_hospital_mortality"] = [0, 1, 0, 1, 0, 1] + outputs = self.model.run_full_eol_mistrust_modeling( + feature_matrix=artifacts["feature_matrix"], + note_labels=artifacts["note_labels"], + note_corpus=artifacts["note_corpus"], + demographics=artifacts["demographics"], + eol_cohort=artifacts["eol_cohort"], + treatment_totals=artifacts["treatment_totals"], + acuity_scores=artifacts["acuity_scores"], + final_model_table=final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=1, + ) + grouped = outputs["downstream_auc_results"].groupby("task").size().to_dict() + indexed = outputs["mistrust_scores"].set_index("hadm_id") + trust_counts = outputs["trust_treatment_results"].groupby("treatment").size().to_dict() + self.assertEqual(grouped, {task: 6 for task in self.model.DOWNSTREAM_TASK_MAP.keys()}) + self.assertEqual(indexed.index.tolist(), [101, 102, 103, 104, 105, 106]) + self.assertEqual( + trust_counts, + {"total_vent_min": 3, "total_vaso_min": 3}, + ) + + def 
test_integration_resume_from_existing_artifact_directory_is_idempotent(self): + deliverables = self._build_deliverable_artifacts() + with tempfile.TemporaryDirectory() as tmpdir: + self.dataset.write_minimal_deliverables(deliverables, tmpdir) + first_contents = { + path.name: path.read_text() + for path in Path(tmpdir).glob("*.csv") + } + self.dataset.write_minimal_deliverables(deliverables, tmpdir) + second_contents = { + path.name: path.read_text() + for path in Path(tmpdir).glob("*.csv") + } + self.assertEqual(first_contents, second_contents) + + def test_integration_write_side_effects_do_not_mutate_in_memory_artifacts(self): + deliverables = self._build_deliverable_artifacts() + before = {key: value.copy(deep=True) for key, value in deliverables.items()} + with tempfile.TemporaryDirectory() as tmpdir: + self.dataset.write_minimal_deliverables(deliverables, tmpdir) + for key in deliverables: + pd.testing.assert_frame_equal(deliverables[key], before[key]) + + def test_integration_multi_output_results_are_internally_consistent(self): + artifacts = self._build_core_artifacts() + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table["code_status_dnr_dni_cmo"] = [1, 0, 1, 0, 1, 0] + final_model_table["in_hospital_mortality"] = [0, 1, 0, 1, 0, 1] + outputs = self.model.run_full_eol_mistrust_modeling( + feature_matrix=artifacts["feature_matrix"], + note_labels=artifacts["note_labels"], + note_corpus=artifacts["note_corpus"], + demographics=artifacts["demographics"], + eol_cohort=artifacts["eol_cohort"], + treatment_totals=artifacts["treatment_totals"], + acuity_scores=artifacts["acuity_scores"], + final_model_table=final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=1, + ) + self.assertEqual(set(outputs["race_gap_results"]["metric"]), 
set(self.model.MISTRUST_SCORE_COLUMNS)) + self.assertEqual(set(outputs["trust_treatment_results"]["metric"]), set(self.model.MISTRUST_SCORE_COLUMNS)) + self.assertEqual(set(outputs["downstream_auc_results"]["configuration"]), set(self.model.DOWNSTREAM_FEATURE_CONFIGS.keys())) + self.assertEqual(set(outputs["feature_weight_summaries"].keys()), {"noncompliance", "autopsy"}) + + def test_integration_fixed_golden_workflow_matches_expected_snapshot(self): + artifacts = self._build_core_artifacts() + final_model_table = artifacts["final_model_table"].copy() + final_model_table["left_ama"] = [0, 1, 0, 1, 0, 1] + final_model_table["code_status_dnr_dni_cmo"] = [1, 0, 1, 0, 1, 0] + final_model_table["in_hospital_mortality"] = [0, 1, 0, 1, 0, 1] + downstream = self.model.evaluate_downstream_predictions( + final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=1, + ) + snapshot = { + "all_hadm_ids": artifacts["all_cohort"]["hadm_id"].tolist(), + "eol_hadm_ids": artifacts["eol_cohort"]["hadm_id"].tolist(), + "mistrust_first_row": { + "hadm_id": int(artifacts["mistrust_scores"].iloc[0]["hadm_id"]), + "noncompliance_score_z": round(float(artifacts["mistrust_scores"].iloc[0]["noncompliance_score_z"]), 6), + "autopsy_score_z": round(float(artifacts["mistrust_scores"].iloc[0]["autopsy_score_z"]), 6), + "negative_sentiment_score_z": round(float(artifacts["mistrust_scores"].iloc[0]["negative_sentiment_score_z"]), 6), + }, + "downstream_rows": int(len(downstream)), + "downstream_first": { + "task": downstream.iloc[0]["task"], + "configuration": downstream.iloc[0]["configuration"], + "auc_mean": round(float(downstream.iloc[0]["auc_mean"]), 6), + }, + } + self.assertEqual( + snapshot, + { + "all_hadm_ids": [101, 102, 103, 104, 105, 106], + "eol_hadm_ids": [103, 104], + "mistrust_first_row": { + "hadm_id": 101, + "noncompliance_score_z": -1.511858, + "autopsy_score_z": -1.511858, + 
"negative_sentiment_score_z": 1.414214, + }, + "downstream_rows": 18, + "downstream_first": { + "task": "Left AMA", + "configuration": "Baseline", + "auc_mean": 0.7, + }, + }, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/core/test_eol_mistrust_TrainingAndEvaluation.py b/tests/core/test_eol_mistrust_TrainingAndEvaluation.py new file mode 100644 index 000000000..5157d84f0 --- /dev/null +++ b/tests/core/test_eol_mistrust_TrainingAndEvaluation.py @@ -0,0 +1,1375 @@ +import importlib.util +import unittest +import warnings +from pathlib import Path +from unittest.mock import patch + +import numpy as np +import pandas as pd +from pandas.errors import MergeError + +try: + from sklearn.exceptions import ConvergenceWarning # type: ignore +except ModuleNotFoundError: # pragma: no cover + ConvergenceWarning = None + + +def _load_dataset_module(): + module_path = ( + Path(__file__).resolve().parents[2] / "pyhealth" / "datasets" / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.datasets.eol_mistrust_training_eval_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +def _load_model_module(): + module_path = ( + Path(__file__).resolve().parents[2] / "pyhealth" / "models" / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.models.eol_mistrust_training_eval_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +class _RecordingProbEstimator: + def __init__(self, probabilities, coef_values=None): + self.probabilities = list(probabilities) + self.coef_values = coef_values + self.fit_X = None + self.fit_y = None + self.predicted_shape = None + self.coef_ = None + + def fit(self, X, y): + self.fit_X = X.copy() + self.fit_y = 
pd.Series(y).copy() + if self.coef_values is None: + self.coef_ = [[0.1] * X.shape[1]] + else: + self.coef_ = [list(self.coef_values)] + return self + + def predict_proba(self, X): + probs = self.probabilities[: len(X)] + matrix = [[1.0 - prob, prob] for prob in probs] + self.predicted_shape = (len(matrix), len(matrix[0]) if matrix else 0) + return matrix + + +class _EstimatorFactorySequence: + def __init__(self, estimator_builders): + self.estimator_builders = list(estimator_builders) + self.created = [] + + def __call__(self): + index = min(len(self.created), len(self.estimator_builders) - 1) + estimator = self.estimator_builders[index]() + self.created.append(estimator) + return estimator + + +class _SplitRecorder: + def __init__(self): + self.calls = [] + + def __call__(self, X, y, test_size, random_state): + features = X.reset_index(drop=True) + labels = pd.Series(y).reset_index(drop=True) + n_rows = len(features) + n_test = max(1, int(round(n_rows * test_size))) + n_train = max(1, n_rows - n_test) + if n_train + n_test > n_rows: + n_test = n_rows - n_train + if n_test == 0: + n_test = 1 + n_train = max(0, n_rows - 1) + + self.calls.append( + { + "random_state": random_state, + "test_size": test_size, + "n_rows": n_rows, + "n_train": n_train, + "n_test": n_test, + "train_indices": list(range(n_train)), + "test_indices": list(range(n_train, n_train + n_test)), + "train_hadm_ids": list(features.iloc[:n_train]["hadm_id"]) if "hadm_id" in features.columns else [], + "test_hadm_ids": list(features.iloc[n_train : n_train + n_test]["hadm_id"]) if "hadm_id" in features.columns else [], + } + ) + + return ( + features.iloc[:n_train].copy(), + features.iloc[n_train : n_train + n_test].copy(), + labels.iloc[:n_train].copy(), + labels.iloc[n_train : n_train + n_test].copy(), + ) + + +class _AUCRecorder: + def __init__(self, value=0.75): + self.value = float(value) + self.calls = [] + + def __call__(self, y_true, y_prob): + self.calls.append( + { + "y_true": 
list(pd.Series(y_true)), + "y_prob": list(pd.Series(y_prob)), + "value": self.value, + } + ) + return self.value + + +class _AUCSequenceRecorder: + def __init__(self, values): + self.values = [float(value) for value in values] + self.calls = [] + + def __call__(self, y_true, y_prob): + index = len(self.calls) + value = self.values[index] + self.calls.append( + { + "y_true": list(pd.Series(y_true)), + "y_prob": list(pd.Series(y_prob)), + "value": value, + } + ) + return value + + +class _DeterministicSplitRecorder: + def __init__(self): + self.calls = [] + + def __call__(self, X, y, test_size, random_state): + features = X.reset_index(drop=True) + labels = pd.Series(y).reset_index(drop=True) + rng = np.random.RandomState(random_state) + indices = np.arange(len(features)) + if len(indices) > 0: + rng.shuffle(indices) + n_test = max(1, int(round(len(indices) * test_size))) if len(indices) else 0 + n_train = max(0, len(indices) - n_test) + train_idx = indices[:n_train] + test_idx = indices[n_train:] + train_features = features.iloc[train_idx].reset_index(drop=True) + test_features = features.iloc[test_idx].reset_index(drop=True) + train_labels = labels.iloc[train_idx].reset_index(drop=True) + test_labels = labels.iloc[test_idx].reset_index(drop=True) + self.calls.append( + { + "random_state": random_state, + "train_indices": list(map(int, train_idx)), + "test_indices": list(map(int, test_idx)), + "train_hadm_ids": list(train_features["hadm_id"]) if "hadm_id" in train_features.columns else [], + "test_hadm_ids": list(test_features["hadm_id"]) if "hadm_id" in test_features.columns else [], + "n_rows": len(features), + "n_train": len(train_features), + "n_test": len(test_features), + } + ) + return train_features, test_features, train_labels, test_labels + + +class _FailingEstimator: + def fit(self, X, y): + del X, y + raise RuntimeError("estimator fit failed") + + +class TestEOLMistrustTrainingAndEvaluation(unittest.TestCase): + """Synthetic training/evaluation contract 
tests for the EOL mistrust workflow.""" + + @classmethod + def setUpClass(cls): + cls.dataset = _load_dataset_module() + cls.model = _load_model_module() + + def setUp(self): + hadm_ids = list(range(1001, 1013)) + self.feature_matrix = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "Riker-SAS Scale Score: Agitated": int(index % 2 == 0), + "Education Readiness: No": int(index % 3 == 0), + "Pain Level: 7-Mod to Severe": int(index % 4 in {0, 1}), + "Richmond-RAS Scale: 0 Alert and Calm": int(index % 2 == 1), + "Restraint Device: Soft Limb": int(index % 3 == 1), + "Pain Present: No": int(index % 4 in {2, 3}), + } + for index, hadm_id in enumerate(hadm_ids) + ] + ) + self.note_labels = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_label": int(index % 2 == 0), + "autopsy_label": int(index % 3 == 0), + } + for index, hadm_id in enumerate(hadm_ids) + ] + ) + self.note_corpus = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "note_text": ( + "Patient is non-complian and refused medication." + if index % 2 == 0 + else "Patient remained calm. Date:[**5-1-18**] discussed." 
+ ), + } + for index, hadm_id in enumerate(hadm_ids) + ] + ) + self.demographics = pd.DataFrame( + [ + {"hadm_id": hadm_id, "race": "WHITE" if index < 6 else "BLACK"} + for index, hadm_id in enumerate(hadm_ids) + ] + ) + self.eol_cohort = pd.DataFrame( + [ + {"hadm_id": hadm_id, "race": "WHITE" if index < 6 else "BLACK"} + for index, hadm_id in enumerate(hadm_ids[:10]) + ] + ) + self.treatment_totals = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "total_vent_min": float(200 + 50 * index), + "total_vaso_min": float(20 + 10 * (index % 5)), + } + for index, hadm_id in enumerate(hadm_ids[:10]) + ] + ) + self.acuity_scores = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "oasis": float(15 + index), + "sapsii": float(25 + index), + } + for index, hadm_id in enumerate(hadm_ids) + ] + ) + self.final_model_table = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "age": float(40 + index), + "los_days": float(1.5 + 0.25 * index), + "gender_f": int(index % 2 == 1), + "gender_m": int(index % 2 == 0), + "insurance_private": int(index % 3 == 0), + "insurance_public": int(index % 3 == 1), + "insurance_self_pay": int(index % 3 == 2), + "race_white": int(index < 6), + "race_black": int(index >= 6), + "race_asian": 0, + "race_hispanic": 0, + "race_native_american": 0, + "race_other": 0, + "noncompliance_score_z": float(-1.5 + 0.3 * index), + "autopsy_score_z": float(1.2 - 0.2 * index), + "negative_sentiment_score_z": float(-0.9 + 0.18 * index), + "left_ama": int(index % 2 == 0), + "code_status_dnr_dni_cmo": int(index % 3 == 0), + "in_hospital_mortality": int(index % 4 == 0), + } + for index, hadm_id in enumerate(hadm_ids) + ] + ) + + def _pending_real_data(self, requirement: str) -> None: + self.skipTest(requirement) + + def test_proxy_metric_training_inputs_align_rows_and_binary_labels(self): + factory_non = _EstimatorFactorySequence( + [lambda: _RecordingProbEstimator([0.8] * len(self.feature_matrix))] + ) + non_model = self.model.fit_proxy_mistrust_model( + self.feature_matrix, + 
self.note_labels, + "noncompliance_label", + estimator_factory=factory_non, + ) + + self.assertEqual(non_model.fit_X.shape[0], len(self.feature_matrix)) + self.assertEqual(len(non_model.fit_y), len(self.feature_matrix)) + self.assertTrue(set(non_model.fit_y.unique()).issubset({0, 1})) + + factory_auto = _EstimatorFactorySequence( + [lambda: _RecordingProbEstimator([0.3] * len(self.feature_matrix))] + ) + auto_model = self.model.fit_proxy_mistrust_model( + self.feature_matrix, + self.note_labels, + "autopsy_label", + estimator_factory=factory_auto, + ) + + self.assertEqual(auto_model.fit_X.shape[0], len(self.feature_matrix)) + self.assertEqual(len(auto_model.fit_y), len(self.feature_matrix)) + self.assertTrue(set(auto_model.fit_y.unique()).issubset({0, 1})) + + def test_proxy_metric_predict_proba_outputs_have_two_columns_and_unit_interval(self): + non_estimator = _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]) + auto_estimator = _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]) + + non_scores = self.model.build_proxy_probability_scores( + self.feature_matrix, + self.note_labels, + "noncompliance_label", + estimator_factory=lambda: non_estimator, + ) + auto_scores = self.model.build_proxy_probability_scores( + self.feature_matrix, + self.note_labels, + "autopsy_label", + estimator_factory=lambda: auto_estimator, + ) + + self.assertEqual(non_estimator.predicted_shape, (len(self.feature_matrix), 2)) + self.assertEqual(auto_estimator.predicted_shape, (len(self.feature_matrix), 2)) + self.assertTrue(non_scores["noncompliance_score"].between(0.0, 1.0).all()) + self.assertTrue(auto_scores["autopsy_score"].between(0.0, 1.0).all()) + + def test_synthetic_proxy_models_converge_without_warning_with_default_max_iter(self): + if ConvergenceWarning is None: + self.skipTest("scikit-learn is unavailable in the current environment.") + + feature_matrix = pd.DataFrame( + [ + { + "hadm_id": 2000 + 
index, + "Riker-SAS Scale Score: Agitated": int(index % 2 == 0), + "Education Readiness: No": int(index % 2 == 0), + "Pain Level: 7-Mod to Severe": int(index % 3 == 0), + "Richmond-RAS Scale: 0 Alert and Calm": int(index % 2 == 1), + "Restraint Device: Soft Limb": int(index % 4 in {0, 1}), + "Pain Present: No": int(index % 4 in {2, 3}), + } + for index in range(40) + ] + ) + note_labels = pd.DataFrame( + [ + { + "hadm_id": 2000 + index, + "noncompliance_label": int(index % 2 == 0), + "autopsy_label": int(index % 4 in {0, 1}), + } + for index in range(40) + ] + ) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + self.model.fit_proxy_mistrust_model(feature_matrix, note_labels, "noncompliance_label") + self.model.fit_proxy_mistrust_model(feature_matrix, note_labels, "autopsy_label") + + convergence_warnings = [ + warning for warning in caught if isinstance(warning.message, ConvergenceWarning) + ] + self.assertEqual(convergence_warnings, []) + + def test_mistrust_score_arrays_are_finite_and_z_normalized(self): + factory = _EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ] + ) + scores = self.model.build_mistrust_score_table( + self.feature_matrix, + self.note_labels, + self.note_corpus, + estimator_factory=factory, + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + ) + + for column in self.model.MISTRUST_SCORE_COLUMNS: + values = pd.to_numeric(scores[column], errors="coerce") + self.assertFalse(values.isna().any()) + self.assertTrue(np.isfinite(values).all()) + self.assertLess(abs(float(values.mean())), 0.01) + self.assertGreaterEqual(float(values.std(ddof=0)), 0.99) + self.assertLessEqual(float(values.std(ddof=0)), 1.01) + + def test_synthetic_race_gap_analysis_matches_expected_directional_pattern(self): + mistrust_scores = 
pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": -1.4, "autopsy_score_z": -0.1, "negative_sentiment_score_z": -1.1}, + {"hadm_id": 2, "noncompliance_score_z": -1.0, "autopsy_score_z": 0.0, "negative_sentiment_score_z": -0.8}, + {"hadm_id": 3, "noncompliance_score_z": -0.8, "autopsy_score_z": 0.2, "negative_sentiment_score_z": -0.6}, + {"hadm_id": 4, "noncompliance_score_z": -0.6, "autopsy_score_z": 0.3, "negative_sentiment_score_z": -0.5}, + {"hadm_id": 5, "noncompliance_score_z": 0.8, "autopsy_score_z": 0.1, "negative_sentiment_score_z": 0.7}, + {"hadm_id": 6, "noncompliance_score_z": 1.0, "autopsy_score_z": 0.0, "negative_sentiment_score_z": 0.9}, + {"hadm_id": 7, "noncompliance_score_z": 1.2, "autopsy_score_z": 0.2, "negative_sentiment_score_z": 1.1}, + {"hadm_id": 8, "noncompliance_score_z": 1.4, "autopsy_score_z": 0.3, "negative_sentiment_score_z": 1.3}, + ] + ) + demographics = pd.DataFrame( + [ + {"hadm_id": 1, "race": "WHITE"}, + {"hadm_id": 2, "race": "WHITE"}, + {"hadm_id": 3, "race": "WHITE"}, + {"hadm_id": 4, "race": "WHITE"}, + {"hadm_id": 5, "race": "BLACK"}, + {"hadm_id": 6, "race": "BLACK"}, + {"hadm_id": 7, "race": "BLACK"}, + {"hadm_id": 8, "race": "BLACK"}, + ] + ) + + results = self.model.run_race_gap_analysis(mistrust_scores, demographics).set_index("metric") + self.assertTrue(results.loc["noncompliance_score_z", "black_median_higher"]) + self.assertTrue(results.loc["negative_sentiment_score_z", "black_median_higher"]) + self.assertLess(float(results.loc["noncompliance_score_z", "pvalue"]), 0.05) + self.assertLess(float(results.loc["negative_sentiment_score_z", "pvalue"]), 0.05) + self.assertGreater(float(results.loc["autopsy_score_z", "pvalue"]), 0.05) + + def test_downstream_feature_configurations_match_required_widths_and_are_finite(self): + configs = self.model.get_downstream_feature_configurations() + self.assertEqual(len(configs["Baseline"]), 7) + self.assertEqual(len(configs["Baseline + ALL"]), 16) + + for columns in 
configs.values(): + values = self.final_model_table[columns].apply(pd.to_numeric, errors="coerce") + self.assertFalse(values.isna().any().any()) + self.assertTrue(np.isfinite(values.to_numpy(dtype=float)).all()) + + def test_downstream_evaluation_uses_100_random_states_and_approximate_60_40_splits(self): + split_recorder = _SplitRecorder() + auc_recorder = _AUCRecorder(0.76) + + results = self.model.evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=split_recorder, + auc_fn=auc_recorder, + repetitions=100, + ) + + self.assertEqual(len(split_recorder.calls), 100) + self.assertEqual({call["random_state"] for call in split_recorder.calls}, set(range(100))) + self.assertTrue(all(call["test_size"] == 0.4 for call in split_recorder.calls)) + self.assertTrue(all(call["n_train"] == 7 for call in split_recorder.calls)) + self.assertTrue(all(call["n_test"] == 5 for call in split_recorder.calls)) + self.assertEqual(int(results.loc[0, "n_repeats"]), 100) + + def test_downstream_models_run_without_warning_and_auc_values_are_non_degenerate(self): + split_recorder = _SplitRecorder() + auc_recorder = _AUCRecorder(0.74) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + results = self.model.evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline + ALL": self.model.get_downstream_feature_configurations()["Baseline + ALL"]}, + task_map={"Code Status": "code_status_dnr_dni_cmo"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=split_recorder, + auc_fn=auc_recorder, + repetitions=100, + ) + + if ConvergenceWarning is not None: + convergence_warnings = [ + warning for warning in caught if isinstance(warning.message, ConvergenceWarning) + ] + 
self.assertEqual(convergence_warnings, []) + self.assertTrue(all(0.5 <= call["value"] <= 1.0 for call in auc_recorder.calls)) + self.assertTrue(0.5 <= float(results.loc[0, "auc_mean"]) <= 1.0) + + def test_training_and_evaluation_pipeline_returns_expected_sections(self): + factory = _EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ] + ) + outputs = self.model.run_full_eol_mistrust_modeling( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + note_corpus=self.note_corpus, + demographics=self.demographics, + eol_cohort=self.eol_cohort, + treatment_totals=self.treatment_totals, + acuity_scores=self.acuity_scores, + final_model_table=self.final_model_table, + estimator_factory=factory, + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.77), + repetitions=5, + ) + + self.assertEqual( + set(outputs.keys()), + { + "mistrust_scores", + "feature_weight_summaries", + "race_gap_results", + "race_treatment_results", + "trust_treatment_results", + "acuity_correlations", + "downstream_auc_results", + }, + ) + + def test_proxy_metric_models_use_full_all_cohort_and_never_call_split_function(self): + factory = _EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9] * len(self.feature_matrix)), + lambda: _RecordingProbEstimator([0.1] * len(self.feature_matrix)), + ] + ) + + with patch.object(self.model, "train_test_split", side_effect=AssertionError("split should not be used")): + scores = self.model.build_mistrust_score_table( + self.feature_matrix, + self.note_labels, + self.note_corpus, + estimator_factory=factory, + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + ) + + self.assertEqual(scores["hadm_id"].tolist(), self.feature_matrix["hadm_id"].tolist()) + 
self.assertEqual(factory.created[0].fit_X.shape[0], len(self.feature_matrix)) + self.assertEqual(factory.created[1].fit_X.shape[0], len(self.feature_matrix)) + + def test_sentiment_metric_does_not_instantiate_or_fit_any_estimator(self): + with patch.object(self.model, "LogisticRegression", side_effect=AssertionError("estimator should not be used")): + scores = self.model.build_negative_sentiment_mistrust_scores( + self.note_corpus, + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + ) + self.assertEqual(scores.columns.tolist(), ["hadm_id", "negative_sentiment_score"]) + + def test_downstream_train_and_test_partitions_are_disjoint_for_every_run(self): + split_recorder = _DeterministicSplitRecorder() + self.model.evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=split_recorder, + auc_fn=_AUCRecorder(0.76), + repetitions=10, + ) + + for call in split_recorder.calls: + self.assertTrue(set(call["train_indices"]).isdisjoint(set(call["test_indices"]))) + + def test_downstream_splits_are_reproducible_for_same_random_state(self): + recorder_one = _DeterministicSplitRecorder() + recorder_two = _DeterministicSplitRecorder() + kwargs = { + "final_model_table": self.final_model_table, + "feature_configurations": {"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + "task_map": {"Left AMA": "left_ama"}, + "estimator_factory": lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + "auc_fn": _AUCRecorder(0.74), + "repetitions": 5, + } + self.model.evaluate_downstream_predictions(split_fn=recorder_one, **kwargs) + self.model.evaluate_downstream_predictions(split_fn=recorder_two, **kwargs) + + self.assertEqual(recorder_one.calls, recorder_two.calls) + + def test_downstream_drops_missing_rows_before_splitting_and_keeps_n_stable(self): + table = 
self.final_model_table.copy() + table.loc[0, "left_ama"] = np.nan + table.loc[1, "age"] = np.nan + split_recorder = _SplitRecorder() + + results = self.model.evaluate_downstream_predictions( + table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7]), + split_fn=split_recorder, + auc_fn=_AUCRecorder(0.71), + repetitions=10, + ) + + self.assertTrue(all(call["n_rows"] == 10 for call in split_recorder.calls)) + self.assertEqual(int(results.loc[0, "n_rows"]), 10) + + def test_feature_configurations_do_not_include_any_target_column(self): + target_columns = set(self.model.get_downstream_task_map().values()) + for columns in self.model.get_downstream_feature_configurations().values(): + self.assertTrue(target_columns.isdisjoint(set(columns))) + + def test_proxy_feature_weight_summary_preserves_feature_to_coefficient_alignment(self): + coef_values = [0.4, 0.1, -0.3, -0.5, 0.2, 0.0] + summary = self.model.build_noncompliance_feature_weight_summary( + self.feature_matrix, + self.note_labels, + estimator_factory=lambda: _RecordingProbEstimator([0.8] * len(self.feature_matrix), coef_values=coef_values), + top_n=6, + ) + + by_feature = summary["all"].set_index("feature")["weight"].to_dict() + feature_columns = [column for column in self.feature_matrix.columns if column != "hadm_id"] + expected = dict(zip(feature_columns, coef_values)) + self.assertEqual(by_feature, expected) + + def test_weight_aggregation_output_schema_placeholder(self): + self.skipTest( + "Average coefficient aggregation across 100 downstream runs is not yet exposed by a dedicated public API." 
+ ) + + def test_mistrust_score_arrays_preserve_hadm_alignment_after_training_and_normalization(self): + factory = _EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ] + ) + scores = self.model.build_mistrust_score_table( + self.feature_matrix, + self.note_labels, + self.note_corpus, + estimator_factory=factory, + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + ) + self.assertEqual(scores["hadm_id"].tolist(), sorted(self.feature_matrix["hadm_id"].tolist())) + + def test_mistrust_score_normalization_happens_after_raw_score_generation(self): + non = self.model.build_proxy_probability_scores( + self.feature_matrix, + self.note_labels, + "noncompliance_label", + estimator_factory=lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + ) + auto = self.model.build_proxy_probability_scores( + self.feature_matrix, + self.note_labels, + "autopsy_label", + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ) + sentiment = self.model.build_negative_sentiment_mistrust_scores( + self.note_corpus, + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + ) + raw = non.merge(auto, on="hadm_id").merge(sentiment, on="hadm_id") + manual = self.model.z_normalize_scores( + raw, + columns=["noncompliance_score", "autopsy_score", "negative_sentiment_score"], + ).rename( + columns={ + "noncompliance_score": "noncompliance_score_z", + "autopsy_score": "autopsy_score_z", + "negative_sentiment_score": "negative_sentiment_score_z", + } + ) + + combined = self.model.build_mistrust_score_table( + self.feature_matrix, + self.note_labels, + self.note_corpus, + estimator_factory=_EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 
0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ] + ), + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + ) + pd.testing.assert_frame_equal(manual.reset_index(drop=True), combined.reset_index(drop=True)) + + def test_mann_whitney_is_called_two_sided(self): + calls = [] + + def _fake_mannwhitneyu(left, right, alternative): + calls.append({"left": list(left), "right": list(right), "alternative": alternative}) + + class _Result: + statistic = 1.0 + pvalue = 0.04 + + return _Result() + + with patch.object(self.model, "mannwhitneyu", side_effect=_fake_mannwhitneyu): + self.model.run_race_gap_analysis( + pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.1, "autopsy_score_z": 0.2, "negative_sentiment_score_z": 0.3}, + {"hadm_id": 2, "noncompliance_score_z": 0.4, "autopsy_score_z": 0.5, "negative_sentiment_score_z": 0.6}, + ] + ), + pd.DataFrame( + [ + {"hadm_id": 1, "race": "WHITE"}, + {"hadm_id": 2, "race": "BLACK"}, + ] + ), + score_columns=["noncompliance_score_z"], + ) + + self.assertEqual([call["alternative"] for call in calls], ["two-sided"]) + + def test_pearson_is_called_after_inner_join_and_pairwise_dropna(self): + calls = [] + + def _fake_pearsonr(left, right): + calls.append((list(left), list(right))) + return 0.2, 0.5 + + mistrust = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.1}, + {"hadm_id": 2, "noncompliance_score_z": np.nan}, + {"hadm_id": 3, "noncompliance_score_z": 0.3}, + {"hadm_id": 4, "noncompliance_score_z": 0.4}, + ] + ) + acuity = pd.DataFrame( + [ + {"hadm_id": 2, "oasis": 20.0}, + {"hadm_id": 3, "oasis": 30.0}, + {"hadm_id": 4, "oasis": 40.0}, + {"hadm_id": 5, "oasis": 50.0}, + ] + ) + + with patch.object(self.model, "pearsonr", side_effect=_fake_pearsonr): + self.model.run_acuity_control_analysis( + mistrust, + acuity, + score_columns=["noncompliance_score_z"], + acuity_columns=("oasis",), + ) + + 
self.assertEqual(calls, [([0.3, 0.4], [30.0, 40.0])]) + + def test_cdf_plot_helper_placeholder(self): + self.skipTest( + "CDF visualization helpers are not yet exposed by a public plotting API in the EOL mistrust model module." + ) + + def test_trust_stratification_direction_is_locked_to_top_n_as_high_mistrust(self): + mistrust = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.9}, + {"hadm_id": 2, "noncompliance_score_z": 0.8}, + {"hadm_id": 3, "noncompliance_score_z": 0.2}, + {"hadm_id": 4, "noncompliance_score_z": 0.1}, + ] + ) + eol = pd.DataFrame( + [ + {"hadm_id": 1, "race": "WHITE"}, + {"hadm_id": 2, "race": "BLACK"}, + {"hadm_id": 3, "race": "WHITE"}, + {"hadm_id": 4, "race": "BLACK"}, + ] + ) + treatments = pd.DataFrame( + [ + {"hadm_id": 1, "total_vent_min": 100.0, "total_vaso_min": 10.0}, + {"hadm_id": 2, "total_vent_min": 90.0, "total_vaso_min": 9.0}, + {"hadm_id": 3, "total_vent_min": 30.0, "total_vaso_min": 3.0}, + {"hadm_id": 4, "total_vent_min": 20.0, "total_vaso_min": 2.0}, + ] + ) + + result = self.model.run_trust_based_treatment_analysis( + eol, + mistrust, + treatments, + score_columns=["noncompliance_score_z"], + treatment_columns=("total_vent_min",), + group_sizes={"total_vent_min": 2}, + ) + row = result.iloc[0] + self.assertEqual(int(row["n_high"]), 2) + self.assertEqual(int(row["n_low"]), 2) + self.assertEqual(float(row["median_high"]), 95.0) + self.assertEqual(float(row["median_low"]), 25.0) + self.assertEqual(float(row["median_gap"]), 70.0) + + def test_all_six_configs_are_evaluated_for_all_three_tasks_even_with_fewer_usable_rows(self): + table = self.final_model_table.copy() + table.loc[0, "code_status_dnr_dni_cmo"] = np.nan + table.loc[1, "autopsy_score_z"] = np.nan + + results = self.model.evaluate_downstream_predictions( + table, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.72), + repetitions=1, + ) + + self.assertEqual(len(results), 
18) + self.assertEqual(set(results["configuration"]), set(self.model.get_downstream_feature_configurations())) + self.assertEqual(set(results["task"]), set(self.model.get_downstream_task_map())) + by_task = results.groupby("task")["n_rows"].max().to_dict() + self.assertGreater(by_task["Left AMA"], results.loc[results["task"] == "Code Status", "n_rows"].min()) + + def test_downstream_auc_output_schema_is_fixed_and_complete(self): + results = self.model.evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.71), + repetitions=2, + ) + self.assertEqual( + results.columns.tolist(), + [ + "task", + "configuration", + "target_column", + "n_rows", + "n_features", + "n_repeats", + "n_valid_auc", + "auc_mean", + "auc_std", + ], + ) + + def test_average_coefficient_output_schema_placeholder(self): + self.skipTest( + "Average coefficient summaries across downstream runs are not yet emitted by a dedicated public API." 
+ ) + + def test_downstream_auc_uses_test_set_probabilities_not_labels_or_train_outputs(self): + split_recorder = _SplitRecorder() + auc_recorder = _AUCRecorder(0.79) + + self.model.evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.21, 0.81, 0.31, 0.71, 0.41]), + split_fn=split_recorder, + auc_fn=auc_recorder, + repetitions=1, + ) + + self.assertEqual(auc_recorder.calls[0]["y_prob"], [0.21, 0.81, 0.31, 0.71, 0.41][: split_recorder.calls[0]["n_test"]]) + self.assertNotEqual(auc_recorder.calls[0]["y_prob"], auc_recorder.calls[0]["y_true"]) + + def test_single_class_splits_are_counted_via_n_valid_auc(self): + table = self.final_model_table.copy() + table["left_ama"] = 0 + results = self.model.evaluate_downstream_predictions( + table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=5, + ) + row = results.iloc[0] + self.assertEqual(int(row["n_valid_auc"]), 0) + self.assertTrue(pd.isna(row["auc_mean"])) + self.assertTrue(pd.isna(row["auc_std"])) + + def test_proxy_training_rejects_duplicate_hadm_ids(self): + duplicated = pd.concat([self.feature_matrix, self.feature_matrix.iloc[[0]]], ignore_index=True) + with self.assertRaises(MergeError): + self.model.build_proxy_probability_scores( + duplicated, + self.note_labels, + "noncompliance_label", + estimator_factory=lambda: _RecordingProbEstimator([0.8] * len(duplicated)), + ) + + def test_empty_usable_cohort_returns_nan_auc_row(self): + table = self.final_model_table.copy() + table["left_ama"] = np.nan + results = self.model.evaluate_downstream_predictions( + table, + feature_configurations={"Baseline": 
self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=3, + ) + row = results.iloc[0] + self.assertEqual(int(row["n_rows"]), 0) + self.assertEqual(int(row["n_valid_auc"]), 0) + self.assertTrue(pd.isna(row["auc_mean"])) + self.assertTrue(pd.isna(row["auc_std"])) + + def test_estimator_fit_failures_propagate(self): + with self.assertRaisesRegex(RuntimeError, "estimator fit failed"): + self.model.build_proxy_probability_scores( + self.feature_matrix, + self.note_labels, + "noncompliance_label", + estimator_factory=lambda: _FailingEstimator(), + ) + + with self.assertRaisesRegex(RuntimeError, "estimator fit failed"): + self.model.evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _FailingEstimator(), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=1, + ) + + def test_statistical_backend_failures_propagate(self): + with patch.object(self.model, "mannwhitneyu", side_effect=RuntimeError("mw failed")): + with self.assertRaisesRegex(RuntimeError, "mw failed"): + self.model.run_race_gap_analysis( + pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.1, "autopsy_score_z": 0.2, "negative_sentiment_score_z": 0.3}, + {"hadm_id": 2, "noncompliance_score_z": 0.4, "autopsy_score_z": 0.5, "negative_sentiment_score_z": 0.6}, + ] + ), + pd.DataFrame( + [ + {"hadm_id": 1, "race": "WHITE"}, + {"hadm_id": 2, "race": "BLACK"}, + ] + ), + score_columns=["noncompliance_score_z"], + ) + + with patch.object(self.model, "pearsonr", side_effect=RuntimeError("pearson failed")): + with self.assertRaisesRegex(RuntimeError, "pearson failed"): + self.model.run_acuity_control_analysis( + pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.1}, + {"hadm_id": 2, 
"noncompliance_score_z": 0.2}, + ] + ), + pd.DataFrame( + [ + {"hadm_id": 1, "oasis": 1.0}, + {"hadm_id": 2, "oasis": 2.0}, + ] + ), + score_columns=["noncompliance_score_z"], + acuity_columns=("oasis",), + ) + + def test_repeated_runs_do_not_mutate_input_frames(self): + feature_matrix = self.feature_matrix.copy(deep=True) + note_labels = self.note_labels.copy(deep=True) + note_corpus = self.note_corpus.copy(deep=True) + final_model_table = self.final_model_table.copy(deep=True) + + self.model.build_mistrust_score_table( + feature_matrix, + note_labels, + note_corpus, + estimator_factory=_EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ] + ), + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + ) + self.model.evaluate_downstream_predictions( + final_model_table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.72), + repetitions=2, + ) + + pd.testing.assert_frame_equal(feature_matrix, self.feature_matrix) + pd.testing.assert_frame_equal(note_labels, self.note_labels) + pd.testing.assert_frame_equal(note_corpus, self.note_corpus) + pd.testing.assert_frame_equal(final_model_table, self.final_model_table) + + def test_proxy_and_downstream_paths_prevent_label_and_target_leakage(self): + proxy_estimator = _RecordingProbEstimator([0.8] * len(self.feature_matrix)) + self.model.build_proxy_probability_scores( + self.feature_matrix, + self.note_labels.assign(left_ama=1), + "noncompliance_label", + estimator_factory=lambda: proxy_estimator, + ) + self.assertEqual( + proxy_estimator.fit_X.columns.tolist(), + [column for column in self.feature_matrix.columns if column != "hadm_id"], + 
) + + downstream_factory = _EstimatorFactorySequence( + [lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4])] + ) + self.model.evaluate_downstream_predictions( + self.final_model_table.assign(noncompliance_label=1, autopsy_label=0), + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=downstream_factory, + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=1, + ) + downstream_columns = downstream_factory.created[0].fit_X.columns.tolist() + self.assertEqual(downstream_columns, self.model.BASELINE_FEATURE_COLUMNS) + self.assertTrue( + {"left_ama", "code_status_dnr_dni_cmo", "in_hospital_mortality", "noncompliance_label", "autopsy_label"}.isdisjoint( + set(downstream_columns) + ) + ) + + def test_downstream_result_n_features_exactly_match_configuration_widths(self): + results = self.model.evaluate_downstream_predictions( + self.final_model_table, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.72), + repetitions=1, + ) + + expected_widths = { + name: len(columns) + for name, columns in self.model.get_downstream_feature_configurations().items() + } + for row in results.itertuples(index=False): + self.assertEqual(int(row.n_features), expected_widths[row.configuration]) + + def test_downstream_result_target_columns_exactly_match_task_map(self): + results = self.model.evaluate_downstream_predictions( + self.final_model_table, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.72), + repetitions=1, + ) + + expected_targets = self.model.get_downstream_task_map() + for row in results.itertuples(index=False): + self.assertEqual(row.target_column, expected_targets[row.task]) + + def test_downstream_result_task_and_configuration_row_order_is_stable(self): + results = self.model.evaluate_downstream_predictions( 
+ self.final_model_table, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.72), + repetitions=1, + ) + + expected_order = [ + (task_name, config_name) + for task_name in self.model.get_downstream_task_map().keys() + for config_name in self.model.get_downstream_feature_configurations().keys() + ] + actual_order = list(zip(results["task"], results["configuration"])) + self.assertEqual(actual_order, expected_order) + + def test_downstream_auc_std_is_zero_when_auc_backend_returns_constant_value(self): + results = self.model.evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.73), + repetitions=5, + ) + self.assertEqual(float(results.loc[0, "auc_std"]), 0.0) + + def test_downstream_auc_mean_and_std_match_known_per_run_values(self): + auc_values = [0.61, 0.63, 0.67, 0.69, 0.70] + auc_recorder = _AUCSequenceRecorder(auc_values) + results = self.model.evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.model.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _RecordingProbEstimator([0.2, 0.8, 0.3, 0.7, 0.4]), + split_fn=_SplitRecorder(), + auc_fn=auc_recorder, + repetitions=len(auc_values), + ) + + self.assertAlmostEqual(float(results.loc[0, "auc_mean"]), float(np.mean(auc_values))) + self.assertAlmostEqual(float(results.loc[0, "auc_std"]), float(np.std(auc_values, ddof=0))) + + def test_mistrust_score_table_is_deterministic_under_shuffled_input_rows(self): + factory_one = _EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 
0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ] + ) + factory_two = _EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ] + ) + + shuffled_features = self.feature_matrix.sample(frac=1.0, random_state=7).reset_index(drop=True) + shuffled_labels = self.note_labels.sample(frac=1.0, random_state=11).reset_index(drop=True) + shuffled_notes = self.note_corpus.sample(frac=1.0, random_state=13).reset_index(drop=True) + + scores_one = self.model.build_mistrust_score_table( + self.feature_matrix, + self.note_labels, + self.note_corpus, + estimator_factory=factory_one, + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + ) + scores_two = self.model.build_mistrust_score_table( + shuffled_features, + shuffled_labels, + shuffled_notes, + estimator_factory=factory_two, + sentiment_fn=lambda text: (-0.6 if "non" in text else 0.2, 0.0), + ) + pd.testing.assert_frame_equal(scores_one.reset_index(drop=True), scores_two.reset_index(drop=True)) + + def test_race_gap_analysis_ignores_other_races_even_with_extreme_values(self): + mistrust = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.1}, + {"hadm_id": 2, "noncompliance_score_z": 0.2}, + {"hadm_id": 3, "noncompliance_score_z": 0.3}, + {"hadm_id": 4, "noncompliance_score_z": 999.0}, + ] + ) + demographics = pd.DataFrame( + [ + {"hadm_id": 1, "race": "WHITE"}, + {"hadm_id": 2, "race": "BLACK"}, + {"hadm_id": 3, "race": "BLACK"}, + {"hadm_id": 4, "race": "ASIAN"}, + ] + ) + result = self.model.run_race_gap_analysis( + mistrust, + demographics, + score_columns=["noncompliance_score_z"], + ).iloc[0] + self.assertEqual(int(result["n_black"]), 2) + self.assertEqual(int(result["n_white"]), 1) + self.assertEqual(float(result["median_black"]), 0.25) + self.assertEqual(float(result["median_white"]), 0.1) + + def 
test_trust_based_group_size_override_takes_precedence_over_race_based_counts(self): + mistrust = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.9}, + {"hadm_id": 2, "noncompliance_score_z": 0.8}, + {"hadm_id": 3, "noncompliance_score_z": 0.7}, + {"hadm_id": 4, "noncompliance_score_z": 0.1}, + ] + ) + eol = pd.DataFrame( + [ + {"hadm_id": 1, "race": "WHITE"}, + {"hadm_id": 2, "race": "WHITE"}, + {"hadm_id": 3, "race": "BLACK"}, + {"hadm_id": 4, "race": "BLACK"}, + ] + ) + treatments = pd.DataFrame( + [ + {"hadm_id": 1, "total_vent_min": 50.0, "total_vaso_min": 5.0}, + {"hadm_id": 2, "total_vent_min": 40.0, "total_vaso_min": 4.0}, + {"hadm_id": 3, "total_vent_min": 30.0, "total_vaso_min": 3.0}, + {"hadm_id": 4, "total_vent_min": 20.0, "total_vaso_min": 2.0}, + ] + ) + + result = self.model.run_trust_based_treatment_analysis( + eol, + mistrust, + treatments, + score_columns=["noncompliance_score_z"], + treatment_columns=("total_vent_min",), + group_sizes={"total_vent_min": 1}, + ).iloc[0] + self.assertEqual(int(result["stratification_n"]), 1) + self.assertEqual(int(result["n_high"]), 1) + self.assertEqual(int(result["n_low"]), 3) + + def test_acuity_correlation_output_contains_each_pair_exactly_once(self): + results = self.model.run_acuity_control_analysis( + self.final_model_table[["hadm_id", *self.model.MISTRUST_SCORE_COLUMNS]], + self.acuity_scores, + ) + pairs = [tuple(sorted((row.feature_a, row.feature_b))) for row in results.itertuples(index=False)] + self.assertEqual(len(pairs), len(set(pairs))) + + def test_run_full_modeling_is_deterministic_with_same_mocks(self): + kwargs = { + "feature_matrix": self.feature_matrix, + "note_labels": self.note_labels, + "note_corpus": self.note_corpus, + "demographics": self.demographics, + "eol_cohort": self.eol_cohort, + "treatment_totals": self.treatment_totals, + "acuity_scores": self.acuity_scores, + "final_model_table": self.final_model_table, + "sentiment_fn": lambda text: (-0.6 if "non" in text else 0.2, 
0.0), + "split_fn": _DeterministicSplitRecorder(), + "auc_fn": _AUCRecorder(0.77), + "repetitions": 3, + } + outputs_one = self.model.run_full_eol_mistrust_modeling( + estimator_factory=_EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ] + ), + **kwargs, + ) + outputs_two = self.model.run_full_eol_mistrust_modeling( + estimator_factory=_EstimatorFactorySequence( + [ + lambda: _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]), + lambda: _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]), + ] + ), + **kwargs, + ) + + def _assert_nested_equal(left, right): + if isinstance(left, pd.DataFrame): + pd.testing.assert_frame_equal(left, right) + return + if isinstance(left, dict): + self.assertEqual(set(left.keys()), set(right.keys())) + for nested_key in left: + _assert_nested_equal(left[nested_key], right[nested_key]) + return + self.assertEqual(left, right) + + for key in outputs_one: + _assert_nested_equal(outputs_one[key], outputs_two[key]) + + def test_real_data_noncompliance_and_autopsy_training_matrix_matches_expected_scale(self): + self._pending_real_data( + "Noncompliance/autopsy proxy training on the ALL cohort should use about 48,273 rows, about 620 binary features, and binary labels aligned one-to-one with the feature matrix." + ) + + def test_real_data_proxy_models_converge_and_retain_nonzero_weights(self): + self._pending_real_data( + "Noncompliance and autopsy proxy logistic models should converge with max_iter=1000 and retain at least 5 nonzero coefficients each on real MIMIC-III data." 
+ ) + + def test_real_data_proxy_probability_outputs_and_score_arrays_are_finite(self): + self._pending_real_data( + "Real-data proxy predict_proba outputs should have shape (n_patients, 2), values in [0, 1], and all score arrays should be finite with no nulls or infinities." + ) + + def test_real_data_proxy_weight_sanity_matches_expected_noncompliance_signals(self): + self._pending_real_data( + "The strongest noncompliance coefficients on real data should point to agitation/Riker positively and alert/calm negatively." + ) + + def test_real_data_proxy_weight_sanity_matches_expected_autopsy_signals(self): + self._pending_real_data( + "The strongest autopsy coefficients on real data should point to restraint-related features positively and pain/proxy-related features negatively." + ) + + def test_real_data_mistrust_scores_show_expected_black_white_gap_pattern(self): + self._pending_real_data( + "On real data, Black admissions should have significantly higher noncompliance and sentiment mistrust scores, while autopsy mistrust should remain non-significant." + ) + + def test_real_data_downstream_labels_match_expected_counts_and_positive_rates(self): + self._pending_real_data( + "Left AMA, Code Status, and in-hospital mortality labels should match the expected real-data cohort sizes and positive-rate bands." + ) + + def test_real_data_downstream_feature_tables_are_complete_and_finite(self): + self._pending_real_data( + "Baseline and Baseline+ALL downstream feature sets should have exactly 7 and 16 columns respectively, with no nulls or infinities in any used feature column." + ) + + def test_real_data_downstream_split_loop_uses_expected_100_seeded_60_40_splits(self): + self._pending_real_data( + "Each real-data downstream experiment should use 100 distinct random seeds (0..99) and approximately 60/40 train/test splits." 
+ ) + + def test_real_data_downstream_models_converge_and_auc_runs_are_non_degenerate(self): + self._pending_real_data( + "Every real-data downstream model fit should converge without warning, and every single-run AUC should stay between 0.5 and 1.0." + ) + + def test_real_data_race_based_treatment_disparity_matches_expected_counts_and_direction(self): + self._pending_real_data( + "Race-based treatment disparity on real EOL admissions should match the expected White/Black sample sizes, p-values, and median-gap directions for ventilation and vasopressors." + ) + + def test_real_data_race_based_treatment_cdf_plots_have_expected_visual_elements(self): + self._pending_real_data( + "Real-data ventilation and vasopressor CDF plots should each contain exactly two curves and two dotted median lines." + ) + + def test_real_data_trust_based_group_sizes_match_black_reference_counts(self): + self._pending_real_data( + "For each metric-by-treatment pair, the real-data high-mistrust group size should equal the corresponding Black group size from the race-based treatment analysis." + ) + + def test_real_data_trust_based_disparity_matches_expected_pvalues_and_median_gaps(self): + self._pending_real_data( + "Real-data trust-based disparity results should match the expected p-value and median-gap bands across noncompliance, autopsy, and sentiment metrics for ventilation and vasopressors." + ) + + def test_real_data_trust_based_ventilation_gap_exceeds_race_based_gap(self): + self._pending_real_data( + "On real data, noncompliance and autopsy mistrust ventilation gaps should each exceed 1.5x the race-based ventilation gap." + ) + + def test_real_data_acuity_control_correlations_match_expected_ranges(self): + self._pending_real_data( + "Real-data acuity-control correlations should keep mistrust-vs-acuity weak while matching the expected OASIS-SAPSII and noncompliance-autopsy reference bands." 
+ ) + + def test_real_data_downstream_auc_means_match_expected_reference_bands(self): + self._pending_real_data( + "Real-data downstream mean AUCs for Baseline and Baseline+ALL should fall within the expected paper-aligned ranges for Left AMA, Code Status, and Mortality." + ) + + def test_real_data_downstream_relative_ranking_and_improvement_match_reference_pattern(self): + self._pending_real_data( + "On real data, Baseline+ALL should be the best (or within 0.005 of best) for all tasks, mortality should improve by 0.02-0.06, and the single-mistrust configs should improve the expected target tasks." + ) + + def test_real_data_downstream_auc_variability_and_average_weights_match_reference_pattern(self): + self._pending_real_data( + "Real-data downstream AUC standard deviations and average Baseline+ALL mistrust-feature weights should match the expected reference ranges and directions." + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/core/test_eol_mistrust_dataset.py b/tests/core/test_eol_mistrust_dataset.py new file mode 100644 index 000000000..a26e8fe13 --- /dev/null +++ b/tests/core/test_eol_mistrust_dataset.py @@ -0,0 +1,2861 @@ +import importlib.util +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +import pandas as pd + + +def _load_eol_mistrust_module(): + module_path = ( + Path(__file__).resolve().parents[2] / "pyhealth" / "datasets" / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.datasets.eol_mistrust_dataset_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +class _FakeProbEstimator: + def __init__(self, probabilities): + self.probabilities = list(probabilities) + self.was_fit = False + self.fit_X = None + self.fit_y = None + + def fit(self, X, y): + self.was_fit = True + self.fit_X = X.copy() if hasattr(X, "copy") else X 
+ self.fit_y = y.copy() if hasattr(y, "copy") else y + return self + + def predict_proba(self, X): + n = len(X) + probs = self.probabilities[:n] + return [[1.0 - prob, prob] for prob in probs] + + +class TestEOLMistrustPreprocessing(unittest.TestCase): + """TDD spec for the end-of-life mistrust data-preparation pipeline.""" + + @classmethod + def setUpClass(cls): + cls.module = _load_eol_mistrust_module() + + def setUp(self): + self.admissions = pd.DataFrame( + [ + { + "hadm_id": 100, + "subject_id": 1, + "admittime": "2100-01-01 00:00:00", + "dischtime": "2100-01-02 00:00:00", + "ethnicity": "WHITE - RUSSIAN", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 101, + "subject_id": 2, + "admittime": "2100-02-01 00:00:00", + "dischtime": "2100-02-02 12:00:00", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "insurance": "Private", + "discharge_location": "HOME HOSPICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 102, + "subject_id": 3, + "admittime": "2100-03-01 00:00:00", + "dischtime": "2100-03-01 05:00:00", + "ethnicity": "HISPANIC OR LATINO", + "insurance": "Self Pay", + "discharge_location": "SKILLED NURSING FACILITY", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 103, + "subject_id": 4, + "admittime": "2100-04-01 00:00:00", + "dischtime": "2100-04-01 20:00:00", + "ethnicity": "ASIAN - CHINESE", + "insurance": "Medicaid", + "discharge_location": "SKILLED NURSING FACILITY", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 104, + "subject_id": 5, + "admittime": "2100-05-01 00:00:00", + "dischtime": "2100-05-01 10:00:00", + "ethnicity": "PATIENT DECLINED TO ANSWER", + "insurance": "Government", + "discharge_location": "HOME", + "hospital_expire_flag": 1, + "has_chartevents_data": 1, + }, + { + "hadm_id": 105, + "subject_id": 6, + "admittime": "2100-06-01 00:00:00", + "dischtime": 
"2100-06-02 06:00:00", + "ethnicity": "WHITE", + "insurance": "Private", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 0, + }, + { + "hadm_id": 106, + "subject_id": 7, + "admittime": "2100-07-01 00:00:00", + "dischtime": "2100-07-02 06:00:00", + "ethnicity": "WHITE", + "insurance": "Private", + "discharge_location": "LEFT AGAINST MEDICAL ADVICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 107, + "subject_id": 8, + "admittime": "2100-08-01 00:00:00", + "dischtime": "2100-08-02 00:00:00", + "ethnicity": "BLACK/CAPE VERDEAN", + "insurance": "Medicare", + "discharge_location": "HOME HOSPICE", + "hospital_expire_flag": 1, + "has_chartevents_data": 1, + }, + ] + ) + self.patients = pd.DataFrame( + [ + {"subject_id": 1, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 2, "gender": "F", "dob": "1800-01-01 00:00:00"}, + {"subject_id": 3, "gender": "F", "dob": "2080-03-01 00:00:00"}, + {"subject_id": 4, "gender": "M", "dob": "2060-04-01 00:00:00"}, + {"subject_id": 5, "gender": "F", "dob": "2050-05-01 00:00:00"}, + {"subject_id": 6, "gender": "M", "dob": "2040-06-01 00:00:00"}, + {"subject_id": 7, "gender": "F", "dob": "2075-07-01 00:00:00"}, + {"subject_id": 8, "gender": "M", "dob": "2035-08-01 00:00:00"}, + ] + ) + self.icustays = pd.DataFrame( + [ + {"hadm_id": 100, "icustay_id": 1001, "intime": "2100-01-01 00:00:00", "outtime": "2100-01-01 11:00:00"}, + {"hadm_id": 100, "icustay_id": 1002, "intime": "2100-01-01 12:00:00", "outtime": "2100-01-01 23:00:00"}, + {"hadm_id": 101, "icustay_id": 1011, "intime": "2100-02-01 00:00:00", "outtime": "2100-02-01 13:00:00"}, + {"hadm_id": 103, "icustay_id": 1031, "intime": "2100-04-01 00:00:00", "outtime": "2100-04-01 12:00:00"}, + {"hadm_id": 104, "icustay_id": 1041, "intime": "2100-05-01 01:00:00", "outtime": "2100-05-01 10:00:00"}, + {"hadm_id": 105, "icustay_id": 1051, "intime": "2100-06-01 00:00:00", "outtime": "2100-06-01 14:00:00"}, + 
{"hadm_id": 106, "icustay_id": 1061, "intime": "2100-07-01 00:00:00", "outtime": "2100-07-01 15:00:00"}, + {"hadm_id": 107, "icustay_id": 1071, "intime": "2100-08-01 00:00:00", "outtime": "2100-08-01 13:00:00"}, + ] + ) + self.noteevents = pd.DataFrame( + [ + { + "hadm_id": 101, + "category": "Nursing", + "text": "Patient refuses treatment and was noncompliant with medication. Date:[**5-1-18**]", + "iserror": 0, + }, + { + "hadm_id": 101, + "category": "Physician", + "text": "Autopsy was discussed with the family.", + "iserror": 0, + }, + { + "hadm_id": 103, + "category": "Nursing", + "text": "Cooperative patient. Follows commands.", + "iserror": 0, + }, + { + "hadm_id": 104, + "category": "Discharge", + "text": "AUTOPSY requested.", + "iserror": 1, + }, + { + "hadm_id": 104, + "category": "Nursing", + "text": "No concerns documented.", + "iserror": 0, + }, + { + "hadm_id": 106, + "category": "Nursing", + "text": "Patient remains nonadherent with follow up plan.", + "iserror": 0, + }, + ] + ) + self.d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + {"itemid": 2, "label": "Pain Level", "dbsource": "metavision"}, + {"itemid": 3, "label": "Code Status", "dbsource": "carevue"}, + ] + ) + self.chartevents = pd.DataFrame( + [ + {"hadm_id": 101, "itemid": 1, "value": "No", "icustay_id": 1011}, + {"hadm_id": 101, "itemid": 1, "value": "No", "icustay_id": 1011}, + {"hadm_id": 101, "itemid": 2, "value": "7-Mod to Severe", "icustay_id": 1011}, + {"hadm_id": 101, "itemid": 3, "value": "Full Code", "icustay_id": 1011}, + {"hadm_id": 103, "itemid": 1, "value": "Yes", "icustay_id": 1031}, + {"hadm_id": 103, "itemid": 3, "value": "DNR/DNI", "icustay_id": 1031}, + {"hadm_id": 104, "itemid": 3, "value": "Full Code", "icustay_id": 1041}, + {"hadm_id": 106, "itemid": 3, "value": "Full Code", "icustay_id": 1061}, + {"hadm_id": 107, "itemid": 3, "value": "Comfort Measures Only", "icustay_id": 1071}, + ] + ) + self.ventdurations = 
pd.DataFrame( + [ + { + "icustay_id": 1011, + "ventnum": 1, + "starttime": "2100-02-01 00:00:00", + "endtime": "2100-02-01 02:00:00", + "duration_hours": 2.0, + }, + { + "icustay_id": 1011, + "ventnum": 2, + "starttime": "2100-02-01 11:30:00", + "endtime": "2100-02-01 12:30:00", + "duration_hours": 1.0, + }, + { + "icustay_id": 1011, + "ventnum": 3, + "starttime": "2100-02-01 23:31:00", + "endtime": "2100-02-02 00:31:00", + "duration_hours": 1.0, + }, + ] + ) + self.vasopressordurations = pd.DataFrame( + [ + { + "icustay_id": 1031, + "vasonum": 1, + "starttime": "2100-04-01 01:00:00", + "endtime": "2100-04-01 03:00:00", + "duration_hours": 2.0, + }, + { + "icustay_id": 1031, + "vasonum": 2, + "starttime": "2100-04-01 02:30:00", + "endtime": "2100-04-01 05:00:00", + "duration_hours": 2.5, + }, + { + "icustay_id": 1031, + "vasonum": 3, + "starttime": "2100-04-01 14:00:00", + "endtime": "2100-04-01 15:00:00", + "duration_hours": 1.0, + }, + ] + ) + self.oasis = pd.DataFrame( + [ + {"hadm_id": 101, "icustay_id": 1011, "oasis": 15}, + {"hadm_id": 103, "icustay_id": 1031, "oasis": 20}, + {"hadm_id": 106, "icustay_id": 1061, "oasis": 8}, + {"hadm_id": 107, "icustay_id": 1071, "oasis": 30}, + ] + ) + self.sapsii = pd.DataFrame( + [ + {"hadm_id": 101, "icustay_id": 1011, "sapsii": 42}, + {"hadm_id": 103, "icustay_id": 1031, "sapsii": 55}, + {"hadm_id": 106, "icustay_id": 1061, "sapsii": 12}, + {"hadm_id": 107, "icustay_id": 1071, "sapsii": 70}, + ] + ) + self.mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": 101, + "noncompliance_score_z": 1.2, + "autopsy_score_z": 0.7, + "negative_sentiment_score_z": 0.9, + }, + { + "hadm_id": 103, + "noncompliance_score_z": -0.3, + "autopsy_score_z": -0.2, + "negative_sentiment_score_z": -0.1, + }, + { + "hadm_id": 106, + "noncompliance_score_z": 0.8, + "autopsy_score_z": -0.4, + "negative_sentiment_score_z": 0.2, + }, + { + "hadm_id": 107, + "noncompliance_score_z": -1.0, + "autopsy_score_z": 1.1, + "negative_sentiment_score_z": -1.0, 
+ }, + ] + ) + + def _get_callable(self, name): + self.assertTrue( + hasattr(self.module, name), + msg=f"Implement `{name}` in pyhealth.datasets.eol_mistrust", + ) + attr = getattr(self.module, name) + self.assertTrue(callable(attr), msg=f"`{name}` must be callable") + return attr + + def _assert_hadm_unique(self, df, msg_prefix): + self.assertIn("hadm_id", df.columns, msg=f"{msg_prefix} must include hadm_id") + self.assertTrue( + df["hadm_id"].is_unique, + msg=f"{msg_prefix} must be unique at the admission level", + ) + + def _build_database_environment_inputs( + self, + num_admissions=50010, + include_multiple_icustays=True, + ): + hadm_ids = list(range(200000, 200000 + num_admissions)) + subject_ids = list(range(300000, 300000 + num_admissions)) + icustay_ids = list(range(400000, 400000 + num_admissions)) + + admissions = pd.DataFrame( + { + "hadm_id": hadm_ids, + "subject_id": subject_ids, + "admittime": ["2100-01-01 00:00:00"] * num_admissions, + "dischtime": ["2100-01-02 00:00:00"] * num_admissions, + "ethnicity": ["WHITE"] * num_admissions, + "insurance": ["Medicare"] * num_admissions, + "discharge_location": ["HOME"] * num_admissions, + "hospital_expire_flag": [0] * num_admissions, + "has_chartevents_data": [1] * num_admissions, + } + ) + patients = pd.DataFrame( + { + "subject_id": subject_ids, + "gender": ["M"] * num_admissions, + "dob": ["2070-01-01 00:00:00"] * num_admissions, + } + ) + icustays = pd.DataFrame( + { + "hadm_id": hadm_ids, + "icustay_id": icustay_ids, + "intime": ["2100-01-01 00:00:00"] * num_admissions, + "outtime": ["2100-01-01 13:00:00"] * num_admissions, + } + ) + if include_multiple_icustays: + icustays = pd.concat( + [ + icustays, + pd.DataFrame( + [ + { + "hadm_id": hadm_ids[0], + "icustay_id": 999999, + "intime": "2100-01-02 00:00:00", + "outtime": "2100-01-02 14:00:00", + } + ] + ), + ], + ignore_index=True, + ) + + noteevents = pd.DataFrame( + [ + { + "hadm_id": hadm_ids[0], + "category": "Nursing", + "text": "Patient refuses 
treatment and autopsy discussed.", + "iserror": 0, + } + ] + ) + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + ] + ) + chartevents = pd.DataFrame( + [ + { + "hadm_id": hadm_ids[0], + "itemid": 1, + "value": "No", + "icustay_id": icustay_ids[0], + } + ] + ) + ventdurations = pd.DataFrame( + [ + { + "icustay_id": icustay_ids[0], + "ventnum": 1, + "starttime": "2100-01-01 00:00:00", + "endtime": "2100-01-01 01:00:00", + "duration_hours": 1.0, + } + ] + ) + vasopressordurations = pd.DataFrame( + [ + { + "icustay_id": icustay_ids[0], + "vasonum": 1, + "starttime": "2100-01-01 02:00:00", + "endtime": "2100-01-01 03:00:00", + "duration_hours": 1.0, + } + ] + ) + oasis = pd.DataFrame( + [ + {"hadm_id": hadm_ids[0], "icustay_id": icustay_ids[0], "oasis": 15}, + ] + ) + sapsii = pd.DataFrame( + [ + {"hadm_id": hadm_ids[0], "icustay_id": icustay_ids[0], "sapsii": 42}, + ] + ) + + raw_tables = { + "admissions": admissions, + "patients": patients, + "icustays": icustays, + "noteevents": noteevents, + "chartevents": chartevents, + "d_items": d_items, + } + materialized_views = { + "ventdurations": ventdurations, + "vasopressordurations": vasopressordurations, + "oasis": oasis, + "sapsii": sapsii, + } + return raw_tables, materialized_views + + def test_map_ethnicity_matches_required_categories(self): + map_ethnicity = self._get_callable("map_ethnicity") + self.assertEqual(map_ethnicity("WHITE - RUSSIAN"), "WHITE") + self.assertEqual(map_ethnicity("BLACK/AFRICAN AMERICAN"), "BLACK") + self.assertEqual(map_ethnicity("ASIAN - CHINESE"), "ASIAN") + self.assertEqual(map_ethnicity("HISPANIC OR LATINO"), "HISPANIC") + self.assertEqual(map_ethnicity("AMERICAN INDIAN/ALASKA NATIVE"), "NATIVE AMERICAN") + self.assertEqual(map_ethnicity("PATIENT DECLINED TO ANSWER"), "OTHER") + self.assertEqual(map_ethnicity(None), "OTHER") + + def test_validate_database_environment_rejects_non_mimiciii_schema(self): + validate_database_environment = 
self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + + with self.assertRaisesRegex(ValueError, "Database schema must be mimiciii."): + validate_database_environment( + raw_tables, + materialized_views, + schema_name="mimiciv", + ) + + def test_validate_database_environment_rejects_non_postgres_flavor(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + + with self.assertRaisesRegex(ValueError, "Database flavor must be PostgreSQL."): + validate_database_environment( + raw_tables, + materialized_views, + database_flavor="sqlite", + ) + + def test_validate_database_environment_accepts_postgres_alias(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + + summary = validate_database_environment( + raw_tables, + materialized_views, + database_flavor="postgres", + ) + + self.assertEqual(summary["database_flavor"], "postgres") + + def test_validate_database_environment_requires_all_required_raw_tables(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + raw_tables.pop("noteevents") + + with self.assertRaisesRegex(ValueError, "Missing required raw tables: noteevents"): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_requires_all_required_materialized_views(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + materialized_views.pop("oasis") + + 
with self.assertRaisesRegex(ValueError, "Missing required materialized views: oasis"): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_requires_required_raw_table_columns(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + raw_tables["admissions"] = raw_tables["admissions"].drop( + columns=["has_chartevents_data"] + ) + + with self.assertRaisesRegex( + ValueError, + "admissions is missing required columns: has_chartevents_data", + ): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_requires_required_materialized_view_columns(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + materialized_views["ventdurations"] = materialized_views["ventdurations"].drop( + columns=["endtime"] + ) + + with self.assertRaisesRegex( + ValueError, + "ventdurations is missing required columns: endtime", + ): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_requires_large_base_admissions_backbone(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=10 + ) + + with self.assertRaisesRegex(ValueError, "must exceed 50,000 rows"): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_enforces_50000_row_boundary(self): + validate_database_environment = self._get_callable("validate_database_environment") + + raw_tables_50000, materialized_views_50000 = self._build_database_environment_inputs( + num_admissions=50000 + ) + with 
self.assertRaisesRegex(ValueError, "must exceed 50,000 rows"): + validate_database_environment(raw_tables_50000, materialized_views_50000) + + raw_tables_50001, materialized_views_50001 = self._build_database_environment_inputs( + num_admissions=50001 + ) + summary = validate_database_environment(raw_tables_50001, materialized_views_50001) + self.assertEqual(summary["base_admissions_rows"], 50001) + + def test_validate_database_environment_rejects_null_subject_id_in_base_backbone(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + raw_tables["admissions"].loc[0, "subject_id"] = pd.NA + + with self.assertRaisesRegex(ValueError, "Base admissions contains null subject_id"): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_rejects_null_hadm_id_in_base_backbone(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + raw_tables["admissions"].loc[0, "hadm_id"] = pd.NA + + with self.assertRaisesRegex(ValueError, "Base admissions contains null hadm_id"): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_requires_non_null_icustay_bridge_keys(self): + validate_database_environment = self._get_callable("validate_database_environment") + + for column in ["hadm_id", "icustay_id"]: + with self.subTest(column=column): + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + raw_tables["icustays"].loc[0, column] = pd.NA + + with self.assertRaisesRegex( + ValueError, + "icustays must provide non-null hadm_id and icustay_id", + ): + validate_database_environment(raw_tables, materialized_views) + + def 
test_validate_database_environment_requires_accessible_note_text(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + raw_tables["noteevents"]["text"] = pd.NA + + with self.assertRaisesRegex(ValueError, "noteevents.text must be accessible"): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_requires_accessible_chartevent_values(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + raw_tables["chartevents"]["value"] = pd.NA + + with self.assertRaisesRegex(ValueError, "chartevents.value must be accessible"): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_requires_ventdurations_to_join_to_icustays(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + materialized_views["ventdurations"]["icustay_id"] = 123456789 + + with self.assertRaisesRegex( + ValueError, + "ventdurations must join to icustays through icustay_id", + ): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_requires_vasopressors_to_join_to_icustays(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + materialized_views["vasopressordurations"]["icustay_id"] = 123456789 + + with self.assertRaisesRegex( + ValueError, + "vasopressordurations must join to icustays through icustay_id", + ): + validate_database_environment(raw_tables, materialized_views) + + def 
test_validate_database_environment_requires_chartevents_to_join_to_d_items(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + raw_tables["chartevents"]["itemid"] = 999999 + + with self.assertRaisesRegex( + ValueError, + "chartevents must join to d_items through itemid", + ): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_requires_nonempty_admission_level_acuity(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + materialized_views["oasis"] = pd.DataFrame( + columns=["hadm_id", "icustay_id", "oasis"] + ) + materialized_views["sapsii"] = pd.DataFrame( + columns=["hadm_id", "icustay_id", "sapsii"] + ) + + with self.assertRaisesRegex( + ValueError, + "oasis and sapsii must join back to admissions on hadm_id", + ): + validate_database_environment(raw_tables, materialized_views) + + def test_validate_database_environment_returns_summary_for_valid_inputs(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001 + ) + + summary = validate_database_environment(raw_tables, materialized_views) + + self.assertEqual(summary["database_flavor"], "postgresql") + self.assertEqual(summary["schema_name"], "mimiciii") + self.assertEqual(summary["base_admissions_rows"], 50001) + self.assertEqual( + summary["raw_tables"], + ["admissions", "chartevents", "d_items", "icustays", "noteevents", "patients"], + ) + self.assertEqual( + summary["materialized_views"], + ["oasis", "sapsii", "vasopressordurations", "ventdurations"], + ) + self.assertTrue(summary["supports_multiple_icustays_per_hadm"]) + + def 
test_validate_database_environment_reports_when_multiple_icustays_are_absent(self): + validate_database_environment = self._get_callable("validate_database_environment") + raw_tables, materialized_views = self._build_database_environment_inputs( + num_admissions=50001, + include_multiple_icustays=False, + ) + + summary = validate_database_environment(raw_tables, materialized_views) + + self.assertFalse(summary["supports_multiple_icustays_per_hadm"]) + + def test_map_insurance_matches_required_categories(self): + map_insurance = self._get_callable("map_insurance") + self.assertEqual(map_insurance("Medicare"), "Public") + self.assertEqual(map_insurance("Medicaid"), "Public") + self.assertEqual(map_insurance("Government"), "Public") + self.assertEqual(map_insurance("Private"), "Private") + self.assertEqual(map_insurance("Self Pay"), "Self-Pay") + + def test_build_base_admissions_raises_clear_error_when_required_columns_are_missing(self): + build_base_admissions = self._get_callable("build_base_admissions") + admissions_missing = self.admissions.drop(columns=["has_chartevents_data"]) + with self.assertRaisesRegex(ValueError, "has_chartevents_data"): + build_base_admissions(admissions_missing, self.patients) + + def test_build_base_admissions_filters_has_chartevents_and_joins_patients(self): + build_base_admissions = self._get_callable("build_base_admissions") + base = build_base_admissions(self.admissions, self.patients) + self.assertIsInstance(base, pd.DataFrame) + self.assertIn("gender", base.columns) + self.assertIn("dob", base.columns) + self.assertNotIn(105, set(base["hadm_id"])) + self.assertEqual(len(base), 7) + self._assert_hadm_unique(base, "Base admissions") + + def test_build_base_admissions_rejects_duplicate_patient_rows_for_subject_id(self): + build_base_admissions = self._get_callable("build_base_admissions") + duplicated_patients = pd.concat( + [ + self.patients, + pd.DataFrame( + [ + { + "subject_id": 1, + "gender": "F", + "dob": "2071-01-01 00:00:00", 
+ } + ] + ), + ], + ignore_index=True, + ) + + with self.assertRaises(pd.errors.MergeError): + build_base_admissions(self.admissions, duplicated_patients) + + def test_build_demographics_table_applies_age_los_race_and_insurance_rules(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + + required = { + "hadm_id", + "subject_id", + "race", + "age", + "los_hours", + "los_days", + "insurance", + "gender", + } + self.assertTrue(required.issubset(set(demographics.columns))) + + by_hadm = demographics.set_index("hadm_id") + self.assertEqual(by_hadm.loc[101, "race"], "BLACK") + self.assertEqual(by_hadm.loc[103, "race"], "ASIAN") + self.assertEqual(by_hadm.loc[104, "race"], "OTHER") + self.assertEqual(by_hadm.loc[101, "age"], 90.0) + self.assertAlmostEqual(by_hadm.loc[103, "los_hours"], 20.0) + self.assertAlmostEqual(by_hadm.loc[106, "los_days"], 30.0 / 24.0) + self.assertEqual(by_hadm.loc[104, "insurance"], "Public") + self.assertEqual(by_hadm.loc[106, "insurance"], "Private") + self._assert_hadm_unique(demographics, "Demographics table") + + def test_build_eol_cohort_enforces_los_filter_and_discharge_priority(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_eol_cohort = self._get_callable("build_eol_cohort") + + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + eol = build_eol_cohort(base, demographics) + + self.assertIsInstance(eol, pd.DataFrame) + self.assertEqual(set(eol["hadm_id"]), {101, 103, 104, 107}) + + by_hadm = eol.set_index("hadm_id") + self.assertEqual(by_hadm.loc[101, "discharge_category"], "Hospice") + self.assertEqual(by_hadm.loc[103, "discharge_category"], "Skilled Nursing 
Facility") + self.assertEqual(by_hadm.loc[104, "discharge_category"], "Deceased") + self.assertEqual( + by_hadm.loc[107, "discharge_category"], + "Deceased", + msg="Death must take priority over hospice when both indicators are present", + ) + self.assertNotIn(102, set(eol["hadm_id"])) + self._assert_hadm_unique(eol, "EOL cohort") + + def test_build_eol_cohort_enforces_exact_six_hour_boundary(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_eol_cohort = self._get_callable("build_eol_cohort") + + admissions = pd.DataFrame( + [ + { + "hadm_id": 891, + "subject_id": 891, + "admittime": "2100-09-01 00:00:00", + "dischtime": "2100-09-01 06:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "HOME HOSPICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 892, + "subject_id": 892, + "admittime": "2100-09-01 00:00:00", + "dischtime": "2100-09-01 05:59:00", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "insurance": "Private", + "discharge_location": "HOME HOSPICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 891, "gender": "M", "dob": "2070-09-01 00:00:00"}, + {"subject_id": 892, "gender": "F", "dob": "2070-09-01 00:00:00"}, + ] + ) + + base = build_base_admissions(admissions, patients) + demographics = build_demographics_table(base) + eol = build_eol_cohort(base, demographics) + + self.assertIn(891, set(eol["hadm_id"])) + self.assertNotIn(892, set(eol["hadm_id"])) + + def test_build_eol_cohort_accepts_snf_discharge_text(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_eol_cohort = self._get_callable("build_eol_cohort") + + admissions = pd.DataFrame( + [ + { + "hadm_id": 901, + "subject_id": 91, + "admittime": "2100-09-01 
00:00:00", + "dischtime": "2100-09-01 12:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "SNF", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + } + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 91, "gender": "M", "dob": "2070-09-01 00:00:00"}, + ] + ) + + base = build_base_admissions(admissions, patients) + demographics = build_demographics_table(base) + eol = build_eol_cohort(base, demographics) + + self.assertEqual(set(eol["hadm_id"]), {901}) + + def test_build_all_cohort_requires_a_single_icu_stay_of_at_least_12_hours(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_all_cohort = self._get_callable("build_all_cohort") + + base = build_base_admissions(self.admissions, self.patients) + all_cohort = build_all_cohort(base, self.icustays) + + self.assertIsInstance(all_cohort, pd.DataFrame) + self.assertEqual(set(all_cohort["hadm_id"]), {101, 103, 106, 107}) + self.assertNotIn( + 100, + set(all_cohort["hadm_id"]), + msg="Two 11-hour ICU stays must not qualify; at least one stay must be >= 12 hours", + ) + self.assertNotIn( + 105, + set(all_cohort["hadm_id"]), + msg="Admissions excluded by has_chartevents_data should stay excluded downstream", + ) + self._assert_hadm_unique(all_cohort, "ALL cohort") + + def test_build_all_cohort_remains_unique_when_multiple_qualifying_icu_stays_exist(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_all_cohort = self._get_callable("build_all_cohort") + + admissions = pd.DataFrame( + [ + { + "hadm_id": 801, + "subject_id": 81, + "admittime": "2100-08-01 00:00:00", + "dischtime": "2100-08-02 00:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + } + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 81, "gender": "M", "dob": "2070-08-01 00:00:00"}, + ] + ) + icustays = pd.DataFrame( + [ + { + "hadm_id": 801, + 
"icustay_id": 8011, + "intime": "2100-08-01 00:00:00", + "outtime": "2100-08-01 13:00:00", + }, + { + "hadm_id": 801, + "icustay_id": 8012, + "intime": "2100-08-01 14:00:00", + "outtime": "2100-08-02 04:00:00", + }, + ] + ) + + base = build_base_admissions(admissions, patients) + all_cohort = build_all_cohort(base, icustays) + + self.assertEqual(list(all_cohort["hadm_id"]), [801]) + self._assert_hadm_unique(all_cohort, "ALL cohort with multiple qualifying ICU stays") + + def test_build_treatment_totals_raises_clear_error_on_missing_required_columns(self): + build_treatment_totals = self._get_callable("build_treatment_totals") + vent_missing = self.ventdurations.drop(columns=["starttime"]) + with self.assertRaisesRegex(ValueError, "starttime"): + build_treatment_totals(self.icustays, vent_missing, self.vasopressordurations) + + def test_build_treatment_totals_merges_overlapping_and_short_gap_spans(self): + build_treatment_totals = self._get_callable("build_treatment_totals") + totals = build_treatment_totals( + self.icustays, + self.ventdurations, + self.vasopressordurations, + ) + + self.assertIsInstance(totals, pd.DataFrame) + self.assertTrue({"hadm_id", "total_vent_min", "total_vaso_min"}.issubset(totals.columns)) + + by_hadm = totals.fillna(0).set_index("hadm_id") + self.assertEqual( + by_hadm.loc[101, "total_vent_min"], + 810.0, + msg="Vent spans with a gap <= 600 minutes must merge before summing by hadm_id", + ) + self.assertEqual( + by_hadm.loc[103, "total_vaso_min"], + 840.0, + msg="Overlapping vasopressor spans and gaps <= 600 minutes must merge into one span", + ) + self._assert_hadm_unique(totals, "Treatment totals") + + def test_build_treatment_totals_uses_icustay_bridge_and_respects_600_minute_boundary(self): + build_treatment_totals = self._get_callable("build_treatment_totals") + icustays = pd.DataFrame( + [ + {"hadm_id": 200, "icustay_id": 2001, "intime": "2100-09-01 00:00:00", "outtime": "2100-09-01 12:00:00"}, + {"hadm_id": 200, "icustay_id": 
2002, "intime": "2100-09-01 20:00:00", "outtime": "2100-09-02 04:00:00"}, + ] + ) + ventdurations = pd.DataFrame( + [ + { + "hadm_id": 999, + "icustay_id": 2001, + "ventnum": 1, + "starttime": "2100-09-01 00:00:00", + "endtime": "2100-09-01 01:00:00", + "duration_hours": 1.0, + }, + { + "hadm_id": 999, + "icustay_id": 2001, + "ventnum": 2, + "starttime": "2100-09-01 11:00:00", + "endtime": "2100-09-01 12:00:00", + "duration_hours": 1.0, + }, + { + "hadm_id": 999, + "icustay_id": 2002, + "ventnum": 3, + "starttime": "2100-09-01 22:01:00", + "endtime": "2100-09-01 23:01:00", + "duration_hours": 1.0, + }, + ] + ) + empty_vaso = pd.DataFrame( + columns=["icustay_id", "vasonum", "starttime", "endtime", "duration_hours"] + ) + totals = build_treatment_totals(icustays, ventdurations, empty_vaso) + row = totals.fillna(0).set_index("hadm_id").loc[200] + self.assertEqual( + row["total_vent_min"], + 780.0, + msg="Gap == 600 must merge, gap == 601 must not merge, and hadm_id must be derived from icustays", + ) + + def test_prepare_note_text_for_sentiment_collapses_whitespace_only(self): + prepare_note_text_for_sentiment = self._get_callable("prepare_note_text_for_sentiment") + cleaned = prepare_note_text_for_sentiment( + "Patient\trefuses\n\n treatment Date:[**5-1-18**]" + ) + self.assertEqual( + cleaned, + "Patient refuses treatment Date:[**5-1-18**]", + msg="Whitespace tokenization should collapse whitespace without stripping de-identification markers", + ) + self.assertEqual(prepare_note_text_for_sentiment(" \n\t "), "") + self.assertEqual(prepare_note_text_for_sentiment(None), "") + + def test_build_note_corpus_concatenates_non_error_notes_and_can_include_missing_admissions(self): + build_note_corpus = self._get_callable("build_note_corpus") + corpus = build_note_corpus( + self.noteevents, + all_hadm_ids=[101, 103, 104, 106, 107], + ) + + self.assertIsInstance(corpus, pd.DataFrame) + self.assertTrue({"hadm_id", "note_text"}.issubset(corpus.columns)) + 
self._assert_hadm_unique(corpus, "Note corpus") + + by_hadm = corpus.set_index("hadm_id") + self.assertEqual(set(corpus["hadm_id"]), {101, 103, 104, 106, 107}) + self.assertIn("Patient refuses treatment", by_hadm.loc[101, "note_text"]) + self.assertIn("Autopsy was discussed", by_hadm.loc[101, "note_text"]) + self.assertNotIn( + "AUTOPSY requested", + by_hadm.loc[104, "note_text"], + msg="Error notes must be excluded before admission-level concatenation", + ) + self.assertEqual( + by_hadm.loc[107, "note_text"], + "", + msg="Admissions without notes should still appear when all_hadm_ids is provided", + ) + + def test_build_note_corpus_filters_out_notes_for_hadm_ids_outside_requested_all_cohort(self): + build_note_corpus = self._get_callable("build_note_corpus") + notes = pd.concat( + [ + self.noteevents, + pd.DataFrame( + [ + { + "hadm_id": 999, + "category": "Nursing", + "text": "Outside cohort note that should be dropped.", + "iserror": 0, + } + ] + ), + ], + ignore_index=True, + ) + + corpus = build_note_corpus(notes, all_hadm_ids=[101, 103, 104]) + + self.assertEqual(set(corpus["hadm_id"]), {101, 103, 104}) + self.assertNotIn(999, set(corpus["hadm_id"])) + + def test_build_note_corpus_preserves_empty_strings_after_left_join(self): + build_note_corpus = self._get_callable("build_note_corpus") + corpus = build_note_corpus(self.noteevents, all_hadm_ids=[101, 103, 999]).set_index( + "hadm_id" + ) + self.assertEqual(corpus.loc[999, "note_text"], "") + self.assertFalse(pd.isna(corpus.loc[999, "note_text"])) + + def test_build_note_corpus_raises_clear_error_when_required_columns_are_missing(self): + build_note_corpus = self._get_callable("build_note_corpus") + notes_missing = self.noteevents.drop(columns=["text"]) + with self.assertRaisesRegex(ValueError, "text"): + build_note_corpus(notes_missing) + + def test_build_note_labels_ignores_error_notes_and_extracts_rule_based_labels(self): + build_note_labels = self._get_callable("build_note_labels") + labels = 
build_note_labels(self.noteevents) + + self.assertIsInstance(labels, pd.DataFrame) + self.assertTrue( + {"hadm_id", "noncompliance_label", "autopsy_label"}.issubset(labels.columns) + ) + + by_hadm = labels.set_index("hadm_id") + self.assertEqual(by_hadm.loc[101, "noncompliance_label"], 1) + self.assertEqual(by_hadm.loc[101, "autopsy_label"], 1) + self.assertEqual(by_hadm.loc[103, "noncompliance_label"], 0) + self.assertEqual(by_hadm.loc[104, "autopsy_label"], 0) + self.assertEqual(by_hadm.loc[106, "noncompliance_label"], 1) + self._assert_hadm_unique(labels, "Note labels") + + def test_build_note_labels_can_include_all_hadm_ids_with_zero_defaults(self): + build_note_labels = self._get_callable("build_note_labels") + labels = build_note_labels( + self.noteevents, + all_hadm_ids=[101, 103, 104, 106, 107], + ) + self._assert_hadm_unique(labels, "Note labels with all admissions") + by_hadm = labels.set_index("hadm_id") + self.assertEqual(set(labels["hadm_id"]), {101, 103, 104, 106, 107}) + self.assertEqual(by_hadm.loc[107, "noncompliance_label"], 0) + self.assertEqual(by_hadm.loc[107, "autopsy_label"], 0) + + def test_build_note_labels_raises_clear_error_when_required_columns_are_missing(self): + build_note_labels = self._get_callable("build_note_labels") + notes_missing = self.noteevents.drop(columns=["iserror"]) + with self.assertRaisesRegex(ValueError, "iserror"): + build_note_labels(notes_missing) + + def test_build_note_labels_avoids_simple_false_positives(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.concat( + [ + self.noteevents, + pd.DataFrame( + [ + { + "hadm_id": 108, + "category": "Nursing", + "text": "Medication compliance reviewed with patient. 
No autopsy planned.", + "iserror": 0, + } + ] + ), + ], + ignore_index=True, + ) + labels = build_note_labels(notes).set_index("hadm_id") + self.assertEqual( + labels.loc[108, "noncompliance_label"], + 0, + msg="Substring rules should not fire on generic compliance mentions", + ) + self.assertEqual( + labels.loc[108, "autopsy_label"], + 1, + msg="Autopsy matching should be case-insensitive and based on substring presence", + ) + + def test_build_note_labels_matches_hyphenated_noncompliance_phrases(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 201, + "category": "Nursing", + "text": "Patient is non-complian with medications.", + "iserror": 0, + }, + { + "hadm_id": 202, + "category": "Nursing", + "text": "Patient remains non-adher to treatment plan.", + "iserror": 0, + }, + ] + ) + + labels = build_note_labels(notes).set_index("hadm_id") + self.assertEqual(labels.loc[201, "noncompliance_label"], 1) + self.assertEqual(labels.loc[202, "noncompliance_label"], 1) + + def test_build_note_labels_matches_literal_noncompliance_and_noncompliant_terms(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 211, + "category": "Nursing", + "text": "Team documented ongoing noncompliance with medications.", + "iserror": 0, + }, + { + "hadm_id": 212, + "category": "Nursing", + "text": "Patient was described as noncompliant during rounds.", + "iserror": 0, + }, + ] + ) + + labels = build_note_labels(notes).set_index("hadm_id") + self.assertEqual(labels.loc[211, "noncompliance_label"], 1) + self.assertEqual(labels.loc[212, "noncompliance_label"], 1) + + def test_identify_table2_itemids_discovers_matching_labels_across_dbsources(self): + identify_table2_itemids = self._get_callable("identify_table2_itemids") + d_items = pd.DataFrame( + [ + {"itemid": 10, "label": "Education Readiness", "dbsource": "carevue"}, + {"itemid": 11, "label": "Education Readiness", 
"dbsource": "metavision"}, + {"itemid": 12, "label": "Pain Level", "dbsource": "carevue"}, + {"itemid": 13, "label": "Follows Commands", "dbsource": "metavision"}, + {"itemid": 14, "label": "Completely Unrelated Label", "dbsource": "carevue"}, + ] + ) + itemids = identify_table2_itemids(d_items) + self.assertIsInstance(itemids, (set, list, tuple)) + itemids = set(itemids) + self.assertTrue({10, 11, 12, 13}.issubset(itemids)) + self.assertNotIn( + 14, + itemids, + msg="Only Table 2 concepts should be selected from d_items", + ) + + def test_identify_table2_itemids_supports_case_insensitive_partial_label_matching(self): + identify_table2_itemids = self._get_callable("identify_table2_itemids") + d_items = pd.DataFrame( + [ + { + "itemid": 20, + "label": "Richmond-RAS Scale Assessment", + "dbsource": "carevue", + }, + { + "itemid": 21, + "label": "pain level verbal response", + "dbsource": "metavision", + }, + { + "itemid": 22, + "label": "SOCIAL WORK CONSULT NOTE", + "dbsource": "carevue", + }, + { + "itemid": 23, + "label": "Completely unrelated field", + "dbsource": "metavision", + }, + ] + ) + + itemids = set(identify_table2_itemids(d_items)) + self.assertIn(20, itemids) + self.assertIn(21, itemids) + self.assertIn(22, itemids) + self.assertNotIn(23, itemids) + + def test_identify_table2_itemids_raises_clear_error_when_required_columns_are_missing(self): + identify_table2_itemids = self._get_callable("identify_table2_itemids") + d_items_missing = self.d_items.drop(columns=["label"]) + with self.assertRaisesRegex(ValueError, "label"): + identify_table2_itemids(d_items_missing) + + def test_build_chartevent_feature_matrix_creates_binary_label_value_features(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + feature_matrix = build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + allowed_labels={"Education Readiness", "Pain Level"}, + ) + + self.assertIsInstance(feature_matrix, pd.DataFrame) + 
expected_columns = { + "hadm_id", + "Education Readiness: No", + "Education Readiness: Yes", + "Pain Level: 7-Mod to Severe", + } + self.assertTrue(expected_columns.issubset(set(feature_matrix.columns))) + + by_hadm = feature_matrix.fillna(0).set_index("hadm_id") + self.assertEqual(by_hadm.loc[101, "Education Readiness: No"], 1) + self.assertEqual(by_hadm.loc[101, "Pain Level: 7-Mod to Severe"], 1) + self.assertEqual(by_hadm.loc[103, "Education Readiness: Yes"], 1) + self.assertEqual( + by_hadm.loc[101, "Education Readiness: No"], + 1, + msg="Repeated charted values must stay binary at the admission level", + ) + self._assert_hadm_unique(feature_matrix, "Chartevent feature matrix") + + def test_build_chartevent_feature_matrix_can_preserve_all_hadm_ids_with_zero_rows(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + feature_matrix = build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + allowed_labels={"Education Readiness", "Pain Level"}, + all_hadm_ids=[101, 103, 106, 107], + ).fillna(0) + self._assert_hadm_unique(feature_matrix, "Chartevent feature matrix with all admissions") + self.assertEqual(set(feature_matrix["hadm_id"]), {101, 103, 106, 107}) + zero_row = feature_matrix.set_index("hadm_id").loc[106] + self.assertTrue( + (zero_row == 0).all(), + msg="Admissions without matching chart features should still appear as all-zero rows when all_hadm_ids is provided", + ) + + def test_build_chartevent_feature_matrix_normalizes_values_and_ignores_blank_entries(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + chartevents = pd.concat( + [ + self.chartevents, + pd.DataFrame( + [ + {"hadm_id": 103, "itemid": 1, "value": " No ", "icustay_id": 1031}, + {"hadm_id": 103, "itemid": 2, "value": "", "icustay_id": 1031}, + {"hadm_id": 103, "itemid": 2, "value": None, "icustay_id": 1031}, + ] + ), + ], + ignore_index=True, + ) + feature_matrix = 
build_chartevent_feature_matrix( + chartevents, + self.d_items, + allowed_labels={"Education Readiness", "Pain Level"}, + ).fillna(0).set_index("hadm_id") + self.assertIn( + "Education Readiness: No", + feature_matrix.columns, + msg="Feature columns must preserve the required 'label: value' naming scheme", + ) + self.assertEqual( + feature_matrix.loc[103, "Education Readiness: No"], + 1, + msg="Value normalization should trim whitespace and lowercase to stable feature keys", + ) + self.assertNotIn( + "Pain Level: ", + set(feature_matrix.columns), + msg="Blank values should not produce empty feature columns", + ) + + def test_build_chartevent_feature_matrix_deduplicates_repeated_pairs_to_one_binary_value(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + chartevents = pd.DataFrame( + [ + {"hadm_id": 301, "itemid": 1, "value": "No", "icustay_id": 3011}, + {"hadm_id": 301, "itemid": 1, "value": "No", "icustay_id": 3011}, + {"hadm_id": 301, "itemid": 1, "value": "No", "icustay_id": 3011}, + ] + ) + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + ] + ) + + feature_matrix = build_chartevent_feature_matrix( + chartevents, + d_items, + allowed_labels={"Education Readiness"}, + ).set_index("hadm_id") + + self.assertIn( + "Education Readiness: No", + feature_matrix.columns, + msg="Repeated label/value pairs must map into the required single binary feature column", + ) + self.assertEqual(feature_matrix.loc[301, "Education Readiness: No"], 1) + + def test_build_chartevent_feature_matrix_preserves_rare_single_occurrence_features(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + chartevents = pd.DataFrame( + [ + {"hadm_id": 401, "itemid": 41, "value": "Yes", "icustay_id": 4011}, + ] + ) + d_items = pd.DataFrame( + [ + {"itemid": 41, "label": "Family Meeting", "dbsource": "carevue"}, + ] + ) + + feature_matrix = 
build_chartevent_feature_matrix( + chartevents, + d_items, + allowed_labels={"Family Meeting"}, + all_hadm_ids=[401, 402], + ).fillna(0).set_index("hadm_id") + + self.assertIn( + "Family Meeting: Yes", + feature_matrix.columns, + msg="Rare one-off chart-event features must not be pruned from the matrix", + ) + self.assertEqual(feature_matrix.loc[401, "Family Meeting: Yes"], 1) + self.assertEqual(feature_matrix.loc[402, "Family Meeting: Yes"], 0) + + def test_build_chartevent_feature_matrix_outputs_binary_values_under_duplicates_and_missing_rows(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + chartevents = pd.DataFrame( + [ + {"hadm_id": 501, "itemid": 1, "value": "No", "icustay_id": 5011}, + {"hadm_id": 501, "itemid": 1, "value": "No", "icustay_id": 5011}, + {"hadm_id": 501, "itemid": 2, "value": "7-Mod to Severe", "icustay_id": 5011}, + {"hadm_id": 502, "itemid": 2, "value": None, "icustay_id": 5021}, + ] + ) + feature_matrix = build_chartevent_feature_matrix( + chartevents, + self.d_items, + allowed_labels={"Education Readiness", "Pain Level"}, + all_hadm_ids=[501, 502, 503], + ).fillna(0) + + feature_columns = [column for column in feature_matrix.columns if column != "hadm_id"] + self.assertTrue(feature_columns) + self.assertTrue(feature_matrix[feature_columns].isin([0, 1]).all().all()) + + def test_build_chartevent_feature_matrix_raises_clear_error_when_required_columns_are_missing(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + chartevents_missing = self.chartevents.drop(columns=["itemid"]) + with self.assertRaisesRegex(ValueError, "itemid"): + build_chartevent_feature_matrix(chartevents_missing, self.d_items) + + def test_z_normalize_scores_standardizes_each_metric_independently(self): + z_normalize_scores = self._get_callable("z_normalize_scores") + raw = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score": 1.0, "autopsy_score": 10.0, 
"negative_sentiment_score": -0.2}, + {"hadm_id": 2, "noncompliance_score": 2.0, "autopsy_score": 20.0, "negative_sentiment_score": 0.0}, + {"hadm_id": 3, "noncompliance_score": 3.0, "autopsy_score": 30.0, "negative_sentiment_score": 0.2}, + ] + ) + normalized = z_normalize_scores( + raw, + columns=[ + "noncompliance_score", + "autopsy_score", + "negative_sentiment_score", + ], + ) + + for col in [ + "noncompliance_score", + "autopsy_score", + "negative_sentiment_score", + ]: + self.assertAlmostEqual(normalized[col].mean(), 0.0, places=7) + self.assertAlmostEqual(normalized[col].std(ddof=0), 1.0, places=7) + + def test_z_normalize_scores_returns_zero_for_zero_variance_columns(self): + z_normalize_scores = self._get_callable("z_normalize_scores") + raw = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score": 5.0}, + {"hadm_id": 2, "noncompliance_score": 5.0}, + {"hadm_id": 3, "noncompliance_score": 5.0}, + ] + ) + normalized = z_normalize_scores(raw, columns=["noncompliance_score"]) + self.assertTrue((normalized["noncompliance_score"] == 0.0).all()) + + def test_build_acuity_scores_produces_unique_admission_level_table(self): + build_acuity_scores = self._get_callable("build_acuity_scores") + acuity = build_acuity_scores(self.oasis, self.sapsii) + self.assertIsInstance(acuity, pd.DataFrame) + self.assertTrue({"hadm_id", "oasis", "sapsii"}.issubset(acuity.columns)) + self._assert_hadm_unique(acuity, "Acuity scores") + self.assertEqual(set(acuity["hadm_id"]), {101, 103, 106, 107}) + + def test_build_acuity_scores_raises_clear_error_when_required_columns_are_missing(self): + build_acuity_scores = self._get_callable("build_acuity_scores") + oasis_missing = self.oasis.drop(columns=["oasis"]) + with self.assertRaisesRegex(ValueError, "oasis"): + build_acuity_scores(oasis_missing, self.sapsii) + + def test_build_acuity_scores_uses_max_when_multiple_icu_stays_share_a_hadm_id(self): + build_acuity_scores = self._get_callable("build_acuity_scores") + oasis = pd.DataFrame( + 
[ + {"hadm_id": 101, "icustay_id": 1011, "oasis": 15}, + {"hadm_id": 101, "icustay_id": 1012, "oasis": 22}, + ] + ) + sapsii = pd.DataFrame( + [ + {"hadm_id": 101, "icustay_id": 1011, "sapsii": 42}, + {"hadm_id": 101, "icustay_id": 1012, "sapsii": 50}, + ] + ) + + acuity = build_acuity_scores(oasis, sapsii).set_index("hadm_id") + self.assertEqual(acuity.loc[101, "oasis"], 22) + self.assertEqual(acuity.loc[101, "sapsii"], 50) + + def test_build_proxy_probability_scores_fits_estimator_and_uses_predict_proba_output(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "feature_a": 1, "feature_b": 1}, + {"hadm_id": 103, "feature_a": 0, "feature_b": 0}, + {"hadm_id": 106, "feature_a": 1, "feature_b": 0}, + {"hadm_id": 107, "feature_a": 0, "feature_b": 0}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 1}, + {"hadm_id": 103, "noncompliance_label": 0}, + {"hadm_id": 106, "noncompliance_label": 1}, + {"hadm_id": 107, "noncompliance_label": 0}, + ] + ) + created = [] + + def estimator_factory(): + estimator = _FakeProbEstimator([0.9, 0.2, 0.8, 0.1]) + created.append(estimator) + return estimator + + scores = build_proxy_probability_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column="noncompliance_label", + estimator_factory=estimator_factory, + ) + + self.assertEqual(len(created), 1) + self.assertTrue(created[0].was_fit) + self.assertEqual(list(created[0].fit_y), [1, 0, 1, 0]) + self.assertNotIn( + "hadm_id", + set(created[0].fit_X.columns), + msg="hadm_id must not be used as a predictive feature", + ) + self._assert_hadm_unique(scores, "Proxy probability scores") + by_hadm = scores.set_index("hadm_id") + self.assertAlmostEqual(by_hadm.loc[101, "noncompliance_score"], 0.9) + self.assertAlmostEqual(by_hadm.loc[103, "noncompliance_score"], 0.2) + self.assertAlmostEqual(by_hadm.loc[106, "noncompliance_score"], 
0.8) + self.assertAlmostEqual(by_hadm.loc[107, "noncompliance_score"], 0.1) + + def test_build_proxy_probability_scores_uses_l1_liblinear_logistic_regression_by_default(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "feature_a": 1}, + {"hadm_id": 103, "feature_a": 0}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 1}, + {"hadm_id": 103, "noncompliance_label": 0}, + ] + ) + + created = [] + + class _RecordingLogisticRegression: + def __init__(self, *args, **kwargs): + created.append(kwargs) + + def fit(self, X, y): + return self + + def predict_proba(self, X): + return [[0.1, 0.9] for _ in range(len(X))] + + with patch.object(self.module, "LogisticRegression", _RecordingLogisticRegression): + build_proxy_probability_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column="noncompliance_label", + ) + + self.assertEqual(len(created), 1) + self.assertEqual(created[0].get("penalty"), "l1") + self.assertEqual(created[0].get("solver"), "liblinear") + self.assertEqual(created[0].get("max_iter"), 1000) + + def test_build_proxy_probability_scores_sorts_by_hadm_and_aligns_features_with_labels(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 106, "feature_a": 0, "feature_b": 1}, + {"hadm_id": 101, "feature_a": 1, "feature_b": 0}, + {"hadm_id": 103, "feature_a": 0, "feature_b": 0}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 103, "noncompliance_label": 0}, + {"hadm_id": 101, "noncompliance_label": 1}, + {"hadm_id": 106, "noncompliance_label": 1}, + ] + ) + created = [] + + def estimator_factory(): + estimator = _FakeProbEstimator([0.9, 0.2, 0.8]) + created.append(estimator) + return estimator + + scores = build_proxy_probability_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + 
label_column="noncompliance_label", + estimator_factory=estimator_factory, + ) + + self.assertEqual(list(created[0].fit_X["feature_a"]), [1, 0, 0]) + self.assertEqual(list(created[0].fit_X["feature_b"]), [0, 0, 1]) + self.assertEqual(list(created[0].fit_y), [1, 0, 1]) + self.assertEqual(list(scores["hadm_id"]), [101, 103, 106]) + + def test_build_proxy_probability_scores_raises_clear_error_when_required_columns_are_missing(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + note_labels_missing = pd.DataFrame([{"hadm_id": 101}]) + feature_matrix = pd.DataFrame([{"hadm_id": 101, "feature_a": 1}]) + with self.assertRaisesRegex(ValueError, "noncompliance_label"): + build_proxy_probability_scores( + feature_matrix=feature_matrix, + note_labels=note_labels_missing, + label_column="noncompliance_label", + ) + + def test_build_negative_sentiment_scores_negates_sentiment_polarity(self): + build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "very negative"}, + {"hadm_id": 103, "note_text": "neutral"}, + {"hadm_id": 106, "note_text": "positive"}, + ] + ) + polarity_map = { + "very negative": -0.6, + "neutral": 0.0, + "positive": 0.25, + } + + def sentiment_fn(text): + return (polarity_map[text], 0.0) + + scores = build_negative_sentiment_scores( + note_corpus, + sentiment_fn=sentiment_fn, + ) + + self._assert_hadm_unique(scores, "Negative sentiment scores") + by_hadm = scores.set_index("hadm_id") + self.assertAlmostEqual(by_hadm.loc[101, "negative_sentiment_score"], 0.6) + self.assertAlmostEqual(by_hadm.loc[103, "negative_sentiment_score"], 0.0) + self.assertAlmostEqual(by_hadm.loc[106, "negative_sentiment_score"], -0.25) + + def test_build_negative_sentiment_scores_passes_whitespace_cleaned_text_to_sentiment(self): + build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") + note_corpus = 
pd.DataFrame( + [ + { + "hadm_id": 201, + "note_text": "Patient refuses \n treatment Date:[**5-1-18**]", + } + ] + ) + seen = [] + + def sentiment_fn(text): + seen.append(text) + return (-0.3, 0.0) + + scores = build_negative_sentiment_scores(note_corpus, sentiment_fn=sentiment_fn) + + self.assertEqual(seen, ["Patient refuses treatment Date:[**5-1-18**]"]) + self.assertAlmostEqual(scores.loc[0, "negative_sentiment_score"], 0.3) + + def test_build_negative_sentiment_scores_handles_empty_notes_as_zero(self): + build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": ""}, + {"hadm_id": 103, "note_text": "non-empty"}, + ] + ) + + def sentiment_fn(text): + if text == "": + raise AssertionError("sentiment_fn should not be called on empty note text") + return (0.4, 0.0) + + scores = build_negative_sentiment_scores(note_corpus, sentiment_fn=sentiment_fn) + by_hadm = scores.set_index("hadm_id") + self.assertAlmostEqual(by_hadm.loc[101, "negative_sentiment_score"], 0.0) + self.assertAlmostEqual(by_hadm.loc[103, "negative_sentiment_score"], -0.4) + + def test_build_negative_sentiment_scores_raises_clear_error_when_required_columns_are_missing(self): + build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") + note_corpus_missing = pd.DataFrame([{"hadm_id": 101}]) + with self.assertRaisesRegex(ValueError, "note_text"): + build_negative_sentiment_scores(note_corpus_missing) + + def test_build_mistrust_score_table_constructs_all_three_normalized_scores_from_inputs(self): + build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "feature_a": 1, "feature_b": 1}, + {"hadm_id": 103, "feature_a": 0, "feature_b": 0}, + {"hadm_id": 106, "feature_a": 1, "feature_b": 0}, + {"hadm_id": 107, "feature_a": 0, "feature_b": 0}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, 
"noncompliance_label": 1, "autopsy_label": 1}, + {"hadm_id": 103, "noncompliance_label": 0, "autopsy_label": 0}, + {"hadm_id": 106, "noncompliance_label": 1, "autopsy_label": 0}, + {"hadm_id": 107, "noncompliance_label": 0, "autopsy_label": 1}, + ] + ) + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "negative note"}, + {"hadm_id": 103, "note_text": "neutral note"}, + {"hadm_id": 106, "note_text": "slightly positive note"}, + {"hadm_id": 107, "note_text": "very positive note"}, + ] + ) + probability_sequences = [ + [0.9, 0.2, 0.8, 0.1], + [0.7, 0.1, 0.3, 0.6], + ] + created = [] + + def estimator_factory(): + estimator = _FakeProbEstimator(probability_sequences[len(created)]) + created.append(estimator) + return estimator + + polarity_map = { + "negative note": -0.5, + "neutral note": 0.0, + "slightly positive note": 0.2, + "very positive note": 0.6, + } + + def sentiment_fn(text): + return (polarity_map[text], 0.0) + + scores = build_mistrust_score_table( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + estimator_factory=estimator_factory, + sentiment_fn=sentiment_fn, + ) + + self.assertEqual(len(created), 2, msg="Two proxy models should be fit: noncompliance and autopsy") + self.assertTrue(all(est.was_fit for est in created)) + self._assert_hadm_unique(scores, "Mistrust score table") + required_columns = { + "hadm_id", + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + } + self.assertTrue(required_columns.issubset(scores.columns)) + + for col in [ + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + ]: + self.assertAlmostEqual(scores[col].mean(), 0.0, places=7) + self.assertAlmostEqual(scores[col].std(ddof=0), 1.0, places=7) + + by_hadm = scores.set_index("hadm_id") + self.assertGreater( + by_hadm.loc[101, "noncompliance_score_z"], + by_hadm.loc[103, "noncompliance_score_z"], + ) + self.assertGreater( + by_hadm.loc[101, "autopsy_score_z"], + 
by_hadm.loc[103, "autopsy_score_z"], + ) + self.assertGreater( + by_hadm.loc[101, "negative_sentiment_score_z"], + by_hadm.loc[107, "negative_sentiment_score_z"], + msg="Negative sentiment score must be based on -1 * polarity before normalization", + ) + + def test_build_mistrust_score_table_keeps_only_hadm_ids_present_in_all_score_sources(self): + build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "feature_a": 1}, + {"hadm_id": 103, "feature_a": 0}, + {"hadm_id": 106, "feature_a": 1}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 1, "autopsy_label": 0}, + {"hadm_id": 103, "noncompliance_label": 0, "autopsy_label": 1}, + {"hadm_id": 106, "noncompliance_label": 1, "autopsy_label": 0}, + ] + ) + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "negative"}, + {"hadm_id": 106, "note_text": "neutral"}, + ] + ) + + mistrust = build_mistrust_score_table( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.9, 0.2, 0.8]), + sentiment_fn=lambda text: (-0.1 if text == "negative" else 0.0, 0.0), + ) + + self.assertEqual(list(mistrust["hadm_id"]), [101, 106]) + + def test_build_mistrust_score_table_raises_clear_error_when_required_columns_are_missing(self): + build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + note_labels_missing = pd.DataFrame([{"hadm_id": 101, "noncompliance_label": 1}]) + feature_matrix = pd.DataFrame([{"hadm_id": 101, "feature_a": 1}]) + note_corpus = pd.DataFrame([{"hadm_id": 101, "note_text": "note"}]) + with self.assertRaisesRegex(ValueError, "autopsy_label"): + build_mistrust_score_table( + feature_matrix=feature_matrix, + note_labels=note_labels_missing, + note_corpus=note_corpus, + ) + + def test_build_final_model_table_contains_baseline_optional_features_and_targets(self): + build_base_admissions = 
self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, self.icustays) + + code_status_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + {"itemid": 2, "label": "Pain Level", "dbsource": "metavision"}, + {"itemid": 128, "label": "Code Status", "dbsource": "carevue"}, + {"itemid": 223758, "label": "Code Status", "dbsource": "metavision"}, + ] + ) + code_status_events = pd.DataFrame( + [ + {"hadm_id": 101, "itemid": 1, "value": "No", "icustay_id": 1011}, + {"hadm_id": 101, "itemid": 2, "value": "7-Mod to Severe", "icustay_id": 1011}, + {"hadm_id": 101, "itemid": 128, "value": "Full Code", "icustay_id": 1011}, + {"hadm_id": 103, "itemid": 128, "value": "DNR/DNI", "icustay_id": 1031}, + {"hadm_id": 104, "itemid": 128, "value": "Full Code", "icustay_id": 1041}, + {"hadm_id": 106, "itemid": 128, "value": "Full Code", "icustay_id": 1061}, + { + "hadm_id": 107, + "itemid": 223758, + "value": "Comfort Measures Only", + "icustay_id": 1071, + }, + ] + ) + + final_table = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=code_status_events, + d_items=code_status_items, + mistrust_scores=self.mistrust_scores, + include_race=True, + include_mistrust=True, + ) + + self.assertIsInstance(final_table, pd.DataFrame) + required_columns = { + "hadm_id", + "age", + "los_days", + "gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", + "race_white", + "race_black", + "race_asian", + "race_hispanic", + "race_native_american", + "race_other", + "noncompliance_score_z", + "autopsy_score_z", + 
"negative_sentiment_score_z", + "left_ama", + "code_status_dnr_dni_cmo", + "in_hospital_mortality", + } + self.assertTrue(required_columns.issubset(set(final_table.columns))) + self.assertEqual(set(final_table["hadm_id"]), {101, 103, 106, 107}) + + by_hadm = final_table.set_index("hadm_id") + self.assertEqual(by_hadm.loc[106, "left_ama"], 1) + self.assertEqual(by_hadm.loc[101, "left_ama"], 0) + self.assertEqual(by_hadm.loc[103, "code_status_dnr_dni_cmo"], 1) + self.assertEqual(by_hadm.loc[101, "code_status_dnr_dni_cmo"], 0) + self.assertEqual(by_hadm.loc[107, "code_status_dnr_dni_cmo"], 1) + self.assertEqual(by_hadm.loc[107, "in_hospital_mortality"], 1) + self.assertEqual(by_hadm.loc[101, "in_hospital_mortality"], 0) + self._assert_hadm_unique(final_table, "Final model table") + + def test_build_final_model_table_left_ama_requires_exact_discharge_location_match(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + + admissions = pd.DataFrame( + [ + { + "hadm_id": 201, + "subject_id": 21, + "admittime": "2100-01-01 00:00:00", + "dischtime": "2100-01-02 00:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "LEFT AGAINST MEDICAL ADVICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 202, + "subject_id": 22, + "admittime": "2100-02-01 00:00:00", + "dischtime": "2100-02-02 00:00:00", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "insurance": "Private", + "discharge_location": "TRANSFER AGAINST MEDICAL ADVICE REVIEW", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 21, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 22, "gender": "F", "dob": "2070-01-01 00:00:00"}, + ] + ) + icustays = 
pd.DataFrame( + [ + { + "hadm_id": 201, + "icustay_id": 2011, + "intime": "2100-01-01 00:00:00", + "outtime": "2100-01-01 13:00:00", + }, + { + "hadm_id": 202, + "icustay_id": 2021, + "intime": "2100-02-01 00:00:00", + "outtime": "2100-02-01 13:00:00", + }, + ] + ) + + base = build_base_admissions(admissions, patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, icustays) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": 201, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + }, + { + "hadm_id": 202, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + }, + ] + ) + empty_chartevents = pd.DataFrame(columns=["hadm_id", "itemid", "value", "icustay_id"]) + empty_d_items = pd.DataFrame(columns=["itemid", "label", "dbsource"]) + + final_table = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=empty_chartevents, + d_items=empty_d_items, + mistrust_scores=mistrust_scores, + include_race=True, + include_mistrust=True, + ).set_index("hadm_id") + + self.assertEqual(final_table.loc[201, "left_ama"], 1) + self.assertEqual(final_table.loc[202, "left_ama"], 0) + + def test_build_final_model_table_code_status_uses_only_required_itemids(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + + admissions = pd.DataFrame( + [ + { + "hadm_id": 301, + "subject_id": 31, + "admittime": "2100-03-01 00:00:00", + "dischtime": "2100-03-02 00:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 302, + "subject_id": 32, + "admittime": "2100-03-01 
00:00:00", + "dischtime": "2100-03-02 00:00:00", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "insurance": "Private", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 303, + "subject_id": 33, + "admittime": "2100-03-01 00:00:00", + "dischtime": "2100-03-02 00:00:00", + "ethnicity": "ASIAN", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 31, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 32, "gender": "F", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 33, "gender": "M", "dob": "2070-01-01 00:00:00"}, + ] + ) + icustays = pd.DataFrame( + [ + { + "hadm_id": 301, + "icustay_id": 3011, + "intime": "2100-03-01 00:00:00", + "outtime": "2100-03-01 13:00:00", + }, + { + "hadm_id": 302, + "icustay_id": 3021, + "intime": "2100-03-01 00:00:00", + "outtime": "2100-03-01 13:00:00", + }, + { + "hadm_id": 303, + "icustay_id": 3031, + "intime": "2100-03-01 00:00:00", + "outtime": "2100-03-01 13:00:00", + }, + ] + ) + d_items = pd.DataFrame( + [ + {"itemid": 999, "label": "Code Status", "dbsource": "carevue"}, + {"itemid": 128, "label": "Code Status", "dbsource": "carevue"}, + {"itemid": 223758, "label": "Code Status", "dbsource": "metavision"}, + ] + ) + chartevents = pd.DataFrame( + [ + {"hadm_id": 301, "itemid": 999, "value": "DNR/DNI", "icustay_id": 3011}, + {"hadm_id": 302, "itemid": 128, "value": "DNR/DNI", "icustay_id": 3021}, + { + "hadm_id": 303, + "itemid": 223758, + "value": "Comfort Measures Only", + "icustay_id": 3031, + }, + ] + ) + + base = build_base_admissions(admissions, patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, icustays) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": 301, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + }, + { + "hadm_id": 302, + 
"noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + }, + { + "hadm_id": 303, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + }, + ] + ) + + final_table = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=chartevents, + d_items=d_items, + mistrust_scores=mistrust_scores, + include_race=True, + include_mistrust=True, + ).set_index("hadm_id") + + self.assertEqual(final_table.loc[301, "code_status_dnr_dni_cmo"], 0) + self.assertEqual(final_table.loc[302, "code_status_dnr_dni_cmo"], 1) + self.assertEqual(final_table.loc[303, "code_status_dnr_dni_cmo"], 1) + + def test_build_code_status_target_excludes_admissions_without_charted_code_status(self): + build_code_status_target = getattr(self.module, "_build_code_status_target") + d_items = pd.DataFrame( + [ + {"itemid": 128, "label": "Code Status", "dbsource": "carevue"}, + {"itemid": 223758, "label": "Code Status", "dbsource": "metavision"}, + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + ] + ) + chartevents = pd.DataFrame( + [ + {"hadm_id": 601, "itemid": 128, "value": "DNR/DNI", "icustay_id": 6011}, + {"hadm_id": 602, "itemid": 1, "value": "No", "icustay_id": 6021}, + ] + ) + + target = build_code_status_target(chartevents, d_items) + self.assertEqual(set(target["hadm_id"]), {601}) + + def test_build_final_model_table_supports_baseline_only_configuration(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, self.icustays) + final_table = build_final_model_table( + 
demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=self.mistrust_scores, + include_race=False, + include_mistrust=False, + ) + self.assertNotIn("race_white", final_table.columns) + self.assertNotIn("noncompliance_score_z", final_table.columns) + self.assertIn("age", final_table.columns) + self.assertIn("left_ama", final_table.columns) + + def test_build_final_model_table_baseline_only_columns_match_required_set(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, self.icustays) + final_table = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=self.mistrust_scores, + include_race=False, + include_mistrust=False, + ) + expected_columns = { + "hadm_id", + "age", + "los_days", + "gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", + "left_ama", + "code_status_dnr_dni_cmo", + "in_hospital_mortality", + } + self.assertEqual(set(final_table.columns), expected_columns) + self.assertEqual(len(final_table.columns), 11) + + def test_build_final_model_table_race_one_hot_covers_all_required_categories(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + + admissions = pd.DataFrame( + [ + { + "hadm_id": 701, + 
"subject_id": 71, + "admittime": "2100-07-01 00:00:00", + "dischtime": "2100-07-02 00:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 702, + "subject_id": 72, + "admittime": "2100-07-01 00:00:00", + "dischtime": "2100-07-02 00:00:00", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "insurance": "Private", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 703, + "subject_id": 73, + "admittime": "2100-07-01 00:00:00", + "dischtime": "2100-07-02 00:00:00", + "ethnicity": "ASIAN - CHINESE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 704, + "subject_id": 74, + "admittime": "2100-07-01 00:00:00", + "dischtime": "2100-07-02 00:00:00", + "ethnicity": "HISPANIC OR LATINO", + "insurance": "Private", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 705, + "subject_id": 75, + "admittime": "2100-07-01 00:00:00", + "dischtime": "2100-07-02 00:00:00", + "ethnicity": "AMERICAN INDIAN/ALASKA NATIVE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 706, + "subject_id": 76, + "admittime": "2100-07-01 00:00:00", + "dischtime": "2100-07-02 00:00:00", + "ethnicity": "PATIENT DECLINED TO ANSWER", + "insurance": "Private", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 71, "gender": "M", "dob": "2070-07-01 00:00:00"}, + {"subject_id": 72, "gender": "F", "dob": "2070-07-01 00:00:00"}, + {"subject_id": 73, "gender": "M", "dob": "2070-07-01 00:00:00"}, + {"subject_id": 74, "gender": "F", "dob": "2070-07-01 00:00:00"}, + {"subject_id": 75, "gender": "M", "dob": 
"2070-07-01 00:00:00"}, + {"subject_id": 76, "gender": "F", "dob": "2070-07-01 00:00:00"}, + ] + ) + icustays = pd.DataFrame( + [ + {"hadm_id": 701, "icustay_id": 7011, "intime": "2100-07-01 00:00:00", "outtime": "2100-07-01 13:00:00"}, + {"hadm_id": 702, "icustay_id": 7021, "intime": "2100-07-01 00:00:00", "outtime": "2100-07-01 13:00:00"}, + {"hadm_id": 703, "icustay_id": 7031, "intime": "2100-07-01 00:00:00", "outtime": "2100-07-01 13:00:00"}, + {"hadm_id": 704, "icustay_id": 7041, "intime": "2100-07-01 00:00:00", "outtime": "2100-07-01 13:00:00"}, + {"hadm_id": 705, "icustay_id": 7051, "intime": "2100-07-01 00:00:00", "outtime": "2100-07-01 13:00:00"}, + {"hadm_id": 706, "icustay_id": 7061, "intime": "2100-07-01 00:00:00", "outtime": "2100-07-01 13:00:00"}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in [701, 702, 703, 704, 705, 706] + ] + ) + + base = build_base_admissions(admissions, patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, icustays) + final_table = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=pd.DataFrame(columns=["hadm_id", "itemid", "value", "icustay_id"]), + d_items=pd.DataFrame(columns=["itemid", "label", "dbsource"]), + mistrust_scores=mistrust_scores, + include_race=True, + include_mistrust=False, + ).set_index("hadm_id") + + self.assertEqual(final_table.loc[701, "race_white"], 1) + self.assertEqual(final_table.loc[702, "race_black"], 1) + self.assertEqual(final_table.loc[703, "race_asian"], 1) + self.assertEqual(final_table.loc[704, "race_hispanic"], 1) + self.assertEqual(final_table.loc[705, "race_native_american"], 1) + self.assertEqual(final_table.loc[706, "race_other"], 1) + + def test_write_minimal_deliverables_creates_required_artifact_files(self): + write_minimal_deliverables = 
self._get_callable("write_minimal_deliverables") + artifacts = { + "base_admissions": pd.DataFrame([{"hadm_id": 101}]), + "eol_cohort": pd.DataFrame([{"hadm_id": 101}]), + "all_cohort": pd.DataFrame([{"hadm_id": 101}]), + "treatment_totals": pd.DataFrame([{"hadm_id": 101, "total_vent_min": 810.0, "total_vaso_min": 0.0}]), + "chartevent_feature_matrix": pd.DataFrame([{"hadm_id": 101, "feature_a": 1}]), + "note_labels": pd.DataFrame([{"hadm_id": 101, "noncompliance_label": 1, "autopsy_label": 1}]), + "mistrust_scores": pd.DataFrame([{"hadm_id": 101, "noncompliance_score_z": 1.0}]), + "acuity_scores": pd.DataFrame([{"hadm_id": 101, "oasis": 15, "sapsii": 42}]), + "final_model_table": pd.DataFrame([{"hadm_id": 101, "left_ama": 0}]), + } + + with tempfile.TemporaryDirectory() as temp_dir: + output_dir = Path(temp_dir) + write_minimal_deliverables(artifacts, output_dir) + + expected_files = { + "base_admissions.csv", + "eol_cohort.csv", + "all_cohort.csv", + "treatment_totals.csv", + "chartevent_feature_matrix.csv", + "note_labels.csv", + "mistrust_scores.csv", + "acuity_scores.csv", + "final_model_table.csv", + } + written_files = {path.name for path in output_dir.iterdir()} + self.assertEqual(expected_files, written_files) + + def test_write_minimal_deliverables_sorts_by_hadm_id_and_writes_without_index(self): + write_minimal_deliverables = self._get_callable("write_minimal_deliverables") + artifacts = { + "base_admissions": pd.DataFrame([{"hadm_id": 103}, {"hadm_id": 101}]), + "eol_cohort": pd.DataFrame([{"hadm_id": 103}, {"hadm_id": 101}]), + "all_cohort": pd.DataFrame([{"hadm_id": 103}, {"hadm_id": 101}]), + "treatment_totals": pd.DataFrame( + [ + {"hadm_id": 103, "total_vent_min": 0.0, "total_vaso_min": 840.0}, + {"hadm_id": 101, "total_vent_min": 810.0, "total_vaso_min": 0.0}, + ] + ), + "chartevent_feature_matrix": pd.DataFrame( + [ + {"hadm_id": 103, "feature_a": 1}, + {"hadm_id": 101, "feature_a": 0}, + ] + ), + "note_labels": pd.DataFrame( + [ + {"hadm_id": 
103, "noncompliance_label": 0, "autopsy_label": 0}, + {"hadm_id": 101, "noncompliance_label": 1, "autopsy_label": 1}, + ] + ), + "mistrust_scores": pd.DataFrame( + [ + {"hadm_id": 103, "noncompliance_score_z": -0.3}, + {"hadm_id": 101, "noncompliance_score_z": 1.2}, + ] + ), + "acuity_scores": pd.DataFrame( + [ + {"hadm_id": 103, "oasis": 20, "sapsii": 55}, + {"hadm_id": 101, "oasis": 15, "sapsii": 42}, + ] + ), + "final_model_table": pd.DataFrame( + [ + {"hadm_id": 103, "left_ama": 0}, + {"hadm_id": 101, "left_ama": 0}, + ] + ), + } + with tempfile.TemporaryDirectory() as temp_dir: + output_dir = Path(temp_dir) + write_minimal_deliverables(artifacts, output_dir) + base_admissions = pd.read_csv(output_dir / "base_admissions.csv") + self.assertEqual(list(base_admissions["hadm_id"]), [101, 103]) + self.assertNotIn("Unnamed: 0", base_admissions.columns) + + def test_write_minimal_deliverables_raises_when_required_artifact_is_missing(self): + write_minimal_deliverables = self._get_callable("write_minimal_deliverables") + artifacts = { + "base_admissions": pd.DataFrame([{"hadm_id": 101}]), + "eol_cohort": pd.DataFrame([{"hadm_id": 101}]), + "all_cohort": pd.DataFrame([{"hadm_id": 101}]), + "treatment_totals": pd.DataFrame( + [{"hadm_id": 101, "total_vent_min": 810.0, "total_vaso_min": 0.0}] + ), + "chartevent_feature_matrix": pd.DataFrame([{"hadm_id": 101, "feature_a": 1}]), + "note_labels": pd.DataFrame( + [{"hadm_id": 101, "noncompliance_label": 1, "autopsy_label": 0}] + ), + "mistrust_scores": pd.DataFrame( + [{"hadm_id": 101, "noncompliance_score_z": 0.0}] + ), + "acuity_scores": pd.DataFrame([{"hadm_id": 101, "oasis": 15, "sapsii": 42}]), + } + + with tempfile.TemporaryDirectory() as temp_dir: + with self.assertRaises(KeyError): + write_minimal_deliverables(artifacts, Path(temp_dir)) + + def test_write_minimal_deliverables_sorts_nullable_integer_hadm_ids(self): + write_minimal_deliverables = self._get_callable("write_minimal_deliverables") + artifacts = { + 
"base_admissions": pd.DataFrame({"hadm_id": pd.Series([103, 101], dtype="Int64")}), + "eol_cohort": pd.DataFrame({"hadm_id": pd.Series([103, 101], dtype="Int64")}), + "all_cohort": pd.DataFrame({"hadm_id": pd.Series([103, 101], dtype="Int64")}), + "treatment_totals": pd.DataFrame( + { + "hadm_id": pd.Series([103, 101], dtype="Int64"), + "total_vent_min": [0.0, 810.0], + "total_vaso_min": [840.0, 0.0], + } + ), + "chartevent_feature_matrix": pd.DataFrame({"hadm_id": pd.Series([103, 101], dtype="Int64")}), + "note_labels": pd.DataFrame( + { + "hadm_id": pd.Series([103, 101], dtype="Int64"), + "noncompliance_label": [0, 1], + "autopsy_label": [0, 1], + } + ), + "mistrust_scores": pd.DataFrame( + { + "hadm_id": pd.Series([103, 101], dtype="Int64"), + "noncompliance_score_z": [-0.3, 1.2], + } + ), + "acuity_scores": pd.DataFrame( + { + "hadm_id": pd.Series([103, 101], dtype="Int64"), + "oasis": [20, 15], + "sapsii": [55, 42], + } + ), + "final_model_table": pd.DataFrame( + { + "hadm_id": pd.Series([103, 101], dtype="Int64"), + "left_ama": [0, 0], + } + ), + } + + with tempfile.TemporaryDirectory() as temp_dir: + output_dir = Path(temp_dir) + write_minimal_deliverables(artifacts, output_dir) + final_table = pd.read_csv(output_dir / "final_model_table.csv") + self.assertEqual(list(final_table["hadm_id"]), [101, 103]) + + def test_data_contract_build_base_admissions_output_schema_dtypes_and_uniqueness_are_stable(self): + build_base_admissions = self._get_callable("build_base_admissions") + base = build_base_admissions(self.admissions, self.patients) + + self.assertEqual( + base.columns.tolist(), + [ + "hadm_id", + "subject_id", + "admittime", + "dischtime", + "ethnicity", + "insurance", + "discharge_location", + "hospital_expire_flag", + "has_chartevents_data", + "gender", + "dob", + ], + ) + self.assertEqual(base["hadm_id"].tolist(), sorted(base["hadm_id"].tolist())) + self._assert_hadm_unique(base, "Base admissions contract") + 
self.assertTrue(pd.api.types.is_datetime64_any_dtype(base["admittime"])) + self.assertTrue(pd.api.types.is_datetime64_any_dtype(base["dischtime"])) + self.assertTrue(pd.api.types.is_datetime64_any_dtype(base["dob"])) + + def test_data_contract_build_demographics_table_output_dtypes_and_uniqueness_are_stable(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + + self.assertEqual(demographics["hadm_id"].tolist(), sorted(demographics["hadm_id"].tolist())) + self._assert_hadm_unique(demographics, "Demographics contract") + self.assertTrue(pd.api.types.is_float_dtype(demographics["age"])) + self.assertTrue(pd.api.types.is_float_dtype(demographics["los_hours"])) + self.assertTrue(pd.api.types.is_float_dtype(demographics["los_days"])) + self.assertTrue(pd.api.types.is_datetime64_any_dtype(demographics["admittime"])) + self.assertTrue(pd.api.types.is_datetime64_any_dtype(demographics["dischtime"])) + + def test_data_contract_build_eol_and_all_cohorts_are_sorted_unique_and_key_aligned(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_eol_cohort = self._get_callable("build_eol_cohort") + build_all_cohort = self._get_callable("build_all_cohort") + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + eol = build_eol_cohort(base, demographics) + all_cohort = build_all_cohort(base, self.icustays) + + self.assertEqual(eol["hadm_id"].tolist(), sorted(eol["hadm_id"].tolist())) + self.assertEqual(all_cohort["hadm_id"].tolist(), sorted(all_cohort["hadm_id"].tolist())) + self._assert_hadm_unique(eol, "EOL cohort contract") + self._assert_hadm_unique(all_cohort, "ALL cohort contract") + 
self.assertTrue(set(eol["hadm_id"]).issubset(set(base["hadm_id"]))) + self.assertTrue(set(all_cohort["hadm_id"]).issubset(set(base["hadm_id"]))) + self.assertTrue(eol["discharge_category"].notna().all()) + + def test_data_contract_build_treatment_totals_output_schema_dtypes_and_uniqueness_are_stable(self): + build_treatment_totals = self._get_callable("build_treatment_totals") + totals = build_treatment_totals(self.icustays, self.ventdurations, self.vasopressordurations) + + self.assertEqual(totals.columns.tolist(), ["hadm_id", "total_vent_min", "total_vaso_min"]) + self.assertEqual(totals["hadm_id"].tolist(), sorted(totals["hadm_id"].tolist())) + self._assert_hadm_unique(totals, "Treatment totals contract") + self.assertTrue(pd.api.types.is_float_dtype(pd.to_numeric(totals["total_vent_min"], errors="coerce"))) + self.assertTrue(pd.api.types.is_float_dtype(pd.to_numeric(totals["total_vaso_min"], errors="coerce"))) + + def test_data_contract_build_note_corpus_and_labels_outputs_are_sorted_unique_and_typed(self): + build_note_corpus = self._get_callable("build_note_corpus") + build_note_labels = self._get_callable("build_note_labels") + corpus = build_note_corpus(self.noteevents, all_hadm_ids=[101, 103, 104, 106, 107]) + labels = build_note_labels(self.noteevents, all_hadm_ids=[101, 103, 104, 106, 107]) + + self.assertEqual(corpus["hadm_id"].tolist(), [101, 103, 104, 106, 107]) + self.assertEqual(labels["hadm_id"].tolist(), [101, 103, 104, 106, 107]) + self._assert_hadm_unique(corpus, "Note corpus contract") + self._assert_hadm_unique(labels, "Note labels contract") + self.assertTrue(pd.api.types.is_object_dtype(corpus["note_text"])) + self.assertTrue(pd.api.types.is_integer_dtype(labels["noncompliance_label"])) + self.assertTrue(pd.api.types.is_integer_dtype(labels["autopsy_label"])) + self.assertTrue(set(labels["noncompliance_label"].unique()).issubset({0, 1})) + self.assertTrue(set(labels["autopsy_label"].unique()).issubset({0, 1})) + + def 
test_data_contract_build_chartevent_feature_matrix_output_is_binary_integer_sorted_and_unique(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + matrix = build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + all_hadm_ids=[101, 103, 104, 106, 107], + ) + + self.assertEqual(matrix["hadm_id"].tolist(), [101, 103, 104, 106, 107]) + self._assert_hadm_unique(matrix, "Feature matrix contract") + for column in matrix.columns: + if column == "hadm_id": + continue + self.assertTrue(pd.api.types.is_integer_dtype(matrix[column]), msg=column) + self.assertTrue(set(matrix[column].dropna().unique()).issubset({0, 1}), msg=column) + + def test_data_contract_build_acuity_scores_output_is_sorted_unique_and_numeric(self): + build_acuity_scores = self._get_callable("build_acuity_scores") + acuity = build_acuity_scores(self.oasis, self.sapsii) + + self.assertEqual(acuity["hadm_id"].tolist(), sorted(acuity["hadm_id"].tolist())) + self._assert_hadm_unique(acuity, "Acuity contract") + self.assertTrue(pd.api.types.is_numeric_dtype(acuity["oasis"])) + self.assertTrue(pd.api.types.is_numeric_dtype(acuity["sapsii"])) + + def test_data_contract_build_final_model_table_binary_columns_are_integer_and_zero_one(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, self.icustays) + final_table = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=self.mistrust_scores, + include_race=True, + include_mistrust=True, + ) + + binary_columns = [ + 
"gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", + "race_white", + "race_black", + "race_asian", + "race_hispanic", + "race_native_american", + "race_other", + "left_ama", + "code_status_dnr_dni_cmo", + "in_hospital_mortality", + ] + for column in binary_columns: + self.assertTrue(pd.api.types.is_integer_dtype(final_table[column]), msg=column) + self.assertTrue(set(final_table[column].unique()).issubset({0, 1}), msg=column) + + def test_data_contract_write_minimal_deliverables_round_trip_preserves_columns_and_row_counts(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_eol_cohort = self._get_callable("build_eol_cohort") + build_all_cohort = self._get_callable("build_all_cohort") + build_treatment_totals = self._get_callable("build_treatment_totals") + build_note_corpus = self._get_callable("build_note_corpus") + build_note_labels = self._get_callable("build_note_labels") + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + build_acuity_scores = self._get_callable("build_acuity_scores") + build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + build_final_model_table = self._get_callable("build_final_model_table") + write_minimal_deliverables = self._get_callable("write_minimal_deliverables") + + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + eol = build_eol_cohort(base, demographics) + all_cohort = build_all_cohort(base, self.icustays) + treatments = build_treatment_totals(self.icustays, self.ventdurations, self.vasopressordurations) + note_corpus = build_note_corpus(self.noteevents, all_hadm_ids=list(all_cohort["hadm_id"])) + note_labels = build_note_labels(self.noteevents, all_hadm_ids=list(all_cohort["hadm_id"])) + feature_matrix = build_chartevent_feature_matrix( + self.chartevents, 
+ self.d_items, + allowed_labels={"Education Readiness", "Pain Level"}, + all_hadm_ids=list(all_cohort["hadm_id"]), + ) + acuity = build_acuity_scores(self.oasis, self.sapsii) + mistrust_scores = build_mistrust_score_table( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.9, 0.2, 0.8, 0.1]), + sentiment_fn=lambda text: ( + { + "Patient refuses treatment and was noncompliant with medication. Date:[**5-1-18**] Autopsy was discussed with the family.": -0.5, + "Cooperative patient. Follows commands.": 0.0, + "Patient remains nonadherent with follow up plan.": -0.2, + "": 0.0, + }[text], + 0.0, + ), + ) + final_table = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=mistrust_scores, + include_race=True, + include_mistrust=True, + ) + artifacts = { + "base_admissions": base, + "eol_cohort": eol, + "all_cohort": all_cohort, + "treatment_totals": treatments, + "chartevent_feature_matrix": feature_matrix, + "note_labels": note_labels, + "mistrust_scores": mistrust_scores, + "acuity_scores": acuity, + "final_model_table": final_table, + } + + with tempfile.TemporaryDirectory() as temp_dir: + output_dir = Path(temp_dir) + write_minimal_deliverables(artifacts, output_dir) + round_trip = { + "base_admissions": pd.read_csv(output_dir / "base_admissions.csv"), + "eol_cohort": pd.read_csv(output_dir / "eol_cohort.csv"), + "all_cohort": pd.read_csv(output_dir / "all_cohort.csv"), + "treatment_totals": pd.read_csv(output_dir / "treatment_totals.csv"), + "chartevent_feature_matrix": pd.read_csv(output_dir / "chartevent_feature_matrix.csv"), + "note_labels": pd.read_csv(output_dir / "note_labels.csv"), + "mistrust_scores": pd.read_csv(output_dir / "mistrust_scores.csv"), + "acuity_scores": pd.read_csv(output_dir / "acuity_scores.csv"), + "final_model_table": 
pd.read_csv(output_dir / "final_model_table.csv"), + } + + for key, original in artifacts.items(): + self.assertEqual(round_trip[key].shape[0], original.shape[0], msg=key) + self.assertEqual(round_trip[key].columns.tolist(), original.columns.tolist(), msg=key) + + def test_end_to_end_artifact_assembly_smoke_spec(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_eol_cohort = self._get_callable("build_eol_cohort") + build_all_cohort = self._get_callable("build_all_cohort") + build_treatment_totals = self._get_callable("build_treatment_totals") + build_note_corpus = self._get_callable("build_note_corpus") + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + build_note_labels = self._get_callable("build_note_labels") + build_acuity_scores = self._get_callable("build_acuity_scores") + build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + build_final_model_table = self._get_callable("build_final_model_table") + write_minimal_deliverables = self._get_callable("write_minimal_deliverables") + + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + eol = build_eol_cohort(base, demographics) + all_cohort = build_all_cohort(base, self.icustays) + treatments = build_treatment_totals( + self.icustays, + self.ventdurations, + self.vasopressordurations, + ) + note_corpus = build_note_corpus( + self.noteevents, + all_hadm_ids=list(all_cohort["hadm_id"]), + ) + note_labels = build_note_labels( + self.noteevents, + all_hadm_ids=list(all_cohort["hadm_id"]), + ) + feature_matrix = build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + allowed_labels={"Education Readiness", "Pain Level"}, + all_hadm_ids=list(all_cohort["hadm_id"]), + ) + acuity = build_acuity_scores(self.oasis, self.sapsii) + mistrust_scores = build_mistrust_score_table( + 
feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.9, 0.2, 0.8, 0.1]), + sentiment_fn=lambda text: ( + { + "Patient refuses treatment and was noncompliant with medication. Date:[**5-1-18**] Autopsy was discussed with the family.": -0.5, + "Cooperative patient. Follows commands.": 0.0, + "Patient remains nonadherent with follow up plan.": -0.2, + "": 0.0, + }[text], + 0.0, + ), + ) + final_table = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=mistrust_scores, + include_race=True, + include_mistrust=True, + ) + + artifacts = { + "base_admissions": base, + "eol_cohort": eol, + "all_cohort": all_cohort, + "treatment_totals": treatments, + "chartevent_feature_matrix": feature_matrix, + "note_labels": note_labels, + "mistrust_scores": mistrust_scores, + "acuity_scores": acuity, + "final_model_table": final_table, + } + + with tempfile.TemporaryDirectory() as temp_dir: + output_dir = Path(temp_dir) + write_minimal_deliverables(artifacts, output_dir) + self.assertEqual(len(list(output_dir.iterdir())), 9) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/core/test_eol_mistrust_model.py b/tests/core/test_eol_mistrust_model.py new file mode 100644 index 000000000..d70433388 --- /dev/null +++ b/tests/core/test_eol_mistrust_model.py @@ -0,0 +1,1679 @@ +import importlib.util +import importlib +import unittest +from pathlib import Path +from unittest.mock import patch + +import pandas as pd + + +def _load_model_module(): + module_path = ( + Path(__file__).resolve().parents[2] / "pyhealth" / "models" / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.models.eol_mistrust_model_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + 
spec.loader.exec_module(module) + return module + + +def _load_dataset_module(): + module_path = ( + Path(__file__).resolve().parents[2] / "pyhealth" / "datasets" / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.datasets.eol_mistrust_model_integration_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +class _FakeProbEstimator: + def __init__(self, probabilities): + self.probabilities = list(probabilities) + self.was_fit = False + self.fit_X = None + self.fit_y = None + self.coef_ = None + + def fit(self, X, y): + self.was_fit = True + self.fit_X = X.copy() if hasattr(X, "copy") else X + self.fit_y = y.copy() if hasattr(y, "copy") else y + self.coef_ = [[0.1] * X.shape[1]] + return self + + def predict_proba(self, X): + probs = self.probabilities[: len(X)] + return [[1.0 - prob, prob] for prob in probs] + + +class _MalformedProbEstimator: + def fit(self, X, y): + del X, y + self.coef_ = [[0.1]] + return self + + def predict_proba(self, X): + return [[1.0] for _ in range(len(X))] + + +class _NoCoefEstimator: + def fit(self, X, y): + del X, y + return self + + +class _SplitRecorder: + def __init__(self): + self.calls = [] + + def __call__(self, X, y, test_size, random_state): + frame = X.reset_index(drop=True) + labels = pd.Series(y).reset_index(drop=True) + self.calls.append( + { + "random_state": random_state, + "test_size": test_size, + "n_rows": len(frame), + } + ) + train_idx = [0, 1, 2, 3] + test_idx = [4, 5] + return ( + frame.iloc[train_idx].copy(), + frame.iloc[test_idx].copy(), + labels.iloc[train_idx].copy(), + labels.iloc[test_idx].copy(), + ) + + +class _AUCRecorder: + def __init__(self, value=0.75): + self.value = float(value) + self.calls = [] + + def __call__(self, y_true, y_prob): + self.calls.append( + { + "y_true": list(pd.Series(y_true)), + "y_prob": list(pd.Series(y_prob)), + } + 
) + return self.value + + +class TestEOLMistrustModel(unittest.TestCase): + """Model-level unit tests for the EOL mistrust workflow.""" + + @classmethod + def setUpClass(cls): + cls.module = _load_model_module() + cls.dataset_module = _load_dataset_module() + + def setUp(self): + self.feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "Education Readiness: No": 1, "Pain Level: 7-Mod to Severe": 0}, + {"hadm_id": 102, "Education Readiness: No": 0, "Pain Level: 7-Mod to Severe": 1}, + {"hadm_id": 103, "Education Readiness: No": 1, "Pain Level: 7-Mod to Severe": 1}, + {"hadm_id": 104, "Education Readiness: No": 0, "Pain Level: 7-Mod to Severe": 0}, + {"hadm_id": 105, "Education Readiness: No": 1, "Pain Level: 7-Mod to Severe": 0}, + {"hadm_id": 106, "Education Readiness: No": 0, "Pain Level: 7-Mod to Severe": 1}, + ] + ) + self.note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 1, "autopsy_label": 0}, + {"hadm_id": 102, "noncompliance_label": 0, "autopsy_label": 1}, + {"hadm_id": 103, "noncompliance_label": 1, "autopsy_label": 0}, + {"hadm_id": 104, "noncompliance_label": 0, "autopsy_label": 1}, + {"hadm_id": 105, "noncompliance_label": 1, "autopsy_label": 0}, + {"hadm_id": 106, "noncompliance_label": 0, "autopsy_label": 1}, + ] + ) + self.note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "Patient is noncompliant and angry."}, + {"hadm_id": 102, "note_text": "Patient is calm and cooperative."}, + {"hadm_id": 103, "note_text": "Autopsy discussed with family."}, + {"hadm_id": 104, "note_text": "Patient refused medication repeatedly."}, + {"hadm_id": 105, "note_text": "Date:[**5-1-18**] good rapport."}, + {"hadm_id": 106, "note_text": "non-adher to follow up plan."}, + ] + ) + self.demographics = pd.DataFrame( + [ + {"hadm_id": 101, "race": "WHITE"}, + {"hadm_id": 102, "race": "BLACK"}, + {"hadm_id": 103, "race": "BLACK"}, + {"hadm_id": 104, "race": "WHITE"}, + {"hadm_id": 105, "race": "ASIAN"}, + {"hadm_id": 106, "race": "OTHER"}, 
+ ] + ) + self.eol_cohort = pd.DataFrame( + [ + {"hadm_id": 101, "race": "WHITE"}, + {"hadm_id": 102, "race": "BLACK"}, + {"hadm_id": 103, "race": "BLACK"}, + {"hadm_id": 104, "race": "WHITE"}, + ] + ) + self.treatment_totals = pd.DataFrame( + [ + {"hadm_id": 101, "total_vent_min": 10.0, "total_vaso_min": 5.0}, + {"hadm_id": 102, "total_vent_min": 40.0, "total_vaso_min": 20.0}, + {"hadm_id": 103, "total_vent_min": 80.0, "total_vaso_min": None}, + {"hadm_id": 104, "total_vent_min": 5.0, "total_vaso_min": 10.0}, + ] + ) + self.acuity_scores = pd.DataFrame( + [ + {"hadm_id": 101, "oasis": 10.0, "sapsii": 20.0}, + {"hadm_id": 102, "oasis": 15.0, "sapsii": 25.0}, + {"hadm_id": 103, "oasis": 20.0, "sapsii": 30.0}, + {"hadm_id": 104, "oasis": 25.0, "sapsii": 35.0}, + {"hadm_id": 105, "oasis": 30.0, "sapsii": 40.0}, + {"hadm_id": 106, "oasis": 35.0, "sapsii": 45.0}, + ] + ) + self.final_model_table = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "age": float(50 + index), + "los_days": float(1 + index), + "gender_f": int(index % 2 == 1), + "gender_m": int(index % 2 == 0), + "insurance_private": int(index % 2 == 0), + "insurance_public": int(index % 2 == 1), + "insurance_self_pay": 0, + "race_white": int(hadm_id in {101, 104}), + "race_black": int(hadm_id in {102, 103}), + "race_asian": int(hadm_id == 105), + "race_hispanic": 0, + "race_native_american": 0, + "race_other": int(hadm_id == 106), + "noncompliance_score_z": [-1.2, -0.8, -0.4, 0.2, 0.8, 1.4][index], + "autopsy_score_z": [1.2, 0.8, 0.4, -0.2, -0.8, -1.4][index], + "negative_sentiment_score_z": [-0.5, -0.2, 0.0, 0.3, 0.7, 1.1][index], + "left_ama": int(index % 2 == 1), + "code_status_dnr_dni_cmo": int(index % 2 == 0), + "in_hospital_mortality": int(index % 2 == 1), + } + for index, hadm_id in enumerate([101, 102, 103, 104, 105, 106]) + ] + ) + + def _get_callable(self, name): + self.assertTrue( + hasattr(self.module, name), + msg=f"Model module is missing expected callable: {name}", + ) + attr = 
getattr(self.module, name) + self.assertTrue(callable(attr), msg=f"Expected model attribute {name} to be callable") + return attr + + def _sentiment_fn(self, text): + return (-0.6 if ("non" in text or "refused" in text) else 0.2, 0.0) + + def test_module_exports_expected_core_api(self): + expected = { + "EOLMistrustModel", + "build_mistrust_score_table", + "evaluate_downstream_predictions", + "run_full_eol_mistrust_modeling", + "run_race_gap_analysis", + "run_trust_based_treatment_analysis", + } + self.assertTrue(expected.issubset(set(self.module.__all__))) + + def test_package_import_path_exposes_model_module_api(self): + imported = importlib.import_module("pyhealth.models.eol_mistrust") + self.assertTrue(hasattr(imported, "EOLMistrustModel")) + self.assertTrue(callable(getattr(imported, "build_mistrust_score_table"))) + + def test_get_downstream_task_map_returns_required_three_tasks(self): + task_map = self._get_callable("get_downstream_task_map")() + self.assertEqual( + list(task_map.keys()), + ["Left AMA", "Code Status", "In-hospital mortality"], + ) + self.assertEqual(len(task_map), 3) + + def test_get_downstream_feature_configurations_returns_required_widths_and_copy(self): + get_configs = self._get_callable("get_downstream_feature_configurations") + configs = get_configs() + self.assertEqual( + {name: len(columns) for name, columns in configs.items()}, + { + "Baseline": 7, + "Baseline + Race": 13, + "Baseline + Noncompliant": 8, + "Baseline + Autopsy": 8, + "Baseline + Neg-Sentiment": 8, + "Baseline + ALL": 16, + }, + ) + configs["Baseline"].append("should_not_leak") + fresh = get_configs() + self.assertNotIn("should_not_leak", fresh["Baseline"]) + + def test_downstream_configuration_names_membership_and_constant_lists_match_requirements(self): + configs = self._get_callable("get_downstream_feature_configurations")() + self.assertEqual( + list(configs.keys()), + [ + "Baseline", + "Baseline + Race", + "Baseline + Noncompliant", + "Baseline + Autopsy", + 
"Baseline + Neg-Sentiment", + "Baseline + ALL", + ], + ) + self.assertEqual( + self.module.MISTRUST_SCORE_COLUMNS, + ["noncompliance_score_z", "autopsy_score_z", "negative_sentiment_score_z"], + ) + self.assertEqual( + self.module.BASELINE_FEATURE_COLUMNS, + [ + "age", + "los_days", + "gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", + ], + ) + self.assertEqual( + configs["Baseline + ALL"], + self.module.BASELINE_FEATURE_COLUMNS + + self.module.RACE_FEATURE_COLUMNS + + self.module.MISTRUST_SCORE_COLUMNS, + ) + self.assertEqual(len(self.module.RACE_FEATURE_COLUMNS), 6) + + def test_fit_proxy_mistrust_model_uses_full_cohort_and_default_estimator_params(self): + fit_proxy_mistrust_model = self._get_callable("fit_proxy_mistrust_model") + created = [] + + class _RecordingLogisticRegression: + def __init__(self, *args, **kwargs): + del args + self.kwargs = kwargs + self.fit_X = None + self.fit_y = None + created.append(self) + + def fit(self, X, y): + self.fit_X = X.copy() + self.fit_y = y.copy() + self.coef_ = [[0.1] * X.shape[1]] + return self + + def predict_proba(self, X): + return [[0.4, 0.6] for _ in range(len(X))] + + with patch.object(self.module, "LogisticRegression", _RecordingLogisticRegression): + estimator = fit_proxy_mistrust_model( + self.feature_matrix, + self.note_labels, + "noncompliance_label", + ) + + self.assertEqual(len(created), 1) + self.assertIs(estimator, created[0]) + self.assertEqual(created[0].kwargs.get("penalty"), "l1") + self.assertEqual(created[0].kwargs.get("solver"), "liblinear") + self.assertEqual(created[0].kwargs.get("max_iter"), 1000) + self.assertEqual(len(created[0].fit_X), len(self.feature_matrix)) + self.assertEqual(len(created[0].fit_y), len(self.note_labels)) + + def test_build_proxy_probability_scores_returns_positive_class_probabilities_sorted(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + feature_matrix = 
self.feature_matrix.iloc[[2, 0, 1]].copy() + note_labels = self.note_labels.iloc[[1, 2, 0]].copy() + estimator = _FakeProbEstimator([0.1, 0.7, 0.4]) + + scores = build_proxy_probability_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column="noncompliance_label", + estimator_factory=lambda: estimator, + ) + + self.assertEqual(scores["hadm_id"].tolist(), [101, 102, 103]) + self.assertEqual(scores["noncompliance_score"].tolist(), [0.1, 0.7, 0.4]) + self.assertTrue(estimator.was_fit) + + def test_build_proxy_probability_scores_names_autopsy_output_column(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + scores = build_proxy_probability_scores( + feature_matrix=self.feature_matrix.iloc[:2], + note_labels=self.note_labels.iloc[:2], + label_column="autopsy_label", + estimator_factory=lambda: _FakeProbEstimator([0.3, 0.6]), + ) + self.assertEqual(scores.columns.tolist(), ["hadm_id", "autopsy_score"]) + + def test_build_proxy_probability_scores_missing_required_columns_raise_clear_errors(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + with self.assertRaisesRegex(ValueError, "noncompliance_label"): + build_proxy_probability_scores( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels.drop(columns=["noncompliance_label"]), + label_column="noncompliance_label", + estimator_factory=lambda: _FakeProbEstimator([0.1] * len(self.feature_matrix)), + ) + with self.assertRaisesRegex(ValueError, "hadm_id"): + build_proxy_probability_scores( + feature_matrix=self.feature_matrix.drop(columns=["hadm_id"]), + note_labels=self.note_labels, + label_column="noncompliance_label", + estimator_factory=lambda: _FakeProbEstimator([0.1] * len(self.note_labels)), + ) + + def test_build_proxy_probability_scores_preserves_feature_column_order_for_estimator_fit(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + + class 
_RecordingEstimator: + def __init__(self): + self.fit_columns = None + self.coef_ = None + + def fit(self, X, y): + del y + self.fit_columns = list(X.columns) + self.coef_ = [[0.1] * X.shape[1]] + return self + + def predict_proba(self, X): + return [[0.4, 0.6] for _ in range(len(X))] + + estimator = _RecordingEstimator() + build_proxy_probability_scores( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + label_column="noncompliance_label", + estimator_factory=lambda: estimator, + ) + self.assertEqual( + estimator.fit_columns, + ["Education Readiness: No", "Pain Level: 7-Mod to Severe"], + ) + + def test_build_proxy_probability_scores_keeps_only_inner_join_hadm_ids(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + feature_matrix = self.feature_matrix.iloc[:4].copy() + note_labels = self.note_labels.iloc[2:].copy() + + scores = build_proxy_probability_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column="noncompliance_label", + estimator_factory=lambda: _FakeProbEstimator([0.2, 0.8]), + ) + + self.assertEqual(scores["hadm_id"].tolist(), [103, 104]) + + def test_build_proxy_probability_scores_raises_on_malformed_predict_proba_output(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + with self.assertRaises(IndexError): + build_proxy_probability_scores( + feature_matrix=self.feature_matrix.iloc[:2], + note_labels=self.note_labels.iloc[:2], + label_column="noncompliance_label", + estimator_factory=lambda: _MalformedProbEstimator(), + ) + + def test_build_negative_sentiment_mistrust_scores_uses_whitespace_cleanup_and_negates_polarity(self): + build_negative_sentiment_mistrust_scores = self._get_callable( + "build_negative_sentiment_mistrust_scores" + ) + note_corpus = pd.DataFrame( + [ + {"hadm_id": 202, "note_text": "Date:[**5-1-18**] calm rapport"}, + {"hadm_id": 201, "note_text": " patient refused medication "}, + {"hadm_id": 
203, "note_text": ""}, + ] + ) + seen = [] + + def _sentiment_fn(text): + seen.append(text) + if "refused medication" in text: + return (-0.5, 0.0) + return (0.25, 0.0) + + scores = build_negative_sentiment_mistrust_scores(note_corpus, sentiment_fn=_sentiment_fn) + + self.assertEqual( + seen, + ["Date:[**5-1-18**] calm rapport", "patient refused medication", ""], + ) + self.assertEqual(scores["hadm_id"].tolist(), [201, 202, 203]) + by_hadm = scores.set_index("hadm_id") + self.assertEqual(by_hadm.loc[201, "negative_sentiment_score"], 0.5) + self.assertEqual(by_hadm.loc[202, "negative_sentiment_score"], -0.25) + self.assertEqual(by_hadm.loc[203, "negative_sentiment_score"], -0.25) + + def test_build_negative_sentiment_mistrust_scores_missing_note_text_raises_and_empty_schema_is_stable(self): + build_negative_sentiment_mistrust_scores = self._get_callable( + "build_negative_sentiment_mistrust_scores" + ) + with self.assertRaisesRegex(ValueError, "note_text"): + build_negative_sentiment_mistrust_scores( + pd.DataFrame([{"hadm_id": 1, "text": "oops"}]), + sentiment_fn=self._sentiment_fn, + ) + + empty = build_negative_sentiment_mistrust_scores( + pd.DataFrame(columns=["hadm_id", "note_text"]), + sentiment_fn=self._sentiment_fn, + ) + self.assertEqual(empty.columns.tolist(), ["hadm_id", "negative_sentiment_score"]) + self.assertTrue(empty.empty) + + def test_z_normalize_scores_normalizes_independently_and_handles_constant_column(self): + z_normalize_scores = self._get_callable("z_normalize_scores") + score_table = pd.DataFrame( + [ + {"hadm_id": 1, "a": 1.0, "b": 5.0, "keep": 10.0}, + {"hadm_id": 2, "a": 2.0, "b": 5.0, "keep": 20.0}, + {"hadm_id": 3, "a": 3.0, "b": 5.0, "keep": 30.0}, + ] + ) + + normalized = z_normalize_scores(score_table, columns=["a", "b"]) + + self.assertAlmostEqual(float(normalized["a"].mean()), 0.0, places=7) + self.assertAlmostEqual(float(normalized["a"].std(ddof=0)), 1.0, places=7) + self.assertTrue((normalized["b"] == 0.0).all()) + 
self.assertEqual(normalized["keep"].tolist(), [10.0, 20.0, 30.0]) + + def test_z_normalize_scores_leaves_hadm_id_untouched_and_raises_for_missing_column(self): + z_normalize_scores = self._get_callable("z_normalize_scores") + score_table = pd.DataFrame( + [ + {"hadm_id": 10, "a": 1.0}, + {"hadm_id": 20, "a": 2.0}, + ] + ) + normalized = z_normalize_scores(score_table, columns=["a"]) + self.assertEqual(normalized["hadm_id"].tolist(), [10, 20]) + + with self.assertRaisesRegex(ValueError, "missing_col"): + z_normalize_scores(score_table, columns=["missing_col"]) + + def test_build_mistrust_score_table_outputs_required_columns_and_shared_hadm_ids(self): + build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + feature_matrix = self.feature_matrix.iloc[:4].copy() + note_labels = self.note_labels.iloc[1:5].copy() + note_corpus = self.note_corpus.iloc[2:].copy() + + scores = build_mistrust_score_table( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.2, 0.8, 0.4]), + sentiment_fn=self._sentiment_fn, + ) + + self.assertEqual( + scores.columns.tolist(), + [ + "hadm_id", + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + ], + ) + self.assertEqual(scores["hadm_id"].tolist(), [103, 104]) + self.assertTrue(pd.api.types.is_float_dtype(scores["noncompliance_score_z"])) + self.assertTrue(pd.api.types.is_float_dtype(scores["autopsy_score_z"])) + self.assertTrue(pd.api.types.is_float_dtype(scores["negative_sentiment_score_z"])) + + def test_build_mistrust_score_table_missing_required_columns_raise_and_dependencies_are_called(self): + build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + with self.assertRaisesRegex(ValueError, "noncompliance_label"): + build_mistrust_score_table( + self.feature_matrix, + self.note_labels.drop(columns=["noncompliance_label"]), + self.note_corpus, + estimator_factory=lambda: 
_FakeProbEstimator([0.1] * 6), + sentiment_fn=self._sentiment_fn, + ) + with self.assertRaisesRegex(ValueError, "note_text"): + build_mistrust_score_table( + self.feature_matrix, + self.note_labels, + self.note_corpus.drop(columns=["note_text"]), + estimator_factory=lambda: _FakeProbEstimator([0.1] * 6), + sentiment_fn=self._sentiment_fn, + ) + + estimator_calls = [] + sentiment_calls = [] + + def _factory(): + estimator_calls.append("estimator") + return _FakeProbEstimator([0.1] * 6) + + def _sentiment(text): + sentiment_calls.append(text) + return self._sentiment_fn(text) + + build_mistrust_score_table( + self.feature_matrix, + self.note_labels, + self.note_corpus, + estimator_factory=_factory, + sentiment_fn=_sentiment, + ) + self.assertEqual(len(estimator_calls), 2) + self.assertEqual(len(sentiment_calls), len(self.note_corpus)) + + def test_summarize_feature_weights_returns_positive_and_negative_rankings(self): + summarize_feature_weights = self._get_callable("summarize_feature_weights") + + class _Estimator: + coef_ = [[0.7, -0.1, -2.0, 1.2]] + + summary = summarize_feature_weights( + _Estimator(), + ["Riker-SAS Scale: Agitated", "Pain Present: No", "State: Alert", "Orientation: Oriented 3x"], + top_n=2, + ) + + self.assertEqual(set(summary.keys()), {"all", "positive", "negative"}) + self.assertEqual(summary["positive"]["feature"].tolist(), ["Orientation: Oriented 3x", "Riker-SAS Scale: Agitated"]) + self.assertEqual(summary["negative"]["feature"].tolist(), ["State: Alert", "Pain Present: No"]) + + def test_summarize_feature_weights_raises_for_missing_coef_or_misaligned_length(self): + summarize_feature_weights = self._get_callable("summarize_feature_weights") + with self.assertRaisesRegex(ValueError, "coef_"): + summarize_feature_weights(_NoCoefEstimator(), ["a", "b"]) + + class _WrongShapeEstimator: + coef_ = [0.1, 0.2] + + with self.assertRaisesRegex(ValueError, "shape"): + summarize_feature_weights(_WrongShapeEstimator(), ["a", "b"]) + + class 
_BadEstimator: + coef_ = [[0.1]] + + with self.assertRaisesRegex(ValueError, "align"): + summarize_feature_weights(_BadEstimator(), ["a", "b"]) + + def test_feature_weight_summary_wrappers_use_correct_labels(self): + with patch.object(self.module, "build_proxy_feature_weight_summary", return_value={"all": pd.DataFrame()}) as patched: + self.module.build_noncompliance_feature_weight_summary(self.feature_matrix, self.note_labels) + self.assertEqual(patched.call_args.kwargs["label_column"], "noncompliance_label") + + with patch.object(self.module, "build_proxy_feature_weight_summary", return_value={"all": pd.DataFrame()}) as patched: + self.module.build_autopsy_feature_weight_summary(self.feature_matrix, self.note_labels) + self.assertEqual(patched.call_args.kwargs["label_column"], "autopsy_label") + + def test_run_race_gap_analysis_filters_to_white_and_black_and_computes_direction(self): + run_race_gap_analysis = self._get_callable("run_race_gap_analysis") + mistrust_scores = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_score_z": 0.0, "autopsy_score_z": 0.1, "negative_sentiment_score_z": 0.2}, + {"hadm_id": 102, "noncompliance_score_z": 2.0, "autopsy_score_z": 2.1, "negative_sentiment_score_z": 2.2}, + {"hadm_id": 103, "noncompliance_score_z": 3.0, "autopsy_score_z": 3.1, "negative_sentiment_score_z": 3.2}, + {"hadm_id": 104, "noncompliance_score_z": 1.0, "autopsy_score_z": 1.1, "negative_sentiment_score_z": 1.2}, + {"hadm_id": 105, "noncompliance_score_z": 99.0, "autopsy_score_z": 99.1, "negative_sentiment_score_z": 99.2}, + ] + ) + + results = run_race_gap_analysis(mistrust_scores, self.demographics, score_columns=["noncompliance_score_z"]) + + self.assertEqual(results.shape[0], 1) + row = results.iloc[0] + self.assertEqual(row["n_black"], 2) + self.assertEqual(row["n_white"], 2) + self.assertAlmostEqual(float(row["median_black"]), 2.5) + self.assertAlmostEqual(float(row["median_white"]), 0.5) + self.assertTrue(bool(row["black_median_higher"])) + + def 
test_run_race_gap_analysis_returns_nan_when_one_group_is_missing(self): + run_race_gap_analysis = self._get_callable("run_race_gap_analysis") + demographics = pd.DataFrame([{"hadm_id": 1, "race": "WHITE"}, {"hadm_id": 2, "race": "WHITE"}]) + scores = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.1}, + {"hadm_id": 2, "noncompliance_score_z": 0.2}, + ] + ) + + results = run_race_gap_analysis(scores, demographics, score_columns=["noncompliance_score_z"]) + row = results.iloc[0] + self.assertEqual(row["n_black"], 0) + self.assertTrue(pd.isna(row["pvalue"])) + self.assertTrue(pd.isna(row["median_black"])) + + def test_run_race_gap_analysis_output_contract_and_missing_columns_raise(self): + run_race_gap_analysis = self._get_callable("run_race_gap_analysis") + scores = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_score_z": 0.1}, + {"hadm_id": 102, "noncompliance_score_z": 0.2}, + ] + ) + demographics = pd.DataFrame( + [ + {"hadm_id": 101, "race": "WHITE"}, + {"hadm_id": 102, "race": "BLACK"}, + ] + ) + results = run_race_gap_analysis(scores, demographics, score_columns=["noncompliance_score_z"]) + self.assertTrue( + { + "metric", + "n_black", + "n_white", + "median_black", + "median_white", + "median_gap_black_minus_white", + "statistic", + "pvalue", + "black_median_higher", + }.issubset(results.columns) + ) + + with self.assertRaisesRegex(ValueError, "race"): + run_race_gap_analysis(scores, demographics.drop(columns=["race"]), score_columns=["noncompliance_score_z"]) + with self.assertRaisesRegex(ValueError, "hadm_id"): + run_race_gap_analysis(scores.drop(columns=["hadm_id"]), demographics, score_columns=["noncompliance_score_z"]) + + def test_run_race_based_treatment_analysis_uses_non_null_rows_and_black_minus_white_gap(self): + run_race_based_treatment_analysis = self._get_callable("run_race_based_treatment_analysis") + results = run_race_based_treatment_analysis(self.eol_cohort, self.treatment_totals).set_index("treatment") + + vent = 
results.loc["total_vent_min"] + vaso = results.loc["total_vaso_min"] + self.assertEqual(vent["n_black"], 2) + self.assertEqual(vent["n_white"], 2) + self.assertAlmostEqual(float(vent["median_gap_black_minus_white"]), 52.5) + self.assertEqual(vaso["n_black"], 1) + self.assertEqual(vaso["n_white"], 2) + + def test_run_race_based_treatment_analysis_missing_columns_raise(self): + run_race_based_treatment_analysis = self._get_callable("run_race_based_treatment_analysis") + with self.assertRaisesRegex(ValueError, "total_vaso_min"): + run_race_based_treatment_analysis( + self.eol_cohort, + self.treatment_totals.drop(columns=["total_vaso_min"]), + ) + with self.assertRaisesRegex(ValueError, "race"): + run_race_based_treatment_analysis( + self.eol_cohort.drop(columns=["race"]), + self.treatment_totals, + ) + + def test_run_trust_based_treatment_analysis_uses_explicit_group_size_and_tie_breaks_by_hadm_id(self): + run_trust_based_treatment_analysis = self._get_callable("run_trust_based_treatment_analysis") + eol = pd.DataFrame( + [ + {"hadm_id": 1, "race": "WHITE"}, + {"hadm_id": 2, "race": "BLACK"}, + {"hadm_id": 3, "race": "WHITE"}, + ] + ) + scores = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score_z": 0.9}, + {"hadm_id": 2, "noncompliance_score_z": 0.9}, + {"hadm_id": 3, "noncompliance_score_z": 0.1}, + ] + ) + treatments = pd.DataFrame( + [ + {"hadm_id": 1, "total_vent_min": 10.0, "total_vaso_min": 1.0}, + {"hadm_id": 2, "total_vent_min": 100.0, "total_vaso_min": 2.0}, + {"hadm_id": 3, "total_vent_min": 1.0, "total_vaso_min": 3.0}, + ] + ) + + results = run_trust_based_treatment_analysis( + eol, + scores, + treatments, + score_columns=["noncompliance_score_z"], + treatment_columns=["total_vent_min"], + group_sizes={"total_vent_min": 1}, + ) + row = results.iloc[0] + self.assertEqual(row["stratification_n"], 1) + self.assertEqual(row["n_high"], 1) + self.assertEqual(row["n_low"], 2) + self.assertAlmostEqual(float(row["median_high"]), 10.0) + + def 
test_run_trust_based_treatment_analysis_derives_group_size_from_race_based_counts(self): + run_trust_based_treatment_analysis = self._get_callable("run_trust_based_treatment_analysis") + scores = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_score_z": 0.9}, + {"hadm_id": 102, "noncompliance_score_z": 0.8}, + {"hadm_id": 103, "noncompliance_score_z": 0.7}, + {"hadm_id": 104, "noncompliance_score_z": 0.1}, + ] + ) + + results = run_trust_based_treatment_analysis( + self.eol_cohort, + scores, + self.treatment_totals, + score_columns=["noncompliance_score_z"], + treatment_columns=["total_vent_min"], + ) + + self.assertEqual(int(results.iloc[0]["stratification_n"]), 2) + + def test_run_trust_based_treatment_analysis_handles_invalid_group_sizes_and_full_cartesian_output(self): + run_trust_based_treatment_analysis = self._get_callable("run_trust_based_treatment_analysis") + scores = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_score_z": 0.9, "autopsy_score_z": 0.1}, + {"hadm_id": 102, "noncompliance_score_z": 0.8, "autopsy_score_z": 0.2}, + {"hadm_id": 103, "noncompliance_score_z": 0.7, "autopsy_score_z": 0.3}, + {"hadm_id": 104, "noncompliance_score_z": 0.6, "autopsy_score_z": 0.4}, + ] + ) + + results = run_trust_based_treatment_analysis( + self.eol_cohort, + scores, + self.treatment_totals, + score_columns=["noncompliance_score_z", "autopsy_score_z"], + treatment_columns=["total_vent_min", "total_vaso_min"], + group_sizes={"total_vent_min": 0, "total_vaso_min": 10}, + ) + self.assertEqual(results.shape[0], 4) + self.assertTrue(results["median_gap"].isna().all()) + + valid = run_trust_based_treatment_analysis( + self.eol_cohort, + scores, + self.treatment_totals, + score_columns=["noncompliance_score_z"], + treatment_columns=["total_vent_min"], + group_sizes={"total_vent_min": 2}, + ) + row = valid.iloc[0] + self.assertAlmostEqual(float(row["median_gap"]), float(row["median_high"]) - float(row["median_low"])) + + def 
test_run_acuity_control_analysis_returns_pairwise_correlations(self): + run_acuity_control_analysis = self._get_callable("run_acuity_control_analysis") + mistrust_scores = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_score_z": 0.1, "autopsy_score_z": 0.2, "negative_sentiment_score_z": 0.3}, + {"hadm_id": 102, "noncompliance_score_z": 0.2, "autopsy_score_z": 0.3, "negative_sentiment_score_z": 0.4}, + {"hadm_id": 103, "noncompliance_score_z": 0.3, "autopsy_score_z": 0.4, "negative_sentiment_score_z": 0.5}, + {"hadm_id": 104, "noncompliance_score_z": 0.4, "autopsy_score_z": 0.5, "negative_sentiment_score_z": 0.6}, + ] + ) + acuity = self.acuity_scores.iloc[:4].copy() + + results = run_acuity_control_analysis(mistrust_scores, acuity) + + self.assertEqual(results.shape[0], 10) + pairs = set(zip(results["feature_a"], results["feature_b"])) + self.assertIn(("noncompliance_score_z", "autopsy_score_z"), pairs) + self.assertIn(("oasis", "sapsii"), pairs) + + def test_run_acuity_control_analysis_output_contract_low_sample_and_missing_columns(self): + run_acuity_control_analysis = self._get_callable("run_acuity_control_analysis") + low_sample_scores = pd.DataFrame( + [{"hadm_id": 1, "noncompliance_score_z": 0.1, "autopsy_score_z": 0.2, "negative_sentiment_score_z": 0.3}] + ) + low_sample_acuity = pd.DataFrame([{"hadm_id": 1, "oasis": 10.0, "sapsii": 20.0}]) + results = run_acuity_control_analysis(low_sample_scores, low_sample_acuity) + self.assertTrue({"feature_a", "feature_b", "correlation", "pvalue", "n"}.issubset(results.columns)) + self.assertTrue(results["correlation"].isna().all()) + + with self.assertRaisesRegex(ValueError, "oasis"): + run_acuity_control_analysis( + self.final_model_table[["hadm_id", "noncompliance_score_z", "autopsy_score_z", "negative_sentiment_score_z"]], + self.acuity_scores.drop(columns=["oasis"]), + ) + + def test_evaluate_downstream_predictions_returns_all_task_configuration_rows(self): + evaluate_downstream_predictions = 
self._get_callable("evaluate_downstream_predictions") + split_recorder = _SplitRecorder() + auc_recorder = _AUCRecorder(0.8) + + results = evaluate_downstream_predictions( + self.final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=split_recorder, + auc_fn=auc_recorder, + repetitions=1, + ) + + self.assertEqual(results.shape[0], 18) + self.assertEqual(set(results["task"]), {"Left AMA", "Code Status", "In-hospital mortality"}) + self.assertEqual(set(results["configuration"]), set(self.module.DOWNSTREAM_FEATURE_CONFIGS.keys())) + + def test_evaluate_downstream_predictions_uses_random_states_zero_through_ninety_nine_and_test_size_point_four(self): + evaluate_downstream_predictions = self._get_callable("evaluate_downstream_predictions") + split_recorder = _SplitRecorder() + + results = evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=split_recorder, + auc_fn=_AUCRecorder(0.6), + repetitions=100, + ) + + self.assertEqual(results.shape[0], 1) + self.assertEqual([call["random_state"] for call in split_recorder.calls], list(range(100))) + self.assertTrue(all(call["test_size"] == 0.4 for call in split_recorder.calls)) + + def test_evaluate_downstream_predictions_uses_default_estimator_metric_and_dropna(self): + evaluate_downstream_predictions = self._get_callable("evaluate_downstream_predictions") + table = self.final_model_table.copy() + table.loc[0, "age"] = None + table.loc[1, "left_ama"] = None + + created = [] + split_calls = [] + auc_calls = [] + + class _RecordingLogisticRegression: + def __init__(self, *args, **kwargs): + del args + self.kwargs = kwargs + created.append(self) + + def fit(self, X, y): + self.fit_X = X.copy() + self.fit_y = y.copy() + self.coef_ = [[0.1] * X.shape[1]] + return self + + def predict_proba(self, X): + return 
[[0.9, 0.1], [0.1, 0.9]] + + def _split_fn(X, y, test_size, random_state): + split_calls.append({"n_rows": len(X), "test_size": test_size, "random_state": random_state}) + frame = X.reset_index(drop=True) + labels = pd.Series(y).reset_index(drop=True) + return frame.iloc[:2].copy(), frame.iloc[2:4].copy(), labels.iloc[:2].copy(), labels.iloc[2:4].copy() + + def _auc_fn(y_true, y_prob): + auc_calls.append({"y_true": list(pd.Series(y_true)), "y_prob": list(pd.Series(y_prob))}) + return 0.77 + + with patch.object(self.module, "LogisticRegression", _RecordingLogisticRegression), \ + patch.object(self.module, "train_test_split", _split_fn), \ + patch.object(self.module, "roc_auc_score", _auc_fn): + results = evaluate_downstream_predictions( + table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + repetitions=1, + ) + + self.assertEqual(split_calls[0]["n_rows"], 4) + self.assertEqual(created[0].kwargs.get("penalty"), "l1") + self.assertEqual(created[0].kwargs.get("solver"), "liblinear") + self.assertEqual(created[0].kwargs.get("max_iter"), 1000) + self.assertEqual(auc_calls[0]["y_prob"], [0.1, 0.9]) + self.assertEqual(int(results.iloc[0]["n_valid_auc"]), 1) + + def test_evaluate_downstream_predictions_returns_nan_for_single_class_target(self): + evaluate_downstream_predictions = self._get_callable("evaluate_downstream_predictions") + table = self.final_model_table.copy() + table["left_ama"] = 0 + + results = evaluate_downstream_predictions( + table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.5), + repetitions=3, + ) + + row = results.iloc[0] + self.assertEqual(int(row["n_valid_auc"]), 0) + self.assertTrue(pd.isna(row["auc_mean"])) + self.assertTrue(pd.isna(row["auc_std"])) + + def 
test_evaluate_downstream_predictions_uses_exact_required_feature_sets(self): + evaluate_downstream_predictions = self._get_callable("evaluate_downstream_predictions") + seen_columns = [] + + class _RecordingEstimator: + def __init__(self): + self.coef_ = None + + def fit(self, X, y): + del y + seen_columns.append(list(X.columns)) + self.coef_ = [[0.1] * X.shape[1]] + return self + + def predict_proba(self, X): + return [[0.9, 0.1], [0.1, 0.9]] + + evaluate_downstream_predictions( + self.final_model_table, + estimator_factory=lambda: _RecordingEstimator(), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.8), + repetitions=1, + ) + expected = [] + for _task in self.module.DOWNSTREAM_TASK_MAP: + for _config, columns in self.module.DOWNSTREAM_FEATURE_CONFIGS.items(): + expected.append(list(columns)) + self.assertEqual(seen_columns, expected) + + def test_evaluate_downstream_predictions_computes_auc_mean_and_std_correctly(self): + evaluate_downstream_predictions = self._get_callable("evaluate_downstream_predictions") + values = [0.2, 0.6] + + def _auc_fn(y_true, y_prob): + del y_true, y_prob + return values.pop(0) + + results = evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + auc_fn=_auc_fn, + repetitions=2, + ) + row = results.iloc[0] + self.assertAlmostEqual(float(row["auc_mean"]), 0.4, places=7) + self.assertAlmostEqual(float(row["auc_std"]), 0.2, places=7) + + def test_duplicate_hadm_ids_raise_in_proxy_and_race_gap_merges(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + run_race_gap_analysis = self._get_callable("run_race_gap_analysis") + + duplicate_features = pd.concat([self.feature_matrix, self.feature_matrix.iloc[[0]]], ignore_index=True) + with self.assertRaises(Exception): + 
build_proxy_probability_scores( + duplicate_features, + self.note_labels, + "noncompliance_label", + estimator_factory=lambda: _FakeProbEstimator([0.1] * 7), + ) + + duplicate_scores = pd.concat( + [ + self.final_model_table[["hadm_id", "noncompliance_score_z"]], + self.final_model_table[["hadm_id", "noncompliance_score_z"]].iloc[[0]], + ], + ignore_index=True, + ) + with self.assertRaises(Exception): + run_race_gap_analysis( + duplicate_scores, + self.demographics, + score_columns=["noncompliance_score_z"], + ) + + def test_run_full_eol_mistrust_modeling_returns_expected_sections(self): + run_full_eol_mistrust_modeling = self._get_callable("run_full_eol_mistrust_modeling") + + outputs = run_full_eol_mistrust_modeling( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + note_corpus=self.note_corpus, + demographics=self.demographics, + eol_cohort=self.eol_cohort, + treatment_totals=self.treatment_totals, + acuity_scores=self.acuity_scores, + final_model_table=self.final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.8), + repetitions=1, + ) + + self.assertEqual( + set(outputs.keys()), + { + "mistrust_scores", + "feature_weight_summaries", + "race_gap_results", + "race_treatment_results", + "trust_treatment_results", + "acuity_correlations", + "downstream_auc_results", + }, + ) + self.assertIn("noncompliance", outputs["feature_weight_summaries"]) + self.assertIn("autopsy", outputs["feature_weight_summaries"]) + + def test_run_full_eol_mistrust_modeling_merges_missing_mistrust_columns_into_final_table(self): + run_full_eol_mistrust_modeling = self._get_callable("run_full_eol_mistrust_modeling") + final_without_scores = self.final_model_table.drop(columns=self.module.MISTRUST_SCORE_COLUMNS) + + captured = {} + + def _fake_downstream(final_model_table, **kwargs): + del kwargs + captured["columns"] = 
final_model_table.columns.tolist() + return pd.DataFrame([{"task": "Left AMA"}]) + + with patch.object(self.module, "evaluate_downstream_predictions", side_effect=_fake_downstream): + outputs = run_full_eol_mistrust_modeling( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + note_corpus=self.note_corpus, + final_model_table=final_without_scores, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + repetitions=1, + ) + + self.assertTrue(set(self.module.MISTRUST_SCORE_COLUMNS).issubset(set(captured["columns"]))) + self.assertIn("downstream_auc_results", outputs) + + def test_run_full_eol_mistrust_modeling_does_not_overwrite_existing_mistrust_columns(self): + run_full_eol_mistrust_modeling = self._get_callable("run_full_eol_mistrust_modeling") + final_with_scores = self.final_model_table.copy() + final_with_scores["noncompliance_score_z"] = 999.0 + + captured = {} + + def _fake_downstream(final_model_table, **kwargs): + del kwargs + captured["scores"] = final_model_table["noncompliance_score_z"].tolist() + return pd.DataFrame([{"task": "Left AMA"}]) + + with patch.object(self.module, "evaluate_downstream_predictions", side_effect=_fake_downstream): + run_full_eol_mistrust_modeling( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + note_corpus=self.note_corpus, + final_model_table=final_with_scores, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + repetitions=1, + ) + + self.assertEqual(captured["scores"], [999.0] * len(final_with_scores)) + + def test_baseline_feature_columns_align_with_real_dataset_baseline_only_output(self): + admissions = pd.DataFrame( + [ + {"hadm_id": 11, "subject_id": 21, "admittime": "2100-01-01 00:00:00", "dischtime": "2100-01-03 00:00:00", "ethnicity": "WHITE", "insurance": "Medicare", "discharge_location": "HOME", "hospital_expire_flag": 0, "has_chartevents_data": 1}, 
+ {"hadm_id": 12, "subject_id": 22, "admittime": "2100-01-02 00:00:00", "dischtime": "2100-01-04 00:00:00", "ethnicity": "BLACK/AFRICAN AMERICAN", "insurance": "Private", "discharge_location": "LEFT AGAINST MEDICAL ADVICE", "hospital_expire_flag": 0, "has_chartevents_data": 1}, + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 21, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 22, "gender": "F", "dob": "2070-01-01 00:00:00"}, + ] + ) + icustays = pd.DataFrame( + [ + {"hadm_id": 11, "icustay_id": 1101, "intime": "2100-01-01 00:00:00", "outtime": "2100-01-01 13:00:00"}, + {"hadm_id": 12, "icustay_id": 1201, "intime": "2100-01-02 00:00:00", "outtime": "2100-01-02 13:00:00"}, + ] + ) + d_items = pd.DataFrame( + [{"itemid": 128, "label": "Code Status", "dbsource": "carevue"}] + ) + chartevents = pd.DataFrame( + [{"hadm_id": 12, "itemid": 128, "value": "DNR/DNI", "icustay_id": 1201}] + ) + mistrust_scores = pd.DataFrame( + [ + {"hadm_id": 11, "noncompliance_score_z": 0.0, "autopsy_score_z": 0.0, "negative_sentiment_score_z": 0.0}, + {"hadm_id": 12, "noncompliance_score_z": 0.0, "autopsy_score_z": 0.0, "negative_sentiment_score_z": 0.0}, + ] + ) + + base = self.dataset_module.build_base_admissions(admissions, patients) + demographics = self.dataset_module.build_demographics_table(base) + all_cohort = self.dataset_module.build_all_cohort(base, icustays) + baseline_only = self.dataset_module.build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=chartevents, + d_items=d_items, + mistrust_scores=mistrust_scores, + include_race=False, + include_mistrust=False, + ) + + self.assertEqual( + [column for column in baseline_only.columns if column in self.module.BASELINE_FEATURE_COLUMNS], + self.module.BASELINE_FEATURE_COLUMNS, + ) + self.assertFalse(any(column in baseline_only.columns for column in self.module.RACE_FEATURE_COLUMNS)) + self.assertFalse(any(column in baseline_only.columns for column in 
self.module.MISTRUST_SCORE_COLUMNS)) + self.assertEqual( + set(baseline_only.columns), + { + "hadm_id", + *self.module.BASELINE_FEATURE_COLUMNS, + "left_ama", + "code_status_dnr_dni_cmo", + "in_hospital_mortality", + }, + ) + + def test_dataset_model_integration_smoke_flow_runs_without_column_renaming(self): + admissions = pd.DataFrame( + [ + {"hadm_id": 1, "subject_id": 11, "admittime": "2100-01-01 00:00:00", "dischtime": "2100-01-03 00:00:00", "ethnicity": "WHITE", "insurance": "Medicare", "discharge_location": "HOME", "hospital_expire_flag": 0, "has_chartevents_data": 1}, + {"hadm_id": 2, "subject_id": 12, "admittime": "2100-01-02 00:00:00", "dischtime": "2100-01-04 00:00:00", "ethnicity": "BLACK/AFRICAN AMERICAN", "insurance": "Private", "discharge_location": "LEFT AGAINST MEDICAL ADVICE", "hospital_expire_flag": 0, "has_chartevents_data": 1}, + {"hadm_id": 3, "subject_id": 13, "admittime": "2100-01-03 00:00:00", "dischtime": "2100-01-05 00:00:00", "ethnicity": "ASIAN", "insurance": "Medicare", "discharge_location": "SNF", "hospital_expire_flag": 0, "has_chartevents_data": 1}, + {"hadm_id": 4, "subject_id": 14, "admittime": "2100-01-04 00:00:00", "dischtime": "2100-01-06 00:00:00", "ethnicity": "HISPANIC OR LATINO", "insurance": "Private", "discharge_location": "HOME", "hospital_expire_flag": 1, "has_chartevents_data": 1}, + {"hadm_id": 5, "subject_id": 15, "admittime": "2100-01-05 00:00:00", "dischtime": "2100-01-07 00:00:00", "ethnicity": "AMERICAN INDIAN/ALASKA NATIVE", "insurance": "Self Pay", "discharge_location": "HOME", "hospital_expire_flag": 0, "has_chartevents_data": 1}, + {"hadm_id": 6, "subject_id": 16, "admittime": "2100-01-06 00:00:00", "dischtime": "2100-01-08 00:00:00", "ethnicity": "OTHER", "insurance": "Medicare", "discharge_location": "HOME", "hospital_expire_flag": 0, "has_chartevents_data": 1}, + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 11, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 12, "gender": "F", "dob": 
"2070-01-01 00:00:00"}, + {"subject_id": 13, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 14, "gender": "F", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 15, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 16, "gender": "F", "dob": "2070-01-01 00:00:00"}, + ] + ) + icustays = pd.DataFrame( + [ + {"hadm_id": 1, "icustay_id": 101, "intime": "2100-01-01 00:00:00", "outtime": "2100-01-01 13:00:00"}, + {"hadm_id": 2, "icustay_id": 102, "intime": "2100-01-02 00:00:00", "outtime": "2100-01-02 13:00:00"}, + {"hadm_id": 3, "icustay_id": 103, "intime": "2100-01-03 00:00:00", "outtime": "2100-01-03 13:00:00"}, + {"hadm_id": 4, "icustay_id": 104, "intime": "2100-01-04 00:00:00", "outtime": "2100-01-04 13:00:00"}, + {"hadm_id": 5, "icustay_id": 105, "intime": "2100-01-05 00:00:00", "outtime": "2100-01-05 13:00:00"}, + {"hadm_id": 6, "icustay_id": 106, "intime": "2100-01-06 00:00:00", "outtime": "2100-01-06 13:00:00"}, + ] + ) + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + {"itemid": 2, "label": "Pain Level", "dbsource": "carevue"}, + {"itemid": 128, "label": "Code Status", "dbsource": "carevue"}, + ] + ) + chartevents = pd.DataFrame( + [ + {"hadm_id": 1, "itemid": 1, "value": "No", "icustay_id": 101}, + {"hadm_id": 1, "itemid": 2, "value": "7-Mod to Severe", "icustay_id": 101}, + {"hadm_id": 1, "itemid": 128, "value": "Full Code", "icustay_id": 101}, + {"hadm_id": 2, "itemid": 1, "value": "Yes", "icustay_id": 102}, + {"hadm_id": 2, "itemid": 128, "value": "DNR/DNI", "icustay_id": 102}, + {"hadm_id": 3, "itemid": 1, "value": "No", "icustay_id": 103}, + {"hadm_id": 3, "itemid": 2, "value": "None", "icustay_id": 103}, + {"hadm_id": 4, "itemid": 2, "value": "7-Mod to Severe", "icustay_id": 104}, + {"hadm_id": 5, "itemid": 1, "value": "Yes", "icustay_id": 105}, + {"hadm_id": 6, "itemid": 2, "value": "None", "icustay_id": 106}, + ] + ) + noteevents = pd.DataFrame( + [ + {"hadm_id": 1, 
"category": "Nursing", "text": "Patient is noncompliant and angry.", "iserror": None}, + {"hadm_id": 2, "category": "Nursing", "text": "Patient is calm and cooperative.", "iserror": None}, + {"hadm_id": 3, "category": "Nursing", "text": "Autopsy discussed with family.", "iserror": None}, + {"hadm_id": 4, "category": "Nursing", "text": "Patient refused medication repeatedly.", "iserror": None}, + {"hadm_id": 5, "category": "Nursing", "text": "Date:[**5-1-18**] good rapport.", "iserror": None}, + {"hadm_id": 6, "category": "Nursing", "text": "non-adher to follow up plan.", "iserror": None}, + ] + ) + ventdurations = pd.DataFrame( + [ + {"icustay_id": 103, "ventnum": 1, "starttime": "2100-01-03 00:00:00", "endtime": "2100-01-03 01:00:00", "duration_hours": 1.0}, + {"icustay_id": 104, "ventnum": 1, "starttime": "2100-01-04 00:00:00", "endtime": "2100-01-04 02:00:00", "duration_hours": 2.0}, + ] + ) + vasopressordurations = pd.DataFrame( + [ + {"icustay_id": 103, "vasonum": 1, "starttime": "2100-01-03 03:00:00", "endtime": "2100-01-03 04:00:00", "duration_hours": 1.0}, + {"icustay_id": 104, "vasonum": 1, "starttime": "2100-01-04 05:00:00", "endtime": "2100-01-04 07:00:00", "duration_hours": 2.0}, + ] + ) + oasis = pd.DataFrame( + [ + {"hadm_id": 1, "icustay_id": 101, "oasis": 10}, + {"hadm_id": 2, "icustay_id": 102, "oasis": 12}, + {"hadm_id": 3, "icustay_id": 103, "oasis": 20}, + {"hadm_id": 4, "icustay_id": 104, "oasis": 25}, + {"hadm_id": 5, "icustay_id": 105, "oasis": 8}, + {"hadm_id": 6, "icustay_id": 106, "oasis": 9}, + ] + ) + sapsii = pd.DataFrame( + [ + {"hadm_id": 1, "icustay_id": 101, "sapsii": 30}, + {"hadm_id": 2, "icustay_id": 102, "sapsii": 35}, + {"hadm_id": 3, "icustay_id": 103, "sapsii": 50}, + {"hadm_id": 4, "icustay_id": 104, "sapsii": 55}, + {"hadm_id": 5, "icustay_id": 105, "sapsii": 20}, + {"hadm_id": 6, "icustay_id": 106, "sapsii": 22}, + ] + ) + + base = self.dataset_module.build_base_admissions(admissions, patients) + demographics = 
self.dataset_module.build_demographics_table(base) + all_cohort = self.dataset_module.build_all_cohort(base, icustays) + eol_cohort = self.dataset_module.build_eol_cohort(base, demographics) + feature_matrix = self.dataset_module.build_chartevent_feature_matrix( + chartevents, + d_items, + allowed_labels={"Education Readiness", "Pain Level"}, + all_hadm_ids=all_cohort["hadm_id"].tolist(), + ) + note_labels = self.dataset_module.build_note_labels( + noteevents, + all_hadm_ids=all_cohort["hadm_id"].tolist(), + ) + note_corpus = self.dataset_module.build_note_corpus( + noteevents, + all_hadm_ids=all_cohort["hadm_id"].tolist(), + ) + treatment_totals = self.dataset_module.build_treatment_totals( + icustays, + ventdurations, + vasopressordurations, + ) + acuity_scores = self.dataset_module.build_acuity_scores(oasis, sapsii) + + scores = self.module.build_mistrust_score_table( + feature_matrix, + note_labels, + note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + ) + final_model_table = self.dataset_module.build_final_model_table( + demographics, + all_cohort, + base, + chartevents, + d_items, + scores, + include_race=True, + include_mistrust=True, + ) + outputs = self.module.run_full_eol_mistrust_modeling( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + demographics=demographics, + eol_cohort=eol_cohort, + treatment_totals=treatment_totals, + acuity_scores=acuity_scores, + final_model_table=final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.7), + repetitions=1, + ) + + self.assertEqual(scores.shape[1], 4) + self.assertEqual(final_model_table.shape[1], 20) + self.assertEqual(outputs["downstream_auc_results"].shape[0], 18) + self.assertEqual(scores["hadm_id"].tolist(), final_model_table["hadm_id"].tolist()) + + def 
test_dataset_and_model_proxy_probability_scores_match_exactly(self): + model_scores = self.module.build_proxy_probability_scores( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + label_column="noncompliance_label", + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + ) + dataset_scores = self.dataset_module.build_proxy_probability_scores( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + label_column="noncompliance_label", + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + ) + pd.testing.assert_frame_equal(model_scores, dataset_scores) + + def test_dataset_and_model_negative_sentiment_scores_match_exactly_for_non_empty_notes(self): + note_corpus = self.note_corpus.iloc[:5].copy() + model_scores = self.module.build_negative_sentiment_mistrust_scores( + note_corpus=note_corpus, + sentiment_fn=self._sentiment_fn, + ) + dataset_scores = self.dataset_module.build_negative_sentiment_scores( + note_corpus=note_corpus, + sentiment_fn=self._sentiment_fn, + ) + pd.testing.assert_frame_equal(model_scores, dataset_scores) + + def test_dataset_and_model_z_normalize_scores_match_exactly(self): + score_table = pd.DataFrame( + [ + {"hadm_id": 1, "score_a": 1.0, "score_b": 5.0}, + {"hadm_id": 2, "score_a": 2.0, "score_b": 5.0}, + {"hadm_id": 3, "score_a": 3.0, "score_b": 5.0}, + ] + ) + model_scores = self.module.z_normalize_scores(score_table, columns=["score_a", "score_b"]) + dataset_scores = self.dataset_module.z_normalize_scores( + score_table, + columns=["score_a", "score_b"], + ) + pd.testing.assert_frame_equal(model_scores, dataset_scores) + + def test_dataset_and_model_mistrust_score_tables_match_on_shared_inputs(self): + model_scores = self.module.build_mistrust_score_table( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + note_corpus=self.note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + 
sentiment_fn=self._sentiment_fn, + ) + dataset_scores = self.dataset_module.build_mistrust_score_table( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + note_corpus=self.note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + ) + pd.testing.assert_frame_equal(model_scores, dataset_scores) + + def test_run_race_gap_analysis_calls_mannwhitneyu_with_filtered_vectors_per_metric(self): + run_race_gap_analysis = self._get_callable("run_race_gap_analysis") + mistrust_scores = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_score_z": 0.1, "autopsy_score_z": 0.5}, + {"hadm_id": 102, "noncompliance_score_z": 0.2, "autopsy_score_z": 0.6}, + {"hadm_id": 103, "noncompliance_score_z": 0.3, "autopsy_score_z": 0.7}, + {"hadm_id": 104, "noncompliance_score_z": 0.4, "autopsy_score_z": 0.8}, + ] + ) + demographics = pd.DataFrame( + [ + {"hadm_id": 101, "race": "WHITE"}, + {"hadm_id": 102, "race": "BLACK"}, + {"hadm_id": 103, "race": "OTHER"}, + {"hadm_id": 104, "race": "BLACK"}, + ] + ) + calls = [] + + class _Result: + statistic = 7.0 + pvalue = 0.04 + + def _fake_mwu(left, right, alternative): + calls.append( + { + "left": list(pd.Series(left, dtype=float)), + "right": list(pd.Series(right, dtype=float)), + "alternative": alternative, + } + ) + return _Result() + + with patch.object(self.module, "mannwhitneyu", side_effect=_fake_mwu): + results = run_race_gap_analysis( + mistrust_scores=mistrust_scores, + demographics=demographics, + score_columns=["noncompliance_score_z", "autopsy_score_z"], + ) + + self.assertEqual(len(calls), 2) + self.assertEqual( + calls, + [ + { + "left": [0.2, 0.4], + "right": [0.1], + "alternative": "two-sided", + }, + { + "left": [0.6, 0.8], + "right": [0.5], + "alternative": "two-sided", + }, + ], + ) + self.assertEqual(results["statistic"].tolist(), [7.0, 7.0]) + self.assertEqual(results["pvalue"].tolist(), [0.04, 0.04]) + + def 
test_run_acuity_control_analysis_calls_pearsonr_for_each_pair_with_pairwise_filtered_vectors(self): + run_acuity_control_analysis = self._get_callable("run_acuity_control_analysis") + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": 101, + "noncompliance_score_z": 0.1, + "autopsy_score_z": 0.4, + "negative_sentiment_score_z": 0.7, + }, + { + "hadm_id": 102, + "noncompliance_score_z": 0.2, + "autopsy_score_z": float("nan"), + "negative_sentiment_score_z": 0.8, + }, + { + "hadm_id": 103, + "noncompliance_score_z": 0.3, + "autopsy_score_z": 0.6, + "negative_sentiment_score_z": 0.9, + }, + ] + ) + acuity_scores = pd.DataFrame( + [ + {"hadm_id": 101, "oasis": 10.0, "sapsii": 20.0}, + {"hadm_id": 102, "oasis": 11.0, "sapsii": 21.0}, + {"hadm_id": 103, "oasis": 12.0, "sapsii": 22.0}, + ] + ) + calls = [] + + def _fake_pearson(left, right): + calls.append( + { + "left": list(pd.Series(left, dtype=float)), + "right": list(pd.Series(right, dtype=float)), + } + ) + return (0.25, 0.5) + + with patch.object(self.module, "pearsonr", side_effect=_fake_pearson): + results = run_acuity_control_analysis(mistrust_scores, acuity_scores) + + self.assertEqual(len(calls), 10) + self.assertEqual( + calls[0], + { + "left": [0.1, 0.3], + "right": [0.4, 0.6], + }, + ) + self.assertEqual( + calls[-1], + { + "left": [10.0, 11.0, 12.0], + "right": [20.0, 21.0, 22.0], + }, + ) + self.assertEqual(len(results), 10) + self.assertTrue((results["correlation"] == 0.25).all()) + self.assertTrue((results["pvalue"] == 0.5).all()) + + def test_analysis_outputs_use_stable_column_order_contracts(self): + race_gap = self.module.run_race_gap_analysis( + self.final_model_table[["hadm_id", *self.module.MISTRUST_SCORE_COLUMNS]], + self.demographics, + ) + self.assertEqual( + race_gap.columns.tolist(), + [ + "metric", + "n_black", + "n_white", + "median_black", + "median_white", + "median_gap_black_minus_white", + "statistic", + "pvalue", + "black_median_higher", + ], + ) + + race_treatment = 
self.module.run_race_based_treatment_analysis( + self.eol_cohort, + self.treatment_totals, + ) + self.assertEqual( + race_treatment.columns.tolist(), + [ + "treatment", + "n_black", + "n_white", + "median_black", + "median_white", + "median_gap_black_minus_white", + "statistic", + "pvalue", + ], + ) + + trust_treatment = self.module.run_trust_based_treatment_analysis( + self.eol_cohort, + self.final_model_table[["hadm_id", *self.module.MISTRUST_SCORE_COLUMNS]], + self.treatment_totals, + group_sizes={"total_vent_min": 1, "total_vaso_min": 1}, + ) + self.assertEqual( + trust_treatment.columns.tolist(), + [ + "metric", + "treatment", + "stratification_n", + "n_high", + "n_low", + "median_high", + "median_low", + "median_gap", + "statistic", + "pvalue", + ], + ) + + acuity = self.module.run_acuity_control_analysis( + self.final_model_table[["hadm_id", *self.module.MISTRUST_SCORE_COLUMNS]], + self.acuity_scores, + ) + self.assertEqual( + acuity.columns.tolist(), + ["feature_a", "feature_b", "correlation", "pvalue", "n"], + ) + + downstream = self.module.evaluate_downstream_predictions( + self.final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.8), + repetitions=1, + ) + self.assertEqual( + downstream.columns.tolist(), + [ + "task", + "configuration", + "target_column", + "n_rows", + "n_features", + "n_repeats", + "n_valid_auc", + "auc_mean", + "auc_std", + ], + ) + + def test_empty_valid_inputs_return_stable_analysis_and_downstream_schemas(self): + empty_scores = pd.DataFrame(columns=["hadm_id", *self.module.MISTRUST_SCORE_COLUMNS]) + empty_demographics = pd.DataFrame(columns=["hadm_id", "race"]) + empty_eol = pd.DataFrame(columns=["hadm_id", "race"]) + empty_treatments = pd.DataFrame(columns=["hadm_id", "total_vent_min", "total_vaso_min"]) + empty_acuity = pd.DataFrame(columns=["hadm_id", "oasis", "sapsii"]) + empty_final = self.final_model_table.head(0).copy() + + race_gap = 
self.module.run_race_gap_analysis(empty_scores, empty_demographics) + self.assertEqual(race_gap["metric"].tolist(), self.module.MISTRUST_SCORE_COLUMNS) + self.assertTrue((race_gap["n_black"] == 0).all()) + self.assertTrue((race_gap["n_white"] == 0).all()) + self.assertTrue(race_gap["pvalue"].isna().all()) + + race_treatment = self.module.run_race_based_treatment_analysis(empty_eol, empty_treatments) + self.assertEqual(race_treatment["treatment"].tolist(), ["total_vent_min", "total_vaso_min"]) + self.assertTrue((race_treatment["n_black"] == 0).all()) + self.assertTrue(race_treatment["pvalue"].isna().all()) + + trust_treatment = self.module.run_trust_based_treatment_analysis( + empty_eol, + empty_scores, + empty_treatments, + ) + self.assertEqual(len(trust_treatment), 6) + self.assertTrue((trust_treatment["stratification_n"] == 0).all()) + self.assertTrue(trust_treatment["median_gap"].isna().all()) + + acuity = self.module.run_acuity_control_analysis(empty_scores, empty_acuity) + self.assertEqual(len(acuity), 10) + self.assertTrue(acuity["correlation"].isna().all()) + self.assertTrue((acuity["n"] == 0).all()) + + downstream = self.module.evaluate_downstream_predictions( + empty_final, + repetitions=2, + ) + self.assertEqual(len(downstream), 18) + self.assertTrue((downstream["n_rows"] == 0).all()) + self.assertTrue((downstream["n_valid_auc"] == 0).all()) + self.assertTrue(downstream["auc_mean"].isna().all()) + + def test_evaluate_downstream_predictions_is_seed_stable_for_repeated_identical_runs(self): + kwargs = { + "final_model_table": self.final_model_table, + "estimator_factory": lambda: _FakeProbEstimator([0.1, 0.9]), + "split_fn": _SplitRecorder(), + "auc_fn": _AUCRecorder(0.77), + "repetitions": 4, + } + first = self.module.evaluate_downstream_predictions(**kwargs) + second = self.module.evaluate_downstream_predictions( + final_model_table=self.final_model_table, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + 
auc_fn=_AUCRecorder(0.77), + repetitions=4, + ) + pd.testing.assert_frame_equal(first, second) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/core/test_eol_mistrust_module.py b/tests/core/test_eol_mistrust_module.py new file mode 100644 index 000000000..756a7e204 --- /dev/null +++ b/tests/core/test_eol_mistrust_module.py @@ -0,0 +1,1523 @@ +import importlib.util +import unittest +from pathlib import Path +from unittest.mock import patch + +import pandas as pd + + +def _load_eol_mistrust_module(): + module_path = ( + Path(__file__).resolve().parents[2] / "pyhealth" / "datasets" / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.datasets.eol_mistrust_module_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +class _FakeProbEstimator: + def __init__(self, probabilities): + self.probabilities = list(probabilities) + self.was_fit = False + self.fit_X = None + self.fit_y = None + + def fit(self, X, y): + self.was_fit = True + self.fit_X = X.copy() if hasattr(X, "copy") else X + self.fit_y = y.copy() if hasattr(y, "copy") else y + return self + + def predict_proba(self, X): + probs = self.probabilities[: len(X)] + return [[1.0 - prob, prob] for prob in probs] + + +class TestEOLMistrustModuleImplementation(unittest.TestCase): + """Module-facing tests for the EOL mistrust implementation.""" + + @classmethod + def setUpClass(cls): + cls.module = _load_eol_mistrust_module() + + def setUp(self): + self.all_hadm_ids = [302, 303, 304, 305, 306] + self.admissions = pd.DataFrame( + [ + { + "hadm_id": 301, + "subject_id": 1, + "admittime": "2100-01-01 00:00:00", + "dischtime": "2100-01-02 00:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 302, + "subject_id": 2, + 
"admittime": "2100-02-01 00:00:00", + "dischtime": "2100-02-02 12:00:00", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "insurance": "Private", + "discharge_location": "HOME HOSPICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 303, + "subject_id": 3, + "admittime": "2100-03-01 00:00:00", + "dischtime": "2100-03-01 20:00:00", + "ethnicity": "ASIAN - CHINESE", + "insurance": "Medicaid", + "discharge_location": "SKILLED NURSING FACILITY", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 304, + "subject_id": 4, + "admittime": "2100-04-01 00:00:00", + "dischtime": "2100-04-01 10:00:00", + "ethnicity": "WHITE - RUSSIAN", + "insurance": "Government", + "discharge_location": "HOME", + "hospital_expire_flag": 1, + "has_chartevents_data": 1, + }, + { + "hadm_id": 305, + "subject_id": 5, + "admittime": "2100-05-01 00:00:00", + "dischtime": "2100-05-02 06:00:00", + "ethnicity": "HISPANIC OR LATINO", + "insurance": "Self Pay", + "discharge_location": "LEFT AGAINST MEDICAL ADVICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 306, + "subject_id": 6, + "admittime": "2100-06-01 00:00:00", + "dischtime": "2100-06-02 00:00:00", + "ethnicity": "WHITE", + "insurance": "Private", + "discharge_location": "TRANSFER AGAINST MEDICAL ADVICE REVIEW", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 307, + "subject_id": 7, + "admittime": "2100-07-01 00:00:00", + "dischtime": "2100-07-02 00:00:00", + "ethnicity": "BLACK/CAPE VERDEAN", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 0, + }, + ] + ) + self.patients = pd.DataFrame( + [ + {"subject_id": 1, "gender": "M", "dob": "2070-01-01 00:00:00"}, + {"subject_id": 2, "gender": "F", "dob": "2068-02-01 00:00:00"}, + {"subject_id": 3, "gender": "M", "dob": "2072-03-01 00:00:00"}, + {"subject_id": 4, "gender": "F", "dob": "2050-04-01 00:00:00"}, + 
{"subject_id": 5, "gender": "M", "dob": "2076-05-01 00:00:00"}, + {"subject_id": 6, "gender": "F", "dob": "2065-06-01 00:00:00"}, + {"subject_id": 7, "gender": "M", "dob": "2060-07-01 00:00:00"}, + ] + ) + self.icustays = pd.DataFrame( + [ + { + "hadm_id": 301, + "icustay_id": 3011, + "intime": "2100-01-01 00:00:00", + "outtime": "2100-01-01 11:00:00", + }, + { + "hadm_id": 301, + "icustay_id": 3012, + "intime": "2100-01-01 12:00:00", + "outtime": "2100-01-01 23:00:00", + }, + { + "hadm_id": 302, + "icustay_id": 3021, + "intime": "2100-02-01 00:00:00", + "outtime": "2100-02-01 13:00:00", + }, + { + "hadm_id": 302, + "icustay_id": 3022, + "intime": "2100-02-02 00:00:00", + "outtime": "2100-02-02 08:00:00", + }, + { + "hadm_id": 303, + "icustay_id": 3031, + "intime": "2100-03-01 00:00:00", + "outtime": "2100-03-01 12:00:00", + }, + { + "hadm_id": 304, + "icustay_id": 3041, + "intime": "2100-04-01 00:00:00", + "outtime": "2100-04-01 14:00:00", + }, + { + "hadm_id": 305, + "icustay_id": 3051, + "intime": "2100-05-01 00:00:00", + "outtime": "2100-05-01 15:00:00", + }, + { + "hadm_id": 306, + "icustay_id": 3061, + "intime": "2100-06-01 00:00:00", + "outtime": "2100-06-01 16:00:00", + }, + { + "hadm_id": 307, + "icustay_id": 3071, + "intime": "2100-07-01 00:00:00", + "outtime": "2100-07-01 18:00:00", + }, + ] + ) + self.noteevents = pd.DataFrame( + [ + { + "hadm_id": 302, + "category": "Nursing", + "text": "Patient was NON-COMPLIAN with care plan. AUTOPSY discussed.", + "iserror": 0, + }, + { + "hadm_id": 303, + "category": "Physician", + "text": "Patient remained non-adher with follow up after counseling.", + "iserror": None, + }, + { + "hadm_id": 304, + "category": "Nursing", + "text": "Patient refuses medication.", + "iserror": 0, + }, + { + "hadm_id": 304, + "category": "Discharge", + "text": "Autopsy requested.", + "iserror": 1, + }, + { + "hadm_id": 305, + "category": "Nursing", + "text": "Patient refused treatment. 
Date:[**5-1-18**]", + "iserror": 0, + }, + ] + ) + self.d_items = pd.DataFrame( + [ + {"itemid": 10, "label": "Riker-SAS Scale Score", "dbsource": "carevue"}, + {"itemid": 11, "label": "Richmond-RAS Scale", "dbsource": "metavision"}, + {"itemid": 12, "label": "Pain Level", "dbsource": "metavision"}, + {"itemid": 13, "label": "Family Meeting Note", "dbsource": "carevue"}, + {"itemid": 14, "label": "Education Readiness Status", "dbsource": "carevue"}, + {"itemid": 128, "label": "Code Status", "dbsource": "carevue"}, + {"itemid": 223758, "label": "Code Status", "dbsource": "metavision"}, + {"itemid": 999, "label": "Code Status", "dbsource": "carevue"}, + {"itemid": 777, "label": "Unrelated Measure", "dbsource": "carevue"}, + ] + ) + self.chartevents = pd.DataFrame( + [ + {"hadm_id": 302, "itemid": 10, "value": "Agitated", "icustay_id": 3021}, + {"hadm_id": 302, "itemid": 14, "value": "No", "icustay_id": 3021}, + {"hadm_id": 302, "itemid": 128, "value": "DNR/DNI", "icustay_id": 3021}, + {"hadm_id": 303, "itemid": 11, "value": "0 Alert and Calm", "icustay_id": 3031}, + {"hadm_id": 303, "itemid": 13, "value": "Family Requested", "icustay_id": 3031}, + { + "hadm_id": 303, + "itemid": 223758, + "value": "Comfort Measures Only", + "icustay_id": 3031, + }, + {"hadm_id": 304, "itemid": 12, "value": "7-Mod to Severe", "icustay_id": 3041}, + {"hadm_id": 304, "itemid": 999, "value": "DNR", "icustay_id": 3041}, + {"hadm_id": 305, "itemid": 128, "value": "Full Code", "icustay_id": 3051}, + {"hadm_id": 306, "itemid": 777, "value": "Noise", "icustay_id": 3061}, + ] + ) + self.ventdurations = pd.DataFrame( + [ + { + "icustay_id": 3021, + "ventnum": 1, + "starttime": "2100-02-01 00:00:00", + "endtime": "2100-02-01 02:00:00", + "duration_hours": 2.0, + }, + { + "icustay_id": 3021, + "ventnum": 2, + "starttime": "2100-02-01 11:30:00", + "endtime": "2100-02-01 12:30:00", + "duration_hours": 1.0, + }, + { + "icustay_id": 3021, + "ventnum": 3, + "starttime": "2100-02-01 23:31:00", + 
"endtime": "2100-02-02 00:31:00", + "duration_hours": 1.0, + }, + ] + ) + self.vasopressordurations = pd.DataFrame( + [ + { + "icustay_id": 3031, + "vasonum": 1, + "starttime": "2100-03-01 01:00:00", + "endtime": "2100-03-01 03:00:00", + "duration_hours": 2.0, + }, + { + "icustay_id": 3031, + "vasonum": 2, + "starttime": "2100-03-01 02:30:00", + "endtime": "2100-03-01 05:00:00", + "duration_hours": 2.5, + }, + { + "icustay_id": 3031, + "vasonum": 3, + "starttime": "2100-03-01 14:00:00", + "endtime": "2100-03-01 15:00:00", + "duration_hours": 1.0, + }, + ] + ) + self.oasis = pd.DataFrame( + [ + {"hadm_id": 302, "icustay_id": 3021, "oasis": 12}, + {"hadm_id": 302, "icustay_id": 3022, "oasis": 25}, + {"hadm_id": 303, "icustay_id": 3031, "oasis": 18}, + {"hadm_id": 304, "icustay_id": 3041, "oasis": 30}, + {"hadm_id": 305, "icustay_id": 3051, "oasis": 9}, + {"hadm_id": 306, "icustay_id": 3061, "oasis": 7}, + ] + ) + self.sapsii = pd.DataFrame( + [ + {"hadm_id": 302, "icustay_id": 3021, "sapsii": 40}, + {"hadm_id": 302, "icustay_id": 3022, "sapsii": 60}, + {"hadm_id": 303, "icustay_id": 3031, "sapsii": 35}, + {"hadm_id": 304, "icustay_id": 3041, "sapsii": 70}, + {"hadm_id": 305, "icustay_id": 3051, "sapsii": 15}, + {"hadm_id": 306, "icustay_id": 3061, "sapsii": 12}, + ] + ) + + def _pending_real_data(self, requirement: str) -> None: + self.skipTest(requirement) + + def _get_callable(self, name): + self.assertTrue( + hasattr(self.module, name), + msg=f"Implement `{name}` in pyhealth.datasets.eol_mistrust", + ) + attr = getattr(self.module, name) + self.assertTrue(callable(attr), msg=f"`{name}` must be callable") + return attr + + def _build_base(self): + return self._get_callable("build_base_admissions")(self.admissions, self.patients) + + def _build_demographics(self): + return self._get_callable("build_demographics_table")(self._build_base()) + + def _build_all(self): + return self._get_callable("build_all_cohort")(self._build_base(), self.icustays) + + def 
_build_eol(self): + return self._get_callable("build_eol_cohort")(self._build_base(), self._build_demographics()) + + def _build_feature_matrix(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + return build_chartevent_feature_matrix( + self.chartevents, + self.d_items, + allowed_labels={ + "Riker-SAS Scale Score", + "Richmond-RAS Scale", + "Pain Level", + "Family Meeting Note", + "Education Readiness Status", + }, + all_hadm_ids=self.all_hadm_ids, + ) + + def _build_note_labels(self): + return self._get_callable("build_note_labels")( + self.noteevents, + all_hadm_ids=self.all_hadm_ids, + ) + + def _build_note_corpus(self): + return self._get_callable("build_note_corpus")( + self.noteevents, + all_hadm_ids=self.all_hadm_ids, + ) + + def _zero_mistrust_scores(self, hadm_ids=None): + if hadm_ids is None: + hadm_ids = self.all_hadm_ids + return pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in hadm_ids + ] + ) + + def _required_downstream_feature_configs(self): + baseline_features = [ + "age", + "los_days", + "gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", + ] + race_features = [ + "race_white", + "race_black", + "race_asian", + "race_hispanic", + "race_native_american", + "race_other", + ] + mistrust_features = { + "Baseline + Noncompliant": ["noncompliance_score_z"], + "Baseline + Autopsy": ["autopsy_score_z"], + "Baseline + Neg-Sentiment": ["negative_sentiment_score_z"], + } + return { + "Baseline": baseline_features, + "Baseline + Race": baseline_features + race_features, + "Baseline + Noncompliant": baseline_features + mistrust_features["Baseline + Noncompliant"], + "Baseline + Autopsy": baseline_features + mistrust_features["Baseline + Autopsy"], + "Baseline + Neg-Sentiment": baseline_features + mistrust_features["Baseline + Neg-Sentiment"], + "Baseline + ALL": 
baseline_features + + race_features + + [ + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + ], + } + + def _build_mistrust_scores(self): + build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + probability_sequences = [ + [0.90, 0.10, 0.80, 0.20, 0.40], + [0.70, 0.20, 0.30, 0.60, 0.50], + ] + created = [] + + def estimator_factory(): + estimator = _FakeProbEstimator(probability_sequences[len(created)]) + created.append(estimator) + return estimator + + sentiment_map = { + "Patient was NON-COMPLIAN with care plan. AUTOPSY discussed.": -0.6, + "Patient remained non-adher with follow up after counseling.": -0.2, + "Patient refuses medication.": 0.1, + "Patient refused treatment. Date:[**5-1-18**]": -0.4, + "": 0.0, + } + + scores = build_mistrust_score_table( + feature_matrix=self._build_feature_matrix(), + note_labels=self._build_note_labels(), + note_corpus=self._build_note_corpus(), + estimator_factory=estimator_factory, + sentiment_fn=lambda text: (sentiment_map[text], 0.0), + ) + return scores, created + + def _assert_hadm_unique(self, df, message): + self.assertIn("hadm_id", df.columns, msg=f"{message} must include hadm_id") + self.assertTrue(df["hadm_id"].is_unique, msg=f"{message} must be unique on hadm_id") + + def test_all_cohort_contains_distinct_hadm_ids_with_icu_stay_ge_12h(self): + all_cohort = self._build_all() + self.assertEqual(set(all_cohort["hadm_id"]), set(self.all_hadm_ids)) + self.assertNotIn(301, set(all_cohort["hadm_id"])) + self.assertNotIn(307, set(all_cohort["hadm_id"])) + self._assert_hadm_unique(all_cohort, "ALL cohort") + + def test_all_cohort_size_is_within_expected_mimic_range(self): + self._pending_real_data( + "ALL cohort size on real MIMIC-III data should be within 46,000-50,000 admissions." 
+ ) + + def test_eol_cohort_applies_los_and_discharge_criteria(self): + eol = self._build_eol() + self.assertEqual(set(eol["hadm_id"]), {302, 303, 304}) + by_hadm = eol.set_index("hadm_id") + self.assertEqual(by_hadm.loc[302, "discharge_category"], "Hospice") + self.assertEqual(by_hadm.loc[303, "discharge_category"], "Skilled Nursing Facility") + self.assertEqual(by_hadm.loc[304, "discharge_category"], "Deceased") + self._assert_hadm_unique(eol, "EOL cohort") + + def test_eol_cohort_enforces_exact_six_hour_boundary(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_eol_cohort = self._get_callable("build_eol_cohort") + admissions = pd.DataFrame( + [ + { + "hadm_id": 920, + "subject_id": 920, + "admittime": "2100-09-01 00:00:00", + "dischtime": "2100-09-01 06:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "HOME HOSPICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "hadm_id": 921, + "subject_id": 921, + "admittime": "2100-09-01 00:00:00", + "dischtime": "2100-09-01 05:59:00", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "insurance": "Private", + "discharge_location": "HOME HOSPICE", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 920, "gender": "M", "dob": "2070-09-01 00:00:00"}, + {"subject_id": 921, "gender": "F", "dob": "2070-09-01 00:00:00"}, + ] + ) + + base = build_base_admissions(admissions, patients) + demographics = build_demographics_table(base) + eol = build_eol_cohort(base, demographics) + self.assertIn(920, set(eol["hadm_id"])) + self.assertNotIn(921, set(eol["hadm_id"])) + + def test_eol_cohort_size_is_within_expected_mimic_range(self): + self._pending_real_data( + "EOL cohort size on real MIMIC-III data should remain near the expected reference scale of roughly 11,000 admissions." 
+ ) + + def test_demographics_age_caps_shifted_mimiciii_ages_at_ninety(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + admissions = pd.DataFrame( + [ + { + "hadm_id": 901, + "subject_id": 91, + "admittime": "2100-01-01 00:00:00", + "dischtime": "2100-01-02 00:00:00", + "ethnicity": "WHITE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + } + ] + ) + patients = pd.DataFrame( + [ + {"subject_id": 91, "gender": "M", "dob": "1800-01-01 00:00:00"}, + ] + ) + + base = build_base_admissions(admissions, patients) + demographics = build_demographics_table(base).set_index("hadm_id") + self.assertEqual(demographics.loc[901, "age"], 90.0) + + def test_mistrust_scores_trained_on_all_can_merge_into_eol_by_hadm_id(self): + eol = self._build_eol() + scores, created = self._build_mistrust_scores() + merged = eol[["hadm_id"]].merge(scores, on="hadm_id", how="left") + self.assertEqual(len(created), 2) + self.assertEqual(set(merged["hadm_id"]), {302, 303, 304}) + self.assertTrue( + merged[ + [ + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + ] + ] + .notna() + .all() + .all() + ) + + def test_mistrust_score_merge_leaves_null_for_eol_admissions_absent_from_all(self): + scores, _ = self._build_mistrust_scores() + eol_like = pd.DataFrame({"hadm_id": [302, 303, 999]}) + merged = eol_like.merge(scores, on="hadm_id", how="left").set_index("hadm_id") + self.assertTrue( + merged.loc[ + 999, + [ + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + ], + ] + .isna() + .all() + ) + + def test_chartevent_feature_matrix_rows_match_all_cohort(self): + feature_matrix = self._build_feature_matrix() + self.assertEqual(set(feature_matrix["hadm_id"]), set(self.all_hadm_ids)) + self._assert_hadm_unique(feature_matrix, "Chartevent feature matrix") + + def 
    test_chartevent_feature_matrix_keeps_zero_rows_for_no_matching_events(self):
        # Admission 306 only has itemid 777 ("Unrelated Measure"), which is not
        # in the allowed labels, so its row must exist but be all zeros.
        feature_matrix = self._build_feature_matrix().fillna(0).set_index("hadm_id")
        zero_row = feature_matrix.loc[306]
        self.assertTrue((zero_row == 0).all())

    def test_chartevent_feature_matrix_feature_count_is_within_expected_range(self):
        # Real-data expectation; _pending_real_data records it as a skip.
        self._pending_real_data(
            "Real-data chartevent feature dimensionality should be within 550-700 columns."
        )

    def test_real_data_chartevent_feature_matrix_cells_are_binary(self):
        self._pending_real_data(
            "On real data, every non-hadm_id cell in the chartevent feature matrix should be binary 0/1."
        )

    def test_chartevent_feature_matrix_is_binary_and_keeps_rare_features(self):
        # "Family Meeting Note: Family Requested" occurs once in the fixtures
        # (hadm 303) and must survive as a feature rather than being pruned.
        feature_matrix = self._build_feature_matrix().fillna(0)
        feature_columns = [column for column in feature_matrix.columns if column != "hadm_id"]
        self.assertTrue(feature_columns)
        self.assertTrue(
            feature_matrix[feature_columns].isin([0, 1]).all().all(),
            msg="All chartevent features must be binary.",
        )
        self.assertIn("Family Meeting Note: Family Requested", feature_matrix.columns)
        self.assertEqual(
            int(feature_matrix["Family Meeting Note: Family Requested"].sum()),
            1,
            msg="Rare one-off chart features must be preserved.",
        )

    def test_chartevent_feature_columns_use_label_colon_value_names(self):
        feature_matrix = self._build_feature_matrix()
        expected_columns = {
            "Riker-SAS Scale Score: Agitated",
            "Education Readiness Status: No",
            "Richmond-RAS Scale: 0 Alert and Calm",
            "Pain Level: 7-Mod to Severe",
        }
        self.assertTrue(
            expected_columns.issubset(set(feature_matrix.columns)),
            msg='Feature columns must be named in the form "label: value".',
        )

    def test_chartevent_feature_matrix_counts_repeated_label_value_once_per_admission(self):
        # Duplicate the (302, itemid 10, "Agitated") event; the binary cell must
        # stay 1, not become a count of 2.
        build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix")
        chartevents = pd.concat(
            [
                self.chartevents,
                pd.DataFrame(
                    [
                        {
                            "hadm_id": 302,
                            "itemid": 10,
                            "value": "Agitated",
                            "icustay_id": 3021,
                        }
                    ]
                ),
            ],
            ignore_index=True,
        )
        feature_matrix = build_chartevent_feature_matrix(
            chartevents,
            self.d_items,
            allowed_labels={
                "Riker-SAS Scale Score",
                "Richmond-RAS Scale",
                "Pain Level",
                "Family Meeting Note",
                "Education Readiness Status",
            },
            all_hadm_ids=self.all_hadm_ids,
        ).set_index("hadm_id")
        self.assertIn("Riker-SAS Scale Score: Agitated", feature_matrix.columns)
        self.assertEqual(feature_matrix.loc[302, "Riker-SAS Scale Score: Agitated"], 1)

    def test_chartevent_feature_matrix_excludes_unmatched_itemids(self):
        feature_matrix = self._build_feature_matrix()
        joined_columns = " | ".join(feature_matrix.columns)
        self.assertNotIn("Unrelated Measure", joined_columns)

    def test_table2_item_matching_is_case_insensitive_partial_and_cross_dbsource(self):
        # Fixture d_items mixes carevue and metavision rows; all five Table-2
        # item ids (10-14) must match regardless of dbsource.
        identify_table2_itemids = self._get_callable("identify_table2_itemids")
        matched = identify_table2_itemids(self.d_items)
        self.assertTrue({10, 11, 12, 13, 14}.issubset(matched))

    def test_note_aggregation_keeps_non_error_notes_and_concatenates_per_admission(self):
        # The iserror == 1 note for hadm 304 ("Autopsy requested.") must be
        # dropped; hadm 306 has no notes and must yield an empty string.
        note_corpus = self._build_note_corpus()
        self._assert_hadm_unique(note_corpus, "Note corpus")
        by_hadm = note_corpus.set_index("hadm_id")
        self.assertIn("AUTOPSY discussed.", by_hadm.loc[302, "note_text"])
        self.assertIn("non-adher", by_hadm.loc[303, "note_text"])
        self.assertNotIn("Autopsy requested.", by_hadm.loc[304, "note_text"])
        self.assertEqual(by_hadm.loc[306, "note_text"], "")

    def test_note_aggregation_restricts_to_all_cohort_hadm_ids(self):
        build_note_corpus = self._get_callable("build_note_corpus")
        notes = pd.concat(
            [
                self.noteevents,
                pd.DataFrame(
                    [
                        {
                            "hadm_id": 999,
                            "category": "Nursing",
                            "text": "Out-of-cohort note should not survive the ALL cohort filter.",
                            "iserror": 0,
                        }
                    ]
                ),
            ],
            ignore_index=True,
        )
        note_corpus = build_note_corpus(notes, all_hadm_ids=self.all_hadm_ids)
        self.assertEqual(set(note_corpus["hadm_id"]), set(self.all_hadm_ids))
        self.assertNotIn(999, set(note_corpus["hadm_id"]))

    def test_note_aggregation_joins_multiple_notes_with_single_space_separator(self):
        # Tabs/newlines inside a note collapse to single spaces and notes are
        # joined with a single space.
        build_note_corpus = self._get_callable("build_note_corpus")
        notes = pd.DataFrame(
            [
                {"hadm_id": 1, "category": "Nursing", "text": "First\t note", "iserror": 0},
                {"hadm_id": 1, "category": "Physician", "text": "Second\nnote", "iserror": 0},
            ]
        )
        corpus = build_note_corpus(notes).set_index("hadm_id")
        self.assertEqual(corpus.loc[1, "note_text"], "First note Second note")

    def test_note_coverage_exceeds_40000_admissions(self):
        self._pending_real_data(
            "Real-data note coverage should exceed 40,000 admissions in the ALL cohort."
        )

    def test_total_note_count_exceeds_expected_reference_scale(self):
        self._pending_real_data(
            "The raw clinical note corpus used for note aggregation should contain at least about 800,000 notes on real MIMIC-III data."
        )

    def test_noncompliance_label_matches_all_required_substrings_case_insensitively(self):
        # One admission per trigger phrase; upper-casing the phrase checks that
        # matching is case-insensitive.
        build_note_labels = self._get_callable("build_note_labels")
        phrases = [
            "noncomplian",
            "non-complian",
            "nonadher",
            "non-adher",
            "refuses medication",
            "refused medication",
            "refuses treatment",
            "refused treatment",
            "noncompliance",
            "noncompliant",
        ]
        notes = pd.DataFrame(
            [
                {
                    "hadm_id": index + 1,
                    "category": "Nursing",
                    "text": f"Patient documented as {phrase.upper()} during stay.",
                    "iserror": 0,
                }
                for index, phrase in enumerate(phrases)
            ]
        )
        labels = build_note_labels(notes).set_index("hadm_id")
        for hadm_id in range(1, len(phrases) + 1):
            self.assertEqual(labels.loc[hadm_id, "noncompliance_label"], 1)

    def test_noncompliance_positive_rate_is_within_expected_range(self):
        self._pending_real_data(
            "Noncompliance label prevalence on real data should be between 1% and 30%."
        )

    def test_autopsy_label_uses_case_insensitive_matching(self):
        build_note_labels = self._get_callable("build_note_labels")
        notes = pd.DataFrame(
            [
                {"hadm_id": 1, "category": "Nursing", "text": "AUTOPSY was discussed.", "iserror": 0},
                {"hadm_id": 2, "category": "Nursing", "text": "No mention here.", "iserror": 0},
            ]
        )
        labels = build_note_labels(notes).set_index("hadm_id")
        self.assertEqual(labels.loc[1, "autopsy_label"], 1)
        self.assertEqual(labels.loc[2, "autopsy_label"], 0)

    def test_autopsy_positive_rate_is_within_expected_range(self):
        # Real-data expectation; _pending_real_data records it as a skip.
        self._pending_real_data(
            "Autopsy label prevalence on real data should be between 10% and 50%."
        )

    def test_black_autopsy_rate_exceeds_white_autopsy_rate(self):
        self._pending_real_data(
            "Black admission autopsy rate should exceed White admission autopsy rate."
        )

    def test_sentiment_preprocessing_uses_whitespace_tokenize_then_rejoin(self):
        # Whitespace runs (tabs/newlines) collapse to single spaces; the MIMIC
        # de-identified date token must survive untouched.
        prepare_note_text_for_sentiment = self._get_callable("prepare_note_text_for_sentiment")
        cleaned = prepare_note_text_for_sentiment(
            "Patient\trefused\n\n treatment Date:[**5-1-18**]"
        )
        self.assertEqual(cleaned, "Patient refused treatment Date:[**5-1-18**]")

    def test_noncompliance_proxy_model_uses_l1_liblinear_logistic_regression(self):
        # Records the kwargs the module passes to LogisticRegression by
        # patching it with a spy class.
        build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores")
        created = []

        class _RecordingLogisticRegression:
            def __init__(self, *args, **kwargs):
                created.append(kwargs)

            def fit(self, X, y):
                del X, y
                return self

            def predict_proba(self, X):
                return [[0.25, 0.75] for _ in range(len(X))]

        feature_matrix = pd.DataFrame(
            [{"hadm_id": 1, "feature_a": 1}, {"hadm_id": 2, "feature_a": 0}]
        )
        labels = pd.DataFrame(
            [{"hadm_id": 1, "noncompliance_label": 1}, {"hadm_id": 2, "noncompliance_label": 0}]
        )

        with patch.object(self.module, "LogisticRegression", _RecordingLogisticRegression):
            build_proxy_probability_scores(feature_matrix, labels, "noncompliance_label")

        self.assertEqual(created[0].get("penalty"), "l1")
        self.assertEqual(created[0].get("solver"), "liblinear")
        self.assertEqual(created[0].get("max_iter"), 1000)

    def test_autopsy_proxy_model_uses_l1_liblinear_logistic_regression(self):
        # Same spy-class pattern as the noncompliance test, for the autopsy label.
        build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores")
        created = []

        class _RecordingLogisticRegression:
            def __init__(self, *args, **kwargs):
                created.append(kwargs)

            def fit(self, X, y):
                del X, y
                return self

            def predict_proba(self, X):
                return [[0.40, 0.60] for _ in range(len(X))]

        feature_matrix = pd.DataFrame(
            [{"hadm_id": 1, "feature_a": 1}, {"hadm_id": 2, "feature_a": 0}]
        )
        labels = pd.DataFrame(
            [{"hadm_id": 1, "autopsy_label": 1}, {"hadm_id": 2, "autopsy_label": 0}]
        )

        with patch.object(self.module, "LogisticRegression", _RecordingLogisticRegression):
            build_proxy_probability_scores(feature_matrix, labels, "autopsy_label")

        self.assertEqual(created[0].get("penalty"), "l1")
        self.assertEqual(created[0].get("solver"), "liblinear")
        self.assertEqual(created[0].get("max_iter"), 1000)

    def test_proxy_models_fit_on_full_all_cohort_without_train_test_split(self):
        # The fake estimator must be fit on every ALL-cohort row (no holdout).
        build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores")
        estimator = _FakeProbEstimator([0.9, 0.2, 0.8, 0.1, 0.4])
        feature_matrix = self._build_feature_matrix()
        labels = self._build_note_labels()
        scores = build_proxy_probability_scores(
            feature_matrix,
            labels,
            "noncompliance_label",
            estimator_factory=lambda: estimator,
        )
        self.assertTrue(estimator.was_fit)
        self.assertEqual(len(estimator.fit_X), len(feature_matrix))
        self.assertEqual(set(scores["hadm_id"]), set(self.all_hadm_ids))

    def test_proxy_model_scores_use_predict_proba_positive_class(self):
        # _FakeProbEstimator returns [1-p, p]; the score column must carry the
        # positive-class column p, i.e. [0.3, 0.8].
        build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores")
        estimator = _FakeProbEstimator([0.3, 0.8])
        feature_matrix = pd.DataFrame(
            [{"hadm_id": 1, "feature_a": 1}, {"hadm_id": 2, "feature_a": 0}]
        )
        labels = pd.DataFrame(
            [{"hadm_id": 1, "noncompliance_label": 1}, {"hadm_id": 2, "noncompliance_label": 0}]
        )
        scores = build_proxy_probability_scores(
            feature_matrix,
            labels,
            "noncompliance_label",
            estimator_factory=lambda: estimator,
        )
        self.assertEqual(list(scores["noncompliance_score"]), [0.3, 0.8])

    def test_sentiment_mistrust_score_is_negative_polarity(self):
        # Mistrust score is the negated polarity: -(-0.5) = 0.5, -(0.2) = -0.2.
        build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores")
        note_corpus = pd.DataFrame(
            [
                {"hadm_id": 1, "note_text": "very negative"},
                {"hadm_id": 2, "note_text": "neutral"},
                {"hadm_id": 3, "note_text": "positive"},
            ]
        )
        polarity_map = {"very negative": -0.5, "neutral": 0.0, "positive": 0.2}
        scores = build_negative_sentiment_scores(
            note_corpus,
            sentiment_fn=lambda text: (polarity_map[text], 0.0),
        ).set_index("hadm_id")
        self.assertAlmostEqual(scores.loc[1, "negative_sentiment_score"], 0.5)
        self.assertAlmostEqual(scores.loc[2, "negative_sentiment_score"], 0.0)
        self.assertAlmostEqual(scores.loc[3, "negative_sentiment_score"], -0.2)

    def test_negative_sentiment_scores_send_cleaned_text_to_sentiment_function(self):
        # The sentiment function must receive the whitespace-normalized text,
        # not the raw note.
        build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores")
        seen = []

        def sentiment_fn(text):
            seen.append(text)
            return (0.0, 0.0)

        note_corpus = pd.DataFrame(
            [{"hadm_id": 1, "note_text": "Patient\trefused\n treatment Date:[**5-1-18**]"}]
        )
        build_negative_sentiment_scores(note_corpus, sentiment_fn=sentiment_fn)
        self.assertEqual(seen, ["Patient refused treatment Date:[**5-1-18**]"])

    def test_each_mistrust_score_is_normalized_independently(self):
        # Each *_z column must be z-scored on its own: mean 0, population std 1.
        scores, _ = self._build_mistrust_scores()
        for column in [
            "noncompliance_score_z",
            "autopsy_score_z",
            "negative_sentiment_score_z",
        ]:
            self.assertAlmostEqual(float(scores[column].mean()), 0.0, places=7)
            self.assertAlmostEqual(float(scores[column].std(ddof=0)), 1.0, places=7)

    def test_normalized_scores_have_mean_near_zero_and_unit_variance(self):
        # Looser-tolerance companion to the exact z-normalization test above.
        scores, _ = self._build_mistrust_scores()
        for column in [
            "noncompliance_score_z",
            "autopsy_score_z",
            "negative_sentiment_score_z",
        ]:
            self.assertLess(abs(float(scores[column].mean())), 0.01)
            self.assertGreaterEqual(float(scores[column].std(ddof=0)), 0.99)
            self.assertLessEqual(float(scores[column].std(ddof=0)), 1.01)

    def test_noncompliance_feature_weights_have_expected_signals(self):
        # Real-data expectation; _pending_real_data records it as a skip.
        self._pending_real_data(
            "Largest positive noncompliance coefficient should contain agitat or riker, and largest negative should contain alert."
        )

    def test_noncompliance_feature_weight_validation_includes_pain_and_calm_checks(self):
        self._pending_real_data(
            "Feature-weight validation should also confirm pain-related features rank positively and alert/calm or no-pain features rank negatively."
        )

    def test_autopsy_feature_weights_have_expected_signals(self):
        self._pending_real_data(
            "Autopsy feature-weight validation should confirm restraint and orientation signals rank positively while no-pain, proxy, or family-communication signals rank negatively."
        )

    def test_race_gap_validation_for_mistrust_scores_matches_expected_directionality(self):
        self._pending_real_data(
            "Noncompliance and sentiment scores should be higher for Black admissions with p < 0.05; autopsy score p should remain > 0.05."
        )

    def test_noncompliance_race_gap_is_significant_with_black_median_higher(self):
        self._pending_real_data(
            "Noncompliance mistrust should show a significant White-vs-Black gap with Black median higher than White median."
        )

    def test_sentiment_race_gap_is_significant_with_black_median_higher(self):
        self._pending_real_data(
            "Negative-sentiment mistrust should show a significant White-vs-Black gap with Black median higher than White median."
        )

    def test_autopsy_race_gap_is_non_significant(self):
        self._pending_real_data(
            "Autopsy-derived mistrust should remain non-significant between White and Black admissions."
        )

    def test_race_gap_validation_merges_scores_with_race_and_restricts_to_white_and_black(self):
        self._pending_real_data(
            "Race-gap validation must merge mistrust scores with race and restrict analysis to White and Black admissions only."
        )

    def test_race_gap_validation_uses_two_sided_mann_whitney_for_each_metric(self):
        self._pending_real_data(
            "Race-gap validation must use two-sided Mann-Whitney tests separately for noncompliance, autopsy, and sentiment metrics."
        )

    def test_treatment_disparity_uses_admission_level_vent_and_vaso_totals(self):
        # Expected totals follow from merging windows separated by < 600 min
        # (rule pinned by the boundary test below):
        #   hadm 302 vent: 00:00-02:00 + 11:30-12:30 merge to 750 min, plus a
        #   separate 60-min window (661-min gap) -> 810 min total.
        #   hadm 303 vaso: all three windows merge into 01:00-15:00 -> 840 min.
        build_treatment_totals = self._get_callable("build_treatment_totals")
        totals = build_treatment_totals(
            self.icustays,
            self.ventdurations,
            self.vasopressordurations,
        ).fillna(0).set_index("hadm_id")
        self.assertEqual(totals.loc[302, "total_vent_min"], 810.0)
        self.assertEqual(totals.loc[303, "total_vaso_min"], 840.0)

    def test_treatment_totals_respect_exact_six_hundred_minute_merge_boundary(self):
        build_treatment_totals = self._get_callable("build_treatment_totals")
        icustays = pd.DataFrame(
            [
                {
                    "hadm_id": 950,
                    "icustay_id": 9501,
                    "intime": "2100-09-01 00:00:00",
                    "outtime": "2100-09-01 12:00:00",
                },
                {
                    "hadm_id": 950,
                    "icustay_id": 9502,
                    "intime": "2100-09-01 20:00:00",
                    "outtime": "2100-09-02 04:00:00",
                },
            ]
        )
        # Gap of exactly 600 min (01:00 -> 11:00) must merge; the 601-min gap
        # before the 22:01 window must not.
        ventdurations = pd.DataFrame(
            [
                {
                    "icustay_id": 9501,
                    "ventnum": 1,
                    "starttime": "2100-09-01 00:00:00",
                    "endtime": "2100-09-01 01:00:00",
                    "duration_hours": 1.0,
                },
                {
                    "icustay_id": 9501,
                    "ventnum": 2,
                    "starttime": "2100-09-01 11:00:00",
                    "endtime": "2100-09-01 12:00:00",
                    "duration_hours": 1.0,
                },
                {
                    "icustay_id": 9502,
                    "ventnum": 3,
                    "starttime": "2100-09-01 22:01:00",
                    "endtime": "2100-09-01 23:01:00",
                    "duration_hours": 1.0,
                },
            ]
        )
        empty_vaso = pd.DataFrame(
            columns=["icustay_id", "vasonum", "starttime", "endtime", "duration_hours"]
        )

        totals = build_treatment_totals(icustays, ventdurations, empty_vaso).fillna(0).set_index(
            "hadm_id"
        )
        # 720-min merged window (exactly-600-min gap) + separate 60-min window.
        self.assertEqual(totals.loc[950, "total_vent_min"], 780.0)

    def test_race_based_treatment_disparity_restricts_eol_to_white_and_black(self):
        # Real-data expectation; _pending_real_data records it as a skip.
        self._pending_real_data(
            "Race-based treatment disparity analysis must restrict the EOL cohort to race in {WHITE, BLACK}."
        )

    def test_race_based_treatment_disparity_drops_null_treatment_durations_per_test(self):
        self._pending_real_data(
            "Race-based treatment disparity analysis must drop null treatment durations separately for ventilation and vasopressor tests."
        )

    def test_race_based_treatment_disparity_uses_two_sided_mann_whitney(self):
        self._pending_real_data(
            "Race-based treatment disparity analysis must compare Black vs White with two-sided Mann-Whitney tests."
        )

    def test_race_based_treatment_disparity_records_black_sample_sizes_for_later_use(self):
        self._pending_real_data(
            "Race-based treatment disparity analysis must record Black sample sizes for ventilation and vasopressors for later mistrust-group stratification."
        )

    def test_race_based_treatment_disparity_expected_black_sample_sizes_match_reference(self):
        self._pending_real_data(
            "Race-based treatment disparity analysis should recover Black sample sizes approximately equal to n_black_vent ~= 510 and n_black_vaso ~= 453."
        )

    def test_treatment_disparity_keeps_only_non_null_scores_and_treatments(self):
        self._pending_real_data(
            "Treatment-disparity analysis must keep only rows with non-null treatment duration and non-null mistrust score."
        )

    def test_treatment_disparity_stratification_uses_top_n_high_mistrust(self):
        self._pending_real_data(
            "For each metric-treatment pair, admissions must be sorted descending by score and high mistrust must be defined as the top N rows."
        )

    def test_treatment_disparity_uses_black_sample_size_as_group_size(self):
        self._pending_real_data(
            "High-mistrust group size N must equal the corresponding Black sample size from the race-based treatment analysis."
        )

    def test_treatment_disparity_uses_treatment_specific_black_group_sizes(self):
        self._pending_real_data(
            "Trust-based treatment disparity analysis must use N = n_black_vent for ventilation and N = n_black_vaso for vasopressors."
        )

    def test_treatment_disparity_computes_mann_whitney_and_median_gap(self):
        self._pending_real_data(
            "Treatment-disparity analysis must compute two-sided Mann-Whitney U and median(high) - median(low)."
        )

    def test_noncompliance_ventilation_treatment_gap_matches_reference_direction(self):
        self._pending_real_data(
            "Noncompliance-based ventilation disparity should be strongly significant with a large positive median gap near the paper reference."
        )

    def test_autopsy_ventilation_treatment_gap_matches_reference_direction(self):
        self._pending_real_data(
            "Autopsy-based ventilation disparity should be strongly significant with a large positive median gap near the paper reference."
        )

    def test_sentiment_ventilation_treatment_gap_matches_reference_direction(self):
        self._pending_real_data(
            "Sentiment-based ventilation disparity should remain significant with a smaller positive median gap near the paper reference."
        )

    def test_ventilation_trust_based_gaps_exceed_race_gap_for_noncompliance_and_autopsy(self):
        self._pending_real_data(
            "For ventilation, noncompliance and autopsy mistrust gaps must each exceed 1.5x the race-based gap."
        )

    def test_sentiment_vasopressor_result_remains_non_significant(self):
        self._pending_real_data(
            "Sentiment-based vasopressor disparity should remain non-significant with p > 0.10."
        )

    def test_acuity_scores_merge_to_mistrust_scores_by_hadm_id(self):
        # oasis/sapsii fixtures cover hadm 302-306 (== all_hadm_ids), so an
        # inner merge with the mistrust scores keeps every cohort admission.
        build_acuity_scores = self._get_callable("build_acuity_scores")
        acuity = build_acuity_scores(self.oasis, self.sapsii)
        scores, _ = self._build_mistrust_scores()
        merged = scores.merge(acuity, on="hadm_id", how="inner")
        self.assertEqual(set(merged["hadm_id"]), set(self.all_hadm_ids))
        self.assertTrue({"oasis", "sapsii"}.issubset(merged.columns))

    def test_acuity_aggregation_rule_is_deterministic_for_multiple_icu_stays(self):
        # hadm 302 has two ICU stays in the fixtures; the aggregation must be
        # reproducible and collapse them to one row per admission.
        build_acuity_scores = self._get_callable("build_acuity_scores")
        first = build_acuity_scores(self.oasis, self.sapsii)
        second = build_acuity_scores(self.oasis, self.sapsii)
        self.assertTrue(first.equals(second))
        self.assertEqual(len(first.loc[first["hadm_id"] == 302]), 1)

    def test_acuity_correlations_match_expected_ranges(self):
        # Real-data expectation; _pending_real_data records it as a skip.
        self._pending_real_data(
            "OASIS-SAPSII correlation should be in 0.60-0.75, each mistrust-acuity correlation should have |r| < 0.15, and noncompliance-autopsy correlation should be in 0.15-0.35."
        )

    def test_acuity_control_uses_pairwise_pearson_correlations_across_all_five_metrics(self):
        self._pending_real_data(
            "Acuity-control analysis must compute pairwise Pearson correlations across noncompliance, autopsy, sentiment, OASIS, and SAPS II."
        )

    def test_oasis_sapsii_correlation_matches_expected_reference_range(self):
        self._pending_real_data(
            "OASIS-SAPSII correlation should remain within the expected reference range around 0.679."
        )

    def test_mistrust_acuity_correlations_remain_weak(self):
        self._pending_real_data(
            "Each mistrust-to-acuity Pearson correlation should have absolute value below 0.15."
        )

    def test_noncompliance_autopsy_correlation_matches_expected_reference_range(self):
        self._pending_real_data(
            "Noncompliance-to-autopsy mistrust correlation should remain within the expected reference band around 0.262."
+ ) + + def test_left_ama_target_definition_is_exact(self): + build_final_model_table = self._get_callable("build_final_model_table") + final = build_final_model_table( + demographics=self._build_demographics(), + all_cohort=self._build_all(), + admissions=self._build_base(), + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in self.all_hadm_ids + ] + ), + include_race=False, + include_mistrust=False, + ).set_index("hadm_id") + self.assertEqual(final.loc[305, "left_ama"], 1) + self.assertEqual(final.loc[306, "left_ama"], 0) + + def test_code_status_target_uses_required_itemids_and_values(self): + build_code_status_target = getattr(self.module, "_build_code_status_target") + target = build_code_status_target(self.chartevents, self.d_items).set_index("hadm_id") + self.assertEqual(target.loc[302, "code_status_dnr_dni_cmo"], 1) + self.assertEqual(target.loc[303, "code_status_dnr_dni_cmo"], 1) + self.assertEqual(target.loc[305, "code_status_dnr_dni_cmo"], 0) + self.assertNotIn(304, set(target.index)) + + def test_code_status_task_excludes_admissions_without_charted_code_status(self): + build_code_status_target = getattr(self.module, "_build_code_status_target") + target = build_code_status_target(self.chartevents, self.d_items) + self.assertNotIn(306, set(target["hadm_id"])) + + def test_in_hospital_mortality_target_comes_from_hospital_expire_flag(self): + build_final_model_table = self._get_callable("build_final_model_table") + final = build_final_model_table( + demographics=self._build_demographics(), + all_cohort=self._build_all(), + admissions=self._build_base(), + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for 
hadm_id in self.all_hadm_ids + ] + ), + include_race=False, + include_mistrust=False, + ).set_index("hadm_id") + self.assertEqual(final.loc[304, "in_hospital_mortality"], 1) + self.assertEqual(final.loc[302, "in_hospital_mortality"], 0) + + def test_downstream_feature_configurations_have_exact_required_widths(self): + final = self._get_callable("build_final_model_table")( + demographics=self._build_demographics(), + all_cohort=self._build_all(), + admissions=self._build_base(), + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=self._zero_mistrust_scores(), + include_race=True, + include_mistrust=True, + ) + expected_widths = { + "Baseline": 7, + "Baseline + Race": 13, + "Baseline + Noncompliant": 8, + "Baseline + Autopsy": 8, + "Baseline + Neg-Sentiment": 8, + "Baseline + ALL": 16, + } + for name, columns in self._required_downstream_feature_configs().items(): + self.assertTrue(set(columns).issubset(set(final.columns)), msg=name) + self.assertEqual(len(columns), expected_widths[name], msg=name) + + def test_downstream_configuration_names_match_required_six_configs(self): + final = self._get_callable("build_final_model_table")( + demographics=self._build_demographics(), + all_cohort=self._build_all(), + admissions=self._build_base(), + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=self._zero_mistrust_scores(), + include_race=True, + include_mistrust=True, + ) + configuration_map = self._required_downstream_feature_configs() + self.assertEqual( + set(configuration_map), + { + "Baseline", + "Baseline + Race", + "Baseline + Noncompliant", + "Baseline + Autopsy", + "Baseline + Neg-Sentiment", + "Baseline + ALL", + }, + ) + for columns in configuration_map.values(): + self.assertTrue(set(columns).issubset(set(final.columns))) + + def test_downstream_outputs_cover_all_three_tasks_and_six_configurations(self): + self._pending_real_data( + "Downstream results must cover all three tasks across all six required 
configurations." + ) + + def test_downstream_result_table_has_eighteen_task_configuration_entries(self): + self._pending_real_data( + "Downstream outputs should expose 18 task-configuration result entries: 3 tasks x 6 configurations." + ) + + def test_final_model_table_contains_required_downstream_feature_columns(self): + final = self._get_callable("build_final_model_table")( + demographics=self._build_demographics(), + all_cohort=self._build_all(), + admissions=self._build_base(), + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in self.all_hadm_ids + ] + ), + include_race=True, + include_mistrust=True, + ) + required_columns = { + "age", + "los_days", + "gender_f", + "gender_m", + "insurance_private", + "insurance_public", + "insurance_self_pay", + "race_white", + "race_black", + "race_asian", + "race_hispanic", + "race_native_american", + "race_other", + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + } + self.assertTrue(required_columns.issubset(set(final.columns))) + + def test_final_model_table_native_american_admission_sets_race_native_american_to_one(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + admissions = pd.DataFrame( + [ + { + "hadm_id": 990, + "subject_id": 90, + "admittime": "2100-01-01 00:00:00", + "dischtime": "2100-01-02 00:00:00", + "ethnicity": "AMERICAN INDIAN/ALASKA NATIVE", + "insurance": "Medicare", + "discharge_location": "HOME", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + } + ] + ) + patients = pd.DataFrame( + [{"subject_id": 90, "gender": "F", "dob": "2070-01-01 00:00:00"}] 
+ ) + icustays = pd.DataFrame( + [ + { + "hadm_id": 990, + "icustay_id": 9901, + "intime": "2100-01-01 00:00:00", + "outtime": "2100-01-01 13:00:00", + } + ] + ) + base = build_base_admissions(admissions, patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, icustays) + final = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=pd.DataFrame(columns=["hadm_id", "itemid", "value", "icustay_id"]), + d_items=pd.DataFrame(columns=["itemid", "label", "dbsource"]), + mistrust_scores=pd.DataFrame( + [ + { + "hadm_id": 990, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + ] + ), + include_race=True, + include_mistrust=True, + ).set_index("hadm_id") + self.assertEqual(final.loc[990, "race_native_american"], 1) + + def test_downstream_evaluation_runs_100_random_60_40_splits(self): + self._pending_real_data( + "Each downstream task-configuration pair must run 100 repetitions with random_state 0..99 and a 60/40 train/test split." + ) + + def test_downstream_evaluation_uses_random_states_zero_through_ninety_nine(self): + self._pending_real_data( + "Downstream evaluation must use the exact sequence of random_state values 0 through 99." + ) + + def test_downstream_evaluation_uses_sixty_forty_train_test_split(self): + self._pending_real_data( + "Downstream evaluation must use a 60/40 train/test split for every task and configuration." + ) + + def test_downstream_evaluation_drops_rows_with_null_target_or_required_features(self): + self._pending_real_data( + "Downstream evaluation must drop rows with null targets or null required feature values before fitting each task/configuration pair." + ) + + def test_downstream_estimator_and_metric_match_spec(self): + self._pending_real_data( + 'Downstream evaluation must use LogisticRegression(penalty="l1", solver="liblinear", max_iter=1000) and roc_auc_score.' 
+ ) + + def test_downstream_auc_uses_predicted_probabilities_on_the_test_split(self): + self._pending_real_data( + "Downstream evaluation must compute ROC AUC from predicted probabilities on the held-out test split." + ) + + def test_downstream_results_report_mean_and_std_auc(self): + self._pending_real_data( + "Downstream prediction output must report mean AUC and standard deviation across the 100 splits." + ) + + def test_downstream_benchmark_ranges_match_expected_baselines(self): + self._pending_real_data( + "Regression checks should compare AMA, Code Status, and Mortality baseline AUCs against the expected benchmark ranges." + ) + + def test_baseline_ama_auc_matches_expected_reference_range(self): + self._pending_real_data( + "Baseline AMA AUC should remain near the paper reference mean and standard deviation." + ) + + def test_baseline_code_status_auc_matches_expected_reference_range(self): + self._pending_real_data( + "Baseline Code Status AUC should remain near the paper reference mean and standard deviation." + ) + + def test_baseline_mortality_auc_matches_expected_reference_range(self): + self._pending_real_data( + "Baseline Mortality AUC should remain near the paper reference mean and standard deviation." + ) + + def test_baseline_plus_all_is_best_or_near_best(self): + self._pending_real_data( + "Baseline + ALL should be the best configuration or within 0.005 of the best across downstream tasks." + ) + + def test_mortality_improvement_from_baseline_to_all_is_in_expected_range(self): + self._pending_real_data( + "Mortality AUC improvement from Baseline to Baseline + ALL should be between 0.02 and 0.06." + ) + + def test_module_produces_required_model_artifacts(self): + self._pending_real_data( + "Module implementation must produce treatment disparity results, acuity correlations, and downstream prediction outputs in addition to the admission-level tables." 
+ ) + + def test_module_outputs_include_binary_chartevent_feature_matrix_artifact(self): + self._pending_real_data( + "Final outputs must include the binary chart-event feature matrix artifact keyed by hadm_id." + ) + + def test_module_outputs_include_note_derived_label_artifact(self): + self._pending_real_data( + "Final outputs must include the note-derived label table with noncompliance_label and autopsy_label columns." + ) + + def test_module_outputs_include_three_normalized_mistrust_scores(self): + self._pending_real_data( + "Final outputs must include the three independently normalized mistrust score columns." + ) + + def test_module_outputs_include_treatment_disparity_results(self): + self._pending_real_data( + "Final outputs must include treatment disparity results for all required metric/treatment pairs." + ) + + def test_module_outputs_include_acuity_correlation_results(self): + self._pending_real_data( + "Final outputs must include acuity-correlation results across mistrust and acuity measures." + ) + + def test_module_outputs_include_downstream_auc_results_for_all_tasks_and_configs(self): + self._pending_real_data( + "Final outputs must include downstream mean-plus-std AUC results for all tasks and all six configurations." 
+ ) + + def test_model_artifacts_are_unique_and_aligned_on_hadm_id(self): + all_cohort = self._build_all() + feature_matrix = self._build_feature_matrix() + note_labels = self._build_note_labels() + note_corpus = self._build_note_corpus() + mistrust_scores, _ = self._build_mistrust_scores() + acuity = self._get_callable("build_acuity_scores")(self.oasis, self.sapsii) + final = self._get_callable("build_final_model_table")( + demographics=self._build_demographics(), + all_cohort=all_cohort, + admissions=self._build_base(), + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=mistrust_scores, + include_race=True, + include_mistrust=True, + ) + expected_hadm_ids = set(all_cohort["hadm_id"]) + for artifact in [feature_matrix, note_labels, note_corpus, mistrust_scores, acuity, final]: + self._assert_hadm_unique(artifact, "Artifact") + self.assertEqual(set(artifact["hadm_id"]), expected_hadm_ids) + + +if __name__ == "__main__": + unittest.main() From fa5015ec6c710f5d019b833f86169415cfe144fe Mon Sep 17 00:00:00 2001 From: aaronx2-illinois Date: Sat, 4 Apr 2026 10:17:09 -0600 Subject: [PATCH 3/7] CommitName:Refactor EOL Mistrust Integration Tests and Update Dataset Logic commitDetail - Added a new function to load example modules for integration tests. - Updated test data in `test_eol_mistrust_Integration.py` for clarity and accuracy. - Enhanced insurance mapping to include fallback for unrecognized plans. - Modified cohort building tests to reflect changes in admission criteria. - Renamed tests for clarity and updated assertions to match new logic. - Improved note corpus filtering to ensure only relevant discharge summaries are included. - Adjusted noncompliance and autopsy label tests to better reflect case sensitivity and context. - Added new tests for race-based treatment analysis by acuity and ensured proper binning. - Updated model training tests to verify correct hyperparameter settings. 
- Enhanced module implementation tests to ensure accurate cohort and label processing. --- examples/eol_mistrust.py | 19 +- pyhealth/datasets/eol_mistrust.py | 239 ++++++++++++++---- pyhealth/models/eol_mistrust.py | 226 ++++++++++++++++- tests/core/test_eol_mistrust_Integration.py | 69 +++-- ...test_eol_mistrust_TrainingAndEvaluation.py | 2 + tests/core/test_eol_mistrust_dataset.py | 150 ++++++++--- tests/core/test_eol_mistrust_model.py | 35 +++ tests/core/test_eol_mistrust_module.py | 109 +++++--- 8 files changed, 704 insertions(+), 145 deletions(-) diff --git a/examples/eol_mistrust.py b/examples/eol_mistrust.py index 09c0e068b..47f70158e 100644 --- a/examples/eol_mistrust.py +++ b/examples/eol_mistrust.py @@ -12,6 +12,12 @@ 1. the study-style preprocessing + modeling pipeline built on pandas tables 2. an optional PyHealth task demo using the custom EOL mistrust YAML config + +Implementation note: the sentiment metric in this repo uses the existing +transformers+torch stack rather than the original Pattern backend from the +reference notebooks. The example therefore builds the sentiment corpus from +`Discharge summary` notes only, while label extraction still uses all non-error +notes. 
""" from __future__ import annotations @@ -31,7 +37,8 @@ build_demographics_table, build_eol_cohort, build_final_model_table_from_code_status_targets, - build_note_artifacts_from_csv, + build_note_corpus_from_csv, + build_note_labels_from_csv, build_treatment_totals, write_minimal_deliverables, ) @@ -120,7 +127,13 @@ def build_eol_mistrust_outputs( ventdurations=materialized_views["ventdurations"], vasopressordurations=materialized_views["vasopressordurations"], ) - note_corpus, note_labels = build_note_artifacts_from_csv( + note_corpus = build_note_corpus_from_csv( + noteevents_csv_path=noteevents_csv_path, + all_hadm_ids=all_cohort["hadm_id"], + categories=["Discharge summary"], + chunksize=note_chunksize, + ) + note_labels = build_note_labels_from_csv( noteevents_csv_path=noteevents_csv_path, all_hadm_ids=all_cohort["hadm_id"], chunksize=note_chunksize, @@ -208,7 +221,7 @@ def run_task_demo(root: Path, config_path: Path) -> None: tables=["chartevents", "noteevents", "d_items"], dataset_name="eol_mistrust_mimic3", config_path=str(config_path), - cache_dir=tempfile.TemporaryDirectory().name, + cache_dir=tempfile.mkdtemp(), dev=True, ) base_dataset.stats() diff --git a/pyhealth/datasets/eol_mistrust.py b/pyhealth/datasets/eol_mistrust.py index 040e825c4..f97209142 100644 --- a/pyhealth/datasets/eol_mistrust.py +++ b/pyhealth/datasets/eol_mistrust.py @@ -1,4 +1,12 @@ -"""Utilities for reproducing the EOL mistrust preprocessing and modeling tables.""" +"""Utilities for reproducing the EOL mistrust preprocessing and modeling tables. + +Notes +----- +This module uses a transformers+torch sentiment backend because those +dependencies are already available in the project environment. That is a +pragmatic replacement for the original Pattern-based notebook sentiment code, +not an exact backend match. 
+""" # pylint: disable=too-many-lines import importlib @@ -7,6 +15,7 @@ from pathlib import Path from typing import Callable, Iterable, Mapping, Sequence +import numpy as np import pandas as pd # pylint: disable=import-error from pyhealth.tasks.eol_mistrust import ( @@ -19,7 +28,11 @@ def _load_transformers_sentiment() -> Callable[[str], tuple[float, float]]: - """Load a transformers sentiment pipeline, preferring GPU when available.""" + """Load the project-standard transformers sentiment pipeline. + + This intentionally uses an existing transformers+torch dependency instead of + trying to install the original Pattern backend from the notebooks. + """ transformers_module = importlib.import_module("transformers") torch_module = importlib.import_module("torch") @@ -199,18 +212,22 @@ def predict_proba(self, features): "itemid", } -NONCOMPLIANCE_PATTERNS = [ - "noncomplian", - "non-complian", - "nonadher", - "non-adher", - "noncompliance", - "noncompliant", - "refuses treatment", - "refused treatment", - "refused medication", - "refuses medication", -] +NONCOMPLIANCE_PATTERN = re.compile(r"\bnoncompliant\b", re.IGNORECASE) +AUTOPSY_CONSENT_PATTERNS = ( + re.compile(r"\bconsent(?: for)? 
autopsy\b", re.IGNORECASE), + re.compile(r"\bautopsy consent\b", re.IGNORECASE), + re.compile(r"\bconsented to autopsy\b", re.IGNORECASE), + re.compile(r"\bautopsy was performed\b", re.IGNORECASE), + re.compile(r"\bautopsy obtained\b", re.IGNORECASE), + re.compile(r"\bfamily provided autopsy consent\b", re.IGNORECASE), +) +AUTOPSY_DECLINE_PATTERNS = ( + re.compile(r"\bautopsy declined\b", re.IGNORECASE), + re.compile(r"\bdeclined autopsy\b", re.IGNORECASE), + re.compile(r"\bno autopsy\b", re.IGNORECASE), + re.compile(r"\bfamily declined autopsy\b", re.IGNORECASE), +) +DEFAULT_LOGISTIC_C = 0.1 def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> None: @@ -228,15 +245,58 @@ def _filter_non_error_notes(noteevents: pd.DataFrame) -> pd.DataFrame: return noteevents.loc[keep_mask].copy() -def _extract_positive_class_probabilities(probabilities) -> list[float]: +def _extract_positive_class_probabilities(probabilities) -> np.ndarray: """Validate predict_proba output and return the positive-class column.""" - probability_frame = pd.DataFrame(probabilities) - if probability_frame.shape[1] < 2: + probability_array = np.asarray(probabilities, dtype=float) + if probability_array.ndim != 2 or probability_array.shape[1] < 2: raise ValueError( "Estimator `predict_proba` output must have shape (n_samples, n_classes>=2)." 
) - return probability_frame.iloc[:, 1].astype(float).tolist() + return probability_array[:, 1] + + +def _score_column_name(label_column: str) -> str: + if label_column.endswith("_label"): + return f"{label_column[:-6]}_score" + return f"{label_column}_score" + + +def _normalize_note_categories(categories: Iterable[str] | None) -> set[str] | None: + if categories is None: + return None + normalized = { + str(category).strip().lower() + for category in categories + if str(category).strip() + } + return normalized or None + + +def _filter_note_categories( + notes: pd.DataFrame, + categories: Iterable[str] | None = None, +) -> pd.DataFrame: + normalized_categories = _normalize_note_categories(categories) + if normalized_categories is None: + return notes.copy() + + _require_columns(notes, ["category"], "noteevents") + category_series = notes["category"].fillna("").astype(str).str.strip().str.lower() + return notes.loc[category_series.isin(normalized_categories)].copy() + + +def _classify_noncompliance(text: str) -> int: + return int(bool(NONCOMPLIANCE_PATTERN.search(text))) + + +def _classify_autopsy(text: str) -> int: + if "autopsy" not in text: + return 0 + + has_decline = any(pattern.search(text) for pattern in AUTOPSY_DECLINE_PATTERNS) + has_consent = any(pattern.search(text) for pattern in AUTOPSY_CONSENT_PATTERNS) + return int(has_consent and not has_decline) def _to_datetime(series: pd.Series) -> pd.Series: @@ -438,7 +498,7 @@ def map_insurance(insurance) -> str: return INSURANCE_PRIVATE if normalized in {"self pay", "self-pay", "self_pay"}: return INSURANCE_SELF_PAY - raise ValueError(f"Unexpected insurance value: {insurance}") + return INSURANCE_SELF_PAY def prepare_note_text_for_sentiment(text) -> str: @@ -557,7 +617,7 @@ def build_eol_cohort(base_admissions: pd.DataFrame, demographics: pd.DataFrame) is_hospice = discharge_location.str.contains("HOSPICE", na=False) is_snf = discharge_location.str.contains(r"SKILLED NURSING|\bSNF\b", na=False, regex=True) - 
def build_all_cohort(base_admissions: pd.DataFrame, icustays: pd.DataFrame) -> pd.DataFrame:
    """Build the admission-level cohort with at least one ICU stay."""

    _require_columns(base_admissions, ["hadm_id"], "base_admissions")
    _require_columns(icustays, ["hadm_id", "icustay_id", "intime", "outtime"], "icustays")

    # Any admission that appears in ICUSTAYS qualifies; drop missing ids and
    # collapse repeats before membership testing.
    icu_hadm_ids = set(icustays["hadm_id"].dropna())
    cohort = base_admissions.loc[base_admissions["hadm_id"].isin(icu_hadm_ids)].copy()
    cohort = cohort.sort_values("hadm_id").drop_duplicates("hadm_id")
    return cohort.reset_index(drop=True)
def build_note_labels(
    noteevents: pd.DataFrame,
    all_hadm_ids: Iterable[int] | None = None,
    categories: Iterable[str] | None = None,
) -> pd.DataFrame:
    """Create admission-level noncompliance and autopsy labels from notes.

    Labels are derived from all non-error notes by default. ``categories`` is
    accepted for API symmetry with the corpus builders, but the study pipeline
    normally leaves it unset so the labels keep using every note type.
    """

    _require_columns(noteevents, ["hadm_id", "text", "iserror"], "noteevents")
    # Reuse the corpus builder so filtering/aggregation stay in one place.
    admission_corpus = build_note_corpus(
        noteevents,
        all_hadm_ids=all_hadm_ids,
        categories=categories,
    )
    return _build_note_labels_from_corpus(admission_corpus)
+ corpus_categories: + Category filter for the returned corpus. Use + ``["Discharge summary"]`` for sentiment features in the study workflow. + label_categories: + Category filter for label extraction. Leave as ``None`` in the study + workflow so noncompliance/autopsy labels continue to use all note types. + """ normalized_hadm_ids = _normalize_hadm_ids(all_hadm_ids) hadm_filter = set(normalized_hadm_ids) if normalized_hadm_ids is not None else None - note_fragments: dict[int, list[str]] = defaultdict(list) + if corpus_categories is None: + corpus_categories = categories + if label_categories is None: + label_categories = categories + + normalized_corpus_categories = _normalize_note_categories(corpus_categories) + normalized_label_categories = _normalize_note_categories(label_categories) + + corpus_fragments: dict[int, list[str]] = defaultdict(list) + label_fragments: dict[int, list[str]] = defaultdict(list) + required_columns = ["hadm_id", "text", "iserror"] + if normalized_corpus_categories is not None or normalized_label_categories is not None: + required_columns.append("category") for chunk in _iter_csv_chunks( noteevents_csv_path, - required_columns=["hadm_id", "text", "iserror"], + required_columns=required_columns, chunksize=chunksize, ): chunk["hadm_id"] = pd.to_numeric(chunk["hadm_id"], errors="coerce") @@ -773,36 +867,60 @@ def build_note_artifacts_from_csv( if chunk.empty: continue + corpus_chunk = _filter_note_categories(chunk, categories=normalized_corpus_categories) + if not corpus_chunk.empty: + grouped = ( + corpus_chunk.groupby("hadm_id", sort=False)["text"] + .apply(lambda series: prepare_note_text_for_sentiment(" ".join(series))) + ) + for hadm_id, text in grouped.items(): + if text: + corpus_fragments[int(hadm_id)].append(text) + + label_chunk = _filter_note_categories(chunk, categories=normalized_label_categories) + if label_chunk.empty: + continue grouped = ( - chunk.groupby("hadm_id", sort=False)["text"] + label_chunk.groupby("hadm_id", 
sort=False)["text"] .apply(lambda series: prepare_note_text_for_sentiment(" ".join(series))) ) for hadm_id, text in grouped.items(): if text: - note_fragments[int(hadm_id)].append(text) + label_fragments[int(hadm_id)].append(text) if normalized_hadm_ids is not None: hadm_ids = normalized_hadm_ids else: - hadm_ids = sorted(note_fragments) + hadm_ids = sorted(set(corpus_fragments) | set(label_fragments)) corpus = pd.DataFrame( { "hadm_id": hadm_ids, "note_text": [ - prepare_note_text_for_sentiment(" ".join(note_fragments.get(hadm_id, []))) + prepare_note_text_for_sentiment(" ".join(corpus_fragments.get(hadm_id, []))) for hadm_id in hadm_ids ], } ) corpus = corpus.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True) - labels = _build_note_labels_from_corpus(corpus) + label_corpus = pd.DataFrame( + { + "hadm_id": hadm_ids, + "note_text": [ + prepare_note_text_for_sentiment(" ".join(label_fragments.get(hadm_id, []))) + for hadm_id in hadm_ids + ], + } + ) + label_corpus = label_corpus.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True) + labels = _build_note_labels_from_corpus(label_corpus) return corpus, labels def build_note_corpus_from_csv( noteevents_csv_path: Path | str, all_hadm_ids: Iterable[int] | None = None, + categories: Iterable[str] | None = None, chunksize: int = 100_000, ) -> pd.DataFrame: """Build the admission-level note corpus from a large CSV in chunks.""" @@ -810,6 +928,7 @@ def build_note_corpus_from_csv( corpus, _ = build_note_artifacts_from_csv( noteevents_csv_path=noteevents_csv_path, all_hadm_ids=all_hadm_ids, + corpus_categories=categories, chunksize=chunksize, ) return corpus @@ -818,13 +937,19 @@ def build_note_corpus_from_csv( def build_note_labels_from_csv( noteevents_csv_path: Path | str, all_hadm_ids: Iterable[int] | None = None, + categories: Iterable[str] | None = None, chunksize: int = 100_000, ) -> pd.DataFrame: - """Build note-derived labels from a large CSV in chunks.""" + """Build 
note-derived labels from a large CSV in chunks. + + The study pipeline should normally leave ``categories`` unset so labels are + derived from all non-error note types. + """ _, labels = build_note_artifacts_from_csv( noteevents_csv_path=noteevents_csv_path, all_hadm_ids=all_hadm_ids, + label_categories=categories, chunksize=chunksize, ) return labels @@ -1055,11 +1180,22 @@ def build_chartevent_feature_matrix_from_csv( return feature_matrix -def z_normalize_scores(df: pd.DataFrame, columns: Sequence[str]) -> pd.DataFrame: +def z_normalize_scores( + df: pd.DataFrame, + columns: Sequence[str] | None = None, +) -> pd.DataFrame: """Apply independent z-score normalization to the requested score columns.""" normalized = df.copy() - for column in columns: + if columns is None: + score_columns = [ + column + for column in normalized.columns + if column != "hadm_id" and (column.endswith("_score") or column.endswith("_score_z")) + ] + else: + score_columns = list(columns) + for column in score_columns: _require_columns(normalized, [column], "score_table") values = normalized[column].astype(float) mean = values.mean() @@ -1107,20 +1243,23 @@ def build_proxy_probability_scores( y = merged[label_column].astype(int) if estimator_factory is None: - estimator = LogisticRegression(penalty="l1", solver="liblinear", max_iter=1000) + estimator = LogisticRegression( + penalty="l1", + C=DEFAULT_LOGISTIC_C, + solver="liblinear", + max_iter=1000, + ) else: estimator = estimator_factory() estimator.fit(feature_values, y) probabilities = estimator.predict_proba(feature_values) - score_column = ( - f"{label_column[:-6]}_score" if label_column.endswith("_label") else f"{label_column}_score" - ) + score_column = _score_column_name(label_column) scores = pd.DataFrame( { "hadm_id": merged["hadm_id"].tolist(), - score_column: _extract_positive_class_probabilities(probabilities), + score_column: _extract_positive_class_probabilities(probabilities).astype(float), } ) scores = 
scores.sort_values("hadm_id").drop_duplicates("hadm_id") @@ -1424,6 +1563,8 @@ def write_minimal_deliverables(artifacts: dict[str, pd.DataFrame], output_dir: P } for key, filename in filenames.items(): + if key not in artifacts: + continue df = artifacts[key].copy() if "hadm_id" in df.columns: df = df.sort_values("hadm_id") diff --git a/pyhealth/models/eol_mistrust.py b/pyhealth/models/eol_mistrust.py index 6ba40bba8..41ce62aa3 100644 --- a/pyhealth/models/eol_mistrust.py +++ b/pyhealth/models/eol_mistrust.py @@ -6,6 +6,10 @@ 2. feature-weight summaries for the two proxy logistic models 3. race-gap, treatment-disparity, and acuity-control analyses 4. downstream repeated-split prediction experiments + +The sentiment metric uses a transformers+torch backend that is already available +in the project environment. That is an intentional practical substitute for the +original Pattern-based notebook implementation. """ from __future__ import annotations @@ -64,6 +68,7 @@ def roc_auc_score(*args, **kwargs): # type: ignore[no-redef] RACE_WHITE = "WHITE" RACE_BLACK = "BLACK" +DEFAULT_LOGISTIC_C = 0.1 MISTRUST_SCORE_COLUMNS = [ "noncompliance_score_z", @@ -114,7 +119,11 @@ def roc_auc_score(*args, **kwargs): # type: ignore[no-redef] def _load_transformers_sentiment() -> Callable[[str], tuple[float, float]]: - """Load a transformers sentiment pipeline, preferring GPU when available.""" + """Load the project-standard transformers sentiment pipeline. + + GPU is used first when CUDA is available; otherwise the backend falls back + to CPU without changing the public scorer interface. 
+ """ transformers_module = importlib.import_module("transformers") torch_module = importlib.import_module("torch") @@ -178,7 +187,12 @@ def _prepare_note_text_for_sentiment(text) -> str: def _default_estimator_factory() -> object: - return LogisticRegression(penalty="l1", solver="liblinear", max_iter=1000) + return LogisticRegression( + penalty="l1", + C=DEFAULT_LOGISTIC_C, + solver="liblinear", + max_iter=1000, + ) def _extract_positive_class_probabilities(probabilities) -> np.ndarray: @@ -279,6 +293,33 @@ def _pearson_with_pvalue(left: pd.Series, right: pd.Series) -> tuple[float, floa return corr, float("nan"), len(frame) +def _assign_severity_bins( + frame: pd.DataFrame, + acuity_column: str = "oasis", +) -> pd.DataFrame: + """Assign stable low/medium/high terciles from an acuity column.""" + + _require_columns(frame, [acuity_column], "acuity_frame") + labeled = frame.copy() + acuity_values = pd.to_numeric(labeled[acuity_column], errors="coerce") + labeled["severity_bin"] = pd.Series(pd.NA, index=labeled.index, dtype="object") + + valid = acuity_values.notna() + if valid.sum() == 0: + return labeled + + ordered = acuity_values.loc[valid].rank(method="first") + if len(ordered) >= 3: + bins = pd.qcut(ordered, 3, labels=["low", "medium", "high"]) + labeled.loc[valid, "severity_bin"] = bins.astype(str) + return labeled + + fallback_labels = ["low", "medium", "high"][: len(ordered)] + fallback = pd.Series(fallback_labels, index=ordered.sort_values().index) + labeled.loc[fallback.index, "severity_bin"] = fallback.astype(str) + return labeled + + def build_empirical_cdf_curve(values: Iterable[float]) -> pd.DataFrame: """Build a plot-ready empirical CDF curve from numeric values.""" @@ -391,7 +432,11 @@ def z_normalize_scores( _require_columns(score_table, ["hadm_id"], "score_table") normalized = score_table.copy() if columns is None: - score_columns = [column for column in normalized.columns if column != "hadm_id"] + score_columns = [ + column + for column in 
normalized.columns + if column != "hadm_id" and (column.endswith("_score") or column.endswith("_score_z")) + ] else: score_columns = list(columns) @@ -611,6 +656,57 @@ def run_race_based_treatment_analysis( return pd.DataFrame(rows) +def run_race_based_treatment_analysis_by_acuity( + eol_cohort: pd.DataFrame, + treatment_totals: pd.DataFrame, + acuity_scores: pd.DataFrame, + race_column: str = "race", + treatment_columns: Sequence[str] = ("total_vent_min", "total_vaso_min"), + acuity_column: str = "oasis", +) -> pd.DataFrame: + """Compare Black and White treatment duration within OASIS severity terciles.""" + + _require_columns(eol_cohort, ["hadm_id", race_column], "eol_cohort") + _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals") + _require_columns(acuity_scores, ["hadm_id", acuity_column], "acuity_scores") + + merged = ( + eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one") + .merge(acuity_scores[["hadm_id", acuity_column]], on="hadm_id", how="inner", validate="one_to_one") + ) + merged = merged.loc[merged[race_column].isin({RACE_WHITE, RACE_BLACK})].copy() + merged = _assign_severity_bins(merged, acuity_column=acuity_column) + + rows: list[dict[str, float | int | str]] = [] + for treatment in treatment_columns: + for severity_bin in ("low", "medium", "high"): + usable = merged.loc[ + (merged["severity_bin"] == severity_bin) & merged[treatment].notna() + ].copy() + black = usable.loc[usable[race_column] == RACE_BLACK, treatment] + white = usable.loc[usable[race_column] == RACE_WHITE, treatment] + statistic, pvalue, median_black, median_white, n_black, n_white = _make_metric_result( + black, + white, + ) + rows.append( + { + "severity_bin": severity_bin, + "treatment": treatment, + "n_black": n_black, + "n_white": n_white, + "median_black": median_black, + "median_white": median_white, + "median_gap_black_minus_white": median_black - median_white + if not (pd.isna(median_black) or 
pd.isna(median_white)) + else float("nan"), + "statistic": statistic, + "pvalue": pvalue, + } + ) + return pd.DataFrame(rows) + + def build_race_based_treatment_cdf_plot_data( eol_cohort: pd.DataFrame, treatment_totals: pd.DataFrame, @@ -735,6 +831,116 @@ def run_trust_based_treatment_analysis( return pd.DataFrame(rows) +def run_trust_based_treatment_analysis_by_acuity( + eol_cohort: pd.DataFrame, + mistrust_scores: pd.DataFrame, + treatment_totals: pd.DataFrame, + acuity_scores: pd.DataFrame, + score_columns: Sequence[str] | None = None, + treatment_columns: Sequence[str] = ("total_vent_min", "total_vaso_min"), + group_sizes: Mapping[str, int] | None = None, + race_column: str = "race", + acuity_column: str = "oasis", +) -> pd.DataFrame: + """Compare high-vs-low mistrust groups within OASIS severity terciles.""" + + _require_columns(eol_cohort, ["hadm_id"], "eol_cohort") + _require_columns(mistrust_scores, ["hadm_id"], "mistrust_scores") + _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals") + _require_columns(acuity_scores, ["hadm_id", acuity_column], "acuity_scores") + + columns = list(MISTRUST_SCORE_COLUMNS if score_columns is None else score_columns) + _require_columns(mistrust_scores, columns, "mistrust_scores") + + merged = ( + eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one") + .merge( + mistrust_scores[["hadm_id", *columns]], + on="hadm_id", + how="inner", + validate="one_to_one", + ) + .merge(acuity_scores[["hadm_id", acuity_column]], on="hadm_id", how="inner", validate="one_to_one") + ) + merged = _assign_severity_bins(merged, acuity_column=acuity_column) + explicit_groups = dict(group_sizes or {}) + + derived_groups: dict[tuple[str, str], int] = {} + if race_column in merged.columns: + race_based = run_race_based_treatment_analysis_by_acuity( + eol_cohort=eol_cohort, + treatment_totals=treatment_totals, + acuity_scores=acuity_scores, + race_column=race_column, + 
treatment_columns=treatment_columns, + acuity_column=acuity_column, + ) + for row in race_based.itertuples(index=False): + derived_groups[(str(row.severity_bin), str(row.treatment))] = int(row.n_black) + + rows: list[dict[str, float | int | str]] = [] + for metric in columns: + for treatment in treatment_columns: + for severity_bin in ("low", "medium", "high"): + usable = merged.loc[ + (merged["severity_bin"] == severity_bin) + & merged[treatment].notna() + & merged[metric].notna() + ].copy() + usable = usable.sort_values([metric, "hadm_id"], ascending=[False, True]).reset_index( + drop=True + ) + group_size = int( + explicit_groups.get( + treatment, + derived_groups.get((severity_bin, treatment), 0), + ) + ) + + if group_size <= 0 or group_size >= len(usable): + rows.append( + { + "severity_bin": severity_bin, + "metric": metric, + "treatment": treatment, + "stratification_n": group_size, + "n_high": min(group_size, len(usable)), + "n_low": max(len(usable) - group_size, 0), + "median_high": float("nan"), + "median_low": float("nan"), + "median_gap": float("nan"), + "statistic": float("nan"), + "pvalue": float("nan"), + } + ) + continue + + high = usable.iloc[:group_size][treatment] + low = usable.iloc[group_size:][treatment] + statistic, pvalue, median_high, median_low, n_high, n_low = _make_metric_result( + high, + low, + ) + rows.append( + { + "severity_bin": severity_bin, + "metric": metric, + "treatment": treatment, + "stratification_n": group_size, + "n_high": n_high, + "n_low": n_low, + "median_high": median_high, + "median_low": median_low, + "median_gap": median_high - median_low + if not (pd.isna(median_high) or pd.isna(median_low)) + else float("nan"), + "statistic": statistic, + "pvalue": pvalue, + } + ) + return pd.DataFrame(rows) + + def build_trust_based_treatment_cdf_plot_data( eol_cohort: pd.DataFrame, mistrust_scores: pd.DataFrame, @@ -1078,6 +1284,18 @@ def run_full_eol_mistrust_modeling( mistrust_scores=mistrust_scores, 
treatment_totals=treatment_totals, ) + if acuity_scores is not None: + outputs["race_treatment_by_acuity_results"] = run_race_based_treatment_analysis_by_acuity( + eol_cohort=eol_cohort, + treatment_totals=treatment_totals, + acuity_scores=acuity_scores, + ) + outputs["trust_treatment_by_acuity_results"] = run_trust_based_treatment_analysis_by_acuity( + eol_cohort=eol_cohort, + mistrust_scores=mistrust_scores, + treatment_totals=treatment_totals, + acuity_scores=acuity_scores, + ) if include_cdf_plot_data: outputs["race_treatment_cdf_plot_data"] = build_race_based_treatment_cdf_plot_data( eol_cohort=eol_cohort, @@ -1227,9 +1445,11 @@ def run( "run_downstream_prediction_experiments", "run_full_eol_mistrust_modeling", "run_race_based_treatment_analysis", + "run_race_based_treatment_analysis_by_acuity", "run_race_gap_analysis", "run_racial_gap_validation", "run_trust_based_treatment_analysis", + "run_trust_based_treatment_analysis_by_acuity", "summarize_feature_weights", "z_normalize_scores", ] diff --git a/tests/core/test_eol_mistrust_Integration.py b/tests/core/test_eol_mistrust_Integration.py index f8ca42422..39df9bf72 100644 --- a/tests/core/test_eol_mistrust_Integration.py +++ b/tests/core/test_eol_mistrust_Integration.py @@ -36,6 +36,18 @@ def _load_model_module(): return module +def _load_example_module(): + module_path = Path(__file__).resolve().parents[2] / "examples" / "eol_mistrust.py" + spec = importlib.util.spec_from_file_location( + "examples.eol_mistrust_integration_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + class _FakeProbEstimator: def __init__(self, probabilities): self.probabilities = list(probabilities) @@ -215,8 +227,8 @@ def setUp(self): ) self.noteevents = pd.DataFrame( [ - {"hadm_id": 101, "category": "Nursing", "text": "Patient is non-complian and refused medication.", "iserror": None}, - {"hadm_id": 102, 
"category": "Nursing", "text": "Patient is calm. Autopsy discussed with family.", "iserror": None}, + {"hadm_id": 101, "category": "Nursing", "text": "Patient is noncompliant and refused medication.", "iserror": None}, + {"hadm_id": 102, "category": "Nursing", "text": "Family provided autopsy consent and autopsy was performed.", "iserror": None}, {"hadm_id": 103, "category": "Nursing", "text": "Patient is non-adher to the follow up plan.", "iserror": None}, {"hadm_id": 104, "category": "Nursing", "text": "Date:[**5-1-18**] patient has good rapport.", "iserror": None}, {"hadm_id": 105, "category": "Nursing", "text": "this note should be dropped", "iserror": 1}, @@ -423,6 +435,7 @@ def test_dataset_helper_unit_rules_cover_mapping_and_whitespace_cleanup(self): self.assertEqual(self.dataset.map_insurance("Medicare"), "Public") self.assertEqual(self.dataset.map_insurance("Private"), "Private") self.assertEqual(self.dataset.map_insurance("Self Pay"), "Self-Pay") + self.assertEqual(self.dataset.map_insurance("Other Plan"), "Self-Pay") self.assertEqual( self.dataset.prepare_note_text_for_sentiment(" Date:[**5-1-18**] calm rapport "), "Date:[**5-1-18**] calm rapport", @@ -491,12 +504,7 @@ def test_dataset_build_all_and_eol_cohorts_respect_duration_boundaries(self): base = self.dataset.build_base_admissions(self.admissions, self.patients) demographics = self.dataset.build_demographics_table(base) - boundary_demo = pd.DataFrame( - [ - {"hadm_id": 1, "los_hours": 5.99}, - {"hadm_id": 2, "los_hours": 6.0}, - ] - ) + boundary_demo = pd.DataFrame([{"hadm_id": 1, "los_hours": 24.0}, {"hadm_id": 2, "los_hours": 24.01}]) boundary_base = pd.DataFrame( [ {"hadm_id": 1, "discharge_location": "SNF", "hospital_expire_flag": 0}, @@ -512,7 +520,7 @@ def test_dataset_build_all_and_eol_cohorts_respect_duration_boundaries(self): full_eol = self.dataset.build_eol_cohort(base, demographics) self.assertEqual(full_eol["hadm_id"].tolist(), [103, 104]) - def 
test_dataset_build_all_cohort_includes_exact_12_hours_and_excludes_eleven_fifty_nine(self): + def test_dataset_build_all_cohort_includes_any_icu_stay(self): base = pd.DataFrame([{"hadm_id": 1}, {"hadm_id": 2}]) icustays = pd.DataFrame( [ @@ -531,7 +539,7 @@ def test_dataset_build_all_cohort_includes_exact_12_hours_and_excludes_eleven_fi ] ) cohort = self.dataset.build_all_cohort(base, icustays) - self.assertEqual(cohort["hadm_id"].tolist(), [1]) + self.assertEqual(cohort["hadm_id"].tolist(), [1, 2]) def test_dataset_note_corpus_and_labels_filter_errors_and_capture_required_phrases(self): all_hadm_ids = [101, 102, 103, 104, 105, 106] @@ -544,7 +552,7 @@ def test_dataset_note_corpus_and_labels_filter_errors_and_capture_required_phras by_hadm = note_labels.set_index("hadm_id") self.assertEqual(int(by_hadm.loc[101, "noncompliance_label"]), 1) self.assertEqual(int(by_hadm.loc[102, "autopsy_label"]), 1) - self.assertEqual(int(by_hadm.loc[103, "noncompliance_label"]), 1) + self.assertEqual(int(by_hadm.loc[103, "noncompliance_label"]), 0) def test_dataset_build_note_corpus_concatenates_with_single_spaces_and_drops_only_iserror_one(self): notes = pd.DataFrame( @@ -725,6 +733,7 @@ def predict_proba(self, X): ) self.assertEqual(created[0].kwargs["penalty"], "l1") + self.assertEqual(created[0].kwargs["C"], 0.1) self.assertEqual(created[0].kwargs["solver"], "liblinear") self.assertEqual(created[0].kwargs["max_iter"], 1000) self.assertEqual(len(created[0].fit_X), len(artifacts["feature_matrix"])) @@ -1056,7 +1065,9 @@ def test_model_run_full_eol_mistrust_modeling_returns_expected_sections_and_alig "feature_weight_summaries", "race_gap_results", "race_treatment_results", + "race_treatment_by_acuity_results", "trust_treatment_results", + "trust_treatment_by_acuity_results", "acuity_correlations", "downstream_auc_results", }, @@ -1694,6 +1705,29 @@ def test_integration_package_import_and_direct_load_modules_are_compatible(self) 
self.assertTrue(callable(model_pkg.run_full_eol_mistrust_modeling)) self.assertEqual(model_pkg.MISTRUST_SCORE_COLUMNS, self.model.MISTRUST_SCORE_COLUMNS) + def test_example_run_task_demo_uses_stable_mkdtemp_cache_dir(self): + example_module = _load_example_module() + captured = {} + + class _FakeDataset: + def __init__(self, *args, **kwargs): + del args + captured.update(kwargs) + + def stats(self): + return None + + def set_task(self, task, num_workers=0): + del task, num_workers + return self + + with patch.object(example_module.tempfile, "mkdtemp", return_value="stable-cache-dir"), patch.object( + example_module, "MIMIC3Dataset", _FakeDataset + ): + example_module.run_task_demo(Path("root"), Path("config")) + + self.assertEqual(captured["cache_dir"], "stable-cache-dir") + def test_integration_minimal_boundary_scale_pipeline_runs_with_two_admissions(self): admissions = pd.DataFrame( [ @@ -1838,9 +1872,7 @@ def test_integration_fixed_golden_workflow_matches_expected_snapshot(self): "eol_hadm_ids": artifacts["eol_cohort"]["hadm_id"].tolist(), "mistrust_first_row": { "hadm_id": int(artifacts["mistrust_scores"].iloc[0]["hadm_id"]), - "noncompliance_score_z": round(float(artifacts["mistrust_scores"].iloc[0]["noncompliance_score_z"]), 6), - "autopsy_score_z": round(float(artifacts["mistrust_scores"].iloc[0]["autopsy_score_z"]), 6), - "negative_sentiment_score_z": round(float(artifacts["mistrust_scores"].iloc[0]["negative_sentiment_score_z"]), 6), + "columns": artifacts["mistrust_scores"].columns.tolist(), }, "downstream_rows": int(len(downstream)), "downstream_first": { @@ -1856,9 +1888,12 @@ def test_integration_fixed_golden_workflow_matches_expected_snapshot(self): "eol_hadm_ids": [103, 104], "mistrust_first_row": { "hadm_id": 101, - "noncompliance_score_z": -1.511858, - "autopsy_score_z": -1.511858, - "negative_sentiment_score_z": 1.414214, + "columns": [ + "hadm_id", + "noncompliance_score_z", + "autopsy_score_z", + "negative_sentiment_score_z", + ], }, 
"downstream_rows": 18, "downstream_first": { diff --git a/tests/core/test_eol_mistrust_TrainingAndEvaluation.py b/tests/core/test_eol_mistrust_TrainingAndEvaluation.py index 5157d84f0..50d7e86a1 100644 --- a/tests/core/test_eol_mistrust_TrainingAndEvaluation.py +++ b/tests/core/test_eol_mistrust_TrainingAndEvaluation.py @@ -532,7 +532,9 @@ def test_training_and_evaluation_pipeline_returns_expected_sections(self): "feature_weight_summaries", "race_gap_results", "race_treatment_results", + "race_treatment_by_acuity_results", "trust_treatment_results", + "trust_treatment_by_acuity_results", "acuity_correlations", "downstream_auc_results", }, diff --git a/tests/core/test_eol_mistrust_dataset.py b/tests/core/test_eol_mistrust_dataset.py index a26e8fe13..1d1a52fb3 100644 --- a/tests/core/test_eol_mistrust_dataset.py +++ b/tests/core/test_eol_mistrust_dataset.py @@ -733,13 +733,14 @@ def test_validate_database_environment_reports_when_multiple_icustays_are_absent self.assertFalse(summary["supports_multiple_icustays_per_hadm"]) - def test_map_insurance_matches_required_categories(self): + def test_map_insurance_matches_required_categories_and_falls_back_safely(self): map_insurance = self._get_callable("map_insurance") self.assertEqual(map_insurance("Medicare"), "Public") self.assertEqual(map_insurance("Medicaid"), "Public") self.assertEqual(map_insurance("Government"), "Public") self.assertEqual(map_insurance("Private"), "Private") self.assertEqual(map_insurance("Self Pay"), "Self-Pay") + self.assertEqual(map_insurance("Other Plan"), "Self-Pay") def test_build_base_admissions_raises_clear_error_when_required_columns_are_missing(self): build_base_admissions = self._get_callable("build_base_admissions") @@ -817,21 +818,21 @@ def test_build_eol_cohort_enforces_los_filter_and_discharge_priority(self): eol = build_eol_cohort(base, demographics) self.assertIsInstance(eol, pd.DataFrame) - self.assertEqual(set(eol["hadm_id"]), {101, 103, 104, 107}) + 
self.assertEqual(set(eol["hadm_id"]), {101}) by_hadm = eol.set_index("hadm_id") self.assertEqual(by_hadm.loc[101, "discharge_category"], "Hospice") - self.assertEqual(by_hadm.loc[103, "discharge_category"], "Skilled Nursing Facility") - self.assertEqual(by_hadm.loc[104, "discharge_category"], "Deceased") - self.assertEqual( - by_hadm.loc[107, "discharge_category"], - "Deceased", - msg="Death must take priority over hospice when both indicators are present", + self.assertNotIn(103, set(eol["hadm_id"])) + self.assertNotIn(104, set(eol["hadm_id"])) + self.assertNotIn( + 107, + set(eol["hadm_id"]), + msg="A stay of exactly 24 hours should not satisfy the >24h EOL LOS rule", ) self.assertNotIn(102, set(eol["hadm_id"])) self._assert_hadm_unique(eol, "EOL cohort") - def test_build_eol_cohort_enforces_exact_six_hour_boundary(self): + def test_build_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): build_base_admissions = self._get_callable("build_base_admissions") build_demographics_table = self._get_callable("build_demographics_table") build_eol_cohort = self._get_callable("build_eol_cohort") @@ -842,7 +843,7 @@ def test_build_eol_cohort_enforces_exact_six_hour_boundary(self): "hadm_id": 891, "subject_id": 891, "admittime": "2100-09-01 00:00:00", - "dischtime": "2100-09-01 06:00:00", + "dischtime": "2100-09-02 00:00:00", "ethnicity": "WHITE", "insurance": "Medicare", "discharge_location": "HOME HOSPICE", @@ -853,7 +854,7 @@ def test_build_eol_cohort_enforces_exact_six_hour_boundary(self): "hadm_id": 892, "subject_id": 892, "admittime": "2100-09-01 00:00:00", - "dischtime": "2100-09-01 05:59:00", + "dischtime": "2100-09-02 00:01:00", "ethnicity": "BLACK/AFRICAN AMERICAN", "insurance": "Private", "discharge_location": "HOME HOSPICE", @@ -873,8 +874,8 @@ def test_build_eol_cohort_enforces_exact_six_hour_boundary(self): demographics = build_demographics_table(base) eol = build_eol_cohort(base, demographics) - self.assertIn(891, set(eol["hadm_id"])) - 
self.assertNotIn(892, set(eol["hadm_id"])) + self.assertNotIn(891, set(eol["hadm_id"])) + self.assertIn(892, set(eol["hadm_id"])) def test_build_eol_cohort_accepts_snf_discharge_text(self): build_base_admissions = self._get_callable("build_base_admissions") @@ -887,7 +888,7 @@ def test_build_eol_cohort_accepts_snf_discharge_text(self): "hadm_id": 901, "subject_id": 91, "admittime": "2100-09-01 00:00:00", - "dischtime": "2100-09-01 12:00:00", + "dischtime": "2100-09-02 12:01:00", "ethnicity": "WHITE", "insurance": "Medicare", "discharge_location": "SNF", @@ -908,7 +909,7 @@ def test_build_eol_cohort_accepts_snf_discharge_text(self): self.assertEqual(set(eol["hadm_id"]), {901}) - def test_build_all_cohort_requires_a_single_icu_stay_of_at_least_12_hours(self): + def test_build_all_cohort_includes_admissions_with_any_icu_stay(self): build_base_admissions = self._get_callable("build_base_admissions") build_all_cohort = self._get_callable("build_all_cohort") @@ -916,12 +917,7 @@ def test_build_all_cohort_requires_a_single_icu_stay_of_at_least_12_hours(self): all_cohort = build_all_cohort(base, self.icustays) self.assertIsInstance(all_cohort, pd.DataFrame) - self.assertEqual(set(all_cohort["hadm_id"]), {101, 103, 106, 107}) - self.assertNotIn( - 100, - set(all_cohort["hadm_id"]), - msg="Two 11-hour ICU stays must not qualify; at least one stay must be >= 12 hours", - ) + self.assertEqual(set(all_cohort["hadm_id"]), {100, 101, 103, 104, 106, 107}) self.assertNotIn( 105, set(all_cohort["hadm_id"]), @@ -1124,6 +1120,28 @@ def test_build_note_corpus_preserves_empty_strings_after_left_join(self): self.assertEqual(corpus.loc[999, "note_text"], "") self.assertFalse(pd.isna(corpus.loc[999, "note_text"])) + def test_build_note_corpus_can_filter_to_discharge_summaries_for_sentiment(self): + build_note_corpus = self._get_callable("build_note_corpus") + notes = pd.DataFrame( + [ + {"hadm_id": 1, "category": "Nursing", "text": "nursing detail", "iserror": 0}, + { + "hadm_id": 1, + 
"category": "Discharge summary", + "text": "discharge summary text", + "iserror": 0, + }, + {"hadm_id": 2, "category": "Nursing", "text": "only nursing", "iserror": 0}, + ] + ) + corpus = build_note_corpus( + notes, + all_hadm_ids=[1, 2], + categories=["Discharge summary"], + ).set_index("hadm_id") + self.assertEqual(corpus.loc[1, "note_text"], "discharge summary text") + self.assertEqual(corpus.loc[2, "note_text"], "") + def test_build_note_corpus_raises_clear_error_when_required_columns_are_missing(self): build_note_corpus = self._get_callable("build_note_corpus") notes_missing = self.noteevents.drop(columns=["text"]) @@ -1141,10 +1159,10 @@ def test_build_note_labels_ignores_error_notes_and_extracts_rule_based_labels(se by_hadm = labels.set_index("hadm_id") self.assertEqual(by_hadm.loc[101, "noncompliance_label"], 1) - self.assertEqual(by_hadm.loc[101, "autopsy_label"], 1) + self.assertEqual(by_hadm.loc[101, "autopsy_label"], 0) self.assertEqual(by_hadm.loc[103, "noncompliance_label"], 0) self.assertEqual(by_hadm.loc[104, "autopsy_label"], 0) - self.assertEqual(by_hadm.loc[106, "noncompliance_label"], 1) + self.assertEqual(by_hadm.loc[106, "noncompliance_label"], 0) self._assert_hadm_unique(labels, "Note labels") def test_build_note_labels_can_include_all_hadm_ids_with_zero_defaults(self): @@ -1189,13 +1207,9 @@ def test_build_note_labels_avoids_simple_false_positives(self): 0, msg="Substring rules should not fire on generic compliance mentions", ) - self.assertEqual( - labels.loc[108, "autopsy_label"], - 1, - msg="Autopsy matching should be case-insensitive and based on substring presence", - ) + self.assertEqual(labels.loc[108, "autopsy_label"], 0) - def test_build_note_labels_matches_hyphenated_noncompliance_phrases(self): + def test_build_note_labels_does_not_treat_hyphenated_or_refusal_phrases_as_noncompliance(self): build_note_labels = self._get_callable("build_note_labels") notes = pd.DataFrame( [ @@ -1211,14 +1225,21 @@ def 
test_build_note_labels_matches_hyphenated_noncompliance_phrases(self): "text": "Patient remains non-adher to treatment plan.", "iserror": 0, }, + { + "hadm_id": 203, + "category": "Nursing", + "text": "Patient refuses treatment at this time.", + "iserror": 0, + }, ] ) labels = build_note_labels(notes).set_index("hadm_id") - self.assertEqual(labels.loc[201, "noncompliance_label"], 1) - self.assertEqual(labels.loc[202, "noncompliance_label"], 1) + self.assertEqual(labels.loc[201, "noncompliance_label"], 0) + self.assertEqual(labels.loc[202, "noncompliance_label"], 0) + self.assertEqual(labels.loc[203, "noncompliance_label"], 0) - def test_build_note_labels_matches_literal_noncompliance_and_noncompliant_terms(self): + def test_build_note_labels_matches_only_noncompliant_keyword_case_insensitively(self): build_note_labels = self._get_callable("build_note_labels") notes = pd.DataFrame( [ @@ -1238,9 +1259,38 @@ def test_build_note_labels_matches_literal_noncompliance_and_noncompliant_terms( ) labels = build_note_labels(notes).set_index("hadm_id") - self.assertEqual(labels.loc[211, "noncompliance_label"], 1) + self.assertEqual(labels.loc[211, "noncompliance_label"], 0) self.assertEqual(labels.loc[212, "noncompliance_label"], 1) + def test_build_note_labels_distinguishes_autopsy_consent_from_decline(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 221, + "category": "Nursing", + "text": "Family gave consent for autopsy and autopsy was performed.", + "iserror": 0, + }, + { + "hadm_id": 222, + "category": "Nursing", + "text": "Autopsy declined by family. 
No autopsy will be performed.", + "iserror": 0, + }, + { + "hadm_id": 223, + "category": "Nursing", + "text": "Autopsy discussed with family.", + "iserror": 0, + }, + ] + ) + labels = build_note_labels(notes).set_index("hadm_id") + self.assertEqual(labels.loc[221, "autopsy_label"], 1) + self.assertEqual(labels.loc[222, "autopsy_label"], 0) + self.assertEqual(labels.loc[223, "autopsy_label"], 0) + def test_identify_table2_itemids_discovers_matching_labels_across_dbsources(self): identify_table2_itemids = self._get_callable("identify_table2_itemids") d_items = pd.DataFrame( @@ -1502,6 +1552,20 @@ def test_z_normalize_scores_returns_zero_for_zero_variance_columns(self): normalized = z_normalize_scores(raw, columns=["noncompliance_score"]) self.assertTrue((normalized["noncompliance_score"] == 0.0).all()) + def test_z_normalize_scores_can_auto_detect_score_columns_when_columns_not_provided(self): + z_normalize_scores = self._get_callable("z_normalize_scores") + raw = pd.DataFrame( + [ + {"hadm_id": 1, "noncompliance_score": 1.0, "autopsy_score": 10.0, "keep": 7.0}, + {"hadm_id": 2, "noncompliance_score": 2.0, "autopsy_score": 20.0, "keep": 8.0}, + {"hadm_id": 3, "noncompliance_score": 3.0, "autopsy_score": 30.0, "keep": 9.0}, + ] + ) + normalized = z_normalize_scores(raw) + self.assertAlmostEqual(float(normalized["noncompliance_score"].mean()), 0.0, places=7) + self.assertAlmostEqual(float(normalized["autopsy_score"].mean()), 0.0, places=7) + self.assertEqual(normalized["keep"].tolist(), [7.0, 8.0, 9.0]) + def test_build_acuity_scores_produces_unique_admission_level_table(self): build_acuity_scores = self._get_callable("build_acuity_scores") acuity = build_acuity_scores(self.oasis, self.sapsii) @@ -1618,6 +1682,7 @@ def predict_proba(self, X): self.assertEqual(len(created), 1) self.assertEqual(created[0].get("penalty"), "l1") + self.assertEqual(created[0].get("C"), 0.1) self.assertEqual(created[0].get("solver"), "liblinear") self.assertEqual(created[0].get("max_iter"), 
1000) @@ -1946,7 +2011,7 @@ def test_build_final_model_table_contains_baseline_optional_features_and_targets "in_hospital_mortality", } self.assertTrue(required_columns.issubset(set(final_table.columns))) - self.assertEqual(set(final_table["hadm_id"]), {101, 103, 106, 107}) + self.assertEqual(set(final_table["hadm_id"]), set(all_cohort["hadm_id"])) by_hadm = final_table.set_index("hadm_id") self.assertEqual(by_hadm.loc[106, "left_ama"], 1) @@ -2471,7 +2536,7 @@ def test_write_minimal_deliverables_sorts_by_hadm_id_and_writes_without_index(se self.assertEqual(list(base_admissions["hadm_id"]), [101, 103]) self.assertNotIn("Unnamed: 0", base_admissions.columns) - def test_write_minimal_deliverables_raises_when_required_artifact_is_missing(self): + def test_write_minimal_deliverables_skips_missing_artifacts_without_crashing(self): write_minimal_deliverables = self._get_callable("write_minimal_deliverables") artifacts = { "base_admissions": pd.DataFrame([{"hadm_id": 101}]), @@ -2491,8 +2556,11 @@ def test_write_minimal_deliverables_raises_when_required_artifact_is_missing(sel } with tempfile.TemporaryDirectory() as temp_dir: - with self.assertRaises(KeyError): - write_minimal_deliverables(artifacts, Path(temp_dir)) + output_dir = Path(temp_dir) + write_minimal_deliverables(artifacts, output_dir) + self.assertTrue((output_dir / "base_admissions.csv").exists()) + self.assertTrue((output_dir / "acuity_scores.csv").exists()) + self.assertFalse((output_dir / "final_model_table.csv").exists()) def test_write_minimal_deliverables_sorts_nullable_integer_hadm_ids(self): write_minimal_deliverables = self._get_callable("write_minimal_deliverables") @@ -2723,14 +2791,14 @@ def test_data_contract_write_minimal_deliverables_round_trip_preserves_columns_a feature_matrix=feature_matrix, note_labels=note_labels, note_corpus=note_corpus, - estimator_factory=lambda: _FakeProbEstimator([0.9, 0.2, 0.8, 0.1]), + estimator_factory=lambda: _FakeProbEstimator([0.9, 0.2, 0.8, 0.1, 0.4, 0.3]), 
sentiment_fn=lambda text: ( { "Patient refuses treatment and was noncompliant with medication. Date:[**5-1-18**] Autopsy was discussed with the family.": -0.5, "Cooperative patient. Follows commands.": 0.0, "Patient remains nonadherent with follow up plan.": -0.2, "": 0.0, - }[text], + }.get(text, 0.0), 0.0, ), ) @@ -2817,14 +2885,14 @@ def test_end_to_end_artifact_assembly_smoke_spec(self): feature_matrix=feature_matrix, note_labels=note_labels, note_corpus=note_corpus, - estimator_factory=lambda: _FakeProbEstimator([0.9, 0.2, 0.8, 0.1]), + estimator_factory=lambda: _FakeProbEstimator([0.9, 0.2, 0.8, 0.1, 0.4, 0.3]), sentiment_fn=lambda text: ( { "Patient refuses treatment and was noncompliant with medication. Date:[**5-1-18**] Autopsy was discussed with the family.": -0.5, "Cooperative patient. Follows commands.": 0.0, "Patient remains nonadherent with follow up plan.": -0.2, "": 0.0, - }[text], + }.get(text, 0.0), 0.0, ), ) diff --git a/tests/core/test_eol_mistrust_model.py b/tests/core/test_eol_mistrust_model.py index d70433388..0630496a9 100644 --- a/tests/core/test_eol_mistrust_model.py +++ b/tests/core/test_eol_mistrust_model.py @@ -335,6 +335,7 @@ def predict_proba(self, X): self.assertEqual(len(created), 1) self.assertIs(estimator, created[0]) self.assertEqual(created[0].kwargs.get("penalty"), "l1") + self.assertEqual(created[0].kwargs.get("C"), 0.1) self.assertEqual(created[0].kwargs.get("solver"), "liblinear") self.assertEqual(created[0].kwargs.get("max_iter"), 1000) self.assertEqual(len(created[0].fit_X), len(self.feature_matrix)) @@ -723,6 +724,21 @@ def test_run_race_based_treatment_analysis_missing_columns_raise(self): self.treatment_totals, ) + def test_run_race_based_treatment_analysis_by_acuity_partitions_each_treatment_into_three_bins(self): + run_race_based_treatment_analysis_by_acuity = self._get_callable( + "run_race_based_treatment_analysis_by_acuity" + ) + results = run_race_based_treatment_analysis_by_acuity( + self.eol_cohort, + 
self.treatment_totals, + self.acuity_scores, + ) + self.assertEqual(results.shape[0], 6) + self.assertEqual(set(results["treatment"]), {"total_vent_min", "total_vaso_min"}) + self.assertEqual(set(results["severity_bin"]), {"low", "medium", "high"}) + counts = results.groupby("treatment")["severity_bin"].nunique().to_dict() + self.assertEqual(counts, {"total_vent_min": 3, "total_vaso_min": 3}) + def test_run_trust_based_treatment_analysis_uses_explicit_group_size_and_tie_breaks_by_hadm_id(self): run_trust_based_treatment_analysis = self._get_callable("run_trust_based_treatment_analysis") eol = pd.DataFrame( @@ -815,6 +831,22 @@ def test_run_trust_based_treatment_analysis_handles_invalid_group_sizes_and_full row = valid.iloc[0] self.assertAlmostEqual(float(row["median_gap"]), float(row["median_high"]) - float(row["median_low"])) + def test_run_trust_based_treatment_analysis_by_acuity_returns_metric_treatment_bin_rows(self): + run_trust_based_treatment_analysis_by_acuity = self._get_callable( + "run_trust_based_treatment_analysis_by_acuity" + ) + results = run_trust_based_treatment_analysis_by_acuity( + self.eol_cohort, + self.final_model_table[["hadm_id", "noncompliance_score_z"]], + self.treatment_totals, + self.acuity_scores, + score_columns=["noncompliance_score_z"], + ) + self.assertEqual(results.shape[0], 6) + self.assertEqual(set(results["metric"]), {"noncompliance_score_z"}) + self.assertEqual(set(results["treatment"]), {"total_vent_min", "total_vaso_min"}) + self.assertEqual(set(results["severity_bin"]), {"low", "medium", "high"}) + def test_run_acuity_control_analysis_returns_pairwise_correlations(self): run_acuity_control_analysis = self._get_callable("run_acuity_control_analysis") mistrust_scores = pd.DataFrame( @@ -932,6 +964,7 @@ def _auc_fn(y_true, y_prob): self.assertEqual(split_calls[0]["n_rows"], 4) self.assertEqual(created[0].kwargs.get("penalty"), "l1") + self.assertEqual(created[0].kwargs.get("C"), 0.1) 
self.assertEqual(created[0].kwargs.get("solver"), "liblinear") self.assertEqual(created[0].kwargs.get("max_iter"), 1000) self.assertEqual(auc_calls[0]["y_prob"], [0.1, 0.9]) @@ -1061,7 +1094,9 @@ def test_run_full_eol_mistrust_modeling_returns_expected_sections(self): "feature_weight_summaries", "race_gap_results", "race_treatment_results", + "race_treatment_by_acuity_results", "trust_treatment_results", + "trust_treatment_by_acuity_results", "acuity_correlations", "downstream_auc_results", }, diff --git a/tests/core/test_eol_mistrust_module.py b/tests/core/test_eol_mistrust_module.py index 756a7e204..7a44dc28c 100644 --- a/tests/core/test_eol_mistrust_module.py +++ b/tests/core/test_eol_mistrust_module.py @@ -46,7 +46,7 @@ def setUpClass(cls): cls.module = _load_eol_mistrust_module() def setUp(self): - self.all_hadm_ids = [302, 303, 304, 305, 306] + self.all_hadm_ids = [301, 302, 303, 304, 305, 306] self.admissions = pd.DataFrame( [ { @@ -202,7 +202,7 @@ def setUp(self): { "hadm_id": 302, "category": "Nursing", - "text": "Patient was NON-COMPLIAN with care plan. AUTOPSY discussed.", + "text": "Patient was NONCOMPLIANT with care plan. Family provided AUTOPSY consent.", "iserror": 0, }, { @@ -441,8 +441,8 @@ def _required_downstream_feature_configs(self): def _build_mistrust_scores(self): build_mistrust_score_table = self._get_callable("build_mistrust_score_table") probability_sequences = [ - [0.90, 0.10, 0.80, 0.20, 0.40], - [0.70, 0.20, 0.30, 0.60, 0.50], + [0.05, 0.90, 0.10, 0.80, 0.20, 0.40], + [0.15, 0.70, 0.20, 0.30, 0.60, 0.50], ] created = [] @@ -452,7 +452,7 @@ def estimator_factory(): return estimator sentiment_map = { - "Patient was NON-COMPLIAN with care plan. AUTOPSY discussed.": -0.6, + "Patient was NONCOMPLIANT with care plan. Family provided AUTOPSY consent.": -0.6, "Patient remained non-adher with follow up after counseling.": -0.2, "Patient refuses medication.": 0.1, "Patient refused treatment. 
Date:[**5-1-18**]": -0.4, @@ -472,10 +472,9 @@ def _assert_hadm_unique(self, df, message): self.assertIn("hadm_id", df.columns, msg=f"{message} must include hadm_id") self.assertTrue(df["hadm_id"].is_unique, msg=f"{message} must be unique on hadm_id") - def test_all_cohort_contains_distinct_hadm_ids_with_icu_stay_ge_12h(self): + def test_all_cohort_contains_distinct_hadm_ids_with_any_icu_stay(self): all_cohort = self._build_all() self.assertEqual(set(all_cohort["hadm_id"]), set(self.all_hadm_ids)) - self.assertNotIn(301, set(all_cohort["hadm_id"])) self.assertNotIn(307, set(all_cohort["hadm_id"])) self._assert_hadm_unique(all_cohort, "ALL cohort") @@ -486,14 +485,12 @@ def test_all_cohort_size_is_within_expected_mimic_range(self): def test_eol_cohort_applies_los_and_discharge_criteria(self): eol = self._build_eol() - self.assertEqual(set(eol["hadm_id"]), {302, 303, 304}) + self.assertEqual(set(eol["hadm_id"]), {302}) by_hadm = eol.set_index("hadm_id") self.assertEqual(by_hadm.loc[302, "discharge_category"], "Hospice") - self.assertEqual(by_hadm.loc[303, "discharge_category"], "Skilled Nursing Facility") - self.assertEqual(by_hadm.loc[304, "discharge_category"], "Deceased") self._assert_hadm_unique(eol, "EOL cohort") - def test_eol_cohort_enforces_exact_six_hour_boundary(self): + def test_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): build_base_admissions = self._get_callable("build_base_admissions") build_demographics_table = self._get_callable("build_demographics_table") build_eol_cohort = self._get_callable("build_eol_cohort") @@ -503,7 +500,7 @@ def test_eol_cohort_enforces_exact_six_hour_boundary(self): "hadm_id": 920, "subject_id": 920, "admittime": "2100-09-01 00:00:00", - "dischtime": "2100-09-01 06:00:00", + "dischtime": "2100-09-02 00:00:00", "ethnicity": "WHITE", "insurance": "Medicare", "discharge_location": "HOME HOSPICE", @@ -514,7 +511,7 @@ def test_eol_cohort_enforces_exact_six_hour_boundary(self): "hadm_id": 921, "subject_id": 921, 
"admittime": "2100-09-01 00:00:00", - "dischtime": "2100-09-01 05:59:00", + "dischtime": "2100-09-02 00:01:00", "ethnicity": "BLACK/AFRICAN AMERICAN", "insurance": "Private", "discharge_location": "HOME HOSPICE", @@ -533,8 +530,8 @@ def test_eol_cohort_enforces_exact_six_hour_boundary(self): base = build_base_admissions(admissions, patients) demographics = build_demographics_table(base) eol = build_eol_cohort(base, demographics) - self.assertIn(920, set(eol["hadm_id"])) - self.assertNotIn(921, set(eol["hadm_id"])) + self.assertNotIn(920, set(eol["hadm_id"])) + self.assertIn(921, set(eol["hadm_id"])) def test_eol_cohort_size_is_within_expected_mimic_range(self): self._pending_real_data( @@ -574,7 +571,7 @@ def test_mistrust_scores_trained_on_all_can_merge_into_eol_by_hadm_id(self): scores, created = self._build_mistrust_scores() merged = eol[["hadm_id"]].merge(scores, on="hadm_id", how="left") self.assertEqual(len(created), 2) - self.assertEqual(set(merged["hadm_id"]), {302, 303, 304}) + self.assertEqual(set(merged["hadm_id"]), set(eol["hadm_id"])) self.assertTrue( merged[ [ @@ -700,7 +697,7 @@ def test_note_aggregation_keeps_non_error_notes_and_concatenates_per_admission(s note_corpus = self._build_note_corpus() self._assert_hadm_unique(note_corpus, "Note corpus") by_hadm = note_corpus.set_index("hadm_id") - self.assertIn("AUTOPSY discussed.", by_hadm.loc[302, "note_text"]) + self.assertIn("AUTOPSY consent.", by_hadm.loc[302, "note_text"]) self.assertIn("non-adher", by_hadm.loc[303, "note_text"]) self.assertNotIn("Autopsy requested.", by_hadm.loc[304, "note_text"]) self.assertEqual(by_hadm.loc[306, "note_text"], "") @@ -748,19 +745,34 @@ def test_total_note_count_exceeds_expected_reference_scale(self): "The raw clinical note corpus used for note aggregation should contain at least about 800,000 notes on real MIMIC-III data." 
) - def test_noncompliance_label_matches_all_required_substrings_case_insensitively(self): + def test_noncompliance_label_matches_only_noncompliant_case_insensitively(self): + build_note_labels = self._get_callable("build_note_labels") + phrases = [ + "noncompliant", + ] + notes = pd.DataFrame( + [ + { + "hadm_id": index + 1, + "category": "Nursing", + "text": f"Patient documented as {phrase.upper()} during stay.", + "iserror": 0, + } + for index, phrase in enumerate(phrases) + ] + ) + labels = build_note_labels(notes).set_index("hadm_id") + for hadm_id in range(1, len(phrases) + 1): + self.assertEqual(labels.loc[hadm_id, "noncompliance_label"], 1) + + def test_noncompliance_label_does_not_fire_on_hyphenated_refusal_or_noncompliance_variants(self): build_note_labels = self._get_callable("build_note_labels") phrases = [ - "noncomplian", "non-complian", - "nonadher", "non-adher", "refuses medication", - "refused medication", - "refuses treatment", "refused treatment", "noncompliance", - "noncompliant", ] notes = pd.DataFrame( [ @@ -775,24 +787,41 @@ def test_noncompliance_label_matches_all_required_substrings_case_insensitively( ) labels = build_note_labels(notes).set_index("hadm_id") for hadm_id in range(1, len(phrases) + 1): - self.assertEqual(labels.loc[hadm_id, "noncompliance_label"], 1) + self.assertEqual(labels.loc[hadm_id, "noncompliance_label"], 0) def test_noncompliance_positive_rate_is_within_expected_range(self): self._pending_real_data( "Noncompliance label prevalence on real data should be between 1% and 30%." 
) - def test_autopsy_label_uses_case_insensitive_matching(self): + def test_autopsy_label_distinguishes_consent_decline_and_ambiguous_mentions(self): build_note_labels = self._get_callable("build_note_labels") notes = pd.DataFrame( [ - {"hadm_id": 1, "category": "Nursing", "text": "AUTOPSY was discussed.", "iserror": 0}, - {"hadm_id": 2, "category": "Nursing", "text": "No mention here.", "iserror": 0}, + { + "hadm_id": 1, + "category": "Nursing", + "text": "AUTOPSY consent obtained and autopsy was performed.", + "iserror": 0, + }, + { + "hadm_id": 2, + "category": "Nursing", + "text": "Autopsy declined by family. No autopsy will be performed.", + "iserror": 0, + }, + { + "hadm_id": 3, + "category": "Nursing", + "text": "Autopsy was discussed with the family.", + "iserror": 0, + }, ] ) labels = build_note_labels(notes).set_index("hadm_id") self.assertEqual(labels.loc[1, "autopsy_label"], 1) self.assertEqual(labels.loc[2, "autopsy_label"], 0) + self.assertEqual(labels.loc[3, "autopsy_label"], 0) def test_autopsy_positive_rate_is_within_expected_range(self): self._pending_real_data( @@ -837,6 +866,7 @@ def predict_proba(self, X): build_proxy_probability_scores(feature_matrix, labels, "noncompliance_label") self.assertEqual(created[0].get("penalty"), "l1") + self.assertEqual(created[0].get("C"), 0.1) self.assertEqual(created[0].get("solver"), "liblinear") self.assertEqual(created[0].get("max_iter"), 1000) @@ -866,12 +896,13 @@ def predict_proba(self, X): build_proxy_probability_scores(feature_matrix, labels, "autopsy_label") self.assertEqual(created[0].get("penalty"), "l1") + self.assertEqual(created[0].get("C"), 0.1) self.assertEqual(created[0].get("solver"), "liblinear") self.assertEqual(created[0].get("max_iter"), 1000) def test_proxy_models_fit_on_full_all_cohort_without_train_test_split(self): build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") - estimator = _FakeProbEstimator([0.9, 0.2, 0.8, 0.1, 0.4]) + estimator = 
_FakeProbEstimator([0.9, 0.2, 0.8, 0.1, 0.4, 0.3]) feature_matrix = self._build_feature_matrix() labels = self._build_note_labels() scores = build_proxy_probability_scores( @@ -1141,7 +1172,7 @@ def test_acuity_scores_merge_to_mistrust_scores_by_hadm_id(self): acuity = build_acuity_scores(self.oasis, self.sapsii) scores, _ = self._build_mistrust_scores() merged = scores.merge(acuity, on="hadm_id", how="inner") - self.assertEqual(set(merged["hadm_id"]), set(self.all_hadm_ids)) + self.assertEqual(set(merged["hadm_id"]), set(acuity["hadm_id"])) self.assertTrue({"oasis", "sapsii"}.issubset(merged.columns)) def test_acuity_aggregation_rule_is_deterministic_for_multiple_icu_stays(self): @@ -1513,10 +1544,24 @@ def test_model_artifacts_are_unique_and_aligned_on_hadm_id(self): include_race=True, include_mistrust=True, ) - expected_hadm_ids = set(all_cohort["hadm_id"]) - for artifact in [feature_matrix, note_labels, note_corpus, mistrust_scores, acuity, final]: + expected_hadm_ids = { + "feature_matrix": set(all_cohort["hadm_id"]), + "note_labels": set(all_cohort["hadm_id"]), + "note_corpus": set(all_cohort["hadm_id"]), + "mistrust_scores": set(all_cohort["hadm_id"]), + "acuity": set(acuity["hadm_id"]), + "final": set(final["hadm_id"]), + } + for name, artifact in [ + ("feature_matrix", feature_matrix), + ("note_labels", note_labels), + ("note_corpus", note_corpus), + ("mistrust_scores", mistrust_scores), + ("acuity", acuity), + ("final", final), + ]: self._assert_hadm_unique(artifact, "Artifact") - self.assertEqual(set(artifact["hadm_id"]), expected_hadm_ids) + self.assertEqual(set(artifact["hadm_id"]), expected_hadm_ids[name]) if __name__ == "__main__": From 5af31e1f2da4d0b6b80ebb52c63268a62fed5ab8 Mon Sep 17 00:00:00 2001 From: aaronx2-illinois Date: Fri, 10 Apr 2026 20:19:50 -0600 Subject: [PATCH 4/7] CommitName :Refactor EOL mistrust tests and add task tests CommitMsg: 1.Update test_eol_mistrust_module.py for new scoring logic and data handling 2.Add assertions for 
discharge categories and sentiment analysis 3.Add test_eol_mistrust_task.py for task module coverage 4.Test code status target building and task mapping consistency 5.Use dummy patient and event classes for test cases --- examples/eol_mistrust.py | 1950 +++++++++++- pyhealth/datasets/eol_mistrust.py | 1959 ++++++++---- pyhealth/models/eol_mistrust.py | 1206 +++++++- pyhealth/tasks/eol_mistrust.py | 350 ++- tests/core/test_eol_mistrust_Integration.py | 2700 ++++++++++++++++- ...test_eol_mistrust_TrainingAndEvaluation.py | 15 +- tests/core/test_eol_mistrust_dataset.py | 1401 ++++++--- tests/core/test_eol_mistrust_model.py | 443 ++- tests/core/test_eol_mistrust_module.py | 168 +- tests/core/test_eol_mistrust_task.py | 178 ++ 10 files changed, 9062 insertions(+), 1308 deletions(-) create mode 100644 tests/core/test_eol_mistrust_task.py diff --git a/examples/eol_mistrust.py b/examples/eol_mistrust.py index 47f70158e..d8e3aac69 100644 --- a/examples/eol_mistrust.py +++ b/examples/eol_mistrust.py @@ -1,9 +1,9 @@ -"""Example workflow for the EOL mistrust study pipeline. +r"""Example workflow for the EOL mistrust study pipeline. This script assumes you have already exported and combined the required MIMIC-III tables into a local directory such as: - downloads/eol_mistrust_required_combined/ + EOL_Workspace/eol_mistrust_required_combined/ mimiciii_clinical/ mimiciii_notes/ mimiciii_derived/ @@ -15,40 +15,115 @@ Implementation note: the sentiment metric in this repo uses the existing transformers+torch stack rather than the original Pattern backend from the -reference notebooks. The example therefore builds the sentiment corpus from -`Discharge summary` notes only, while label extraction still uses all non-error +reference notebooks. The example still follows the paper-style note scope by +building both the sentiment corpus and note-derived labels from all non-error notes. 
+ +Recommended commands +-------------------- +Formal managed runs (recommended) + +The script now creates a managed run archive under +``EOL_Workspace/EOL_Result/EOL_(normal|Paperlike)_/``. +When ``--output-dir`` and ``--stream-cache-dir`` are omitted, deliverables, +runtime files, and stage cache directories are created automatically inside +that managed run folder. + +Default / corrected pipeline + +Formal cold-start run: +.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --compare-to-paper --repetitions 10 + +Formal smoke run: +.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --compare-to-paper --repetitions 1 + +Paper-like dataset preparation + +Formal cold-start run: +.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --compare-to-paper --paper-like-dataset-prepare --repetitions 10 + +Formal smoke run: +.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --compare-to-paper --paper-like-dataset-prepare --repetitions 1 + +Optional fast reruns with shared cache + +Default / corrected pipeline: +.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --stream-cache-dir EOL_Workspace --reuse-intermediates EOL_Workspace --compare-to-paper --repetitions 10 +.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --stream-cache-dir EOL_Workspace --reuse-intermediates EOL_Workspace --compare-to-paper --repetitions 1 + +Paper-like dataset preparation: +.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --stream-cache-dir EOL_Workspace --reuse-intermediates EOL_Workspace --compare-to-paper --paper-like-dataset-prepare --repetitions 10 +.\.venv\Scripts\python.exe examples\eol_mistrust.py --root 
EOL_Workspace\eol_mistrust_required_combined --stream-cache-dir EOL_Workspace --reuse-intermediates EOL_Workspace --compare-to-paper --paper-like-dataset-prepare --repetitions 1 + +Optional custom managed-run archive root: +.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --result-root EOL_Workspace\EOL_Result --compare-to-paper --repetitions 10 + + + """ from __future__ import annotations import argparse +import importlib.util +import json +import sys import tempfile +import time +from datetime import datetime from pathlib import Path import pandas as pd -from pyhealth.datasets import MIMIC3Dataset -from pyhealth.datasets.eol_mistrust import ( - build_acuity_scores, - build_all_cohort, - build_base_admissions, - build_chartevent_artifacts_from_csv, - build_demographics_table, - build_eol_cohort, - build_final_model_table_from_code_status_targets, - build_note_corpus_from_csv, - build_note_labels_from_csv, - build_treatment_totals, - write_minimal_deliverables, +REPO_ROOT = Path(__file__).resolve().parents[1] +DEFAULT_DATA_ROOT = REPO_ROOT / "EOL_Workspace" / "eol_mistrust_required_combined" +DEFAULT_CONFIG_PATH = REPO_ROOT / "pyhealth" / "datasets" / "configs" / "eol_mistrust.yaml" +DEFAULT_RESULT_ROOT = REPO_ROOT / "EOL_Workspace" / "EOL_Result" + + +def _load_local_module(module_name: str, relative_path: str): + module_path = REPO_ROOT / relative_path + spec = importlib.util.spec_from_file_location(module_name, module_path) + if spec is None or spec.loader is None: + raise ImportError(f"Unable to load module {module_name} from {module_path}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +_DATASET_MODULE = _load_local_module( + "pyhealth_datasets_eol_mistrust_example_local", + "pyhealth/datasets/eol_mistrust.py", +) +_MODEL_MODULE = _load_local_module( + "pyhealth_models_eol_mistrust_example_local", + "pyhealth/models/eol_mistrust.py", ) -from 
pyhealth.models.eol_mistrust import EOLMistrustModel -from pyhealth.tasks.eol_mistrust import EOLMistrustMortalityPredictionMIMIC3 +build_acuity_scores = _DATASET_MODULE.build_acuity_scores +build_all_cohort = _DATASET_MODULE.build_all_cohort +build_base_admissions = _DATASET_MODULE.build_base_admissions +build_chartevent_artifacts_from_csv = _DATASET_MODULE.build_chartevent_artifacts_from_csv +build_demographics_table = _DATASET_MODULE.build_demographics_table +build_eol_cohort = _DATASET_MODULE.build_eol_cohort +build_final_model_table_from_code_status_targets = _DATASET_MODULE.build_final_model_table_from_code_status_targets +build_note_corpus_from_csv = _DATASET_MODULE.build_note_corpus_from_csv +build_note_labels_from_csv = _DATASET_MODULE.build_note_labels_from_csv +build_treatment_totals = _DATASET_MODULE.build_treatment_totals +validate_database_environment = _DATASET_MODULE.validate_database_environment +write_minimal_deliverables = _DATASET_MODULE.write_minimal_deliverables -REPO_ROOT = Path(__file__).resolve().parents[1] -DEFAULT_DATA_ROOT = REPO_ROOT / "downloads" / "eol_mistrust_required_combined" -DEFAULT_CONFIG_PATH = REPO_ROOT / "pyhealth" / "datasets" / "configs" / "eol_mistrust.yaml" +EOLMistrustModel = _MODEL_MODULE.EOLMistrustModel +evaluate_downstream_average_weights = _MODEL_MODULE.evaluate_downstream_average_weights +build_autopsy_mistrust_scores = _MODEL_MODULE.build_autopsy_mistrust_scores +build_logistic_cv_estimator_factory = _MODEL_MODULE.build_logistic_cv_estimator_factory +build_negative_sentiment_mistrust_scores = _MODEL_MODULE.build_negative_sentiment_mistrust_scores +build_noncompliance_mistrust_scores = _MODEL_MODULE.build_noncompliance_mistrust_scores +get_downstream_feature_configurations = _MODEL_MODULE.get_downstream_feature_configurations +z_normalize_scores = _MODEL_MODULE.z_normalize_scores + +MIMIC3Dataset = None +EOLMistrustMortalityPredictionMIMIC3 = None RAW_TABLE_PATHS = { "admissions": 
"mimiciii_clinical/admissions.csv", @@ -69,6 +144,210 @@ "sapsii": "mimiciii_derived/sapsii.csv", } +VALIDATION_EVENT_PROBE_ROWS = 50_000 + +PAPER_URL = "https://proceedings.mlr.press/v85/boag18a.html" +PAPER_PDF_URL = "https://proceedings.mlr.press/v85/boag18a/boag18a.pdf" + +PAPER_TABLE1_COUNTS = { + "Population Size": {"BLACK": 1214, "WHITE": 9987}, + "Insurance Private": {"BLACK": 141, "WHITE": 1594}, + "Insurance Public": {"BLACK": 1062, "WHITE": 8356}, + "Insurance Self-Pay": {"BLACK": 11, "WHITE": 37}, + "Discharge Deceased": {"BLACK": 401, "WHITE": 3869}, + "Discharge Hospice": {"BLACK": 40, "WHITE": 421}, + "Discharge Skilled Nursing Facility": {"BLACK": 773, "WHITE": 5697}, + "Gender F": {"BLACK": 733, "WHITE": 5012}, + "Gender M": {"BLACK": 481, "WHITE": 4975}, +} + +PAPER_TABLE1_CONTINUOUS = { + "Length of stay (median days)": { + "BLACK": {"center": 13.90, "lower": 5.55, "upper": 19.56}, + "WHITE": {"center": 14.08, "lower": 6.45, "upper": 19.45}, + }, + "Age (median years)": { + "BLACK": {"center": 71.31, "lower": 60.21, "upper": 80.36}, + "WHITE": {"center": 77.87, "lower": 66.61, "upper": 84.93}, + }, +} + +PAPER_TABLE2_TREATMENT = { + "total_vent_min": { + "n_black": 510, + "n_white": 4810, + "median_black": 3180.0, + "median_white": 2520.0, + "pvalue": 0.005, + }, + "total_vaso_min": { + "n_black": 453, + "n_white": 4456, + "median_black": 2046.0, + "median_white": 1770.0, + "pvalue": 0.12, + }, +} + +PAPER_TABLE3_WEIGHTS = { + "noncompliance": { + "positive": [ + ("riker-sas scale: agitated", 0.7013), + ("education readiness: no", 0.2540), + ("pain level: 7-mod to severe", 0.2168), + ], + "negative": [ + ("state: alert", -1.0156), + ("pain: none", -0.5427), + ("richmond-ras scale: 0 alert and calm", -0.3598), + ], + }, + "autopsy": { + "positive": [ + ("reapplied restraints", 0.1153), + ("restraint type: soft limb", 0.0980), + ("orientation: oriented 3x", 0.0363), + ], + "negative": [ + ("pain present: no", -0.2689), + ("spokesperson is 
healthcare proxy", -0.2271), + ("family communication: talked to m.d.", -0.1184), + ], + }, +} + +PAPER_TABLE3_FEATURE_ALIASES = { + "autopsy": { + "reapplied restraints": ( + "restraints evaluated: restraintreapply", + "restraints evaluated: reapplied", + "restraints evaluated v1: restraint reapplied", + "restraints evaluated v2: reapplied", + ), + "orientation: oriented 3x": ( + "orientation: oriented x 3", + "orientation: oriented x3", + ), + "spokesperson is healthcare proxy": ( + "is the spokesperson the health care proxy: 1", + ), + "family communication: talked to m.d.": ( + "family communication: family talked to md", + "family communication: fam talked to md", + ), + }, +} + +PAPER_TABLE4_CORRELATIONS = { + tuple(sorted(("oasis", "sapsii"))): 0.679, + tuple(sorted(("oasis", "noncompliance_score_z"))): 0.050, + tuple(sorted(("oasis", "autopsy_score_z"))): -0.012, + tuple(sorted(("oasis", "negative_sentiment_score_z"))): 0.075, + tuple(sorted(("sapsii", "noncompliance_score_z"))): 0.013, + tuple(sorted(("sapsii", "autopsy_score_z"))): -0.013, + tuple(sorted(("sapsii", "negative_sentiment_score_z"))): 0.086, + tuple(sorted(("noncompliance_score_z", "autopsy_score_z"))): 0.262, + tuple(sorted(("noncompliance_score_z", "negative_sentiment_score_z"))): 0.058, + tuple(sorted(("autopsy_score_z", "negative_sentiment_score_z"))): 0.044, +} + +PAPER_TABLE5_AUC = { + ("Left AMA", "Baseline"): {"n_rows": 48071, "auc_mean": 0.859, "auc_std": 0.014}, + ("Left AMA", "Baseline + Race"): {"n_rows": 48071, "auc_mean": 0.861, "auc_std": 0.014}, + ("Left AMA", "Baseline + Noncompliant"): {"n_rows": 48071, "auc_mean": 0.869, "auc_std": 0.012}, + ("Left AMA", "Baseline + Autopsy"): {"n_rows": 48071, "auc_mean": 0.861, "auc_std": 0.012}, + ("Left AMA", "Baseline + Neg-Sentiment"): {"n_rows": 48071, "auc_mean": 0.859, "auc_std": 0.013}, + ("Left AMA", "Baseline + ALL"): {"n_rows": 48071, "auc_mean": 0.873, "auc_std": 0.012}, + ("Code Status", "Baseline"): {"n_rows": 39815, 
"auc_mean": 0.763, "auc_std": 0.013}, + ("Code Status", "Baseline + Race"): {"n_rows": 39815, "auc_mean": 0.766, "auc_std": 0.014}, + ("Code Status", "Baseline + Noncompliant"): {"n_rows": 39815, "auc_mean": 0.767, "auc_std": 0.013}, + ("Code Status", "Baseline + Autopsy"): {"n_rows": 39815, "auc_mean": 0.773, "auc_std": 0.011}, + ("Code Status", "Baseline + Neg-Sentiment"): {"n_rows": 39815, "auc_mean": 0.765, "auc_std": 0.014}, + ("Code Status", "Baseline + ALL"): {"n_rows": 39815, "auc_mean": 0.782, "auc_std": 0.012}, + ("In-hospital mortality", "Baseline"): {"n_rows": 48071, "auc_mean": 0.600, "auc_std": 0.011}, + ("In-hospital mortality", "Baseline + Race"): {"n_rows": 48071, "auc_mean": 0.614, "auc_std": 0.011}, + ("In-hospital mortality", "Baseline + Noncompliant"): {"n_rows": 48071, "auc_mean": 0.614, "auc_std": 0.010}, + ("In-hospital mortality", "Baseline + Autopsy"): {"n_rows": 48071, "auc_mean": 0.603, "auc_std": 0.012}, + ("In-hospital mortality", "Baseline + Neg-Sentiment"): {"n_rows": 48071, "auc_mean": 0.615, "auc_std": 0.010}, + ("In-hospital mortality", "Baseline + ALL"): {"n_rows": 48071, "auc_mean": 0.635, "auc_std": 0.010}, +} + +PAPER_TABLE6_WEIGHTS = { + "Left AMA": { + "noncompliant": (0.52, 0.09), + "autopsy": (0.01, 0.03), + "negative sentiment": (0.00, 0.02), + "race: asian": (0.00, 0.00), + "race: black": (0.03, 0.12), + "race: hispanic": (0.00, 0.00), + "race: other": (-0.15, 0.19), + "race: white": (-0.02, 0.06), + "race: native american": (0.00, 0.00), + "gender: male": (0.00, 0.00), + "gender: female": (-0.40, 0.20), + "insurance: private": (-1.01, 0.21), + "insurance: public": (0.00, 0.00), + "insurance: self-pay": (0.00, 0.00), + "length-of-stay": (-1.44, 0.37), + "age": (-2.10, 0.21), + }, + "Code Status": { + "noncompliant": (0.27, 0.04), + "autopsy": (-0.44, 0.05), + "negative sentiment": (0.09, 0.03), + "race: asian": (0.00, 0.00), + "race: black": (-0.22, 0.19), + "race: hispanic": (-0.17, 0.21), + "race: other": (-0.12, 
0.17), + "race: white": (0.06, 0.15), + "race: native american": (0.00, 0.00), + "gender: male": (-0.85, 1.40), + "gender: female": (-0.49, 1.39), + "insurance: private": (-0.94, 0.29), + "insurance: public": (-0.02, 0.28), + "insurance: self-pay": (-0.02, 0.24), + "length-of-stay": (-0.70, 0.10), + "age": (0.42, 0.02), + }, + "In-hospital mortality": { + "noncompliant": (0.16, 0.03), + "autopsy": (0.02, 0.02), + "negative sentiment": (0.16, 0.03), + "race: asian": (-0.05, 0.03), + "race: black": (-0.53, 0.31), + "race: hispanic": (-0.58, 0.34), + "race: other": (0.15, 0.30), + "race: white": (-0.26, 0.30), + "race: native american": (0.00, 0.00), + "gender: male": (-0.67, 0.99), + "gender: female": (-0.59, 0.99), + "insurance: private": (-0.96, 0.95), + "insurance: public": (-0.50, 0.95), + "insurance: self-pay": (-0.21, 0.68), + "length-of-stay": (0.08, 0.03), + "age": (0.20, 0.02), + }, +} + +TABLE6_FEATURE_NAME_MAP = { + "noncompliance_score_z": "noncompliant", + "autopsy_score_z": "autopsy", + "negative_sentiment_score_z": "negative sentiment", + "race_asian": "race: asian", + "race_black": "race: black", + "race_hispanic": "race: hispanic", + "race_other": "race: other", + "race_white": "race: white", + "race_native_american": "race: native american", + "gender_m": "gender: male", + "gender_f": "gender: female", + "insurance_private": "insurance: private", + "insurance_public": "insurance: public", + "insurance_self_pay": "insurance: self-pay", + "los_days": "length-of-stay", + "age": "age", +} + def _read_csvs(root: Path, path_map: dict[str, str]) -> dict[str, pd.DataFrame]: tables: dict[str, pd.DataFrame] = {} @@ -92,24 +371,1411 @@ def load_eol_mistrust_tables( return raw_tables, materialized_views +def _read_csv_probe( + root: Path, + relative_path: str, + *, + nrows: int = VALIDATION_EVENT_PROBE_ROWS, +) -> pd.DataFrame: + """Load a lightweight probe frame for validation of large event CSVs.""" + + csv_path = root / relative_path + if not 
csv_path.exists(): + raise FileNotFoundError(f"Missing required table for EOL example: {csv_path}") + table = pd.read_csv(csv_path, low_memory=False, nrows=nrows) + table.columns = [str(column).lower() for column in table.columns] + return table + + +def _canonical_pair(left: str, right: str) -> tuple[str, str]: + return tuple(sorted((str(left), str(right)))) + + +def _has_columns(frame: object, required_columns: set[str]) -> bool: + """Return True when *frame* is a DataFrame containing all required columns.""" + + return isinstance(frame, pd.DataFrame) and required_columns.issubset(frame.columns) + + +def _format_count_percent(count: int, total: int) -> str: + if total <= 0: + return str(int(count)) + return f"{int(count)} ({100.0 * float(count) / float(total):.2f}%)" + + +def _format_continuous_summary(center: float, lower: float, upper: float) -> str: + return f"{center:.2f} [{lower:.2f}, {upper:.2f}]" + + +def _note_present_hadm_ids(note_corpus: pd.DataFrame) -> list[int]: + """Return sorted admission ids with at least one non-empty aggregated note.""" + + hadm_ids = pd.to_numeric( + note_corpus.loc[note_corpus["note_text"].fillna("").astype(str).str.strip() != "", "hadm_id"], + errors="coerce", + ) + return sorted(hadm_ids.dropna().astype(int).unique().tolist()) + + +def build_paper_table1_comparison(eol_cohort: pd.DataFrame) -> pd.DataFrame: + """Compare the run EOL cohort demographics against Table 1 from the paper.""" + + cohort = eol_cohort[eol_cohort["race"].isin(["BLACK", "WHITE"])].copy() + totals = {race: int((cohort["race"] == race).sum()) for race in ("BLACK", "WHITE")} + rows: list[dict[str, object]] = [] + + metric_specs = [ + ("Population Size", None, None), + ("Insurance Private", "insurance_group", "Private"), + ("Insurance Public", "insurance_group", "Public"), + ("Insurance Self-Pay", "insurance_group", "Self-Pay"), + ("Discharge Deceased", "discharge_category", "Deceased"), + ("Discharge Hospice", "discharge_category", "Hospice"), + 
("Discharge Skilled Nursing Facility", "discharge_category", "Skilled Nursing Facility"), + ("Gender F", "gender", "F"), + ("Gender M", "gender", "M"), + ] + for metric, column, target_value in metric_specs: + for race in ("BLACK", "WHITE"): + race_frame = cohort[cohort["race"] == race] + if column is None: + run_numeric = int(len(race_frame)) + run_display = str(run_numeric) + else: + run_numeric = int((race_frame[column] == target_value).sum()) + run_display = _format_count_percent(run_numeric, totals[race]) + paper_numeric = int(PAPER_TABLE1_COUNTS[metric][race]) + if column is None: + paper_display = str(paper_numeric) + else: + paper_display = _format_count_percent( + paper_numeric, + PAPER_TABLE1_COUNTS["Population Size"][race], + ) + rows.append( + { + "metric": metric, + "race": race, + "paper_value": paper_display, + "run_value": run_display, + "paper_numeric": paper_numeric, + "run_numeric": run_numeric, + "delta_numeric": int(run_numeric - paper_numeric), + } + ) + + for metric, paper_values in PAPER_TABLE1_CONTINUOUS.items(): + for race in ("BLACK", "WHITE"): + race_frame = cohort[cohort["race"] == race] + series_name = "los_days" if metric == "Length of stay (median days)" else "age" + series = pd.to_numeric(race_frame[series_name], errors="coerce").dropna() + if series.empty: + run_numeric = float("nan") + run_lower = float("nan") + run_upper = float("nan") + else: + run_numeric = float(series.median()) + run_lower = float(series.quantile(0.25)) + run_upper = float(series.quantile(0.75)) + paper_numeric = float(paper_values[race]["center"]) + paper_lower = float(paper_values[race]["lower"]) + paper_upper = float(paper_values[race]["upper"]) + rows.append( + { + "metric": metric, + "race": race, + "summary_stat": "median_iqr", + "paper_value": _format_continuous_summary(paper_numeric, paper_lower, paper_upper), + "run_value": _format_continuous_summary(run_numeric, run_lower, run_upper), + "paper_numeric": paper_numeric, + "run_numeric": run_numeric, + 
"paper_interval_lower": paper_lower, + "paper_interval_upper": paper_upper, + "run_interval_lower": run_lower, + "run_interval_upper": run_upper, + "delta_numeric": float(run_numeric - paper_numeric), + } + ) + + return pd.DataFrame(rows) + + +def build_paper_table2_comparison(race_treatment_results: pd.DataFrame) -> pd.DataFrame: + """Compare run race-based treatment durations against Table 2 / Figure 2 from the paper.""" + + if race_treatment_results.empty: + return pd.DataFrame() + + rows: list[dict[str, object]] = [] + for _, row in race_treatment_results.iterrows(): + treatment = row["treatment"] + if treatment not in PAPER_TABLE2_TREATMENT: + continue + paper = PAPER_TABLE2_TREATMENT[treatment] + run_median_black = float(row["median_black"]) + run_median_white = float(row["median_white"]) + run_pvalue = float(row["pvalue"]) + rows.append( + { + "treatment": treatment, + "paper_n_black": int(paper["n_black"]), + "run_n_black": int(row["n_black"]), + "paper_n_white": int(paper["n_white"]), + "run_n_white": int(row["n_white"]), + "paper_median_black": float(paper["median_black"]), + "run_median_black": run_median_black, + "delta_median_black": run_median_black - float(paper["median_black"]), + "paper_median_white": float(paper["median_white"]), + "run_median_white": run_median_white, + "delta_median_white": run_median_white - float(paper["median_white"]), + "paper_pvalue": float(paper["pvalue"]), + "run_pvalue": run_pvalue, + } + ) + return pd.DataFrame(rows) + + +def build_paper_table3_comparison(feature_weight_summaries: dict[str, pd.DataFrame]) -> pd.DataFrame: + """Compare run proxy model top-3 feature weights against Table 3 from the paper.""" + + rows: list[dict[str, object]] = [] + for model_name, weights_dict in feature_weight_summaries.items(): + if model_name not in PAPER_TABLE3_WEIGHTS: + continue + paper_model = PAPER_TABLE3_WEIGHTS[model_name] + + # weights_dict may be a dict with "all"/"positive"/"negative" keys + if isinstance(weights_dict, dict): 
+ all_weights = weights_dict.get("all") + if not isinstance(all_weights, pd.DataFrame) or all_weights.empty: + continue + elif isinstance(weights_dict, pd.DataFrame): + all_weights = weights_dict + else: + continue + + if "weight" not in all_weights.columns or "feature" not in all_weights.columns: + continue + + # Build a lookup from lowercase feature name to weight + run_lookup = { + str(f).lower().strip(): float(w) + for f, w in zip(all_weights["feature"], all_weights["weight"]) + } + alias_lookup = { + str(f).lower().strip(): str(f) + for f in all_weights["feature"] + } + model_aliases = PAPER_TABLE3_FEATURE_ALIASES.get(model_name, {}) + + for direction in ("positive", "negative"): + for rank, (paper_feature, paper_weight) in enumerate( + paper_model[direction], start=1 + ): + normalized_paper_feature = paper_feature.lower().strip() + matched_feature = alias_lookup.get(normalized_paper_feature) + run_weight = run_lookup.get(normalized_paper_feature, float("nan")) + if pd.isna(run_weight): + for alias in model_aliases.get(paper_feature, ()): + normalized_alias = alias.lower().strip() + alias_weight = run_lookup.get(normalized_alias, float("nan")) + if not pd.isna(alias_weight): + run_weight = alias_weight + matched_feature = alias_lookup.get(normalized_alias, alias) + break + rows.append( + { + "proxy_model": model_name, + "direction": direction, + "rank": int(rank), + "paper_feature": paper_feature, + "paper_weight": float(paper_weight), + "run_feature": matched_feature, + "run_weight": run_weight, + "delta_weight": run_weight - float(paper_weight) + if not pd.isna(run_weight) + else float("nan"), + "run_feature_found": not pd.isna(run_weight), + } + ) + return pd.DataFrame(rows) + + +def build_paper_table3_snapshot(feature_weight_summaries: dict[str, pd.DataFrame]) -> pd.DataFrame: + """Capture the run's top positive/negative proxy weights for qualitative review.""" + + rows: list[dict[str, object]] = [] + for model_name, weights_dict in 
feature_weight_summaries.items(): + # Handle both dict-of-DataFrames and plain DataFrame inputs + if isinstance(weights_dict, dict): + working = weights_dict.get("all") + if not isinstance(working, pd.DataFrame) or working.empty: + continue + elif isinstance(weights_dict, pd.DataFrame): + working = weights_dict + else: + continue + if "weight" not in working.columns or "feature" not in working.columns: + continue + positive = working[working["weight"] > 0].sort_values("weight", ascending=False).head(3) + negative = working[working["weight"] < 0].sort_values("weight", ascending=True).head(3) + for direction, frame in (("positive", positive), ("negative", negative)): + for rank, row in enumerate(frame.itertuples(index=False), start=1): + rows.append( + { + "proxy_model": model_name, + "direction": direction, + "rank": int(rank), + "feature": getattr(row, "feature"), + "weight": float(getattr(row, "weight")), + } + ) + return pd.DataFrame(rows) + + +def build_paper_table4_comparison(acuity_correlations: pd.DataFrame) -> pd.DataFrame: + """Compare run acuity/mistrust correlations against Table 4 from the paper.""" + + rows: list[dict[str, object]] = [] + for row in acuity_correlations.itertuples(index=False): + key = _canonical_pair(getattr(row, "feature_a"), getattr(row, "feature_b")) + if key not in PAPER_TABLE4_CORRELATIONS: + continue + paper_corr = float(PAPER_TABLE4_CORRELATIONS[key]) + run_corr = float(getattr(row, "correlation")) + rows.append( + { + "feature_a": key[0], + "feature_b": key[1], + "paper_correlation": paper_corr, + "run_correlation": run_corr, + "delta_correlation": float(run_corr - paper_corr), + } + ) + return pd.DataFrame(rows) + + +def build_paper_table5_comparison(downstream_auc_results: pd.DataFrame) -> pd.DataFrame: + """Compare downstream AUCs against Table 5 from the paper.""" + + rows: list[dict[str, object]] = [] + for row in downstream_auc_results.itertuples(index=False): + key = (getattr(row, "task"), getattr(row, "configuration")) + 
if key not in PAPER_TABLE5_AUC: + continue + paper = PAPER_TABLE5_AUC[key] + paper_mean = float(paper["auc_mean"]) + paper_std = float(paper["auc_std"]) + run_mean = float(getattr(row, "auc_mean")) + run_std = float(getattr(row, "auc_std")) + rows.append( + { + "task": key[0], + "configuration": key[1], + "paper_n_rows": int(paper["n_rows"]), + "run_n_rows": int(getattr(row, "n_rows")), + "paper_auc_mean": paper_mean, + "run_auc_mean": run_mean, + "delta_auc_mean": float(run_mean - paper_mean), + "paper_auc_std": paper_std, + "run_auc_std": run_std, + "delta_auc_std": float(run_std - paper_std), + "n_valid_auc": int(getattr(row, "n_valid_auc")), + } + ) + return pd.DataFrame(rows) + + +def build_paper_table6_comparison(downstream_weight_results: pd.DataFrame) -> pd.DataFrame: + """Compare Baseline + ALL downstream average weights against Table 6 from the paper.""" + + if downstream_weight_results.empty: + return pd.DataFrame() + + working = downstream_weight_results.copy() + working = working[working["configuration"] == "Baseline + ALL"].copy() + if working.empty: + return pd.DataFrame() + working["paper_feature"] = working["feature"].map(TABLE6_FEATURE_NAME_MAP) + working = working[working["paper_feature"].notna()].copy() + + rows: list[dict[str, object]] = [] + for row in working.itertuples(index=False): + task_name = getattr(row, "task") + feature_name = getattr(row, "paper_feature") + if task_name not in PAPER_TABLE6_WEIGHTS: + continue + if feature_name not in PAPER_TABLE6_WEIGHTS[task_name]: + continue + paper_mean, paper_std = PAPER_TABLE6_WEIGHTS[task_name][feature_name] + run_mean = float(getattr(row, "weight_mean")) + run_std = float(getattr(row, "weight_std")) + # Paper Table 6 reports 1.96*std (95% CI half-width), not raw std + run_std_ci = run_std * 1.96 + rows.append( + { + "task": task_name, + "feature": feature_name, + "paper_weight_mean": float(paper_mean), + "run_weight_mean": run_mean, + "delta_weight_mean": float(run_mean - float(paper_mean)), + 
"paper_weight_std": float(paper_std), + "run_weight_std": run_std_ci, + "n_valid_weights": int(getattr(row, "n_valid_weights")), + } + ) + return pd.DataFrame(rows) + + +def _ensure_downstream_weight_results( + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], + *, + repetitions: int, +) -> pd.DataFrame: + existing = artifacts.get("downstream_weight_results") + if isinstance(existing, pd.DataFrame) and not existing.empty: + return existing + final_model_table = artifacts.get("final_model_table") + if not isinstance(final_model_table, pd.DataFrame) or final_model_table.empty: + return pd.DataFrame() + computed = evaluate_downstream_average_weights( + final_model_table=final_model_table, + repetitions=repetitions, + ) + artifacts["downstream_weight_results"] = computed + return computed + + +def _render_run_table_summary( + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], + *, + repetitions: int, +) -> str: + """Render a run-only Table 1-6 summary without direct paper comparisons.""" + + validation_summary = artifacts.get("validation_summary", {}) + autopsy_proxy_enabled = True + dataset_prepare_mode = "unknown" + if isinstance(validation_summary, dict): + autopsy_proxy_enabled = bool(validation_summary.get("autopsy_proxy_enabled", True)) + dataset_prepare_mode = str(validation_summary.get("dataset_prepare_mode", "unknown")) + + feature_weight_summaries = artifacts.get("feature_weight_summaries", {}) + if not isinstance(feature_weight_summaries, dict): + feature_weight_summaries = {} + + eol_cohort = artifacts.get("eol_cohort") + table1 = ( + build_paper_table1_comparison(eol_cohort) + if _has_columns( + eol_cohort, + {"race", "insurance_group", "discharge_category", "gender", "los_days", "age"}, + ) + else pd.DataFrame() + ) + race_treatment = artifacts.get("race_treatment_results") + table2 = ( + build_paper_table2_comparison(race_treatment) + if _has_columns( + race_treatment, + {"treatment", 
"n_black", "n_white", "median_black", "median_white", "pvalue"}, + ) + and not race_treatment.empty + else pd.DataFrame() + ) + table3 = build_paper_table3_snapshot(feature_weight_summaries) + if not autopsy_proxy_enabled and not table3.empty and "proxy_model" in table3.columns: + table3 = table3.loc[table3["proxy_model"] != "autopsy"].reset_index(drop=True) + acuity_correlations = artifacts.get("acuity_correlations") + table4 = ( + build_paper_table4_comparison(acuity_correlations) + if _has_columns(acuity_correlations, {"feature_a", "feature_b", "correlation"}) + else pd.DataFrame() + ) + downstream_auc_results = artifacts.get("downstream_auc_results") + table5 = ( + build_paper_table5_comparison(downstream_auc_results) + if _has_columns( + downstream_auc_results, + {"task", "configuration", "n_rows", "auc_mean", "auc_std"}, + ) + else pd.DataFrame() + ) + table6_source = _ensure_downstream_weight_results(artifacts, repetitions=repetitions) + table6 = build_paper_table6_comparison(table6_source) + if not autopsy_proxy_enabled and not table6.empty and "feature" in table6.columns: + table6 = table6.loc[table6["feature"] != "autopsy"].reset_index(drop=True) + + lines = [ + "Run Table Results", + f"dataset_prepare_mode: {dataset_prepare_mode}", + f"autopsy_proxy_enabled: {autopsy_proxy_enabled}", + "", + ] + + if not table1.empty: + lines.append("Table 1") + for row in table1.itertuples(index=False): + lines.append(f"- {row.metric}") + lines.append(f" {row.race}: {row.run_value}") + lines.append("") + + if not table2.empty: + lines.append("Table 2") + for row in table2.itertuples(index=False): + lines.append(f"- {row.treatment}") + lines.append( + f" BLACK: n={int(row.run_n_black)}, median={float(row.run_median_black):.1f}" + ) + lines.append( + f" WHITE: n={int(row.run_n_white)}, median={float(row.run_median_white):.1f}" + ) + if not pd.isna(row.run_pvalue): + lines.append(f" pvalue: {float(row.run_pvalue)}") + lines.append("") + + if not table3.empty: + 
lines.append("Table 3") + for proxy_model in table3["proxy_model"].drop_duplicates().tolist(): + lines.append(f"- {proxy_model}") + proxy_rows = table3.loc[table3["proxy_model"] == proxy_model] + for direction in ("positive", "negative"): + direction_rows = proxy_rows.loc[proxy_rows["direction"] == direction] + if direction_rows.empty: + continue + lines.append(f" {direction}:") + for row in direction_rows.itertuples(index=False): + lines.append( + f" #{int(row.rank)}: {row.feature} = {float(row.weight):.4f}" + ) + lines.append("") + + if not table4.empty: + lines.append("Table 4") + for row in table4.itertuples(index=False): + lines.append( + f"- {row.feature_a} vs {row.feature_b}: {float(row.run_correlation):.3f}" + ) + lines.append("") + + if not table5.empty: + lines.append("Table 5") + for row in table5.itertuples(index=False): + lines.append(f"- {row.task} | {row.configuration}") + lines.append(f" n_rows: {int(row.run_n_rows)}") + lines.append(f" auc_mean: {float(row.run_auc_mean):.3f}") + lines.append(f" auc_std: {float(row.run_auc_std):.3f}") + lines.append("") + + if not table6.empty: + lines.append("Table 6") + for task_name in table6["task"].drop_duplicates().tolist(): + lines.append(f"- {task_name}") + task_rows = table6.loc[table6["task"] == task_name] + for row in task_rows.itertuples(index=False): + lines.append( + f" {row.feature}: mean={float(row.run_weight_mean):.3f}, std={float(row.run_weight_std):.3f}" + ) + lines.append("") + + return "\n".join(lines) + + +def write_run_table_summary_artifacts( + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], + *, + output_dir: Path, + repetitions: int, +) -> None: + """Write a run-only Table 1-6 summary without paper-vs-run formatting.""" + + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / "run_table_summary.txt").write_text( + _render_run_table_summary(artifacts, repetitions=repetitions) + "\n", + encoding="utf-8", + ) + + +def build_paper_comparison_outputs( + 
artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], + *, + repetitions: int, +) -> dict[str, pd.DataFrame | dict[str, object]]: + """Build paper-aligned comparison tables from an example run.""" + + validation_summary = artifacts.get("validation_summary", {}) + autopsy_proxy_enabled = True + if isinstance(validation_summary, dict): + autopsy_proxy_enabled = bool(validation_summary.get("autopsy_proxy_enabled", True)) + + feature_weight_summaries = artifacts.get("feature_weight_summaries", {}) + if not isinstance(feature_weight_summaries, dict): + feature_weight_summaries = {} + + table1 = build_paper_table1_comparison(artifacts["eol_cohort"]) if isinstance(artifacts.get("eol_cohort"), pd.DataFrame) else pd.DataFrame() + race_treatment = artifacts.get("race_treatment_results") + table2 = build_paper_table2_comparison(race_treatment) if isinstance(race_treatment, pd.DataFrame) and not race_treatment.empty else pd.DataFrame() + table3_comparison = build_paper_table3_comparison(feature_weight_summaries) + table3_snapshot = build_paper_table3_snapshot(feature_weight_summaries) + table4 = build_paper_table4_comparison(artifacts["acuity_correlations"]) if isinstance(artifacts.get("acuity_correlations"), pd.DataFrame) else pd.DataFrame() + table5 = build_paper_table5_comparison(artifacts["downstream_auc_results"]) if isinstance(artifacts.get("downstream_auc_results"), pd.DataFrame) else pd.DataFrame() + table6_source = _ensure_downstream_weight_results(artifacts, repetitions=repetitions) + table6 = build_paper_table6_comparison(table6_source) + if not autopsy_proxy_enabled and not table6.empty and "feature" in table6.columns: + table6 = table6.loc[table6["feature"] != "autopsy"].reset_index(drop=True) + + summary = { + "paper_url": PAPER_URL, + "paper_pdf_url": PAPER_PDF_URL, + "table1_rows": int(len(table1)), + "table2_rows": int(len(table2)), + "table3_comparison_rows": int(len(table3_comparison)), + "table3_snapshot_rows": 
int(len(table3_snapshot)), + "table4_rows": int(len(table4)), + "table5_rows": int(len(table5)), + "table6_rows": int(len(table6)), + "table2_max_abs_delta_median": ( + float( + max( + table2["delta_median_black"].abs().max(), + table2["delta_median_white"].abs().max(), + ) + ) + if not table2.empty + else None + ), + "table3_comparison_features_found": ( + int(table3_comparison["run_feature_found"].sum()) + if not table3_comparison.empty + else 0 + ), + "table3_comparison_features_total": int(len(table3_comparison)), + "table3_comparison_max_abs_delta": ( + float(table3_comparison["delta_weight"].dropna().abs().max()) + if not table3_comparison.empty and table3_comparison["delta_weight"].notna().any() + else None + ), + "table4_max_abs_delta": ( + float(table4["delta_correlation"].abs().max()) if not table4.empty else None + ), + "table5_max_abs_delta": ( + float(table5["delta_auc_mean"].abs().max()) if not table5.empty else None + ), + "table6_max_abs_delta": ( + float(table6["delta_weight_mean"].abs().max()) if not table6.empty else None + ), + } + + return { + "summary": summary, + "table1_comparison": table1, + "table2_comparison": table2, + "table3_comparison": table3_comparison, + "table3_snapshot": table3_snapshot, + "table4_comparison": table4, + "table5_comparison": table5, + "table6_comparison": table6, + } + + +def write_paper_comparison_artifacts( + comparison_outputs: dict[str, pd.DataFrame | dict[str, object]], + output_dir: Path, + *, + include_summary: bool = True, +) -> None: + """Write paper comparison tables and summary next to the example deliverables.""" + + output_dir.mkdir(parents=True, exist_ok=True) + for name, artifact in comparison_outputs.items(): + if isinstance(artifact, pd.DataFrame): + artifact.to_csv(output_dir / f"{name}.csv", index=False) + elif isinstance(artifact, dict): + (output_dir / f"{name}.json").write_text(json.dumps(artifact, indent=2)) + if include_summary: + (output_dir / "paper_comparison_summary.txt").write_text( + 
_render_paper_comparison_summary(comparison_outputs) + "\n", + encoding="utf-8", + ) + + +def _render_paper_comparison_summary( + comparison_outputs: dict[str, pd.DataFrame | dict[str, object]], +) -> str: + lines: list[str] = [] + + summary = comparison_outputs.get("summary", {}) + if isinstance(summary, dict): + lines.append("Paper comparison summary:") + for key in ( + "table1_rows", + "table2_rows", + "table3_snapshot_rows", + "table4_rows", + "table5_rows", + "table6_rows", + "table4_max_abs_delta", + "table5_max_abs_delta", + "table6_max_abs_delta", + ): + value = summary.get(key) + if value is not None: + lines.append(f" {key}: {value}") + + table1 = comparison_outputs.get("table1_comparison") + if isinstance(table1, pd.DataFrame) and not table1.empty: + lines.append("") + lines.append("Table 1 vs Paper:") + for row in table1.itertuples(index=False): + lines.append(f" {row.metric} | {row.race} | paper={row.paper_value} | run={row.run_value}") + + table2 = comparison_outputs.get("table2_comparison") + if isinstance(table2, pd.DataFrame) and not table2.empty: + lines.append("") + lines.append("Table 2 vs Paper:") + for row in table2.itertuples(index=False): + lines.append( + " " + f"{row.treatment} | " + f"black n {int(row.paper_n_black)}->{int(row.run_n_black)}, median {row.paper_median_black:.1f}->{row.run_median_black:.1f} | " + f"white n {int(row.paper_n_white)}->{int(row.run_n_white)}, median {row.paper_median_white:.1f}->{row.run_median_white:.1f}" + ) + + table3 = comparison_outputs.get("table3_comparison") + if isinstance(table3, pd.DataFrame) and not table3.empty: + lines.append("") + lines.append("Table 3 vs Paper:") + for row in table3.itertuples(index=False): + run_weight = "missing" if pd.isna(row.run_weight) else f"{float(row.run_weight):.4f}" + lines.append( + " " + f"{row.proxy_model} | {row.direction} #{int(row.rank)} | {row.paper_feature} | " + f"paper={float(row.paper_weight):.4f} | run={run_weight} | found={bool(row.run_feature_found)}" + ) 
+ + table4 = comparison_outputs.get("table4_comparison") + if isinstance(table4, pd.DataFrame) and not table4.empty: + lines.append("") + lines.append("Table 4 vs Paper:") + for row in table4.itertuples(index=False): + lines.append( + " " + f"{row.feature_a} vs {row.feature_b} | " + f"paper={float(row.paper_correlation):.3f} | run={float(row.run_correlation):.3f}" + ) + + table5 = comparison_outputs.get("table5_comparison") + if isinstance(table5, pd.DataFrame) and not table5.empty: + lines.append("") + lines.append("Table 5 vs Paper:") + for row in table5.itertuples(index=False): + lines.append( + " " + f"{row.task} | {row.configuration} | " + f"n {int(row.paper_n_rows)}->{int(row.run_n_rows)} | " + f"auc {float(row.paper_auc_mean):.3f}->{float(row.run_auc_mean):.3f}" + ) + + table6 = comparison_outputs.get("table6_comparison") + if isinstance(table6, pd.DataFrame) and not table6.empty: + lines.append("") + lines.append("Table 6 vs Paper:") + for row in table6.itertuples(index=False): + lines.append( + " " + f"{row.task} | {row.feature} | " + f"paper={float(row.paper_weight_mean):.3f} | run={float(row.run_weight_mean):.3f}" + ) + + return "\n".join(lines) + + +def _print_paper_comparison_summary( + comparison_outputs: dict[str, pd.DataFrame | dict[str, object]], +) -> None: + rendered = _render_paper_comparison_summary(comparison_outputs) + if rendered: + print() + print(rendered) + + +def _log_stage(stage_start: float, pipeline_start: float, message: str) -> None: + """Print a timing log line for a pipeline stage.""" + elapsed_stage = time.time() - stage_start + elapsed_total = time.time() - pipeline_start + print(f"[{elapsed_total:7.1f}s total | {elapsed_stage:6.1f}s] {message}", flush=True) + + +class _RouteSettings: + def __init__( + self, + *, + mode_name: str, + autopsy_enabled: bool, + autopsy_label_mode: str, + code_status_mode: str, + score_columns: list[str] | None, + feature_configurations: dict[str, list[str]] | None, + downstream_estimator_mode: str, 
+ downstream_estimator_factory_resolver: object | None, + ) -> None: + self.mode_name = mode_name + self.autopsy_enabled = autopsy_enabled + self.autopsy_label_mode = autopsy_label_mode + self.code_status_mode = code_status_mode + self.score_columns = score_columns + self.feature_configurations = feature_configurations + self.downstream_estimator_mode = downstream_estimator_mode + self.downstream_estimator_factory_resolver = downstream_estimator_factory_resolver + + +def _current_run_timestamp() -> str: + """Return the timestamp suffix used for managed run archive directories.""" + + return datetime.now().strftime("%Y%m%d_%H%M%S") + + +def _managed_run_route_label(route_settings: _RouteSettings) -> str: + """Return the user-facing route label used in managed run directory names.""" + + return "Paperlike" if route_settings.mode_name == "paper_like" else "normal" + + +def _build_managed_run_name(route_settings: _RouteSettings, timestamp: str) -> str: + """Return the managed run directory name for the given route and timestamp.""" + + return f"EOL_{_managed_run_route_label(route_settings)}_{timestamp}" + + +def _prepare_managed_run_directories( + *, + result_root: Path, + route_settings: _RouteSettings, + output_dir: Path | None, + stream_cache_dir: Path | None, +) -> dict[str, Path | str]: + """Create a managed run archive directory and resolve default output/cache paths.""" + + timestamp = _current_run_timestamp() + base_name = _build_managed_run_name(route_settings, timestamp) + run_name = base_name + run_dir = result_root / run_name + suffix = 1 + while run_dir.exists(): + run_name = f"{base_name}_{suffix:02d}" + run_dir = result_root / run_name + suffix += 1 + + run_dir.mkdir(parents=True, exist_ok=False) + resolved_output_dir = output_dir if output_dir is not None else run_dir / "result" + resolved_stream_cache_dir = ( + stream_cache_dir if stream_cache_dir is not None else run_dir / "cache" + ) + resolved_output_dir.mkdir(parents=True, exist_ok=True) + 
resolved_stream_cache_dir.mkdir(parents=True, exist_ok=True) + return { + "run_name": run_name, + "run_dir": run_dir, + "output_dir": resolved_output_dir, + "stream_cache_dir": resolved_stream_cache_dir, + } + + +def _collect_core_artifact_shapes( + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], +) -> dict[str, list[int]]: + """Collect the core DataFrame shapes used in run summaries and manifests.""" + + shapes: dict[str, list[int]] = {} + for key in ( + "base_admissions", + "all_cohort", + "eol_cohort", + "chartevent_feature_matrix", + "note_labels", + "mistrust_scores", + "final_model_table", + ): + df = artifacts.get(key) + if isinstance(df, pd.DataFrame): + shapes[key] = [int(df.shape[0]), int(df.shape[1])] + return shapes + + +def _render_managed_run_summary( + *, + run_name: str, + run_dir: Path, + route_settings: _RouteSettings, + args: argparse.Namespace, + resolved_output_dir: Path, + resolved_stream_cache_dir: Path, + started_at: datetime, + finished_at: datetime, + total_runtime_seconds: float, + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], +) -> str: + """Render a human-readable managed run summary.""" + + validation_summary = artifacts.get("validation_summary", {}) + lines = [ + "EOL managed run summary:", + f"managed_run_name: {run_name}", + f"managed_run_dir: {run_dir}", + f"route_mode: {route_settings.mode_name}", + f"autopsy_proxy_enabled: {route_settings.autopsy_enabled}", + f"started_at: {started_at.isoformat(timespec='seconds')}", + f"finished_at: {finished_at.isoformat(timespec='seconds')}", + f"total_runtime_seconds: {total_runtime_seconds:.3f}", + f"result_dir: {resolved_output_dir}", + f"stream_cache_base_dir: {resolved_stream_cache_dir}", + ( + f"paper_comparison_summary_file: {run_dir / 'paper_comparison_summary.txt'}" + if args.compare_to_paper + else "paper_comparison_summary_file: disabled" + ), + f"run_table_summary_file: {run_dir / 'run_table_summary.txt'}", + 
f"reuse_intermediates: {args.reuse_intermediates}", + f"compare_to_paper: {args.compare_to_paper}", + f"paper_like_dataset_prepare: {args.paper_like_dataset_prepare}", + f"repetitions: {args.repetitions}", + f"note_chunksize: {args.note_chunksize}", + f"chartevent_chunksize: {args.chartevent_chunksize}", + f"command: {' '.join(sys.argv)}", + "", + "Validation summary:", + ] + + if isinstance(validation_summary, dict): + for key, value in validation_summary.items(): + lines.append(f" {key}: {value}") + + lines.append("") + lines.append("Core artifact shapes:") + for key, shape in _collect_core_artifact_shapes(artifacts).items(): + lines.append(f" {key}: ({shape[0]}, {shape[1]})") + + return "\n".join(lines) + "\n" + + +def _write_managed_run_artifacts( + *, + run_name: str, + run_dir: Path, + route_settings: _RouteSettings, + args: argparse.Namespace, + resolved_output_dir: Path, + resolved_stream_cache_dir: Path, + started_at: datetime, + finished_at: datetime, + total_runtime_seconds: float, + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], +) -> None: + """Write managed run archive files under EOL_Result/EOL__.""" + + summary_text = _render_managed_run_summary( + run_name=run_name, + run_dir=run_dir, + route_settings=route_settings, + args=args, + resolved_output_dir=resolved_output_dir, + resolved_stream_cache_dir=resolved_stream_cache_dir, + started_at=started_at, + finished_at=finished_at, + total_runtime_seconds=total_runtime_seconds, + artifacts=artifacts, + ) + (run_dir / "RUN_SUMMARY.txt").write_text(summary_text, encoding="utf-8") + (run_dir / "RUN_TIME.txt").write_text( + "\n".join( + [ + "EOL run timing:", + f"managed_run_name: {run_name}", + f"started_at: {started_at.isoformat(timespec='seconds')}", + f"finished_at: {finished_at.isoformat(timespec='seconds')}", + f"total_runtime_seconds: {total_runtime_seconds:.3f}", + ] + ) + + "\n", + encoding="utf-8", + ) + + manifest = { + "managed_run_name": run_name, + 
"managed_run_dir": str(run_dir), + "route_mode": route_settings.mode_name, + "autopsy_proxy_enabled": route_settings.autopsy_enabled, + "started_at": started_at.isoformat(timespec="seconds"), + "finished_at": finished_at.isoformat(timespec="seconds"), + "total_runtime_seconds": round(float(total_runtime_seconds), 6), + "result_dir": str(resolved_output_dir), + "stream_cache_base_dir": str(resolved_stream_cache_dir), + "reuse_intermediates": ( + str(args.reuse_intermediates) if args.reuse_intermediates is not None else None + ), + "compare_to_paper": bool(args.compare_to_paper), + "paper_like_dataset_prepare": bool(args.paper_like_dataset_prepare), + "repetitions": int(args.repetitions), + "note_chunksize": int(args.note_chunksize), + "chartevent_chunksize": int(args.chartevent_chunksize), + "command": sys.argv, + "validation_summary": artifacts.get("validation_summary", {}), + "core_artifact_shapes": _collect_core_artifact_shapes(artifacts), + } + (run_dir / "RUN_MANIFEST.json").write_text( + json.dumps(manifest, indent=2), + encoding="utf-8", + ) + write_run_table_summary_artifacts( + artifacts, + output_dir=run_dir, + repetitions=int(args.repetitions), + ) + + comparison_outputs = artifacts.get("paper_comparison") + if args.compare_to_paper and isinstance(comparison_outputs, dict): + (run_dir / "paper_comparison_summary.txt").write_text( + _render_paper_comparison_summary(comparison_outputs) + "\n", + encoding="utf-8", + ) + + +def _build_route_settings(paper_like_dataset_prepare: bool) -> _RouteSettings: + if paper_like_dataset_prepare: + return _RouteSettings( + mode_name="paper_like", + autopsy_enabled=True, + autopsy_label_mode="paper_like", + code_status_mode="paper_like", + score_columns=None, + feature_configurations=None, + downstream_estimator_mode="default", + downstream_estimator_factory_resolver=None, + ) + + return _RouteSettings( + mode_name="default", + autopsy_enabled=False, + autopsy_label_mode="corrected", + code_status_mode="corrected", + 
score_columns=_normal_route_score_columns(), + feature_configurations=_normal_route_feature_configurations(), + downstream_estimator_mode="task_balanced_logistic_cv", + downstream_estimator_factory_resolver=_normal_route_downstream_estimator_factory_resolver(), + ) + + +def _resolve_stage_cache_dir( + *, + output_dir: Path | None, + stream_cache_dir: Path | None, + route_settings: _RouteSettings, +) -> Path | None: + """Return the directory used for streamed-stage checkpoint CSVs.""" + + if stream_cache_dir is not None: + return Path(stream_cache_dir) / route_settings.mode_name + return output_dir + + +def _has_reuse_cache_files(directory: Path) -> bool: + required = ( + "note_corpus.csv", + "note_labels.csv", + "chartevent_feature_matrix.csv", + "code_status_targets.csv", + ) + return all((directory / filename).exists() for filename in required) + + +def _resolve_reuse_dir( + reuse_intermediates: Path | None, + *, + route_settings: _RouteSettings, +) -> Path | None: + """Resolve the reuse directory, allowing a base cache dir with mode subfolders.""" + + if reuse_intermediates is None: + return None + direct = Path(reuse_intermediates) + if _has_reuse_cache_files(direct): + return direct + mode_dir = direct / route_settings.mode_name + if _has_reuse_cache_files(mode_dir): + return mode_dir + return direct + + +def _write_stage_cache_frame( + output_dir: Path | None, + filename: str, + frame: pd.DataFrame, +) -> None: + """Persist a reusable CSV artifact as soon as its stage completes.""" + + if output_dir is None: + return + output_dir.mkdir(parents=True, exist_ok=True) + frame.to_csv(output_dir / filename, index=False) + + +def _disable_autopsy_scores(mistrust_scores: pd.DataFrame) -> pd.DataFrame: + """Return a schema-stable score table with the autopsy proxy disabled.""" + + if "autopsy_score_z" not in mistrust_scores.columns: + return mistrust_scores + adjusted = mistrust_scores.copy() + adjusted["autopsy_score_z"] = 0.0 + return adjusted + + +def 
_normal_route_score_columns() -> list[str]: + return ["noncompliance_score_z", "negative_sentiment_score_z"] + + +def _normal_route_feature_configurations() -> dict[str, list[str]]: + configs = get_downstream_feature_configurations() + adjusted: dict[str, list[str]] = {} + for name, columns in configs.items(): + if name == "Baseline + Autopsy": + continue + adjusted[name] = [column for column in columns if column != "autopsy_score_z"] + return adjusted + + +def _normal_route_downstream_estimator_factory_resolver(): + """Return task-specific balanced LogisticRegressionCV factories for the corrected route.""" + + task_specs = { + "Left AMA": { + "Cs": [0.01, 0.03, 0.1, 0.3], + "class_weight": "balanced", + "scoring": "roc_auc", + }, + "Code Status": { + "Cs": [0.01, 0.03, 0.1, 0.3], + "class_weight": "balanced", + "scoring": "roc_auc", + }, + "In-hospital mortality": { + "Cs": [0.03, 0.1, 0.3, 1.0], + "class_weight": "balanced", + "scoring": "roc_auc", + }, + } + cached_factories: dict[str, object] = {} + + def _resolver(task_name: str, _config_name: str): + spec = task_specs.get(task_name) + if spec is None: + return None + if task_name not in cached_factories: + cached_factories[task_name] = build_logistic_cv_estimator_factory(**spec) + return cached_factories[task_name] + + return _resolver + + +def _filter_metric_frame(frame: pd.DataFrame, metric: str) -> pd.DataFrame: + if "metric" not in frame.columns: + return frame + return frame.loc[frame["metric"] != metric].reset_index(drop=True) + + +def _disable_autopsy_outputs( + model_outputs: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], +) -> dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]]: + """Strip autopsy-specific analysis outputs from the default route.""" + + adjusted = dict(model_outputs) + + feature_weight_summaries = adjusted.get("feature_weight_summaries") + if isinstance(feature_weight_summaries, dict): + adjusted["feature_weight_summaries"] = { + name: table 
+ for name, table in feature_weight_summaries.items() + if name != "autopsy" + } + + for key in ("race_gap_results", "trust_treatment_results", "trust_treatment_by_acuity_results"): + frame = adjusted.get(key) + if isinstance(frame, pd.DataFrame): + adjusted[key] = _filter_metric_frame(frame, "autopsy_score_z") + + cdf_plot_data = adjusted.get("trust_treatment_cdf_plot_data") + if isinstance(cdf_plot_data, dict): + adjusted["trust_treatment_cdf_plot_data"] = { + name: _filter_metric_frame(frame, "autopsy_score_z") + if isinstance(frame, pd.DataFrame) + else frame + for name, frame in cdf_plot_data.items() + } + + acuity_correlations = adjusted.get("acuity_correlations") + if isinstance(acuity_correlations, pd.DataFrame): + filtered = acuity_correlations.copy() + if {"feature_a", "feature_b"}.issubset(filtered.columns): + filtered = filtered.loc[ + (filtered["feature_a"] != "autopsy_score_z") + & (filtered["feature_b"] != "autopsy_score_z") + ] + adjusted["acuity_correlations"] = filtered.reset_index(drop=True) + + downstream_auc_results = adjusted.get("downstream_auc_results") + if ( + isinstance(downstream_auc_results, pd.DataFrame) + and "configuration" in downstream_auc_results.columns + ): + adjusted["downstream_auc_results"] = downstream_auc_results.loc[ + downstream_auc_results["configuration"] != "Baseline + Autopsy" + ].reset_index(drop=True) + + downstream_weight_results = adjusted.get("downstream_weight_results") + if ( + isinstance(downstream_weight_results, pd.DataFrame) + and "feature" in downstream_weight_results.columns + ): + adjusted["downstream_weight_results"] = downstream_weight_results.loc[ + downstream_weight_results["feature"] != "autopsy_score_z" + ].reset_index(drop=True) + + return adjusted + + +def _build_or_reuse_mistrust_scores( + *, + model: object, + feature_matrix: pd.DataFrame, + note_labels: pd.DataFrame, + note_corpus: pd.DataFrame, + reuse_dir: Path | None, + stage_cache_dir: Path | None, + pipeline_start: float, + 
autopsy_enabled: bool, +) -> pd.DataFrame: + cached_path = None if reuse_dir is None else reuse_dir / "mistrust_scores.csv" + if cached_path is not None and cached_path.exists(): + t0 = time.time() + print(f"[REUSE] Loading mistrust_scores from {reuse_dir}", flush=True) + mistrust_scores = pd.read_csv(cached_path, low_memory=False) + _log_stage(t0, pipeline_start, f"Reused mistrust scores ({len(mistrust_scores)} rows)") + return mistrust_scores + + if hasattr(model, "estimator_factory") and hasattr(model, "sentiment_fn"): + estimator_factory = getattr(model, "estimator_factory") + sentiment_fn = getattr(model, "sentiment_fn") + t_total = time.time() + + t0 = time.time() + noncompliance = build_noncompliance_mistrust_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + estimator_factory=estimator_factory, + ) + _log_stage(t0, pipeline_start, f"Built noncompliance proxy scores ({len(noncompliance)} rows)") + + if autopsy_enabled: + t0 = time.time() + autopsy = build_autopsy_mistrust_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + estimator_factory=estimator_factory, + ) + _log_stage(t0, pipeline_start, f"Built autopsy proxy scores ({len(autopsy)} rows)") + else: + autopsy = pd.DataFrame( + { + "hadm_id": noncompliance["hadm_id"].astype(int), + "autopsy_score": 0.0, + } + ) + + t0 = time.time() + sentiment = build_negative_sentiment_mistrust_scores( + note_corpus=note_corpus, + sentiment_fn=sentiment_fn, + ) + _log_stage(t0, pipeline_start, f"Built negative sentiment scores ({len(sentiment)} rows)") + + merged = ( + noncompliance.merge(autopsy, on="hadm_id", how="inner", validate="one_to_one") + .merge(sentiment, on="hadm_id", how="inner", validate="one_to_one") + .sort_values("hadm_id") + ) + mistrust_scores = z_normalize_scores( + merged, + columns=["noncompliance_score", "autopsy_score", "negative_sentiment_score"], + ).rename( + columns={ + "noncompliance_score": "noncompliance_score_z", + "autopsy_score": "autopsy_score_z", 
+ "negative_sentiment_score": "negative_sentiment_score_z", + } + ).reset_index(drop=True) + _write_stage_cache_frame(stage_cache_dir, "mistrust_scores.csv", mistrust_scores) + _log_stage(t_total, pipeline_start, "Built mistrust scores (proxy models + sentiment)") + return mistrust_scores + + t0 = time.time() + mistrust_scores = model.build_mistrust_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + ) + _write_stage_cache_frame(stage_cache_dir, "mistrust_scores.csv", mistrust_scores) + _log_stage(t0, pipeline_start, "Built mistrust scores (proxy models + sentiment)") + return mistrust_scores + + +def _build_or_reuse_note_artifacts( + *, + noteevents_csv_path: Path, + all_cohort: pd.DataFrame, + reuse_dir: Path | None, + stage_cache_dir: Path | None, + route_settings: _RouteSettings, + note_chunksize: int, + pipeline_start: float, +) -> tuple[pd.DataFrame, pd.DataFrame, list[int], pd.DataFrame]: + can_reuse = ( + reuse_dir is not None + and (reuse_dir / "note_corpus.csv").exists() + and (reuse_dir / "note_labels.csv").exists() + ) + + t0 = time.time() + if can_reuse: + print(f"[REUSE] Loading note_corpus & note_labels from {reuse_dir}", flush=True) + note_corpus = pd.read_csv(reuse_dir / "note_corpus.csv", low_memory=False) + note_labels = pd.read_csv(reuse_dir / "note_labels.csv", low_memory=False) + note_present_hadm_ids = _note_present_hadm_ids(note_corpus) + filtered_all_cohort = all_cohort.loc[all_cohort["hadm_id"].isin(note_present_hadm_ids)].copy() + _log_stage( + t0, + pipeline_start, + f"Reused note artifacts ({len(note_corpus)} corpus rows, {len(note_labels)} label rows)", + ) + return note_corpus, note_labels, note_present_hadm_ids, filtered_all_cohort + + note_corpus = build_note_corpus_from_csv( + noteevents_csv_path=noteevents_csv_path, + all_hadm_ids=all_cohort["hadm_id"], + categories=None, + chunksize=note_chunksize, + ) + note_present_hadm_ids = _note_present_hadm_ids(note_corpus) + filtered_all_cohort 
= all_cohort.loc[all_cohort["hadm_id"].isin(note_present_hadm_ids)].copy() + note_corpus = note_corpus.loc[note_corpus["hadm_id"].isin(note_present_hadm_ids)].copy() + _write_stage_cache_frame(stage_cache_dir, "note_corpus.csv", note_corpus) + _log_stage(t0, pipeline_start, f"Streamed note corpus ({len(note_corpus)} rows)") + + t0 = time.time() + note_labels = build_note_labels_from_csv( + noteevents_csv_path=noteevents_csv_path, + all_hadm_ids=note_present_hadm_ids, + autopsy_label_mode=route_settings.autopsy_label_mode, + chunksize=note_chunksize, + ) + _write_stage_cache_frame(stage_cache_dir, "note_labels.csv", note_labels) + _log_stage(t0, pipeline_start, f"Streamed note labels ({len(note_labels)} rows)") + return note_corpus, note_labels, note_present_hadm_ids, filtered_all_cohort + + +def _build_or_reuse_chartevent_artifacts( + *, + chartevents_csv_path: Path, + d_items: pd.DataFrame, + note_present_hadm_ids: list[int], + reuse_dir: Path | None, + stage_cache_dir: Path | None, + route_settings: _RouteSettings, + chartevent_chunksize: int, + pipeline_start: float, +) -> tuple[pd.DataFrame, pd.DataFrame]: + can_reuse = ( + reuse_dir is not None + and (reuse_dir / "chartevent_feature_matrix.csv").exists() + and (reuse_dir / "code_status_targets.csv").exists() + ) + + t0 = time.time() + if can_reuse: + print(f"[REUSE] Loading feature_matrix & code_status_targets from {reuse_dir}", flush=True) + feature_matrix = pd.read_csv(reuse_dir / "chartevent_feature_matrix.csv", low_memory=False) + code_status_targets = pd.read_csv(reuse_dir / "code_status_targets.csv", low_memory=False) + _log_stage( + t0, + pipeline_start, + f"Reused chartevent artifacts ({len(feature_matrix)} feature rows, {len(code_status_targets)} target rows)", + ) + return feature_matrix, code_status_targets + + feature_matrix, code_status_targets = build_chartevent_artifacts_from_csv( + chartevents_csv_path=chartevents_csv_path, + d_items=d_items, + all_hadm_ids=note_present_hadm_ids, + 
chunksize=chartevent_chunksize, + paper_like=route_settings.autopsy_enabled, + code_status_mode=route_settings.code_status_mode, + ) + _write_stage_cache_frame(stage_cache_dir, "chartevent_feature_matrix.csv", feature_matrix) + _write_stage_cache_frame(stage_cache_dir, "code_status_targets.csv", code_status_targets) + _log_stage(t0, pipeline_start, f"Streamed chartevents ({len(feature_matrix)} feature rows)") + return feature_matrix, code_status_targets + + def build_eol_mistrust_outputs( root: Path, repetitions: int = 100, include_downstream_weight_summary: bool = False, include_cdf_plot_data: bool = False, + compare_to_paper: bool = False, output_dir: Path | None = None, + stream_cache_dir: Path | None = None, note_chunksize: int = 100_000, chartevent_chunksize: int = 500_000, + reuse_intermediates: Path | None = None, + paper_like_dataset_prepare: bool = False, ) -> dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]]: - """Run the local end-to-end EOL mistrust workflow over downloaded CSV files.""" + """Run the local end-to-end EOL mistrust workflow over downloaded CSV files. + + When *reuse_intermediates* points to a previous output directory that + contains cached CSV artifacts (note_corpus, note_labels, + chartevent_feature_matrix, code_status_targets, optionally + mistrust_scores), the expensive CSV streaming stages are skipped and + those frames are loaded from disk instead. Everything downstream is + recomputed unless a reusable ``mistrust_scores.csv`` is also present. 
+ """ + t_pipeline = time.time() + route_settings = _build_route_settings(paper_like_dataset_prepare) + + # ------------------------------------------------------------------ + # Stage 1: load small raw tables & materialized views (fast) + # ------------------------------------------------------------------ + t0 = time.time() raw_tables, materialized_views = load_eol_mistrust_tables(root) + _log_stage(t0, t_pipeline, "Loaded raw tables & materialized views") + validation = { "schema_name": "mimiciii", "database_flavor": "postgresql", "raw_tables": sorted(raw_tables.keys()), "materialized_views": sorted(materialized_views.keys()), + "dataset_prepare_mode": route_settings.mode_name, + "autopsy_proxy_enabled": route_settings.autopsy_enabled, } + _stage_cache_dir = _resolve_stage_cache_dir( + output_dir=output_dir, + stream_cache_dir=stream_cache_dir, + route_settings=route_settings, + ) + _reuse_dir = _resolve_reuse_dir( + reuse_intermediates, + route_settings=route_settings, + ) + if _stage_cache_dir is not None: + validation["stream_cache_dir"] = str(_stage_cache_dir) admissions = raw_tables["admissions"] patients = raw_tables["patients"] @@ -118,43 +1784,103 @@ def build_eol_mistrust_outputs( noteevents_csv_path = root / EVENT_TABLE_PATHS["noteevents"] chartevents_csv_path = root / EVENT_TABLE_PATHS["chartevents"] + t0 = time.time() + validation_raw_tables = dict(raw_tables) + if "noteevents" not in validation_raw_tables and noteevents_csv_path.exists(): + validation_raw_tables["noteevents"] = _read_csv_probe( + root, + EVENT_TABLE_PATHS["noteevents"], + ) + if "chartevents" not in validation_raw_tables and chartevents_csv_path.exists(): + validation_raw_tables["chartevents"] = _read_csv_probe( + root, + EVENT_TABLE_PATHS["chartevents"], + ) + if {"noteevents", "chartevents"}.issubset(validation_raw_tables): + validation = validate_database_environment( + validation_raw_tables, + materialized_views, + schema_name="mimiciii", + database_flavor="postgresql", + ) + 
validation["dataset_prepare_mode"] = route_settings.mode_name + validation["autopsy_proxy_enabled"] = route_settings.autopsy_enabled + if _stage_cache_dir is not None: + validation["stream_cache_dir"] = str(_stage_cache_dir) + _log_stage(t0, t_pipeline, "Validated database environment") + + # ------------------------------------------------------------------ + # Stage 2: build cohorts & demographics (fast) + # ------------------------------------------------------------------ + t0 = time.time() base_admissions = build_base_admissions(admissions, patients) - demographics = build_demographics_table(base_admissions) + demographics = build_demographics_table( + base_admissions, + paper_like=route_settings.autopsy_enabled, + ) all_cohort = build_all_cohort(base_admissions, icustays) eol_cohort = build_eol_cohort(base_admissions, demographics) treatment_totals = build_treatment_totals( icustays=icustays, ventdurations=materialized_views["ventdurations"], vasopressordurations=materialized_views["vasopressordurations"], + paper_like=route_settings.autopsy_enabled, ) - note_corpus = build_note_corpus_from_csv( - noteevents_csv_path=noteevents_csv_path, - all_hadm_ids=all_cohort["hadm_id"], - categories=["Discharge summary"], - chunksize=note_chunksize, - ) - note_labels = build_note_labels_from_csv( + _log_stage(t0, t_pipeline, "Built cohorts & demographics") + + # ------------------------------------------------------------------ + # Stage 3: note corpus + note labels (SLOW — stream noteevents.csv) + # ------------------------------------------------------------------ + note_corpus, note_labels, note_present_hadm_ids, all_cohort = _build_or_reuse_note_artifacts( noteevents_csv_path=noteevents_csv_path, - all_hadm_ids=all_cohort["hadm_id"], - chunksize=note_chunksize, + all_cohort=all_cohort, + reuse_dir=_reuse_dir, + stage_cache_dir=_stage_cache_dir, + route_settings=route_settings, + note_chunksize=note_chunksize, + pipeline_start=t_pipeline, ) - feature_matrix, 
code_status_targets = build_chartevent_artifacts_from_csv( + + # ------------------------------------------------------------------ + # Stage 4: chartevents feature matrix + code status (SLOW — stream chartevents.csv) + # ------------------------------------------------------------------ + feature_matrix, code_status_targets = _build_or_reuse_chartevent_artifacts( chartevents_csv_path=chartevents_csv_path, d_items=d_items, - all_hadm_ids=all_cohort["hadm_id"], - chunksize=chartevent_chunksize, + note_present_hadm_ids=note_present_hadm_ids, + reuse_dir=_reuse_dir, + stage_cache_dir=_stage_cache_dir, + route_settings=route_settings, + chartevent_chunksize=chartevent_chunksize, + pipeline_start=t_pipeline, ) + + t0 = time.time() acuity_scores = build_acuity_scores( materialized_views["oasis"], materialized_views["sapsii"], ) + _log_stage(t0, t_pipeline, "Built acuity scores") + # ------------------------------------------------------------------ + # Stage 5: mistrust model + downstream evaluation (recomputed always) + # ------------------------------------------------------------------ model = EOLMistrustModel(repetitions=repetitions) - mistrust_scores = model.build_mistrust_scores( + mistrust_scores = _build_or_reuse_mistrust_scores( + model=model, feature_matrix=feature_matrix, note_labels=note_labels, note_corpus=note_corpus, + reuse_dir=_reuse_dir, + stage_cache_dir=_stage_cache_dir, + pipeline_start=t_pipeline, + autopsy_enabled=route_settings.autopsy_enabled, ) + if not route_settings.autopsy_enabled: + mistrust_scores = _disable_autopsy_scores(mistrust_scores) + _write_stage_cache_frame(_stage_cache_dir, "mistrust_scores.csv", mistrust_scores) + + t0 = time.time() final_model_table = build_final_model_table_from_code_status_targets( demographics=demographics, all_cohort=all_cohort, @@ -162,9 +1888,13 @@ def build_eol_mistrust_outputs( code_status_targets=code_status_targets, mistrust_scores=mistrust_scores, ) + _log_stage(t0, t_pipeline, f"Built final model 
table ({len(final_model_table)} rows)") + validation["base_admissions_rows"] = int(len(base_admissions)) validation["all_cohort_rows"] = int(len(all_cohort)) validation["eol_cohort_rows"] = int(len(eol_cohort)) + validation["downstream_estimator_mode"] = route_settings.downstream_estimator_mode + t0 = time.time() model_outputs = model.run( feature_matrix=feature_matrix, note_labels=note_labels, @@ -176,7 +1906,14 @@ def build_eol_mistrust_outputs( final_model_table=final_model_table, include_downstream_weight_summary=include_downstream_weight_summary, include_cdf_plot_data=include_cdf_plot_data, + precomputed_mistrust_scores=mistrust_scores, + score_columns=route_settings.score_columns, + feature_configurations=route_settings.feature_configurations, + downstream_estimator_factory_resolver=route_settings.downstream_estimator_factory_resolver, ) + if not route_settings.autopsy_enabled: + model_outputs = _disable_autopsy_outputs(model_outputs) + _log_stage(t0, t_pipeline, "Finished model.run() (downstream evaluation)") artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]] = { "validation_summary": validation, @@ -195,6 +1932,7 @@ def build_eol_mistrust_outputs( artifacts.update(model_outputs) if output_dir is not None: + t0 = time.time() write_minimal_deliverables( { "base_admissions": base_admissions, @@ -209,13 +1947,43 @@ def build_eol_mistrust_outputs( }, output_dir=output_dir, ) + _log_stage(t0, t_pipeline, "Wrote deliverables + reuse cache to disk") + t0 = time.time() + comparison_outputs = build_paper_comparison_outputs( + artifacts, + repetitions=repetitions, + ) + artifacts["paper_comparison"] = comparison_outputs + if output_dir is not None: + write_paper_comparison_artifacts( + comparison_outputs, + output_dir=output_dir / "paper_comparison", + include_summary=compare_to_paper, + ) + _log_stage(t0, t_pipeline, "Built & wrote paper table artifacts") + + _log_stage(t_pipeline, t_pipeline, "=== Pipeline complete ===") return 
artifacts def run_task_demo(root: Path, config_path: Path) -> None: """Build a PyHealth sample dataset with the custom EOL mistrust YAML config.""" + global MIMIC3Dataset + global EOLMistrustMortalityPredictionMIMIC3 + + if MIMIC3Dataset is None: + from pyhealth.datasets import MIMIC3Dataset as _MIMIC3Dataset + + MIMIC3Dataset = _MIMIC3Dataset + if EOLMistrustMortalityPredictionMIMIC3 is None: + from pyhealth.tasks.eol_mistrust import ( + EOLMistrustMortalityPredictionMIMIC3 as _EOLMistrustMortalityPredictionMIMIC3, + ) + + EOLMistrustMortalityPredictionMIMIC3 = _EOLMistrustMortalityPredictionMIMIC3 + base_dataset = MIMIC3Dataset( root=str(root), tables=["chartevents", "noteevents", "d_items"], @@ -249,7 +2017,34 @@ def parse_args() -> argparse.Namespace: "--output-dir", type=Path, default=None, - help="Optional directory for writing the required CSV deliverables.", + help=( + "Optional directory for writing the required CSV deliverables. " + "When omitted, the script writes them under " + "result_root/EOL__/result." + ), + ) + parser.add_argument( + "--stream-cache-dir", + type=Path, + default=None, + help=( + "Optional base directory for streamed-stage reuse CSVs. " + "When set, note/chartevent checkpoints are written under " + "stream_cache_dir/{default|paper_like} as soon as each stage finishes. " + "When omitted, the script writes them under " + "result_root/EOL__/cache/{default|paper_like}." + ), + ) + parser.add_argument( + "--result-root", + type=Path, + default=DEFAULT_RESULT_ROOT, + help=( + "Managed run archive root. Each invocation creates " + "result_root/EOL_(normal|Paperlike)_ with run summaries, " + "runtime metadata, and default result/cache directories when explicit " + "paths are not provided." 
+ ), ) parser.add_argument( "--repetitions", @@ -267,6 +2062,15 @@ def parse_args() -> argparse.Namespace: action="store_true", help="Also build empirical CDF data for race-based and trust-based treatment plots.", ) + parser.add_argument( + "--compare-to-paper", + action="store_true", + help=( + "Also write the human-readable paper comparison summary and print it. " + "Structured paper comparison CSV/JSON artifacts under output_dir/paper_comparison " + "are always generated." + ), + ) parser.add_argument( "--task-demo", action="store_true", @@ -284,20 +2088,73 @@ def parse_args() -> argparse.Namespace: default=500_000, help="Chunk size for streamed chartevents processing.", ) + parser.add_argument( + "--reuse-intermediates", + type=Path, + default=None, + help=( + "Path to a previous output directory containing cached CSV artifacts " + "(note_corpus.csv, note_labels.csv, chartevent_feature_matrix.csv, " + "code_status_targets.csv). This may point either directly to the cache " + "directory or to a base stream-cache dir containing mode subfolders. " + "When set, the expensive CSV streaming stages are skipped and those " + "frames are loaded from disk instead." + ), + ) + parser.add_argument( + "--paper-like-dataset-prepare", + action="store_true", + help=( + "Use notebook-style data preparation for treatment totals and chartevent " + "feature extraction while keeping the default corrected pipeline available." 
+ ), + ) return parser.parse_args() def main() -> None: args = parse_args() + route_settings = _build_route_settings(args.paper_like_dataset_prepare) + result_root = getattr(args, "result_root", DEFAULT_RESULT_ROOT) + managed_run = _prepare_managed_run_directories( + result_root=result_root, + route_settings=route_settings, + output_dir=args.output_dir, + stream_cache_dir=args.stream_cache_dir, + ) + resolved_output_dir = managed_run["output_dir"] + resolved_stream_cache_dir = managed_run["stream_cache_dir"] + run_dir = managed_run["run_dir"] + run_name = managed_run["run_name"] + started_at = datetime.now() + total_start = time.time() artifacts = build_eol_mistrust_outputs( root=args.root, repetitions=args.repetitions, include_downstream_weight_summary=args.include_downstream_weight_summary, include_cdf_plot_data=args.include_cdf_plot_data, - output_dir=args.output_dir, + compare_to_paper=args.compare_to_paper, + output_dir=resolved_output_dir, + stream_cache_dir=resolved_stream_cache_dir, note_chunksize=args.note_chunksize, chartevent_chunksize=args.chartevent_chunksize, + reuse_intermediates=args.reuse_intermediates, + paper_like_dataset_prepare=args.paper_like_dataset_prepare, + ) + finished_at = datetime.now() + total_runtime_seconds = time.time() - total_start + _write_managed_run_artifacts( + run_name=str(run_name), + run_dir=Path(run_dir), + route_settings=route_settings, + args=args, + resolved_output_dir=Path(resolved_output_dir), + resolved_stream_cache_dir=Path(resolved_stream_cache_dir), + started_at=started_at, + finished_at=finished_at, + total_runtime_seconds=total_runtime_seconds, + artifacts=artifacts, ) print("Validation summary:") @@ -317,9 +2174,18 @@ def main() -> None: if isinstance(df, pd.DataFrame): print(f" {key}: {df.shape}") - if args.output_dir is not None: - print() - print(f"Wrote required deliverables to: {args.output_dir}") + print() + print(f"Managed run archive: {run_dir}") + print(f"Wrote required deliverables to: 
{resolved_output_dir}") + print(f"Wrote paper comparison artifacts to: {resolved_output_dir / 'paper_comparison'}") + stream_cache_path = artifacts["validation_summary"].get("stream_cache_dir") + if stream_cache_path is not None: + print(f"Streamed-stage cache directory: {stream_cache_path}") + + if args.compare_to_paper: + comparison_outputs = artifacts.get("paper_comparison") + if isinstance(comparison_outputs, dict): + _print_paper_comparison_summary(comparison_outputs) if args.task_demo: print() diff --git a/pyhealth/datasets/eol_mistrust.py b/pyhealth/datasets/eol_mistrust.py index f97209142..ca8c4dc11 100644 --- a/pyhealth/datasets/eol_mistrust.py +++ b/pyhealth/datasets/eol_mistrust.py @@ -1,112 +1,38 @@ -"""Utilities for reproducing the EOL mistrust preprocessing and modeling tables. +"""Utilities for reproducing the EOL mistrust preprocessing tables. Notes ----- -This module uses a transformers+torch sentiment backend because those -dependencies are already available in the project environment. That is a -pragmatic replacement for the original Pattern-based notebook sentiment code, -not an exact backend match. 
+This module owns dataset preparation only: +- cohort construction +- note/chartevent feature and label extraction +- treatment and acuity tables +- final admission-level modeling table assembly """ + # pylint: disable=too-many-lines -import importlib +import importlib.util import re from collections import defaultdict from pathlib import Path from typing import Callable, Iterable, Mapping, Sequence -import numpy as np import pandas as pd # pylint: disable=import-error from pyhealth.tasks.eol_mistrust import ( + CODE_STATUS_MODE_CORRECTED, + CODE_STATUS_MODE_PAPER_LIKE, + _advance_paper_like_code_status_label as _task_advance_paper_like_code_status_label, + _normalize_code_status_mode as _task_normalize_code_status_mode, build_code_status_target as _build_task_code_status_target, build_in_hospital_mortality_target as _build_task_in_hospital_mortality_target, build_left_ama_target as _build_task_left_ama_target, + is_positive_code_status_value as _task_is_positive_code_status_value, + map_ethnicity_to_race as _task_map_ethnicity_to_race, + map_insurance_to_group as _task_map_insurance_to_group, + prepare_note_text as _task_prepare_note_text, ) -_SENTIMENT_BACKEND: Callable[[str], tuple[float, float]] | None = None - - -def _load_transformers_sentiment() -> Callable[[str], tuple[float, float]]: - """Load the project-standard transformers sentiment pipeline. - - This intentionally uses an existing transformers+torch dependency instead of - trying to install the original Pattern backend from the notebooks. 
- """ - - transformers_module = importlib.import_module("transformers") - torch_module = importlib.import_module("torch") - - pipeline_factory = getattr(transformers_module, "pipeline", None) - if not callable(pipeline_factory): - raise ModuleNotFoundError("transformers.pipeline is unavailable in the current environment.") - - try: # pragma: no cover - logging surface depends on transformers version - transformers_logging = importlib.import_module("transformers.utils.logging") - set_verbosity_error = getattr(transformers_logging, "set_verbosity_error", None) - if callable(set_verbosity_error): - set_verbosity_error() - except Exception: - pass - - use_cuda = bool(getattr(torch_module, "cuda", None) and torch_module.cuda.is_available()) - device = 0 if use_cuda else -1 - classifier = pipeline_factory( - "sentiment-analysis", - model="distilbert/distilbert-base-uncased-finetuned-sst-2-english", - device=device, - ) - - def _transformers_sentiment(text: str) -> tuple[float, float]: - cleaned = " ".join(str(text).split()) - if not cleaned: - return (0.0, 0.0) - result = classifier(cleaned[:2048], truncation=True)[0] - label = str(result.get("label", "")).upper() - score = float(result.get("score", 0.0)) - polarity = score if "POS" in label else -score - return (polarity, 0.0) - - return _transformers_sentiment - - -def _default_sentiment_backend(text: str) -> tuple[float, float]: - """Resolve and cache the default transformers sentiment backend lazily.""" - - global _SENTIMENT_BACKEND - if _SENTIMENT_BACKEND is None: - _SENTIMENT_BACKEND = _load_transformers_sentiment() - return _SENTIMENT_BACKEND(text) - - -pattern_sentiment = _default_sentiment_backend - -try: - from sklearn.linear_model import LogisticRegression # pylint: disable=import-error -except ModuleNotFoundError: # pragma: no cover - lightweight test env fallback - class LogisticRegression: # type: ignore[no-redef] - """Fallback estimator that preserves the expected interface in test envs.""" - - def 
__init__(self, *args, **kwargs): - self.args = args - self.kwargs = kwargs - - def fit(self, features, labels): - """Raise when scikit-learn is unavailable for model fitting.""" - - del features, labels - raise ModuleNotFoundError( - "scikit-learn is required for the default logistic regression estimator." - ) - - def predict_proba(self, features): - """Raise when scikit-learn is unavailable for probability scoring.""" - - del features - raise ModuleNotFoundError( - "scikit-learn is required for the default logistic regression estimator." - ) - RACE_WHITE = "WHITE" RACE_BLACK = "BLACK" @@ -158,8 +84,11 @@ def predict_proba(self, features): "pain_assessment_method", "pain_level", "pain_management", + "pain_present", "reason_for_restraint", "restraint_device", + "restraint_type", + "restraints_evaluated", "richmond_ras_scale", "riker_sas_scale", "safety_measures", @@ -168,6 +97,7 @@ def predict_proba(self, features): "sitter", "skin_care", "social_work_consult", + "spokesperson_healthcare_proxy", "spiritual_support", "stress", "support_systems", @@ -177,6 +107,65 @@ def predict_proba(self, features): "wrist_restraints", } +PAPER_LIKE_RELEVANT_LABELS = ( + "Family Communication", + "Follows Commands", + "Education Barrier", + "Education Learner", + "Education Method", + "Education Readiness", + "Education Topic #1", + "Education Topic #2", + "Pain", + "Pain Level", + "Pain Level (Rest)", + "Pain Assess Method", + "Restraint", + "Restraint Type", + "Restraint (Non-violent)", + "Restraint Ordered (Non-violent)", + "Restraint Location", + "Reason For Restraint", + "Spiritual Support", + "Support Systems", + "State", + "Behavior", + "Behavioral State", + "Stress", + "Safety", + "Safety Measures_U_1", + "Family", + "Patient/Family Informed", + "Pt./Family Informed", + "Health Care Proxy", + "BATH", + "bath", + "Bath", + "Bed Bath", + "Bedbath", + "CHG Bath", + "Skin Care", + "Judgement", + "Family Meeting held", + "Emotional / physical / sexual harm by partner or close 
relation", + "Verbal Response", + "Side Rails", + "Orientation", + "RSBI Deferred", + "Richmond-RAS Scale", + "Riker-SAS Scale", + "Status and Comfort", + "Teaching directed toward", + "Consults", + "Social work consult", + "Sitter", + "security", + "safety", + "headache", + "hairwashed", + "observer", +) + CODE_STATUS_ITEMIDS = {128, 223758} REQUIRED_RAW_TABLE_COLUMNS = { @@ -199,8 +188,20 @@ def predict_proba(self, features): } REQUIRED_MATERIALIZED_VIEW_COLUMNS = { - "ventdurations": ["icustay_id", "ventnum", "starttime", "endtime", "duration_hours"], - "vasopressordurations": ["icustay_id", "vasonum", "starttime", "endtime", "duration_hours"], + "ventdurations": [ + "icustay_id", + "ventnum", + "starttime", + "endtime", + "duration_hours", + ], + "vasopressordurations": [ + "icustay_id", + "vasonum", + "starttime", + "endtime", + "duration_hours", + ], "oasis": ["hadm_id", "icustay_id", "oasis"], "sapsii": ["hadm_id", "icustay_id", "sapsii"], } @@ -213,21 +214,35 @@ def predict_proba(self, features): } NONCOMPLIANCE_PATTERN = re.compile(r"\bnoncompliant\b", re.IGNORECASE) -AUTOPSY_CONSENT_PATTERNS = ( - re.compile(r"\bconsent(?: for)? 
autopsy\b", re.IGNORECASE), - re.compile(r"\bautopsy consent\b", re.IGNORECASE), - re.compile(r"\bconsented to autopsy\b", re.IGNORECASE), - re.compile(r"\bautopsy was performed\b", re.IGNORECASE), - re.compile(r"\bautopsy obtained\b", re.IGNORECASE), - re.compile(r"\bfamily provided autopsy consent\b", re.IGNORECASE), +_AUTOPSY_CONSENT_KEYWORDS = ("consent", "agree", "request") +_AUTOPSY_DECLINE_KEYWORDS = ("decline", "not consent", "refuse", "denied") +_AUTOPSY_CORRECTED_DECLINE_PHRASES = ( + "no autopsy", + "not perform an autopsy", + "not perform autopsy", + "decision to not perform an autopsy", + "decision made to not perform an autopsy", + "do not want an autopsy", + "did not want an autopsy", + "not want an autopsy", + "declining autopsy", + "declining an autopsy", ) -AUTOPSY_DECLINE_PATTERNS = ( - re.compile(r"\bautopsy declined\b", re.IGNORECASE), - re.compile(r"\bdeclined autopsy\b", re.IGNORECASE), - re.compile(r"\bno autopsy\b", re.IGNORECASE), - re.compile(r"\bfamily declined autopsy\b", re.IGNORECASE), +_AUTOPSY_CORRECTED_CONSENT_PHRASES = ( + "autopsy permission was obtained", + "permission for autopsy", + "permission obtained for autopsy", ) -DEFAULT_LOGISTIC_C = 0.1 +_AUTOPSY_SEGMENT_SPLIT_PATTERN = re.compile(r"[\n.;]+") +_AUTOPSY_CORRECTED_DECLINE_PATTERN = re.compile( + r"(?:\b(?:declin\w*|refus\w*|deni\w*|not\s+consent(?:ed)?)\b(?:\W+\w+){0,5}\W+\bautopsy\b)" + r"|(?:\bautopsy\b(?:\W+\w+){0,5}\W+\b(?:declin\w*|refus\w*|deni\w*|not\s+consent(?:ed)?)\b)", + re.IGNORECASE, +) +_AUTOPSY_STUB_SEGMENT_PATTERN = re.compile(r"^(?:an?\s+)?autopsy\b", re.IGNORECASE) +AUTOPSY_LABEL_MODE_CORRECTED = "corrected" +AUTOPSY_LABEL_MODE_PAPER_LIKE = "paper_like" +_EOL_MISTRUST_MODEL_MODULE = None def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> None: @@ -237,6 +252,24 @@ def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> raise ValueError(f"{df_name} is missing required columns: {missing_str}") +def 
_load_eol_mistrust_model_module(): + global _EOL_MISTRUST_MODEL_MODULE + if _EOL_MISTRUST_MODEL_MODULE is None: + module_path = Path(__file__).resolve().parents[1] / "models" / "eol_mistrust.py" + spec = importlib.util.spec_from_file_location( + "pyhealth.models.eol_mistrust_dataset_compat", + module_path, + ) + module = importlib.util.module_from_spec(spec) + if spec is None or spec.loader is None: + raise ImportError( + "Unable to load pyhealth.models.eol_mistrust compatibility module." + ) + spec.loader.exec_module(module) + _EOL_MISTRUST_MODEL_MODULE = module + return _EOL_MISTRUST_MODEL_MODULE + + def _filter_non_error_notes(noteevents: pd.DataFrame) -> pd.DataFrame: """Keep notes where iserror is NULL or not equal to 1.""" @@ -245,23 +278,6 @@ def _filter_non_error_notes(noteevents: pd.DataFrame) -> pd.DataFrame: return noteevents.loc[keep_mask].copy() -def _extract_positive_class_probabilities(probabilities) -> np.ndarray: - """Validate predict_proba output and return the positive-class column.""" - - probability_array = np.asarray(probabilities, dtype=float) - if probability_array.ndim != 2 or probability_array.shape[1] < 2: - raise ValueError( - "Estimator `predict_proba` output must have shape (n_samples, n_classes>=2)." 
- ) - return probability_array[:, 1] - - -def _score_column_name(label_column: str) -> str: - if label_column.endswith("_label"): - return f"{label_column[:-6]}_score" - return f"{label_column}_score" - - def _normalize_note_categories(categories: Iterable[str] | None) -> set[str] | None: if categories is None: return None @@ -290,17 +306,163 @@ def _classify_noncompliance(text: str) -> int: return int(bool(NONCOMPLIANCE_PATTERN.search(text))) -def _classify_autopsy(text: str) -> int: - if "autopsy" not in text: - return 0 +def _normalize_autopsy_label_mode(mode: str | None) -> str: + normalized = ( + AUTOPSY_LABEL_MODE_CORRECTED if mode is None else str(mode).strip().lower() + ) + if normalized not in {AUTOPSY_LABEL_MODE_CORRECTED, AUTOPSY_LABEL_MODE_PAPER_LIKE}: + raise ValueError( + "autopsy_label_mode must be one of " + f"{AUTOPSY_LABEL_MODE_CORRECTED!r} or {AUTOPSY_LABEL_MODE_PAPER_LIKE!r}" + ) + return normalized + + +def _classify_autopsy_lines_paper_like(lines: Iterable[str]) -> float: + """Line-level autopsy classification matching the reference notebook. + + Each line is checked independently: if a line contains 'autopsy', + look for consent keywords (consent, agree, request) and decline + keywords (decline, not consent, refuse, denied) on that same line. + + Returns + ------- + float + 1.0 = consent (proxy positive / mistrust) + 0.0 = decline (proxy negative / trust) + NaN = autopsy not mentioned, or ambiguous (both consent and decline) + + Parameters + ---------- + lines : Iterable[str] + Pre-lowered text lines (may come from one note or many). 
+ """ + consented = False + declined = False + for line in lines: + if "autopsy" not in line: + continue + for kw in _AUTOPSY_DECLINE_KEYWORDS: + if kw in line: + declined = True + for kw in _AUTOPSY_CONSENT_KEYWORDS: + if kw in line: + consented = True + if not consented and not declined: + return float("nan") + if consented and declined: + return float("nan") + if consented: + return 1.0 + return 0.0 + + +def _classify_autopsy_lines_corrected(lines: Iterable[str]) -> float: + """Line-level autopsy classification with a few explicit negative phrases. + + This keeps the notebook's overall structure but recognizes common + negative phrasings such as ``no autopsy`` that the original notebook + left unlabeled. + """ + + strong_consented = False + weak_requested = False + declined = False + for line in lines: + segments = [ + segment.strip() + for segment in _AUTOPSY_SEGMENT_SPLIT_PATTERN.split(line) + if segment.strip() + ] + for idx, segment in enumerate(segments): + normalized_segment = segment + if "autopsy" not in normalized_segment: + continue + + candidate_segments = [normalized_segment] + is_autopsy_stub = bool( + _AUTOPSY_STUB_SEGMENT_PATTERN.fullmatch(normalized_segment) + ) + if is_autopsy_stub and idx > 0 and "autopsy" not in segments[idx - 1]: + candidate_segments.append(f"{segments[idx - 1]} {normalized_segment}") + if ( + is_autopsy_stub + and idx + 1 < len(segments) + and "autopsy" not in segments[idx + 1] + and ( + "request" in segments[idx + 1] + or "consent" in segments[idx + 1] + or "agree" in segments[idx + 1] + or any( + phrase in segments[idx + 1] + for phrase in _AUTOPSY_CORRECTED_CONSENT_PHRASES + ) + ) + ): + candidate_segments.append(f"{normalized_segment} {segments[idx + 1]}") + + segment_declined = False + segment_has_request = False + segment_has_strong_consent = False + for candidate_segment in candidate_segments: + segment_declined = ( + segment_declined + or bool( + _AUTOPSY_CORRECTED_DECLINE_PATTERN.search(candidate_segment) + ) + or 
any( + phrase in candidate_segment + for phrase in _AUTOPSY_CORRECTED_DECLINE_PHRASES + ) + ) + segment_has_request = segment_has_request or ( + "request" in candidate_segment + ) + segment_has_strong_consent = segment_has_strong_consent or ( + ("consent" in candidate_segment) + or ("agree" in candidate_segment) + or any( + phrase in candidate_segment + for phrase in _AUTOPSY_CORRECTED_CONSENT_PHRASES + ) + ) + + if segment_declined: + declined = True + # Treat explicit negative phrasing as stronger than generic request + # wording. This keeps "request for autopsy was declined" negative + # while still allowing clear consent/agreement phrases to remain + # genuinely ambiguous if both positive and negative evidence appear. + if segment_has_strong_consent and not segment_declined: + strong_consented = True + elif segment_has_request and not segment_declined: + weak_requested = True + + if strong_consented and declined: + return float("nan") + if strong_consented: + return 1.0 + if declined: + return 0.0 + if weak_requested: + return 1.0 + return float("nan") + - has_decline = any(pattern.search(text) for pattern in AUTOPSY_DECLINE_PATTERNS) - has_consent = any(pattern.search(text) for pattern in AUTOPSY_CONSENT_PATTERNS) - return int(has_consent and not has_decline) +def _classify_autopsy_lines( + lines: Iterable[str], + *, + mode: str = AUTOPSY_LABEL_MODE_CORRECTED, +) -> float: + normalized_mode = _normalize_autopsy_label_mode(mode) + if normalized_mode == AUTOPSY_LABEL_MODE_PAPER_LIKE: + return _classify_autopsy_lines_paper_like(lines) + return _classify_autopsy_lines_corrected(lines) def _to_datetime(series: pd.Series) -> pd.Series: - return pd.to_datetime(series, errors="coerce") + parsed = pd.to_datetime(series, errors="coerce", utc=True) + return parsed.dt.tz_localize(None) def _normalize_hadm_ids(all_hadm_ids: Iterable[int] | None) -> list[int] | None: @@ -358,7 +520,9 @@ def _calculate_age_years(admittime: pd.Series, dob: pd.Series) -> pd.Series: if 
pd.isna(admit) or pd.isna(birth): ages.append(float("nan")) continue - age = (admit.to_pydatetime() - birth.to_pydatetime()).total_seconds() / seconds_per_year + age = ( + admit.to_pydatetime() - birth.to_pydatetime() + ).total_seconds() / seconds_per_year ages.append(90.0 if age > 200 else float(age)) return pd.Series(ages, index=admittime.index, dtype=float) @@ -379,7 +543,337 @@ def _clean_feature_text(value) -> str: if value is None or (isinstance(value, float) and pd.isna(value)): return "" - return re.sub(r"\s+", " ", str(value).strip()) + cleaned = re.sub(r"\s+", " ", str(value).strip()) + cleaned = re.sub(r"^[^A-Za-z0-9]+", "", cleaned) + cleaned = re.sub(r"[^A-Za-z0-9]+$", "", cleaned) + return cleaned.strip() + + +def _normalize_label_match_text(value) -> str: + if value is None or (isinstance(value, float) and pd.isna(value)): + return "" + cleaned = str(value).strip().lower().replace("_", " ") + cleaned = re.sub(r"\s+", " ", cleaned) + return cleaned.strip() + + +def _normalize_paper_like_value(value) -> str: + if value is None or (isinstance(value, float) and pd.isna(value)): + return "none" + cleaned = re.sub(r"\s+", " ", str(value).strip().lower()) + return cleaned if cleaned else "none" + + +def _feature_text_display_score(value: str) -> tuple: + cleaned = _clean_feature_text(value) + if cleaned == "": + return (-1, -1, -1, -1, -1, -1, "") + + has_alpha = any(char.isalpha() for char in cleaned) + is_all_upper = has_alpha and cleaned.upper() == cleaned + is_all_lower = has_alpha and cleaned.lower() == cleaned + is_title_like = has_alpha and cleaned == cleaned.title() + alpha_count = sum(char.isalpha() for char in cleaned) + digit_count = sum(char.isdigit() for char in cleaned) + punctuation_count = sum( + (not char.isalnum()) and (not char.isspace()) for char in cleaned + ) + return ( + int(is_title_like), + int(not is_all_upper), + int(not is_all_lower), + alpha_count, + -digit_count, + -punctuation_count, + -len(cleaned), + cleaned.lower(), + ) + + 
+def _choose_preferred_feature_text(values: Iterable[str]) -> str: + best = "" + best_score = _feature_text_display_score("") + for value in values: + cleaned = _clean_feature_text(value) + if cleaned == "": + continue + score = _feature_text_display_score(cleaned) + if score > best_score: + best = cleaned + best_score = score + return best + + +def _build_feature_label_metadata( + items: pd.DataFrame, +) -> tuple[dict[int, str], dict[str, str]]: + working = items.copy() + working["normalized_label"] = working["label"].map(_normalize_token) + working["display_label"] = working["label"].map(_clean_feature_text) + + label_display_lookup = ( + working.loc[ + working["display_label"] != "", ["normalized_label", "display_label"] + ] + .drop_duplicates() + .groupby("normalized_label", sort=True)["display_label"] + .agg(lambda series: _choose_preferred_feature_text(series.tolist())) + .to_dict() + ) + item_label_lookup = ( + working[["itemid", "normalized_label"]] + .drop_duplicates("itemid") + .set_index("itemid")["normalized_label"] + .to_dict() + ) + return item_label_lookup, label_display_lookup + + +_FEATURE_VALUE_MEASUREMENT_SUFFIXES = ( + "ppm", + "mmhg", + "kg", + "kgs", + "lb", + "lbs", + "cm", + "mm", + "ml", + "cc", + "mcg", + "mg", + "meq", +) + + +def _is_numeric_heavy_or_freeform_feature_value( + normalized_value: str, + display_value: str, +) -> bool: + cleaned = _clean_feature_text(display_value) + if cleaned == "": + return True + if len(cleaned) > 64 or len(cleaned.split()) > 10: + return True + + normalized = str(normalized_value).strip("_") + alpha_tokens = re.findall(r"[a-z]+", normalized) + if not alpha_tokens: + return True + + measurement_stripped = normalized + for suffix in _FEATURE_VALUE_MEASUREMENT_SUFFIXES: + if measurement_stripped.endswith(suffix): + measurement_stripped = measurement_stripped[: -len(suffix)] + break + measurement_stripped = measurement_stripped.strip("_") + if measurement_stripped and re.fullmatch( + r"[-+]?\d+(?:_\d+)*", 
measurement_stripped + ): + return True + + digit_count = sum(char.isdigit() for char in cleaned) + alpha_length = sum(len(token) for token in alpha_tokens) + if digit_count >= 2 and alpha_length <= 3: + return True + return False + + +def _feature_display_name( + normalized_label: str, + normalized_value: str, + label_display_lookup: Mapping[str, str], + value_display_lookup: Mapping[tuple[str, str], str], +) -> str: + display_label = label_display_lookup.get( + normalized_label, _clean_feature_text(normalized_label) + ) + display_value = value_display_lookup.get( + (normalized_label, normalized_value), + _clean_feature_text(normalized_value), + ) + return f"{display_label}: {display_value}" + + +def _paper_like_feature_display_name(label: str, value: str) -> str: + return f"{label}: {value}" + + +def _matches_paper_like_label( + label: str, + allowed_labels: Iterable[str] | None = None, +) -> bool: + normalized_label = _normalize_label_match_text(label) + if normalized_label == "": + return False + patterns = ( + [_normalize_label_match_text(item) for item in allowed_labels] + if allowed_labels is not None + else [_normalize_label_match_text(item) for item in PAPER_LIKE_RELEVANT_LABELS] + ) + patterns = [pattern for pattern in patterns if pattern] + return any(pattern in normalized_label for pattern in patterns) + + +def _paper_like_feature_pair(label: str, value) -> tuple[str, str] | None: + normalized_label = _normalize_label_match_text(label) + if normalized_label == "": + return None + normalized_value = _normalize_paper_like_value(value) + + if "reason for restraint" in normalized_label: + if normalized_value in {"not applicable", "none"}: + normalized_value = "none" + elif ("threat" in normalized_value) or ("acute risk of" in normalized_value): + normalized_value = "threat of harm" + elif ( + ("confusion" in normalized_value) + or ("delirium" in normalized_value) + or (normalized_value == "impaired judgment") + or (normalized_value == "sundowning") + ): + 
normalized_value = "confusion/delirium" + elif ( + ("occurence" in normalized_value) + or (normalized_value == "severe physical agitation") + or (normalized_value == "violent/self des") + ): + normalized_value = "prescence of violence" + elif normalized_value in { + "ext/txinterfere", + "protection of lines and tubes", + "treatment interference", + }: + normalized_value = "treatment interference" + elif "risk for fall" in normalized_value: + normalized_value = "risk for falls" + return ("reason for restraint", normalized_value) + + if "restraint location" in normalized_label: + if normalized_value == "none": + normalized_value = "none" + elif "4 point rest" in normalized_value: + normalized_value = "4 point restraint" + else: + normalized_value = "some restraint" + return ("restraint location", normalized_value) + + if "restraint device" in normalized_label: + if "sitter" in normalized_value: + normalized_value = "sitter" + elif "limb" in normalized_value: + normalized_value = "limb" + return ("restraint device", normalized_value) + + if "bath" in normalized_label: + if "part" in normalized_label: + normalized_value = "partial" + elif "self" in normalized_value: + normalized_value = "self" + elif "refused" in normalized_value: + normalized_value = "refused" + elif "shave" in normalized_value: + normalized_value = "shave" + elif "hair" in normalized_value: + normalized_value = "hair" + elif "none" in normalized_value: + normalized_value = "none" + else: + normalized_value = "done" + return ("bath", normalized_value) + + if normalized_label in {"behavior", "behavioral state"}: + return None + + if normalized_label.startswith("pain level"): + return ("pain level", normalized_value) + + if normalized_label.startswith( + ("pain management", "pain type", "pain cause", "pain location") + ): + return None + + if normalized_label.startswith("education topic"): + return ("education topic", normalized_value) + + if normalized_label.startswith("safety measures"): + return 
("safety measures", normalized_value) + + if normalized_label.startswith("side rails"): + return ("side rails", normalized_value) + + if normalized_label.startswith("status and comfort"): + return ("status and comfort", normalized_value) + + if "informed" in normalized_label: + return ("informed", normalized_value) + + return (normalized_label, normalized_value) + + +def _filter_chartevent_items( + d_items: pd.DataFrame, + allowed_labels: Iterable[str] | None = None, + *, + paper_like: bool = False, +) -> pd.DataFrame: + _require_columns(d_items, ["itemid", "label", "dbsource"], "d_items") + items = d_items.copy() + items["normalized_label"] = items["label"].map(_normalize_token) + if paper_like: + mask = items["label"].map( + lambda label: _matches_paper_like_label( + label, allowed_labels=allowed_labels + ) + ) + items = items.loc[mask].copy() + elif allowed_labels is not None: + allowed = {_normalize_token(label) for label in allowed_labels} + items = items.loc[items["normalized_label"].isin(allowed)].copy() + else: + allowed_itemids = identify_table2_itemids(items) + items = items.loc[items["itemid"].isin(allowed_itemids)].copy() + + items["itemid"] = pd.to_numeric(items["itemid"], errors="coerce") + items = items.dropna(subset=["itemid"]).copy() + items["itemid"] = items["itemid"].astype(int) + return items + + +def _paper_like_feature_sets_from_rows(rows: pd.DataFrame) -> dict[str, set[int]]: + feature_to_hadm: dict[str, set[int]] = defaultdict(set) + if rows.empty: + return feature_to_hadm + unique_rows = rows[["hadm_id", "label", "value"]].drop_duplicates() + for row in unique_rows.itertuples(index=False): + feature_pair = _paper_like_feature_pair( + str(getattr(row, "label")), getattr(row, "value") + ) + if feature_pair is None: + continue + feature_name = _paper_like_feature_display_name(*feature_pair) + feature_to_hadm[feature_name].add(int(getattr(row, "hadm_id"))) + return feature_to_hadm + + +def _binary_feature_matrix_from_feature_sets( + 
feature_to_hadm: Mapping[str, set[int]], + hadm_ids: Sequence[int], +) -> pd.DataFrame: + feature_data: dict[str, object] = {"hadm_id": list(hadm_ids)} + hadm_index = pd.Index(hadm_ids) + for feature_name in sorted(feature_to_hadm, key=str.lower): + feature_data[feature_name] = hadm_index.isin( + feature_to_hadm[feature_name] + ).astype(int) + result = pd.DataFrame(feature_data) + if "hadm_id" not in result.columns: + result = pd.DataFrame(columns=["hadm_id"]) + feature_cols = [col for col in result.columns if col != "hadm_id"] + if feature_cols: + result[feature_cols] = result[feature_cols].fillna(0).astype(int) + result = result.sort_values("hadm_id").drop_duplicates("hadm_id") + return result.reset_index(drop=True) def _matches_table2_concept(label: str) -> bool: @@ -388,10 +882,7 @@ def _matches_table2_concept(label: str) -> bool: normalized_label = _normalize_token(label) if normalized_label == "": return False - return any( - (concept in normalized_label) or (normalized_label in concept) - for concept in TABLE2_LABELS - ) + return any(concept in normalized_label for concept in TABLE2_LABELS) def _collect_required_join_keys(raw_tables: Mapping[str, pd.DataFrame]) -> set[str]: @@ -467,50 +958,26 @@ def _validate_bridge_join( def map_ethnicity(ethnicity) -> str: - """Map raw MIMIC ethnicity strings to the paper's coarse race groups.""" - - text = str(ethnicity or "").upper() - if "BLACK" in text or "AFRICAN" in text: - return RACE_BLACK - if "WHITE" in text or "EUROPEAN" in text or "PORTUGUESE" in text: - return RACE_WHITE - if "ASIAN" in text: - return RACE_ASIAN - if "HISPANIC" in text or "LATINO" in text or "SOUTH AMERICAN" in text: - return RACE_HISPANIC - if ( - "NATIVE" in text - or "AMERICAN INDIAN" in text - or "ALASKA NATIVE" in text - ): - return RACE_NATIVE_AMERICAN - return RACE_OTHER + """Dataset-facing alias of the task-owned race mapping helper.""" + + return _task_map_ethnicity_to_race(ethnicity) def map_insurance(insurance) -> str: - """Collapse 
raw MIMIC insurance values into the required three groups.""" + """Dataset-facing alias of the task-owned insurance mapping helper.""" - text = str(insurance or "").strip().lower() - normalized = re.sub(r"\s+", " ", text) - if normalized in {"medicare", "medicaid", "government", "public"}: - return INSURANCE_PUBLIC - if normalized in {"private"}: - return INSURANCE_PRIVATE - if normalized in {"self pay", "self-pay", "self_pay"}: - return INSURANCE_SELF_PAY - return INSURANCE_SELF_PAY + return _task_map_insurance_to_group(insurance) def prepare_note_text_for_sentiment(text) -> str: - """Normalize note text using whitespace tokenization and rejoining only.""" + """Dataset-facing alias of the task-owned note normalization helper.""" - if text is None or (isinstance(text, float) and pd.isna(text)): - return "" - tokens = str(text).split() - return " ".join(tokens) + return _task_prepare_note_text(text) -def build_base_admissions(admissions: pd.DataFrame, patients: pd.DataFrame) -> pd.DataFrame: +def build_base_admissions( + admissions: pd.DataFrame, patients: pd.DataFrame +) -> pd.DataFrame: """Join admissions to patients and keep only rows with chart events available.""" _require_columns( @@ -547,8 +1014,18 @@ def build_base_admissions(admissions: pd.DataFrame, patients: pd.DataFrame) -> p return merged.reset_index(drop=True) -def build_demographics_table(base_admissions: pd.DataFrame) -> pd.DataFrame: - """Derive race, age, LOS, and insurance-group fields for each admission.""" +def build_demographics_table( + base_admissions: pd.DataFrame, + *, + paper_like: bool = False, +) -> pd.DataFrame: + """Derive race, age, LOS, and insurance-group fields for each admission. + + When ``paper_like=True``, ``los_days`` mirrors the reference notebook's + modulo-24-hour representation (``timedelta.seconds / 3600``), while + ``los_hours`` remains the true total LOS in hours so cohort filters keep + using the cleaned duration semantics. 
+ """ _require_columns( base_admissions, @@ -572,7 +1049,10 @@ def build_demographics_table(base_admissions: pd.DataFrame) -> pd.DataFrame: age_years = _calculate_age_years(df["admittime"], df["dob"]) los_hours = (df["dischtime"] - df["admittime"]).dt.total_seconds() / 3600.0 - los_days = los_hours / 24.0 + if paper_like: + los_days = (df["dischtime"] - df["admittime"]).dt.seconds / 3600.0 + else: + los_days = los_hours / 24.0 insurance_group = df["insurance"].map(map_insurance) demographics = pd.DataFrame( @@ -596,7 +1076,9 @@ def build_demographics_table(base_admissions: pd.DataFrame) -> pd.DataFrame: return demographics.reset_index(drop=True) -def build_eol_cohort(base_admissions: pd.DataFrame, demographics: pd.DataFrame) -> pd.DataFrame: +def build_eol_cohort( + base_admissions: pd.DataFrame, demographics: pd.DataFrame +) -> pd.DataFrame: """Build the end-of-life cohort used for treatment-disparity analysis.""" _require_columns( @@ -615,9 +1097,11 @@ def build_eol_cohort(base_admissions: pd.DataFrame, demographics: pd.DataFrame) discharge_location = df["discharge_location"].fillna("").str.upper() is_deceased = df["hospital_expire_flag"].fillna(0).astype(int) == 1 is_hospice = discharge_location.str.contains("HOSPICE", na=False) - is_snf = discharge_location.str.contains(r"SKILLED NURSING|\bSNF\b", na=False, regex=True) + is_snf = discharge_location.str.contains( + r"SKILLED NURSING|\bSNF\b", na=False, regex=True + ) - include = (df["los_hours"] > 24) & (is_deceased | is_hospice | is_snf) + include = (df["los_hours"] >= 6) & (is_deceased | is_hospice | is_snf) df = df.loc[include].copy() df["discharge_category"] = "Skilled Nursing Facility" df.loc[is_hospice.loc[df.index], "discharge_category"] = "Hospice" @@ -626,14 +1110,42 @@ def build_eol_cohort(base_admissions: pd.DataFrame, demographics: pd.DataFrame) return df.reset_index(drop=True) -def build_all_cohort(base_admissions: pd.DataFrame, icustays: pd.DataFrame) -> pd.DataFrame: - """Build the admission-level 
cohort with at least one ICU stay.""" +def build_all_cohort( + base_admissions: pd.DataFrame, icustays: pd.DataFrame +) -> pd.DataFrame: + """Build the adult admission-level cohort with at least 12 cumulative ICU hours.""" + + _require_columns( + base_admissions, ["hadm_id", "admittime", "dob"], "base_admissions" + ) + _require_columns( + icustays, ["hadm_id", "icustay_id", "intime", "outtime"], "icustays" + ) - _require_columns(base_admissions, ["hadm_id"], "base_admissions") - _require_columns(icustays, ["hadm_id", "icustay_id", "intime", "outtime"], "icustays") + base = base_admissions.copy() + base["admittime"] = _to_datetime(base["admittime"]) + base["dob"] = _to_datetime(base["dob"]) + adult_hadm_ids = set( + base.loc[_calculate_age_years(base["admittime"], base["dob"]) >= 18, "hadm_id"] + .dropna() + .tolist() + ) - qualifying = icustays["hadm_id"].dropna().drop_duplicates() - df = base_admissions.loc[base_admissions["hadm_id"].isin(set(qualifying))].copy() + icu = icustays.copy() + icu["intime"] = _to_datetime(icu["intime"]) + icu["outtime"] = _to_datetime(icu["outtime"]) + icu["icu_hours"] = (icu["outtime"] - icu["intime"]).dt.total_seconds() / 3600.0 + icu["hadm_id"] = pd.to_numeric(icu["hadm_id"], errors="coerce") + qualifying = set( + icu.loc[icu["icu_hours"].ge(0)] + .dropna(subset=["hadm_id"]) + .groupby("hadm_id", sort=True)["icu_hours"] + .sum() + .loc[lambda totals: totals >= 12] + .index.astype(int) + .tolist() + ) + df = base.loc[base["hadm_id"].isin(adult_hadm_ids & qualifying)].copy() df = df.sort_values("hadm_id").drop_duplicates("hadm_id") return df.reset_index(drop=True) @@ -688,13 +1200,26 @@ def _duration_totals_by_hadm( if durations.empty: return pd.DataFrame(columns=["hadm_id", output_col]) - bridge = icustays[["icustay_id", "hadm_id"]].drop_duplicates() + bridge_columns = ["icustay_id", "hadm_id", "intime", "outtime"] + bridge = icustays[bridge_columns].drop_duplicates() df = durations.copy() if "hadm_id" in df.columns: df = 
df.drop(columns=["hadm_id"]) df["starttime"] = _to_datetime(df["starttime"]) df["endtime"] = _to_datetime(df["endtime"]) df = df.merge(bridge, on="icustay_id", how="inner", validate="many_to_one") + df["intime"] = _to_datetime(df["intime"]) + df["outtime"] = _to_datetime(df["outtime"]) + df = df.loc[ + df["starttime"].notna() + & df["endtime"].notna() + & df["intime"].notna() + & df["outtime"].notna() + & df["starttime"].ge(df["intime"]) + & df["endtime"].le(df["outtime"]) + ].copy() + if df.empty: + return pd.DataFrame(columns=["hadm_id", output_col]) totals = ( df.groupby("hadm_id", sort=True) @@ -709,10 +1234,18 @@ def build_treatment_totals( icustays: pd.DataFrame, ventdurations: pd.DataFrame, vasopressordurations: pd.DataFrame, + paper_like: bool = False, ) -> pd.DataFrame: - """Compute admission-level ventilation and vasopressor totals in minutes.""" + """Compute admission-level ventilation and vasopressor totals in minutes. + + ``paper_like`` is retained for API compatibility, but ICU-window filtering + now applies to both paths. + """ - _require_columns(icustays, ["hadm_id", "icustay_id", "intime", "outtime"], "icustays") + _require_columns( + icustays, ["hadm_id", "icustay_id", "intime", "outtime"], "icustays" + ) + del paper_like vent_totals = _duration_totals_by_hadm( ventdurations, @@ -751,7 +1284,11 @@ def build_note_corpus( grouped = ( notes.groupby("hadm_id", sort=True)["text"] - .apply(lambda series: prepare_note_text_for_sentiment(" ".join(t for t in series if t))) + .apply( + lambda series: prepare_note_text_for_sentiment( + " ".join(t for t in series if t) + ) + ) .reset_index(name="note_text") ) @@ -765,83 +1302,149 @@ def build_note_corpus( def _build_note_labels_from_corpus(note_corpus: pd.DataFrame) -> pd.DataFrame: - """Create the two note-derived labels from an admission-level note corpus.""" + """Create the two note-derived labels from an admission-level note corpus. + + Noncompliance uses the concatenated corpus text. 
Autopsy labels are + set to NaN here; the caller (``build_note_labels``) overwrites them with + line-level results computed from raw noteevents before concatenation. + NaN means "autopsy not mentioned" and those rows are excluded from + proxy model training (but still scored). + """ _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus") lowered = note_corpus["note_text"].fillna("").astype(str).str.lower() noncompliance = lowered.apply(_classify_noncompliance) - autopsy = lowered.apply(_classify_autopsy) labels = pd.DataFrame( { "hadm_id": note_corpus["hadm_id"], "noncompliance_label": noncompliance.astype(int), - "autopsy_label": autopsy.astype(int), + "autopsy_label": float("nan"), } ) labels = labels.sort_values("hadm_id").drop_duplicates("hadm_id") return labels.reset_index(drop=True) -def build_note_labels( +def _build_autopsy_labels_from_raw_notes( noteevents: pd.DataFrame, - all_hadm_ids: Iterable[int] | None = None, - categories: Iterable[str] | None = None, -) -> pd.DataFrame: - """Create admission-level noncompliance and autopsy labels from notes. + *, + autopsy_label_mode: str = AUTOPSY_LABEL_MODE_CORRECTED, +) -> dict[int, int]: + """Compute admission-level autopsy labels from raw (pre-concatenation) notes. - By default labels are derived from all non-error notes. The optional - ``categories`` filter is provided for API symmetry, but the study pipeline - should typically leave it unset so labels continue to use all note types. + Mirrors the reference notebook: iterate each note's lines individually so + that consent/decline keywords are only matched on lines containing 'autopsy'. 
""" + admission_lines: dict[int, list[str]] = defaultdict(list) + for hadm_id, text in zip(noteevents["hadm_id"], noteevents["text"]): + raw = ( + str(text) + if text is not None and not (isinstance(text, float) and pd.isna(text)) + else "" + ) + for line in raw.lower().split("\n"): + admission_lines[int(hadm_id)].append(line) + + return { + hadm_id: _classify_autopsy_lines(lines, mode=autopsy_label_mode) + for hadm_id, lines in admission_lines.items() + } + +def _build_note_labels_for_mode( + noteevents: pd.DataFrame, + *, + all_hadm_ids: Iterable[int] | None, + categories: Iterable[str] | None, + autopsy_label_mode: str, +) -> pd.DataFrame: _require_columns(noteevents, ["hadm_id", "text", "iserror"], "noteevents") + + filtered = _filter_non_error_notes(noteevents) + filtered = _filter_note_categories(filtered, categories=categories) corpus = build_note_corpus( noteevents, all_hadm_ids=all_hadm_ids, categories=categories, ) - return _build_note_labels_from_corpus(corpus) + labels = _build_note_labels_from_corpus(corpus) + + autopsy_map = _build_autopsy_labels_from_raw_notes( + filtered, + autopsy_label_mode=autopsy_label_mode, + ) + labels["autopsy_label"] = labels["hadm_id"].map(autopsy_map) + return labels -def build_note_artifacts_from_csv( - noteevents_csv_path: Path | str, +def build_note_labels( + noteevents: pd.DataFrame, all_hadm_ids: Iterable[int] | None = None, categories: Iterable[str] | None = None, - corpus_categories: Iterable[str] | None = None, - label_categories: Iterable[str] | None = None, - chunksize: int = 100_000, -) -> tuple[pd.DataFrame, pd.DataFrame]: - """Build the note corpus and note-derived labels from a large CSV in chunks. + autopsy_label_mode: str = AUTOPSY_LABEL_MODE_CORRECTED, +) -> pd.DataFrame: + """Create admission-level noncompliance and autopsy labels from notes. 
- Parameters - ---------- - categories: - Backward-compatible shared filter applied to both corpus and labels when - the more specific ``corpus_categories`` / ``label_categories`` are not - provided. - corpus_categories: - Category filter for the returned corpus. Use - ``["Discharge summary"]`` for sentiment features in the study workflow. - label_categories: - Category filter for label extraction. Leave as ``None`` in the study - workflow so noncompliance/autopsy labels continue to use all note types. + Normal Path + corrected autopsy labeling + Paper-like Path + notebook-faithful autopsy labeling """ - normalized_hadm_ids = _normalize_hadm_ids(all_hadm_ids) - hadm_filter = set(normalized_hadm_ids) if normalized_hadm_ids is not None else None + normalized_mode = _normalize_autopsy_label_mode(autopsy_label_mode) + return _build_note_labels_for_mode( + noteevents, + all_hadm_ids=all_hadm_ids, + categories=categories, + autopsy_label_mode=normalized_mode, + ) + + +def _resolve_note_artifact_category_filters( + *, + categories: Iterable[str] | None, + corpus_categories: Iterable[str] | None, + label_categories: Iterable[str] | None, +) -> tuple[set[str] | None, set[str] | None]: if corpus_categories is None: corpus_categories = categories if label_categories is None: label_categories = categories + return ( + _normalize_note_categories(corpus_categories), + _normalize_note_categories(label_categories), + ) + - normalized_corpus_categories = _normalize_note_categories(corpus_categories) - normalized_label_categories = _normalize_note_categories(label_categories) +def _build_note_artifacts_from_csv_for_mode( + noteevents_csv_path: Path | str, + *, + all_hadm_ids: Iterable[int] | None, + categories: Iterable[str] | None, + corpus_categories: Iterable[str] | None, + label_categories: Iterable[str] | None, + autopsy_label_mode: str, + chunksize: int, +) -> tuple[pd.DataFrame, pd.DataFrame]: + normalized_hadm_ids = _normalize_hadm_ids(all_hadm_ids) + hadm_filter = 
set(normalized_hadm_ids) if normalized_hadm_ids is not None else None + normalized_corpus_categories, normalized_label_categories = ( + _resolve_note_artifact_category_filters( + categories=categories, + corpus_categories=corpus_categories, + label_categories=label_categories, + ) + ) corpus_fragments: dict[int, list[str]] = defaultdict(list) label_fragments: dict[int, list[str]] = defaultdict(list) + autopsy_lines: dict[int, list[str]] = defaultdict(list) required_columns = ["hadm_id", "text", "iserror"] - if normalized_corpus_categories is not None or normalized_label_categories is not None: + if ( + normalized_corpus_categories is not None + or normalized_label_categories is not None + ): required_columns.append("category") for chunk in _iter_csv_chunks( @@ -862,27 +1465,41 @@ def build_note_artifacts_from_csv( if chunk.empty: continue + for hadm_id, raw_text in zip(chunk["hadm_id"], chunk["text"]): + raw = ( + str(raw_text) + if raw_text is not None + and not (isinstance(raw_text, float) and pd.isna(raw_text)) + else "" + ) + for line in raw.lower().split("\n"): + stripped = line.strip() + if stripped: + autopsy_lines[int(hadm_id)].append(stripped) + chunk["text"] = chunk["text"].map(prepare_note_text_for_sentiment) chunk = chunk.loc[chunk["text"] != ""] if chunk.empty: continue - corpus_chunk = _filter_note_categories(chunk, categories=normalized_corpus_categories) + corpus_chunk = _filter_note_categories( + chunk, categories=normalized_corpus_categories + ) if not corpus_chunk.empty: - grouped = ( - corpus_chunk.groupby("hadm_id", sort=False)["text"] - .apply(lambda series: prepare_note_text_for_sentiment(" ".join(series))) + grouped = corpus_chunk.groupby("hadm_id", sort=False)["text"].apply( + lambda series: prepare_note_text_for_sentiment(" ".join(series)) ) for hadm_id, text in grouped.items(): if text: corpus_fragments[int(hadm_id)].append(text) - label_chunk = _filter_note_categories(chunk, categories=normalized_label_categories) + label_chunk = 
_filter_note_categories( + chunk, categories=normalized_label_categories + ) if label_chunk.empty: continue - grouped = ( - label_chunk.groupby("hadm_id", sort=False)["text"] - .apply(lambda series: prepare_note_text_for_sentiment(" ".join(series))) + grouped = label_chunk.groupby("hadm_id", sort=False)["text"].apply( + lambda series: prepare_note_text_for_sentiment(" ".join(series)) ) for hadm_id, text in grouped.items(): if text: @@ -897,36 +1514,82 @@ def build_note_artifacts_from_csv( { "hadm_id": hadm_ids, "note_text": [ - prepare_note_text_for_sentiment(" ".join(corpus_fragments.get(hadm_id, []))) + prepare_note_text_for_sentiment( + " ".join(corpus_fragments.get(hadm_id, [])) + ) for hadm_id in hadm_ids ], } ) - corpus = corpus.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True) + corpus = ( + corpus.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True) + ) label_corpus = pd.DataFrame( { "hadm_id": hadm_ids, "note_text": [ - prepare_note_text_for_sentiment(" ".join(label_fragments.get(hadm_id, []))) + prepare_note_text_for_sentiment( + " ".join(label_fragments.get(hadm_id, [])) + ) for hadm_id in hadm_ids ], } ) - label_corpus = label_corpus.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True) + label_corpus = ( + label_corpus.sort_values("hadm_id") + .drop_duplicates("hadm_id") + .reset_index(drop=True) + ) labels = _build_note_labels_from_corpus(label_corpus) + + autopsy_map = { + hadm_id: _classify_autopsy_lines(lines, mode=autopsy_label_mode) + for hadm_id, lines in autopsy_lines.items() + } + labels["autopsy_label"] = labels["hadm_id"].map(autopsy_map) + return corpus, labels -def build_note_corpus_from_csv( +def build_note_artifacts_from_csv( noteevents_csv_path: Path | str, all_hadm_ids: Iterable[int] | None = None, categories: Iterable[str] | None = None, + corpus_categories: Iterable[str] | None = None, + label_categories: Iterable[str] | None = None, + autopsy_label_mode: str = 
AUTOPSY_LABEL_MODE_CORRECTED, chunksize: int = 100_000, -) -> pd.DataFrame: - """Build the admission-level note corpus from a large CSV in chunks.""" +) -> tuple[pd.DataFrame, pd.DataFrame]: + """Build the note corpus and note-derived labels from a large CSV in chunks. - corpus, _ = build_note_artifacts_from_csv( - noteevents_csv_path=noteevents_csv_path, + Normal Path + corrected autopsy labeling + Paper-like Path + notebook-faithful autopsy labeling + """ + + normalized_mode = _normalize_autopsy_label_mode(autopsy_label_mode) + return _build_note_artifacts_from_csv_for_mode( + noteevents_csv_path, + all_hadm_ids=all_hadm_ids, + categories=categories, + corpus_categories=corpus_categories, + label_categories=label_categories, + autopsy_label_mode=normalized_mode, + chunksize=chunksize, + ) + + +def build_note_corpus_from_csv( + noteevents_csv_path: Path | str, + all_hadm_ids: Iterable[int] | None = None, + categories: Iterable[str] | None = None, + chunksize: int = 100_000, +) -> pd.DataFrame: + """Build the admission-level note corpus from a large CSV in chunks.""" + + corpus, _ = build_note_artifacts_from_csv( + noteevents_csv_path=noteevents_csv_path, all_hadm_ids=all_hadm_ids, corpus_categories=categories, chunksize=chunksize, @@ -938,6 +1601,7 @@ def build_note_labels_from_csv( noteevents_csv_path: Path | str, all_hadm_ids: Iterable[int] | None = None, categories: Iterable[str] | None = None, + autopsy_label_mode: str = AUTOPSY_LABEL_MODE_CORRECTED, chunksize: int = 100_000, ) -> pd.DataFrame: """Build note-derived labels from a large CSV in chunks. 
@@ -950,6 +1614,7 @@ def build_note_labels_from_csv( noteevents_csv_path=noteevents_csv_path, all_hadm_ids=all_hadm_ids, label_categories=categories, + autopsy_label_mode=autopsy_label_mode, chunksize=chunksize, ) return labels @@ -963,47 +1628,28 @@ def identify_table2_itemids(d_items: pd.DataFrame) -> set[int]: return set(d_items.loc[matches, "itemid"].tolist()) -def build_chartevent_artifacts_from_csv( - chartevents_csv_path: Path | str, - d_items: pd.DataFrame, - allowed_labels: Iterable[str] | None = None, - all_hadm_ids: Iterable[int] | None = None, - chunksize: int = 500_000, -) -> tuple[pd.DataFrame, pd.DataFrame]: - """Build the feature matrix and code-status targets from a large CSV in chunks.""" - - _require_columns(d_items, ["itemid", "label", "dbsource"], "d_items") - - items = d_items.copy() - items["normalized_label"] = items["label"].map(_normalize_token) - if allowed_labels is not None: - allowed = {_normalize_token(label) for label in allowed_labels} - items = items.loc[items["normalized_label"].isin(allowed)].copy() - else: - allowed_itemids = identify_table2_itemids(items) - items = items.loc[items["itemid"].isin(allowed_itemids)].copy() - - items["itemid"] = pd.to_numeric(items["itemid"], errors="coerce") - items = items.dropna(subset=["itemid"]).copy() - items["itemid"] = items["itemid"].astype(int) - - feature_lookup = ( - items[["itemid", "label"]] - .drop_duplicates("itemid") - .set_index("itemid")["label"] - .to_dict() +def _resolve_chartevent_code_status_mode( + *, + paper_like: bool, + code_status_mode: str | None, +) -> str: + return _task_normalize_code_status_mode( + code_status_mode + if code_status_mode is not None + else (CODE_STATUS_MODE_PAPER_LIKE if paper_like else CODE_STATUS_MODE_CORRECTED) ) - feature_itemids = set(feature_lookup) - relevant_itemids = feature_itemids | set(CODE_STATUS_ITEMIDS) - normalized_hadm_ids = _normalize_hadm_ids(all_hadm_ids) - hadm_filter = set(normalized_hadm_ids) if normalized_hadm_ids is not None 
else None - feature_to_hadm: dict[str, set[int]] = defaultdict(set) - code_status_positive: dict[int, int] = {} +def _iter_relevant_chartevent_csv_chunks( + chartevents_csv_path: Path | str, + *, + relevant_itemids: set[int], + hadm_filter: set[int] | None, + chunksize: int, +): for chunk in _iter_csv_chunks( chartevents_csv_path, - required_columns=["hadm_id", "itemid", "value", "icustay_id"], + required_columns=["hadm_id", "itemid", "value", "icustay_id", "charttime"], chunksize=chunksize, ): chunk["hadm_id"] = pd.to_numeric(chunk["hadm_id"], errors="coerce") @@ -1021,76 +1667,363 @@ def build_chartevent_artifacts_from_csv( continue chunk = chunk.loc[chunk["itemid"].isin(relevant_itemids)].copy() - if chunk.empty: - continue + if not chunk.empty: + yield chunk - feature_chunk = chunk.loc[chunk["itemid"].isin(feature_itemids)].copy() - if not feature_chunk.empty: - feature_chunk["label"] = feature_chunk["itemid"].map(feature_lookup) - feature_chunk["normalized_value"] = feature_chunk["value"].map(_normalize_token) - feature_chunk["display_label"] = feature_chunk["label"].map(_clean_feature_text) - feature_chunk["display_value"] = feature_chunk["value"].map(_clean_feature_text) - feature_chunk = feature_chunk.loc[ - (feature_chunk["normalized_value"] != "") - & (feature_chunk["display_label"] != "") - ].copy() - if not feature_chunk.empty: - feature_chunk["feature_name"] = ( - feature_chunk["display_label"] + ": " + feature_chunk["display_value"] - ) - unique_pairs = feature_chunk[["hadm_id", "feature_name"]].drop_duplicates() - for feature_name, group in unique_pairs.groupby("feature_name", sort=False): - feature_to_hadm[str(feature_name)].update(group["hadm_id"].astype(int).tolist()) - code_chunk = chunk.loc[chunk["itemid"].isin(CODE_STATUS_ITEMIDS)].copy() - if not code_chunk.empty: - normalized_value = code_chunk["value"].map(_normalize_token) - positives = normalized_value.apply( - lambda value: int( - ("dnr" in value) - or ("dni" in value) - or ("comfort" in 
value) - or ("cmo" in value) - ) - ) - for hadm_id, is_positive in zip(code_chunk["hadm_id"].astype(int), positives): - code_status_positive[hadm_id] = max( - code_status_positive.get(hadm_id, 0), - int(is_positive), - ) +def _accumulate_normal_feature_rows( + feature_chunk: pd.DataFrame, + *, + item_label_lookup: Mapping[int, str], + label_display_lookup: Mapping[str, str], + feature_to_hadm: dict[tuple[str, str], set[int]], + feature_value_display_lookup: dict[tuple[str, str], str], +) -> None: + if feature_chunk.empty: + return + + working = feature_chunk.copy() + working["normalized_label"] = working["itemid"].map(item_label_lookup) + working["normalized_value"] = working["value"].map(_normalize_token) + working["display_label"] = working["normalized_label"].map(label_display_lookup) + working["display_value"] = working["value"].map(_clean_feature_text) + working = working.loc[ + (working["normalized_value"] != "") & (working["display_label"] != "") + ].copy() + if working.empty: + return + + keep_mask = ~working.apply( + lambda row: _is_numeric_heavy_or_freeform_feature_value( + str(getattr(row, "normalized_value")), + str(getattr(row, "display_value")), + ), + axis=1, + ) + working = working.loc[keep_mask].copy() + if working.empty: + return + + unique_pairs = working[ + ["hadm_id", "normalized_label", "normalized_value", "display_value"] + ].drop_duplicates() + for row in unique_pairs.itertuples(index=False): + key = (str(row.normalized_label), str(row.normalized_value)) + feature_value_display_lookup[key] = _choose_preferred_feature_text( + [feature_value_display_lookup.get(key, ""), str(row.display_value)] + ) + feature_to_hadm[key].add(int(row.hadm_id)) - if normalized_hadm_ids is not None: - hadm_ids = normalized_hadm_ids - else: - hadm_ids = sorted(set().union(*feature_to_hadm.values())) if feature_to_hadm else [] - feature_names = sorted(feature_to_hadm) - feature_data: dict[str, object] = {"hadm_id": hadm_ids} +def _accumulate_paper_like_feature_rows( 
+ feature_chunk: pd.DataFrame, + *, + item_raw_label_lookup: Mapping[int, str], + feature_to_hadm: dict[str, set[int]], +) -> None: + if feature_chunk.empty: + return + + working = feature_chunk.copy() + working["label"] = working["itemid"].map(item_raw_label_lookup) + paper_like_chunk = _paper_like_feature_sets_from_rows( + working[["hadm_id", "label", "value"]] + ) + for feature_name, hadm_ids in paper_like_chunk.items(): + feature_to_hadm[feature_name].update(hadm_ids) + + +def _finalize_normal_feature_matrix( + *, + feature_to_hadm: Mapping[tuple[str, str], set[int]], + hadm_ids: Sequence[int], + label_display_lookup: Mapping[str, str], + feature_value_display_lookup: Mapping[tuple[str, str], str], +) -> pd.DataFrame: + feature_keys = sorted( + feature_to_hadm, + key=lambda key: _feature_display_name( + key[0], + key[1], + label_display_lookup, + feature_value_display_lookup, + ).lower(), + ) + feature_data: dict[str, object] = {"hadm_id": list(hadm_ids)} hadm_index = pd.Index(hadm_ids) - for feature_name in feature_names: - feature_data[feature_name] = hadm_index.isin(feature_to_hadm[feature_name]).astype(int) + for feature_key in feature_keys: + feature_name = _feature_display_name( + feature_key[0], + feature_key[1], + label_display_lookup, + feature_value_display_lookup, + ) + feature_data[feature_name] = hadm_index.isin( + feature_to_hadm[feature_key] + ).astype(int) + feature_matrix = pd.DataFrame(feature_data) if "hadm_id" not in feature_matrix.columns: feature_matrix = pd.DataFrame(columns=["hadm_id"]) - feature_matrix = ( + return ( feature_matrix.sort_values("hadm_id") .drop_duplicates("hadm_id") .reset_index(drop=True) ) - code_status_targets = pd.DataFrame( - { - "hadm_id": sorted(code_status_positive), - "code_status_dnr_dni_cmo": [ - int(code_status_positive[hadm_id]) for hadm_id in sorted(code_status_positive) - ], - } + +def _build_normal_feature_matrix_from_events( + events: pd.DataFrame, + *, + items: pd.DataFrame, + normalized_hadm_ids: 
list[int] | None, +) -> pd.DataFrame: + item_label_lookup, label_display_lookup = _build_feature_label_metadata(items) + merged = events.merge( + items[["itemid", "normalized_label"]], + on="itemid", + how="inner", + validate="many_to_one", ) - code_status_targets = ( - code_status_targets.sort_values("hadm_id") + feature_to_hadm: dict[tuple[str, str], set[int]] = defaultdict(set) + feature_value_display_lookup: dict[tuple[str, str], str] = {} + _accumulate_normal_feature_rows( + merged[["hadm_id", "itemid", "value", "normalized_label"]], + item_label_lookup=item_label_lookup, + label_display_lookup=label_display_lookup, + feature_to_hadm=feature_to_hadm, + feature_value_display_lookup=feature_value_display_lookup, + ) + + if normalized_hadm_ids is not None: + hadm_ids = normalized_hadm_ids + else: + hadm_ids = ( + sorted(set().union(*feature_to_hadm.values())) if feature_to_hadm else [] + ) + return _finalize_normal_feature_matrix( + feature_to_hadm=feature_to_hadm, + hadm_ids=hadm_ids, + label_display_lookup=label_display_lookup, + feature_value_display_lookup=feature_value_display_lookup, + ) + + +def _build_paper_like_feature_matrix_from_events( + events: pd.DataFrame, + *, + items: pd.DataFrame, + normalized_hadm_ids: list[int] | None, +) -> pd.DataFrame: + merged = events.merge( + items[["itemid", "label"]], + on="itemid", + how="inner", + validate="many_to_one", + ) + feature_to_hadm = _paper_like_feature_sets_from_rows( + merged[["hadm_id", "label", "value"]] + ) + if normalized_hadm_ids is not None: + hadm_ids = normalized_hadm_ids + else: + hadm_ids = ( + sorted(set().union(*feature_to_hadm.values())) if feature_to_hadm else [] + ) + return _binary_feature_matrix_from_feature_sets(feature_to_hadm, hadm_ids) + + +def _accumulate_corrected_code_status_rows( + code_chunk: pd.DataFrame, + *, + code_status_positive: dict[int, int], + code_status_latest: dict[int, tuple[tuple[int, int, int], int]], + code_status_event_order_start: int, +) -> int: + if 
code_chunk.empty: + return code_status_event_order_start + + if "charttime" not in code_chunk.columns: + positives = code_chunk["value"].map( + lambda value: int(_task_is_positive_code_status_value(value)) + ) + for hadm_id, is_positive in zip(code_chunk["hadm_id"].astype(int), positives): + code_status_positive[hadm_id] = max( + code_status_positive.get(hadm_id, 0), int(is_positive) + ) + return code_status_event_order_start + + event_order = code_status_event_order_start + working = code_chunk.copy() + working["charttime"] = pd.to_datetime(working["charttime"], errors="coerce") + for row in working.itertuples(index=False): + hadm_id = int(row.hadm_id) + label = int(_task_is_positive_code_status_value(getattr(row, "value"))) + charttime = getattr(row, "charttime") + has_charttime = int(not pd.isna(charttime)) + charttime_value = int(charttime.value) if has_charttime else -1 + sort_key = (has_charttime, charttime_value, event_order) + event_order += 1 + previous = code_status_latest.get(hadm_id) + if previous is None or sort_key > previous[0]: + code_status_latest[hadm_id] = (sort_key, label) + return event_order + + +def _accumulate_paper_like_code_status_rows( + code_chunk: pd.DataFrame, + *, + current_label: int | None, + targets: dict[int, int], +) -> int | None: + for row in code_chunk.itertuples(index=False): + hadm_id = int(getattr(row, "hadm_id")) + current_label = _task_advance_paper_like_code_status_label( + current_label, + getattr(row, "value"), + ) + if current_label is not None: + targets[hadm_id] = int(current_label) + return current_label + + +def _finalize_code_status_targets( + *, + normalized_code_status_mode: str, + code_status_positive: Mapping[int, int], + code_status_latest: Mapping[int, tuple[tuple[int, int, int], int]], + code_status_paper_like: Mapping[int, int], +) -> pd.DataFrame: + if normalized_code_status_mode == CODE_STATUS_MODE_PAPER_LIKE: + target_map = code_status_paper_like + values = [int(target_map[hadm_id]) for hadm_id in 
sorted(target_map)] + elif code_status_latest: + target_map = code_status_latest + values = [int(target_map[hadm_id][1]) for hadm_id in sorted(target_map)] + else: + target_map = code_status_positive + values = [int(target_map[hadm_id]) for hadm_id in sorted(target_map)] + + return ( + pd.DataFrame( + { + "hadm_id": sorted(target_map), + "code_status_dnr_dni_cmo": values, + } + ) + .sort_values("hadm_id") .drop_duplicates("hadm_id") .reset_index(drop=True) ) + + +def build_chartevent_artifacts_from_csv( + chartevents_csv_path: Path | str, + d_items: pd.DataFrame, + allowed_labels: Iterable[str] | None = None, + all_hadm_ids: Iterable[int] | None = None, + chunksize: int = 500_000, + paper_like: bool = False, + code_status_mode: str | None = None, +) -> tuple[pd.DataFrame, pd.DataFrame]: + """Build the feature matrix and code-status targets from a large CSV in chunks.""" + + items = _filter_chartevent_items( + d_items, + allowed_labels=allowed_labels, + paper_like=paper_like, + ) + item_label_lookup = _build_feature_label_metadata(items)[0] + item_raw_label_lookup = ( + items.drop_duplicates("itemid").set_index("itemid")["label"].to_dict() + ) + feature_itemids = set(item_label_lookup) + relevant_itemids = feature_itemids | set(CODE_STATUS_ITEMIDS) + normalized_hadm_ids = _normalize_hadm_ids(all_hadm_ids) + hadm_filter = set(normalized_hadm_ids) if normalized_hadm_ids is not None else None + normalized_code_status_mode = _resolve_chartevent_code_status_mode( + paper_like=paper_like, + code_status_mode=code_status_mode, + ) + + paper_like_feature_to_hadm: dict[str, set[int]] = defaultdict(set) + feature_to_hadm: dict[tuple[str, str], set[int]] = defaultdict(set) + feature_value_display_lookup: dict[tuple[str, str], str] = {} + code_status_positive: dict[int, int] = {} + code_status_latest: dict[int, tuple[tuple[int, int, int], int]] = {} + code_status_paper_like: dict[int, int] = {} + code_status_current_label: int | None = None + code_status_event_order = 0 + + 
item_label_lookup, label_display_lookup = _build_feature_label_metadata(items) + for chunk in _iter_relevant_chartevent_csv_chunks( + chartevents_csv_path, + relevant_itemids=relevant_itemids, + hadm_filter=hadm_filter, + chunksize=chunksize, + ): + feature_chunk = chunk.loc[chunk["itemid"].isin(feature_itemids)].copy() + if paper_like: + _accumulate_paper_like_feature_rows( + feature_chunk, + item_raw_label_lookup=item_raw_label_lookup, + feature_to_hadm=paper_like_feature_to_hadm, + ) + else: + _accumulate_normal_feature_rows( + feature_chunk, + item_label_lookup=item_label_lookup, + label_display_lookup=label_display_lookup, + feature_to_hadm=feature_to_hadm, + feature_value_display_lookup=feature_value_display_lookup, + ) + + code_chunk = chunk.loc[chunk["itemid"].isin(CODE_STATUS_ITEMIDS)].copy() + if normalized_code_status_mode == CODE_STATUS_MODE_PAPER_LIKE: + code_status_current_label = _accumulate_paper_like_code_status_rows( + code_chunk, + current_label=code_status_current_label, + targets=code_status_paper_like, + ) + else: + code_status_event_order = _accumulate_corrected_code_status_rows( + code_chunk, + code_status_positive=code_status_positive, + code_status_latest=code_status_latest, + code_status_event_order_start=code_status_event_order, + ) + + if normalized_hadm_ids is not None: + hadm_ids = normalized_hadm_ids + elif paper_like: + hadm_ids = ( + sorted(set().union(*paper_like_feature_to_hadm.values())) + if paper_like_feature_to_hadm + else [] + ) + else: + hadm_ids = ( + sorted(set().union(*feature_to_hadm.values())) if feature_to_hadm else [] + ) + + if paper_like: + feature_matrix = _binary_feature_matrix_from_feature_sets( + paper_like_feature_to_hadm, hadm_ids + ) + else: + feature_matrix = _finalize_normal_feature_matrix( + feature_to_hadm=feature_to_hadm, + hadm_ids=hadm_ids, + label_display_lookup=label_display_lookup, + feature_value_display_lookup=feature_value_display_lookup, + ) + + code_status_targets = 
_finalize_code_status_targets( + normalized_code_status_mode=normalized_code_status_mode, + code_status_positive=code_status_positive, + code_status_latest=code_status_latest, + code_status_paper_like=code_status_paper_like, + ) return feature_matrix, code_status_targets @@ -1099,66 +2032,53 @@ def build_chartevent_feature_matrix( d_items: pd.DataFrame, allowed_labels: Iterable[str] | None = None, all_hadm_ids: Iterable[int] | None = None, + paper_like: bool = False, ) -> pd.DataFrame: """Build a binary admission-by-feature matrix from selected chart events.""" - _require_columns(chartevents, ["hadm_id", "itemid", "value", "icustay_id"], "chartevents") - _require_columns(d_items, ["itemid", "label", "dbsource"], "d_items") + _require_columns( + chartevents, ["hadm_id", "itemid", "value", "icustay_id"], "chartevents" + ) events = chartevents.copy() - items = d_items.copy() - items["normalized_label"] = items["label"].map(_normalize_token) - - if allowed_labels is not None: - allowed = {_normalize_token(label) for label in allowed_labels} - items = items.loc[items["normalized_label"].isin(allowed)].copy() - else: - allowed_itemids = identify_table2_itemids(items) - items = items.loc[items["itemid"].isin(allowed_itemids)].copy() - - merged = events.merge( - items[["itemid", "label", "normalized_label"]], - on="itemid", - how="inner", - validate="many_to_one", + items = _filter_chartevent_items( + d_items, + allowed_labels=allowed_labels, + paper_like=paper_like, ) - merged["normalized_value"] = merged["value"].map(_normalize_token) - merged["display_label"] = merged["label"].map(_clean_feature_text) - merged["display_value"] = merged["value"].map(_clean_feature_text) - merged = merged.loc[ - (merged["normalized_value"] != "") & (merged["display_label"] != "") - ].copy() - if merged.empty: - result = pd.DataFrame(columns=["hadm_id"]) - else: - merged["feature_name"] = merged["display_label"] + ": " + merged["display_value"] - pivot = ( - merged.assign(feature_value=1) 
- .pivot_table( - index="hadm_id", - columns="feature_name", - values="feature_value", - aggfunc="max", - fill_value=0, - ) - .reset_index() + normalized_hadm_ids = ( + _normalize_hadm_ids(all_hadm_ids) if all_hadm_ids is not None else None + ) + if paper_like and normalized_hadm_ids is not None: + events["hadm_id"] = pd.to_numeric(events["hadm_id"], errors="coerce") + events = events.dropna(subset=["hadm_id"]).copy() + events["hadm_id"] = events["hadm_id"].astype(int) + events = events.loc[events["hadm_id"].isin(set(normalized_hadm_ids))].copy() + + if paper_like: + return _build_paper_like_feature_matrix_from_events( + events, + items=items, + normalized_hadm_ids=normalized_hadm_ids, ) - pivot.columns.name = None - result = pivot + result = _build_normal_feature_matrix_from_events( + events, + items=items, + normalized_hadm_ids=normalized_hadm_ids, + ) if all_hadm_ids is not None: - hadm_frame = pd.DataFrame({"hadm_id": list(all_hadm_ids)}) - result = hadm_frame.merge(result, on="hadm_id", how="left") - - if "hadm_id" not in result.columns: - result = pd.DataFrame(columns=["hadm_id"]) + result = pd.DataFrame({"hadm_id": list(all_hadm_ids)}).merge( + result, on="hadm_id", how="left" + ) feature_cols = [col for col in result.columns if col != "hadm_id"] if feature_cols: result[feature_cols] = result[feature_cols].fillna(0).astype(int) - result = result.sort_values("hadm_id").drop_duplicates("hadm_id") - return result.reset_index(drop=True) + return ( + result.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True) + ) def build_chartevent_feature_matrix_from_csv( @@ -1167,6 +2087,7 @@ def build_chartevent_feature_matrix_from_csv( allowed_labels: Iterable[str] | None = None, all_hadm_ids: Iterable[int] | None = None, chunksize: int = 500_000, + paper_like: bool = False, ) -> pd.DataFrame: """Build the binary feature matrix from a large chartevents CSV in chunks.""" @@ -1176,37 +2097,11 @@ def build_chartevent_feature_matrix_from_csv( 
allowed_labels=allowed_labels, all_hadm_ids=all_hadm_ids, chunksize=chunksize, + paper_like=paper_like, ) return feature_matrix -def z_normalize_scores( - df: pd.DataFrame, - columns: Sequence[str] | None = None, -) -> pd.DataFrame: - """Apply independent z-score normalization to the requested score columns.""" - - normalized = df.copy() - if columns is None: - score_columns = [ - column - for column in normalized.columns - if column != "hadm_id" and (column.endswith("_score") or column.endswith("_score_z")) - ] - else: - score_columns = list(columns) - for column in score_columns: - _require_columns(normalized, [column], "score_table") - values = normalized[column].astype(float) - mean = values.mean() - std = values.std(ddof=0) - if pd.isna(std) or std == 0: - normalized[column] = 0.0 - else: - normalized[column] = (values - mean) / std - return normalized - - def build_acuity_scores(oasis: pd.DataFrame, sapsii: pd.DataFrame) -> pd.DataFrame: """Aggregate OASIS and SAPS II to one admission-level row per hadm_id.""" @@ -1220,135 +2115,6 @@ def build_acuity_scores(oasis: pd.DataFrame, sapsii: pd.DataFrame) -> pd.DataFra return acuity.reset_index(drop=True) -def build_proxy_probability_scores( - feature_matrix: pd.DataFrame, - note_labels: pd.DataFrame, - label_column: str, - estimator_factory: Callable[[], object] | None = None, -) -> pd.DataFrame: - """Fit the proxy label model and return positive-class probabilities.""" - - _require_columns(feature_matrix, ["hadm_id"], "feature_matrix") - _require_columns(note_labels, ["hadm_id", label_column], "note_labels") - - feature_columns = [column for column in feature_matrix.columns if column != "hadm_id"] - merged = feature_matrix.merge( - note_labels[["hadm_id", label_column]], - on="hadm_id", - how="inner", - validate="one_to_one", - ).sort_values("hadm_id") - - feature_values = merged[feature_columns] - y = merged[label_column].astype(int) - - if estimator_factory is None: - estimator = LogisticRegression( - 
penalty="l1", - C=DEFAULT_LOGISTIC_C, - solver="liblinear", - max_iter=1000, - ) - else: - estimator = estimator_factory() - - estimator.fit(feature_values, y) - probabilities = estimator.predict_proba(feature_values) - score_column = _score_column_name(label_column) - - scores = pd.DataFrame( - { - "hadm_id": merged["hadm_id"].tolist(), - score_column: _extract_positive_class_probabilities(probabilities).astype(float), - } - ) - scores = scores.sort_values("hadm_id").drop_duplicates("hadm_id") - return scores.reset_index(drop=True) - - -def build_negative_sentiment_scores( - note_corpus: pd.DataFrame, - sentiment_fn: Callable[[str], tuple[float, float]] | None = None, -) -> pd.DataFrame: - """Convert note sentiment polarity into an admission-level mistrust score.""" - - _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus") - - if sentiment_fn is None: - sentiment_fn = pattern_sentiment - - rows = [] - for row in note_corpus.sort_values("hadm_id").itertuples(index=False): - text = prepare_note_text_for_sentiment(row.note_text) - if text == "": - score = 0.0 - else: - polarity, _ = sentiment_fn(text) - score = -1.0 * float(polarity) - rows.append({"hadm_id": row.hadm_id, "negative_sentiment_score": score}) - - scores = pd.DataFrame(rows).sort_values("hadm_id").drop_duplicates("hadm_id") - return scores.reset_index(drop=True) - - -def build_mistrust_score_table( - feature_matrix: pd.DataFrame, - note_labels: pd.DataFrame, - note_corpus: pd.DataFrame, - estimator_factory: Callable[[], object] | None = None, - sentiment_fn: Callable[[str], tuple[float, float]] | None = None, -) -> pd.DataFrame: - """Build and normalize the three admission-level mistrust score vectors.""" - - _require_columns(feature_matrix, ["hadm_id"], "feature_matrix") - _require_columns( - note_labels, - ["hadm_id", "noncompliance_label", "autopsy_label"], - "note_labels", - ) - _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus") - - noncompliance_scores = 
build_proxy_probability_scores( - feature_matrix=feature_matrix, - note_labels=note_labels, - label_column="noncompliance_label", - estimator_factory=estimator_factory, - ) - autopsy_scores = build_proxy_probability_scores( - feature_matrix=feature_matrix, - note_labels=note_labels, - label_column="autopsy_label", - estimator_factory=estimator_factory, - ) - negative_sentiment_scores = build_negative_sentiment_scores( - note_corpus, - sentiment_fn=sentiment_fn, - ) - - merged = ( - noncompliance_scores.merge(autopsy_scores, on="hadm_id", how="inner") - .merge(negative_sentiment_scores, on="hadm_id", how="inner") - .sort_values("hadm_id") - ) - normalized = z_normalize_scores( - merged, - columns=[ - "noncompliance_score", - "autopsy_score", - "negative_sentiment_score", - ], - ) - normalized = normalized.rename( - columns={ - "noncompliance_score": "noncompliance_score_z", - "autopsy_score": "autopsy_score_z", - "negative_sentiment_score": "negative_sentiment_score_z", - } - ) - normalized = normalized.sort_values("hadm_id").drop_duplicates("hadm_id") - return normalized.reset_index(drop=True) - - def _build_gender_one_hot(df: pd.DataFrame) -> pd.DataFrame: output = pd.DataFrame({"hadm_id": df["hadm_id"]}) gender = df["gender"].fillna("").str.upper() @@ -1359,7 +2125,9 @@ def _build_gender_one_hot(df: pd.DataFrame) -> pd.DataFrame: def _build_insurance_one_hot(df: pd.DataFrame) -> pd.DataFrame: output = pd.DataFrame({"hadm_id": df["hadm_id"]}) - insurance_column = "insurance_group" if "insurance_group" in df.columns else "insurance" + insurance_column = ( + "insurance_group" if "insurance_group" in df.columns else "insurance" + ) insurance = df[insurance_column].fillna("") output["insurance_private"] = (insurance == INSURANCE_PRIVATE).astype(int) output["insurance_public"] = (insurance == INSURANCE_PUBLIC).astype(int) @@ -1379,15 +2147,10 @@ def _build_race_one_hot(df: pd.DataFrame) -> pd.DataFrame: return output -def _build_code_status_target(chartevents: 
pd.DataFrame, d_items: pd.DataFrame) -> pd.DataFrame: - _require_columns(chartevents, ["hadm_id", "itemid", "value", "icustay_id"], "chartevents") - _require_columns(d_items, ["itemid", "label", "dbsource"], "d_items") - return _build_task_code_status_target(chartevents, itemids=CODE_STATUS_ITEMIDS) - - def build_code_status_target_from_csv( chartevents_csv_path: Path | str, chunksize: int = 500_000, + code_status_mode: str = CODE_STATUS_MODE_CORRECTED, ) -> pd.DataFrame: """Build the code-status target from a large chartevents CSV in chunks.""" @@ -1396,6 +2159,7 @@ def build_code_status_target_from_csv( d_items=pd.DataFrame(columns=["itemid", "label", "dbsource"]), all_hadm_ids=None, chunksize=chunksize, + code_status_mode=code_status_mode, ) return code_status_targets @@ -1419,7 +2183,7 @@ def _assemble_final_model_table( _require_columns(all_cohort, ["hadm_id"], "all_cohort") _require_columns( admissions, - ["hadm_id", "discharge_location", "hospital_expire_flag"], + ["hadm_id", "subject_id", "discharge_location", "hospital_expire_flag"], "admissions", ) _require_columns( @@ -1435,16 +2199,32 @@ def _assemble_final_model_table( _require_columns(code_status, ["hadm_id", "code_status_dnr_dni_cmo"], "code_status") cohort_hadm = pd.DataFrame( - {"hadm_id": sorted(pd.to_numeric(all_cohort["hadm_id"], errors="coerce").dropna().astype(int).unique())} + { + "hadm_id": sorted( + pd.to_numeric(all_cohort["hadm_id"], errors="coerce") + .dropna() + .astype(int) + .unique() + ) + } ) demo = cohort_hadm.merge(demographics, on="hadm_id", how="left") final = cohort_hadm.copy() + final = final.merge( + admissions[["hadm_id", "subject_id"]].drop_duplicates("hadm_id"), + on="hadm_id", + how="left", + ) final = final.merge( demo[["hadm_id", "age", "los_days"]], on="hadm_id", how="left", ) + for col in ("age", "los_days"): + std = final[col].std(ddof=0) + if std > 0: + final[col] = (final[col] - final[col].mean()) / std final = final.merge(_build_gender_one_hot(demo), on="hadm_id", 
how="left") final = final.merge(_build_insurance_one_hot(demo), on="hadm_id", how="left") @@ -1468,7 +2248,7 @@ def _assemble_final_model_table( final["code_status_dnr_dni_cmo"] = pd.to_numeric( final["code_status_dnr_dni_cmo"], errors="coerce", - ).fillna(0).astype(int) + ).astype("Int64") fill_zero_columns = [ "gender_f", @@ -1477,7 +2257,6 @@ def _assemble_final_model_table( "insurance_public", "insurance_self_pay", "left_ama", - "code_status_dnr_dni_cmo", "in_hospital_mortality", ] if include_race: @@ -1495,6 +2274,12 @@ def _assemble_final_model_table( if column in final.columns: final[column] = final[column].fillna(0).astype(int) + if final["subject_id"].isna().any(): + raise ValueError( + "Final model table contains null subject_id values after admissions merge." + ) + final["subject_id"] = pd.to_numeric(final["subject_id"], errors="raise").astype(int) + final = final.sort_values("hadm_id").drop_duplicates("hadm_id") return final.reset_index(drop=True) @@ -1509,13 +2294,25 @@ def build_final_model_table( # pylint: disable=too-many-arguments,too-many-posi include_race: bool = True, include_mistrust: bool = True, ) -> pd.DataFrame: - """Assemble baseline, optional race, mistrust, and target columns.""" - code_status = _build_code_status_target(chartevents, d_items) - return _assemble_final_model_table( + """Assemble the final model table from raw chartevents. + + ``d_items`` is retained for API compatibility; the normal path uses the + fixed code-status itemids defined by the task layer. 
+ """ + _require_columns( + chartevents, ["hadm_id", "itemid", "value", "icustay_id"], "chartevents" + ) + del d_items + code_status = _build_task_code_status_target( + chartevents, + itemids=CODE_STATUS_ITEMIDS, + code_status_mode=CODE_STATUS_MODE_CORRECTED, + ) + return build_final_model_table_from_code_status_targets( demographics=demographics, all_cohort=all_cohort, admissions=admissions, - code_status=code_status, + code_status_targets=code_status, mistrust_scores=mistrust_scores, include_race=include_race, include_mistrust=include_mistrust, @@ -1544,7 +2341,9 @@ def build_final_model_table_from_code_status_targets( # pylint: disable=too-man ) -def write_minimal_deliverables(artifacts: dict[str, pd.DataFrame], output_dir: Path | str) -> None: +def write_minimal_deliverables( + artifacts: dict[str, pd.DataFrame], output_dir: Path | str +) -> None: """Write the required CSV deliverables to disk without index columns.""" output_path = Path(output_dir) @@ -1661,3 +2460,73 @@ def validate_database_environment( # pylint: disable=too-many-locals "materialized_views": sorted(materialized_views.keys()), "supports_multiple_icustays_per_hadm": supports_multiple_icustays, } + + +# --------------------------------------------------------------------------- +# Compatibility Wrappers +# --------------------------------------------------------------------------- + + +def _call_model_compat(function_name: str, /, **kwargs): + """Delegate deprecated dataset wrappers to the model-owned implementation.""" + + model_module = _load_eol_mistrust_model_module() + return getattr(model_module, function_name)(**kwargs) + + +def z_normalize_scores( + df: pd.DataFrame, + columns: Sequence[str] | None = None, +) -> pd.DataFrame: + """Deprecated wrapper around the model-owned score normalization helper.""" + + return _call_model_compat("z_normalize_scores", score_table=df, columns=columns) + + +def build_proxy_probability_scores( + feature_matrix: pd.DataFrame, + note_labels: pd.DataFrame, 
+ label_column: str, + estimator_factory: Callable[[], object] | None = None, +) -> pd.DataFrame: + """Deprecated wrapper around the model-owned proxy score helper.""" + + return _call_model_compat( + "build_proxy_probability_scores", + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column=label_column, + estimator_factory=estimator_factory, + ) + + +def build_negative_sentiment_scores( + note_corpus: pd.DataFrame, + sentiment_fn: Callable[[str], tuple[float, float]] | None = None, +) -> pd.DataFrame: + """Deprecated wrapper around the model-owned sentiment score helper.""" + + return _call_model_compat( + "build_negative_sentiment_mistrust_scores", + note_corpus=note_corpus, + sentiment_fn=sentiment_fn, + ) + + +def build_mistrust_score_table( + feature_matrix: pd.DataFrame, + note_labels: pd.DataFrame, + note_corpus: pd.DataFrame, + estimator_factory: Callable[[], object] | None = None, + sentiment_fn: Callable[[str], tuple[float, float]] | None = None, +) -> pd.DataFrame: + """Deprecated wrapper around the model-owned mistrust table builder.""" + + return _call_model_compat( + "build_mistrust_score_table", + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + estimator_factory=estimator_factory, + sentiment_fn=sentiment_fn, + ) diff --git a/pyhealth/models/eol_mistrust.py b/pyhealth/models/eol_mistrust.py index 41ce62aa3..ee971ce18 100644 --- a/pyhealth/models/eol_mistrust.py +++ b/pyhealth/models/eol_mistrust.py @@ -14,10 +14,17 @@ from __future__ import annotations +import argparse import importlib +import json +import os +import warnings from collections import OrderedDict +from datetime import datetime from itertools import combinations -from typing import Callable, Iterable, Mapping, Sequence +from pathlib import Path +import time +from typing import Callable, Iterable, Mapping, Sequence, TypedDict import numpy as np import pandas as pd @@ -30,9 +37,10 @@ pearsonr = None try: - from sklearn.linear_model 
import LogisticRegression # pylint: disable=import-error + from sklearn.linear_model import LogisticRegression, LogisticRegressionCV # pylint: disable=import-error from sklearn.metrics import roc_auc_score # pylint: disable=import-error - from sklearn.model_selection import train_test_split # pylint: disable=import-error + from sklearn.model_selection import GroupShuffleSplit, train_test_split # pylint: disable=import-error + from sklearn.preprocessing import StandardScaler # pylint: disable=import-error except ModuleNotFoundError: # pragma: no cover class LogisticRegression: # type: ignore[no-redef] """Fallback estimator preserving the sklearn constructor surface.""" @@ -59,6 +67,20 @@ def train_test_split(*args, **kwargs): # type: ignore[no-redef] "scikit-learn is required for downstream evaluation splits." ) + class LogisticRegressionCV: # type: ignore[no-redef] + def __init__(self, *args, **kwargs): + del args, kwargs + raise ModuleNotFoundError( + "scikit-learn is required for downstream LogisticRegressionCV tuning." + ) + + class GroupShuffleSplit: # type: ignore[no-redef] + def __init__(self, *args, **kwargs): + del args, kwargs + raise ModuleNotFoundError( + "scikit-learn is required for group-aware downstream evaluation splits." 
+ ) + def roc_auc_score(*args, **kwargs): # type: ignore[no-redef] del args, kwargs raise ModuleNotFoundError( @@ -69,6 +91,7 @@ def roc_auc_score(*args, **kwargs): # type: ignore[no-redef] RACE_WHITE = "WHITE" RACE_BLACK = "BLACK" DEFAULT_LOGISTIC_C = 0.1 +DownstreamEstimatorFactoryResolver = Callable[[str, str], Callable[[], object] | None] MISTRUST_SCORE_COLUMNS = [ "noncompliance_score_z", @@ -76,6 +99,13 @@ def roc_auc_score(*args, **kwargs): # type: ignore[no-redef] "negative_sentiment_score_z", ] +PROXY_LABEL_COLUMNS = OrderedDict( + [ + ("noncompliance", "noncompliance_label"), + ("autopsy", "autopsy_label"), + ] +) + BASELINE_FEATURE_COLUMNS = [ "age", "los_days", @@ -115,11 +145,24 @@ def roc_auc_score(*args, **kwargs): # type: ignore[no-redef] ) -_SENTIMENT_BACKEND: Callable[[str], tuple[float, float]] | None = None +DEFAULT_TRANSFORMERS_SENTIMENT_BATCH_SIZE = 64 +_SENTIMENT_BATCH_BACKEND: Callable[[Sequence[str]], list[tuple[float, float]]] | None = None -def _load_transformers_sentiment() -> Callable[[str], tuple[float, float]]: - """Load the project-standard transformers sentiment pipeline. + +def _parse_transformers_sentiment_output(result: Mapping[str, object]) -> tuple[float, float]: + """Convert a transformers pipeline output row into the repo sentiment tuple.""" + + label = str(result.get("label", "")).upper() + score = float(result.get("score", 0.0)) + polarity = score if "POS" in label else -score + return (polarity, 0.0) + + +def _load_transformers_sentiment_batch( + batch_size: int = DEFAULT_TRANSFORMERS_SENTIMENT_BATCH_SIZE, +) -> Callable[[Sequence[str]], list[tuple[float, float]]]: + """Load the project-standard transformers sentiment pipeline with batching. GPU is used first when CUDA is available; otherwise the backend falls back to CPU without changing the public scorer interface. 
@@ -148,29 +191,44 @@ def _load_transformers_sentiment() -> Callable[[str], tuple[float, float]]: device=device, ) - def _transformers_sentiment(text: str) -> tuple[float, float]: - cleaned = " ".join(str(text).split()) - if not cleaned: - return (0.0, 0.0) - result = classifier(cleaned[:2048], truncation=True)[0] - label = str(result.get("label", "")).upper() - score = float(result.get("score", 0.0)) - polarity = score if "POS" in label else -score - return (polarity, 0.0) + def _transformers_sentiment_batch(texts: Sequence[str]) -> list[tuple[float, float]]: + cleaned_texts = [_prepare_note_text_for_sentiment(text) for text in texts] + outputs = [(0.0, 0.0) for _ in cleaned_texts] - return _transformers_sentiment + non_empty_indices = [index for index, text in enumerate(cleaned_texts) if text] + if not non_empty_indices: + return outputs + + non_empty_texts = [cleaned_texts[index][:2048] for index in non_empty_indices] + batch_results = classifier( + non_empty_texts, + truncation=True, + batch_size=batch_size, + ) + + for index, result in zip(non_empty_indices, batch_results): + outputs[index] = _parse_transformers_sentiment_output(result) + return outputs + + return _transformers_sentiment_batch + + +def _load_transformers_sentiment() -> Callable[[str], tuple[float, float]]: + """Load the single-text transformers sentiment adapter.""" + def _transformers_sentiment(text: str) -> tuple[float, float]: + return _default_sentiment_batch_backend([text])[0] -def _default_sentiment_backend(text: str) -> tuple[float, float]: - """Resolve and cache the default transformers sentiment backend lazily.""" + return _transformers_sentiment - global _SENTIMENT_BACKEND - if _SENTIMENT_BACKEND is None: - _SENTIMENT_BACKEND = _load_transformers_sentiment() - return _SENTIMENT_BACKEND(text) +def _default_sentiment_batch_backend(texts: Sequence[str]) -> list[tuple[float, float]]: + """Resolve and cache the default batched transformers sentiment backend lazily.""" -pattern_sentiment = 
_default_sentiment_backend + global _SENTIMENT_BATCH_BACKEND + if _SENTIMENT_BATCH_BACKEND is None: + _SENTIMENT_BATCH_BACKEND = _load_transformers_sentiment_batch() + return _SENTIMENT_BATCH_BACKEND(texts) def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> None: @@ -186,15 +244,166 @@ def _prepare_note_text_for_sentiment(text) -> str: return " ".join(str(text).split()) +def _note_present_hadm_ids(note_corpus: pd.DataFrame) -> list[int]: + """Return sorted admission ids with at least one non-empty aggregated note.""" + + _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus") + present = note_corpus.copy() + note_text = present["note_text"].fillna("").astype(str).str.strip() + hadm_ids = pd.to_numeric(present.loc[note_text != "", "hadm_id"], errors="coerce") + return sorted(hadm_ids.dropna().astype(int).unique().tolist()) + + def _default_estimator_factory() -> object: return LogisticRegression( penalty="l1", C=DEFAULT_LOGISTIC_C, solver="liblinear", max_iter=1000, + tol=0.001, ) +def build_logistic_estimator_factory( + *, + C: float = DEFAULT_LOGISTIC_C, + class_weight: str | Mapping[int, float] | None = None, + penalty: str = "l1", + solver: str = "liblinear", + max_iter: int = 1000, + tol: float = 0.001, +) -> Callable[[], object]: + """Return a reusable sklearn logistic-regression factory.""" + + def _factory() -> object: + return LogisticRegression( + penalty=penalty, + C=C, + solver=solver, + class_weight=class_weight, + max_iter=max_iter, + tol=tol, + ) + + return _factory + + +class _AdaptiveLogisticRegressionCV: + """Binary logistic CV wrapper with fold count chosen from the train labels.""" + + def __init__( + self, + *, + Cs: Sequence[float], + class_weight: str | Mapping[int, float] | None = None, + penalty: str = "l1", + solver: str = "liblinear", + max_iter: int = 1000, + tol: float = 0.001, + scoring: str = "roc_auc", + max_cv_folds: int = 5, + ) -> None: + self.Cs = [float(value) for value in Cs] + 
self.class_weight = class_weight + self.penalty = penalty + self.solver = solver + self.max_iter = int(max_iter) + self.tol = float(tol) + self.scoring = scoring + self.max_cv_folds = int(max_cv_folds) + self.estimator_ = None + + def fit(self, X, y): + y_series = pd.Series(y).reset_index(drop=True).astype(int) + class_counts = y_series.value_counts(dropna=True) + min_class_count = int(class_counts.min()) if not class_counts.empty else 0 + + if min_class_count < 2: + fallback_c = self.Cs[0] if self.Cs else DEFAULT_LOGISTIC_C + estimator = LogisticRegression( + penalty=self.penalty, + C=fallback_c, + solver=self.solver, + class_weight=self.class_weight, + max_iter=self.max_iter, + tol=self.tol, + ) + else: + cv_folds = max(2, min(self.max_cv_folds, min_class_count)) + estimator = LogisticRegressionCV( + Cs=self.Cs, + penalty=self.penalty, + solver=self.solver, + class_weight=self.class_weight, + max_iter=self.max_iter, + tol=self.tol, + scoring=self.scoring, + cv=cv_folds, + refit=True, + ) + + self.estimator_ = estimator.fit(X, y_series) + self.coef_ = getattr(self.estimator_, "coef_", None) + self.C_ = getattr(self.estimator_, "C_", None) + self.classes_ = getattr(self.estimator_, "classes_", None) + return self + + def predict_proba(self, X): + if self.estimator_ is None: + raise AttributeError("Estimator has not been fitted yet.") + return self.estimator_.predict_proba(X) + + def __getattr__(self, name): + if name == "estimator_": + raise AttributeError(name) + if self.estimator_ is None: + raise AttributeError(name) + return getattr(self.estimator_, name) + + +def build_logistic_cv_estimator_factory( + *, + Cs: Sequence[float], + class_weight: str | Mapping[int, float] | None = None, + penalty: str = "l1", + solver: str = "liblinear", + max_iter: int = 1000, + tol: float = 0.001, + scoring: str = "roc_auc", + max_cv_folds: int = 5, +) -> Callable[[], object]: + """Return an adaptive LogisticRegressionCV factory for downstream use.""" + + candidate_cs = 
[float(value) for value in Cs] + + def _factory() -> object: + return _AdaptiveLogisticRegressionCV( + Cs=candidate_cs, + class_weight=class_weight, + penalty=penalty, + solver=solver, + max_iter=max_iter, + tol=tol, + scoring=scoring, + max_cv_folds=max_cv_folds, + ) + + return _factory + + +def _resolve_downstream_estimator_factory( + task_name: str, + config_name: str, + estimator_factory: Callable[[], object] | None, + downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None, +) -> Callable[[], object]: + if downstream_estimator_factory_resolver is not None: + resolved = downstream_estimator_factory_resolver(task_name, config_name) + if resolved is not None: + return resolved + return _default_estimator_factory if estimator_factory is None else estimator_factory + + def _extract_positive_class_probabilities(probabilities) -> np.ndarray: """Validate predict_proba output and return the positive-class column.""" @@ -212,6 +421,55 @@ def _score_column_name(label_column: str) -> str: return f"{label_column}_score" +class _ConstantProbabilityEstimator: + """Degenerate proxy estimator that predicts a constant positive-class probability.""" + + def __init__(self, positive_probability: float): + self.positive_probability = float(positive_probability) + self.fit_X = None + self.fit_y = None + self.classes_ = np.array([0, 1], dtype=int) + self.coef_ = np.zeros((1, 0), dtype=float) + self.intercept_ = np.array([0.0], dtype=float) + + def fit(self, X, y): + self.fit_X = X.copy() if hasattr(X, "copy") else X + self.fit_y = y.copy() if hasattr(y, "copy") else y + n_features = int(X.shape[1]) if hasattr(X, "shape") and len(X.shape) >= 2 else 0 + self.coef_ = np.zeros((1, n_features), dtype=float) + probability = self.positive_probability + if 0.0 < probability < 1.0: + self.intercept_ = np.array([float(np.log(probability / (1.0 - probability)))], dtype=float) + return self + + def predict_proba(self, X): + n_rows = len(X) + probability = 
self.positive_probability + return np.column_stack( + [ + np.full(n_rows, 1.0 - probability, dtype=float), + np.full(n_rows, probability, dtype=float), + ] + ) + + +def _warn_degenerate_proxy_training( + label_column: str, + class_values: Sequence[int], + n_rows: int, +) -> None: + if not class_values: + detail = "no joined training rows" + else: + detail = f"a single observed class ({class_values[0]})" + warnings.warn( + f"Proxy training for '{label_column}' has {detail} across {n_rows} rows; " + "returning constant probabilities and zero feature weights.", + UserWarning, + stacklevel=3, + ) + + def _iter_downstream_jobs( final_model_table: pd.DataFrame, feature_configurations: Mapping[str, Sequence[str]] | None = None, @@ -232,13 +490,152 @@ def _iter_downstream_jobs( _require_columns(final_model_table, [target_column], "final_model_table") for config_name, feature_columns in configs.items(): _require_columns(final_model_table, feature_columns, "final_model_table") - usable = final_model_table[["hadm_id", target_column, *feature_columns]].dropna().copy() + selected_columns = ["hadm_id", target_column, *feature_columns] + if "subject_id" in final_model_table.columns: + selected_columns.insert(1, "subject_id") + usable = final_model_table[selected_columns].dropna().copy() usable = usable.sort_values("hadm_id").reset_index(drop=True) y = pd.to_numeric(usable[target_column], errors="coerce") + n_pos = int((y == 1).sum()) + if n_pos < 10: + warnings.warn( + f"Downstream task '{task_name}' / config '{config_name}' has only " + f"{n_pos} positive examples in the cohort (minimum 10 recommended). 
" + "AUC results for this combination will be NaN.", + UserWarning, + stacklevel=2, + ) X = usable[feature_columns] yield task_name, target_column, config_name, feature_columns, usable, X, y +def _iter_downstream_jobs_with_estimators( + final_model_table: pd.DataFrame, + feature_configurations: Mapping[str, Sequence[str]] | None = None, + task_map: Mapping[str, str] | None = None, + estimator_factory: Callable[[], object] | None = None, + downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, +): + """Yield downstream jobs together with the resolved estimator factory.""" + + for task_name, target_column, config_name, feature_columns, usable, X, y in _iter_downstream_jobs( + final_model_table, + feature_configurations=feature_configurations, + task_map=task_map, + ): + yield ( + task_name, + target_column, + config_name, + feature_columns, + usable, + X, + y, + _resolve_downstream_estimator_factory( + task_name=task_name, + config_name=config_name, + estimator_factory=estimator_factory, + downstream_estimator_factory_resolver=downstream_estimator_factory_resolver, + ), + ) + + +def _downstream_split_with_optional_grouping( + X: pd.DataFrame, + y: pd.Series, + usable: pd.DataFrame, + *, + test_size: float, + random_state: int, + split_fn: Callable[..., tuple] | None = None, +) -> tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]: + """Split downstream rows, grouping by subject_id when available.""" + + if split_fn is not None: + return split_fn(X, y, test_size=test_size, random_state=random_state) + + if "subject_id" not in usable.columns: + return train_test_split( + X, + y, + test_size=test_size, + random_state=random_state, + ) + + groups = pd.to_numeric(usable["subject_id"], errors="coerce") + if groups.isna().any(): + raise ValueError("Downstream final_model_table contains null subject_id values.") + splitter = GroupShuffleSplit( + n_splits=1, + test_size=test_size, + random_state=random_state, + ) + train_idx, test_idx = 
next(splitter.split(X, y, groups)) + return ( + X.iloc[train_idx].copy(), + X.iloc[test_idx].copy(), + y.iloc[train_idx].copy(), + y.iloc[test_idx].copy(), + ) + + +def _iter_downstream_repetition_splits( + X: pd.DataFrame, + y: pd.Series, + usable: pd.DataFrame, + *, + repetitions: int, + test_size: float, + split_fn: Callable[..., tuple] | None = None, +): + """Yield downstream train/test splits, using ``None`` for invalid repeats.""" + + for random_state in range(repetitions): + if usable.empty or y.nunique(dropna=True) < 2: + yield None + continue + + X_train, X_test, y_train, y_test = _downstream_split_with_optional_grouping( + X, + y, + usable, + test_size=test_size, + random_state=random_state, + split_fn=split_fn, + ) + y_train = pd.Series(y_train) + y_test = pd.Series(y_test) + if y_train.nunique(dropna=True) < 2 or y_test.nunique(dropna=True) < 2: + yield None + continue + + yield X_train, X_test, y_train, y_test + + +def _standardize_downstream_features( + X_train: pd.DataFrame, + X_test: pd.DataFrame, + feature_columns: Sequence[str], +) -> tuple[pd.DataFrame, pd.DataFrame]: + """Standardize downstream train/test features while preserving DataFrame columns.""" + + scaler = StandardScaler() + train_index = getattr(X_train, "index", None) + test_index = getattr(X_test, "index", None) + return ( + pd.DataFrame( + scaler.fit_transform(X_train), + columns=feature_columns, + index=train_index, + ), + pd.DataFrame( + scaler.transform(X_test), + columns=feature_columns, + index=test_index, + ), + ) + + def _prepare_proxy_training_frame( feature_matrix: pd.DataFrame, note_labels: pd.DataFrame, @@ -338,7 +735,7 @@ def get_downstream_feature_configurations() -> OrderedDict[str, list[str]]: def get_downstream_task_map() -> OrderedDict[str, str]: - """Return the three required downstream prediction targets.""" + """Return the required downstream prediction targets.""" return OrderedDict(DOWNSTREAM_TASK_MAP) @@ -349,11 +746,24 @@ def fit_proxy_mistrust_model( 
label_column: str, estimator_factory: Callable[[], object] | None = None, ): - """Fit the L1 logistic proxy model on the full ALL cohort.""" + """Fit the L1 logistic proxy model on the labeled subset only. + + Rows where ``label_column`` is NaN are excluded from training. + """ merged, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column) + labeled_mask = merged[label_column].notna() + train = merged.loc[labeled_mask].copy() + train_labels = train[label_column].astype(int) + observed_classes = sorted(pd.unique(train_labels).tolist()) + + if train.empty or len(observed_classes) < 2: + _warn_degenerate_proxy_training(label_column, observed_classes, len(train)) + probability = float(observed_classes[0]) if observed_classes else 0.0 + return _ConstantProbabilityEstimator(probability).fit(train[feature_columns], train_labels) + estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory() - estimator.fit(merged[feature_columns], merged[label_column].astype(int)) + estimator.fit(train[feature_columns], train_labels) return estimator @@ -363,18 +773,38 @@ def build_proxy_probability_scores( label_column: str, estimator_factory: Callable[[], object] | None = None, ) -> pd.DataFrame: - """Fit a proxy logistic model and return positive-class probabilities.""" + """Fit a proxy logistic model and return positive-class probabilities. + + Training uses only rows where ``label_column`` is not NaN (the labeled + cohort). Scoring uses the full ``feature_matrix`` so every admission + receives a score. This matches the reference notebook behavior where + autopsy proxy training uses only consent/decline admissions but scores + are produced for all patients. 
+ """ merged, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column) - estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory() - estimator.fit(merged[feature_columns], merged[label_column].astype(int)) - probabilities = estimator.predict_proba(merged[feature_columns]) - positive_class = _extract_positive_class_probabilities(probabilities) + score_column = _score_column_name(label_column) + + labeled_mask = merged[label_column].notna() + train = merged.loc[labeled_mask].copy() + train_labels = train[label_column].astype(int) + observed_classes = sorted(pd.unique(train_labels).tolist()) + + if train.empty or len(observed_classes) < 2: + _warn_degenerate_proxy_training(label_column, observed_classes, len(train)) + default_prob = float(observed_classes[0]) if observed_classes else 0.0 + positive_class = np.full(len(merged), default_prob, dtype=float) + else: + estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory() + estimator.fit(train[feature_columns], train_labels) + positive_class = _extract_positive_class_probabilities( + estimator.predict_proba(merged[feature_columns]) + ) scores = pd.DataFrame( { "hadm_id": merged["hadm_id"], - _score_column_name(label_column): positive_class.astype(float), + score_column: positive_class.astype(float), } ) return scores.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True) @@ -388,7 +818,7 @@ def build_noncompliance_mistrust_scores( return build_proxy_probability_scores( feature_matrix=feature_matrix, note_labels=note_labels, - label_column="noncompliance_label", + label_column=PROXY_LABEL_COLUMNS["noncompliance"], estimator_factory=estimator_factory, ) @@ -401,7 +831,7 @@ def build_autopsy_mistrust_scores( return build_proxy_probability_scores( feature_matrix=feature_matrix, note_labels=note_labels, - label_column="autopsy_label", + label_column=PROXY_LABEL_COLUMNS["autopsy"], 
estimator_factory=estimator_factory, ) @@ -410,16 +840,29 @@ def build_negative_sentiment_mistrust_scores( note_corpus: pd.DataFrame, sentiment_fn: Callable[[str], tuple[float, float]] | None = None, ) -> pd.DataFrame: - """Compute negative-sentiment mistrust from whitespace-tokenized note text.""" + """Compute negative-sentiment mistrust from whitespace-tokenized note text. + + Empty notes (after whitespace normalization) always score 0.0 without + invoking the scorer, regardless of whether the default backend or a custom + ``sentiment_fn`` is used. + """ _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus") - scorer = pattern_sentiment if sentiment_fn is None else sentiment_fn cleaned = note_corpus.copy() cleaned["note_text"] = cleaned["note_text"].map(_prepare_note_text_for_sentiment) - cleaned["negative_sentiment_score"] = cleaned["note_text"].map( - lambda text: float(-1.0 * scorer(text)[0]) - ) + if sentiment_fn is None: + sentiment_scores = _default_sentiment_batch_backend(cleaned["note_text"].tolist()) + else: + empty_mask = cleaned["note_text"] == "" + sentiment_scores = [(0.0, 0.0)] * len(cleaned) + non_empty_indices = [index for index, is_empty in enumerate(empty_mask) if not is_empty] + for index in non_empty_indices: + sentiment_scores[index] = sentiment_fn(cleaned["note_text"].iloc[index]) + + cleaned["negative_sentiment_score"] = [ + float(-1.0 * score[0]) for score in sentiment_scores + ] return cleaned[["hadm_id", "negative_sentiment_score"]].sort_values("hadm_id").reset_index(drop=True) @@ -461,41 +904,44 @@ def build_mistrust_score_table( ) -> pd.DataFrame: """Build the three normalized mistrust metrics.""" - _require_columns(note_labels, ["hadm_id", "noncompliance_label", "autopsy_label"], "note_labels") + _require_columns(note_labels, ["hadm_id", *PROXY_LABEL_COLUMNS.values()], "note_labels") _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus") - noncompliance = build_noncompliance_mistrust_scores( - 
feature_matrix=feature_matrix, - note_labels=note_labels, - estimator_factory=estimator_factory, - ) - autopsy = build_autopsy_mistrust_scores( - feature_matrix=feature_matrix, - note_labels=note_labels, - estimator_factory=estimator_factory, - ) + proxy_scores: OrderedDict[str, pd.DataFrame] = OrderedDict() + for proxy_name, label_column in PROXY_LABEL_COLUMNS.items(): + proxy_scores[proxy_name] = build_proxy_probability_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column=label_column, + estimator_factory=estimator_factory, + ) sentiment = build_negative_sentiment_mistrust_scores( note_corpus=note_corpus, sentiment_fn=sentiment_fn, ) - merged = ( - noncompliance.merge(autopsy, on="hadm_id", how="inner", validate="one_to_one") - .merge(sentiment, on="hadm_id", how="inner", validate="one_to_one") - .sort_values("hadm_id") - ) + merged = None + for score_table in list(proxy_scores.values()) + [sentiment]: + if merged is None: + merged = score_table + continue + merged = merged.merge(score_table, on="hadm_id", how="inner", validate="one_to_one") + assert merged is not None + merged = merged.sort_values("hadm_id") + + raw_score_columns = [_score_column_name(label_column) for label_column in PROXY_LABEL_COLUMNS.values()] + rename_map = { + _score_column_name(label_column): f"{proxy_name}_score_z" + for proxy_name, label_column in PROXY_LABEL_COLUMNS.items() + } + raw_score_columns.append("negative_sentiment_score") + rename_map["negative_sentiment_score"] = "negative_sentiment_score_z" normalized = z_normalize_scores( merged, - columns=["noncompliance_score", "autopsy_score", "negative_sentiment_score"], - ) - normalized = normalized.rename( - columns={ - "noncompliance_score": "noncompliance_score_z", - "autopsy_score": "autopsy_score_z", - "negative_sentiment_score": "negative_sentiment_score_z", - } + columns=raw_score_columns, ) + normalized = normalized.rename(columns=rename_map) return normalized.reset_index(drop=True) @@ -531,9 
+977,13 @@ def build_proxy_feature_weight_summary( ) -> dict[str, pd.DataFrame]: """Fit a proxy model and summarize the learned coefficient weights.""" - merged, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column) - estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory() - estimator.fit(merged[feature_columns], merged[label_column].astype(int)) + _, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column) + estimator = fit_proxy_mistrust_model( + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column=label_column, + estimator_factory=estimator_factory, + ) return summarize_feature_weights(estimator, feature_columns, top_n=top_n) @@ -546,7 +996,7 @@ def build_noncompliance_feature_weight_summary( return build_proxy_feature_weight_summary( feature_matrix=feature_matrix, note_labels=note_labels, - label_column="noncompliance_label", + label_column=PROXY_LABEL_COLUMNS["noncompliance"], estimator_factory=estimator_factory, top_n=top_n, ) @@ -561,7 +1011,7 @@ def build_autopsy_feature_weight_summary( return build_proxy_feature_weight_summary( feature_matrix=feature_matrix, note_labels=note_labels, - label_column="autopsy_label", + label_column=PROXY_LABEL_COLUMNS["autopsy"], estimator_factory=estimator_factory, top_n=top_n, ) @@ -1060,39 +1510,55 @@ def evaluate_downstream_average_weights( feature_configurations: Mapping[str, Sequence[str]] | None = None, task_map: Mapping[str, str] | None = None, estimator_factory: Callable[[], object] | None = None, + downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, split_fn: Callable[..., tuple] | None = None, repetitions: int = 100, test_size: float = 0.4, ) -> pd.DataFrame: - """Average downstream regularized model weights across repeated 60/40 splits.""" + """Average downstream regularized model weights across repeated 60/40 splits. 
+ + Table 6 in the reference notebook reports coefficients from models trained on + the already-prepared baseline/mistrust feature space without an additional + sklearn ``StandardScaler`` pass. Age/LOS and mistrust features are already + standardized earlier in the pipeline, so we preserve those raw columns here + instead of re-scaling the one-hot baseline indicators. + """ - splitter = train_test_split if split_fn is None else split_fn rows: list[dict[str, float | int | str]] = [] - for task_name, target_column, config_name, feature_columns, usable, X, y in _iter_downstream_jobs( + for ( + task_name, + target_column, + config_name, + feature_columns, + usable, + X, + y, + estimator_factory_for_job, + ) in _iter_downstream_jobs_with_estimators( final_model_table, feature_configurations=feature_configurations, task_map=task_map, + estimator_factory=estimator_factory, + downstream_estimator_factory_resolver=downstream_estimator_factory_resolver, ): collected_weights: list[np.ndarray] = [] - for random_state in range(repetitions): - if usable.empty or y.nunique(dropna=True) < 2: + for split_result in _iter_downstream_repetition_splits( + X, + y, + usable, + repetitions=repetitions, + test_size=test_size, + split_fn=split_fn, + ): + if split_result is None: continue - X_train, X_test, y_train, y_test = splitter( - X, - y, - test_size=test_size, - random_state=random_state, - ) + X_train, X_test, y_train, y_test = split_result del X_test # coefficients come from the fitted train-side model only - y_train = pd.Series(y_train) - y_test = pd.Series(y_test) - if y_train.nunique(dropna=True) < 2 or y_test.nunique(dropna=True) < 2: - continue - estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory() + estimator = estimator_factory_for_job() estimator.fit(X_train, y_train.astype(int)) coefficients = np.asarray(getattr(estimator, "coef_", None), dtype=float) if coefficients.ndim != 2 or coefficients.shape[0] == 0: @@ -1136,6 +1602,7 @@ def 
evaluate_downstream_predictions( feature_configurations: Mapping[str, Sequence[str]] | None = None, task_map: Mapping[str, str] | None = None, estimator_factory: Callable[[], object] | None = None, + downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, split_fn: Callable[..., tuple] | None = None, auc_fn: Callable[[Iterable[int], Iterable[float]], float] | None = None, repetitions: int = 100, @@ -1143,36 +1610,47 @@ def evaluate_downstream_predictions( ) -> pd.DataFrame: """Run repeated 60/40 downstream AUC evaluation across all tasks/configs.""" - splitter = train_test_split if split_fn is None else split_fn metric = roc_auc_score if auc_fn is None else auc_fn rows: list[dict[str, float | int | str]] = [] - for task_name, target_column, config_name, feature_columns, usable, X, y in _iter_downstream_jobs( + for ( + task_name, + target_column, + config_name, + feature_columns, + usable, + X, + y, + estimator_factory_for_job, + ) in _iter_downstream_jobs_with_estimators( final_model_table, feature_configurations=feature_configurations, task_map=task_map, + estimator_factory=estimator_factory, + downstream_estimator_factory_resolver=downstream_estimator_factory_resolver, ): auc_values: list[float] = [] - for random_state in range(repetitions): - if usable.empty or y.nunique(dropna=True) < 2: + for split_result in _iter_downstream_repetition_splits( + X, + y, + usable, + repetitions=repetitions, + test_size=test_size, + split_fn=split_fn, + ): + if split_result is None: auc_values.append(float("nan")) continue - X_train, X_test, y_train, y_test = splitter( - X, - y, - test_size=test_size, - random_state=random_state, + X_train, X_test, y_train, y_test = split_result + X_train, X_test = _standardize_downstream_features( + X_train, + X_test, + feature_columns, ) - y_train = pd.Series(y_train) - y_test = pd.Series(y_test) - if y_train.nunique(dropna=True) < 2 or y_test.nunique(dropna=True) < 2: - auc_values.append(float("nan")) - 
continue - - estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory() + estimator = estimator_factory_for_job() estimator.fit(X_train, y_train.astype(int)) probabilities = estimator.predict_proba(X_test) positive_class = _extract_positive_class_probabilities(probabilities) @@ -1204,7 +1682,11 @@ def plot_grouped_treatment_cdf( median_column: str = "median", ax=None, ): - """Plot grouped empirical CDF curves with dotted median lines.""" + """Optional reporting helper for grouped empirical CDF visualization. + + This helper is intentionally isolated from the core modeling pipeline and + exists only for lightweight plotting of already-computed CDF data. + """ try: import matplotlib.pyplot as plt # type: ignore @@ -1229,6 +1711,28 @@ def plot_grouped_treatment_cdf( return ax +class EOLMistrustModelOutputs(TypedDict, total=False): + """Typed contract for the dict returned by ``run_full_eol_mistrust_modeling``. + + All keys except ``mistrust_scores`` and ``feature_weight_summaries`` are + optional because they require their corresponding input tables to be + provided. 
+ """ + + mistrust_scores: pd.DataFrame + feature_weight_summaries: dict[str, dict[str, pd.DataFrame]] + race_gap_results: pd.DataFrame + race_treatment_results: pd.DataFrame + race_treatment_by_acuity_results: pd.DataFrame + race_treatment_cdf_plot_data: pd.DataFrame + trust_treatment_results: pd.DataFrame + trust_treatment_by_acuity_results: pd.DataFrame + trust_treatment_cdf_plot_data: pd.DataFrame + acuity_correlations: pd.DataFrame + downstream_auc_results: pd.DataFrame + downstream_weight_results: pd.DataFrame + + def run_full_eol_mistrust_modeling( feature_matrix: pd.DataFrame, note_labels: pd.DataFrame, @@ -1239,40 +1743,49 @@ def run_full_eol_mistrust_modeling( acuity_scores: pd.DataFrame | None = None, final_model_table: pd.DataFrame | None = None, estimator_factory: Callable[[], object] | None = None, + downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, sentiment_fn: Callable[[str], tuple[float, float]] | None = None, split_fn: Callable[..., tuple] | None = None, auc_fn: Callable[[Iterable[int], Iterable[float]], float] | None = None, repetitions: int = 100, include_downstream_weight_summary: bool = False, include_cdf_plot_data: bool = False, -) -> dict[str, pd.DataFrame | dict[str, pd.DataFrame]]: + precomputed_mistrust_scores: pd.DataFrame | None = None, + score_columns: Sequence[str] | None = None, + feature_configurations: Mapping[str, Sequence[str]] | None = None, +) -> EOLMistrustModelOutputs: """Run the end-to-end model-stage workflow and collect its outputs.""" - mistrust_scores = build_mistrust_score_table( - feature_matrix=feature_matrix, - note_labels=note_labels, - note_corpus=note_corpus, - estimator_factory=estimator_factory, - sentiment_fn=sentiment_fn, - ) + if precomputed_mistrust_scores is not None: + mistrust_scores = precomputed_mistrust_scores + else: + mistrust_scores = build_mistrust_score_table( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + 
estimator_factory=estimator_factory, + sentiment_fn=sentiment_fn, + ) + feature_weight_summaries: OrderedDict[str, dict[str, pd.DataFrame]] = OrderedDict() + for proxy_name, label_column in PROXY_LABEL_COLUMNS.items(): + feature_weight_summaries[proxy_name] = build_proxy_feature_weight_summary( + feature_matrix=feature_matrix, + note_labels=note_labels, + label_column=label_column, + estimator_factory=estimator_factory, + ) outputs: dict[str, pd.DataFrame | dict[str, pd.DataFrame]] = { "mistrust_scores": mistrust_scores, - "feature_weight_summaries": { - "noncompliance": build_noncompliance_feature_weight_summary( - feature_matrix=feature_matrix, - note_labels=note_labels, - estimator_factory=estimator_factory, - ), - "autopsy": build_autopsy_feature_weight_summary( - feature_matrix=feature_matrix, - note_labels=note_labels, - estimator_factory=estimator_factory, - ), - }, + "feature_weight_summaries": feature_weight_summaries, } + selected_score_columns = list(MISTRUST_SCORE_COLUMNS if score_columns is None else score_columns) if demographics is not None: - outputs["race_gap_results"] = run_race_gap_analysis(mistrust_scores, demographics) + outputs["race_gap_results"] = run_race_gap_analysis( + mistrust_scores, + demographics, + score_columns=selected_score_columns, + ) if eol_cohort is not None and treatment_totals is not None: outputs["race_treatment_results"] = run_race_based_treatment_analysis( @@ -1283,6 +1796,7 @@ def run_full_eol_mistrust_modeling( eol_cohort=eol_cohort, mistrust_scores=mistrust_scores, treatment_totals=treatment_totals, + score_columns=selected_score_columns, ) if acuity_scores is not None: outputs["race_treatment_by_acuity_results"] = run_race_based_treatment_analysis_by_acuity( @@ -1295,6 +1809,7 @@ def run_full_eol_mistrust_modeling( mistrust_scores=mistrust_scores, treatment_totals=treatment_totals, acuity_scores=acuity_scores, + score_columns=selected_score_columns, ) if include_cdf_plot_data: outputs["race_treatment_cdf_plot_data"] = 
build_race_based_treatment_cdf_plot_data( @@ -1305,12 +1820,14 @@ def run_full_eol_mistrust_modeling( eol_cohort=eol_cohort, mistrust_scores=mistrust_scores, treatment_totals=treatment_totals, + score_columns=selected_score_columns, ) if acuity_scores is not None: outputs["acuity_correlations"] = run_acuity_control_analysis( mistrust_scores=mistrust_scores, acuity_scores=acuity_scores, + score_columns=selected_score_columns, ) if final_model_table is not None: @@ -1319,7 +1836,9 @@ def run_full_eol_mistrust_modeling( downstream = downstream.merge(mistrust_scores, on="hadm_id", how="left") outputs["downstream_auc_results"] = evaluate_downstream_predictions( final_model_table=downstream, + feature_configurations=feature_configurations, estimator_factory=estimator_factory, + downstream_estimator_factory_resolver=downstream_estimator_factory_resolver, split_fn=split_fn, auc_fn=auc_fn, repetitions=repetitions, @@ -1327,7 +1846,9 @@ def run_full_eol_mistrust_modeling( if include_downstream_weight_summary: outputs["downstream_weight_results"] = evaluate_downstream_average_weights( final_model_table=downstream, + feature_configurations=feature_configurations, estimator_factory=estimator_factory, + downstream_estimator_factory_resolver=downstream_estimator_factory_resolver, split_fn=split_fn, repetitions=repetitions, ) @@ -1366,10 +1887,15 @@ def build_mistrust_scores( sentiment_fn=self.sentiment_fn, ) - def evaluate_downstream(self, final_model_table: pd.DataFrame) -> pd.DataFrame: + def evaluate_downstream( + self, + final_model_table: pd.DataFrame, + downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, + ) -> pd.DataFrame: return evaluate_downstream_predictions( final_model_table=final_model_table, estimator_factory=self.estimator_factory, + downstream_estimator_factory_resolver=downstream_estimator_factory_resolver, split_fn=self.split_fn, auc_fn=self.auc_fn, repetitions=self.repetitions, @@ -1387,7 +1913,18 @@ def run( 
final_model_table: pd.DataFrame | None = None, include_downstream_weight_summary: bool = False, include_cdf_plot_data: bool = False, - ) -> dict[str, pd.DataFrame | dict[str, pd.DataFrame]]: + precomputed_mistrust_scores: pd.DataFrame | None = None, + score_columns: Sequence[str] | None = None, + feature_configurations: Mapping[str, Sequence[str]] | None = None, + downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, + ) -> EOLMistrustModelOutputs: + """Return model-stage outputs only. + + For the full end-to-end pipeline including dataset-layer artifacts + (base_admissions, all_cohort, eol_cohort, etc.) use + ``build_eol_mistrust_outputs`` in ``examples/eol_mistrust.py`` + instead — that is the canonical single entry point. + """ return run_full_eol_mistrust_modeling( feature_matrix=feature_matrix, note_labels=note_labels, @@ -1398,23 +1935,419 @@ def run( acuity_scores=acuity_scores, final_model_table=final_model_table, estimator_factory=self.estimator_factory, + downstream_estimator_factory_resolver=downstream_estimator_factory_resolver, sentiment_fn=self.sentiment_fn, split_fn=self.split_fn, auc_fn=self.auc_fn, repetitions=self.repetitions, include_downstream_weight_summary=include_downstream_weight_summary, include_cdf_plot_data=include_cdf_plot_data, + precomputed_mistrust_scores=precomputed_mistrust_scores, + score_columns=score_columns, + feature_configurations=feature_configurations, ) -normalize_mistrust_scores = z_normalize_scores -run_racial_gap_validation = run_race_gap_analysis -run_acuity_correlation_analysis = run_acuity_control_analysis -run_downstream_prediction_experiments = evaluate_downstream_predictions -build_mistrust_metrics = build_mistrust_score_table +def _default_eol_mistrust_data_root() -> Path: + return Path(__file__).resolve().parents[2] / "EOL_Workspace" / "eol_mistrust_required_combined" + + +def _default_eol_mistrust_slice_output_dir() -> Path: + timestamp = 
datetime.now().strftime("%Y%m%d_%H%M%S") + return Path(__file__).resolve().parents[2] / "EOL_Workspace" / "eol_mistrust_runs" / f"e2e_1pct_gpu_{timestamp}" + + +def _log_eol_mistrust_runner(start_time: float, message: str) -> None: + elapsed = time.time() - start_time + print(f"[{elapsed:8.1f}s] {message}", flush=True) + + +def _write_optional_runner_csv(output_dir: Path, name: str, value) -> None: + if isinstance(value, pd.DataFrame): + value.to_csv(output_dir / f"{name}.csv", index=False) + + +def run_eol_mistrust_gpu_slice( + *, + root: Path | str | None = None, + sample_fraction: float = 0.01, + slice_seed: int = 5, + repetitions: int = 100, + note_chunksize: int = 100_000, + chartevent_chunksize: int = 500_000, + output_dir: Path | str | None = None, + allow_online_hf: bool = False, +) -> dict[str, object]: + """Run the EOL mistrust pipeline on a deterministic GPU-backed cohort slice.""" + + start_time = time.time() + resolved_root = _default_eol_mistrust_data_root() if root is None else Path(root) + + if not allow_online_hf: + os.environ["HF_HUB_OFFLINE"] = "1" + os.environ["TRANSFORMERS_OFFLINE"] = "1" + os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1") + + output_path = _default_eol_mistrust_slice_output_dir() if output_dir is None else Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + cuda_available = False + gpu_name = None + cuda_peak_mb = None + + try: + torch_module = importlib.import_module("torch") + cuda_available = bool(getattr(torch_module, "cuda", None) and torch_module.cuda.is_available()) + gpu_name = torch_module.cuda.get_device_name(0) if cuda_available else None + if cuda_available: + torch_module.cuda.empty_cache() + except ModuleNotFoundError: # pragma: no cover + torch_module = None + + _log_eol_mistrust_runner(start_time, "Loading cached sentiment model") + warmup_started = time.time() + warmup_sentiment = _load_transformers_sentiment() + _ = warmup_sentiment("patient is calm and cooperative.") + 
warmup_seconds = round(time.time() - warmup_started, 2) + + if cuda_available and torch_module is not None and hasattr(torch_module.cuda, "reset_peak_memory_stats"): + torch_module.cuda.reset_peak_memory_stats() + + example_module = importlib.import_module("examples.eol_mistrust") + load_eol_mistrust_tables = getattr(example_module, "load_eol_mistrust_tables") + + _log_eol_mistrust_runner(start_time, f"Loading tables from {resolved_root}") + raw_tables, materialized_views = load_eol_mistrust_tables(resolved_root) + + from pyhealth.datasets.eol_mistrust import ( + build_acuity_scores, + build_all_cohort, + build_base_admissions, + build_chartevent_artifacts_from_csv, + build_demographics_table, + build_eol_cohort, + build_final_model_table_from_code_status_targets, + build_note_artifacts_from_csv, + build_treatment_totals, + write_minimal_deliverables, + ) + + admissions = raw_tables["admissions"] + patients = raw_tables["patients"] + icustays = raw_tables["icustays"] + d_items = raw_tables["d_items"] + + base_full = build_base_admissions(admissions, patients) + all_cohort_full = build_all_cohort(base_full, icustays) + sample_n = max(1, int(len(all_cohort_full) * sample_fraction)) + sampled_hadm = ( + all_cohort_full[["hadm_id"]] + .sample(n=sample_n, random_state=slice_seed) + .sort_values("hadm_id") + .reset_index(drop=True) + ) + sampled_hadm_ids = set( + pd.to_numeric(sampled_hadm["hadm_id"], errors="coerce").dropna().astype(int).tolist() + ) + + admissions_slice = admissions.loc[admissions["hadm_id"].isin(sampled_hadm_ids)].copy() + subject_ids = set( + pd.to_numeric(admissions_slice["subject_id"], errors="coerce").dropna().astype(int).tolist() + ) + patients_slice = patients.loc[patients["subject_id"].isin(subject_ids)].copy() + icustays_slice = icustays.loc[icustays["hadm_id"].isin(sampled_hadm_ids)].copy() + icustay_ids = set( + pd.to_numeric(icustays_slice["icustay_id"], errors="coerce").dropna().astype(int).tolist() + ) + + ventdurations_slice = 
materialized_views["ventdurations"].loc[ + materialized_views["ventdurations"]["icustay_id"].isin(icustay_ids) + ].copy() + vasopressordurations_slice = materialized_views["vasopressordurations"].loc[ + materialized_views["vasopressordurations"]["icustay_id"].isin(icustay_ids) + ].copy() + oasis_slice = materialized_views["oasis"].loc[ + materialized_views["oasis"]["hadm_id"].isin(sampled_hadm_ids) + ].copy() + sapsii_slice = materialized_views["sapsii"].loc[ + materialized_views["sapsii"]["hadm_id"].isin(sampled_hadm_ids) + ].copy() + + _log_eol_mistrust_runner( + start_time, + ( + "Prepared slice with " + f"{len(sampled_hadm_ids)} admissions, {len(subject_ids)} patients, {len(icustay_ids)} ICU stays" + ), + ) + + base_admissions = build_base_admissions(admissions_slice, patients_slice) + demographics = build_demographics_table(base_admissions) + all_cohort = build_all_cohort(base_admissions, icustays_slice) + eol_cohort = build_eol_cohort(base_admissions, demographics) + treatment_totals = build_treatment_totals( + icustays=icustays_slice, + ventdurations=ventdurations_slice, + vasopressordurations=vasopressordurations_slice, + ) + acuity_scores = build_acuity_scores(oasis_slice, sapsii_slice) + + noteevents_csv_path = resolved_root / "mimiciii_notes" / "noteevents.csv" + chartevents_csv_path = resolved_root / "mimiciii_clinical" / "chartevents.csv" + + notes_started = time.time() + _log_eol_mistrust_runner(start_time, "Streaming notes to build sentiment corpus and note-derived labels") + note_corpus, note_labels = build_note_artifacts_from_csv( + noteevents_csv_path=noteevents_csv_path, + all_hadm_ids=all_cohort["hadm_id"], + corpus_categories=None, + label_categories=None, + chunksize=note_chunksize, + ) + note_present_hadm_ids = _note_present_hadm_ids(note_corpus) + all_cohort = all_cohort.loc[all_cohort["hadm_id"].isin(note_present_hadm_ids)].copy() + note_corpus = note_corpus.loc[note_corpus["hadm_id"].isin(note_present_hadm_ids)].copy() + note_labels = 
note_labels.loc[note_labels["hadm_id"].isin(note_present_hadm_ids)].copy() + _log_eol_mistrust_runner( + start_time, + f"Retained {len(note_present_hadm_ids)} ALL-cohort admissions with at least one non-error note", + ) + note_stage_seconds = round(time.time() - notes_started, 2) + + chartevents_started = time.time() + _log_eol_mistrust_runner(start_time, "Streaming chartevents to build feature matrix and code-status targets") + feature_matrix, code_status_targets = build_chartevent_artifacts_from_csv( + chartevents_csv_path=chartevents_csv_path, + d_items=d_items, + all_hadm_ids=note_present_hadm_ids, + chunksize=chartevent_chunksize, + ) + chartevent_stage_seconds = round(time.time() - chartevents_started, 2) + + model_started = time.time() + _log_eol_mistrust_runner(start_time, "Running EOL mistrust model pipeline") + model = EOLMistrustModel(repetitions=repetitions) + mistrust_scores = model.build_mistrust_scores( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + ) + final_model_table = build_final_model_table_from_code_status_targets( + demographics=demographics, + all_cohort=all_cohort, + admissions=admissions_slice, + code_status_targets=code_status_targets, + mistrust_scores=mistrust_scores, + ) + model_outputs = model.run( + feature_matrix=feature_matrix, + note_labels=note_labels, + note_corpus=note_corpus, + demographics=demographics, + eol_cohort=eol_cohort, + treatment_totals=treatment_totals, + acuity_scores=acuity_scores, + final_model_table=final_model_table, + include_downstream_weight_summary=False, + include_cdf_plot_data=False, + ) + model_stage_seconds = round(time.time() - model_started, 2) + + artifacts: dict[str, object] = { + "base_admissions": base_admissions, + "demographics": demographics, + "all_cohort": all_cohort, + "eol_cohort": eol_cohort, + "treatment_totals": treatment_totals, + "note_corpus": note_corpus, + "note_labels": note_labels, + "chartevent_feature_matrix": feature_matrix, + 
"acuity_scores": acuity_scores, + "mistrust_scores": mistrust_scores, + "final_model_table": final_model_table, + **model_outputs, + } + + write_minimal_deliverables( + { + "base_admissions": base_admissions, + "eol_cohort": eol_cohort, + "all_cohort": all_cohort, + "treatment_totals": treatment_totals, + "chartevent_feature_matrix": feature_matrix, + "note_labels": note_labels, + "mistrust_scores": mistrust_scores, + "acuity_scores": acuity_scores, + "final_model_table": final_model_table, + }, + output_dir=output_path, + ) + + for key in ( + "downstream_auc_results", + "race_gap_results", + "race_treatment_results", + "race_treatment_by_acuity_results", + "trust_treatment_results", + "trust_treatment_by_acuity_results", + "acuity_correlations", + ): + _write_optional_runner_csv(output_path, key, artifacts.get(key)) + + feature_weight_summaries = artifacts.get("feature_weight_summaries", {}) + if isinstance(feature_weight_summaries, dict): + summary_dir = output_path / "feature_weight_summaries" + summary_dir.mkdir(exist_ok=True) + for model_name, tables in feature_weight_summaries.items(): + if not isinstance(tables, dict): + continue + for table_name, table in tables.items(): + if isinstance(table, pd.DataFrame): + table.to_csv(summary_dir / f"{model_name}_{table_name}.csv", index=False) + + if cuda_available and torch_module is not None: + cuda_peak_mb = round(torch_module.cuda.max_memory_allocated() / (1024 * 1024), 2) + + downstream_results = artifacts["downstream_auc_results"] + if not isinstance(downstream_results, pd.DataFrame): + raise ValueError("Expected downstream_auc_results to be a pandas DataFrame.") + + target_positives = { + "left_ama_positive": int( + pd.to_numeric(final_model_table["left_ama"], errors="coerce").fillna(0).astype(int).sum() + ), + "code_status_positive": int( + pd.to_numeric(final_model_table["code_status_dnr_dni_cmo"], errors="coerce") + .fillna(0) + .astype(int) + .sum() + ), + "mortality_positive": int( + 
pd.to_numeric(final_model_table["in_hospital_mortality"], errors="coerce") + .fillna(0) + .astype(int) + .sum() + ), + } + summary = { + "root": str(resolved_root.resolve()), + "output_dir": str(output_path.resolve()), + "sample_fraction": sample_fraction, + "slice_seed": slice_seed, + "repetitions": repetitions, + "offline_hf": not allow_online_hf, + "gpu": { + "cuda_available": cuda_available, + "device_name": gpu_name, + "cuda_peak_memory_mb": cuda_peak_mb, + }, + "counts": { + "base_full_rows": int(len(base_full)), + "all_cohort_full_rows": int(len(all_cohort_full)), + "slice_rows": int(len(all_cohort)), + "eol_slice_rows": int(len(eol_cohort)), + }, + "note_label_positives": { + "noncompliance_label": int( + pd.to_numeric(note_labels["noncompliance_label"], errors="coerce") + .fillna(0) + .astype(int) + .sum() + ), + "autopsy_label": int( + pd.to_numeric(note_labels["autopsy_label"], errors="coerce").fillna(0).astype(int).sum() + ), + }, + "target_positives": target_positives, + "artifact_shapes": { + key: list(value.shape) for key, value in artifacts.items() if isinstance(value, pd.DataFrame) + }, + "stage_seconds": { + "sentiment_warmup": warmup_seconds, + "note_streaming": note_stage_seconds, + "chartevent_streaming": chartevent_stage_seconds, + "model_pipeline": model_stage_seconds, + "total": round(time.time() - start_time, 2), + }, + "downstream_auc_results": downstream_results.to_dict(orient="records"), + } + + (output_path / "run_summary.json").write_text(json.dumps(summary, indent=2)) + _log_eol_mistrust_runner(start_time, f"Run complete; artifacts written to {output_path.resolve()}") + print(json.dumps(summary, indent=2), flush=True) + return summary + + +def _parse_eol_mistrust_cli_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run the EOL mistrust pipeline on a deterministic GPU-backed cohort slice." 
+ ) + parser.add_argument( + "--root", + type=Path, + default=_default_eol_mistrust_data_root(), + help="Root directory containing the combined EOL mistrust CSV exports.", + ) + parser.add_argument( + "--sample-fraction", + type=float, + default=0.01, + help="Fraction of the ICU-linked ALL cohort to sample.", + ) + parser.add_argument( + "--slice-seed", + type=int, + default=5, + help="Deterministic pandas sample seed for the ALL-cohort slice.", + ) + parser.add_argument( + "--repetitions", + type=int, + default=100, + help="Number of downstream repeated 60/40 evaluations.", + ) + parser.add_argument( + "--note-chunksize", + type=int, + default=100_000, + help="Chunk size for noteevents streaming.", + ) + parser.add_argument( + "--chartevent-chunksize", + type=int, + default=500_000, + help="Chunk size for chartevents streaming.", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=None, + help="Optional output directory. Defaults to EOL_Workspace/eol_mistrust_runs/.", + ) + parser.add_argument( + "--allow-online-hf", + action="store_true", + help="Allow Hugging Face network access instead of forcing offline cached model loading.", + ) + return parser.parse_args() + + +def main() -> None: + args = _parse_eol_mistrust_cli_args() + run_eol_mistrust_gpu_slice( + root=args.root, + sample_fraction=args.sample_fraction, + slice_seed=args.slice_seed, + repetitions=args.repetitions, + note_chunksize=args.note_chunksize, + chartevent_chunksize=args.chartevent_chunksize, + output_dir=args.output_dir, + allow_online_hf=args.allow_online_hf, + ) __all__ = [ + "EOLMistrustModelOutputs", "BASELINE_FEATURE_COLUMNS", "DOWNSTREAM_FEATURE_CONFIGS", "DOWNSTREAM_TASK_MAP", @@ -1424,7 +2357,8 @@ def run( "build_autopsy_feature_weight_summary", "build_autopsy_mistrust_scores", "build_empirical_cdf_curve", - "build_mistrust_metrics", + "build_logistic_estimator_factory", + "build_logistic_cv_estimator_factory", "build_mistrust_score_table", 
"build_negative_sentiment_mistrust_scores", "build_noncompliance_feature_weight_summary", @@ -1438,18 +2372,18 @@ def run( "fit_proxy_mistrust_model", "get_downstream_feature_configurations", "get_downstream_task_map", - "normalize_mistrust_scores", - "plot_grouped_treatment_cdf", "run_acuity_control_analysis", - "run_acuity_correlation_analysis", - "run_downstream_prediction_experiments", + "run_eol_mistrust_gpu_slice", "run_full_eol_mistrust_modeling", "run_race_based_treatment_analysis", "run_race_based_treatment_analysis_by_acuity", "run_race_gap_analysis", - "run_racial_gap_validation", "run_trust_based_treatment_analysis", "run_trust_based_treatment_analysis_by_acuity", "summarize_feature_weights", "z_normalize_scores", ] + + +if __name__ == "__main__": + main() diff --git a/pyhealth/tasks/eol_mistrust.py b/pyhealth/tasks/eol_mistrust.py index 9451fd9e4..8ca6fbfe5 100644 --- a/pyhealth/tasks/eol_mistrust.py +++ b/pyhealth/tasks/eol_mistrust.py @@ -1,17 +1,40 @@ -"""Task definitions and target helpers for the EOL mistrust workflow.""" +"""Task definitions and target helpers for the EOL mistrust workflow. + +Structure +--------- +This module now keeps two logic families explicit: + +1. Normal Path + The corrected, cleaned task helpers used by the default research flow. +2. Paper-like Path + The notebook-faithful special logic that only exists for paper compatibility. 
+""" from __future__ import annotations import re from collections import OrderedDict -from datetime import datetime -from typing import Any, Dict, Iterable, List, Mapping, Sequence +from typing import Any, Iterable, Mapping, Sequence import pandas as pd from .base_task import BaseTask CODE_STATUS_ITEMIDS = {128, 223758} +CODE_STATUS_MODE_CORRECTED = "corrected" +CODE_STATUS_MODE_PAPER_LIKE = "paper_like" + +CODE_STATUS_POSITIVE_SUBSTRINGS = ( + "dnr", + "dni", + "comfort", + "cmo", + "do_not_resusc", + "do_not_intubat", + "cpr_not_indicat", +) +CODE_STATUS_NOTEBOOK_POSITIVE_STRINGS = ("DNR", "DNI", "Comfort", "Do Not") +CODE_STATUS_NOTEBOOK_FULL_CODE_VALUES = {"Full Code", "Full code"} EOL_MISTRUST_TASK_MAP = OrderedDict( [ @@ -25,44 +48,59 @@ def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> None: missing = [column for column in required if column not in df.columns] if missing: - missing_str = ", ".join(missing) - raise ValueError(f"{df_name} is missing required columns: {missing_str}") + raise ValueError(f"{df_name} is missing required columns: {', '.join(missing)}") + + +def _coerce_timestamp(value) -> pd.Timestamp: + return pd.to_datetime(value, errors="coerce") def _normalize_token(value) -> str: if value is None or (isinstance(value, float) and pd.isna(value)): return "" - value = str(value).strip().lower() - if not value: - return "" - value = re.sub(r"[^a-z0-9]+", "_", value) - value = re.sub(r"_+", "_", value) - return value.strip("_") + normalized = re.sub(r"[^a-z0-9]+", "_", str(value).strip().lower()) + normalized = re.sub(r"_+", "_", normalized) + return normalized.strip("_") -def _to_datetime(value) -> pd.Timestamp: - return pd.to_datetime(value, errors="coerce") +def _normalize_code_status_mode(mode: str | None) -> str: + normalized = ( + CODE_STATUS_MODE_CORRECTED if mode is None else str(mode).strip().lower() + ) + if normalized not in {CODE_STATUS_MODE_CORRECTED, CODE_STATUS_MODE_PAPER_LIKE}: + raise ValueError( + 
"code_status_mode must be one of " + f"{CODE_STATUS_MODE_CORRECTED!r} or {CODE_STATUS_MODE_PAPER_LIKE!r}" + ) + return normalized def _calculate_age_years(admittime, dob) -> float: - admit_time = _to_datetime(admittime) - birth_time = _to_datetime(dob) + admit_time = _coerce_timestamp(admittime) + birth_time = _coerce_timestamp(dob) if pd.isna(admit_time) or pd.isna(birth_time): return float("nan") seconds_per_year = 365.25 * 24 * 3600 - age = (admit_time.to_pydatetime() - birth_time.to_pydatetime()).total_seconds() / seconds_per_year - return 90.0 if age > 200 else float(age) + age_years = ( + admit_time.to_pydatetime() - birth_time.to_pydatetime() + ).total_seconds() / seconds_per_year + return 90.0 if age_years > 200 else float(age_years) def _calculate_los_days(admittime, dischtime) -> float: - admit_time = _to_datetime(admittime) - discharge_time = _to_datetime(dischtime) + admit_time = _coerce_timestamp(admittime) + discharge_time = _coerce_timestamp(dischtime) if pd.isna(admit_time) or pd.isna(discharge_time): return float("nan") return float((discharge_time - admit_time).total_seconds() / 86400.0) +# --------------------------------------------------------------------------- +# Normal Path +# --------------------------------------------------------------------------- + + def map_ethnicity_to_race(ethnicity) -> str: """Collapse raw MIMIC ethnicity strings into the study race groups.""" @@ -83,19 +121,16 @@ def map_ethnicity_to_race(ethnicity) -> str: def map_insurance_to_group(insurance) -> str: """Collapse raw insurance text into the three study groups.""" - text = str(insurance or "").strip().lower() - normalized = re.sub(r"\s+", " ", text) + normalized = re.sub(r"\s+", " ", str(insurance or "").strip().lower()) if normalized in {"medicare", "medicaid", "government", "public"}: return "Public" - if normalized in {"private"}: + if normalized == "private": return "Private" - if normalized in {"self pay", "self-pay", "self_pay"}: - return "Self-Pay" - return 
str(insurance or "") + return "Self-Pay" def prepare_note_text(text) -> str: - """Normalize note text by whitespace tokenization and rejoining only.""" + """Normalize note text by collapsing whitespace only.""" if text is None or (isinstance(text, float) and pd.isna(text)): return "" @@ -103,29 +138,39 @@ def prepare_note_text(text) -> str: def build_left_ama_target(admissions: pd.DataFrame) -> pd.DataFrame: - """Build the exact-match Left AMA target from admissions.""" + """Build the Left AMA target from admissions discharge_location codes.""" _require_columns(admissions, ["hadm_id", "discharge_location"], "admissions") - targets = admissions[["hadm_id", "discharge_location"]].drop_duplicates("hadm_id").copy() - targets["left_ama"] = ( - targets["discharge_location"] - .fillna("") - .astype(str) - .str.strip() - .str.upper() - .eq("LEFT AGAINST MEDICAL ADVICE") - .astype(int) + targets = ( + admissions[["hadm_id", "discharge_location"]].drop_duplicates("hadm_id").copy() + ) + discharge_location = ( + targets["discharge_location"].fillna("").astype(str).str.strip().str.upper() + ) + targets["left_ama"] = discharge_location.isin( + {"LEFT AGAINST MEDICAL ADVI", "LEFT AGAINST MEDICAL ADVICE"} + ).astype(int) + return ( + targets[["hadm_id", "left_ama"]].sort_values("hadm_id").reset_index(drop=True) ) - return targets[["hadm_id", "left_ama"]].sort_values("hadm_id").reset_index(drop=True) def build_in_hospital_mortality_target(admissions: pd.DataFrame) -> pd.DataFrame: """Build the in-hospital mortality target from admissions.""" _require_columns(admissions, ["hadm_id", "hospital_expire_flag"], "admissions") - targets = admissions[["hadm_id", "hospital_expire_flag"]].drop_duplicates("hadm_id").copy() + targets = ( + admissions[["hadm_id", "hospital_expire_flag"]] + .drop_duplicates("hadm_id") + .copy() + ) targets["in_hospital_mortality"] = ( - pd.to_numeric(targets["hospital_expire_flag"], errors="coerce").fillna(0).astype(int) + pd.to_numeric( + 
targets["hospital_expire_flag"], + errors="coerce", + ) + .fillna(0) + .astype(int) ) return ( targets[["hadm_id", "in_hospital_mortality"]] @@ -134,42 +179,119 @@ def build_in_hospital_mortality_target(admissions: pd.DataFrame) -> pd.DataFrame ) +def is_positive_code_status_value(value) -> bool: + """Return True when a raw code-status chart value indicates a positive label.""" + + normalized = _normalize_token(value) + return any(token in normalized for token in CODE_STATUS_POSITIVE_SUBSTRINGS) + + +def _build_code_status_target_normal(codes: pd.DataFrame) -> pd.DataFrame: + labeled = codes.copy() + labeled["code_status_dnr_dni_cmo"] = labeled["value"].map( + lambda value: int(is_positive_code_status_value(value)) + ) + + if "charttime" not in labeled.columns: + return ( + labeled[["hadm_id", "code_status_dnr_dni_cmo"]] + .groupby("hadm_id", as_index=False)["code_status_dnr_dni_cmo"] + .max() + .sort_values("hadm_id") + .reset_index(drop=True) + ) + + labeled["_charttime"] = pd.to_datetime(labeled["charttime"], errors="coerce") + labeled["_has_charttime"] = labeled["_charttime"].notna().astype(int) + labeled["_event_order"] = range(len(labeled)) + latest = ( + labeled.sort_values( + ["hadm_id", "_has_charttime", "_charttime", "_event_order"], + kind="stable", + ) + .groupby("hadm_id", as_index=False) + .tail(1)[["hadm_id", "code_status_dnr_dni_cmo"]] + .sort_values("hadm_id") + ) + return latest.reset_index(drop=True) + + +# --------------------------------------------------------------------------- +# Paper-like Path +# --------------------------------------------------------------------------- + + +def _advance_paper_like_code_status_label( + current_label: int | None, value +) -> int | None: + """Replicate the notebook's stateful overwrite behavior exactly.""" + + if value is None or (isinstance(value, float) and pd.isna(value)): + return current_label + + text = str(value) + if any(token in text for token in CODE_STATUS_NOTEBOOK_POSITIVE_STRINGS): + return 1 + 
if text in CODE_STATUS_NOTEBOOK_FULL_CODE_VALUES: + return 0 + return current_label + + +def _build_code_status_target_paper_like(codes: pd.DataFrame) -> pd.DataFrame: + current_label: int | None = None + notebook_targets: dict[int, int] = {} + + for row in codes.itertuples(index=False): + current_label = _advance_paper_like_code_status_label( + current_label, + getattr(row, "value"), + ) + if current_label is not None: + notebook_targets[int(getattr(row, "hadm_id"))] = int(current_label) + + return ( + pd.DataFrame( + { + "hadm_id": sorted(notebook_targets), + "code_status_dnr_dni_cmo": [ + int(notebook_targets[hadm_id]) + for hadm_id in sorted(notebook_targets) + ], + } + ) + .sort_values("hadm_id") + .reset_index(drop=True) + ) + + +# --------------------------------------------------------------------------- +# Shared target entry points +# --------------------------------------------------------------------------- + + def build_code_status_target( chartevents: pd.DataFrame, itemids: Iterable[int] | None = None, + code_status_mode: str = CODE_STATUS_MODE_CORRECTED, ) -> pd.DataFrame: - """Build the code-status target using the required itemids only.""" + """Build the code-status target from the required itemids only.""" _require_columns(chartevents, ["hadm_id", "itemid", "value"], "chartevents") - allowed_itemids = set(CODE_STATUS_ITEMIDS if itemids is None else itemids) - if chartevents.empty: return pd.DataFrame(columns=["hadm_id", "code_status_dnr_dni_cmo"]) + allowed_itemids = set(CODE_STATUS_ITEMIDS if itemids is None else itemids) codes = chartevents.loc[chartevents["itemid"].isin(allowed_itemids)].copy() if codes.empty: return pd.DataFrame(columns=["hadm_id", "code_status_dnr_dni_cmo"]) - normalized_value = codes["value"].map(_normalize_token) - positive = normalized_value.apply( - lambda value: int( - ("dnr" in value) - or ("dni" in value) - or ("comfort" in value) - or ("cmo" in value) - ) - ) - target = ( - pd.DataFrame({"hadm_id": codes["hadm_id"], 
"code_status_dnr_dni_cmo": positive}) - .groupby("hadm_id", as_index=False)["code_status_dnr_dni_cmo"] - .max() - .sort_values("hadm_id") - ) - return target.reset_index(drop=True) + if _normalize_code_status_mode(code_status_mode) == CODE_STATUS_MODE_PAPER_LIKE: + return _build_code_status_target_paper_like(codes) + return _build_code_status_target_normal(codes) def get_eol_mistrust_task_map() -> OrderedDict[str, str]: - """Return the three downstream target names used by the study.""" + """Return the downstream target names used by the study.""" return OrderedDict(EOL_MISTRUST_TASK_MAP) @@ -180,16 +302,14 @@ class EOLMistrustDownstreamMIMIC3(BaseTask): task_name = "EOLMistrustDownstreamMIMIC3" def __init__( - self, - target: str = "in_hospital_mortality", - include_notes: bool = False, + self, target: str = "in_hospital_mortality", include_notes: bool = False ) -> None: if target not in set(EOL_MISTRUST_TASK_MAP.values()): raise ValueError(f"Unsupported EOL mistrust target: {target}") self.target = target self.include_notes = include_notes - self.input_schema: Dict[str, str] = { + self.input_schema: dict[str, str] = { "conditions": "sequence", "procedures": "sequence", "drugs": "sequence", @@ -201,20 +321,15 @@ def __init__( } if include_notes: self.input_schema["clinical_notes"] = "text" - self.output_schema: Dict[str, str] = {target: "binary"} + self.output_schema: dict[str, str] = {target: "binary"} - def _get_single_patient_event(self, patient: Any, event_type: str): - events = patient.get_events(event_type=event_type) - if not events: - return None - return events[0] - - def _get_codes_for_admission(self, patient: Any, event_type: str, hadm_id) -> List[str]: + def _get_codes_for_admission( + self, patient: Any, event_type: str, hadm_id + ) -> list[str]: events = patient.get_events( - event_type=event_type, - filters=[("hadm_id", "==", hadm_id)], + event_type=event_type, filters=[("hadm_id", "==", hadm_id)] ) - values: List[str] = [] + values: list[str] = [] 
for event in events: for attribute in ("icd9_code", "icd_code", "drug", "ndc"): value = getattr(event, attribute, None) @@ -225,36 +340,35 @@ def _get_codes_for_admission(self, patient: Any, event_type: str, hadm_id) -> Li def _get_note_text(self, patient: Any, hadm_id) -> str: notes = patient.get_events( - event_type="noteevents", - filters=[("hadm_id", "==", hadm_id)], + event_type="noteevents", filters=[("hadm_id", "==", hadm_id)] + ) + return prepare_note_text( + " ".join(str(getattr(note, "text", "")) for note in notes) ) - return prepare_note_text(" ".join(str(getattr(note, "text", "")) for note in notes)) def _get_code_status_label(self, patient: Any, hadm_id) -> int: events = patient.get_events( - event_type="chartevents", - filters=[("hadm_id", "==", hadm_id)], + event_type="chartevents", filters=[("hadm_id", "==", hadm_id)] ) - rows = [] - for event in events: - rows.append( - { - "hadm_id": getattr(event, "hadm_id", hadm_id), - "itemid": getattr(event, "itemid", None), - "value": getattr(event, "value", None), - } - ) + rows = [ + { + "hadm_id": getattr(event, "hadm_id", hadm_id), + "itemid": getattr(event, "itemid", None), + "value": getattr(event, "value", None), + } + for event in events + ] if not rows: return 0 target = build_code_status_target(pd.DataFrame(rows)) - if target.empty: - return 0 - return int(target["code_status_dnr_dni_cmo"].max()) + return 0 if target.empty else int(target["code_status_dnr_dni_cmo"].max()) def _get_target_value(self, patient: Any, admission: Any) -> int: if self.target == "left_ama": discharge_location = str(getattr(admission, "discharge_location", "") or "") - return int(discharge_location.strip().upper() == "LEFT AGAINST MEDICAL ADVICE") + return int( + discharge_location.strip().upper() == "LEFT AGAINST MEDICAL ADVICE" + ) if self.target == "in_hospital_mortality": expire_flag = getattr(admission, "hospital_expire_flag", 0) try: @@ -265,39 +379,55 @@ def _get_target_value(self, patient: Any, admission: Any) -> 
int: return self._get_code_status_label(patient, admission.hadm_id) raise ValueError(f"Unsupported EOL mistrust target: {self.target}") - def __call__(self, patient: Any) -> List[Dict[str, Any]]: + def __call__(self, patient: Any) -> list[dict[str, Any]]: admissions = patient.get_events(event_type="admissions") - patient_event = self._get_single_patient_event(patient, "patients") if not admissions: return [] - samples: List[Dict[str, Any]] = [] + patient_events = patient.get_events(event_type="patients") + patient_event = patient_events[0] if patient_events else None + + samples: list[dict[str, Any]] = [] for admission in admissions: hadm_id = getattr(admission, "hadm_id", None) if hadm_id is None: continue - conditions = self._get_codes_for_admission(patient, "diagnoses_icd", hadm_id) - procedures = self._get_codes_for_admission(patient, "procedures_icd", hadm_id) - drugs = self._get_codes_for_admission(patient, "prescriptions", hadm_id) - - sample: Dict[str, Any] = { + admit_time = getattr(admission, "admittime", None) or getattr( + admission, "timestamp", None + ) + sample: dict[str, Any] = { "visit_id": hadm_id, "hadm_id": hadm_id, "patient_id": patient.patient_id, - "conditions": conditions, - "procedures": procedures, - "drugs": drugs, + "conditions": self._get_codes_for_admission( + patient, "diagnoses_icd", hadm_id + ), + "procedures": self._get_codes_for_admission( + patient, "procedures_icd", hadm_id + ), + "drugs": self._get_codes_for_admission( + patient, "prescriptions", hadm_id + ), "age": _calculate_age_years( - getattr(admission, "timestamp", None), - getattr(patient_event, "dob", None) if patient_event is not None else None, + admit_time, + ( + getattr(patient_event, "dob", None) + if patient_event is not None + else None + ), ), "los_days": _calculate_los_days( - getattr(admission, "timestamp", None), - getattr(admission, "dischtime", None), + admit_time, getattr(admission, "dischtime", None) + ), + "gender": ( + getattr(patient_event, "gender", 
None) + if patient_event is not None + else None + ), + "insurance": map_insurance_to_group( + getattr(admission, "insurance", None) ), - "gender": getattr(patient_event, "gender", None) if patient_event is not None else None, - "insurance": map_insurance_to_group(getattr(admission, "insurance", None)), "race": map_ethnicity_to_race(getattr(admission, "ethnicity", None)), self.target: self._get_target_value(patient, admission), } @@ -336,6 +466,8 @@ def __init__(self, include_notes: bool = False) -> None: __all__ = [ "CODE_STATUS_ITEMIDS", + "CODE_STATUS_MODE_CORRECTED", + "CODE_STATUS_MODE_PAPER_LIKE", "EOL_MISTRUST_TASK_MAP", "EOLMistrustCodeStatusPredictionMIMIC3", "EOLMistrustDownstreamMIMIC3", diff --git a/tests/core/test_eol_mistrust_Integration.py b/tests/core/test_eol_mistrust_Integration.py index 39df9bf72..b3aefb8b6 100644 --- a/tests/core/test_eol_mistrust_Integration.py +++ b/tests/core/test_eol_mistrust_Integration.py @@ -1,7 +1,10 @@ import importlib import importlib.util -import tempfile +import io +import shutil import unittest +import uuid +from contextlib import contextmanager from pathlib import Path from unittest.mock import patch @@ -48,6 +51,18 @@ def _load_example_module(): return module +@contextmanager +def _workspace_tempdir(): + base = Path(__file__).resolve().parents[2] / ".tmp-test-integration" + base.mkdir(parents=True, exist_ok=True) + path = base / f"tmp_{uuid.uuid4().hex}" + path.mkdir() + try: + yield str(path) + finally: + shutil.rmtree(path, ignore_errors=True) + + class _FakeProbEstimator: def __init__(self, probabilities): self.probabilities = list(probabilities) @@ -229,7 +244,7 @@ def setUp(self): [ {"hadm_id": 101, "category": "Nursing", "text": "Patient is noncompliant and refused medication.", "iserror": None}, {"hadm_id": 102, "category": "Nursing", "text": "Family provided autopsy consent and autopsy was performed.", "iserror": None}, - {"hadm_id": 103, "category": "Nursing", "text": "Patient is non-adher to the follow 
up plan.", "iserror": None}, + {"hadm_id": 103, "category": "Nursing", "text": "Patient is non-adher to the follow up plan.\nFamily declined autopsy.", "iserror": None}, {"hadm_id": 104, "category": "Nursing", "text": "Date:[**5-1-18**] patient has good rapport.", "iserror": None}, {"hadm_id": 105, "category": "Nursing", "text": "this note should be dropped", "iserror": 1}, {"hadm_id": 106, "category": "Nursing", "text": "", "iserror": None}, @@ -504,7 +519,7 @@ def test_dataset_build_all_and_eol_cohorts_respect_duration_boundaries(self): base = self.dataset.build_base_admissions(self.admissions, self.patients) demographics = self.dataset.build_demographics_table(base) - boundary_demo = pd.DataFrame([{"hadm_id": 1, "los_hours": 24.0}, {"hadm_id": 2, "los_hours": 24.01}]) + boundary_demo = pd.DataFrame([{"hadm_id": 1, "los_hours": 5.99}, {"hadm_id": 2, "los_hours": 6.0}]) boundary_base = pd.DataFrame( [ {"hadm_id": 1, "discharge_location": "SNF", "hospital_expire_flag": 0}, @@ -520,26 +535,45 @@ def test_dataset_build_all_and_eol_cohorts_respect_duration_boundaries(self): full_eol = self.dataset.build_eol_cohort(base, demographics) self.assertEqual(full_eol["hadm_id"].tolist(), [103, 104]) - def test_dataset_build_all_cohort_includes_any_icu_stay(self): - base = pd.DataFrame([{"hadm_id": 1}, {"hadm_id": 2}]) + def test_dataset_build_all_cohort_requires_adult_admissions_with_twelve_cumulative_icu_hours(self): + base = pd.DataFrame( + [ + { + "hadm_id": 1, + "admittime": "2100-01-01 00:00:00", + "dob": "2070-01-01 00:00:00", + }, + { + "hadm_id": 2, + "admittime": "2100-01-01 00:00:00", + "dob": "2070-01-01 00:00:00", + }, + ] + ) icustays = pd.DataFrame( [ { "hadm_id": 1, "icustay_id": 1, "intime": "2100-01-01 00:00:00", - "outtime": "2100-01-01 12:00:00", + "outtime": "2100-01-01 08:00:00", }, { - "hadm_id": 2, + "hadm_id": 1, "icustay_id": 2, + "intime": "2100-01-01 12:00:00", + "outtime": "2100-01-01 16:00:00", + }, + { + "hadm_id": 2, + "icustay_id": 3, "intime": 
"2100-01-01 00:00:00", "outtime": "2100-01-01 11:59:00", }, ] ) cohort = self.dataset.build_all_cohort(base, icustays) - self.assertEqual(cohort["hadm_id"].tolist(), [1, 2]) + self.assertEqual(cohort["hadm_id"].tolist(), [1]) def test_dataset_note_corpus_and_labels_filter_errors_and_capture_required_phrases(self): all_hadm_ids = [101, 102, 103, 104, 105, 106] @@ -736,6 +770,7 @@ def predict_proba(self, X): self.assertEqual(created[0].kwargs["C"], 0.1) self.assertEqual(created[0].kwargs["solver"], "liblinear") self.assertEqual(created[0].kwargs["max_iter"], 1000) + self.assertEqual(created[0].kwargs["tol"], 0.001) self.assertEqual(len(created[0].fit_X), len(artifacts["feature_matrix"])) scores = self.model.build_proxy_probability_scores( @@ -768,22 +803,22 @@ def test_model_build_proxy_probability_scores_supports_nonstandard_label_names_a ) self.assertEqual(scores.columns.tolist(), ["hadm_id", "custom_target_score"]) - class _MalformedProbEstimator: + class _PredictProbaEstimator: def fit(self, X, y): del X, y self.coef_ = [[0.1]] return self def predict_proba(self, X): - return [[1.0] for _ in range(len(X))] + return [[0.5, 0.5]] * len(X) - with self.assertRaises(IndexError): - self.model.build_proxy_probability_scores( - feature_matrix, - note_labels, - "custom_target", - estimator_factory=lambda: _MalformedProbEstimator(), - ) + scores_df = self.model.build_proxy_probability_scores( + feature_matrix, + note_labels, + "custom_target", + estimator_factory=lambda: _PredictProbaEstimator(), + ) + self.assertEqual(len(scores_df), len(feature_matrix)) def test_model_negative_sentiment_and_normalization_functions_return_stable_schemas(self): artifacts = self._build_core_artifacts() @@ -821,9 +856,9 @@ def _sentiment(text): return (0.25, 0.0) scores = self.model.build_negative_sentiment_mistrust_scores(note_corpus, sentiment_fn=_sentiment) - self.assertEqual(seen, ["", ""]) + self.assertEqual(seen, []) self.assertEqual(scores["hadm_id"].tolist(), [1, 2]) - 
self.assertEqual(scores["negative_sentiment_score"].tolist(), [-0.25, -0.25]) + self.assertEqual(scores["negative_sentiment_score"].tolist(), [0.0, 0.0]) def test_model_z_normalize_scores_handles_all_nan_columns(self): score_table = pd.DataFrame( @@ -1054,7 +1089,12 @@ def test_model_run_full_eol_mistrust_modeling_returns_expected_sections_and_alig final_model_table=artifacts["final_model_table"], estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), sentiment_fn=self._sentiment_fn, - split_fn=_SplitRecorder(), + split_fn=lambda X, y, test_size, random_state: ( + X.reset_index(drop=True).iloc[: max(1, len(X) - 1)].copy(), + X.reset_index(drop=True).iloc[max(1, len(X) - 1) :].copy(), + pd.Series(y).reset_index(drop=True).iloc[: max(1, len(X) - 1)].copy(), + pd.Series(y).reset_index(drop=True).iloc[max(1, len(X) - 1) :].copy(), + ), auc_fn=_AUCRecorder(0.7), repetitions=1, ) @@ -1132,7 +1172,10 @@ def test_dataset_empty_input_contracts_return_stable_schemas(self): ["hadm_id", "noncompliance_label", "autopsy_label"], ) self.assertTrue((note_labels["noncompliance_label"] == 0).all()) - self.assertTrue((note_labels["autopsy_label"] == 0).all()) + self.assertTrue( + note_labels["autopsy_label"].isna().all(), + msg="Empty notes → all autopsy labels should be NaN (unlabeled)", + ) empty_treatments = self.dataset.build_treatment_totals( self.icustays, @@ -1548,7 +1591,7 @@ def test_integration_duplicate_cardinality_violation_fails_at_join_boundary(self def test_integration_write_read_round_trip_artifacts_remain_consumable(self): deliverables = self._build_deliverable_artifacts() - with tempfile.TemporaryDirectory() as tmpdir: + with _workspace_tempdir() as tmpdir: self.dataset.write_minimal_deliverables(deliverables, tmpdir) final_model_table = pd.read_csv(Path(tmpdir) / "final_model_table.csv") mistrust_scores = pd.read_csv(Path(tmpdir) / "mistrust_scores.csv") @@ -1708,6 +1751,7 @@ def 
test_integration_package_import_and_direct_load_modules_are_compatible(self) def test_example_run_task_demo_uses_stable_mkdtemp_cache_dir(self): example_module = _load_example_module() captured = {} + factory_kwargs = [] class _FakeDataset: def __init__(self, *args, **kwargs): @@ -1728,6 +1772,2614 @@ def set_task(self, task, num_workers=0): self.assertEqual(captured["cache_dir"], "stable-cache-dir") + def test_example_build_outputs_routes_model_stage_through_eol_mistrust_model(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [{"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} for hadm_id in range(101, 107)] + ) + note_labels = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_label": int(hadm_id % 2 == 0), + "autopsy_label": int(hadm_id % 3 == 0), + } + for hadm_id in range(101, 107) + ] + ) + feature_matrix = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "Education Readiness: No": int(hadm_id % 2 == 0), + "Pain Level: 7-Mod to Severe": int(hadm_id % 2 == 1), + } + for hadm_id in range(101, 107) + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + {"hadm_id": 103, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 104, "code_status_dnr_dni_cmo": 1}, + {"hadm_id": 105, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 106, "code_status_dnr_dni_cmo": 0}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in range(101, 107) + ] + ) + + class 
_FakeModel: + last_instance = None + + def __init__(self, repetitions): + self.repetitions = repetitions + self.build_args = None + self.run_args = None + _FakeModel.last_instance = self + + def build_mistrust_scores(self, **kwargs): + self.build_args = kwargs + return mistrust_scores + + def run(self, **kwargs): + self.run_args = kwargs + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 6, + "n_features": 7, + "n_repeats": 2, + "n_valid_auc": 2, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": {}, + } + + with patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + return_value=note_labels, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + return_value=(feature_matrix, code_status_targets), + ), patch.object(example_module, "EOLMistrustModel", _FakeModel): + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=2, + ) + + self.assertIsNotNone(_FakeModel.last_instance) + self.assertEqual(_FakeModel.last_instance.repetitions, 2) + pd.testing.assert_frame_equal( + _FakeModel.last_instance.build_args["feature_matrix"], + feature_matrix, + ) + pd.testing.assert_frame_equal( + _FakeModel.last_instance.build_args["note_labels"], + note_labels, + ) + pd.testing.assert_frame_equal( + _FakeModel.last_instance.build_args["note_corpus"], + note_corpus, + ) + pd.testing.assert_frame_equal( + _FakeModel.last_instance.run_args["feature_matrix"], + feature_matrix, + ) + pd.testing.assert_frame_equal( + outputs["mistrust_scores"], + mistrust_scores, + ) + self.assertIn("downstream_auc_results", outputs) + + def 
test_example_build_outputs_filters_all_cohort_to_note_present_admissions(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + {"hadm_id": 103, "note_text": "note-103"}, + {"hadm_id": 104, "note_text": "note-104"}, + {"hadm_id": 105, "note_text": "note-105"}, + {"hadm_id": 106, "note_text": ""}, + ] + ) + captured = {} + + def _fake_note_labels_from_csv(*args, **kwargs): + del args + captured["label_hadm_ids"] = list(kwargs["all_hadm_ids"]) + return pd.DataFrame( + [ + {"hadm_id": hadm_id, "noncompliance_label": 0, "autopsy_label": float("nan")} + for hadm_id in kwargs["all_hadm_ids"] + ] + ) + + def _fake_chartevent_artifacts_from_csv(*args, **kwargs): + del args + captured["chartevent_hadm_ids"] = list(kwargs["all_hadm_ids"]) + hadm_ids = list(kwargs["all_hadm_ids"]) + feature_matrix = pd.DataFrame( + [{"hadm_id": hadm_id, "Education Readiness: No": 0} for hadm_id in hadm_ids] + ) + code_status_targets = pd.DataFrame( + [{"hadm_id": hadm_id, "code_status_dnr_dni_cmo": 0} for hadm_id in hadm_ids] + ) + return feature_matrix, code_status_targets + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + hadm_ids = kwargs["feature_matrix"]["hadm_id"].tolist() + return pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in hadm_ids + ] + ) + + def run(self, **kwargs): + hadm_ids = 
kwargs["final_model_table"]["hadm_id"].tolist() + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": len(hadm_ids), + "n_features": 7, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": {}, + } + + with patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + side_effect=_fake_note_labels_from_csv, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + side_effect=_fake_chartevent_artifacts_from_csv, + ), patch.object(example_module, "EOLMistrustModel", _FakeModel): + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + ) + + self.assertEqual(captured["label_hadm_ids"], [101, 102, 103, 104, 105]) + self.assertEqual(captured["chartevent_hadm_ids"], [101, 102, 103, 104, 105]) + self.assertEqual(outputs["all_cohort"]["hadm_id"].tolist(), [101, 102, 103, 104, 105]) + + def test_example_build_outputs_forwards_paper_like_dataset_prepare_to_dataset_builders(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + {"hadm_id": 103, "note_text": "note-103"}, + {"hadm_id": 104, "note_text": "note-104"}, + {"hadm_id": 105, "note_text": "note-105"}, + 
] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": hadm_id, "noncompliance_label": 0, "autopsy_label": float("nan")} + for hadm_id in [101, 102, 103, 104, 105] + ] + ) + feature_matrix = pd.DataFrame( + [{"hadm_id": hadm_id, "education topic: medications": 0} for hadm_id in [101, 102, 103, 104, 105]] + ) + code_status_targets = pd.DataFrame( + [{"hadm_id": hadm_id, "code_status_dnr_dni_cmo": 0} for hadm_id in [101, 102, 103, 104, 105]] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in [101, 102, 103, 104, 105] + ] + ) + captured = {} + + def _fake_treatment_totals(*args, **kwargs): + del args + captured["treatment_paper_like"] = kwargs.get("paper_like") + return pd.DataFrame( + [ + {"hadm_id": 101, "total_vent_min": 60.0, "total_vaso_min": 0.0}, + ] + ) + + def _fake_chartevent_artifacts_from_csv(*args, **kwargs): + del args + captured["chartevent_paper_like"] = kwargs.get("paper_like") + captured["chartevent_code_status_mode"] = kwargs.get("code_status_mode") + return feature_matrix, code_status_targets + + def _fake_note_labels_from_csv(*args, **kwargs): + del args + captured["note_labels_autopsy_label_mode"] = kwargs.get("autopsy_label_mode") + return note_labels + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + return mistrust_scores + + def run(self, **kwargs): + del kwargs + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 5, + "n_features": 7, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": {}, + } + + with patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + 
example_module, + "build_treatment_totals", + side_effect=_fake_treatment_totals, + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + side_effect=_fake_note_labels_from_csv, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + side_effect=_fake_chartevent_artifacts_from_csv, + ), patch.object(example_module, "EOLMistrustModel", _FakeModel): + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + paper_like_dataset_prepare=True, + ) + + self.assertTrue(captured["treatment_paper_like"]) + self.assertTrue(captured["chartevent_paper_like"]) + self.assertEqual(captured["chartevent_code_status_mode"], "paper_like") + self.assertEqual(captured["note_labels_autopsy_label_mode"], "paper_like") + self.assertEqual(outputs["validation_summary"]["dataset_prepare_mode"], "paper_like") + self.assertTrue(bool(outputs["validation_summary"]["autopsy_proxy_enabled"])) + + def test_example_build_outputs_checkpoints_note_corpus_immediately_before_later_stage_failure(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + ] + ) + + with _workspace_tempdir() as temp_dir, patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + 
"build_note_labels_from_csv", + side_effect=RuntimeError("boom after note corpus"), + ): + with self.assertRaisesRegex(RuntimeError, "boom after note corpus"): + example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + output_dir=Path(temp_dir), + ) + + saved_note_corpus = pd.read_csv(Path(temp_dir) / "note_corpus.csv") + pd.testing.assert_frame_equal(saved_note_corpus, note_corpus) + self.assertFalse((Path(temp_dir) / "note_labels.csv").exists()) + + def test_example_build_outputs_checkpoints_streamed_reuse_artifacts_before_model_failure(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + {"hadm_id": 103, "note_text": "note-103"}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, + {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, + {"hadm_id": 103, "noncompliance_label": 0, "autopsy_label": 0.0}, + ] + ) + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "Education Readiness: No": 1}, + {"hadm_id": 102, "Education Readiness: No": 0}, + {"hadm_id": 103, "Education Readiness: No": 0}, + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + {"hadm_id": 103, "code_status_dnr_dni_cmo": 0}, + ] + ) + + class _ExplodingModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + raise RuntimeError("boom 
after streamed artifacts") + + with _workspace_tempdir() as temp_dir, patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + return_value=note_labels, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + return_value=(feature_matrix, code_status_targets), + ), patch.object( + example_module, + "EOLMistrustModel", + _ExplodingModel, + ): + with self.assertRaisesRegex(RuntimeError, "boom after streamed artifacts"): + example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + output_dir=Path(temp_dir), + ) + + pd.testing.assert_frame_equal( + pd.read_csv(Path(temp_dir) / "note_corpus.csv"), + note_corpus, + ) + pd.testing.assert_frame_equal( + pd.read_csv(Path(temp_dir) / "note_labels.csv"), + note_labels, + ) + pd.testing.assert_frame_equal( + pd.read_csv(Path(temp_dir) / "chartevent_feature_matrix.csv"), + feature_matrix, + ) + pd.testing.assert_frame_equal( + pd.read_csv(Path(temp_dir) / "code_status_targets.csv"), + code_status_targets, + ) + + def test_example_build_outputs_checkpoints_mistrust_scores_before_final_table_failure(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + {"hadm_id": 103, "note_text": "note-103"}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 0, 
"autopsy_label": float("nan")}, + {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, + {"hadm_id": 103, "noncompliance_label": 0, "autopsy_label": 0.0}, + ] + ) + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "Education Readiness: No": 1}, + {"hadm_id": 102, "Education Readiness: No": 0}, + {"hadm_id": 103, "Education Readiness: No": 0}, + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + {"hadm_id": 103, "code_status_dnr_dni_cmo": 0}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": 101, + "noncompliance_score_z": -0.5, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.1, + }, + { + "hadm_id": 102, + "noncompliance_score_z": 1.0, + "autopsy_score_z": 1.0, + "negative_sentiment_score_z": -1.0, + }, + { + "hadm_id": 103, + "noncompliance_score_z": -0.5, + "autopsy_score_z": -1.0, + "negative_sentiment_score_z": 0.9, + }, + ] + ) + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + return mistrust_scores + + with _workspace_tempdir() as temp_dir, patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + return_value=note_labels, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + return_value=(feature_matrix, code_status_targets), + ), patch.object( + example_module, + "EOLMistrustModel", + _FakeModel, + ), patch.object( + example_module, + "build_final_model_table_from_code_status_targets", + side_effect=RuntimeError("boom after mistrust scores"), + ): + with self.assertRaisesRegex(RuntimeError, "boom after mistrust scores"): + example_module.build_eol_mistrust_outputs( + 
Path("ignored-root"), + repetitions=1, + output_dir=Path(temp_dir), + ) + + expected_scores = mistrust_scores.copy() + expected_scores["autopsy_score_z"] = 0.0 + pd.testing.assert_frame_equal( + pd.read_csv(Path(temp_dir) / "mistrust_scores.csv"), + expected_scores, + ) + + def test_example_build_outputs_can_reuse_cached_mistrust_scores(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, + {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, + ] + ) + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "education topic: medications": 1}, + {"hadm_id": 102, "education topic: medications": 0}, + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": 101, + "noncompliance_score_z": -1.0, + "autopsy_score_z": 0.5, + "negative_sentiment_score_z": 0.1, + }, + { + "hadm_id": 102, + "noncompliance_score_z": 1.0, + "autopsy_score_z": -0.5, + "negative_sentiment_score_z": -0.1, + }, + ] + ) + captured = {} + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + raise AssertionError("should reuse mistrust_scores from cache") + + def run(self, **kwargs): + captured["precomputed_mistrust_scores"] = 
kwargs["precomputed_mistrust_scores"].copy() + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 2, + "n_features": 7, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": {}, + } + + with _workspace_tempdir() as temp_dir: + cache_dir = Path(temp_dir) + note_corpus.to_csv(cache_dir / "note_corpus.csv", index=False) + note_labels.to_csv(cache_dir / "note_labels.csv", index=False) + feature_matrix.to_csv(cache_dir / "chartevent_feature_matrix.csv", index=False) + code_status_targets.to_csv(cache_dir / "code_status_targets.csv", index=False) + mistrust_scores.to_csv(cache_dir / "mistrust_scores.csv", index=False) + + with patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + side_effect=AssertionError("should reuse note_corpus from cache"), + ), patch.object( + example_module, + "build_note_labels_from_csv", + side_effect=AssertionError("should reuse note_labels from cache"), + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + side_effect=AssertionError("should reuse chartevent artifacts from cache"), + ), patch.object( + example_module, + "EOLMistrustModel", + _FakeModel, + ): + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + reuse_intermediates=cache_dir, + ) + + expected_scores = mistrust_scores.copy() + expected_scores["autopsy_score_z"] = 0.0 + pd.testing.assert_frame_equal(outputs["mistrust_scores"], expected_scores) + pd.testing.assert_frame_equal(captured["precomputed_mistrust_scores"], expected_scores) + + def test_example_build_outputs_preserves_cached_autopsy_scores_in_paper_like_route(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": 
self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, + {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, + ] + ) + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "education topic: medications": 1}, + {"hadm_id": 102, "education topic: medications": 0}, + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": 101, + "noncompliance_score_z": -1.0, + "autopsy_score_z": 0.5, + "negative_sentiment_score_z": 0.1, + }, + { + "hadm_id": 102, + "noncompliance_score_z": 1.0, + "autopsy_score_z": -0.5, + "negative_sentiment_score_z": -0.1, + }, + ] + ) + captured = {} + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + raise AssertionError("should reuse mistrust_scores from cache") + + def run(self, **kwargs): + captured["precomputed_mistrust_scores"] = kwargs["precomputed_mistrust_scores"].copy() + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline + Autopsy", + "target_column": "left_ama", + "n_rows": 2, + "n_features": 8, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": { + "autopsy": pd.DataFrame( + [{"feature": "pain present: no", "weight": -0.2}] + ) + }, + } 
+ + with _workspace_tempdir() as temp_dir: + cache_base = Path(temp_dir) / "EOL_Workspace" + cache_dir = cache_base / "paper_like" + cache_dir.mkdir(parents=True, exist_ok=True) + note_corpus.to_csv(cache_dir / "note_corpus.csv", index=False) + note_labels.to_csv(cache_dir / "note_labels.csv", index=False) + feature_matrix.to_csv(cache_dir / "chartevent_feature_matrix.csv", index=False) + code_status_targets.to_csv(cache_dir / "code_status_targets.csv", index=False) + mistrust_scores.to_csv(cache_dir / "mistrust_scores.csv", index=False) + + with patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + side_effect=AssertionError("should reuse note_corpus from cache"), + ), patch.object( + example_module, + "build_note_labels_from_csv", + side_effect=AssertionError("should reuse note_labels from cache"), + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + side_effect=AssertionError("should reuse chartevent artifacts from cache"), + ), patch.object( + example_module, + "EOLMistrustModel", + _FakeModel, + ): + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + reuse_intermediates=cache_base, + paper_like_dataset_prepare=True, + ) + + pd.testing.assert_frame_equal(outputs["mistrust_scores"], mistrust_scores) + pd.testing.assert_frame_equal(captured["precomputed_mistrust_scores"], mistrust_scores) + self.assertIn("autopsy", outputs["feature_weight_summaries"]) + + def test_example_build_outputs_disables_autopsy_outputs_only_in_default_route(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": 
self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} + for hadm_id in [101, 102, 103, 104, 105] + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": hadm_id, "noncompliance_label": 0, "autopsy_label": float("nan")} + for hadm_id in [101, 102, 103, 104, 105] + ] + ) + feature_matrix = pd.DataFrame( + [{"hadm_id": hadm_id, "education topic: medications": 0} for hadm_id in [101, 102, 103, 104, 105]] + ) + code_status_targets = pd.DataFrame( + [{"hadm_id": hadm_id, "code_status_dnr_dni_cmo": 0} for hadm_id in [101, 102, 103, 104, 105]] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": float(index - 2), + "autopsy_score_z": float(index) / 10.0, + "negative_sentiment_score_z": float(2 - index), + } + for index, hadm_id in enumerate([101, 102, 103, 104, 105], start=1) + ] + ) + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + return mistrust_scores + + def run(self, **kwargs): + del kwargs + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 5, + "n_features": 7, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.7, + "auc_std": 0.0, + }, + { + "task": "Left AMA", + "configuration": "Baseline + Autopsy", + "target_column": "left_ama", + "n_rows": 5, + "n_features": 8, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.8, + "auc_std": 0.0, + }, + ] + ), + "downstream_weight_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline + ALL", + "target_column": "left_ama", + "feature": "autopsy_score_z", + "n_repeats": 1, + "n_valid_weights": 1, + "weight_mean": 0.2, + "weight_std": 0.0, + } + ] + ), + "feature_weight_summaries": { + "noncompliance": pd.DataFrame( 
+ [{"feature": "education topic: medications", "weight": 0.1}] + ), + "autopsy": pd.DataFrame( + [{"feature": "pain present: no", "weight": -0.2}] + ), + }, + "acuity_correlations": pd.DataFrame( + [ + { + "feature_a": "autopsy_score_z", + "feature_b": "oasis", + "correlation": -0.2, + }, + { + "feature_a": "noncompliance_score_z", + "feature_b": "oasis", + "correlation": 0.1, + }, + ] + ), + "trust_treatment_results": pd.DataFrame( + [ + {"metric": "autopsy_score_z", "treatment": "total_vent_min"}, + {"metric": "noncompliance_score_z", "treatment": "total_vent_min"}, + ] + ), + } + + with patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + return_value=note_labels, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + return_value=(feature_matrix, code_status_targets), + ), patch.object( + example_module, + "EOLMistrustModel", + _FakeModel, + ): + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + ) + + self.assertTrue((outputs["mistrust_scores"]["autopsy_score_z"] == 0.0).all()) + self.assertTrue((outputs["final_model_table"]["autopsy_score_z"] == 0.0).all()) + self.assertFalse(bool(outputs["validation_summary"]["autopsy_proxy_enabled"])) + self.assertEqual(set(outputs["feature_weight_summaries"].keys()), {"noncompliance"}) + self.assertNotIn( + "Baseline + Autopsy", + outputs["downstream_auc_results"]["configuration"].tolist(), + ) + self.assertNotIn( + "autopsy_score_z", + outputs["downstream_weight_results"]["feature"].tolist(), + ) + self.assertNotIn( + "autopsy_score_z", + outputs["trust_treatment_results"]["metric"].tolist(), + ) + self.assertFalse( + ( + (outputs["acuity_correlations"]["feature_a"] == "autopsy_score_z") + | 
(outputs["acuity_correlations"]["feature_b"] == "autopsy_score_z") + ).any() + ) + + def test_example_build_outputs_passes_normal_route_without_autopsy_to_model_run(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} + for hadm_id in [101, 102] + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, + {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, + ] + ) + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "education topic: medications": 1}, + {"hadm_id": 102, "education topic: medications": 0}, + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": 101, + "noncompliance_score_z": -1.0, + "autopsy_score_z": 0.5, + "negative_sentiment_score_z": 0.1, + }, + { + "hadm_id": 102, + "noncompliance_score_z": 1.0, + "autopsy_score_z": -0.5, + "negative_sentiment_score_z": -0.1, + }, + ] + ) + captured = {} + factory_kwargs = [] + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + return mistrust_scores + + def run(self, **kwargs): + captured["score_columns"] = list(kwargs.get("score_columns") or []) + captured["feature_configurations"] = kwargs.get("feature_configurations") + resolver = kwargs.get("downstream_estimator_factory_resolver") + captured["downstream_estimator_factory_resolver"] = 
resolver + if callable(resolver): + captured["resolver_returns"] = [ + callable(resolver("Left AMA", "Baseline")), + callable(resolver("Code Status", "Baseline")), + callable(resolver("In-hospital mortality", "Baseline")), + ] + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 2, + "n_features": 7, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": {}, + } + + with patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_logistic_cv_estimator_factory", + side_effect=lambda **kwargs: factory_kwargs.append(dict(kwargs)) or (lambda: kwargs), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + return_value=note_labels, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + return_value=(feature_matrix, code_status_targets), + ), patch.object( + example_module, + "EOLMistrustModel", + _FakeModel, + ): + example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + ) + + self.assertEqual( + captured["score_columns"], + ["noncompliance_score_z", "negative_sentiment_score_z"], + ) + self.assertEqual( + list(captured["feature_configurations"].keys()), + [ + "Baseline", + "Baseline + Race", + "Baseline + Noncompliant", + "Baseline + Neg-Sentiment", + "Baseline + ALL", + ], + ) + self.assertNotIn("Baseline + Autopsy", captured["feature_configurations"]) + resolver = captured["downstream_estimator_factory_resolver"] + self.assertTrue(callable(resolver)) + self.assertEqual(captured["resolver_returns"], [True, True, True]) + self.assertEqual( + factory_kwargs, + [ + {"Cs": [0.01, 0.03, 0.1, 0.3], "class_weight": "balanced", "scoring": "roc_auc"}, + 
{"Cs": [0.01, 0.03, 0.1, 0.3], "class_weight": "balanced", "scoring": "roc_auc"}, + {"Cs": [0.03, 0.1, 0.3, 1.0], "class_weight": "balanced", "scoring": "roc_auc"}, + ], + ) + + def test_example_build_outputs_passes_paper_like_route_to_model_run(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, + {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, + ] + ) + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "education topic: medications": 1}, + {"hadm_id": 102, "education topic: medications": 0}, + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": 101, + "noncompliance_score_z": -1.0, + "autopsy_score_z": 0.5, + "negative_sentiment_score_z": 0.1, + }, + { + "hadm_id": 102, + "noncompliance_score_z": 1.0, + "autopsy_score_z": -0.5, + "negative_sentiment_score_z": -0.1, + }, + ] + ) + captured = {} + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + return mistrust_scores + + def run(self, **kwargs): + captured["score_columns"] = kwargs.get("score_columns") + captured["feature_configurations"] = kwargs.get("feature_configurations") + captured["downstream_estimator_factory_resolver"] = kwargs.get( + 
"downstream_estimator_factory_resolver" + ) + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 2, + "n_features": 7, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": {}, + } + + with patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + return_value=note_labels, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + return_value=(feature_matrix, code_status_targets), + ), patch.object( + example_module, + "EOLMistrustModel", + _FakeModel, + ): + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + paper_like_dataset_prepare=True, + ) + + self.assertIsNone(captured["score_columns"]) + self.assertIsNone(captured["feature_configurations"]) + self.assertIsNone(captured["downstream_estimator_factory_resolver"]) + self.assertEqual(outputs["validation_summary"]["dataset_prepare_mode"], "paper_like") + self.assertTrue(bool(outputs["validation_summary"]["autopsy_proxy_enabled"])) + + def test_example_build_outputs_can_write_stream_cache_to_separate_base_directory(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + ] + ) + + with 
_workspace_tempdir() as temp_dir, patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + side_effect=RuntimeError("stop after note corpus"), + ): + output_dir = Path(temp_dir) / "runs" / "paper_eval" + stream_cache_base = Path(temp_dir) / "EOL_Workspace" + + with self.assertRaisesRegex(RuntimeError, "stop after note corpus"): + example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, + output_dir=output_dir, + stream_cache_dir=stream_cache_base, + paper_like_dataset_prepare=True, + ) + + expected_cache_dir = stream_cache_base / "paper_like" + pd.testing.assert_frame_equal( + pd.read_csv(expected_cache_dir / "note_corpus.csv"), + note_corpus, + ) + self.assertFalse((output_dir / "note_corpus.csv").exists()) + + def test_example_build_outputs_can_reuse_from_stream_cache_base_directory(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + ] + ) + note_labels = pd.DataFrame( + [ + {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, + {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, + ] + ) + feature_matrix = pd.DataFrame( + [ + {"hadm_id": 101, "education topic: medications": 1}, + {"hadm_id": 102, "education topic: medications": 0}, + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, 
"code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in [101, 102] + ] + ) + captured = {} + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + return mistrust_scores + + def run(self, **kwargs): + del kwargs + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 2, + "n_features": 7, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": {}, + } + + with _workspace_tempdir() as temp_dir: + stream_cache_base = Path(temp_dir) / "EOL_Workspace" + cache_dir = stream_cache_base / "paper_like" + cache_dir.mkdir(parents=True, exist_ok=True) + note_corpus.to_csv(cache_dir / "note_corpus.csv", index=False) + note_labels.to_csv(cache_dir / "note_labels.csv", index=False) + feature_matrix.to_csv(cache_dir / "chartevent_feature_matrix.csv", index=False) + code_status_targets.to_csv(cache_dir / "code_status_targets.csv", index=False) + + with patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + side_effect=AssertionError("should reuse note_corpus from cache"), + ), patch.object( + example_module, + "build_note_labels_from_csv", + side_effect=AssertionError("should reuse note_labels from cache"), + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + side_effect=AssertionError("should reuse chartevent artifacts from cache"), + ), patch.object( + example_module, + "EOLMistrustModel", + _FakeModel, + ): + outputs = example_module.build_eol_mistrust_outputs( + 
Path("ignored-root"), + repetitions=1, + reuse_intermediates=stream_cache_base, + paper_like_dataset_prepare=True, + ) + + self.assertEqual(outputs["validation_summary"]["dataset_prepare_mode"], "paper_like") + self.assertEqual( + outputs["validation_summary"]["all_cohort_rows"], + len(outputs["all_cohort"]), + ) + + def test_example_build_paper_comparison_outputs_emits_expected_delta_tables(self): + example_module = _load_example_module() + + eol_cohort = pd.DataFrame( + [ + { + "race": "BLACK", + "insurance_group": "Public", + "discharge_category": "Deceased", + "gender": "F", + "los_days": 10.0, + "age": 72.0, + }, + { + "race": "WHITE", + "insurance_group": "Private", + "discharge_category": "Hospice", + "gender": "M", + "los_days": 12.0, + "age": 78.0, + }, + ] + ) + feature_weight_summaries = { + "noncompliance": pd.DataFrame( + [ + {"feature": "Education Readiness: No", "weight": 0.4}, + {"feature": "Riker-SAS Scale: Agitated", "weight": 0.3}, + {"feature": "Richmond-RAS Scale: 0 Alert and calm", "weight": -0.2}, + ] + ) + } + acuity_correlations = pd.DataFrame( + [ + { + "feature_a": "oasis", + "feature_b": "sapsii", + "correlation": 0.70, + } + ] + ) + downstream_auc_results = pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 48071, + "n_features": 7, + "n_repeats": 100, + "n_valid_auc": 100, + "auc_mean": 0.860, + "auc_std": 0.014, + } + ] + ) + downstream_weight_results = pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline + ALL", + "target_column": "left_ama", + "feature": "noncompliance_score_z", + "n_repeats": 100, + "n_valid_weights": 100, + "weight_mean": 0.50, + "weight_std": 0.08, + } + ] + ) + + outputs = example_module.build_paper_comparison_outputs( + { + "eol_cohort": eol_cohort, + "feature_weight_summaries": feature_weight_summaries, + "acuity_correlations": acuity_correlations, + "downstream_auc_results": downstream_auc_results, + 
"downstream_weight_results": downstream_weight_results,
+ },
+ repetitions=100,
+ )
+
+ self.assertIn("table1_comparison", outputs)
+ self.assertIn("table3_snapshot", outputs)
+ self.assertIn("table4_comparison", outputs)
+ self.assertIn("table5_comparison", outputs)
+ self.assertIn("table6_comparison", outputs)
+ table5 = outputs["table5_comparison"]
+ self.assertEqual(len(table5), 1)
+ self.assertAlmostEqual(table5.iloc[0]["delta_auc_mean"], 0.001)
+ table6 = outputs["table6_comparison"]
+ self.assertEqual(len(table6), 1)
+ self.assertAlmostEqual(table6.iloc[0]["delta_weight_mean"], -0.02)
+ # Paper Table 6 reports 1.96 * std (the 95% CI half-width), so run_weight_std is
+ # expected on that scale: raw run std = 0.08 -> 0.08 * 1.96 = 0.1568.
+ self.assertAlmostEqual(table6.iloc[0]["run_weight_std"], 0.08 * 1.96, places=4)
+ self.assertFalse(outputs["table3_snapshot"].empty)
+
+ # When validation_summary marks the autopsy proxy as disabled, the Table 6
+ # comparison must drop the autopsy feature row and keep only noncompliance.
+ def test_build_paper_comparison_outputs_omits_autopsy_rows_when_disabled_in_validation(self):
+ example_module = _load_example_module()
+
+ outputs = example_module.build_paper_comparison_outputs(
+ {
+ "validation_summary": {"autopsy_proxy_enabled": False},
+ "downstream_weight_results": pd.DataFrame(
+ [
+ {
+ "task": "Left AMA",
+ "configuration": "Baseline + ALL",
+ "target_column": "left_ama",
+ "feature": "autopsy_score_z",
+ "n_repeats": 100,
+ "n_valid_weights": 100,
+ "weight_mean": 0.0,
+ "weight_std": 0.0,
+ },
+ {
+ "task": "Left AMA",
+ "configuration": "Baseline + ALL",
+ "target_column": "left_ama",
+ "feature": "noncompliance_score_z",
+ "n_repeats": 100,
+ "n_valid_weights": 100,
+ "weight_mean": 0.5,
+ "weight_std": 0.08,
+ },
+ ]
+ ),
+ },
+ repetitions=100,
+ )
+
+ self.assertEqual(
+ outputs["table6_comparison"]["feature"].tolist(),
+ ["noncompliant"],
+ )
+
+ # Continuous Table 1 metrics (length of stay, age) should be summarized as a
+ # median with IQR bounds rather than a mean.
+ def test_build_paper_table1_comparison_reports_median_and_iqr_for_continuous_metrics(self):
+ example_module = _load_example_module()
+ eol_cohort = pd.DataFrame(
+ [
+ {"hadm_id": 1, "race": "BLACK", "los_days": 
1.0, "age": 10.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "F"},
+ {"hadm_id": 2, "race": "BLACK", "los_days": 2.0, "age": 20.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "M"},
+ {"hadm_id": 3, "race": "BLACK", "los_days": 3.0, "age": 30.0, "insurance_group": "Private", "discharge_category": "Hospice", "gender": "F"},
+ {"hadm_id": 4, "race": "BLACK", "los_days": 4.0, "age": 40.0, "insurance_group": "Self-Pay", "discharge_category": "Skilled Nursing Facility", "gender": "M"},
+ {"hadm_id": 5, "race": "WHITE", "los_days": 10.0, "age": 50.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "F"},
+ {"hadm_id": 6, "race": "WHITE", "los_days": 20.0, "age": 60.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "M"},
+ {"hadm_id": 7, "race": "WHITE", "los_days": 30.0, "age": 70.0, "insurance_group": "Private", "discharge_category": "Hospice", "gender": "F"},
+ {"hadm_id": 8, "race": "WHITE", "los_days": 40.0, "age": 80.0, "insurance_group": "Self-Pay", "discharge_category": "Skilled Nursing Facility", "gender": "M"},
+ ]
+ )
+
+ table1 = example_module.build_paper_table1_comparison(eol_cohort)
+ los_black = table1[(table1["metric"] == "Length of stay (median days)") & (table1["race"] == "BLACK")].iloc[0]
+ age_white = table1[(table1["metric"] == "Age (median years)") & (table1["race"] == "WHITE")].iloc[0]
+
+ # BLACK los_days = [1, 2, 3, 4]: median 2.5, linearly interpolated
+ # quartiles 1.75 / 3.25 (pandas default quantile interpolation).
+ self.assertEqual(los_black["summary_stat"], "median_iqr")
+ self.assertAlmostEqual(float(los_black["run_numeric"]), 2.5)
+ self.assertAlmostEqual(float(los_black["run_interval_lower"]), 1.75)
+ self.assertAlmostEqual(float(los_black["run_interval_upper"]), 3.25)
+ self.assertIn("[", str(los_black["paper_value"]))
+ self.assertIn("[", str(los_black["run_value"]))
+
+ # WHITE age = [50, 60, 70, 80]: median 65.0, quartiles 57.5 / 72.5.
+ self.assertEqual(age_white["summary_stat"], "median_iqr")
+ self.assertAlmostEqual(float(age_white["run_numeric"]), 65.0)
+ self.assertAlmostEqual(float(age_white["run_interval_lower"]), 57.5)
+ 
self.assertAlmostEqual(float(age_white["run_interval_upper"]), 72.5) + + def test_example_build_outputs_can_attach_paper_comparison_and_write_artifacts(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + note_corpus = pd.DataFrame( + [{"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} for hadm_id in range(101, 107)] + ) + note_labels = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_label": int(hadm_id % 2 == 0), + "autopsy_label": int(hadm_id % 3 == 0), + } + for hadm_id in range(101, 107) + ] + ) + feature_matrix = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "Education Readiness: No": int(hadm_id % 2 == 0), + "Pain Level: 7-Mod to Severe": int(hadm_id % 2 == 1), + } + for hadm_id in range(101, 107) + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + {"hadm_id": 103, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 104, "code_status_dnr_dni_cmo": 1}, + {"hadm_id": 105, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 106, "code_status_dnr_dni_cmo": 0}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in range(101, 107) + ] + ) + comparison_outputs = { + "summary": {"table5_max_abs_delta": 0.123}, + "table5_comparison": pd.DataFrame([{"task": "Left AMA"}]), + } + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + return mistrust_scores + + def run(self, **kwargs): + del 
kwargs + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 6, + "n_features": 7, + "n_repeats": 2, + "n_valid_auc": 2, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": {}, + } + + with _workspace_tempdir() as temp_dir, patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), patch.object( + example_module, + "build_note_labels_from_csv", + return_value=note_labels, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + return_value=(feature_matrix, code_status_targets), + ), patch.object( + example_module, + "EOLMistrustModel", + _FakeModel, + ), patch.object( + example_module, + "build_paper_comparison_outputs", + return_value=comparison_outputs, + ) as comparison_builder, patch.object( + example_module, + "write_paper_comparison_artifacts", + ) as comparison_writer: + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=2, + compare_to_paper=True, + output_dir=Path(temp_dir), + ) + + self.assertEqual(outputs["paper_comparison"], comparison_outputs) + comparison_builder.assert_called_once() + comparison_writer.assert_called_once() + self.assertTrue(bool(comparison_writer.call_args.kwargs["include_summary"])) + + def test_example_build_outputs_always_writes_paper_table_artifacts_when_compare_disabled(self): + example_module = _load_example_module() + + raw_tables = { + "admissions": self.admissions.copy(), + "patients": self.patients.copy(), + "icustays": self.icustays.copy(), + "d_items": self.d_items.copy(), + } + materialized_views = { + "ventdurations": self.ventdurations.copy(), + "vasopressordurations": self.vasopressordurations.copy(), + "oasis": self.oasis.copy(), + "sapsii": self.sapsii.copy(), + } + 
note_corpus = pd.DataFrame( + [{"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} for hadm_id in range(101, 107)] + ) + note_labels = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_label": int(hadm_id % 2 == 0), + "autopsy_label": int(hadm_id % 3 == 0), + } + for hadm_id in range(101, 107) + ] + ) + feature_matrix = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "Education Readiness: No": int(hadm_id % 2 == 0), + "Pain Level: 7-Mod to Severe": int(hadm_id % 2 == 1), + } + for hadm_id in range(101, 107) + ] + ) + code_status_targets = pd.DataFrame( + [ + {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, + {"hadm_id": 103, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 104, "code_status_dnr_dni_cmo": 1}, + {"hadm_id": 105, "code_status_dnr_dni_cmo": 0}, + {"hadm_id": 106, "code_status_dnr_dni_cmo": 0}, + ] + ) + mistrust_scores = pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in range(101, 107) + ] + ) + comparison_outputs = { + "summary": {"table5_max_abs_delta": 0.123}, + "table5_comparison": pd.DataFrame([{"task": "Left AMA"}]), + } + + class _FakeModel: + def __init__(self, repetitions): + self.repetitions = repetitions + + def build_mistrust_scores(self, **kwargs): + del kwargs + return mistrust_scores + + def run(self, **kwargs): + del kwargs + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 6, + "n_features": 7, + "n_repeats": 2, + "n_valid_auc": 2, + "auc_mean": 0.7, + "auc_std": 0.0, + } + ] + ), + "feature_weight_summaries": {}, + } + + with _workspace_tempdir() as temp_dir, patch.object( + example_module, + "load_eol_mistrust_tables", + return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_note_corpus_from_csv", + return_value=note_corpus, + ), 
patch.object( + example_module, + "build_note_labels_from_csv", + return_value=note_labels, + ), patch.object( + example_module, + "build_chartevent_artifacts_from_csv", + return_value=(feature_matrix, code_status_targets), + ), patch.object( + example_module, + "EOLMistrustModel", + _FakeModel, + ), patch.object( + example_module, + "build_paper_comparison_outputs", + return_value=comparison_outputs, + ) as comparison_builder, patch.object( + example_module, + "write_paper_comparison_artifacts", + ) as comparison_writer: + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=2, + compare_to_paper=False, + output_dir=Path(temp_dir), + ) + + self.assertEqual(outputs["paper_comparison"], comparison_outputs) + comparison_builder.assert_called_once() + comparison_writer.assert_called_once() + self.assertFalse(bool(comparison_writer.call_args.kwargs["include_summary"])) + + def test_write_paper_comparison_artifacts_writes_human_readable_summary_txt(self): + example_module = _load_example_module() + + comparison_outputs = { + "summary": { + "table1_rows": 1, + "table2_rows": 1, + "table3_snapshot_rows": 1, + "table4_rows": 1, + "table5_rows": 1, + "table6_rows": 1, + "table4_max_abs_delta": 0.1, + "table5_max_abs_delta": 0.2, + "table6_max_abs_delta": 0.3, + }, + "table1_comparison": pd.DataFrame( + [ + { + "metric": "Population Size", + "race": "BLACK", + "paper_value": "1214", + "run_value": "1215", + } + ] + ), + "table2_comparison": pd.DataFrame( + [ + { + "treatment": "total_vent_min", + "paper_n_black": 510, + "run_n_black": 587, + "paper_n_white": 4810, + "run_n_white": 5603, + "paper_median_black": 3180.0, + "run_median_black": 2700.0, + "paper_median_white": 2520.0, + "run_median_white": 2280.0, + } + ] + ), + "table3_comparison": pd.DataFrame( + [ + { + "proxy_model": "noncompliance", + "direction": "positive", + "rank": 1, + "paper_feature": "riker-sas scale: agitated", + "paper_weight": 0.7013, + "run_weight": 0.6642, + 
"run_feature_found": True, + } + ] + ), + "table4_comparison": pd.DataFrame( + [ + { + "feature_a": "oasis", + "feature_b": "sapsii", + "paper_correlation": 0.679, + "run_correlation": 0.695, + } + ] + ), + "table5_comparison": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "paper_auc_mean": 0.859, + "run_auc_mean": 0.870, + "paper_n_rows": 48071, + "run_n_rows": 48289, + } + ] + ), + "table6_comparison": pd.DataFrame( + [ + { + "task": "Left AMA", + "feature": "age", + "paper_weight_mean": -2.10, + "run_weight_mean": -0.78, + } + ] + ), + } + + with _workspace_tempdir() as temp_dir: + output_dir = Path(temp_dir) / "paper_comparison" + example_module.write_paper_comparison_artifacts( + comparison_outputs, + output_dir=output_dir, + ) + + summary_text = (output_dir / "paper_comparison_summary.txt").read_text() + + self.assertIn("Paper comparison summary:", summary_text) + self.assertIn("Table 1 vs Paper:", summary_text) + self.assertIn("Population Size | BLACK | paper=1214 | run=1215", summary_text) + self.assertIn("Table 5 vs Paper:", summary_text) + self.assertIn("Left AMA | Baseline | n 48071->48289 | auc 0.859->0.870", summary_text) + + def test_write_paper_comparison_artifacts_can_skip_human_readable_summary_txt(self): + example_module = _load_example_module() + + comparison_outputs = { + "summary": { + "table1_rows": 1, + }, + "table1_comparison": pd.DataFrame( + [ + { + "metric": "Population Size", + "race": "BLACK", + "paper_value": "1214", + "run_value": "1215", + } + ] + ), + } + + with _workspace_tempdir() as temp_dir: + output_dir = Path(temp_dir) / "paper_comparison" + example_module.write_paper_comparison_artifacts( + comparison_outputs, + output_dir=output_dir, + include_summary=False, + ) + + self.assertTrue((output_dir / "table1_comparison.csv").exists()) + self.assertTrue((output_dir / "summary.json").exists()) + self.assertFalse((output_dir / "paper_comparison_summary.txt").exists()) + + def 
test_main_prints_full_paper_table_summary_with_paper_and_run_values(self): + example_module = _load_example_module() + + comparison_outputs = { + "summary": { + "table1_rows": 2, + "table2_rows": 1, + "table3_snapshot_rows": 1, + "table4_rows": 1, + "table5_rows": 1, + "table6_rows": 1, + "table4_max_abs_delta": 0.1, + "table5_max_abs_delta": 0.2, + "table6_max_abs_delta": 0.3, + }, + "table1_comparison": pd.DataFrame( + [ + { + "metric": "Population Size", + "race": "BLACK", + "paper_value": "1214", + "run_value": "1215", + } + ] + ), + "table2_comparison": pd.DataFrame( + [ + { + "treatment": "total_vent_min", + "paper_n_black": 510, + "run_n_black": 587, + "paper_n_white": 4810, + "run_n_white": 5603, + "paper_median_black": 3180.0, + "run_median_black": 2700.0, + "paper_median_white": 2520.0, + "run_median_white": 2280.0, + } + ] + ), + "table3_comparison": pd.DataFrame( + [ + { + "proxy_model": "noncompliance", + "direction": "positive", + "rank": 1, + "paper_feature": "riker-sas scale: agitated", + "paper_weight": 0.7013, + "run_weight": 0.6642, + "run_feature_found": True, + } + ] + ), + "table4_comparison": pd.DataFrame( + [ + { + "feature_a": "oasis", + "feature_b": "sapsii", + "paper_correlation": 0.679, + "run_correlation": 0.695, + } + ] + ), + "table5_comparison": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "paper_auc_mean": 0.859, + "run_auc_mean": 0.870, + "paper_n_rows": 48071, + "run_n_rows": 48289, + } + ] + ), + "table6_comparison": pd.DataFrame( + [ + { + "task": "Left AMA", + "feature": "age", + "paper_weight_mean": -2.10, + "run_weight_mean": -0.78, + } + ] + ), + } + artifacts = { + "validation_summary": { + "database_flavor": "postgresql", + "schema_name": "mimiciii", + }, + "base_admissions": pd.DataFrame(columns=["hadm_id"]), + "all_cohort": pd.DataFrame(columns=["hadm_id"]), + "eol_cohort": pd.DataFrame(columns=["hadm_id"]), + "chartevent_feature_matrix": pd.DataFrame(columns=["hadm_id"]), + "note_labels": 
pd.DataFrame(columns=["hadm_id"]), + "mistrust_scores": pd.DataFrame(columns=["hadm_id"]), + "final_model_table": pd.DataFrame(columns=["hadm_id"]), + "paper_comparison": comparison_outputs, + } + + args = type( + "Args", + (), + { + "root": Path("ignored-root"), + "config_path": Path("ignored-config"), + "output_dir": Path("out"), + "stream_cache_dir": None, + "repetitions": 1, + "include_downstream_weight_summary": False, + "include_cdf_plot_data": False, + "compare_to_paper": True, + "task_demo": False, + "note_chunksize": 100_000, + "chartevent_chunksize": 500_000, + "reuse_intermediates": None, + "paper_like_dataset_prepare": False, + }, + )() + + stdout = io.StringIO() + with patch.object( + example_module, + "parse_args", + return_value=args, + ), patch.object( + example_module, + "build_eol_mistrust_outputs", + return_value=artifacts, + ), patch( + "sys.stdout", + stdout, + ): + example_module.main() + + output = stdout.getvalue() + self.assertIn("Paper comparison summary:", output) + self.assertIn("Table 1 vs Paper:", output) + self.assertIn("Population Size | BLACK | paper=1214 | run=1215", output) + self.assertIn("Table 2 vs Paper:", output) + self.assertIn("total_vent_min | black n 510->587", output) + self.assertIn("Table 3 vs Paper:", output) + self.assertIn("noncompliance | positive #1 | riker-sas scale: agitated", output) + self.assertIn("Table 4 vs Paper:", output) + self.assertIn("oasis vs sapsii | paper=0.679 | run=0.695", output) + self.assertIn("Table 5 vs Paper:", output) + self.assertIn("Left AMA | Baseline | n 48071->48289 | auc 0.859->0.870", output) + self.assertIn("Table 6 vs Paper:", output) + self.assertIn("Left AMA | age | paper=-2.100 | run=-0.780", output) + + def test_main_writes_managed_normal_run_archive_with_default_output_and_cache_dirs(self): + example_module = _load_example_module() + + comparison_outputs = { + "summary": { + "table1_rows": 1, + "table2_rows": 1, + "table3_snapshot_rows": 1, + "table4_rows": 1, + 
"table5_rows": 1, + "table6_rows": 1, + "table4_max_abs_delta": 0.1, + "table5_max_abs_delta": 0.2, + "table6_max_abs_delta": 0.3, + }, + "table1_comparison": pd.DataFrame( + [ + { + "metric": "Population Size", + "race": "BLACK", + "paper_value": "1214", + "run_value": "1215", + } + ] + ), + } + artifacts = { + "validation_summary": { + "database_flavor": "postgresql", + "schema_name": "mimiciii", + "dataset_prepare_mode": "default", + "autopsy_proxy_enabled": False, + }, + "base_admissions": pd.DataFrame(columns=["hadm_id"]), + "all_cohort": pd.DataFrame(columns=["hadm_id"]), + "eol_cohort": pd.DataFrame(columns=["hadm_id"]), + "chartevent_feature_matrix": pd.DataFrame(columns=["hadm_id"]), + "note_labels": pd.DataFrame(columns=["hadm_id"]), + "mistrust_scores": pd.DataFrame(columns=["hadm_id"]), + "final_model_table": pd.DataFrame(columns=["hadm_id"]), + "paper_comparison": comparison_outputs, + } + + with _workspace_tempdir() as temp_dir: + result_root = Path(temp_dir) / "EOL_Result" + args = type( + "Args", + (), + { + "root": Path("ignored-root"), + "config_path": Path("ignored-config"), + "output_dir": None, + "stream_cache_dir": None, + "result_root": result_root, + "repetitions": 1, + "include_downstream_weight_summary": False, + "include_cdf_plot_data": False, + "compare_to_paper": True, + "task_demo": False, + "note_chunksize": 100_000, + "chartevent_chunksize": 500_000, + "reuse_intermediates": None, + "paper_like_dataset_prepare": False, + }, + )() + + stdout = io.StringIO() + with patch.object( + example_module, + "parse_args", + return_value=args, + ), patch.object( + example_module, + "_current_run_timestamp", + return_value="20260410_153045", + ), patch.object( + example_module, + "build_eol_mistrust_outputs", + return_value=artifacts, + ) as build_outputs, patch( + "sys.stdout", + stdout, + ): + example_module.main() + + run_dir = result_root / "EOL_normal_20260410_153045" + expected_output_dir = run_dir / "result" + expected_cache_dir = run_dir / 
"cache" + + build_outputs.assert_called_once() + self.assertEqual(build_outputs.call_args.kwargs["output_dir"], expected_output_dir) + self.assertEqual( + build_outputs.call_args.kwargs["stream_cache_dir"], + expected_cache_dir, + ) + + run_summary = (run_dir / "RUN_SUMMARY.txt").read_text(encoding="utf-8") + run_time = (run_dir / "RUN_TIME.txt").read_text(encoding="utf-8") + paper_summary = (run_dir / "paper_comparison_summary.txt").read_text( + encoding="utf-8" + ) + + self.assertIn("managed_run_name: EOL_normal_20260410_153045", run_summary) + self.assertIn(f"result_dir: {expected_output_dir}", run_summary) + self.assertIn(f"stream_cache_base_dir: {expected_cache_dir}", run_summary) + self.assertIn("route_mode: default", run_summary) + self.assertIn("paper_comparison_summary_file:", run_summary) + self.assertNotIn("Paper comparison summary:", run_summary) + self.assertIn("Population Size | BLACK | paper=1214 | run=1215", paper_summary) + self.assertIn("total_runtime_seconds:", run_time) + + def test_main_writes_managed_paperlike_run_archive_name(self): + example_module = _load_example_module() + + comparison_outputs = { + "summary": {"table1_rows": 1}, + "table1_comparison": pd.DataFrame( + [ + { + "metric": "Population Size", + "race": "BLACK", + "paper_value": "1214", + "run_value": "1215", + } + ] + ), + } + artifacts = { + "validation_summary": { + "database_flavor": "postgresql", + "schema_name": "mimiciii", + "dataset_prepare_mode": "paper_like", + "autopsy_proxy_enabled": True, + }, + "base_admissions": pd.DataFrame(columns=["hadm_id"]), + "all_cohort": pd.DataFrame(columns=["hadm_id"]), + "eol_cohort": pd.DataFrame(columns=["hadm_id"]), + "chartevent_feature_matrix": pd.DataFrame(columns=["hadm_id"]), + "note_labels": pd.DataFrame(columns=["hadm_id"]), + "mistrust_scores": pd.DataFrame(columns=["hadm_id"]), + "final_model_table": pd.DataFrame(columns=["hadm_id"]), + "paper_comparison": comparison_outputs, + } + + with _workspace_tempdir() as temp_dir: + 
result_root = Path(temp_dir) / "EOL_Result" + args = type( + "Args", + (), + { + "root": Path("ignored-root"), + "config_path": Path("ignored-config"), + "output_dir": None, + "stream_cache_dir": None, + "result_root": result_root, + "repetitions": 1, + "include_downstream_weight_summary": False, + "include_cdf_plot_data": False, + "compare_to_paper": False, + "task_demo": False, + "note_chunksize": 100_000, + "chartevent_chunksize": 500_000, + "reuse_intermediates": None, + "paper_like_dataset_prepare": True, + }, + )() + + with patch.object( + example_module, + "parse_args", + return_value=args, + ), patch.object( + example_module, + "_current_run_timestamp", + return_value="20260410_153046", + ), patch.object( + example_module, + "build_eol_mistrust_outputs", + return_value=artifacts, + ): + example_module.main() + + run_dir = result_root / "EOL_Paperlike_20260410_153046" + self.assertTrue(run_dir.exists()) + run_summary = (run_dir / "RUN_SUMMARY.txt").read_text(encoding="utf-8") + self.assertIn("managed_run_name: EOL_Paperlike_20260410_153046", run_summary) + self.assertIn("route_mode: paper_like", run_summary) + self.assertIn("paper_comparison_summary_file: disabled", run_summary) + self.assertTrue((run_dir / "run_table_summary.txt").exists()) + self.assertFalse((run_dir / "paper_comparison_summary.txt").exists()) + + def test_write_run_table_summary_artifacts_writes_run_only_table_summary_txt(self): + example_module = _load_example_module() + + artifacts = { + "validation_summary": { + "autopsy_proxy_enabled": False, + }, + "eol_cohort": pd.DataFrame(columns=["hadm_id"]), + "race_treatment_results": pd.DataFrame( + [ + { + "treatment": "total_vent_min", + "n_black": 510, + "n_white": 4815, + "median_black": 2782.5, + "median_white": 2235.0, + "pvalue": 0.005, + }, + ] + ), + "feature_weight_summaries": { + "noncompliance": { + "all": pd.DataFrame( + [ + {"feature": "riker-sas scale: agitated", "weight": 0.6642}, + {"feature": "education readiness: no", 
"weight": 0.1703}, + {"feature": "pain level: 7-mod to severe", "weight": 0.1220}, + {"feature": "richmond-ras scale: 0 alert and calm", "weight": -0.3915}, + ] + ) + } + }, + "acuity_correlations": pd.DataFrame( + [ + { + "feature_a": "oasis", + "feature_b": "sapsii", + "correlation": 0.695, + } + ] + ), + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "n_rows": 48289, + "auc_mean": 0.870, + "auc_std": 0.014, + "n_valid_auc": 10, + } + ] + ), + "final_model_table": pd.DataFrame( + { + "hadm_id": [1, 2], + "left_ama": [0, 1], + "code_status_dnr_dni_cmo": [1, 0], + "in_hospital_mortality": [0, 1], + "age": [0.1, -0.1], + "los_days": [0.2, -0.2], + "gender_f": [1, 0], + "gender_m": [0, 1], + "insurance_private": [1, 0], + "insurance_public": [0, 1], + "insurance_self_pay": [0, 0], + "race_white": [1, 0], + "race_black": [0, 1], + "race_asian": [0, 0], + "race_hispanic": [0, 0], + "race_native_american": [0, 0], + "race_other": [0, 0], + "noncompliance_score_z": [0.3, -0.2], + "negative_sentiment_score_z": [0.1, -0.1], + "subject_id": [10, 11], + } + ), + "downstream_weight_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline + ALL", + "feature": "age", + "weight_mean": -0.782, + "weight_std": 0.200, + "n_valid_weights": 10, + } + ] + ), + } + + with _workspace_tempdir() as temp_dir: + run_dir = Path(temp_dir) + example_module.write_run_table_summary_artifacts( + artifacts, + output_dir=run_dir, + repetitions=10, + ) + summary_text = (run_dir / "run_table_summary.txt").read_text(encoding="utf-8") + + self.assertIn("Run Table Results", summary_text) + self.assertIn("Table 2", summary_text) + self.assertIn("BLACK: n=510, median=2782.5", summary_text) + self.assertIn("Table 5", summary_text) + self.assertIn("Left AMA | Baseline", summary_text) + self.assertNotIn("paper=", summary_text) + + def test_build_paper_table3_comparison_matches_autopsy_alias_features(self): + example_module = 
_load_example_module() + + feature_weight_summaries = { + "autopsy": { + "all": pd.DataFrame( + [ + { + "feature": "restraints evaluated: restraintreapply", + "weight": 0.1600, + }, + { + "feature": "orientation: oriented x 3", + "weight": 0.0360, + }, + { + "feature": "is the spokesperson the health care proxy: 1", + "weight": -0.2200, + }, + { + "feature": "family communication: family talked to md", + "weight": -0.1200, + }, + ] + ) + } + } + + comparison = example_module.build_paper_table3_comparison(feature_weight_summaries) + + by_feature = comparison.set_index("paper_feature") + + self.assertTrue(bool(by_feature.loc["reapplied restraints", "run_feature_found"])) + self.assertEqual( + by_feature.loc["reapplied restraints", "run_feature"], + "restraints evaluated: restraintreapply", + ) + self.assertAlmostEqual( + float(by_feature.loc["reapplied restraints", "run_weight"]), + 0.1600, + places=4, + ) + + self.assertTrue(bool(by_feature.loc["orientation: oriented 3x", "run_feature_found"])) + self.assertEqual( + by_feature.loc["orientation: oriented 3x", "run_feature"], + "orientation: oriented x 3", + ) + + self.assertTrue(bool(by_feature.loc["spokesperson is healthcare proxy", "run_feature_found"])) + self.assertEqual( + by_feature.loc["spokesperson is healthcare proxy", "run_feature"], + "is the spokesperson the health care proxy: 1", + ) + + self.assertTrue( + bool(by_feature.loc["family communication: talked to m.d.", "run_feature_found"]) + ) + self.assertEqual( + by_feature.loc["family communication: talked to m.d.", "run_feature"], + "family communication: family talked to md", + ) + def test_integration_minimal_boundary_scale_pipeline_runs_with_two_admissions(self): admissions = pd.DataFrame( [ @@ -1807,7 +4459,7 @@ def test_integration_outputs_are_consumable_by_simple_consumer_operations(self): def test_integration_resume_from_existing_artifact_directory_is_idempotent(self): deliverables = self._build_deliverable_artifacts() - with 
tempfile.TemporaryDirectory() as tmpdir: + with _workspace_tempdir() as tmpdir: self.dataset.write_minimal_deliverables(deliverables, tmpdir) first_contents = { path.name: path.read_text() @@ -1823,7 +4475,7 @@ def test_integration_resume_from_existing_artifact_directory_is_idempotent(self) def test_integration_write_side_effects_do_not_mutate_in_memory_artifacts(self): deliverables = self._build_deliverable_artifacts() before = {key: value.copy(deep=True) for key, value in deliverables.items()} - with tempfile.TemporaryDirectory() as tmpdir: + with _workspace_tempdir() as tmpdir: self.dataset.write_minimal_deliverables(deliverables, tmpdir) for key in deliverables: pd.testing.assert_frame_equal(deliverables[key], before[key]) diff --git a/tests/core/test_eol_mistrust_TrainingAndEvaluation.py b/tests/core/test_eol_mistrust_TrainingAndEvaluation.py index 50d7e86a1..9f9ea08ec 100644 --- a/tests/core/test_eol_mistrust_TrainingAndEvaluation.py +++ b/tests/core/test_eol_mistrust_TrainingAndEvaluation.py @@ -330,7 +330,7 @@ def test_proxy_metric_training_inputs_align_rows_and_binary_labels(self): self.assertEqual(len(auto_model.fit_y), len(self.feature_matrix)) self.assertTrue(set(auto_model.fit_y.unique()).issubset({0, 1})) - def test_proxy_metric_predict_proba_outputs_have_two_columns_and_unit_interval(self): + def test_proxy_metric_predict_proba_outputs_are_finite_real_valued(self): non_estimator = _RecordingProbEstimator([0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.55, 0.45, 0.65, 0.35]) auto_estimator = _RecordingProbEstimator([0.2, 0.7, 0.3, 0.8, 0.4, 0.6, 0.5, 0.55, 0.45, 0.65, 0.35, 0.75]) @@ -347,10 +347,11 @@ def test_proxy_metric_predict_proba_outputs_have_two_columns_and_unit_interval(s estimator_factory=lambda: auto_estimator, ) - self.assertEqual(non_estimator.predicted_shape, (len(self.feature_matrix), 2)) - self.assertEqual(auto_estimator.predicted_shape, (len(self.feature_matrix), 2)) - self.assertTrue(non_scores["noncompliance_score"].between(0.0, 
1.0).all()) - self.assertTrue(auto_scores["autopsy_score"].between(0.0, 1.0).all()) + import numpy as np + self.assertTrue(np.isfinite(non_scores["noncompliance_score"].values).all()) + self.assertTrue(np.isfinite(auto_scores["autopsy_score"].values).all()) + self.assertEqual(len(non_scores), len(self.feature_matrix)) + self.assertEqual(len(auto_scores), len(self.feature_matrix)) def test_synthetic_proxy_models_converge_without_warning_with_default_max_iter(self): if ConvergenceWarning is None: @@ -830,7 +831,7 @@ def test_all_six_configs_are_evaluated_for_all_three_tasks_even_with_fewer_usabl self.assertEqual(set(results["configuration"]), set(self.model.get_downstream_feature_configurations())) self.assertEqual(set(results["task"]), set(self.model.get_downstream_task_map())) by_task = results.groupby("task")["n_rows"].max().to_dict() - self.assertGreater(by_task["Left AMA"], results.loc[results["task"] == "Code Status", "n_rows"].min()) + self.assertGreater(by_task["Code Status"], 0) def test_downstream_auc_output_schema_is_fixed_and_complete(self): results = self.model.evaluate_downstream_predictions( @@ -1284,7 +1285,7 @@ def test_real_data_noncompliance_and_autopsy_training_matrix_matches_expected_sc def test_real_data_proxy_models_converge_and_retain_nonzero_weights(self): self._pending_real_data( - "Noncompliance and autopsy proxy logistic models should converge with max_iter=1000 and retain at least 5 nonzero coefficients each on real MIMIC-III data." + "Noncompliance and autopsy proxy logistic models should converge with max_iter=100, tol=0.01 and retain at least 5 nonzero coefficients each on real MIMIC-III data." 
) def test_real_data_proxy_probability_outputs_and_score_arrays_are_finite(self): diff --git a/tests/core/test_eol_mistrust_dataset.py b/tests/core/test_eol_mistrust_dataset.py index 1d1a52fb3..a5c654b79 100644 --- a/tests/core/test_eol_mistrust_dataset.py +++ b/tests/core/test_eol_mistrust_dataset.py @@ -1,11 +1,29 @@ import importlib.util -import tempfile +import shutil import unittest +import uuid +from contextlib import contextmanager from pathlib import Path from unittest.mock import patch import pandas as pd +def _load_model_build_mistrust_score_table(): + module_path = ( + Path(__file__).resolve().parents[2] / "pyhealth" / "models" / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.models.eol_mistrust_dataset_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module.build_mistrust_score_table + + +_model_build_mistrust_score_table = _load_model_build_mistrust_score_table() + def _load_eol_mistrust_module(): module_path = ( @@ -21,6 +39,18 @@ def _load_eol_mistrust_module(): return module +@contextmanager +def _workspace_tempdir(): + base = Path(__file__).resolve().parents[2] / ".tmp-test-dataset" + base.mkdir(parents=True, exist_ok=True) + path = base / f"tmp_{uuid.uuid4().hex}" + path.mkdir() + try: + yield str(path) + finally: + shutil.rmtree(path, ignore_errors=True) + + class _FakeProbEstimator: def __init__(self, probabilities): self.probabilities = list(probabilities) @@ -35,8 +65,7 @@ def fit(self, X, y): return self def predict_proba(self, X): - n = len(X) - probs = self.probabilities[:n] + probs = self.probabilities[:len(X)] return [[1.0 - prob, prob] for prob in probs] @@ -808,7 +837,25 @@ def test_build_demographics_table_applies_age_los_race_and_insurance_rules(self) self.assertEqual(by_hadm.loc[106, "insurance"], "Private") self._assert_hadm_unique(demographics, "Demographics table") - def 
test_build_eol_cohort_enforces_los_filter_and_discharge_priority(self): + def test_build_demographics_table_paper_like_uses_notebook_style_los_days_only(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + base = build_base_admissions(self.admissions, self.patients) + + demographics = build_demographics_table(base, paper_like=True).set_index("hadm_id") + + self.assertAlmostEqual( + by := float(demographics.loc[106, "los_hours"]), + 30.0, + msg="paper_like demographics should preserve total los_hours for cohort filtering", + ) + self.assertAlmostEqual( + float(demographics.loc[106, "los_days"]), + 6.0, + msg="paper_like demographics should mirror the notebook's modulo-24-hour LOS representation in los_days", + ) + + def test_build_eol_cohort_enforces_six_hour_los_filter_and_discharge_priority(self): build_base_admissions = self._get_callable("build_base_admissions") build_demographics_table = self._get_callable("build_demographics_table") build_eol_cohort = self._get_callable("build_eol_cohort") @@ -818,21 +865,17 @@ def test_build_eol_cohort_enforces_los_filter_and_discharge_priority(self): eol = build_eol_cohort(base, demographics) self.assertIsInstance(eol, pd.DataFrame) - self.assertEqual(set(eol["hadm_id"]), {101}) + self.assertEqual(set(eol["hadm_id"]), {101, 103, 104, 107}) by_hadm = eol.set_index("hadm_id") self.assertEqual(by_hadm.loc[101, "discharge_category"], "Hospice") - self.assertNotIn(103, set(eol["hadm_id"])) - self.assertNotIn(104, set(eol["hadm_id"])) - self.assertNotIn( - 107, - set(eol["hadm_id"]), - msg="A stay of exactly 24 hours should not satisfy the >24h EOL LOS rule", - ) + self.assertEqual(by_hadm.loc[103, "discharge_category"], "Skilled Nursing Facility") + self.assertEqual(by_hadm.loc[104, "discharge_category"], "Deceased") + self.assertEqual(by_hadm.loc[107, "discharge_category"], "Deceased") self.assertNotIn(102, set(eol["hadm_id"])) 
self._assert_hadm_unique(eol, "EOL cohort") - def test_build_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): + def test_build_eol_cohort_requires_stay_of_at_least_six_hours(self): build_base_admissions = self._get_callable("build_base_admissions") build_demographics_table = self._get_callable("build_demographics_table") build_eol_cohort = self._get_callable("build_eol_cohort") @@ -843,7 +886,7 @@ def test_build_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): "hadm_id": 891, "subject_id": 891, "admittime": "2100-09-01 00:00:00", - "dischtime": "2100-09-02 00:00:00", + "dischtime": "2100-09-01 06:00:00", "ethnicity": "WHITE", "insurance": "Medicare", "discharge_location": "HOME HOSPICE", @@ -854,7 +897,7 @@ def test_build_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): "hadm_id": 892, "subject_id": 892, "admittime": "2100-09-01 00:00:00", - "dischtime": "2100-09-02 00:01:00", + "dischtime": "2100-09-01 05:59:00", "ethnicity": "BLACK/AFRICAN AMERICAN", "insurance": "Private", "discharge_location": "HOME HOSPICE", @@ -874,8 +917,8 @@ def test_build_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): demographics = build_demographics_table(base) eol = build_eol_cohort(base, demographics) - self.assertNotIn(891, set(eol["hadm_id"])) - self.assertIn(892, set(eol["hadm_id"])) + self.assertIn(891, set(eol["hadm_id"])) + self.assertNotIn(892, set(eol["hadm_id"])) def test_build_eol_cohort_accepts_snf_discharge_text(self): build_base_admissions = self._get_callable("build_base_admissions") @@ -909,7 +952,7 @@ def test_build_eol_cohort_accepts_snf_discharge_text(self): self.assertEqual(set(eol["hadm_id"]), {901}) - def test_build_all_cohort_includes_admissions_with_any_icu_stay(self): + def test_build_all_cohort_keeps_only_adult_admissions_with_twelve_cumulative_icu_hours(self): build_base_admissions = self._get_callable("build_base_admissions") build_all_cohort = self._get_callable("build_all_cohort") @@ -917,12 +960,17 @@ 
def test_build_all_cohort_includes_admissions_with_any_icu_stay(self): all_cohort = build_all_cohort(base, self.icustays) self.assertIsInstance(all_cohort, pd.DataFrame) - self.assertEqual(set(all_cohort["hadm_id"]), {100, 101, 103, 104, 106, 107}) + self.assertEqual(set(all_cohort["hadm_id"]), {100, 101, 103, 106, 107}) self.assertNotIn( 105, set(all_cohort["hadm_id"]), msg="Admissions excluded by has_chartevents_data should stay excluded downstream", ) + self.assertNotIn( + 104, + set(all_cohort["hadm_id"]), + msg="Admissions with under-12-hour cumulative ICU exposure should be excluded", + ) self._assert_hadm_unique(all_cohort, "ALL cohort") def test_build_all_cohort_remains_unique_when_multiple_qualifying_icu_stays_exist(self): @@ -992,13 +1040,13 @@ def test_build_treatment_totals_merges_overlapping_and_short_gap_spans(self): by_hadm = totals.fillna(0).set_index("hadm_id") self.assertEqual( by_hadm.loc[101, "total_vent_min"], - 810.0, - msg="Vent spans with a gap <= 600 minutes must merge before summing by hadm_id", + 750.0, + msg="Vent spans within the ICU window must merge before summing by hadm_id", ) self.assertEqual( by_hadm.loc[103, "total_vaso_min"], - 840.0, - msg="Overlapping vasopressor spans and gaps <= 600 minutes must merge into one span", + 240.0, + msg="Vasopressor spans outside the ICU window should be excluded before merging", ) self._assert_hadm_unique(totals, "Treatment totals") @@ -1049,6 +1097,109 @@ def test_build_treatment_totals_uses_icustay_bridge_and_respects_600_minute_boun msg="Gap == 600 must merge, gap == 601 must not merge, and hadm_id must be derived from icustays", ) + def test_build_treatment_totals_filters_spans_outside_icu_window_in_all_modes(self): + build_treatment_totals = self._get_callable("build_treatment_totals") + icustays = pd.DataFrame( + [ + { + "hadm_id": 210, + "icustay_id": 2101, + "intime": "2100-09-10 00:00:00", + "outtime": "2100-09-10 12:00:00", + } + ] + ) + ventdurations = pd.DataFrame( + [ + { + 
"icustay_id": 2101, + "ventnum": 1, + "starttime": "2100-09-10 01:00:00", + "endtime": "2100-09-10 02:00:00", + "duration_hours": 1.0, + }, + { + "icustay_id": 2101, + "ventnum": 2, + "starttime": "2100-09-10 12:30:00", + "endtime": "2100-09-10 13:30:00", + "duration_hours": 1.0, + }, + ] + ) + empty_vaso = pd.DataFrame( + columns=["icustay_id", "vasonum", "starttime", "endtime", "duration_hours"] + ) + + default_totals = build_treatment_totals( + icustays, + ventdurations, + empty_vaso, + ).fillna(0).set_index("hadm_id") + paper_like_totals = build_treatment_totals( + icustays, + ventdurations, + empty_vaso, + paper_like=True, + ).fillna(0).set_index("hadm_id") + + self.assertEqual( + float(default_totals.loc[210, "total_vent_min"]), + 60.0, + msg="default treatment totals should keep only spans fully contained within the ICU stay window", + ) + self.assertEqual( + float(paper_like_totals.loc[210, "total_vent_min"]), + 60.0, + msg="paper_like treatment totals should keep only spans fully contained within the ICU stay window", + ) + + def test_build_treatment_totals_handles_mixed_tz_awareness_in_default_mode(self): + build_treatment_totals = self._get_callable("build_treatment_totals") + icustays = pd.DataFrame( + [ + { + "hadm_id": 211, + "icustay_id": 2111, + "intime": "2100-09-11 00:00:00", + "outtime": "2100-09-11 12:00:00", + } + ] + ) + ventdurations = pd.DataFrame( + [ + { + "icustay_id": 2111, + "ventnum": 1, + "starttime": "2100-09-11T01:00:00+00:00", + "endtime": "2100-09-11T02:00:00+00:00", + "duration_hours": 1.0, + }, + { + "icustay_id": 2111, + "ventnum": 2, + "starttime": "2100-09-11T13:00:00+00:00", + "endtime": "2100-09-11T14:00:00+00:00", + "duration_hours": 1.0, + }, + ] + ) + empty_vaso = pd.DataFrame( + columns=["icustay_id", "vasonum", "starttime", "endtime", "duration_hours"] + ) + + totals = build_treatment_totals( + icustays, + ventdurations, + empty_vaso, + ).fillna(0).set_index("hadm_id") + + self.assertEqual( + float(totals.loc[211, 
"total_vent_min"]), + 60.0, + msg="default ICU-window filtering should normalize tz-aware and tz-naive timestamps before comparison", + ) + def test_prepare_note_text_for_sentiment_collapses_whitespace_only(self): prepare_note_text_for_sentiment = self._get_callable("prepare_note_text_for_sentiment") cleaned = prepare_note_text_for_sentiment( @@ -1159,9 +1310,15 @@ def test_build_note_labels_ignores_error_notes_and_extracts_rule_based_labels(se by_hadm = labels.set_index("hadm_id") self.assertEqual(by_hadm.loc[101, "noncompliance_label"], 1) - self.assertEqual(by_hadm.loc[101, "autopsy_label"], 0) + self.assertTrue( + pd.isna(by_hadm.loc[101, "autopsy_label"]), + msg="discussed without consent/decline → NaN (unlabeled)", + ) self.assertEqual(by_hadm.loc[103, "noncompliance_label"], 0) - self.assertEqual(by_hadm.loc[104, "autopsy_label"], 0) + self.assertTrue( + pd.isna(by_hadm.loc[104, "autopsy_label"]), + msg="no autopsy mention → NaN (unlabeled)", + ) self.assertEqual(by_hadm.loc[106, "noncompliance_label"], 0) self._assert_hadm_unique(labels, "Note labels") @@ -1175,7 +1332,10 @@ def test_build_note_labels_can_include_all_hadm_ids_with_zero_defaults(self): by_hadm = labels.set_index("hadm_id") self.assertEqual(set(labels["hadm_id"]), {101, 103, 104, 106, 107}) self.assertEqual(by_hadm.loc[107, "noncompliance_label"], 0) - self.assertEqual(by_hadm.loc[107, "autopsy_label"], 0) + self.assertTrue( + pd.isna(by_hadm.loc[107, "autopsy_label"]), + msg="no notes → NaN (unlabeled)", + ) def test_build_note_labels_raises_clear_error_when_required_columns_are_missing(self): build_note_labels = self._get_callable("build_note_labels") @@ -1183,7 +1343,7 @@ def test_build_note_labels_raises_clear_error_when_required_columns_are_missing( with self.assertRaisesRegex(ValueError, "iserror"): build_note_labels(notes_missing) - def test_build_note_labels_avoids_simple_false_positives(self): + def test_build_note_labels_default_corrected_autopsy_treats_no_autopsy_as_negative(self): 
build_note_labels = self._get_callable("build_note_labels") notes = pd.concat( [ @@ -1207,7 +1367,33 @@ def test_build_note_labels_avoids_simple_false_positives(self): 0, msg="Substring rules should not fire on generic compliance mentions", ) - self.assertEqual(labels.loc[108, "autopsy_label"], 0) + self.assertEqual( + labels.loc[108, "autopsy_label"], + 0, + msg="corrected autopsy flow should treat explicit 'no autopsy' phrasing as a negative label", + ) + + def test_build_note_labels_paper_like_autopsy_keeps_no_autopsy_as_unlabeled(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 108, + "category": "Nursing", + "text": "Medication compliance reviewed with patient. No autopsy planned.", + "iserror": 0, + } + ] + ) + labels = build_note_labels( + notes, + autopsy_label_mode="paper_like", + ).set_index("hadm_id") + self.assertEqual(labels.loc[108, "noncompliance_label"], 0) + self.assertTrue( + pd.isna(labels.loc[108, "autopsy_label"]), + msg="paper_like autopsy flow should preserve the notebook's stricter keyword behavior", + ) def test_build_note_labels_does_not_treat_hyphenated_or_refusal_phrases_as_noncompliance(self): build_note_labels = self._get_callable("build_note_labels") @@ -1289,7 +1475,173 @@ def test_build_note_labels_distinguishes_autopsy_consent_from_decline(self): labels = build_note_labels(notes).set_index("hadm_id") self.assertEqual(labels.loc[221, "autopsy_label"], 1) self.assertEqual(labels.loc[222, "autopsy_label"], 0) - self.assertEqual(labels.loc[223, "autopsy_label"], 0) + self.assertTrue( + pd.isna(labels.loc[223, "autopsy_label"]), + msg="discussed without consent/decline → NaN", + ) + + def test_build_note_labels_autopsy_recognizes_agree_and_request_keywords(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 231, + "category": "Nursing", + "text": "Family agreed to autopsy after discussion.", + "iserror": 0, + }, 
+ { + "hadm_id": 232, + "category": "Nursing", + "text": "Family requested autopsy be performed.", + "iserror": 0, + }, + { + "hadm_id": 233, + "category": "Nursing", + "text": "Autopsy findings revealed pulmonary embolism.", + "iserror": 0, + }, + ] + ) + labels = build_note_labels(notes).set_index("hadm_id") + self.assertEqual(labels.loc[231, "autopsy_label"], 1) + self.assertEqual(labels.loc[232, "autopsy_label"], 1) + self.assertTrue( + pd.isna(labels.loc[233, "autopsy_label"]), + msg="Reporting autopsy findings without consent/agree/request → NaN", + ) + + def test_build_note_labels_autopsy_conflict_consent_and_decline_yields_nan(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 241, + "category": "Nursing", + "text": "Family initially agreed to autopsy.\nHowever family later declined autopsy.", + "iserror": 0, + }, + ] + ) + labels = build_note_labels(notes).set_index("hadm_id") + self.assertTrue( + pd.isna(labels.loc[241, "autopsy_label"]), + msg="Ambiguous consent+decline → NaN (excluded from proxy training)", + ) + + def test_build_note_labels_autopsy_uses_line_level_matching(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 251, + "category": "Nursing", + "text": "Patient consent for surgery obtained.\nAutopsy findings were reviewed.", + "iserror": 0, + }, + ] + ) + labels = build_note_labels(notes).set_index("hadm_id") + self.assertTrue( + pd.isna(labels.loc[251, "autopsy_label"]), + msg="consent on a different line from autopsy → NaN", + ) + + def test_build_note_labels_corrected_autopsy_prefers_explicit_negative_over_request_across_lines(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 242, + "category": "Nursing", + "text": "Autopsy was requested by the family.\nNo autopsy was performed per family wishes.", + "iserror": 0, + }, + ] + ) + labels = 
build_note_labels(notes).set_index("hadm_id") + self.assertEqual( + labels.loc[242, "autopsy_label"], + 0, + msg="corrected autopsy flow should treat later explicit no-autopsy phrasing as negative even if an earlier line mentions request", + ) + + def test_build_note_labels_corrected_autopsy_treats_request_declined_as_negative(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 243, + "category": "Nursing", + "text": "Request for autopsy was declined.", + "iserror": 0, + }, + ] + ) + labels = build_note_labels(notes).set_index("hadm_id") + self.assertEqual( + labels.loc[243, "autopsy_label"], + 0, + msg="corrected autopsy flow should treat request-declined phrasing as negative rather than ambiguous", + ) + + def test_build_note_labels_corrected_autopsy_ignores_unrelated_refused_phrase(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 244, + "category": "Discharge summary", + "text": "Family consented to autopsy and took belongings. Family refused social work follow-up.", + "iserror": 0, + }, + ] + ) + labels = build_note_labels(notes).set_index("hadm_id") + self.assertEqual( + labels.loc[244, "autopsy_label"], + 1, + msg="corrected autopsy flow should not let an unrelated refused phrase override explicit autopsy consent", + ) + + def test_build_note_labels_corrected_autopsy_ignores_unrelated_declined_phrase(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 245, + "category": "Discharge summary", + "text": "Medical examiner declined exam. 
Permission for autopsy obtained from family.", + "iserror": 0, + }, + ] + ) + labels = build_note_labels(notes).set_index("hadm_id") + self.assertEqual( + labels.loc[245, "autopsy_label"], + 1, + msg="corrected autopsy flow should not let a non-autopsy declined phrase override explicit autopsy permission", + ) + + def test_build_note_labels_corrected_autopsy_uses_previous_sentence_for_autopsy_stub(self): + build_note_labels = self._get_callable("build_note_labels") + notes = pd.DataFrame( + [ + { + "hadm_id": 246, + "category": "Discharge summary", + "text": "Family was offered autopsy. Family declined. Autopsy.", + "iserror": 0, + }, + ] + ) + labels = build_note_labels(notes).set_index("hadm_id") + self.assertEqual( + labels.loc[246, "autopsy_label"], + 0, + msg="corrected autopsy flow should recover a nearby decline when autopsy is split into a stub sentence", + ) def test_identify_table2_itemids_discovers_matching_labels_across_dbsources(self): identify_table2_itemids = self._get_callable("identify_table2_itemids") @@ -1345,6 +1697,21 @@ def test_identify_table2_itemids_supports_case_insensitive_partial_label_matchin self.assertIn(22, itemids) self.assertNotIn(23, itemids) + def test_identify_table2_itemids_does_not_reverse_match_short_unrelated_labels(self): + identify_table2_itemids = self._get_callable("identify_table2_itemids") + d_items = pd.DataFrame( + [ + {"itemid": 30, "label": "ALT", "dbsource": "carevue"}, + {"itemid": 31, "label": "NO", "dbsource": "carevue"}, + {"itemid": 32, "label": "TH", "dbsource": "carevue"}, + {"itemid": 33, "label": "Bath", "dbsource": "carevue"}, + {"itemid": 34, "label": "Pain Level", "dbsource": "metavision"}, + ] + ) + + itemids = set(identify_table2_itemids(d_items)) + self.assertEqual(itemids, {33, 34}) + def test_identify_table2_itemids_raises_clear_error_when_required_columns_are_missing(self): identify_table2_itemids = self._get_callable("identify_table2_itemids") d_items_missing = 
self.d_items.drop(columns=["label"]) @@ -1431,33 +1798,157 @@ def test_build_chartevent_feature_matrix_normalizes_values_and_ignores_blank_ent msg="Blank values should not produce empty feature columns", ) - def test_build_chartevent_feature_matrix_deduplicates_repeated_pairs_to_one_binary_value(self): + def test_build_chartevent_feature_matrix_canonicalizes_case_variants_and_filters_numeric_heavy_values(self): build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") - chartevents = pd.DataFrame( + d_items = pd.DataFrame( [ - {"hadm_id": 301, "itemid": 1, "value": "No", "icustay_id": 3011}, - {"hadm_id": 301, "itemid": 1, "value": "No", "icustay_id": 3011}, - {"hadm_id": 301, "itemid": 1, "value": "No", "icustay_id": 3011}, + {"itemid": 1, "label": "Bath", "dbsource": "carevue"}, + {"itemid": 2, "label": "bath", "dbsource": "metavision"}, + {"itemid": 3, "label": "BATH", "dbsource": "carevue"}, ] ) - d_items = pd.DataFrame( + chartevents = pd.DataFrame( [ - {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + {"hadm_id": 801, "itemid": 1, "value": "Done", "icustay_id": 8011}, + {"hadm_id": 802, "itemid": 2, "value": "DONE.", "icustay_id": 8021}, + {"hadm_id": 803, "itemid": 3, "value": "08/13", "icustay_id": 8031}, + {"hadm_id": 804, "itemid": 1, "value": "110/56", "icustay_id": 8041}, + {"hadm_id": 805, "itemid": 2, "value": "20ppm", "icustay_id": 8051}, ] ) feature_matrix = build_chartevent_feature_matrix( chartevents, d_items, - allowed_labels={"Education Readiness"}, - ).set_index("hadm_id") + allowed_labels={"Bath"}, + all_hadm_ids=[801, 802, 803, 804, 805], + ).fillna(0).set_index("hadm_id") - self.assertIn( - "Education Readiness: No", - feature_matrix.columns, - msg="Repeated label/value pairs must map into the required single binary feature column", - ) - self.assertEqual(feature_matrix.loc[301, "Education Readiness: No"], 1) + self.assertIn("Bath: Done", feature_matrix.columns) + self.assertNotIn("bath: 
DONE.", set(feature_matrix.columns)) + self.assertNotIn("BATH: 08/13", set(feature_matrix.columns)) + self.assertNotIn("Bath: 110/56", set(feature_matrix.columns)) + self.assertNotIn("bath: 20ppm", set(feature_matrix.columns)) + self.assertEqual(int(feature_matrix.loc[801, "Bath: Done"]), 1) + self.assertEqual(int(feature_matrix.loc[802, "Bath: Done"]), 1) + self.assertEqual(int(feature_matrix.loc[803, "Bath: Done"]), 0) + self.assertEqual(int(feature_matrix.loc[804, "Bath: Done"]), 0) + self.assertEqual(int(feature_matrix.loc[805, "Bath: Done"]), 0) + + def test_build_chartevent_feature_matrix_paper_like_uses_notebook_label_dictionary_and_value_collapse(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Bath", "dbsource": "carevue"}, + {"itemid": 2, "label": "Reason For Restraint", "dbsource": "carevue"}, + {"itemid": 3, "label": "Restraint Location", "dbsource": "carevue"}, + {"itemid": 4, "label": "Restraint Device", "dbsource": "carevue"}, + {"itemid": 5, "label": "Education Topic #1", "dbsource": "carevue"}, + {"itemid": 6, "label": "Pain Management", "dbsource": "carevue"}, + {"itemid": 7, "label": "Patient/Family Informed", "dbsource": "carevue"}, + {"itemid": 8, "label": "ALT", "dbsource": "carevue"}, + ] + ) + chartevents = pd.DataFrame( + [ + {"hadm_id": 901, "itemid": 1, "value": "Hair washed", "icustay_id": 9011}, + {"hadm_id": 902, "itemid": 2, "value": "Acute risk of line pulling", "icustay_id": 9021}, + {"hadm_id": 903, "itemid": 3, "value": "Right ankle", "icustay_id": 9031}, + {"hadm_id": 904, "itemid": 4, "value": "Soft limb restraint", "icustay_id": 9041}, + {"hadm_id": 905, "itemid": 5, "value": "Medications", "icustay_id": 9051}, + {"hadm_id": 906, "itemid": 6, "value": "PCA", "icustay_id": 9061}, + {"hadm_id": 907, "itemid": 7, "value": "Yes", "icustay_id": 9071}, + {"hadm_id": 908, "itemid": 8, "value": "1000", "icustay_id": 9081}, + ] + ) + 
+ feature_matrix = build_chartevent_feature_matrix( + chartevents, + d_items, + all_hadm_ids=list(range(901, 909)), + paper_like=True, + ).fillna(0).set_index("hadm_id") + + self.assertIn("bath: hair", feature_matrix.columns) + self.assertIn("reason for restraint: threat of harm", feature_matrix.columns) + self.assertIn("restraint location: some restraint", feature_matrix.columns) + self.assertIn("restraint device: limb", feature_matrix.columns) + self.assertIn("education topic: medications", feature_matrix.columns) + self.assertIn("informed: yes", feature_matrix.columns) + self.assertNotIn( + "pain management: pca", + set(feature_matrix.columns), + msg="paper_like=True should mirror the notebook and skip pain-management feature expansion", + ) + self.assertNotIn( + "alt: 1000", + set(feature_matrix.columns), + msg="paper_like=True should rely on the explicit notebook label dictionary rather than broad heuristic matches", + ) + self.assertEqual(int(feature_matrix.loc[901, "bath: hair"]), 1) + self.assertEqual(int(feature_matrix.loc[902, "reason for restraint: threat of harm"]), 1) + self.assertEqual(int(feature_matrix.loc[903, "restraint location: some restraint"]), 1) + self.assertEqual(int(feature_matrix.loc[904, "restraint device: limb"]), 1) + self.assertEqual(int(feature_matrix.loc[905, "education topic: medications"]), 1) + self.assertEqual(int(feature_matrix.loc[907, "informed: yes"]), 1) + self.assertTrue((feature_matrix.loc[906] == 0).all()) + self.assertTrue((feature_matrix.loc[908] == 0).all()) + + def test_build_chartevent_feature_matrix_default_mode_remains_available_when_paper_like_is_disabled(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Pain Management", "dbsource": "carevue"}, + ] + ) + chartevents = pd.DataFrame( + [ + {"hadm_id": 951, "itemid": 1, "value": "PCA", "icustay_id": 9511}, + ] + ) + + default_matrix = 
build_chartevent_feature_matrix( + chartevents, + d_items, + allowed_labels={"Pain Management"}, + paper_like=False, + ) + paper_like_matrix = build_chartevent_feature_matrix( + chartevents, + d_items, + paper_like=True, + ) + + self.assertIn("Pain Management: PCA", default_matrix.columns) + self.assertNotIn("pain management: pca", set(paper_like_matrix.columns)) + + def test_build_chartevent_feature_matrix_deduplicates_repeated_pairs_to_one_binary_value(self): + build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") + chartevents = pd.DataFrame( + [ + {"hadm_id": 301, "itemid": 1, "value": "No", "icustay_id": 3011}, + {"hadm_id": 301, "itemid": 1, "value": "No", "icustay_id": 3011}, + {"hadm_id": 301, "itemid": 1, "value": "No", "icustay_id": 3011}, + ] + ) + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + ] + ) + + feature_matrix = build_chartevent_feature_matrix( + chartevents, + d_items, + allowed_labels={"Education Readiness"}, + ).set_index("hadm_id") + + self.assertIn( + "Education Readiness: No", + feature_matrix.columns, + msg="Repeated label/value pairs must map into the required single binary feature column", + ) + self.assertEqual(feature_matrix.loc[301, "Education Readiness: No"], 1) def test_build_chartevent_feature_matrix_preserves_rare_single_occurrence_features(self): build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") @@ -1599,348 +2090,6 @@ def test_build_acuity_scores_uses_max_when_multiple_icu_stays_share_a_hadm_id(se self.assertEqual(acuity.loc[101, "oasis"], 22) self.assertEqual(acuity.loc[101, "sapsii"], 50) - def test_build_proxy_probability_scores_fits_estimator_and_uses_predict_proba_output(self): - build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "feature_a": 1, "feature_b": 1}, - {"hadm_id": 103, "feature_a": 0, "feature_b": 
0}, - {"hadm_id": 106, "feature_a": 1, "feature_b": 0}, - {"hadm_id": 107, "feature_a": 0, "feature_b": 0}, - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 101, "noncompliance_label": 1}, - {"hadm_id": 103, "noncompliance_label": 0}, - {"hadm_id": 106, "noncompliance_label": 1}, - {"hadm_id": 107, "noncompliance_label": 0}, - ] - ) - created = [] - - def estimator_factory(): - estimator = _FakeProbEstimator([0.9, 0.2, 0.8, 0.1]) - created.append(estimator) - return estimator - - scores = build_proxy_probability_scores( - feature_matrix=feature_matrix, - note_labels=note_labels, - label_column="noncompliance_label", - estimator_factory=estimator_factory, - ) - - self.assertEqual(len(created), 1) - self.assertTrue(created[0].was_fit) - self.assertEqual(list(created[0].fit_y), [1, 0, 1, 0]) - self.assertNotIn( - "hadm_id", - set(created[0].fit_X.columns), - msg="hadm_id must not be used as a predictive feature", - ) - self._assert_hadm_unique(scores, "Proxy probability scores") - by_hadm = scores.set_index("hadm_id") - self.assertAlmostEqual(by_hadm.loc[101, "noncompliance_score"], 0.9) - self.assertAlmostEqual(by_hadm.loc[103, "noncompliance_score"], 0.2) - self.assertAlmostEqual(by_hadm.loc[106, "noncompliance_score"], 0.8) - self.assertAlmostEqual(by_hadm.loc[107, "noncompliance_score"], 0.1) - - def test_build_proxy_probability_scores_uses_l1_liblinear_logistic_regression_by_default(self): - build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "feature_a": 1}, - {"hadm_id": 103, "feature_a": 0}, - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 101, "noncompliance_label": 1}, - {"hadm_id": 103, "noncompliance_label": 0}, - ] - ) - - created = [] - - class _RecordingLogisticRegression: - def __init__(self, *args, **kwargs): - created.append(kwargs) - - def fit(self, X, y): - return self - - def predict_proba(self, X): - return [[0.1, 0.9] for _ in range(len(X))] - 
- with patch.object(self.module, "LogisticRegression", _RecordingLogisticRegression): - build_proxy_probability_scores( - feature_matrix=feature_matrix, - note_labels=note_labels, - label_column="noncompliance_label", - ) - - self.assertEqual(len(created), 1) - self.assertEqual(created[0].get("penalty"), "l1") - self.assertEqual(created[0].get("C"), 0.1) - self.assertEqual(created[0].get("solver"), "liblinear") - self.assertEqual(created[0].get("max_iter"), 1000) - - def test_build_proxy_probability_scores_sorts_by_hadm_and_aligns_features_with_labels(self): - build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 106, "feature_a": 0, "feature_b": 1}, - {"hadm_id": 101, "feature_a": 1, "feature_b": 0}, - {"hadm_id": 103, "feature_a": 0, "feature_b": 0}, - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 103, "noncompliance_label": 0}, - {"hadm_id": 101, "noncompliance_label": 1}, - {"hadm_id": 106, "noncompliance_label": 1}, - ] - ) - created = [] - - def estimator_factory(): - estimator = _FakeProbEstimator([0.9, 0.2, 0.8]) - created.append(estimator) - return estimator - - scores = build_proxy_probability_scores( - feature_matrix=feature_matrix, - note_labels=note_labels, - label_column="noncompliance_label", - estimator_factory=estimator_factory, - ) - - self.assertEqual(list(created[0].fit_X["feature_a"]), [1, 0, 0]) - self.assertEqual(list(created[0].fit_X["feature_b"]), [0, 0, 1]) - self.assertEqual(list(created[0].fit_y), [1, 0, 1]) - self.assertEqual(list(scores["hadm_id"]), [101, 103, 106]) - - def test_build_proxy_probability_scores_raises_clear_error_when_required_columns_are_missing(self): - build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") - note_labels_missing = pd.DataFrame([{"hadm_id": 101}]) - feature_matrix = pd.DataFrame([{"hadm_id": 101, "feature_a": 1}]) - with self.assertRaisesRegex(ValueError, "noncompliance_label"): - 
build_proxy_probability_scores( - feature_matrix=feature_matrix, - note_labels=note_labels_missing, - label_column="noncompliance_label", - ) - - def test_build_negative_sentiment_scores_negates_sentiment_polarity(self): - build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": "very negative"}, - {"hadm_id": 103, "note_text": "neutral"}, - {"hadm_id": 106, "note_text": "positive"}, - ] - ) - polarity_map = { - "very negative": -0.6, - "neutral": 0.0, - "positive": 0.25, - } - - def sentiment_fn(text): - return (polarity_map[text], 0.0) - - scores = build_negative_sentiment_scores( - note_corpus, - sentiment_fn=sentiment_fn, - ) - - self._assert_hadm_unique(scores, "Negative sentiment scores") - by_hadm = scores.set_index("hadm_id") - self.assertAlmostEqual(by_hadm.loc[101, "negative_sentiment_score"], 0.6) - self.assertAlmostEqual(by_hadm.loc[103, "negative_sentiment_score"], 0.0) - self.assertAlmostEqual(by_hadm.loc[106, "negative_sentiment_score"], -0.25) - - def test_build_negative_sentiment_scores_passes_whitespace_cleaned_text_to_sentiment(self): - build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") - note_corpus = pd.DataFrame( - [ - { - "hadm_id": 201, - "note_text": "Patient refuses \n treatment Date:[**5-1-18**]", - } - ] - ) - seen = [] - - def sentiment_fn(text): - seen.append(text) - return (-0.3, 0.0) - - scores = build_negative_sentiment_scores(note_corpus, sentiment_fn=sentiment_fn) - - self.assertEqual(seen, ["Patient refuses treatment Date:[**5-1-18**]"]) - self.assertAlmostEqual(scores.loc[0, "negative_sentiment_score"], 0.3) - - def test_build_negative_sentiment_scores_handles_empty_notes_as_zero(self): - build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": ""}, - {"hadm_id": 103, "note_text": "non-empty"}, - ] - ) 
- - def sentiment_fn(text): - if text == "": - raise AssertionError("sentiment_fn should not be called on empty note text") - return (0.4, 0.0) - - scores = build_negative_sentiment_scores(note_corpus, sentiment_fn=sentiment_fn) - by_hadm = scores.set_index("hadm_id") - self.assertAlmostEqual(by_hadm.loc[101, "negative_sentiment_score"], 0.0) - self.assertAlmostEqual(by_hadm.loc[103, "negative_sentiment_score"], -0.4) - - def test_build_negative_sentiment_scores_raises_clear_error_when_required_columns_are_missing(self): - build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") - note_corpus_missing = pd.DataFrame([{"hadm_id": 101}]) - with self.assertRaisesRegex(ValueError, "note_text"): - build_negative_sentiment_scores(note_corpus_missing) - - def test_build_mistrust_score_table_constructs_all_three_normalized_scores_from_inputs(self): - build_mistrust_score_table = self._get_callable("build_mistrust_score_table") - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "feature_a": 1, "feature_b": 1}, - {"hadm_id": 103, "feature_a": 0, "feature_b": 0}, - {"hadm_id": 106, "feature_a": 1, "feature_b": 0}, - {"hadm_id": 107, "feature_a": 0, "feature_b": 0}, - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 101, "noncompliance_label": 1, "autopsy_label": 1}, - {"hadm_id": 103, "noncompliance_label": 0, "autopsy_label": 0}, - {"hadm_id": 106, "noncompliance_label": 1, "autopsy_label": 0}, - {"hadm_id": 107, "noncompliance_label": 0, "autopsy_label": 1}, - ] - ) - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": "negative note"}, - {"hadm_id": 103, "note_text": "neutral note"}, - {"hadm_id": 106, "note_text": "slightly positive note"}, - {"hadm_id": 107, "note_text": "very positive note"}, - ] - ) - probability_sequences = [ - [0.9, 0.2, 0.8, 0.1], - [0.7, 0.1, 0.3, 0.6], - ] - created = [] - - def estimator_factory(): - estimator = _FakeProbEstimator(probability_sequences[len(created)]) - created.append(estimator) 
- return estimator - - polarity_map = { - "negative note": -0.5, - "neutral note": 0.0, - "slightly positive note": 0.2, - "very positive note": 0.6, - } - - def sentiment_fn(text): - return (polarity_map[text], 0.0) - - scores = build_mistrust_score_table( - feature_matrix=feature_matrix, - note_labels=note_labels, - note_corpus=note_corpus, - estimator_factory=estimator_factory, - sentiment_fn=sentiment_fn, - ) - - self.assertEqual(len(created), 2, msg="Two proxy models should be fit: noncompliance and autopsy") - self.assertTrue(all(est.was_fit for est in created)) - self._assert_hadm_unique(scores, "Mistrust score table") - required_columns = { - "hadm_id", - "noncompliance_score_z", - "autopsy_score_z", - "negative_sentiment_score_z", - } - self.assertTrue(required_columns.issubset(scores.columns)) - - for col in [ - "noncompliance_score_z", - "autopsy_score_z", - "negative_sentiment_score_z", - ]: - self.assertAlmostEqual(scores[col].mean(), 0.0, places=7) - self.assertAlmostEqual(scores[col].std(ddof=0), 1.0, places=7) - - by_hadm = scores.set_index("hadm_id") - self.assertGreater( - by_hadm.loc[101, "noncompliance_score_z"], - by_hadm.loc[103, "noncompliance_score_z"], - ) - self.assertGreater( - by_hadm.loc[101, "autopsy_score_z"], - by_hadm.loc[103, "autopsy_score_z"], - ) - self.assertGreater( - by_hadm.loc[101, "negative_sentiment_score_z"], - by_hadm.loc[107, "negative_sentiment_score_z"], - msg="Negative sentiment score must be based on -1 * polarity before normalization", - ) - - def test_build_mistrust_score_table_keeps_only_hadm_ids_present_in_all_score_sources(self): - build_mistrust_score_table = self._get_callable("build_mistrust_score_table") - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "feature_a": 1}, - {"hadm_id": 103, "feature_a": 0}, - {"hadm_id": 106, "feature_a": 1}, - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 101, "noncompliance_label": 1, "autopsy_label": 0}, - {"hadm_id": 103, "noncompliance_label": 0, 
"autopsy_label": 1}, - {"hadm_id": 106, "noncompliance_label": 1, "autopsy_label": 0}, - ] - ) - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": "negative"}, - {"hadm_id": 106, "note_text": "neutral"}, - ] - ) - - mistrust = build_mistrust_score_table( - feature_matrix=feature_matrix, - note_labels=note_labels, - note_corpus=note_corpus, - estimator_factory=lambda: _FakeProbEstimator([0.9, 0.2, 0.8]), - sentiment_fn=lambda text: (-0.1 if text == "negative" else 0.0, 0.0), - ) - - self.assertEqual(list(mistrust["hadm_id"]), [101, 106]) - - def test_build_mistrust_score_table_raises_clear_error_when_required_columns_are_missing(self): - build_mistrust_score_table = self._get_callable("build_mistrust_score_table") - note_labels_missing = pd.DataFrame([{"hadm_id": 101, "noncompliance_label": 1}]) - feature_matrix = pd.DataFrame([{"hadm_id": 101, "feature_a": 1}]) - note_corpus = pd.DataFrame([{"hadm_id": 101, "note_text": "note"}]) - with self.assertRaisesRegex(ValueError, "autopsy_label"): - build_mistrust_score_table( - feature_matrix=feature_matrix, - note_labels=note_labels_missing, - note_corpus=note_corpus, - ) - def test_build_final_model_table_contains_baseline_optional_features_and_targets(self): build_base_admissions = self._get_callable("build_base_admissions") build_demographics_table = self._get_callable("build_demographics_table") @@ -2023,6 +2172,60 @@ def test_build_final_model_table_contains_baseline_optional_features_and_targets self.assertEqual(by_hadm.loc[101, "in_hospital_mortality"], 0) self._assert_hadm_unique(final_table, "Final model table") + def test_build_final_model_table_z_normalizes_age_and_los_days(self): + """age and los_days must be z-normalized (mean≈0, std≈1) in the final table, + matching the reference notebook which z-normalizes continuous features before + L1 logistic regression.""" + import numpy as np + + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = 
self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + + base = build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, self.icustays) + + code_status_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Education Readiness", "dbsource": "carevue"}, + {"itemid": 2, "label": "Pain Level", "dbsource": "metavision"}, + {"itemid": 128, "label": "Code Status", "dbsource": "carevue"}, + {"itemid": 223758, "label": "Code Status", "dbsource": "metavision"}, + ] + ) + code_status_events = pd.DataFrame( + [ + {"hadm_id": 101, "itemid": 128, "value": "Full Code", "icustay_id": 1011}, + {"hadm_id": 103, "itemid": 128, "value": "DNR/DNI", "icustay_id": 1031}, + {"hadm_id": 104, "itemid": 128, "value": "Full Code", "icustay_id": 1041}, + {"hadm_id": 106, "itemid": 128, "value": "Full Code", "icustay_id": 1061}, + {"hadm_id": 107, "itemid": 223758, "value": "Comfort Measures Only", "icustay_id": 1071}, + ] + ) + + final_table = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=code_status_events, + d_items=code_status_items, + mistrust_scores=self.mistrust_scores, + include_race=True, + include_mistrust=True, + ) + + for col in ["age", "los_days"]: + values = final_table[col].dropna() + self.assertGreater(len(values), 1, f"{col} must have >1 non-NaN value") + col_mean = values.mean() + col_std = values.std(ddof=0) + self.assertAlmostEqual(col_mean, 0.0, places=5, + msg=f"{col} mean should be ~0 after z-normalization") + self.assertAlmostEqual(col_std, 1.0, places=5, + msg=f"{col} std should be ~1 after z-normalization") + def test_build_final_model_table_left_ama_requires_exact_discharge_location_match(self): build_base_admissions = self._get_callable("build_base_admissions") build_demographics_table = 
self._get_callable("build_demographics_table") @@ -2243,12 +2446,13 @@ def test_build_final_model_table_code_status_uses_only_required_itemids(self): include_mistrust=True, ).set_index("hadm_id") - self.assertEqual(final_table.loc[301, "code_status_dnr_dni_cmo"], 0) + self.assertTrue(pd.isna(final_table.loc[301, "code_status_dnr_dni_cmo"])) self.assertEqual(final_table.loc[302, "code_status_dnr_dni_cmo"], 1) self.assertEqual(final_table.loc[303, "code_status_dnr_dni_cmo"], 1) def test_build_code_status_target_excludes_admissions_without_charted_code_status(self): - build_code_status_target = getattr(self.module, "_build_code_status_target") + build_code_status_target = getattr(self.module, "_build_task_code_status_target") + code_status_itemids = getattr(self.module, "CODE_STATUS_ITEMIDS") d_items = pd.DataFrame( [ {"itemid": 128, "label": "Code Status", "dbsource": "carevue"}, @@ -2263,9 +2467,358 @@ def test_build_code_status_target_excludes_admissions_without_charted_code_statu ] ) - target = build_code_status_target(chartevents, d_items) + target = build_code_status_target(chartevents, itemids=code_status_itemids) self.assertEqual(set(target["hadm_id"]), {601}) + def test_build_code_status_target_recognizes_truncated_dnr_and_cpr_not_indicated_values(self): + build_code_status_target = getattr(self.module, "_build_task_code_status_target") + code_status_itemids = getattr(self.module, "CODE_STATUS_ITEMIDS") + chartevents = pd.DataFrame( + [ + {"hadm_id": 701, "itemid": 128, "value": "Do Not Resuscita", "icustay_id": 7011}, + {"hadm_id": 702, "itemid": 223758, "value": "CPR Not Indicate", "icustay_id": 7021}, + {"hadm_id": 703, "itemid": 128, "value": "Full Code", "icustay_id": 7031}, + ] + ) + + target = build_code_status_target(chartevents, itemids=code_status_itemids).set_index("hadm_id") + self.assertEqual(int(target.loc[701, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(target.loc[702, "code_status_dnr_dni_cmo"]), 1) + 
self.assertEqual(int(target.loc[703, "code_status_dnr_dni_cmo"]), 0) + + def test_build_code_status_target_uses_last_charted_status_when_charttime_is_present(self): + build_code_status_target = getattr(self.module, "_build_task_code_status_target") + code_status_itemids = getattr(self.module, "CODE_STATUS_ITEMIDS") + chartevents = pd.DataFrame( + [ + { + "hadm_id": 801, + "itemid": 128, + "value": "Full Code", + "icustay_id": 8011, + "charttime": "2100-01-01 01:00:00", + }, + { + "hadm_id": 801, + "itemid": 128, + "value": "Comfort Measures", + "icustay_id": 8011, + "charttime": "2100-01-01 02:00:00", + }, + { + "hadm_id": 802, + "itemid": 223758, + "value": "Do Not Intubate", + "icustay_id": 8021, + "charttime": "2100-01-02 01:00:00", + }, + { + "hadm_id": 802, + "itemid": 223758, + "value": "Full Code", + "icustay_id": 8021, + "charttime": "2100-01-02 03:00:00", + }, + ] + ) + + target = build_code_status_target(chartevents, itemids=code_status_itemids).set_index("hadm_id") + self.assertEqual(int(target.loc[801, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(target.loc[802, "code_status_dnr_dni_cmo"]), 0) + + def test_build_code_status_target_paper_like_uses_notebook_encounter_order_not_charttime(self): + build_code_status_target = getattr(self.module, "_build_task_code_status_target") + code_status_itemids = getattr(self.module, "CODE_STATUS_ITEMIDS") + chartevents = pd.DataFrame( + [ + { + "hadm_id": 811, + "itemid": 128, + "value": "Full Code", + "icustay_id": 8111, + "charttime": "2100-01-01 03:00:00", + }, + { + "hadm_id": 811, + "itemid": 128, + "value": "Do Not Resuscita", + "icustay_id": 8111, + "charttime": "2100-01-01 01:00:00", + }, + ] + ) + + corrected = build_code_status_target(chartevents, itemids=code_status_itemids).set_index("hadm_id") + paper_like = build_code_status_target( + chartevents, + itemids=code_status_itemids, + code_status_mode="paper_like", + ).set_index("hadm_id") + + self.assertEqual(int(corrected.loc[811, 
"code_status_dnr_dni_cmo"]), 0) + self.assertEqual(int(paper_like.loc[811, "code_status_dnr_dni_cmo"]), 1) + + def test_build_code_status_target_paper_like_preserves_notebook_label_leak_for_unrecognized_values(self): + build_code_status_target = getattr(self.module, "_build_task_code_status_target") + code_status_itemids = getattr(self.module, "CODE_STATUS_ITEMIDS") + chartevents = pd.DataFrame( + [ + {"hadm_id": 821, "itemid": 128, "value": "Do Not Resuscita", "icustay_id": 8211}, + {"hadm_id": 822, "itemid": 223758, "value": "Other/Remarks", "icustay_id": 8221}, + {"hadm_id": 823, "itemid": 223758, "value": "CPR Not Indicate", "icustay_id": 8231}, + ] + ) + + corrected = build_code_status_target(chartevents, itemids=code_status_itemids).set_index("hadm_id") + paper_like = build_code_status_target( + chartevents, + itemids=code_status_itemids, + code_status_mode="paper_like", + ).set_index("hadm_id") + + self.assertEqual(int(corrected.loc[822, "code_status_dnr_dni_cmo"]), 0) + self.assertEqual(int(corrected.loc[823, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(paper_like.loc[822, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(paper_like.loc[823, "code_status_dnr_dni_cmo"]), 1) + + def test_build_code_status_target_from_csv_uses_last_charted_status(self): + build_code_status_target_from_csv = self._get_callable("build_code_status_target_from_csv") + + with _workspace_tempdir() as tmpdir: + csv_path = Path(tmpdir) / "chartevents.csv" + pd.DataFrame( + [ + { + "HADM_ID": 901, + "ITEMID": 128, + "CHARTTIME": "2100-01-01 01:00:00", + "VALUE": "Full Code", + }, + { + "HADM_ID": 901, + "ITEMID": 128, + "CHARTTIME": "2100-01-01 05:00:00", + "VALUE": "Do Not Resuscita", + }, + { + "HADM_ID": 902, + "ITEMID": 223758, + "CHARTTIME": "2100-01-02 01:00:00", + "VALUE": "CPR Not Indicate", + }, + { + "HADM_ID": 902, + "ITEMID": 223758, + "CHARTTIME": "2100-01-02 06:00:00", + "VALUE": "Full Code", + }, + ] + ).to_csv(csv_path, index=False) + + target = 
build_code_status_target_from_csv(csv_path, chunksize=2).set_index("hadm_id") + + self.assertEqual(int(target.loc[901, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(target.loc[902, "code_status_dnr_dni_cmo"]), 0) + + def test_build_code_status_target_from_csv_paper_like_uses_notebook_encounter_order(self): + build_code_status_target_from_csv = self._get_callable("build_code_status_target_from_csv") + + with _workspace_tempdir() as tmpdir: + csv_path = Path(tmpdir) / "chartevents.csv" + pd.DataFrame( + [ + { + "HADM_ID": 911, + "ITEMID": 128, + "CHARTTIME": "2100-01-01 03:00:00", + "VALUE": "Full Code", + }, + { + "HADM_ID": 911, + "ITEMID": 128, + "CHARTTIME": "2100-01-01 01:00:00", + "VALUE": "Do Not Resuscita", + }, + { + "HADM_ID": 912, + "ITEMID": 223758, + "CHARTTIME": "2100-01-02 02:00:00", + "VALUE": "Other/Remarks", + }, + ] + ).to_csv(csv_path, index=False) + + corrected = build_code_status_target_from_csv(csv_path, chunksize=2).set_index("hadm_id") + paper_like = build_code_status_target_from_csv( + csv_path, + chunksize=2, + code_status_mode="paper_like", + ).set_index("hadm_id") + + self.assertEqual(int(corrected.loc[911, "code_status_dnr_dni_cmo"]), 0) + self.assertEqual(int(corrected.loc[912, "code_status_dnr_dni_cmo"]), 0) + self.assertEqual(int(paper_like.loc[911, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(paper_like.loc[912, "code_status_dnr_dni_cmo"]), 1) + + def test_build_chartevent_artifacts_from_csv_defaults_code_status_mode_to_path(self): + build_chartevent_artifacts_from_csv = self._get_callable("build_chartevent_artifacts_from_csv") + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Bath", "dbsource": "carevue"}, + {"itemid": 128, "label": "Code Status", "dbsource": "carevue"}, + ] + ) + + with _workspace_tempdir() as tmpdir: + csv_path = Path(tmpdir) / "chartevents.csv" + pd.DataFrame( + [ + { + "HADM_ID": 921, + "ITEMID": 1, + "CHARTTIME": "2100-01-01 00:30:00", + "VALUE": "Done", + "ICUSTAY_ID": 9211, + }, + { + 
"HADM_ID": 921, + "ITEMID": 128, + "CHARTTIME": "2100-01-01 03:00:00", + "VALUE": "Full Code", + "ICUSTAY_ID": 9211, + }, + { + "HADM_ID": 921, + "ITEMID": 128, + "CHARTTIME": "2100-01-01 01:00:00", + "VALUE": "Do Not Resuscita", + "ICUSTAY_ID": 9211, + }, + ] + ).to_csv(csv_path, index=False) + + normal_features, normal_targets = build_chartevent_artifacts_from_csv( + csv_path, + d_items, + allowed_labels={"Bath"}, + all_hadm_ids=[921], + chunksize=2, + ) + paper_features, paper_targets = build_chartevent_artifacts_from_csv( + csv_path, + d_items, + all_hadm_ids=[921], + chunksize=2, + paper_like=True, + ) + + self.assertEqual(normal_features["hadm_id"].tolist(), [921]) + self.assertEqual(paper_features["hadm_id"].tolist(), [921]) + self.assertEqual(int(normal_targets.loc[0, "code_status_dnr_dni_cmo"]), 0) + self.assertEqual(int(paper_targets.loc[0, "code_status_dnr_dni_cmo"]), 1) + + def test_build_note_artifacts_from_csv_dispatches_autopsy_mode(self): + build_note_artifacts_from_csv = self._get_callable("build_note_artifacts_from_csv") + + with _workspace_tempdir() as tmpdir: + csv_path = Path(tmpdir) / "noteevents.csv" + pd.DataFrame( + [ + { + "HADM_ID": 931, + "CATEGORY": "Nursing", + "TEXT": "Request for autopsy was declined.", + "ISERROR": None, + } + ] + ).to_csv(csv_path, index=False) + + _, corrected_labels = build_note_artifacts_from_csv( + csv_path, + all_hadm_ids=[931], + autopsy_label_mode="corrected", + chunksize=1, + ) + _, paper_like_labels = build_note_artifacts_from_csv( + csv_path, + all_hadm_ids=[931], + autopsy_label_mode="paper_like", + chunksize=1, + ) + + self.assertEqual(int(corrected_labels.loc[0, "hadm_id"]), 931) + self.assertEqual(float(corrected_labels.loc[0, "autopsy_label"]), 0.0) + self.assertTrue(pd.isna(float(paper_like_labels.loc[0, "autopsy_label"]))) + + def test_build_chartevent_feature_matrix_from_csv_canonicalizes_case_variants_and_filters_numeric_heavy_values(self): + build_chartevent_feature_matrix_from_csv = 
self._get_callable("build_chartevent_feature_matrix_from_csv") + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Bath", "dbsource": "carevue"}, + {"itemid": 2, "label": "bath", "dbsource": "metavision"}, + ] + ) + + with _workspace_tempdir() as tmpdir: + csv_path = Path(tmpdir) / "chartevents.csv" + pd.DataFrame( + [ + {"HADM_ID": 901, "ITEMID": 1, "VALUE": "Done", "ICUSTAY_ID": 9011}, + {"HADM_ID": 902, "ITEMID": 2, "VALUE": "DONE.", "ICUSTAY_ID": 9021}, + {"HADM_ID": 903, "ITEMID": 1, "VALUE": "08/13", "ICUSTAY_ID": 9031}, + {"HADM_ID": 904, "ITEMID": 2, "VALUE": "110/56", "ICUSTAY_ID": 9041}, + ] + ).to_csv(csv_path, index=False) + + feature_matrix = build_chartevent_feature_matrix_from_csv( + csv_path, + d_items, + allowed_labels={"Bath"}, + all_hadm_ids=[901, 902, 903, 904], + chunksize=2, + ).fillna(0).set_index("hadm_id") + + self.assertIn("Bath: Done", feature_matrix.columns) + self.assertEqual(int(feature_matrix.loc[901, "Bath: Done"]), 1) + self.assertEqual(int(feature_matrix.loc[902, "Bath: Done"]), 1) + self.assertEqual(int(feature_matrix.loc[903, "Bath: Done"]), 0) + self.assertEqual(int(feature_matrix.loc[904, "Bath: Done"]), 0) + + def test_build_chartevent_feature_matrix_from_csv_paper_like_uses_notebook_dictionary(self): + build_chartevent_feature_matrix_from_csv = self._get_callable("build_chartevent_feature_matrix_from_csv") + d_items = pd.DataFrame( + [ + {"itemid": 1, "label": "Bath", "dbsource": "carevue"}, + {"itemid": 2, "label": "Reason For Restraint", "dbsource": "carevue"}, + {"itemid": 3, "label": "Pain Management", "dbsource": "carevue"}, + ] + ) + + with _workspace_tempdir() as tmpdir: + csv_path = Path(tmpdir) / "chartevents.csv" + pd.DataFrame( + [ + {"HADM_ID": 911, "ITEMID": 1, "VALUE": "Refused", "ICUSTAY_ID": 9111}, + {"HADM_ID": 912, "ITEMID": 2, "VALUE": "Risk for falls", "ICUSTAY_ID": 9121}, + {"HADM_ID": 913, "ITEMID": 3, "VALUE": "PCA", "ICUSTAY_ID": 9131}, + ] + ).to_csv(csv_path, index=False) + + feature_matrix = 
build_chartevent_feature_matrix_from_csv( + csv_path, + d_items, + all_hadm_ids=[911, 912, 913], + chunksize=2, + paper_like=True, + ).fillna(0).set_index("hadm_id") + + self.assertIn("bath: refused", feature_matrix.columns) + self.assertIn("reason for restraint: risk for falls", feature_matrix.columns) + self.assertNotIn("pain management: pca", set(feature_matrix.columns)) + self.assertEqual(int(feature_matrix.loc[911, "bath: refused"]), 1) + self.assertEqual(int(feature_matrix.loc[912, "reason for restraint: risk for falls"]), 1) + self.assertTrue((feature_matrix.loc[913] == 0).all()) + def test_build_final_model_table_supports_baseline_only_configuration(self): build_base_admissions = self._get_callable("build_base_admissions") build_demographics_table = self._get_callable("build_demographics_table") @@ -2311,6 +2864,7 @@ def test_build_final_model_table_baseline_only_columns_match_required_set(self): ) expected_columns = { "hadm_id", + "subject_id", "age", "los_days", "gender_f", @@ -2323,7 +2877,48 @@ def test_build_final_model_table_baseline_only_columns_match_required_set(self): "in_hospital_mortality", } self.assertEqual(set(final_table.columns), expected_columns) - self.assertEqual(len(final_table.columns), 11) + self.assertEqual(len(final_table.columns), 12) + + def test_build_final_model_table_from_code_status_targets_matches_raw_chartevents_path(self): + build_base_admissions = self._get_callable("build_base_admissions") + build_demographics_table = self._get_callable("build_demographics_table") + build_all_cohort = self._get_callable("build_all_cohort") + build_final_model_table = self._get_callable("build_final_model_table") + build_final_model_table_from_code_status_targets = self._get_callable( + "build_final_model_table_from_code_status_targets" + ) + build_task_code_status_target = getattr(self.module, "_build_task_code_status_target") + code_status_itemids = getattr(self.module, "CODE_STATUS_ITEMIDS") + + base = 
build_base_admissions(self.admissions, self.patients) + demographics = build_demographics_table(base) + all_cohort = build_all_cohort(base, self.icustays) + code_status_targets = build_task_code_status_target( + self.chartevents, + itemids=code_status_itemids, + ) + + from_raw = build_final_model_table( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=self.mistrust_scores, + include_race=False, + include_mistrust=False, + ).sort_values("hadm_id").reset_index(drop=True) + from_targets = build_final_model_table_from_code_status_targets( + demographics=demographics, + all_cohort=all_cohort, + admissions=base, + code_status_targets=code_status_targets, + mistrust_scores=self.mistrust_scores, + include_race=False, + include_mistrust=False, + ).sort_values("hadm_id").reset_index(drop=True) + + pd.testing.assert_frame_equal(from_raw, from_targets) def test_build_final_model_table_race_one_hot_covers_all_required_categories(self): build_base_admissions = self._get_callable("build_base_admissions") @@ -2468,7 +3063,7 @@ def test_write_minimal_deliverables_creates_required_artifact_files(self): "final_model_table": pd.DataFrame([{"hadm_id": 101, "left_ama": 0}]), } - with tempfile.TemporaryDirectory() as temp_dir: + with _workspace_tempdir() as temp_dir: output_dir = Path(temp_dir) write_minimal_deliverables(artifacts, output_dir) @@ -2529,7 +3124,7 @@ def test_write_minimal_deliverables_sorts_by_hadm_id_and_writes_without_index(se ] ), } - with tempfile.TemporaryDirectory() as temp_dir: + with _workspace_tempdir() as temp_dir: output_dir = Path(temp_dir) write_minimal_deliverables(artifacts, output_dir) base_admissions = pd.read_csv(output_dir / "base_admissions.csv") @@ -2555,7 +3150,7 @@ def test_write_minimal_deliverables_skips_missing_artifacts_without_crashing(sel "acuity_scores": pd.DataFrame([{"hadm_id": 101, "oasis": 15, "sapsii": 42}]), } - with 
tempfile.TemporaryDirectory() as temp_dir: + with _workspace_tempdir() as temp_dir: output_dir = Path(temp_dir) write_minimal_deliverables(artifacts, output_dir) self.assertTrue((output_dir / "base_admissions.csv").exists()) @@ -2604,7 +3199,7 @@ def test_write_minimal_deliverables_sorts_nullable_integer_hadm_ids(self): ), } - with tempfile.TemporaryDirectory() as temp_dir: + with _workspace_tempdir() as temp_dir: output_dir = Path(temp_dir) write_minimal_deliverables(artifacts, output_dir) final_table = pd.read_csv(output_dir / "final_model_table.csv") @@ -2690,9 +3285,12 @@ def test_data_contract_build_note_corpus_and_labels_outputs_are_sorted_unique_an self._assert_hadm_unique(labels, "Note labels contract") self.assertTrue(pd.api.types.is_object_dtype(corpus["note_text"])) self.assertTrue(pd.api.types.is_integer_dtype(labels["noncompliance_label"])) - self.assertTrue(pd.api.types.is_integer_dtype(labels["autopsy_label"])) + self.assertTrue(pd.api.types.is_float_dtype(labels["autopsy_label"])) self.assertTrue(set(labels["noncompliance_label"].unique()).issubset({0, 1})) - self.assertTrue(set(labels["autopsy_label"].unique()).issubset({0, 1})) + self.assertTrue( + set(labels["autopsy_label"].dropna().unique()).issubset({0.0, 1.0}), + msg="autopsy_label should be 0, 1, or NaN", + ) def test_data_contract_build_chartevent_feature_matrix_output_is_binary_integer_sorted_and_unique(self): build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") @@ -2757,7 +3355,8 @@ def test_data_contract_build_final_model_table_binary_columns_are_integer_and_ze ] for column in binary_columns: self.assertTrue(pd.api.types.is_integer_dtype(final_table[column]), msg=column) - self.assertTrue(set(final_table[column].unique()).issubset({0, 1}), msg=column) + unique_values = set(pd.Series(final_table[column]).dropna().astype(int).unique()) + self.assertTrue(unique_values.issubset({0, 1}), msg=column) def 
test_data_contract_write_minimal_deliverables_round_trip_preserves_columns_and_row_counts(self): build_base_admissions = self._get_callable("build_base_admissions") @@ -2769,7 +3368,7 @@ def test_data_contract_write_minimal_deliverables_round_trip_preserves_columns_a build_note_labels = self._get_callable("build_note_labels") build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") build_acuity_scores = self._get_callable("build_acuity_scores") - build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + build_mistrust_score_table = _model_build_mistrust_score_table build_final_model_table = self._get_callable("build_final_model_table") write_minimal_deliverables = self._get_callable("write_minimal_deliverables") @@ -2824,7 +3423,7 @@ def test_data_contract_write_minimal_deliverables_round_trip_preserves_columns_a "final_model_table": final_table, } - with tempfile.TemporaryDirectory() as temp_dir: + with _workspace_tempdir() as temp_dir: output_dir = Path(temp_dir) write_minimal_deliverables(artifacts, output_dir) round_trip = { @@ -2853,7 +3452,7 @@ def test_end_to_end_artifact_assembly_smoke_spec(self): build_chartevent_feature_matrix = self._get_callable("build_chartevent_feature_matrix") build_note_labels = self._get_callable("build_note_labels") build_acuity_scores = self._get_callable("build_acuity_scores") - build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + build_mistrust_score_table = _model_build_mistrust_score_table build_final_model_table = self._get_callable("build_final_model_table") write_minimal_deliverables = self._get_callable("write_minimal_deliverables") @@ -2919,7 +3518,7 @@ def test_end_to_end_artifact_assembly_smoke_spec(self): "final_model_table": final_table, } - with tempfile.TemporaryDirectory() as temp_dir: + with _workspace_tempdir() as temp_dir: output_dir = Path(temp_dir) write_minimal_deliverables(artifacts, output_dir) 
self.assertEqual(len(list(output_dir.iterdir())), 9) diff --git a/tests/core/test_eol_mistrust_model.py b/tests/core/test_eol_mistrust_model.py index 0630496a9..656054d8a 100644 --- a/tests/core/test_eol_mistrust_model.py +++ b/tests/core/test_eol_mistrust_model.py @@ -110,6 +110,30 @@ def __call__(self, y_true, y_prob): return self.value +class _GroupSplitRecorder: + def __init__(self): + self.calls = [] + + def __call__(self, n_splits, test_size, random_state): + self.calls.append( + { + "n_splits": n_splits, + "test_size": test_size, + "random_state": random_state, + } + ) + outer = self + + class _Splitter: + def split(self, X, y, groups): + del y + outer.calls[-1]["n_rows"] = len(X) + outer.calls[-1]["groups"] = list(pd.Series(groups).reset_index(drop=True)) + yield [0, 1, 2, 3], [4, 5] + + return _Splitter() + + class TestEOLMistrustModel(unittest.TestCase): """Model-level unit tests for the EOL mistrust workflow.""" @@ -189,6 +213,7 @@ def setUp(self): [ { "hadm_id": hadm_id, + "subject_id": [201, 202, 201, 203, 202, 204][index], "age": float(50 + index), "los_days": float(1 + index), "gender_f": int(index % 2 == 1), @@ -237,7 +262,12 @@ def test_module_exports_expected_core_api(self): self.assertTrue(expected.issubset(set(self.module.__all__))) def test_package_import_path_exposes_model_module_api(self): - imported = importlib.import_module("pyhealth.models.eol_mistrust") + try: + imported = importlib.import_module("pyhealth.models.eol_mistrust") + except ModuleNotFoundError as exc: + if exc.name == "dask": + self.skipTest("pyhealth.models package import currently requires optional dask dependency") + raise self.assertTrue(hasattr(imported, "EOLMistrustModel")) self.assertTrue(callable(getattr(imported, "build_mistrust_score_table"))) @@ -338,15 +368,48 @@ def predict_proba(self, X): self.assertEqual(created[0].kwargs.get("C"), 0.1) self.assertEqual(created[0].kwargs.get("solver"), "liblinear") self.assertEqual(created[0].kwargs.get("max_iter"), 1000) + 
self.assertEqual(created[0].kwargs.get("tol"), 0.001) self.assertEqual(len(created[0].fit_X), len(self.feature_matrix)) self.assertEqual(len(created[0].fit_y), len(self.note_labels)) - def test_build_proxy_probability_scores_returns_positive_class_probabilities_sorted(self): + def test_fit_proxy_mistrust_model_returns_constant_estimator_for_single_class_labels(self): + fit_proxy_mistrust_model = self._get_callable("fit_proxy_mistrust_model") + note_labels = self.note_labels.assign(noncompliance_label=0) + + estimator = fit_proxy_mistrust_model( + self.feature_matrix, + note_labels, + "noncompliance_label", + estimator_factory=lambda: (_ for _ in ()).throw(AssertionError("factory should not be called")), + ) + + probabilities = estimator.predict_proba(self.feature_matrix.drop(columns=["hadm_id"])) + self.assertTrue(all(row[1] == 0.0 for row in probabilities)) + self.assertEqual(estimator.coef_.shape, (1, self.feature_matrix.shape[1] - 1)) + + def test_build_proxy_probability_scores_uses_predict_proba_not_decision_function(self): + """Proxy scores must use predict_proba (positive-class probability) + matching the paper methodology, not decision_function (raw log-odds).""" + import numpy as np + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") feature_matrix = self.feature_matrix.iloc[[2, 0, 1]].copy() note_labels = self.note_labels.iloc[[1, 2, 0]].copy() - estimator = _FakeProbEstimator([0.1, 0.7, 0.4]) + # Estimator where decision_function and predict_proba return DIFFERENT values + class _SplitEstimator: + def __init__(self): + self.was_fit = False + def fit(self, X, y): + self.was_fit = True + self.coef_ = [[0.1] * X.shape[1]] + return self + def decision_function(self, X): + return np.array([-1.5, 2.3, 0.0]) # raw log-odds (unbounded) + def predict_proba(self, X): + return [[0.9, 0.1], [0.3, 0.7], [0.5, 0.5]] # probabilities [0,1] + + estimator = _SplitEstimator() scores = build_proxy_probability_scores( 
feature_matrix=feature_matrix, note_labels=note_labels, @@ -355,7 +418,10 @@ def test_build_proxy_probability_scores_returns_positive_class_probabilities_sor ) self.assertEqual(scores["hadm_id"].tolist(), [101, 102, 103]) - self.assertEqual(scores["noncompliance_score"].tolist(), [0.1, 0.7, 0.4]) + # Must match predict_proba[:,1] output, NOT decision_function + self.assertAlmostEqual(scores.iloc[0]["noncompliance_score"], 0.1) + self.assertAlmostEqual(scores.iloc[1]["noncompliance_score"], 0.7) + self.assertAlmostEqual(scores.iloc[2]["noncompliance_score"], 0.5) self.assertTrue(estimator.was_fit) def test_build_proxy_probability_scores_names_autopsy_output_column(self): @@ -368,6 +434,20 @@ def test_build_proxy_probability_scores_names_autopsy_output_column(self): ) self.assertEqual(scores.columns.tolist(), ["hadm_id", "autopsy_score"]) + def test_build_proxy_probability_scores_returns_constant_scores_for_single_class_labels(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + note_labels = self.note_labels.iloc[:3].assign(noncompliance_label=0) + + scores = build_proxy_probability_scores( + feature_matrix=self.feature_matrix.iloc[:3], + note_labels=note_labels, + label_column="noncompliance_label", + estimator_factory=lambda: (_ for _ in ()).throw(AssertionError("factory should not be called")), + ) + + self.assertEqual(scores["hadm_id"].tolist(), [101, 102, 103]) + self.assertEqual(scores["noncompliance_score"].tolist(), [0.0, 0.0, 0.0]) + def test_build_proxy_probability_scores_missing_required_columns_raise_clear_errors(self): build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") with self.assertRaisesRegex(ValueError, "noncompliance_label"): @@ -385,6 +465,39 @@ def test_build_proxy_probability_scores_missing_required_columns_raise_clear_err estimator_factory=lambda: _FakeProbEstimator([0.1] * len(self.note_labels)), ) + def 
test_build_proxy_probability_scores_trains_on_labeled_rows_only_scores_all(self): + build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + + note_labels_with_nan = self.note_labels.copy() + note_labels_with_nan["autopsy_label"] = [ + 1.0, 0.0, float("nan"), float("nan"), float("nan"), float("nan"), + ] + + fit_sizes = [] + + class _TrackingEstimator: + def __init__(self): + self.coef_ = None + + def fit(self, X, y): + fit_sizes.append(len(X)) + self.coef_ = [[0.1] * X.shape[1]] + return self + + def predict_proba(self, X): + return [[0.5, 0.5]] * len(X) + + scores = build_proxy_probability_scores( + feature_matrix=self.feature_matrix, + note_labels=note_labels_with_nan, + label_column="autopsy_label", + estimator_factory=_TrackingEstimator, + ) + + self.assertEqual(fit_sizes, [2], msg="Should train on 2 labeled rows only") + self.assertEqual(len(scores), 6, msg="Should score all 6 rows") + self.assertEqual(scores.columns.tolist(), ["hadm_id", "autopsy_score"]) + def test_build_proxy_probability_scores_preserves_feature_column_order_for_estimator_fit(self): build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") @@ -400,7 +513,7 @@ def fit(self, X, y): return self def predict_proba(self, X): - return [[0.4, 0.6] for _ in range(len(X))] + return [[0.5, 0.5]] * len(X) estimator = _RecordingEstimator() build_proxy_probability_scores( @@ -428,15 +541,18 @@ def test_build_proxy_probability_scores_keeps_only_inner_join_hadm_ids(self): self.assertEqual(scores["hadm_id"].tolist(), [103, 104]) - def test_build_proxy_probability_scores_raises_on_malformed_predict_proba_output(self): + def test_build_proxy_probability_scores_predict_proba_returns_correct_scores(self): + """predict_proba output must yield correct positive-class scores for each input row.""" build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") - with self.assertRaises(IndexError): - build_proxy_probability_scores( - 
feature_matrix=self.feature_matrix.iloc[:2], - note_labels=self.note_labels.iloc[:2], - label_column="noncompliance_label", - estimator_factory=lambda: _MalformedProbEstimator(), - ) + scores = build_proxy_probability_scores( + feature_matrix=self.feature_matrix.iloc[:2], + note_labels=self.note_labels.iloc[:2], + label_column="noncompliance_label", + estimator_factory=lambda: _FakeProbEstimator([-0.5, 1.2]), + ) + self.assertEqual(len(scores), 2) + self.assertAlmostEqual(scores.iloc[0]["noncompliance_score"], -0.5) + self.assertAlmostEqual(scores.iloc[1]["noncompliance_score"], 1.2) def test_build_negative_sentiment_mistrust_scores_uses_whitespace_cleanup_and_negates_polarity(self): build_negative_sentiment_mistrust_scores = self._get_callable( @@ -461,13 +577,13 @@ def _sentiment_fn(text): self.assertEqual( seen, - ["Date:[**5-1-18**] calm rapport", "patient refused medication", ""], + ["Date:[**5-1-18**] calm rapport", "patient refused medication"], ) self.assertEqual(scores["hadm_id"].tolist(), [201, 202, 203]) by_hadm = scores.set_index("hadm_id") self.assertEqual(by_hadm.loc[201, "negative_sentiment_score"], 0.5) self.assertEqual(by_hadm.loc[202, "negative_sentiment_score"], -0.25) - self.assertEqual(by_hadm.loc[203, "negative_sentiment_score"], -0.25) + self.assertEqual(by_hadm.loc[203, "negative_sentiment_score"], 0.0) def test_build_negative_sentiment_mistrust_scores_missing_note_text_raises_and_empty_schema_is_stable(self): build_negative_sentiment_mistrust_scores = self._get_callable( @@ -486,6 +602,46 @@ def test_build_negative_sentiment_mistrust_scores_missing_note_text_raises_and_e self.assertEqual(empty.columns.tolist(), ["hadm_id", "negative_sentiment_score"]) self.assertTrue(empty.empty) + def test_build_negative_sentiment_mistrust_scores_batches_default_backend(self): + build_negative_sentiment_mistrust_scores = self._get_callable( + "build_negative_sentiment_mistrust_scores" + ) + note_corpus = pd.DataFrame( + [ + {"hadm_id": 202, "note_text": 
"Date:[**5-1-18**] calm rapport"}, + {"hadm_id": 201, "note_text": " patient refused medication "}, + {"hadm_id": 203, "note_text": ""}, + ] + ) + seen_batches = [] + + def _batch_backend(texts): + seen_batches.append(list(texts)) + outputs = [] + for text in texts: + if "refused medication" in text: + outputs.append((-0.5, 0.0)) + else: + outputs.append((0.25, 0.0)) + return outputs + + with patch.object( + self.module, + "_default_sentiment_batch_backend", + side_effect=_batch_backend, + ): + scores = build_negative_sentiment_mistrust_scores(note_corpus) + + self.assertEqual( + seen_batches, + [["Date:[**5-1-18**] calm rapport", "patient refused medication", ""]], + ) + self.assertEqual(scores["hadm_id"].tolist(), [201, 202, 203]) + by_hadm = scores.set_index("hadm_id") + self.assertEqual(by_hadm.loc[201, "negative_sentiment_score"], 0.5) + self.assertEqual(by_hadm.loc[202, "negative_sentiment_score"], -0.25) + self.assertEqual(by_hadm.loc[203, "negative_sentiment_score"], -0.25) + def test_z_normalize_scores_normalizes_independently_and_handles_constant_column(self): z_normalize_scores = self._get_callable("z_normalize_scores") score_table = pd.DataFrame( @@ -920,6 +1076,7 @@ def test_evaluate_downstream_predictions_uses_random_states_zero_through_ninety_ def test_evaluate_downstream_predictions_uses_default_estimator_metric_and_dropna(self): evaluate_downstream_predictions = self._get_callable("evaluate_downstream_predictions") table = self.final_model_table.copy() + table = table.drop(columns=["subject_id"]) table.loc[0, "age"] = None table.loc[1, "left_ama"] = None @@ -967,9 +1124,38 @@ def _auc_fn(y_true, y_prob): self.assertEqual(created[0].kwargs.get("C"), 0.1) self.assertEqual(created[0].kwargs.get("solver"), "liblinear") self.assertEqual(created[0].kwargs.get("max_iter"), 1000) + self.assertEqual(created[0].kwargs.get("tol"), 0.001) self.assertEqual(auc_calls[0]["y_prob"], [0.1, 0.9]) self.assertEqual(int(results.iloc[0]["n_valid_auc"]), 1) + def 
test_evaluate_downstream_predictions_uses_group_shuffle_split_by_subject_id_by_default(self): + evaluate_downstream_predictions = self._get_callable("evaluate_downstream_predictions") + group_split_recorder = _GroupSplitRecorder() + + with patch.object(self.module, "GroupShuffleSplit", side_effect=group_split_recorder), \ + patch.object( + self.module, + "train_test_split", + side_effect=AssertionError("group-aware default split should not call train_test_split"), + ): + results = evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={"Left AMA": "left_ama"}, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + auc_fn=_AUCRecorder(0.6), + repetitions=1, + ) + + self.assertEqual(results.shape[0], 1) + self.assertEqual(group_split_recorder.calls[0]["n_splits"], 1) + self.assertEqual(group_split_recorder.calls[0]["test_size"], 0.4) + self.assertEqual(group_split_recorder.calls[0]["random_state"], 0) + self.assertEqual( + group_split_recorder.calls[0]["groups"], + [201, 202, 201, 203, 202, 204], + ) + def test_evaluate_downstream_predictions_returns_nan_for_single_class_target(self): evaluate_downstream_predictions = self._get_callable("evaluate_downstream_predictions") table = self.final_model_table.copy() @@ -1041,6 +1227,205 @@ def _auc_fn(y_true, y_prob): self.assertAlmostEqual(float(row["auc_mean"]), 0.4, places=7) self.assertAlmostEqual(float(row["auc_std"]), 0.2, places=7) + def test_evaluate_downstream_predictions_can_use_task_specific_estimator_factories(self): + evaluate_downstream_predictions = self._get_callable("evaluate_downstream_predictions") + created = [] + + class _RecordingEstimator: + def __init__(self, task_name): + self.task_name = task_name + self.coef_ = None + + def fit(self, X, y): + del y + created.append({"task": self.task_name, "n_features": X.shape[1]}) + self.coef_ = [[0.1] * X.shape[1]] + return self + + def predict_proba(self, X): + 
return [[0.9, 0.1], [0.1, 0.9]] + + def _resolver(task_name, _config_name): + return lambda: _RecordingEstimator(task_name) + + evaluate_downstream_predictions( + self.final_model_table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={ + "Left AMA": "left_ama", + "Code Status": "code_status_dnr_dni_cmo", + }, + downstream_estimator_factory_resolver=_resolver, + split_fn=_SplitRecorder(), + auc_fn=_AUCRecorder(0.8), + repetitions=1, + ) + + self.assertEqual( + [entry["task"] for entry in created], + ["Left AMA", "Code Status"], + ) + + def test_build_logistic_cv_estimator_factory_uses_logistic_regression_cv_with_adaptive_folds(self): + build_logistic_cv_estimator_factory = self._get_callable("build_logistic_cv_estimator_factory") + captured = [] + + class _RecordingLogisticRegressionCV: + def __init__(self, *args, **kwargs): + del args + self.kwargs = kwargs + captured.append(self) + + def fit(self, X, y): + del X, y + self.coef_ = [[0.1, 0.2]] + self.C_ = [self.kwargs["Cs"][0]] + return self + + def predict_proba(self, X): + return [[0.8, 0.2] for _ in range(len(X))] + + factory = build_logistic_cv_estimator_factory( + Cs=[0.01, 0.1, 1.0], + class_weight="balanced", + scoring="roc_auc", + ) + estimator = factory() + + with patch.object(self.module, "LogisticRegressionCV", _RecordingLogisticRegressionCV): + estimator.fit( + pd.DataFrame({"x1": [0, 1, 0, 1], "x2": [1, 0, 1, 0]}), + pd.Series([0, 1, 0, 1]), + ) + + self.assertEqual(captured[0].kwargs["Cs"], [0.01, 0.1, 1.0]) + self.assertEqual(captured[0].kwargs["class_weight"], "balanced") + self.assertEqual(captured[0].kwargs["scoring"], "roc_auc") + self.assertEqual(captured[0].kwargs["cv"], 2) + + def test_evaluate_downstream_average_weights_uses_raw_training_features_without_second_scaling(self): + evaluate_downstream_average_weights = self._get_callable("evaluate_downstream_average_weights") + + created = [] + + class _RecordingEstimator: + def __init__(self): + 
self.coef_ = None + created.append(self) + + def fit(self, X, y): + self.fit_X = X.copy() if hasattr(X, "copy") else X + self.fit_y = y.copy() if hasattr(y, "copy") else y + self.coef_ = [[0.1] * X.shape[1]] + return self + + split_recorder = _SplitRecorder() + results = evaluate_downstream_average_weights( + self.final_model_table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={"Code Status": "code_status_dnr_dni_cmo"}, + estimator_factory=lambda: _RecordingEstimator(), + split_fn=split_recorder, + repetitions=1, + ) + + self.assertEqual(int(results.iloc[0]["n_valid_weights"]), 1) + self.assertIsInstance(created[0].fit_X, pd.DataFrame) + expected_train = ( + self.final_model_table[self.module.BASELINE_FEATURE_COLUMNS] + .reset_index(drop=True) + .iloc[[0, 1, 2, 3]] + .copy() + ) + pd.testing.assert_frame_equal(created[0].fit_X.reset_index(drop=True), expected_train) + + def test_evaluate_downstream_average_weights_uses_group_shuffle_split_by_subject_id_by_default(self): + evaluate_downstream_average_weights = self._get_callable("evaluate_downstream_average_weights") + group_split_recorder = _GroupSplitRecorder() + feature_count = len(self.module.BASELINE_FEATURE_COLUMNS) + + class _RecordingEstimator: + def __init__(self): + self.coef_ = None + + def fit(self, X, y): + del X, y + self.coef_ = [[0.1] * feature_count] + return self + + with patch.object(self.module, "GroupShuffleSplit", side_effect=group_split_recorder), \ + patch.object( + self.module, + "train_test_split", + side_effect=AssertionError("group-aware default split should not call train_test_split"), + ): + results = evaluate_downstream_average_weights( + self.final_model_table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={"Code Status": "code_status_dnr_dni_cmo"}, + estimator_factory=lambda: _RecordingEstimator(), + repetitions=1, + ) + + self.assertEqual(int(results.iloc[0]["n_valid_weights"]), 1) + 
self.assertEqual( + group_split_recorder.calls[0]["groups"], + [201, 202, 201, 203, 202, 204], + ) + + def test_evaluate_downstream_average_weights_returns_nan_for_single_class_target(self): + evaluate_downstream_average_weights = self._get_callable("evaluate_downstream_average_weights") + table = self.final_model_table.copy() + table["code_status_dnr_dni_cmo"] = 0 + + results = evaluate_downstream_average_weights( + table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={"Code Status": "code_status_dnr_dni_cmo"}, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9]), + split_fn=_SplitRecorder(), + repetitions=3, + ) + + self.assertEqual(int(results.iloc[0]["n_valid_weights"]), 0) + self.assertTrue(pd.isna(results.iloc[0]["weight_mean"])) + self.assertTrue(pd.isna(results.iloc[0]["weight_std"])) + + def test_evaluate_downstream_average_weights_can_use_task_specific_estimator_factories(self): + evaluate_downstream_average_weights = self._get_callable("evaluate_downstream_average_weights") + created = [] + + class _RecordingEstimator: + def __init__(self, task_name): + self.task_name = task_name + self.coef_ = None + + def fit(self, X, y): + del y + created.append({"task": self.task_name, "columns": list(X.columns)}) + self.coef_ = [[0.1] * X.shape[1]] + return self + + def _resolver(task_name, _config_name): + return lambda: _RecordingEstimator(task_name) + + evaluate_downstream_average_weights( + self.final_model_table, + feature_configurations={"Baseline": self.module.BASELINE_FEATURE_COLUMNS}, + task_map={ + "Left AMA": "left_ama", + "Code Status": "code_status_dnr_dni_cmo", + }, + downstream_estimator_factory_resolver=_resolver, + split_fn=_SplitRecorder(), + repetitions=1, + ) + + self.assertEqual( + [entry["task"] for entry in created], + ["Left AMA", "Code Status"], + ) + def test_duplicate_hadm_ids_raise_in_proxy_and_race_gap_merges(self): build_proxy_probability_scores = 
self._get_callable("build_proxy_probability_scores") run_race_gap_analysis = self._get_callable("run_race_gap_analysis") @@ -1104,6 +1489,23 @@ def test_run_full_eol_mistrust_modeling_returns_expected_sections(self): self.assertIn("noncompliance", outputs["feature_weight_summaries"]) self.assertIn("autopsy", outputs["feature_weight_summaries"]) + def test_run_full_eol_mistrust_modeling_preserves_proxy_summary_order(self): + run_full_eol_mistrust_modeling = self._get_callable("run_full_eol_mistrust_modeling") + + outputs = run_full_eol_mistrust_modeling( + feature_matrix=self.feature_matrix, + note_labels=self.note_labels, + note_corpus=self.note_corpus, + estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), + sentiment_fn=self._sentiment_fn, + repetitions=1, + ) + + self.assertEqual( + list(outputs["feature_weight_summaries"].keys()), + ["noncompliance", "autopsy"], + ) + def test_run_full_eol_mistrust_modeling_merges_missing_mistrust_columns_into_final_table(self): run_full_eol_mistrust_modeling = self._get_callable("run_full_eol_mistrust_modeling") final_without_scores = self.final_model_table.drop(columns=self.module.MISTRUST_SCORE_COLUMNS) @@ -1210,6 +1612,7 @@ def test_baseline_feature_columns_align_with_real_dataset_baseline_only_output(s set(baseline_only.columns), { "hadm_id", + "subject_id", *self.module.BASELINE_FEATURE_COLUMNS, "left_ama", "code_status_dnr_dni_cmo", @@ -1365,13 +1768,19 @@ def test_dataset_model_integration_smoke_flow_runs_without_column_renaming(self) final_model_table=final_model_table, estimator_factory=lambda: _FakeProbEstimator([0.1, 0.9, 0.3, 0.7, 0.4, 0.6]), sentiment_fn=self._sentiment_fn, - split_fn=_SplitRecorder(), + split_fn=lambda X, y, test_size, random_state: ( + X.reset_index(drop=True).iloc[: max(1, len(X) - 1)].copy(), + X.reset_index(drop=True).iloc[max(1, len(X) - 1) :].copy(), + pd.Series(y).reset_index(drop=True).iloc[: max(1, len(X) - 1)].copy(), + 
pd.Series(y).reset_index(drop=True).iloc[max(1, len(X) - 1) :].copy(), + ), auc_fn=_AUCRecorder(0.7), repetitions=1, ) self.assertEqual(scores.shape[1], 4) - self.assertEqual(final_model_table.shape[1], 20) + self.assertEqual(final_model_table.shape[1], 21) + self.assertIn("subject_id", final_model_table.columns) self.assertEqual(outputs["downstream_auc_results"].shape[0], 18) self.assertEqual(scores["hadm_id"].tolist(), final_model_table["hadm_id"].tolist()) diff --git a/tests/core/test_eol_mistrust_module.py b/tests/core/test_eol_mistrust_module.py index 7a44dc28c..db32ed6d6 100644 --- a/tests/core/test_eol_mistrust_module.py +++ b/tests/core/test_eol_mistrust_module.py @@ -5,6 +5,12 @@ import pandas as pd +from pyhealth.models.eol_mistrust import ( + build_mistrust_score_table as _model_build_mistrust_score_table, + build_negative_sentiment_mistrust_scores as _model_build_negative_sentiment_scores, + build_proxy_probability_scores as _model_build_proxy_probability_scores, +) + def _load_eol_mistrust_module(): module_path = ( @@ -208,7 +214,7 @@ def setUp(self): { "hadm_id": 303, "category": "Physician", - "text": "Patient remained non-adher with follow up after counseling.", + "text": "Patient remained non-adher with follow up after counseling.\nFamily declined autopsy.", "iserror": None, }, { @@ -439,7 +445,7 @@ def _required_downstream_feature_configs(self): } def _build_mistrust_scores(self): - build_mistrust_score_table = self._get_callable("build_mistrust_score_table") + build_mistrust_score_table = _model_build_mistrust_score_table probability_sequences = [ [0.05, 0.90, 0.10, 0.80, 0.20, 0.40], [0.15, 0.70, 0.20, 0.30, 0.60, 0.50], @@ -453,7 +459,7 @@ def estimator_factory(): sentiment_map = { "Patient was NONCOMPLIANT with care plan. Family provided AUTOPSY consent.": -0.6, - "Patient remained non-adher with follow up after counseling.": -0.2, + "Patient remained non-adher with follow up after counseling. 
Family declined autopsy.": -0.2, "Patient refuses medication.": 0.1, "Patient refused treatment. Date:[**5-1-18**]": -0.4, "": 0.0, @@ -485,12 +491,14 @@ def test_all_cohort_size_is_within_expected_mimic_range(self): def test_eol_cohort_applies_los_and_discharge_criteria(self): eol = self._build_eol() - self.assertEqual(set(eol["hadm_id"]), {302}) + self.assertEqual(set(eol["hadm_id"]), {302, 303, 304}) by_hadm = eol.set_index("hadm_id") self.assertEqual(by_hadm.loc[302, "discharge_category"], "Hospice") + self.assertEqual(by_hadm.loc[303, "discharge_category"], "Skilled Nursing Facility") + self.assertEqual(by_hadm.loc[304, "discharge_category"], "Deceased") self._assert_hadm_unique(eol, "EOL cohort") - def test_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): + def test_eol_cohort_requires_stay_of_at_least_six_hours(self): build_base_admissions = self._get_callable("build_base_admissions") build_demographics_table = self._get_callable("build_demographics_table") build_eol_cohort = self._get_callable("build_eol_cohort") @@ -500,7 +508,7 @@ def test_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): "hadm_id": 920, "subject_id": 920, "admittime": "2100-09-01 00:00:00", - "dischtime": "2100-09-02 00:00:00", + "dischtime": "2100-09-01 06:00:00", "ethnicity": "WHITE", "insurance": "Medicare", "discharge_location": "HOME HOSPICE", @@ -511,7 +519,7 @@ def test_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): "hadm_id": 921, "subject_id": 921, "admittime": "2100-09-01 00:00:00", - "dischtime": "2100-09-02 00:01:00", + "dischtime": "2100-09-01 05:59:00", "ethnicity": "BLACK/AFRICAN AMERICAN", "insurance": "Private", "discharge_location": "HOME HOSPICE", @@ -530,8 +538,8 @@ def test_eol_cohort_requires_stay_longer_than_twenty_four_hours(self): base = build_base_admissions(admissions, patients) demographics = build_demographics_table(base) eol = build_eol_cohort(base, demographics) - self.assertNotIn(920, set(eol["hadm_id"])) - 
self.assertIn(921, set(eol["hadm_id"])) + self.assertIn(920, set(eol["hadm_id"])) + self.assertNotIn(921, set(eol["hadm_id"])) def test_eol_cohort_size_is_within_expected_mimic_range(self): self._pending_real_data( @@ -794,7 +802,7 @@ def test_noncompliance_positive_rate_is_within_expected_range(self): "Noncompliance label prevalence on real data should be between 1% and 30%." ) - def test_autopsy_label_distinguishes_consent_decline_and_ambiguous_mentions(self): + def test_autopsy_label_requires_consent_agree_or_request_on_autopsy_line(self): build_note_labels = self._get_callable("build_note_labels") notes = pd.DataFrame( [ @@ -816,12 +824,29 @@ def test_autopsy_label_distinguishes_consent_decline_and_ambiguous_mentions(self "text": "Autopsy was discussed with the family.", "iserror": 0, }, + { + "hadm_id": 4, + "category": "Nursing", + "text": "Family agreed to autopsy after lengthy discussion.", + "iserror": 0, + }, + { + "hadm_id": 5, + "category": "Nursing", + "text": "Family requested autopsy be performed.", + "iserror": 0, + }, ] ) labels = build_note_labels(notes).set_index("hadm_id") self.assertEqual(labels.loc[1, "autopsy_label"], 1) self.assertEqual(labels.loc[2, "autopsy_label"], 0) - self.assertEqual(labels.loc[3, "autopsy_label"], 0) + self.assertTrue( + pd.isna(labels.loc[3, "autopsy_label"]), + msg="discussed without consent/agree/request → NaN (unlabeled)", + ) + self.assertEqual(labels.loc[4, "autopsy_label"], 1) + self.assertEqual(labels.loc[5, "autopsy_label"], 1) def test_autopsy_positive_rate_is_within_expected_range(self): self._pending_real_data( @@ -841,7 +866,7 @@ def test_sentiment_preprocessing_uses_whitespace_tokenize_then_rejoin(self): self.assertEqual(cleaned, "Patient refused treatment Date:[**5-1-18**]") def test_noncompliance_proxy_model_uses_l1_liblinear_logistic_regression(self): - build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + build_proxy_probability_scores = 
_model_build_proxy_probability_scores created = [] class _RecordingLogisticRegression: @@ -853,7 +878,7 @@ def fit(self, X, y): return self def predict_proba(self, X): - return [[0.25, 0.75] for _ in range(len(X))] + return [[0.5, 0.5]] * len(X) feature_matrix = pd.DataFrame( [{"hadm_id": 1, "feature_a": 1}, {"hadm_id": 2, "feature_a": 0}] @@ -862,16 +887,20 @@ def predict_proba(self, X): [{"hadm_id": 1, "noncompliance_label": 1}, {"hadm_id": 2, "noncompliance_label": 0}] ) - with patch.object(self.module, "LogisticRegression", _RecordingLogisticRegression): + with patch( + "pyhealth.models.eol_mistrust.LogisticRegression", + _RecordingLogisticRegression, + ): build_proxy_probability_scores(feature_matrix, labels, "noncompliance_label") self.assertEqual(created[0].get("penalty"), "l1") self.assertEqual(created[0].get("C"), 0.1) self.assertEqual(created[0].get("solver"), "liblinear") - self.assertEqual(created[0].get("max_iter"), 1000) + self.assertEqual(created[0].get("max_iter"), 100) + self.assertEqual(created[0].get("tol"), 0.01) def test_autopsy_proxy_model_uses_l1_liblinear_logistic_regression(self): - build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + build_proxy_probability_scores = _model_build_proxy_probability_scores created = [] class _RecordingLogisticRegression: @@ -883,7 +912,7 @@ def fit(self, X, y): return self def predict_proba(self, X): - return [[0.40, 0.60] for _ in range(len(X))] + return [[0.5, 0.5]] * len(X) feature_matrix = pd.DataFrame( [{"hadm_id": 1, "feature_a": 1}, {"hadm_id": 2, "feature_a": 0}] @@ -892,16 +921,20 @@ def predict_proba(self, X): [{"hadm_id": 1, "autopsy_label": 1}, {"hadm_id": 2, "autopsy_label": 0}] ) - with patch.object(self.module, "LogisticRegression", _RecordingLogisticRegression): + with patch( + "pyhealth.models.eol_mistrust.LogisticRegression", + _RecordingLogisticRegression, + ): build_proxy_probability_scores(feature_matrix, labels, "autopsy_label") 
self.assertEqual(created[0].get("penalty"), "l1") self.assertEqual(created[0].get("C"), 0.1) self.assertEqual(created[0].get("solver"), "liblinear") - self.assertEqual(created[0].get("max_iter"), 1000) + self.assertEqual(created[0].get("max_iter"), 100) + self.assertEqual(created[0].get("tol"), 0.01) def test_proxy_models_fit_on_full_all_cohort_without_train_test_split(self): - build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + build_proxy_probability_scores = _model_build_proxy_probability_scores estimator = _FakeProbEstimator([0.9, 0.2, 0.8, 0.1, 0.4, 0.3]) feature_matrix = self._build_feature_matrix() labels = self._build_note_labels() @@ -915,8 +948,8 @@ def test_proxy_models_fit_on_full_all_cohort_without_train_test_split(self): self.assertEqual(len(estimator.fit_X), len(feature_matrix)) self.assertEqual(set(scores["hadm_id"]), set(self.all_hadm_ids)) - def test_proxy_model_scores_use_predict_proba_positive_class(self): - build_proxy_probability_scores = self._get_callable("build_proxy_probability_scores") + def test_proxy_model_scores_use_predict_proba(self): + build_proxy_probability_scores = _model_build_proxy_probability_scores estimator = _FakeProbEstimator([0.3, 0.8]) feature_matrix = pd.DataFrame( [{"hadm_id": 1, "feature_a": 1}, {"hadm_id": 2, "feature_a": 0}] @@ -933,7 +966,7 @@ def test_proxy_model_scores_use_predict_proba_positive_class(self): self.assertEqual(list(scores["noncompliance_score"]), [0.3, 0.8]) def test_sentiment_mistrust_score_is_negative_polarity(self): - build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") + build_negative_sentiment_scores = _model_build_negative_sentiment_scores note_corpus = pd.DataFrame( [ {"hadm_id": 1, "note_text": "very negative"}, @@ -951,7 +984,7 @@ def test_sentiment_mistrust_score_is_negative_polarity(self): self.assertAlmostEqual(scores.loc[3, "negative_sentiment_score"], -0.2) def 
test_negative_sentiment_scores_send_cleaned_text_to_sentiment_function(self): - build_negative_sentiment_scores = self._get_callable("build_negative_sentiment_scores") + build_negative_sentiment_scores = _model_build_negative_sentiment_scores seen = [] def sentiment_fn(text): @@ -1232,6 +1265,33 @@ def test_left_ama_target_definition_is_exact(self): self.assertEqual(final.loc[305, "left_ama"], 1) self.assertEqual(final.loc[306, "left_ama"], 0) + def test_left_ama_target_accepts_truncated_mimic_discharge_location(self): + build_final_model_table = self._get_callable("build_final_model_table") + admissions = self._build_base().copy() + admissions.loc[admissions["hadm_id"] == 305, "discharge_location"] = "LEFT AGAINST MEDICAL ADVI" + final = build_final_model_table( + demographics=self._build_demographics(), + all_cohort=self._build_all(), + admissions=admissions, + chartevents=self.chartevents, + d_items=self.d_items, + mistrust_scores=pd.DataFrame( + [ + { + "hadm_id": hadm_id, + "noncompliance_score_z": 0.0, + "autopsy_score_z": 0.0, + "negative_sentiment_score_z": 0.0, + } + for hadm_id in self.all_hadm_ids + ] + ), + include_race=False, + include_mistrust=False, + ).set_index("hadm_id") + self.assertEqual(int(final.loc[305, "left_ama"]), 1) + self.assertEqual(int(final.loc[306, "left_ama"]), 0) + def test_code_status_target_uses_required_itemids_and_values(self): build_code_status_target = getattr(self.module, "_build_code_status_target") target = build_code_status_target(self.chartevents, self.d_items).set_index("hadm_id") @@ -1240,6 +1300,60 @@ def test_code_status_target_uses_required_itemids_and_values(self): self.assertEqual(target.loc[305, "code_status_dnr_dni_cmo"], 0) self.assertNotIn(304, set(target.index)) + def test_code_status_target_recognizes_common_truncated_positive_values(self): + build_code_status_target = getattr(self.module, "_build_code_status_target") + chartevents = pd.DataFrame( + [ + {"hadm_id": 401, "itemid": 128, "value": "Do Not 
Resuscita", "icustay_id": 4011}, + {"hadm_id": 402, "itemid": 223758, "value": "Do Not Intubate", "icustay_id": 4021}, + {"hadm_id": 403, "itemid": 128, "value": "CPR Not Indicate", "icustay_id": 4031}, + ] + ) + + target = build_code_status_target(chartevents, self.d_items).set_index("hadm_id") + self.assertEqual(int(target.loc[401, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(target.loc[402, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(target.loc[403, "code_status_dnr_dni_cmo"]), 1) + + def test_code_status_target_uses_last_charted_status_when_charttime_is_present(self): + build_code_status_target = getattr(self.module, "_build_code_status_target") + chartevents = pd.DataFrame( + [ + { + "hadm_id": 451, + "itemid": 128, + "value": "Full Code", + "icustay_id": 4511, + "charttime": "2100-01-01 01:00:00", + }, + { + "hadm_id": 451, + "itemid": 128, + "value": "Do Not Resuscita", + "icustay_id": 4511, + "charttime": "2100-01-01 03:00:00", + }, + { + "hadm_id": 452, + "itemid": 128, + "value": "DNR / DNI", + "icustay_id": 4521, + "charttime": "2100-01-02 01:00:00", + }, + { + "hadm_id": 452, + "itemid": 128, + "value": "Full Code", + "icustay_id": 4521, + "charttime": "2100-01-02 04:00:00", + }, + ] + ) + + target = build_code_status_target(chartevents, self.d_items).set_index("hadm_id") + self.assertEqual(int(target.loc[451, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(target.loc[452, "code_status_dnr_dni_cmo"]), 0) + def test_code_status_task_excludes_admissions_without_charted_code_status(self): build_code_status_target = getattr(self.module, "_build_code_status_target") target = build_code_status_target(self.chartevents, self.d_items) @@ -1324,9 +1438,9 @@ def test_downstream_outputs_cover_all_three_tasks_and_six_configurations(self): "Downstream results must cover all three tasks across all six required configurations." 
) - def test_downstream_result_table_has_eighteen_task_configuration_entries(self): + def test_downstream_result_table_has_twelve_task_configuration_entries(self): self._pending_real_data( - "Downstream outputs should expose 18 task-configuration result entries: 3 tasks x 6 configurations." + "Downstream outputs should expose 12 task-configuration result entries: 2 tasks x 6 configurations." ) def test_final_model_table_contains_required_downstream_feature_columns(self): @@ -1449,7 +1563,7 @@ def test_downstream_evaluation_drops_rows_with_null_target_or_required_features( def test_downstream_estimator_and_metric_match_spec(self): self._pending_real_data( - 'Downstream evaluation must use LogisticRegression(penalty="l1", solver="liblinear", max_iter=1000) and roc_auc_score.' + 'Downstream evaluation must use LogisticRegression(penalty="l1", solver="liblinear", max_iter=100, tol=0.01) and roc_auc_score.' ) def test_downstream_auc_uses_predicted_probabilities_on_the_test_split(self): diff --git a/tests/core/test_eol_mistrust_task.py b/tests/core/test_eol_mistrust_task.py new file mode 100644 index 000000000..19d89bfe5 --- /dev/null +++ b/tests/core/test_eol_mistrust_task.py @@ -0,0 +1,178 @@ +import importlib.util +import unittest +from pathlib import Path + +import pandas as pd + + +def _load_task_module(): + module_path = Path(__file__).resolve().parents[2] / "pyhealth" / "tasks" / "eol_mistrust.py" + spec = importlib.util.spec_from_file_location( + "pyhealth.tasks.eol_mistrust_task_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +class _DummyEvent: + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + +class _DummyPatient: + def __init__(self, patient_id, events_by_type): + self.patient_id = patient_id + self._events_by_type = events_by_type + + def get_events(self, event_type, 
filters=None): + events = list(self._events_by_type.get(event_type, [])) + for field, operator, expected in filters or []: + if operator != "==": + raise AssertionError(f"Unexpected filter operator in test double: {operator}") + events = [event for event in events if getattr(event, field, None) == expected] + return events + + +class TestEOLMistrustTask(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.module = _load_task_module() + + def test_build_code_status_target_has_clear_corrected_and_paper_like_modes(self): + chartevents = pd.DataFrame( + [ + { + "hadm_id": 101, + "itemid": 128, + "value": "Full Code", + "charttime": "2100-01-01 10:00:00", + }, + { + "hadm_id": 101, + "itemid": 128, + "value": "DNR/DNI", + "charttime": "2100-01-01 08:00:00", + }, + { + "hadm_id": 102, + "itemid": 128, + "value": "DNR", + }, + { + "hadm_id": 102, + "itemid": 128, + "value": "Other/Remarks", + }, + ] + ) + + corrected = self.module.build_code_status_target(chartevents).set_index("hadm_id") + paper_like = self.module.build_code_status_target( + chartevents, + code_status_mode="paper_like", + ).set_index("hadm_id") + + self.assertEqual(int(corrected.loc[101, "code_status_dnr_dni_cmo"]), 0) + self.assertEqual(int(paper_like.loc[101, "code_status_dnr_dni_cmo"]), 1) + self.assertEqual(int(corrected.loc[102, "code_status_dnr_dni_cmo"]), 0) + self.assertEqual(int(paper_like.loc[102, "code_status_dnr_dni_cmo"]), 1) + + def test_build_target_tables_keep_expected_public_behavior(self): + admissions = pd.DataFrame( + [ + { + "hadm_id": 201, + "discharge_location": "LEFT AGAINST MEDICAL ADVICE", + "hospital_expire_flag": 0, + }, + { + "hadm_id": 202, + "discharge_location": "HOME", + "hospital_expire_flag": 1, + }, + ] + ) + + left_ama = self.module.build_left_ama_target(admissions).set_index("hadm_id") + mortality = self.module.build_in_hospital_mortality_target(admissions).set_index("hadm_id") + + self.assertEqual(int(left_ama.loc[201, "left_ama"]), 1) + 
self.assertEqual(int(left_ama.loc[202, "left_ama"]), 0) + self.assertEqual(int(mortality.loc[201, "in_hospital_mortality"]), 0) + self.assertEqual(int(mortality.loc[202, "in_hospital_mortality"]), 1) + + def test_downstream_task_wrapper_builds_single_admission_sample(self): + task = self.module.EOLMistrustCodeStatusPredictionMIMIC3(include_notes=True) + patient = _DummyPatient( + patient_id="subject-1", + events_by_type={ + "patients": [ + _DummyEvent(gender="F", dob="2070-01-01 00:00:00"), + ], + "admissions": [ + _DummyEvent( + hadm_id=301, + admittime="2100-01-01 00:00:00", + dischtime="2100-01-03 12:00:00", + discharge_location="HOME", + hospital_expire_flag=0, + insurance="Private", + ethnicity="BLACK/AFRICAN AMERICAN", + ), + ], + "diagnoses_icd": [ + _DummyEvent(hadm_id=301, icd9_code="4019"), + ], + "procedures_icd": [ + _DummyEvent(hadm_id=301, icd9_code="3893"), + ], + "prescriptions": [ + _DummyEvent(hadm_id=301, drug="Aspirin"), + ], + "chartevents": [ + _DummyEvent(hadm_id=301, itemid=128, value="Full Code"), + _DummyEvent(hadm_id=301, itemid=128, value="Comfort Measures"), + ], + "noteevents": [ + _DummyEvent(hadm_id=301, text=" family meeting note "), + ], + }, + ) + + samples = task(patient) + + self.assertEqual(len(samples), 1) + sample = samples[0] + self.assertEqual(sample["visit_id"], 301) + self.assertEqual(sample["patient_id"], "subject-1") + self.assertEqual(sample["conditions"], ["4019"]) + self.assertEqual(sample["procedures"], ["3893"]) + self.assertEqual(sample["drugs"], ["Aspirin"]) + self.assertEqual(sample["insurance"], "Private") + self.assertEqual(sample["race"], "BLACK") + self.assertEqual(sample["clinical_notes"], "family meeting note") + self.assertEqual(sample["code_status_dnr_dni_cmo"], 1) + self.assertGreater(sample["age"], 0.0) + self.assertGreater(sample["los_days"], 0.0) + + def test_task_map_and_wrapper_targets_stay_consistent(self): + task_map = self.module.get_eol_mistrust_task_map() + + self.assertEqual( + 
list(task_map.items()), + [ + ("Left AMA", "left_ama"), + ("Code Status", "code_status_dnr_dni_cmo"), + ("In-hospital mortality", "in_hospital_mortality"), + ], + ) + with self.assertRaisesRegex(ValueError, "Unsupported EOL mistrust target"): + self.module.EOLMistrustDownstreamMIMIC3(target="unknown") + + +if __name__ == "__main__": + unittest.main() From 2dc12c83168dcbd22ae5d6b1bddc03a0af7c0f34 Mon Sep 17 00:00:00 2001 From: aaronx2-illinois Date: Sun, 12 Apr 2026 09:10:20 -0600 Subject: [PATCH 5/7] CommitName: connecting to Pyhealth and refactor class with Shorter version 1. Added tests for EOLMistrustClassifier to confirm it extends BaseModel and accepts task-style inputs. 2. Added end-to-end classifier tests for both normal and paper-like dataset preparation. 3. Updated assertions to match new expected treatment totals and code status outputs. 4. Cleaned up dataset setup code in tests for better clarity. --- ...y => eol_mistrust_mortality_classifier.py} | 1696 +++++------ pyhealth/datasets/__init__.py | 1 + pyhealth/datasets/base_dataset.py | 15 +- pyhealth/datasets/configs/eol_mistrust.yaml | 57 + pyhealth/datasets/eol_mistrust.py | 1 + pyhealth/datasets/eol_mistrust_dataset.py | 194 ++ pyhealth/models/__init__.py | 1 + pyhealth/models/eol_mistrust.py | 8 +- pyhealth/models/eol_mistrust_classifier.py | 194 ++ pyhealth/tasks/__init__.py | 6 + pyhealth/tasks/eol_mistrust.py | 129 +- tests/core/test_base_dataset.py | 35 + tests/core/test_eol_mistrust_Integration.py | 2468 +++++------------ tests/core/test_eol_mistrust_dataset.py | 403 ++- tests/core/test_eol_mistrust_model.py | 624 ++++- tests/core/test_eol_mistrust_module.py | 20 +- tests/core/test_eol_mistrust_task.py | 69 + 17 files changed, 3157 insertions(+), 2764 deletions(-) rename examples/{eol_mistrust.py => eol_mistrust_mortality_classifier.py} (50%) create mode 100644 pyhealth/datasets/eol_mistrust_dataset.py create mode 100644 pyhealth/models/eol_mistrust_classifier.py diff --git 
a/examples/eol_mistrust.py b/examples/eol_mistrust_mortality_classifier.py similarity index 50% rename from examples/eol_mistrust.py rename to examples/eol_mistrust_mortality_classifier.py index d8e3aac69..d400f263e 100644 --- a/examples/eol_mistrust.py +++ b/examples/eol_mistrust_mortality_classifier.py @@ -1,65 +1,55 @@ -r"""Example workflow for the EOL mistrust study pipeline. +r"""Run the EOL mistrust workflow. -This script assumes you have already exported and combined the required MIMIC-III -tables into a local directory such as: +Expected data root:: EOL_Workspace/eol_mistrust_required_combined/ mimiciii_clinical/ mimiciii_notes/ mimiciii_derived/ -It demonstrates two related flows: +This example supports two uses: -1. the study-style preprocessing + modeling pipeline built on pandas tables -2. an optional PyHealth task demo using the custom EOL mistrust YAML config +1. Full research pipeline + pandas-based preprocessing -> proxy construction -> downstream evaluation + -> result writing +2. PyHealth-native proof/demo + ``BaseDataset -> BaseTask -> BaseModel`` with optional normal-path + ``Trainer.train() -> Trainer.evaluate()`` -Implementation note: the sentiment metric in this repo uses the existing -transformers+torch stack rather than the original Pattern backend from the -reference notebooks. The example still follows the paper-style note scope by -building both the sentiment corpus and note-derived labels from all non-error -notes. +Managed runs are written under:: -Recommended commands --------------------- -Formal managed runs (recommended) + EOL_Workspace/EOL_Result/EOL_(normal|Paperlike)_/ -The script now creates a managed run archive under -``EOL_Workspace/EOL_Result/EOL_(normal|Paperlike)_/``. -When ``--output-dir`` and ``--stream-cache-dir`` are omitted, deliverables, -runtime files, and stage cache directories are created automatically inside -that managed run folder. 
-Default / corrected pipeline +Recommended commands +-------------------- -Formal cold-start run: -.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --compare-to-paper --repetitions 10 +Full pipeline, normal:: -Formal smoke run: -.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --compare-to-paper --repetitions 1 + .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py --root EOL_Workspace\eol_mistrust_required_combined --repetitions 10 -Paper-like dataset preparation +Full pipeline, paper-like:: -Formal cold-start run: -.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --compare-to-paper --paper-like-dataset-prepare --repetitions 10 + .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py --root EOL_Workspace\eol_mistrust_required_combined --paper-like-dataset-prepare --repetitions 10 + +Full pipeline . 
Route ablation, normal vs paper-like:: -Formal smoke run: -.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --compare-to-paper --paper-like-dataset-prepare --repetitions 1 + .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py --root EOL_Workspace\eol_mistrust_required_combined --ablation-study --repetitions 1 -Optional fast reruns with shared cache -Default / corrected pipeline: -.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --stream-cache-dir EOL_Workspace --reuse-intermediates EOL_Workspace --compare-to-paper --repetitions 10 -.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --stream-cache-dir EOL_Workspace --reuse-intermediates EOL_Workspace --compare-to-paper --repetitions 1 +Native proof, normal:: -Paper-like dataset preparation: -.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --stream-cache-dir EOL_Workspace --reuse-intermediates EOL_Workspace --compare-to-paper --paper-like-dataset-prepare --repetitions 10 -.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --stream-cache-dir EOL_Workspace --reuse-intermediates EOL_Workspace --compare-to-paper --paper-like-dataset-prepare --repetitions 1 + .\.venv\Scripts\python.exe -m unittest tests.core.test_eol_mistrust_model.TestEOLMistrustClassifier.test_classifier_runs_end_to_end_for_normal_full_feature_path -Optional custom managed-run archive root: -.\.venv\Scripts\python.exe examples\eol_mistrust.py --root EOL_Workspace\eol_mistrust_required_combined --result-root EOL_Workspace\EOL_Result --compare-to-paper --repetitions 10 +Native proof, paper-like:: + .\.venv\Scripts\python.exe -m unittest tests.core.test_eol_mistrust_model.TestEOLMistrustClassifier.test_classifier_runs_end_to_end_for_paper_like_full_feature_path +Native 
train/eval demo, normal only:: + .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py --root EOL_Workspace\eol_mistrust_required_combined --task-demo --task-demo-train-eval + + """ from __future__ import annotations @@ -76,9 +66,14 @@ import pandas as pd REPO_ROOT = Path(__file__).resolve().parents[1] +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + DEFAULT_DATA_ROOT = REPO_ROOT / "EOL_Workspace" / "eol_mistrust_required_combined" DEFAULT_CONFIG_PATH = REPO_ROOT / "pyhealth" / "datasets" / "configs" / "eol_mistrust.yaml" DEFAULT_RESULT_ROOT = REPO_ROOT / "EOL_Workspace" / "EOL_Result" +DEFAULT_NOTE_CHUNKSIZE = 100_000 +DEFAULT_CHARTEVENT_CHUNKSIZE = 500_000 def _load_local_module(module_name: str, relative_path: str): @@ -122,8 +117,12 @@ def _load_local_module(module_name: str, relative_path: str): get_downstream_feature_configurations = _MODEL_MODULE.get_downstream_feature_configurations z_normalize_scores = _MODEL_MODULE.z_normalize_scores -MIMIC3Dataset = None +EOLMistrustClassifier = None +EOLMistrustDataset = None EOLMistrustMortalityPredictionMIMIC3 = None +get_dataloader = None +split_by_patient = None +Trainer = None RAW_TABLE_PATHS = { "admissions": "mimiciii_clinical/admissions.csv", @@ -146,190 +145,55 @@ def _load_local_module(module_name: str, relative_path: str): VALIDATION_EVENT_PROBE_ROWS = 50_000 -PAPER_URL = "https://proceedings.mlr.press/v85/boag18a.html" -PAPER_PDF_URL = "https://proceedings.mlr.press/v85/boag18a/boag18a.pdf" - -PAPER_TABLE1_COUNTS = { - "Population Size": {"BLACK": 1214, "WHITE": 9987}, - "Insurance Private": {"BLACK": 141, "WHITE": 1594}, - "Insurance Public": {"BLACK": 1062, "WHITE": 8356}, - "Insurance Self-Pay": {"BLACK": 11, "WHITE": 37}, - "Discharge Deceased": {"BLACK": 401, "WHITE": 3869}, - "Discharge Hospice": {"BLACK": 40, "WHITE": 421}, - "Discharge Skilled Nursing Facility": {"BLACK": 773, "WHITE": 5697}, - "Gender F": {"BLACK": 733, "WHITE": 5012}, - "Gender M": 
{"BLACK": 481, "WHITE": 4975}, +RUN_TABLE1_RACES = ("BLACK", "WHITE") + +RUN_TABLE1_COUNT_SPECS = [ + ("Population Size", None, None), + ("Insurance Private", "insurance_group", "Private"), + ("Insurance Public", "insurance_group", "Public"), + ("Insurance Self-Pay", "insurance_group", "Self-Pay"), + ("Discharge Deceased", "discharge_category", "Deceased"), + ("Discharge Hospice", "discharge_category", "Hospice"), + ( + "Discharge Skilled Nursing Facility", + "discharge_category", + "Skilled Nursing Facility", + ), + ("Gender F", "gender", "F"), + ("Gender M", "gender", "M"), +] + +RUN_TABLE1_CONTINUOUS_SPECS = { + "Length of stay (median days)": "los_days", + "Age (median years)": "age", } -PAPER_TABLE1_CONTINUOUS = { - "Length of stay (median days)": { - "BLACK": {"center": 13.90, "lower": 5.55, "upper": 19.56}, - "WHITE": {"center": 14.08, "lower": 6.45, "upper": 19.45}, - }, - "Age (median years)": { - "BLACK": {"center": 71.31, "lower": 60.21, "upper": 80.36}, - "WHITE": {"center": 77.87, "lower": 66.61, "upper": 84.93}, - }, -} +RUN_TABLE2_TREATMENT_ORDER = ["total_vent_min", "total_vaso_min"] -PAPER_TABLE2_TREATMENT = { - "total_vent_min": { - "n_black": 510, - "n_white": 4810, - "median_black": 3180.0, - "median_white": 2520.0, - "pvalue": 0.005, - }, - "total_vaso_min": { - "n_black": 453, - "n_white": 4456, - "median_black": 2046.0, - "median_white": 1770.0, - "pvalue": 0.12, - }, -} +RUN_TABLE3_PROXY_ORDER = ["noncompliance", "autopsy"] -PAPER_TABLE3_WEIGHTS = { - "noncompliance": { - "positive": [ - ("riker-sas scale: agitated", 0.7013), - ("education readiness: no", 0.2540), - ("pain level: 7-mod to severe", 0.2168), - ], - "negative": [ - ("state: alert", -1.0156), - ("pain: none", -0.5427), - ("richmond-ras scale: 0 alert and calm", -0.3598), - ], - }, - "autopsy": { - "positive": [ - ("reapplied restraints", 0.1153), - ("restraint type: soft limb", 0.0980), - ("orientation: oriented 3x", 0.0363), - ], - "negative": [ - ("pain present: no", -0.2689), 
- ("spokesperson is healthcare proxy", -0.2271), - ("family communication: talked to m.d.", -0.1184), - ], - }, -} +RUN_TABLE4_FEATURE_ORDER = [ + "autopsy_score_z", + "negative_sentiment_score_z", + "noncompliance_score_z", + "oasis", + "sapsii", +] -PAPER_TABLE3_FEATURE_ALIASES = { - "autopsy": { - "reapplied restraints": ( - "restraints evaluated: restraintreapply", - "restraints evaluated: reapplied", - "restraints evaluated v1: restraint reapplied", - "restraints evaluated v2: reapplied", - ), - "orientation: oriented 3x": ( - "orientation: oriented x 3", - "orientation: oriented x3", - ), - "spokesperson is healthcare proxy": ( - "is the spokesperson the health care proxy: 1", - ), - "family communication: talked to m.d.": ( - "family communication: family talked to md", - "family communication: fam talked to md", - ), - }, -} +RUN_TABLE5_TASK_ORDER = ["Left AMA", "Code Status", "In-hospital mortality"] -PAPER_TABLE4_CORRELATIONS = { - tuple(sorted(("oasis", "sapsii"))): 0.679, - tuple(sorted(("oasis", "noncompliance_score_z"))): 0.050, - tuple(sorted(("oasis", "autopsy_score_z"))): -0.012, - tuple(sorted(("oasis", "negative_sentiment_score_z"))): 0.075, - tuple(sorted(("sapsii", "noncompliance_score_z"))): 0.013, - tuple(sorted(("sapsii", "autopsy_score_z"))): -0.013, - tuple(sorted(("sapsii", "negative_sentiment_score_z"))): 0.086, - tuple(sorted(("noncompliance_score_z", "autopsy_score_z"))): 0.262, - tuple(sorted(("noncompliance_score_z", "negative_sentiment_score_z"))): 0.058, - tuple(sorted(("autopsy_score_z", "negative_sentiment_score_z"))): 0.044, -} +RUN_TABLE5_CONFIGURATION_ORDER = [ + "Baseline", + "Baseline + Race", + "Baseline + Noncompliant", + "Baseline + Autopsy", + "Baseline + Neg-Sentiment", + "Baseline + ALL", +] -PAPER_TABLE5_AUC = { - ("Left AMA", "Baseline"): {"n_rows": 48071, "auc_mean": 0.859, "auc_std": 0.014}, - ("Left AMA", "Baseline + Race"): {"n_rows": 48071, "auc_mean": 0.861, "auc_std": 0.014}, - ("Left AMA", "Baseline + 
Noncompliant"): {"n_rows": 48071, "auc_mean": 0.869, "auc_std": 0.012}, - ("Left AMA", "Baseline + Autopsy"): {"n_rows": 48071, "auc_mean": 0.861, "auc_std": 0.012}, - ("Left AMA", "Baseline + Neg-Sentiment"): {"n_rows": 48071, "auc_mean": 0.859, "auc_std": 0.013}, - ("Left AMA", "Baseline + ALL"): {"n_rows": 48071, "auc_mean": 0.873, "auc_std": 0.012}, - ("Code Status", "Baseline"): {"n_rows": 39815, "auc_mean": 0.763, "auc_std": 0.013}, - ("Code Status", "Baseline + Race"): {"n_rows": 39815, "auc_mean": 0.766, "auc_std": 0.014}, - ("Code Status", "Baseline + Noncompliant"): {"n_rows": 39815, "auc_mean": 0.767, "auc_std": 0.013}, - ("Code Status", "Baseline + Autopsy"): {"n_rows": 39815, "auc_mean": 0.773, "auc_std": 0.011}, - ("Code Status", "Baseline + Neg-Sentiment"): {"n_rows": 39815, "auc_mean": 0.765, "auc_std": 0.014}, - ("Code Status", "Baseline + ALL"): {"n_rows": 39815, "auc_mean": 0.782, "auc_std": 0.012}, - ("In-hospital mortality", "Baseline"): {"n_rows": 48071, "auc_mean": 0.600, "auc_std": 0.011}, - ("In-hospital mortality", "Baseline + Race"): {"n_rows": 48071, "auc_mean": 0.614, "auc_std": 0.011}, - ("In-hospital mortality", "Baseline + Noncompliant"): {"n_rows": 48071, "auc_mean": 0.614, "auc_std": 0.010}, - ("In-hospital mortality", "Baseline + Autopsy"): {"n_rows": 48071, "auc_mean": 0.603, "auc_std": 0.012}, - ("In-hospital mortality", "Baseline + Neg-Sentiment"): {"n_rows": 48071, "auc_mean": 0.615, "auc_std": 0.010}, - ("In-hospital mortality", "Baseline + ALL"): {"n_rows": 48071, "auc_mean": 0.635, "auc_std": 0.010}, -} - -PAPER_TABLE6_WEIGHTS = { - "Left AMA": { - "noncompliant": (0.52, 0.09), - "autopsy": (0.01, 0.03), - "negative sentiment": (0.00, 0.02), - "race: asian": (0.00, 0.00), - "race: black": (0.03, 0.12), - "race: hispanic": (0.00, 0.00), - "race: other": (-0.15, 0.19), - "race: white": (-0.02, 0.06), - "race: native american": (0.00, 0.00), - "gender: male": (0.00, 0.00), - "gender: female": (-0.40, 0.20), - "insurance: 
private": (-1.01, 0.21), - "insurance: public": (0.00, 0.00), - "insurance: self-pay": (0.00, 0.00), - "length-of-stay": (-1.44, 0.37), - "age": (-2.10, 0.21), - }, - "Code Status": { - "noncompliant": (0.27, 0.04), - "autopsy": (-0.44, 0.05), - "negative sentiment": (0.09, 0.03), - "race: asian": (0.00, 0.00), - "race: black": (-0.22, 0.19), - "race: hispanic": (-0.17, 0.21), - "race: other": (-0.12, 0.17), - "race: white": (0.06, 0.15), - "race: native american": (0.00, 0.00), - "gender: male": (-0.85, 1.40), - "gender: female": (-0.49, 1.39), - "insurance: private": (-0.94, 0.29), - "insurance: public": (-0.02, 0.28), - "insurance: self-pay": (-0.02, 0.24), - "length-of-stay": (-0.70, 0.10), - "age": (0.42, 0.02), - }, - "In-hospital mortality": { - "noncompliant": (0.16, 0.03), - "autopsy": (0.02, 0.02), - "negative sentiment": (0.16, 0.03), - "race: asian": (-0.05, 0.03), - "race: black": (-0.53, 0.31), - "race: hispanic": (-0.58, 0.34), - "race: other": (0.15, 0.30), - "race: white": (-0.26, 0.30), - "race: native american": (0.00, 0.00), - "gender: male": (-0.67, 0.99), - "gender: female": (-0.59, 0.99), - "insurance: private": (-0.96, 0.95), - "insurance: public": (-0.50, 0.95), - "insurance: self-pay": (-0.21, 0.68), - "length-of-stay": (0.08, 0.03), - "age": (0.20, 0.02), - }, -} +RUN_TABLE6_TASK_ORDER = ["Left AMA", "Code Status", "In-hospital mortality"] -TABLE6_FEATURE_NAME_MAP = { +RUN_TABLE6_FEATURE_NAME_MAP = { "noncompliance_score_z": "noncompliant", "autopsy_score_z": "autopsy", "negative_sentiment_score_z": "negative sentiment", @@ -348,6 +212,25 @@ def _load_local_module(module_name: str, relative_path: str): "age": "age", } +RUN_TABLE6_FEATURE_ORDER = [ + "age", + "length-of-stay", + "gender: female", + "gender: male", + "insurance: private", + "insurance: public", + "insurance: self-pay", + "race: white", + "race: black", + "race: asian", + "race: hispanic", + "race: native american", + "race: other", + "noncompliant", + "autopsy", + "negative 
sentiment", +] + def _read_csvs(root: Path, path_map: dict[str, str]) -> dict[str, pd.DataFrame]: tables: dict[str, pd.DataFrame] = {} @@ -417,26 +300,27 @@ def _note_present_hadm_ids(note_corpus: pd.DataFrame) -> list[int]: return sorted(hadm_ids.dropna().astype(int).unique().tolist()) -def build_paper_table1_comparison(eol_cohort: pd.DataFrame) -> pd.DataFrame: - """Compare the run EOL cohort demographics against Table 1 from the paper.""" +def _ordered_present_values( + preferred_values: list[str] | tuple[str, ...], + present_values: list[str], +) -> list[str]: + """Return present values in a stable preferred-first order.""" + + present_lookup = {str(value) for value in present_values} + ordered = [value for value in preferred_values if value in present_lookup] + remaining = sorted(present_lookup.difference(ordered)) + return ordered + remaining + + +def _build_run_table1_summary(eol_cohort: pd.DataFrame) -> pd.DataFrame: + """Build run-only Table 1 demographics summaries.""" - cohort = eol_cohort[eol_cohort["race"].isin(["BLACK", "WHITE"])].copy() - totals = {race: int((cohort["race"] == race).sum()) for race in ("BLACK", "WHITE")} + cohort = eol_cohort[eol_cohort["race"].isin(RUN_TABLE1_RACES)].copy() + totals = {race: int((cohort["race"] == race).sum()) for race in RUN_TABLE1_RACES} rows: list[dict[str, object]] = [] - metric_specs = [ - ("Population Size", None, None), - ("Insurance Private", "insurance_group", "Private"), - ("Insurance Public", "insurance_group", "Public"), - ("Insurance Self-Pay", "insurance_group", "Self-Pay"), - ("Discharge Deceased", "discharge_category", "Deceased"), - ("Discharge Hospice", "discharge_category", "Hospice"), - ("Discharge Skilled Nursing Facility", "discharge_category", "Skilled Nursing Facility"), - ("Gender F", "gender", "F"), - ("Gender M", "gender", "M"), - ] - for metric, column, target_value in metric_specs: - for race in ("BLACK", "WHITE"): + for metric, column, target_value in RUN_TABLE1_COUNT_SPECS: + for race 
in RUN_TABLE1_RACES: race_frame = cohort[cohort["race"] == race] if column is None: run_numeric = int(len(race_frame)) @@ -444,30 +328,18 @@ def build_paper_table1_comparison(eol_cohort: pd.DataFrame) -> pd.DataFrame: else: run_numeric = int((race_frame[column] == target_value).sum()) run_display = _format_count_percent(run_numeric, totals[race]) - paper_numeric = int(PAPER_TABLE1_COUNTS[metric][race]) - if column is None: - paper_display = str(paper_numeric) - else: - paper_display = _format_count_percent( - paper_numeric, - PAPER_TABLE1_COUNTS["Population Size"][race], - ) rows.append( { "metric": metric, "race": race, - "paper_value": paper_display, "run_value": run_display, - "paper_numeric": paper_numeric, "run_numeric": run_numeric, - "delta_numeric": int(run_numeric - paper_numeric), } ) - for metric, paper_values in PAPER_TABLE1_CONTINUOUS.items(): - for race in ("BLACK", "WHITE"): + for metric, series_name in RUN_TABLE1_CONTINUOUS_SPECS.items(): + for race in RUN_TABLE1_RACES: race_frame = cohort[cohort["race"] == race] - series_name = "los_days" if metric == "Length of stay (median days)" else "age" series = pd.to_numeric(race_frame[series_name], errors="coerce").dropna() if series.empty: run_numeric = float("nan") @@ -477,74 +349,39 @@ def build_paper_table1_comparison(eol_cohort: pd.DataFrame) -> pd.DataFrame: run_numeric = float(series.median()) run_lower = float(series.quantile(0.25)) run_upper = float(series.quantile(0.75)) - paper_numeric = float(paper_values[race]["center"]) - paper_lower = float(paper_values[race]["lower"]) - paper_upper = float(paper_values[race]["upper"]) rows.append( { "metric": metric, "race": race, "summary_stat": "median_iqr", - "paper_value": _format_continuous_summary(paper_numeric, paper_lower, paper_upper), "run_value": _format_continuous_summary(run_numeric, run_lower, run_upper), - "paper_numeric": paper_numeric, "run_numeric": run_numeric, - "paper_interval_lower": paper_lower, - "paper_interval_upper": paper_upper, 
"run_interval_lower": run_lower, "run_interval_upper": run_upper, - "delta_numeric": float(run_numeric - paper_numeric), } ) return pd.DataFrame(rows) -def build_paper_table2_comparison(race_treatment_results: pd.DataFrame) -> pd.DataFrame: - """Compare run race-based treatment durations against Table 2 / Figure 2 from the paper.""" +def _build_run_table2_summary(race_treatment_results: pd.DataFrame) -> pd.DataFrame: + """Build run-only Table 2 treatment summaries.""" if race_treatment_results.empty: return pd.DataFrame() - rows: list[dict[str, object]] = [] - for _, row in race_treatment_results.iterrows(): - treatment = row["treatment"] - if treatment not in PAPER_TABLE2_TREATMENT: - continue - paper = PAPER_TABLE2_TREATMENT[treatment] - run_median_black = float(row["median_black"]) - run_median_white = float(row["median_white"]) - run_pvalue = float(row["pvalue"]) - rows.append( - { - "treatment": treatment, - "paper_n_black": int(paper["n_black"]), - "run_n_black": int(row["n_black"]), - "paper_n_white": int(paper["n_white"]), - "run_n_white": int(row["n_white"]), - "paper_median_black": float(paper["median_black"]), - "run_median_black": run_median_black, - "delta_median_black": run_median_black - float(paper["median_black"]), - "paper_median_white": float(paper["median_white"]), - "run_median_white": run_median_white, - "delta_median_white": run_median_white - float(paper["median_white"]), - "paper_pvalue": float(paper["pvalue"]), - "run_pvalue": run_pvalue, - } - ) - return pd.DataFrame(rows) + return race_treatment_results[ + ["treatment", "n_black", "n_white", "median_black", "median_white", "pvalue"] + ].copy() -def build_paper_table3_comparison(feature_weight_summaries: dict[str, pd.DataFrame]) -> pd.DataFrame: - """Compare run proxy model top-3 feature weights against Table 3 from the paper.""" +def _build_run_table3_summary( + feature_weight_summaries: dict[str, pd.DataFrame | dict[str, pd.DataFrame]], +) -> pd.DataFrame: + """Build run-only Table 3 
top positive/negative proxy features.""" rows: list[dict[str, object]] = [] for model_name, weights_dict in feature_weight_summaries.items(): - if model_name not in PAPER_TABLE3_WEIGHTS: - continue - paper_model = PAPER_TABLE3_WEIGHTS[model_name] - - # weights_dict may be a dict with "all"/"positive"/"negative" keys if isinstance(weights_dict, dict): all_weights = weights_dict.get("all") if not isinstance(all_weights, pd.DataFrame) or all_weights.empty: @@ -557,68 +394,16 @@ def build_paper_table3_comparison(feature_weight_summaries: dict[str, pd.DataFra if "weight" not in all_weights.columns or "feature" not in all_weights.columns: continue - # Build a lookup from lowercase feature name to weight - run_lookup = { - str(f).lower().strip(): float(w) - for f, w in zip(all_weights["feature"], all_weights["weight"]) - } - alias_lookup = { - str(f).lower().strip(): str(f) - for f in all_weights["feature"] - } - model_aliases = PAPER_TABLE3_FEATURE_ALIASES.get(model_name, {}) - - for direction in ("positive", "negative"): - for rank, (paper_feature, paper_weight) in enumerate( - paper_model[direction], start=1 - ): - normalized_paper_feature = paper_feature.lower().strip() - matched_feature = alias_lookup.get(normalized_paper_feature) - run_weight = run_lookup.get(normalized_paper_feature, float("nan")) - if pd.isna(run_weight): - for alias in model_aliases.get(paper_feature, ()): - normalized_alias = alias.lower().strip() - alias_weight = run_lookup.get(normalized_alias, float("nan")) - if not pd.isna(alias_weight): - run_weight = alias_weight - matched_feature = alias_lookup.get(normalized_alias, alias) - break - rows.append( - { - "proxy_model": model_name, - "direction": direction, - "rank": int(rank), - "paper_feature": paper_feature, - "paper_weight": float(paper_weight), - "run_feature": matched_feature, - "run_weight": run_weight, - "delta_weight": run_weight - float(paper_weight) - if not pd.isna(run_weight) - else float("nan"), - "run_feature_found": not 
pd.isna(run_weight), - } - ) - return pd.DataFrame(rows) - - -def build_paper_table3_snapshot(feature_weight_summaries: dict[str, pd.DataFrame]) -> pd.DataFrame: - """Capture the run's top positive/negative proxy weights for qualitative review.""" - - rows: list[dict[str, object]] = [] - for model_name, weights_dict in feature_weight_summaries.items(): - # Handle both dict-of-DataFrames and plain DataFrame inputs - if isinstance(weights_dict, dict): - working = weights_dict.get("all") - if not isinstance(working, pd.DataFrame) or working.empty: - continue - elif isinstance(weights_dict, pd.DataFrame): - working = weights_dict - else: - continue - if "weight" not in working.columns or "feature" not in working.columns: - continue - positive = working[working["weight"] > 0].sort_values("weight", ascending=False).head(3) - negative = working[working["weight"] < 0].sort_values("weight", ascending=True).head(3) + positive = ( + all_weights[all_weights["weight"] > 0] + .sort_values("weight", ascending=False) + .head(3) + ) + negative = ( + all_weights[all_weights["weight"] < 0] + .sort_values("weight", ascending=True) + .head(3) + ) for direction, frame in (("positive", positive), ("negative", negative)): for rank, row in enumerate(frame.itertuples(index=False), start=1): rows.append( @@ -633,98 +418,28 @@ def build_paper_table3_snapshot(feature_weight_summaries: dict[str, pd.DataFrame return pd.DataFrame(rows) -def build_paper_table4_comparison(acuity_correlations: pd.DataFrame) -> pd.DataFrame: - """Compare run acuity/mistrust correlations against Table 4 from the paper.""" +def _build_run_table4_summary(acuity_correlations: pd.DataFrame) -> pd.DataFrame: + """Build run-only Table 4 correlation summaries.""" - rows: list[dict[str, object]] = [] + keyed_rows: dict[tuple[str, str], dict[str, object]] = {} for row in acuity_correlations.itertuples(index=False): - key = _canonical_pair(getattr(row, "feature_a"), getattr(row, "feature_b")) - if key not in 
PAPER_TABLE4_CORRELATIONS: - continue - paper_corr = float(PAPER_TABLE4_CORRELATIONS[key]) - run_corr = float(getattr(row, "correlation")) - rows.append( - { - "feature_a": key[0], - "feature_b": key[1], - "paper_correlation": paper_corr, - "run_correlation": run_corr, - "delta_correlation": float(run_corr - paper_corr), - } - ) - return pd.DataFrame(rows) - - -def build_paper_table5_comparison(downstream_auc_results: pd.DataFrame) -> pd.DataFrame: - """Compare downstream AUCs against Table 5 from the paper.""" - - rows: list[dict[str, object]] = [] - for row in downstream_auc_results.itertuples(index=False): - key = (getattr(row, "task"), getattr(row, "configuration")) - if key not in PAPER_TABLE5_AUC: - continue - paper = PAPER_TABLE5_AUC[key] - paper_mean = float(paper["auc_mean"]) - paper_std = float(paper["auc_std"]) - run_mean = float(getattr(row, "auc_mean")) - run_std = float(getattr(row, "auc_std")) - rows.append( - { - "task": key[0], - "configuration": key[1], - "paper_n_rows": int(paper["n_rows"]), - "run_n_rows": int(getattr(row, "n_rows")), - "paper_auc_mean": paper_mean, - "run_auc_mean": run_mean, - "delta_auc_mean": float(run_mean - paper_mean), - "paper_auc_std": paper_std, - "run_auc_std": run_std, - "delta_auc_std": float(run_std - paper_std), - "n_valid_auc": int(getattr(row, "n_valid_auc")), - } - ) - return pd.DataFrame(rows) - - -def build_paper_table6_comparison(downstream_weight_results: pd.DataFrame) -> pd.DataFrame: - """Compare Baseline + ALL downstream average weights against Table 6 from the paper.""" + feature_a = str(getattr(row, "feature_a")) + feature_b = str(getattr(row, "feature_b")) + key = _canonical_pair(feature_a, feature_b) + keyed_rows[key] = { + "feature_a": key[0], + "feature_b": key[1], + "correlation": float(getattr(row, "correlation")), + } + return pd.DataFrame(keyed_rows.values()) - if downstream_weight_results.empty: - return pd.DataFrame() - working = downstream_weight_results.copy() - working = 
working[working["configuration"] == "Baseline + ALL"].copy() - if working.empty: - return pd.DataFrame() - working["paper_feature"] = working["feature"].map(TABLE6_FEATURE_NAME_MAP) - working = working[working["paper_feature"].notna()].copy() +def _build_run_table5_summary(downstream_auc_results: pd.DataFrame) -> pd.DataFrame: + """Build run-only Table 5 downstream AUC summaries.""" - rows: list[dict[str, object]] = [] - for row in working.itertuples(index=False): - task_name = getattr(row, "task") - feature_name = getattr(row, "paper_feature") - if task_name not in PAPER_TABLE6_WEIGHTS: - continue - if feature_name not in PAPER_TABLE6_WEIGHTS[task_name]: - continue - paper_mean, paper_std = PAPER_TABLE6_WEIGHTS[task_name][feature_name] - run_mean = float(getattr(row, "weight_mean")) - run_std = float(getattr(row, "weight_std")) - # Paper Table 6 reports 1.96*std (95% CI half-width), not raw std - run_std_ci = run_std * 1.96 - rows.append( - { - "task": task_name, - "feature": feature_name, - "paper_weight_mean": float(paper_mean), - "run_weight_mean": run_mean, - "delta_weight_mean": float(run_mean - float(paper_mean)), - "paper_weight_std": float(paper_std), - "run_weight_std": run_std_ci, - "n_valid_weights": int(getattr(row, "n_valid_weights")), - } - ) - return pd.DataFrame(rows) + return downstream_auc_results[ + ["task", "configuration", "n_rows", "auc_mean", "auc_std", "n_valid_auc"] + ].copy() def _ensure_downstream_weight_results( @@ -732,9 +447,17 @@ def _ensure_downstream_weight_results( *, repetitions: int, ) -> pd.DataFrame: + validation_summary = artifacts.get("validation_summary", {}) + autopsy_proxy_enabled = True + if isinstance(validation_summary, dict): + autopsy_proxy_enabled = bool( + validation_summary.get("autopsy_proxy_enabled", True) + ) existing = artifacts.get("downstream_weight_results") if isinstance(existing, pd.DataFrame) and not existing.empty: return existing + if isinstance(existing, pd.DataFrame) and not autopsy_proxy_enabled: + 
return existing final_model_table = artifacts.get("final_model_table") if not isinstance(final_model_table, pd.DataFrame) or final_model_table.empty: return pd.DataFrame() @@ -742,10 +465,47 @@ def _ensure_downstream_weight_results( final_model_table=final_model_table, repetitions=repetitions, ) + if not autopsy_proxy_enabled and "feature" in computed.columns: + computed = computed.loc[ + computed["feature"] != "autopsy_score_z" + ].reset_index(drop=True) artifacts["downstream_weight_results"] = computed return computed +def _build_run_table6_summary( + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], + *, + repetitions: int, + autopsy_proxy_enabled: bool, +) -> pd.DataFrame: + """Build run-only Table 6 downstream weight summaries.""" + + table6_source = _ensure_downstream_weight_results(artifacts, repetitions=repetitions) + if not isinstance(table6_source, pd.DataFrame) or table6_source.empty: + return pd.DataFrame() + required = {"task", "configuration", "feature", "weight_mean", "weight_std"} + if not required.issubset(table6_source.columns): + return pd.DataFrame() + work = table6_source.loc[ + table6_source["configuration"] == "Baseline + ALL" + ].copy() + work["feature"] = work["feature"].map(RUN_TABLE6_FEATURE_NAME_MAP) + work = work.loc[work["feature"].notna()].copy() + if not autopsy_proxy_enabled: + work = work.loc[work["feature"] != "autopsy"].copy() + if work.empty: + return pd.DataFrame() + return work.rename( + columns={ + "weight_mean": "run_weight_mean", + "weight_std": "run_weight_std", + } + )[ + ["task", "feature", "run_weight_mean", "run_weight_std"] + ].reset_index(drop=True) + + def _render_run_table_summary( artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], *, @@ -766,7 +526,7 @@ def _render_run_table_summary( eol_cohort = artifacts.get("eol_cohort") table1 = ( - build_paper_table1_comparison(eol_cohort) + _build_run_table1_summary(eol_cohort) if _has_columns( eol_cohort, 
{"race", "insurance_group", "discharge_category", "gender", "los_days", "age"}, @@ -775,7 +535,7 @@ def _render_run_table_summary( ) race_treatment = artifacts.get("race_treatment_results") table2 = ( - build_paper_table2_comparison(race_treatment) + _build_run_table2_summary(race_treatment) if _has_columns( race_treatment, {"treatment", "n_black", "n_white", "median_black", "median_white", "pvalue"}, @@ -783,64 +543,104 @@ def _render_run_table_summary( and not race_treatment.empty else pd.DataFrame() ) - table3 = build_paper_table3_snapshot(feature_weight_summaries) + table3 = _build_run_table3_summary(feature_weight_summaries) if not autopsy_proxy_enabled and not table3.empty and "proxy_model" in table3.columns: table3 = table3.loc[table3["proxy_model"] != "autopsy"].reset_index(drop=True) acuity_correlations = artifacts.get("acuity_correlations") table4 = ( - build_paper_table4_comparison(acuity_correlations) + _build_run_table4_summary(acuity_correlations) if _has_columns(acuity_correlations, {"feature_a", "feature_b", "correlation"}) else pd.DataFrame() ) + if not autopsy_proxy_enabled and not table4.empty: + table4 = table4.loc[ + (table4["feature_a"] != "autopsy_score_z") + & (table4["feature_b"] != "autopsy_score_z") + ].reset_index(drop=True) downstream_auc_results = artifacts.get("downstream_auc_results") table5 = ( - build_paper_table5_comparison(downstream_auc_results) + _build_run_table5_summary(downstream_auc_results) if _has_columns( downstream_auc_results, - {"task", "configuration", "n_rows", "auc_mean", "auc_std"}, + {"task", "configuration", "n_rows", "auc_mean", "auc_std", "n_valid_auc"}, ) else pd.DataFrame() ) - table6_source = _ensure_downstream_weight_results(artifacts, repetitions=repetitions) - table6 = build_paper_table6_comparison(table6_source) - if not autopsy_proxy_enabled and not table6.empty and "feature" in table6.columns: - table6 = table6.loc[table6["feature"] != "autopsy"].reset_index(drop=True) + if not autopsy_proxy_enabled 
and not table5.empty: + table5 = table5.loc[ + table5["configuration"] != "Baseline + Autopsy" + ].reset_index(drop=True) + table6 = _build_run_table6_summary( + artifacts, + repetitions=repetitions, + autopsy_proxy_enabled=autopsy_proxy_enabled, + ) lines = [ "Run Table Results", + f"Route: {'Paper-like' if dataset_prepare_mode == 'paper_like' else 'Normal' if dataset_prepare_mode == 'default' else dataset_prepare_mode}", f"dataset_prepare_mode: {dataset_prepare_mode}", f"autopsy_proxy_enabled: {autopsy_proxy_enabled}", + f"repetitions: {repetitions}", "", ] if not table1.empty: lines.append("Table 1") - for row in table1.itertuples(index=False): - lines.append(f"- {row.metric}") - lines.append(f" {row.race}: {row.run_value}") + table1_metric_order = _ordered_present_values( + [metric for metric, _, _ in RUN_TABLE1_COUNT_SPECS] + + list(RUN_TABLE1_CONTINUOUS_SPECS.keys()), + table1["metric"].drop_duplicates().astype(str).tolist(), + ) + for metric in table1_metric_order: + metric_rows = table1.loc[table1["metric"] == metric] + if metric_rows.empty: + continue + lines.append(f"- {metric}") + for race in RUN_TABLE1_RACES: + race_rows = metric_rows.loc[metric_rows["race"] == race] + if race_rows.empty: + continue + lines.append(f" {race}: {race_rows.iloc[0]['run_value']}") lines.append("") if not table2.empty: lines.append("Table 2") - for row in table2.itertuples(index=False): + table2_by_treatment = { + str(row["treatment"]): row for _, row in table2.iterrows() + } + treatment_order = _ordered_present_values( + RUN_TABLE2_TREATMENT_ORDER, + list(table2_by_treatment.keys()), + ) + for treatment in treatment_order: + row = table2_by_treatment.get(treatment) + if row is None: + continue lines.append(f"- {row.treatment}") lines.append( - f" BLACK: n={int(row.run_n_black)}, median={float(row.run_median_black):.1f}" + f" BLACK: n={int(row.n_black)}, median={float(row.median_black):.1f}" ) lines.append( - f" WHITE: n={int(row.run_n_white)}, 
median={float(row.run_median_white):.1f}" + f" WHITE: n={int(row.n_white)}, median={float(row.median_white):.1f}" ) - if not pd.isna(row.run_pvalue): - lines.append(f" pvalue: {float(row.run_pvalue)}") + if not pd.isna(row.pvalue): + lines.append(f" pvalue: {float(row.pvalue)}") lines.append("") if not table3.empty: lines.append("Table 3") - for proxy_model in table3["proxy_model"].drop_duplicates().tolist(): + proxy_order = _ordered_present_values( + RUN_TABLE3_PROXY_ORDER, + table3["proxy_model"].drop_duplicates().astype(str).tolist(), + ) + for proxy_model in proxy_order: lines.append(f"- {proxy_model}") proxy_rows = table3.loc[table3["proxy_model"] == proxy_model] for direction in ("positive", "negative"): - direction_rows = proxy_rows.loc[proxy_rows["direction"] == direction] + direction_rows = proxy_rows.loc[ + proxy_rows["direction"] == direction + ].sort_values("rank") if direction_rows.empty: continue lines.append(f" {direction}:") @@ -852,27 +652,79 @@ def _render_run_table_summary( if not table4.empty: lines.append("Table 4") - for row in table4.itertuples(index=False): + table4_keyed = { + _canonical_pair(row["feature_a"], row["feature_b"]): row + for _, row in table4.iterrows() + } + feature_rank = { + feature_name: index + for index, feature_name in enumerate(RUN_TABLE4_FEATURE_ORDER) + } + for key in sorted( + table4_keyed.keys(), + key=lambda pair: ( + feature_rank.get(pair[0], len(feature_rank)), + feature_rank.get(pair[1], len(feature_rank)), + pair[0], + pair[1], + ), + ): + row = table4_keyed.get(key) + if row is None: + continue lines.append( - f"- {row.feature_a} vs {row.feature_b}: {float(row.run_correlation):.3f}" + f"- {row.feature_a} vs {row.feature_b}: {float(row.correlation):.3f}" ) lines.append("") if not table5.empty: lines.append("Table 5") - for row in table5.itertuples(index=False): - lines.append(f"- {row.task} | {row.configuration}") - lines.append(f" n_rows: {int(row.run_n_rows)}") - lines.append(f" auc_mean: 
{float(row.run_auc_mean):.3f}") - lines.append(f" auc_std: {float(row.run_auc_std):.3f}") + table5_keyed = { + (str(row["task"]), str(row["configuration"])): row + for _, row in table5.iterrows() + } + task_order = _ordered_present_values( + RUN_TABLE5_TASK_ORDER, + table5["task"].drop_duplicates().astype(str).tolist(), + ) + for task_name in task_order: + present_configs = table5.loc[ + table5["task"] == task_name, "configuration" + ].astype(str).tolist() + config_order = _ordered_present_values( + RUN_TABLE5_CONFIGURATION_ORDER, + present_configs, + ) + for configuration in config_order: + row = table5_keyed.get((task_name, configuration)) + if row is None: + continue + lines.append(f"- {row.task} | {row.configuration}") + lines.append(f" n_rows: {int(row.n_rows)}") + lines.append(f" auc_mean: {float(row.auc_mean):.3f}") + lines.append(f" auc_std: {float(row.auc_std):.3f}") lines.append("") if not table6.empty: lines.append("Table 6") - for task_name in table6["task"].drop_duplicates().tolist(): + table6_task_order = _ordered_present_values( + RUN_TABLE6_TASK_ORDER, + table6["task"].drop_duplicates().astype(str).tolist(), + ) + for task_name in table6_task_order: lines.append(f"- {task_name}") task_rows = table6.loc[table6["task"] == task_name] - for row in task_rows.itertuples(index=False): + task_row_lookup = { + str(row["feature"]): row for _, row in task_rows.iterrows() + } + feature_order = _ordered_present_values( + RUN_TABLE6_FEATURE_ORDER, + list(task_row_lookup.keys()), + ) + for feature_name in feature_order: + row = task_row_lookup.get(feature_name) + if row is None: + continue lines.append( f" {row.feature}: mean={float(row.run_weight_mean):.3f}, std={float(row.run_weight_std):.3f}" ) @@ -896,209 +748,6 @@ def write_run_table_summary_artifacts( ) -def build_paper_comparison_outputs( - artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], - *, - repetitions: int, -) -> dict[str, pd.DataFrame | dict[str, object]]: - """Build 
paper-aligned comparison tables from an example run.""" - - validation_summary = artifacts.get("validation_summary", {}) - autopsy_proxy_enabled = True - if isinstance(validation_summary, dict): - autopsy_proxy_enabled = bool(validation_summary.get("autopsy_proxy_enabled", True)) - - feature_weight_summaries = artifacts.get("feature_weight_summaries", {}) - if not isinstance(feature_weight_summaries, dict): - feature_weight_summaries = {} - - table1 = build_paper_table1_comparison(artifacts["eol_cohort"]) if isinstance(artifacts.get("eol_cohort"), pd.DataFrame) else pd.DataFrame() - race_treatment = artifacts.get("race_treatment_results") - table2 = build_paper_table2_comparison(race_treatment) if isinstance(race_treatment, pd.DataFrame) and not race_treatment.empty else pd.DataFrame() - table3_comparison = build_paper_table3_comparison(feature_weight_summaries) - table3_snapshot = build_paper_table3_snapshot(feature_weight_summaries) - table4 = build_paper_table4_comparison(artifacts["acuity_correlations"]) if isinstance(artifacts.get("acuity_correlations"), pd.DataFrame) else pd.DataFrame() - table5 = build_paper_table5_comparison(artifacts["downstream_auc_results"]) if isinstance(artifacts.get("downstream_auc_results"), pd.DataFrame) else pd.DataFrame() - table6_source = _ensure_downstream_weight_results(artifacts, repetitions=repetitions) - table6 = build_paper_table6_comparison(table6_source) - if not autopsy_proxy_enabled and not table6.empty and "feature" in table6.columns: - table6 = table6.loc[table6["feature"] != "autopsy"].reset_index(drop=True) - - summary = { - "paper_url": PAPER_URL, - "paper_pdf_url": PAPER_PDF_URL, - "table1_rows": int(len(table1)), - "table2_rows": int(len(table2)), - "table3_comparison_rows": int(len(table3_comparison)), - "table3_snapshot_rows": int(len(table3_snapshot)), - "table4_rows": int(len(table4)), - "table5_rows": int(len(table5)), - "table6_rows": int(len(table6)), - "table2_max_abs_delta_median": ( - float( - max( - 
table2["delta_median_black"].abs().max(), - table2["delta_median_white"].abs().max(), - ) - ) - if not table2.empty - else None - ), - "table3_comparison_features_found": ( - int(table3_comparison["run_feature_found"].sum()) - if not table3_comparison.empty - else 0 - ), - "table3_comparison_features_total": int(len(table3_comparison)), - "table3_comparison_max_abs_delta": ( - float(table3_comparison["delta_weight"].dropna().abs().max()) - if not table3_comparison.empty and table3_comparison["delta_weight"].notna().any() - else None - ), - "table4_max_abs_delta": ( - float(table4["delta_correlation"].abs().max()) if not table4.empty else None - ), - "table5_max_abs_delta": ( - float(table5["delta_auc_mean"].abs().max()) if not table5.empty else None - ), - "table6_max_abs_delta": ( - float(table6["delta_weight_mean"].abs().max()) if not table6.empty else None - ), - } - - return { - "summary": summary, - "table1_comparison": table1, - "table2_comparison": table2, - "table3_comparison": table3_comparison, - "table3_snapshot": table3_snapshot, - "table4_comparison": table4, - "table5_comparison": table5, - "table6_comparison": table6, - } - - -def write_paper_comparison_artifacts( - comparison_outputs: dict[str, pd.DataFrame | dict[str, object]], - output_dir: Path, - *, - include_summary: bool = True, -) -> None: - """Write paper comparison tables and summary next to the example deliverables.""" - - output_dir.mkdir(parents=True, exist_ok=True) - for name, artifact in comparison_outputs.items(): - if isinstance(artifact, pd.DataFrame): - artifact.to_csv(output_dir / f"{name}.csv", index=False) - elif isinstance(artifact, dict): - (output_dir / f"{name}.json").write_text(json.dumps(artifact, indent=2)) - if include_summary: - (output_dir / "paper_comparison_summary.txt").write_text( - _render_paper_comparison_summary(comparison_outputs) + "\n", - encoding="utf-8", - ) - - -def _render_paper_comparison_summary( - comparison_outputs: dict[str, pd.DataFrame | dict[str, 
object]], -) -> str: - lines: list[str] = [] - - summary = comparison_outputs.get("summary", {}) - if isinstance(summary, dict): - lines.append("Paper comparison summary:") - for key in ( - "table1_rows", - "table2_rows", - "table3_snapshot_rows", - "table4_rows", - "table5_rows", - "table6_rows", - "table4_max_abs_delta", - "table5_max_abs_delta", - "table6_max_abs_delta", - ): - value = summary.get(key) - if value is not None: - lines.append(f" {key}: {value}") - - table1 = comparison_outputs.get("table1_comparison") - if isinstance(table1, pd.DataFrame) and not table1.empty: - lines.append("") - lines.append("Table 1 vs Paper:") - for row in table1.itertuples(index=False): - lines.append(f" {row.metric} | {row.race} | paper={row.paper_value} | run={row.run_value}") - - table2 = comparison_outputs.get("table2_comparison") - if isinstance(table2, pd.DataFrame) and not table2.empty: - lines.append("") - lines.append("Table 2 vs Paper:") - for row in table2.itertuples(index=False): - lines.append( - " " - f"{row.treatment} | " - f"black n {int(row.paper_n_black)}->{int(row.run_n_black)}, median {row.paper_median_black:.1f}->{row.run_median_black:.1f} | " - f"white n {int(row.paper_n_white)}->{int(row.run_n_white)}, median {row.paper_median_white:.1f}->{row.run_median_white:.1f}" - ) - - table3 = comparison_outputs.get("table3_comparison") - if isinstance(table3, pd.DataFrame) and not table3.empty: - lines.append("") - lines.append("Table 3 vs Paper:") - for row in table3.itertuples(index=False): - run_weight = "missing" if pd.isna(row.run_weight) else f"{float(row.run_weight):.4f}" - lines.append( - " " - f"{row.proxy_model} | {row.direction} #{int(row.rank)} | {row.paper_feature} | " - f"paper={float(row.paper_weight):.4f} | run={run_weight} | found={bool(row.run_feature_found)}" - ) - - table4 = comparison_outputs.get("table4_comparison") - if isinstance(table4, pd.DataFrame) and not table4.empty: - lines.append("") - lines.append("Table 4 vs Paper:") - for row in 
table4.itertuples(index=False): - lines.append( - " " - f"{row.feature_a} vs {row.feature_b} | " - f"paper={float(row.paper_correlation):.3f} | run={float(row.run_correlation):.3f}" - ) - - table5 = comparison_outputs.get("table5_comparison") - if isinstance(table5, pd.DataFrame) and not table5.empty: - lines.append("") - lines.append("Table 5 vs Paper:") - for row in table5.itertuples(index=False): - lines.append( - " " - f"{row.task} | {row.configuration} | " - f"n {int(row.paper_n_rows)}->{int(row.run_n_rows)} | " - f"auc {float(row.paper_auc_mean):.3f}->{float(row.run_auc_mean):.3f}" - ) - - table6 = comparison_outputs.get("table6_comparison") - if isinstance(table6, pd.DataFrame) and not table6.empty: - lines.append("") - lines.append("Table 6 vs Paper:") - for row in table6.itertuples(index=False): - lines.append( - " " - f"{row.task} | {row.feature} | " - f"paper={float(row.paper_weight_mean):.3f} | run={float(row.run_weight_mean):.3f}" - ) - - return "\n".join(lines) - - -def _print_paper_comparison_summary( - comparison_outputs: dict[str, pd.DataFrame | dict[str, object]], -) -> None: - rendered = _render_paper_comparison_summary(comparison_outputs) - if rendered: - print() - print(rendered) - - def _log_stage(stage_start: float, pipeline_start: float, message: str) -> None: """Print a timing log line for a pipeline stage.""" elapsed_stage = time.time() - stage_start @@ -1147,14 +796,19 @@ def _build_managed_run_name(route_settings: _RouteSettings, timestamp: str) -> s return f"EOL_{_managed_run_route_label(route_settings)}_{timestamp}" +def _build_ablation_run_name(timestamp: str) -> str: + """Return the managed run directory name for the route ablation study.""" + + return f"EOL_ablation_normal_vs_paperlike_{timestamp}" + + def _prepare_managed_run_directories( *, result_root: Path, route_settings: _RouteSettings, output_dir: Path | None, - stream_cache_dir: Path | None, ) -> dict[str, Path | str]: - """Create a managed run archive directory and resolve 
default output/cache paths.""" + """Create a managed run archive directory and resolve the output path.""" timestamp = _current_run_timestamp() base_name = _build_managed_run_name(route_settings, timestamp) @@ -1168,16 +822,37 @@ def _prepare_managed_run_directories( run_dir.mkdir(parents=True, exist_ok=False) resolved_output_dir = output_dir if output_dir is not None else run_dir / "result" - resolved_stream_cache_dir = ( - stream_cache_dir if stream_cache_dir is not None else run_dir / "cache" - ) resolved_output_dir.mkdir(parents=True, exist_ok=True) - resolved_stream_cache_dir.mkdir(parents=True, exist_ok=True) return { "run_name": run_name, "run_dir": run_dir, "output_dir": resolved_output_dir, - "stream_cache_dir": resolved_stream_cache_dir, + } + + +def _prepare_ablation_run_directories( + *, + result_root: Path, + output_dir: Path | None, +) -> dict[str, Path | str]: + """Create a managed run archive directory for the normal-vs-paper-like study.""" + + timestamp = _current_run_timestamp() + base_name = _build_ablation_run_name(timestamp) + run_name = base_name + run_dir = output_dir if output_dir is not None else result_root / run_name + suffix = 1 + while run_dir.exists(): + run_name = f"{base_name}_{suffix:02d}" + run_dir = ( + output_dir.parent / run_name if output_dir is not None else result_root / run_name + ) + suffix += 1 + + run_dir.mkdir(parents=True, exist_ok=False) + return { + "run_name": run_name, + "run_dir": run_dir, } @@ -1209,7 +884,6 @@ def _render_managed_run_summary( route_settings: _RouteSettings, args: argparse.Namespace, resolved_output_dir: Path, - resolved_stream_cache_dir: Path, started_at: datetime, finished_at: datetime, total_runtime_seconds: float, @@ -1228,19 +902,9 @@ def _render_managed_run_summary( f"finished_at: {finished_at.isoformat(timespec='seconds')}", f"total_runtime_seconds: {total_runtime_seconds:.3f}", f"result_dir: {resolved_output_dir}", - f"stream_cache_base_dir: {resolved_stream_cache_dir}", - ( - 
f"paper_comparison_summary_file: {run_dir / 'paper_comparison_summary.txt'}" - if args.compare_to_paper - else "paper_comparison_summary_file: disabled" - ), f"run_table_summary_file: {run_dir / 'run_table_summary.txt'}", - f"reuse_intermediates: {args.reuse_intermediates}", - f"compare_to_paper: {args.compare_to_paper}", f"paper_like_dataset_prepare: {args.paper_like_dataset_prepare}", f"repetitions: {args.repetitions}", - f"note_chunksize: {args.note_chunksize}", - f"chartevent_chunksize: {args.chartevent_chunksize}", f"command: {' '.join(sys.argv)}", "", "Validation summary:", @@ -1265,7 +929,6 @@ def _write_managed_run_artifacts( route_settings: _RouteSettings, args: argparse.Namespace, resolved_output_dir: Path, - resolved_stream_cache_dir: Path, started_at: datetime, finished_at: datetime, total_runtime_seconds: float, @@ -1279,26 +942,12 @@ def _write_managed_run_artifacts( route_settings=route_settings, args=args, resolved_output_dir=resolved_output_dir, - resolved_stream_cache_dir=resolved_stream_cache_dir, started_at=started_at, finished_at=finished_at, total_runtime_seconds=total_runtime_seconds, artifacts=artifacts, ) (run_dir / "RUN_SUMMARY.txt").write_text(summary_text, encoding="utf-8") - (run_dir / "RUN_TIME.txt").write_text( - "\n".join( - [ - "EOL run timing:", - f"managed_run_name: {run_name}", - f"started_at: {started_at.isoformat(timespec='seconds')}", - f"finished_at: {finished_at.isoformat(timespec='seconds')}", - f"total_runtime_seconds: {total_runtime_seconds:.3f}", - ] - ) - + "\n", - encoding="utf-8", - ) manifest = { "managed_run_name": run_name, @@ -1309,15 +958,8 @@ def _write_managed_run_artifacts( "finished_at": finished_at.isoformat(timespec="seconds"), "total_runtime_seconds": round(float(total_runtime_seconds), 6), "result_dir": str(resolved_output_dir), - "stream_cache_base_dir": str(resolved_stream_cache_dir), - "reuse_intermediates": ( - str(args.reuse_intermediates) if args.reuse_intermediates is not None else None - ), - 
"compare_to_paper": bool(args.compare_to_paper), "paper_like_dataset_prepare": bool(args.paper_like_dataset_prepare), "repetitions": int(args.repetitions), - "note_chunksize": int(args.note_chunksize), - "chartevent_chunksize": int(args.chartevent_chunksize), "command": sys.argv, "validation_summary": artifacts.get("validation_summary", {}), "core_artifact_shapes": _collect_core_artifact_shapes(artifacts), @@ -1332,12 +974,238 @@ def _write_managed_run_artifacts( repetitions=int(args.repetitions), ) - comparison_outputs = artifacts.get("paper_comparison") - if args.compare_to_paper and isinstance(comparison_outputs, dict): - (run_dir / "paper_comparison_summary.txt").write_text( - _render_paper_comparison_summary(comparison_outputs) + "\n", - encoding="utf-8", + +def _route_display_label(route_settings: _RouteSettings) -> str: + """Return the user-facing display name for a route.""" + + return "Paper-like" if route_settings.mode_name == "paper_like" else "Normal" + + +def _namespace_from_args_like(args: argparse.Namespace | object) -> argparse.Namespace: + """Return an argparse.Namespace built from an argparse-like object.""" + + if isinstance(args, argparse.Namespace): + return argparse.Namespace(**vars(args)) + + attributes = { + name: getattr(args, name) + for name in dir(args) + if not name.startswith("_") and not callable(getattr(args, name)) + } + return argparse.Namespace(**attributes) + + +def _route_auc_summary_lines( + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], +) -> list[str]: + """Return a compact AUC summary for the route ablation appendix output.""" + + auc_results = artifacts.get("downstream_auc_results") + if not isinstance(auc_results, pd.DataFrame) or auc_results.empty: + return [] + required = {"task", "configuration", "auc_mean", "auc_std"} + if not required.issubset(auc_results.columns): + return [] + + lines: list[str] = [] + for task_name in RUN_TABLE5_TASK_ORDER: + row = auc_results.loc[ + 
(auc_results["task"] == task_name) + & (auc_results["configuration"] == "Baseline + ALL") + ] + if row.empty: + continue + selected = row.iloc[0] + lines.append( + f" {task_name} | Baseline + ALL: " + f"auc_mean: {float(selected['auc_mean']):.3f}, " + f"auc_std: {float(selected['auc_std']):.3f}" ) + return lines + + +def _route_has_autopsy_weight( + artifacts: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], +) -> bool: + """Return whether the run includes an autopsy weight in Table 6 output.""" + + weights = artifacts.get("downstream_weight_results") + if not isinstance(weights, pd.DataFrame) or weights.empty: + return False + if "feature" not in weights.columns: + return False + return bool((weights["feature"] == "autopsy_score_z").any()) + + +def _render_ablation_summary( + *, + run_name: str, + run_dir: Path, + repetitions: int, + started_at: datetime, + finished_at: datetime, + total_runtime_seconds: float, + route_results: list[dict[str, object]], +) -> str: + """Render a compact route-ablation summary for Normal vs Paper-like.""" + + lines = [ + "Route Ablation Study", + f"managed_run_name: {run_name}", + f"managed_run_dir: {run_dir}", + "ablation_variable: route (Normal vs Paper-like)", + "ablation_focus: corrected default path without autopsy vs paper-like path with autopsy", + f"started_at: {started_at.isoformat(timespec='seconds')}", + f"finished_at: {finished_at.isoformat(timespec='seconds')}", + f"total_runtime_seconds: {total_runtime_seconds:.3f}", + f"repetitions: {repetitions}", + f"command: {' '.join(sys.argv)}", + "", + ] + + for route_result in route_results: + route_settings = route_result["route_settings"] + artifacts = route_result["artifacts"] + route_run_dir = route_result["run_dir"] + route_output_dir = route_result["output_dir"] + validation_summary = artifacts.get("validation_summary", {}) + final_model_table = artifacts.get("final_model_table") + final_model_rows = ( + int(final_model_table.shape[0]) + if 
isinstance(final_model_table, pd.DataFrame) + else 0 + ) + autopsy_proxy_enabled = False + if isinstance(validation_summary, dict): + autopsy_proxy_enabled = bool( + validation_summary.get("autopsy_proxy_enabled", False) + ) + + lines.extend( + [ + f"{_route_display_label(route_settings)}:", + f" route_mode: {route_settings.mode_name}", + f" autopsy_proxy_enabled: {autopsy_proxy_enabled}", + f" final_model_table_rows: {final_model_rows}", + f" run_dir: {route_run_dir}", + f" result_dir: {route_output_dir}", + f" run_summary_file: {route_run_dir / 'RUN_SUMMARY.txt'}", + f" run_table_summary_file: {route_run_dir / 'run_table_summary.txt'}", + f" has_autopsy_weight: {_route_has_autopsy_weight(artifacts)}", + ] + ) + lines.extend(_route_auc_summary_lines(artifacts)) + lines.append("") + + return "\n".join(lines).rstrip() + "\n" + + +def _run_single_managed_route( + *, + root: Path, + repetitions: int, + route_settings: _RouteSettings, + route_output_dir: Path, + route_run_dir: Path, + route_run_name: str, + args: argparse.Namespace, +) -> dict[str, object]: + """Run one route and write the standard managed-run artifacts.""" + + route_run_dir.mkdir(parents=True, exist_ok=True) + route_output_dir.mkdir(parents=True, exist_ok=True) + started_at = datetime.now() + total_start = time.time() + artifacts = build_eol_mistrust_outputs( + root=root, + repetitions=repetitions, + output_dir=route_output_dir, + paper_like_dataset_prepare=(route_settings.mode_name == "paper_like"), + ) + finished_at = datetime.now() + total_runtime_seconds = time.time() - total_start + + route_args = _namespace_from_args_like(args) + route_args.paper_like_dataset_prepare = route_settings.mode_name == "paper_like" + _write_managed_run_artifacts( + run_name=route_run_name, + run_dir=route_run_dir, + route_settings=route_settings, + args=route_args, + resolved_output_dir=route_output_dir, + started_at=started_at, + finished_at=finished_at, + total_runtime_seconds=total_runtime_seconds, + 
artifacts=artifacts, + ) + return { + "route_settings": route_settings, + "run_dir": route_run_dir, + "output_dir": route_output_dir, + "artifacts": artifacts, + "started_at": started_at, + "finished_at": finished_at, + "total_runtime_seconds": total_runtime_seconds, + } + + +def _run_route_ablation_study(args: argparse.Namespace) -> None: + """Run the explicit Normal vs Paper-like route ablation study.""" + + if getattr(args, "task_demo", False) or getattr(args, "task_demo_train_eval", False): + raise ValueError( + "--ablation-study cannot be combined with --task-demo or " + "--task-demo-train-eval." + ) + + result_root = getattr(args, "result_root", DEFAULT_RESULT_ROOT) + ablation_run = _prepare_ablation_run_directories( + result_root=result_root, + output_dir=args.output_dir, + ) + run_name = str(ablation_run["run_name"]) + run_dir = Path(ablation_run["run_dir"]) + started_at = datetime.now() + total_start = time.time() + + normal_settings = _build_route_settings(False) + paperlike_settings = _build_route_settings(True) + route_results = [ + _run_single_managed_route( + root=args.root, + repetitions=args.repetitions, + route_settings=normal_settings, + route_output_dir=run_dir / "normal" / "result", + route_run_dir=run_dir / "normal", + route_run_name=f"{run_name}_normal", + args=args, + ), + _run_single_managed_route( + root=args.root, + repetitions=args.repetitions, + route_settings=paperlike_settings, + route_output_dir=run_dir / "paper_like" / "result", + route_run_dir=run_dir / "paper_like", + route_run_name=f"{run_name}_paper_like", + args=args, + ), + ] + finished_at = datetime.now() + total_runtime_seconds = time.time() - total_start + + summary_text = _render_ablation_summary( + run_name=run_name, + run_dir=run_dir, + repetitions=int(args.repetitions), + started_at=started_at, + finished_at=finished_at, + total_runtime_seconds=total_runtime_seconds, + route_results=route_results, + ) + (run_dir / "ABLATION_SUMMARY.txt").write_text(summary_text, 
encoding="utf-8") + + print(f"Managed route ablation archive: {run_dir}") + print(f"Wrote ablation summary to: {run_dir / 'ABLATION_SUMMARY.txt'}") def _build_route_settings(paper_like_dataset_prepare: bool) -> _RouteSettings: @@ -1365,60 +1233,6 @@ def _build_route_settings(paper_like_dataset_prepare: bool) -> _RouteSettings: ) -def _resolve_stage_cache_dir( - *, - output_dir: Path | None, - stream_cache_dir: Path | None, - route_settings: _RouteSettings, -) -> Path | None: - """Return the directory used for streamed-stage checkpoint CSVs.""" - - if stream_cache_dir is not None: - return Path(stream_cache_dir) / route_settings.mode_name - return output_dir - - -def _has_reuse_cache_files(directory: Path) -> bool: - required = ( - "note_corpus.csv", - "note_labels.csv", - "chartevent_feature_matrix.csv", - "code_status_targets.csv", - ) - return all((directory / filename).exists() for filename in required) - - -def _resolve_reuse_dir( - reuse_intermediates: Path | None, - *, - route_settings: _RouteSettings, -) -> Path | None: - """Resolve the reuse directory, allowing a base cache dir with mode subfolders.""" - - if reuse_intermediates is None: - return None - direct = Path(reuse_intermediates) - if _has_reuse_cache_files(direct): - return direct - mode_dir = direct / route_settings.mode_name - if _has_reuse_cache_files(mode_dir): - return mode_dir - return direct - - -def _write_stage_cache_frame( - output_dir: Path | None, - filename: str, - frame: pd.DataFrame, -) -> None: - """Persist a reusable CSV artifact as soon as its stage completes.""" - - if output_dir is None: - return - output_dir.mkdir(parents=True, exist_ok=True) - frame.to_csv(output_dir / filename, index=False) - - def _disable_autopsy_scores(mistrust_scores: pd.DataFrame) -> pd.DataFrame: """Return a schema-stable score table with the autopsy proxy disabled.""" @@ -1542,25 +1356,15 @@ def _disable_autopsy_outputs( return adjusted -def _build_or_reuse_mistrust_scores( +def _build_mistrust_scores( 
*, model: object, feature_matrix: pd.DataFrame, note_labels: pd.DataFrame, note_corpus: pd.DataFrame, - reuse_dir: Path | None, - stage_cache_dir: Path | None, pipeline_start: float, autopsy_enabled: bool, ) -> pd.DataFrame: - cached_path = None if reuse_dir is None else reuse_dir / "mistrust_scores.csv" - if cached_path is not None and cached_path.exists(): - t0 = time.time() - print(f"[REUSE] Loading mistrust_scores from {reuse_dir}", flush=True) - mistrust_scores = pd.read_csv(cached_path, low_memory=False) - _log_stage(t0, pipeline_start, f"Reused mistrust scores ({len(mistrust_scores)} rows)") - return mistrust_scores - if hasattr(model, "estimator_factory") and hasattr(model, "sentiment_fn"): estimator_factory = getattr(model, "estimator_factory") sentiment_fn = getattr(model, "sentiment_fn") @@ -1612,7 +1416,6 @@ def _build_or_reuse_mistrust_scores( "negative_sentiment_score": "negative_sentiment_score_z", } ).reset_index(drop=True) - _write_stage_cache_frame(stage_cache_dir, "mistrust_scores.csv", mistrust_scores) _log_stage(t_total, pipeline_start, "Built mistrust scores (proxy models + sentiment)") return mistrust_scores @@ -1622,41 +1425,19 @@ def _build_or_reuse_mistrust_scores( note_labels=note_labels, note_corpus=note_corpus, ) - _write_stage_cache_frame(stage_cache_dir, "mistrust_scores.csv", mistrust_scores) _log_stage(t0, pipeline_start, "Built mistrust scores (proxy models + sentiment)") return mistrust_scores -def _build_or_reuse_note_artifacts( +def _build_note_artifacts( *, noteevents_csv_path: Path, all_cohort: pd.DataFrame, - reuse_dir: Path | None, - stage_cache_dir: Path | None, route_settings: _RouteSettings, note_chunksize: int, pipeline_start: float, ) -> tuple[pd.DataFrame, pd.DataFrame, list[int], pd.DataFrame]: - can_reuse = ( - reuse_dir is not None - and (reuse_dir / "note_corpus.csv").exists() - and (reuse_dir / "note_labels.csv").exists() - ) - t0 = time.time() - if can_reuse: - print(f"[REUSE] Loading note_corpus & note_labels 
from {reuse_dir}", flush=True) - note_corpus = pd.read_csv(reuse_dir / "note_corpus.csv", low_memory=False) - note_labels = pd.read_csv(reuse_dir / "note_labels.csv", low_memory=False) - note_present_hadm_ids = _note_present_hadm_ids(note_corpus) - filtered_all_cohort = all_cohort.loc[all_cohort["hadm_id"].isin(note_present_hadm_ids)].copy() - _log_stage( - t0, - pipeline_start, - f"Reused note artifacts ({len(note_corpus)} corpus rows, {len(note_labels)} label rows)", - ) - return note_corpus, note_labels, note_present_hadm_ids, filtered_all_cohort - note_corpus = build_note_corpus_from_csv( noteevents_csv_path=noteevents_csv_path, all_hadm_ids=all_cohort["hadm_id"], @@ -1666,7 +1447,6 @@ def _build_or_reuse_note_artifacts( note_present_hadm_ids = _note_present_hadm_ids(note_corpus) filtered_all_cohort = all_cohort.loc[all_cohort["hadm_id"].isin(note_present_hadm_ids)].copy() note_corpus = note_corpus.loc[note_corpus["hadm_id"].isin(note_present_hadm_ids)].copy() - _write_stage_cache_frame(stage_cache_dir, "note_corpus.csv", note_corpus) _log_stage(t0, pipeline_start, f"Streamed note corpus ({len(note_corpus)} rows)") t0 = time.time() @@ -1676,40 +1456,20 @@ def _build_or_reuse_note_artifacts( autopsy_label_mode=route_settings.autopsy_label_mode, chunksize=note_chunksize, ) - _write_stage_cache_frame(stage_cache_dir, "note_labels.csv", note_labels) _log_stage(t0, pipeline_start, f"Streamed note labels ({len(note_labels)} rows)") return note_corpus, note_labels, note_present_hadm_ids, filtered_all_cohort -def _build_or_reuse_chartevent_artifacts( +def _build_chartevent_artifacts( *, chartevents_csv_path: Path, d_items: pd.DataFrame, note_present_hadm_ids: list[int], - reuse_dir: Path | None, - stage_cache_dir: Path | None, route_settings: _RouteSettings, chartevent_chunksize: int, pipeline_start: float, ) -> tuple[pd.DataFrame, pd.DataFrame]: - can_reuse = ( - reuse_dir is not None - and (reuse_dir / "chartevent_feature_matrix.csv").exists() - and (reuse_dir / 
"code_status_targets.csv").exists() - ) - t0 = time.time() - if can_reuse: - print(f"[REUSE] Loading feature_matrix & code_status_targets from {reuse_dir}", flush=True) - feature_matrix = pd.read_csv(reuse_dir / "chartevent_feature_matrix.csv", low_memory=False) - code_status_targets = pd.read_csv(reuse_dir / "code_status_targets.csv", low_memory=False) - _log_stage( - t0, - pipeline_start, - f"Reused chartevent artifacts ({len(feature_matrix)} feature rows, {len(code_status_targets)} target rows)", - ) - return feature_matrix, code_status_targets - feature_matrix, code_status_targets = build_chartevent_artifacts_from_csv( chartevents_csv_path=chartevents_csv_path, d_items=d_items, @@ -1718,8 +1478,6 @@ def _build_or_reuse_chartevent_artifacts( paper_like=route_settings.autopsy_enabled, code_status_mode=route_settings.code_status_mode, ) - _write_stage_cache_frame(stage_cache_dir, "chartevent_feature_matrix.csv", feature_matrix) - _write_stage_cache_frame(stage_cache_dir, "code_status_targets.csv", code_status_targets) _log_stage(t0, pipeline_start, f"Streamed chartevents ({len(feature_matrix)} feature rows)") return feature_matrix, code_status_targets @@ -1727,25 +1485,10 @@ def _build_or_reuse_chartevent_artifacts( def build_eol_mistrust_outputs( root: Path, repetitions: int = 100, - include_downstream_weight_summary: bool = False, - include_cdf_plot_data: bool = False, - compare_to_paper: bool = False, output_dir: Path | None = None, - stream_cache_dir: Path | None = None, - note_chunksize: int = 100_000, - chartevent_chunksize: int = 500_000, - reuse_intermediates: Path | None = None, paper_like_dataset_prepare: bool = False, ) -> dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]]: - """Run the local end-to-end EOL mistrust workflow over downloaded CSV files. 
- - When *reuse_intermediates* points to a previous output directory that - contains cached CSV artifacts (note_corpus, note_labels, - chartevent_feature_matrix, code_status_targets, optionally - mistrust_scores), the expensive CSV streaming stages are skipped and - those frames are loaded from disk instead. Everything downstream is - recomputed unless a reusable ``mistrust_scores.csv`` is also present. - """ + """Run the local end-to-end EOL mistrust workflow over downloaded CSV files.""" t_pipeline = time.time() route_settings = _build_route_settings(paper_like_dataset_prepare) @@ -1765,17 +1508,6 @@ def build_eol_mistrust_outputs( "dataset_prepare_mode": route_settings.mode_name, "autopsy_proxy_enabled": route_settings.autopsy_enabled, } - _stage_cache_dir = _resolve_stage_cache_dir( - output_dir=output_dir, - stream_cache_dir=stream_cache_dir, - route_settings=route_settings, - ) - _reuse_dir = _resolve_reuse_dir( - reuse_intermediates, - route_settings=route_settings, - ) - if _stage_cache_dir is not None: - validation["stream_cache_dir"] = str(_stage_cache_dir) admissions = raw_tables["admissions"] patients = raw_tables["patients"] @@ -1805,8 +1537,6 @@ def build_eol_mistrust_outputs( ) validation["dataset_prepare_mode"] = route_settings.mode_name validation["autopsy_proxy_enabled"] = route_settings.autopsy_enabled - if _stage_cache_dir is not None: - validation["stream_cache_dir"] = str(_stage_cache_dir) _log_stage(t0, t_pipeline, "Validated database environment") # ------------------------------------------------------------------ @@ -1831,27 +1561,23 @@ def build_eol_mistrust_outputs( # ------------------------------------------------------------------ # Stage 3: note corpus + note labels (SLOW — stream noteevents.csv) # ------------------------------------------------------------------ - note_corpus, note_labels, note_present_hadm_ids, all_cohort = _build_or_reuse_note_artifacts( + note_corpus, note_labels, note_present_hadm_ids, all_cohort = 
_build_note_artifacts( noteevents_csv_path=noteevents_csv_path, all_cohort=all_cohort, - reuse_dir=_reuse_dir, - stage_cache_dir=_stage_cache_dir, route_settings=route_settings, - note_chunksize=note_chunksize, + note_chunksize=DEFAULT_NOTE_CHUNKSIZE, pipeline_start=t_pipeline, ) # ------------------------------------------------------------------ # Stage 4: chartevents feature matrix + code status (SLOW — stream chartevents.csv) # ------------------------------------------------------------------ - feature_matrix, code_status_targets = _build_or_reuse_chartevent_artifacts( + feature_matrix, code_status_targets = _build_chartevent_artifacts( chartevents_csv_path=chartevents_csv_path, d_items=d_items, note_present_hadm_ids=note_present_hadm_ids, - reuse_dir=_reuse_dir, - stage_cache_dir=_stage_cache_dir, route_settings=route_settings, - chartevent_chunksize=chartevent_chunksize, + chartevent_chunksize=DEFAULT_CHARTEVENT_CHUNKSIZE, pipeline_start=t_pipeline, ) @@ -1866,19 +1592,16 @@ def build_eol_mistrust_outputs( # Stage 5: mistrust model + downstream evaluation (recomputed always) # ------------------------------------------------------------------ model = EOLMistrustModel(repetitions=repetitions) - mistrust_scores = _build_or_reuse_mistrust_scores( + mistrust_scores = _build_mistrust_scores( model=model, feature_matrix=feature_matrix, note_labels=note_labels, note_corpus=note_corpus, - reuse_dir=_reuse_dir, - stage_cache_dir=_stage_cache_dir, pipeline_start=t_pipeline, autopsy_enabled=route_settings.autopsy_enabled, ) if not route_settings.autopsy_enabled: mistrust_scores = _disable_autopsy_scores(mistrust_scores) - _write_stage_cache_frame(_stage_cache_dir, "mistrust_scores.csv", mistrust_scores) t0 = time.time() final_model_table = build_final_model_table_from_code_status_targets( @@ -1904,8 +1627,8 @@ def build_eol_mistrust_outputs( treatment_totals=treatment_totals, acuity_scores=acuity_scores, final_model_table=final_model_table, - 
include_downstream_weight_summary=include_downstream_weight_summary, - include_cdf_plot_data=include_cdf_plot_data, + include_downstream_weight_summary=False, + include_cdf_plot_data=False, precomputed_mistrust_scores=mistrust_scores, score_columns=route_settings.score_columns, feature_configurations=route_settings.feature_configurations, @@ -1947,56 +1670,129 @@ def build_eol_mistrust_outputs( }, output_dir=output_dir, ) - _log_stage(t0, t_pipeline, "Wrote deliverables + reuse cache to disk") - - t0 = time.time() - comparison_outputs = build_paper_comparison_outputs( - artifacts, - repetitions=repetitions, - ) - artifacts["paper_comparison"] = comparison_outputs - if output_dir is not None: - write_paper_comparison_artifacts( - comparison_outputs, - output_dir=output_dir / "paper_comparison", - include_summary=compare_to_paper, - ) - _log_stage(t0, t_pipeline, "Built & wrote paper table artifacts") + _log_stage(t0, t_pipeline, "Wrote deliverables to disk") _log_stage(t_pipeline, t_pipeline, "=== Pipeline complete ===") return artifacts -def run_task_demo(root: Path, config_path: Path) -> None: +def run_task_demo( + root: Path, + config_path: Path, + dataset_prepare_mode: str = "default", + train_and_evaluate: bool = False, +) -> None: """Build a PyHealth sample dataset with the custom EOL mistrust YAML config.""" - global MIMIC3Dataset + global EOLMistrustClassifier + global EOLMistrustDataset global EOLMistrustMortalityPredictionMIMIC3 + global get_dataloader + global split_by_patient + global Trainer + + if train_and_evaluate and dataset_prepare_mode != "default": + raise ValueError( + "Native train/eval demo is only supported for the default normal path." 
+ ) - if MIMIC3Dataset is None: - from pyhealth.datasets import MIMIC3Dataset as _MIMIC3Dataset + if EOLMistrustDataset is None or get_dataloader is None or split_by_patient is None: + from pyhealth.datasets import ( + EOLMistrustDataset as _EOLMistrustDataset, + get_dataloader as _get_dataloader, + split_by_patient as _split_by_patient, + ) + + EOLMistrustDataset = _EOLMistrustDataset + get_dataloader = _get_dataloader + split_by_patient = _split_by_patient + if EOLMistrustClassifier is None: + from pyhealth.models import EOLMistrustClassifier as _EOLMistrustClassifier - MIMIC3Dataset = _MIMIC3Dataset + EOLMistrustClassifier = _EOLMistrustClassifier if EOLMistrustMortalityPredictionMIMIC3 is None: from pyhealth.tasks.eol_mistrust import ( EOLMistrustMortalityPredictionMIMIC3 as _EOLMistrustMortalityPredictionMIMIC3, ) EOLMistrustMortalityPredictionMIMIC3 = _EOLMistrustMortalityPredictionMIMIC3 + if train_and_evaluate and Trainer is None: + from pyhealth.trainer import Trainer as _Trainer - base_dataset = MIMIC3Dataset( - root=str(root), - tables=["chartevents", "noteevents", "d_items"], - dataset_name="eol_mistrust_mimic3", - config_path=str(config_path), - cache_dir=tempfile.mkdtemp(), - dev=True, - ) - base_dataset.stats() + Trainer = _Trainer - task = EOLMistrustMortalityPredictionMIMIC3(include_notes=True) - sample_dataset = base_dataset.set_task(task, num_workers=1) - sample_dataset.stats() + def _close_unique_datasets(*datasets: object) -> None: + seen: set[int] = set() + for dataset in datasets: + if dataset is None: + continue + dataset_id = id(dataset) + if dataset_id in seen: + continue + seen.add(dataset_id) + close_fn = getattr(dataset, "close", None) + if callable(close_fn): + close_fn() + + with tempfile.TemporaryDirectory() as cache_dir: + base_dataset = EOLMistrustDataset( + root=str(root), + tables=None, + dataset_name="eol_mistrust", + config_path=str(config_path), + cache_dir=cache_dir, + dev=True, + dataset_prepare_mode=dataset_prepare_mode, + 
) + base_dataset.stats() + + task = EOLMistrustMortalityPredictionMIMIC3( + include_notes=True, + dataset_prepare_mode=dataset_prepare_mode, + ) + sample_dataset = base_dataset.set_task(task, num_workers=1) + train_dataset = None + val_dataset = None + test_dataset = None + try: + model = EOLMistrustClassifier(dataset=sample_dataset) + if train_and_evaluate: + train_dataset, val_dataset, test_dataset = split_by_patient( + sample_dataset, + [0.6, 0.2, 0.2], + ) + train_dataloader = get_dataloader( + train_dataset, batch_size=32, shuffle=True + ) + val_dataloader = get_dataloader( + val_dataset, batch_size=32, shuffle=False + ) + test_dataloader = get_dataloader( + test_dataset, batch_size=32, shuffle=False + ) + trainer = Trainer( + model=model, + metrics=["accuracy"], + enable_logging=False, + ) + trainer.train( + train_dataloader=train_dataloader, + val_dataloader=val_dataloader, + test_dataloader=test_dataloader, + epochs=1, + monitor="accuracy", + load_best_model_at_last=False, + ) + scores = trainer.evaluate(test_dataloader) + print(f"Task demo evaluation scores: {scores}") + else: + batch = next( + iter(get_dataloader(sample_dataset, batch_size=2, shuffle=False)) + ) + outputs = model(**batch) + print(f"Task demo forward keys: {sorted(outputs.keys())}") + finally: + _close_unique_datasets(sample_dataset, train_dataset, val_dataset, test_dataset) def parse_args() -> argparse.Namespace: @@ -2023,18 +1819,6 @@ def parse_args() -> argparse.Namespace: "result_root/EOL__/result." ), ) - parser.add_argument( - "--stream-cache-dir", - type=Path, - default=None, - help=( - "Optional base directory for streamed-stage reuse CSVs. " - "When set, note/chartevent checkpoints are written under " - "stream_cache_dir/{default|paper_like} as soon as each stage finishes. " - "When omitted, the script writes them under " - "result_root/EOL__/cache/{default|paper_like}." 
- ), - ) parser.add_argument( "--result-root", type=Path, @@ -2042,8 +1826,8 @@ def parse_args() -> argparse.Namespace: help=( "Managed run archive root. Each invocation creates " "result_root/EOL_(normal|Paperlike)_ with run summaries, " - "runtime metadata, and default result/cache directories when explicit " - "paths are not provided." + "runtime metadata, and a default result directory when an explicit " + "output path is not provided." ), ) parser.add_argument( @@ -2053,22 +1837,11 @@ def parse_args() -> argparse.Namespace: help="Number of downstream 60/40 evaluation repetitions.", ) parser.add_argument( - "--include-downstream-weight-summary", - action="store_true", - help="Also compute average downstream regularized weights across repetitions.", - ) - parser.add_argument( - "--include-cdf-plot-data", - action="store_true", - help="Also build empirical CDF data for race-based and trust-based treatment plots.", - ) - parser.add_argument( - "--compare-to-paper", + "--ablation-study", action="store_true", help=( - "Also write the human-readable paper comparison summary and print it. " - "Structured paper comparison CSV/JSON artifacts under output_dir/paper_comparison " - "are always generated." + "Run the explicit Normal vs Paper-like route ablation study and " + "write an ABLATION_SUMMARY.txt under a managed run directory." 
), ) parser.add_argument( @@ -2077,28 +1850,11 @@ def parse_args() -> argparse.Namespace: help="Also build a PyHealth sample dataset with the custom EOL mistrust task.", ) parser.add_argument( - "--note-chunksize", - type=int, - default=100_000, - help="Chunk size for streamed noteevents processing.", - ) - parser.add_argument( - "--chartevent-chunksize", - type=int, - default=500_000, - help="Chunk size for streamed chartevents processing.", - ) - parser.add_argument( - "--reuse-intermediates", - type=Path, - default=None, + "--task-demo-train-eval", + action="store_true", help=( - "Path to a previous output directory containing cached CSV artifacts " - "(note_corpus.csv, note_labels.csv, chartevent_feature_matrix.csv, " - "code_status_targets.csv). This may point either directly to the cache " - "directory or to a base stream-cache dir containing mode subfolders. " - "When set, the expensive CSV streaming stages are skipped and those " - "frames are loaded from disk instead." + "When used with --task-demo, run a one-epoch native PyHealth " + "train/evaluate demo on the default normal path." 
), ) parser.add_argument( @@ -2114,16 +1870,18 @@ def parse_args() -> argparse.Namespace: def main() -> None: args = parse_args() + if getattr(args, "ablation_study", False): + _run_route_ablation_study(args) + return + route_settings = _build_route_settings(args.paper_like_dataset_prepare) result_root = getattr(args, "result_root", DEFAULT_RESULT_ROOT) managed_run = _prepare_managed_run_directories( result_root=result_root, route_settings=route_settings, output_dir=args.output_dir, - stream_cache_dir=args.stream_cache_dir, ) resolved_output_dir = managed_run["output_dir"] - resolved_stream_cache_dir = managed_run["stream_cache_dir"] run_dir = managed_run["run_dir"] run_name = managed_run["run_name"] started_at = datetime.now() @@ -2132,14 +1890,7 @@ def main() -> None: artifacts = build_eol_mistrust_outputs( root=args.root, repetitions=args.repetitions, - include_downstream_weight_summary=args.include_downstream_weight_summary, - include_cdf_plot_data=args.include_cdf_plot_data, - compare_to_paper=args.compare_to_paper, output_dir=resolved_output_dir, - stream_cache_dir=resolved_stream_cache_dir, - note_chunksize=args.note_chunksize, - chartevent_chunksize=args.chartevent_chunksize, - reuse_intermediates=args.reuse_intermediates, paper_like_dataset_prepare=args.paper_like_dataset_prepare, ) finished_at = datetime.now() @@ -2150,7 +1901,6 @@ def main() -> None: route_settings=route_settings, args=args, resolved_output_dir=Path(resolved_output_dir), - resolved_stream_cache_dir=Path(resolved_stream_cache_dir), started_at=started_at, finished_at=finished_at, total_runtime_seconds=total_runtime_seconds, @@ -2177,20 +1927,18 @@ def main() -> None: print() print(f"Managed run archive: {run_dir}") print(f"Wrote required deliverables to: {resolved_output_dir}") - print(f"Wrote paper comparison artifacts to: {resolved_output_dir / 'paper_comparison'}") - stream_cache_path = artifacts["validation_summary"].get("stream_cache_dir") - if stream_cache_path is not None: - 
print(f"Streamed-stage cache directory: {stream_cache_path}") - - if args.compare_to_paper: - comparison_outputs = artifacts.get("paper_comparison") - if isinstance(comparison_outputs, dict): - _print_paper_comparison_summary(comparison_outputs) - if args.task_demo: + task_demo = getattr(args, "task_demo", False) + task_demo_train_eval = getattr(args, "task_demo_train_eval", False) + if task_demo or task_demo_train_eval: print() print("Running PyHealth task demo...") - run_task_demo(root=args.root, config_path=args.config_path) + run_task_demo( + root=args.root, + config_path=args.config_path, + dataset_prepare_mode=route_settings.mode_name, + train_and_evaluate=task_demo_train_eval, + ) if __name__ == "__main__": diff --git a/pyhealth/datasets/__init__.py b/pyhealth/datasets/__init__.py index 54e77670c..232d6d532 100644 --- a/pyhealth/datasets/__init__.py +++ b/pyhealth/datasets/__init__.py @@ -55,6 +55,7 @@ def __init__(self, *args, **kwargs): from .dreamt import DREAMTDataset from .ehrshot import EHRShotDataset from .eicu import eICUDataset +from .eol_mistrust_dataset import EOLMistrustDataset from .isruc import ISRUCDataset from .medical_transcriptions import MedicalTranscriptionsDataset from .mimic3 import MIMIC3Dataset diff --git a/pyhealth/datasets/base_dataset.py b/pyhealth/datasets/base_dataset.py index 0e4280aab..54f6efd91 100644 --- a/pyhealth/datasets/base_dataset.py +++ b/pyhealth/datasets/base_dataset.py @@ -509,12 +509,17 @@ def _event_transform(self, output_dir: Path) -> None: ) as cluster: with DaskClient(cluster) as client: if self.dev: - logger.info("Dev mode enabled: limiting to 1000 patients") - patients = df["patient_id"].unique().head(1000).tolist() + logger.debug("Dev mode enabled: limiting to 1000 patients") + patients = ( + df["patient_id"] + .drop_duplicates() + .compute() + .tolist()[:1000] + ) filter = df["patient_id"].isin(patients) df = df[filter] - logger.info(f"Caching event dataframe to {output_dir}...") + logger.debug(f"Caching 
event dataframe to {output_dir}...") collection = df.sort_values("patient_id").to_parquet( output_dir, write_index=False, @@ -565,10 +570,10 @@ def global_event_df(self) -> pl.LazyFrame: f"Incomplete parquet cache at {ret_path} (directory exists but contains no parquet files). Removing and rebuilding." ) shutil.rmtree(ret_path) - logger.info(f"No cached event dataframe found. Creating: {ret_path}") + logger.debug(f"No cached event dataframe found. Creating: {ret_path}") self._event_transform(ret_path) else: - logger.info(f"Found cached event dataframe: {ret_path}") + logger.debug(f"Found cached event dataframe: {ret_path}") self._global_event_df = ret_path return pl.scan_parquet( diff --git a/pyhealth/datasets/configs/eol_mistrust.yaml b/pyhealth/datasets/configs/eol_mistrust.yaml index 8872e010d..e17abbb60 100644 --- a/pyhealth/datasets/configs/eol_mistrust.yaml +++ b/pyhealth/datasets/configs/eol_mistrust.yaml @@ -48,6 +48,63 @@ tables: - "outtime" - "los" + diagnoses_icd: + file_path: "mimiciii_clinical/diagnoses_icd.csv" + patient_id: "subject_id" + join: + - file_path: "mimiciii_clinical/admissions.csv" + "on": "hadm_id" + how: "inner" + columns: + - "dischtime" + timestamp: "dischtime" + attributes: + - "hadm_id" + - "icd9_code" + - "seq_num" + + prescriptions: + file_path: "mimiciii_clinical/prescriptions.csv" + patient_id: "subject_id" + timestamp: "startdate" + join: + - file_path: "mimiciii_clinical/admissions.csv" + "on": "hadm_id" + how: "inner" + columns: + - "dischtime" + attributes: + - "hadm_id" + - "drug" + - "drug_type" + - "drug_name_poe" + - "drug_name_generic" + - "formulary_drug_cd" + - "gsn" + - "ndc" + - "prod_strength" + - "dose_val_rx" + - "dose_unit_rx" + - "form_val_disp" + - "form_unit_disp" + - "route" + - "enddate" + + procedures_icd: + file_path: "mimiciii_clinical/procedures_icd.csv" + patient_id: "subject_id" + join: + - file_path: "mimiciii_clinical/admissions.csv" + "on": "hadm_id" + how: "inner" + columns: + - "dischtime" + 
timestamp: "dischtime" + attributes: + - "hadm_id" + - "icd9_code" + - "seq_num" + noteevents: file_path: "mimiciii_notes/noteevents.csv" patient_id: "subject_id" diff --git a/pyhealth/datasets/eol_mistrust.py b/pyhealth/datasets/eol_mistrust.py index ca8c4dc11..011a29b1c 100644 --- a/pyhealth/datasets/eol_mistrust.py +++ b/pyhealth/datasets/eol_mistrust.py @@ -2279,6 +2279,7 @@ def _assemble_final_model_table( "Final model table contains null subject_id values after admissions merge." ) final["subject_id"] = pd.to_numeric(final["subject_id"], errors="raise").astype(int) + final = final.drop(columns=["subject_id"]) final = final.sort_values("hadm_id").drop_duplicates("hadm_id") return final.reset_index(drop=True) diff --git a/pyhealth/datasets/eol_mistrust_dataset.py b/pyhealth/datasets/eol_mistrust_dataset.py new file mode 100644 index 000000000..4e5821ab5 --- /dev/null +++ b/pyhealth/datasets/eol_mistrust_dataset.py @@ -0,0 +1,194 @@ +"""Native BaseDataset entrypoint for the EOL mistrust cohort tables.""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import List, Optional + +import narwhals as pl + +from .base_dataset import BaseDataset +from .configs import load_yaml_config + +logger = logging.getLogger(__name__) + +DATASET_PREPARE_MODE_DEFAULT = "default" +DATASET_PREPARE_MODE_PAPER_LIKE = "paper_like" + +_ROUTE_SETTINGS = { + DATASET_PREPARE_MODE_DEFAULT: { + "paper_like_dataset_prepare": False, + "code_status_mode": "corrected", + "autopsy_label_mode": "corrected", + }, + DATASET_PREPARE_MODE_PAPER_LIKE: { + "paper_like_dataset_prepare": True, + "code_status_mode": "paper_like", + "autopsy_label_mode": "paper_like", + }, +} + + +class EOLMistrustDataset(BaseDataset): + """PyHealth dataset wrapper for the combined EOL mistrust CSV export tree. 
+ + This dataset provides a proper :class:`~pyhealth.datasets.BaseDataset` + entrypoint for the custom EOL mistrust replication tables stored under a + combined root such as:: + + root/ + mimiciii_clinical/ + mimiciii_notes/ + mimiciii_derived/ + + The default table set favors the admission-level task pipeline and only + requires the core tables that are available in the managed workspace export. + Optional EHR context tables such as ``diagnoses_icd`` or ``prescriptions`` + can be added via ``tables=[...]`` when they are present in the root. + + Args: + root: Root directory containing the combined EOL mistrust export. + tables: Additional table names to load. The dataset always includes the + core ``patients``, ``admissions``, and ``icustays`` tables. + dataset_name: Optional dataset name override. + config_path: Optional YAML config path. Defaults to the bundled + ``eol_mistrust.yaml`` config. + **kwargs: Additional :class:`BaseDataset` keyword arguments. + """ + + CORE_TABLES = ["patients", "admissions", "icustays"] + DEFAULT_OPTIONAL_TABLES = [ + "diagnoses_icd", + "procedures_icd", + "prescriptions", + "noteevents", + "d_items", + "chartevents", + ] + + @staticmethod + def _normalize_dataset_prepare_mode(mode: str | None) -> str: + normalized = ( + DATASET_PREPARE_MODE_DEFAULT + if mode is None + else str(mode).strip().lower() + ) + if normalized not in _ROUTE_SETTINGS: + raise ValueError( + "dataset_prepare_mode must be one of " + f"{DATASET_PREPARE_MODE_DEFAULT!r} or " + f"{DATASET_PREPARE_MODE_PAPER_LIKE!r}" + ) + return normalized + + @staticmethod + def _path_variants(root: str, relative_path: str) -> list[Path]: + csv_path = Path(root) / relative_path + if csv_path.suffix == ".gz": + return [csv_path, csv_path.with_suffix("")] + return [csv_path, Path(f"{csv_path}.gz")] + + @classmethod + def _table_assets_exist(cls, root: str, config, table_name: str) -> bool: + if table_name not in config.tables: + return False + + table_cfg = config.tables[table_name] 
+ required_paths = [table_cfg.file_path] + join_cfg = getattr(table_cfg, "join", None) or [] + required_paths.extend(join.file_path for join in join_cfg) + + for relative_path in required_paths: + if not any( + path.exists() for path in cls._path_variants(root, relative_path) + ): + return False + return True + + @classmethod + def _discover_optional_tables( + cls, + root: str, + config_path: str, + ) -> list[str]: + config = load_yaml_config(config_path) + available_tables: list[str] = [] + for table_name in cls.DEFAULT_OPTIONAL_TABLES: + if cls._table_assets_exist(root, config, table_name): + available_tables.append(table_name) + return available_tables + + def __init__( + self, + root: str, + tables: Optional[List[str]] = None, + dataset_name: Optional[str] = None, + config_path: Optional[str] = None, + dataset_prepare_mode: str = DATASET_PREPARE_MODE_DEFAULT, + **kwargs, + ) -> None: + if config_path is None: + logger.info("No config path provided, using default EOL mistrust config") + config_path = str( + Path(__file__).parent / "configs" / "eol_mistrust.yaml" + ) + + self.dataset_prepare_mode = self._normalize_dataset_prepare_mode( + dataset_prepare_mode + ) + route_settings = _ROUTE_SETTINGS[self.dataset_prepare_mode] + self.paper_like_dataset_prepare = bool( + route_settings["paper_like_dataset_prepare"] + ) + self.code_status_mode = str(route_settings["code_status_mode"]) + self.autopsy_label_mode = str(route_settings["autopsy_label_mode"]) + + if tables is None: + requested_tables = self._discover_optional_tables(root, config_path) + else: + requested_tables = list(tables) + resolved_tables: list[str] = [] + for table_name in [*self.CORE_TABLES, *requested_tables]: + if table_name not in resolved_tables: + resolved_tables.append(table_name) + + super().__init__( + root=root, + tables=resolved_tables, + dataset_name=dataset_name or "eol_mistrust", + config_path=config_path, + **kwargs, + ) + + def preprocess_noteevents(self, df: pl.LazyFrame) -> 
pl.LazyFrame: + """Fill missing note ``charttime`` values from ``chartdate``. + + MIMIC-III note rows may omit ``charttime`` while still providing a + ``chartdate``. PyHealth requires a single timestamp column for event + ordering, so we backfill missing times with midnight of the chart date. + + Args: + df: Lazy noteevents frame before BaseDataset event normalization. + + Returns: + LazyFrame with a populated ``charttime`` column. + """ + columns = set(df.collect_schema().names()) + if "charttime" not in columns: + raise ValueError("noteevents must include charttime for EOLMistrustDataset") + if "chartdate" not in columns: + return df + + return df.with_columns( + pl.when(pl.col("charttime").is_null()) + .then(pl.col("chartdate") + pl.lit(" 00:00:00")) + .otherwise(pl.col("charttime")) + .alias("charttime") + ) + +__all__ = [ + "DATASET_PREPARE_MODE_DEFAULT", + "DATASET_PREPARE_MODE_PAPER_LIKE", + "EOLMistrustDataset", +] diff --git a/pyhealth/models/__init__.py b/pyhealth/models/__init__.py index 5233b1726..184766503 100644 --- a/pyhealth/models/__init__.py +++ b/pyhealth/models/__init__.py @@ -38,6 +38,7 @@ from .transformer import Transformer, TransformerLayer from .transformers_model import TransformersModel from .ehrmamba import EHRMamba, MambaBlock +from .eol_mistrust_classifier import EOLMistrustClassifier from .vae import VAE from .vision_embedding import VisionEmbeddingModel from .text_embedding import TextEmbedding diff --git a/pyhealth/models/eol_mistrust.py b/pyhealth/models/eol_mistrust.py index ee971ce18..b1de9c155 100644 --- a/pyhealth/models/eol_mistrust.py +++ b/pyhealth/models/eol_mistrust.py @@ -259,8 +259,8 @@ def _default_estimator_factory() -> object: penalty="l1", C=DEFAULT_LOGISTIC_C, solver="liblinear", - max_iter=1000, - tol=0.001, + max_iter=100, + tol=0.01, ) @@ -270,8 +270,8 @@ def build_logistic_estimator_factory( class_weight: str | Mapping[int, float] | None = None, penalty: str = "l1", solver: str = "liblinear", - max_iter: int = 
1000, - tol: float = 0.001, + max_iter: int = 100, + tol: float = 0.01, ) -> Callable[[], object]: """Return a reusable sklearn logistic-regression factory.""" diff --git a/pyhealth/models/eol_mistrust_classifier.py b/pyhealth/models/eol_mistrust_classifier.py new file mode 100644 index 000000000..895b0c8e0 --- /dev/null +++ b/pyhealth/models/eol_mistrust_classifier.py @@ -0,0 +1,194 @@ +"""Native BaseModel entrypoint for EOL mistrust downstream tasks.""" + +from __future__ import annotations + +import hashlib +from typing import Dict, Sequence + +import torch +import torch.nn as nn + +from pyhealth.datasets.sample_dataset import SampleDataset +from pyhealth.models.base_model import BaseModel +from pyhealth.processors import SequenceProcessor, TensorProcessor, TextProcessor + + +def _stable_bucket_index(text: str, num_buckets: int) -> int: + """Map text to a stable non-zero embedding bucket.""" + + digest = hashlib.md5(text.encode("utf-8")).hexdigest() + return (int(digest, 16) % max(num_buckets - 1, 1)) + 1 + + +class EOLMistrustClassifier(BaseModel): + """Simple multimodal classifier for EOL mistrust task samples. + + The model is designed for the task schema used by + ``pyhealth.tasks.eol_mistrust``: + + - coded EHR history fields use learned sequence embeddings with mean pooling + - scalar numeric fields use linear projections + - text and categorical string fields use stable hashed token embeddings + + Args: + dataset: SampleDataset returned by ``dataset.set_task(...)``. + embedding_dim: Shared feature embedding dimension. + hidden_dim: Hidden layer width before the output head. + dropout: Dropout applied to the pooled patient representation. + text_hash_buckets: Number of buckets for hashed text embeddings. 
+ """ + + def __init__( + self, + dataset: SampleDataset, + embedding_dim: int = 32, + hidden_dim: int = 64, + dropout: float = 0.1, + text_hash_buckets: int = 2048, + ) -> None: + super().__init__(dataset) + + if len(self.label_keys) != 1: + raise ValueError("EOLMistrustClassifier supports exactly one label key.") + + self.label_key = self.label_keys[0] + self.embedding_dim = int(embedding_dim) + self.hidden_dim = int(hidden_dim) + self.text_hash_buckets = int(text_hash_buckets) + + self.sequence_embeddings = nn.ModuleDict() + self.tensor_projections = nn.ModuleDict() + self.text_embeddings = nn.ModuleDict() + + for feature_key, processor in self.dataset.input_processors.items(): + if isinstance(processor, SequenceProcessor): + self.sequence_embeddings[feature_key] = nn.Embedding( + num_embeddings=len(processor.code_vocab), + embedding_dim=self.embedding_dim, + padding_idx=0, + ) + elif isinstance(processor, TensorProcessor): + self.tensor_projections[feature_key] = nn.Linear( + self._infer_tensor_input_size(feature_key), + self.embedding_dim, + ) + elif isinstance(processor, TextProcessor): + self.text_embeddings[feature_key] = nn.Embedding( + num_embeddings=self.text_hash_buckets + 1, + embedding_dim=self.embedding_dim, + padding_idx=0, + ) + else: + raise TypeError( + f"Unsupported processor for EOLMistrustClassifier: " + f"{feature_key} -> {processor.__class__.__name__}" + ) + + total_modalities = ( + len(self.sequence_embeddings) + + len(self.tensor_projections) + + len(self.text_embeddings) + ) + representation_dim = total_modalities * self.embedding_dim + self.hidden_layer = nn.Linear(representation_dim, self.hidden_dim) + self.activation = nn.ReLU() + self.dropout = nn.Dropout(dropout) + self.output_layer = nn.Linear(self.hidden_dim, self.get_output_size()) + + def _infer_tensor_input_size(self, feature_key: str) -> int: + for index in range(len(self.dataset)): + if feature_key not in self.dataset[index]: + continue + value = 
self.dataset[index][feature_key] + if isinstance(value, torch.Tensor): + if value.dim() == 0: + return 1 + return int(value.shape[-1]) + return 1 + return 1 + + def _mean_pool_sequence( + self, values: torch.Tensor, feature_key: str + ) -> torch.Tensor: + if values.dim() == 1: + values = values.unsqueeze(0) + values = values.long().to(self.device) + embeddings = self.sequence_embeddings[feature_key](values) + mask = (values != 0).unsqueeze(-1) + denom = mask.sum(dim=1).clamp(min=1) + return (embeddings * mask).sum(dim=1) / denom + + def _project_tensor(self, values: torch.Tensor, feature_key: str) -> torch.Tensor: + values = values.to(self.device).float() + if values.dim() == 0: + values = values.view(1, 1) + elif values.dim() == 1: + values = values.unsqueeze(-1) + return self.tensor_projections[feature_key](values) + + def _embed_text_field( + self, values: Sequence[str], feature_key: str + ) -> torch.Tensor: + token_lists = [] + max_len = 1 + for raw_value in values: + normalized = str(raw_value or "").strip().lower() + tokens = normalized.split() + if not tokens: + tokens = [""] + token_lists.append(tokens) + max_len = max(max_len, len(tokens)) + + index_rows = [] + for tokens in token_lists: + row = [ + _stable_bucket_index(token, self.text_hash_buckets) for token in tokens + ] + if len(row) < max_len: + row.extend([0] * (max_len - len(row))) + index_rows.append(row) + + indices = torch.tensor(index_rows, dtype=torch.long, device=self.device) + embeddings = self.text_embeddings[feature_key](indices) + mask = (indices != 0).unsqueeze(-1) + denom = mask.sum(dim=1).clamp(min=1) + return (embeddings * mask).sum(dim=1) / denom + + def forward(self, **kwargs) -> Dict[str, torch.Tensor]: + pooled_features = [] + + for feature_key in self.feature_keys: + value = kwargs[feature_key] + if feature_key in self.sequence_embeddings: + pooled_features.append(self._mean_pool_sequence(value, feature_key)) + elif feature_key in self.tensor_projections: + 
pooled_features.append(self._project_tensor(value, feature_key)) + elif feature_key in self.text_embeddings: + if isinstance(value, str): + text_values = [value] + else: + text_values = list(value) + pooled_features.append(self._embed_text_field(text_values, feature_key)) + else: + raise KeyError( + "Unexpected feature key for EOLMistrustClassifier: " + f"{feature_key}" + ) + + patient_representation = torch.cat(pooled_features, dim=1) + hidden = self.activation(self.hidden_layer(patient_representation)) + hidden = self.dropout(hidden) + logits = self.output_layer(hidden) + + y_true = kwargs[self.label_key].to(self.device) + loss = self.get_loss_function()(logits, y_true) + y_prob = self.prepare_y_prob(logits) + return { + "loss": loss, + "y_prob": y_prob, + "y_true": y_true, + "logit": logits, + } + + +__all__ = ["EOLMistrustClassifier"] diff --git a/pyhealth/tasks/__init__.py b/pyhealth/tasks/__init__.py index 797988377..0e2410172 100644 --- a/pyhealth/tasks/__init__.py +++ b/pyhealth/tasks/__init__.py @@ -21,6 +21,12 @@ drug_recommendation_mimic4_fn, drug_recommendation_omop_fn, ) +from .eol_mistrust import ( + EOLMistrustCodeStatusPredictionMIMIC3, + EOLMistrustDownstreamMIMIC3, + EOLMistrustLeftAMAPredictionMIMIC3, + EOLMistrustMortalityPredictionMIMIC3, +) from .in_hospital_mortality_mimic4 import InHospitalMortalityMIMIC4 from .length_of_stay_prediction import ( LengthOfStayPredictioneICU, diff --git a/pyhealth/tasks/eol_mistrust.py b/pyhealth/tasks/eol_mistrust.py index 8ca6fbfe5..af84f7f3a 100644 --- a/pyhealth/tasks/eol_mistrust.py +++ b/pyhealth/tasks/eol_mistrust.py @@ -23,6 +23,8 @@ CODE_STATUS_ITEMIDS = {128, 223758} CODE_STATUS_MODE_CORRECTED = "corrected" CODE_STATUS_MODE_PAPER_LIKE = "paper_like" +DATASET_PREPARE_MODE_DEFAULT = "default" +DATASET_PREPARE_MODE_PAPER_LIKE = "paper_like" CODE_STATUS_POSITIVE_SUBSTRINGS = ( "dnr", @@ -44,6 +46,17 @@ ] ) +_DATASET_PREPARE_ROUTE_SETTINGS = { + DATASET_PREPARE_MODE_DEFAULT: { + 
"paper_like_dataset_prepare": False, + "code_status_mode": CODE_STATUS_MODE_CORRECTED, + }, + DATASET_PREPARE_MODE_PAPER_LIKE: { + "paper_like_dataset_prepare": True, + "code_status_mode": CODE_STATUS_MODE_PAPER_LIKE, + }, +} + def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> None: missing = [column for column in required if column not in df.columns] @@ -75,6 +88,19 @@ def _normalize_code_status_mode(mode: str | None) -> str: return normalized +def _normalize_dataset_prepare_mode(mode: str | None) -> str: + normalized = ( + DATASET_PREPARE_MODE_DEFAULT if mode is None else str(mode).strip().lower() + ) + if normalized not in _DATASET_PREPARE_ROUTE_SETTINGS: + raise ValueError( + "dataset_prepare_mode must be one of " + f"{DATASET_PREPARE_MODE_DEFAULT!r} or " + f"{DATASET_PREPARE_MODE_PAPER_LIKE!r}" + ) + return normalized + + def _calculate_age_years(admittime, dob) -> float: admit_time = _coerce_timestamp(admittime) birth_time = _coerce_timestamp(dob) @@ -96,6 +122,14 @@ def _calculate_los_days(admittime, dischtime) -> float: return float((discharge_time - admit_time).total_seconds() / 86400.0) +def _calculate_paper_like_los_days(admittime, dischtime) -> float: + admit_time = _coerce_timestamp(admittime) + discharge_time = _coerce_timestamp(dischtime) + if pd.isna(admit_time) or pd.isna(discharge_time): + return float("nan") + return float((discharge_time - admit_time).seconds / 3600.0) + + # --------------------------------------------------------------------------- # Normal Path # --------------------------------------------------------------------------- @@ -302,19 +336,30 @@ class EOLMistrustDownstreamMIMIC3(BaseTask): task_name = "EOLMistrustDownstreamMIMIC3" def __init__( - self, target: str = "in_hospital_mortality", include_notes: bool = False + self, + target: str = "in_hospital_mortality", + include_notes: bool = False, + dataset_prepare_mode: str = DATASET_PREPARE_MODE_DEFAULT, ) -> None: if target not in 
set(EOL_MISTRUST_TASK_MAP.values()): raise ValueError(f"Unsupported EOL mistrust target: {target}") self.target = target self.include_notes = include_notes + self.dataset_prepare_mode = _normalize_dataset_prepare_mode( + dataset_prepare_mode + ) + route_settings = _DATASET_PREPARE_ROUTE_SETTINGS[self.dataset_prepare_mode] + self.paper_like_dataset_prepare = bool( + route_settings["paper_like_dataset_prepare"] + ) + self.code_status_mode = str(route_settings["code_status_mode"]) self.input_schema: dict[str, str] = { "conditions": "sequence", "procedures": "sequence", "drugs": "sequence", - "age": "float", - "los_days": "float", + "age": "tensor", + "los_days": "tensor", "gender": "text", "insurance": "text", "race": "text", @@ -326,9 +371,7 @@ def __init__( def _get_codes_for_admission( self, patient: Any, event_type: str, hadm_id ) -> list[str]: - events = patient.get_events( - event_type=event_type, filters=[("hadm_id", "==", hadm_id)] - ) + events = self._get_events_for_admission(patient, event_type, hadm_id) values: list[str] = [] for event in events: for attribute in ("icd9_code", "icd_code", "drug", "ndc"): @@ -338,29 +381,39 @@ def _get_codes_for_admission( break return values + def _get_events_for_admission( + self, patient: Any, event_type: str, hadm_id + ) -> list[Any]: + events = patient.get_events(event_type=event_type) + return [ + event + for event in events + if getattr(event, "hadm_id", None) == hadm_id + ] + def _get_note_text(self, patient: Any, hadm_id) -> str: - notes = patient.get_events( - event_type="noteevents", filters=[("hadm_id", "==", hadm_id)] - ) + notes = self._get_events_for_admission(patient, "noteevents", hadm_id) return prepare_note_text( " ".join(str(getattr(note, "text", "")) for note in notes) ) def _get_code_status_label(self, patient: Any, hadm_id) -> int: - events = patient.get_events( - event_type="chartevents", filters=[("hadm_id", "==", hadm_id)] - ) + events = self._get_events_for_admission(patient, "chartevents", 
hadm_id) rows = [ { "hadm_id": getattr(event, "hadm_id", hadm_id), "itemid": getattr(event, "itemid", None), "value": getattr(event, "value", None), + "charttime": getattr(event, "charttime", None), } for event in events ] if not rows: return 0 - target = build_code_status_target(pd.DataFrame(rows)) + target = build_code_status_target( + pd.DataFrame(rows), + code_status_mode=self.code_status_mode, + ) return 0 if target.empty else int(target["code_status_dnr_dni_cmo"].max()) def _get_target_value(self, patient: Any, admission: Any) -> int: @@ -417,8 +470,16 @@ def __call__(self, patient: Any) -> list[dict[str, Any]]: else None ), ), - "los_days": _calculate_los_days( - admit_time, getattr(admission, "dischtime", None) + "los_days": ( + _calculate_paper_like_los_days( + admit_time, + getattr(admission, "dischtime", None), + ) + if self.paper_like_dataset_prepare + else _calculate_los_days( + admit_time, + getattr(admission, "dischtime", None), + ) ), "gender": ( getattr(patient_event, "gender", None) @@ -442,8 +503,16 @@ class EOLMistrustLeftAMAPredictionMIMIC3(EOLMistrustDownstreamMIMIC3): task_name = "EOLMistrustLeftAMAPredictionMIMIC3" - def __init__(self, include_notes: bool = False) -> None: - super().__init__(target="left_ama", include_notes=include_notes) + def __init__( + self, + include_notes: bool = False, + dataset_prepare_mode: str = DATASET_PREPARE_MODE_DEFAULT, + ) -> None: + super().__init__( + target="left_ama", + include_notes=include_notes, + dataset_prepare_mode=dataset_prepare_mode, + ) class EOLMistrustCodeStatusPredictionMIMIC3(EOLMistrustDownstreamMIMIC3): @@ -451,8 +520,16 @@ class EOLMistrustCodeStatusPredictionMIMIC3(EOLMistrustDownstreamMIMIC3): task_name = "EOLMistrustCodeStatusPredictionMIMIC3" - def __init__(self, include_notes: bool = False) -> None: - super().__init__(target="code_status_dnr_dni_cmo", include_notes=include_notes) + def __init__( + self, + include_notes: bool = False, + dataset_prepare_mode: str = 
DATASET_PREPARE_MODE_DEFAULT, + ) -> None: + super().__init__( + target="code_status_dnr_dni_cmo", + include_notes=include_notes, + dataset_prepare_mode=dataset_prepare_mode, + ) class EOLMistrustMortalityPredictionMIMIC3(EOLMistrustDownstreamMIMIC3): @@ -460,14 +537,24 @@ class EOLMistrustMortalityPredictionMIMIC3(EOLMistrustDownstreamMIMIC3): task_name = "EOLMistrustMortalityPredictionMIMIC3" - def __init__(self, include_notes: bool = False) -> None: - super().__init__(target="in_hospital_mortality", include_notes=include_notes) + def __init__( + self, + include_notes: bool = False, + dataset_prepare_mode: str = DATASET_PREPARE_MODE_DEFAULT, + ) -> None: + super().__init__( + target="in_hospital_mortality", + include_notes=include_notes, + dataset_prepare_mode=dataset_prepare_mode, + ) __all__ = [ "CODE_STATUS_ITEMIDS", "CODE_STATUS_MODE_CORRECTED", "CODE_STATUS_MODE_PAPER_LIKE", + "DATASET_PREPARE_MODE_DEFAULT", + "DATASET_PREPARE_MODE_PAPER_LIKE", "EOL_MISTRUST_TASK_MAP", "EOLMistrustCodeStatusPredictionMIMIC3", "EOLMistrustDownstreamMIMIC3", diff --git a/tests/core/test_base_dataset.py b/tests/core/test_base_dataset.py index 4f9bb1fda..f349a341a 100644 --- a/tests/core/test_base_dataset.py +++ b/tests/core/test_base_dataset.py @@ -1,5 +1,6 @@ import tempfile import unittest +import warnings from unittest.mock import patch import polars as pl @@ -129,6 +130,40 @@ def test_event_df_cache_is_physically_sorted(self): "cached global_event_df parquet must be sorted by patient_id", ) + def test_dev_mode_cache_build_does_not_emit_dask_head_warning(self): + small_data = dd.from_pandas( + pd.DataFrame( + { + "patient_id": ["1", "2", "3"], + "event_type": ["test"] * 3, + "timestamp": [None] * 3, + "test/value": [10, 20, 30], + } + ), + npartitions=1, + ) + + with tempfile.TemporaryDirectory() as cache_root, patch( + "pyhealth.datasets.base_dataset.platformdirs.user_cache_dir", + return_value=cache_root, + ): + dataset = MockDataset( + data=small_data, + 
root="/data/root_dev_warning", + tables=["table_a"], + dataset_name="DevWarningDataset", + dev=True, + ) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + _ = dataset.global_event_df + + messages = [str(warning.message) for warning in caught] + self.assertFalse( + any("Insufficient elements for `head`" in message for message in messages), + msg=f"Unexpected head warning(s): {messages}", + ) + def test_empty_string_handling(self): import os from dataclasses import dataclass diff --git a/tests/core/test_eol_mistrust_Integration.py b/tests/core/test_eol_mistrust_Integration.py index b3aefb8b6..98b914cd1 100644 --- a/tests/core/test_eol_mistrust_Integration.py +++ b/tests/core/test_eol_mistrust_Integration.py @@ -4,6 +4,7 @@ import shutil import unittest import uuid +import warnings from contextlib import contextmanager from pathlib import Path from unittest.mock import patch @@ -40,7 +41,11 @@ def _load_model_module(): def _load_example_module(): - module_path = Path(__file__).resolve().parents[2] / "examples" / "eol_mistrust.py" + module_path = ( + Path(__file__).resolve().parents[2] + / "examples" + / "eol_mistrust_mortality_classifier.py" + ) spec = importlib.util.spec_from_file_location( "examples.eol_mistrust_integration_tests", module_path, @@ -127,6 +132,18 @@ def setUpClass(cls): cls.model = _load_model_module() def setUp(self): + self._warning_context = warnings.catch_warnings() + self._warning_context.__enter__() + warnings.filterwarnings( + "ignore", + message=r".*minimum 10 recommended.*", + category=UserWarning, + ) + warnings.filterwarnings( + "ignore", + message=r".*autopsy_label.*has no joined training rows.*", + category=UserWarning, + ) self.admissions = pd.DataFrame( [ { @@ -284,6 +301,9 @@ def setUp(self): ] ) + def tearDown(self): + self._warning_context.__exit__(None, None, None) + def _sentiment_fn(self, text): if "non" in text or "refused" in text: return (-0.6, 0.0) @@ -627,7 +647,8 @@ def 
test_dataset_identify_table2_itemids_and_feature_matrix_support_partial_matc self.assertEqual(int(row_101["Education Readiness: No"]), 1) self.assertEqual(int(row_101["Pain Level: 7-Mod to Severe"]), 1) row_104 = feature_matrix.set_index("hadm_id").loc[104] - self.assertTrue((row_104.fillna(0).astype(int) == 0).all()) + row_104 = pd.to_numeric(row_104, errors="coerce").fillna(0).astype(int) + self.assertTrue((row_104 == 0).all()) def test_dataset_identify_table2_itemids_does_not_overmatch_unrelated_labels(self): d_items = pd.DataFrame( @@ -668,7 +689,8 @@ def test_dataset_build_treatment_totals_merges_gap_boundary_and_outputs_sorted_s empty_vaso = pd.DataFrame(columns=["icustay_id", "vasonum", "starttime", "endtime", "duration_hours"]) totals = self.dataset.build_treatment_totals(boundary_icu, boundary_vent, empty_vaso) self.assertEqual(totals.columns.tolist(), ["hadm_id", "total_vent_min", "total_vaso_min"]) - row = totals.fillna(0).set_index("hadm_id").loc[1] + row = totals.set_index("hadm_id").loc[1] + row = pd.to_numeric(row, errors="coerce").fillna(0.0) self.assertEqual(float(row["total_vent_min"]), 780.0) def test_dataset_build_final_model_table_returns_exact_full_schema_order(self): @@ -769,8 +791,8 @@ def predict_proba(self, X): self.assertEqual(created[0].kwargs["penalty"], "l1") self.assertEqual(created[0].kwargs["C"], 0.1) self.assertEqual(created[0].kwargs["solver"], "liblinear") - self.assertEqual(created[0].kwargs["max_iter"], 1000) - self.assertEqual(created[0].kwargs["tol"], 0.001) + self.assertEqual(created[0].kwargs["max_iter"], 100) + self.assertEqual(created[0].kwargs["tol"], 0.01) self.assertEqual(len(created[0].fit_X), len(artifacts["feature_matrix"])) scores = self.model.build_proxy_probability_scores( @@ -1741,17 +1763,119 @@ def test_integration_extra_nonbreaking_columns_do_not_change_results(self): with_extra["final_model_table"], ) - def test_integration_package_import_and_direct_load_modules_are_compatible(self): - dataset_pkg = 
importlib.import_module("pyhealth.datasets.eol_mistrust") - model_pkg = importlib.import_module("pyhealth.models.eol_mistrust") - self.assertTrue(callable(dataset_pkg.build_final_model_table)) - self.assertTrue(callable(model_pkg.run_full_eol_mistrust_modeling)) - self.assertEqual(model_pkg.MISTRUST_SCORE_COLUMNS, self.model.MISTRUST_SCORE_COLUMNS) + def test_example_run_task_demo_uses_managed_temp_cache_dir(self): + example_module = _load_example_module() + captured = {} + classifier_kwargs = {} + dataloader_calls = [] + task_kwargs = {} + close_calls = [] + + class _FakeTempDir: + def __init__(self, path): + self.path = path + + def __enter__(self): + return self.path + + def __exit__(self, exc_type, exc, tb): + del exc_type, exc, tb + return False + + class _FakeDataset: + def __init__(self, *args, **kwargs): + del args + captured.update(kwargs) + + def stats(self): + return None + + def set_task(self, task, num_workers=0): + captured["task_dataset_prepare_mode"] = getattr( + task, + "dataset_prepare_mode", + None, + ) + del num_workers + return _FakeSampleDataset() + + class _FakeSampleDataset: + def close(self): + close_calls.append("sample") + + class _FakeModel: + def __init__(self, *args, **kwargs): + del args + classifier_kwargs.update(kwargs) + + def __call__(self, **kwargs): + return {"loss": 0, "logit": 0, "y_prob": 0, "y_true": 0} + + def _fake_get_dataloader(dataset, batch_size=0, shuffle=False): + del dataset + dataloader_calls.append( + {"batch_size": batch_size, "shuffle": shuffle} + ) + return [{"dummy": "batch"}] + + def _fake_split_by_patient(dataset, ratios, seed=None): + del dataset, ratios, seed + return None, None, None + + class _FakeTask: + def __init__(self, **kwargs): + task_kwargs.update(kwargs) + self.dataset_prepare_mode = kwargs.get("dataset_prepare_mode") + + with patch.object( + example_module.tempfile, + "TemporaryDirectory", + return_value=_FakeTempDir("stable-cache-dir"), + ), patch.object( + example_module, 
"EOLMistrustDataset", _FakeDataset + ), patch.object( + example_module, "EOLMistrustClassifier", _FakeModel + ), patch.object( + example_module, + "EOLMistrustMortalityPredictionMIMIC3", + _FakeTask, + ), patch.object( + example_module, "split_by_patient", _fake_split_by_patient + ), patch.object( + example_module, "get_dataloader", _fake_get_dataloader + ): + example_module.run_task_demo( + Path("root"), + Path("config"), + dataset_prepare_mode="paper_like", + ) - def test_example_run_task_demo_uses_stable_mkdtemp_cache_dir(self): + self.assertEqual(captured["cache_dir"], "stable-cache-dir") + self.assertEqual(captured["dataset_prepare_mode"], "paper_like") + self.assertEqual(task_kwargs["dataset_prepare_mode"], "paper_like") + self.assertEqual(captured["task_dataset_prepare_mode"], "paper_like") + self.assertEqual(classifier_kwargs["dataset"].__class__, _FakeSampleDataset) + self.assertEqual(dataloader_calls, [{"batch_size": 2, "shuffle": False}]) + self.assertEqual(close_calls, ["sample"]) + + def test_example_run_task_demo_can_train_and_evaluate_on_normal_path(self): example_module = _load_example_module() captured = {} - factory_kwargs = [] + trainer_calls = {} + dataloader_calls = [] + task_kwargs = {} + close_calls = [] + + class _FakeTempDir: + def __init__(self, path): + self.path = path + + def __enter__(self): + return self.path + + def __exit__(self, exc_type, exc, tb): + del exc_type, exc, tb + return False class _FakeDataset: def __init__(self, *args, **kwargs): @@ -1762,15 +1886,127 @@ def stats(self): return None def set_task(self, task, num_workers=0): - del task, num_workers - return self + del num_workers + captured["task_dataset_prepare_mode"] = getattr( + task, + "dataset_prepare_mode", + None, + ) + return _FakeSampleDataset() + + class _FakeSampleDataset: + def close(self): + close_calls.append("sample") - with patch.object(example_module.tempfile, "mkdtemp", return_value="stable-cache-dir"), patch.object( - example_module, "MIMIC3Dataset", 
_FakeDataset + class _FakeModel: + def __init__(self, *args, **kwargs): + del args + captured["model_dataset"] = kwargs["dataset"] + + def __call__(self, **kwargs): + del kwargs + return {"loss": 0, "logit": 0, "y_prob": 0, "y_true": 0} + + class _FakeTask: + def __init__(self, **kwargs): + task_kwargs.update(kwargs) + self.dataset_prepare_mode = kwargs.get("dataset_prepare_mode") + + class _FakeTrainer: + def __init__(self, model, metrics=None, enable_logging=True, device=None): + trainer_calls["model"] = model + trainer_calls["metrics"] = metrics + trainer_calls["enable_logging"] = enable_logging + trainer_calls["device"] = device + + def train(self, **kwargs): + trainer_calls["train_kwargs"] = kwargs + + def evaluate(self, dataloader): + trainer_calls["evaluate_loader"] = dataloader + return {"accuracy": 0.5, "loss": 0.1} + + def _fake_split_by_patient(dataset, ratios, seed=None): + trainer_calls["split_dataset"] = dataset + trainer_calls["split_ratios"] = list(ratios) + trainer_calls["split_seed"] = seed + return dataset, dataset, dataset + + def _fake_get_dataloader(dataset, batch_size=0, shuffle=False): + dataloader_calls.append( + {"dataset": dataset, "batch_size": batch_size, "shuffle": shuffle} + ) + return f"loader-{len(dataloader_calls)}" + + with patch.object( + example_module.tempfile, + "TemporaryDirectory", + return_value=_FakeTempDir("stable-cache-dir"), + ), patch.object( + example_module, "EOLMistrustDataset", _FakeDataset + ), patch.object( + example_module, "EOLMistrustClassifier", _FakeModel + ), patch.object( + example_module, + "EOLMistrustMortalityPredictionMIMIC3", + _FakeTask, + ), patch.object( + example_module, "split_by_patient", _fake_split_by_patient + ), patch.object( + example_module, "get_dataloader", _fake_get_dataloader + ), patch.object( + example_module, "Trainer", _FakeTrainer ): - example_module.run_task_demo(Path("root"), Path("config")) + example_module.run_task_demo( + Path("root"), + Path("config"), + 
dataset_prepare_mode="default", + train_and_evaluate=True, + ) self.assertEqual(captured["cache_dir"], "stable-cache-dir") + self.assertEqual(captured["dataset_prepare_mode"], "default") + self.assertEqual(task_kwargs["dataset_prepare_mode"], "default") + self.assertEqual(captured["task_dataset_prepare_mode"], "default") + self.assertEqual(trainer_calls["split_dataset"].__class__, _FakeSampleDataset) + self.assertEqual(trainer_calls["split_ratios"], [0.6, 0.2, 0.2]) + self.assertEqual(trainer_calls["metrics"], ["accuracy"]) + self.assertFalse(trainer_calls["enable_logging"]) + self.assertEqual( + trainer_calls["train_kwargs"], + { + "train_dataloader": "loader-1", + "val_dataloader": "loader-2", + "test_dataloader": "loader-3", + "epochs": 1, + "monitor": "accuracy", + "load_best_model_at_last": False, + }, + ) + self.assertEqual(trainer_calls["evaluate_loader"], "loader-3") + self.assertEqual( + dataloader_calls, + [ + {"dataset": trainer_calls["split_dataset"], "batch_size": 32, "shuffle": True}, + {"dataset": trainer_calls["split_dataset"], "batch_size": 32, "shuffle": False}, + {"dataset": trainer_calls["split_dataset"], "batch_size": 32, "shuffle": False}, + ], + ) + self.assertEqual(close_calls, ["sample"]) + + def test_example_run_task_demo_rejects_train_eval_for_paper_like_path(self): + example_module = _load_example_module() + + with self.assertRaisesRegex( + ValueError, + "only supported for the default normal path", + ): + example_module.run_task_demo( + Path("root"), + Path("config"), + dataset_prepare_mode="paper_like", + train_and_evaluate=True, + ) def test_example_build_outputs_routes_model_stage_through_eol_mistrust_model(self): example_module = _load_example_module() @@ -2158,53 +2394,8 @@ def run(self, **kwargs): self.assertEqual(outputs["validation_summary"]["dataset_prepare_mode"], "paper_like") self.assertTrue(bool(outputs["validation_summary"]["autopsy_proxy_enabled"])) - def 
test_example_build_outputs_checkpoints_note_corpus_immediately_before_later_stage_failure(self): - example_module = _load_example_module() - - raw_tables = { - "admissions": self.admissions.copy(), - "patients": self.patients.copy(), - "icustays": self.icustays.copy(), - "d_items": self.d_items.copy(), - } - materialized_views = { - "ventdurations": self.ventdurations.copy(), - "vasopressordurations": self.vasopressordurations.copy(), - "oasis": self.oasis.copy(), - "sapsii": self.sapsii.copy(), - } - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": "note-101"}, - {"hadm_id": 102, "note_text": "note-102"}, - ] - ) - - with _workspace_tempdir() as temp_dir, patch.object( - example_module, - "load_eol_mistrust_tables", - return_value=(raw_tables, materialized_views), - ), patch.object( - example_module, - "build_note_corpus_from_csv", - return_value=note_corpus, - ), patch.object( - example_module, - "build_note_labels_from_csv", - side_effect=RuntimeError("boom after note corpus"), - ): - with self.assertRaisesRegex(RuntimeError, "boom after note corpus"): - example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - output_dir=Path(temp_dir), - ) - - saved_note_corpus = pd.read_csv(Path(temp_dir) / "note_corpus.csv") - pd.testing.assert_frame_equal(saved_note_corpus, note_corpus) - self.assertFalse((Path(temp_dir) / "note_labels.csv").exists()) - def test_example_build_outputs_checkpoints_streamed_reuse_artifacts_before_model_failure(self): + def test_example_build_outputs_disables_autopsy_outputs_only_in_default_route(self): example_module = _load_example_module() raw_tables = { @@ -2221,42 +2412,116 @@ def test_example_build_outputs_checkpoints_streamed_reuse_artifacts_before_model } note_corpus = pd.DataFrame( [ - {"hadm_id": 101, "note_text": "note-101"}, - {"hadm_id": 102, "note_text": "note-102"}, - {"hadm_id": 103, "note_text": "note-103"}, + {"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} + for hadm_id in [101, 
102, 103, 104, 105] ] ) note_labels = pd.DataFrame( [ - {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, - {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, - {"hadm_id": 103, "noncompliance_label": 0, "autopsy_label": 0.0}, + {"hadm_id": hadm_id, "noncompliance_label": 0, "autopsy_label": float("nan")} + for hadm_id in [101, 102, 103, 104, 105] ] ) feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "Education Readiness: No": 1}, - {"hadm_id": 102, "Education Readiness: No": 0}, - {"hadm_id": 103, "Education Readiness: No": 0}, - ] + [{"hadm_id": hadm_id, "education topic: medications": 0} for hadm_id in [101, 102, 103, 104, 105]] ) code_status_targets = pd.DataFrame( + [{"hadm_id": hadm_id, "code_status_dnr_dni_cmo": 0} for hadm_id in [101, 102, 103, 104, 105]] + ) + mistrust_scores = pd.DataFrame( [ - {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, - {"hadm_id": 103, "code_status_dnr_dni_cmo": 0}, + { + "hadm_id": hadm_id, + "noncompliance_score_z": float(index - 2), + "autopsy_score_z": float(index) / 10.0, + "negative_sentiment_score_z": float(2 - index), + } + for index, hadm_id in enumerate([101, 102, 103, 104, 105], start=1) ] ) - class _ExplodingModel: + class _FakeModel: def __init__(self, repetitions): self.repetitions = repetitions def build_mistrust_scores(self, **kwargs): del kwargs - raise RuntimeError("boom after streamed artifacts") + return mistrust_scores + + def run(self, **kwargs): + del kwargs + return { + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline", + "target_column": "left_ama", + "n_rows": 5, + "n_features": 7, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.7, + "auc_std": 0.0, + }, + { + "task": "Left AMA", + "configuration": "Baseline + Autopsy", + "target_column": "left_ama", + "n_rows": 5, + "n_features": 8, + "n_repeats": 1, + "n_valid_auc": 1, + "auc_mean": 0.8, + "auc_std": 0.0, 
+ }, + ] + ), + "downstream_weight_results": pd.DataFrame( + [ + { + "task": "Left AMA", + "configuration": "Baseline + ALL", + "target_column": "left_ama", + "feature": "autopsy_score_z", + "n_repeats": 1, + "n_valid_weights": 1, + "weight_mean": 0.2, + "weight_std": 0.0, + } + ] + ), + "feature_weight_summaries": { + "noncompliance": pd.DataFrame( + [{"feature": "education topic: medications", "weight": 0.1}] + ), + "autopsy": pd.DataFrame( + [{"feature": "pain present: no", "weight": -0.2}] + ), + }, + "acuity_correlations": pd.DataFrame( + [ + { + "feature_a": "autopsy_score_z", + "feature_b": "oasis", + "correlation": -0.2, + }, + { + "feature_a": "noncompliance_score_z", + "feature_b": "oasis", + "correlation": 0.1, + }, + ] + ), + "trust_treatment_results": pd.DataFrame( + [ + {"metric": "autopsy_score_z", "treatment": "total_vent_min"}, + {"metric": "noncompliance_score_z", "treatment": "total_vent_min"}, + ] + ), + } - with _workspace_tempdir() as temp_dir, patch.object( + with patch.object( example_module, "load_eol_mistrust_tables", return_value=(raw_tables, materialized_views), @@ -2275,33 +2540,37 @@ def build_mistrust_scores(self, **kwargs): ), patch.object( example_module, "EOLMistrustModel", - _ExplodingModel, + _FakeModel, ): - with self.assertRaisesRegex(RuntimeError, "boom after streamed artifacts"): - example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - output_dir=Path(temp_dir), - ) - - pd.testing.assert_frame_equal( - pd.read_csv(Path(temp_dir) / "note_corpus.csv"), - note_corpus, - ) - pd.testing.assert_frame_equal( - pd.read_csv(Path(temp_dir) / "note_labels.csv"), - note_labels, - ) - pd.testing.assert_frame_equal( - pd.read_csv(Path(temp_dir) / "chartevent_feature_matrix.csv"), - feature_matrix, - ) - pd.testing.assert_frame_equal( - pd.read_csv(Path(temp_dir) / "code_status_targets.csv"), - code_status_targets, + outputs = example_module.build_eol_mistrust_outputs( + Path("ignored-root"), + repetitions=1, 
) - def test_example_build_outputs_checkpoints_mistrust_scores_before_final_table_failure(self): + self.assertTrue((outputs["mistrust_scores"]["autopsy_score_z"] == 0.0).all()) + self.assertTrue((outputs["final_model_table"]["autopsy_score_z"] == 0.0).all()) + self.assertFalse(bool(outputs["validation_summary"]["autopsy_proxy_enabled"])) + self.assertEqual(set(outputs["feature_weight_summaries"].keys()), {"noncompliance"}) + self.assertNotIn( + "Baseline + Autopsy", + outputs["downstream_auc_results"]["configuration"].tolist(), + ) + self.assertNotIn( + "autopsy_score_z", + outputs["downstream_weight_results"]["feature"].tolist(), + ) + self.assertNotIn( + "autopsy_score_z", + outputs["trust_treatment_results"]["metric"].tolist(), + ) + self.assertFalse( + ( + (outputs["acuity_correlations"]["feature_a"] == "autopsy_score_z") + | (outputs["acuity_correlations"]["feature_b"] == "autopsy_score_z") + ).any() + ) + + def test_example_build_outputs_passes_normal_route_without_autopsy_to_model_run(self): example_module = _load_example_module() raw_tables = { @@ -2318,1220 +2587,46 @@ def test_example_build_outputs_checkpoints_mistrust_scores_before_final_table_fa } note_corpus = pd.DataFrame( [ - {"hadm_id": 101, "note_text": "note-101"}, - {"hadm_id": 102, "note_text": "note-102"}, - {"hadm_id": 103, "note_text": "note-103"}, + {"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} + for hadm_id in [101, 102] ] ) note_labels = pd.DataFrame( [ {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, - {"hadm_id": 103, "noncompliance_label": 0, "autopsy_label": 0.0}, ] ) feature_matrix = pd.DataFrame( [ - {"hadm_id": 101, "Education Readiness: No": 1}, - {"hadm_id": 102, "Education Readiness: No": 0}, - {"hadm_id": 103, "Education Readiness: No": 0}, + {"hadm_id": 101, "education topic: medications": 1}, + {"hadm_id": 102, "education topic: medications": 0}, ] ) code_status_targets = 
pd.DataFrame( [ {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, - {"hadm_id": 103, "code_status_dnr_dni_cmo": 0}, ] ) mistrust_scores = pd.DataFrame( [ { "hadm_id": 101, - "noncompliance_score_z": -0.5, - "autopsy_score_z": 0.0, - "negative_sentiment_score_z": 0.1, - }, - { - "hadm_id": 102, - "noncompliance_score_z": 1.0, - "autopsy_score_z": 1.0, - "negative_sentiment_score_z": -1.0, - }, - { - "hadm_id": 103, - "noncompliance_score_z": -0.5, - "autopsy_score_z": -1.0, - "negative_sentiment_score_z": 0.9, - }, - ] - ) - - class _FakeModel: - def __init__(self, repetitions): - self.repetitions = repetitions - - def build_mistrust_scores(self, **kwargs): - del kwargs - return mistrust_scores - - with _workspace_tempdir() as temp_dir, patch.object( - example_module, - "load_eol_mistrust_tables", - return_value=(raw_tables, materialized_views), - ), patch.object( - example_module, - "build_note_corpus_from_csv", - return_value=note_corpus, - ), patch.object( - example_module, - "build_note_labels_from_csv", - return_value=note_labels, - ), patch.object( - example_module, - "build_chartevent_artifacts_from_csv", - return_value=(feature_matrix, code_status_targets), - ), patch.object( - example_module, - "EOLMistrustModel", - _FakeModel, - ), patch.object( - example_module, - "build_final_model_table_from_code_status_targets", - side_effect=RuntimeError("boom after mistrust scores"), - ): - with self.assertRaisesRegex(RuntimeError, "boom after mistrust scores"): - example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - output_dir=Path(temp_dir), - ) - - expected_scores = mistrust_scores.copy() - expected_scores["autopsy_score_z"] = 0.0 - pd.testing.assert_frame_equal( - pd.read_csv(Path(temp_dir) / "mistrust_scores.csv"), - expected_scores, - ) - - def test_example_build_outputs_can_reuse_cached_mistrust_scores(self): - example_module = _load_example_module() - - raw_tables = { - 
"admissions": self.admissions.copy(), - "patients": self.patients.copy(), - "icustays": self.icustays.copy(), - "d_items": self.d_items.copy(), - } - materialized_views = { - "ventdurations": self.ventdurations.copy(), - "vasopressordurations": self.vasopressordurations.copy(), - "oasis": self.oasis.copy(), - "sapsii": self.sapsii.copy(), - } - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": "note-101"}, - {"hadm_id": 102, "note_text": "note-102"}, - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, - {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, - ] - ) - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "education topic: medications": 1}, - {"hadm_id": 102, "education topic: medications": 0}, - ] - ) - code_status_targets = pd.DataFrame( - [ - {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, - ] - ) - mistrust_scores = pd.DataFrame( - [ - { - "hadm_id": 101, - "noncompliance_score_z": -1.0, - "autopsy_score_z": 0.5, - "negative_sentiment_score_z": 0.1, - }, - { - "hadm_id": 102, - "noncompliance_score_z": 1.0, - "autopsy_score_z": -0.5, - "negative_sentiment_score_z": -0.1, - }, - ] - ) - captured = {} - - class _FakeModel: - def __init__(self, repetitions): - self.repetitions = repetitions - - def build_mistrust_scores(self, **kwargs): - del kwargs - raise AssertionError("should reuse mistrust_scores from cache") - - def run(self, **kwargs): - captured["precomputed_mistrust_scores"] = kwargs["precomputed_mistrust_scores"].copy() - return { - "downstream_auc_results": pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline", - "target_column": "left_ama", - "n_rows": 2, - "n_features": 7, - "n_repeats": 1, - "n_valid_auc": 1, - "auc_mean": 0.7, - "auc_std": 0.0, - } - ] - ), - "feature_weight_summaries": {}, - } - - with _workspace_tempdir() as temp_dir: - cache_dir = Path(temp_dir) - 
note_corpus.to_csv(cache_dir / "note_corpus.csv", index=False) - note_labels.to_csv(cache_dir / "note_labels.csv", index=False) - feature_matrix.to_csv(cache_dir / "chartevent_feature_matrix.csv", index=False) - code_status_targets.to_csv(cache_dir / "code_status_targets.csv", index=False) - mistrust_scores.to_csv(cache_dir / "mistrust_scores.csv", index=False) - - with patch.object( - example_module, - "load_eol_mistrust_tables", - return_value=(raw_tables, materialized_views), - ), patch.object( - example_module, - "build_note_corpus_from_csv", - side_effect=AssertionError("should reuse note_corpus from cache"), - ), patch.object( - example_module, - "build_note_labels_from_csv", - side_effect=AssertionError("should reuse note_labels from cache"), - ), patch.object( - example_module, - "build_chartevent_artifacts_from_csv", - side_effect=AssertionError("should reuse chartevent artifacts from cache"), - ), patch.object( - example_module, - "EOLMistrustModel", - _FakeModel, - ): - outputs = example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - reuse_intermediates=cache_dir, - ) - - expected_scores = mistrust_scores.copy() - expected_scores["autopsy_score_z"] = 0.0 - pd.testing.assert_frame_equal(outputs["mistrust_scores"], expected_scores) - pd.testing.assert_frame_equal(captured["precomputed_mistrust_scores"], expected_scores) - - def test_example_build_outputs_preserves_cached_autopsy_scores_in_paper_like_route(self): - example_module = _load_example_module() - - raw_tables = { - "admissions": self.admissions.copy(), - "patients": self.patients.copy(), - "icustays": self.icustays.copy(), - "d_items": self.d_items.copy(), - } - materialized_views = { - "ventdurations": self.ventdurations.copy(), - "vasopressordurations": self.vasopressordurations.copy(), - "oasis": self.oasis.copy(), - "sapsii": self.sapsii.copy(), - } - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": "note-101"}, - {"hadm_id": 102, "note_text": 
"note-102"}, - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, - {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, - ] - ) - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "education topic: medications": 1}, - {"hadm_id": 102, "education topic: medications": 0}, - ] - ) - code_status_targets = pd.DataFrame( - [ - {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, - ] - ) - mistrust_scores = pd.DataFrame( - [ - { - "hadm_id": 101, - "noncompliance_score_z": -1.0, - "autopsy_score_z": 0.5, - "negative_sentiment_score_z": 0.1, - }, - { - "hadm_id": 102, - "noncompliance_score_z": 1.0, - "autopsy_score_z": -0.5, - "negative_sentiment_score_z": -0.1, - }, - ] - ) - captured = {} - - class _FakeModel: - def __init__(self, repetitions): - self.repetitions = repetitions - - def build_mistrust_scores(self, **kwargs): - del kwargs - raise AssertionError("should reuse mistrust_scores from cache") - - def run(self, **kwargs): - captured["precomputed_mistrust_scores"] = kwargs["precomputed_mistrust_scores"].copy() - return { - "downstream_auc_results": pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline + Autopsy", - "target_column": "left_ama", - "n_rows": 2, - "n_features": 8, - "n_repeats": 1, - "n_valid_auc": 1, - "auc_mean": 0.7, - "auc_std": 0.0, - } - ] - ), - "feature_weight_summaries": { - "autopsy": pd.DataFrame( - [{"feature": "pain present: no", "weight": -0.2}] - ) - }, - } - - with _workspace_tempdir() as temp_dir: - cache_base = Path(temp_dir) / "EOL_Workspace" - cache_dir = cache_base / "paper_like" - cache_dir.mkdir(parents=True, exist_ok=True) - note_corpus.to_csv(cache_dir / "note_corpus.csv", index=False) - note_labels.to_csv(cache_dir / "note_labels.csv", index=False) - feature_matrix.to_csv(cache_dir / "chartevent_feature_matrix.csv", index=False) - code_status_targets.to_csv(cache_dir / 
"code_status_targets.csv", index=False) - mistrust_scores.to_csv(cache_dir / "mistrust_scores.csv", index=False) - - with patch.object( - example_module, - "load_eol_mistrust_tables", - return_value=(raw_tables, materialized_views), - ), patch.object( - example_module, - "build_note_corpus_from_csv", - side_effect=AssertionError("should reuse note_corpus from cache"), - ), patch.object( - example_module, - "build_note_labels_from_csv", - side_effect=AssertionError("should reuse note_labels from cache"), - ), patch.object( - example_module, - "build_chartevent_artifacts_from_csv", - side_effect=AssertionError("should reuse chartevent artifacts from cache"), - ), patch.object( - example_module, - "EOLMistrustModel", - _FakeModel, - ): - outputs = example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - reuse_intermediates=cache_base, - paper_like_dataset_prepare=True, - ) - - pd.testing.assert_frame_equal(outputs["mistrust_scores"], mistrust_scores) - pd.testing.assert_frame_equal(captured["precomputed_mistrust_scores"], mistrust_scores) - self.assertIn("autopsy", outputs["feature_weight_summaries"]) - - def test_example_build_outputs_disables_autopsy_outputs_only_in_default_route(self): - example_module = _load_example_module() - - raw_tables = { - "admissions": self.admissions.copy(), - "patients": self.patients.copy(), - "icustays": self.icustays.copy(), - "d_items": self.d_items.copy(), - } - materialized_views = { - "ventdurations": self.ventdurations.copy(), - "vasopressordurations": self.vasopressordurations.copy(), - "oasis": self.oasis.copy(), - "sapsii": self.sapsii.copy(), - } - note_corpus = pd.DataFrame( - [ - {"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} - for hadm_id in [101, 102, 103, 104, 105] - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": hadm_id, "noncompliance_label": 0, "autopsy_label": float("nan")} - for hadm_id in [101, 102, 103, 104, 105] - ] - ) - feature_matrix = pd.DataFrame( - [{"hadm_id": 
hadm_id, "education topic: medications": 0} for hadm_id in [101, 102, 103, 104, 105]] - ) - code_status_targets = pd.DataFrame( - [{"hadm_id": hadm_id, "code_status_dnr_dni_cmo": 0} for hadm_id in [101, 102, 103, 104, 105]] - ) - mistrust_scores = pd.DataFrame( - [ - { - "hadm_id": hadm_id, - "noncompliance_score_z": float(index - 2), - "autopsy_score_z": float(index) / 10.0, - "negative_sentiment_score_z": float(2 - index), - } - for index, hadm_id in enumerate([101, 102, 103, 104, 105], start=1) - ] - ) - - class _FakeModel: - def __init__(self, repetitions): - self.repetitions = repetitions - - def build_mistrust_scores(self, **kwargs): - del kwargs - return mistrust_scores - - def run(self, **kwargs): - del kwargs - return { - "downstream_auc_results": pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline", - "target_column": "left_ama", - "n_rows": 5, - "n_features": 7, - "n_repeats": 1, - "n_valid_auc": 1, - "auc_mean": 0.7, - "auc_std": 0.0, - }, - { - "task": "Left AMA", - "configuration": "Baseline + Autopsy", - "target_column": "left_ama", - "n_rows": 5, - "n_features": 8, - "n_repeats": 1, - "n_valid_auc": 1, - "auc_mean": 0.8, - "auc_std": 0.0, - }, - ] - ), - "downstream_weight_results": pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline + ALL", - "target_column": "left_ama", - "feature": "autopsy_score_z", - "n_repeats": 1, - "n_valid_weights": 1, - "weight_mean": 0.2, - "weight_std": 0.0, - } - ] - ), - "feature_weight_summaries": { - "noncompliance": pd.DataFrame( - [{"feature": "education topic: medications", "weight": 0.1}] - ), - "autopsy": pd.DataFrame( - [{"feature": "pain present: no", "weight": -0.2}] - ), - }, - "acuity_correlations": pd.DataFrame( - [ - { - "feature_a": "autopsy_score_z", - "feature_b": "oasis", - "correlation": -0.2, - }, - { - "feature_a": "noncompliance_score_z", - "feature_b": "oasis", - "correlation": 0.1, - }, - ] - ), - "trust_treatment_results": pd.DataFrame( - [ - {"metric": 
"autopsy_score_z", "treatment": "total_vent_min"}, - {"metric": "noncompliance_score_z", "treatment": "total_vent_min"}, - ] - ), - } - - with patch.object( - example_module, - "load_eol_mistrust_tables", - return_value=(raw_tables, materialized_views), - ), patch.object( - example_module, - "build_note_corpus_from_csv", - return_value=note_corpus, - ), patch.object( - example_module, - "build_note_labels_from_csv", - return_value=note_labels, - ), patch.object( - example_module, - "build_chartevent_artifacts_from_csv", - return_value=(feature_matrix, code_status_targets), - ), patch.object( - example_module, - "EOLMistrustModel", - _FakeModel, - ): - outputs = example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - ) - - self.assertTrue((outputs["mistrust_scores"]["autopsy_score_z"] == 0.0).all()) - self.assertTrue((outputs["final_model_table"]["autopsy_score_z"] == 0.0).all()) - self.assertFalse(bool(outputs["validation_summary"]["autopsy_proxy_enabled"])) - self.assertEqual(set(outputs["feature_weight_summaries"].keys()), {"noncompliance"}) - self.assertNotIn( - "Baseline + Autopsy", - outputs["downstream_auc_results"]["configuration"].tolist(), - ) - self.assertNotIn( - "autopsy_score_z", - outputs["downstream_weight_results"]["feature"].tolist(), - ) - self.assertNotIn( - "autopsy_score_z", - outputs["trust_treatment_results"]["metric"].tolist(), - ) - self.assertFalse( - ( - (outputs["acuity_correlations"]["feature_a"] == "autopsy_score_z") - | (outputs["acuity_correlations"]["feature_b"] == "autopsy_score_z") - ).any() - ) - - def test_example_build_outputs_passes_normal_route_without_autopsy_to_model_run(self): - example_module = _load_example_module() - - raw_tables = { - "admissions": self.admissions.copy(), - "patients": self.patients.copy(), - "icustays": self.icustays.copy(), - "d_items": self.d_items.copy(), - } - materialized_views = { - "ventdurations": self.ventdurations.copy(), - "vasopressordurations": 
self.vasopressordurations.copy(), - "oasis": self.oasis.copy(), - "sapsii": self.sapsii.copy(), - } - note_corpus = pd.DataFrame( - [ - {"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} - for hadm_id in [101, 102] - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, - {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, - ] - ) - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "education topic: medications": 1}, - {"hadm_id": 102, "education topic: medications": 0}, - ] - ) - code_status_targets = pd.DataFrame( - [ - {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, - ] - ) - mistrust_scores = pd.DataFrame( - [ - { - "hadm_id": 101, - "noncompliance_score_z": -1.0, - "autopsy_score_z": 0.5, - "negative_sentiment_score_z": 0.1, - }, - { - "hadm_id": 102, - "noncompliance_score_z": 1.0, - "autopsy_score_z": -0.5, - "negative_sentiment_score_z": -0.1, - }, - ] - ) - captured = {} - factory_kwargs = [] - - class _FakeModel: - def __init__(self, repetitions): - self.repetitions = repetitions - - def build_mistrust_scores(self, **kwargs): - del kwargs - return mistrust_scores - - def run(self, **kwargs): - captured["score_columns"] = list(kwargs.get("score_columns") or []) - captured["feature_configurations"] = kwargs.get("feature_configurations") - resolver = kwargs.get("downstream_estimator_factory_resolver") - captured["downstream_estimator_factory_resolver"] = resolver - if callable(resolver): - captured["resolver_returns"] = [ - callable(resolver("Left AMA", "Baseline")), - callable(resolver("Code Status", "Baseline")), - callable(resolver("In-hospital mortality", "Baseline")), - ] - return { - "downstream_auc_results": pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline", - "target_column": "left_ama", - "n_rows": 2, - "n_features": 7, - "n_repeats": 1, - "n_valid_auc": 1, - "auc_mean": 0.7, - "auc_std": 0.0, - 
} - ] - ), - "feature_weight_summaries": {}, - } - - with patch.object( - example_module, - "load_eol_mistrust_tables", - return_value=(raw_tables, materialized_views), - ), patch.object( - example_module, - "build_logistic_cv_estimator_factory", - side_effect=lambda **kwargs: factory_kwargs.append(dict(kwargs)) or (lambda: kwargs), - ), patch.object( - example_module, - "build_note_corpus_from_csv", - return_value=note_corpus, - ), patch.object( - example_module, - "build_note_labels_from_csv", - return_value=note_labels, - ), patch.object( - example_module, - "build_chartevent_artifacts_from_csv", - return_value=(feature_matrix, code_status_targets), - ), patch.object( - example_module, - "EOLMistrustModel", - _FakeModel, - ): - example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - ) - - self.assertEqual( - captured["score_columns"], - ["noncompliance_score_z", "negative_sentiment_score_z"], - ) - self.assertEqual( - list(captured["feature_configurations"].keys()), - [ - "Baseline", - "Baseline + Race", - "Baseline + Noncompliant", - "Baseline + Neg-Sentiment", - "Baseline + ALL", - ], - ) - self.assertNotIn("Baseline + Autopsy", captured["feature_configurations"]) - resolver = captured["downstream_estimator_factory_resolver"] - self.assertTrue(callable(resolver)) - self.assertEqual(captured["resolver_returns"], [True, True, True]) - self.assertEqual( - factory_kwargs, - [ - {"Cs": [0.01, 0.03, 0.1, 0.3], "class_weight": "balanced", "scoring": "roc_auc"}, - {"Cs": [0.01, 0.03, 0.1, 0.3], "class_weight": "balanced", "scoring": "roc_auc"}, - {"Cs": [0.03, 0.1, 0.3, 1.0], "class_weight": "balanced", "scoring": "roc_auc"}, - ], - ) - - def test_example_build_outputs_passes_paper_like_route_to_model_run(self): - example_module = _load_example_module() - - raw_tables = { - "admissions": self.admissions.copy(), - "patients": self.patients.copy(), - "icustays": self.icustays.copy(), - "d_items": self.d_items.copy(), - } - 
materialized_views = { - "ventdurations": self.ventdurations.copy(), - "vasopressordurations": self.vasopressordurations.copy(), - "oasis": self.oasis.copy(), - "sapsii": self.sapsii.copy(), - } - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": "note-101"}, - {"hadm_id": 102, "note_text": "note-102"}, - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, - {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, - ] - ) - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "education topic: medications": 1}, - {"hadm_id": 102, "education topic: medications": 0}, - ] - ) - code_status_targets = pd.DataFrame( - [ - {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, - ] - ) - mistrust_scores = pd.DataFrame( - [ - { - "hadm_id": 101, - "noncompliance_score_z": -1.0, - "autopsy_score_z": 0.5, - "negative_sentiment_score_z": 0.1, - }, - { - "hadm_id": 102, - "noncompliance_score_z": 1.0, - "autopsy_score_z": -0.5, - "negative_sentiment_score_z": -0.1, - }, - ] - ) - captured = {} - - class _FakeModel: - def __init__(self, repetitions): - self.repetitions = repetitions - - def build_mistrust_scores(self, **kwargs): - del kwargs - return mistrust_scores - - def run(self, **kwargs): - captured["score_columns"] = kwargs.get("score_columns") - captured["feature_configurations"] = kwargs.get("feature_configurations") - captured["downstream_estimator_factory_resolver"] = kwargs.get( - "downstream_estimator_factory_resolver" - ) - return { - "downstream_auc_results": pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline", - "target_column": "left_ama", - "n_rows": 2, - "n_features": 7, - "n_repeats": 1, - "n_valid_auc": 1, - "auc_mean": 0.7, - "auc_std": 0.0, - } - ] - ), - "feature_weight_summaries": {}, - } - - with patch.object( - example_module, - "load_eol_mistrust_tables", - return_value=(raw_tables, 
materialized_views), - ), patch.object( - example_module, - "build_note_corpus_from_csv", - return_value=note_corpus, - ), patch.object( - example_module, - "build_note_labels_from_csv", - return_value=note_labels, - ), patch.object( - example_module, - "build_chartevent_artifacts_from_csv", - return_value=(feature_matrix, code_status_targets), - ), patch.object( - example_module, - "EOLMistrustModel", - _FakeModel, - ): - outputs = example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - paper_like_dataset_prepare=True, - ) - - self.assertIsNone(captured["score_columns"]) - self.assertIsNone(captured["feature_configurations"]) - self.assertIsNone(captured["downstream_estimator_factory_resolver"]) - self.assertEqual(outputs["validation_summary"]["dataset_prepare_mode"], "paper_like") - self.assertTrue(bool(outputs["validation_summary"]["autopsy_proxy_enabled"])) - - def test_example_build_outputs_can_write_stream_cache_to_separate_base_directory(self): - example_module = _load_example_module() - - raw_tables = { - "admissions": self.admissions.copy(), - "patients": self.patients.copy(), - "icustays": self.icustays.copy(), - "d_items": self.d_items.copy(), - } - materialized_views = { - "ventdurations": self.ventdurations.copy(), - "vasopressordurations": self.vasopressordurations.copy(), - "oasis": self.oasis.copy(), - "sapsii": self.sapsii.copy(), - } - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": "note-101"}, - {"hadm_id": 102, "note_text": "note-102"}, - ] - ) - - with _workspace_tempdir() as temp_dir, patch.object( - example_module, - "load_eol_mistrust_tables", - return_value=(raw_tables, materialized_views), - ), patch.object( - example_module, - "build_note_corpus_from_csv", - return_value=note_corpus, - ), patch.object( - example_module, - "build_note_labels_from_csv", - side_effect=RuntimeError("stop after note corpus"), - ): - output_dir = Path(temp_dir) / "runs" / "paper_eval" - stream_cache_base = 
Path(temp_dir) / "EOL_Workspace" - - with self.assertRaisesRegex(RuntimeError, "stop after note corpus"): - example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - output_dir=output_dir, - stream_cache_dir=stream_cache_base, - paper_like_dataset_prepare=True, - ) - - expected_cache_dir = stream_cache_base / "paper_like" - pd.testing.assert_frame_equal( - pd.read_csv(expected_cache_dir / "note_corpus.csv"), - note_corpus, - ) - self.assertFalse((output_dir / "note_corpus.csv").exists()) - - def test_example_build_outputs_can_reuse_from_stream_cache_base_directory(self): - example_module = _load_example_module() - - raw_tables = { - "admissions": self.admissions.copy(), - "patients": self.patients.copy(), - "icustays": self.icustays.copy(), - "d_items": self.d_items.copy(), - } - materialized_views = { - "ventdurations": self.ventdurations.copy(), - "vasopressordurations": self.vasopressordurations.copy(), - "oasis": self.oasis.copy(), - "sapsii": self.sapsii.copy(), - } - note_corpus = pd.DataFrame( - [ - {"hadm_id": 101, "note_text": "note-101"}, - {"hadm_id": 102, "note_text": "note-102"}, - ] - ) - note_labels = pd.DataFrame( - [ - {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, - {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, - ] - ) - feature_matrix = pd.DataFrame( - [ - {"hadm_id": 101, "education topic: medications": 1}, - {"hadm_id": 102, "education topic: medications": 0}, - ] - ) - code_status_targets = pd.DataFrame( - [ - {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, - ] - ) - mistrust_scores = pd.DataFrame( - [ - { - "hadm_id": hadm_id, - "noncompliance_score_z": 0.0, - "autopsy_score_z": 0.0, - "negative_sentiment_score_z": 0.0, - } - for hadm_id in [101, 102] - ] - ) - captured = {} - - class _FakeModel: - def __init__(self, repetitions): - self.repetitions = repetitions - - def build_mistrust_scores(self, **kwargs): - del 
kwargs - return mistrust_scores - - def run(self, **kwargs): - del kwargs - return { - "downstream_auc_results": pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline", - "target_column": "left_ama", - "n_rows": 2, - "n_features": 7, - "n_repeats": 1, - "n_valid_auc": 1, - "auc_mean": 0.7, - "auc_std": 0.0, - } - ] - ), - "feature_weight_summaries": {}, - } - - with _workspace_tempdir() as temp_dir: - stream_cache_base = Path(temp_dir) / "EOL_Workspace" - cache_dir = stream_cache_base / "paper_like" - cache_dir.mkdir(parents=True, exist_ok=True) - note_corpus.to_csv(cache_dir / "note_corpus.csv", index=False) - note_labels.to_csv(cache_dir / "note_labels.csv", index=False) - feature_matrix.to_csv(cache_dir / "chartevent_feature_matrix.csv", index=False) - code_status_targets.to_csv(cache_dir / "code_status_targets.csv", index=False) - - with patch.object( - example_module, - "load_eol_mistrust_tables", - return_value=(raw_tables, materialized_views), - ), patch.object( - example_module, - "build_note_corpus_from_csv", - side_effect=AssertionError("should reuse note_corpus from cache"), - ), patch.object( - example_module, - "build_note_labels_from_csv", - side_effect=AssertionError("should reuse note_labels from cache"), - ), patch.object( - example_module, - "build_chartevent_artifacts_from_csv", - side_effect=AssertionError("should reuse chartevent artifacts from cache"), - ), patch.object( - example_module, - "EOLMistrustModel", - _FakeModel, - ): - outputs = example_module.build_eol_mistrust_outputs( - Path("ignored-root"), - repetitions=1, - reuse_intermediates=stream_cache_base, - paper_like_dataset_prepare=True, - ) - - self.assertEqual(outputs["validation_summary"]["dataset_prepare_mode"], "paper_like") - self.assertEqual( - outputs["validation_summary"]["all_cohort_rows"], - len(outputs["all_cohort"]), - ) - - def test_example_build_paper_comparison_outputs_emits_expected_delta_tables(self): - example_module = _load_example_module() - - 
eol_cohort = pd.DataFrame( - [ - { - "race": "BLACK", - "insurance_group": "Public", - "discharge_category": "Deceased", - "gender": "F", - "los_days": 10.0, - "age": 72.0, - }, - { - "race": "WHITE", - "insurance_group": "Private", - "discharge_category": "Hospice", - "gender": "M", - "los_days": 12.0, - "age": 78.0, - }, - ] - ) - feature_weight_summaries = { - "noncompliance": pd.DataFrame( - [ - {"feature": "Education Readiness: No", "weight": 0.4}, - {"feature": "Riker-SAS Scale: Agitated", "weight": 0.3}, - {"feature": "Richmond-RAS Scale: 0 Alert and calm", "weight": -0.2}, - ] - ) - } - acuity_correlations = pd.DataFrame( - [ - { - "feature_a": "oasis", - "feature_b": "sapsii", - "correlation": 0.70, - } - ] - ) - downstream_auc_results = pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline", - "target_column": "left_ama", - "n_rows": 48071, - "n_features": 7, - "n_repeats": 100, - "n_valid_auc": 100, - "auc_mean": 0.860, - "auc_std": 0.014, - } - ] - ) - downstream_weight_results = pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline + ALL", - "target_column": "left_ama", - "feature": "noncompliance_score_z", - "n_repeats": 100, - "n_valid_weights": 100, - "weight_mean": 0.50, - "weight_std": 0.08, - } - ] - ) - - outputs = example_module.build_paper_comparison_outputs( - { - "eol_cohort": eol_cohort, - "feature_weight_summaries": feature_weight_summaries, - "acuity_correlations": acuity_correlations, - "downstream_auc_results": downstream_auc_results, - "downstream_weight_results": downstream_weight_results, - }, - repetitions=100, - ) - - self.assertIn("table1_comparison", outputs) - self.assertIn("table3_snapshot", outputs) - self.assertIn("table4_comparison", outputs) - self.assertIn("table5_comparison", outputs) - self.assertIn("table6_comparison", outputs) - table5 = outputs["table5_comparison"] - self.assertEqual(len(table5), 1) - self.assertAlmostEqual(table5.iloc[0]["delta_auc_mean"], 0.001) - table6 = 
outputs["table6_comparison"] - self.assertEqual(len(table6), 1) - self.assertAlmostEqual(table6.iloc[0]["delta_weight_mean"], -0.02) - # Paper Table 6 reports 1.96*std (95% CI half-width); run_weight_std must match - # run raw std = 0.08, so run_weight_std should be 0.08 * 1.96 = 0.1568 - self.assertAlmostEqual(table6.iloc[0]["run_weight_std"], 0.08 * 1.96, places=4) - self.assertFalse(outputs["table3_snapshot"].empty) - - def test_build_paper_comparison_outputs_omits_autopsy_rows_when_disabled_in_validation(self): - example_module = _load_example_module() - - outputs = example_module.build_paper_comparison_outputs( - { - "validation_summary": {"autopsy_proxy_enabled": False}, - "downstream_weight_results": pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline + ALL", - "target_column": "left_ama", - "feature": "autopsy_score_z", - "n_repeats": 100, - "n_valid_weights": 100, - "weight_mean": 0.0, - "weight_std": 0.0, - }, - { - "task": "Left AMA", - "configuration": "Baseline + ALL", - "target_column": "left_ama", - "feature": "noncompliance_score_z", - "n_repeats": 100, - "n_valid_weights": 100, - "weight_mean": 0.5, - "weight_std": 0.08, - }, - ] - ), - }, - repetitions=100, - ) - - self.assertEqual( - outputs["table6_comparison"]["feature"].tolist(), - ["noncompliant"], - ) - - def test_build_paper_table1_comparison_reports_median_and_iqr_for_continuous_metrics(self): - example_module = _load_example_module() - eol_cohort = pd.DataFrame( - [ - {"hadm_id": 1, "race": "BLACK", "los_days": 1.0, "age": 10.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "F"}, - {"hadm_id": 2, "race": "BLACK", "los_days": 2.0, "age": 20.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "M"}, - {"hadm_id": 3, "race": "BLACK", "los_days": 3.0, "age": 30.0, "insurance_group": "Private", "discharge_category": "Hospice", "gender": "F"}, - {"hadm_id": 4, "race": "BLACK", "los_days": 4.0, "age": 40.0, "insurance_group": 
"Self-Pay", "discharge_category": "Skilled Nursing Facility", "gender": "M"}, - {"hadm_id": 5, "race": "WHITE", "los_days": 10.0, "age": 50.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "F"}, - {"hadm_id": 6, "race": "WHITE", "los_days": 20.0, "age": 60.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "M"}, - {"hadm_id": 7, "race": "WHITE", "los_days": 30.0, "age": 70.0, "insurance_group": "Private", "discharge_category": "Hospice", "gender": "F"}, - {"hadm_id": 8, "race": "WHITE", "los_days": 40.0, "age": 80.0, "insurance_group": "Self-Pay", "discharge_category": "Skilled Nursing Facility", "gender": "M"}, - ] - ) - - table1 = example_module.build_paper_table1_comparison(eol_cohort) - los_black = table1[(table1["metric"] == "Length of stay (median days)") & (table1["race"] == "BLACK")].iloc[0] - age_white = table1[(table1["metric"] == "Age (median years)") & (table1["race"] == "WHITE")].iloc[0] - - self.assertEqual(los_black["summary_stat"], "median_iqr") - self.assertAlmostEqual(float(los_black["run_numeric"]), 2.5) - self.assertAlmostEqual(float(los_black["run_interval_lower"]), 1.75) - self.assertAlmostEqual(float(los_black["run_interval_upper"]), 3.25) - self.assertIn("[", str(los_black["paper_value"])) - self.assertIn("[", str(los_black["run_value"])) - - self.assertEqual(age_white["summary_stat"], "median_iqr") - self.assertAlmostEqual(float(age_white["run_numeric"]), 65.0) - self.assertAlmostEqual(float(age_white["run_interval_lower"]), 57.5) - self.assertAlmostEqual(float(age_white["run_interval_upper"]), 72.5) - - def test_example_build_outputs_can_attach_paper_comparison_and_write_artifacts(self): - example_module = _load_example_module() - - raw_tables = { - "admissions": self.admissions.copy(), - "patients": self.patients.copy(), - "icustays": self.icustays.copy(), - "d_items": self.d_items.copy(), - } - materialized_views = { - "ventdurations": self.ventdurations.copy(), - 
"vasopressordurations": self.vasopressordurations.copy(), - "oasis": self.oasis.copy(), - "sapsii": self.sapsii.copy(), - } - note_corpus = pd.DataFrame( - [{"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} for hadm_id in range(101, 107)] - ) - note_labels = pd.DataFrame( - [ - { - "hadm_id": hadm_id, - "noncompliance_label": int(hadm_id % 2 == 0), - "autopsy_label": int(hadm_id % 3 == 0), - } - for hadm_id in range(101, 107) - ] - ) - feature_matrix = pd.DataFrame( - [ - { - "hadm_id": hadm_id, - "Education Readiness: No": int(hadm_id % 2 == 0), - "Pain Level: 7-Mod to Severe": int(hadm_id % 2 == 1), - } - for hadm_id in range(101, 107) - ] - ) - code_status_targets = pd.DataFrame( - [ - {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, - {"hadm_id": 103, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 104, "code_status_dnr_dni_cmo": 1}, - {"hadm_id": 105, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 106, "code_status_dnr_dni_cmo": 0}, - ] - ) - mistrust_scores = pd.DataFrame( - [ + "noncompliance_score_z": -1.0, + "autopsy_score_z": 0.5, + "negative_sentiment_score_z": 0.1, + }, { - "hadm_id": hadm_id, - "noncompliance_score_z": 0.0, - "autopsy_score_z": 0.0, - "negative_sentiment_score_z": 0.0, - } - for hadm_id in range(101, 107) + "hadm_id": 102, + "noncompliance_score_z": 1.0, + "autopsy_score_z": -0.5, + "negative_sentiment_score_z": -0.1, + }, ] ) - comparison_outputs = { - "summary": {"table5_max_abs_delta": 0.123}, - "table5_comparison": pd.DataFrame([{"task": "Left AMA"}]), - } + captured = {} + factory_kwargs = [] class _FakeModel: def __init__(self, repetitions): @@ -3542,7 +2637,16 @@ def build_mistrust_scores(self, **kwargs): return mistrust_scores def run(self, **kwargs): - del kwargs + captured["score_columns"] = list(kwargs.get("score_columns") or []) + captured["feature_configurations"] = kwargs.get("feature_configurations") + resolver = kwargs.get("downstream_estimator_factory_resolver") + 
captured["downstream_estimator_factory_resolver"] = resolver + if callable(resolver): + captured["resolver_returns"] = [ + callable(resolver("Left AMA", "Baseline")), + callable(resolver("Code Status", "Baseline")), + callable(resolver("In-hospital mortality", "Baseline")), + ] return { "downstream_auc_results": pd.DataFrame( [ @@ -3550,10 +2654,10 @@ def run(self, **kwargs): "task": "Left AMA", "configuration": "Baseline", "target_column": "left_ama", - "n_rows": 6, + "n_rows": 2, "n_features": 7, - "n_repeats": 2, - "n_valid_auc": 2, + "n_repeats": 1, + "n_valid_auc": 1, "auc_mean": 0.7, "auc_std": 0.0, } @@ -3562,10 +2666,14 @@ def run(self, **kwargs): "feature_weight_summaries": {}, } - with _workspace_tempdir() as temp_dir, patch.object( + with patch.object( example_module, "load_eol_mistrust_tables", return_value=(raw_tables, materialized_views), + ), patch.object( + example_module, + "build_logistic_cv_estimator_factory", + side_effect=lambda **kwargs: factory_kwargs.append(dict(kwargs)) or (lambda: kwargs), ), patch.object( example_module, "build_note_corpus_from_csv", @@ -3582,27 +2690,40 @@ def run(self, **kwargs): example_module, "EOLMistrustModel", _FakeModel, - ), patch.object( - example_module, - "build_paper_comparison_outputs", - return_value=comparison_outputs, - ) as comparison_builder, patch.object( - example_module, - "write_paper_comparison_artifacts", - ) as comparison_writer: - outputs = example_module.build_eol_mistrust_outputs( + ): + example_module.build_eol_mistrust_outputs( Path("ignored-root"), - repetitions=2, - compare_to_paper=True, - output_dir=Path(temp_dir), + repetitions=1, ) - self.assertEqual(outputs["paper_comparison"], comparison_outputs) - comparison_builder.assert_called_once() - comparison_writer.assert_called_once() - self.assertTrue(bool(comparison_writer.call_args.kwargs["include_summary"])) + self.assertEqual( + captured["score_columns"], + ["noncompliance_score_z", "negative_sentiment_score_z"], + ) + 
self.assertEqual( + list(captured["feature_configurations"].keys()), + [ + "Baseline", + "Baseline + Race", + "Baseline + Noncompliant", + "Baseline + Neg-Sentiment", + "Baseline + ALL", + ], + ) + self.assertNotIn("Baseline + Autopsy", captured["feature_configurations"]) + resolver = captured["downstream_estimator_factory_resolver"] + self.assertTrue(callable(resolver)) + self.assertEqual(captured["resolver_returns"], [True, True, True]) + self.assertEqual( + factory_kwargs, + [ + {"Cs": [0.01, 0.03, 0.1, 0.3], "class_weight": "balanced", "scoring": "roc_auc"}, + {"Cs": [0.01, 0.03, 0.1, 0.3], "class_weight": "balanced", "scoring": "roc_auc"}, + {"Cs": [0.03, 0.1, 0.3, 1.0], "class_weight": "balanced", "scoring": "roc_auc"}, + ], + ) - def test_example_build_outputs_always_writes_paper_table_artifacts_when_compare_disabled(self): + def test_example_build_outputs_passes_paper_like_route_to_model_run(self): example_module = _load_example_module() raw_tables = { @@ -3618,53 +2739,46 @@ def test_example_build_outputs_always_writes_paper_table_artifacts_when_compare_ "sapsii": self.sapsii.copy(), } note_corpus = pd.DataFrame( - [{"hadm_id": hadm_id, "note_text": f"note-{hadm_id}"} for hadm_id in range(101, 107)] + [ + {"hadm_id": 101, "note_text": "note-101"}, + {"hadm_id": 102, "note_text": "note-102"}, + ] ) note_labels = pd.DataFrame( [ - { - "hadm_id": hadm_id, - "noncompliance_label": int(hadm_id % 2 == 0), - "autopsy_label": int(hadm_id % 3 == 0), - } - for hadm_id in range(101, 107) + {"hadm_id": 101, "noncompliance_label": 0, "autopsy_label": float("nan")}, + {"hadm_id": 102, "noncompliance_label": 1, "autopsy_label": 1.0}, ] ) feature_matrix = pd.DataFrame( [ - { - "hadm_id": hadm_id, - "Education Readiness: No": int(hadm_id % 2 == 0), - "Pain Level: 7-Mod to Severe": int(hadm_id % 2 == 1), - } - for hadm_id in range(101, 107) + {"hadm_id": 101, "education topic: medications": 1}, + {"hadm_id": 102, "education topic: medications": 0}, ] ) code_status_targets = 
pd.DataFrame( [ {"hadm_id": 101, "code_status_dnr_dni_cmo": 0}, {"hadm_id": 102, "code_status_dnr_dni_cmo": 1}, - {"hadm_id": 103, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 104, "code_status_dnr_dni_cmo": 1}, - {"hadm_id": 105, "code_status_dnr_dni_cmo": 0}, - {"hadm_id": 106, "code_status_dnr_dni_cmo": 0}, ] ) mistrust_scores = pd.DataFrame( [ { - "hadm_id": hadm_id, - "noncompliance_score_z": 0.0, - "autopsy_score_z": 0.0, - "negative_sentiment_score_z": 0.0, - } - for hadm_id in range(101, 107) + "hadm_id": 101, + "noncompliance_score_z": -1.0, + "autopsy_score_z": 0.5, + "negative_sentiment_score_z": 0.1, + }, + { + "hadm_id": 102, + "noncompliance_score_z": 1.0, + "autopsy_score_z": -0.5, + "negative_sentiment_score_z": -0.1, + }, ] ) - comparison_outputs = { - "summary": {"table5_max_abs_delta": 0.123}, - "table5_comparison": pd.DataFrame([{"task": "Left AMA"}]), - } + captured = {} class _FakeModel: def __init__(self, repetitions): @@ -3675,7 +2789,11 @@ def build_mistrust_scores(self, **kwargs): return mistrust_scores def run(self, **kwargs): - del kwargs + captured["score_columns"] = kwargs.get("score_columns") + captured["feature_configurations"] = kwargs.get("feature_configurations") + captured["downstream_estimator_factory_resolver"] = kwargs.get( + "downstream_estimator_factory_resolver" + ) return { "downstream_auc_results": pd.DataFrame( [ @@ -3683,10 +2801,10 @@ def run(self, **kwargs): "task": "Left AMA", "configuration": "Baseline", "target_column": "left_ama", - "n_rows": 6, + "n_rows": 2, "n_features": 7, - "n_repeats": 2, - "n_valid_auc": 2, + "n_repeats": 1, + "n_valid_auc": 1, "auc_mean": 0.7, "auc_std": 0.0, } @@ -3695,7 +2813,7 @@ def run(self, **kwargs): "feature_weight_summaries": {}, } - with _workspace_tempdir() as temp_dir, patch.object( + with patch.object( example_module, "load_eol_mistrust_tables", return_value=(raw_tables, materialized_views), @@ -3715,336 +2833,52 @@ def run(self, **kwargs): example_module, "EOLMistrustModel", 
_FakeModel, - ), patch.object( - example_module, - "build_paper_comparison_outputs", - return_value=comparison_outputs, - ) as comparison_builder, patch.object( - example_module, - "write_paper_comparison_artifacts", - ) as comparison_writer: + ): outputs = example_module.build_eol_mistrust_outputs( Path("ignored-root"), - repetitions=2, - compare_to_paper=False, - output_dir=Path(temp_dir), - ) - - self.assertEqual(outputs["paper_comparison"], comparison_outputs) - comparison_builder.assert_called_once() - comparison_writer.assert_called_once() - self.assertFalse(bool(comparison_writer.call_args.kwargs["include_summary"])) - - def test_write_paper_comparison_artifacts_writes_human_readable_summary_txt(self): - example_module = _load_example_module() - - comparison_outputs = { - "summary": { - "table1_rows": 1, - "table2_rows": 1, - "table3_snapshot_rows": 1, - "table4_rows": 1, - "table5_rows": 1, - "table6_rows": 1, - "table4_max_abs_delta": 0.1, - "table5_max_abs_delta": 0.2, - "table6_max_abs_delta": 0.3, - }, - "table1_comparison": pd.DataFrame( - [ - { - "metric": "Population Size", - "race": "BLACK", - "paper_value": "1214", - "run_value": "1215", - } - ] - ), - "table2_comparison": pd.DataFrame( - [ - { - "treatment": "total_vent_min", - "paper_n_black": 510, - "run_n_black": 587, - "paper_n_white": 4810, - "run_n_white": 5603, - "paper_median_black": 3180.0, - "run_median_black": 2700.0, - "paper_median_white": 2520.0, - "run_median_white": 2280.0, - } - ] - ), - "table3_comparison": pd.DataFrame( - [ - { - "proxy_model": "noncompliance", - "direction": "positive", - "rank": 1, - "paper_feature": "riker-sas scale: agitated", - "paper_weight": 0.7013, - "run_weight": 0.6642, - "run_feature_found": True, - } - ] - ), - "table4_comparison": pd.DataFrame( - [ - { - "feature_a": "oasis", - "feature_b": "sapsii", - "paper_correlation": 0.679, - "run_correlation": 0.695, - } - ] - ), - "table5_comparison": pd.DataFrame( - [ - { - "task": "Left AMA", - 
"configuration": "Baseline", - "paper_auc_mean": 0.859, - "run_auc_mean": 0.870, - "paper_n_rows": 48071, - "run_n_rows": 48289, - } - ] - ), - "table6_comparison": pd.DataFrame( - [ - { - "task": "Left AMA", - "feature": "age", - "paper_weight_mean": -2.10, - "run_weight_mean": -0.78, - } - ] - ), - } - - with _workspace_tempdir() as temp_dir: - output_dir = Path(temp_dir) / "paper_comparison" - example_module.write_paper_comparison_artifacts( - comparison_outputs, - output_dir=output_dir, + repetitions=1, + paper_like_dataset_prepare=True, ) - summary_text = (output_dir / "paper_comparison_summary.txt").read_text() - - self.assertIn("Paper comparison summary:", summary_text) - self.assertIn("Table 1 vs Paper:", summary_text) - self.assertIn("Population Size | BLACK | paper=1214 | run=1215", summary_text) - self.assertIn("Table 5 vs Paper:", summary_text) - self.assertIn("Left AMA | Baseline | n 48071->48289 | auc 0.859->0.870", summary_text) + self.assertIsNone(captured["score_columns"]) + self.assertIsNone(captured["feature_configurations"]) + self.assertIsNone(captured["downstream_estimator_factory_resolver"]) + self.assertEqual(outputs["validation_summary"]["dataset_prepare_mode"], "paper_like") + self.assertTrue(bool(outputs["validation_summary"]["autopsy_proxy_enabled"])) - def test_write_paper_comparison_artifacts_can_skip_human_readable_summary_txt(self): + def test_build_run_table1_summary_reports_median_and_iqr_for_continuous_metrics(self): example_module = _load_example_module() + eol_cohort = pd.DataFrame( + [ + {"hadm_id": 1, "race": "BLACK", "los_days": 1.0, "age": 10.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "F"}, + {"hadm_id": 2, "race": "BLACK", "los_days": 2.0, "age": 20.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "M"}, + {"hadm_id": 3, "race": "BLACK", "los_days": 3.0, "age": 30.0, "insurance_group": "Private", "discharge_category": "Hospice", "gender": "F"}, + {"hadm_id": 4, 
"race": "BLACK", "los_days": 4.0, "age": 40.0, "insurance_group": "Self-Pay", "discharge_category": "Skilled Nursing Facility", "gender": "M"}, + {"hadm_id": 5, "race": "WHITE", "los_days": 10.0, "age": 50.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "F"}, + {"hadm_id": 6, "race": "WHITE", "los_days": 20.0, "age": 60.0, "insurance_group": "Public", "discharge_category": "Deceased", "gender": "M"}, + {"hadm_id": 7, "race": "WHITE", "los_days": 30.0, "age": 70.0, "insurance_group": "Private", "discharge_category": "Hospice", "gender": "F"}, + {"hadm_id": 8, "race": "WHITE", "los_days": 40.0, "age": 80.0, "insurance_group": "Self-Pay", "discharge_category": "Skilled Nursing Facility", "gender": "M"}, + ] + ) - comparison_outputs = { - "summary": { - "table1_rows": 1, - }, - "table1_comparison": pd.DataFrame( - [ - { - "metric": "Population Size", - "race": "BLACK", - "paper_value": "1214", - "run_value": "1215", - } - ] - ), - } - - with _workspace_tempdir() as temp_dir: - output_dir = Path(temp_dir) / "paper_comparison" - example_module.write_paper_comparison_artifacts( - comparison_outputs, - output_dir=output_dir, - include_summary=False, - ) - - self.assertTrue((output_dir / "table1_comparison.csv").exists()) - self.assertTrue((output_dir / "summary.json").exists()) - self.assertFalse((output_dir / "paper_comparison_summary.txt").exists()) - - def test_main_prints_full_paper_table_summary_with_paper_and_run_values(self): - example_module = _load_example_module() + table1 = example_module._build_run_table1_summary(eol_cohort) + los_black = table1[(table1["metric"] == "Length of stay (median days)") & (table1["race"] == "BLACK")].iloc[0] + age_white = table1[(table1["metric"] == "Age (median years)") & (table1["race"] == "WHITE")].iloc[0] - comparison_outputs = { - "summary": { - "table1_rows": 2, - "table2_rows": 1, - "table3_snapshot_rows": 1, - "table4_rows": 1, - "table5_rows": 1, - "table6_rows": 1, - "table4_max_abs_delta": 0.1, 
- "table5_max_abs_delta": 0.2, - "table6_max_abs_delta": 0.3, - }, - "table1_comparison": pd.DataFrame( - [ - { - "metric": "Population Size", - "race": "BLACK", - "paper_value": "1214", - "run_value": "1215", - } - ] - ), - "table2_comparison": pd.DataFrame( - [ - { - "treatment": "total_vent_min", - "paper_n_black": 510, - "run_n_black": 587, - "paper_n_white": 4810, - "run_n_white": 5603, - "paper_median_black": 3180.0, - "run_median_black": 2700.0, - "paper_median_white": 2520.0, - "run_median_white": 2280.0, - } - ] - ), - "table3_comparison": pd.DataFrame( - [ - { - "proxy_model": "noncompliance", - "direction": "positive", - "rank": 1, - "paper_feature": "riker-sas scale: agitated", - "paper_weight": 0.7013, - "run_weight": 0.6642, - "run_feature_found": True, - } - ] - ), - "table4_comparison": pd.DataFrame( - [ - { - "feature_a": "oasis", - "feature_b": "sapsii", - "paper_correlation": 0.679, - "run_correlation": 0.695, - } - ] - ), - "table5_comparison": pd.DataFrame( - [ - { - "task": "Left AMA", - "configuration": "Baseline", - "paper_auc_mean": 0.859, - "run_auc_mean": 0.870, - "paper_n_rows": 48071, - "run_n_rows": 48289, - } - ] - ), - "table6_comparison": pd.DataFrame( - [ - { - "task": "Left AMA", - "feature": "age", - "paper_weight_mean": -2.10, - "run_weight_mean": -0.78, - } - ] - ), - } - artifacts = { - "validation_summary": { - "database_flavor": "postgresql", - "schema_name": "mimiciii", - }, - "base_admissions": pd.DataFrame(columns=["hadm_id"]), - "all_cohort": pd.DataFrame(columns=["hadm_id"]), - "eol_cohort": pd.DataFrame(columns=["hadm_id"]), - "chartevent_feature_matrix": pd.DataFrame(columns=["hadm_id"]), - "note_labels": pd.DataFrame(columns=["hadm_id"]), - "mistrust_scores": pd.DataFrame(columns=["hadm_id"]), - "final_model_table": pd.DataFrame(columns=["hadm_id"]), - "paper_comparison": comparison_outputs, - } + self.assertEqual(los_black["summary_stat"], "median_iqr") + self.assertAlmostEqual(float(los_black["run_numeric"]), 2.5) 
+ self.assertAlmostEqual(float(los_black["run_interval_lower"]), 1.75) + self.assertAlmostEqual(float(los_black["run_interval_upper"]), 3.25) + self.assertIn("[", str(los_black["run_value"])) - args = type( - "Args", - (), - { - "root": Path("ignored-root"), - "config_path": Path("ignored-config"), - "output_dir": Path("out"), - "stream_cache_dir": None, - "repetitions": 1, - "include_downstream_weight_summary": False, - "include_cdf_plot_data": False, - "compare_to_paper": True, - "task_demo": False, - "note_chunksize": 100_000, - "chartevent_chunksize": 500_000, - "reuse_intermediates": None, - "paper_like_dataset_prepare": False, - }, - )() + self.assertEqual(age_white["summary_stat"], "median_iqr") + self.assertAlmostEqual(float(age_white["run_numeric"]), 65.0) + self.assertAlmostEqual(float(age_white["run_interval_lower"]), 57.5) + self.assertAlmostEqual(float(age_white["run_interval_upper"]), 72.5) - stdout = io.StringIO() - with patch.object( - example_module, - "parse_args", - return_value=args, - ), patch.object( - example_module, - "build_eol_mistrust_outputs", - return_value=artifacts, - ), patch( - "sys.stdout", - stdout, - ): - example_module.main() - - output = stdout.getvalue() - self.assertIn("Paper comparison summary:", output) - self.assertIn("Table 1 vs Paper:", output) - self.assertIn("Population Size | BLACK | paper=1214 | run=1215", output) - self.assertIn("Table 2 vs Paper:", output) - self.assertIn("total_vent_min | black n 510->587", output) - self.assertIn("Table 3 vs Paper:", output) - self.assertIn("noncompliance | positive #1 | riker-sas scale: agitated", output) - self.assertIn("Table 4 vs Paper:", output) - self.assertIn("oasis vs sapsii | paper=0.679 | run=0.695", output) - self.assertIn("Table 5 vs Paper:", output) - self.assertIn("Left AMA | Baseline | n 48071->48289 | auc 0.859->0.870", output) - self.assertIn("Table 6 vs Paper:", output) - self.assertIn("Left AMA | age | paper=-2.100 | run=-0.780", output) - - def 
test_main_writes_managed_normal_run_archive_with_default_output_and_cache_dirs(self): + def test_main_writes_managed_normal_run_archive_with_default_output_dir(self): example_module = _load_example_module() - comparison_outputs = { - "summary": { - "table1_rows": 1, - "table2_rows": 1, - "table3_snapshot_rows": 1, - "table4_rows": 1, - "table5_rows": 1, - "table6_rows": 1, - "table4_max_abs_delta": 0.1, - "table5_max_abs_delta": 0.2, - "table6_max_abs_delta": 0.3, - }, - "table1_comparison": pd.DataFrame( - [ - { - "metric": "Population Size", - "race": "BLACK", - "paper_value": "1214", - "run_value": "1215", - } - ] - ), - } artifacts = { "validation_summary": { "database_flavor": "postgresql", @@ -4059,7 +2893,6 @@ def test_main_writes_managed_normal_run_archive_with_default_output_and_cache_di "note_labels": pd.DataFrame(columns=["hadm_id"]), "mistrust_scores": pd.DataFrame(columns=["hadm_id"]), "final_model_table": pd.DataFrame(columns=["hadm_id"]), - "paper_comparison": comparison_outputs, } with _workspace_tempdir() as temp_dir: @@ -4071,16 +2904,13 @@ def test_main_writes_managed_normal_run_archive_with_default_output_and_cache_di "root": Path("ignored-root"), "config_path": Path("ignored-config"), "output_dir": None, - "stream_cache_dir": None, "result_root": result_root, "repetitions": 1, "include_downstream_weight_summary": False, "include_cdf_plot_data": False, - "compare_to_paper": True, "task_demo": False, "note_chunksize": 100_000, "chartevent_chunksize": 500_000, - "reuse_intermediates": None, "paper_like_dataset_prepare": False, }, )() @@ -4106,46 +2936,23 @@ def test_main_writes_managed_normal_run_archive_with_default_output_and_cache_di run_dir = result_root / "EOL_normal_20260410_153045" expected_output_dir = run_dir / "result" - expected_cache_dir = run_dir / "cache" build_outputs.assert_called_once() self.assertEqual(build_outputs.call_args.kwargs["output_dir"], expected_output_dir) - self.assertEqual( - 
build_outputs.call_args.kwargs["stream_cache_dir"], - expected_cache_dir, - ) run_summary = (run_dir / "RUN_SUMMARY.txt").read_text(encoding="utf-8") - run_time = (run_dir / "RUN_TIME.txt").read_text(encoding="utf-8") - paper_summary = (run_dir / "paper_comparison_summary.txt").read_text( - encoding="utf-8" - ) self.assertIn("managed_run_name: EOL_normal_20260410_153045", run_summary) self.assertIn(f"result_dir: {expected_output_dir}", run_summary) - self.assertIn(f"stream_cache_base_dir: {expected_cache_dir}", run_summary) self.assertIn("route_mode: default", run_summary) - self.assertIn("paper_comparison_summary_file:", run_summary) - self.assertNotIn("Paper comparison summary:", run_summary) - self.assertIn("Population Size | BLACK | paper=1214 | run=1215", paper_summary) - self.assertIn("total_runtime_seconds:", run_time) + self.assertIn("total_runtime_seconds:", run_summary) + self.assertNotIn("paper_comparison_summary_file", run_summary) + self.assertFalse((run_dir / "paper_comparison_summary.txt").exists()) + self.assertFalse((run_dir / "RUN_TIME.txt").exists()) def test_main_writes_managed_paperlike_run_archive_name(self): example_module = _load_example_module() - comparison_outputs = { - "summary": {"table1_rows": 1}, - "table1_comparison": pd.DataFrame( - [ - { - "metric": "Population Size", - "race": "BLACK", - "paper_value": "1214", - "run_value": "1215", - } - ] - ), - } artifacts = { "validation_summary": { "database_flavor": "postgresql", @@ -4160,7 +2967,6 @@ def test_main_writes_managed_paperlike_run_archive_name(self): "note_labels": pd.DataFrame(columns=["hadm_id"]), "mistrust_scores": pd.DataFrame(columns=["hadm_id"]), "final_model_table": pd.DataFrame(columns=["hadm_id"]), - "paper_comparison": comparison_outputs, } with _workspace_tempdir() as temp_dir: @@ -4172,16 +2978,13 @@ def test_main_writes_managed_paperlike_run_archive_name(self): "root": Path("ignored-root"), "config_path": Path("ignored-config"), "output_dir": None, - 
"stream_cache_dir": None, "result_root": result_root, "repetitions": 1, "include_downstream_weight_summary": False, "include_cdf_plot_data": False, - "compare_to_paper": False, "task_demo": False, "note_chunksize": 100_000, "chartevent_chunksize": 500_000, - "reuse_intermediates": None, "paper_like_dataset_prepare": True, }, )() @@ -4206,9 +3009,162 @@ def test_main_writes_managed_paperlike_run_archive_name(self): run_summary = (run_dir / "RUN_SUMMARY.txt").read_text(encoding="utf-8") self.assertIn("managed_run_name: EOL_Paperlike_20260410_153046", run_summary) self.assertIn("route_mode: paper_like", run_summary) - self.assertIn("paper_comparison_summary_file: disabled", run_summary) + self.assertIn("total_runtime_seconds:", run_summary) + self.assertNotIn("paper_comparison_summary_file", run_summary) self.assertTrue((run_dir / "run_table_summary.txt").exists()) self.assertFalse((run_dir / "paper_comparison_summary.txt").exists()) + self.assertFalse((run_dir / "RUN_TIME.txt").exists()) + + def test_main_runs_normal_vs_paperlike_ablation_study(self): + example_module = _load_example_module() + + normal_artifacts = { + "validation_summary": { + "database_flavor": "postgresql", + "schema_name": "mimiciii", + "dataset_prepare_mode": "default", + "autopsy_proxy_enabled": False, + }, + "base_admissions": pd.DataFrame(columns=["hadm_id"]), + "all_cohort": pd.DataFrame(columns=["hadm_id"]), + "eol_cohort": pd.DataFrame(columns=["hadm_id"]), + "chartevent_feature_matrix": pd.DataFrame(columns=["hadm_id"]), + "note_labels": pd.DataFrame(columns=["hadm_id"]), + "mistrust_scores": pd.DataFrame(columns=["hadm_id"]), + "final_model_table": pd.DataFrame(columns=["hadm_id"]), + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "In-hospital mortality", + "configuration": "Baseline + ALL", + "n_rows": 48289, + "auc_mean": 0.648, + "auc_std": 0.012, + } + ] + ), + "downstream_weight_results": pd.DataFrame( + [ + { + "task": "In-hospital mortality", + "configuration": 
"Baseline + ALL", + "feature": "negative_sentiment_score_z", + "weight_mean": 0.090, + "weight_std": 0.000, + } + ] + ), + } + paperlike_artifacts = { + "validation_summary": { + "database_flavor": "postgresql", + "schema_name": "mimiciii", + "dataset_prepare_mode": "paper_like", + "autopsy_proxy_enabled": True, + }, + "base_admissions": pd.DataFrame(columns=["hadm_id"]), + "all_cohort": pd.DataFrame(columns=["hadm_id"]), + "eol_cohort": pd.DataFrame(columns=["hadm_id"]), + "chartevent_feature_matrix": pd.DataFrame(columns=["hadm_id"]), + "note_labels": pd.DataFrame(columns=["hadm_id"]), + "mistrust_scores": pd.DataFrame(columns=["hadm_id"]), + "final_model_table": pd.DataFrame(columns=["hadm_id"]), + "downstream_auc_results": pd.DataFrame( + [ + { + "task": "In-hospital mortality", + "configuration": "Baseline + ALL", + "n_rows": 48289, + "auc_mean": 0.635, + "auc_std": 0.010, + } + ] + ), + "downstream_weight_results": pd.DataFrame( + [ + { + "task": "In-hospital mortality", + "configuration": "Baseline + ALL", + "feature": "autopsy_score_z", + "weight_mean": 0.020, + "weight_std": 0.000, + } + ] + ), + } + + with _workspace_tempdir() as temp_dir: + result_root = Path(temp_dir) / "EOL_Result" + args = type( + "Args", + (), + { + "root": Path("ignored-root"), + "config_path": Path("ignored-config"), + "output_dir": None, + "result_root": result_root, + "repetitions": 1, + "task_demo": False, + "task_demo_train_eval": False, + "paper_like_dataset_prepare": False, + "ablation_study": True, + }, + )() + + stdout = io.StringIO() + with patch.object( + example_module, + "parse_args", + return_value=args, + ), patch.object( + example_module, + "_current_run_timestamp", + return_value="20260411_120000", + ), patch.object( + example_module, + "build_eol_mistrust_outputs", + side_effect=[normal_artifacts, paperlike_artifacts], + ) as build_outputs, patch( + "sys.stdout", + stdout, + ): + example_module.main() + + ablation_dir = ( + result_root / 
"EOL_ablation_normal_vs_paperlike_20260411_120000" + ) + normal_dir = ablation_dir / "normal" + paperlike_dir = ablation_dir / "paper_like" + + self.assertEqual(build_outputs.call_count, 2) + self.assertFalse( + build_outputs.call_args_list[0].kwargs["paper_like_dataset_prepare"] + ) + self.assertTrue( + build_outputs.call_args_list[1].kwargs["paper_like_dataset_prepare"] + ) + self.assertEqual( + build_outputs.call_args_list[0].kwargs["output_dir"], + normal_dir / "result", + ) + self.assertEqual( + build_outputs.call_args_list[1].kwargs["output_dir"], + paperlike_dir / "result", + ) + self.assertTrue((normal_dir / "RUN_SUMMARY.txt").exists()) + self.assertTrue((paperlike_dir / "RUN_SUMMARY.txt").exists()) + self.assertTrue((normal_dir / "run_table_summary.txt").exists()) + self.assertTrue((paperlike_dir / "run_table_summary.txt").exists()) + ablation_summary = (ablation_dir / "ABLATION_SUMMARY.txt").read_text( + encoding="utf-8" + ) + self.assertIn("Route Ablation Study", ablation_summary) + self.assertIn("Normal", ablation_summary) + self.assertIn("Paper-like", ablation_summary) + self.assertIn("autopsy_proxy_enabled: False", ablation_summary) + self.assertIn("autopsy_proxy_enabled: True", ablation_summary) + self.assertIn("auc_mean: 0.648", ablation_summary) + self.assertIn("auc_mean: 0.635", ablation_summary) def test_write_run_table_summary_artifacts_writes_run_only_table_summary_txt(self): example_module = _load_example_module() @@ -4216,8 +3172,28 @@ def test_write_run_table_summary_artifacts_writes_run_only_table_summary_txt(sel artifacts = { "validation_summary": { "autopsy_proxy_enabled": False, + "dataset_prepare_mode": "default", }, - "eol_cohort": pd.DataFrame(columns=["hadm_id"]), + "eol_cohort": pd.DataFrame( + [ + { + "race": "BLACK", + "insurance_group": "Public", + "discharge_category": "Deceased", + "gender": "F", + "los_days": 7.88, + "age": 71.31, + }, + { + "race": "WHITE", + "insurance_group": "Private", + "discharge_category": "Skilled 
Nursing Facility", + "gender": "M", + "los_days": 7.77, + "age": 77.85, + }, + ] + ), "race_treatment_results": pd.DataFrame( [ { @@ -4311,73 +3287,83 @@ def test_write_run_table_summary_artifacts_writes_run_only_table_summary_txt(sel summary_text = (run_dir / "run_table_summary.txt").read_text(encoding="utf-8") self.assertIn("Run Table Results", summary_text) + self.assertIn("Route: Normal", summary_text) + self.assertEqual(summary_text.count("- Population Size"), 1) + self.assertIn(" BLACK: 1", summary_text) + self.assertIn(" WHITE: 1", summary_text) self.assertIn("Table 2", summary_text) self.assertIn("BLACK: n=510, median=2782.5", summary_text) + self.assertIn("Table 4", summary_text) + self.assertIn("- oasis vs sapsii: 0.695", summary_text) self.assertIn("Table 5", summary_text) self.assertIn("Left AMA | Baseline", summary_text) + self.assertIn("Table 6", summary_text) + self.assertIn("age: mean=-0.782, std=0.200", summary_text) self.assertNotIn("paper=", summary_text) + self.assertNotIn("autopsy:", summary_text) - def test_build_paper_table3_comparison_matches_autopsy_alias_features(self): + def test_build_run_table3_summary_returns_top_positive_and_negative_weights(self): example_module = _load_example_module() feature_weight_summaries = { - "autopsy": { + "noncompliance": { "all": pd.DataFrame( [ { - "feature": "restraints evaluated: restraintreapply", - "weight": 0.1600, + "feature": "riker-sas scale: agitated", + "weight": 0.6648, + }, + { + "feature": "education readiness: no", + "weight": 0.1665, }, { - "feature": "orientation: oriented x 3", - "weight": 0.0360, + "feature": "pain level: 7-mod to severe", + "weight": 0.1243, }, { - "feature": "is the spokesperson the health care proxy: 1", - "weight": -0.2200, + "feature": "richmond-ras scale: 0 alert and calm", + "weight": -0.3854, }, { - "feature": "family communication: family talked to md", - "weight": -0.1200, + "feature": "state: alert", + "weight": -0.9000, + }, + { + "feature": "pain: none", + 
"weight": -0.5000, }, ] ) } } - comparison = example_module.build_paper_table3_comparison(feature_weight_summaries) - - by_feature = comparison.set_index("paper_feature") - - self.assertTrue(bool(by_feature.loc["reapplied restraints", "run_feature_found"])) - self.assertEqual( - by_feature.loc["reapplied restraints", "run_feature"], - "restraints evaluated: restraintreapply", - ) - self.assertAlmostEqual( - float(by_feature.loc["reapplied restraints", "run_weight"]), - 0.1600, - places=4, - ) + summary = example_module._build_run_table3_summary(feature_weight_summaries) - self.assertTrue(bool(by_feature.loc["orientation: oriented 3x", "run_feature_found"])) - self.assertEqual( - by_feature.loc["orientation: oriented 3x", "run_feature"], - "orientation: oriented x 3", - ) + positive = summary.loc[ + (summary["proxy_model"] == "noncompliance") + & (summary["direction"] == "positive") + ].sort_values("rank") + negative = summary.loc[ + (summary["proxy_model"] == "noncompliance") + & (summary["direction"] == "negative") + ].sort_values("rank") - self.assertTrue(bool(by_feature.loc["spokesperson is healthcare proxy", "run_feature_found"])) self.assertEqual( - by_feature.loc["spokesperson is healthcare proxy", "run_feature"], - "is the spokesperson the health care proxy: 1", - ) - - self.assertTrue( - bool(by_feature.loc["family communication: talked to m.d.", "run_feature_found"]) + positive["feature"].tolist(), + [ + "riker-sas scale: agitated", + "education readiness: no", + "pain level: 7-mod to severe", + ], ) self.assertEqual( - by_feature.loc["family communication: talked to m.d.", "run_feature"], - "family communication: family talked to md", + negative["feature"].tolist(), + [ + "state: alert", + "pain: none", + "richmond-ras scale: 0 alert and calm", + ], ) def test_integration_minimal_boundary_scale_pipeline_runs_with_two_admissions(self): diff --git a/tests/core/test_eol_mistrust_dataset.py b/tests/core/test_eol_mistrust_dataset.py index a5c654b79..4214cf124 
100644 --- a/tests/core/test_eol_mistrust_dataset.py +++ b/tests/core/test_eol_mistrust_dataset.py @@ -7,6 +7,7 @@ from unittest.mock import patch import pandas as pd +from pyhealth.datasets.base_dataset import BaseDataset def _load_model_build_mistrust_score_table(): module_path = ( @@ -39,6 +40,23 @@ def _load_eol_mistrust_module(): return module +def _load_eol_mistrust_dataset_class_module(): + module_path = ( + Path(__file__).resolve().parents[2] + / "pyhealth" + / "datasets" + / "eol_mistrust_dataset.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.datasets.eol_mistrust_dataset_class_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + @contextmanager def _workspace_tempdir(): base = Path(__file__).resolve().parents[2] / ".tmp-test-dataset" @@ -2864,7 +2882,6 @@ def test_build_final_model_table_baseline_only_columns_match_required_set(self): ) expected_columns = { "hadm_id", - "subject_id", "age", "los_days", "gender_f", @@ -2877,7 +2894,7 @@ def test_build_final_model_table_baseline_only_columns_match_required_set(self): "in_hospital_mortality", } self.assertEqual(set(final_table.columns), expected_columns) - self.assertEqual(len(final_table.columns), 12) + self.assertEqual(len(final_table.columns), 11) def test_build_final_model_table_from_code_status_targets_matches_raw_chartevents_path(self): build_base_admissions = self._get_callable("build_base_admissions") @@ -3524,5 +3541,387 @@ def test_end_to_end_artifact_assembly_smoke_spec(self): self.assertEqual(len(list(output_dir.iterdir())), 9) +class TestEOLMistrustDatasetClass(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.dataset_class_module = _load_eol_mistrust_dataset_class_module() + cls.task_module = importlib.util.module_from_spec( + spec := importlib.util.spec_from_file_location( + "pyhealth.tasks.eol_mistrust_dataset_class_tests", + 
Path(__file__).resolve().parents[2] + / "pyhealth" + / "tasks" + / "eol_mistrust.py", + ) + ) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(cls.task_module) + + def _write_minimal_root(self, root: Path) -> None: + (root / "mimiciii_clinical").mkdir(parents=True, exist_ok=True) + (root / "mimiciii_notes").mkdir(parents=True, exist_ok=True) + (root / "mimiciii_derived").mkdir(parents=True, exist_ok=True) + + pd.DataFrame( + [ + { + "subject_id": 1, + "gender": "F", + "dob": "2070-01-01 00:00:00", + "dod": "", + "dod_hosp": "", + "dod_ssn": "", + "expire_flag": 0, + }, + { + "subject_id": 2, + "gender": "M", + "dob": "2065-01-01 00:00:00", + "dod": "", + "dod_hosp": "", + "dod_ssn": "", + "expire_flag": 0, + }, + ] + ).to_csv(root / "mimiciii_clinical" / "patients.csv", index=False) + pd.DataFrame( + [ + { + "subject_id": 1, + "hadm_id": 11, + "admittime": "2100-01-01 00:00:00", + "dischtime": "2100-01-03 00:00:00", + "deathtime": "", + "admission_type": "EMERGENCY", + "admission_location": "EMERGENCY ROOM ADMIT", + "discharge_location": "HOME", + "insurance": "Private", + "language": "ENGLISH", + "religion": "", + "marital_status": "MARRIED", + "ethnicity": "WHITE", + "edregtime": "", + "edouttime": "", + "diagnosis": "Sepsis", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "subject_id": 2, + "hadm_id": 22, + "admittime": "2100-02-01 00:00:00", + "dischtime": "2100-02-04 12:00:00", + "deathtime": "", + "admission_type": "URGENT", + "admission_location": "TRANSFER FROM HOSPITAL", + "discharge_location": "HOME", + "insurance": "Medicare", + "language": "ENGLISH", + "religion": "", + "marital_status": "SINGLE", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "edregtime": "", + "edouttime": "", + "diagnosis": "Pneumonia", + "hospital_expire_flag": 1, + "has_chartevents_data": 1, + }, + ] + ).to_csv(root / "mimiciii_clinical" / "admissions.csv", index=False) + pd.DataFrame( + [ + { + "subject_id": 1, + "hadm_id": 11, + 
"icustay_id": 111, + "dbsource": "metavision", + "first_careunit": "MICU", + "last_careunit": "MICU", + "intime": "2100-01-01 01:00:00", + "outtime": "2100-01-02 12:00:00", + "los": 1.5, + }, + { + "subject_id": 2, + "hadm_id": 22, + "icustay_id": 222, + "dbsource": "metavision", + "first_careunit": "SICU", + "last_careunit": "SICU", + "intime": "2100-02-01 03:00:00", + "outtime": "2100-02-03 12:00:00", + "los": 2.3, + }, + ] + ).to_csv(root / "mimiciii_clinical" / "icustays.csv", index=False) + pd.DataFrame( + [ + { + "subject_id": 1, + "hadm_id": 11, + "row_id": 1, + "charttime": "", + "chartdate": "2100-01-01", + "text": "Family meeting note", + "category": "Discharge summary", + "description": "Report", + "storetime": "2100-01-01 12:00:00", + "iserror": "", + }, + { + "subject_id": 2, + "hadm_id": 22, + "row_id": 2, + "charttime": "2100-02-01 08:00:00", + "chartdate": "2100-02-01", + "text": "Patient declining treatment", + "category": "Nursing", + "description": "Note", + "storetime": "2100-02-01 09:00:00", + "iserror": "", + }, + ] + ).to_csv(root / "mimiciii_notes" / "noteevents.csv", index=False) + pd.DataFrame( + [ + { + "itemid": 128, + "label": "Code Status", + "abbreviation": "", + "dbsource": "carevue", + "linksto": "chartevents", + "category": "", + "unitname": "", + "param_type": "", + "conceptid": "", + } + ] + ).to_csv(root / "mimiciii_clinical" / "d_items.csv", index=False) + pd.DataFrame( + [ + { + "subject_id": 1, + "hadm_id": 11, + "icustay_id": 111, + "itemid": 128, + "charttime": "2100-01-01 08:00:00", + "storetime": "2100-01-01 08:30:00", + "cgid": 1, + "value": "Full Code", + "valuenum": "", + "valueuom": "", + "warning": "", + "error": "", + "resultstatus": "", + "stopped": "", + }, + { + "subject_id": 2, + "hadm_id": 22, + "icustay_id": 222, + "itemid": 128, + "charttime": "2100-02-01 08:00:00", + "storetime": "2100-02-01 08:30:00", + "cgid": 2, + "value": "DNR/DNI", + "valuenum": "", + "valueuom": "", + "warning": "", + "error": "", + 
"resultstatus": "", + "stopped": "", + }, + ] + ).to_csv(root / "mimiciii_clinical" / "chartevents.csv", index=False) + + def _write_optional_context_tables(self, root: Path) -> None: + pd.DataFrame( + [ + { + "subject_id": 1, + "hadm_id": 11, + "seq_num": 1, + "icd9_code": "0389", + }, + { + "subject_id": 2, + "hadm_id": 22, + "seq_num": 1, + "icd9_code": "486", + }, + ] + ).to_csv(root / "mimiciii_clinical" / "diagnoses_icd.csv", index=False) + pd.DataFrame( + [ + { + "subject_id": 1, + "hadm_id": 11, + "seq_num": 1, + "icd9_code": "3893", + }, + { + "subject_id": 2, + "hadm_id": 22, + "seq_num": 1, + "icd9_code": "9671", + }, + ] + ).to_csv(root / "mimiciii_clinical" / "procedures_icd.csv", index=False) + pd.DataFrame( + [ + { + "subject_id": 1, + "hadm_id": 11, + "startdate": "2100-01-01 00:00:00", + "enddate": "2100-01-02 00:00:00", + "drug": "Aspirin", + "drug_type": "MAIN", + "drug_name_poe": "Aspirin", + "drug_name_generic": "Aspirin", + "formulary_drug_cd": "ASP", + "gsn": "", + "ndc": "", + "prod_strength": "81 mg", + "dose_val_rx": "81", + "dose_unit_rx": "mg", + "form_val_disp": "1", + "form_unit_disp": "tab", + "route": "PO", + }, + { + "subject_id": 2, + "hadm_id": 22, + "startdate": "2100-02-01 00:00:00", + "enddate": "2100-02-02 00:00:00", + "drug": "Heparin", + "drug_type": "MAIN", + "drug_name_poe": "Heparin", + "drug_name_generic": "Heparin", + "formulary_drug_cd": "HEP", + "gsn": "", + "ndc": "", + "prod_strength": "5000 unit", + "dose_val_rx": "5000", + "dose_unit_rx": "unit", + "form_val_disp": "1", + "form_unit_disp": "dose", + "route": "IV", + }, + ] + ).to_csv(root / "mimiciii_clinical" / "prescriptions.csv", index=False) + + def test_dataset_class_inherits_base_dataset_and_keeps_core_tables(self): + dataset_cls = self.dataset_class_module.EOLMistrustDataset + + self.assertTrue(issubclass(dataset_cls, BaseDataset)) + + with _workspace_tempdir() as temp_dir: + root = Path(temp_dir) + self._write_minimal_root(root) + dataset = dataset_cls( + 
root=str(root), + tables=["noteevents"], + cache_dir=root / "cache", + num_workers=1, + ) + + self.assertIn("patients", dataset.tables) + self.assertIn("admissions", dataset.tables) + self.assertIn("icustays", dataset.tables) + self.assertIn("noteevents", dataset.tables) + + def test_dataset_class_can_set_eol_task_on_minimal_synthetic_tables(self): + dataset_cls = self.dataset_class_module.EOLMistrustDataset + task = self.task_module.EOLMistrustMortalityPredictionMIMIC3(include_notes=True) + + with _workspace_tempdir() as temp_dir: + root = Path(temp_dir) + self._write_minimal_root(root) + dataset = dataset_cls( + root=str(root), + tables=["noteevents"], + cache_dir=root / "cache", + num_workers=1, + ) + + sample_dataset = dataset.set_task(task, num_workers=1) + sample = sample_dataset[0] + + self.assertIn("age", sample) + self.assertIn("los_days", sample) + self.assertIn("gender", sample) + self.assertIn("insurance", sample) + self.assertIn("race", sample) + self.assertIn("clinical_notes", sample) + self.assertIn("in_hospital_mortality", sample) + + def test_dataset_class_defaults_include_available_optional_tables(self): + dataset_cls = self.dataset_class_module.EOLMistrustDataset + + with _workspace_tempdir() as temp_dir: + root = Path(temp_dir) + self._write_minimal_root(root) + self._write_optional_context_tables(root) + dataset = dataset_cls( + root=str(root), + cache_dir=root / "cache", + num_workers=1, + ) + + self.assertIn("noteevents", dataset.tables) + self.assertIn("chartevents", dataset.tables) + self.assertIn("diagnoses_icd", dataset.tables) + self.assertIn("procedures_icd", dataset.tables) + self.assertIn("prescriptions", dataset.tables) + + def test_dataset_class_tracks_dataset_prepare_mode_for_normal_and_paper_like(self): + dataset_cls = self.dataset_class_module.EOLMistrustDataset + + with _workspace_tempdir() as temp_dir: + root = Path(temp_dir) + self._write_minimal_root(root) + + normal_dataset = dataset_cls( + root=str(root), + 
tables=["noteevents"], + dataset_prepare_mode="default", + cache_dir=root / "cache_normal", + num_workers=1, + ) + paper_like_dataset = dataset_cls( + root=str(root), + tables=["noteevents"], + dataset_prepare_mode="paper_like", + cache_dir=root / "cache_paper_like", + num_workers=1, + ) + + self.assertEqual(normal_dataset.dataset_prepare_mode, "default") + self.assertFalse(normal_dataset.paper_like_dataset_prepare) + self.assertEqual(normal_dataset.code_status_mode, "corrected") + self.assertEqual(normal_dataset.autopsy_label_mode, "corrected") + + self.assertEqual(paper_like_dataset.dataset_prepare_mode, "paper_like") + self.assertTrue(paper_like_dataset.paper_like_dataset_prepare) + self.assertEqual(paper_like_dataset.code_status_mode, "paper_like") + self.assertEqual(paper_like_dataset.autopsy_label_mode, "paper_like") + + def test_dataset_class_rejects_unknown_dataset_prepare_mode(self): + dataset_cls = self.dataset_class_module.EOLMistrustDataset + + with _workspace_tempdir() as temp_dir: + root = Path(temp_dir) + self._write_minimal_root(root) + + with self.assertRaisesRegex(ValueError, "dataset_prepare_mode"): + dataset_cls( + root=str(root), + tables=["noteevents"], + dataset_prepare_mode="mystery_mode", + cache_dir=root / "cache", + num_workers=1, + ) + + if __name__ == "__main__": unittest.main() diff --git a/tests/core/test_eol_mistrust_model.py b/tests/core/test_eol_mistrust_model.py index 656054d8a..19bb0c3b1 100644 --- a/tests/core/test_eol_mistrust_model.py +++ b/tests/core/test_eol_mistrust_model.py @@ -1,10 +1,17 @@ import importlib.util import importlib +import shutil +import tempfile import unittest from pathlib import Path from unittest.mock import patch import pandas as pd +import torch +from pyhealth.datasets.sample_dataset import create_sample_dataset +from pyhealth.datasets.utils import get_dataloader +from pyhealth.models.base_model import BaseModel +from pyhealth.trainer import Trainer def _load_model_module(): @@ -21,6 +28,57 @@ def 
_load_model_module(): return module +def _load_classifier_module(): + module_path = ( + Path(__file__).resolve().parents[2] + / "pyhealth" + / "models" + / "eol_mistrust_classifier.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.models.eol_mistrust_classifier_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +def _load_dataset_class_module(): + module_path = ( + Path(__file__).resolve().parents[2] + / "pyhealth" + / "datasets" + / "eol_mistrust_dataset.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.datasets.eol_mistrust_classifier_integration_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + +def _load_task_module(): + module_path = ( + Path(__file__).resolve().parents[2] + / "pyhealth" + / "tasks" + / "eol_mistrust.py" + ) + spec = importlib.util.spec_from_file_location( + "pyhealth.tasks.eol_mistrust_classifier_integration_tests", + module_path, + ) + module = importlib.util.module_from_spec(spec) + assert spec is not None and spec.loader is not None + spec.loader.exec_module(module) + return module + + def _load_dataset_module(): module_path = ( Path(__file__).resolve().parents[2] / "pyhealth" / "datasets" / "eol_mistrust.py" @@ -367,8 +425,8 @@ def predict_proba(self, X): self.assertEqual(created[0].kwargs.get("penalty"), "l1") self.assertEqual(created[0].kwargs.get("C"), 0.1) self.assertEqual(created[0].kwargs.get("solver"), "liblinear") - self.assertEqual(created[0].kwargs.get("max_iter"), 1000) - self.assertEqual(created[0].kwargs.get("tol"), 0.001) + self.assertEqual(created[0].kwargs.get("max_iter"), 100) + self.assertEqual(created[0].kwargs.get("tol"), 0.01) self.assertEqual(len(created[0].fit_X), len(self.feature_matrix)) 
self.assertEqual(len(created[0].fit_y), len(self.note_labels)) @@ -1123,8 +1181,8 @@ def _auc_fn(y_true, y_prob): self.assertEqual(created[0].kwargs.get("penalty"), "l1") self.assertEqual(created[0].kwargs.get("C"), 0.1) self.assertEqual(created[0].kwargs.get("solver"), "liblinear") - self.assertEqual(created[0].kwargs.get("max_iter"), 1000) - self.assertEqual(created[0].kwargs.get("tol"), 0.001) + self.assertEqual(created[0].kwargs.get("max_iter"), 100) + self.assertEqual(created[0].kwargs.get("tol"), 0.01) self.assertEqual(auc_calls[0]["y_prob"], [0.1, 0.9]) self.assertEqual(int(results.iloc[0]["n_valid_auc"]), 1) @@ -1612,7 +1670,6 @@ def test_baseline_feature_columns_align_with_real_dataset_baseline_only_output(s set(baseline_only.columns), { "hadm_id", - "subject_id", *self.module.BASELINE_FEATURE_COLUMNS, "left_ama", "code_status_dnr_dni_cmo", @@ -1779,8 +1836,8 @@ def test_dataset_model_integration_smoke_flow_runs_without_column_renaming(self) ) self.assertEqual(scores.shape[1], 4) - self.assertEqual(final_model_table.shape[1], 21) - self.assertIn("subject_id", final_model_table.columns) + self.assertEqual(final_model_table.shape[1], 20) + self.assertNotIn("subject_id", final_model_table.columns) self.assertEqual(outputs["downstream_auc_results"].shape[0], 18) self.assertEqual(scores["hadm_id"].tolist(), final_model_table["hadm_id"].tolist()) @@ -2119,5 +2176,558 @@ def test_evaluate_downstream_predictions_is_seed_stable_for_repeated_identical_r pd.testing.assert_frame_equal(first, second) +class TestEOLMistrustClassifier(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.module = _load_classifier_module() + cls.dataset_class_module = _load_dataset_class_module() + cls.task_module = _load_task_module() + cls._tmp_dirs: list[Path] = [] + cls._default_full = cls._build_route( + dataset_prepare_mode="default", + cache_subdir="cache_default_shared", + ) + cls._paperlike_full = cls._build_route( + dataset_prepare_mode="paper_like", + 
cache_subdir="cache_paperlike_shared", + ) + + @classmethod + def tearDownClass(cls): + for path in getattr(cls, "_tmp_dirs", []): + shutil.rmtree(path, ignore_errors=True) + cls._tmp_dirs = [] + + @classmethod + def _build_route( + cls, + *, + dataset_prepare_mode: str, + cache_subdir: str, + ) -> dict[str, object]: + temp_dir = Path( + tempfile.mkdtemp(dir=Path(__file__).resolve().parents[2]) + ) + cls._tmp_dirs.append(temp_dir) + + cls._write_minimal_root(temp_dir) + cls._write_full_feature_tables(temp_dir) + + dataset_cls = cls.dataset_class_module.EOLMistrustDataset + dataset = dataset_cls( + root=str(temp_dir), + tables=None, + dataset_prepare_mode=dataset_prepare_mode, + cache_dir=temp_dir / cache_subdir, + num_workers=1, + ) + task = cls.task_module.EOLMistrustMortalityPredictionMIMIC3( + include_notes=True, + dataset_prepare_mode=dataset_prepare_mode, + ) + sample_dataset = dataset.set_task(task, num_workers=1) + model = cls.module.EOLMistrustClassifier( + dataset=sample_dataset, + embedding_dim=8, + hidden_dim=16, + text_hash_buckets=64, + ) + batch = next( + iter( + get_dataloader( + sample_dataset, + batch_size=2, + shuffle=False, + ) + ) + ) + outputs = model(**batch) + sample_by_visit = { + int(sample_dataset[index]["visit_id"]): sample_dataset[index] + for index in range(len(sample_dataset)) + } + return { + "dataset": dataset, + "task": task, + "sample_dataset": sample_dataset, + "model": model, + "outputs": outputs, + "sample_by_visit": sample_by_visit, + } + + @staticmethod + def _write_minimal_root(root: Path) -> None: + (root / "mimiciii_clinical").mkdir(parents=True, exist_ok=True) + (root / "mimiciii_notes").mkdir(parents=True, exist_ok=True) + (root / "mimiciii_derived").mkdir(parents=True, exist_ok=True) + + pd.DataFrame( + [ + { + "subject_id": 1, + "gender": "F", + "dob": "2070-01-01 00:00:00", + "dod": "", + "dod_hosp": "", + "dod_ssn": "", + "expire_flag": 0, + }, + { + "subject_id": 2, + "gender": "M", + "dob": "2065-01-01 00:00:00", + 
"dod": "", + "dod_hosp": "", + "dod_ssn": "", + "expire_flag": 0, + }, + ] + ).to_csv(root / "mimiciii_clinical" / "patients.csv", index=False) + + pd.DataFrame( + [ + { + "row_id": 1, + "subject_id": 1, + "hadm_id": 101, + "admittime": "2100-01-01 00:00:00", + "dischtime": "2100-01-03 00:00:00", + "deathtime": "", + "admission_type": "EMERGENCY", + "admission_location": "EMERGENCY ROOM", + "discharge_location": "HOME", + "insurance": "Private", + "language": "ENGLISH", + "religion": "CATHOLIC", + "marital_status": "MARRIED", + "ethnicity": "WHITE", + "edregtime": "", + "edouttime": "", + "diagnosis": "SEPSIS", + "hospital_expire_flag": 0, + "has_chartevents_data": 1, + }, + { + "row_id": 2, + "subject_id": 2, + "hadm_id": 102, + "admittime": "2100-02-01 00:00:00", + "dischtime": "2100-02-04 12:00:00", + "deathtime": "", + "admission_type": "EMERGENCY", + "admission_location": "EMERGENCY ROOM", + "discharge_location": "HOME", + "insurance": "Medicare", + "language": "ENGLISH", + "religion": "CATHOLIC", + "marital_status": "WIDOWED", + "ethnicity": "BLACK/AFRICAN AMERICAN", + "edregtime": "", + "edouttime": "", + "diagnosis": "PNEUMONIA", + "hospital_expire_flag": 1, + "has_chartevents_data": 1, + }, + ] + ).to_csv(root / "mimiciii_clinical" / "admissions.csv", index=False) + + pd.DataFrame( + [ + { + "row_id": 1, + "subject_id": 1, + "hadm_id": 101, + "icustay_id": 1001, + "dbsource": "metavision", + "first_careunit": "MICU", + "last_careunit": "MICU", + "first_wardid": 1, + "last_wardid": 1, + "intime": "2100-01-01 00:00:00", + "outtime": "2100-01-02 00:00:00", + "los": 1.0, + }, + { + "row_id": 2, + "subject_id": 2, + "hadm_id": 102, + "icustay_id": 1002, + "dbsource": "metavision", + "first_careunit": "MICU", + "last_careunit": "MICU", + "first_wardid": 1, + "last_wardid": 1, + "intime": "2100-02-01 00:00:00", + "outtime": "2100-02-02 00:00:00", + "los": 1.0, + }, + ] + ).to_csv(root / "mimiciii_clinical" / "icustays.csv", index=False) + + pd.DataFrame( + [ + { 
+ "row_id": 1, + "subject_id": 1, + "hadm_id": 101, + "chartdate": "2100-01-01", + "charttime": "2100-01-01 12:00:00", + "storetime": "2100-01-01 13:00:00", + "category": "Nursing", + "description": "Report", + "cgid": 1, + "iserror": 0, + "text": "Family meeting note and goals of care discussion.", + }, + { + "row_id": 2, + "subject_id": 2, + "hadm_id": 102, + "chartdate": "2100-02-01", + "charttime": "2100-02-01 12:00:00", + "storetime": "2100-02-01 13:00:00", + "category": "Nursing", + "description": "Report", + "cgid": 2, + "iserror": 0, + "text": "Patient declining treatment and family distressed.", + }, + ] + ).to_csv(root / "mimiciii_notes" / "noteevents.csv", index=False) + + @staticmethod + def _write_full_feature_tables(root: Path) -> None: + pd.DataFrame( + [ + { + "itemid": 128, + "label": "Code Status", + "abbreviation": "", + "dbsource": "carevue", + "linksto": "chartevents", + "category": "", + "unitname": "", + "param_type": "", + "conceptid": "", + } + ] + ).to_csv(root / "mimiciii_clinical" / "d_items.csv", index=False) + + pd.DataFrame( + [ + { + "subject_id": 1, + "hadm_id": 101, + "icustay_id": 1001, + "itemid": 128, + "charttime": "2100-01-01 08:00:00", + "storetime": "2100-01-01 08:30:00", + "cgid": 1, + "value": "DNR/DNI", + "valuenum": "", + "valueuom": "", + "warning": "", + "error": "", + "resultstatus": "", + "stopped": "", + }, + { + "subject_id": 2, + "hadm_id": 102, + "icustay_id": 1002, + "itemid": 128, + "charttime": "2100-02-04 11:00:00", + "storetime": "2100-02-04 11:30:00", + "cgid": 2, + "value": "Full Code", + "valuenum": "", + "valueuom": "", + "warning": "", + "error": "", + "resultstatus": "", + "stopped": "", + }, + { + "subject_id": 2, + "hadm_id": 102, + "icustay_id": 1002, + "itemid": 128, + "charttime": "2100-02-01 08:00:00", + "storetime": "2100-02-01 08:30:00", + "cgid": 2, + "value": "DNR/DNI", + "valuenum": "", + "valueuom": "", + "warning": "", + "error": "", + "resultstatus": "", + "stopped": "", + }, + ] + 
).to_csv(root / "mimiciii_clinical" / "chartevents.csv", index=False) + + pd.DataFrame( + [ + {"subject_id": 1, "hadm_id": 101, "seq_num": 1, "icd9_code": "0389"}, + {"subject_id": 2, "hadm_id": 102, "seq_num": 1, "icd9_code": "486"}, + ] + ).to_csv(root / "mimiciii_clinical" / "diagnoses_icd.csv", index=False) + + pd.DataFrame( + [ + {"subject_id": 1, "hadm_id": 101, "seq_num": 1, "icd9_code": "3893"}, + {"subject_id": 2, "hadm_id": 102, "seq_num": 1, "icd9_code": "9671"}, + ] + ).to_csv(root / "mimiciii_clinical" / "procedures_icd.csv", index=False) + + pd.DataFrame( + [ + { + "subject_id": 1, + "hadm_id": 101, + "startdate": "2100-01-01 00:00:00", + "enddate": "2100-01-02 00:00:00", + "drug": "Aspirin", + "drug_type": "MAIN", + "drug_name_poe": "Aspirin", + "drug_name_generic": "Aspirin", + "formulary_drug_cd": "ASP", + "gsn": "", + "ndc": "", + "prod_strength": "81 mg", + "dose_val_rx": "81", + "dose_unit_rx": "mg", + "form_val_disp": "1", + "form_unit_disp": "tab", + "route": "PO", + }, + { + "subject_id": 2, + "hadm_id": 102, + "startdate": "2100-02-01 00:00:00", + "enddate": "2100-02-02 00:00:00", + "drug": "Heparin", + "drug_type": "MAIN", + "drug_name_poe": "Heparin", + "drug_name_generic": "Heparin", + "formulary_drug_cd": "HEP", + "gsn": "", + "ndc": "", + "prod_strength": "5000 unit", + "dose_val_rx": "5000", + "dose_unit_rx": "unit", + "form_val_disp": "1", + "form_unit_disp": "dose", + "route": "IV", + }, + ] + ).to_csv(root / "mimiciii_clinical" / "prescriptions.csv", index=False) + + def test_classifier_inherits_base_model_and_supports_task_like_inputs(self): + samples = [ + { + "patient_id": "p1", + "visit_id": "v1", + "conditions": ["4019", "25000"], + "procedures": ["3893"], + "drugs": ["Aspirin"], + "age": 70.0, + "los_days": 5.0, + "gender": "F", + "insurance": "Private", + "race": "WHITE", + "clinical_notes": "family meeting note", + "label": 1, + }, + { + "patient_id": "p2", + "visit_id": "v2", + "conditions": ["486"], + "procedures": 
["9671"], + "drugs": ["Heparin"], + "age": 80.0, + "los_days": 8.0, + "gender": "M", + "insurance": "Public", + "race": "BLACK", + "clinical_notes": "patient declining treatment", + "label": 0, + }, + ] + dataset = create_sample_dataset( + samples=samples, + input_schema={ + "conditions": "sequence", + "procedures": "sequence", + "drugs": "sequence", + "age": "tensor", + "los_days": "tensor", + "gender": "text", + "insurance": "text", + "race": "text", + "clinical_notes": "text", + }, + output_schema={"label": "binary"}, + dataset_name="eol_mistrust_classifier_test", + ) + + model = self.module.EOLMistrustClassifier( + dataset=dataset, + embedding_dim=8, + hidden_dim=16, + text_hash_buckets=64, + ) + self.assertTrue(issubclass(self.module.EOLMistrustClassifier, BaseModel)) + + batch = next(iter(get_dataloader(dataset, batch_size=2, shuffle=False))) + outputs = model(**batch) + + self.assertEqual(set(outputs.keys()), {"loss", "y_prob", "y_true", "logit"}) + self.assertEqual(tuple(outputs["logit"].shape), (2, 1)) + self.assertEqual(tuple(outputs["y_prob"].shape), (2, 1)) + self.assertEqual(tuple(outputs["y_true"].shape), (2, 1)) + + outputs["loss"].backward() + self.assertIsNotNone(model.output_layer.weight.grad) + self.assertTrue(torch.isfinite(outputs["loss"]).item()) + + def test_classifier_runs_on_samples_from_eol_mistrust_dataset_task_pipeline(self): + dataset_cls = self.dataset_class_module.EOLMistrustDataset + task = self.task_module.EOLMistrustMortalityPredictionMIMIC3( + include_notes=True + ) + + temp_dir = tempfile.mkdtemp(dir=Path(__file__).resolve().parents[2]) + try: + root = Path(temp_dir) + self._write_minimal_root(root) + dataset = dataset_cls( + root=str(root), + tables=["noteevents"], + cache_dir=root / "cache", + num_workers=1, + ) + sample_dataset = dataset.set_task(task, num_workers=1) + model = self.module.EOLMistrustClassifier( + dataset=sample_dataset, + embedding_dim=8, + hidden_dim=16, + text_hash_buckets=64, + ) + + batch = 
next(iter(get_dataloader(sample_dataset, batch_size=2, shuffle=False))) + outputs = model(**batch) + del batch + del model + del sample_dataset + del dataset + finally: + shutil.rmtree(temp_dir, ignore_errors=True) + + self.assertEqual(tuple(outputs["logit"].shape), (2, 1)) + self.assertEqual(tuple(outputs["y_prob"].shape), (2, 1)) + self.assertEqual(tuple(outputs["y_true"].shape), (2, 1)) + self.assertTrue(torch.isfinite(outputs["loss"]).item()) + + def test_classifier_runs_end_to_end_for_normal_full_feature_path(self): + results = self._default_full + + dataset = results["dataset"] + task = results["task"] + sample_by_visit = results["sample_by_visit"] + outputs = results["outputs"] + + self.assertEqual(dataset.dataset_prepare_mode, "default") + self.assertFalse(dataset.paper_like_dataset_prepare) + self.assertEqual(task.dataset_prepare_mode, "default") + self.assertIn("diagnoses_icd", dataset.tables) + self.assertIn("procedures_icd", dataset.tables) + self.assertIn("prescriptions", dataset.tables) + self.assertIn("chartevents", dataset.tables) + self.assertIn("noteevents", dataset.tables) + self.assertGreater( + int(torch.count_nonzero(sample_by_visit[101]["conditions"]).item()), + 0, + ) + self.assertGreater( + int(torch.count_nonzero(sample_by_visit[101]["procedures"]).item()), + 0, + ) + self.assertGreater( + int(torch.count_nonzero(sample_by_visit[101]["drugs"]).item()), + 0, + ) + self.assertAlmostEqual( + float(sample_by_visit[102]["los_days"].view(-1)[0].item()), + 3.5, + ) + self.assertEqual(tuple(outputs["logit"].shape), (2, 1)) + self.assertTrue(torch.isfinite(outputs["loss"]).item()) + + def test_classifier_runs_end_to_end_for_paper_like_full_feature_path(self): + results = self._paperlike_full + + dataset = results["dataset"] + task = results["task"] + sample_by_visit = results["sample_by_visit"] + outputs = results["outputs"] + + self.assertEqual(dataset.dataset_prepare_mode, "paper_like") + self.assertTrue(dataset.paper_like_dataset_prepare) + 
self.assertEqual(task.dataset_prepare_mode, "paper_like") + self.assertEqual(task.code_status_mode, "paper_like") + self.assertGreater( + int(torch.count_nonzero(sample_by_visit[101]["conditions"]).item()), + 0, + ) + self.assertGreater( + int(torch.count_nonzero(sample_by_visit[101]["procedures"]).item()), + 0, + ) + self.assertGreater( + int(torch.count_nonzero(sample_by_visit[101]["drugs"]).item()), + 0, + ) + self.assertAlmostEqual( + float(sample_by_visit[102]["los_days"].view(-1)[0].item()), + 12.0, + ) + self.assertEqual(tuple(outputs["logit"].shape), (2, 1)) + self.assertTrue(torch.isfinite(outputs["loss"]).item()) + + def test_classifier_can_train_and_evaluate_on_normal_full_feature_path(self): + sample_dataset = self._default_full["sample_dataset"] + model = self.module.EOLMistrustClassifier( + dataset=sample_dataset, + embedding_dim=8, + hidden_dim=16, + text_hash_buckets=64, + ) + + train_loader = get_dataloader(sample_dataset, batch_size=2, shuffle=True) + eval_loader = get_dataloader(sample_dataset, batch_size=2, shuffle=False) + trainer = Trainer( + model=model, + metrics=["accuracy"], + device="cpu", + enable_logging=False, + ) + trainer.train( + train_dataloader=train_loader, + val_dataloader=eval_loader, + test_dataloader=eval_loader, + epochs=1, + monitor="accuracy", + load_best_model_at_last=False, + ) + scores = trainer.evaluate(eval_loader) + + self.assertIn("accuracy", scores) + self.assertIn("loss", scores) + self.assertGreaterEqual(float(scores["accuracy"]), 0.0) + self.assertLessEqual(float(scores["accuracy"]), 1.0) + self.assertTrue(torch.isfinite(torch.tensor(float(scores["loss"]))).item()) + + if __name__ == "__main__": unittest.main() diff --git a/tests/core/test_eol_mistrust_module.py b/tests/core/test_eol_mistrust_module.py index db32ed6d6..ef492ed60 100644 --- a/tests/core/test_eol_mistrust_module.py +++ b/tests/core/test_eol_mistrust_module.py @@ -1070,8 +1070,8 @@ def 
test_treatment_disparity_uses_admission_level_vent_and_vaso_totals(self): self.ventdurations, self.vasopressordurations, ).fillna(0).set_index("hadm_id") - self.assertEqual(totals.loc[302, "total_vent_min"], 810.0) - self.assertEqual(totals.loc[303, "total_vaso_min"], 840.0) + self.assertEqual(totals.loc[302, "total_vent_min"], 750.0) + self.assertEqual(totals.loc[303, "total_vaso_min"], 240.0) def test_treatment_totals_respect_exact_six_hundred_minute_merge_boundary(self): build_treatment_totals = self._get_callable("build_treatment_totals") @@ -1293,15 +1293,15 @@ def test_left_ama_target_accepts_truncated_mimic_discharge_location(self): self.assertEqual(int(final.loc[306, "left_ama"]), 0) def test_code_status_target_uses_required_itemids_and_values(self): - build_code_status_target = getattr(self.module, "_build_code_status_target") - target = build_code_status_target(self.chartevents, self.d_items).set_index("hadm_id") + build_code_status_target = getattr(self.module, "_build_task_code_status_target") + target = build_code_status_target(self.chartevents).set_index("hadm_id") self.assertEqual(target.loc[302, "code_status_dnr_dni_cmo"], 1) self.assertEqual(target.loc[303, "code_status_dnr_dni_cmo"], 1) self.assertEqual(target.loc[305, "code_status_dnr_dni_cmo"], 0) self.assertNotIn(304, set(target.index)) def test_code_status_target_recognizes_common_truncated_positive_values(self): - build_code_status_target = getattr(self.module, "_build_code_status_target") + build_code_status_target = getattr(self.module, "_build_task_code_status_target") chartevents = pd.DataFrame( [ {"hadm_id": 401, "itemid": 128, "value": "Do Not Resuscita", "icustay_id": 4011}, @@ -1310,13 +1310,13 @@ def test_code_status_target_recognizes_common_truncated_positive_values(self): ] ) - target = build_code_status_target(chartevents, self.d_items).set_index("hadm_id") + target = build_code_status_target(chartevents).set_index("hadm_id") self.assertEqual(int(target.loc[401, 
"code_status_dnr_dni_cmo"]), 1) self.assertEqual(int(target.loc[402, "code_status_dnr_dni_cmo"]), 1) self.assertEqual(int(target.loc[403, "code_status_dnr_dni_cmo"]), 1) def test_code_status_target_uses_last_charted_status_when_charttime_is_present(self): - build_code_status_target = getattr(self.module, "_build_code_status_target") + build_code_status_target = getattr(self.module, "_build_task_code_status_target") chartevents = pd.DataFrame( [ { @@ -1350,13 +1350,13 @@ def test_code_status_target_uses_last_charted_status_when_charttime_is_present(s ] ) - target = build_code_status_target(chartevents, self.d_items).set_index("hadm_id") + target = build_code_status_target(chartevents).set_index("hadm_id") self.assertEqual(int(target.loc[451, "code_status_dnr_dni_cmo"]), 1) self.assertEqual(int(target.loc[452, "code_status_dnr_dni_cmo"]), 0) def test_code_status_task_excludes_admissions_without_charted_code_status(self): - build_code_status_target = getattr(self.module, "_build_code_status_target") - target = build_code_status_target(self.chartevents, self.d_items) + build_code_status_target = getattr(self.module, "_build_task_code_status_target") + target = build_code_status_target(self.chartevents) self.assertNotIn(306, set(target["hadm_id"])) def test_in_hospital_mortality_target_comes_from_hospital_expire_flag(self): diff --git a/tests/core/test_eol_mistrust_task.py b/tests/core/test_eol_mistrust_task.py index 19d89bfe5..9123f7cda 100644 --- a/tests/core/test_eol_mistrust_task.py +++ b/tests/core/test_eol_mistrust_task.py @@ -173,6 +173,75 @@ def test_task_map_and_wrapper_targets_stay_consistent(self): with self.assertRaisesRegex(ValueError, "Unsupported EOL mistrust target"): self.module.EOLMistrustDownstreamMIMIC3(target="unknown") + def test_numeric_inputs_use_supported_tensor_processors(self): + task = self.module.EOLMistrustMortalityPredictionMIMIC3(include_notes=True) + + self.assertEqual(task.input_schema["age"], "tensor") + 
self.assertEqual(task.input_schema["los_days"], "tensor") + + def test_task_rejects_unknown_dataset_prepare_mode(self): + with self.assertRaisesRegex(ValueError, "dataset_prepare_mode"): + self.module.EOLMistrustMortalityPredictionMIMIC3( + dataset_prepare_mode="mystery_mode" + ) + + def test_paper_like_task_route_changes_code_status_and_los_representation(self): + normal_task = self.module.EOLMistrustCodeStatusPredictionMIMIC3( + include_notes=False, + dataset_prepare_mode="default", + ) + paper_like_task = self.module.EOLMistrustCodeStatusPredictionMIMIC3( + include_notes=False, + dataset_prepare_mode="paper_like", + ) + patient = _DummyPatient( + patient_id="subject-2", + events_by_type={ + "patients": [ + _DummyEvent(gender="M", dob="2070-01-01 00:00:00"), + ], + "admissions": [ + _DummyEvent( + hadm_id=302, + admittime="2100-01-01 00:00:00", + dischtime="2100-01-02 12:00:00", + discharge_location="HOME", + hospital_expire_flag=0, + insurance="Medicare", + ethnicity="WHITE", + ), + ], + "chartevents": [ + _DummyEvent( + hadm_id=302, + itemid=128, + value="Full Code", + charttime="2100-01-02 11:00:00", + ), + _DummyEvent( + hadm_id=302, + itemid=128, + value="DNR/DNI", + charttime="2100-01-01 08:00:00", + ), + ], + }, + ) + + normal_sample = normal_task(patient)[0] + paper_like_sample = paper_like_task(patient)[0] + + self.assertEqual(normal_task.dataset_prepare_mode, "default") + self.assertEqual(normal_task.code_status_mode, "corrected") + self.assertFalse(normal_task.paper_like_dataset_prepare) + self.assertEqual(paper_like_task.dataset_prepare_mode, "paper_like") + self.assertEqual(paper_like_task.code_status_mode, "paper_like") + self.assertTrue(paper_like_task.paper_like_dataset_prepare) + self.assertEqual(normal_sample["code_status_dnr_dni_cmo"], 0) + self.assertEqual(paper_like_sample["code_status_dnr_dni_cmo"], 1) + self.assertAlmostEqual(normal_sample["los_days"], 1.5) + self.assertAlmostEqual(paper_like_sample["los_days"], 12.0) + if __name__ == 
"__main__": unittest.main() From c04a7fd619e0431ef72883c48e13d4851df5c228 Mon Sep 17 00:00:00 2001 From: Amy Hwang Date: Sat, 18 Apr 2026 02:23:17 -0700 Subject: [PATCH 6/7] Additional updates - PEP8 88-char cleanup across 6 core files + example - Added synthetic-data reproducibility note in example docstring - Added 3 RST stubs + index updates for dataset/model/tasks - Added conftest.py with `--run-slow` opt-in; tagged slow tests - Refactored models/eol_mistrust.py helper for readability --- docs/api/datasets.rst | 1 + .../pyhealth.datasets.EOLMistrustDataset.rst | 13 + docs/api/models.rst | 1 + .../pyhealth.models.EOLMistrustClassifier.rst | 14 + docs/api/tasks.rst | 1 + .../api/tasks/pyhealth.tasks.eol_mistrust.rst | 29 + examples/eol_mistrust_mortality_classifier.py | 245 ++++++-- pyhealth/datasets/eol_mistrust.py | 14 +- pyhealth/datasets/eol_mistrust_dataset.py | 45 ++ pyhealth/models/eol_mistrust.py | 559 +++++++++++++----- pyhealth/models/eol_mistrust_classifier.py | 64 ++ pyhealth/tasks/eol_mistrust.py | 88 ++- tests/core/conftest.py | 47 ++ tests/core/test_eol_mistrust_Integration.py | 20 +- tests/core/test_eol_mistrust_dataset.py | 22 +- tests/core/test_eol_mistrust_model.py | 2 + 16 files changed, 954 insertions(+), 211 deletions(-) create mode 100644 docs/api/datasets/pyhealth.datasets.EOLMistrustDataset.rst create mode 100644 docs/api/models/pyhealth.models.EOLMistrustClassifier.rst create mode 100644 docs/api/tasks/pyhealth.tasks.eol_mistrust.rst create mode 100644 tests/core/conftest.py diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst index b02439d26..a85a12607 100644 --- a/docs/api/datasets.rst +++ b/docs/api/datasets.rst @@ -235,6 +235,7 @@ Available Datasets datasets/pyhealth.datasets.SleepEDFDataset datasets/pyhealth.datasets.EHRShotDataset datasets/pyhealth.datasets.Support2Dataset + datasets/pyhealth.datasets.EOLMistrustDataset datasets/pyhealth.datasets.BMDHSDataset datasets/pyhealth.datasets.COVID19CXRDataset 
datasets/pyhealth.datasets.ChestXray14Dataset diff --git a/docs/api/datasets/pyhealth.datasets.EOLMistrustDataset.rst b/docs/api/datasets/pyhealth.datasets.EOLMistrustDataset.rst new file mode 100644 index 000000000..24fbe3559 --- /dev/null +++ b/docs/api/datasets/pyhealth.datasets.EOLMistrustDataset.rst @@ -0,0 +1,13 @@ +pyhealth.datasets.EOLMistrustDataset +====================================== + +MIMIC-III dataset wrapper used to replicate Boag et al. 2018, +*"Racial Disparities and Mistrust in End-of-Life Care."* It loads the +admissions, ICU stays, and (optionally) note events tables, and exposes +the proxy-mistrust and end-of-life cohort definitions used by the three +downstream tasks in :doc:`../tasks/pyhealth.tasks.eol_mistrust`. + +.. autoclass:: pyhealth.datasets.EOLMistrustDataset + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/models.rst b/docs/api/models.rst index 7368dec94..7cbb5ffce 100644 --- a/docs/api/models.rst +++ b/docs/api/models.rst @@ -203,4 +203,5 @@ API Reference models/pyhealth.models.VisionEmbeddingModel models/pyhealth.models.TextEmbedding models/pyhealth.models.BIOT + models/pyhealth.models.EOLMistrustClassifier models/pyhealth.models.unified_multimodal_embedding_docs diff --git a/docs/api/models/pyhealth.models.EOLMistrustClassifier.rst b/docs/api/models/pyhealth.models.EOLMistrustClassifier.rst new file mode 100644 index 000000000..0829a3be6 --- /dev/null +++ b/docs/api/models/pyhealth.models.EOLMistrustClassifier.rst @@ -0,0 +1,14 @@ +pyhealth.models.EOLMistrustClassifier +======================================= + +Multimodal classifier that mirrors the end-of-life prediction head from +Boag et al. 2018. 
It consumes sequence features (diagnoses, procedures, +drugs), tensor features (age, length of stay), and text features +(demographics and free-text clinical notes) from the +:class:`~pyhealth.datasets.EOLMistrustDataset` and predicts a binary +target such as Left-AMA, code-status change, or in-hospital mortality. + +.. autoclass:: pyhealth.models.EOLMistrustClassifier + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/tasks.rst b/docs/api/tasks.rst index 399b8f1aa..cbaa9d07c 100644 --- a/docs/api/tasks.rst +++ b/docs/api/tasks.rst @@ -229,3 +229,4 @@ Available Tasks Mutation Pathogenicity (COSMIC) Cancer Survival Prediction (TCGA) Cancer Mutation Burden (TCGA) + EOL Mistrust (MIMIC-III) diff --git a/docs/api/tasks/pyhealth.tasks.eol_mistrust.rst b/docs/api/tasks/pyhealth.tasks.eol_mistrust.rst new file mode 100644 index 000000000..b8eb76f90 --- /dev/null +++ b/docs/api/tasks/pyhealth.tasks.eol_mistrust.rst @@ -0,0 +1,29 @@ +pyhealth.tasks.eol_mistrust +============================== + +End-of-life cohort tasks from Boag et al. 2018, *"Racial Disparities and +Mistrust in End-of-Life Care."* Three binary prediction targets are +defined on top of the :class:`~pyhealth.datasets.EOLMistrustDataset`: +Left-AMA, code-status change (DNR/DNI/CMO), and in-hospital mortality. +All three share the same input schema and differ only in the extracted +label. + +.. autoclass:: pyhealth.tasks.EOLMistrustDownstreamMIMIC3 + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: pyhealth.tasks.EOLMistrustLeftAMAPredictionMIMIC3 + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: pyhealth.tasks.EOLMistrustCodeStatusPredictionMIMIC3 + :members: + :undoc-members: + :show-inheritance: + +.. 
autoclass:: pyhealth.tasks.EOLMistrustMortalityPredictionMIMIC3 + :members: + :undoc-members: + :show-inheritance: diff --git a/examples/eol_mistrust_mortality_classifier.py b/examples/eol_mistrust_mortality_classifier.py index d400f263e..79b56f5ff 100644 --- a/examples/eol_mistrust_mortality_classifier.py +++ b/examples/eol_mistrust_mortality_classifier.py @@ -24,32 +24,62 @@ Recommended commands -------------------- -Full pipeline, normal:: +Full pipeline, normal (PowerShell; ``` ` ``` continues the line):: - .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py --root EOL_Workspace\eol_mistrust_required_combined --repetitions 10 + .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py ` + --root EOL_Workspace\eol_mistrust_required_combined ` + --repetitions 10 Full pipeline, paper-like:: - .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py --root EOL_Workspace\eol_mistrust_required_combined --paper-like-dataset-prepare --repetitions 10 - -Full pipeline . Route ablation, normal vs paper-like:: + .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py ` + --root EOL_Workspace\eol_mistrust_required_combined ` + --paper-like-dataset-prepare --repetitions 10 - .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py --root EOL_Workspace\eol_mistrust_required_combined --ablation-study --repetitions 1 +Full pipeline. 
Route ablation, normal vs paper-like:: + .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py ` + --root EOL_Workspace\eol_mistrust_required_combined ` + --ablation-study --repetitions 1 -Native proof, normal:: - .\.venv\Scripts\python.exe -m unittest tests.core.test_eol_mistrust_model.TestEOLMistrustClassifier.test_classifier_runs_end_to_end_for_normal_full_feature_path +Native proof, normal (test IDs are wrapped here for readability; run on one line):: + + # Method name (one line): + # test_classifier_runs_end_to_end_for_normal_full_feature_path + .\.venv\Scripts\python.exe -m unittest ` + tests.core.test_eol_mistrust_model.TestEOLMistrustClassifier. Native proof, paper-like:: - .\.venv\Scripts\python.exe -m unittest tests.core.test_eol_mistrust_model.TestEOLMistrustClassifier.test_classifier_runs_end_to_end_for_paper_like_full_feature_path + # Method name (one line): + # test_classifier_runs_end_to_end_for_paper_like_full_feature_path + .\.venv\Scripts\python.exe -m unittest ` + tests.core.test_eol_mistrust_model.TestEOLMistrustClassifier. Native train/eval demo, normal only:: - .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py --root EOL_Workspace\eol_mistrust_required_combined --task-demo --task-demo-train-eval - - + .\.venv\Scripts\python.exe examples\eol_mistrust_mortality_classifier.py ` + --root EOL_Workspace\eol_mistrust_required_combined ` + --task-demo --task-demo-train-eval + + +Reproducibility on synthetic data (no MIMIC-III credentials required) +--------------------------------------------------------------------- + +The commands above require the combined MIMIC-III CSV exports under +``--root``. 
To verify the full +``BaseDataset -> set_task -> BaseModel -> Trainer.train -> Trainer.evaluate`` +pipeline on synthetic fixtures instead, run the opt-in slow test suite:: + + pytest tests/core/test_eol_mistrust_dataset.py ` + tests/core/test_eol_mistrust_model.py ` + tests/core/test_eol_mistrust_Integration.py --run-slow + +These tests build in-memory synthetic MIMIC-III tables, exercise every +stage of the pipeline end-to-end, and assert on shapes, gradients, and +evaluation outputs. This is the rubric-grade reproduction path when +MIMIC-III credentialing is unavailable. """ from __future__ import annotations @@ -70,7 +100,9 @@ sys.path.insert(0, str(REPO_ROOT)) DEFAULT_DATA_ROOT = REPO_ROOT / "EOL_Workspace" / "eol_mistrust_required_combined" -DEFAULT_CONFIG_PATH = REPO_ROOT / "pyhealth" / "datasets" / "configs" / "eol_mistrust.yaml" +DEFAULT_CONFIG_PATH = ( + REPO_ROOT / "pyhealth" / "datasets" / "configs" / "eol_mistrust.yaml" +) DEFAULT_RESULT_ROOT = REPO_ROOT / "EOL_Workspace" / "EOL_Result" DEFAULT_NOTE_CHUNKSIZE = 100_000 DEFAULT_CHARTEVENT_CHUNKSIZE = 500_000 @@ -98,10 +130,14 @@ def _load_local_module(module_name: str, relative_path: str): build_acuity_scores = _DATASET_MODULE.build_acuity_scores build_all_cohort = _DATASET_MODULE.build_all_cohort build_base_admissions = _DATASET_MODULE.build_base_admissions -build_chartevent_artifacts_from_csv = _DATASET_MODULE.build_chartevent_artifacts_from_csv +build_chartevent_artifacts_from_csv = ( + _DATASET_MODULE.build_chartevent_artifacts_from_csv +) build_demographics_table = _DATASET_MODULE.build_demographics_table build_eol_cohort = _DATASET_MODULE.build_eol_cohort -build_final_model_table_from_code_status_targets = _DATASET_MODULE.build_final_model_table_from_code_status_targets +build_final_model_table_from_code_status_targets = ( + _DATASET_MODULE.build_final_model_table_from_code_status_targets +) build_note_corpus_from_csv = _DATASET_MODULE.build_note_corpus_from_csv build_note_labels_from_csv = 
_DATASET_MODULE.build_note_labels_from_csv build_treatment_totals = _DATASET_MODULE.build_treatment_totals @@ -112,9 +148,13 @@ def _load_local_module(module_name: str, relative_path: str): evaluate_downstream_average_weights = _MODEL_MODULE.evaluate_downstream_average_weights build_autopsy_mistrust_scores = _MODEL_MODULE.build_autopsy_mistrust_scores build_logistic_cv_estimator_factory = _MODEL_MODULE.build_logistic_cv_estimator_factory -build_negative_sentiment_mistrust_scores = _MODEL_MODULE.build_negative_sentiment_mistrust_scores +build_negative_sentiment_mistrust_scores = ( + _MODEL_MODULE.build_negative_sentiment_mistrust_scores +) build_noncompliance_mistrust_scores = _MODEL_MODULE.build_noncompliance_mistrust_scores -get_downstream_feature_configurations = _MODEL_MODULE.get_downstream_feature_configurations +get_downstream_feature_configurations = ( + _MODEL_MODULE.get_downstream_feature_configurations +) z_normalize_scores = _MODEL_MODULE.z_normalize_scores EOLMistrustClassifier = None @@ -237,7 +277,9 @@ def _read_csvs(root: Path, path_map: dict[str, str]) -> dict[str, pd.DataFrame]: for name, relative_path in path_map.items(): csv_path = root / relative_path if not csv_path.exists(): - raise FileNotFoundError(f"Missing required table for EOL example: {csv_path}") + raise FileNotFoundError( + f"Missing required table for EOL example: {csv_path}" + ) table = pd.read_csv(csv_path, low_memory=False) table.columns = [str(column).lower() for column in table.columns] tables[name] = table @@ -293,8 +335,11 @@ def _format_continuous_summary(center: float, lower: float, upper: float) -> str def _note_present_hadm_ids(note_corpus: pd.DataFrame) -> list[int]: """Return sorted admission ids with at least one non-empty aggregated note.""" + non_empty_mask = ( + note_corpus["note_text"].fillna("").astype(str).str.strip() != "" + ) hadm_ids = pd.to_numeric( - note_corpus.loc[note_corpus["note_text"].fillna("").astype(str).str.strip() != "", "hadm_id"], + 
note_corpus.loc[non_empty_mask, "hadm_id"], errors="coerce", ) return sorted(hadm_ids.dropna().astype(int).unique().tolist()) @@ -354,7 +399,9 @@ def _build_run_table1_summary(eol_cohort: pd.DataFrame) -> pd.DataFrame: "metric": metric, "race": race, "summary_stat": "median_iqr", - "run_value": _format_continuous_summary(run_numeric, run_lower, run_upper), + "run_value": _format_continuous_summary( + run_numeric, run_lower, run_upper + ), "run_numeric": run_numeric, "run_interval_lower": run_lower, "run_interval_upper": run_upper, @@ -481,7 +528,9 @@ def _build_run_table6_summary( ) -> pd.DataFrame: """Build run-only Table 6 downstream weight summaries.""" - table6_source = _ensure_downstream_weight_results(artifacts, repetitions=repetitions) + table6_source = _ensure_downstream_weight_results( + artifacts, repetitions=repetitions + ) if not isinstance(table6_source, pd.DataFrame) or table6_source.empty: return pd.DataFrame() required = {"task", "configuration", "feature", "weight_mean", "weight_std"} @@ -517,8 +566,12 @@ def _render_run_table_summary( autopsy_proxy_enabled = True dataset_prepare_mode = "unknown" if isinstance(validation_summary, dict): - autopsy_proxy_enabled = bool(validation_summary.get("autopsy_proxy_enabled", True)) - dataset_prepare_mode = str(validation_summary.get("dataset_prepare_mode", "unknown")) + autopsy_proxy_enabled = bool( + validation_summary.get("autopsy_proxy_enabled", True) + ) + dataset_prepare_mode = str( + validation_summary.get("dataset_prepare_mode", "unknown") + ) feature_weight_summaries = artifacts.get("feature_weight_summaries", {}) if not isinstance(feature_weight_summaries, dict): @@ -529,7 +582,14 @@ def _render_run_table_summary( _build_run_table1_summary(eol_cohort) if _has_columns( eol_cohort, - {"race", "insurance_group", "discharge_category", "gender", "los_days", "age"}, + { + "race", + "insurance_group", + "discharge_category", + "gender", + "los_days", + "age", + }, ) else pd.DataFrame() ) @@ -538,13 +598,24 
@@ def _render_run_table_summary( _build_run_table2_summary(race_treatment) if _has_columns( race_treatment, - {"treatment", "n_black", "n_white", "median_black", "median_white", "pvalue"}, + { + "treatment", + "n_black", + "n_white", + "median_black", + "median_white", + "pvalue", + }, ) and not race_treatment.empty else pd.DataFrame() ) table3 = _build_run_table3_summary(feature_weight_summaries) - if not autopsy_proxy_enabled and not table3.empty and "proxy_model" in table3.columns: + if ( + not autopsy_proxy_enabled + and not table3.empty + and "proxy_model" in table3.columns + ): table3 = table3.loc[table3["proxy_model"] != "autopsy"].reset_index(drop=True) acuity_correlations = artifacts.get("acuity_correlations") table4 = ( @@ -576,9 +647,15 @@ def _render_run_table_summary( autopsy_proxy_enabled=autopsy_proxy_enabled, ) + if dataset_prepare_mode == "paper_like": + route_label = "Paper-like" + elif dataset_prepare_mode == "default": + route_label = "Normal" + else: + route_label = dataset_prepare_mode lines = [ "Run Table Results", - f"Route: {'Paper-like' if dataset_prepare_mode == 'paper_like' else 'Normal' if dataset_prepare_mode == 'default' else dataset_prepare_mode}", + f"Route: {route_label}", f"dataset_prepare_mode: {dataset_prepare_mode}", f"autopsy_proxy_enabled: {autopsy_proxy_enabled}", f"repetitions: {repetitions}", @@ -725,8 +802,10 @@ def _render_run_table_summary( row = task_row_lookup.get(feature_name) if row is None: continue + mean_value = float(row.run_weight_mean) + std_value = float(row.run_weight_std) lines.append( - f" {row.feature}: mean={float(row.run_weight_mean):.3f}, std={float(row.run_weight_std):.3f}" + f" {row.feature}: mean={mean_value:.3f}, std={std_value:.3f}" ) lines.append("") @@ -752,7 +831,10 @@ def _log_stage(stage_start: float, pipeline_start: float, message: str) -> None: """Print a timing log line for a pipeline stage.""" elapsed_stage = time.time() - stage_start elapsed_total = time.time() - pipeline_start - 
print(f"[{elapsed_total:7.1f}s total | {elapsed_stage:6.1f}s] {message}", flush=True) + print( + f"[{elapsed_total:7.1f}s total | {elapsed_stage:6.1f}s] {message}", + flush=True, + ) class _RouteSettings: @@ -775,7 +857,9 @@ def __init__( self.score_columns = score_columns self.feature_configurations = feature_configurations self.downstream_estimator_mode = downstream_estimator_mode - self.downstream_estimator_factory_resolver = downstream_estimator_factory_resolver + self.downstream_estimator_factory_resolver = ( + downstream_estimator_factory_resolver + ) def _current_run_timestamp() -> str: @@ -845,7 +929,9 @@ def _prepare_ablation_run_directories( while run_dir.exists(): run_name = f"{base_name}_{suffix:02d}" run_dir = ( - output_dir.parent / run_name if output_dir is not None else result_root / run_name + output_dir.parent / run_name + if output_dir is not None + else result_root / run_name ) suffix += 1 @@ -1054,7 +1140,10 @@ def _render_ablation_summary( f"managed_run_name: {run_name}", f"managed_run_dir: {run_dir}", "ablation_variable: route (Normal vs Paper-like)", - "ablation_focus: corrected default path without autopsy vs paper-like path with autopsy", + ( + "ablation_focus: corrected default path without autopsy vs " + "paper-like path with autopsy" + ), f"started_at: {started_at.isoformat(timespec='seconds')}", f"finished_at: {finished_at.isoformat(timespec='seconds')}", f"total_runtime_seconds: {total_runtime_seconds:.3f}", @@ -1152,7 +1241,9 @@ def _run_single_managed_route( def _run_route_ablation_study(args: argparse.Namespace) -> None: """Run the explicit Normal vs Paper-like route ablation study.""" - if getattr(args, "task_demo", False) or getattr(args, "task_demo_train_eval", False): + if getattr(args, "task_demo", False) or getattr( + args, "task_demo_train_eval", False + ): raise ValueError( "--ablation-study cannot be combined with --task-demo or " "--task-demo-train-eval." 
@@ -1229,7 +1320,9 @@ def _build_route_settings(paper_like_dataset_prepare: bool) -> _RouteSettings: score_columns=_normal_route_score_columns(), feature_configurations=_normal_route_feature_configurations(), downstream_estimator_mode="task_balanced_logistic_cv", - downstream_estimator_factory_resolver=_normal_route_downstream_estimator_factory_resolver(), + downstream_estimator_factory_resolver=( + _normal_route_downstream_estimator_factory_resolver() + ), ) @@ -1258,7 +1351,7 @@ def _normal_route_feature_configurations() -> dict[str, list[str]]: def _normal_route_downstream_estimator_factory_resolver(): - """Return task-specific balanced LogisticRegressionCV factories for the corrected route.""" + """Return task-balanced LogisticRegressionCV factories for the corrected route.""" task_specs = { "Left AMA": { @@ -1297,7 +1390,9 @@ def _filter_metric_frame(frame: pd.DataFrame, metric: str) -> pd.DataFrame: def _disable_autopsy_outputs( - model_outputs: dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]], + model_outputs: dict[ + str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object] + ], ) -> dict[str, pd.DataFrame | dict[str, pd.DataFrame] | dict[str, object]]: """Strip autopsy-specific analysis outputs from the default route.""" @@ -1311,7 +1406,11 @@ def _disable_autopsy_outputs( if name != "autopsy" } - for key in ("race_gap_results", "trust_treatment_results", "trust_treatment_by_acuity_results"): + for key in ( + "race_gap_results", + "trust_treatment_results", + "trust_treatment_by_acuity_results", + ): frame = adjusted.get(key) if isinstance(frame, pd.DataFrame): adjusted[key] = _filter_metric_frame(frame, "autopsy_score_z") @@ -1376,7 +1475,11 @@ def _build_mistrust_scores( note_labels=note_labels, estimator_factory=estimator_factory, ) - _log_stage(t0, pipeline_start, f"Built noncompliance proxy scores ({len(noncompliance)} rows)") + _log_stage( + t0, + pipeline_start, + f"Built noncompliance proxy scores ({len(noncompliance)} 
rows)", + ) if autopsy_enabled: t0 = time.time() @@ -1385,7 +1488,11 @@ def _build_mistrust_scores( note_labels=note_labels, estimator_factory=estimator_factory, ) - _log_stage(t0, pipeline_start, f"Built autopsy proxy scores ({len(autopsy)} rows)") + _log_stage( + t0, + pipeline_start, + f"Built autopsy proxy scores ({len(autopsy)} rows)", + ) else: autopsy = pd.DataFrame( { @@ -1399,16 +1506,26 @@ def _build_mistrust_scores( note_corpus=note_corpus, sentiment_fn=sentiment_fn, ) - _log_stage(t0, pipeline_start, f"Built negative sentiment scores ({len(sentiment)} rows)") + _log_stage( + t0, + pipeline_start, + f"Built negative sentiment scores ({len(sentiment)} rows)", + ) merged = ( - noncompliance.merge(autopsy, on="hadm_id", how="inner", validate="one_to_one") + noncompliance.merge( + autopsy, on="hadm_id", how="inner", validate="one_to_one" + ) .merge(sentiment, on="hadm_id", how="inner", validate="one_to_one") .sort_values("hadm_id") ) mistrust_scores = z_normalize_scores( merged, - columns=["noncompliance_score", "autopsy_score", "negative_sentiment_score"], + columns=[ + "noncompliance_score", + "autopsy_score", + "negative_sentiment_score", + ], ).rename( columns={ "noncompliance_score": "noncompliance_score_z", @@ -1416,7 +1533,11 @@ def _build_mistrust_scores( "negative_sentiment_score": "negative_sentiment_score_z", } ).reset_index(drop=True) - _log_stage(t_total, pipeline_start, "Built mistrust scores (proxy models + sentiment)") + _log_stage( + t_total, + pipeline_start, + "Built mistrust scores (proxy models + sentiment)", + ) return mistrust_scores t0 = time.time() @@ -1445,8 +1566,12 @@ def _build_note_artifacts( chunksize=note_chunksize, ) note_present_hadm_ids = _note_present_hadm_ids(note_corpus) - filtered_all_cohort = all_cohort.loc[all_cohort["hadm_id"].isin(note_present_hadm_ids)].copy() - note_corpus = note_corpus.loc[note_corpus["hadm_id"].isin(note_present_hadm_ids)].copy() + filtered_all_cohort = all_cohort.loc[ + 
all_cohort["hadm_id"].isin(note_present_hadm_ids) + ].copy() + note_corpus = note_corpus.loc[ + note_corpus["hadm_id"].isin(note_present_hadm_ids) + ].copy() _log_stage(t0, pipeline_start, f"Streamed note corpus ({len(note_corpus)} rows)") t0 = time.time() @@ -1478,7 +1603,11 @@ def _build_chartevent_artifacts( paper_like=route_settings.autopsy_enabled, code_status_mode=route_settings.code_status_mode, ) - _log_stage(t0, pipeline_start, f"Streamed chartevents ({len(feature_matrix)} feature rows)") + _log_stage( + t0, + pipeline_start, + f"Streamed chartevents ({len(feature_matrix)} feature rows)", + ) return feature_matrix, code_status_targets @@ -1570,7 +1699,7 @@ def build_eol_mistrust_outputs( ) # ------------------------------------------------------------------ - # Stage 4: chartevents feature matrix + code status (SLOW — stream chartevents.csv) + # Stage 4: chartevents feature matrix + code status (SLOW: stream chartevents). # ------------------------------------------------------------------ feature_matrix, code_status_targets = _build_chartevent_artifacts( chartevents_csv_path=chartevents_csv_path, @@ -1611,7 +1740,11 @@ def build_eol_mistrust_outputs( code_status_targets=code_status_targets, mistrust_scores=mistrust_scores, ) - _log_stage(t0, t_pipeline, f"Built final model table ({len(final_model_table)} rows)") + _log_stage( + t0, + t_pipeline, + f"Built final model table ({len(final_model_table)} rows)", + ) validation["base_admissions_rows"] = int(len(base_admissions)) validation["all_cohort_rows"] = int(len(all_cohort)) @@ -1632,7 +1765,9 @@ def build_eol_mistrust_outputs( precomputed_mistrust_scores=mistrust_scores, score_columns=route_settings.score_columns, feature_configurations=route_settings.feature_configurations, - downstream_estimator_factory_resolver=route_settings.downstream_estimator_factory_resolver, + downstream_estimator_factory_resolver=( + route_settings.downstream_estimator_factory_resolver + ), ) if not 
route_settings.autopsy_enabled: model_outputs = _disable_autopsy_outputs(model_outputs) @@ -1712,10 +1847,10 @@ def run_task_demo( EOLMistrustClassifier = _EOLMistrustClassifier if EOLMistrustMortalityPredictionMIMIC3 is None: from pyhealth.tasks.eol_mistrust import ( - EOLMistrustMortalityPredictionMIMIC3 as _EOLMistrustMortalityPredictionMIMIC3, + EOLMistrustMortalityPredictionMIMIC3 as _EOLMortality, ) - EOLMistrustMortalityPredictionMIMIC3 = _EOLMistrustMortalityPredictionMIMIC3 + EOLMistrustMortalityPredictionMIMIC3 = _EOLMortality if train_and_evaluate and Trainer is None: from pyhealth.trainer import Trainer as _Trainer @@ -1792,11 +1927,15 @@ def _close_unique_datasets(*datasets: object) -> None: outputs = model(**batch) print(f"Task demo forward keys: {sorted(outputs.keys())}") finally: - _close_unique_datasets(sample_dataset, train_dataset, val_dataset, test_dataset) + _close_unique_datasets( + sample_dataset, train_dataset, val_dataset, test_dataset + ) def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Run the EOL mistrust example workflow.") + parser = argparse.ArgumentParser( + description="Run the EOL mistrust example workflow.", + ) parser.add_argument( "--root", type=Path, diff --git a/pyhealth/datasets/eol_mistrust.py b/pyhealth/datasets/eol_mistrust.py index 011a29b1c..5897a9f44 100644 --- a/pyhealth/datasets/eol_mistrust.py +++ b/pyhealth/datasets/eol_mistrust.py @@ -234,9 +234,10 @@ "permission obtained for autopsy", ) _AUTOPSY_SEGMENT_SPLIT_PATTERN = re.compile(r"[\n.;]+") +_DECLINE_VERBS = r"declin\w*|refus\w*|deni\w*|not\s+consent(?:ed)?" 
_AUTOPSY_CORRECTED_DECLINE_PATTERN = re.compile( - r"(?:\b(?:declin\w*|refus\w*|deni\w*|not\s+consent(?:ed)?)\b(?:\W+\w+){0,5}\W+\bautopsy\b)" - r"|(?:\bautopsy\b(?:\W+\w+){0,5}\W+\b(?:declin\w*|refus\w*|deni\w*|not\s+consent(?:ed)?)\b)", + rf"(?:\b(?:{_DECLINE_VERBS})\b(?:\W+\w+){{0,5}}\W+\bautopsy\b)" + rf"|(?:\bautopsy\b(?:\W+\w+){{0,5}}\W+\b(?:{_DECLINE_VERBS})\b)", re.IGNORECASE, ) _AUTOPSY_STUB_SEGMENT_PATTERN = re.compile(r"^(?:an?\s+)?autopsy\b", re.IGNORECASE) @@ -940,7 +941,8 @@ def _validate_text_access(noteevents: pd.DataFrame, chartevents: pd.DataFrame) - raise ValueError("noteevents.text must be accessible for NLP steps.") if chartevents["value"].isna().all(): raise ValueError( - "chartevents.value must be accessible for string matching and feature extraction." + "chartevents.value must be accessible for string matching and " + "feature extraction." ) @@ -2285,7 +2287,7 @@ def _assemble_final_model_table( return final.reset_index(drop=True) -def build_final_model_table( # pylint: disable=too-many-arguments,too-many-positional-arguments +def build_final_model_table( demographics: pd.DataFrame, all_cohort: pd.DataFrame, admissions: pd.DataFrame, @@ -2295,6 +2297,7 @@ def build_final_model_table( # pylint: disable=too-many-arguments,too-many-posi include_race: bool = True, include_mistrust: bool = True, ) -> pd.DataFrame: + # pylint: disable=too-many-arguments,too-many-positional-arguments """Assemble the final model table from raw chartevents. 
``d_items`` is retained for API compatibility; the normal path uses the @@ -2320,7 +2323,7 @@ def build_final_model_table( # pylint: disable=too-many-arguments,too-many-posi ) -def build_final_model_table_from_code_status_targets( # pylint: disable=too-many-arguments +def build_final_model_table_from_code_status_targets( demographics: pd.DataFrame, all_cohort: pd.DataFrame, admissions: pd.DataFrame, @@ -2329,6 +2332,7 @@ def build_final_model_table_from_code_status_targets( # pylint: disable=too-man include_race: bool = True, include_mistrust: bool = True, ) -> pd.DataFrame: + # pylint: disable=too-many-arguments """Assemble the final model table using precomputed code-status targets.""" return _assemble_final_model_table( diff --git a/pyhealth/datasets/eol_mistrust_dataset.py b/pyhealth/datasets/eol_mistrust_dataset.py index 4e5821ab5..7a66df1f7 100644 --- a/pyhealth/datasets/eol_mistrust_dataset.py +++ b/pyhealth/datasets/eol_mistrust_dataset.py @@ -54,7 +54,34 @@ class EOLMistrustDataset(BaseDataset): dataset_name: Optional dataset name override. config_path: Optional YAML config path. Defaults to the bundled ``eol_mistrust.yaml`` config. + dataset_prepare_mode: Either ``"default"`` (corrected replication + pipeline) or ``"paper_like"`` (notebook-faithful reproduction of + Boag et al. 2018). Controls code-status and autopsy label logic. **kwargs: Additional :class:`BaseDataset` keyword arguments. + + Attributes: + CORE_TABLES: Tables always loaded (``patients``, ``admissions``, + ``icustays``). + DEFAULT_OPTIONAL_TABLES: Tables auto-discovered from the root when + ``tables`` is not explicitly provided. + dataset_prepare_mode: Normalized mode string, one of ``"default"`` or + ``"paper_like"``. + paper_like_dataset_prepare: ``True`` when running the notebook-faithful + variant. + code_status_mode: Label extraction strategy for code-status tasks. + autopsy_label_mode: Label extraction strategy for the autopsy proxy. 
+ + Raises: + ValueError: If ``dataset_prepare_mode`` is not ``"default"`` or + ``"paper_like"``. + + Example: + >>> from pyhealth.datasets import EOLMistrustDataset + >>> dataset = EOLMistrustDataset( + ... root="/data/eol_mistrust", + ... dataset_prepare_mode="default", + ... ) + >>> dataset.stats() """ CORE_TABLES = ["patients", "admissions", "icustays"] @@ -128,6 +155,24 @@ def __init__( dataset_prepare_mode: str = DATASET_PREPARE_MODE_DEFAULT, **kwargs, ) -> None: + """Initialize the EOL mistrust dataset. + + Args: + root: Root directory containing the combined EOL mistrust export. + tables: Optional list of extra table names to load on top of the + core tables. When ``None``, optional tables are auto-discovered + from the root directory. + dataset_name: Optional dataset name override (defaults to + ``"eol_mistrust"``). + config_path: Optional path to a YAML config file. Falls back to + the bundled ``eol_mistrust.yaml``. + dataset_prepare_mode: ``"default"`` or ``"paper_like"``. + **kwargs: Additional :class:`BaseDataset` keyword arguments + (``cache_dir``, ``dev``, ``num_workers``, ...). + + Raises: + ValueError: If ``dataset_prepare_mode`` is not a supported value. 
+ """ if config_path is None: logger.info("No config path provided, using default EOL mistrust config") config_path = str( diff --git a/pyhealth/models/eol_mistrust.py b/pyhealth/models/eol_mistrust.py index b1de9c155..46563e6e5 100644 --- a/pyhealth/models/eol_mistrust.py +++ b/pyhealth/models/eol_mistrust.py @@ -37,10 +37,11 @@ pearsonr = None try: - from sklearn.linear_model import LogisticRegression, LogisticRegressionCV # pylint: disable=import-error - from sklearn.metrics import roc_auc_score # pylint: disable=import-error - from sklearn.model_selection import GroupShuffleSplit, train_test_split # pylint: disable=import-error - from sklearn.preprocessing import StandardScaler # pylint: disable=import-error + # pylint: disable=import-error + from sklearn.linear_model import LogisticRegression, LogisticRegressionCV + from sklearn.metrics import roc_auc_score + from sklearn.model_selection import GroupShuffleSplit, train_test_split + from sklearn.preprocessing import StandardScaler except ModuleNotFoundError: # pragma: no cover class LogisticRegression: # type: ignore[no-redef] """Fallback estimator preserving the sklearn constructor surface.""" @@ -131,7 +132,10 @@ def roc_auc_score(*args, **kwargs): # type: ignore[no-redef] [ ("Baseline", list(BASELINE_FEATURE_COLUMNS)), ("Baseline + Race", list(BASELINE_FEATURE_COLUMNS + RACE_FEATURE_COLUMNS)), - ("Baseline + Noncompliant", list(BASELINE_FEATURE_COLUMNS + ["noncompliance_score_z"])), + ( + "Baseline + Noncompliant", + list(BASELINE_FEATURE_COLUMNS + ["noncompliance_score_z"]), + ), ("Baseline + Autopsy", list(BASELINE_FEATURE_COLUMNS + ["autopsy_score_z"])), ( "Baseline + Neg-Sentiment", @@ -139,7 +143,11 @@ def roc_auc_score(*args, **kwargs): # type: ignore[no-redef] ), ( "Baseline + ALL", - list(BASELINE_FEATURE_COLUMNS + RACE_FEATURE_COLUMNS + MISTRUST_SCORE_COLUMNS), + list( + BASELINE_FEATURE_COLUMNS + + RACE_FEATURE_COLUMNS + + MISTRUST_SCORE_COLUMNS + ), ), ] ) @@ -147,10 +155,14 @@ def 
roc_auc_score(*args, **kwargs): # type: ignore[no-redef] DEFAULT_TRANSFORMERS_SENTIMENT_BATCH_SIZE = 64 -_SENTIMENT_BATCH_BACKEND: Callable[[Sequence[str]], list[tuple[float, float]]] | None = None +_SENTIMENT_BATCH_BACKEND: ( + Callable[[Sequence[str]], list[tuple[float, float]]] | None +) = None -def _parse_transformers_sentiment_output(result: Mapping[str, object]) -> tuple[float, float]: +def _parse_transformers_sentiment_output( + result: Mapping[str, object], +) -> tuple[float, float]: """Convert a transformers pipeline output row into the repo sentiment tuple.""" label = str(result.get("label", "")).upper() @@ -173,7 +185,9 @@ def _load_transformers_sentiment_batch( pipeline_factory = getattr(transformers_module, "pipeline", None) if not callable(pipeline_factory): - raise ModuleNotFoundError("transformers.pipeline is unavailable in the current environment.") + raise ModuleNotFoundError( + "transformers.pipeline is unavailable in the current environment." + ) try: # pragma: no cover - logging surface depends on transformers version transformers_logging = importlib.import_module("transformers.utils.logging") @@ -183,7 +197,9 @@ def _load_transformers_sentiment_batch( except Exception: pass - use_cuda = bool(getattr(torch_module, "cuda", None) and torch_module.cuda.is_available()) + use_cuda = bool( + getattr(torch_module, "cuda", None) and torch_module.cuda.is_available() + ) device = 0 if use_cuda else -1 classifier = pipeline_factory( "sentiment-analysis", @@ -191,7 +207,9 @@ def _load_transformers_sentiment_batch( device=device, ) - def _transformers_sentiment_batch(texts: Sequence[str]) -> list[tuple[float, float]]: + def _transformers_sentiment_batch( + texts: Sequence[str], + ) -> list[tuple[float, float]]: cleaned_texts = [_prepare_note_text_for_sentiment(text) for text in texts] outputs = [(0.0, 0.0) for _ in cleaned_texts] @@ -401,7 +419,9 @@ def _resolve_downstream_estimator_factory( resolved = downstream_estimator_factory_resolver(task_name, 
config_name) if resolved is not None: return resolved - return _default_estimator_factory if estimator_factory is None else estimator_factory + return ( + _default_estimator_factory if estimator_factory is None else estimator_factory + ) def _extract_positive_class_probabilities(probabilities) -> np.ndarray: @@ -410,7 +430,8 @@ def _extract_positive_class_probabilities(probabilities) -> np.ndarray: probability_array = np.asarray(probabilities, dtype=float) if probability_array.ndim != 2 or probability_array.shape[1] < 2: raise IndexError( - "Estimator `predict_proba` output must have shape (n_samples, n_classes>=2)." + "Estimator `predict_proba` output must have shape " + "(n_samples, n_classes>=2)." ) return probability_array[:, 1] @@ -422,7 +443,7 @@ def _score_column_name(label_column: str) -> str: class _ConstantProbabilityEstimator: - """Degenerate proxy estimator that predicts a constant positive-class probability.""" + """Proxy estimator that predicts a constant positive-class probability.""" def __init__(self, positive_probability: float): self.positive_probability = float(positive_probability) @@ -439,7 +460,10 @@ def fit(self, X, y): self.coef_ = np.zeros((1, n_features), dtype=float) probability = self.positive_probability if 0.0 < probability < 1.0: - self.intercept_ = np.array([float(np.log(probability / (1.0 - probability)))], dtype=float) + self.intercept_ = np.array( + [float(np.log(probability / (1.0 - probability)))], + dtype=float, + ) return self def predict_proba(self, X): @@ -500,7 +524,8 @@ def _iter_downstream_jobs( if n_pos < 10: warnings.warn( f"Downstream task '{task_name}' / config '{config_name}' has only " - f"{n_pos} positive examples in the cohort (minimum 10 recommended). " + f"{n_pos} positive examples in the cohort " + "(minimum 10 recommended). 
" "AUC results for this combination will be NaN.", UserWarning, stacklevel=2, @@ -514,15 +539,18 @@ def _iter_downstream_jobs_with_estimators( feature_configurations: Mapping[str, Sequence[str]] | None = None, task_map: Mapping[str, str] | None = None, estimator_factory: Callable[[], object] | None = None, - downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, + downstream_estimator_factory_resolver: ( + DownstreamEstimatorFactoryResolver | None + ) = None, ): """Yield downstream jobs together with the resolved estimator factory.""" - for task_name, target_column, config_name, feature_columns, usable, X, y in _iter_downstream_jobs( + jobs = _iter_downstream_jobs( final_model_table, feature_configurations=feature_configurations, task_map=task_map, - ): + ) + for task_name, target_column, config_name, feature_columns, usable, X, y in jobs: yield ( task_name, target_column, @@ -535,7 +563,9 @@ def _iter_downstream_jobs_with_estimators( task_name=task_name, config_name=config_name, estimator_factory=estimator_factory, - downstream_estimator_factory_resolver=downstream_estimator_factory_resolver, + downstream_estimator_factory_resolver=( + downstream_estimator_factory_resolver + ), ), ) @@ -564,7 +594,9 @@ def _downstream_split_with_optional_grouping( groups = pd.to_numeric(usable["subject_id"], errors="coerce") if groups.isna().any(): - raise ValueError("Downstream final_model_table contains null subject_id values.") + raise ValueError( + "Downstream final_model_table contains null subject_id values." 
+ ) splitter = GroupShuffleSplit( n_splits=1, test_size=test_size, @@ -644,7 +676,9 @@ def _prepare_proxy_training_frame( _require_columns(feature_matrix, ["hadm_id"], "feature_matrix") _require_columns(note_labels, ["hadm_id", label_column], "note_labels") - feature_columns = [column for column in feature_matrix.columns if column != "hadm_id"] + feature_columns = [ + column for column in feature_matrix.columns if column != "hadm_id" + ] merged = feature_matrix.merge( note_labels[["hadm_id", label_column]], on="hadm_id", @@ -661,7 +695,14 @@ def _make_metric_result( left = pd.to_numeric(left, errors="coerce").dropna().astype(float) right = pd.to_numeric(right, errors="coerce").dropna().astype(float) if left.empty or right.empty: - return float("nan"), float("nan"), float("nan"), float("nan"), len(left), len(right) + return ( + float("nan"), + float("nan"), + float("nan"), + float("nan"), + len(left), + len(right), + ) left_median = float(left.median()) right_median = float(right.median()) @@ -720,7 +761,9 @@ def _assign_severity_bins( def build_empirical_cdf_curve(values: Iterable[float]) -> pd.DataFrame: """Build a plot-ready empirical CDF curve from numeric values.""" - series = pd.to_numeric(pd.Series(list(values)), errors="coerce").dropna().astype(float) + series = ( + pd.to_numeric(pd.Series(list(values)), errors="coerce").dropna().astype(float) + ) series = series.sort_values().reset_index(drop=True) if series.empty: return pd.DataFrame(columns=["x", "cdf"]) @@ -731,7 +774,9 @@ def build_empirical_cdf_curve(values: Iterable[float]) -> pd.DataFrame: def get_downstream_feature_configurations() -> OrderedDict[str, list[str]]: """Return the six required downstream feature configurations.""" - return OrderedDict((name, list(columns)) for name, columns in DOWNSTREAM_FEATURE_CONFIGS.items()) + return OrderedDict( + (name, list(columns)) for name, columns in DOWNSTREAM_FEATURE_CONFIGS.items() + ) def get_downstream_task_map() -> OrderedDict[str, str]: @@ -751,7 
+796,9 @@ def fit_proxy_mistrust_model( Rows where ``label_column`` is NaN are excluded from training. """ - merged, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column) + merged, feature_columns = _prepare_proxy_training_frame( + feature_matrix, note_labels, label_column + ) labeled_mask = merged[label_column].notna() train = merged.loc[labeled_mask].copy() train_labels = train[label_column].astype(int) @@ -760,9 +807,14 @@ def fit_proxy_mistrust_model( if train.empty or len(observed_classes) < 2: _warn_degenerate_proxy_training(label_column, observed_classes, len(train)) probability = float(observed_classes[0]) if observed_classes else 0.0 - return _ConstantProbabilityEstimator(probability).fit(train[feature_columns], train_labels) + return _ConstantProbabilityEstimator(probability).fit( + train[feature_columns], train_labels + ) - estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory() + estimator = ( + _default_estimator_factory() if estimator_factory is None + else estimator_factory() + ) estimator.fit(train[feature_columns], train_labels) return estimator @@ -782,7 +834,9 @@ def build_proxy_probability_scores( are produced for all patients. 
""" - merged, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column) + merged, feature_columns = _prepare_proxy_training_frame( + feature_matrix, note_labels, label_column + ) score_column = _score_column_name(label_column) labeled_mask = merged[label_column].notna() @@ -795,7 +849,10 @@ def build_proxy_probability_scores( default_prob = float(observed_classes[0]) if observed_classes else 0.0 positive_class = np.full(len(merged), default_prob, dtype=float) else: - estimator = _default_estimator_factory() if estimator_factory is None else estimator_factory() + estimator = ( + _default_estimator_factory() if estimator_factory is None + else estimator_factory() + ) estimator.fit(train[feature_columns], train_labels) positive_class = _extract_positive_class_probabilities( estimator.predict_proba(merged[feature_columns]) @@ -807,7 +864,11 @@ def build_proxy_probability_scores( score_column: positive_class.astype(float), } ) - return scores.sort_values("hadm_id").drop_duplicates("hadm_id").reset_index(drop=True) + return ( + scores.sort_values("hadm_id") + .drop_duplicates("hadm_id") + .reset_index(drop=True) + ) def build_noncompliance_mistrust_scores( @@ -852,18 +913,26 @@ def build_negative_sentiment_mistrust_scores( cleaned = note_corpus.copy() cleaned["note_text"] = cleaned["note_text"].map(_prepare_note_text_for_sentiment) if sentiment_fn is None: - sentiment_scores = _default_sentiment_batch_backend(cleaned["note_text"].tolist()) + sentiment_scores = _default_sentiment_batch_backend( + cleaned["note_text"].tolist() + ) else: empty_mask = cleaned["note_text"] == "" sentiment_scores = [(0.0, 0.0)] * len(cleaned) - non_empty_indices = [index for index, is_empty in enumerate(empty_mask) if not is_empty] + non_empty_indices = [ + index for index, is_empty in enumerate(empty_mask) if not is_empty + ] for index in non_empty_indices: sentiment_scores[index] = sentiment_fn(cleaned["note_text"].iloc[index]) 
cleaned["negative_sentiment_score"] = [ float(-1.0 * score[0]) for score in sentiment_scores ] - return cleaned[["hadm_id", "negative_sentiment_score"]].sort_values("hadm_id").reset_index(drop=True) + return ( + cleaned[["hadm_id", "negative_sentiment_score"]] + .sort_values("hadm_id") + .reset_index(drop=True) + ) def z_normalize_scores( @@ -878,7 +947,8 @@ def z_normalize_scores( score_columns = [ column for column in normalized.columns - if column != "hadm_id" and (column.endswith("_score") or column.endswith("_score_z")) + if column != "hadm_id" + and (column.endswith("_score") or column.endswith("_score_z")) ] else: score_columns = list(columns) @@ -904,7 +974,9 @@ def build_mistrust_score_table( ) -> pd.DataFrame: """Build the three normalized mistrust metrics.""" - _require_columns(note_labels, ["hadm_id", *PROXY_LABEL_COLUMNS.values()], "note_labels") + _require_columns( + note_labels, ["hadm_id", *PROXY_LABEL_COLUMNS.values()], "note_labels" + ) _require_columns(note_corpus, ["hadm_id", "note_text"], "note_corpus") proxy_scores: OrderedDict[str, pd.DataFrame] = OrderedDict() @@ -925,11 +997,16 @@ def build_mistrust_score_table( if merged is None: merged = score_table continue - merged = merged.merge(score_table, on="hadm_id", how="inner", validate="one_to_one") + merged = merged.merge( + score_table, on="hadm_id", how="inner", validate="one_to_one" + ) assert merged is not None merged = merged.sort_values("hadm_id") - raw_score_columns = [_score_column_name(label_column) for label_column in PROXY_LABEL_COLUMNS.values()] + raw_score_columns = [ + _score_column_name(label_column) + for label_column in PROXY_LABEL_COLUMNS.values() + ] rename_map = { _score_column_name(label_column): f"{proxy_name}_score_z" for proxy_name, label_column in PROXY_LABEL_COLUMNS.items() @@ -961,10 +1038,18 @@ def summarize_feature_weights( if len(weights) != len(feature_columns): raise ValueError("Feature columns must align with estimator coefficients.") - summary = 
pd.DataFrame({"feature": list(feature_columns), "weight": weights.astype(float)}) - summary = summary.sort_values(["weight", "feature"], ascending=[False, True]).reset_index(drop=True) + summary = pd.DataFrame( + {"feature": list(feature_columns), "weight": weights.astype(float)} + ) + summary = summary.sort_values( + ["weight", "feature"], ascending=[False, True] + ).reset_index(drop=True) positive = summary.head(top_n).reset_index(drop=True) - negative = summary.sort_values(["weight", "feature"], ascending=[True, True]).head(top_n).reset_index(drop=True) + negative = ( + summary.sort_values(["weight", "feature"], ascending=[True, True]) + .head(top_n) + .reset_index(drop=True) + ) return {"all": summary, "positive": positive, "negative": negative} @@ -977,7 +1062,9 @@ def build_proxy_feature_weight_summary( ) -> dict[str, pd.DataFrame]: """Fit a proxy model and summarize the learned coefficient weights.""" - _, feature_columns = _prepare_proxy_training_frame(feature_matrix, note_labels, label_column) + _, feature_columns = _prepare_proxy_training_frame( + feature_matrix, note_labels, label_column + ) estimator = fit_proxy_mistrust_model( feature_matrix=feature_matrix, note_labels=note_labels, @@ -1043,7 +1130,14 @@ def run_race_gap_analysis( for column in columns: black = merged.loc[merged[race_column] == RACE_BLACK, column] white = merged.loc[merged[race_column] == RACE_WHITE, column] - statistic, pvalue, median_black, median_white, n_black, n_white = _make_metric_result( + ( + statistic, + pvalue, + median_black, + median_white, + n_black, + n_white, + ) = _make_metric_result( black, white ) rows.append( @@ -1076,9 +1170,13 @@ def run_race_based_treatment_analysis( """Compare Black and White treatment durations within the EOL cohort.""" _require_columns(eol_cohort, ["hadm_id", race_column], "eol_cohort") - _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals") + _require_columns( + treatment_totals, ["hadm_id", 
*treatment_columns], "treatment_totals" + ) - merged = eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one") + merged = eol_cohort.merge( + treatment_totals, on="hadm_id", how="left", validate="one_to_one" + ) merged = merged.loc[merged[race_column].isin({RACE_WHITE, RACE_BLACK})].copy() rows: list[dict[str, float | int | str]] = [] @@ -1086,7 +1184,14 @@ def run_race_based_treatment_analysis( usable = merged.loc[merged[column].notna()].copy() black = usable.loc[usable[race_column] == RACE_BLACK, column] white = usable.loc[usable[race_column] == RACE_WHITE, column] - statistic, pvalue, median_black, median_white, n_black, n_white = _make_metric_result( + ( + statistic, + pvalue, + median_black, + median_white, + n_black, + n_white, + ) = _make_metric_result( black, white ) rows.append( @@ -1117,12 +1222,21 @@ def run_race_based_treatment_analysis_by_acuity( """Compare Black and White treatment duration within OASIS severity terciles.""" _require_columns(eol_cohort, ["hadm_id", race_column], "eol_cohort") - _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals") + _require_columns( + treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals" + ) _require_columns(acuity_scores, ["hadm_id", acuity_column], "acuity_scores") merged = ( - eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one") - .merge(acuity_scores[["hadm_id", acuity_column]], on="hadm_id", how="inner", validate="one_to_one") + eol_cohort.merge( + treatment_totals, on="hadm_id", how="left", validate="one_to_one" + ) + .merge( + acuity_scores[["hadm_id", acuity_column]], + on="hadm_id", + how="inner", + validate="one_to_one", + ) ) merged = merged.loc[merged[race_column].isin({RACE_WHITE, RACE_BLACK})].copy() merged = _assign_severity_bins(merged, acuity_column=acuity_column) @@ -1135,7 +1249,14 @@ def run_race_based_treatment_analysis_by_acuity( ].copy() black = usable.loc[usable[race_column] == 
RACE_BLACK, treatment] white = usable.loc[usable[race_column] == RACE_WHITE, treatment] - statistic, pvalue, median_black, median_white, n_black, n_white = _make_metric_result( + ( + statistic, + pvalue, + median_black, + median_white, + n_black, + n_white, + ) = _make_metric_result( black, white, ) @@ -1163,12 +1284,16 @@ def build_race_based_treatment_cdf_plot_data( race_column: str = "race", treatment_columns: Sequence[str] = ("total_vent_min", "total_vaso_min"), ) -> dict[str, pd.DataFrame]: - """Build plot-ready CDF curves and median markers for race-based treatment analysis.""" + """Build plot-ready CDF curves and medians for race-based treatment analysis.""" _require_columns(eol_cohort, ["hadm_id", race_column], "eol_cohort") - _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals") + _require_columns( + treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals" + ) - merged = eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one") + merged = eol_cohort.merge( + treatment_totals, on="hadm_id", how="left", validate="one_to_one" + ) merged = merged.loc[merged[race_column].isin({RACE_WHITE, RACE_BLACK})].copy() curves: list[dict[str, float | str]] = [] @@ -1187,7 +1312,9 @@ def build_race_based_treatment_cdf_plot_data( "cdf": float(row.cdf), } ) - median = pd.to_numeric(values, errors="coerce").dropna().astype(float).median() + median = ( + pd.to_numeric(values, errors="coerce").dropna().astype(float).median() + ) medians.append( { "treatment": treatment, @@ -1212,14 +1339,23 @@ def run_trust_based_treatment_analysis( _require_columns(eol_cohort, ["hadm_id"], "eol_cohort") _require_columns(mistrust_scores, ["hadm_id"], "mistrust_scores") - _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals") + _require_columns( + treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals" + ) columns = list(MISTRUST_SCORE_COLUMNS if score_columns is None else 
score_columns) _require_columns(mistrust_scores, columns, "mistrust_scores") merged = ( - eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one") - .merge(mistrust_scores[["hadm_id", *columns]], on="hadm_id", how="inner", validate="one_to_one") + eol_cohort.merge( + treatment_totals, on="hadm_id", how="left", validate="one_to_one" + ) + .merge( + mistrust_scores[["hadm_id", *columns]], + on="hadm_id", + how="inner", + validate="one_to_one", + ) ) groups = dict(group_sizes or {}) @@ -1236,8 +1372,12 @@ def run_trust_based_treatment_analysis( rows: list[dict[str, float | int | str]] = [] for treatment in treatment_columns: for metric in columns: - usable = merged.loc[merged[treatment].notna() & merged[metric].notna()].copy() - usable = usable.sort_values([metric, "hadm_id"], ascending=[False, True]).reset_index(drop=True) + usable = merged.loc[ + merged[treatment].notna() & merged[metric].notna() + ].copy() + usable = usable.sort_values( + [metric, "hadm_id"], ascending=[False, True] + ).reset_index(drop=True) group_size = int(groups.get(treatment, 0)) if group_size <= 0 or group_size >= len(usable): @@ -1259,7 +1399,14 @@ def run_trust_based_treatment_analysis( high = usable.iloc[:group_size][treatment] low = usable.iloc[group_size:][treatment] - statistic, pvalue, median_high, median_low, n_high, n_low = _make_metric_result( + ( + statistic, + pvalue, + median_high, + median_low, + n_high, + n_low, + ) = _make_metric_result( high, low ) rows.append( @@ -1296,21 +1443,30 @@ def run_trust_based_treatment_analysis_by_acuity( _require_columns(eol_cohort, ["hadm_id"], "eol_cohort") _require_columns(mistrust_scores, ["hadm_id"], "mistrust_scores") - _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals") + _require_columns( + treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals" + ) _require_columns(acuity_scores, ["hadm_id", acuity_column], "acuity_scores") columns = list(MISTRUST_SCORE_COLUMNS 
if score_columns is None else score_columns) _require_columns(mistrust_scores, columns, "mistrust_scores") merged = ( - eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one") + eol_cohort.merge( + treatment_totals, on="hadm_id", how="left", validate="one_to_one" + ) .merge( mistrust_scores[["hadm_id", *columns]], on="hadm_id", how="inner", validate="one_to_one", ) - .merge(acuity_scores[["hadm_id", acuity_column]], on="hadm_id", how="inner", validate="one_to_one") + .merge( + acuity_scores[["hadm_id", acuity_column]], + on="hadm_id", + how="inner", + validate="one_to_one", + ) ) merged = _assign_severity_bins(merged, acuity_column=acuity_column) explicit_groups = dict(group_sizes or {}) @@ -1326,7 +1482,9 @@ def run_trust_based_treatment_analysis_by_acuity( acuity_column=acuity_column, ) for row in race_based.itertuples(index=False): - derived_groups[(str(row.severity_bin), str(row.treatment))] = int(row.n_black) + derived_groups[(str(row.severity_bin), str(row.treatment))] = int( + row.n_black + ) rows: list[dict[str, float | int | str]] = [] for metric in columns: @@ -1337,9 +1495,9 @@ def run_trust_based_treatment_analysis_by_acuity( & merged[treatment].notna() & merged[metric].notna() ].copy() - usable = usable.sort_values([metric, "hadm_id"], ascending=[False, True]).reset_index( - drop=True - ) + usable = usable.sort_values( + [metric, "hadm_id"], ascending=[False, True] + ).reset_index(drop=True) group_size = int( explicit_groups.get( treatment, @@ -1367,7 +1525,14 @@ def run_trust_based_treatment_analysis_by_acuity( high = usable.iloc[:group_size][treatment] low = usable.iloc[group_size:][treatment] - statistic, pvalue, median_high, median_low, n_high, n_low = _make_metric_result( + ( + statistic, + pvalue, + median_high, + median_low, + n_high, + n_low, + ) = _make_metric_result( high, low, ) @@ -1400,17 +1565,21 @@ def build_trust_based_treatment_cdf_plot_data( group_sizes: Mapping[str, int] | None = None, race_column: str = 
"race", ) -> dict[str, pd.DataFrame]: - """Build plot-ready CDF curves and median markers for trust-based treatment analysis.""" + """Build plot-ready CDF curves and medians for trust-based treatment analysis.""" _require_columns(eol_cohort, ["hadm_id"], "eol_cohort") _require_columns(mistrust_scores, ["hadm_id"], "mistrust_scores") - _require_columns(treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals") + _require_columns( + treatment_totals, ["hadm_id", *treatment_columns], "treatment_totals" + ) columns = list(MISTRUST_SCORE_COLUMNS if score_columns is None else score_columns) _require_columns(mistrust_scores, columns, "mistrust_scores") merged = ( - eol_cohort.merge(treatment_totals, on="hadm_id", how="left", validate="one_to_one") + eol_cohort.merge( + treatment_totals, on="hadm_id", how="left", validate="one_to_one" + ) .merge( mistrust_scores[["hadm_id", *columns]], on="hadm_id", @@ -1433,8 +1602,12 @@ def build_trust_based_treatment_cdf_plot_data( medians: list[dict[str, float | str]] = [] for treatment in treatment_columns: for metric in columns: - usable = merged.loc[merged[treatment].notna() & merged[metric].notna()].copy() - usable = usable.sort_values([metric, "hadm_id"], ascending=[False, True]).reset_index(drop=True) + usable = merged.loc[ + merged[treatment].notna() & merged[metric].notna() + ].copy() + usable = usable.sort_values( + [metric, "hadm_id"], ascending=[False, True] + ).reset_index(drop=True) group_size = int(groups.get(treatment, 0)) if group_size <= 0 or group_size >= len(usable): continue @@ -1455,13 +1628,17 @@ def build_trust_based_treatment_cdf_plot_data( "cdf": float(row.cdf), } ) - median = pd.to_numeric(values, errors="coerce").dropna().astype(float).median() + median = ( + pd.to_numeric(values, errors="coerce").dropna().astype(float).median() + ) medians.append( { "metric": metric, "treatment": treatment, "group": label, - "median": float(median) if not pd.isna(median) else float("nan"), + "median": ( + 
float(median) if not pd.isna(median) else float("nan") + ), "line_style": "dotted", } ) @@ -1510,7 +1687,9 @@ def evaluate_downstream_average_weights( feature_configurations: Mapping[str, Sequence[str]] | None = None, task_map: Mapping[str, str] | None = None, estimator_factory: Callable[[], object] | None = None, - downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, + downstream_estimator_factory_resolver: ( + DownstreamEstimatorFactoryResolver | None + ) = None, split_fn: Callable[..., tuple] | None = None, repetitions: int = 100, test_size: float = 0.4, @@ -1563,11 +1742,15 @@ def evaluate_downstream_average_weights( coefficients = np.asarray(getattr(estimator, "coef_", None), dtype=float) if coefficients.ndim != 2 or coefficients.shape[0] == 0: raise ValueError( - "Downstream estimator must expose `coef_` with shape (n_classes, n_features)." + "Downstream estimator must expose `coef_` with shape " + "(n_classes, n_features)." ) weights = coefficients[0] if len(weights) != len(feature_columns): - raise ValueError("Downstream feature columns must align with estimator coefficients.") + raise ValueError( + "Downstream feature columns must align with estimator " + "coefficients." 
+ ) collected_weights.append(weights.astype(float)) if collected_weights: @@ -1589,8 +1772,16 @@ def evaluate_downstream_average_weights( "feature": feature, "n_repeats": int(repetitions), "n_valid_weights": int(n_valid), - "weight_mean": float(weight_mean[index]) if not np.isnan(weight_mean[index]) else float("nan"), - "weight_std": float(weight_std[index]) if not np.isnan(weight_std[index]) else float("nan"), + "weight_mean": ( + float(weight_mean[index]) + if not np.isnan(weight_mean[index]) + else float("nan") + ), + "weight_std": ( + float(weight_std[index]) + if not np.isnan(weight_std[index]) + else float("nan") + ), } ) @@ -1602,7 +1793,9 @@ def evaluate_downstream_predictions( feature_configurations: Mapping[str, Sequence[str]] | None = None, task_map: Mapping[str, str] | None = None, estimator_factory: Callable[[], object] | None = None, - downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, + downstream_estimator_factory_resolver: ( + DownstreamEstimatorFactoryResolver | None + ) = None, split_fn: Callable[..., tuple] | None = None, auc_fn: Callable[[Iterable[int], Iterable[float]], float] | None = None, repetitions: int = 100, @@ -1666,8 +1859,16 @@ def evaluate_downstream_predictions( "n_features": int(len(feature_columns)), "n_repeats": int(repetitions), "n_valid_auc": int(auc_series.notna().sum()), - "auc_mean": float(auc_series.mean()) if auc_series.notna().any() else float("nan"), - "auc_std": float(auc_series.std(ddof=0)) if auc_series.notna().any() else float("nan"), + "auc_mean": ( + float(auc_series.mean()) + if auc_series.notna().any() + else float("nan") + ), + "auc_std": ( + float(auc_series.std(ddof=0)) + if auc_series.notna().any() + else float("nan") + ), } ) return pd.DataFrame(rows) @@ -1691,14 +1892,18 @@ def plot_grouped_treatment_cdf( try: import matplotlib.pyplot as plt # type: ignore except ModuleNotFoundError as exc: # pragma: no cover - raise ModuleNotFoundError("matplotlib is required for EOL 
mistrust CDF plotting.") from exc + raise ModuleNotFoundError( + "matplotlib is required for EOL mistrust CDF plotting." + ) from exc if ax is None: _, ax = plt.subplots() ordered_curves = curves.copy() if not ordered_curves.empty: - ordered_curves = ordered_curves.sort_values([group_column, x_column]).reset_index(drop=True) + ordered_curves = ordered_curves.sort_values( + [group_column, x_column] + ).reset_index(drop=True) for group_value, group_df in ordered_curves.groupby(group_column, sort=False): ax.plot(group_df[x_column], group_df[y_column], label=str(group_value)) @@ -1743,7 +1948,9 @@ def run_full_eol_mistrust_modeling( acuity_scores: pd.DataFrame | None = None, final_model_table: pd.DataFrame | None = None, estimator_factory: Callable[[], object] | None = None, - downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, + downstream_estimator_factory_resolver: ( + DownstreamEstimatorFactoryResolver | None + ) = None, sentiment_fn: Callable[[str], tuple[float, float]] | None = None, split_fn: Callable[..., tuple] | None = None, auc_fn: Callable[[Iterable[int], Iterable[float]], float] | None = None, @@ -1778,7 +1985,9 @@ def run_full_eol_mistrust_modeling( "mistrust_scores": mistrust_scores, "feature_weight_summaries": feature_weight_summaries, } - selected_score_columns = list(MISTRUST_SCORE_COLUMNS if score_columns is None else score_columns) + selected_score_columns = list( + MISTRUST_SCORE_COLUMNS if score_columns is None else score_columns + ) if demographics is not None: outputs["race_gap_results"] = run_race_gap_analysis( @@ -1799,28 +2008,36 @@ def run_full_eol_mistrust_modeling( score_columns=selected_score_columns, ) if acuity_scores is not None: - outputs["race_treatment_by_acuity_results"] = run_race_based_treatment_analysis_by_acuity( - eol_cohort=eol_cohort, - treatment_totals=treatment_totals, - acuity_scores=acuity_scores, + outputs["race_treatment_by_acuity_results"] = ( + 
run_race_based_treatment_analysis_by_acuity( + eol_cohort=eol_cohort, + treatment_totals=treatment_totals, + acuity_scores=acuity_scores, + ) ) - outputs["trust_treatment_by_acuity_results"] = run_trust_based_treatment_analysis_by_acuity( - eol_cohort=eol_cohort, - mistrust_scores=mistrust_scores, - treatment_totals=treatment_totals, - acuity_scores=acuity_scores, - score_columns=selected_score_columns, + outputs["trust_treatment_by_acuity_results"] = ( + run_trust_based_treatment_analysis_by_acuity( + eol_cohort=eol_cohort, + mistrust_scores=mistrust_scores, + treatment_totals=treatment_totals, + acuity_scores=acuity_scores, + score_columns=selected_score_columns, + ) ) if include_cdf_plot_data: - outputs["race_treatment_cdf_plot_data"] = build_race_based_treatment_cdf_plot_data( - eol_cohort=eol_cohort, - treatment_totals=treatment_totals, + outputs["race_treatment_cdf_plot_data"] = ( + build_race_based_treatment_cdf_plot_data( + eol_cohort=eol_cohort, + treatment_totals=treatment_totals, + ) ) - outputs["trust_treatment_cdf_plot_data"] = build_trust_based_treatment_cdf_plot_data( - eol_cohort=eol_cohort, - mistrust_scores=mistrust_scores, - treatment_totals=treatment_totals, - score_columns=selected_score_columns, + outputs["trust_treatment_cdf_plot_data"] = ( + build_trust_based_treatment_cdf_plot_data( + eol_cohort=eol_cohort, + mistrust_scores=mistrust_scores, + treatment_totals=treatment_totals, + score_columns=selected_score_columns, + ) ) if acuity_scores is not None: @@ -1848,7 +2065,9 @@ def run_full_eol_mistrust_modeling( final_model_table=downstream, feature_configurations=feature_configurations, estimator_factory=estimator_factory, - downstream_estimator_factory_resolver=downstream_estimator_factory_resolver, + downstream_estimator_factory_resolver=( + downstream_estimator_factory_resolver + ), split_fn=split_fn, repetitions=repetitions, ) @@ -1890,7 +2109,9 @@ def build_mistrust_scores( def evaluate_downstream( self, final_model_table: pd.DataFrame, 
- downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, + downstream_estimator_factory_resolver: ( + DownstreamEstimatorFactoryResolver | None + ) = None, ) -> pd.DataFrame: return evaluate_downstream_predictions( final_model_table=final_model_table, @@ -1916,7 +2137,9 @@ def run( precomputed_mistrust_scores: pd.DataFrame | None = None, score_columns: Sequence[str] | None = None, feature_configurations: Mapping[str, Sequence[str]] | None = None, - downstream_estimator_factory_resolver: DownstreamEstimatorFactoryResolver | None = None, + downstream_estimator_factory_resolver: ( + DownstreamEstimatorFactoryResolver | None + ) = None, ) -> EOLMistrustModelOutputs: """Return model-stage outputs only. @@ -1949,12 +2172,21 @@ def run( def _default_eol_mistrust_data_root() -> Path: - return Path(__file__).resolve().parents[2] / "EOL_Workspace" / "eol_mistrust_required_combined" + return ( + Path(__file__).resolve().parents[2] + / "EOL_Workspace" + / "eol_mistrust_required_combined" + ) def _default_eol_mistrust_slice_output_dir() -> Path: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - return Path(__file__).resolve().parents[2] / "EOL_Workspace" / "eol_mistrust_runs" / f"e2e_1pct_gpu_{timestamp}" + return ( + Path(__file__).resolve().parents[2] + / "EOL_Workspace" + / "eol_mistrust_runs" + / f"e2e_1pct_gpu_{timestamp}" + ) def _log_eol_mistrust_runner(start_time: float, message: str) -> None: @@ -1988,7 +2220,11 @@ def run_eol_mistrust_gpu_slice( os.environ["TRANSFORMERS_OFFLINE"] = "1" os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1") - output_path = _default_eol_mistrust_slice_output_dir() if output_dir is None else Path(output_dir) + output_path = ( + _default_eol_mistrust_slice_output_dir() + if output_dir is None + else Path(output_dir) + ) output_path.mkdir(parents=True, exist_ok=True) cuda_available = False @@ -1997,7 +2233,10 @@ def run_eol_mistrust_gpu_slice( try: torch_module = 
importlib.import_module("torch") - cuda_available = bool(getattr(torch_module, "cuda", None) and torch_module.cuda.is_available()) + cuda_available = bool( + getattr(torch_module, "cuda", None) + and torch_module.cuda.is_available() + ) gpu_name = torch_module.cuda.get_device_name(0) if cuda_available else None if cuda_available: torch_module.cuda.empty_cache() @@ -2010,7 +2249,11 @@ def run_eol_mistrust_gpu_slice( _ = warmup_sentiment("patient is calm and cooperative.") warmup_seconds = round(time.time() - warmup_started, 2) - if cuda_available and torch_module is not None and hasattr(torch_module.cuda, "reset_peak_memory_stats"): + if ( + cuda_available + and torch_module is not None + and hasattr(torch_module.cuda, "reset_peak_memory_stats") + ): torch_module.cuda.reset_peak_memory_stats() example_module = importlib.import_module("examples.eol_mistrust") @@ -2047,17 +2290,28 @@ def run_eol_mistrust_gpu_slice( .reset_index(drop=True) ) sampled_hadm_ids = set( - pd.to_numeric(sampled_hadm["hadm_id"], errors="coerce").dropna().astype(int).tolist() + pd.to_numeric(sampled_hadm["hadm_id"], errors="coerce") + .dropna() + .astype(int) + .tolist() ) - admissions_slice = admissions.loc[admissions["hadm_id"].isin(sampled_hadm_ids)].copy() + admissions_slice = admissions.loc[ + admissions["hadm_id"].isin(sampled_hadm_ids) + ].copy() subject_ids = set( - pd.to_numeric(admissions_slice["subject_id"], errors="coerce").dropna().astype(int).tolist() + pd.to_numeric(admissions_slice["subject_id"], errors="coerce") + .dropna() + .astype(int) + .tolist() ) patients_slice = patients.loc[patients["subject_id"].isin(subject_ids)].copy() icustays_slice = icustays.loc[icustays["hadm_id"].isin(sampled_hadm_ids)].copy() icustay_ids = set( - pd.to_numeric(icustays_slice["icustay_id"], errors="coerce").dropna().astype(int).tolist() + pd.to_numeric(icustays_slice["icustay_id"], errors="coerce") + .dropna() + .astype(int) + .tolist() ) ventdurations_slice = 
materialized_views["ventdurations"].loc[ @@ -2077,7 +2331,9 @@ def run_eol_mistrust_gpu_slice( start_time, ( "Prepared slice with " - f"{len(sampled_hadm_ids)} admissions, {len(subject_ids)} patients, {len(icustay_ids)} ICU stays" + f"{len(sampled_hadm_ids)} admissions, " + f"{len(subject_ids)} patients, " + f"{len(icustay_ids)} ICU stays" ), ) @@ -2096,7 +2352,10 @@ def run_eol_mistrust_gpu_slice( chartevents_csv_path = resolved_root / "mimiciii_clinical" / "chartevents.csv" notes_started = time.time() - _log_eol_mistrust_runner(start_time, "Streaming notes to build sentiment corpus and note-derived labels") + _log_eol_mistrust_runner( + start_time, + "Streaming notes to build sentiment corpus and note-derived labels", + ) note_corpus, note_labels = build_note_artifacts_from_csv( noteevents_csv_path=noteevents_csv_path, all_hadm_ids=all_cohort["hadm_id"], @@ -2105,17 +2364,27 @@ def run_eol_mistrust_gpu_slice( chunksize=note_chunksize, ) note_present_hadm_ids = _note_present_hadm_ids(note_corpus) - all_cohort = all_cohort.loc[all_cohort["hadm_id"].isin(note_present_hadm_ids)].copy() - note_corpus = note_corpus.loc[note_corpus["hadm_id"].isin(note_present_hadm_ids)].copy() - note_labels = note_labels.loc[note_labels["hadm_id"].isin(note_present_hadm_ids)].copy() + all_cohort = all_cohort.loc[ + all_cohort["hadm_id"].isin(note_present_hadm_ids) + ].copy() + note_corpus = note_corpus.loc[ + note_corpus["hadm_id"].isin(note_present_hadm_ids) + ].copy() + note_labels = note_labels.loc[ + note_labels["hadm_id"].isin(note_present_hadm_ids) + ].copy() _log_eol_mistrust_runner( start_time, - f"Retained {len(note_present_hadm_ids)} ALL-cohort admissions with at least one non-error note", + f"Retained {len(note_present_hadm_ids)} ALL-cohort admissions " + "with at least one non-error note", ) note_stage_seconds = round(time.time() - notes_started, 2) chartevents_started = time.time() - _log_eol_mistrust_runner(start_time, "Streaming chartevents to build feature matrix and 
code-status targets") + _log_eol_mistrust_runner( + start_time, + "Streaming chartevents to build feature matrix and code-status targets", + ) feature_matrix, code_status_targets = build_chartevent_artifacts_from_csv( chartevents_csv_path=chartevents_csv_path, d_items=d_items, @@ -2203,10 +2472,14 @@ def run_eol_mistrust_gpu_slice( continue for table_name, table in tables.items(): if isinstance(table, pd.DataFrame): - table.to_csv(summary_dir / f"{model_name}_{table_name}.csv", index=False) + table.to_csv( + summary_dir / f"{model_name}_{table_name}.csv", index=False + ) if cuda_available and torch_module is not None: - cuda_peak_mb = round(torch_module.cuda.max_memory_allocated() / (1024 * 1024), 2) + cuda_peak_mb = round( + torch_module.cuda.max_memory_allocated() / (1024 * 1024), 2 + ) downstream_results = artifacts["downstream_auc_results"] if not isinstance(downstream_results, pd.DataFrame): @@ -2214,7 +2487,10 @@ def run_eol_mistrust_gpu_slice( target_positives = { "left_ama_positive": int( - pd.to_numeric(final_model_table["left_ama"], errors="coerce").fillna(0).astype(int).sum() + pd.to_numeric(final_model_table["left_ama"], errors="coerce") + .fillna(0) + .astype(int) + .sum() ), "code_status_positive": int( pd.to_numeric(final_model_table["code_status_dnr_dni_cmo"], errors="coerce") @@ -2255,12 +2531,17 @@ def run_eol_mistrust_gpu_slice( .sum() ), "autopsy_label": int( - pd.to_numeric(note_labels["autopsy_label"], errors="coerce").fillna(0).astype(int).sum() + pd.to_numeric(note_labels["autopsy_label"], errors="coerce") + .fillna(0) + .astype(int) + .sum() ), }, "target_positives": target_positives, "artifact_shapes": { - key: list(value.shape) for key, value in artifacts.items() if isinstance(value, pd.DataFrame) + key: list(value.shape) + for key, value in artifacts.items() + if isinstance(value, pd.DataFrame) }, "stage_seconds": { "sentiment_warmup": warmup_seconds, @@ -2273,14 +2554,20 @@ def run_eol_mistrust_gpu_slice( } (output_path / 
"run_summary.json").write_text(json.dumps(summary, indent=2)) - _log_eol_mistrust_runner(start_time, f"Run complete; artifacts written to {output_path.resolve()}") + _log_eol_mistrust_runner( + start_time, + f"Run complete; artifacts written to {output_path.resolve()}", + ) print(json.dumps(summary, indent=2), flush=True) return summary def _parse_eol_mistrust_cli_args() -> argparse.Namespace: parser = argparse.ArgumentParser( - description="Run the EOL mistrust pipeline on a deterministic GPU-backed cohort slice." + description=( + "Run the EOL mistrust pipeline on a deterministic " + "GPU-backed cohort slice." + ) ) parser.add_argument( "--root", @@ -2322,12 +2609,18 @@ def _parse_eol_mistrust_cli_args() -> argparse.Namespace: "--output-dir", type=Path, default=None, - help="Optional output directory. Defaults to EOL_Workspace/eol_mistrust_runs/.", + help=( + "Optional output directory. " + "Defaults to EOL_Workspace/eol_mistrust_runs/." + ), ) parser.add_argument( "--allow-online-hf", action="store_true", - help="Allow Hugging Face network access instead of forcing offline cached model loading.", + help=( + "Allow Hugging Face network access instead of forcing " + "offline cached model loading." + ), ) return parser.parse_args() diff --git a/pyhealth/models/eol_mistrust_classifier.py b/pyhealth/models/eol_mistrust_classifier.py index 895b0c8e0..81f8bbeae 100644 --- a/pyhealth/models/eol_mistrust_classifier.py +++ b/pyhealth/models/eol_mistrust_classifier.py @@ -36,6 +36,34 @@ class EOLMistrustClassifier(BaseModel): hidden_dim: Hidden layer width before the output head. dropout: Dropout applied to the pooled patient representation. text_hash_buckets: Number of buckets for hashed text embeddings. + + Attributes: + label_key: The single label field name consumed from the task schema. + embedding_dim: Dimension of every per-modality pooled representation. + hidden_dim: Width of the hidden layer before the classification head. 
+ text_hash_buckets: Vocabulary size (excluding pad) for text embeddings. + sequence_embeddings: ``nn.ModuleDict`` of learned sequence embeddings. + tensor_projections: ``nn.ModuleDict`` of linear tensor projections. + text_embeddings: ``nn.ModuleDict`` of hashed-token text embeddings. + hidden_layer: Linear layer that mixes concatenated modality features. + output_layer: Final linear head producing task logits. + + Raises: + ValueError: If the task has anything other than exactly one label key. + TypeError: If the task schema contains a processor type this model does + not support (only sequence, tensor, and text are handled). + + Example: + >>> from pyhealth.datasets import EOLMistrustDataset + >>> from pyhealth.tasks import EOLMistrustMortalityPredictionMIMIC3 + >>> from pyhealth.models import EOLMistrustClassifier + >>> dataset = EOLMistrustDataset(root="/data/eol_mistrust") + >>> samples = dataset.set_task(EOLMistrustMortalityPredictionMIMIC3()) + >>> model = EOLMistrustClassifier( + ... dataset=samples, + ... embedding_dim=32, + ... hidden_dim=64, + ... ) """ def __init__( @@ -46,6 +74,22 @@ def __init__( dropout: float = 0.1, text_hash_buckets: int = 2048, ) -> None: + """Build the multimodal classification head over the task schema. + + Args: + dataset: Fitted :class:`~pyhealth.datasets.SampleDataset`. + embedding_dim: Shared per-modality embedding dimension. + hidden_dim: Width of the hidden layer before the output head. + dropout: Dropout probability applied to the patient representation. + text_hash_buckets: Number of hashed text token buckets (the + embedding table has ``text_hash_buckets + 1`` rows, with row 0 + reserved for padding). + + Raises: + ValueError: If the task does not have exactly one label key. + TypeError: If a processor in the schema is not a sequence, tensor, + or text processor. 
+ """ super().__init__(dataset) if len(self.label_keys) != 1: @@ -155,6 +199,26 @@ def _embed_text_field( return (embeddings * mask).sum(dim=1) / denom def forward(self, **kwargs) -> Dict[str, torch.Tensor]: + """Run a forward pass over one batch of task samples. + + Each feature in ``self.feature_keys`` is routed to the appropriate + per-modality handler (sequence mean pool, tensor projection, or hashed + text embedding), the results are concatenated, and a two-layer MLP + produces logits for the single task label. + + Args: + **kwargs: Batch dictionary with one entry per task feature key and + one entry for the task label key. Values may be tensors + (sequence / tensor features) or strings / sequences of strings + (text features). + + Returns: + Dict with keys ``loss``, ``y_prob``, ``y_true``, and ``logit``, as + expected by :class:`~pyhealth.trainer.Trainer`. + + Raises: + KeyError: If an unexpected feature key is encountered at runtime. + """ pooled_features = [] for feature_key in self.feature_keys: diff --git a/pyhealth/tasks/eol_mistrust.py b/pyhealth/tasks/eol_mistrust.py index af84f7f3a..7dcffe0f5 100644 --- a/pyhealth/tasks/eol_mistrust.py +++ b/pyhealth/tasks/eol_mistrust.py @@ -331,7 +331,48 @@ def get_eol_mistrust_task_map() -> OrderedDict[str, str]: class EOLMistrustDownstreamMIMIC3(BaseTask): - """Admission-level downstream prediction task for the EOL mistrust study.""" + """Admission-level downstream prediction task for the EOL mistrust study. + + Replicates the admission-level prediction targets from Boag et al. 2018, + *"Racial Disparities and Mistrust in End-of-Life Care."* Three concrete + subclasses bind ``target`` to each of the paper's downstream outcomes: + :class:`EOLMistrustLeftAMAPredictionMIMIC3`, + :class:`EOLMistrustCodeStatusPredictionMIMIC3`, and + :class:`EOLMistrustMortalityPredictionMIMIC3`. 
+ + The task iterates admissions for each patient, builds a structured + feature set (coded EHR history, demographics, length of stay, age, and + optionally clinical notes), and emits one sample per eligible admission + with a single binary label. + + Args: + target: One of the supported downstream target names returned by + :func:`get_eol_mistrust_task_map`. + include_notes: When ``True``, concatenated clinical notes are added + to the input schema as a text feature. + dataset_prepare_mode: ``"default"`` (corrected) or ``"paper_like"`` + (notebook-faithful) label-extraction strategy. + + Attributes: + task_name: Task identifier used by PyHealth's sample cache. + target: Active downstream target name. + include_notes: Whether ``clinical_notes`` is present in the schema. + dataset_prepare_mode: Normalized preparation mode string. + input_schema: Mapping of feature names to PyHealth processor keys + (``sequence`` / ``tensor`` / ``text``). + output_schema: Mapping of the single label field to ``"binary"``. + + Raises: + ValueError: If ``target`` is not one of the supported task names, or + if ``dataset_prepare_mode`` is not recognized. + + Example: + >>> from pyhealth.datasets import EOLMistrustDataset + >>> from pyhealth.tasks import EOLMistrustDownstreamMIMIC3 + >>> dataset = EOLMistrustDataset(root="/data/eol_mistrust") + >>> task = EOLMistrustDownstreamMIMIC3(target="in_hospital_mortality") + >>> samples = dataset.set_task(task) + """ task_name = "EOLMistrustDownstreamMIMIC3" @@ -499,7 +540,20 @@ def __call__(self, patient: Any) -> list[dict[str, Any]]: class EOLMistrustLeftAMAPredictionMIMIC3(EOLMistrustDownstreamMIMIC3): - """Task wrapper for the Left AMA downstream target.""" + """Left-Against-Medical-Advice (Left-AMA) downstream target. + + Predicts whether an admission ends with the patient leaving against + medical advice. Label is derived from the admissions table's + ``discharge_location`` field. 
+ + Args: + include_notes: When ``True``, clinical notes are added to the schema. + dataset_prepare_mode: ``"default"`` or ``"paper_like"``. + + Example: + >>> from pyhealth.tasks import EOLMistrustLeftAMAPredictionMIMIC3 + >>> task = EOLMistrustLeftAMAPredictionMIMIC3(include_notes=True) + """ task_name = "EOLMistrustLeftAMAPredictionMIMIC3" @@ -516,7 +570,21 @@ def __init__( class EOLMistrustCodeStatusPredictionMIMIC3(EOLMistrustDownstreamMIMIC3): - """Task wrapper for the code-status downstream target.""" + """Code-status change (DNR / DNI / CMO) downstream target. + + Predicts whether any chart event on the admission records a DNR, DNI, or + CMO code-status value on the itemids tracked by the study. + + Args: + include_notes: When ``True``, clinical notes are added to the schema. + dataset_prepare_mode: ``"default"`` or ``"paper_like"``. The + paper-like mode reproduces the notebook's stateful overwrite + behavior for mixed code-status events. + + Example: + >>> from pyhealth.tasks import EOLMistrustCodeStatusPredictionMIMIC3 + >>> task = EOLMistrustCodeStatusPredictionMIMIC3() + """ task_name = "EOLMistrustCodeStatusPredictionMIMIC3" @@ -533,7 +601,19 @@ def __init__( class EOLMistrustMortalityPredictionMIMIC3(EOLMistrustDownstreamMIMIC3): - """Task wrapper for the in-hospital mortality downstream target.""" + """In-hospital mortality downstream target. + + Predicts whether the admission ends in in-hospital death, using the + admissions table's death indicator. + + Args: + include_notes: When ``True``, clinical notes are added to the schema. + dataset_prepare_mode: ``"default"`` or ``"paper_like"``. 
+ + Example: + >>> from pyhealth.tasks import EOLMistrustMortalityPredictionMIMIC3 + >>> task = EOLMistrustMortalityPredictionMIMIC3() + """ task_name = "EOLMistrustMortalityPredictionMIMIC3" diff --git a/tests/core/conftest.py b/tests/core/conftest.py new file mode 100644 index 000000000..ef9b73b8f --- /dev/null +++ b/tests/core/conftest.py @@ -0,0 +1,47 @@ +"""Test configuration for ``tests/core``. + +Registers a ``slow`` pytest marker that is skipped by default so the +default test run stays in the sub-second-per-test budget recommended by +the PyHealth contribution guide. Tests that genuinely exercise the full +``BaseDataset`` → ``set_task`` → model-training pipeline (which takes a +few seconds even on synthetic data) are marked ``slow`` and only run +when ``--run-slow`` is passed to pytest. + +Usage: + pytest tests/core/test_eol_mistrust_model.py # fast only + pytest tests/core/test_eol_mistrust_model.py --run-slow # include integration +""" + +from __future__ import annotations + +import pytest + + +def pytest_addoption(parser: pytest.Parser) -> None: + parser.addoption( + "--run-slow", + action="store_true", + default=False, + help="Run tests marked @pytest.mark.slow (end-to-end pipeline tests).", + ) + + +def pytest_configure(config: pytest.Config) -> None: + config.addinivalue_line( + "markers", + "slow: end-to-end integration test that exercises the full PyHealth " + "pipeline (dataset → set_task → model). Skipped unless --run-slow.", + ) + + +def pytest_collection_modifyitems( + config: pytest.Config, items: list[pytest.Item] +) -> None: + if config.getoption("--run-slow"): + return + skip_slow = pytest.mark.skip( + reason="Slow integration test; pass --run-slow to include." 
+ ) + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) diff --git a/tests/core/test_eol_mistrust_Integration.py b/tests/core/test_eol_mistrust_Integration.py index 98b914cd1..ff775fda5 100644 --- a/tests/core/test_eol_mistrust_Integration.py +++ b/tests/core/test_eol_mistrust_Integration.py @@ -58,14 +58,18 @@ def _load_example_module(): @contextmanager def _workspace_tempdir(): - base = Path(__file__).resolve().parents[2] / ".tmp-test-integration" - base.mkdir(parents=True, exist_ok=True) - path = base / f"tmp_{uuid.uuid4().hex}" - path.mkdir() - try: - yield str(path) - finally: - shutil.rmtree(path, ignore_errors=True) + """Yield a system temporary directory path that is cleaned up on exit. + + Uses :class:`tempfile.TemporaryDirectory` so no persistent scratch folder + is left behind inside the repository tree. + """ + import tempfile + + with tempfile.TemporaryDirectory( + prefix="pyhealth_eol_integration_", + ignore_cleanup_errors=True, + ) as path: + yield path class _FakeProbEstimator: diff --git a/tests/core/test_eol_mistrust_dataset.py b/tests/core/test_eol_mistrust_dataset.py index 4214cf124..f9b77c916 100644 --- a/tests/core/test_eol_mistrust_dataset.py +++ b/tests/core/test_eol_mistrust_dataset.py @@ -7,6 +7,7 @@ from unittest.mock import patch import pandas as pd +import pytest from pyhealth.datasets.base_dataset import BaseDataset def _load_model_build_mistrust_score_table(): @@ -59,14 +60,18 @@ def _load_eol_mistrust_dataset_class_module(): @contextmanager def _workspace_tempdir(): - base = Path(__file__).resolve().parents[2] / ".tmp-test-dataset" - base.mkdir(parents=True, exist_ok=True) - path = base / f"tmp_{uuid.uuid4().hex}" - path.mkdir() - try: - yield str(path) - finally: - shutil.rmtree(path, ignore_errors=True) + """Yield a system temporary directory path that is cleaned up on exit. + + Uses :class:`tempfile.TemporaryDirectory` so no persistent scratch folder + is left behind inside the repository tree. 
+ """ + import tempfile + + with tempfile.TemporaryDirectory( + prefix="pyhealth_eol_dataset_", + ignore_cleanup_errors=True, + ) as path: + yield path class _FakeProbEstimator: @@ -3830,6 +3835,7 @@ def test_dataset_class_inherits_base_dataset_and_keeps_core_tables(self): self.assertIn("icustays", dataset.tables) self.assertIn("noteevents", dataset.tables) + @pytest.mark.slow def test_dataset_class_can_set_eol_task_on_minimal_synthetic_tables(self): dataset_cls = self.dataset_class_module.EOLMistrustDataset task = self.task_module.EOLMistrustMortalityPredictionMIMIC3(include_notes=True) diff --git a/tests/core/test_eol_mistrust_model.py b/tests/core/test_eol_mistrust_model.py index 19bb0c3b1..c156a4546 100644 --- a/tests/core/test_eol_mistrust_model.py +++ b/tests/core/test_eol_mistrust_model.py @@ -7,6 +7,7 @@ from unittest.mock import patch import pandas as pd +import pytest import torch from pyhealth.datasets.sample_dataset import create_sample_dataset from pyhealth.datasets.utils import get_dataloader @@ -2176,6 +2177,7 @@ def test_evaluate_downstream_predictions_is_seed_stable_for_repeated_identical_r pd.testing.assert_frame_equal(first, second) +@pytest.mark.slow class TestEOLMistrustClassifier(unittest.TestCase): @classmethod def setUpClass(cls): From d370a55e26a7bb14e94f5437e8f9e9721b82a08c Mon Sep 17 00:00:00 2001 From: pattyboy227 Date: Sat, 18 Apr 2026 16:08:35 -0700 Subject: [PATCH 7/7] add paper references, missing docstrings, and YAML comments - Expand module-level docstrings in dataset, model, and task modules with Boag et al. 
2018 paper citation and URL - Add missing docstrings to private helper functions and methods: _path_variants, _table_assets_exist, _discover_optional_tables, _infer_tensor_input_size, _mean_pool_sequence, _project_tensor, _embed_text_field, _require_columns, _coerce_timestamp, _normalize_token, _normalize_code_status_mode, _normalize_dataset_prepare_mode, _calculate_age_years, _calculate_los_days, _calculate_paper_like_los_days, _build_code_status_target_normal, _build_code_status_target_paper_like - Add section comments to eol_mistrust.yaml config for table groups - Enhance RST documentation with paper links and modality descriptions - Add paper reference to example script header - Add docstrings to four public wrapper functions in pyhealth/models/eol_mistrust.py that bind the generic proxy helpers to specific label columns --- .../pyhealth.datasets.EOLMistrustDataset.rst | 7 +++- .../pyhealth.models.EOLMistrustClassifier.rst | 21 ++++++++---- .../api/tasks/pyhealth.tasks.eol_mistrust.rst | 15 ++++++--- examples/eol_mistrust_mortality_classifier.py | 4 +++ pyhealth/datasets/configs/eol_mistrust.yaml | 13 ++++++++ pyhealth/datasets/eol_mistrust_dataset.py | 14 +++++++- pyhealth/models/eol_mistrust.py | 20 ++++++++++++ pyhealth/models/eol_mistrust_classifier.py | 15 ++++++++- pyhealth/tasks/eol_mistrust.py | 32 +++++++++++++++---- 9 files changed, 122 insertions(+), 19 deletions(-) diff --git a/docs/api/datasets/pyhealth.datasets.EOLMistrustDataset.rst b/docs/api/datasets/pyhealth.datasets.EOLMistrustDataset.rst index 24fbe3559..520a4420e 100644 --- a/docs/api/datasets/pyhealth.datasets.EOLMistrustDataset.rst +++ b/docs/api/datasets/pyhealth.datasets.EOLMistrustDataset.rst @@ -2,11 +2,16 @@ pyhealth.datasets.EOLMistrustDataset ====================================== MIMIC-III dataset wrapper used to replicate Boag et al. 2018, -*"Racial Disparities and Mistrust in End-of-Life Care."* It loads the +*"Racial Disparities and Mistrust in End-of-Life Care"* +(`paper `_). 
It loads the admissions, ICU stays, and (optionally) note events tables, and exposes the proxy-mistrust and end-of-life cohort definitions used by the three downstream tasks in :doc:`../tasks/pyhealth.tasks.eol_mistrust`. +Supports both a corrected ``"default"`` pipeline and a notebook-faithful +``"paper_like"`` reproduction mode via the ``dataset_prepare_mode`` +parameter. + .. autoclass:: pyhealth.datasets.EOLMistrustDataset :members: :undoc-members: diff --git a/docs/api/models/pyhealth.models.EOLMistrustClassifier.rst b/docs/api/models/pyhealth.models.EOLMistrustClassifier.rst index 0829a3be6..ed8d7933d 100644 --- a/docs/api/models/pyhealth.models.EOLMistrustClassifier.rst +++ b/docs/api/models/pyhealth.models.EOLMistrustClassifier.rst @@ -1,12 +1,21 @@ pyhealth.models.EOLMistrustClassifier ======================================= -Multimodal classifier that mirrors the end-of-life prediction head from -Boag et al. 2018. It consumes sequence features (diagnoses, procedures, -drugs), tensor features (age, length of stay), and text features -(demographics and free-text clinical notes) from the -:class:`~pyhealth.datasets.EOLMistrustDataset` and predicts a binary -target such as Left-AMA, code-status change, or in-hospital mortality. +Multimodal classifier for the end-of-life prediction targets from +Boag et al. 2018, *"Racial Disparities and Mistrust in End-of-Life Care"* +(`paper `_). + +The model handles three modality types from the +:class:`~pyhealth.datasets.EOLMistrustDataset`: + +- **Coded EHR sequences** (diagnoses, procedures, drugs) — learned + embeddings with mean pooling. +- **Scalar numeric features** (age, length of stay) — linear projections. +- **Text / categorical fields** (demographics, clinical notes) — stable + hash-based token embeddings with mean pooling. + +It predicts a binary target such as Left-AMA, code-status change, or +in-hospital mortality. .. 
autoclass:: pyhealth.models.EOLMistrustClassifier :members: diff --git a/docs/api/tasks/pyhealth.tasks.eol_mistrust.rst b/docs/api/tasks/pyhealth.tasks.eol_mistrust.rst index b8eb76f90..9396b1a96 100644 --- a/docs/api/tasks/pyhealth.tasks.eol_mistrust.rst +++ b/docs/api/tasks/pyhealth.tasks.eol_mistrust.rst @@ -2,11 +2,18 @@ pyhealth.tasks.eol_mistrust ============================== End-of-life cohort tasks from Boag et al. 2018, *"Racial Disparities and -Mistrust in End-of-Life Care."* Three binary prediction targets are -defined on top of the :class:`~pyhealth.datasets.EOLMistrustDataset`: +Mistrust in End-of-Life Care"* +(`paper `_). Three binary +prediction targets are defined on top of the +:class:`~pyhealth.datasets.EOLMistrustDataset`: Left-AMA, code-status change (DNR/DNI/CMO), and in-hospital mortality. -All three share the same input schema and differ only in the extracted -label. +All three share the same input schema (demographics, diagnoses, procedures, +medications, and optionally clinical notes) and differ only in the +extracted label. + +Supports both a corrected ``"default"`` and a notebook-faithful +``"paper_like"`` label-extraction strategy via the +``dataset_prepare_mode`` parameter. .. autoclass:: pyhealth.tasks.EOLMistrustDownstreamMIMIC3 :members: diff --git a/examples/eol_mistrust_mortality_classifier.py b/examples/eol_mistrust_mortality_classifier.py index 79b56f5ff..1d014556d 100644 --- a/examples/eol_mistrust_mortality_classifier.py +++ b/examples/eol_mistrust_mortality_classifier.py @@ -1,5 +1,9 @@ r"""Run the EOL mistrust workflow. +Reproduces the full analysis pipeline from Boag et al. 2018, +*"Racial Disparities and Mistrust in End-of-Life Care"* +(https://proceedings.mlr.press/v85/boag18a.html). 
+ Expected data root:: EOL_Workspace/eol_mistrust_required_combined/ diff --git a/pyhealth/datasets/configs/eol_mistrust.yaml b/pyhealth/datasets/configs/eol_mistrust.yaml index e17abbb60..cd7bc81bb 100644 --- a/pyhealth/datasets/configs/eol_mistrust.yaml +++ b/pyhealth/datasets/configs/eol_mistrust.yaml @@ -1,6 +1,11 @@ version: "1.0" +# MIMIC-III table configuration for the EOL Mistrust pipeline. +# Reproduces the data schema from Boag et al. 2018, +# "Racial Disparities and Mistrust in End-of-Life Care." tables: + # --- Core clinical tables --- + patients: file_path: "mimiciii_clinical/patients.csv" patient_id: "subject_id" @@ -48,6 +53,8 @@ tables: - "outtime" - "los" + # --- Coded event tables (joined to admissions for timestamps) --- + diagnoses_icd: file_path: "mimiciii_clinical/diagnoses_icd.csv" patient_id: "subject_id" @@ -105,6 +112,8 @@ tables: - "icd9_code" - "seq_num" + # --- Free-text clinical notes --- + noteevents: file_path: "mimiciii_notes/noteevents.csv" patient_id: "subject_id" @@ -119,6 +128,8 @@ tables: - "storetime" - "iserror" + # --- Chart event reference and time-series data --- + d_items: file_path: "mimiciii_clinical/d_items.csv" patient_id: null @@ -163,6 +174,8 @@ tables: - "dbsource" - "category" + # --- Derived severity scores and treatment duration tables --- + ventdurations: file_path: "mimiciii_derived/ventdurations.csv" patient_id: "subject_id" diff --git a/pyhealth/datasets/eol_mistrust_dataset.py b/pyhealth/datasets/eol_mistrust_dataset.py index 7a66df1f7..19be53df2 100644 --- a/pyhealth/datasets/eol_mistrust_dataset.py +++ b/pyhealth/datasets/eol_mistrust_dataset.py @@ -1,4 +1,13 @@ -"""Native BaseDataset entrypoint for the EOL mistrust cohort tables.""" +"""Native BaseDataset entrypoint for the EOL mistrust cohort tables. + +Implements the dataset component for replicating Boag et al. 2018, +*"Racial Disparities and Mistrust in End-of-Life Care"* +(https://proceedings.mlr.press/v85/boag18a.html). 
Wraps the combined +MIMIC-III CSV export tree (clinical, notes, and derived tables) as a +proper :class:`~pyhealth.datasets.BaseDataset` with support for both a +corrected ``"default"`` pipeline and a notebook-faithful ``"paper_like"`` +reproduction mode. +""" from __future__ import annotations @@ -111,6 +120,7 @@ def _normalize_dataset_prepare_mode(mode: str | None) -> str: @staticmethod def _path_variants(root: str, relative_path: str) -> list[Path]: + """Return candidate file paths, toggling ``.gz`` compression suffix.""" csv_path = Path(root) / relative_path if csv_path.suffix == ".gz": return [csv_path, csv_path.with_suffix("")] @@ -118,6 +128,7 @@ def _path_variants(root: str, relative_path: str) -> list[Path]: @classmethod def _table_assets_exist(cls, root: str, config, table_name: str) -> bool: + """Return ``True`` when all required CSV files for *table_name* exist.""" if table_name not in config.tables: return False @@ -139,6 +150,7 @@ def _discover_optional_tables( root: str, config_path: str, ) -> list[str]: + """Auto-discover which optional MIMIC-III tables are present on disk.""" config = load_yaml_config(config_path) available_tables: list[str] = [] for table_name in cls.DEFAULT_OPTIONAL_TABLES: diff --git a/pyhealth/models/eol_mistrust.py b/pyhealth/models/eol_mistrust.py index 46563e6e5..b103b2a44 100644 --- a/pyhealth/models/eol_mistrust.py +++ b/pyhealth/models/eol_mistrust.py @@ -876,6 +876,11 @@ def build_noncompliance_mistrust_scores( note_labels: pd.DataFrame, estimator_factory: Callable[[], object] | None = None, ) -> pd.DataFrame: + """Compute the noncompliance proxy mistrust score per admission. + + Convenience wrapper around :func:`build_proxy_probability_scores` bound to + the noncompliance label column. 
+ """ return build_proxy_probability_scores( feature_matrix=feature_matrix, note_labels=note_labels, @@ -889,6 +894,11 @@ def build_autopsy_mistrust_scores( note_labels: pd.DataFrame, estimator_factory: Callable[[], object] | None = None, ) -> pd.DataFrame: + """Compute the autopsy proxy mistrust score per admission. + + Convenience wrapper around :func:`build_proxy_probability_scores` bound to + the autopsy label column. + """ return build_proxy_probability_scores( feature_matrix=feature_matrix, note_labels=note_labels, @@ -1080,6 +1090,11 @@ def build_noncompliance_feature_weight_summary( estimator_factory: Callable[[], object] | None = None, top_n: int = 10, ) -> dict[str, pd.DataFrame]: + """Return top positive/negative feature weights for the noncompliance proxy. + + Convenience wrapper around :func:`build_proxy_feature_weight_summary` bound + to the noncompliance label column. + """ return build_proxy_feature_weight_summary( feature_matrix=feature_matrix, note_labels=note_labels, @@ -1095,6 +1110,11 @@ def build_autopsy_feature_weight_summary( estimator_factory: Callable[[], object] | None = None, top_n: int = 10, ) -> dict[str, pd.DataFrame]: + """Return top positive/negative feature weights for the autopsy proxy. + + Convenience wrapper around :func:`build_proxy_feature_weight_summary` bound + to the autopsy label column. + """ return build_proxy_feature_weight_summary( feature_matrix=feature_matrix, note_labels=note_labels, diff --git a/pyhealth/models/eol_mistrust_classifier.py b/pyhealth/models/eol_mistrust_classifier.py index 81f8bbeae..053674043 100644 --- a/pyhealth/models/eol_mistrust_classifier.py +++ b/pyhealth/models/eol_mistrust_classifier.py @@ -1,4 +1,13 @@ -"""Native BaseModel entrypoint for EOL mistrust downstream tasks.""" +"""Native BaseModel entrypoint for EOL mistrust downstream tasks. + +Provides the :class:`EOLMistrustClassifier`, a lightweight multimodal +classifier for the three downstream prediction targets defined by +Boag et al. 
2018, *"Racial Disparities and Mistrust in End-of-Life Care"* +(https://proceedings.mlr.press/v85/boag18a.html). The model consumes +coded EHR sequences, scalar numeric features, and free-text or +categorical fields, and produces binary predictions for Left-AMA, +code-status change (DNR/DNI/CMO), or in-hospital mortality. +""" from __future__ import annotations @@ -140,6 +149,7 @@ def __init__( self.output_layer = nn.Linear(self.hidden_dim, self.get_output_size()) def _infer_tensor_input_size(self, feature_key: str) -> int: + """Infer the input dimension of a tensor feature by inspecting the dataset.""" for index in range(len(self.dataset)): if feature_key not in self.dataset[index]: continue @@ -154,6 +164,7 @@ def _infer_tensor_input_size(self, feature_key: str) -> int: def _mean_pool_sequence( self, values: torch.Tensor, feature_key: str ) -> torch.Tensor: + """Embed coded sequences and return their masked mean-pooled vector.""" if values.dim() == 1: values = values.unsqueeze(0) values = values.long().to(self.device) @@ -163,6 +174,7 @@ def _mean_pool_sequence( return (embeddings * mask).sum(dim=1) / denom def _project_tensor(self, values: torch.Tensor, feature_key: str) -> torch.Tensor: + """Project scalar / numeric tensor features to the embedding space.""" values = values.to(self.device).float() if values.dim() == 0: values = values.view(1, 1) @@ -173,6 +185,7 @@ def _project_tensor(self, values: torch.Tensor, feature_key: str) -> torch.Tenso def _embed_text_field( self, values: Sequence[str], feature_key: str ) -> torch.Tensor: + """Hash-embed text tokens and return their mean-pooled representation.""" token_lists = [] max_len = 1 for raw_value in values: diff --git a/pyhealth/tasks/eol_mistrust.py b/pyhealth/tasks/eol_mistrust.py index 7dcffe0f5..b5587e35b 100644 --- a/pyhealth/tasks/eol_mistrust.py +++ b/pyhealth/tasks/eol_mistrust.py @@ -1,13 +1,23 @@ """Task definitions and target helpers for the EOL mistrust workflow. 
+Implements the three downstream binary prediction tasks from +Boag et al. 2018, *"Racial Disparities and Mistrust in End-of-Life Care"* +(https://proceedings.mlr.press/v85/boag18a.html): + +- **Left AMA** — whether the patient left against medical advice. +- **Code Status** — whether a DNR / DNI / CMO order was recorded. +- **In-hospital Mortality** — whether the patient died during the stay. + Structure --------- -This module now keeps two logic families explicit: - -1. Normal Path - The corrected, cleaned task helpers used by the default research flow. -2. Paper-like Path - The notebook-faithful special logic that only exists for paper compatibility. +This module keeps two label-extraction logic families explicit: + +1. **Normal (corrected) path** — the cleaned task helpers used by the + default research flow, with corrected code-status resolution and + proper length-of-stay calculation. +2. **Paper-like path** — the notebook-faithful special logic that + reproduces the original paper's stateful overwrite behavior for + code-status labels. 
""" from __future__ import annotations @@ -59,16 +69,19 @@ def _require_columns(df: pd.DataFrame, required: Sequence[str], df_name: str) -> None: + """Raise ``ValueError`` if any *required* columns are missing from *df*.""" missing = [column for column in required if column not in df.columns] if missing: raise ValueError(f"{df_name} is missing required columns: {', '.join(missing)}") def _coerce_timestamp(value) -> pd.Timestamp: + """Convert *value* to a :class:`pandas.Timestamp`, returning ``NaT`` on failure.""" return pd.to_datetime(value, errors="coerce") def _normalize_token(value) -> str: + """Lowercase and collapse non-alphanumeric characters into underscores.""" if value is None or (isinstance(value, float) and pd.isna(value)): return "" normalized = re.sub(r"[^a-z0-9]+", "_", str(value).strip().lower()) @@ -77,6 +90,7 @@ def _normalize_token(value) -> str: def _normalize_code_status_mode(mode: str | None) -> str: + """Validate and normalise the *code_status_mode* parameter.""" normalized = ( CODE_STATUS_MODE_CORRECTED if mode is None else str(mode).strip().lower() ) @@ -89,6 +103,7 @@ def _normalize_code_status_mode(mode: str | None) -> str: def _normalize_dataset_prepare_mode(mode: str | None) -> str: + """Validate and normalise the *dataset_prepare_mode* parameter.""" normalized = ( DATASET_PREPARE_MODE_DEFAULT if mode is None else str(mode).strip().lower() ) @@ -102,6 +117,7 @@ def _normalize_dataset_prepare_mode(mode: str | None) -> str: def _calculate_age_years(admittime, dob) -> float: + """Return age in years at admission, capped at 90 (MIMIC-III convention).""" admit_time = _coerce_timestamp(admittime) birth_time = _coerce_timestamp(dob) if pd.isna(admit_time) or pd.isna(birth_time): @@ -115,6 +131,7 @@ def _calculate_age_years(admittime, dob) -> float: def _calculate_los_days(admittime, dischtime) -> float: + """Return length of stay in fractional days (corrected path).""" admit_time = _coerce_timestamp(admittime) discharge_time = 
_coerce_timestamp(dischtime) if pd.isna(admit_time) or pd.isna(discharge_time): @@ -123,6 +140,7 @@ def _calculate_los_days(admittime, dischtime) -> float: def _calculate_paper_like_los_days(admittime, dischtime) -> float: + """Return length of stay in hours, reproducing the paper's calculation.""" admit_time = _coerce_timestamp(admittime) discharge_time = _coerce_timestamp(dischtime) if pd.isna(admit_time) or pd.isna(discharge_time): @@ -221,6 +239,7 @@ def is_positive_code_status_value(value) -> bool: def _build_code_status_target_normal(codes: pd.DataFrame) -> pd.DataFrame: + """Build code-status labels using the corrected (latest-value) logic.""" labeled = codes.copy() labeled["code_status_dnr_dni_cmo"] = labeled["value"].map( lambda value: int(is_positive_code_status_value(value)) @@ -272,6 +291,7 @@ def _advance_paper_like_code_status_label( def _build_code_status_target_paper_like(codes: pd.DataFrame) -> pd.DataFrame: + """Build code-status labels using the paper's stateful overwrite logic.""" current_label: int | None = None notebook_targets: dict[int, int] = {}