diff --git a/.github/workflows/testrail-ff-tests-deduplication.yml b/.github/workflows/testrail-ff-tests-deduplication.yml new file mode 100644 index 0000000..3d80099 --- /dev/null +++ b/.github/workflows/testrail-ff-tests-deduplication.yml @@ -0,0 +1,205 @@ +name: TestRail Test Case Deduplication + +on: + workflow_dispatch: + schedule: + - cron: "0 9 * * 1" # Every Monday at 9am UTC + +env: + BUCKET: mobile-reports + BUCKET_PREFIX: public/testrail-ff-test-deduplication + DEFAULT_DIR: ./testrail/testcases-deduplication + STORAGE_URL_PREFIX: https://console.cloud.google.com/storage/browser + BQ_DATASET: testops_stats + BQ_TABLE: testrail_deduplication_runs + +jobs: + deduplication: + name: Deduplication — ${{ matrix.project_name }} + runs-on: ubuntu-24.04 + defaults: + run: + working-directory: ${{ env.DEFAULT_DIR }} + + strategy: + fail-fast: false + matrix: + include: + - project_id: '14' + project_name: firefox-ios + suite_id: '45443' + - project_id: '59' + project_name: fenix + suite_id: '3192' + - project_id: '27' + project_name: focus-ios + suite_id: '5291' + - project_id: '48' + project_name: focus-android + suite_id: '1028' + + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + + - name: Get sentence-transformers version + id: st-version + run: | + version=$(grep '^sentence-transformers' requirements.txt | sed 's/[^0-9.]//g') + echo "version=$version" >> $GITHUB_OUTPUT + + - name: Cache sentence-transformers model + uses: actions/cache@v5 + with: + path: ~/.cache/huggingface + key: sentence-transformers-all-MiniLM-L6-v2-${{ steps.st-version.outputs.version }}-${{ runner.os }} + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Fetch test cases from TestRail + env: + TESTRAIL_HOST: ${{ secrets.TESTRAIL_HOST }} + TESTRAIL_USERNAME: ${{ secrets.TESTRAIL_USERNAME }} + TESTRAIL_PASSWORD: ${{ secrets.TESTRAIL_PASSWORD }} + run: | + 
python3 fetch_testrail_export.py \ + --project-id "${{ matrix.project_id }}" \ + --suite-id "${{ matrix.suite_id }}" \ + --output testrail_export.xlsx + + - name: Run deduplication pipeline + run: | + python3 run_all.py testrail_export.xlsx --output-dir ./output + + - name: Set run metadata + run: | + echo "today=$(date '+%Y-%m-%d')" >> $GITHUB_ENV + echo "project_id=${{ matrix.project_id }}" >> $GITHUB_ENV + echo "project_name=${{ matrix.project_name }}" >> $GITHUB_ENV + + - name: Establish Google Cloud connection + uses: google-github-actions/auth@v3 + with: + credentials_json: ${{ secrets.GCLOUD_AUTH }} + + - name: Upload results to GCS + id: upload-results + uses: google-github-actions/upload-cloud-storage@v3 + with: + path: ${{ env.DEFAULT_DIR }}/output + destination: ${{ env.BUCKET }}/${{ env.BUCKET_PREFIX }}/${{ matrix.project_name }}/${{ env.today }} + glob: '*.csv' + parent: false + + - name: Query previous stats from BigQuery + run: | + bq_result=$(bq query \ + --project_id=moz-mobile-tools \ + --use_legacy_sql=false \ + --format=json \ + "SELECT exact_duplicate_cases, similar_pairs, high_priority_similar_pairs, total_cases + FROM \`moz-mobile-tools.${{ env.BQ_DATASET }}.${{ env.BQ_TABLE }}\` + WHERE project_id = '${{ matrix.project_id }}' + ORDER BY run_date DESC + LIMIT 1" 2>/dev/null || echo "[]") + + python3 - << PYEOF + import json, os, sys + raw = """${bq_result}""" + rows = [] + try: + rows = json.loads(raw.strip()) + except (json.JSONDecodeError, ValueError): + pass + with open(os.environ["GITHUB_ENV"], "a") as f: + if rows: + row = rows[0] + f.write(f"prev_exact={row.get('exact_duplicate_cases', 0)}\n") + f.write(f"prev_similar={row.get('similar_pairs', 0)}\n") + f.write(f"prev_high_priority_similar={row.get('high_priority_similar_pairs', 0)}\n") + f.write(f"prev_total={row.get('total_cases', 0)}\n") + f.write("has_prev_data=true\n") + else: + 
f.write("prev_exact=0\nprev_similar=0\nprev_high_priority_similar=0\nprev_total=0\nhas_prev_data=false\n") + PYEOF + + - name: Insert current stats into BigQuery + run: | + python3 insert_bq_stats.py \ + --output-dir ./output \ + --project-id "${{ matrix.project_id }}" \ + --project-name "${{ matrix.project_name }}" \ + --run-date "${{ env.today }}" \ + --github-run-id "${{ github.run_id }}" \ + --bq-project moz-mobile-tools \ + --bq-dataset ${{ env.BQ_DATASET }} \ + --bq-table ${{ env.BQ_TABLE }} + + - name: Build Slack payloads + run: | + python3 build_slack_payloads.py \ + --output-dir . \ + --today "${{ env.today }}" \ + --project-id "${{ matrix.project_id }}" \ + --project-name "${{ matrix.project_name }}" \ + --gcs-url "${{ env.GCS_URL }}" \ + --run-url "${{ env.RUN_URL }}" + env: + GCS_URL: ${{ env.STORAGE_URL_PREFIX }}/${{ env.BUCKET }}/${{ env.BUCKET_PREFIX }}/${{ matrix.project_name }}/${{ env.today }}/ + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Write job summary + run: | + echo "## TestRail Deduplication Report" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Field | Value |" >> $GITHUB_STEP_SUMMARY + echo "|-------|-------|" >> $GITHUB_STEP_SUMMARY + echo "| Project | ${{ matrix.project_name }} (ID: ${{ matrix.project_id }}) |" >> $GITHUB_STEP_SUMMARY + echo "| Date | ${{ env.today }} |" >> $GITHUB_STEP_SUMMARY + echo "| Total cases | ${{ env.current_total }} |" >> $GITHUB_STEP_SUMMARY + echo "| Exact duplicates | ${{ env.current_exact }} |" >> $GITHUB_STEP_SUMMARY + echo "| Similar pairs | ${{ env.current_similar }} |" >> $GITHUB_STEP_SUMMARY + echo "| Duplicate rate | ${{ env.current_rate }} |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "[Download results from GCS](${{ env.STORAGE_URL_PREFIX }}/${{ env.BUCKET }}/${{ env.BUCKET_PREFIX }}/${{ matrix.project_name }}/${{ env.today }}/)" >> $GITHUB_STEP_SUMMARY + + - name: Notify Slack — weekly digest 
+ if: success() + uses: slackapi/slack-github-action@v3.0.1 + with: + webhook: ${{ secrets.SLACK_WEBHOOK_URL_TEST_ALERTS_SANDBOX }} + webhook-type: incoming-webhook + payload-file-path: ${{ env.DEFAULT_DIR }}/slack-digest.json + + - name: Notify Slack — spike alert + if: success() && env.send_spike == 'true' + uses: slackapi/slack-github-action@v3.0.1 + with: + webhook: ${{ secrets.SLACK_WEBHOOK_URL_TEST_ALERTS_SANDBOX }} + webhook-type: incoming-webhook + payload-file-path: ${{ env.DEFAULT_DIR }}/slack-spike.json + + - name: Notify Slack (failure) + if: failure() + uses: slackapi/slack-github-action@v3.0.1 + with: + webhook: ${{ secrets.SLACK_WEBHOOK_URL_TEST_ALERTS_SANDBOX }} + webhook-type: incoming-webhook + payload: | + { + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": ":x: *TestRail Deduplication failed* (${{ matrix.project_name }})\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View run>" + } + } + ] + } \ No newline at end of file diff --git a/testrail/testcases-deduplication/.gitignore b/testrail/testcases-deduplication/.gitignore new file mode 100644 index 0000000..1f60ff8 --- /dev/null +++ b/testrail/testcases-deduplication/.gitignore @@ -0,0 +1,34 @@ +# Virtual environment +venv/ +.venv/ + +# Generated output files (contain sensitive TestRail data — do not commit) +output/ +duplicates_exact.csv +similar_pairs.csv +WORK_LIST_EXACT.csv +WORK_LIST_PERFECT_MATCHES.csv +WORK_LIST_SIMILAR_HIGH_PRIORITY.csv +PRIORITIZED_DUPLICATES.csv +ACTION_ITEMS.csv +analysis_stats.json + +# Input files (TestRail exports contain product test coverage data) +*.xlsx +*.xls + +# Reports generated locally +DEDUPLICATION_REPORT.md + +# Python +__pycache__/ +*.pyc +*.pyo +.pytest_cache/ + +# sentence-transformers model cache (if stored locally) +.cache/ +models/ + +# OS +.DS_Store diff --git a/testrail/testcases-deduplication/HOW_TO_USE.md b/testrail/testcases-deduplication/HOW_TO_USE.md new file mode 100644 
index 0000000..dc4c049 --- /dev/null +++ b/testrail/testcases-deduplication/HOW_TO_USE.md @@ -0,0 +1,348 @@ +# How to Use the Duplicate Detection Results + +**Updated:** 2026-04-21 + +--- + +## Running the Pipeline + +### Full pipeline (recommended) + +```bash +cd testrail/testcases-deduplication +pip install -r requirements.txt + +python run_all.py /path/to/testrail_export.xlsx +``` + +This runs all three steps and writes output files to the script directory by default. + +### Custom output directory + +Use `--output-dir` to control where output CSVs are written — useful for CI or when you want to keep results from different runs separate: + +```bash +python run_all.py /path/to/export.xlsx --output-dir /tmp/dedup-2024-01-15 +``` + +### Adjust detection thresholds + +```bash +python run_all.py export.xlsx \ + --dup-threshold 0.92 \ + --sim-threshold 0.85 \ + --overlap-threshold 0.80 +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--dup-threshold` | 0.90 | Minimum similarity to label a pair as `semantic_duplicate` | +| `--sim-threshold` | 0.80 | Minimum similarity to include a pair in the report at all | +| `--overlap-threshold` | 0.80 | Minimum step overlap to set `shares_most_steps = True` | + +### Run individual steps + +Each step can also be run standalone if you already have intermediate CSVs: + +```bash +python generate-work-list.py --output-dir /path/to/results +python export-priority-list.py --output-dir /path/to/results +``` + +> **Note:** Output CSVs contain TestRail test case data — do not commit them to the repository. They are covered by `.gitignore`. + +--- + +## Automated Monitoring (GitHub Action) + +The workflow `testrail-ff-tests-deduplication.yml` runs automatically every **Monday at 9am UTC** and can also be triggered manually from GitHub Actions. + +### What it does + +1. Fetches test cases from TestRail via API +2. Runs the full deduplication pipeline +3. 
Uploads result CSVs to GCS (`mobile-reports/public/testrail-ff-test-deduplication/{project}/{date}/`) +4. Stores run stats in BigQuery (`moz-mobile-tools.testops_stats.testrail_deduplication_runs`) +5. Sends Slack notifications to `#mobile-alerts-sandbox` + +### Slack notifications + +There are three types of notifications: + +#### 1. Weekly digest (every Monday, on success) + +Sent after every successful run. Shows the current stats with week-over-week deltas so you can track trends at a glance. + +``` +🔍 TestRail Deduplication — 2026-04-28 + +Project: firefox-ios (ID: 14) +Total cases: 1,620 (+5 vs last week) +Exact duplicates: 42 (+3 vs last week) +Similar pairs: 310 (no change) +Duplicate rate: 2.6% +Download results from GCS +``` + +#### 2. Spike alert (only when exact duplicates jump by more than 10) + +Sent in addition to the digest when there's a significant increase in exact duplicates — this usually means a bulk import or copy-paste of test cases happened. + +``` +⚠️ Duplicate spike detected — firefox-ios + +Exact duplicates jumped by 23 this week (42 → 65) +Project: firefox-ios (ID: 14) | Date: 2026-04-28 +Download results · View run +``` + +> This alert is skipped on the very first run (no previous data to compare against). + +#### 3. 
Failure notification (any step fails) + +``` +❌ TestRail Deduplication failed (firefox-ios) +View run +``` + +### Triggering manually + +Go to **Actions → TestRail Test Case Deduplication → Run workflow** and click **Run workflow**. The workflow takes no inputs — it runs the full matrix of projects: +- **Projects covered**: 14 (Firefox iOS), 59 (Fenix), 27 (Focus iOS), 48 (Focus Android) +- **Suites**: each project uses the suite ID configured in the workflow matrix + +--- + +## Quick Start Guide (reviewing results) + +### Step 1: Open the Data Files + +Two CSV files contain the raw data: + +#### duplicates_exact.csv +- Each row is a test case that belongs to a duplicate group +- All cases with the same `duplicate_group_id` are exact duplicates +- **Action:** Keep one test per group, archive the rest + +#### similar_pairs.csv +- Each row is a pair of similar tests with their similarity scores +- **Action:** Review high-similarity pairs (≥95%) for potential consolidation + +### Step 2: Understand the Columns + +#### duplicates_exact.csv + +| Column | What it means | +|--------|---------------| +| **_case_id** | TestRail case ID (e.g., C2575167) | +| **_title** | Test case title | +| **duplicate_group_id** | Group identifier - same ID = exact duplicates | + +**Usage:** +1. Sort by `duplicate_group_id` +2. For each group, choose one test to keep +3. 
Archive all others in that group + +#### similar_pairs.csv + +| Column | What it means | +|--------|---------------| +| **case_id_1**, **case_id_2** | The two test case IDs being compared | +| **title_1**, **title_2** | Their titles | +| **similarity** | Semantic similarity score (0.0-1.0) | +| **step_overlap** | Percentage of shared steps (0.0-1.0) | +| **relation** | "semantic_duplicate" (≥90%) or "similar" (80-90%) | +| **shares_most_steps** | True if ≥80% of steps are identical | + +**Usage:** +```python +# In Excel/Sheets, filter by: +similarity >= 0.95 # High priority duplicates +relation == "semantic_duplicate" # Strong duplicate candidates +shares_most_steps == TRUE # Tests with identical execution +``` + +--- + +## Action Plan + +### Phase 1: Address Exact Duplicates + +**Goal:** Review and archive all exact duplicate groups + +**Priority Order** (use `WORK_LIST_EXACT.csv`): + +1. Start with the largest groups (4+ duplicates) — biggest savings per group +2. Then medium groups (3 duplicates) +3. Finally the 2-duplicate groups + +For each group: keep the case with the **lowest ID** (usually the original), archive the rest. + +### Phase 2: Review High Similarity Cases + +**Goal:** Identify consolidation opportunities in near-duplicates + +**Priority** (use `WORK_LIST_PERFECT_MATCHES.csv` and `WORK_LIST_SIMILAR_HIGH_PRIORITY.csv`): + +1. **Perfect semantic matches (100% similarity)** — treat as exact duplicates; likely differ only in formatting +2. **Near-perfect matches (95-99%)** — review manually; small differences may be intentional +3. **High step overlap (≥80%)** — consider parameterization if tests differ only by a variable + +### Phase 3: Pattern Analysis + +**Goal:** Understand root causes to prevent future duplication + +**Tasks:** +1. Identify which sections/suites have the most duplicates +2. Map duplicates to test creation periods (bulk imports, copy-paste) +3. 
Create process guidelines to prevent recurrence + +--- + +## How to Archive Tests in TestRail + +### Option 1: Individual Archive +1. Open the test case in TestRail +2. Click "Edit" +3. Check the "Archived" checkbox +4. Add a comment: "Archived - exact duplicate of [CASE_ID]" +5. Save + +### Option 2: Bulk Archive +1. Go to the test suite in TestRail +2. Select multiple test cases (checkbox selection) +3. Click "Bulk Update" +4. Set "Archived" = Yes +5. Add comment: "Archived - duplicate cleanup [DATE]" +6. Apply + +### Best Practices +- ✅ Always add a comment explaining why you archived +- ✅ Reference the test you're keeping +- ✅ Archive rather than delete (can be restored) +- ✅ Verify automation coverage before archiving +- ✅ Update any test runs or plans that reference archived cases + +--- + +## Example Walkthrough + +### Example 1: Exact Duplicate Group + +**From duplicates_exact.csv:** +``` +_case_id, _title, duplicate_group_id +C1000001, "Select and save System auto theme", 5 +C1000045, "Select and save System auto theme", 5 +C1000089, "Select and save System auto theme", 5 +``` + +**Steps:** +1. Open all 3 cases in TestRail +2. Verify they are truly identical (check steps, expected results) +3. Choose to keep: **C1000001** (lowest ID = original) +4. Archive C1000045 with comment: "Archived - exact duplicate of C1000001" +5. Archive C1000089 with comment: "Archived - exact duplicate of C1000001" +6. Result: 2 fewer test cases ✅ + +### Example 2: High Similarity Pair + +**From similar_pairs.csv:** +``` +case_id_1, title_1, case_id_2, title_2, similarity, step_overlap +C1000010, "Verify CFR displayed - bottom toolbar", C1000011, "Verify CFR displayed - top toolbar", 1.000, 1.000 +``` + +**Steps:** +1. Open both cases +2. Review: They test the same thing with different toolbar positions +3. **Decision:** These are intentional variants - KEEP BOTH +4. 
No action needed + +**Alternative scenario:** +If the toolbar position isn't critical, consider: +- Create one parameterized test: "Verify CFR displayed [toolbar_position]" +- Archive both originals +- Result: 1 test instead of 2 ✅ + +--- + +## Using the Data Files + +### In Excel/Google Sheets + +**duplicates_exact.csv:** +1. Open in Excel +2. Sort by `duplicate_group_id` +3. For each group: + - Highlight the row you'll KEEP (lowest case_id) + - Mark others for archive +4. Track progress with a "Status" column + +**similar_pairs.csv:** +1. Open in Excel +2. Add filters to all columns +3. Filter: `similarity >= 0.95` +4. Sort by `similarity` descending +5. Review top matches first + +### In Python + +```python +import pandas as pd + +# Load exact duplicates +exact = pd.read_csv('duplicates_exact.csv') + +# Find largest groups +group_sizes = exact.groupby('duplicate_group_id').size() +largest_groups = group_sizes[group_sizes >= 3].sort_values(ascending=False) + +# Load similar pairs +similar = pd.read_csv('similar_pairs.csv') + +# High priority: near-perfect matches +high_priority = similar[similar['similarity'] >= 0.95].sort_values('similarity', ascending=False) + +# Cases that share most steps +high_overlap = similar[similar['shares_most_steps'] == True] + +# Semantic duplicates only +duplicates = similar[similar['relation'] == 'semantic_duplicate'] +``` + +--- + +## FAQs + +### Q: Which test should I keep if they're all identical? +**A:** Keep the test that is linked in GitHub (e.g., referenced by automation) to avoid creating a PR just to modify the link. + +### Q: What if one duplicate has automation coverage and the other doesn't? +**A:** Keep the one with automation coverage, or migrate the automation to the version you want to keep before archiving. + +### Q: What if the tests are 100% similar but have different titles? +**A:** Review carefully - they might be testing subtly different scenarios. If truly identical, consolidate to one and update the title to be more descriptive. 
+ +### Q: Should I delete or archive? +**A:** **Always archive** (never delete). Archiving allows you to restore if you make a mistake or discover the "duplicate" was actually testing something different. + +### Q: What if I disagree with a suggested duplicate? +**A:** Trust your judgment! The script uses semantic analysis, but you have domain knowledge. If two tests seem similar but test different things, keep both and document why. + +### Q: How do I prevent future duplicates? +**A:** See "Priority 4" in DEDUPLICATION_REPORT.md for prevention strategies: +- Search before creating new tests +- Use consistent naming conventions +- Run quarterly deduplication audits +- Consider automated duplicate detection in CI/CD + +--- + +## Need Help? + +If you encounter issues or have questions: + +1. Check the script source: `find-duplicates.py` +2. Re-run the pipeline with updated data: `python run_all.py export.xlsx` +3. Adjust thresholds if results seem off (see the table above) diff --git a/testrail/testcases-deduplication/build_slack_payloads.py b/testrail/testcases-deduplication/build_slack_payloads.py new file mode 100644 index 0000000..bcb07db --- /dev/null +++ b/testrail/testcases-deduplication/build_slack_payloads.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +""" +Build Slack JSON payload files for the deduplication weekly digest and spike alert. + +Reads stats from environment variables (set by insert_bq_stats.py / GITHUB_ENV) +and writes slack-digest.json and optionally slack-spike.json to the output directory. + +Usage: + python build_slack_payloads.py \ + --output-dir . \ + --today 2026-04-24 \ + --project-id 14 \ + --project-name firefox-ios \ + --gcs-url https://console.cloud.google.com/storage/browser/... \ + --run-url https://github.com/... 
+""" +import argparse +import json +import os +import sys +from pathlib import Path + + +def delta_str(current: int, previous: int, has_prev_data: bool) -> str: + if not has_prev_data: + return "" + diff = current - previous + if diff > 0: + return f" _(+{diff} vs last week)_" + if diff < 0: + return f" _({diff} vs last week)_" + return " _(no change)_" + + +def main(): + parser = argparse.ArgumentParser(description="Build Slack payload JSON files.") + parser.add_argument("--output-dir", default=".", help="Directory to write slack-*.json files") + parser.add_argument("--today", required=True) + parser.add_argument("--project-id", required=True) + parser.add_argument("--project-name", required=True) + parser.add_argument("--gcs-url", required=True) + parser.add_argument("--run-url", required=True) + args = parser.parse_args() + + output_dir = Path(args.output_dir) + + # Current stats (written to GITHUB_ENV by insert_bq_stats.py) + current_total = int(os.environ.get("current_total", 0)) + current_exact = int(os.environ.get("current_exact", 0)) + current_high_priority_similar = int(os.environ.get("current_high_priority_similar", 0)) + current_rate = float(os.environ.get("current_rate", 0)) + + # Previous stats (written to GITHUB_ENV by the BQ query step) + prev_exact = int(os.environ.get("prev_exact", 0)) + prev_high_priority_similar = int(os.environ.get("prev_high_priority_similar", 0)) + prev_total = int(os.environ.get("prev_total", 0)) + has_prev_data = os.environ.get("has_prev_data", "false") == "true" + + digest_text = ( + f"*Project:* {args.project_name} (ID: {args.project_id})\n" + f"*Total cases:* {current_total}{delta_str(current_total, prev_total, has_prev_data)}\n" + f"*Exact duplicates:* {current_exact}{delta_str(current_exact, prev_exact, has_prev_data)}\n" + f"*High-priority similar pairs:* {current_high_priority_similar}" + f"{delta_str(current_high_priority_similar, prev_high_priority_similar, has_prev_data)}\n" + f"*Duplicate rate:* 
{current_rate:.1%}\n" + f"<{args.gcs_url}|Download results from GCS>" + ) + + digest_payload = { + "blocks": [ + {"type": "header", "text": {"type": "plain_text", "text": f":mag: TestRail Deduplication — {args.today}"}}, + {"type": "section", "text": {"type": "mrkdwn", "text": digest_text}}, + ] + } + (output_dir / "slack-digest.json").write_text(json.dumps(digest_payload)) + print(f"Written slack-digest.json") + + delta_exact = current_exact - prev_exact + send_spike = has_prev_data and delta_exact > 10 + + github_env = os.environ.get("GITHUB_ENV") + if github_env: + with open(github_env, "a") as f: + f.write(f"send_spike={'true' if send_spike else 'false'}\n") + + if send_spike: + spike_payload = { + "blocks": [ + {"type": "header", "text": {"type": "plain_text", "text": f":warning: Duplicate spike detected — {args.project_name}"}}, + {"type": "section", "text": {"type": "mrkdwn", "text": ( + f"*Exact duplicates jumped by {delta_exact}* this week " + f"({prev_exact} \u2192 {current_exact})\n" + f"*Project:* {args.project_name} (ID: {args.project_id}) | *Date:* {args.today}\n" + f"<{args.gcs_url}|Download results> \u00b7 <{args.run_url}|View run>" + )}}, + ] + } + (output_dir / "slack-spike.json").write_text(json.dumps(spike_payload)) + print(f"Written slack-spike.json (spike detected: +{delta_exact})") + else: + print("No spike detected — slack-spike.json not written") + + +if __name__ == "__main__": + main() diff --git a/testrail/testcases-deduplication/export-priority-list.py b/testrail/testcases-deduplication/export-priority-list.py new file mode 100644 index 0000000..111a496 --- /dev/null +++ b/testrail/testcases-deduplication/export-priority-list.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +""" +Export filtered priority lists to CSV for easy review in Excel +""" +import argparse +import os +import re +import sys +import pandas as pd + +def load_csv(path: str) -> pd.DataFrame: + """Load a CSV file, returning an empty DataFrame if the file doesn't exist.""" + 
if not os.path.exists(path): + print(f"Warning: {path} not found — skipping (no results for this category).") + return pd.DataFrame() + return pd.read_csv(path) + + +def case_id_sort_key(case_id: str) -> int: + """Extract numeric part of a case ID like 'C12345' for correct numeric sorting.""" + match = re.search(r'\d+', str(case_id)) + return int(match.group()) if match else 0 + + +def export_priority_lists(): + # Load data + exact = load_csv('duplicates_exact.csv') + similar = load_csv('similar_pairs.csv') + + if exact.empty and similar.empty: + raise RuntimeError("No duplicate data found. Run find-duplicates.py first.") + + # 1. Exact duplicates - sorted by group size + if exact.empty: + print("⚠️ No exact duplicates — skipping WORK_LIST_EXACT.csv") + else: + group_sizes = exact.groupby('duplicate_group_id').size() + exact['group_size'] = exact['duplicate_group_id'].map(group_sizes) + exact = exact.sort_values(['group_size', 'duplicate_group_id', '_case_id'], ascending=[False, True, True]) + + # Add decision columns (sort numerically by case ID to pick the lowest-numbered case to keep) + # Use Series.groupby to avoid pandas 2.2+ include_groups deprecation + group_keep = ( + exact.groupby('duplicate_group_id')['_case_id'] + .apply(lambda ids: sorted(ids.tolist(), key=case_id_sort_key)[0]) + ) + group_archive = ( + exact.groupby('duplicate_group_id')['_case_id'] + .apply(lambda ids: ', '.join(sorted(ids.tolist(), key=case_id_sort_key)[1:])) + ) + exact['KEEP'] = exact['duplicate_group_id'].map(group_keep) + exact['ARCHIVE'] = exact['duplicate_group_id'].map(group_archive) + + # Reorder columns + section_col = '_section' if '_section' in exact.columns else None + base_cols = ['duplicate_group_id', 'group_size', '_case_id', '_title'] + if section_col: + base_cols.append(section_col) + base_cols += ['KEEP', 'ARCHIVE'] + exact = exact[base_cols] + new_names = ['Group_ID', 'Group_Size', 'Case_ID', 'Title'] + if section_col: + new_names.append('Section') + new_names += 
['Suggested_KEEP', 'Suggested_ARCHIVE'] + exact.columns = new_names + + # Add empty tracking columns + exact['Decision'] = '' + exact['Status'] = 'TODO' + exact['Notes'] = '' + + # Save + exact.to_csv('WORK_LIST_EXACT.csv', index=False) + print(f"✅ Created WORK_LIST_EXACT.csv ({len(exact)} cases in {exact['Group_ID'].nunique()} groups)") + + # 2. High priority similar pairs (>= 95% similarity) + if similar.empty: + print("⚠️ No similar pairs — skipping WORK_LIST_SIMILAR_HIGH_PRIORITY.csv and WORK_LIST_PERFECT_MATCHES.csv") + else: + high_sim = similar[similar['similarity'] >= 0.95].copy() + high_sim = high_sim.sort_values('similarity', ascending=False) + + # Add decision columns + high_sim['Suggested_Action'] = '' + high_sim['Decision'] = '' + high_sim['Status'] = 'TODO' + high_sim['Notes'] = '' + + # Reorder columns (include section if present) + base_cols = ['case_id_1', 'title_1'] + if 'section_1' in high_sim.columns: + base_cols.append('section_1') + base_cols += ['case_id_2', 'title_2'] + if 'section_2' in high_sim.columns: + base_cols.append('section_2') + base_cols += ['similarity', 'step_overlap', 'relation', 'shares_most_steps', + 'Suggested_Action', 'Decision', 'Status', 'Notes'] + high_sim = high_sim[base_cols] + + high_sim.to_csv('WORK_LIST_SIMILAR_HIGH_PRIORITY.csv', index=False) + print(f"✅ Created WORK_LIST_SIMILAR_HIGH_PRIORITY.csv ({len(high_sim)} pairs)") + + # 3. 
Perfect matches (100% similarity) - these are basically exact duplicates + perfect = similar[similar['similarity'] == 1.0].copy() + perfect = perfect.sort_values('step_overlap', ascending=False) + + # Keep the case with the lower numeric ID + perfect['Suggested_KEEP'] = perfect.apply( + lambda r: r['case_id_1'] if case_id_sort_key(r['case_id_1']) <= case_id_sort_key(r['case_id_2']) else r['case_id_2'], + axis=1, + ) + perfect['Suggested_ARCHIVE'] = perfect.apply( + lambda r: r['case_id_2'] if case_id_sort_key(r['case_id_1']) <= case_id_sort_key(r['case_id_2']) else r['case_id_1'], + axis=1, + ) + perfect['Decision'] = '' + perfect['Status'] = 'TODO' + perfect['Notes'] = '' + + base_cols = ['case_id_1', 'title_1'] + if 'section_1' in perfect.columns: + base_cols.append('section_1') + base_cols += ['case_id_2', 'title_2'] + if 'section_2' in perfect.columns: + base_cols.append('section_2') + base_cols += ['similarity', 'step_overlap', 'Suggested_KEEP', 'Suggested_ARCHIVE', + 'Decision', 'Status', 'Notes'] + perfect = perfect[base_cols] + + perfect.to_csv('WORK_LIST_PERFECT_MATCHES.csv', index=False) + print(f"✅ Created WORK_LIST_PERFECT_MATCHES.csv ({len(perfect)} pairs)") + + # Summary + print("\n" + "="*60) + print("WORK LISTS CREATED") + print("="*60) + if not exact.empty and 'Group_ID' in exact.columns: + print("\n1. WORK_LIST_EXACT.csv") + print(f" - {len(exact)} cases in {exact['Group_ID'].nunique()} groups") + print(f" - Suggested savings: ~{len(exact) - exact['Group_ID'].nunique()} cases") + if not similar.empty: + print("\n2. WORK_LIST_PERFECT_MATCHES.csv") + print(f" - {len(perfect)} pairs with 100% similarity") + print(f" - Suggested savings: ~{len(perfect)} cases") + print("\n3. 
WORK_LIST_SIMILAR_HIGH_PRIORITY.csv") + print(f" - {len(high_sim)} pairs with ≥95% similarity") + print(f" - Review and decide case by case") + print("\n💡 TIP: Open these in Excel and use filters/sorting to prioritize") + print("="*60) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Export priority work lists from deduplication results.") + parser.add_argument( + "--output-dir", default=".", + help="Directory containing input CSVs and where work lists will be written (default: current directory)" + ) + cli_args = parser.parse_args() + os.chdir(cli_args.output_dir) + try: + export_priority_lists() + except RuntimeError as e: + print(f"Error: {e}") + sys.exit(1) diff --git a/testrail/testcases-deduplication/fetch_testrail_export.py b/testrail/testcases-deduplication/fetch_testrail_export.py new file mode 100644 index 0000000..95a0be7 --- /dev/null +++ b/testrail/testcases-deduplication/fetch_testrail_export.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +""" +Fetch test cases from the TestRail API and export as xlsx for deduplication analysis. + +Reads credentials from environment variables: + TESTRAIL_HOST — e.g. 
"yourcompany.testrail.io" + TESTRAIL_USERNAME — API user email + TESTRAIL_PASSWORD — API key or password + +Usage: + python fetch_testrail_export.py --project-id 14 --output export.xlsx + python fetch_testrail_export.py --project-id 14 --suite-id 123 --output export.xlsx +""" +import argparse +import os +import sys + +import pandas as pd +import requests +from requests.auth import HTTPBasicAuth + + +def testrail_client() -> tuple[str, HTTPBasicAuth]: + host = os.environ.get("TESTRAIL_HOST", "").strip().rstrip("/") + username = os.environ.get("TESTRAIL_USERNAME", "") + password = os.environ.get("TESTRAIL_PASSWORD", "") + + if not all([host, username, password]): + print("Error: TESTRAIL_HOST, TESTRAIL_USERNAME and TESTRAIL_PASSWORD must be set.") + sys.exit(1) + + base_url = f"{host}/index.php?/api/v2" + return base_url, HTTPBasicAuth(username, password) + + +REQUEST_TIMEOUT = 30 # seconds + + +def api_get(base_url: str, auth: HTTPBasicAuth, endpoint: str, params: dict = None) -> dict: + url = f"{base_url}/{endpoint}" + resp = requests.get(url, auth=auth, params=params or {}, timeout=REQUEST_TIMEOUT) + resp.raise_for_status() + return resp.json() + + +def fetch_cases(base_url: str, auth: HTTPBasicAuth, project_id: str, suite_id: str = None) -> list[dict]: + """Fetch all test cases for a project (paginated, 250 per page).""" + cases = [] + offset = 0 + limit = 250 + + while True: + params = {"limit": limit, "offset": offset} + if suite_id: + params["suite_id"] = suite_id + + data = api_get(base_url, auth, f"get_cases/{project_id}", params) + batch = data.get("cases", []) + cases.extend(batch) + + if len(batch) < limit: + break + offset += len(batch) # advance by page size, not cumulative total + + return cases + + +def fetch_sections(base_url: str, auth: HTTPBasicAuth, project_id: str, suite_id: str = None) -> dict[int, str]: + """Return a mapping of section_id → section name (paginated, 250 per page).""" + sections = [] + offset = 0 + limit = 250 + + while True: + 
params = {"limit": limit, "offset": offset} + if suite_id: + params["suite_id"] = suite_id + + data = api_get(base_url, auth, f"get_sections/{project_id}", params) + batch = data.get("sections", []) + sections.extend(batch) + + if len(batch) < limit: + break + offset += len(batch) + + return {s["id"]: s["name"] for s in sections} + + +def format_steps(steps_list: list[dict], field: str) -> str: + """Convert [{content, expected}, ...] to a numbered string (TestRail xlsx export format).""" + if not steps_list: + return "" + return "\n".join(f"{i}. {step.get(field, '')}" for i, step in enumerate(steps_list, 1)) + + +def build_xlsx(cases: list[dict], sections: dict[int, str], output_path: str) -> None: + rows = [] + for case in cases: + steps_raw = case.get("custom_steps_separated") or [] + rows.append({ + "ID": f"C{case['id']}", + "Title": case.get("title", ""), + "Section": sections.get(case.get("section_id"), ""), + "Steps (Step)": format_steps(steps_raw, "content"), + "Steps (Expected Result)": format_steps(steps_raw, "expected"), + }) + + df = pd.DataFrame(rows) + df.to_excel(output_path, index=False) + print(f"Exported {len(df)} test cases to {output_path}") + + +def main(): + parser = argparse.ArgumentParser( + description="Fetch TestRail test cases and export as xlsx for deduplication." + ) + parser.add_argument("--project-id", required=True, help="TestRail project ID") + parser.add_argument("--suite-id", default=None, help="TestRail suite ID (optional, fetches all suites if omitted)") + parser.add_argument("--output", default="testrail_export.xlsx", help="Output xlsx file path") + args = parser.parse_args() + + base_url, auth = testrail_client() + + print(f"Fetching test cases for project {args.project_id}...") + cases = fetch_cases(base_url, auth, args.project_id, args.suite_id) + print(f"Fetched {len(cases)} test cases") + + if not cases: + print("No test cases found. 
Check project ID and suite ID.") + sys.exit(1) + + print("Fetching section names...") + sections = fetch_sections(base_url, auth, args.project_id, args.suite_id) + + build_xlsx(cases, sections, args.output) + + +if __name__ == "__main__": + main() diff --git a/testrail/testcases-deduplication/find-duplicates.py b/testrail/testcases-deduplication/find-duplicates.py new file mode 100644 index 0000000..4a6aaae --- /dev/null +++ b/testrail/testcases-deduplication/find-duplicates.py @@ -0,0 +1,451 @@ +import argparse +import json +import pandas as pd +import re +import unicodedata +from difflib import SequenceMatcher +from sentence_transformers import SentenceTransformer +from sklearn.neighbors import NearestNeighbors + + +# ---------- Configuration ---------- +EXACT_OUTPUT = "duplicates_exact.csv" +SIMILAR_OUTPUT = "similar_pairs.csv" + +# Thresholds +SEMANTIC_DUP_THRESHOLD = 0.90 # >= this is considered a strong duplicate +SEMANTIC_SIM_THRESHOLD = 0.80 # >= this is considered very similar +STEP_OVERLAP_THRESHOLD = 0.80 # % of common steps to mark "shares most steps" + + +# ---------- Text utilities ---------- + +HTML_TAG_RE = re.compile(r"<.*?>", re.DOTALL) + +def strip_html(text: str) -> str: + """Remove HTML tags from text.""" + text = HTML_TAG_RE.sub(" ", text) + return text + +def normalize_text(text: str) -> str: + """ + Normalize text by: + - Stripping HTML tags + - Converting to lowercase + - Normalizing Unicode + - Removing numbering (1. 2) etc.) from beginning of lines + - Collapsing whitespace + """ + if not isinstance(text, str): + return "" + + # Step 1: Strip whitespace and HTML first (before expensive operations) + text = text.strip() + text = strip_html(text) + + # Step 2: Normalize Unicode and lowercase + text = unicodedata.normalize("NFKC", text) + text = text.lower() + + # Step 3: Remove numbering like "1. ", "2) ", "- ", etc. at the beginning of lines + # This also handles double numbering like "1. 1. 
text" + lines = [] + for line in text.splitlines(): + # Remove one or more number patterns at the start (handles "1. 1. text") + line = re.sub(r"^(\s*\d+\s*[\.\)]\s*)+", " ", line) + # Remove bullet points + line = re.sub(r"^\s*[-•]\s*", " ", line) + lines.append(line.strip()) + text = " ".join(l for l in lines if l) + + # Step 4: Collapse whitespace + text = re.sub(r"\s+", " ", text) + return text.strip() + + +def split_numbered_items(text: str): + """ + Converts something like: + '1. step one\n2. step two' -> ['step one', 'step two'] + Also handles formats like '1.
step one
\n2.step two
' + """ + if not isinstance(text, str): + return [] + + # First remove HTML + text = strip_html(text) + + # Split by patterns "n. " or "n) " at the beginning of line + # Add a fictitious newline at the beginning to simplify the split + text = "\n" + text + pieces = re.split(r"\n\s*\d+\s*[\.\)]\s*", text) + # pieces[0] will be what's before the first numbering, we ignore it + steps = [normalize_text(p) for p in pieces[1:]] + return [s for s in steps if s] + + +def parse_notes_steps_and_expected(notes: str): + """ + From a Notes block with: + Step Description: + ... + Expected Result: + ... + extracts two lists: [desc1, desc2,...], [exp1, exp2,...] + """ + if not isinstance(notes, str): + return [], [] + + # Normalize line breaks + text = notes.replace("\r\n", "\n").replace("\r", "\n") + + # Split by 'Step Description:' + chunks = text.split("Step Description:") + descs = [] + exps = [] + + for chunk in chunks[1:]: # the first piece is before the first description + parts = chunk.split("Expected Result:", 1) + desc = parts[0] + exp = parts[1] if len(parts) > 1 else "" + descs.append(normalize_text(desc)) + exps.append(normalize_text(exp)) + + return descs, exps + + +def parse_testrail_steps(steps_text: str, expected_text: str): + """ + Parse TestRail's 'Steps (Step)' and 'Steps (Expected Result)' columns. + These come in format: + Steps: "1. step one\n2. step two\n..." + Expected: "1. expected one\n2. expected two\n..." 
+ Returns: ([step1, step2, ...], [expected1, expected2, ...]) + """ + steps = split_numbered_items(steps_text) if isinstance(steps_text, str) else [] + expected = split_numbered_items(expected_text) if isinstance(expected_text, str) else [] + return steps, expected + + +def step_similarity(a: str, b: str) -> float: + return SequenceMatcher(None, a, b).ratio() + + +def compute_step_overlap(steps1, steps2, min_ratio=0.85): + """ + Calculates the % of common steps by pairing each step from steps1 + with the most similar one from steps2 that exceeds min_ratio. + Uses min() to detect when shorter tests are contained in longer ones. + """ + if not steps1 or not steps2: + return 0.0 + + used_j = set() + matches = 0 + + for s1 in steps1: + best_ratio = 0.0 + best_j = None + for j, s2 in enumerate(steps2): + if j in used_j: + continue + ratio = step_similarity(s1, s2) + if ratio > best_ratio: + best_ratio = ratio + best_j = j + if best_ratio >= min_ratio: + matches += 1 + used_j.add(best_j) + + overlap = matches / min(len(steps1), len(steps2)) + return overlap + + +# ---------- Data loading and normalization ---------- + +def load_and_normalize(path: str) -> pd.DataFrame: + # Try different header rows to find the right format + raw = None + for header_row in [0, 2]: + try: + test_df = pd.read_excel(path, header=header_row, nrows=1) + # Check if we have ID or Unnamed: 1 column with case ID pattern + if "ID" in test_df.columns or "Unnamed: 1" in test_df.columns: + raw = pd.read_excel(path, header=header_row) + break + except Exception: + continue + + if raw is None: + raise ValueError("Could not determine Excel file structure") + + # Rename key columns for readability (only if they don't already exist) + rename_map = {} + if "Unnamed: 1" in raw.columns and "CaseID" not in raw.columns: + rename_map["Unnamed: 1"] = "CaseID" + if "Unnamed: 2" in raw.columns and "Title" not in raw.columns: + rename_map["Unnamed: 2"] = "TestTitle" + + if rename_map: + raw = 
raw.rename(columns=rename_map) + + # Determine which column has the Case ID + if "ID" in raw.columns: + case_id_col = "ID" + elif "CaseID" in raw.columns: + case_id_col = "CaseID" + elif "Unnamed: 1" in raw.columns: + case_id_col = "Unnamed: 1" + else: + raise ValueError("Could not find Case ID column") + + if "Title" in raw.columns: + title_col = "Title" + elif "TestTitle" in raw.columns: + title_col = "TestTitle" + elif "Unnamed: 2" in raw.columns: + title_col = "Unnamed: 2" + else: + raise ValueError("Could not find Title column") + + # Keep only rows that have a CaseID + raw = raw[raw[case_id_col].notna()].copy() + + # Extract lists of steps and expected results + step_lists = [] + expected_lists = [] + + # Store CaseID, Title, and Section for later use + raw["_case_id"] = raw[case_id_col] + raw["_title"] = raw[title_col] + + # Extract Section column (TestRail exports use "Section" or "Section Hierarchy") + if "Section" in raw.columns: + raw["_section"] = raw["Section"].fillna("").astype(str) + elif "Section Hierarchy" in raw.columns: + raw["_section"] = raw["Section Hierarchy"].fillna("").astype(str) + else: + raw["_section"] = "" + + for _, row in raw.iterrows(): + # Try different sources for steps in priority order: + # 1) TestRail's standard 'Steps (Step)' and 'Steps (Expected Result)' columns + steps_col = row.get("Steps (Step)") + expected_col = row.get("Steps (Expected Result)") + + if pd.notna(steps_col): + steps, expected = parse_testrail_steps(steps_col, expected_col) + else: + # 2) Try to parse Notes (Step Description / Expected Result format) + notes = row.get("Notes") + descs, exps = parse_notes_steps_and_expected(notes) + if descs: + steps = descs + expected = exps + else: + # 3) Fallback to Section Description + section_desc = row.get("Section Description") + steps = split_numbered_items(str(section_desc)) if pd.notna(section_desc) else [] + expected = [] + + # 4) If still no expected results, try the global Expected Result column + if not 
expected: + expected_global = row.get("Expected Result") + if pd.notna(expected_global) and isinstance(expected_global, str): + expected = [normalize_text(expected_global)] + + step_lists.append(steps) + expected_lists.append(expected) + + raw["steps_list"] = step_lists + raw["expected_list"] = expected_lists + + # Canonical text fields + raw["canonical_title"] = raw["_title"].fillna("").apply(lambda x: normalize_text(str(x))) + raw["canonical_steps"] = raw["steps_list"].apply( + lambda lst: " | ".join(normalize_text(s) for s in lst) + ) + raw["canonical_expected"] = raw["expected_list"].apply( + lambda lst: " | ".join(normalize_text(s) for s in lst) + ) + + def build_full(row): + return ( + f"title: {row['canonical_title']}\n" + f"steps: {row['canonical_steps']}\n" + f"expected: {row['canonical_expected']}" + ) + + raw["canonical_full_text"] = raw.apply(build_full, axis=1) + + return raw + + +# ---------- Exact duplicates ---------- + +def find_exact_duplicates(df: pd.DataFrame) -> pd.DataFrame: + df["exact_key"] = df["canonical_full_text"] + dup_groups = df.groupby("exact_key").filter(lambda g: len(g) > 1).copy() + # Assign a group ID for each set of duplicates + dup_groups["duplicate_group_id"] = dup_groups.groupby("exact_key").ngroup() + return dup_groups + + +# ---------- Semantic similarity ---------- + +def compute_semantic_pairs(df: pd.DataFrame) -> pd.DataFrame: + texts = df["canonical_full_text"].tolist() + case_ids = df["_case_id"].tolist() + titles = df["_title"].fillna("").tolist() + sections = df["_section"].fillna("").tolist() + + # Local sentence-transformers model + print("Loading embeddings model...") + model = SentenceTransformer("all-MiniLM-L6-v2") + print("Generating embeddings...") + embeddings = model.encode(texts, batch_size=32, show_progress_bar=True) + + # Nearest neighbors + print("Searching for similar neighbors...") + nn = NearestNeighbors(metric="cosine", algorithm="brute") + nn.fit(embeddings) + + # Include the point itself 
(n_neighbors=21 -> 1 self + 20 neighbors for medium datasets) + distances, indices = nn.kneighbors(embeddings, n_neighbors=min(21, len(embeddings))) + + rows = [] + n = len(df) + + for i in range(n): + for k in range(1, indices.shape[1]): # skip neighbor 0 (itself) + j = indices[i, k] + if i >= j: + continue # avoid duplicating pairs (i,j) and (j,i) + + dist = distances[i, k] + sim = 1.0 - dist + + if sim < SEMANTIC_SIM_THRESHOLD: + continue + + steps1 = df.iloc[i]["steps_list"] + steps2 = df.iloc[j]["steps_list"] + overlap = compute_step_overlap(steps1, steps2) + + label = "similar" + if sim >= SEMANTIC_DUP_THRESHOLD: + label = "semantic_duplicate" + + shares_most_steps = overlap >= STEP_OVERLAP_THRESHOLD + + rows.append({ + "case_id_1": case_ids[i], + "title_1": titles[i], + "section_1": sections[i], + "case_id_2": case_ids[j], + "title_2": titles[j], + "section_2": sections[j], + "similarity": round(float(sim), 4), + "step_overlap": round(float(overlap), 4), + "relation": label, + "shares_most_steps": shares_most_steps, + }) + + return pd.DataFrame(rows) + + +# ---------- Main ---------- + +def parse_args(): + parser = argparse.ArgumentParser( + description="Detect duplicate and similar test cases from a TestRail Excel export." 
+ ) + parser.add_argument( + "input_xlsx", + help="Path to the TestRail Excel export (.xlsx)", + ) + parser.add_argument( + "--exact-output", + default=EXACT_OUTPUT, + help=f"Output CSV for exact duplicates (default: {EXACT_OUTPUT})", + ) + parser.add_argument( + "--similar-output", + default=SIMILAR_OUTPUT, + help=f"Output CSV for similar pairs (default: {SIMILAR_OUTPUT})", + ) + parser.add_argument( + "--dup-threshold", + type=float, + default=SEMANTIC_DUP_THRESHOLD, + help=f"Semantic similarity threshold for 'duplicate' label (default: {SEMANTIC_DUP_THRESHOLD})", + ) + parser.add_argument( + "--sim-threshold", + type=float, + default=SEMANTIC_SIM_THRESHOLD, + help=f"Minimum similarity threshold to report a pair (default: {SEMANTIC_SIM_THRESHOLD})", + ) + parser.add_argument( + "--overlap-threshold", + type=float, + default=STEP_OVERLAP_THRESHOLD, + help=f"Step overlap threshold for 'shares_most_steps' flag (default: {STEP_OVERLAP_THRESHOLD})", + ) + return parser.parse_args() + + +def main(): + args = parse_args() + + # Apply CLI overrides to module-level thresholds + global SEMANTIC_DUP_THRESHOLD, SEMANTIC_SIM_THRESHOLD, STEP_OVERLAP_THRESHOLD + SEMANTIC_DUP_THRESHOLD = args.dup_threshold + SEMANTIC_SIM_THRESHOLD = args.sim_threshold + STEP_OVERLAP_THRESHOLD = args.overlap_threshold + + try: + print("Loading and normalizing data...") + df = load_and_normalize(args.input_xlsx) + + print(f"Total test cases loaded: {len(df)}") + + if len(df) == 0: + print("Warning: No test cases found in the input file.") + return + + print("Searching for exact duplicates...") + exact_dups = find_exact_duplicates(df) + if not exact_dups.empty: + exact_dups[["_case_id", "_title", "_section", "duplicate_group_id"]].to_csv(args.exact_output, index=False) + print(f"Exact duplicates saved to {args.exact_output}") + else: + print("No exact duplicates found.") + + print("Searching for similar tests (semantic + steps)...") + similar_pairs = compute_semantic_pairs(df) + if not 
similar_pairs.empty: + similar_pairs.to_csv(args.similar_output, index=False) + print(f"Similar pairs saved to {args.similar_output}") + else: + print("No similar pairs found with the defined thresholds.") + + # Save stats for downstream scripts + stats = { + "total_cases": len(df), + "input_file": args.input_xlsx, + } + with open("analysis_stats.json", "w") as f: + json.dump(stats, f, indent=2) + print(f"Stats saved to analysis_stats.json") + + except FileNotFoundError: + print(f"Error: Input file '{args.input_xlsx}' not found.") + except KeyError as e: + print(f"Error: Required column not found in Excel file: {e}") + except Exception as e: + print(f"Error: {e}") + + +if __name__ == "__main__": + main() diff --git a/testrail/testcases-deduplication/generate-work-list.py b/testrail/testcases-deduplication/generate-work-list.py new file mode 100644 index 0000000..5240883 --- /dev/null +++ b/testrail/testcases-deduplication/generate-work-list.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +""" +Generate prioritized work list for reviewing duplicates +""" +import json +import os +import pandas as pd + + +def load_csv(path: str) -> pd.DataFrame: + """Load a CSV file, returning an empty DataFrame if the file doesn't exist.""" + if not os.path.exists(path): + print(f"Warning: {path} not found — skipping (no results for this category).") + return pd.DataFrame() + return pd.read_csv(path) + + +def generate_work_list(): + # Load data + exact = load_csv('duplicates_exact.csv') + similar = load_csv('similar_pairs.csv') + + if exact.empty and similar.empty: + raise RuntimeError("No duplicate data found. 
Run find-duplicates.py first.") + + # Load total case count from stats file (written by find-duplicates.py) + total_cases = 0 + if os.path.exists('analysis_stats.json'): + with open('analysis_stats.json') as f: + stats = json.load(f) + total_cases = stats.get('total_cases', 0) + + print("=" * 80) + print("WORK LIST - TEST CASE DEDUPLICATION") + print("=" * 80) + print() + + # PART 1: Exact Duplicates + print("PHASE 1: EXACT DUPLICATES") + print("-" * 80) + + total_to_archive = 0 + + if exact.empty: + print(" No exact duplicates found.") + else: + group_sizes = exact.groupby('duplicate_group_id').size().sort_values(ascending=False) + priority = 1 + + # Large groups first (4+) + print("\n🔴 PRIORITY 1: Large groups (4+ duplicates)") + print() + for group_id, size in group_sizes[group_sizes >= 4].items(): + group = exact[exact['duplicate_group_id'] == group_id].sort_values('_case_id') + title = group.iloc[0]['_title'] + case_ids = list(group['_case_id'].values) + + print(f"{priority}. Group {group_id}: {size} duplicates") + print(f" Title: {title}") + print(f" ✅ KEEP: {case_ids[0]} (lowest ID)") + print(f" 🗑️ ARCHIVE: {', '.join(case_ids[1:])}") + print(f" Savings: {size - 1} test cases") + print() + + priority += 1 + total_to_archive += size - 1 + + # Medium groups (3) + print("\n🟠 PRIORITY 2: Medium groups (3 duplicates)") + print() + for group_id, size in group_sizes[group_sizes == 3].items(): + group = exact[exact['duplicate_group_id'] == group_id].sort_values('_case_id') + title = group.iloc[0]['_title'] + case_ids = list(group['_case_id'].values) + + print(f"{priority}. Group {group_id}: {title[:60]}") + print(f" ✅ KEEP: {case_ids[0]}") + print(f" 🗑️ ARCHIVE: {', '.join(case_ids[1:])}") + print() + + priority += 1 + total_to_archive += size - 1 + + if priority > 25: # Limit output + remaining = len(group_sizes[group_sizes == 3]) - (priority - len(group_sizes[group_sizes >= 4]) - 1) + if remaining > 0: + print(f" ... 
and {remaining} more groups of 3") + break + + # Small groups (2) + print(f"\n🟡 PRIORITY 3: Small groups (2 duplicates)") + print(f" {(group_sizes == 2).sum()} groups") + print(f" Savings: {(group_sizes == 2).sum()} test cases") + print() + + total_to_archive += (group_sizes == 2).sum() + + print(f"\n📊 PHASE 1 SUMMARY:") + print(f" Total groups: {len(group_sizes)}") + print(f" Total cases to archive: {total_to_archive}") + print() + + # PART 2: High Similarity + print("\n" + "=" * 80) + print("PHASE 2: HIGH SIMILARITY PAIRS") + print("-" * 80) + + if similar.empty: + print(" No similar pairs found.") + else: + perfect = similar[similar['similarity'] == 1.0] + print(f"\n🔴 PRIORITY 1: Perfect semantic matches (100%)") + print(f" {len(perfect)} pairs") + print(f" Estimated savings: ~{len(perfect) // 2} test cases") + print() + + print(" Top 5 examples:") + for i, (_, row) in enumerate(perfect.head(5).iterrows(), 1): + print(f" {i}. {row['case_id_1']} vs {row['case_id_2']}") + print(f" {row['title_1'][:60]}") + print() + + near_perfect = similar[(similar['similarity'] >= 0.95) & (similar['similarity'] < 1.0)] + print(f"🟠 PRIORITY 2: Near-perfect matches (95-100%)") + print(f" {len(near_perfect)} pairs") + print(f" Recommended: Review top 50") + print(f" Estimated savings: ~{len(near_perfect) // 3} test cases") + print() + + high_overlap = similar[(similar['similarity'] >= 0.90) & (similar['step_overlap'] >= 0.8)] + print(f"🟡 PRIORITY 3: High step overlap (≥90% sim, ≥80% overlap)") + print(f" {len(high_overlap)} pairs") + print(f" These share most execution steps") + print() + + # Compute Phase 2 estimate from real data (perfect + ~1/3 of near-perfect) + if not similar.empty: + perfect_savings = len(similar[similar['similarity'] == 1.0]) // 2 + near_perfect_savings = len(similar[(similar['similarity'] >= 0.95) & (similar['similarity'] < 1.0)]) // 3 + phase2_estimate = perfect_savings + near_perfect_savings + else: + phase2_estimate = 0 + + # Overall summary + print("\n" 
+ "=" * 80) + print("ESTIMATED TOTALS") + print("-" * 80) + print(f"Phase 1 (Exact): ~{total_to_archive} cases") + if phase2_estimate > 0: + print(f"Phase 2 (Similar): ~{phase2_estimate} cases") + estimated_total = total_to_archive + phase2_estimate + if total_cases > 0: + pct = estimated_total / total_cases * 100 + print(f"GRAND TOTAL: ~{estimated_total} cases ({pct:.1f}% reduction out of {total_cases} total)") + else: + print(f"GRAND TOTAL: ~{estimated_total} cases") + print("=" * 80) + + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser(description="Generate prioritized work list from deduplication results.") + parser.add_argument( + "--output-dir", default=".", + help="Directory containing duplicates_exact.csv and similar_pairs.csv (default: current directory)" + ) + cli_args = parser.parse_args() + os.chdir(cli_args.output_dir) + try: + generate_work_list() + except RuntimeError as e: + print(f"Error: {e}") + sys.exit(1) diff --git a/testrail/testcases-deduplication/insert_bq_stats.py b/testrail/testcases-deduplication/insert_bq_stats.py new file mode 100644 index 0000000..906f03c --- /dev/null +++ b/testrail/testcases-deduplication/insert_bq_stats.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +""" +Compute deduplication stats from output CSVs, write them to GITHUB_ENV, +and insert a row into BigQuery. 
+ +Usage: + python insert_bq_stats.py \ + --output-dir ./output \ + --project-id 14 \ + --project-name firefox-ios \ + --run-date 2026-04-23 \ + --github-run-id 12345678 \ + --bq-project moz-mobile-tools \ + --bq-dataset testops_stats \ + --bq-table testrail_deduplication_runs +""" +import argparse +import csv +import json +import os +import subprocess +import sys +from pathlib import Path + + +HIGH_PRIORITY_THRESHOLD = 0.95 + + +def compute_stats(output_dir: Path) -> dict: + total = 0 + exact = 0 + similar = 0 + high_priority_similar = 0 + + stats_file = output_dir / "analysis_stats.json" + if stats_file.exists(): + total = json.loads(stats_file.read_text()).get("total_cases", 0) + + exact_file = output_dir / "duplicates_exact.csv" + if exact_file.exists(): + with exact_file.open() as f: + exact = sum(1 for _ in csv.DictReader(f)) + + similar_file = output_dir / "similar_pairs.csv" + if similar_file.exists(): + with similar_file.open() as f: + for row in csv.DictReader(f): + similar += 1 + try: + if float(row["similarity"]) >= HIGH_PRIORITY_THRESHOLD: + high_priority_similar += 1 + except (ValueError, KeyError): + pass + + duplicate_rate = round(exact / total, 4) if total > 0 else 0.0 + + return { + "total": total, + "exact": exact, + "similar": similar, + "high_priority_similar": high_priority_similar, + "duplicate_rate": duplicate_rate, + } + + +def write_github_env(stats: dict) -> None: + github_env = os.environ.get("GITHUB_ENV") + if not github_env: + return + with open(github_env, "a") as f: + f.write(f"current_total={stats['total']}\n") + f.write(f"current_exact={stats['exact']}\n") + f.write(f"current_similar={stats['similar']}\n") + f.write(f"current_high_priority_similar={stats['high_priority_similar']}\n") + f.write(f"current_rate={stats['duplicate_rate']}\n") + + +def insert_bigquery(stats: dict, args: argparse.Namespace) -> None: + payload = json.dumps({ + "run_date": args.run_date, + "project_id": args.project_id, + "project_name": args.project_name, 
+ "total_cases": stats["total"], + "exact_duplicate_cases": stats["exact"], + "similar_pairs": stats["similar"], + "high_priority_similar_pairs": stats["high_priority_similar"], + "duplicate_rate": stats["duplicate_rate"], + "github_run_id": args.github_run_id, + }) + + result = subprocess.run( + ["bq", "insert", + f"--project_id={args.bq_project}", + f"{args.bq_dataset}.{args.bq_table}"], + input=payload.encode(), + capture_output=True, + ) + + if result.returncode != 0: + print(f"BigQuery insert failed: {result.stderr.decode()}", file=sys.stderr) + sys.exit(1) + + +def main(): + parser = argparse.ArgumentParser(description="Compute dedup stats and insert into BigQuery.") + parser.add_argument("--output-dir", required=True, help="Directory with output CSVs") + parser.add_argument("--project-id", required=True, help="TestRail project ID") + parser.add_argument("--project-name", required=True, help="Project name (e.g. firefox-ios)") + parser.add_argument("--run-date", required=True, help="Run date (YYYY-MM-DD)") + parser.add_argument("--github-run-id", required=True, help="GitHub Actions run ID") + parser.add_argument("--bq-project", default="moz-mobile-tools") + parser.add_argument("--bq-dataset", default="testops_stats") + parser.add_argument("--bq-table", default="testrail_deduplication_runs") + args = parser.parse_args() + + output_dir = Path(args.output_dir) + if not output_dir.exists(): + print(f"Error: output directory '{output_dir}' does not exist.", file=sys.stderr) + sys.exit(1) + + stats = compute_stats(output_dir) + + print(f"Total cases: {stats['total']}") + print(f"Exact duplicates: {stats['exact']}") + print(f"Similar pairs: {stats['similar']}") + print(f"High-priority similar pairs:{stats['high_priority_similar']}") + print(f"Duplicate rate: {stats['duplicate_rate']:.1%}") + + write_github_env(stats) + insert_bigquery(stats, args) + print("BigQuery insert successful.") + + +if __name__ == "__main__": + main() diff --git 
a/testrail/testcases-deduplication/requirements.txt b/testrail/testcases-deduplication/requirements.txt new file mode 100644 index 0000000..14b61de --- /dev/null +++ b/testrail/testcases-deduplication/requirements.txt @@ -0,0 +1,5 @@ +pandas>=2.0.0 +openpyxl>=3.1.0 +requests>=2.32.0 +sentence-transformers>=2.2.0 +scikit-learn>=1.3.0 diff --git a/testrail/testcases-deduplication/run_all.py b/testrail/testcases-deduplication/run_all.py new file mode 100644 index 0000000..3db3fc7 --- /dev/null +++ b/testrail/testcases-deduplication/run_all.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +""" +Full deduplication pipeline: find duplicates → generate work list → export CSVs. + +Usage: + python run_all.py /path/to/testrail_export.xlsx + python run_all.py /path/to/export.xlsx --sim-threshold 0.85 +""" +import argparse +import importlib.util +import json +import os +import sys +from pathlib import Path + +SCRIPT_DIR = Path(__file__).parent + + +def load_module(filename: str): + """Load a module from a hyphen-named file in the same directory as this script.""" + path = SCRIPT_DIR / filename + module_name = filename.removesuffix(".py").replace("-", "_") + spec = importlib.util.spec_from_file_location(module_name, path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def run_find_duplicates(input_xlsx: str, dup_threshold: float, sim_threshold: float, overlap_threshold: float): + fd = load_module("find-duplicates.py") + + # Set thresholds before calling any function (they are module-level constants) + fd.SEMANTIC_DUP_THRESHOLD = dup_threshold + fd.SEMANTIC_SIM_THRESHOLD = sim_threshold + fd.STEP_OVERLAP_THRESHOLD = overlap_threshold + + print("Loading and normalizing data...") + df = fd.load_and_normalize(input_xlsx) + print(f"Total test cases loaded: {len(df)}") + + if len(df) == 0: + print("Warning: No test cases found in the input file.") + return + + print("Searching for exact duplicates...") + exact_dups = 
fd.find_exact_duplicates(df) + if not exact_dups.empty: + exact_dups[["_case_id", "_title", "_section", "duplicate_group_id"]].to_csv("duplicates_exact.csv", index=False) + print("Exact duplicates saved to duplicates_exact.csv") + else: + print("No exact duplicates found.") + + print("Searching for similar tests (semantic + steps)...") + similar_pairs = fd.compute_semantic_pairs(df) + if not similar_pairs.empty: + similar_pairs.to_csv("similar_pairs.csv", index=False) + print("Similar pairs saved to similar_pairs.csv") + else: + print("No similar pairs found with the defined thresholds.") + + stats = {"total_cases": len(df), "input_file": input_xlsx} + with open("analysis_stats.json", "w") as f: + json.dump(stats, f, indent=2) + print("Stats saved to analysis_stats.json") + + +def run_generate_work_list(): + gwl = load_module("generate-work-list.py") + gwl.generate_work_list() + + +def run_export_priority_lists(): + epl = load_module("export-priority-list.py") + epl.export_priority_lists() + + +def main(): + parser = argparse.ArgumentParser( + description="Run the full test case deduplication pipeline." 
+ ) + parser.add_argument("input_xlsx", help="Path to the TestRail Excel export (.xlsx)") + parser.add_argument( + "--dup-threshold", type=float, default=0.90, + help="Semantic similarity threshold for 'duplicate' label (default: 0.90)" + ) + parser.add_argument( + "--sim-threshold", type=float, default=0.80, + help="Minimum similarity threshold to report a pair (default: 0.80)" + ) + parser.add_argument( + "--overlap-threshold", type=float, default=0.80, + help="Step overlap threshold for 'shares_most_steps' flag (default: 0.80)" + ) + parser.add_argument( + "--output-dir", default=str(SCRIPT_DIR), + help="Directory where output CSVs will be written (default: script directory)" + ) + args = parser.parse_args() + + # Resolve both paths before chdir, in case they are relative to the caller's CWD + input_xlsx = str(Path(args.input_xlsx).resolve()) + output_dir = Path(args.output_dir).resolve() + output_dir.mkdir(parents=True, exist_ok=True) + + # All sub-scripts use relative paths for their CSVs — chdir so they resolve to output_dir + os.chdir(output_dir) + + print(f"\n{'='*60}") + print(" Step 1/3: Finding duplicates") + print(f"{'='*60}") + run_find_duplicates(input_xlsx, args.dup_threshold, args.sim_threshold, args.overlap_threshold) + + for name, fn in [ + ("Step 2/3: Generating work list", run_generate_work_list), + ("Step 3/3: Exporting priority CSVs", run_export_priority_lists), + ]: + print(f"\n{'='*60}") + print(f" {name}") + print(f"{'='*60}") + try: + fn() + except RuntimeError as e: + print(f" Skipped: {e}") + + print(f"\n{'='*60}") + print(" Pipeline complete.") + print(f" Output directory: {output_dir}") + print(" Output files:") + print(" - duplicates_exact.csv") + print(" - similar_pairs.csv") + print(" - WORK_LIST_EXACT.csv") + print(" - WORK_LIST_PERFECT_MATCHES.csv") + print(" - WORK_LIST_SIMILAR_HIGH_PRIORITY.csv") + print(" - analysis_stats.json") + print(f"{'='*60}\n") + + +if __name__ == "__main__": + main()