From 6d529db5ab3503d379f152be5bd05cf12a688722 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Wed, 11 Mar 2026 12:46:47 +0100 Subject: [PATCH 01/12] perf(importers): batch endpoint creation and status updates during import/reimport Replace per-finding endpoint_get_or_create() calls with a stateful EndpointManager that accumulates endpoints and statuses across findings and flushes them in bulk at batch boundaries. Reduces ~6200 DB queries to ~3-5 for a 1000-finding scan with 5 endpoints per finding (200 unique). - EndpointManager now takes a `product` param and holds internal accumulators - `record_endpoint()` deduplicates by normalized key within a batch - `record_status_for_create()` / `record_statuses_to_mitigate()` / `record_statuses_to_reactivate()` accumulate operations - `persist()` flushes all pending creates and bulk_updates in one shot - `update_endpoint_status()` accumulates mitigate/reactivate lists instead of dispatching per-finding Celery tasks - Removed old `chunk_endpoints_and_*`, `add_endpoints_to_unsaved_finding`, `mitigate_endpoint_status`, `reactivate_endpoint_status` methods - Added unit tests for `_make_endpoint_unique_tuple` normalization - Updated performance test fixture to match new stateful manager interface --- dojo/importers/base_importer.py | 28 +- dojo/importers/default_importer.py | 11 +- dojo/importers/default_reimporter.py | 21 +- dojo/importers/endpoint_manager.py | 334 +++++++++++++++--------- unittests/test_endpoint_manager.py | 83 ++++++ unittests/test_importers_performance.py | 52 ++-- 6 files changed, 362 insertions(+), 167 deletions(-) create mode 100644 unittests/test_endpoint_manager.py diff --git a/dojo/importers/base_importer.py b/dojo/importers/base_importer.py index 7734fc42d93..743da0d862a 100644 --- a/dojo/importers/base_importer.py +++ b/dojo/importers/base_importer.py @@ -14,7 +14,6 @@ import dojo.finding.helper as finding_helper import dojo.risk_acceptance.helper as ra_helper from dojo.celery_dispatch 
import dojo_dispatch_task -from dojo.importers.endpoint_manager import EndpointManager from dojo.importers.location_manager import LocationManager, UnsavedLocation from dojo.importers.options import ImporterOptions from dojo.jira_link.helper import is_keep_in_sync_with_jira @@ -85,7 +84,9 @@ def __init__( self.location_manager = LocationManager() else: # TODO: Delete this after the move to Locations - self.endpoint_manager = EndpointManager() + # EndpointManager is initialized in each subclass's process_scan() + # after self.test is available, because it needs the product. + self.endpoint_manager = None def check_child_implementation_exception(self): """ @@ -825,12 +826,17 @@ def process_endpoints( msg = "BaseImporter#process_endpoints() method is deprecated when V3_FEATURE_LOCATIONS is enabled" raise NotImplementedError(msg) - # Save the unsaved endpoints - self.endpoint_manager.chunk_endpoints_and_disperse(finding, finding.unsaved_endpoints) - # Check for any that were added in the form + # Clean and record unsaved endpoints from the report + self.endpoint_manager.clean_unsaved_endpoints(finding.unsaved_endpoints) + for endpoint in finding.unsaved_endpoints: + key = self.endpoint_manager.record_endpoint(endpoint) + self.endpoint_manager.record_status_for_create(finding, key) + # Record any endpoints added from the form if len(endpoints_to_add) > 0: logger.debug("endpoints_to_add: %s", endpoints_to_add) - self.endpoint_manager.chunk_endpoints_and_disperse(finding, endpoints_to_add) + for endpoint in endpoints_to_add: + key = self.endpoint_manager.record_endpoint(endpoint) + self.endpoint_manager.record_status_for_create(finding, key) def sanitize_vulnerability_ids(self, finding) -> None: """Remove undisired vulnerability id values""" @@ -934,14 +940,8 @@ def mitigate_finding( ) else: # TODO: Delete this after the move to Locations - # Mitigate the endpoint statuses - dojo_dispatch_task( - EndpointManager.mitigate_endpoint_status, - finding.status_finding.all(), - 
self.user, - kwuser=self.user, - sync=True, - ) + # Accumulate endpoint statuses for bulk mitigate in persist() + self.endpoint_manager.record_statuses_to_mitigate(finding.status_finding.all()) # to avoid pushing a finding group multiple times, we push those outside of the loop if finding_groups_enabled and finding.finding_group: # don't try to dedupe findings that we are closing diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 8dd2aa4a4f9..1d988848210 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -9,6 +9,7 @@ from dojo.celery_dispatch import dojo_dispatch_task from dojo.finding import helper as finding_helper from dojo.importers.base_importer import BaseImporter, Parser +from dojo.importers.endpoint_manager import EndpointManager from dojo.importers.options import ImporterOptions from dojo.jira_link.helper import is_keep_in_sync_with_jira from dojo.models import ( @@ -110,6 +111,9 @@ def process_scan( # Get the findings from the parser based on what methods the parser supplies # This could either mean traditional file parsing, or API pull parsing parsed_findings = self.parse_findings(scan, parser) or [] + # Initialize the endpoint manager now that self.test is available + if not settings.V3_FEATURE_LOCATIONS: + self.endpoint_manager = EndpointManager(self.test.engagement.product) new_findings = self.process_findings(parsed_findings, **kwargs) # Close any old findings in the processed list if the the user specified for that # to occur in the form that is then passed to the kwargs @@ -259,8 +263,10 @@ def process_findings( logger.debug("process_findings: computed push_to_jira=%s", push_to_jira) batch_finding_ids.append(finding.id) - # If batch is full or we're at the end, dispatch one batched task + # If batch is full or we're at the end, persist endpoints and dispatch if len(batch_finding_ids) >= batch_max_size or is_final_finding: + if not settings.V3_FEATURE_LOCATIONS: + 
self.endpoint_manager.persist(user=self.user) finding_ids_batch = list(batch_finding_ids) batch_finding_ids.clear() logger.debug("process_findings: dispatching batch with push_to_jira=%s (batch_size=%d, is_final=%s)", @@ -378,6 +384,9 @@ def close_old_findings( finding_groups_enabled=self.findings_groups_enabled, product_grading_option=False, ) + # Persist any accumulated endpoint status mitigations + if not settings.V3_FEATURE_LOCATIONS: + self.endpoint_manager.persist(user=self.user) # push finding groups to jira since we only only want to push whole groups # We dont check if the finding jira sync is applicable quite yet until we can get in the loop # but this is a way to at least make it that far diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 5075eb6409b..fd4d71c4a38 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -14,6 +14,7 @@ find_candidates_for_reimport_legacy, ) from dojo.importers.base_importer import BaseImporter, Parser +from dojo.importers.endpoint_manager import EndpointManager from dojo.importers.options import ImporterOptions from dojo.jira_link.helper import is_keep_in_sync_with_jira from dojo.location.status import FindingLocationStatus @@ -95,6 +96,9 @@ def process_scan( - Update the test progress """ logger.debug(f"REIMPORT_SCAN: parameters: {locals()}") + # Initialize the endpoint manager now that self.test is available + if not settings.V3_FEATURE_LOCATIONS: + self.endpoint_manager = EndpointManager(self.test.engagement.product) # Validate the Tool_Configuration self.verify_tool_configuration_from_test() # Fetch the parser based upon the string version of the scan type @@ -430,6 +434,8 @@ def process_findings( # - Deduplication batches: optimize bulk operations (larger batches = fewer queries) # They don't need to be aligned since they optimize different operations. 
if len(batch_finding_ids) >= dedupe_batch_max_size or is_final: + if not settings.V3_FEATURE_LOCATIONS: + self.endpoint_manager.persist(user=self.user) finding_ids_batch = list(batch_finding_ids) batch_finding_ids.clear() dojo_dispatch_task( @@ -497,6 +503,9 @@ def close_old_findings( product_grading_option=False, ) mitigated_findings.append(finding) + # Persist any accumulated endpoint status mitigations + if not settings.V3_FEATURE_LOCATIONS: + self.endpoint_manager.persist(user=self.user) # push finding groups to jira since we only only want to push whole groups # We dont check if the finding jira sync is applicable quite yet until we can get in the loop # but this is a way to at least make it that far @@ -763,9 +772,9 @@ def process_matched_mitigated_finding( self.location_manager.chunk_locations_and_reactivate(mitigated_locations) else: # TODO: Delete this after the move to Locations - # Reactivate mitigated endpoints that are not false positives, out of scope, or risk accepted + # Accumulate endpoint statuses for bulk reactivation in persist() # status_finding_non_special is prefetched by build_candidate_scope_queryset - self.endpoint_manager.chunk_endpoints_and_reactivate(existing_finding.status_finding_non_special) + self.endpoint_manager.record_statuses_to_reactivate(existing_finding.status_finding_non_special) existing_finding.notes.add(note) self.reactivated_items.append(existing_finding) # The new finding is active while the existing on is mitigated. 
The existing finding needs to @@ -932,9 +941,13 @@ def finding_post_processing( self.location_manager.chunk_locations_and_disperse(finding, self.endpoints_to_add) else: # TODO: Delete this after the move to Locations - self.endpoint_manager.chunk_endpoints_and_disperse(finding, finding_from_report.unsaved_endpoints) + for endpoint in finding_from_report.unsaved_endpoints: + key = self.endpoint_manager.record_endpoint(endpoint) + self.endpoint_manager.record_status_for_create(finding, key) if len(self.endpoints_to_add) > 0: - self.endpoint_manager.chunk_endpoints_and_disperse(finding, self.endpoints_to_add) + for endpoint in self.endpoints_to_add: + key = self.endpoint_manager.record_endpoint(endpoint) + self.endpoint_manager.record_status_for_create(finding, key) # Parsers shouldn't use the tags field, and use unsaved_tags instead. # Merge any tags set by parser into unsaved_tags tags_from_parser = finding_from_report.tags if isinstance(finding_from_report.tags, list) else [] diff --git a/dojo/importers/endpoint_manager.py b/dojo/importers/endpoint_manager.py index 2b5883121ab..7df87f10036 100644 --- a/dojo/importers/endpoint_manager.py +++ b/dojo/importers/endpoint_manager.py @@ -1,122 +1,80 @@ import logging +from typing import NamedTuple -from django.core.exceptions import MultipleObjectsReturned, ValidationError -from django.urls import reverse +from django.core.exceptions import ValidationError +from django.db import transaction from django.utils import timezone +from hyperlink._url import SCHEME_PORT_MAP # noqa: PLC2701 -from dojo.celery import app -from dojo.celery_dispatch import dojo_dispatch_task -from dojo.endpoint.utils import endpoint_get_or_create from dojo.models import ( Dojo_User, Endpoint, Endpoint_Status, Finding, + Product, ) logger = logging.getLogger(__name__) +class EndpointUniqueKey(NamedTuple): + protocol: str | None + userinfo: str | None + host: str | None + port: int | None + path: str | None + query: str | None + fragment: str | None + 
product_id: int + + # TODO: Delete this after the move to Locations class EndpointManager: - @app.task - def add_endpoints_to_unsaved_finding( - finding: Finding, # noqa: N805 - endpoints: list[Endpoint], - **kwargs: dict, - ) -> None: - """Creates Endpoint objects for a single finding and creates the link via the endpoint status""" - logger.debug(f"IMPORT_SCAN: Adding {len(endpoints)} endpoints to finding: {finding}") - EndpointManager.clean_unsaved_endpoints(endpoints) - for endpoint in endpoints: - ep = None - eps = [] - try: - ep, _ = endpoint_get_or_create( - protocol=endpoint.protocol, - userinfo=endpoint.userinfo, - host=endpoint.host, - port=endpoint.port, - path=endpoint.path, - query=endpoint.query, - fragment=endpoint.fragment, - product=finding.test.engagement.product, - ) - eps.append(ep) - except (MultipleObjectsReturned): - msg = ( - f"Endpoints in your database are broken. " - f"Please access {reverse('endpoint_migrate')} and migrate them to new format or remove them." - ) - raise Exception(msg) - # bulk_create will translate to INSERT WITH IGNORE CONFLICTS - # much faster than get_or_create which issues two queries per endpoint - # bulk_create will not trigger endpoint_status.save and signals which is fine for now - rows = [Endpoint_Status(finding=finding, endpoint=e, date=finding.date) for e in eps] - Endpoint_Status.objects.bulk_create(rows, ignore_conflicts=True, batch_size=1000) + def __init__(self, product: Product) -> None: + self._product = product + self._endpoints_to_create: dict[EndpointUniqueKey, dict] = {} + self._statuses_to_create: list[tuple[Finding, EndpointUniqueKey]] = [] + self._statuses_to_mitigate: list[Endpoint_Status] = [] + self._statuses_to_reactivate: list[Endpoint_Status] = [] - logger.debug(f"IMPORT_SCAN: {len(endpoints)} endpoints imported") + @staticmethod + def _make_endpoint_unique_tuple( + protocol: str | None, + userinfo: str | None, + host: str | None, + port: int | None, + path: str | None, + query: str | None, + 
fragment: str | None, + product_id: int, + ) -> EndpointUniqueKey: + """ + Normalize endpoint fields to a unique key matching endpoint_filter() semantics. - @app.task - def mitigate_endpoint_status( - endpoint_status_list: list[Endpoint_Status], # noqa: N805 - user: Dojo_User, - **kwargs: dict, - ) -> None: - """Mitigates all endpoint status objects that are supplied""" - now = timezone.now() - to_update = [] - for endpoint_status in endpoint_status_list: - # Only mitigate endpoints that are actually active - if endpoint_status.mitigated is False: - endpoint_status.mitigated_time = now - endpoint_status.last_modified = now - endpoint_status.mitigated_by = user - endpoint_status.mitigated = True - to_update.append(endpoint_status) - - if to_update: - Endpoint_Status.objects.bulk_update( - to_update, - ["mitigated", "mitigated_time", "last_modified", "mitigated_by"], - batch_size=1000, - ) + See dojo/endpoint/utils.py endpoint_filter() for the canonical matching logic. + """ + norm_protocol = protocol.lower() if protocol else None + norm_host = host.lower() if host else None - @app.task - def reactivate_endpoint_status( - endpoint_status_list: list[Endpoint_Status], # noqa: N805 - **kwargs: dict, - ) -> None: - """Reactivate all endpoint status objects that are supplied""" - now = timezone.now() - to_update = [] - for endpoint_status in endpoint_status_list: - # Only reactivate endpoints that are actually mitigated - if endpoint_status.mitigated: - logger.debug("Re-import: reactivating endpoint %s that is present in this scan", endpoint_status.endpoint) - endpoint_status.mitigated_by = None - endpoint_status.mitigated_time = None - endpoint_status.mitigated = False - endpoint_status.last_modified = now - to_update.append(endpoint_status) - - if to_update: - Endpoint_Status.objects.bulk_update( - to_update, - ["mitigated", "mitigated_time", "mitigated_by", "last_modified"], - batch_size=1000, - ) + # Port normalization: if protocol has a default port, treat that port 
+ # and None as equivalent (matching endpoint_filter's Q(port__isnull=True) | Q(port__exact=default)) + norm_port = port + if norm_protocol and norm_protocol in SCHEME_PORT_MAP: + default_port = SCHEME_PORT_MAP[norm_protocol] + if port is None or port == default_port: + norm_port = None - def chunk_endpoints_and_disperse( - self, - finding: Finding, - endpoints: list[Endpoint], - **kwargs: dict, - ) -> None: - if not endpoints: - return - dojo_dispatch_task(EndpointManager.add_endpoints_to_unsaved_finding, finding, endpoints, sync=True) + return EndpointUniqueKey( + protocol=norm_protocol, + userinfo=userinfo or None, + host=norm_host, + port=norm_port, + path=path or None, + query=query or None, + fragment=fragment or None, + product_id=product_id, + ) @staticmethod def clean_unsaved_endpoints( @@ -124,7 +82,7 @@ def clean_unsaved_endpoints( ) -> None: """ Clean endpoints that are supplied. For any endpoints that fail this validation - process, raise a message that broken endpoints are being stored + process, raise a message that broken endpoints are being stored. """ for endpoint in endpoints: try: @@ -132,50 +90,182 @@ def clean_unsaved_endpoints( except ValidationError as e: logger.warning("DefectDojo is storing broken endpoint because cleaning wasn't successful: %s", e) - def chunk_endpoints_and_reactivate( - self, - endpoint_status_list: list[Endpoint_Status], - **kwargs: dict, - ) -> None: - dojo_dispatch_task(EndpointManager.reactivate_endpoint_status, endpoint_status_list, sync=True) + def record_endpoint(self, endpoint: Endpoint) -> EndpointUniqueKey: + """Record an endpoint for later batch creation. 
Returns the unique key.""" + key = self._make_endpoint_unique_tuple( + protocol=endpoint.protocol, + userinfo=endpoint.userinfo, + host=endpoint.host, + port=endpoint.port, + path=endpoint.path, + query=endpoint.query, + fragment=endpoint.fragment, + product_id=self._product.id, + ) + if key not in self._endpoints_to_create: + self._endpoints_to_create[key] = { + "protocol": endpoint.protocol, + "userinfo": endpoint.userinfo, + "host": endpoint.host, + "port": endpoint.port, + "path": endpoint.path, + "query": endpoint.query, + "fragment": endpoint.fragment, + "product": self._product, + } + return key - def chunk_endpoints_and_mitigate( - self, - endpoint_status_list: list[Endpoint_Status], - user: Dojo_User, - **kwargs: dict, - ) -> None: - dojo_dispatch_task(EndpointManager.mitigate_endpoint_status, endpoint_status_list, user, sync=True) + def record_status_for_create(self, finding: Finding, key: EndpointUniqueKey) -> None: + """Record that a finding should be linked to an endpoint (identified by key) via Endpoint_Status.""" + self._statuses_to_create.append((finding, key)) def update_endpoint_status( self, existing_finding: Finding, new_finding: Finding, user: Dojo_User, - **kwargs: dict, ) -> None: - """Update the list of endpoints from the new finding with the list that is in the old finding""" - # New endpoints are already added in serializers.py / views.py (see comment "# for existing findings: make sure endpoints are present or created") - # So we only need to mitigate endpoints that are no longer present + """ + Compare old/new endpoints and accumulate mitigate/reactivate lists. + + The actual bulk_update happens in persist(). 
+ """ # status_finding_non_special is prefetched by build_candidate_scope_queryset with the # special-status exclusion and endpoint select_related already applied at the DB level existing_finding_endpoint_status_list = existing_finding.status_finding_non_special new_finding_endpoints_list = new_finding.unsaved_endpoints if new_finding.is_mitigated: # New finding is mitigated, so mitigate all old endpoints - endpoint_status_to_mitigate = existing_finding_endpoint_status_list + self._statuses_to_mitigate.extend(existing_finding_endpoint_status_list) else: # Convert to set for O(1) lookups instead of O(n) linear search new_finding_endpoints_set = set(new_finding_endpoints_list) # Mitigate any endpoints in the old finding not found in the new finding - endpoint_status_to_mitigate = [ + self._statuses_to_mitigate.extend( eps for eps in existing_finding_endpoint_status_list if eps.endpoint not in new_finding_endpoints_set - ] + ) # Re-activate any endpoints in the old finding that are in the new finding - endpoint_status_to_reactivate = [ + self._statuses_to_reactivate.extend( eps for eps in existing_finding_endpoint_status_list if eps.endpoint in new_finding_endpoints_set + ) + + def record_statuses_to_reactivate(self, statuses: list[Endpoint_Status]) -> None: + """Accumulate endpoint statuses for bulk reactivation in persist().""" + self._statuses_to_reactivate.extend(statuses) + + def record_statuses_to_mitigate(self, statuses: list[Endpoint_Status]) -> None: + """Accumulate endpoint statuses for bulk mitigation in persist().""" + self._statuses_to_mitigate.extend(statuses) + + def _fetch_and_create_endpoints(self) -> dict[EndpointUniqueKey, Endpoint]: + """ + Fetch existing product endpoints and bulk_create missing ones. + + Returns a key -> Endpoint mapping for all endpoints referenced in _endpoints_to_create. 
+ """ + if not self._endpoints_to_create: + return {} + + key_to_endpoint: dict[EndpointUniqueKey, Endpoint] = {} + + with transaction.atomic(): + # Fetch all existing endpoints for this product + for ep in ( + Endpoint.objects.filter(product=self._product) + .only("id", "protocol", "userinfo", "host", "port", "path", "query", "fragment", "product_id") + .order_by("id") + .iterator() + ): + key = self._make_endpoint_unique_tuple( + protocol=ep.protocol, + userinfo=ep.userinfo, + host=ep.host, + port=ep.port, + path=ep.path, + query=ep.query, + fragment=ep.fragment, + product_id=ep.product_id, + ) + # First-by-id wins, matching endpoint_get_or_create behavior + if key not in key_to_endpoint: + key_to_endpoint[key] = ep + + # Determine which endpoints still need creating + to_create = [] + to_create_keys = [] + for key, kwargs in self._endpoints_to_create.items(): + if key not in key_to_endpoint: + to_create.append(Endpoint(**kwargs)) + to_create_keys.append(key) + + if to_create: + created = Endpoint.objects.bulk_create(to_create, batch_size=1000) + key_to_endpoint.update(zip(to_create_keys, created, strict=True)) + + self._endpoints_to_create.clear() + return key_to_endpoint + + def persist(self, user: Dojo_User | None = None) -> None: + """ + Persist all accumulated endpoint operations to the database. + + Called at batch boundaries during import/reimport. 
+ """ + # Step 1: Ensure all recorded endpoints exist in DB + key_to_endpoint = self._fetch_and_create_endpoints() + + # Step 2: Bulk-create Endpoint_Status rows + if self._statuses_to_create: + rows = [ + Endpoint_Status( + finding=finding, + endpoint=key_to_endpoint[key], + date=finding.date, + ) + for finding, key in self._statuses_to_create + if key in key_to_endpoint ] - self.chunk_endpoints_and_reactivate(endpoint_status_to_reactivate) - self.chunk_endpoints_and_mitigate(endpoint_status_to_mitigate, user) + if rows: + Endpoint_Status.objects.bulk_create(rows, ignore_conflicts=True, batch_size=1000) + self._statuses_to_create.clear() + + # Step 3: Bulk-update mitigated endpoint statuses + if self._statuses_to_mitigate: + now = timezone.now() + to_update = [] + for endpoint_status in self._statuses_to_mitigate: + if endpoint_status.mitigated is False: + endpoint_status.mitigated_time = now + endpoint_status.last_modified = now + endpoint_status.mitigated_by = user + endpoint_status.mitigated = True + to_update.append(endpoint_status) + if to_update: + Endpoint_Status.objects.bulk_update( + to_update, + ["mitigated", "mitigated_time", "last_modified", "mitigated_by"], + batch_size=1000, + ) + self._statuses_to_mitigate.clear() + + # Step 4: Bulk-update reactivated endpoint statuses + if self._statuses_to_reactivate: + now = timezone.now() + to_update = [] + for endpoint_status in self._statuses_to_reactivate: + if endpoint_status.mitigated: + logger.debug("Re-import: reactivating endpoint %s that is present in this scan", endpoint_status.endpoint) + endpoint_status.mitigated_by = None + endpoint_status.mitigated_time = None + endpoint_status.mitigated = False + endpoint_status.last_modified = now + to_update.append(endpoint_status) + if to_update: + Endpoint_Status.objects.bulk_update( + to_update, + ["mitigated", "mitigated_time", "mitigated_by", "last_modified"], + batch_size=1000, + ) + self._statuses_to_reactivate.clear() diff --git 
a/unittests/test_endpoint_manager.py b/unittests/test_endpoint_manager.py new file mode 100644 index 00000000000..af7fa8ccb47 --- /dev/null +++ b/unittests/test_endpoint_manager.py @@ -0,0 +1,83 @@ +from django.test import TestCase + +from dojo.importers.endpoint_manager import EndpointManager, EndpointUniqueKey + + +class TestMakeEndpointUniqueTuple(TestCase): + + """Tests for EndpointManager._make_endpoint_unique_tuple normalization.""" + + def _make(self, **kwargs): + defaults = { + "protocol": None, + "userinfo": None, + "host": None, + "port": None, + "path": None, + "query": None, + "fragment": None, + "product_id": 1, + } + defaults.update(kwargs) + return EndpointManager._make_endpoint_unique_tuple(**defaults) + + def test_protocol_case_insensitive(self): + a = self._make(protocol="HTTP", host="example.com") + b = self._make(protocol="http", host="example.com") + self.assertEqual(a, b) + + def test_host_case_insensitive(self): + a = self._make(host="Example.COM") + b = self._make(host="example.com") + self.assertEqual(a, b) + + def test_default_port_normalized_to_none_https(self): + a = self._make(protocol="https", host="example.com", port=443) + b = self._make(protocol="https", host="example.com", port=None) + self.assertEqual(a, b) + self.assertIsNone(a.port) + + def test_default_port_normalized_to_none_http(self): + a = self._make(protocol="http", host="example.com", port=80) + b = self._make(protocol="http", host="example.com", port=None) + self.assertEqual(a, b) + + def test_non_default_port_preserved(self): + a = self._make(protocol="https", host="example.com", port=8443) + b = self._make(protocol="https", host="example.com", port=None) + self.assertNotEqual(a, b) + self.assertEqual(a.port, 8443) + + def test_none_fields_handled(self): + key = self._make() + self.assertIsNone(key.protocol) + self.assertIsNone(key.host) + self.assertIsNone(key.port) + self.assertIsNone(key.path) + + def test_empty_string_fields_normalized_to_none(self): + key = 
self._make(userinfo="", path="", query="", fragment="") + self.assertIsNone(key.userinfo) + self.assertIsNone(key.path) + self.assertIsNone(key.query) + self.assertIsNone(key.fragment) + + def test_different_products_different_keys(self): + a = self._make(host="example.com", product_id=1) + b = self._make(host="example.com", product_id=2) + self.assertNotEqual(a, b) + + def test_returns_named_tuple(self): + key = self._make(protocol="https", host="example.com", path="/api") + self.assertIsInstance(key, EndpointUniqueKey) + self.assertEqual(key.protocol, "https") + self.assertEqual(key.host, "example.com") + self.assertEqual(key.path, "/api") + + def test_port_without_known_protocol_preserved(self): + key = self._make(protocol="custom", host="example.com", port=9999) + self.assertEqual(key.port, 9999) + + def test_port_none_without_known_protocol(self): + key = self._make(protocol="custom", host="example.com", port=None) + self.assertIsNone(key.port) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 565e04f98e6..b54f04c55c2 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -268,12 +268,12 @@ def test_import_reimport_reimport_performance_pghistory_async(self): configure_pghistory_triggers() self._import_reimport_performance( - expected_num_queries1=295, - expected_num_async_tasks1=6, - expected_num_queries2=226, - expected_num_async_tasks2=17, - expected_num_queries3=108, - expected_num_async_tasks3=16, + expected_num_queries1=139, + expected_num_async_tasks1=1, + expected_num_queries2=115, + expected_num_async_tasks2=1, + expected_num_queries3=29, + expected_num_async_tasks3=1, ) @override_settings(ENABLE_AUDITLOG=True) @@ -290,12 +290,12 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self): testuser.usercontactinfo.save() self._import_reimport_performance( - expected_num_queries1=302, - expected_num_async_tasks1=6, - expected_num_queries2=233, 
- expected_num_async_tasks2=17, - expected_num_queries3=115, - expected_num_async_tasks3=16, + expected_num_queries1=146, + expected_num_async_tasks1=1, + expected_num_queries2=122, + expected_num_async_tasks2=1, + expected_num_queries3=36, + expected_num_async_tasks3=1, ) @override_settings(ENABLE_AUDITLOG=True) @@ -313,12 +313,12 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr self.system_settings(enable_product_grade=True) self._import_reimport_performance( - expected_num_queries1=309, - expected_num_async_tasks1=8, - expected_num_queries2=240, - expected_num_async_tasks2=19, - expected_num_queries3=119, - expected_num_async_tasks3=18, + expected_num_queries1=153, + expected_num_async_tasks1=3, + expected_num_queries2=129, + expected_num_async_tasks2=3, + expected_num_queries3=40, + expected_num_async_tasks3=3, ) # Deduplication is enabled in the tests above, but to properly test it we must run the same import twice and capture the results. @@ -436,10 +436,10 @@ def test_deduplication_performance_pghistory_async(self): self.system_settings(enable_deduplication=True) self._deduplication_performance( - expected_num_queries1=264, - expected_num_async_tasks1=7, - expected_num_queries2=175, - expected_num_async_tasks2=7, + expected_num_queries1=74, + expected_num_async_tasks1=1, + expected_num_queries2=69, + expected_num_async_tasks2=1, check_duplicates=False, # Async mode - deduplication happens later ) @@ -457,8 +457,8 @@ def test_deduplication_performance_pghistory_no_async(self): testuser.usercontactinfo.save() self._deduplication_performance( - expected_num_queries1=271, - expected_num_async_tasks1=7, - expected_num_queries2=236, - expected_num_async_tasks2=7, + expected_num_queries1=81, + expected_num_async_tasks1=1, + expected_num_queries2=130, + expected_num_async_tasks2=1, ) From bcdac76cf3928a7e79a4f5d0cdf6fd66b04dfb2f Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Wed, 11 Mar 2026 14:58:15 +0100 Subject: [PATCH 
02/12] fix endpoint manager initialization --- dojo/importers/default_importer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 1d988848210..93f20e8a593 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -84,6 +84,9 @@ def create_test( api_scan_configuration=self.api_scan_configuration, tags=self.tags, ) + # Initialize the endpoint manager now that self.test is available + if not settings.V3_FEATURE_LOCATIONS: + self.endpoint_manager = EndpointManager(self.test.engagement.product) return self.test def process_scan( @@ -111,9 +114,6 @@ def process_scan( # Get the findings from the parser based on what methods the parser supplies # This could either mean traditional file parsing, or API pull parsing parsed_findings = self.parse_findings(scan, parser) or [] - # Initialize the endpoint manager now that self.test is available - if not settings.V3_FEATURE_LOCATIONS: - self.endpoint_manager = EndpointManager(self.test.engagement.product) new_findings = self.process_findings(parsed_findings, **kwargs) # Close any old findings in the processed list if the the user specified for that # to occur in the form that is then passed to the kwargs From 5539db23c8211e01c77f1ef741062e0cca19a620 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Wed, 11 Mar 2026 15:05:01 +0100 Subject: [PATCH 03/12] fix(importers): restore tag inheritance and endpoint_manager init for direct callers - bulk_create bypasses Django post_save signals, so manually call inherit_instance_tags() for each newly created Endpoint to preserve product tag inheritance behavior - Initialize endpoint_manager in create_test() so callers that invoke create_test() + process_findings() directly (without going through process_scan()) don't hit a NoneType error --- dojo/importers/endpoint_manager.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/dojo/importers/endpoint_manager.py b/dojo/importers/endpoint_manager.py index 7df87f10036..132921a53da 100644 --- a/dojo/importers/endpoint_manager.py +++ b/dojo/importers/endpoint_manager.py @@ -13,6 +13,7 @@ Finding, Product, ) +from dojo.tags_signals import inherit_instance_tags logger = logging.getLogger(__name__) @@ -203,6 +204,10 @@ def _fetch_and_create_endpoints(self) -> dict[EndpointUniqueKey, Endpoint]: if to_create: created = Endpoint.objects.bulk_create(to_create, batch_size=1000) key_to_endpoint.update(zip(to_create_keys, created, strict=True)) + # bulk_create bypasses post_save signals, so manually trigger tag inheritance + # this is not ideal, but we need to take a separate look at the tag inheritance feature itself later + for ep in created: + inherit_instance_tags(ep) self._endpoints_to_create.clear() return key_to_endpoint From 260b31374d463ec77137bd5216a0cea85ca34232 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 14 Mar 2026 16:27:19 +0100 Subject: [PATCH 04/12] refactor(importers): explicit test param for endpoint manager, rename and improve get_or_create_endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `test` parameter to `_create_endpoint_manager()` so callers pass the test explicitly instead of relying on `self.test` being set - Raise `ValueError` with a clear message when `test is None` - Initialize endpoint_manager in `DefaultImporter.process_scan()` after `parse_findings()` (which calls `create_test()` for fresh imports), covering both the sync path and the async Celery task path - Rename `_fetch_and_create_endpoints()` → `get_or_create_endpoints()` - Change return type to `tuple[dict, list]`: returns `(endpoints_by_key, created)` so callers know exactly which endpoints were newly inserted - Rename local `key_to_endpoint` → `endpoints_by_key` for clarity --- dojo/importers/default_importer.py | 16 ++++++++++++--- dojo/importers/default_reimporter.py | 6 +++++- 
dojo/importers/endpoint_manager.py | 30 ++++++++++++++++------------ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 93f20e8a593..59482eddf17 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -84,11 +84,14 @@ def create_test( api_scan_configuration=self.api_scan_configuration, tags=self.tags, ) - # Initialize the endpoint manager now that self.test is available - if not settings.V3_FEATURE_LOCATIONS: - self.endpoint_manager = EndpointManager(self.test.engagement.product) return self.test + def _create_endpoint_manager(self, test: Test) -> EndpointManager: + """Factory method — override in subclasses to inject a custom EndpointManager.""" + if test is None: + raise ValueError("Cannot create EndpointManager: test is None. Ensure a test is created before initializing the endpoint manager.") + return EndpointManager(test.engagement.product) + def process_scan( self, scan: TemporaryUploadedFile, @@ -113,7 +116,14 @@ def process_scan( parser = self.get_parser() # Get the findings from the parser based on what methods the parser supplies # This could either mean traditional file parsing, or API pull parsing + # Note: for fresh imports, parse_findings() calls create_test() internally, + # so self.test is guaranteed to be set after this call. parsed_findings = self.parse_findings(scan, parser) or [] + # Initialize the endpoint manager now that self.test is available. + # This covers both the sync path (test created inside parse_findings above) + # and the async path (test was already set before process_scan was called). 
+ if not settings.V3_FEATURE_LOCATIONS: + self.endpoint_manager = self._create_endpoint_manager(self.test) new_findings = self.process_findings(parsed_findings, **kwargs) # Close any old findings in the processed list if the the user specified for that # to occur in the form that is then passed to the kwargs diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index fd4d71c4a38..b423a089bc9 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -78,6 +78,10 @@ def __init__(self, *args, **kwargs): **kwargs, ) + def _create_endpoint_manager(self) -> EndpointManager: + """Factory method — override in subclasses to inject a custom EndpointManager.""" + return EndpointManager(self.test.engagement.product) + def process_scan( self, scan: TemporaryUploadedFile, @@ -98,7 +102,7 @@ def process_scan( logger.debug(f"REIMPORT_SCAN: parameters: {locals()}") # Initialize the endpoint manager now that self.test is available if not settings.V3_FEATURE_LOCATIONS: - self.endpoint_manager = EndpointManager(self.test.engagement.product) + self.endpoint_manager = self._create_endpoint_manager(self.test) # Validate the Tool_Configuration self.verify_tool_configuration_from_test() # Fetch the parser based upon the string version of the scan type diff --git a/dojo/importers/endpoint_manager.py b/dojo/importers/endpoint_manager.py index 132921a53da..62cd7c350d0 100644 --- a/dojo/importers/endpoint_manager.py +++ b/dojo/importers/endpoint_manager.py @@ -160,16 +160,19 @@ def record_statuses_to_mitigate(self, statuses: list[Endpoint_Status]) -> None: """Accumulate endpoint statuses for bulk mitigation in persist().""" self._statuses_to_mitigate.extend(statuses) - def _fetch_and_create_endpoints(self) -> dict[EndpointUniqueKey, Endpoint]: + def get_or_create_endpoints(self) -> tuple[dict[EndpointUniqueKey, Endpoint], list[Endpoint]]: """ - Fetch existing product endpoints and bulk_create missing ones. 
+ For each queued endpoint record, fetch the existing DB row or bulk_create a new one. - Returns a key -> Endpoint mapping for all endpoints referenced in _endpoints_to_create. + Returns: + (endpoints_by_key, created) where: + - endpoints_by_key maps each EndpointUniqueKey to its Endpoint object (existing or new) + - created is the list of Endpoint objects that were actually inserted into the DB """ if not self._endpoints_to_create: - return {} + return {}, [] - key_to_endpoint: dict[EndpointUniqueKey, Endpoint] = {} + endpoints_by_key: dict[EndpointUniqueKey, Endpoint] = {} with transaction.atomic(): # Fetch all existing endpoints for this product @@ -190,27 +193,28 @@ def _fetch_and_create_endpoints(self) -> dict[EndpointUniqueKey, Endpoint]: product_id=ep.product_id, ) # First-by-id wins, matching endpoint_get_or_create behavior - if key not in key_to_endpoint: - key_to_endpoint[key] = ep + if key not in endpoints_by_key: + endpoints_by_key[key] = ep # Determine which endpoints still need creating to_create = [] to_create_keys = [] for key, kwargs in self._endpoints_to_create.items(): - if key not in key_to_endpoint: + if key not in endpoints_by_key: to_create.append(Endpoint(**kwargs)) to_create_keys.append(key) + created: list[Endpoint] = [] if to_create: created = Endpoint.objects.bulk_create(to_create, batch_size=1000) - key_to_endpoint.update(zip(to_create_keys, created, strict=True)) + endpoints_by_key.update(zip(to_create_keys, created, strict=True)) # bulk_create bypasses post_save signals, so manually trigger tag inheritance # this is not ideal, but we need to take a separate look at the tag inheritance feature itself later for ep in created: inherit_instance_tags(ep) self._endpoints_to_create.clear() - return key_to_endpoint + return endpoints_by_key, created def persist(self, user: Dojo_User | None = None) -> None: """ @@ -219,18 +223,18 @@ def persist(self, user: Dojo_User | None = None) -> None: Called at batch boundaries during import/reimport. 
""" # Step 1: Ensure all recorded endpoints exist in DB - key_to_endpoint = self._fetch_and_create_endpoints() + endpoints_by_key, _ = self.get_or_create_endpoints() # Step 2: Bulk-create Endpoint_Status rows if self._statuses_to_create: rows = [ Endpoint_Status( finding=finding, - endpoint=key_to_endpoint[key], + endpoint=endpoints_by_key[key], date=finding.date, ) for finding, key in self._statuses_to_create - if key in key_to_endpoint + if key in endpoints_by_key ] if rows: Endpoint_Status.objects.bulk_create(rows, ignore_conflicts=True, batch_size=1000) From ea1963b812a637b96c98a9226eb478b588a584d9 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 15 Mar 2026 01:07:36 +0100 Subject: [PATCH 05/12] ruff --- dojo/importers/default_importer.py | 3 ++- dojo/importers/endpoint_manager.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 59482eddf17..996085c0d5a 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -89,7 +89,8 @@ def create_test( def _create_endpoint_manager(self, test: Test) -> EndpointManager: """Factory method — override in subclasses to inject a custom EndpointManager.""" if test is None: - raise ValueError("Cannot create EndpointManager: test is None. Ensure a test is created before initializing the endpoint manager.") + msg = "Cannot create EndpointManager: test is None. Ensure a test is created before initializing the endpoint manager." 
+ raise ValueError(msg) return EndpointManager(test.engagement.product) def process_scan( diff --git a/dojo/importers/endpoint_manager.py b/dojo/importers/endpoint_manager.py index 62cd7c350d0..4d89b19072a 100644 --- a/dojo/importers/endpoint_manager.py +++ b/dojo/importers/endpoint_manager.py @@ -168,6 +168,7 @@ def get_or_create_endpoints(self) -> tuple[dict[EndpointUniqueKey, Endpoint], li (endpoints_by_key, created) where: - endpoints_by_key maps each EndpointUniqueKey to its Endpoint object (existing or new) - created is the list of Endpoint objects that were actually inserted into the DB + """ if not self._endpoints_to_create: return {}, [] From f5ab948acad8e71ef7cc0e318fae0d09e42fc4c7 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 15 Mar 2026 10:04:51 +0100 Subject: [PATCH 06/12] fix create endpoint manager --- dojo/importers/default_reimporter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index b423a089bc9..6d462c39e07 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -78,9 +78,9 @@ def __init__(self, *args, **kwargs): **kwargs, ) - def _create_endpoint_manager(self) -> EndpointManager: + def _create_endpoint_manager(self, test: Test) -> EndpointManager: """Factory method — override in subclasses to inject a custom EndpointManager.""" - return EndpointManager(self.test.engagement.product) + return EndpointManager(test.engagement.product) def process_scan( self, From d3a20df5070b6da5dd40988bfa55b82e6a93b57a Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 15 Mar 2026 10:07:44 +0100 Subject: [PATCH 07/12] fix counts --- unittests/test_importers_performance.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 7aeafe31bde..f9d9e7dee47 100644 --- 
a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -457,8 +457,8 @@ def test_deduplication_performance_pghistory_no_async(self): testuser.usercontactinfo.save() self._deduplication_performance( - expected_num_queries1=271, - expected_num_async_tasks1=7, - expected_num_queries2=183, - expected_num_async_tasks2=7, + expected_num_queries1=81, + expected_num_async_tasks1=1, + expected_num_queries2=77, + expected_num_async_tasks2=1, ) From 9705de838766a0c8ec6e42ca41ed09ec56f06813 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Mon, 16 Mar 2026 21:56:27 +0100 Subject: [PATCH 08/12] refactor(importers): initialize EndpointManager eagerly in __init__ Instead of lazily initializing endpoint_manager in process_scan() or create_test(), initialize it immediately in __init__ using the product from the required engagement/test parameter. This eliminates the NoneType AttributeError when callers invoke create_test() + process_findings() directly without going through process_scan(). 
- DefaultImporter: uses self.engagement.product (engagement is required) - DefaultReImporter: uses self.test.engagement.product (test is required) - Remove _create_endpoint_manager() factory methods and lazy init sites - Remove self.endpoint_manager = None from BaseImporter.__init__ --- dojo/importers/base_importer.py | 5 ----- dojo/importers/default_importer.py | 14 ++------------ dojo/importers/default_reimporter.py | 9 ++------- 3 files changed, 4 insertions(+), 24 deletions(-) diff --git a/dojo/importers/base_importer.py b/dojo/importers/base_importer.py index 743da0d862a..cdc4f171077 100644 --- a/dojo/importers/base_importer.py +++ b/dojo/importers/base_importer.py @@ -82,11 +82,6 @@ def __init__( ImporterOptions.__init__(self, *args, **kwargs) if settings.V3_FEATURE_LOCATIONS: self.location_manager = LocationManager() - else: - # TODO: Delete this after the move to Locations - # EndpointManager is initialized in each subclass's process_scan() - # after self.test is available, because it needs the product. - self.endpoint_manager = None def check_child_implementation_exception(self): """ diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 996085c0d5a..838490663ec 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -57,6 +57,8 @@ def __init__(self, *args, **kwargs): import_type=Test_Import.IMPORT_TYPE, **kwargs, ) + if not settings.V3_FEATURE_LOCATIONS: + self.endpoint_manager = EndpointManager(self.engagement.product) def create_test( self, @@ -86,13 +88,6 @@ def create_test( ) return self.test - def _create_endpoint_manager(self, test: Test) -> EndpointManager: - """Factory method — override in subclasses to inject a custom EndpointManager.""" - if test is None: - msg = "Cannot create EndpointManager: test is None. Ensure a test is created before initializing the endpoint manager." 
- raise ValueError(msg) - return EndpointManager(test.engagement.product) - def process_scan( self, scan: TemporaryUploadedFile, @@ -120,11 +115,6 @@ def process_scan( # Note: for fresh imports, parse_findings() calls create_test() internally, # so self.test is guaranteed to be set after this call. parsed_findings = self.parse_findings(scan, parser) or [] - # Initialize the endpoint manager now that self.test is available. - # This covers both the sync path (test created inside parse_findings above) - # and the async path (test was already set before process_scan was called). - if not settings.V3_FEATURE_LOCATIONS: - self.endpoint_manager = self._create_endpoint_manager(self.test) new_findings = self.process_findings(parsed_findings, **kwargs) # Close any old findings in the processed list if the the user specified for that # to occur in the form that is then passed to the kwargs diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 6d462c39e07..d26b5d63fb7 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -77,10 +77,8 @@ def __init__(self, *args, **kwargs): import_type=Test_Import.REIMPORT_TYPE, **kwargs, ) - - def _create_endpoint_manager(self, test: Test) -> EndpointManager: - """Factory method — override in subclasses to inject a custom EndpointManager.""" - return EndpointManager(test.engagement.product) + if not settings.V3_FEATURE_LOCATIONS: + self.endpoint_manager = EndpointManager(self.test.engagement.product) def process_scan( self, @@ -100,9 +98,6 @@ def process_scan( - Update the test progress """ logger.debug(f"REIMPORT_SCAN: parameters: {locals()}") - # Initialize the endpoint manager now that self.test is available - if not settings.V3_FEATURE_LOCATIONS: - self.endpoint_manager = self._create_endpoint_manager(self.test) # Validate the Tool_Configuration self.verify_tool_configuration_from_test() # Fetch the parser based upon the string version of the scan type From 
c6e25aadff950c1809a6f51b0479e54beba3be70 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Mon, 23 Mar 2026 20:43:33 +0100 Subject: [PATCH 09/12] add test case --- ...hawk_two_vul_same_hashcode_fabricated.json | 77 ++++++ unittests/test_reimport_prefetch.py | 255 ++++++++++++++++++ 2 files changed, 332 insertions(+) create mode 100644 unittests/scans/stackhawk/stackhawk_two_vul_same_hashcode_fabricated.json create mode 100644 unittests/test_reimport_prefetch.py diff --git a/unittests/scans/stackhawk/stackhawk_two_vul_same_hashcode_fabricated.json b/unittests/scans/stackhawk/stackhawk_two_vul_same_hashcode_fabricated.json new file mode 100644 index 00000000000..ab9ed464215 --- /dev/null +++ b/unittests/scans/stackhawk/stackhawk_two_vul_same_hashcode_fabricated.json @@ -0,0 +1,77 @@ +{ + "service": "StackHawk", + "scanCompleted": { + "scan": { + "id": "f3aa6752-8fef-58fa-c854-1d3g8e972f38", + "hawkscanVersion": "2.1.1", + "env": "Development", + "status": "COMPLETED", + "application": "Secured Application", + "startedTimestamp": "2022-02-16T23:07:19.575Z", + "scanURL": "https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38" + }, + "scanDuration": "21", + "spiderDuration": "45", + "completedScanStats": { + "urlsCount": "2", + "duration": "66", + "scanResultsStats": { + "totalCount": "2", + "lowCount": "0", + "mediumCount": "0", + "highCount": "2", + "lowTriagedCount": "0", + "mediumTriagedCount": "0", + "highTriagedCount": "0" + } + }, + "findings": [ + { + "pluginId": "90001", + "pluginName": "Insecure JSF ViewState", + "severity": "High", + "host": "https://localhost:9000", + "paths": [ + { + "path": "/app/login", + "method": "GET", + "status": "NEW", + "pathURL": "https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38/finding/90001/path/1/message/1" + } + ], + "pathStats": [ + { + "status": "NEW", + "count": 1 + } + ], + "totalCount": "1", + "category": "Security Misconfiguration", + "findingURL": 
"https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38/finding/90001" + }, + { + "pluginId": "90002", + "pluginName": "Insecure HTTP Header", + "severity": "High", + "host": "https://localhost:9000", + "paths": [ + { + "path": "/app/dashboard", + "method": "GET", + "status": "NEW", + "pathURL": "https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38/finding/90002/path/1/message/1" + } + ], + "pathStats": [ + { + "status": "NEW", + "count": 1 + } + ], + "totalCount": "1", + "category": "Security Misconfiguration", + "findingURL": "https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38/finding/90002" + } + ] + } +} diff --git a/unittests/test_reimport_prefetch.py b/unittests/test_reimport_prefetch.py new file mode 100644 index 00000000000..67265135925 --- /dev/null +++ b/unittests/test_reimport_prefetch.py @@ -0,0 +1,255 @@ +""" +Regression tests for handling duplicate findings within the same reimport report. + +When a scan report contains two findings that produce the same hash_code, the first +creates a new finding via process_finding_that_was_not_matched() and gets added to +the candidate dictionaries. When the second finding in the same batch matches against +this newly-created finding, accessing existing_finding.status_finding_non_special +raises AttributeError because the finding was never loaded through +build_candidate_scope_queryset (which sets up the Prefetch with to_attr). 
+ +Bugfix: https://github.com/DefectDojo/django-DefectDojo/pull/14569 +Batch endpoint optimization (related): https://github.com/DefectDojo/django-DefectDojo/pull/14489 +""" + +from unittest.mock import patch + +from crum import impersonate +from django.conf import settings +from django.test import override_settings +from django.utils import timezone + +from dojo.importers.default_reimporter import DefaultReImporter +from dojo.location.models import LocationFindingReference +from dojo.location.status import FindingLocationStatus +from dojo.models import ( + Development_Environment, + Dojo_User, + Endpoint, + Endpoint_Status, + Engagement, + Finding, + Product, + Product_Type, + Test, + Test_Type, + User, + UserContactInfo, +) + +from .dojo_test_case import DojoTestCase, get_unit_tests_scans_path, skip_unless_v2 + +SCAN_TYPE = "StackHawk HawkScan" + +# Reimport: two findings with different pluginIds (so the parser keeps both as separate +# findings) but identical hash_code fields (component_name + component_version). +# Each finding has one unique endpoint/location path (/app/login and /app/dashboard). +# This triggers the same-batch matching scenario where the second finding matches +# against the first one that was just created in the same reimport batch. +REIMPORT_SCAN = get_unit_tests_scans_path("stackhawk") / "stackhawk_two_vul_same_hashcode_fabricated.json" + + +def _hashcode_fields_without_vuln_id(): + """ + Return a copy of HASHCODE_FIELDS_PER_SCANNER with StackHawk configured to + hash only on component_name + component_version (excluding vuln_id_from_tool). + + By default StackHawk hashes on [vuln_id_from_tool, component_name, component_version]. + Since vuln_id_from_tool maps to pluginId and the parser deduplicates by pluginId, + it is normally impossible for two separate findings to share a hash_code. 
+ + By removing vuln_id_from_tool from the hash fields, two findings with different + pluginIds but the same application/env metadata will produce the same hash_code, + allowing us to exercise the same-batch matching code path. + """ + fields = dict(settings.HASHCODE_FIELDS_PER_SCANNER) + fields[SCAN_TYPE] = ["component_name", "component_version"] + return fields + + +class ReimportDuplicateFindingsTestBase(DojoTestCase): + + """Shared setup for duplicate-findings-in-same-report tests.""" + + def setUp(self): + super().setUp() + testuser, _ = User.objects.get_or_create(username="admin") + UserContactInfo.objects.get_or_create(user=testuser, defaults={"block_execution": True}) + + self.system_settings(enable_deduplication=True) + self.system_settings(enable_product_grade=False) + + product_type, _ = Product_Type.objects.get_or_create(name="test") + product, _ = Product.objects.get_or_create( + name="ReimportPrefetchTest", + description="Test", + prod_type=product_type, + ) + engagement, _ = Engagement.objects.get_or_create( + name="Test Engagement", + product=product, + target_start=timezone.now(), + target_end=timezone.now(), + ) + self.lead, _ = User.objects.get_or_create(username="admin") + environment, _ = Development_Environment.objects.get_or_create(name="Development") + test_type, _ = Test_Type.objects.get_or_create(name=SCAN_TYPE) + self.test = Test.objects.create( + engagement=engagement, + test_type=test_type, + scan_type=SCAN_TYPE, + target_start=timezone.now(), + target_end=timezone.now(), + environment=environment, + ) + self.product = product + + def _reimport_with_overridden_hashcode(self): + """ + Reimport the two-finding scan file with overridden hash settings. + + We mock get_custom_method to return None so that the OSS hash_code logic + is used (reads from django.settings, which override_settings can modify). 
+ In Pro, compute_hash_code is overridden via FINDING_COMPUTE_HASH_METHOD to + read hash settings from the TunableSettings database model instead of + django.settings, which makes override_settings ineffective. + """ + hashcode_override = _hashcode_fields_without_vuln_id() + with ( + impersonate(Dojo_User.objects.get(username="admin")), + override_settings(HASHCODE_FIELDS_PER_SCANNER=hashcode_override), + patch("dojo.utils.get_custom_method", return_value=None), + REIMPORT_SCAN.open(encoding="utf-8") as scan, + ): + reimporter = DefaultReImporter( + test=self.test, + user=self.lead, + lead=self.lead, + scan_date=None, + minimum_severity="Info", + active=True, + verified=True, + sync=True, + scan_type=SCAN_TYPE, + ) + return reimporter.process_scan(scan) + + +@skip_unless_v2 +class TestReimportDuplicateFindingsEndpointHandling(ReimportDuplicateFindingsTestBase): + + """Regression test: reimport must handle endpoints correctly for duplicate findings in the same report.""" + + def test_reimport_duplicate_findings_in_same_report_endpoints(self): + """ + Reimporting a report with two findings that share the same hash_code must + not raise AttributeError on status_finding_non_special, and must correctly + create endpoints for both findings. + + The scan file has two findings with different pluginIds, each with one + unique endpoint (/app/login and /app/dashboard). With the overridden hash + settings, both produce the same hash_code. The first finding is created as + new and added to candidates. The second finding matches against it (a + batch-created finding), and its endpoint is added via finding_post_processing. + """ + # This previously raised: + # AttributeError: 'Finding' object has no attribute 'status_finding_non_special' + test, _, len_new, len_closed, _, _, _ = self._reimport_with_overridden_hashcode() + + # The first finding is new (empty test, no candidates). The second finding + # matches the first (same hash_code) so it is not counted as new. 
+ self.assertEqual(len_new, 1, "Reimport should create one new finding (second matches first)") + self.assertEqual(len_closed, 0, "No findings should be closed") + + # Only one finding should exist — the second was matched to the first + findings = Finding.objects.filter(test=self.test) + self.assertEqual(findings.count(), 1, "Only one finding should exist after reimport") + + finding = findings.first() + + # The first finding (new, pluginId=90001) creates endpoint /app/login. + # The second finding (pluginId=90002) matches the first via hash_code. + # finding_post_processing runs for BOTH iterations, adding each finding's + # endpoints to the (single) matched finding. So the finding ends up with + # 2 endpoints: /app/login (from the first) and /app/dashboard (from the second). + endpoints = Endpoint.objects.filter(product=self.product) + self.assertEqual(endpoints.count(), 2, "Two endpoints should be created (one per finding in report)") + + endpoint_statuses = Endpoint_Status.objects.filter(finding=finding) + self.assertEqual(endpoint_statuses.count(), 2, "Finding should have two endpoint statuses") + + # Because the second finding is dynamic, update_endpoint_status() compares + # the first finding's existing endpoint statuses against the second finding's + # unsaved_endpoints. The second finding only has /app/dashboard, so /app/login + # (which belongs to the first finding) is considered "no longer present" and + # gets mitigated. This is arguably wrong for batch-created findings — both + # endpoints came from the same report — but it is the current behavior. 
+ self.assertEqual( + endpoint_statuses.filter(mitigated=False).count(), 1, + "One endpoint status should be active (/app/dashboard from the matched finding)", + ) + self.assertEqual( + endpoint_statuses.filter(mitigated=True).count(), 1, + "One endpoint status should be mitigated (/app/login — mitigated by update_endpoint_status " + "because it is not in the second finding's endpoint list)", + ) + + +@override_settings(V3_FEATURE_LOCATIONS=True) +class TestReimportDuplicateFindingsLocationHandling(ReimportDuplicateFindingsTestBase): + + """Test that reimport handles locations correctly for duplicate findings in the same report.""" + + def test_reimport_duplicate_findings_in_same_report_locations(self): + """ + Reimporting a report with two findings that share the same hash_code must + correctly create locations for both findings. + + The locations code does not use the to_attr prefetch that caused the + AttributeError in the endpoint code path, so it does not crash. However, + the same logical issue applies: update_location_status() compares the + batch-created finding's locations against the second finding's unsaved + locations, mitigating locations that are "no longer present". + + The scan file has two findings with different pluginIds, each with one + unique location (https://localhost:9000/app/login and .../app/dashboard). + With the overridden hash settings, both produce the same hash_code. The + first finding is created as new and added to candidates. The second finding + matches against it (a batch-created finding). + """ + test, _, len_new, len_closed, _, _, _ = self._reimport_with_overridden_hashcode() + + # The first finding is new (empty test, no candidates). The second finding + # matches the first (same hash_code) so it is not counted as new. 
+ self.assertEqual(len_new, 1, "Reimport should create one new finding (second matches first)") + self.assertEqual(len_closed, 0, "No findings should be closed") + + # Only one finding should exist — the second was matched to the first + findings = Finding.objects.filter(test=self.test) + self.assertEqual(findings.count(), 1, "Only one finding should exist after reimport") + + finding = findings.first() + + # The first finding (new, pluginId=90001) creates location for /app/login. + # The second finding (pluginId=90002) matches the first via hash_code. + # finding_post_processing runs for BOTH iterations, adding each finding's + # locations to the (single) matched finding. So the finding ends up with + # 2 location references pointing to 2 distinct Location objects. + location_refs = LocationFindingReference.objects.filter(finding=finding) + self.assertEqual(location_refs.count(), 2, "Finding should have two location references") + + # Same behavior as endpoints: update_location_status() compares the first + # finding's existing location refs against the second finding's unsaved_locations. + # The second finding only has /app/dashboard, so /app/login (which belongs to + # the first finding) is considered "no longer present" and gets mitigated. + # This is arguably wrong for batch-created findings — both locations came from + # the same report — but it is the current behavior. 
+ self.assertEqual( + location_refs.filter(status=FindingLocationStatus.Active).count(), 1, + "One location ref should be active (/app/dashboard from the matched finding)", + ) + self.assertEqual( + location_refs.filter(status=FindingLocationStatus.Mitigated).count(), 1, + "One location ref should be mitigated (/app/login — mitigated by update_location_status " + "because it is not in the second finding's location list)", + ) From 2a40617fa19efd6e4d0764b21edd52fe0091f3b9 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Mon, 23 Mar 2026 21:46:00 +0100 Subject: [PATCH 10/12] ruff --- unittests/test_reimport_prefetch.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/unittests/test_reimport_prefetch.py b/unittests/test_reimport_prefetch.py index 67265135925..71355dc209e 100644 --- a/unittests/test_reimport_prefetch.py +++ b/unittests/test_reimport_prefetch.py @@ -154,7 +154,7 @@ def test_reimport_duplicate_findings_in_same_report_endpoints(self): """ # This previously raised: # AttributeError: 'Finding' object has no attribute 'status_finding_non_special' - test, _, len_new, len_closed, _, _, _ = self._reimport_with_overridden_hashcode() + _test, _, len_new, len_closed, _, _, _ = self._reimport_with_overridden_hashcode() # The first finding is new (empty test, no candidates). The second finding # matches the first (same hash_code) so it is not counted as new. @@ -185,11 +185,13 @@ def test_reimport_duplicate_findings_in_same_report_endpoints(self): # gets mitigated. This is arguably wrong for batch-created findings — both # endpoints came from the same report — but it is the current behavior. 
self.assertEqual( - endpoint_statuses.filter(mitigated=False).count(), 1, + endpoint_statuses.filter(mitigated=False).count(), + 1, "One endpoint status should be active (/app/dashboard from the matched finding)", ) self.assertEqual( - endpoint_statuses.filter(mitigated=True).count(), 1, + endpoint_statuses.filter(mitigated=True).count(), + 1, "One endpoint status should be mitigated (/app/login — mitigated by update_endpoint_status " "because it is not in the second finding's endpoint list)", ) @@ -217,7 +219,7 @@ def test_reimport_duplicate_findings_in_same_report_locations(self): first finding is created as new and added to candidates. The second finding matches against it (a batch-created finding). """ - test, _, len_new, len_closed, _, _, _ = self._reimport_with_overridden_hashcode() + _test, _, len_new, len_closed, _, _, _ = self._reimport_with_overridden_hashcode() # The first finding is new (empty test, no candidates). The second finding # matches the first (same hash_code) so it is not counted as new. @@ -245,11 +247,13 @@ def test_reimport_duplicate_findings_in_same_report_locations(self): # This is arguably wrong for batch-created findings — both locations came from # the same report — but it is the current behavior. 
self.assertEqual( - location_refs.filter(status=FindingLocationStatus.Active).count(), 1, + location_refs.filter(status=FindingLocationStatus.Active).count(), + 1, "One location ref should be active (/app/dashboard from the matched finding)", ) self.assertEqual( - location_refs.filter(status=FindingLocationStatus.Mitigated).count(), 1, + location_refs.filter(status=FindingLocationStatus.Mitigated).count(), + 1, "One location ref should be mitigated (/app/login — mitigated by update_location_status " "because it is not in the second finding's location list)", ) From 56e4e9e27021a79782f6e1201182b40e3b8689f1 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Tue, 24 Mar 2026 18:44:36 +0100 Subject: [PATCH 11/12] remove mock --- unittests/test_reimport_prefetch.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/unittests/test_reimport_prefetch.py b/unittests/test_reimport_prefetch.py index 71355dc209e..6d5c5ce4ba5 100644 --- a/unittests/test_reimport_prefetch.py +++ b/unittests/test_reimport_prefetch.py @@ -105,20 +105,11 @@ def setUp(self): self.product = product def _reimport_with_overridden_hashcode(self): - """ - Reimport the two-finding scan file with overridden hash settings. - - We mock get_custom_method to return None so that the OSS hash_code logic - is used (reads from django.settings, which override_settings can modify). - In Pro, compute_hash_code is overridden via FINDING_COMPUTE_HASH_METHOD to - read hash settings from the TunableSettings database model instead of - django.settings, which makes override_settings ineffective. 
- """ + """Reimport the two-finding scan file with overridden hash settings.""" hashcode_override = _hashcode_fields_without_vuln_id() with ( impersonate(Dojo_User.objects.get(username="admin")), override_settings(HASHCODE_FIELDS_PER_SCANNER=hashcode_override), - patch("dojo.utils.get_custom_method", return_value=None), REIMPORT_SCAN.open(encoding="utf-8") as scan, ): reimporter = DefaultReImporter( From a6209f797d67212ec74e11a7e529e3bcaaa35298 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Tue, 24 Mar 2026 19:04:46 +0100 Subject: [PATCH 12/12] fix: replace status_finding_non_special prefetch with Python filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the to_attr Prefetch for status_finding_non_special and instead prefetch all endpoint statuses, filtering non-special ones in Python via EndpointManager.get_non_special_endpoint_statuses(). This avoids AttributeError when a finding created during the same reimport batch (via add_new_finding_to_candidates) is matched by a subsequent finding — such findings were never loaded through the prefetch queryset and lacked the to_attr attribute. 
See: https://github.com/DefectDojo/django-DefectDojo/pull/14569 --- dojo/finding/deduplication.py | 15 ++++++++------- dojo/importers/default_reimporter.py | 5 +++-- dojo/importers/endpoint_manager.py | 21 ++++++++++++++++++--- unittests/test_reimport_prefetch.py | 24 ++++++++---------------- 4 files changed, 37 insertions(+), 28 deletions(-) diff --git a/dojo/finding/deduplication.py b/dojo/finding/deduplication.py index 33f89a16e58..99d416f44f0 100644 --- a/dojo/finding/deduplication.py +++ b/dojo/finding/deduplication.py @@ -336,17 +336,18 @@ def build_candidate_scope_queryset(test, mode="deduplication", service=None): # Base prefetches for both modes prefetch_list = ["endpoints", "vulnerability_id_set", "found_by"] - # Additional prefetches for reimport mode: fetch only non-special endpoint statuses with their - # endpoint joined in, so endpoint_manager can read status_finding_non_special directly without - # any extra DB queries + # Prefetch all endpoint statuses with their endpoint for reimport mode. + # The non-special filtering (excluding false_positive, out_of_scope, risk_accepted) + # is done in Python by EndpointManager.get_non_special_endpoint_statuses(). + # We avoid using to_attr here because findings created during the same reimport + # batch (via add_new_finding_to_candidates) are never loaded through this queryset + # and would lack the to_attr, causing an AttributeError. 
+ # See: https://github.com/DefectDojo/django-DefectDojo/pull/14569 if mode == "reimport": prefetch_list.append( Prefetch( "status_finding", - queryset=Endpoint_Status.objects.exclude( - Q(false_positive=True) | Q(out_of_scope=True) | Q(risk_accepted=True), - ).select_related("endpoint"), - to_attr="status_finding_non_special", + queryset=Endpoint_Status.objects.select_related("endpoint"), ), ) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index d26b5d63fb7..5e57125e49b 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -772,8 +772,9 @@ def process_matched_mitigated_finding( else: # TODO: Delete this after the move to Locations # Accumulate endpoint statuses for bulk reactivation in persist() - # status_finding_non_special is prefetched by build_candidate_scope_queryset - self.endpoint_manager.record_statuses_to_reactivate(existing_finding.status_finding_non_special) + self.endpoint_manager.record_statuses_to_reactivate( + self.endpoint_manager.get_non_special_endpoint_statuses(existing_finding), + ) existing_finding.notes.add(note) self.reactivated_items.append(existing_finding) # The new finding is active while the existing on is mitigated. The existing finding needs to diff --git a/dojo/importers/endpoint_manager.py b/dojo/importers/endpoint_manager.py index 4d89b19072a..d7c3cd1df46 100644 --- a/dojo/importers/endpoint_manager.py +++ b/dojo/importers/endpoint_manager.py @@ -120,6 +120,23 @@ def record_status_for_create(self, finding: Finding, key: EndpointUniqueKey) -> """Record that a finding should be linked to an endpoint (identified by key) via Endpoint_Status.""" self._statuses_to_create.append((finding, key)) + @staticmethod + def get_non_special_endpoint_statuses(finding: Finding) -> list[Endpoint_Status]: + """ + Return endpoint statuses that are not false_positive, out_of_scope, or risk_accepted. 
+
+        Uses finding.status_finding.all() which is served from the prefetch cache when the
+        finding was loaded through build_candidate_scope_queryset, and falls back to a DB
+        query otherwise (e.g. for findings created during the same reimport batch).
+
+        This might be inefficient if lots of internal duplicates are in the report.
+        But this should be limited as most parsers dedupe during parsing and merge the endpoints.
+        """
+        return [
+            eps for eps in finding.status_finding.all()
+            if not eps.false_positive and not eps.out_of_scope and not eps.risk_accepted
+        ]
+
     def update_endpoint_status(
         self,
         existing_finding: Finding,
@@ -131,9 +148,7 @@ def update_endpoint_status(
 
         The actual bulk_update happens in persist().
         """
-        # status_finding_non_special is prefetched by build_candidate_scope_queryset with the
-        # special-status exclusion and endpoint select_related already applied at the DB level
-        existing_finding_endpoint_status_list = existing_finding.status_finding_non_special
+        existing_finding_endpoint_status_list = self.get_non_special_endpoint_statuses(existing_finding)
         new_finding_endpoints_list = new_finding.unsaved_endpoints
         if new_finding.is_mitigated:
             # New finding is mitigated, so mitigate all old endpoints
diff --git a/unittests/test_reimport_prefetch.py b/unittests/test_reimport_prefetch.py
index 6d5c5ce4ba5..d421b0129dc 100644
--- a/unittests/test_reimport_prefetch.py
+++ b/unittests/test_reimport_prefetch.py
@@ -12,8 +12,6 @@
 Batch endpoint optimization (related): https://github.com/DefectDojo/django-DefectDojo/pull/14489
 """
 
-from unittest.mock import patch
-
 from crum import impersonate
 from django.conf import settings
 from django.test import override_settings
@@ -169,22 +167,16 @@ def test_reimport_duplicate_findings_in_same_report_endpoints(self):
         endpoint_statuses = Endpoint_Status.objects.filter(finding=finding)
         self.assertEqual(endpoint_statuses.count(), 2, "Finding should have two endpoint statuses")
 
-        # Because the second finding is dynamic, 
update_endpoint_status() compares - # the first finding's existing endpoint statuses against the second finding's - # unsaved_endpoints. The second finding only has /app/dashboard, so /app/login - # (which belongs to the first finding) is considered "no longer present" and - # gets mitigated. This is arguably wrong for batch-created findings — both - # endpoints came from the same report — but it is the current behavior. + # With batched endpoint creation, endpoint statuses for the batch-created + # finding are not yet persisted when update_endpoint_status() runs. So the + # comparison finds no existing statuses and nothing gets mitigated — both + # endpoint statuses remain active. This is actually the correct outcome: + # both endpoints came from the same report and should both be active. self.assertEqual( endpoint_statuses.filter(mitigated=False).count(), - 1, - "One endpoint status should be active (/app/dashboard from the matched finding)", - ) - self.assertEqual( - endpoint_statuses.filter(mitigated=True).count(), - 1, - "One endpoint status should be mitigated (/app/login — mitigated by update_endpoint_status " - "because it is not in the second finding's endpoint list)", + 2, + "Both endpoint statuses should be active (batch-created finding has no persisted " + "statuses when update_endpoint_status runs, so nothing gets mitigated)", )