hw-native-sys · ChaoWao · Apr 22, 2026 · Apr 22, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -147,7 +147,10 @@ jobs:
           pip install '.[test]'
 
       - name: Run Python unit tests
-        run: pytest tests -m "not requires_hardware" -v
+        # --clone-protocol https mirrors the ST jobs: GH-hosted runners have
+        # no SSH keys, so the default (ssh) would emit a 'Permission denied
+        # (publickey)' git stderr line before the lazy fallback kicks in.
+        run: pytest tests/ut -m "not requires_hardware" -v --clone-protocol https
 
       - name: Build and run C++ unit tests
         run: |

diff --git a/conftest.py b/conftest.py
@@ -20,6 +20,7 @@
 import signal
 import subprocess
 import sys
+import time
 import typing
 
 # macOS libomp collision workaround — must run before any import that may
@@ -243,59 +244,98 @@ def pytest_configure(config):
 
 
 def pytest_collection_modifyitems(session, config, items):  # noqa: PLR0912
-    """Skip ST tests based on --platform, --runtime, --level filters; order L3 before L2."""
+    """Filter ST tests by --platform / --runtime / --level; order L3 before L2.
+
+    Static filter mismatches (wrong level, wrong runtime, wrong platform)
+    are **deselected** rather than marked ``pytest.skip`` so they don't
+    inflate the "N skipped" count in each subprocess's terminal summary —
+    the L2 subprocess alone re-collects ~50 items per runtime, and the
+    skipped variant produced one SKIPPED line per item under ``-v``.
+    Deselection goes through ``config.hook.pytest_deselected`` (the same
+    path pytest's ``-k`` / ``-m`` use), which reports "M deselected"
+    instead of per-item output.
+
+    User-actionable problems (``--platform required``) stay as real skips
+    so the reason still surfaces in the default pytest summary.
+    """
     platform = config.getoption("--platform")
     runtime_filter = config.getoption("--runtime")
     level_filter = config.getoption("--level")
 
-    # When --level is active, only SceneTestCase items with a matching
-    # _st_level should run. Skip every non-SceneTestCase item — resource
-    # tests run in their own Resource phase, and other standalone tests
-    # (e.g. test_hello_worker) must not leak into level-filtered runs.
-    if level_filter is not None:
-        for item in items:
-            if any(m.name == "skip" for m in item.iter_markers()):
-                continue
-            if getattr(item, "cls", None) is None:
-                item.add_marker(pytest.mark.skip(reason=f"standalone test, not level {level_filter}"))
+    keep: list = []
+    deselected: list = []
+
+    for item in items:
+        # Pre-existing skip markers (e.g. explicit ``@pytest.mark.skip``)
+        # stay put — the user asked for a visible skip, not a silent drop.
+        if any(m.name == "skip" for m in item.iter_markers()):
+            keep.append(item)
+            continue
 
-    # Sort: L3 tests first (they fork child processes that inherit main process CANN state,
-    # so they must run before L2 tests pollute the CANN context).
-    def sort_key(item):
         cls = getattr(item, "cls", None)
-        level = getattr(cls, "_st_level", 0) if cls else 0
-        return (0 if level >= 3 else 1, item.nodeid)
 
-    items.sort(key=sort_key)
+        # Under --level, non-SceneTestCase items don't participate in
+        # level-based dispatch at all. Resource phase collects them
+        # separately in the parent; in a level-filtered child they're
+        # simply not this phase's concern.
+        if level_filter is not None and cls is None:
+            deselected.append(item)
+            continue
 
-    for item in items:
-        cls = getattr(item, "cls", None)
-        if cls and hasattr(cls, "CASES") and isinstance(cls.CASES, list):
+        if cls is not None and hasattr(cls, "CASES") and isinstance(cls.CASES, list):
+            # SceneTestCase class item.
             if not platform:
+                # User error: surface it as a real skip so the reason is visible.
                 item.add_marker(pytest.mark.skip(reason="--platform required"))
-            elif not any(platform in c.get("platforms", []) for c in cls.CASES):
-                item.add_marker(pytest.mark.skip(reason=f"No cases for {platform}"))
-            elif runtime_filter and getattr(cls, "_st_runtime", None) != runtime_filter:
-                item.add_marker(
-                    pytest.mark.skip(reason=f"Runtime {getattr(cls, '_st_runtime', '?')} != {runtime_filter}")
-                )
-            elif level_filter is not None and getattr(cls, "_st_level", None) != level_filter:
-                item.add_marker(pytest.mark.skip(reason=f"Level {getattr(cls, '_st_level', '?')} != {level_filter}"))
+                keep.append(item)
+                continue
+            if not any(platform in c.get("platforms", []) for c in cls.CASES):
+                deselected.append(item)
+                continue
+            if runtime_filter and getattr(cls, "_st_runtime", None) != runtime_filter:
+                deselected.append(item)
+                continue
+            if level_filter is not None and getattr(cls, "_st_level", None) != level_filter:
+                deselected.append(item)
+                continue
+            keep.append(item)
             continue
+
+        # Non-class pytest function (standalone resource tests and such).
         platforms_marker = item.get_closest_marker("platforms")
         if platforms_marker:
             if not platform:
                 item.add_marker(pytest.mark.skip(reason="--platform required"))
-            elif platform not in platforms_marker.args[0]:
-                item.add_marker(pytest.mark.skip(reason=f"Not supported on {platform}"))
+                keep.append(item)
+                continue
+            if platform not in platforms_marker.args[0]:
+                deselected.append(item)
+                continue
 
-        # runtime-isolation filter for non-@scene_test tests: if the item declares
-        # `@pytest.mark.runtime("X")` and a --runtime filter is active, skip when
-        # they don't match. Prevents test_explicit_fatal_reports and friends from
-        # running under every runtime's subprocess.
+        # runtime-isolation filter for non-@scene_test tests: if the item
+        # declares ``@pytest.mark.runtime("X")`` and a --runtime filter is
+        # active, deselect when they don't match. Prevents
+        # test_explicit_fatal_reports and friends from running under every
+        # runtime's subprocess.
         runtime_marker = item.get_closest_marker("runtime")
         if runtime_marker and runtime_marker.args and runtime_filter and runtime_marker.args[0] != runtime_filter:
-            item.add_marker(pytest.mark.skip(reason=f"Runtime {runtime_marker.args[0]} != {runtime_filter}"))
+            deselected.append(item)
+            continue
+
+        keep.append(item)
+
+    if deselected:
+        items[:] = keep
+        config.hook.pytest_deselected(items=deselected)
+
+    # Sort: L3 tests first (they fork child processes that inherit main process CANN state,
+    # so they must run before L2 tests pollute the CANN context).
+    def sort_key(item):
+        cls = getattr(item, "cls", None)
+        level = getattr(cls, "_st_level", 0) if cls else 0
+        return (0 if level >= 3 else 1, item.nodeid)
+
+    items.sort(key=sort_key)
 
     # L3 profiling is not supported yet: a single L3 case forks N chip-processes
     # that all write perf_swimlane_<ts>.json to the same directory with
@@ -456,6 +496,19 @@ def _resolve_max_parallel(cfg, platform: str, device_ids: list[int]) -> int:
     return val
 
 
+def _emit_group(header: str, body: str) -> None:
+    """Print a GitHub Actions collapsible group around ``body``.
+
+    ``::group::`` / ``::endgroup::`` are workflow commands — Actions
+    renders them as a fold, other shells treat them as plain text so
+    running pytest locally still reads sensibly.
+    """
+    print(f"::group::{header}", flush=True)
+    if body:
+        print(body, end="" if body.endswith("\n") else "\n", flush=True)
+    print("::endgroup::", flush=True)
+
+
 def _dispatch_test_phases(session, resource_specs):  # noqa: PLR0912
     """Run Resource → L2 phases.
 
@@ -533,12 +586,19 @@ def _build(ids, _nodeid=spec.nodeid, _rt=spec.runtime, _kind=spec.kind):
             )
 
         def _on_done(res):
-            tag = "PASSED" if res.returncode == 0 else f"FAILED (rc={res.returncode})"
-            print(f"\n--- {res.label}: {tag} on devices {res.device_ids} ---\n", flush=True)
+            tag = "PASS" if res.returncode == 0 else f"FAIL rc={res.returncode}"
+            header = f"{res.label} [{tag} {res.duration_s:.1f}s, devices={res.device_ids}]"
+            _emit_group(header, res.output)
+            if res.returncode != 0:
+                # Out-of-group summary so a reviewer scanning the collapsed
+                # log still sees the failure without having to expand.
+                print(
+                    f"*** FAIL: {res.label} (devices={res.device_ids}) — expand group above ***",
+                    flush=True,
+                )
 
         print(
-            f"\n{'=' * 60}\n  Resource phase: {len(jobs)} case(s), "
-            f"pool={device_ids}, max_parallel={max_parallel}\n{'=' * 60}\n",
+            f"\nResource phase: {len(jobs)} case(s), pool={device_ids}, max_parallel={max_parallel}",
             flush=True,
         )
         try:
@@ -587,23 +647,27 @@ def _on_done(res):
         cmd = base_args + ["--runtime", rt, "--level", "2"]
         if xdist_available:
             cmd += ["-n", str(max_parallel), "--dist", "loadfile"]
-        print(
-            f"\n{'=' * 60}\n  L2 Runtime: {rt}"
-            + (f" [-n {max_parallel}]" if xdist_available else "")
-            + f"\n{'=' * 60}\n",
-            flush=True,
-        )
+        # L2 subprocesses run serially (one runtime at a time) so we don't
+        # need to buffer their stdout — we can stream it directly through
+        # the group markers. ``::group::`` on its own line before the run
+        # opens the fold; ``::endgroup::`` after closes it.
+        label = f"L2 {rt}" + (f" [-n {max_parallel}]" if xdist_available else "")
+        start = time.monotonic()
+        print(f"::group::{label}", flush=True)
         result = subprocess.run(cmd, check=False, cwd=cwd)
+        duration = time.monotonic() - start
+        tag = "PASS" if result.returncode == 0 else f"FAIL rc={result.returncode}"
+        print(f"--- L2 {rt}: {tag} {duration:.1f}s ---", flush=True)
+        print("::endgroup::", flush=True)
+
         if result.returncode == TIMEOUT_EXIT_CODE:
-            print(f"\n*** L2 runtime {rt}: TIMED OUT ***\n", flush=True)
+            print(f"*** L2 {rt}: TIMED OUT ***", flush=True)
             os._exit(TIMEOUT_EXIT_CODE)
         if result.returncode != 0:
             l2_failed = True
-            print(f"\n*** L2 runtime {rt}: FAILED ***\n", flush=True)
+            print(f"*** FAIL: L2 {rt} — expand group above ***", flush=True)
             if fail_fast:
                 break
-        else:
-            print(f"\n--- L2 runtime {rt}: PASSED ---\n", flush=True)
 
     # Flatten per-subprocess outputs/perf_*/ subdirs back to outputs/ so
     # downstream tools (swimlane_converter.py, CI artifact upload) find

diff --git a/examples/workers/l2/hello_worker/__init__.py b/examples/workers/l2/hello_worker/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) PyPTO Contributors.
+# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+# CANN Open Software License Agreement Version 2.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# -----------------------------------------------------------------------------------------------------------
+"""Package marker so ``test_*.py`` can do ``from .main import run``."""
diff --git a/examples/workers/l2/hello_worker/test_hello_worker.py b/examples/workers/l2/hello_worker/test_hello_worker.py
@@ -8,13 +8,9 @@
 # -----------------------------------------------------------------------------------------------------------
 """Hardware ST for examples/workers/l2/hello_worker."""
 
-import os
-from importlib.machinery import SourceFileLoader
-
 import pytest
 
-_main = SourceFileLoader("hello_worker_main", os.path.join(os.path.dirname(__file__), "main.py")).load_module()
-run = _main.run
+from .main import run
 
 
 @pytest.mark.platforms(["a2a3sim", "a2a3", "a5sim", "a5"])

diff --git a/examples/workers/l2/vector_add/__init__.py b/examples/workers/l2/vector_add/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) PyPTO Contributors.
+# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+# CANN Open Software License Agreement Version 2.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# -----------------------------------------------------------------------------------------------------------
+"""Package marker so ``test_*.py`` can do ``from .main import run``."""
diff --git a/examples/workers/l2/vector_add/test_vector_add.py b/examples/workers/l2/vector_add/test_vector_add.py
@@ -8,13 +8,9 @@
 # -----------------------------------------------------------------------------------------------------------
 """Hardware ST for examples/workers/l2/vector_add."""
 
-import os
-from importlib.machinery import SourceFileLoader
-
 import pytest
 
-_main = SourceFileLoader("vector_add_main", os.path.join(os.path.dirname(__file__), "main.py")).load_module()
-run = _main.run
+from .main import run
 
 
 @pytest.mark.platforms(["a2a3sim", "a2a3", "a5sim", "a5"])

diff --git a/examples/workers/l3/allreduce_distributed/__init__.py b/examples/workers/l3/allreduce_distributed/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) PyPTO Contributors.
+# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+# CANN Open Software License Agreement Version 2.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# -----------------------------------------------------------------------------------------------------------
+"""Package marker so ``test_*.py`` can do ``from .main import run``."""
diff --git a/examples/workers/l3/allreduce_distributed/test_allreduce.py b/examples/workers/l3/allreduce_distributed/test_allreduce.py
@@ -8,13 +8,9 @@
 # -----------------------------------------------------------------------------------------------------------
 """Hardware ST for examples/workers/l3/allreduce_distributed."""
 
-import os
-from importlib.machinery import SourceFileLoader
-
 import pytest
 
-_main = SourceFileLoader("allreduce_distributed_main", os.path.join(os.path.dirname(__file__), "main.py")).load_module()
-run = _main.run
+from .main import run
 
 
 @pytest.mark.requires_hardware

diff --git a/examples/workers/l3/multi_chip_dispatch/__init__.py b/examples/workers/l3/multi_chip_dispatch/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) PyPTO Contributors.
+# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+# CANN Open Software License Agreement Version 2.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# -----------------------------------------------------------------------------------------------------------
+"""Package marker so ``test_*.py`` can do ``from .main import run``."""
diff --git a/examples/workers/l3/multi_chip_dispatch/test_multi_chip_dispatch.py b/examples/workers/l3/multi_chip_dispatch/test_multi_chip_dispatch.py
@@ -8,13 +8,9 @@
 # -----------------------------------------------------------------------------------------------------------
 """Hardware ST for examples/workers/l3/multi_chip_dispatch."""
 
-import os
-from importlib.machinery import SourceFileLoader
-
 import pytest
 
-_main = SourceFileLoader("multi_chip_dispatch_main", os.path.join(os.path.dirname(__file__), "main.py")).load_module()
-run = _main.run
+from .main import run
 
 
 @pytest.mark.platforms(["a2a3sim", "a2a3", "a5sim", "a5"])

diff --git a/pyproject.toml b/pyproject.toml
@@ -73,6 +73,16 @@ reportRedeclaration = false
 [tool.pytest.ini_options]
 testpaths = ["tests", "examples"]
 addopts = "--import-mode=importlib"
+# Torch's CPU wheel (pytorch.org index) does not pull numpy, so ``import
+# torch`` in CI emits ``UserWarning: Failed to initialize NumPy: No
+# module named 'numpy'`` on every subprocess. Our code never calls
+# ``tensor.numpy()`` / ``torch.from_numpy()`` — we use pure-torch paths
+# (``torch.full``, ``tensor.share_memory_``) — so losing numpy interop
+# is harmless. Silence the warning here rather than pulling in a
+# ~30 MB dependency we don't use.
+filterwarnings = [
+    "ignore:Failed to initialize NumPy:UserWarning",
+]
 
 [tool.scikit-build]
 wheel.packages = ["simpler_setup", "python/simpler"]