Skip to content

Commit 823c416

Browse files
fix(backend/kernel): unit tests skip without pyarrow, mypy + black
Three CI failures after the poetry-lock fix uncovered three real issues: 1. pyarrow is optional in the connector. The default-deps CI test job installs without it; the +PyArrow job installs with it. The kernel backend's result_set.py + type_mapping.py import pyarrow eagerly (the kernel always returns pyarrow), and the unit tests import the backend at collection time — which crashes the default-deps job with ModuleNotFoundError. Fix: gate the three kernel unit test modules on `pytest.importorskip("pyarrow")` so they skip on default-deps and run on +PyArrow. Verified locally: 39 pass with pyarrow, 3 modules skipped without. No change to the backend module itself — nothing imports it until use_sea=True is invoked, and pyarrow is on the kernel wheel's runtime dependency list, so use_sea=True can't hit this either. 2. mypy: KernelDatabricksClient.open_session returns self._session_id, which mypy types as Optional[SessionId] because the field starts as None. Fix: bind the new id to a local non-Optional variable, assign it to the field, and return the local. CI's check-types now runs cleanly on backend/kernel/; pre-existing mypy noise elsewhere is unrelated to this change. 3. black --check: black 22.12.0 (the version CI pins) wants reformatting on result_set.py / type_mapping.py / client.py. Applied, and verified locally with the same black version. All 39 kernel unit tests + 619 pre-existing unit tests pass. Co-authored-by: Isaac — Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
1 parent 31ca581 commit 823c416

6 files changed

Lines changed: 42 additions & 11 deletions

File tree

src/databricks/sql/backend/kernel/client.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -214,11 +214,11 @@ def open_session(
214214

215215
# Use the kernel's real server-issued session id, not a
216216
# synthetic UUID. Matches what the native SEA backend does.
217-
self._session_id = SessionId.from_sea_session_id(
218-
self._kernel_session.session_id
219-
)
220-
logger.info("Opened kernel-backed session %s", self._session_id)
221-
return self._session_id
217+
# Bind to a local first so mypy sees a non-Optional return.
218+
session_id = SessionId.from_sea_session_id(self._kernel_session.session_id)
219+
self._session_id = session_id
220+
logger.info("Opened kernel-backed session %s", session_id)
221+
return session_id
222222

223223
def close_session(self, session_id: SessionId) -> None:
224224
if self._kernel_session is None:
@@ -229,7 +229,9 @@ def close_session(self, session_id: SessionId) -> None:
229229
try:
230230
handle.close()
231231
except _kernel.KernelError as exc:
232-
logger.warning("Error closing async handle during session close: %s", exc)
232+
logger.warning(
233+
"Error closing async handle during session close: %s", exc
234+
)
233235
self._async_handles.clear()
234236
try:
235237
self._kernel_session.close()
@@ -474,7 +476,9 @@ def get_columns(
474476
# Kernel's list_columns requires a catalog (SEA `SHOW
475477
# COLUMNS` cannot span catalogs). Surface the constraint
476478
# explicitly rather than letting the kernel error.
477-
raise ProgrammingError("get_columns requires catalog_name on the kernel backend.")
479+
raise ProgrammingError(
480+
"get_columns requires catalog_name on the kernel backend."
481+
)
478482
try:
479483
stream = self._kernel_session.metadata().list_columns(
480484
catalog=catalog_name,

src/databricks/sql/backend/kernel/result_set.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ def _drain(self) -> pyarrow.Table:
144144
chunks: List[pyarrow.RecordBatch] = []
145145
if self._buffer and self._buffer_offset > 0:
146146
head = self._buffer.popleft()
147-
chunks.append(head.slice(self._buffer_offset, head.num_rows - self._buffer_offset))
147+
chunks.append(
148+
head.slice(self._buffer_offset, head.num_rows - self._buffer_offset)
149+
)
148150
self._buffer_offset = 0
149151
while self._buffer:
150152
chunks.append(self._buffer.popleft())

src/databricks/sql/backend/kernel/type_mapping.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,14 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
6666
ADBC / Thrift result paths produce.
6767
"""
6868
return [
69-
(field.name, _arrow_type_to_dbapi_string(field.type), None, None, None, None, None)
69+
(
70+
field.name,
71+
_arrow_type_to_dbapi_string(field.type),
72+
None,
73+
None,
74+
None,
75+
None,
76+
None,
77+
)
7078
for field in schema
7179
]

tests/unit/test_kernel_auth_bridge.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@
1515

1616
import pytest
1717

18+
# The kernel backend's result_set + type_mapping modules transitively
19+
# import pyarrow; the connector's default-deps test job doesn't
20+
# install pyarrow, so importing the auth_bridge in that environment
21+
# would fail at module-collection time. Gate the whole module on
22+
# pyarrow availability — matches the convention the connector uses
23+
# for pyarrow-dependent tests.
24+
pytest.importorskip("pyarrow")
25+
1826
from databricks.sql.auth.authenticators import (
1927
AccessTokenAuthProvider,
2028
AuthProvider,

tests/unit/test_kernel_result_set.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@
88
from typing import Deque
99
from unittest.mock import MagicMock
1010

11-
import pyarrow as pa
1211
import pytest
1312

13+
# pyarrow is an optional connector dep; the default-deps CI test
14+
# job runs without it. KernelResultSet imports pyarrow eagerly,
15+
# so the whole module must skip when pyarrow is unavailable.
16+
pa = pytest.importorskip("pyarrow")
17+
1418
from databricks.sql.backend.kernel.result_set import KernelResultSet
1519
from databricks.sql.backend.types import CommandId, CommandState
1620

tests/unit/test_kernel_type_mapping.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,14 @@
22

33
from __future__ import annotations
44

5-
import pyarrow as pa
65
import pytest
76

7+
# pyarrow is an optional connector dep; the default-deps CI test
8+
# job runs without it. The kernel backend itself imports pyarrow
9+
# at module load, so any test that touches the backend must skip
10+
# when pyarrow is unavailable.
11+
pa = pytest.importorskip("pyarrow")
12+
813
from databricks.sql.backend.kernel.type_mapping import (
914
_arrow_type_to_dbapi_string,
1015
description_from_arrow_schema,

0 commit comments

Comments (0)