Skip to content

Commit 823c416

Browse files
fix(backend/kernel): unit tests skip without pyarrow, mypy + black
Three CI failures after the poetry-lock fix uncovered three real issues: 1. pyarrow is optional in the connector. The default-deps CI test job installs without it; the +PyArrow job installs with it. The kernel backend's result_set.py + type_mapping.py import pyarrow eagerly (the kernel always returns pyarrow), and the unit tests import the backend at collection time — which crashes the default-deps job with ModuleNotFoundError. Fix: gate the three kernel unit test modules on `pytest.importorskip("pyarrow")` so they skip on default-deps and run on +PyArrow. Verified locally: 39 pass with pyarrow, 3 modules skipped without. No change to the backend module itself — nothing imports it until use_sea=True is invoked, and pyarrow is on the kernel wheel's runtime dependency list, so use_sea=True can't hit this either. 2. mypy: KernelDatabricksClient.open_session returns self._session_id, which mypy types as Optional[SessionId] because the field starts as None. Fix: bind the new id to a local non-Optional variable, assign it to the field, and return the local. CI's check-types now runs cleanly on backend/kernel/; pre-existing mypy noise elsewhere is unrelated to this change. 3. black --check: black 22.12.0 (the version CI pins) wants reformatting on result_set.py / type_mapping.py / client.py. Applied, and verified locally with the same black version. All 39 kernel unit tests + 619 pre-existing unit tests pass. Co-authored-by: Isaac — Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
1 parent 31ca581 commit 823c416

6 files changed

Lines changed: 42 additions & 11 deletions

File tree

src/databricks/sql/backend/kernel/client.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -214,11 +214,11 @@ def open_session(
214214

215215
# Use the kernel's real server-issued session id, not a
216216
# synthetic UUID. Matches what the native SEA backend does.
217-
self._session_id = SessionId.from_sea_session_id(
218-
self._kernel_session.session_id
219-
)
220-
logger.info("Opened kernel-backed session %s", self._session_id)
221-
return self._session_id
217+
# Bind to a local first so mypy sees a non-Optional return.
218+
session_id = SessionId.from_sea_session_id(self._kernel_session.session_id)
219+
self._session_id = session_id
220+
logger.info("Opened kernel-backed session %s", session_id)
221+
return session_id
222222

223223
def close_session(self, session_id: SessionId) -> None:
224224
if self._kernel_session is None:
@@ -229,7 +229,9 @@ def close_session(self, session_id: SessionId) -> None:
229229
try:
230230
handle.close()
231231
except _kernel.KernelError as exc:
232-
logger.warning("Error closing async handle during session close: %s", exc)
232+
logger.warning(
233+
"Error closing async handle during session close: %s", exc
234+
)
233235
self._async_handles.clear()
234236
try:
235237
self._kernel_session.close()
@@ -474,7 +476,9 @@ def get_columns(
474476
# Kernel's list_columns requires a catalog (SEA `SHOW
475477
# COLUMNS` cannot span catalogs). Surface the constraint
476478
# explicitly rather than letting the kernel error.
477-
raise ProgrammingError("get_columns requires catalog_name on the kernel backend.")
479+
raise ProgrammingError(
480+
"get_columns requires catalog_name on the kernel backend."
481+
)
478482
try:
479483
stream = self._kernel_session.metadata().list_columns(
480484
catalog=catalog_name,

src/databricks/sql/backend/kernel/result_set.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ def _drain(self) -> pyarrow.Table:
144144
chunks: List[pyarrow.RecordBatch] = []
145145
if self._buffer and self._buffer_offset > 0:
146146
head = self._buffer.popleft()
147-
chunks.append(head.slice(self._buffer_offset, head.num_rows - self._buffer_offset))
147+
chunks.append(
148+
head.slice(self._buffer_offset, head.num_rows - self._buffer_offset)
149+
)
148150
self._buffer_offset = 0
149151
while self._buffer:
150152
chunks.append(self._buffer.popleft())

src/databricks/sql/backend/kernel/type_mapping.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,14 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
6666
ADBC / Thrift result paths produce.
6767
"""
6868
return [
69-
(field.name, _arrow_type_to_dbapi_string(field.type), None, None, None, None, None)
69+
(
70+
field.name,
71+
_arrow_type_to_dbapi_string(field.type),
72+
None,
73+
None,
74+
None,
75+
None,
76+
None,
77+
)
7078
for field in schema
7179
]

tests/unit/test_kernel_auth_bridge.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@
1515

1616
import pytest
1717

18+
# The kernel backend's result_set + type_mapping modules transitively
19+
# import pyarrow; the connector's default-deps test job doesn't
20+
# install pyarrow, so importing the auth_bridge in that environment
21+
# would fail at module-collection time. Gate the whole module on
22+
# pyarrow availability — matches the convention the connector uses
23+
# for pyarrow-dependent tests.
24+
pytest.importorskip("pyarrow")
25+
1826
from databricks.sql.auth.authenticators import (
1927
AccessTokenAuthProvider,
2028
AuthProvider,

tests/unit/test_kernel_result_set.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@
88
from typing import Deque
99
from unittest.mock import MagicMock
1010

11-
import pyarrow as pa
1211
import pytest
1312

13+
# pyarrow is an optional connector dep; the default-deps CI test
14+
# job runs without it. KernelResultSet imports pyarrow eagerly,
15+
# so the whole module must skip when pyarrow is unavailable.
16+
pa = pytest.importorskip("pyarrow")
17+
1418
from databricks.sql.backend.kernel.result_set import KernelResultSet
1519
from databricks.sql.backend.types import CommandId, CommandState
1620

tests/unit/test_kernel_type_mapping.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,14 @@
22

33
from __future__ import annotations
44

5-
import pyarrow as pa
65
import pytest
76

7+
# pyarrow is an optional connector dep; the default-deps CI test
8+
# job runs without it. The kernel backend itself imports pyarrow
9+
# at module load, so any test that touches the backend must skip
10+
# when pyarrow is unavailable.
11+
pa = pytest.importorskip("pyarrow")
12+
813
from databricks.sql.backend.kernel.type_mapping import (
914
_arrow_type_to_dbapi_string,
1015
description_from_arrow_schema,

0 commit comments

Comments (0)