Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
62487d3
feat(backend/kernel): route use_sea=True through the Rust kernel
vikrantpuppala May 14, 2026
90d1de9
refactor(backend/kernel): PAT-only auth, drop External trampoline
vikrantpuppala May 14, 2026
4b07e4c
test(e2e): live kernel-backend (use_sea=True) suite
vikrantpuppala May 14, 2026
aab4fa8
fix(backend/kernel): defer databricks-sql-kernel poetry dep declaration
vikrantpuppala May 14, 2026
8566325
fix(backend/kernel): unit tests skip without pyarrow, mypy + black
vikrantpuppala May 14, 2026
58afea4
fix(backend/kernel): make package importable without the kernel wheel
vikrantpuppala May 14, 2026
c5a5162
test(e2e): skip use_sea=True parametrized cases when kernel wheel mis…
vikrantpuppala May 14, 2026
9224325
refactor(backend/kernel): address review feedback — mechanical fixes
vikrantpuppala May 15, 2026
ea1ba45
feat(backend/kernel): introduce dedicated use_kernel flag + substanti…
vikrantpuppala May 15, 2026
5a6f1f0
fix(backend/kernel): CI-greening — mypy + e2e module skip
vikrantpuppala May 18, 2026
da2bc44
fix(backend/kernel): address gopalldb minor review comments (m1, m4)
vikrantpuppala May 18, 2026
1d0f7b6
fix(backend/kernel): substantive review fixes — M1, M2, M3, m2, m3
vikrantpuppala May 18, 2026
089e271
refactor(backend/kernel): replace kernel_call context manager with ex…
vikrantpuppala May 18, 2026
14357c5
style(backend/kernel): black format client.py
vikrantpuppala May 18, 2026
0e1a250
fix(backend/kernel): address gopalldb's P1 review comments
vikrantpuppala May 18, 2026
a05781f
fix(backend/kernel): address gopalldb's follow-up P1/P2 review comments
vikrantpuppala May 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,20 @@ requests-kerberos = {version = "^0.15.0", optional = true}

[tool.poetry.extras]
pyarrow = ["pyarrow"]
# `[kernel]` extra is intentionally not declared here yet.
# `databricks-sql-kernel` is built from the databricks-sql-kernel
# repo and not yet published to PyPI; declaring it as a poetry dep
# breaks `poetry lock` for every CI job. Once the wheel is on PyPI
# the extra will be added back here:
#
# databricks-sql-kernel = {version = "^0.1.0", optional = true}
# [tool.poetry.extras]
# kernel = ["databricks-sql-kernel"]
#
# Until then, the only supported install path is a local dev build
# into the same venv as databricks-sql-connector:
#
#   cd databricks-sql-kernel/pyo3 && maturin develop --release

[tool.poetry.group.dev.dependencies]
pytest = "^7.1.2"
Expand Down
25 changes: 25 additions & 0 deletions src/databricks/sql/backend/kernel/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Backend that delegates to the Databricks SQL Kernel (Rust) via PyO3.

Routed when ``use_kernel=True`` is passed to ``databricks.sql.connect``.
The module's identity is "delegates to the kernel" — not the wire
protocol the kernel happens to use today (SEA REST). The kernel may
switch its default transport (SEA REST → SEA gRPC → …) without
renaming this module.

This ``__init__`` deliberately does **not** re-export
``KernelDatabricksClient`` from ``.client``. Importing ``.client``
loads the ``databricks_sql_kernel`` PyO3 extension at module-import
time; doing that eagerly here would make ``import
databricks.sql.backend.kernel.type_mapping`` (used by tests / by
``KernelResultSet`` consumers) require the kernel wheel even when
the caller never plans to open a kernel-backed session. Callers
that need the client import it directly:

    from databricks.sql.backend.kernel.client import KernelDatabricksClient

``session.py::_create_backend`` already does this lazy import under
the ``use_kernel=True`` branch.

See ``docs/designs/pysql-kernel-integration.md`` in
``databricks-sql-kernel`` for the full integration design.
"""
129 changes: 129 additions & 0 deletions src/databricks/sql/backend/kernel/_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""Shared error-mapping primitives for the kernel backend.

The PyO3 boundary can produce two flavours of exception:

- ``databricks_sql_kernel.KernelError`` — the kernel's own
structured error type. Carries ``code`` / ``message`` /
``sql_state`` / ``query_id`` / ``http_status`` / ``retryable`` /
``vendor_code`` / ``error_code`` as attributes; mapped to a PEP
249 exception class via ``_CODE_TO_EXCEPTION`` with the
attributes forwarded onto the re-raised exception so callers can
branch on ``err.code`` / ``err.sql_state`` without reaching
through ``__cause__``.
- Anything else — ``TypeError`` / ``OverflowError`` /
``ValueError`` from PyO3 argument conversion, or arbitrary
extension-internal Python errors. These would otherwise propagate
raw to connector callers, breaking the DB-API contract that says
"only PEP 249 exception types cross the boundary". Wrapped in
``OperationalError`` here.

These primitives live in their own module so both ``client.py``
(which orchestrates PyO3 calls) and ``result_set.py`` (which calls
``fetch_next_batch`` on the same kernel handles) can share them
without ``result_set.py`` importing from ``client.py``.

Usage at every PyO3 call site is a plain try/except:

    try:
        stmt.execute()
    except Exception as exc:
        raise wrap_kernel_exception("execute_command", exc) from exc

The helper returns the mapped exception; callers raise it. Plain
``try/except`` is preferred over a context manager: the control
flow is visible at the call site, the helper is a pure function
(trivial to test), and tracebacks don't carry an extra
``__exit__`` frame.
"""

from __future__ import annotations

from databricks.sql.exc import (
DatabaseError,
Error,
OperationalError,
ProgrammingError,
)


try:
import databricks_sql_kernel as _kernel # type: ignore[import-not-found]
except ImportError as exc: # pragma: no cover - same hint as client.py
raise ImportError(
"use_kernel=True requires the databricks-sql-kernel extension, which "
"is not yet published on PyPI. Build and install it locally from the "
"databricks-sql-kernel repo:\n"
" cd databricks-sql-kernel/pyo3 && maturin develop --release\n"
"(into the same venv as databricks-sql-connector)."
) from exc


# Lookup table: kernel ``code`` slug -> PEP 249 exception class.
# PEP 249's taxonomy is coarser than the kernel's, so the mapping is
# many-to-one; entries are grouped by the class they collapse onto.
# Keep this the single source of truth for the translation — nothing
# else in the backend should hard-code a code -> class decision.
_CODE_TO_EXCEPTION = {
    # Codes surfaced as ``ProgrammingError``.
    "InvalidArgument": ProgrammingError,
    "InvalidStatementHandle": ProgrammingError,
    "NotFound": ProgrammingError,
    # Codes surfaced as ``OperationalError``.
    "Cancelled": OperationalError,
    "NetworkError": OperationalError,
    "PermissionDenied": OperationalError,
    "ResourceExhausted": OperationalError,
    "Timeout": OperationalError,
    "Unauthenticated": OperationalError,
    "Unavailable": OperationalError,
    # Codes surfaced as the generic ``DatabaseError``.
    "DataLoss": DatabaseError,
    "Internal": DatabaseError,
    "SqlError": DatabaseError,
    "Unknown": DatabaseError,
}


def reraise_kernel_error(exc: "_kernel.KernelError") -> "Error":
    """Build the PEP 249 exception that corresponds to a kernel error.

    The exception class is looked up in ``_CODE_TO_EXCEPTION`` by
    ``exc.code``; a missing attribute is treated as ``"Unknown"`` and
    a slug absent from the table falls back to ``DatabaseError``. The
    new exception is constructed from ``exc.message`` (or ``str(exc)``
    when the attribute is absent).

    The kernel's structured attributes are copied onto the new
    instance so callers can inspect e.g. ``err.sql_state`` directly;
    any attribute the kernel error lacks is set to ``None``.

    Despite the name, nothing is raised here: the mapped exception is
    *returned*. Callers do ``raise ... from exc``, and that ``from``
    clause is what populates ``__cause__``.
    """
    forwarded_attrs = (
        "code",
        "sql_state",
        "error_code",
        "vendor_code",
        "http_status",
        "retryable",
        "query_id",
    )
    exception_cls = _CODE_TO_EXCEPTION.get(
        getattr(exc, "code", "Unknown"), DatabaseError
    )
    mapped = exception_cls(getattr(exc, "message", str(exc)))
    for name in forwarded_attrs:
        setattr(mapped, name, getattr(exc, name, None))
    return mapped


def wrap_kernel_exception(what: str, exc: BaseException) -> "Error":
    """Translate an exception caught at a PyO3 call site into a PEP 249 one.

    Three cases, checked in this order:

    1. ``KernelError`` — converted by ``reraise_kernel_error`` (mapped
       class, structured attributes forwarded).
    2. An exception that is already a PEP 249 ``Error`` (for example
       one an inner caller mapped earlier) — handed back untouched.
    3. Everything else (``TypeError`` / ``ValueError`` / ... from PyO3
       argument conversion, extension-internal failures) — wrapped in
       a fresh ``OperationalError``.

    ``what`` is a short tag, normally the calling method's name, and
    only appears in the ``OperationalError`` message of case 3.

    The result is returned rather than raised so the call site stays
    in control of ``raise`` vs. ``raise ... from exc``.
    """
    if isinstance(exc, _kernel.KernelError):
        mapped = reraise_kernel_error(exc)
    elif isinstance(exc, Error):
        mapped = exc
    else:
        mapped = OperationalError(
            f"Unexpected error from databricks_sql_kernel during {what}: {exc!r}"
        )
    return mapped
118 changes: 118 additions & 0 deletions src/databricks/sql/backend/kernel/auth_bridge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Translate the connector's ``AuthProvider`` into ``databricks_sql_kernel``
``Session`` auth kwargs.

This phase ships PAT only. The kernel-side PyO3 binding accepts
``auth_type='pat'``; OAuth / federation / custom credentials
providers are reserved but not yet wired in either layer. Non-PAT
auth raises ``NotSupportedError`` from this bridge so the failure
surfaces at session-open time with a clear message rather than
deep inside the kernel.

Token extraction goes through ``AuthProvider.add_headers({})``
rather than touching auth-provider-specific attributes, so the
bridge works uniformly for every PAT shape — including
``AccessTokenAuthProvider`` wrapped in ``TokenFederationProvider``
(which ``get_python_sql_connector_auth_provider`` does for every
provider it builds).
"""

from __future__ import annotations

import logging
import re
from typing import Any, Dict, Optional

from databricks.sql.auth.authenticators import AccessTokenAuthProvider, AuthProvider
from databricks.sql.auth.token_federation import TokenFederationProvider
from databricks.sql.exc import NotSupportedError, ProgrammingError

logger = logging.getLogger(__name__)


# RFC 6750 §2.1 defines the Authorization scheme as case-insensitive.
# The connector's auth providers all emit ``Bearer `` exactly today,
# but we match leniently in case a federation proxy or future provider
# normalises the casing differently — failing closed here would surface
# as a confusing ``ProgrammingError`` from the bridge.
#
# Only the prefix *length* is stored: ``_extract_bearer_token`` slices
# the header value at this offset and lower-cases the leading slice
# before comparing it with ``"bearer "``.
_BEARER_PREFIX_LEN = len("Bearer ")

# Defense-in-depth: reject tokens containing ASCII control characters
# or whitespace. The pattern matches any character in 0x00-0x20
# (NUL through space) plus DEL (0x7f). CR/LF/NUL in a token would let
# a misbehaving HTTP stack split or terminate the Authorization header
# line, opening a header-injection sink. Space (0x20) is included so
# leading-/embedded-whitespace tokens get rejected too — e.g. a header
# with two spaces after ``Bearer`` (the second space ends up at the
# start of the sliced token) or a tab-prefixed token. RFC 6750 §2.1
# forbids whitespace within the credential token itself.
_TOKEN_REJECT_RE = re.compile(r"[\x00-\x20\x7f]")


def _is_pat(auth_provider: AuthProvider) -> bool:
    """Report whether ``auth_provider`` is PAT-based.

    A provider counts as PAT when it is an ``AccessTokenAuthProvider``
    itself, or when it is a ``TokenFederationProvider`` whose
    ``external_provider`` is one.

    The second case is the one that matters in practice:
    ``get_python_sql_connector_auth_provider`` always wraps the base
    provider in a ``TokenFederationProvider``, so a bare ``isinstance``
    check against ``AccessTokenAuthProvider`` would never match. We
    look one level through the wrapper to find the real type.
    """
    if isinstance(auth_provider, AccessTokenAuthProvider):
        return True
    # Not a PAT provider directly — accept only a federation wrapper
    # around one.
    return isinstance(auth_provider, TokenFederationProvider) and isinstance(
        auth_provider.external_provider, AccessTokenAuthProvider
    )


def _extract_bearer_token(auth_provider: AuthProvider) -> Optional[str]:
    """Return the bearer token the provider would send, if any.

    The token is obtained by asking the provider to populate an empty
    header dict via ``add_headers`` and reading back the
    ``Authorization`` entry. Using that public surface keeps the
    bridge independent of provider-specific internals.

    Returns:
        The credential following the ``Bearer `` prefix (scheme
        matched case-insensitively), or ``None`` when no
        ``Authorization`` header was written or it uses a different
        scheme — neither can be expressed through the kernel's PAT
        auth surface.

    Raises:
        ProgrammingError: the credential contains a character matched
            by ``_TOKEN_REJECT_RE`` (ASCII control characters or
            whitespace).
    """
    collected: Dict[str, str] = {}
    auth_provider.add_headers(collected)

    header_value = collected.get("Authorization")
    if not header_value:
        return None

    # Split at the fixed prefix length; the scheme part still carries
    # its trailing space, which is why it is compared to "bearer ".
    scheme = header_value[:_BEARER_PREFIX_LEN]
    credential = header_value[_BEARER_PREFIX_LEN:]
    if scheme.lower() != "bearer ":
        return None

    if _TOKEN_REJECT_RE.search(credential) is not None:
        raise ProgrammingError(
            "Bearer token contains ASCII control characters or whitespace; "
            "refusing to forward it to the kernel auth bridge."
        )
    return credential


def kernel_auth_kwargs(auth_provider: AuthProvider) -> Dict[str, Any]:
    """Produce the auth kwargs for ``databricks_sql_kernel.Session(...)``.

    Only PAT providers (including a PAT wrapped in
    ``TokenFederationProvider``) are accepted; they are routed through
    the kernel's PAT path. Every other provider is rejected up front:
    the kernel binding doesn't accept OAuth today, and funnelling OAuth
    through the PAT path would silently break token refresh during
    long-running sessions.

    Returns:
        ``{"auth_type": "pat", "access_token": <token>}``.

    Raises:
        NotSupportedError: ``auth_provider`` is not PAT-based.
        ProgrammingError: the PAT provider yielded no usable Bearer
            token (or ``_extract_bearer_token`` rejected it).
    """
    # Guard clause: anything that is not PAT is unsupported.
    if not _is_pat(auth_provider):
        raise NotSupportedError(
            f"The kernel backend (use_kernel=True) currently only supports PAT auth, "
            f"but got {type(auth_provider).__name__}. Use the Thrift backend "
            "(default) for OAuth / federation / custom credential providers."
        )

    access_token = _extract_bearer_token(auth_provider)
    if not access_token:
        raise ProgrammingError(
            "PAT auth provider did not produce a Bearer Authorization "
            "header; cannot route through the kernel's PAT path"
        )
    return {"auth_type": "pat", "access_token": access_token}
Loading
Loading