Commit aa8bd24

feat(backend/kernel): wire TSparkParameter through to kernel bind_param
Lifts the NotSupportedError that execute_command currently raises for
parametrized queries. The kernel-side PyO3 binding for
Statement.bind_param landed in databricks-sql-kernel#18; this commit
wires the connector's TSparkParameter shape through to it.

Implementation:

- New `bind_tspark_params(kernel_stmt, parameters)` in type_mapping.py
  forwards each TSparkParameter to the kernel as
  `(ordinal, value.stringValue, type)`. ordinal is the 1-based position
  in the parameters list; the connector's `ordinal: bool` flag is
  checked only to reject named bindings (kernel v0 doesn't accept them
  on the wire).
- execute_command no longer raises on `parameters=[...]`. The
  query_tags branch stays — that's a separate gap.

Tests:

- 6 new unit tests in tests/unit/test_kernel_type_mapping.py for the
  mapper:
  - positional forwarding preserves ordering and (ordinal, value, type)
  - None value forwards as SQL NULL
  - VOID passes through verbatim (kernel parser ignores value for VOID)
  - named bindings raise NotSupportedError with a pointed message
  - missing TSparkParameter.type defaults to STRING (defensive)
  - empty parameters list is a no-op
- 3 new e2e tests in tests/e2e/test_kernel_backend.py against dogfood:
  - mixed-type round-trip (INT, STRING, BOOLEAN) via the connector's
    native IntegerParameter/StringParameter/BooleanParameter
  - None parameter (VoidParameter → SQL NULL)
  - DECIMAL parameter with precision/scale carried in the SQL type
    string (auto-inferred — explicit-arg path has a pre-existing bug
    in native.py where format-args are swapped)

Unit pass: 45/45 (was 39). Full unit suite: 625 pass. Live e2e: 14/14
(was 11).

Co-authored-by: Isaac
Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
1 parent 8958e76 commit aa8bd24

4 files changed

Lines changed: 233 additions & 14 deletions
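
For orientation, a minimal user-level sketch of what this commit enables. This is a sketch only: connection setup is elided and `conn` is assumed to be a connection opened against the kernel backend; the parameter classes are the connector's real native parameters, used as in the e2e tests below.

from databricks.sql.parameters.native import IntegerParameter, StringParameter

with conn.cursor() as cur:
    # Previously this raised NotSupportedError on the kernel backend.
    # Now each TSparkParameter is forwarded to Statement.bind_param as
    # (1-based ordinal, value.stringValue, SQL type string).
    cur.execute(
        "SELECT ? AS i, ? AS s",
        [IntegerParameter(7), StringParameter("x")],
    )
    rows = cur.fetchall()
    assert rows[0][0] == 7 and rows[0][1] == "x"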

src/databricks/sql/backend/kernel/client.py

Lines changed: 7 additions & 9 deletions
@@ -14,10 +14,6 @@

 Phase 1 gaps documented in the integration design:

-- Parameter binding (``parameters=[TSparkParameter, ...]``) is not
-  yet supported — the PyO3 ``Statement`` doesn't expose
-  ``bind_param``. ``execute_command(parameters=[...])`` raises
-  ``NotSupportedError``.
 - ``query_tags`` on execute is not supported (kernel exposes
   ``statement_conf`` but PyO3 doesn't surface it).
 - ``get_tables`` with a non-empty ``table_types`` filter applies
@@ -262,11 +258,6 @@ def execute_command(
     ) -> Union["ResultSet", None]:
         if self._kernel_session is None:
             raise InterfaceError("Cannot execute_command without an open session.")
-        if parameters:
-            raise NotSupportedError(
-                "Parameter binding is not yet supported on the kernel backend "
-                "(PyO3 Statement.bind_param lands in a follow-up PR)."
-            )
         if query_tags:
             raise NotSupportedError(
                 "Statement-level query_tags are not yet supported on the kernel backend."
@@ -275,6 +266,13 @@
         stmt = self._kernel_session.statement()
         try:
             stmt.set_sql(operation)
+            if parameters:
+                # Lazy import — type_mapping touches pyarrow at
+                # module load; keep `execute_command` callable from
+                # contexts that don't yet need it.
+                from databricks.sql.backend.kernel.type_mapping import bind_tspark_params
+
+                bind_tspark_params(stmt, parameters)
             if async_op:
                 async_exec = stmt.submit()
                 command_id = CommandId.from_sea_statement_id(async_exec.statement_id)

src/databricks/sql/backend/kernel/type_mapping.py

Lines changed: 61 additions & 5 deletions
@@ -6,18 +6,20 @@
 flattens the conversion so ``KernelResultSet`` and any future
 kernel-result wrapper share the same mapping.

-Parameter binding (``TSparkParameter`` → kernel ``TypedValue``) is
-not yet implemented — the PyO3 ``Statement`` doesn't expose a
-``bind_param`` method on this branch. It'll land in a follow-up
-once that PyO3 surface ships.
+Parameter binding (``TSparkParameter`` → kernel
+``Statement.bind_param``) is handled by ``bind_tspark_params``, which
+forwards the connector's already-string-encoded form to the kernel
+binding without an intermediate Python-typed round-trip.
 """

 from __future__ import annotations

-from typing import List, Tuple
+from typing import Any, List, Tuple

 import pyarrow

+from databricks.sql.thrift_api.TCLIService import ttypes
+

 def _arrow_type_to_dbapi_string(arrow_type: pyarrow.DataType) -> str:
     """Map a pyarrow type to the Databricks SQL type name used in
@@ -77,3 +79,57 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
         )
         for field in schema
     ]
+
+
+def _tspark_param_value_str(param: ttypes.TSparkParameter) -> Any:
+    """Extract the string-encoded value from a ``TSparkParameter``,
+    or ``None`` for SQL NULL.
+
+    Native parameters (``IntegerParameter`` etc.) always wrap their
+    value in ``TSparkParameterValue(stringValue=str(self.value))``;
+    ``VoidParameter`` sets ``stringValue="None"`` but the type is
+    ``"VOID"`` — the kernel-side parser ignores the value when the
+    type is VOID, so we don't have to special-case it here.
+    """
+    if param.value is None:
+        return None
+    return param.value.stringValue
+
+
+def bind_tspark_params(
+    kernel_stmt, parameters: List[ttypes.TSparkParameter]
+) -> None:
+    """Bind a list of ``TSparkParameter`` onto a kernel ``Statement``.
+
+    The kernel expects positional bindings only (SEA v0 doesn't
+    accept named bindings on the wire). The connector's
+    ``TSparkParameter`` has an ``ordinal: bool`` flag; ``True`` means
+    "treat as positional in source-list order". Native bindings
+    almost always come through positionally today; named-binding
+    parameters surface as ``NotSupportedError`` so the user gets a
+    clear message instead of a server-side rejection.
+
+    Compound types (``ARRAY`` / ``MAP`` / ``STRUCT``) are routed
+    through the kernel parser, which currently rejects them — same
+    user-visible message ("compound parameter types … are not yet
+    supported"). Tracked as a follow-up.
+    """
+    for i, param in enumerate(parameters, start=1):
+        # The connector's `ordinal` field is a bool (True/False) on
+        # native params and indicates positional vs named. Named
+        # params can't flow through the kernel today; raise early
+        # rather than letting the server reject.
+        if getattr(param, "ordinal", None) is False and getattr(param, "name", None):
+            from databricks.sql.exc import NotSupportedError
+
+            raise NotSupportedError(
+                f"Named parameter binding (got name={param.name!r}) is not yet "
+                "supported on the kernel backend; pass parameters positionally."
+            )
+
+        sql_type = param.type or "STRING"
+        value_str = _tspark_param_value_str(param)
+        # The kernel takes 1-based ordinals; `i` is already that.
+        # Errors from the kernel side (bad literal, unsupported type,
+        # etc.) come up as KernelError and bubble through normally.
+        kernel_stmt.bind_param(i, value_str, sql_type)

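To make the forwarded triple concrete, here is a minimal sketch against a stub statement. The `_StubStmt` class is hypothetical (the real `Statement` is the PyO3 pyclass obtained from a kernel session); the `ttypes` usage mirrors the unit tests further down. It shows that a DECIMAL type string carries precision/scale through verbatim.

from databricks.sql.thrift_api.TCLIService import ttypes
from databricks.sql.backend.kernel.type_mapping import bind_tspark_params

class _StubStmt:
    # Hypothetical stand-in for the kernel Statement; just prints the
    # (ordinal, value, type) triple the mapper forwards.
    def bind_param(self, ordinal, value_str, sql_type):
        print(ordinal, value_str, sql_type)

p = ttypes.TSparkParameter(ordinal=True, name=None, type="DECIMAL(5,2)")
p.value = ttypes.TSparkParameterValue(stringValue="-123.45")
bind_tspark_params(_StubStmt(), [p])
# prints: 1 -123.45 DECIMAL(5,2)
# The SQL type string, precision and scale included, is forwarded
# verbatim; the kernel parser extracts (p, s) on its side.
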
tests/e2e/test_kernel_backend.py

Lines changed: 66 additions & 0 deletions
@@ -184,3 +184,69 @@ def test_bad_sql_surfaces_as_databaseerror(conn):
     # Structured fields copied off the kernel exception:
     assert getattr(err, "code", None) == "SqlError"
     assert getattr(err, "sql_state", None) == "42P01"
+
+
+# ── Parameter binding ─────────────────────────────────────────────
+
+
+def test_parameterized_query_round_trips(conn):
+    """Positional parameter binding via the kernel backend. The
+    connector's native parameter classes (IntegerParameter etc.)
+    serialize to TSparkParameter under the hood; the kernel
+    backend's mapper forwards them positionally to the kernel.
+    """
+    from databricks.sql.parameters.native import (
+        IntegerParameter,
+        StringParameter,
+        BooleanParameter,
+    )
+
+    with conn.cursor() as cur:
+        cur.execute(
+            "SELECT ? AS i, ? AS s, ? AS b",
+            [
+                IntegerParameter(42),
+                StringParameter("alice"),
+                BooleanParameter(True),
+            ],
+        )
+        rows = cur.fetchall()
+        assert len(rows) == 1
+        assert rows[0][0] == 42
+        assert rows[0][1] == "alice"
+        assert rows[0][2] is True
+
+
+def test_parameterized_query_with_null(conn):
+    """`None` in the parameter list flows through as VoidParameter
+    → kernel TypedValue::Null."""
+    with conn.cursor() as cur:
+        cur.execute("SELECT ? IS NULL AS is_null", [None])
+        rows = cur.fetchall()
+        assert rows[0][0] is True
+
+
+def test_parameterized_query_decimal(conn):
+    """DECIMAL parameters carry precision/scale in the SQL type
+    string ('DECIMAL(p,s)') — the kernel parser extracts them so
+    fractional digits survive the wire.
+
+    Uses the connector's auto-inference path
+    (`calculate_decimal_cast_string`) to derive precision/scale
+    from the value; the explicit-arg path
+    (`DecimalParameter(v, scale=, precision=)`) has a pre-existing
+    bug in this branch where the format-args are passed
+    `(scale, precision)` instead of `(precision, scale)` — out of
+    scope for this PR.
+    """
+    import decimal
+    from databricks.sql.parameters.native import DecimalParameter
+
+    with conn.cursor() as cur:
+        cur.execute(
+            "SELECT ? AS d",
+            [DecimalParameter(decimal.Decimal("-123.45"))],
+        )
+        rows = cur.fetchall()
+        # Server echoes back as decimal.Decimal.
+        assert str(rows[0][0]) == "-123.45"

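For reference, an illustrative sketch of the kind of inference the DECIMAL test leans on: deriving a 'DECIMAL(p,s)' cast string from the value itself. This is an assumption-level sketch of the behavior the docstring above describes, not the connector's calculate_decimal_cast_string.

import decimal

def decimal_cast_string(v: decimal.Decimal) -> str:
    # Illustrative only: scale = digits after the decimal point,
    # precision = total significant digits (at least the scale).
    sign, digits, exponent = v.as_tuple()
    scale = max(0, -exponent)
    precision = max(len(digits), scale)
    return f"DECIMAL({precision},{scale})"

print(decimal_cast_string(decimal.Decimal("-123.45")))  # DECIMAL(5,2)
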
tests/unit/test_kernel_type_mapping.py

Lines changed: 99 additions & 0 deletions
@@ -71,3 +71,102 @@ def test_description_from_schema_preserves_field_names_and_order():
     for d in desc:
         assert len(d) == 7
         assert d[2:] == (None, None, None, None, None)
+
+
+# ─── bind_tspark_params ──────────────────────────────────────────────────
+
+
+def _mk_param(*, type, value, ordinal=True, name=None):
+    """Build a minimal TSparkParameter for tests."""
+    from databricks.sql.thrift_api.TCLIService import ttypes
+
+    p = ttypes.TSparkParameter(ordinal=ordinal, name=name, type=type)
+    p.value = ttypes.TSparkParameterValue(stringValue=value) if value is not None else None
+    return p
+
+
+class _RecordingStmt:
+    """Stand-in for the kernel `Statement` pyclass — records every
+    `bind_param` call so tests can assert the (ordinal, value, type)
+    triples the mapper forwarded."""
+
+    def __init__(self):
+        self.calls = []
+
+    def bind_param(self, ordinal, value_str, sql_type):
+        self.calls.append((ordinal, value_str, sql_type))
+
+
+def test_bind_tspark_params_forwards_each_param_positionally():
+    from databricks.sql.backend.kernel.type_mapping import bind_tspark_params
+
+    params = [
+        _mk_param(type="INT", value="42"),
+        _mk_param(type="STRING", value="alice"),
+        _mk_param(type="DATE", value="2026-05-15"),
+    ]
+    stmt = _RecordingStmt()
+    bind_tspark_params(stmt, params)
+    assert stmt.calls == [
+        (1, "42", "INT"),
+        (2, "alice", "STRING"),
+        (3, "2026-05-15", "DATE"),
+    ]
+
+
+def test_bind_tspark_params_null_value():
+    """TSparkParameter with value=None → kernel sees value_str=None,
+    interpreted as SQL NULL regardless of the SQL type."""
+    from databricks.sql.backend.kernel.type_mapping import bind_tspark_params
+
+    p = _mk_param(type="STRING", value=None)
+    stmt = _RecordingStmt()
+    bind_tspark_params(stmt, [p])
+    assert stmt.calls == [(1, None, "STRING")]
+
+
+def test_bind_tspark_params_void_passes_through():
+    """VoidParameter sets type='VOID' with stringValue='None'; the
+    kernel parser ignores the value when type=VOID."""
+    from databricks.sql.backend.kernel.type_mapping import bind_tspark_params
+
+    p = _mk_param(type="VOID", value="None")
+    stmt = _RecordingStmt()
+    bind_tspark_params(stmt, [p])
+    assert stmt.calls == [(1, "None", "VOID")]
+
+
+def test_bind_tspark_params_named_param_rejected():
+    """The kernel doesn't accept named bindings on the SEA wire;
+    surface that at the connector layer so the user gets a pointed
+    error instead of a server-side rejection."""
+    from databricks.sql.backend.kernel.type_mapping import bind_tspark_params
+    from databricks.sql.exc import NotSupportedError
+
+    p = _mk_param(type="INT", value="42", ordinal=False, name="my_param")
+    stmt = _RecordingStmt()
+    with pytest.raises(NotSupportedError, match="(?i)named"):
+        bind_tspark_params(stmt, [p])
+    # Nothing should have been forwarded before the rejection.
+    assert stmt.calls == []
+
+
+def test_bind_tspark_params_missing_type_defaults_to_string():
+    """Defensive: a TSparkParameter with no `type` shouldn't crash
+    the mapper — fall back to STRING and let the kernel parse."""
+    from databricks.sql.backend.kernel.type_mapping import bind_tspark_params
+    from databricks.sql.thrift_api.TCLIService import ttypes
+
+    p = ttypes.TSparkParameter(ordinal=True, name=None, type=None)
+    p.value = ttypes.TSparkParameterValue(stringValue="hello")
+    stmt = _RecordingStmt()
+    bind_tspark_params(stmt, [p])
+    assert stmt.calls == [(1, "hello", "STRING")]
+
+
+def test_bind_tspark_params_empty_list_is_noop():
+    from databricks.sql.backend.kernel.type_mapping import bind_tspark_params
+
+    stmt = _RecordingStmt()
+    bind_tspark_params(stmt, [])
+    assert stmt.calls == []
