Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions datafusion/expr/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,10 @@ impl LogicalPlanBuilder {
let n_cols = values[0].len();
let mut fields = ValuesFields::new();
for j in 0..n_cols {
let field_type = schema.field(j).data_type();
let field_nullable = schema.field(j).is_nullable();
let field = schema.field(j);
let field_type = field.data_type();
let field_nullable = field.is_nullable();
let field_metadata = FieldMetadata::new_from_field(field);
for row in values.iter() {
let value = &row[j];
let data_type = value.get_type(schema)?;
Expand All @@ -288,7 +290,12 @@ impl LogicalPlanBuilder {
);
}
}
fields.push(field_type.to_owned(), field_nullable);
let metadata = if field_metadata.is_empty() {
None
} else {
Some(field_metadata)
};
fields.push_with_metadata(field_type.to_owned(), field_nullable, metadata);
}

Self::infer_inner(values, fields, schema)
Expand Down Expand Up @@ -1567,10 +1574,6 @@ impl ValuesFields {
Self::default()
}

pub fn push(&mut self, data_type: DataType, nullable: bool) {
self.push_with_metadata(data_type, nullable, None);
}

pub fn push_with_metadata(
&mut self,
data_type: DataType,
Expand Down
15 changes: 15 additions & 0 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,21 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

// If no type_planner can handle this type, use the default conversion
match sql_type {
// Canonical Arrow extension types
SQLDataType::Uuid => Ok(Arc::new(
Field::new("", DataType::FixedSizeBinary(16), true).with_metadata(
HashMap::from([(
"ARROW:extension:name".to_string(),
"arrow.uuid".to_string(),
)]),
),
)),
SQLDataType::JSON | SQLDataType::JSONB => Ok(Arc::new(
Field::new("", DataType::Utf8, true).with_metadata(HashMap::from([(
"ARROW:extension:name".to_string(),
"arrow.json".to_string(),
)])),
)),
SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) => {
// Arrays may be multi-dimensional.
Ok(self.convert_data_type_to_field(inner_sql_type)?.into_list())
Expand Down
30 changes: 23 additions & 7 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use crate::utils::normalize_ident;

use arrow::datatypes::{Field, FieldRef, Fields};
use datafusion_common::error::_plan_err;
use datafusion_common::metadata::FieldMetadata;
use datafusion_common::parsers::CompressionTypeVariant;
use datafusion_common::{
Column, Constraint, Constraints, DFSchema, DFSchemaRef, DataFusionError, Result,
Expand All @@ -44,15 +45,15 @@ use datafusion_expr::logical_plan::DdlStatement;
use datafusion_expr::logical_plan::builder::project;
use datafusion_expr::utils::expr_to_columns;
use datafusion_expr::{
Analyze, CreateCatalog, CreateCatalogSchema,
Analyze, Cast, CreateCatalog, CreateCatalogSchema,
CreateExternalTable as PlanCreateExternalTable, CreateFunction, CreateFunctionBody,
CreateIndex as PlanCreateIndex, CreateMemoryTable, CreateView, Deallocate,
DescribeTable, DmlStatement, DropCatalogSchema, DropFunction, DropTable, DropView,
EmptyRelation, Execute, Explain, ExplainFormat, Expr, ExprSchemable, Filter,
LogicalPlan, LogicalPlanBuilder, OperateFunctionArg, PlanType, Prepare,
ResetVariable, SetVariable, SortExpr, Statement as PlanStatement, ToStringifiedPlan,
TransactionAccessMode, TransactionConclusion, TransactionEnd,
TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, cast, col,
TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, col,
};
use sqlparser::ast::{
self, BeginTransactionKind, CheckConstraint, ForeignKeyConstraint, IndexColumn,
Expand Down Expand Up @@ -530,11 +531,26 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
.iter()
.zip(input_fields)
.map(|(field, input_field)| {
cast(
col(input_field.name()),
field.data_type().clone(),
)
.alias(field.name())
let target_field = Arc::new(
Field::new(
field.name(),
field.data_type().clone(),
field.is_nullable(),
)
.with_metadata(field.metadata().clone()),
);
let metadata =
FieldMetadata::new_from_field(field.as_ref());
let alias_metadata = if metadata.is_empty() {
None
} else {
Some(metadata)
};
Expr::Cast(Cast::new_from_field(
Box::new(col(input_field.name())),
target_field,
))
.alias_with_metadata(field.name(), alias_metadata)
})
.collect::<Vec<_>>();

Expand Down
6 changes: 0 additions & 6 deletions datafusion/sql/tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,12 +343,6 @@ impl TypePlanner for CustomTypePlanner {
sql_type: &sqlparser::ast::DataType,
) -> Result<Option<FieldRef>> {
match sql_type {
sqlparser::ast::DataType::Uuid => Ok(Some(Arc::new(
Field::new("", DataType::FixedSizeBinary(16), true).with_metadata(
[("ARROW:extension:name".to_string(), "arrow.uuid".to_string())]
.into(),
),
))),
sqlparser::ast::DataType::Datetime(precision) => {
let precision = match precision {
Some(0) => TimeUnit::Second,
Expand Down
31 changes: 1 addition & 30 deletions datafusion/sqllogictest/src/test_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,14 @@ use arrow::array::{
LargeStringArray, StringArray, TimestampNanosecondArray, UnionArray,
};
use arrow::buffer::ScalarBuffer;
use arrow::datatypes::{
DataType, Field, FieldRef, Schema, SchemaRef, TimeUnit, UnionFields,
};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit, UnionFields};
use arrow::record_batch::RecordBatch;
use datafusion::catalog::{
CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, Session,
};
use datafusion::common::{DataFusionError, Result, not_impl_err};
use datafusion::functions::math::abs;
use datafusion::logical_expr::async_udf::{AsyncScalarUDF, AsyncScalarUDFImpl};
use datafusion::logical_expr::planner::TypePlanner;
use datafusion::logical_expr::{
ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature,
Volatility, create_udf,
Expand All @@ -56,7 +53,6 @@ use datafusion::common::cast::as_float64_array;
use datafusion::execution::SessionStateBuilder;
use datafusion::execution::runtime_env::RuntimeEnv;
use log::info;
use sqlparser::ast;
use tempfile::TempDir;

/// Context for running tests
Expand All @@ -67,23 +63,6 @@ pub struct TestContext {
test_dir: Option<TempDir>,
}

#[derive(Debug)]
struct SqlLogicTestTypePlanner;

impl TypePlanner for SqlLogicTestTypePlanner {
fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> {
match sql_type {
ast::DataType::Uuid => Ok(Some(Arc::new(
Field::new("", DataType::FixedSizeBinary(16), true).with_metadata(
[("ARROW:extension:name".to_string(), "arrow.uuid".to_string())]
.into(),
),
))),
_ => Ok(None),
}
}
}

impl TestContext {
pub fn new(ctx: SessionContext) -> Self {
Self {
Expand Down Expand Up @@ -112,14 +91,6 @@ impl TestContext {
state_builder = state_builder.with_spark_features();
}

if matches!(
relative_path.file_name().and_then(|name| name.to_str()),
Some("cast_extension_type_metadata.slt")
) {
state_builder =
state_builder.with_type_planner(Arc::new(SqlLogicTestTypePlanner));
}

let state = state_builder.build();

let mut test_ctx = TestContext::new(SessionContext::new_with_state(state));
Expand Down
137 changes: 137 additions & 0 deletions datafusion/sqllogictest/test_files/sql_extension_types.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for built-in mapping of SQL types to Arrow canonical extension types.
# See: https://arrow.apache.org/docs/format/CanonicalExtensions.html

#######
# UUID Extension Type
#######

# CAST to UUID preserves extension metadata
query ?T
SELECT CAST(arrow_cast(X'00010203040506070809000102030506', 'FixedSizeBinary(16)') AS UUID),
arrow_metadata(CAST(arrow_cast(X'00010203040506070809000102030506', 'FixedSizeBinary(16)') AS UUID), 'ARROW:extension:name');
----
00010203040506070809000102030506 arrow.uuid

# CREATE TABLE with UUID column preserves extension metadata through VALUES
statement ok
CREATE TABLE test_uuid (
value UUID
) AS VALUES
(NULL);

query TTT
DESCRIBE test_uuid;
----
value FixedSizeBinary(16) YES

query T
SELECT arrow_metadata(value, 'ARROW:extension:name') FROM test_uuid;
----
arrow.uuid

statement ok
DROP TABLE test_uuid;

# UUID column with non-null binary value
statement ok
CREATE TABLE test_uuid2 (
id UUID
) AS VALUES
(CAST(arrow_cast(X'00010203040506070809000102030506', 'FixedSizeBinary(16)') AS UUID));

query T
SELECT arrow_metadata(id, 'ARROW:extension:name') FROM test_uuid2;
----
arrow.uuid

statement ok
DROP TABLE test_uuid2;

#######
# JSON Extension Type
#######

# CAST to JSON preserves extension metadata
query TT
SELECT CAST('{"a": 1}' AS JSON),
arrow_metadata(CAST('{"a": 1}' AS JSON), 'ARROW:extension:name');
----
{"a": 1} arrow.json

# CREATE TABLE with JSON column preserves extension metadata
statement ok
CREATE TABLE test_json (
data JSON
) AS VALUES
(NULL);

query TTT
DESCRIBE test_json;
----
data Utf8 YES

query T
SELECT arrow_metadata(data, 'ARROW:extension:name') FROM test_json;
----
arrow.json

statement ok
DROP TABLE test_json;

# JSON column with non-null value
statement ok
CREATE TABLE test_json2 (
data JSON
) AS VALUES
(CAST('{"hello": "world"}' AS JSON));

query T
SELECT arrow_metadata(data, 'ARROW:extension:name') FROM test_json2;
----
arrow.json

statement ok
DROP TABLE test_json2;

#######
# JSONB Extension Type (maps to arrow.json)
#######

# JSONB is treated as arrow.json
query TT
SELECT CAST('{"key": "value"}' AS JSONB),
arrow_metadata(CAST('{"key": "value"}' AS JSONB), 'ARROW:extension:name');
----
{"key": "value"} arrow.json

# CREATE TABLE with JSONB column preserves extension metadata
statement ok
CREATE TABLE test_jsonb (
data JSONB
) AS VALUES
(NULL);

query T
SELECT arrow_metadata(data, 'ARROW:extension:name') FROM test_jsonb;
----
arrow.json

statement ok
DROP TABLE test_jsonb;