Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3435,21 +3435,17 @@ mod tests {
}

#[tokio::test]
async fn in_list_types() -> Result<()> {
// expression: "a in ('a', 1)"
async fn in_list_types_mixed_string_int_error() -> Result<()> {
// expression: "c1 in ('a', 1)" where c1 is Utf8
let list = vec![lit("a"), lit(1i64)];
let logical_plan = test_csv_scan()
.await?
// filter clause needs the type coercion rule applied
.filter(col("c12").lt(lit(0.05)))?
.project(vec![col("c1").in_list(list, false)])?
.build()?;
let execution_plan = plan(&logical_plan).await?;
// verify that the plan correctly adds cast from Int64(1) to Utf8, and the const will be evaluated.

let expected = r#"expr: BinaryExpr { left: BinaryExpr { left: Column { name: "c1", index: 0 }, op: Eq, right: Literal { value: Utf8("a"), field: Field { name: "lit", data_type: Utf8 } }, fail_on_overflow: false }"#;
let e = plan(&logical_plan).await.unwrap_err().to_string();

assert_contains!(format!("{execution_plan:?}"), expected);
assert_contains!(&e, "Cannot cast string 'a' to value of Int64 type");

Ok(())
}
Expand Down
31 changes: 18 additions & 13 deletions datafusion/core/tests/expr_api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,20 +342,25 @@ fn test_create_physical_expr_nvl2() {

#[tokio::test]
async fn test_create_physical_expr_coercion() {
// create_physical_expr does apply type coercion and unwrapping in cast
// create_physical_expr applies type coercion (and can unwrap/fold
// literal casts). Comparison coercion prefers numeric types, so
// string/int comparisons cast the string side to the numeric type.
//
// expect the cast on the literals
// compare string function to int `id = 1`
create_expr_test(col("id").eq(lit(1i32)), "id@0 = CAST(1 AS Utf8)");
create_expr_test(lit(1i32).eq(col("id")), "CAST(1 AS Utf8) = id@0");
// compare int col to string literal `i = '202410'`
// Note this casts the column (not the field)
create_expr_test(col("i").eq(lit("202410")), "CAST(i@1 AS Utf8) = 202410");
create_expr_test(lit("202410").eq(col("i")), "202410 = CAST(i@1 AS Utf8)");
// however, when simplified, the casts on i should be removed
// https://github.com/apache/datafusion/issues/14944
create_simplified_expr_test(col("i").eq(lit("202410")), "CAST(i@1 AS Utf8) = 202410");
create_simplified_expr_test(lit("202410").eq(col("i")), "CAST(i@1 AS Utf8) = 202410");
// string column vs int literal: id (Utf8) is cast to Int32
create_expr_test(col("id").eq(lit(1i32)), "CAST(id@0 AS Int32) = 1");
create_expr_test(lit(1i32).eq(col("id")), "1 = CAST(id@0 AS Int32)");
// int column vs string literal: the string literal is cast to Int64
create_expr_test(col("i").eq(lit("202410")), "i@1 = CAST(202410 AS Int64)");
create_expr_test(lit("202410").eq(col("i")), "CAST(202410 AS Int64) = i@1");
// when simplified, the literal cast is constant-folded
create_simplified_expr_test(
col("i").eq(lit("202410")),
"i@1 = CAST(202410 AS Int64)",
);
create_simplified_expr_test(
lit("202410").eq(col("i")),
"i@1 = CAST(202410 AS Int64)",
);
}

/// Evaluates the specified expr as an aggregate and compares the result to the
Expand Down
16 changes: 13 additions & 3 deletions datafusion/core/tests/sql/unparser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,16 +143,26 @@ fn tpch_queries() -> Vec<TestQuery> {
}

/// Create a new SessionContext for testing that has all Clickbench tables registered.
///
/// Registers the raw Parquet as `hits_raw`, then creates a `hits` view that
/// casts `EventDate` from UInt16 (day-offset) to DATE. This mirrors the
/// approach used by the benchmark runner in `benchmarks/src/clickbench.rs`.
async fn clickbench_test_context() -> Result<SessionContext> {
let ctx = SessionContext::new();
ctx.register_parquet(
"hits",
"hits_raw",
"tests/data/clickbench_hits_10.parquet",
ParquetReadOptions::default(),
)
.await?;
// Sanity check we found the table by querying its schema; it should not be empty
// Otherwise if the path is wrong the tests will all fail in confusing ways
ctx.sql(
r#"CREATE VIEW hits AS
SELECT * EXCEPT ("EventDate"),
CAST(CAST("EventDate" AS INTEGER) AS DATE) AS "EventDate"
FROM hits_raw"#,
)
.await?;
// Sanity check we found the table by querying its schema
let df = ctx.sql("SELECT * FROM hits LIMIT 1").await?;
assert!(
!df.schema().fields().is_empty(),
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr-common/src/interval_arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use std::fmt::{self, Display, Formatter};
use std::ops::{AddAssign, SubAssign};

use crate::operator::Operator;
use crate::type_coercion::binary::{BinaryTypeCoercer, comparison_coercion_numeric};
use crate::type_coercion::binary::{BinaryTypeCoercer, comparison_coercion};

use arrow::compute::{CastOptions, cast_with_options};
use arrow::datatypes::{
Expand Down Expand Up @@ -734,7 +734,7 @@ impl Interval {
(self.lower.clone(), self.upper.clone(), rhs.clone())
} else {
let maybe_common_type =
comparison_coercion_numeric(&self.data_type(), &rhs.data_type());
comparison_coercion(&self.data_type(), &rhs.data_type());
assert_or_internal_err!(
maybe_common_type.is_some(),
"Data types must be compatible for containment checks, lhs:{}, rhs:{}",
Expand Down
21 changes: 11 additions & 10 deletions datafusion/expr-common/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ pub enum Arity {
pub enum TypeSignature {
/// One or more arguments of a common type out of a list of valid types.
///
/// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]).
/// For functions that take no arguments (e.g. `random()`), see [`TypeSignature::Nullary`].
///
/// # Examples
///
Expand All @@ -184,38 +184,39 @@ pub enum TypeSignature {
Uniform(usize, Vec<DataType>),
/// One or more arguments with exactly the specified types in order.
///
/// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
/// For functions that take no arguments (e.g. `random()`), use [`TypeSignature::Nullary`].
Exact(Vec<DataType>),
/// One or more arguments belonging to the [`TypeSignatureClass`], in order.
///
/// [`Coercion`] contains not only the desired type but also the allowed
/// casts. For example, a function may expect a string type while also
/// allowing its argument to be cast from a binary type.
///
/// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
/// For functions that take no arguments (e.g. `random()`), see [`TypeSignature::Nullary`].
Coercible(Vec<Coercion>),
/// One or more arguments coercible to a single, comparable type.
///
/// Each argument will be coerced to a single type using the
/// coercion rules described in [`comparison_coercion_numeric`].
/// coercion rules described in [`comparison_coercion`].
///
/// # Examples
///
/// If the `nullif(1, 2)` function is called with `i32` and `i64` arguments
/// the types will both be coerced to `i64` before the function is invoked.
///
/// If the `nullif('1', 2)` function is called with `Utf8` and `i64` arguments
/// the types will both be coerced to `Utf8` before the function is invoked.
/// the types will both be coerced to `Int64` before the function is invoked
/// (numeric is preferred over string).
///
/// Note:
/// - For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]).
/// - For functions that take no arguments (e.g. `random()`), see [`TypeSignature::Nullary`].
/// - If all arguments have type [`DataType::Null`], they are coerced to `Utf8`
///
/// [`comparison_coercion_numeric`]: crate::type_coercion::binary::comparison_coercion_numeric
/// [`comparison_coercion`]: crate::type_coercion::binary::comparison_coercion
Comparable(usize),
/// One or more arguments of arbitrary types.
///
/// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
/// For functions that take no arguments (e.g. `random()`), use [`TypeSignature::Nullary`].
Any(usize),
/// Matches exactly one of a list of [`TypeSignature`]s.
///
Expand All @@ -233,7 +234,7 @@ pub enum TypeSignature {
///
/// See [`NativeType::is_numeric`] to know which type is considered numeric
///
/// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
/// For functions that take no arguments (e.g. `random()`), use [`TypeSignature::Nullary`].
///
/// [`NativeType::is_numeric`]: datafusion_common::types::NativeType::is_numeric
Numeric(usize),
Expand All @@ -246,7 +247,7 @@ pub enum TypeSignature {
/// For example, if a function is called with (utf8, large_utf8), all
/// arguments will be coerced to `LargeUtf8`
///
/// For functions that take no arguments (e.g. `random()` use [`TypeSignature::Nullary`]).
/// For functions that take no arguments (e.g. `random()`), use [`TypeSignature::Nullary`].
String(usize),
/// No arguments
Nullary,
Expand Down
Loading
Loading