diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs
index 3133ed7337f25..7fcbf88464d22 100644
--- a/datafusion/spark/src/function/datetime/mod.rs
+++ b/datafusion/spark/src/function/datetime/mod.rs
@@ -27,6 +27,7 @@ pub mod last_day;
 pub mod make_dt_interval;
 pub mod make_interval;
 pub mod next_day;
+pub mod quarter;
 pub mod time_trunc;
 pub mod to_utc_timestamp;
 pub mod trunc;
@@ -72,6 +73,7 @@ make_udf_function!(
     unix_seconds,
     unix::SparkUnixTimestamp::seconds
 );
+make_udf_function!(quarter::SparkQuarter, quarter);
 
 pub mod expr_fn {
     use datafusion_functions::export_functions;
@@ -179,6 +181,11 @@ pub mod expr_fn {
         "Returns the number of seconds since epoch (1970-01-01 00:00:00 UTC) for the given timestamp `ts`.",
         ts
     ));
+    export_functions!((
+        quarter,
+        "Returns the quarter of the year for date, in the range 1 to 4.",
+        arg1
+    ));
 }
 
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
@@ -204,5 +211,6 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
         unix_micros(),
         unix_millis(),
         unix_seconds(),
+        quarter(),
     ]
 }
diff --git a/datafusion/spark/src/function/datetime/quarter.rs b/datafusion/spark/src/function/datetime/quarter.rs
new file mode 100644
index 0000000000000..287f6adac2826
--- /dev/null
+++ b/datafusion/spark/src/function/datetime/quarter.rs
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::{DataType, Field, FieldRef};
+use datafusion_common::types::{NativeType, logical_date, logical_string};
+use datafusion_common::utils::take_function_args;
+use datafusion_common::{Result, ScalarValue, internal_err};
+use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext};
+use datafusion_expr::{
+    Coercion, ColumnarValue, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl,
+    Signature, TypeSignature, TypeSignatureClass, Volatility,
+};
+use datafusion_functions::datetime::date_part;
+use std::sync::Arc;
+
+/// Spark-compatible `quarter` expression.
+///
+/// Returns the quarter of the year, in the range 1 to 4, for a date or
+/// timestamp argument. String arguments are implicitly coerced to a date.
+///
+/// Calls are never evaluated by this type: [`ScalarUDFImpl::simplify`] rewrites
+/// them to `date_part('quarter', <arg>)`.
+///
+/// <https://spark.apache.org/docs/latest/api/sql/index.html#quarter>
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkQuarter {
+    signature: Signature,
+}
+
+impl Default for SparkQuarter {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SparkQuarter {
+    /// Creates the function with its accepted argument types.
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::one_of(
+                vec![
+                    // Any timestamp type, used as is.
+                    TypeSignature::Coercible(vec![Coercion::new_exact(
+                        TypeSignatureClass::Timestamp,
+                    )]),
+                    // A date, used as is.
+                    TypeSignature::Coercible(vec![Coercion::new_exact(
+                        TypeSignatureClass::Native(logical_date()),
+                    )]),
+                    // A string, implicitly cast to a date.
+                    TypeSignature::Coercible(vec![Coercion::new_implicit(
+                        TypeSignatureClass::Native(logical_date()),
+                        vec![TypeSignatureClass::Native(logical_string())],
+                        NativeType::Date,
+                    )]),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for SparkQuarter {
+    fn name(&self) -> &str {
+        "quarter"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Always returns an internal error; `return_field_from_args` is used instead.
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        internal_err!("return_field_from_args should be used instead")
+    }
+
+    /// Always reports a nullable `Int32` field named after the function.
+    fn return_field_from_args(&self, _args: ReturnFieldArgs) -> Result<FieldRef> {
+        Ok(Arc::new(Field::new(self.name(), DataType::Int32, true)))
+    }
+
+    /// Always returns an internal error: calls are expected to have been
+    /// rewritten by `simplify` before execution.
+    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        internal_err!("spark quarter should have been simplified to standard date_part")
+    }
+
+    /// Rewrites `quarter(expr)` to `date_part('quarter', expr)`.
+    fn simplify(
+        &self,
+        args: Vec<Expr>,
+        _info: &SimplifyContext,
+    ) -> Result<ExprSimplifyResult> {
+        let [date_expr] = take_function_args(self.name(), args)?;
+        let part_expr = Expr::Literal(ScalarValue::new_utf8("quarter"), None);
+
+        let date_part_expr = Expr::ScalarFunction(ScalarFunction::new_udf(
+            date_part(),
+            vec![part_expr, date_expr],
+        ));
+        Ok(ExprSimplifyResult::Simplified(date_part_expr))
+    }
+}
diff --git a/datafusion/sqllogictest/test_files/spark/datetime/quarter.slt b/datafusion/sqllogictest/test_files/spark/datetime/quarter.slt
index 27b6728b0b7bb..175328359efbb 100644
--- a/datafusion/sqllogictest/test_files/spark/datetime/quarter.slt
+++ b/datafusion/sqllogictest/test_files/spark/datetime/quarter.slt
@@ -15,13 +15,62 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# This file was originally created by a porting script from:
-#   https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function
-# This file is part of the implementation of the datafusion-spark function library.
-# For more information, please see:
-#   https://github.com/apache/datafusion/issues/15914
-
-## Original Query: SELECT quarter('2016-08-31');
-## PySpark 3.5.5 Result: {'quarter(2016-08-31)': 3, 'typeof(quarter(2016-08-31))': 'int', 'typeof(2016-08-31)': 'string'}
-#query
-#SELECT quarter('2016-08-31'::string);
+query I
+SELECT quarter('2009-01-12'::date);
+----
+1
+
+query I
+SELECT quarter('1970-01-01'::date);
+----
+1
+
+query I
+SELECT quarter('1870-01-01'::date);
+----
+1
+
+query I
+SELECT quarter('2011-04-21'::date);
+----
+2
+
+query I
+SELECT quarter('2024-08-14'::date);
+----
+3
+
+query I
+SELECT quarter('2016-12-12'::date);
+----
+4
+
+query I
+SELECT quarter(NULL::date);
+----
+NULL
+
+query I
+SELECT quarter(NULL::timestamp);
+----
+NULL
+
+query I
+SELECT quarter('2009-01-12 10:00:00'::timestamp);
+----
+1
+
+query I
+SELECT quarter('2020-09-08T12:00:12.12345678+00:00'::timestamp);
+----
+3
+
+query I
+SELECT quarter('2016-12-12');
+----
+4
+
+query I
+SELECT quarter('2009-01-12 10:00:00');
+----
+1