diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 4511d8db90075..f70640b519dfb 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -44,12 +44,12 @@ use crate::cast::{ as_float16_array, as_float32_array, as_float64_array, as_int8_array, as_int16_array, as_int32_array, as_int64_array, as_interval_dt_array, as_interval_mdn_array, as_interval_ym_array, as_large_binary_array, as_large_list_array, - as_large_string_array, as_run_array, as_string_array, as_string_view_array, - as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array, - as_time64_nanosecond_array, as_timestamp_microsecond_array, - as_timestamp_millisecond_array, as_timestamp_nanosecond_array, - as_timestamp_second_array, as_uint8_array, as_uint16_array, as_uint32_array, - as_uint64_array, as_union_array, + as_large_list_view_array, as_large_string_array, as_list_view_array, as_run_array, + as_string_array, as_string_view_array, as_time32_millisecond_array, + as_time32_second_array, as_time64_microsecond_array, as_time64_nanosecond_array, + as_timestamp_microsecond_array, as_timestamp_millisecond_array, + as_timestamp_nanosecond_array, as_timestamp_second_array, as_uint8_array, + as_uint16_array, as_uint32_array, as_uint64_array, as_union_array, }; use crate::error::{_exec_err, _internal_err, _not_impl_err, DataFusionError, Result}; use crate::format::DEFAULT_CAST_OPTIONS; @@ -63,14 +63,15 @@ use arrow::array::{ DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray, - Int8Array, Int16Array, Int32Array, Int64Array, IntervalDayTimeArray, - IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray, - LargeStringArray, ListArray, MapArray, MutableArrayData, OffsetSizeTrait, - PrimitiveArray, RunArray, Scalar, StringArray, StringViewArray, StringViewBuilder, - StructArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, - Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, - UInt64Array, UnionArray, downcast_run_array, new_empty_array, new_null_array, + GenericListViewArray, Int8Array, Int16Array, Int32Array, Int64Array, + IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, + LargeBinaryArray, LargeListArray, LargeListViewArray, LargeStringArray, ListArray, + ListViewArray, MapArray, MutableArrayData, PrimitiveArray, RunArray, Scalar, + StringArray, StringViewArray, StringViewBuilder, StructArray, Time32MillisecondArray, + Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, UnionArray, + downcast_run_array, new_empty_array, new_null_array, }; use arrow::buffer::{BooleanBuffer, ScalarBuffer}; use arrow::compute::kernels::cast::{CastOptions, cast_with_options}; @@ -257,8 +258,8 @@ pub(crate) fn format_timestamp_type_for_error(target_type: &DataType) -> String /// /// # Nested Types /// -/// `List` / `LargeList` / `FixedSizeList` / `Struct` / `Map` are represented as a -/// single element array of the corresponding type. +/// `List` / `LargeList` / `FixedSizeList` / `ListView` / `LargeListView` / `Struct` / `Map` +/// are represented as a single element array of the corresponding type. /// /// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`] /// ``` @@ -381,6 +382,14 @@ pub enum ScalarValue { List(Arc), /// The array must be a LargeListArray with length 1. LargeList(Arc), + /// Represents a single element of a [`ListViewArray`] as an [`ArrayRef`] + /// + /// The array must be a ListViewArray with length 1. + ListView(Arc), + /// Represents a single element of a [`LargeListViewArray`] as an [`ArrayRef`] + /// + /// The array must be a LargeListViewArray with length 1. + LargeListView(Arc), /// Represents a single element [`StructArray`] as an [`ArrayRef`]. See /// [`ScalarValue`] for examples of how to create instances of this type. Struct(Arc), @@ -517,6 +526,10 @@ impl PartialEq for ScalarValue { (List(_), _) => false, (LargeList(v1), LargeList(v2)) => v1.eq(v2), (LargeList(_), _) => false, + (ListView(v1), ListView(v2)) => v1.eq(v2), + (ListView(_), _) => false, + (LargeListView(v1), LargeListView(v2)) => v1.eq(v2), + (LargeListView(_), _) => false, (Struct(v1), Struct(v2)) => v1.eq(v2), (Struct(_), _) => false, (Map(v1), Map(v2)) => v1.eq(v2), @@ -662,7 +675,8 @@ impl PartialOrd for ScalarValue { (FixedSizeBinary(_, _), _) => None, (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2), (LargeBinary(_), _) => None, - // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1 + // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList / ScalarValue::ListView / ScalarValue::LargeListView + // are guaranteed to have length 1 (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()), (FixedSizeList(arr1), FixedSizeList(arr2)) => { partial_cmp_list(arr1.as_ref(), arr2.as_ref()) @@ -670,7 +684,17 @@ impl PartialOrd for ScalarValue { (LargeList(arr1), LargeList(arr2)) => { partial_cmp_list(arr1.as_ref(), arr2.as_ref()) } - (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None, + (ListView(arr1), ListView(arr2)) => { + partial_cmp_list(arr1.as_ref(), arr2.as_ref()) + } + (LargeListView(arr1), LargeListView(arr2)) => { + partial_cmp_list(arr1.as_ref(), arr2.as_ref()) + } + (List(_), _) + | (LargeList(_), _) + | (FixedSizeList(_), _) + | (ListView(_), _) + | (LargeListView(_), _) => None, (Struct(struct_arr1), Struct(struct_arr2)) => { partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref()) } @@ -745,7 +769,7 @@ impl PartialOrd for ScalarValue { } } -/// List/LargeList/FixedSizeList scalars always have a single element +/// List/LargeList/FixedSizeList/ListView/LargeListView scalars always have a single element /// array. This function returns that array fn first_array_for_list(arr: &dyn Array) -> ArrayRef { assert_eq!(arr.len(), 1); @@ -755,14 +779,18 @@ fn first_array_for_list(arr: &dyn Array) -> ArrayRef { arr.value(0) } else if let Some(arr) = arr.as_fixed_size_list_opt() { arr.value(0) + } else if let Some(arr) = arr.as_list_view_opt::() { + arr.value(0) + } else if let Some(arr) = arr.as_list_view_opt::() { + arr.value(0) } else { unreachable!( - "Since only List / LargeList / FixedSizeList are supported, this should never happen" + "Since only List / LargeList / FixedSizeList / ListView / LargeListView are supported, this should never happen" ) } } -/// Compares two List/LargeList/FixedSizeList scalars +/// Compares two List/LargeList/FixedSizeList/ListView/LargeListView scalars fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option { if arr1.data_type() != arr2.data_type() { return None; @@ -950,6 +978,12 @@ impl Hash for ScalarValue { FixedSizeList(arr) => { hash_nested_array(arr.to_owned() as ArrayRef, state); } + ListView(arr) => { + hash_nested_array(arr.to_owned() as ArrayRef, state); + } + LargeListView(arr) => { + hash_nested_array(arr.to_owned() as ArrayRef, state); + } Struct(arr) => { hash_nested_array(arr.to_owned() as ArrayRef, state); } @@ -1288,6 +1322,12 @@ impl ScalarValue { 1, ))) } + DataType::ListView(field_ref) => ScalarValue::ListView(Arc::new( + GenericListViewArray::new_null(Arc::clone(field_ref), 1), + )), + DataType::LargeListView(field_ref) => ScalarValue::LargeListView(Arc::new( + GenericListViewArray::new_null(Arc::clone(field_ref), 1), + )), DataType::Struct(fields) => ScalarValue::Struct( new_null_array(&DataType::Struct(fields.to_owned()), 1) .as_struct() @@ -1640,7 +1680,7 @@ impl ScalarValue { let empty_arr = new_empty_array(field.data_type()); let values = Arc::new( SingleRowListArrayBuilder::new(empty_arr) - .with_nullable(field.is_nullable()) + .with_field(field) .build_fixed_size_list_array(0), ); Ok(ScalarValue::FixedSizeList(values)) @@ -1649,6 +1689,24 @@ impl ScalarValue { let list = ScalarValue::new_large_list(&[], field.data_type()); Ok(ScalarValue::LargeList(list)) } + DataType::ListView(field) => { + let empty_arr = new_empty_array(field.data_type()); + let values = Arc::new( + SingleRowListArrayBuilder::new(empty_arr) + .with_field(field) + .build_list_view_array(), + ); + Ok(ScalarValue::ListView(values)) + } + DataType::LargeListView(field) => { + let empty_arr = new_empty_array(field.data_type()); + let values = Arc::new( + SingleRowListArrayBuilder::new(empty_arr) + .with_field(field) + .build_large_list_view_array(), + ); + Ok(ScalarValue::LargeListView(values)) + } // Struct types DataType::Struct(fields) => { @@ -1698,12 +1756,6 @@ impl ScalarValue { _internal_err!("Union type must have at least one field") } } - - DataType::ListView(_) | DataType::LargeListView(_) => { - _not_impl_err!( - "Default value for data_type \"{datatype}\" is not implemented yet" - ) - } } } @@ -1961,6 +2013,8 @@ impl ScalarValue { ScalarValue::List(arr) => arr.data_type().to_owned(), ScalarValue::LargeList(arr) => arr.data_type().to_owned(), ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(), + ScalarValue::ListView(arr) => arr.data_type().to_owned(), + ScalarValue::LargeListView(arr) => arr.data_type().to_owned(), ScalarValue::Struct(arr) => arr.data_type().to_owned(), ScalarValue::Map(arr) => arr.data_type().to_owned(), ScalarValue::Date32(_) => DataType::Date32, @@ -2471,6 +2525,8 @@ impl ScalarValue { ScalarValue::List(arr) => arr.len() == arr.null_count(), ScalarValue::LargeList(arr) => arr.len() == arr.null_count(), ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(), + ScalarValue::ListView(arr) => arr.len() == arr.null_count(), + ScalarValue::LargeListView(arr) => arr.len() == arr.null_count(), ScalarValue::Struct(arr) => arr.len() == arr.null_count(), ScalarValue::Map(arr) => arr.len() == arr.null_count(), ScalarValue::Date32(v) => v.is_none(), @@ -2822,6 +2878,8 @@ impl ScalarValue { } DataType::List(_) | DataType::LargeList(_) + | DataType::ListView(_) + | DataType::LargeListView(_) | DataType::Map(_, _) | DataType::Struct(_) | DataType::Union(_, _) => { @@ -2978,9 +3036,7 @@ impl ScalarValue { DataType::Time32(TimeUnit::Microsecond) | DataType::Time32(TimeUnit::Nanosecond) | DataType::Time64(TimeUnit::Second) - | DataType::Time64(TimeUnit::Millisecond) - | DataType::ListView(_) - | DataType::LargeListView(_) => { + | DataType::Time64(TimeUnit::Millisecond) => { return _not_impl_err!( "Unsupported creation of {:?} array from ScalarValue {:?}", data_type, @@ -3401,6 +3457,18 @@ impl ScalarValue { } Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? } + ScalarValue::ListView(arr) => { + if size == 1 { + return Ok(Arc::clone(arr) as Arc); + } + Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? + } + ScalarValue::LargeListView(arr) => { + if size == 1 { + return Ok(Arc::clone(arr) as Arc); + } + Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)? + } ScalarValue::Struct(arr) => { if size == 1 { return Ok(Arc::clone(arr) as Arc); @@ -3778,29 +3846,35 @@ impl ScalarValue { pub fn convert_array_to_scalar_vec( array: &dyn Array, ) -> Result>>> { - fn generic_collect( - array: &dyn Array, - ) -> Result>>> { - array - .as_list::() - .iter() - .map(|nested_array| { - nested_array - .map(|array| { - (0..array.len()) - .map(|i| ScalarValue::try_from_array(&array, i)) - .collect::>>() - }) - .transpose() + fn map_element( + nested_array: Option, + ) -> Result>> { + nested_array + .map(|array| { + (0..array.len()) + .map(|i| ScalarValue::try_from_array(&array, i)) + .collect::>>() }) - .collect() + .transpose() } match array.data_type() { - DataType::List(_) => generic_collect::(array), - DataType::LargeList(_) => generic_collect::(array), + DataType::List(_) => array.as_list::().iter().map(map_element).collect(), + DataType::LargeList(_) => { + array.as_list::().iter().map(map_element).collect() + } + DataType::ListView(_) => array + .as_list_view::() + .iter() + .map(map_element) + .collect(), + DataType::LargeListView(_) => array + .as_list_view::() + .iter() + .map(map_element) + .collect(), _ => _internal_err!( - "ScalarValue::convert_array_to_scalar_vec input must be a List/LargeList type" + "ScalarValue::convert_array_to_scalar_vec input must be a List/LargeList/ListView/LargeListView type" ), } } @@ -3898,20 +3972,20 @@ impl ScalarValue { .build_fixed_size_list_scalar(list_size) } DataType::ListView(field) => { - let list_array = array.as_list_view::(); + let list_array = as_list_view_array(array)?; let nested_array = list_array.value(index); - // Store as List scalar since ScalarValue has no ListView variant. + // Produces a single element `ListViewArray` with the value at `index`. SingleRowListArrayBuilder::new(nested_array) .with_field(field) - .build_list_scalar() + .build_list_view_scalar() } DataType::LargeListView(field) => { - let list_array = array.as_list_view::(); + let list_array = as_large_list_view_array(array)?; let nested_array = list_array.value(index); - // Store as LargeList scalar since ScalarValue has no LargeListView variant. + // Produces a single element `LargeListViewArray` with the value at `index`. SingleRowListArrayBuilder::new(nested_array) .with_field(field) - .build_large_list_scalar() + .build_large_list_view_scalar() } DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?, DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?, @@ -4368,6 +4442,12 @@ impl ScalarValue { ScalarValue::FixedSizeList(arr) => { Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) } + ScalarValue::ListView(arr) => { + Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) + } + ScalarValue::LargeListView(arr) => { + Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) + } ScalarValue::Struct(arr) => { Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index) } @@ -4555,6 +4635,8 @@ impl ScalarValue { ScalarValue::List(arr) => arr.get_array_memory_size(), ScalarValue::LargeList(arr) => arr.get_array_memory_size(), ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(), + ScalarValue::ListView(arr) => arr.get_array_memory_size(), + ScalarValue::LargeListView(arr) => arr.get_array_memory_size(), ScalarValue::Struct(arr) => arr.get_array_memory_size(), ScalarValue::Map(arr) => arr.get_array_memory_size(), ScalarValue::Union(vals, fields, _mode) => { @@ -4671,6 +4753,14 @@ impl ScalarValue { let array = copy_array_data(&arr.to_data()); *Arc::make_mut(arr) = LargeListArray::from(array) } + ScalarValue::ListView(arr) => { + let array = copy_array_data(&arr.to_data()); + *Arc::make_mut(arr) = ListViewArray::from(array); + } + ScalarValue::LargeListView(arr) => { + let array = copy_array_data(&arr.to_data()); + *Arc::make_mut(arr) = LargeListViewArray::from(array) + } ScalarValue::Struct(arr) => { let array = copy_array_data(&arr.to_data()); *Arc::make_mut(arr) = StructArray::from(array); @@ -5208,6 +5298,8 @@ impl fmt::Display for ScalarValue { ScalarValue::List(arr) => fmt_list(arr.as_ref(), f)?, ScalarValue::LargeList(arr) => fmt_list(arr.as_ref(), f)?, ScalarValue::FixedSizeList(arr) => fmt_list(arr.as_ref(), f)?, + ScalarValue::ListView(arr) => fmt_list(arr.as_ref(), f)?, + ScalarValue::LargeListView(arr) => fmt_list(arr.as_ref(), f)?, ScalarValue::Date32(e) => format_option!( f, e.map(|v| { @@ -5331,7 +5423,7 @@ impl fmt::Display for ScalarValue { } fn fmt_list(arr: &dyn Array, f: &mut fmt::Formatter) -> fmt::Result { - // ScalarValue List, LargeList, FixedSizeList should always have a single element + // ScalarValue List, LargeList, FixedSizeList, ListView, LargeListView should always have a single element assert_eq!(arr.len(), 1); let options = FormatOptions::default().with_display_error(true); let formatter = ArrayFormatter::try_new(arr, &options).unwrap(); @@ -5417,6 +5509,8 @@ impl fmt::Debug for ScalarValue { ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"), ScalarValue::List(_) => write!(f, "List({self})"), ScalarValue::LargeList(_) => write!(f, "LargeList({self})"), + ScalarValue::ListView(_) => write!(f, "ListView({self})"), + ScalarValue::LargeListView(_) => write!(f, "LargeListView({self})"), ScalarValue::Struct(struct_arr) => { // ScalarValue Struct should always have a single element assert_eq!(struct_arr.len(), 1); @@ -5556,12 +5650,15 @@ impl ScalarType for Date32Type { mod tests { use super::*; - use crate::cast::{as_list_array, as_map_array, as_struct_array}; + use crate::cast::{ + as_large_list_view_array, as_list_array, as_map_array, as_struct_array, + }; use crate::test_util::batches_to_string; use arrow::array::{ - FixedSizeListBuilder, Int32Builder, LargeListBuilder, ListBuilder, MapBuilder, - NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, RecordBatch, - StringBuilder, StringDictionaryBuilder, StructBuilder, UnionBuilder, + FixedSizeListBuilder, Int32Builder, LargeListBuilder, LargeListViewBuilder, + ListBuilder, ListViewBuilder, MapBuilder, NullArray, NullBufferBuilder, + OffsetSizeTrait, PrimitiveBuilder, RecordBatch, StringBuilder, + StringDictionaryBuilder, StructBuilder, UnionBuilder, }; use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer}; use arrow::compute::{is_null, kernels}; @@ -5747,6 +5844,27 @@ mod tests { ]); assert_eq!(&arr, actual_list_arr); + + // ListView + let arr = + ListViewArray::from_iter_primitive::(vec![Some(vec![ + Some(1), + None, + Some(2), + ])]); + + let sv = ScalarValue::ListView(Arc::new(arr)); + let actual_arr = sv + .to_array_of_size(2) + .expect("Failed to convert to array of size"); + let actual_list_arr = actual_arr.as_list_view::(); + + let arr = ListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), None, Some(2)]), + Some(vec![Some(1), None, Some(2)]), + ]); + + assert_eq!(&arr, actual_list_arr); } #[test] @@ -5886,29 +6004,11 @@ mod tests { values .into_iter() .map(|v| { - let arr = if v.is_some() { - Arc::new( - GenericListArray::::from_iter_primitive::( - vec![v], - ), - ) - } else if O::IS_LARGE { - new_null_array( - &DataType::LargeList(Arc::new(Field::new_list_field( - DataType::Int64, - true, - ))), - 1, - ) - } else { - new_null_array( - &DataType::List(Arc::new(Field::new_list_field( - DataType::Int64, - true, - ))), - 1, - ) - }; + let arr = Arc::new(GenericListArray::::from_iter_primitive::< + Int64Type, + _, + _, + >(vec![v])) as ArrayRef; if O::IS_LARGE { ScalarValue::LargeList(arr.as_list::().to_owned().into()) @@ -5919,6 +6019,29 @@ mod tests { .collect() } + fn build_list_view( + values: Vec>>>, + ) -> Vec { + values + .into_iter() + .map(|v| { + let arr = Arc::new(GenericListViewArray::::from_iter_primitive::< + Int64Type, + _, + _, + >(vec![v])) as ArrayRef; + + if O::IS_LARGE { + ScalarValue::LargeListView( + arr.as_list_view::().to_owned().into(), + ) + } else { + ScalarValue::ListView(arr.as_list_view::().to_owned().into()) + } + }) + .collect() + } + #[test] fn test_iter_to_array_fixed_size_list() { let field = Arc::new(Field::new_list_field(DataType::Int32, true)); @@ -6047,13 +6170,13 @@ mod tests { #[test] fn iter_to_array_primitive_test() { + // List // List[[1,2,3]], List[null], List[[4,5]] let scalars = build_list::(vec![ Some(vec![Some(1), Some(2), Some(3)]), None, Some(vec![Some(4), Some(5)]), ]); - let array = ScalarValue::iter_to_array(scalars).unwrap(); let list_array = as_list_array(&array).unwrap(); // List[[1,2,3], null, [4,5]] @@ -6064,20 +6187,57 @@ mod tests { ]); assert_eq!(list_array, &expected); + // LargeList + // List[[1,2,3]], List[null], List[[4,5]] let scalars = build_list::(vec![ Some(vec![Some(1), Some(2), Some(3)]), None, Some(vec![Some(4), Some(5)]), ]); - let array = ScalarValue::iter_to_array(scalars).unwrap(); - let list_array = as_large_list_array(&array).unwrap(); + let large_list_array = as_large_list_array(&array).unwrap(); let expected = LargeListArray::from_iter_primitive::(vec![ Some(vec![Some(1), Some(2), Some(3)]), None, Some(vec![Some(4), Some(5)]), ]); - assert_eq!(list_array, &expected); + assert_eq!(large_list_array, &expected); + + // ListView + // ListView[[1,2,3]], ListView[null], ListView[[4,5]] + let scalars = build_list_view::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), Some(5)]), + ]); + + let array = ScalarValue::iter_to_array(scalars).unwrap(); + let list_view_array = as_list_view_array(&array).unwrap(); + // ListView[[1,2,3], null, [4,5]] + let expected = ListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), Some(5)]), + ]); + assert_eq!(list_view_array, &expected); + + // LargeListView + // LargeListView[[1,2,3]], LargeListView[null], LargeListView[[4,5]] + let scalars = build_list_view::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), Some(5)]), + ]); + + let array = ScalarValue::iter_to_array(scalars).unwrap(); + let large_list_view_array = as_large_list_view_array(&array).unwrap(); + // LargeListView[[1,2,3], null, [4,5]] + let expected = LargeListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), Some(5)]), + ]); + assert_eq!(large_list_view_array, &expected); } #[test] @@ -6120,16 +6280,36 @@ mod tests { ])); let fsl_array: ArrayRef = - Arc::new(ListArray::from_iter_primitive::(vec![ + Arc::new(FixedSizeListArray::from_iter_primitive::( + vec![ + Some(vec![Some(0), Some(1), Some(2)]), + None, + Some(vec![Some(3), None, Some(5)]), + ], + 3, + )); + let list_view_array: ArrayRef = + Arc::new(ListViewArray::from_iter_primitive::(vec![ Some(vec![Some(0), Some(1), Some(2)]), None, - Some(vec![Some(3), None, Some(5)]), + Some(vec![None, Some(5)]), ])); - for arr in [list_array, fsl_array] { + for arr in [list_array, fsl_array, list_view_array] { for i in 0..arr.len() { - let scalar = - ScalarValue::List(arr.slice(i, 1).as_list::().to_owned().into()); + let slice = arr.slice(i, 1); + let scalar = match arr.data_type() { + DataType::List(_) => { + ScalarValue::List(slice.as_list::().to_owned().into()) + } + DataType::FixedSizeList(_, _) => ScalarValue::FixedSizeList( + slice.as_fixed_size_list().to_owned().into(), + ), + DataType::ListView(_) => ScalarValue::ListView( + slice.as_list_view::().to_owned().into(), + ), + _ => unreachable!(), + }; assert!(scalar.eq_array(&arr, i).unwrap()); } } @@ -6828,6 +7008,40 @@ mod tests { ), )); assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater)); + + let a = ScalarValue::ListView(Arc::new(ListViewArray::from_iter_primitive::< + Int64Type, + _, + _, + >(vec![Some(vec![ + None, + Some(2), + Some(3), + ])]))); + let b = ScalarValue::ListView(Arc::new(ListViewArray::from_iter_primitive::< + Int64Type, + _, + _, + >(vec![Some(vec![ + Some(1), + Some(2), + Some(3), + ])]))); + assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater)); + + let a = + ScalarValue::LargeListView(Arc::new( + LargeListViewArray::from_iter_primitive::(vec![Some( + vec![None, Some(2), Some(3)], + )]), + )); + let b = + ScalarValue::LargeListView(Arc::new( + LargeListViewArray::from_iter_primitive::(vec![Some( + vec![Some(1), Some(2), Some(3)], + )]), + )); + assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater)); } #[test] @@ -7175,6 +7389,30 @@ mod tests { ); assert_eq!(expected, scalar); assert!(expected.is_null()); + + // Test for ListView + let data_type = &DataType::ListView(Arc::clone(&inner_field)); + let scalar: ScalarValue = data_type.try_into().unwrap(); + let expected = ScalarValue::ListView( + new_null_array(data_type, 1) + .as_list_view::() + .to_owned() + .into(), + ); + assert_eq!(expected, scalar); + assert!(expected.is_null()); + + // Test for LargeListView + let data_type = &DataType::LargeListView(Arc::clone(&inner_field)); + let scalar: ScalarValue = data_type.try_into().unwrap(); + let expected = ScalarValue::LargeListView( + new_null_array(data_type, 1) + .as_list_view::() + .to_owned() + .into(), + ); + assert_eq!(expected, scalar); + assert!(expected.is_null()); } #[test] @@ -7889,6 +8127,34 @@ mod tests { builder.append(true); Arc::new(builder.finish()) }, + // list view array + { + let values_builder = StringBuilder::new(); + let mut builder = ListViewBuilder::new(values_builder); + // [A, B] + builder.values().append_value("A"); + builder.values().append_value("B"); + builder.append(true); + // [ ] (empty list) + builder.append(true); + // Null + builder.append(false); + Arc::new(builder.finish()) + }, + // large list view array + { + let values_builder = StringBuilder::new(); + let mut builder = LargeListViewBuilder::new(values_builder); + // [A, B] + builder.values().append_value("A"); + builder.values().append_value("B"); + builder.append(true); + // [ ] (empty list) + builder.append(true); + // Null + builder.append(false); + Arc::new(builder.finish()) + }, // map { let string_builder = StringBuilder::new(); @@ -8411,6 +8677,38 @@ mod tests { }, DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))), ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = + ListViewBuilder::new(Int32Builder::new()).with_field(element_field); + builder.append_value([Some(1)]); + builder.append(true); + + ScalarValue::ListView(Arc::new(builder.finish())) + }, + DataType::ListView(Arc::new(Field::new("element", DataType::Int64, true))), + ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = LargeListViewBuilder::new(Int32Builder::new()) + .with_field(element_field); + builder.append_value([Some(1)]); + builder.append(true); + + ScalarValue::LargeListView(Arc::new(builder.finish())) + }, + DataType::LargeListView(Arc::new(Field::new( + "element", + DataType::Int64, + true, + ))), + ); } // mimics how casting work on scalar values by `casting` `scalar` to `desired_type` @@ -9036,7 +9334,7 @@ mod tests { )))), ]; - let check_array = |array| { + let check_array = |array: Arc| { let is_null = is_null(&array).unwrap(); assert_eq!(is_null, BooleanArray::from(vec![true, false, false])); @@ -9101,6 +9399,21 @@ mod tests { "); } + #[test] + fn test_list_view_display() { + let s = ScalarValue::ListView( + ListViewArray::from_iter_primitive::(vec![Some(vec![ + Some(1), + None, + Some(3), + ])]) + .into(), + ); + + assert_eq!(s.to_string(), "[1, , 3]"); + assert_eq!(format!("{s:?}"), "ListView([1, , 3])"); + } + #[test] fn test_null_bug() { let field_a = Field::new("a", DataType::Int32, true); @@ -9559,6 +9872,10 @@ mod tests { 42, )) .unwrap(), + ScalarValue::try_new_null(&DataType::ListView(Arc::clone(&field_ref))) + .unwrap(), + ScalarValue::try_new_null(&DataType::LargeListView(Arc::clone(&field_ref))) + .unwrap(), ScalarValue::try_new_null(&DataType::Struct( vec![Arc::clone(&field_ref)].into(), )) @@ -9651,6 +9968,41 @@ mod tests { _ => panic!("Expected List"), } + let list_field = Field::new_list_field(DataType::Int32, true); + let list_result = + ScalarValue::new_default(&DataType::LargeList(Arc::new(list_field.clone()))) + .unwrap(); + match list_result { + ScalarValue::LargeList(arr) => { + assert_eq!(arr.len(), 1); + assert_eq!(arr.value_length(0), 0); // empty list + } + _ => panic!("Expected LargeList"), + } + + let list_result = + ScalarValue::new_default(&DataType::ListView(Arc::new(list_field.clone()))) + .unwrap(); + match list_result { + ScalarValue::ListView(arr) => { + assert_eq!(arr.len(), 1); + assert_eq!(arr.value_size(0), 0); // empty list + } + _ => panic!("Expected ListView"), + } + + let list_result = ScalarValue::new_default(&DataType::LargeListView(Arc::new( + list_field.clone(), + ))) + .unwrap(); + match list_result { + ScalarValue::LargeListView(arr) => { + assert_eq!(arr.len(), 1); + assert_eq!(arr.value_size(0), 0); // empty list + } + _ => panic!("Expected LargeListView"), + } + // Test struct type let struct_fields = Fields::from(vec![ Field::new("a", DataType::Int32, false), @@ -9760,6 +10112,30 @@ mod tests { )))), None ); + assert_eq!( + ScalarValue::min(&DataType::LargeList(Arc::new(Field::new( + "item", + DataType::Int32, + true + )))), + None + ); + assert_eq!( + ScalarValue::min(&DataType::ListView(Arc::new(Field::new( + "item", + DataType::Int32, + true + )))), + None + ); + assert_eq!( + ScalarValue::min(&DataType::LargeListView(Arc::new(Field::new( + "item", + DataType::Int32, + true + )))), + None + ); } #[test] @@ -9836,6 +10212,22 @@ mod tests { )]))), None ); + assert_eq!( + ScalarValue::max(&DataType::ListView(Arc::new(Field::new( + "item", + DataType::Int32, + true + )))), + None + ); + assert_eq!( + ScalarValue::max(&DataType::LargeListView(Arc::new(Field::new( + "item", + DataType::Int32, + true + )))), + None + ); } #[test] @@ -10214,5 +10606,52 @@ mod tests { ]), ] ); + + // 6: Regular ListViewArray + let list = ListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2)]), + None, + Some(vec![Some(3), None, Some(4)]), + ]); + let converted = ScalarValue::convert_array_to_scalar_vec(&list).unwrap(); + assert_eq!( + converted, + vec![ + Some(vec![ + ScalarValue::Int64(Some(1)), + ScalarValue::Int64(Some(2)) + ]), + None, + Some(vec![ + ScalarValue::Int64(Some(3)), + ScalarValue::Int64(None), + ScalarValue::Int64(Some(4)) + ]), + ] + ); + + // 7: Regular LargeListViewArray + let large_list = + LargeListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2)]), + None, + Some(vec![Some(3), None, Some(4)]), + ]); + let converted = ScalarValue::convert_array_to_scalar_vec(&large_list).unwrap(); + assert_eq!( + converted, + vec![ + Some(vec![ + ScalarValue::Int64(Some(1)), + ScalarValue::Int64(Some(2)) + ]), + None, + Some(vec![ + ScalarValue::Int64(Some(3)), + ScalarValue::Int64(None), + ScalarValue::Int64(Some(4)) + ]), + ] + ); } } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 97525740027f4..2f01859f4544b 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -30,7 +30,8 @@ use arrow::array::{ Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait, cast::AsArray, }; -use arrow::buffer::OffsetBuffer; +use arrow::array::{LargeListViewArray, ListViewArray}; +use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::compute::{SortColumn, SortOptions, partition}; use arrow::datatypes::{DataType, Field, SchemaRef}; #[cfg(feature = "sql")] @@ -480,6 +481,34 @@ impl SingleRowListArrayBuilder { ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size))) } + /// Build a single element [`ListViewArray`] + pub fn build_list_view_array(self) -> ListViewArray { + let (field, arr) = self.into_field_and_arr(); + let offsets = ScalarBuffer::from(vec![0]); + let sizes = ScalarBuffer::from(vec![i32::try_from(arr.len()).expect( + "Trying to construct a ListView where element length exceeds i32::MAX", + )]); + ListViewArray::new(field, offsets, sizes, arr, None) + } + + /// Build a single element [`ListViewArray`] and wrap as [`ScalarValue::ListView`] + pub fn build_list_view_scalar(self) -> ScalarValue { + ScalarValue::ListView(Arc::new(self.build_list_view_array())) + } + + /// Build a single element [`LargeListViewArray`] + pub fn build_large_list_view_array(self) -> LargeListViewArray { + let (field, arr) = self.into_field_and_arr(); + let offsets = ScalarBuffer::from(vec![0]); + let sizes = ScalarBuffer::from(vec![arr.len() as i64]); + LargeListViewArray::new(field, offsets, sizes, arr, None) + } + + /// Build a single element [`LargeListViewArray`] and wrap as [`ScalarValue::LargeListView`] + pub fn build_large_list_view_scalar(self) -> ScalarValue { + ScalarValue::LargeListView(Arc::new(self.build_large_list_view_array())) + } + /// Helper function: convert this builder into a tuple of field and array fn into_field_and_arr(self) -> (Arc, ArrayRef) { let Self { diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 31ece63577b4f..53850e3146838 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -199,7 +199,7 @@ message Union{ repeated int32 type_ids = 3; } -// Used for List/FixedSizeList/LargeList/Struct/Map +// Used for List/FixedSizeList/LargeList/ListView/LargeListView/Struct/Map message ScalarNestedValue { message Dictionary { bytes ipc_message = 1; @@ -306,6 +306,8 @@ message ScalarValue{ ScalarNestedValue large_list_value = 16; ScalarNestedValue list_value = 17; ScalarNestedValue fixed_size_list_value = 18; + ScalarNestedValue list_view_value = 46; + ScalarNestedValue large_list_view_value = 47; ScalarNestedValue struct_value = 32; ScalarNestedValue map_value = 41; @@ -398,6 +400,8 @@ message ArrowType{ List LIST = 25; List LARGE_LIST = 26; FixedSizeList FIXED_SIZE_LIST = 27; + List LIST_VIEW = 43; + List LARGE_LIST_VIEW = 44; Struct STRUCT = 28; Union UNION = 29; Dictionary DICTIONARY = 30; diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 4b7a91f38c201..5913a2c8f335e 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -296,6 +296,16 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType { let list_size = list.list_size; DataType::FixedSizeList(Arc::new(list_type), list_size) } + arrow_type::ArrowTypeEnum::ListView(list) => { + let list_type = + list.as_ref().field_type.as_deref().required("field_type")?; + DataType::ListView(Arc::new(list_type)) + } + arrow_type::ArrowTypeEnum::LargeListView(list) => { + let list_type = + list.as_ref().field_type.as_deref().required("field_type")?; + DataType::LargeListView(Arc::new(list_type)) + } arrow_type::ArrowTypeEnum::Struct(strct) => DataType::Struct( parse_proto_fields_to_fields(&strct.sub_field_types)?.into(), ), @@ -405,6 +415,8 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { Value::ListValue(v) | Value::FixedSizeListValue(v) | Value::LargeListValue(v) + | Value::ListViewValue(v) + | Value::LargeListViewValue(v) | Value::StructValue(v) | Value::MapValue(v) => { let protobuf::ScalarNestedValue { @@ -517,6 +529,12 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { Value::FixedSizeListValue(_) => { Self::FixedSizeList(arr.as_fixed_size_list().to_owned().into()) } + Value::ListViewValue(_) => { + Self::ListView(arr.as_list_view::().to_owned().into()) + } + Value::LargeListViewValue(_) => { + Self::LargeListView(arr.as_list_view::().to_owned().into()) + } Value::StructValue(_) => { Self::Struct(arr.as_struct().to_owned().into()) } diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index 77a3b71488ece..950b5a6f63a6f 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -264,6 +264,12 @@ impl serde::Serialize for ArrowType { arrow_type::ArrowTypeEnum::FixedSizeList(v) => { struct_ser.serialize_field("FIXEDSIZELIST", v)?; } + arrow_type::ArrowTypeEnum::ListView(v) => { + struct_ser.serialize_field("LISTVIEW", v)?; + } + arrow_type::ArrowTypeEnum::LargeListView(v) => { + struct_ser.serialize_field("LARGELISTVIEW", v)?; + } arrow_type::ArrowTypeEnum::Struct(v) => { struct_ser.serialize_field("STRUCT", v)?; } @@ -332,6 +338,10 @@ impl<'de> serde::Deserialize<'de> for ArrowType { "LARGELIST", "FIXED_SIZE_LIST", "FIXEDSIZELIST", + "LIST_VIEW", + "LISTVIEW", + "LARGE_LIST_VIEW", + "LARGELISTVIEW", "STRUCT", "UNION", "DICTIONARY", @@ -376,6 +386,8 @@ impl<'de> serde::Deserialize<'de> for ArrowType { List, LargeList, FixedSizeList, + ListView, + LargeListView, Struct, Union, Dictionary, @@ -436,6 +448,8 @@ impl<'de> serde::Deserialize<'de> for ArrowType { "LIST" => Ok(GeneratedField::List), "LARGELIST" | "LARGE_LIST" => Ok(GeneratedField::LargeList), "FIXEDSIZELIST" | "FIXED_SIZE_LIST" => Ok(GeneratedField::FixedSizeList), + "LISTVIEW" | "LIST_VIEW" => Ok(GeneratedField::ListView), + "LARGELISTVIEW" | "LARGE_LIST_VIEW" => Ok(GeneratedField::LargeListView), "STRUCT" => Ok(GeneratedField::Struct), "UNION" => Ok(GeneratedField::Union), "DICTIONARY" => Ok(GeneratedField::Dictionary), @@ -694,6 +708,20 @@ impl<'de> serde::Deserialize<'de> for ArrowType { return Err(serde::de::Error::duplicate_field("FIXEDSIZELIST")); } arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::FixedSizeList) +; + } + GeneratedField::ListView => { + if arrow_type_enum__.is_some() { + return Err(serde::de::Error::duplicate_field("LISTVIEW")); + } + arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::ListView) +; + } + GeneratedField::LargeListView => { + if arrow_type_enum__.is_some() { + return Err(serde::de::Error::duplicate_field("LARGELISTVIEW")); + } + arrow_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(arrow_type::ArrowTypeEnum::LargeListView) ; } GeneratedField::Struct => { @@ -7978,6 +8006,12 @@ impl serde::Serialize for ScalarValue { scalar_value::Value::FixedSizeListValue(v) => { struct_ser.serialize_field("fixedSizeListValue", v)?; } + scalar_value::Value::ListViewValue(v) => { + struct_ser.serialize_field("listViewValue", v)?; + } + scalar_value::Value::LargeListViewValue(v) => { + struct_ser.serialize_field("largeListViewValue", v)?; + } scalar_value::Value::StructValue(v) => { struct_ser.serialize_field("structValue", v)?; } @@ -8115,6 +8149,10 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { "listValue", "fixed_size_list_value", "fixedSizeListValue", + "list_view_value", + "listViewValue", + "large_list_view_value", + "largeListViewValue", "struct_value", "structValue", "map_value", @@ -8185,6 +8223,8 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { LargeListValue, ListValue, FixedSizeListValue, + ListViewValue, + LargeListViewValue, StructValue, MapValue, Decimal32Value, @@ -8249,6 +8289,8 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { "largeListValue" | "large_list_value" => Ok(GeneratedField::LargeListValue), "listValue" | "list_value" => Ok(GeneratedField::ListValue), "fixedSizeListValue" | "fixed_size_list_value" => Ok(GeneratedField::FixedSizeListValue), + "listViewValue" | "list_view_value" => Ok(GeneratedField::ListViewValue), + "largeListViewValue" | "large_list_view_value" => Ok(GeneratedField::LargeListViewValue), "structValue" | "struct_value" => Ok(GeneratedField::StructValue), "mapValue" | "map_value" => Ok(GeneratedField::MapValue), "decimal32Value" | "decimal32_value" => Ok(GeneratedField::Decimal32Value), @@ -8417,6 +8459,20 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { return Err(serde::de::Error::duplicate_field("fixedSizeListValue")); } value__ = map_.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::FixedSizeListValue) +; + } + GeneratedField::ListViewValue => { + if value__.is_some() { + return Err(serde::de::Error::duplicate_field("listViewValue")); + } + value__ = map_.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::ListViewValue) +; + } + GeneratedField::LargeListViewValue => { + if value__.is_some() { + return Err(serde::de::Error::duplicate_field("largeListViewValue")); + } + value__ = map_.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::LargeListViewValue) ; } GeneratedField::StructValue => { diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 1251a51ab0983..aac131e57e34b 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -191,7 +191,7 @@ pub struct Union { #[prost(int32, repeated, tag = "3")] pub type_ids: ::prost::alloc::vec::Vec, } -/// Used for List/FixedSizeList/LargeList/Struct/Map +/// Used for List/FixedSizeList/LargeList/ListView/LargeListView/Struct/Map #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarNestedValue { #[prost(bytes = "vec", tag = "1")] @@ -327,7 +327,7 @@ pub struct ScalarFixedSizeBinary { pub struct ScalarValue { #[prost( oneof = "scalar_value::Value", - tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42, 45" + tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 46, 47, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42, 45" )] pub value: ::core::option::Option, } @@ -378,6 +378,10 @@ pub mod scalar_value { ListValue(super::ScalarNestedValue), #[prost(message, tag = "18")] FixedSizeListValue(super::ScalarNestedValue), + #[prost(message, tag = "46")] + ListViewValue(super::ScalarNestedValue), + #[prost(message, tag = "47")] + LargeListViewValue(super::ScalarNestedValue), #[prost(message, tag = "32")] StructValue(super::ScalarNestedValue), #[prost(message, tag = "41")] @@ -467,7 +471,7 @@ pub struct Decimal256 { pub struct ArrowType { #[prost( oneof = "arrow_type::ArrowTypeEnum", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 28, 29, 30, 33, 42" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 43, 44, 28, 29, 30, 33, 42" )] pub arrow_type_enum: ::core::option::Option, } @@ -548,6 +552,10 @@ pub mod arrow_type { LargeList(::prost::alloc::boxed::Box), #[prost(message, tag = "27")] FixedSizeList(::prost::alloc::boxed::Box), + #[prost(message, tag = "43")] + ListView(::prost::alloc::boxed::Box), + #[prost(message, tag = "44")] + LargeListView(::prost::alloc::boxed::Box), #[prost(message, tag = "28")] Struct(super::Struct), #[prost(message, tag = "29")] diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index 65089f029b866..a2da9b77996d7 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -171,6 +171,14 @@ impl TryFrom<&DataType> for protobuf::arrow_type::ArrowTypeEnum { DataType::LargeList(item_type) => Self::LargeList(Box::new(protobuf::List { field_type: Some(Box::new(item_type.as_ref().try_into()?)), })), + DataType::ListView(item_type) => Self::ListView(Box::new(protobuf::List { + field_type: Some(Box::new(item_type.as_ref().try_into()?)), + })), + DataType::LargeListView(item_type) => { + Self::LargeListView(Box::new(protobuf::List { + field_type: Some(Box::new(item_type.as_ref().try_into()?)), + })) + } DataType::Struct(struct_fields) => Self::Struct(protobuf::Struct { sub_field_types: convert_arc_fields_to_proto_fields(struct_fields)?, }), @@ -227,11 +235,6 @@ impl TryFrom<&DataType> for protobuf::arrow_type::ArrowTypeEnum { values_field: Some(Box::new(values_field.as_ref().try_into()?)), })) } - DataType::ListView(_) | DataType::LargeListView(_) => { - return Err(Error::General(format!( - "Proto serialization error: {val} not yet supported" - ))); - } }; Ok(res) @@ -383,6 +386,12 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { ScalarValue::FixedSizeList(arr) => { encode_scalar_nested_value(arr.to_owned() as ArrayRef, val) } + ScalarValue::ListView(arr) => { + encode_scalar_nested_value(arr.to_owned() as ArrayRef, val) + } + ScalarValue::LargeListView(arr) => { + encode_scalar_nested_value(arr.to_owned() as ArrayRef, val) + } ScalarValue::Struct(arr) => { encode_scalar_nested_value(arr.to_owned() as ArrayRef, val) } @@ -1042,8 +1051,8 @@ fn create_proto_scalar protobuf::scalar_value::Value>( Ok(protobuf::ScalarValue { value: Some(value) }) } -// Nested ScalarValue types (List / FixedSizeList / LargeList / Struct / Map) are serialized using -// Arrow IPC messages as a single column RecordBatch +// Nested ScalarValue types (List / FixedSizeList / LargeList / ListView / LargeListView / Struct / Map) +// are serialized using Arrow IPC messages as a single column RecordBatch fn encode_scalar_nested_value( arr: ArrayRef, val: &ScalarValue, @@ -1105,6 +1114,16 @@ fn encode_scalar_nested_value( scalar_list_value, )), }), + ScalarValue::ListView(_) => Ok(protobuf::ScalarValue { + value: Some(protobuf::scalar_value::Value::ListViewValue( + scalar_list_value, + )), + }), + ScalarValue::LargeListView(_) => Ok(protobuf::ScalarValue { + value: Some(protobuf::scalar_value::Value::LargeListViewValue( + scalar_list_value, + )), + }), ScalarValue::Struct(_) => Ok(protobuf::ScalarValue { value: Some(protobuf::scalar_value::Value::StructValue( scalar_list_value, diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index 1251a51ab0983..aac131e57e34b 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -191,7 +191,7 @@ pub struct Union { #[prost(int32, repeated, tag = "3")] pub type_ids: ::prost::alloc::vec::Vec, } -/// Used for List/FixedSizeList/LargeList/Struct/Map +/// Used for List/FixedSizeList/LargeList/ListView/LargeListView/Struct/Map #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarNestedValue { #[prost(bytes = "vec", tag = "1")] @@ -327,7 +327,7 @@ pub struct ScalarFixedSizeBinary { pub struct ScalarValue { #[prost( oneof = "scalar_value::Value", - tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42, 45" + tags = "33, 1, 2, 3, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 46, 47, 32, 41, 43, 44, 20, 39, 21, 24, 35, 36, 37, 38, 26, 27, 28, 29, 22, 30, 25, 31, 34, 42, 45" )] pub value: ::core::option::Option, } @@ -378,6 +378,10 @@ pub mod scalar_value { ListValue(super::ScalarNestedValue), #[prost(message, tag = "18")] FixedSizeListValue(super::ScalarNestedValue), + #[prost(message, tag = "46")] + ListViewValue(super::ScalarNestedValue), + #[prost(message, tag = "47")] + LargeListViewValue(super::ScalarNestedValue), #[prost(message, tag = "32")] StructValue(super::ScalarNestedValue), #[prost(message, tag = "41")] @@ -467,7 +471,7 @@ pub struct Decimal256 { pub struct ArrowType { #[prost( oneof = "arrow_type::ArrowTypeEnum", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 28, 29, 30, 33, 42" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 35, 32, 15, 34, 16, 31, 17, 18, 19, 20, 21, 22, 23, 40, 41, 24, 36, 25, 26, 27, 43, 44, 28, 29, 30, 33, 42" )] pub arrow_type_enum: ::core::option::Option, } @@ -548,6 +552,10 @@ pub mod arrow_type { LargeList(::prost::alloc::boxed::Box), #[prost(message, tag = "27")] FixedSizeList(::prost::alloc::boxed::Box), + #[prost(message, tag = "43")] + ListView(::prost::alloc::boxed::Box), + #[prost(message, tag = "44")] + LargeListView(::prost::alloc::boxed::Box), #[prost(message, tag = "28")] Struct(super::Struct), #[prost(message, tag = "29")] diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 07bd02c61b7e9..a8dbf10aafba8 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -16,7 +16,8 @@ // under the License. use arrow::array::{ - ArrayRef, FixedSizeListArray, Int32Builder, MapArray, MapBuilder, StringBuilder, + ArrayRef, FixedSizeListArray, Int32Builder, LargeListViewArray, ListViewArray, + MapArray, MapBuilder, StringBuilder, }; use arrow::datatypes::{ DECIMAL256_MAX_PRECISION, DataType, Field, FieldRef, Fields, Int32Type, @@ -1540,263 +1541,277 @@ impl LogicalExtensionCodec for UDFExtensionCodec { #[test] fn round_trip_scalar_values_and_data_types() { - let should_pass: Vec = vec![ - ScalarValue::Boolean(None), - ScalarValue::Float32(None), - ScalarValue::Float64(None), - ScalarValue::Int8(None), - ScalarValue::Int16(None), - ScalarValue::Int32(None), - ScalarValue::Int64(None), - ScalarValue::UInt8(None), - ScalarValue::UInt16(None), - ScalarValue::UInt32(None), - ScalarValue::UInt64(None), - ScalarValue::Utf8(None), - ScalarValue::LargeUtf8(None), - ScalarValue::List(ScalarValue::new_list_nullable(&[], &DataType::Boolean)), - ScalarValue::LargeList(ScalarValue::new_large_list(&[], &DataType::Boolean)), - ScalarValue::Date32(None), - ScalarValue::Boolean(Some(true)), - ScalarValue::Boolean(Some(false)), - ScalarValue::Float32(Some(1.0)), - ScalarValue::Float32(Some(f32::MAX)), - ScalarValue::Float32(Some(f32::MIN)), - ScalarValue::Float32(Some(-2000.0)), - ScalarValue::Float64(Some(1.0)), - ScalarValue::Float64(Some(f64::MAX)), - ScalarValue::Float64(Some(f64::MIN)), - ScalarValue::Float64(Some(-2000.0)), - ScalarValue::Int8(Some(i8::MIN)), - ScalarValue::Int8(Some(i8::MAX)), - ScalarValue::Int8(Some(0)), - ScalarValue::Int8(Some(-15)), - ScalarValue::Int16(Some(i16::MIN)), - ScalarValue::Int16(Some(i16::MAX)), - ScalarValue::Int16(Some(0)), - ScalarValue::Int16(Some(-15)), - ScalarValue::Int32(Some(i32::MIN)), - ScalarValue::Int32(Some(i32::MAX)), - ScalarValue::Int32(Some(0)), - ScalarValue::Int32(Some(-15)), - ScalarValue::Int64(Some(i64::MIN)), - ScalarValue::Int64(Some(i64::MAX)), - ScalarValue::Int64(Some(0)), - ScalarValue::Int64(Some(-15)), - ScalarValue::UInt8(Some(u8::MAX)), - ScalarValue::UInt8(Some(0)), - ScalarValue::UInt16(Some(u16::MAX)), - ScalarValue::UInt16(Some(0)), - ScalarValue::UInt32(Some(u32::MAX)), - ScalarValue::UInt32(Some(0)), - ScalarValue::UInt64(Some(u64::MAX)), - ScalarValue::UInt64(Some(0)), - ScalarValue::Utf8(Some(String::from("Test string "))), - ScalarValue::LargeUtf8(Some(String::from("Test Large utf8"))), - ScalarValue::Utf8View(Some(String::from("Test stringview"))), - ScalarValue::BinaryView(Some(b"binaryview".to_vec())), - ScalarValue::Date32(Some(0)), - ScalarValue::Date32(Some(i32::MAX)), - ScalarValue::Date32(None), - ScalarValue::Date64(Some(0)), - ScalarValue::Date64(Some(i64::MAX)), - ScalarValue::Date64(None), - ScalarValue::Time32Second(Some(0)), - ScalarValue::Time32Second(Some(i32::MAX)), - ScalarValue::Time32Second(None), - ScalarValue::Time32Millisecond(Some(0)), - ScalarValue::Time32Millisecond(Some(i32::MAX)), - ScalarValue::Time32Millisecond(None), - ScalarValue::Time64Microsecond(Some(0)), - ScalarValue::Time64Microsecond(Some(i64::MAX)), - ScalarValue::Time64Microsecond(None), - ScalarValue::Time64Nanosecond(Some(0)), - ScalarValue::Time64Nanosecond(Some(i64::MAX)), - ScalarValue::Time64Nanosecond(None), - ScalarValue::TimestampNanosecond(Some(0), None), - ScalarValue::TimestampNanosecond(Some(i64::MAX), None), - ScalarValue::TimestampNanosecond(Some(0), Some("UTC".into())), - ScalarValue::TimestampNanosecond(None, None), - ScalarValue::TimestampMicrosecond(Some(0), None), - ScalarValue::TimestampMicrosecond(Some(i64::MAX), None), - ScalarValue::TimestampMicrosecond(Some(0), Some("UTC".into())), - ScalarValue::TimestampMicrosecond(None, None), - ScalarValue::TimestampMillisecond(Some(0), None), - ScalarValue::TimestampMillisecond(Some(i64::MAX), None), - ScalarValue::TimestampMillisecond(Some(0), Some("UTC".into())), - ScalarValue::TimestampMillisecond(None, None), - ScalarValue::TimestampSecond(Some(0), None), - ScalarValue::TimestampSecond(Some(i64::MAX), None), - ScalarValue::TimestampSecond(Some(0), Some("UTC".into())), - ScalarValue::TimestampSecond(None, None), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(1, 2))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - i32::MAX, - i32::MAX, - ))), - ScalarValue::IntervalDayTime(None), - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( - 0, 0, 0, - ))), - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( - 1, 2, 3, - ))), - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( - i32::MAX, - i32::MAX, - i64::MAX, - ))), - ScalarValue::IntervalMonthDayNano(None), - ScalarValue::List(ScalarValue::new_list_nullable( - &[ - ScalarValue::Float32(Some(-213.1)), - ScalarValue::Float32(None), - ScalarValue::Float32(Some(5.5)), - ScalarValue::Float32(Some(2.0)), - ScalarValue::Float32(Some(1.0)), - ], - &DataType::Float32, - )), - ScalarValue::LargeList(ScalarValue::new_large_list( - &[ - ScalarValue::Float32(Some(-213.1)), - ScalarValue::Float32(None), - ScalarValue::Float32(Some(5.5)), - ScalarValue::Float32(Some(2.0)), - ScalarValue::Float32(Some(1.0)), - ], - &DataType::Float32, - )), - ScalarValue::List(ScalarValue::new_list_nullable( - &[ - ScalarValue::List(ScalarValue::new_list_nullable( - &[], - &DataType::Float32, - )), - ScalarValue::List(ScalarValue::new_list_nullable( - &[ - ScalarValue::Float32(Some(-213.1)), - ScalarValue::Float32(None), - ScalarValue::Float32(Some(5.5)), - ScalarValue::Float32(Some(2.0)), - ScalarValue::Float32(Some(1.0)), - ], - &DataType::Float32, - )), - ], - &DataType::List(new_arc_field("item", DataType::Float32, true)), - )), - ScalarValue::LargeList(ScalarValue::new_large_list( - &[ - ScalarValue::LargeList(ScalarValue::new_large_list( - &[], - &DataType::Float32, - )), - ScalarValue::LargeList(ScalarValue::new_large_list( - &[ - ScalarValue::Float32(Some(-213.1)), - ScalarValue::Float32(None), - ScalarValue::Float32(Some(5.5)), - ScalarValue::Float32(Some(2.0)), - ScalarValue::Float32(Some(1.0)), - ], - &DataType::Float32, - )), - ], - &DataType::LargeList(new_arc_field("item", DataType::Float32, true)), - )), - ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::from_iter_primitive::< - Int32Type, - _, - _, - >( - vec![Some(vec![Some(1), Some(2), Some(3)])], - 3, - ))), - ScalarValue::Dictionary( - Box::new(DataType::Int32), - Box::new(ScalarValue::from("foo")), - ), - ScalarValue::Dictionary( - Box::new(DataType::Int32), - Box::new(ScalarValue::Utf8(None)), - ), - ScalarValue::RunEndEncoded( - Field::new("run_ends", DataType::Int32, false).into(), - Field::new("values", DataType::Utf8, true).into(), - Box::new(ScalarValue::from("foo")), - ), - ScalarValue::RunEndEncoded( - Field::new("run_ends", DataType::Int32, false).into(), - Field::new("values", DataType::Utf8, true).into(), - Box::new(ScalarValue::Utf8(None)), - ), - ScalarValue::Binary(Some(b"bar".to_vec())), - ScalarValue::Binary(None), - ScalarValue::LargeBinary(Some(b"bar".to_vec())), - ScalarValue::LargeBinary(None), - ScalarStructBuilder::new() - .with_scalar( + let should_pass: Vec = + vec![ + ScalarValue::Boolean(None), + ScalarValue::Float32(None), + ScalarValue::Float64(None), + ScalarValue::Int8(None), + ScalarValue::Int16(None), + ScalarValue::Int32(None), + ScalarValue::Int64(None), + ScalarValue::UInt8(None), + ScalarValue::UInt16(None), + ScalarValue::UInt32(None), + ScalarValue::UInt64(None), + ScalarValue::Utf8(None), + ScalarValue::LargeUtf8(None), + ScalarValue::List(ScalarValue::new_list_nullable(&[], &DataType::Boolean)), + ScalarValue::LargeList(ScalarValue::new_large_list(&[], &DataType::Boolean)), + ScalarValue::Date32(None), + ScalarValue::Boolean(Some(true)), + ScalarValue::Boolean(Some(false)), + ScalarValue::Float32(Some(1.0)), + ScalarValue::Float32(Some(f32::MAX)), + ScalarValue::Float32(Some(f32::MIN)), + ScalarValue::Float32(Some(-2000.0)), + ScalarValue::Float64(Some(1.0)), + ScalarValue::Float64(Some(f64::MAX)), + ScalarValue::Float64(Some(f64::MIN)), + ScalarValue::Float64(Some(-2000.0)), + ScalarValue::Int8(Some(i8::MIN)), + ScalarValue::Int8(Some(i8::MAX)), + ScalarValue::Int8(Some(0)), + ScalarValue::Int8(Some(-15)), + ScalarValue::Int16(Some(i16::MIN)), + ScalarValue::Int16(Some(i16::MAX)), + ScalarValue::Int16(Some(0)), + ScalarValue::Int16(Some(-15)), + ScalarValue::Int32(Some(i32::MIN)), + ScalarValue::Int32(Some(i32::MAX)), + ScalarValue::Int32(Some(0)), + ScalarValue::Int32(Some(-15)), + ScalarValue::Int64(Some(i64::MIN)), + ScalarValue::Int64(Some(i64::MAX)), + ScalarValue::Int64(Some(0)), + ScalarValue::Int64(Some(-15)), + ScalarValue::UInt8(Some(u8::MAX)), + ScalarValue::UInt8(Some(0)), + ScalarValue::UInt16(Some(u16::MAX)), + ScalarValue::UInt16(Some(0)), + ScalarValue::UInt32(Some(u32::MAX)), + ScalarValue::UInt32(Some(0)), + ScalarValue::UInt64(Some(u64::MAX)), + ScalarValue::UInt64(Some(0)), + ScalarValue::Utf8(Some(String::from("Test string "))), + ScalarValue::LargeUtf8(Some(String::from("Test Large utf8"))), + ScalarValue::Utf8View(Some(String::from("Test stringview"))), + ScalarValue::BinaryView(Some(b"binaryview".to_vec())), + ScalarValue::Date32(Some(0)), + ScalarValue::Date32(Some(i32::MAX)), + ScalarValue::Date32(None), + ScalarValue::Date64(Some(0)), + ScalarValue::Date64(Some(i64::MAX)), + ScalarValue::Date64(None), + ScalarValue::Time32Second(Some(0)), + ScalarValue::Time32Second(Some(i32::MAX)), + ScalarValue::Time32Second(None), + ScalarValue::Time32Millisecond(Some(0)), + ScalarValue::Time32Millisecond(Some(i32::MAX)), + ScalarValue::Time32Millisecond(None), + ScalarValue::Time64Microsecond(Some(0)), + ScalarValue::Time64Microsecond(Some(i64::MAX)), + ScalarValue::Time64Microsecond(None), + ScalarValue::Time64Nanosecond(Some(0)), + ScalarValue::Time64Nanosecond(Some(i64::MAX)), + ScalarValue::Time64Nanosecond(None), + ScalarValue::TimestampNanosecond(Some(0), None), + ScalarValue::TimestampNanosecond(Some(i64::MAX), None), + ScalarValue::TimestampNanosecond(Some(0), Some("UTC".into())), + ScalarValue::TimestampNanosecond(None, None), + ScalarValue::TimestampMicrosecond(Some(0), None), + ScalarValue::TimestampMicrosecond(Some(i64::MAX), None), + ScalarValue::TimestampMicrosecond(Some(0), Some("UTC".into())), + ScalarValue::TimestampMicrosecond(None, None), + ScalarValue::TimestampMillisecond(Some(0), None), + ScalarValue::TimestampMillisecond(Some(i64::MAX), None), + ScalarValue::TimestampMillisecond(Some(0), Some("UTC".into())), + ScalarValue::TimestampMillisecond(None, None), + ScalarValue::TimestampSecond(Some(0), None), + ScalarValue::TimestampSecond(Some(i64::MAX), None), + ScalarValue::TimestampSecond(Some(0), Some("UTC".into())), + ScalarValue::TimestampSecond(None, None), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(1, 2))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + i32::MAX, + i32::MAX, + ))), + ScalarValue::IntervalDayTime(None), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, 0, 0), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(1, 2, 3), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(i32::MAX, i32::MAX, i64::MAX), + )), + ScalarValue::IntervalMonthDayNano(None), + ScalarValue::List(ScalarValue::new_list_nullable( + &[ + ScalarValue::Float32(Some(-213.1)), + ScalarValue::Float32(None), + ScalarValue::Float32(Some(5.5)), + ScalarValue::Float32(Some(2.0)), + ScalarValue::Float32(Some(1.0)), + ], + &DataType::Float32, + )), + ScalarValue::LargeList(ScalarValue::new_large_list( + &[ + ScalarValue::Float32(Some(-213.1)), + ScalarValue::Float32(None), + ScalarValue::Float32(Some(5.5)), + ScalarValue::Float32(Some(2.0)), + ScalarValue::Float32(Some(1.0)), + ], + &DataType::Float32, + )), + ScalarValue::List(ScalarValue::new_list_nullable( + &[ + ScalarValue::List(ScalarValue::new_list_nullable( + &[], + &DataType::Float32, + )), + ScalarValue::List(ScalarValue::new_list_nullable( + &[ + ScalarValue::Float32(Some(-213.1)), + ScalarValue::Float32(None), + ScalarValue::Float32(Some(5.5)), + ScalarValue::Float32(Some(2.0)), + ScalarValue::Float32(Some(1.0)), + ], + &DataType::Float32, + )), + ], + &DataType::List(new_arc_field("item", DataType::Float32, true)), + )), + ScalarValue::LargeList(ScalarValue::new_large_list( + &[ + ScalarValue::LargeList(ScalarValue::new_large_list( + &[], + &DataType::Float32, + )), + ScalarValue::LargeList(ScalarValue::new_large_list( + &[ + ScalarValue::Float32(Some(-213.1)), + ScalarValue::Float32(None), + ScalarValue::Float32(Some(5.5)), + ScalarValue::Float32(Some(2.0)), + ScalarValue::Float32(Some(1.0)), + ], + &DataType::Float32, + )), + ], + &DataType::LargeList(new_arc_field("item", DataType::Float32, true)), + )), + ScalarValue::FixedSizeList(Arc::new( + FixedSizeListArray::from_iter_primitive::( + vec![Some(vec![Some(1), Some(2), Some(3)])], + 3, + ), + )), + ScalarValue::ListView(Arc::new(ListViewArray::from_iter_primitive::< + Int32Type, + _, + _, + >(vec![Some(vec![ + Some(1), + None, + Some(3), + ])]))), + ScalarValue::LargeListView(Arc::new( + LargeListViewArray::from_iter_primitive::(vec![Some( + vec![Some(1), None, Some(3)], + )]), + )), + ScalarValue::Dictionary( + Box::new(DataType::Int32), + Box::new(ScalarValue::from("foo")), + ), + ScalarValue::Dictionary( + Box::new(DataType::Int32), + Box::new(ScalarValue::Utf8(None)), + ), + ScalarValue::RunEndEncoded( + Field::new("run_ends", DataType::Int32, false).into(), + Field::new("values", DataType::Utf8, true).into(), + Box::new(ScalarValue::from("foo")), + ), + ScalarValue::RunEndEncoded( + Field::new("run_ends", DataType::Int32, false).into(), + Field::new("values", DataType::Utf8, true).into(), + Box::new(ScalarValue::Utf8(None)), + ), + ScalarValue::Binary(Some(b"bar".to_vec())), + ScalarValue::Binary(None), + ScalarValue::LargeBinary(Some(b"bar".to_vec())), + ScalarValue::LargeBinary(None), + ScalarStructBuilder::new() + .with_scalar( + Field::new("a", DataType::Int32, true), + ScalarValue::from(23i32), + ) + .with_scalar( + Field::new("b", DataType::Boolean, false), + ScalarValue::from(false), + ) + .build() + .unwrap(), + ScalarStructBuilder::new() + .with_scalar( + Field::new("a", DataType::Int32, true), + ScalarValue::from(23i32), + ) + .with_scalar( + Field::new("b", DataType::Boolean, false), + ScalarValue::from(false), + ) + .build() + .unwrap(), + ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ Field::new("a", DataType::Int32, true), - ScalarValue::from(23i32), - ) - .with_scalar( Field::new("b", DataType::Boolean, false), - ScalarValue::from(false), - ) - .build() + ]))) .unwrap(), - ScalarStructBuilder::new() - .with_scalar( + ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ Field::new("a", DataType::Int32, true), - ScalarValue::from(23i32), - ) - .with_scalar( Field::new("b", DataType::Boolean, false), - ScalarValue::from(false), - ) - .build() + ]))) .unwrap(), - ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Boolean, false), - ]))) - .unwrap(), - ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Boolean, false), - ]))) - .unwrap(), - ScalarValue::try_from(&DataType::Map( - Arc::new(Field::new( - "entries", - DataType::Struct(Fields::from(vec![ - Field::new("key", DataType::Int32, true), - Field::new("value", DataType::Utf8, false), - ])), - false, - )), - false, - )) - .unwrap(), - ScalarValue::try_from(&DataType::Map( - Arc::new(Field::new( - "entries", - DataType::Struct(Fields::from(vec![ - Field::new("key", DataType::Int32, true), - Field::new("value", DataType::Utf8, true), - ])), + ScalarValue::try_from(&DataType::Map( + Arc::new(Field::new( + "entries", + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int32, true), + Field::new("value", DataType::Utf8, false), + ])), + false, + )), false, - )), - true, - )) - .unwrap(), - ScalarValue::Map(Arc::new(create_map_array_test_case())), - ScalarValue::FixedSizeBinary(b"bar".to_vec().len() as i32, Some(b"bar".to_vec())), - ScalarValue::FixedSizeBinary(0, None), - ScalarValue::FixedSizeBinary(5, None), - ]; + )) + .unwrap(), + ScalarValue::try_from(&DataType::Map( + Arc::new(Field::new( + "entries", + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int32, true), + Field::new("value", DataType::Utf8, true), + ])), + false, + )), + true, + )) + .unwrap(), + ScalarValue::Map(Arc::new(create_map_array_test_case())), + ScalarValue::FixedSizeBinary( + b"bar".to_vec().len() as i32, + Some(b"bar".to_vec()), + ), + ScalarValue::FixedSizeBinary(0, None), + ScalarValue::FixedSizeBinary(5, None), + ]; // ScalarValue directly for test_case in should_pass.iter() { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 5954e2971850c..807e358c6123c 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1324,6 +1324,8 @@ impl Unparser<'_> { ScalarValue::FixedSizeList(a) => self.scalar_value_list_to_sql(a.values()), ScalarValue::List(a) => self.scalar_value_list_to_sql(a.values()), ScalarValue::LargeList(a) => self.scalar_value_list_to_sql(a.values()), + ScalarValue::ListView(a) => self.scalar_value_list_to_sql(a.values()), + ScalarValue::LargeListView(a) => self.scalar_value_list_to_sql(a.values()), ScalarValue::Date32(Some(_)) => { let date = v .to_array()? @@ -1839,7 +1841,7 @@ mod tests { use std::{sync::Arc, vec}; use crate::unparser::dialect::SqliteDialect; - use arrow::array::{LargeListArray, ListArray}; + use arrow::array::{LargeListArray, LargeListViewArray, ListArray, ListViewArray}; use arrow::datatypes::{DataType::Int8, Field, Int32Type, Schema, TimeUnit}; use ast::ObjectName; use datafusion_common::datatype::DataTypeExt; @@ -2353,6 +2355,28 @@ mod tests { ), "[1, 2, 3]", ), + ( + Expr::Literal( + ScalarValue::ListView(Arc::new( + ListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + ]), + )), + None, + ), + "[1, 2, 3]", + ), + ( + Expr::Literal( + ScalarValue::LargeListView(Arc::new( + LargeListViewArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + ]), + )), + None, + ), + "[1, 2, 3]", + ), ( Expr::BinaryExpr(BinaryExpr { left: Box::new(col("a")),