From 5010bfdd87ef971ee8a4818b487b86f0aff1fb88 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:58:20 -0500 Subject: [PATCH] Add docstring examples for Scalar array/list functions Add example usage to docstrings for Scalar array/list functions to improve documentation. Co-Authored-By: Claude Opus 4.6 --- python/datafusion/functions.py | 742 +++++++++++++++++++++++++++++++-- 1 file changed, 713 insertions(+), 29 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index fd116254b..f24423717 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -1343,7 +1343,17 @@ def upper(arg: Expr) -> Expr: def make_array(*args: Expr) -> Expr: - """Returns an array using the specified input expressions.""" + """Returns an array using the specified input expressions. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> result = df.select( + ... dfn.functions.make_array(dfn.lit(1), dfn.lit(2), dfn.lit(3)).alias("arr")) + >>> result.collect_column("arr")[0].as_py() + [1, 2, 3] + """ args = [arg.expr for arg in args] return Expr(f.make_array(args)) @@ -1352,6 +1362,14 @@ def make_list(*args: Expr) -> Expr: """Returns an array using the specified input expressions. This is an alias for :py:func:`make_array`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> result = df.select(dfn.functions.make_list(dfn.lit(4), dfn.lit(5)).alias("arr")) + >>> result.collect_column("arr")[0].as_py() + [4, 5] """ return make_array(*args) @@ -1360,12 +1378,30 @@ def array(*args: Expr) -> Expr: """Returns an array using the specified input expressions. This is an alias for :py:func:`make_array`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> result = df.select(dfn.functions.array(dfn.lit(7), dfn.lit(8)).alias("arr")) + >>> result.collect_column("arr")[0].as_py() + [7, 8] """ return make_array(*args) def range(start: Expr, stop: Expr, step: Expr) -> Expr: - """Create a list of values in the range between start and stop.""" + """Create a list of values in the range between start and stop. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> result = df.select( + ... dfn.functions.range(dfn.lit(0), dfn.lit(5), dfn.lit(2)).alias("r")) + >>> result.collect_column("r")[0].as_py() + [0, 2, 4] + """ return Expr(f.range(start.expr, stop.expr, step.expr)) @@ -1413,7 +1449,17 @@ def random() -> Expr: def array_append(array: Expr, element: Expr) -> Expr: - """Appends an element to the end of an array.""" + """Appends an element to the end of an array. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select( + ... dfn.functions.array_append(dfn.col("a"), dfn.lit(4)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3, 4] + """ return Expr(f.array_append(array.expr, element.expr)) @@ -1421,6 +1467,15 @@ def array_push_back(array: Expr, element: Expr) -> Expr: """Appends an element to the end of an array. This is an alias for :py:func:`array_append`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.array_push_back(dfn.col("a"), dfn.lit(3)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3] """ return array_append(array, element) @@ -1429,6 +1484,15 @@ def list_append(array: Expr, element: Expr) -> Expr: """Appends an element to the end of an array. This is an alias for :py:func:`array_append`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.list_append(dfn.col("a"), dfn.lit(3)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3] """ return array_append(array, element) @@ -1437,12 +1501,31 @@ def list_push_back(array: Expr, element: Expr) -> Expr: """Appends an element to the end of an array. This is an alias for :py:func:`array_append`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.list_push_back(dfn.col("a"), dfn.lit(3)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3] """ return array_append(array, element) def array_concat(*args: Expr) -> Expr: - """Concatenates the input arrays.""" + """Concatenates the input arrays. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]}) + >>> result = df.select( + ... dfn.functions.array_concat(dfn.col("a"), dfn.col("b")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3, 4] + """ args = [arg.expr for arg in args] return Expr(f.array_concat(args)) @@ -1451,17 +1534,50 @@ def array_cat(*args: Expr) -> Expr: """Concatenates the input arrays. This is an alias for :py:func:`array_concat`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]}) + >>> result = df.select( + ... dfn.functions.array_cat(dfn.col("a"), dfn.col("b")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3, 4] """ return array_concat(*args) def array_dims(array: Expr) -> Expr: - """Returns an array of the array's dimensions.""" + """Returns an array of the array's dimensions. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select(dfn.functions.array_dims(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [3] + """ return Expr(f.array_dims(array.expr)) def array_distinct(array: Expr) -> Expr: - """Returns distinct values from the array after removing duplicates.""" + """Returns distinct values from the array after removing duplicates. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 1, 2, 3]]}) + >>> result = df.select( + ... dfn.functions.array_distinct( + ... dfn.col("a") + ... ).alias("result") + ... ) + >>> sorted( + ... result.collect_column("result")[0].as_py() + ... ) + [1, 2, 3] + """ return Expr(f.array_distinct(array.expr)) @@ -1469,6 +1585,15 @@ def list_cat(*args: Expr) -> Expr: """Concatenates the input arrays. This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]}) + >>> result = df.select( + ... dfn.functions.list_cat(dfn.col("a"), dfn.col("b")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3, 4] """ return array_concat(*args) @@ -1477,6 +1602,15 @@ def list_concat(*args: Expr) -> Expr: """Concatenates the input arrays. This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]}) + >>> result = df.select( + ... dfn.functions.list_concat(dfn.col("a"), dfn.col("b")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3, 4] """ return array_concat(*args) @@ -1485,6 +1619,20 @@ def list_distinct(array: Expr) -> Expr: """Returns distinct values from the array after removing duplicates. This is an alias for :py:func:`array_distinct`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 1, 2, 3]]}) + >>> result = df.select( + ... dfn.functions.list_distinct( + ... dfn.col("a") + ... ).alias("result") + ... ) + >>> sorted( + ... result.collect_column("result")[0].as_py() + ... ) + [1, 2, 3] """ return array_distinct(array) @@ -1493,17 +1641,44 @@ def list_dims(array: Expr) -> Expr: """Returns an array of the array's dimensions. This is an alias for :py:func:`array_dims`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select(dfn.functions.list_dims(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [3] """ return array_dims(array) def array_element(array: Expr, n: Expr) -> Expr: - """Extracts the element with the index n from the array.""" + """Extracts the element with the index n from the array. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[10, 20, 30]]}) + >>> result = df.select( + ... dfn.functions.array_element(dfn.col("a"), dfn.lit(2)).alias("result")) + >>> result.collect_column("result")[0].as_py() + 20 + """ return Expr(f.array_element(array.expr, n.expr)) def array_empty(array: Expr) -> Expr: - """Returns a boolean indicating whether the array is empty.""" + """Returns a boolean indicating whether the array is empty. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select(dfn.functions.array_empty(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + False + """ return Expr(f.array_empty(array.expr)) @@ -1511,6 +1686,15 @@ def array_extract(array: Expr, n: Expr) -> Expr: """Extracts the element with the index n from the array. This is an alias for :py:func:`array_element`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[10, 20, 30]]}) + >>> result = df.select( + ... dfn.functions.array_extract(dfn.col("a"), dfn.lit(2)).alias("result")) + >>> result.collect_column("result")[0].as_py() + 20 """ return array_element(array, n) @@ -1519,6 +1703,15 @@ def list_element(array: Expr, n: Expr) -> Expr: """Extracts the element with the index n from the array. This is an alias for :py:func:`array_element`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[10, 20, 30]]}) + >>> result = df.select( + ... dfn.functions.list_element(dfn.col("a"), dfn.lit(2)).alias("result")) + >>> result.collect_column("result")[0].as_py() + 20 """ return array_element(array, n) @@ -1527,12 +1720,30 @@ def list_extract(array: Expr, n: Expr) -> Expr: """Extracts the element with the index n from the array. This is an alias for :py:func:`array_element`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[10, 20, 30]]}) + >>> result = df.select( + ... dfn.functions.list_extract(dfn.col("a"), dfn.lit(2)).alias("result")) + >>> result.collect_column("result")[0].as_py() + 20 """ return array_element(array, n) def array_length(array: Expr) -> Expr: - """Returns the length of the array.""" + """Returns the length of the array. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select(dfn.functions.array_length(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + 3 + """ return Expr(f.array_length(array.expr)) @@ -1540,12 +1751,30 @@ def list_length(array: Expr) -> Expr: """Returns the length of the array. This is an alias for :py:func:`array_length`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select(dfn.functions.list_length(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + 3 """ return array_length(array) def array_has(first_array: Expr, second_array: Expr) -> Expr: - """Returns true if the element appears in the first array, otherwise false.""" + """Returns true if the element appears in the first array, otherwise false. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select( + ... dfn.functions.array_has(dfn.col("a"), dfn.lit(2)).alias("result")) + >>> result.collect_column("result")[0].as_py() + True + """ return Expr(f.array_has(first_array.expr, second_array.expr)) @@ -1554,6 +1783,15 @@ def array_has_all(first_array: Expr, second_array: Expr) -> Expr: Returns true if each element of the second array appears in the first array. Otherwise, it returns false. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.array_has_all(dfn.col("a"), dfn.col("b")).alias("result")) + >>> result.collect_column("result")[0].as_py() + True """ return Expr(f.array_has_all(first_array.expr, second_array.expr)) @@ -1563,12 +1801,31 @@ def array_has_any(first_array: Expr, second_array: Expr) -> Expr: Returns true if at least one element of the second array appears in the first array. Otherwise, it returns false. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 5]]}) + >>> result = df.select( + ... dfn.functions.array_has_any(dfn.col("a"), dfn.col("b")).alias("result")) + >>> result.collect_column("result")[0].as_py() + True """ return Expr(f.array_has_any(first_array.expr, second_array.expr)) def array_position(array: Expr, element: Expr, index: int | None = 1) -> Expr: - """Return the position of the first occurrence of ``element`` in ``array``.""" + """Return the position of the first occurrence of ``element`` in ``array``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[10, 20, 30]]}) + >>> result = df.select( + ... dfn.functions.array_position(dfn.col("a"), dfn.lit(20)).alias("result")) + >>> result.collect_column("result")[0].as_py() + 2 + """ return Expr(f.array_position(array.expr, element.expr, index)) @@ -1576,6 +1833,15 @@ def array_indexof(array: Expr, element: Expr, index: int | None = 1) -> Expr: """Return the position of the first occurrence of ``element`` in ``array``. This is an alias for :py:func:`array_position`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[10, 20, 30]]}) + >>> result = df.select( + ... dfn.functions.array_indexof(dfn.col("a"), dfn.lit(20)).alias("result")) + >>> result.collect_column("result")[0].as_py() + 2 """ return array_position(array, element, index) @@ -1584,6 +1850,15 @@ def list_position(array: Expr, element: Expr, index: int | None = 1) -> Expr: """Return the position of the first occurrence of ``element`` in ``array``. This is an alias for :py:func:`array_position`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[10, 20, 30]]}) + >>> result = df.select( + ... dfn.functions.list_position(dfn.col("a"), dfn.lit(20)).alias("result")) + >>> result.collect_column("result")[0].as_py() + 2 """ return array_position(array, element, index) @@ -1592,12 +1867,31 @@ def list_indexof(array: Expr, element: Expr, index: int | None = 1) -> Expr: """Return the position of the first occurrence of ``element`` in ``array``. This is an alias for :py:func:`array_position`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[10, 20, 30]]}) + >>> result = df.select( + ... dfn.functions.list_indexof(dfn.col("a"), dfn.lit(20)).alias("result")) + >>> result.collect_column("result")[0].as_py() + 2 """ return array_position(array, element, index) def array_positions(array: Expr, element: Expr) -> Expr: - """Searches for an element in the array and returns all occurrences.""" + """Searches for an element in the array and returns all occurrences. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.array_positions(dfn.col("a"), dfn.lit(1)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 3] + """ return Expr(f.array_positions(array.expr, element.expr)) @@ -1605,12 +1899,30 @@ def list_positions(array: Expr, element: Expr) -> Expr: """Searches for an element in the array and returns all occurrences. This is an alias for :py:func:`array_positions`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.list_positions(dfn.col("a"), dfn.lit(1)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 3] """ return array_positions(array, element) def array_ndims(array: Expr) -> Expr: - """Returns the number of dimensions of the array.""" + """Returns the number of dimensions of the array. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select(dfn.functions.array_ndims(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + 1 + """ return Expr(f.array_ndims(array.expr)) @@ -1618,12 +1930,30 @@ def list_ndims(array: Expr) -> Expr: """Returns the number of dimensions of the array. This is an alias for :py:func:`array_ndims`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select(dfn.functions.list_ndims(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + 1 """ return array_ndims(array) def array_prepend(element: Expr, array: Expr) -> Expr: - """Prepends an element to the beginning of an array.""" + """Prepends an element to the beginning of an array. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.array_prepend(dfn.lit(0), dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [0, 1, 2] + """ return Expr(f.array_prepend(element.expr, array.expr)) @@ -1631,6 +1961,15 @@ def array_push_front(element: Expr, array: Expr) -> Expr: """Prepends an element to the beginning of an array. This is an alias for :py:func:`array_prepend`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.array_push_front(dfn.lit(0), dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [0, 1, 2] """ return array_prepend(element, array) @@ -1639,6 +1978,15 @@ def list_prepend(element: Expr, array: Expr) -> Expr: """Prepends an element to the beginning of an array. This is an alias for :py:func:`array_prepend`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.list_prepend(dfn.lit(0), dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [0, 1, 2] """ return array_prepend(element, array) @@ -1647,22 +1995,59 @@ def list_push_front(element: Expr, array: Expr) -> Expr: """Prepends an element to the beginning of an array. This is an alias for :py:func:`array_prepend`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.list_push_front(dfn.lit(0), dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [0, 1, 2] """ return array_prepend(element, array) def array_pop_back(array: Expr) -> Expr: - """Returns the array without the last element.""" + """Returns the array without the last element. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select(dfn.functions.array_pop_back(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2] + """ return Expr(f.array_pop_back(array.expr)) def array_pop_front(array: Expr) -> Expr: - """Returns the array without the first element.""" + """Returns the array without the first element. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select(dfn.functions.array_pop_front(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [2, 3] + """ return Expr(f.array_pop_front(array.expr)) def array_remove(array: Expr, element: Expr) -> Expr: - """Removes the first element from the array equal to the given value.""" + """Removes the first element from the array equal to the given value. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.array_remove(dfn.col("a"), dfn.lit(1)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [2, 1] + """ return Expr(f.array_remove(array.expr, element.expr)) @@ -1670,12 +2055,32 @@ def list_remove(array: Expr, element: Expr) -> Expr: """Removes the first element from the array equal to the given value. This is an alias for :py:func:`array_remove`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.list_remove(dfn.col("a"), dfn.lit(1)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [2, 1] """ return array_remove(array, element) def array_remove_n(array: Expr, element: Expr, max: Expr) -> Expr: - """Removes the first ``max`` elements from the array equal to the given value.""" + """Removes the first ``max`` elements from the array equal to the given value. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1, 1]]}) + >>> result = df.select( + ... dfn.functions.array_remove_n(dfn.col("a"), dfn.lit(1), + ... dfn.lit(2)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [2, 1] + """ return Expr(f.array_remove_n(array.expr, element.expr, max.expr)) @@ -1683,12 +2088,32 @@ def list_remove_n(array: Expr, element: Expr, max: Expr) -> Expr: """Removes the first ``max`` elements from the array equal to the given value. This is an alias for :py:func:`array_remove_n`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1, 1]]}) + >>> result = df.select( + ... dfn.functions.list_remove_n(dfn.col("a"), dfn.lit(1), + ... dfn.lit(2)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [2, 1] """ return array_remove_n(array, element, max) def array_remove_all(array: Expr, element: Expr) -> Expr: - """Removes all elements from the array equal to the given value.""" + """Removes all elements from the array equal to the given value. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.array_remove_all(dfn.col("a"), dfn.lit(1)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [2] + """ return Expr(f.array_remove_all(array.expr, element.expr)) @@ -1696,12 +2121,31 @@ def list_remove_all(array: Expr, element: Expr) -> Expr: """Removes all elements from the array equal to the given value. This is an alias for :py:func:`array_remove_all`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.list_remove_all(dfn.col("a"), dfn.lit(1)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [2] """ return array_remove_all(array, element) def array_repeat(element: Expr, count: Expr) -> Expr: - """Returns an array containing ``element`` ``count`` times.""" + """Returns an array containing ``element`` ``count`` times. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> result = df.select( + ... dfn.functions.array_repeat(dfn.lit(3), dfn.lit(3)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [3, 3, 3] + """ return Expr(f.array_repeat(element.expr, count.expr)) @@ -1709,12 +2153,32 @@ def list_repeat(element: Expr, count: Expr) -> Expr: """Returns an array containing ``element`` ``count`` times. This is an alias for :py:func:`array_repeat`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> result = df.select( + ... dfn.functions.list_repeat(dfn.lit(3), dfn.lit(3)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [3, 3, 3] """ return array_repeat(element, count) def array_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr: - """Replaces the first occurrence of ``from_val`` with ``to_val``.""" + """Replaces the first occurrence of ``from_val`` with ``to_val``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.array_replace(dfn.col("a"), dfn.lit(1), + ... dfn.lit(9)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [9, 2, 1] + """ return Expr(f.array_replace(array.expr, from_val.expr, to_val.expr)) @@ -1722,6 +2186,16 @@ def list_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr: """Replaces the first occurrence of ``from_val`` with ``to_val``. This is an alias for :py:func:`array_replace`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.list_replace(dfn.col("a"), dfn.lit(1), + ... dfn.lit(9)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [9, 2, 1] """ return array_replace(array, from_val, to_val) @@ -1731,6 +2205,16 @@ def array_replace_n(array: Expr, from_val: Expr, to_val: Expr, max: Expr) -> Exp Replaces the first ``max`` occurrences of the specified element with another specified element. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1, 1]]}) + >>> result = df.select( + ... dfn.functions.array_replace_n(dfn.col("a"), dfn.lit(1), dfn.lit(9), + ... dfn.lit(2)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [9, 2, 9, 1] """ return Expr(f.array_replace_n(array.expr, from_val.expr, to_val.expr, max.expr)) @@ -1742,12 +2226,33 @@ def list_replace_n(array: Expr, from_val: Expr, to_val: Expr, max: Expr) -> Expr specified element. This is an alias for :py:func:`array_replace_n`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1, 1]]}) + >>> result = df.select( + ... dfn.functions.list_replace_n(dfn.col("a"), dfn.lit(1), dfn.lit(9), + ... dfn.lit(2)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [9, 2, 9, 1] """ return array_replace_n(array, from_val, to_val, max) def array_replace_all(array: Expr, from_val: Expr, to_val: Expr) -> Expr: - """Replaces all occurrences of ``from_val`` with ``to_val``.""" + """Replaces all occurrences of ``from_val`` with ``to_val``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.array_replace_all(dfn.col("a"), dfn.lit(1), + ... dfn.lit(9)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [9, 2, 9] + """ return Expr(f.array_replace_all(array.expr, from_val.expr, to_val.expr)) @@ -1755,6 +2260,16 @@ def list_replace_all(array: Expr, from_val: Expr, to_val: Expr) -> Expr: """Replaces all occurrences of ``from_val`` with ``to_val``. This is an alias for :py:func:`array_replace_all`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 1]]}) + >>> result = df.select( + ... dfn.functions.list_replace_all(dfn.col("a"), dfn.lit(1), + ... dfn.lit(9)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [9, 2, 9] """ return array_replace_all(array, from_val, to_val) @@ -1766,6 +2281,14 @@ def array_sort(array: Expr, descending: bool = False, null_first: bool = False) array: The input array to sort. descending: If True, sorts in descending order. null_first: If True, nulls will be returned at the beginning of the array. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[3, 1, 2]]}) + >>> result = df.select(dfn.functions.array_sort(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3] """ desc = "DESC" if descending else "ASC" nulls_first = "NULLS FIRST" if null_first else "NULLS LAST" @@ -1779,14 +2302,34 @@ def array_sort(array: Expr, descending: bool = False, null_first: bool = False) def list_sort(array: Expr, descending: bool = False, null_first: bool = False) -> Expr: - """This is an alias for :py:func:`array_sort`.""" + """This is an alias for :py:func:`array_sort`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[3, 1, 2]]}) + >>> result = df.select(dfn.functions.list_sort(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3] + """ return array_sort(array, descending=descending, null_first=null_first) def array_slice( array: Expr, begin: Expr, end: Expr, stride: Expr | None = None ) -> Expr: - """Returns a slice of the array.""" + """Returns a slice of the array. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3, 4]]}) + >>> result = df.select( + ... dfn.functions.array_slice(dfn.col("a"), dfn.lit(2), + ... dfn.lit(3)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [2, 3] + """ if stride is not None: stride = stride.expr return Expr(f.array_slice(array.expr, begin.expr, end.expr, stride)) @@ -1796,12 +2339,37 @@ def list_slice(array: Expr, begin: Expr, end: Expr, stride: Expr | None = None) """Returns a slice of the array. This is an alias for :py:func:`array_slice`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3, 4]]}) + >>> result = df.select( + ... dfn.functions.list_slice(dfn.col("a"), dfn.lit(2), + ... dfn.lit(3)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [2, 3] """ return array_slice(array, begin, end, stride) def array_intersect(array1: Expr, array2: Expr) -> Expr: - """Returns the intersection of ``array1`` and ``array2``.""" + """Returns the intersection of ``array1`` and ``array2``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]}) + >>> result = df.select( + ... dfn.functions.array_intersect( + ... dfn.col("a"), dfn.col("b") + ... ).alias("result") + ... ) + >>> sorted( + ... result.collect_column("result")[0].as_py() + ... ) + [2, 3] + """ return Expr(f.array_intersect(array1.expr, array2.expr)) @@ -1809,6 +2377,20 @@ def list_intersect(array1: Expr, array2: Expr) -> Expr: """Returns an the intersection of ``array1`` and ``array2``. This is an alias for :py:func:`array_intersect`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]}) + >>> result = df.select( + ... dfn.functions.list_intersect( + ... dfn.col("a"), dfn.col("b") + ... ).alias("result") + ... ) + >>> sorted( + ... result.collect_column("result")[0].as_py() + ... ) + [2, 3] """ return array_intersect(array1, array2) @@ -1817,6 +2399,20 @@ def array_union(array1: Expr, array2: Expr) -> Expr: """Returns an array of the elements in the union of array1 and array2. Duplicate rows will not be returned. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]}) + >>> result = df.select( + ... dfn.functions.array_union( + ... dfn.col("a"), dfn.col("b") + ... ).alias("result") + ... ) + >>> sorted( + ... result.collect_column("result")[0].as_py() + ... ) + [1, 2, 3, 4] """ return Expr(f.array_union(array1.expr, array2.expr)) @@ -1827,12 +2423,36 @@ def list_union(array1: Expr, array2: Expr) -> Expr: Duplicate rows will not be returned. This is an alias for :py:func:`array_union`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]}) + >>> result = df.select( + ... dfn.functions.list_union( + ... dfn.col("a"), dfn.col("b") + ... ).alias("result") + ... ) + >>> sorted( + ... result.collect_column("result")[0].as_py() + ... ) + [1, 2, 3, 4] """ return array_union(array1, array2) def array_except(array1: Expr, array2: Expr) -> Expr: - """Returns the elements that appear in ``array1`` but not in ``array2``.""" + """Returns the elements that appear in ``array1`` but not in ``array2``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]}) + >>> result = df.select( + ... dfn.functions.array_except(dfn.col("a"), dfn.col("b")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1] + """ return Expr(f.array_except(array1.expr, array2.expr)) @@ -1840,6 +2460,15 @@ def list_except(array1: Expr, array2: Expr) -> Expr: """Returns the elements that appear in ``array1`` but not in the ``array2``. This is an alias for :py:func:`array_except`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]}) + >>> result = df.select( + ... dfn.functions.list_except(dfn.col("a"), dfn.col("b")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1] """ return array_except(array1, array2) @@ -1849,6 +2478,16 @@ def array_resize(array: Expr, size: Expr, value: Expr) -> Expr: If ``size`` is greater than the ``array`` length, the additional entries will be filled with the given ``value``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.array_resize(dfn.col("a"), dfn.lit(4), + ... dfn.lit(0)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 0, 0] """ return Expr(f.array_resize(array.expr, size.expr, value.expr)) @@ -1858,22 +2497,59 @@ def list_resize(array: Expr, size: Expr, value: Expr) -> Expr: If ``size`` is greater than the ``array`` length, the additional entries will be filled with the given ``value``. This is an alias for :py:func:`array_resize`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select( + ... dfn.functions.list_resize(dfn.col("a"), dfn.lit(4), + ... dfn.lit(0)).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 0, 0] """ return array_resize(array, size, value) def flatten(array: Expr) -> Expr: - """Flattens an array of arrays into a single array.""" + """Flattens an array of arrays into a single array. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[[1, 2], [3, 4]]]}) + >>> result = df.select(dfn.functions.flatten(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + [1, 2, 3, 4] + """ return Expr(f.flatten(array.expr)) def cardinality(array: Expr) -> Expr: - """Returns the total number of elements in the array.""" + """Returns the total number of elements in the array. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select(dfn.functions.cardinality(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + 3 + """ return Expr(f.cardinality(array.expr)) def empty(array: Expr) -> Expr: - """This is an alias for :py:func:`array_empty`.""" + """This is an alias for :py:func:`array_empty`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2]]}) + >>> result = df.select(dfn.functions.empty(dfn.col("a")).alias("result")) + >>> result.collect_column("result")[0].as_py() + False + """ return array_empty(array) @@ -2013,6 +2689,14 @@ def array_agg( For example:: df.aggregate([], array_agg(col("a"), order_by="b")) + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.aggregate([], [dfn.functions.array_agg(dfn.col("a")).alias("v")]) + >>> result.collect_column("v")[0].as_py() + [1, 2, 3] """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None