Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
249 changes: 226 additions & 23 deletions python/datafusion/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,10 +484,13 @@ def window(
def abs(arg: Expr) -> Expr:
    """Return the absolute value of a given number.

    Returns:
    --------
    Expr
        A new expression representing the absolute value of the input expression.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [-1, 0, 1]})
    >>> result = df.select(dfn.functions.abs(dfn.col("a")).alias("abs"))
    >>> result.collect_column("abs")[0].as_py()
    1
    """
    return Expr(f.abs(arg.expr))

Expand Down Expand Up @@ -607,12 +610,30 @@ def btrim(arg: Expr) -> Expr:


def cbrt(arg: Expr) -> Expr:
    """Returns the cube root of a number.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [27]})
    >>> cbrt_df = df.select(dfn.functions.cbrt(dfn.col("a")).alias("cbrt"))
    >>> cbrt_df.collect_column("cbrt")[0].as_py()
    3.0
    """
    return Expr(f.cbrt(arg.expr))


def ceil(arg: Expr) -> Expr:
    """Returns the nearest integer greater than or equal to argument.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [1.9]})
    >>> ceil_df = df.select(dfn.functions.ceil(dfn.col("a")).alias("ceil"))
    >>> ceil_df.collect_column("ceil")[0].as_py()
    2.0
    """
    return Expr(f.ceil(arg.expr))


Expand Down Expand Up @@ -711,12 +732,32 @@ def ends_with(arg: Expr, suffix: Expr) -> Expr:


def exp(arg: Expr) -> Expr:
    """Returns the exponential of the argument.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [0.0]})
    >>> result = df.select(dfn.functions.exp(dfn.col("a")).alias("exp"))
    >>> result.collect_column("exp")[0].as_py()
    1.0
    """
    return Expr(f.exp(arg.expr))


def factorial(arg: Expr) -> Expr:
    """Returns the factorial of the argument.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [3]})
    >>> result = df.select(
    ...     dfn.functions.factorial(dfn.col("a")).alias("factorial")
    ... )
    >>> result.collect_column("factorial")[0].as_py()
    6
    """
    return Expr(f.factorial(arg.expr))


Expand All @@ -732,12 +773,30 @@ def find_in_set(string: Expr, string_list: Expr) -> Expr:


def floor(arg: Expr) -> Expr:
    """Returns the nearest integer less than or equal to the argument.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [1.9]})
    >>> floor_df = df.select(dfn.functions.floor(dfn.col("a")).alias("floor"))
    >>> floor_df.collect_column("floor")[0].as_py()
    1.0
    """
    return Expr(f.floor(arg.expr))


def gcd(x: Expr, y: Expr) -> Expr:
    """Returns the greatest common divisor.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [12], "b": [8]})
    >>> result = df.select(dfn.functions.gcd(dfn.col("a"), dfn.col("b")).alias("gcd"))
    >>> result.collect_column("gcd")[0].as_py()
    4
    """
    return Expr(f.gcd(x.expr, y.expr))


Expand All @@ -759,12 +818,30 @@ def instr(string: Expr, substring: Expr) -> Expr:


def iszero(arg: Expr) -> Expr:
    """Returns true if a given number is +0.0 or -0.0 otherwise returns false.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [0.0, 1.0]})
    >>> result = df.select(dfn.functions.iszero(dfn.col("a")).alias("iz"))
    >>> result.collect_column("iz")[0].as_py()
    True
    """
    return Expr(f.iszero(arg.expr))


def lcm(x: Expr, y: Expr) -> Expr:
    """Returns the least common multiple.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [4], "b": [6]})
    >>> result = df.select(dfn.functions.lcm(dfn.col("a"), dfn.col("b")).alias("lcm"))
    >>> result.collect_column("lcm")[0].as_py()
    12
    """
    return Expr(f.lcm(x.expr, y.expr))


Expand All @@ -779,22 +856,58 @@ def levenshtein(string1: Expr, string2: Expr) -> Expr:


def ln(arg: Expr) -> Expr:
    """Returns the natural logarithm (base e) of the argument.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [1.0]})
    >>> result = df.select(dfn.functions.ln(dfn.col("a")).alias("ln"))
    >>> result.collect_column("ln")[0].as_py()
    0.0
    """
    return Expr(f.ln(arg.expr))


def log(base: Expr, num: Expr) -> Expr:
    """Returns the logarithm of a number for a particular ``base``.

    Note the argument order: ``base`` comes first, then ``num``.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [100.0]})
    >>> result = df.select(dfn.functions.log(dfn.lit(10.0), dfn.col("a")).alias("log"))
    >>> result.collect_column("log")[0].as_py()
    2.0
    """
    return Expr(f.log(base.expr, num.expr))


def log10(arg: Expr) -> Expr:
    """Base 10 logarithm of the argument.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [100.0]})
    >>> result = df.select(dfn.functions.log10(dfn.col("a")).alias("log10"))
    >>> result.collect_column("log10")[0].as_py()
    2.0
    """
    return Expr(f.log10(arg.expr))


def log2(arg: Expr) -> Expr:
    """Base 2 logarithm of the argument.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [8.0]})
    >>> result = df.select(dfn.functions.log2(dfn.col("a")).alias("log2"))
    >>> result.collect_column("log2")[0].as_py()
    3.0
    """
    return Expr(f.log2(arg.expr))


Expand Down Expand Up @@ -825,7 +938,19 @@ def md5(arg: Expr) -> Expr:


def nanvl(x: Expr, y: Expr) -> Expr:
    """Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [np.nan, 1.0], "b": [0.0, 0.0]})
    >>> nanvl_df = df.select(
    ...     dfn.functions.nanvl(dfn.col("a"), dfn.col("b")).alias("nanvl"))
    >>> nanvl_df.collect_column("nanvl")[0].as_py()
    0.0
    >>> nanvl_df.collect_column("nanvl")[1].as_py()
    1.0
    """
    return Expr(f.nanvl(x.expr, y.expr))


Expand Down Expand Up @@ -853,7 +978,21 @@ def overlay(


def pi() -> Expr:
    """Returns an approximate value of π.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [1]})
    >>> import builtins  # this module's round() shadows the builtin
    >>> result = df.select(
    ...     dfn.functions.pi().alias("pi")
    ... )
    >>> builtins.round(
    ...     result.collect_column("pi")[0].as_py(), 5
    ... )
    3.14159
    """
    return Expr(f.pi())


Expand All @@ -866,14 +1005,31 @@ def position(string: Expr, substring: Expr) -> Expr:


def power(base: Expr, exponent: Expr) -> Expr:
    """Returns ``base`` raised to the power of ``exponent``.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [2.0]})
    >>> result = df.select(dfn.functions.power(dfn.col("a"), dfn.lit(3.0)).alias("pow"))
    >>> result.collect_column("pow")[0].as_py()
    8.0
    """
    return Expr(f.power(base.expr, exponent.expr))


def pow(base: Expr, exponent: Expr) -> Expr:
    """Returns ``base`` raised to the power of ``exponent``.

    This is an alias of :py:func:`power`.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [3.0]})
    >>> result = df.select(dfn.functions.pow(dfn.col("a"), dfn.lit(2.0)).alias("pow"))
    >>> result.collect_column("pow")[0].as_py()
    9.0
    """
    return power(base, exponent)

Expand Down Expand Up @@ -1008,6 +1164,14 @@ def round(value: Expr, decimal_places: Expr | None = None) -> Expr:
If the optional ``decimal_places`` is specified, round to the nearest number of
decimal places. You can specify a negative number of decimal places. For example
``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1.567]})
>>> result = df.select(dfn.functions.round(dfn.col("a"), dfn.lit(2)).alias("r"))
>>> result.collect_column("r")[0].as_py()
1.57
"""
if decimal_places is None:
decimal_places = Expr.literal(0)
Expand Down Expand Up @@ -1050,7 +1214,16 @@ def sha512(arg: Expr) -> Expr:


def signum(arg: Expr) -> Expr:
    """Returns the sign of the argument (-1, 0, +1).

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [-5.0, 0.0, 5.0]})
    >>> result = df.select(dfn.functions.signum(dfn.col("a")).alias("s"))
    >>> result.collect_column("s").to_pylist()
    [-1.0, 0.0, 1.0]
    """
    return Expr(f.signum(arg.expr))


Expand Down Expand Up @@ -1092,7 +1265,16 @@ def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr:


def sqrt(arg: Expr) -> Expr:
    """Returns the square root of the argument.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [9.0]})
    >>> result = df.select(dfn.functions.sqrt(dfn.col("a")).alias("sqrt"))
    >>> result.collect_column("sqrt")[0].as_py()
    3.0
    """
    return Expr(f.sqrt(arg.expr))


Expand Down Expand Up @@ -1331,7 +1513,16 @@ def trim(arg: Expr) -> Expr:


def trunc(num: Expr, precision: Expr | None = None) -> Expr:
    """Truncate the number toward zero with optional precision.

    When ``precision`` is ``None`` the single-argument form of the underlying
    function is called; otherwise ``precision`` is forwarded as a second argument.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [1.567]})
    >>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t"))
    >>> result.collect_column("t")[0].as_py()
    1.0
    """
    if precision is not None:
        return Expr(f.trunc(num.expr, precision.expr))
    return Expr(f.trunc(num.expr))
Expand Down Expand Up @@ -1408,7 +1599,19 @@ def arrow_cast(expr: Expr, data_type: Expr) -> Expr:


def random() -> Expr:
    """Returns a random value in the range ``0.0 <= x < 1.0``.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [1]})
    >>> result = df.select(
    ...     dfn.functions.random().alias("r")
    ... )
    >>> val = result.collect_column("r")[0].as_py()
    >>> 0.0 <= val < 1.0
    True
    """
    return Expr(f.random())


Expand Down
Loading