Skip to content

Commit c3958f2

Browse files
authored
feat (database): first and last formulas (baserow#5066)
* feat: add first(), last() and generalized index() formula functions
* fix: address PR review feedback and handle NaN index argument
  - Rename _BaserowIndexShortcut to BaserowIndexShortcut (drop leading _)
  - Remove unnecessary comments and section separators
  - Rename variable c to clone in JSONBArrayGetElement
  - Rename test to test_first_and_last_return_scalar_values, drop docstring
  - Remove step comments in test_index_generalized.py
  - Handle NaN/div-by-zero as index() argument (returns null instead of crash)
* feat: update index() formula examples to include array fields
1 parent d9acf4f commit c3958f2

13 files changed

Lines changed: 1182 additions & 25 deletions

File tree

backend/src/baserow/contrib/database/formula/ast/function_defs.py

Lines changed: 132 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
BaserowExpressionContext,
7777
BaserowFunctionCall,
7878
BaserowIntegerLiteral,
79+
BaserowStringLiteral,
7980
)
8081
from baserow.contrib.database.formula.expression_generator.django_expressions import (
8182
AndExpr,
@@ -84,6 +85,7 @@
8485
GreaterThanExpr,
8586
GreaterThanOrEqualExpr,
8687
IsNullExpr,
88+
JSONBArrayGetElement,
8789
JSONBArrayJoinValues,
8890
JSONBArraySlice,
8991
JSONBArrayUniqueByValue,
@@ -265,6 +267,8 @@ def register_formula_functions(registry):
265267
registry.register(BaserowArrayLength())
266268
registry.register(BaserowArrayJoinValues())
267269
registry.register(BaserowArraySlice())
270+
registry.register(BaserowFirst())
271+
registry.register(BaserowLast())
268272
# ManyToMany functions
269273
registry.register(BaserowStringAggManyToManyValues())
270274
registry.register(BaserowManyToManyCount())
@@ -2607,6 +2611,45 @@ def to_django_expression(
26072611
)
26082612

26092613

2614+
class BaserowIndexShortcut(OneArgumentBaserowFunction):
    """
    Base for one-argument shortcuts that pick a fixed position from an array.

    Subclasses provide ``type`` and ``_index``. Typing rewrites the call into
    the registered ``index`` function, passing ``_index`` as an integer
    literal, so this class never produces a Django expression itself.
    """

    arg_type = [BaserowFormulaValidType]
    # Position forwarded to ``index``; set by each concrete subclass.
    _index: int

    def type_function(
        self,
        func_call: BaserowFunctionCall[UnTyped],
        arg: BaserowExpression[BaserowFormulaValidType],
    ) -> BaserowExpression[BaserowFormulaType]:
        """
        Type the call by delegating to ``index(arg, self._index)``.

        Returns an invalid-typed ``func_call`` when the (collapsed) argument is
        not an array; otherwise returns whatever ``index`` produces.
        """

        array_arg = arg.expression_type.collapse_many(arg) if arg.many else arg

        if not isinstance(array_arg.expression_type, BaserowFormulaArrayType):
            return func_call.with_invalid_type(f"{self.type} requires an array input.")

        # Function-scope import, kept where the original placed it.
        from baserow.contrib.database.formula.registries import (
            formula_function_registry,
        )

        position = BaserowIntegerLiteral(self._index, BaserowFormulaNumberType(0))
        return formula_function_registry.get("index").call_and_type_with_args(
            [array_arg, position]
        )

    def to_django_expression(self, arg: Expression) -> Expression:
        """Always raises: ``type_function`` hands the call over to ``index``."""

        raise NotImplementedError("type_function delegates to index")
2641+
2642+
2643+
class BaserowFirst(BaserowIndexShortcut):
    """``first(array)`` shortcut: forwards position ``0`` to ``index``."""

    type = "first"
    _index = 0
2646+
2647+
2648+
class BaserowLast(BaserowIndexShortcut):
    """``last(array)`` shortcut: forwards position ``-1`` to ``index``."""

    type = "last"
    _index = -1
2651+
2652+
26102653
class BaserowArrayLength(OneArgumentBaserowFunction):
26112654
type = "array_length"
26122655
arg_type = [BaserowFormulaArrayType]
@@ -2979,36 +3022,105 @@ def to_django_expression(self, arg: Expression) -> Expression:
29793022
)
29803023

29813024

2982-
class BaserowIndex(TwoArgumentBaserowFunction):
2983-
arg1_type = [BaserowFormulaArrayType]
2984-
arg2_type = [BaserowFormulaNumberType]
3025+
def _index_output_field(mode):
    """
    Build a new Django output field instance for the given extraction mode.

    ``mode`` is handed to ``_lookup_formula_type_from_string`` and the
    ``output_field_class`` of the result is instantiated. If that lookup or
    instantiation raises, a ``TextField`` is returned instead.
    """

    from baserow.contrib.database.formula.types.formula_types import (
        _lookup_formula_type_from_string,
    )

    try:
        formula_type = _lookup_formula_type_from_string(mode)
        return formula_type.output_field_class()
    except Exception:
        # NOTE(review): the catch-all is kept as written; which exceptions the
        # lookup can raise is not visible here, so it has not been narrowed.
        return fields.TextField()
3036+
3037+
3038+
def _unwrap_literal_value(django_expr):
3039+
"""
3040+
Extract the Python value from a Django expression that wraps a
3041+
``Value(...)`` — e.g. ``Cast(Value('x'), TextField())``.
3042+
"""
3043+
3044+
while not hasattr(django_expr, "value"):
3045+
if (
3046+
hasattr(django_expr, "source_expressions")
3047+
and django_expr.source_expressions
3048+
):
3049+
django_expr = django_expr.source_expressions[0]
3050+
else:
3051+
return None
3052+
return django_expr.value
3053+
29853054

3055+
class BaserowIndex(BaserowFunctionDefinition):
29863056
type = "index"
3057+
num_args = NumOfArgsBetween(2, 4)
29873058

2988-
def type_function(
3059+
@property
def arg_types(self) -> BaserowArgumentTypeChecker:
    """
    Return the per-position argument type checker for ``index``.

    Position 0 accepts any valid type, position 1 a number, and every later
    position text (the mode and SQL-template literals).
    """

    def type_checker(arg_index, arg_types):
        # Built per call so each caller receives fresh lists.
        leading = {
            0: [BaserowFormulaValidType],
            1: [BaserowFormulaNumberType],
        }
        return leading.get(arg_index, [BaserowFormulaTextType])

    return type_checker
3070+
3071+
def type_function_given_valid_args(
29893072
self,
3073+
args: List[BaserowExpression[BaserowFormulaValidType]],
29903074
func_call: BaserowFunctionCall[UnTyped],
2991-
arg1: BaserowExpression[BaserowFormulaValidType],
2992-
arg2: BaserowExpression[BaserowFormulaValidType],
29933075
) -> BaserowExpression[BaserowFormulaType]:
2994-
if not isinstance(arg1.expression_type.sub_type, BaserowFormulaSingleFileType):
3076+
if len(args) not in (2, 4):
29953077
return func_call.with_invalid_type(
2996-
"index only currently supports indexing file fields."
2997-
)
2998-
else:
2999-
if arg1.many:
3000-
arg1 = arg1.expression_type.collapse_many(arg1)
3001-
return func_call.with_args([arg1, arg2]).with_valid_type(
3002-
arg1.expression_type.sub_type
3078+
"index requires exactly 2 arguments: an array and an index."
30033079
)
30043080

3005-
def to_django_expression(self, arg1: Expression, arg2: Expression) -> Expression:
3006-
return Func(
3007-
arg1,
3008-
Cast(arg2, fields.TextField()),
3009-
function="jsonb_extract_path",
3010-
output_field=JSONField(),
3081+
arg1, arg2 = args[0], args[1]
3082+
3083+
if arg1.many:
3084+
arg1 = arg1.expression_type.collapse_many(arg1)
3085+
3086+
if not isinstance(arg1.expression_type, BaserowFormulaArrayType):
3087+
return func_call.with_invalid_type("index requires an array input.")
3088+
3089+
sub_type = arg1.expression_type.sub_type
3090+
3091+
if len(args) == 4:
3092+
return func_call.with_args(list(args)).with_valid_type(sub_type)
3093+
3094+
mode_literal = BaserowStringLiteral(
3095+
sub_type.array_index_mode, BaserowFormulaTextType()
30113096
)
3097+
sql_literal = BaserowStringLiteral(
3098+
sub_type.array_index_sql, BaserowFormulaTextType()
3099+
)
3100+
3101+
return func_call.with_args(
3102+
[arg1, arg2, mode_literal, sql_literal]
3103+
).with_valid_type(sub_type)
3104+
3105+
def to_django_expression_given_args(
    self,
    args: List["WrappedExpressionWithMetadata"],
    context: BaserowExpressionContext,
) -> "WrappedExpressionWithMetadata":
    """
    Build the element-extraction expression for ``index``.

    ``args`` is expected to hold four entries: the array, the index, and the
    mode and SQL-template string literals. The mode selects the Django output
    field; the template controls how the picked element is unwrapped.

    :param args: The already generated argument expressions.
    :param context: The expression generation context (unused here).
    :return: The wrapped ``JSONBArrayGetElement`` expression.
    :raises ValueError: If the SQL template is not one of the supported shapes.
    """

    import re

    mode = _unwrap_literal_value(args[2].expression) or "text"
    value_sql = _unwrap_literal_value(args[3].expression) or "{elem} ->> 'value'"

    # The template is formatted straight into raw SQL by JSONBArrayGetElement,
    # and it arrives as a formula string argument (index accepts up to four
    # arguments), so it may not have been generated by a formula type. Only
    # accept the shapes the types produce:
    #   {elem}
    #   {elem} -> 'value'      /  {elem} ->> 'value'
    #   ({elem} -> 'value')::t /  ({elem} ->> 'value')::t
    if not isinstance(value_sql, str) or not re.fullmatch(
        r"\{elem\}(?: ->>? 'value')?|\(\{elem\} ->>? 'value'\)::[a-z]+",
        value_sql,
    ):
        raise ValueError("index received an unsupported value extraction template.")

    index_expr = args[1].expression
    # Presumably swaps a NaN index for a NULL integer so the lookup yields
    # NULL instead of failing — confirm against handle_arg_being_nan.
    safe_index = handle_arg_being_nan(
        index_expr,
        Value(None, output_field=fields.IntegerField()),
        index_expr,
    )
    expr = JSONBArrayGetElement(
        args[0].expression,
        safe_index,
        value_sql,
        _index_output_field(mode),
    )
    return WrappedExpressionWithMetadata.from_args(expr, args)
30123124

30133125

30143126
class BaserowJsonbExtractPathText(BaserowFunctionDefinition):

backend/src/baserow/contrib/database/formula/expression_generator/django_expressions.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,47 @@ def as_sql(self, compiler, connection, **extra_context):
163163
return sql, (*separator_params, *params)
164164

165165

166+
class JSONBArrayGetElement(Expression):
    """
    Extract a single element from a JSONB array by 0-based index (negative
    counts from end) and optionally unwrap / cast the ``value`` key.

    *value_sql* is a SQL template with an ``{elem}`` placeholder that controls
    how the element is extracted (e.g. ``({elem} ->> 'value')::numeric``).
    Each formula type provides its own template via ``array_index_sql``.
    The template is inserted into the query verbatim, so it must come from
    trusted code and never from user input.

    PostgreSQL's ``->`` operator natively handles negative indices and returns
    NULL for out-of-bounds, so no CASE expression is needed.
    """

    def __init__(self, array_expr, index_expr, value_sql, output_field):
        super().__init__(output_field=output_field)
        self.array_expr = array_expr
        self.index_expr = index_expr
        self.value_sql = value_sql

    def get_source_expressions(self):
        """Expose the child expressions so Django can inspect and rewrite them."""

        return [self.array_expr, self.index_expr]

    def set_source_expressions(self, exprs):
        """Replace the child expressions, in ``get_source_expressions`` order."""

        self.array_expr, self.index_expr = exprs

    def resolve_expression(
        self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False
    ):
        """Return a copy of this expression with both children resolved."""

        clone = self.copy()
        clone.is_summary = summarize
        clone.array_expr = self.array_expr.resolve_expression(
            query, allow_joins, reuse, summarize, for_save
        )
        clone.index_expr = self.index_expr.resolve_expression(
            query, allow_joins, reuse, summarize, for_save
        )
        return clone

    def as_sql(self, compiler, connection, **extra_context):
        """
        Compile to ``(<template applied to (array) -> (index)::int>)``.

        ``extra_context`` is accepted for signature compatibility with other
        expressions and is not used.
        """

        arr_sql, arr_params = compiler.compile(self.array_expr)
        idx_sql, idx_params = compiler.compile(self.index_expr)

        elem_sql = f"({arr_sql}) -> ({idx_sql})::int"
        sql = f"({self.value_sql.format(elem=elem_sql)})"
        # Parameter order follows placeholder order: array first, then index.
        return sql, list(arr_params) + list(idx_params)
205+
206+
166207
class JSONBArraySlice(Expression):
167208
"""
168209
Slice a JSONB array with offset, limit, and optional reverse.

backend/src/baserow/contrib/database/formula/types/formula_type.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import abc
22
from typing import TYPE_CHECKING, List, Type, TypeVar
33

4+
from django.db import models
45
from django.db.models import Expression, F, Model, Value
56
from django.utils.functional import classproperty
67

@@ -285,6 +286,18 @@ def can_represent_collaborators(self) -> bool:
285286
def item_is_in_nested_value_object_when_in_array(self) -> bool:
286287
return True
287288

289+
@property
def array_index_mode(self) -> str:
    """Return the mode string used when indexing arrays of this type: ``self.type``."""

    mode = self.type
    return mode
292+
293+
@property
294+
def array_index_sql(self) -> str:
295+
if not self.item_is_in_nested_value_object_when_in_array:
296+
return "{elem}"
297+
return "{elem} ->> 'value'"
298+
299+
output_field_class = models.TextField
300+
288301
@property
289302
def can_have_db_index(self) -> bool:
290303
return False

backend/src/baserow/contrib/database/formula/types/formula_types.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@
8080

8181

8282
class BaserowJSONBObjectBaseType(BaserowFormulaValidType, ABC):
83+
array_index_sql = "{elem} -> 'value'"
84+
output_field_class = JSONField
85+
8386
def parse_filter_value(self, field, model_field, value):
8487
"""
8588
Since the subclasses don't have a baserow_field_type or data might be stored
@@ -361,6 +364,8 @@ class BaserowFormulaNumberType(
361364
):
362365
type = "number"
363366
baserow_field_type = "number"
367+
array_index_sql = "({elem} ->> 'value')::numeric"
368+
output_field_class = models.DecimalField
364369
user_overridable_formatting_option_fields = [
365370
"number_decimal_places",
366371
"number_prefix",
@@ -516,6 +521,8 @@ class BaserowFormulaBooleanType(
516521
):
517522
type = "boolean"
518523
baserow_field_type = "boolean"
524+
array_index_sql = "({elem} ->> 'value')::boolean"
525+
output_field_class = models.BooleanField
519526
can_order_by_in_array = True
520527
can_group_by = True
521528
can_have_db_index = True
@@ -715,6 +722,8 @@ class BaserowFormulaDurationType(
715722
):
716723
type = "duration"
717724
baserow_field_type = "duration"
725+
array_index_sql = "({elem} ->> 'value')::interval"
726+
output_field_class = models.DurationField
718727
user_overridable_formatting_option_fields = ["duration_format"]
719728
can_group_by = True
720729
can_order_by_in_array = True
@@ -856,6 +865,7 @@ class BaserowFormulaDateType(
856865
can_order_by_in_array = True
857866
can_group_by = True
858867
can_have_db_index = True
868+
output_field_class = models.DateTimeField
859869

860870
def __init__(
861871
self,
@@ -873,6 +883,11 @@ def __init__(
873883
self.date_show_tzinfo = date_show_tzinfo
874884
self.date_force_timezone = date_force_timezone
875885

886+
@property
def array_index_sql(self) -> str:
    """
    Return the SQL template that extracts one date element from an array.

    The ``value`` key is read as text and cast to ``timestamptz`` when the
    type includes time, or to ``date`` otherwise.
    """

    if self.date_include_time:
        target = "timestamptz"
    else:
        target = "date"
    return "({elem} ->> 'value')::" + target
890+
876891
@property
877892
def comparable_types(self) -> List[Type["BaserowFormulaValidType"]]:
878893
return [
@@ -1003,6 +1018,7 @@ class BaserowFormulaSingleFileType(
10031018
can_order_by_in_array = False
10041019
baserow_field_type = None
10051020
item_is_in_nested_value_object_when_in_array = False
1021+
array_index_sql = "{elem}"
10061022
can_represent_files = True
10071023

10081024
def is_searchable(self, field):

backend/tests/baserow/contrib/automation/history/utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@ def assert_history(
66
):
77
"""Helper to test AutomationWorkflowHistory objects."""
88

9-
histories = list(AutomationWorkflowHistory.objects.filter(workflow=workflow))
9+
histories = list(
10+
AutomationWorkflowHistory.objects.filter(workflow=workflow).order_by(
11+
"started_on", "id"
12+
)
13+
)
1014
assert len(histories) == expected_count
1115
if expected_count > 0:
1216
history = histories[history_index]

backend/tests/baserow/contrib/database/formula/test_baserow_formula_results.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3063,6 +3063,43 @@ def test_array_slice_empty_array(data_fixture):
30633063
assert getattr(result, ref_slice_field.db_column) == []
30643064

30653065

3066+
@pytest.mark.django_db
def test_first_and_last_return_scalar_values(data_fixture):
    # first()/last() over a text lookup are expected to give "A" and "E".
    user = data_fixture.create_user()
    table_a, table_b, link_field = data_fixture.create_two_linked_tables(user=user)
    text_field, _b_rows, row_a1 = _setup_text_5_rows(
        data_fixture, table_a, table_b, link_field, user
    )

    # Order matters: the lookup must exist before the formulas that reference it.
    formulas = [
        ("lookup", f"lookup('{link_field.name}', '{text_field.name}')"),
        ("first_val", "first(field('lookup'))"),
        ("last_val", "last(field('lookup'))"),
    ]
    created = {
        name: FieldHandler().create_field(
            user,
            table_a,
            "formula",
            name=name,
            formula=formula,
        )
        for name, formula in formulas
    }

    row = table_a.get_model().objects.get(id=row_a1.id)

    assert getattr(row, created["first_val"].db_column) == "A"
    assert getattr(row, created["last_val"].db_column) == "E"
3101+
3102+
30663103
@pytest.mark.django_db
30673104
def test_array_slice_rejects_non_array_input(data_fixture):
30683105
user = data_fixture.create_user()

0 commit comments

Comments
 (0)