From 05f82f9937fb1b2a29954849381f8b6817effbf0 Mon Sep 17 00:00:00 2001 From: "David W. Dougherty" Date: Thu, 14 May 2026 07:37:18 -0700 Subject: [PATCH 1/2] DEV: (cmds) FT.AGGREGATE - add COLLECT reducer --- content/commands/ft.aggregate.md | 363 ++++++++++++++++-- .../advanced-concepts/aggregations.md | 54 +++ static/images/railroad/ft.aggregate.svg | 318 ++++++++++----- 3 files changed, 618 insertions(+), 117 deletions(-) diff --git a/content/commands/ft.aggregate.md b/content/commands/ft.aggregate.md index fd2f6c16d3..c90ed85b84 100644 --- a/content/commands/ft.aggregate.md +++ b/content/commands/ft.aggregate.md @@ -5,11 +5,14 @@ acl_categories: - '@fast' arguments: - name: index + summary: Specifies the name of the index. The index must be created using `FT.CREATE`. type: string - name: query + summary: Specifies the base filtering query that retrieves the documents to aggregate. type: string - name: verbatim optional: true + summary: Searches using the exact query terms without stemming or synonym expansion. token: VERBATIM type: pure-token - arguments: @@ -18,12 +21,14 @@ arguments: type: string - multiple: true name: field + summary: Specifies a document attribute to load from the source document. type: string name: load optional: true type: block - name: timeout optional: true + summary: Sets a time limit for query execution, specified in milliseconds. 
token: TIMEOUT type: integer - name: loadall @@ -38,9 +43,111 @@ arguments: name: property type: string - arguments: - - name: function + - name: reduce token: REDUCE - type: string + type: pure-token + - arguments: + - name: count + token: COUNT + type: pure-token + - name: count_distinct + token: COUNT_DISTINCT + type: pure-token + - name: count_distinctish + token: COUNT_DISTINCTISH + type: pure-token + - name: sum + token: SUM + type: pure-token + - name: min + token: MIN + type: pure-token + - name: max + token: MAX + type: pure-token + - name: avg + token: AVG + type: pure-token + - name: stddev + token: STDDEV + type: pure-token + - name: quantile + token: QUANTILE + type: pure-token + - name: tolist + token: TOLIST + type: pure-token + - name: first_value + token: FIRST_VALUE + type: pure-token + - name: random_sample + token: RANDOM_SAMPLE + type: pure-token + - arguments: + - name: collect_token + token: COLLECT + type: pure-token + - arguments: + - name: fields_token + token: FIELDS + type: pure-token + - arguments: + - name: all + token: '*' + type: pure-token + - arguments: + - name: num_fields + type: integer + - multiple: true + name: field + type: string + name: explicit + type: block + name: fields_spec + type: oneof + name: fields + type: block + - arguments: + - name: sortby_token + token: SORTBY + type: pure-token + - name: nargs + type: integer + - arguments: + - name: field + type: string + - arguments: + - name: asc + token: ASC + type: pure-token + - name: desc + token: DESC + type: pure-token + name: order + optional: true + type: oneof + multiple: true + name: key + type: block + name: sortby + optional: true + type: block + - arguments: + - name: limit_token + token: LIMIT + type: pure-token + - name: offset + type: integer + - name: count + type: integer + name: limit + optional: true + type: block + name: collect + since: 8.8.0 + type: block + name: function + type: oneof - name: nargs type: integer - multiple: true @@ -53,10 +160,12 @@ 
arguments: multiple: true name: reduce optional: true + summary: Applies a reducer function, like `SUM` or `COUNT`, on grouped results. type: block multiple: true name: groupby optional: true + summary: Groups results by specified fields, often used for aggregations. type: block - arguments: - name: nargs @@ -86,9 +195,203 @@ arguments: optional: true type: block - arguments: - - name: expression + - arguments: + - arguments: + - token: s + name: exists + summary: Checks whether a field exists in a document. + token: exists + type: function + - arguments: + - token: x + name: log + summary: Return the logarithm of a number, property or subexpression + token: log + type: function + - arguments: + - token: x + name: abs + summary: Return the absolute value of a numeric expression + token: abs + type: function + - arguments: + - token: x + name: ceil + summary: Round to the smallest integer not less than x + token: ceil + type: function + - arguments: + - token: x + name: floor + summary: Round to largest integer not greater than x + token: floor + type: function + - arguments: + - token: x + name: log2 + summary: Return the logarithm of x to base 2 + token: log2 + type: function + - arguments: + - token: x + name: exp + summary: Return the exponent of x, e.g., e^x + token: exp + type: function + - arguments: + - token: x + name: sqrt + summary: Return the square root of x + token: sqrt + type: function + - arguments: + - token: s + name: upper + summary: Return the uppercase conversion of s + token: upper + type: function + - arguments: + - token: s + name: lower + summary: Return the lowercase conversion of s + token: lower + type: function + - arguments: + - token: s1 + - token: s2 + name: startswith + summary: Return 1 if s2 is the prefix of s1, 0 otherwise. + token: startswith + type: function + - arguments: + - token: s1 + - token: s2 + name: contains + summary: Return the number of occurrences of s2 in s1, 0 otherwise. 
If s2 is + an empty string, return length(s1) + 1. + token: contains + type: function + - arguments: + - token: s + name: strlen + summary: Return the length of s + token: strlen + type: function + - arguments: + - token: s + - token: offset + - token: count + name: substr + summary: Return the substring of s, starting at offset and having count characters. + If offset is negative, it represents the distance from the end of the string. + If count is -1, it means "the rest of the string starting at offset". + token: substr + type: function + - arguments: + - token: fmt + name: format + summary: Use the arguments following fmt to format a string. Currently the only + format argument supported is %s and it applies to all types of arguments. + token: format + type: function + - arguments: + - optional: true + token: max_terms=100 + name: matched_terms + summary: Return the query terms that matched for each record (up to 100), as + a list. If a limit is specified, Redis will return the first N matches found, + based on query order. + token: matched_terms + type: function + - arguments: + - token: s + name: split + summary: Split a string by any character in the string sep, and strip any characters + in strip. If only s is specified, it is split by commas and spaces are stripped. + The output is an array. + token: split + type: function + - arguments: + - token: x + - optional: true + token: fmt + name: timefmt + summary: Return a formatted time string based on a numeric timestamp value x. + token: timefmt + type: function + - arguments: + - token: timestring + - optional: true + token: fmt + name: parsetime + summary: The opposite of timefmt() - parse a formatted time string using a given format + string + token: parsetime + type: function + - arguments: + - token: timestamp + name: day + summary: Round a Unix timestamp to midnight (00:00) start of the current day. 
+ token: day + type: function + - arguments: + - token: timestamp + name: hour + summary: Round a Unix timestamp to the beginning of the current hour. + token: hour + type: function + - arguments: + - token: timestamp + name: minute + summary: Round a Unix timestamp to the beginning of the current minute. + token: minute + type: function + - arguments: + - token: timestamp + name: month + summary: Round a Unix timestamp to the beginning of the current month. + token: month + type: function + - arguments: + - token: timestamp + name: dayofweek + summary: Convert a Unix timestamp to the day number (Sunday = 0). + token: dayofweek + type: function + - arguments: + - token: timestamp + name: dayofmonth + summary: Convert a Unix timestamp to the day of month number (1 .. 31). + token: dayofmonth + type: function + - arguments: + - token: timestamp + name: dayofyear + summary: Convert a Unix timestamp to the day of year number (0 .. 365). + token: dayofyear + type: function + - arguments: + - token: timestamp + name: year + summary: Convert a Unix timestamp to the current year (e.g. 2018). + token: year + type: function + - arguments: + - token: timestamp + name: monthofyear + summary: Convert a Unix timestamp to the current month (0 .. 11). + token: monthofyear + type: function + - arguments: + - token: '' + name: geodistance + summary: Return distance in meters. + token: geodistance + type: function + expression: true + name: expression token: APPLY - type: string + type: block - name: name token: AS type: string @@ -107,8 +410,11 @@ arguments: name: limit optional: true type: block -- name: filter +- expression: true + name: filter optional: true + summary: Filters the results using a predicate expression evaluated against the + values in each result. token: FILTER type: string - arguments: @@ -146,6 +452,7 @@ arguments: - name: dialect optional: true since: 2.4.3 + summary: Sets the query dialect version to be used. 
token: DIALECT type: integer categories: @@ -174,22 +481,12 @@ summary: Run a search query on an index and perform aggregate transformations on results syntax: "FT.AGGREGATE index query \n [VERBATIM] \n [LOAD count field [field ...]]\ \ \n [TIMEOUT timeout] \n [GROUPBY nargs property [property ...] [REDUCE function\ - \ nargs arg [arg ...] [AS name] [REDUCE function nargs arg [arg ...] [AS name]\ - \ ...]] ...]] \n [SORTBY nargs [property ASC | DESC [property ASC | DESC ...]]\ - \ [MAX num] [WITHCOUNT | WITHOUTCOUNT]] \n [APPLY expression AS name [APPLY expression AS name\ - \ ...]] \n [LIMIT offset num] \n [FILTER filter] \n [WITHCURSOR [COUNT read_size]\ - \ [MAXIDLE idle_time]] \n [PARAMS nargs name value [name value ...]] \n [SCORER scorer]\n - \ [ADDSCORES] \n [DIALECT\ - \ dialect]\n" -syntax_fmt: "FT.AGGREGATE index query [VERBATIM] [LOAD\_count field [field ...]]\n\ - \ [TIMEOUT\_timeout] [LOAD *] [GROUPBY\_nargs property [property ...]\n [REDUCE\_\ - function nargs arg [arg ...] [AS\_name] [REDUCE\_function\n nargs arg [arg ...]\ - \ [AS\_name] ...]] [GROUPBY\_nargs property\n [property ...] [REDUCE\_function\ - \ nargs arg [arg ...] [AS\_name]\n [REDUCE\_function nargs arg [arg ...] [AS\_\ - name] ...]] ...]]\n [SORTBY\_nargs [property [property \ - \ ...]]\n [MAX\_num]] [APPLY\_expression AS\_name [APPLY\_expression AS\_name\n\ - \ ...]] [LIMIT offset num] [FILTER\_filter] [WITHCURSOR\n [COUNT\_read_size] [MAXIDLE\_\ - idle_time]] [PARAMS nargs name value\n [name value ...]] [DIALECT\_dialect]" + \ nargs arg [arg ...] [AS name] [REDUCE function nargs arg [arg ...] 
[AS name] ...]]\ + \ ...]] \n [SORTBY nargs [property ASC | DESC [property ASC | DESC ...]] [MAX num]\ + \ [WITHCOUNT | WITHOUTCOUNT]] \n [APPLY expression AS name [APPLY expression AS\ + \ name ...]] \n [LIMIT offset num] \n [FILTER filter] \n [WITHCURSOR [COUNT read_size]\ + \ [MAXIDLE idle_time]] \n [PARAMS nargs name value [name value ...]] \n [SCORER\ + \ scorer]\n [ADDSCORES] \n [DIALECT dialect]\n" title: FT.AGGREGATE --- @@ -238,13 +535,14 @@ Attributes needed for aggregations should be stored as `SORTABLE`, where they ar groups the results in the pipeline based on one or more properties. Each group should have at least one _reducer_, a function that handles the group entries, either counting them, or performing multiple aggregate operations (see below). +
REDUCE {func} {nargs} {arg} … [AS {name}] reduces the matching results in each group into a single record, using a reduction function. For example, `COUNT` counts the number of records in the group. The reducers can have their own property names using the `AS {name}` optional argument. If a name is not given, the resulting name will be the name of the reduce function and the group properties. For example, if a name is not given to `COUNT_DISTINCT` by property `@foo`, the resulting name will be `count_distinct(@foo)`. -See [Supported GROUPBY reducers]({{< relref "develop/ai/search-and-query/advanced-concepts/aggregations#supported-groupby-reducers" >}}) for more details. +See [Supported GROUPBY reducers]({{< relref "develop/ai/search-and-query/advanced-concepts/aggregations#supported-groupby-reducers" >}}) for more details.
@@ -491,6 +789,27 @@ APPLY case(@is_pending == 0 && @priority == "high", 3,4) AS status_completed
+
+Collect top documents per group + +Group movies by genre and collect the top 5 movies per group, sorted by rating in descending order. Return the top 50 groups. + +{{< highlight bash >}} +FT.AGGREGATE idx:movies "*" + LOAD * + GROUPBY 1 @genre + REDUCE COLLECT 10 FIELDS 1 * SORTBY 2 @rating DESC LIMIT 0 5 AS top_movies + SORTBY 1 @genre LIMIT 0 50 +{{< /highlight >}} + +In this example: +- All document fields are fetched via `*`. +- Duplicates are retained (default behavior, no `DISTINCT` flag). +- Documents within each group are sorted by `@rating` descending. +- Only the top 5 documents per group are returned. +- Only the top 50 groups are returned. +
+ ## Redis Software and Redis Cloud compatibility | Redis
Software | Redis Cloud
Flexible & Annual | Redis Cloud
Free & Fixed | Notes | diff --git a/content/develop/ai/search-and-query/advanced-concepts/aggregations.md b/content/develop/ai/search-and-query/advanced-concepts/aggregations.md index 0e5e79e293..8d7758ce43 100644 --- a/content/develop/ai/search-and-query/advanced-concepts/aggregations.md +++ b/content/develop/ai/search-and-query/advanced-concepts/aggregations.md @@ -354,6 +354,60 @@ REDUCE RANDOM_SAMPLE {nargs} {property} {sample_size} Perform a reservoir sampling of the group elements with a given size, and return an array of the sampled items with an even distribution. +#### COLLECT + +{{< note >}} +The `COLLECT` reducer was introduced in version 8.8.0. +{{< /note >}} + +**Format** + +``` +REDUCE COLLECT {nargs} + FIELDS ( * | {num_fields} {field_1} [{field_2} ...] ) + [DISTINCT] + [SORTBY {nargs} {@field} [ASC|DESC] [{@field} [ASC|DESC] ...]] + [LIMIT {offset} {count}] + [AS {alias}] +``` + +**Description** + +Fetch complete documents or project specific fields directly within an aggregation query, with optional sorting, limiting, and deduplication per group. + +- `FIELDS *` returns all document fields available at the current pipeline stage. This is stage-local: it does not trigger an implicit load and does not reach back to fields from earlier stages that were not preserved as grouping keys or reducer aliases. +- `FIELDS {num} {field_1} ...` projects a specific list of fields. The special fields `@__key` (document key) and `@__score` (document score) are supported as projectable fields alongside document fields. +- Fields missing from a document are omitted from that entry (sparse output, with no NULL placeholders). +- `DISTINCT` (optional) deduplicates entries with identical projected fields, keeping the best representative according to the sort keys. When `DISTINCT` is omitted, all documents are retained including duplicates. +- `SORTBY` (optional) sorts documents within each group by one or more fields with `ASC`/`DESC`. 
+- `LIMIT` (optional) limits the number of documents returned per group. + +Each collected entry is returned as a key-value map (matching the standard `FT.AGGREGATE` result format). The `COLLECT` output is an array of such maps nested under the alias. For example: + +``` +"top_fruits" => + 1) "__key" => "doc_10" + "__score" => "0.95" + "fruit" => "apple" + "color" => "yellow" + "sweetness"=> "6" + 2) "__key" => "doc_1" + "__score" => "0.82" + "fruit" => "banana" + "color" => "yellow" + "sweetness"=> "5" +``` + +For example, to group movies by genre and collect the top 5 movies per group, sorted by rating in descending order: + +``` +FT.AGGREGATE idx:movies "*" + LOAD 4 @genre @title @rating @year + GROUPBY 1 @genre + REDUCE COLLECT 12 FIELDS 3 @title @rating @year SORTBY 2 @rating DESC LIMIT 0 5 AS top_movies + SORTBY 1 @genre LIMIT 0 50 +``` + ## APPLY expressions `APPLY` performs a one-to-one transformation on one or more properties in each record. It either stores the result as a new property down the pipeline, or replaces any property using this transformation. 
diff --git a/static/images/railroad/ft.aggregate.svg b/static/images/railroad/ft.aggregate.svg index 73e02e3db3..82305faa0c 100644 --- a/static/images/railroad/ft.aggregate.svg +++ b/static/images/railroad/ft.aggregate.svg @@ -1,4 +1,4 @@ - + - + FT.AGGREGATE index query @@ -68,102 +68,230 @@ circle { fill: #DC382D !important; stroke: #DC382D !important; } LOAD * - - - - + + + + GROUPBY nargs property - - - - - -REDUCE -function -nargs - -arg - - - - -AS -name - - - - - - -SORTBY -nargs - - - - -property - -ASC -DESC - - - - -MAX -num - - - - - -APPLY -expression - -AS -name - - - - -LIMIT -offset -num - - - -FILTER -filter - - - -WITHCURSOR - - - -COUNT -read_size - - - -MAXIDLE -idle_time - - - -PARAMS -nargs - - -name -value - - - - -DIALECT -dialect \ No newline at end of file + + + + +REDUCE + +COUNT +COUNT_DISTINCT +COUNT_DISTINCTISH +SUM +MIN +MAX +AVG +STDDEV +QUANTILE +TOLIST +FIRST_VALUE +RANDOM_SAMPLE + +COLLECT + +FIELDS + +* + +num_fields + +field + + + + +SORTBY +nargs + + +field + + + +ASC +DESC + + + + +LIMIT +offset +count +nargs + +arg + + + + +AS +name + + + + + + +SORTBY +nargs + + + + +property + +ASC +DESC + + + + +MAX +num + + + + + +APPLY + +exists +exists + +log +log + +abs +abs + +ceil +ceil + +floor +floor + +log2 +log2 + +exp +exp + +sqrt +sqrt + +upper +upper + +lower +lower + +startswith +startswith + +contains +contains + +strlen +strlen + +substr +substr + +format +format + +matched_terms +matched_terms + +split +split + +timefmt +timefmt + +parsetime +parsetime + +day +day + +hour +hour + +minute +minute + +month +month + +dayofweek +dayofweek + +dayofmonth +dayofmonth + +dayofyear +dayofyear + +year +year + +monthofyear +monthofyear + +geodistance +geodistance + +AS +name + + + + +LIMIT +offset +num + + + +FILTER +filter + + + +WITHCURSOR + + + +COUNT +read_size + + + +MAXIDLE +idle_time + + + +PARAMS +nargs + + +name +value + + + + +DIALECT +dialect \ No newline at end of file From ffd9ebd56d0330b839b1320eebde8284953393a0 Mon Sep 
17 00:00:00 2001 From: "David W. Dougherty" Date: Mon, 18 May 2026 07:23:12 -0700 Subject: [PATCH 2/2] Apply review suggestions --- content/commands/ft.aggregate.md | 7 ++++--- .../ai/search-and-query/advanced-concepts/aggregations.md | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/content/commands/ft.aggregate.md b/content/commands/ft.aggregate.md index c90ed85b84..07c87c7d8a 100644 --- a/content/commands/ft.aggregate.md +++ b/content/commands/ft.aggregate.md @@ -796,14 +796,15 @@ Group movies by genre and collect the top 5 movies per group, sorted by rating i {{< highlight bash >}} FT.AGGREGATE idx:movies "*" - LOAD * + LOAD 4 @genre @title @rating @year GROUPBY 1 @genre - REDUCE COLLECT 10 FIELDS 1 * SORTBY 2 @rating DESC LIMIT 0 5 AS top_movies + REDUCE COLLECT 12 FIELDS 3 @title @rating @year SORTBY 2 @rating DESC LIMIT 0 5 AS top_movies SORTBY 1 @genre LIMIT 0 50 {{< /highlight >}} In this example: -- All document fields are fetched via `*`. +- `LOAD` brings `@genre`, `@title`, `@rating`, and `@year` into the pipeline. `COLLECT` projects from fields already available in the pipeline; it does not trigger an implicit document load. Using `FIELDS *` in place of the explicit field list would return only the fields the pipeline already has, as controlled by `LOAD` and any field-generating operations such as `APPLY`. +- `FIELDS 3 @title @rating @year` projects those three fields into each collected entry. - Duplicates are retained (default behavior, no `DISTINCT` flag). - Documents within each group are sorted by `@rating` descending. - Only the top 5 documents per group are returned. 
diff --git a/content/develop/ai/search-and-query/advanced-concepts/aggregations.md b/content/develop/ai/search-and-query/advanced-concepts/aggregations.md index 8d7758ce43..fd0c84ee1d 100644 --- a/content/develop/ai/search-and-query/advanced-concepts/aggregations.md +++ b/content/develop/ai/search-and-query/advanced-concepts/aggregations.md @@ -373,7 +373,7 @@ REDUCE COLLECT {nargs} **Description** -Fetch complete documents or project specific fields directly within an aggregation query, with optional sorting, limiting, and deduplication per group. +Collect documents within each group as an array of key-value maps, projecting either all fields currently available in the pipeline or an explicit list of fields, with optional sorting, limiting, and deduplication. - `FIELDS *` returns all document fields available at the current pipeline stage. This is stage-local: it does not trigger an implicit load and does not reach back to fields from earlier stages that were not preserved as grouping keys or reducer aliases. - `FIELDS {num} {field_1} ...` projects a specific list of fields. The special fields `@__key` (document key) and `@__score` (document score) are supported as projectable fields alongside document fields.