diff --git a/Makefile b/Makefile index 394951ca0..9d2e05e8e 100644 --- a/Makefile +++ b/Makefile @@ -111,6 +111,7 @@ REGRESS = scan \ name_validation \ jsonb_operators \ list_comprehension \ + pattern_expression \ map_projection \ direct_field_access \ security @@ -150,7 +151,19 @@ src/include/parser/cypher_kwlist_d.h: src/include/parser/cypher_kwlist.h $(GEN_K src/include/parser/cypher_gram_def.h: src/backend/parser/cypher_gram.c -src/backend/parser/cypher_gram.c: BISONFLAGS += --defines=src/include/parser/cypher_gram_def.h +# +# The Cypher grammar uses GLR mode with a number of inherent shift/reduce +# and reduce/reduce conflicts arising from the ambiguity between +# parenthesized expressions and graph patterns (both start with '('). +# GLR handles these correctly at runtime by forking at the conflict +# point; %dprec annotations resolve cases where both forks succeed. +# +# We suppress the conflict warnings rather than hard-coding a conflict +# budget with %expect / %expect-rr, because the exact counts vary across +# Bison versions and would otherwise make the build fragile across +# distros and future Bison releases. +# +src/backend/parser/cypher_gram.c: BISONFLAGS += --defines=src/include/parser/cypher_gram_def.h -Wno-conflicts-sr -Wno-conflicts-rr src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h diff --git a/regress/expected/pattern_expression.out b/regress/expected/pattern_expression.out new file mode 100644 index 000000000..1789b5b80 --- /dev/null +++ b/regress/expected/pattern_expression.out @@ -0,0 +1,331 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT create_graph('pattern_expr'); +NOTICE: graph "pattern_expr" has been created + create_graph +-------------- + +(1 row) + +-- +-- Setup test data +-- +SELECT * FROM cypher('pattern_expr', $$ + CREATE (alice:Person {name: 'Alice'})-[:KNOWS]->(bob:Person {name: 'Bob'}), + (alice)-[:WORKS_WITH]->(charlie:Person {name: 'Charlie'}), + (dave:Person {name: 'Dave'}) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- +-- Basic pattern expression in WHERE +-- +-- Bare pattern: (a)-[:REL]->(b) +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- +-- NOT pattern expression +-- +-- Find people who don't KNOW anyone +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE NOT (a)-[:KNOWS]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + result +----------- + "Bob" + "Charlie" + "Dave" +(3 rows) + +-- +-- Pattern with labeled first node +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a:Person)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- +-- Pattern combined with AND +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) AND a.name = 'Alice' + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- +-- Pattern combined with OR +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) OR (a)-[:WORKS_WITH]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" +(2 rows) + +-- +-- Left-directed pattern +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)<-[:KNOWS]-(b) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + a | b +-------+--------- + "Bob" | "Alice" +(1 row) + +-- +-- Pattern with anonymous nodes +-- +-- Find anyone who has any outgoing KNOWS relationship +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE (a)-[:KNOWS]->() + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +-- +-- Multiple relationship pattern +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (c:Person) + WHERE (a)-[:KNOWS]->()-[:WORKS_WITH]->(c) + RETURN a.name, c.name + ORDER BY a.name +$$) AS (a agtype, c agtype); + a | c +---+--- +(0 rows) + +-- +-- Existing EXISTS() syntax still works (backward compatibility) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE EXISTS((a)-[:KNOWS]->(b)) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- +-- Pattern expression produces same results as EXISTS() +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE (a)-[:KNOWS]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE EXISTS((a)-[:KNOWS]->(:Person)) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +-- +-- Regular (non-pattern) expressions still work (no regression) +-- +SELECT * FROM cypher('pattern_expr', $$ + RETURN (1 + 2) +$$) AS (result agtype); + result +-------- + 3 +(1 row) + +SELECT * FROM cypher('pattern_expr', $$ + MATCH (n:Person) + WHERE n.name = 'Alice' + RETURN (n.name) +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +-- +-- Pattern expressions in RETURN (boolean projection) +-- +-- Each person gets a column showing whether they know someone +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a)-[:KNOWS]->(:Person) AS knows_someone + ORDER BY a.name +$$) AS (name agtype, knows_someone agtype); + name | knows_someone +-----------+--------------- + "Alice" | true + "Bob" | false + "Charlie" | false + "Dave" | false +(4 rows) + +-- Mix pattern expression with other projections +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a)-[:KNOWS]->(:Person), (a)-[:WORKS_WITH]->(:Person) + ORDER BY a.name +$$) AS (name agtype, knows agtype, works_with agtype); + name | knows | works_with +-----------+-------+------------ + "Alice" | true | true + "Bob" | false | false + "Charlie" | false | false + "Dave" | false | false +(4 rows) + +-- +-- Pattern expressions in CASE WHEN +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, + CASE WHEN (a)-[:KNOWS]->(:Person) THEN 'social' + ELSE 'loner' + END + ORDER BY a.name +$$) AS (name agtype, kind agtype); + name | kind +-----------+---------- + "Alice" | "social" + "Bob" | "loner" + "Charlie" | "loner" + "Dave" | "loner" +(4 rows) + +-- +-- Pattern expressions combined with boolean operators in RETURN +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, + (a)-[:KNOWS]->(:Person) AND (a)-[:WORKS_WITH]->(:Person) AS has_both, + (a)-[:KNOWS]->(:Person) OR (a)-[:WORKS_WITH]->(:Person) AS has_either + ORDER BY a.name +$$) AS (name agtype, has_both agtype, has_either agtype); + name | has_both | has_either +-----------+----------+------------ + "Alice" | true | true + "Bob" | false | false + "Charlie" | false | false + "Dave" | false | false +(4 rows) + +-- +-- Pattern expression in SET (store boolean as property) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + SET a.is_social = (a)-[:KNOWS]->(:Person) + RETURN a.name, a.is_social + ORDER BY a.name +$$) AS (name agtype, is_social agtype); + name | is_social +-----------+----------- + "Alice" | true + "Bob" | false + "Charlie" | false + "Dave" | false +(4 rows) + +-- +-- Pattern expression in WITH (carry boolean through pipeline) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WITH a.name AS name, (a)-[:KNOWS]->(:Person) AS knows + WHERE knows + RETURN name + ORDER BY name +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +-- +-- Cleanup +-- +SELECT * FROM drop_graph('pattern_expr', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table pattern_expr._ag_label_vertex +drop cascades to table pattern_expr._ag_label_edge +drop cascades to table pattern_expr."Person" +drop cascades to table pattern_expr."KNOWS" +drop cascades to table pattern_expr."WORKS_WITH" +NOTICE: graph "pattern_expr" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/pattern_expression.sql b/regress/sql/pattern_expression.sql new file mode 100644 index 000000000..60c5aa11a --- /dev/null +++ b/regress/sql/pattern_expression.sql @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT create_graph('pattern_expr'); + +-- +-- Setup test data +-- +SELECT * FROM cypher('pattern_expr', $$ + CREATE (alice:Person {name: 'Alice'})-[:KNOWS]->(bob:Person {name: 'Bob'}), + (alice)-[:WORKS_WITH]->(charlie:Person {name: 'Charlie'}), + (dave:Person {name: 'Dave'}) +$$) AS (result agtype); + +-- +-- Basic pattern expression in WHERE +-- +-- Bare pattern: (a)-[:REL]->(b) +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + +-- +-- NOT pattern expression +-- +-- Find people who don't KNOW anyone +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE NOT (a)-[:KNOWS]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + +-- +-- Pattern with labeled first node +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a:Person)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + +-- +-- Pattern combined with AND +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) AND a.name = 'Alice' + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + +-- +-- Pattern combined with OR +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) OR (a)-[:WORKS_WITH]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + +-- +-- Left-directed pattern +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)<-[:KNOWS]-(b) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + +-- +-- Pattern with anonymous nodes +-- +-- Find anyone who has any outgoing KNOWS relationship +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE (a)-[:KNOWS]->() + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + +-- +-- Multiple relationship pattern +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (c:Person) + WHERE (a)-[:KNOWS]->()-[:WORKS_WITH]->(c) + RETURN a.name, c.name + ORDER BY a.name +$$) AS (a agtype, c agtype); + +-- +-- Existing EXISTS() syntax still works (backward compatibility) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE EXISTS((a)-[:KNOWS]->(b)) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + +-- +-- Pattern expression produces same results as EXISTS() +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE (a)-[:KNOWS]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE EXISTS((a)-[:KNOWS]->(:Person)) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + +-- +-- Regular (non-pattern) expressions still work (no regression) +-- +SELECT * FROM cypher('pattern_expr', $$ + RETURN (1 + 2) +$$) AS (result agtype); + +SELECT * FROM cypher('pattern_expr', $$ + MATCH (n:Person) + WHERE n.name = 'Alice' + RETURN (n.name) +$$) AS (result agtype); + +-- +-- Pattern expressions in RETURN (boolean projection) +-- +-- Each person gets a column showing whether they know someone +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a)-[:KNOWS]->(:Person) AS knows_someone + ORDER BY a.name +$$) AS (name agtype, knows_someone agtype); + +-- Mix pattern expression with other projections +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a)-[:KNOWS]->(:Person), (a)-[:WORKS_WITH]->(:Person) + ORDER BY a.name +$$) AS (name agtype, knows agtype, works_with agtype); + +-- +-- Pattern expressions in CASE WHEN +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, + CASE WHEN (a)-[:KNOWS]->(:Person) THEN 'social' + ELSE 'loner' + END + ORDER BY a.name +$$) AS (name agtype, kind agtype); + +-- +-- Pattern expressions combined with boolean operators in RETURN +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, + (a)-[:KNOWS]->(:Person) AND (a)-[:WORKS_WITH]->(:Person) AS has_both, + (a)-[:KNOWS]->(:Person) OR (a)-[:WORKS_WITH]->(:Person) AS has_either + ORDER BY a.name +$$) AS (name agtype, has_both agtype, has_either agtype); + +-- +-- Pattern expression in SET (store boolean as property) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + SET a.is_social = (a)-[:KNOWS]->(:Person) + RETURN a.name, a.is_social + ORDER BY a.name +$$) AS (name agtype, is_social agtype); + +-- +-- Pattern expression in WITH (carry boolean through pipeline) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WITH a.name AS name, (a)-[:KNOWS]->(:Person) AS knows + WHERE knows + RETURN name + ORDER BY name +$$) AS (result agtype); + +-- +-- Cleanup +-- +SELECT * FROM drop_graph('pattern_expr', true); diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 5ba1e6354..5e6baed46 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -34,7 +34,7 @@ do \ { \ if ((n) > 0) \ - current = (rhs)[1]; \ + current = YYRHSLOC(rhs, 1); \ else \ current = -1; \ } while (0) @@ -49,6 +49,34 @@ %locations %name-prefix="cypher_yy" %pure-parser +/* + * GLR mode handles the ambiguity between parenthesized expressions and + * graph patterns. For example, WHERE (a)-[:KNOWS]->(b) starts with (a) + * which is valid as both an expression and a path_node. The parser forks + * at the conflict point and discards the failing path. %dprec annotations + * on expr_var/var_name_opt and '(' expr ')'/anonymous_path resolve cases + * where both paths succeed (bare (a) prefers the expression interpretation). + */ +%glr-parser +/* + * GLR conflicts are expected and correct for this grammar. They arise + * from the inherent ambiguity between parenthesized expressions and + * graph patterns: the shift/reduce conflicts on '-', '<', '{', + * PARAMETER and ')' all come from path extension vs. arithmetic or + * parenthesized-expression alternatives after a leading '(', and the + * reduce/reduce conflicts on ')', '}' and '=' come from the overlap + * between expr_var and var_name_opt. GLR handles all of these by + * forking at the conflict point and discarding the failing alternative; + * %dprec annotations on expr_var/var_name_opt and '(' expr ')' / + * anonymous_path resolve cases where both forks succeed (bare (a) + * prefers the expression interpretation). + * + * We intentionally do not use %expect / %expect-rr here because the + * exact conflict counts can vary across Bison versions (and across + * distros) as the generator's internals change. Instead, the Makefile + * passes -Wno-conflicts-sr,-Wno-conflicts-rr via BISONFLAGS so the + * build stays clean without binding us to a specific Bison release. + */ %lex-param {ag_scanner_t scanner} %parse-param {ag_scanner_t scanner} @@ -279,6 +307,9 @@ static Node *build_list_comprehension_node(Node *var, Node *expr, Node *where, Node *mapping_expr, int location); +/* pattern expression helper */ +static Node *make_exists_pattern_sublink(Node *pattern, int location); + /* helper functions */ static ExplainStmt *make_explain_stmt(List *options); static void validate_return_item_aliases(List *items, ag_scanner_t scanner); @@ -1808,21 +1839,7 @@ expr_func_subexpr: } | EXISTS '(' anonymous_path ')' { - cypher_sub_pattern *sub; - SubLink *n; - - sub = make_ag_node(cypher_sub_pattern); - sub->kind = CSP_EXISTS; - sub->pattern = list_make1($3); - - n = makeNode(SubLink); - n->subLinkType = EXISTS_SUBLINK; - n->subLinkId = 0; - n->testexpr = NULL; - n->operName = NIL; - n->subselect = (Node *) sub; - n->location = @1; - $$ = (Node *)node_to_agtype((Node *)n, "boolean", @1); + $$ = make_exists_pattern_sublink($3, @1); } | EXISTS '(' property_value ')' { @@ -1942,7 +1959,7 @@ expr_atom: $$ = (Node *)n; } - | '(' expr ')' + | '(' expr ')' %dprec 2 { Node *n = $2; @@ -1953,6 +1970,17 @@ expr_atom: } $$ = n; } + | anonymous_path %dprec 1 + { + /* + * Bare pattern in expression context is semantically + * equivalent to EXISTS(pattern). Example: + * WHERE (a)-[:KNOWS]->(b) + * becomes + * WHERE EXISTS((a)-[:KNOWS]->(b)) + */ + $$ = make_exists_pattern_sublink($1, @1); + } | expr_case | expr_var | expr_func @@ -2204,7 +2232,7 @@ expr_case_default: ; expr_var: - var_name + var_name %dprec 2 { ColumnRef *n; @@ -2254,11 +2282,11 @@ var_name: ; var_name_opt: - /* empty */ + /* empty */ %dprec 1 { $$ = NULL; } - | var_name + | var_name %dprec 1 ; label_name: @@ -3317,6 +3345,30 @@ static Node *build_list_comprehension_node(Node *var, Node *expr, return (Node *) node_to_agtype((Node *)sub, "agtype[]", location); } +/* + * Wrap a graph pattern in an EXISTS SubLink. Used by both + * EXISTS(pattern) syntax and bare pattern expressions in WHERE. + */ +static Node *make_exists_pattern_sublink(Node *pattern, int location) +{ + cypher_sub_pattern *sub; + SubLink *n; + + sub = make_ag_node(cypher_sub_pattern); + sub->kind = CSP_EXISTS; + sub->pattern = list_make1(pattern); + + n = makeNode(SubLink); + n->subLinkType = EXISTS_SUBLINK; + n->subLinkId = 0; + n->testexpr = NULL; + n->operName = NIL; + n->subselect = (Node *) sub; + n->location = location; + + return (Node *)node_to_agtype((Node *)n, "boolean", location); +} + /* Helper function to create an ExplainStmt node */ static ExplainStmt *make_explain_stmt(List *options) {