Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import com.google.common.collect.SetMultimap;
import org.antlr.runtime.TokenRewriteStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.lib.CostLessRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
import org.apache.hadoop.hive.ql.lib.Node;
Expand Down Expand Up @@ -350,18 +349,39 @@
static class IdentifierProcessor implements SemanticNodeProcessor {

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)

Check failure on line 352 in ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this method to not always return the same value.

See more on https://sonarcloud.io/project/issues?id=apache_hive&issues=AZ2bdLo-h2dPdrP2CY5f&open=AZ2bdLo-h2dPdrP2CY5f&pullRequest=6439
throws SemanticException {
UnparseTranslator unparseTranslator = ((QuotedIdExpressionContext)procCtx).getUnparseTranslator();
ASTNode identifier = (ASTNode) nd;
String id = identifier.getText();
if (FunctionRegistry.getFunctionInfo(id) != null){
if (isFunctionNameToken(identifier)) {
return null;
}

unparseTranslator.addIdentifierTranslation(identifier);
return null;
}

/*
 * Returns true iff {@code identifier} is the function-name child of a
 * function-call AST node (TOK_FUNCTION / TOK_FUNCTIONDI / TOK_FUNCTIONSTAR).
 *
 * Used to decide quoting during unparse: only function names are left
 * unquoted. Column names are always quoted, even when they collide with a
 * function keyword — e.g. `alias`.`date` rather than `alias`.date.
 */
private static boolean isFunctionNameToken(ASTNode identifier) {
  ASTNode enclosing = (ASTNode) identifier.getParent();
  if (enclosing == null || enclosing.getChildCount() == 0) {
    return false;
  }

  switch (enclosing.getType()) {
    case HiveParser.TOK_FUNCTION:
    case HiveParser.TOK_FUNCTIONDI:
    case HiveParser.TOK_FUNCTIONSTAR:
      // Only the first child of a function node carries the function name;
      // any later child is an argument and must still be quoted.
      return enclosing.getChild(0) == identifier;
    default:
      return false;
  }
}
}

static class QuotedIdExpressionContext implements NodeProcessorCtx {
Expand Down
12 changes: 12 additions & 0 deletions ql/src/test/queries/clientpositive/sqlmerge.q
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,15 @@ explain merge into acidTbl_n0 as t using nonAcidOrcTbl_n0 s ON t.a = s.a
WHEN MATCHED AND s.a > 8 THEN DELETE
WHEN MATCHED THEN UPDATE SET b = 7
WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b);

-- MERGE rewrite must preserve quoting for qualified identifiers like s.`date` when column name is function keyword
drop table if exists `count`;
drop table if exists tgt_table;
create table `count`(a int, `date` int) clustered by (a) into 2 buckets stored as orc
TBLPROPERTIES ('transactional'='true');
create table tgt_table(a int, `date` int) clustered by (a) into 2 buckets stored as orc
TBLPROPERTIES ('transactional'='true');

explain merge into tgt_table using `count` ON tgt_table.a = `count`.a
WHEN MATCHED THEN UPDATE SET `date` = `count`.`date`
WHEN NOT MATCHED THEN INSERT VALUES(`count`.a, `count`.`date`);
279 changes: 279 additions & 0 deletions ql/src/test/results/clientpositive/llap/sqlmerge.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -819,3 +819,282 @@ STAGE PLANS:
Stats Work
Basic Stats Work:

PREHOOK: query: drop table if exists `count`
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists `count`
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: drop table if exists tgt_table
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists tgt_table
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: create table `count`(a int, `date` int) clustered by (a) into 2 buckets stored as orc
TBLPROPERTIES ('transactional'='true')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@count
POSTHOOK: query: create table `count`(a int, `date` int) clustered by (a) into 2 buckets stored as orc
TBLPROPERTIES ('transactional'='true')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@count
PREHOOK: query: create table tgt_table(a int, `date` int) clustered by (a) into 2 buckets stored as orc
TBLPROPERTIES ('transactional'='true')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@tgt_table
POSTHOOK: query: create table tgt_table(a int, `date` int) clustered by (a) into 2 buckets stored as orc
TBLPROPERTIES ('transactional'='true')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tgt_table
PREHOOK: query: explain merge into tgt_table using `count` ON tgt_table.a = `count`.a
WHEN MATCHED THEN UPDATE SET `date` = `count`.`date`
WHEN NOT MATCHED THEN INSERT VALUES(`count`.a, `count`.`date`)
PREHOOK: type: QUERY
PREHOOK: Input: default@count
PREHOOK: Input: default@tgt_table
PREHOOK: Output: default@merge_tmp_table
PREHOOK: Output: default@tgt_table
PREHOOK: Output: default@tgt_table
POSTHOOK: query: explain merge into tgt_table using `count` ON tgt_table.a = `count`.a
WHEN MATCHED THEN UPDATE SET `date` = `count`.`date`
WHEN NOT MATCHED THEN INSERT VALUES(`count`.a, `count`.`date`)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@count
POSTHOOK: Input: default@tgt_table
POSTHOOK: Output: default@merge_tmp_table
POSTHOOK: Output: default@tgt_table
POSTHOOK: Output: default@tgt_table
STAGE DEPENDENCIES:
Stage-3 is a root stage
Stage-4 depends on stages: Stage-3
Stage-0 depends on stages: Stage-4
Stage-5 depends on stages: Stage-0
Stage-1 depends on stages: Stage-4
Stage-6 depends on stages: Stage-1
Stage-2 depends on stages: Stage-4
Stage-7 depends on stages: Stage-2

STAGE PLANS:
Stage: Stage-3
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: count
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: int), date (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 6
Map Operator Tree:
TableScan
alias: tgt_table
filterExpr: a is not null (type: boolean)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: a is not null (type: boolean)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), a (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
Left Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col3 (type: int), _col1 (type: int), _col2 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: _col1 is null (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
Filter Operator
predicate: (_col1 = _col0) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col1 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
null sort order: z
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Filter Operator
predicate: (_col1 = _col0) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col3
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.tgt_table
Write Type: INSERT
Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), VALUE._col1 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.tgt_table
Write Type: UPDATE
Reducer 5
Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (_col1 > 1L) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cardinality_violation(_col0) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.merge_tmp_table

Stage: Stage-4
Dependency Collection

Stage: Stage-0
Move Operator
tables:
replace: false
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.tgt_table
Write Type: INSERT

Stage: Stage-5
Stats Work
Basic Stats Work:

Stage: Stage-1
Move Operator
tables:
replace: false
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.tgt_table
Write Type: UPDATE

Stage: Stage-6
Stats Work
Basic Stats Work:

Stage: Stage-2
Move Operator
tables:
replace: false
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.merge_tmp_table

Stage: Stage-7
Stats Work
Basic Stats Work:

4 changes: 2 additions & 2 deletions ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,10 @@ Stage-0
Number of rows:1
TableScan [TS_0]

PREHOOK: query: DROP TEMPORARY MACRO SIGMOID
PREHOOK: query: DROP TEMPORARY MACRO `SIGMOID`
PREHOOK: type: DROPMACRO
PREHOOK: Output: database:default
POSTHOOK: query: DROP TEMPORARY MACRO SIGMOID
POSTHOOK: query: DROP TEMPORARY MACRO `SIGMOID`
POSTHOOK: type: DROPMACRO
POSTHOOK: Output: database:default
PREHOOK: query: explain analyze DROP TEMPORARY MACRO SIGMOID
Expand Down
Loading