diff --git a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvFilterableTable.java b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvFilterableTable.java index 4a123529513e..ee621740f9b0 100644 --- a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvFilterableTable.java +++ b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvFilterableTable.java @@ -66,7 +66,7 @@ public CsvFilterableTable(Source source, return new AbstractEnumerable<@Nullable Object[]>() { @Override public Enumerator<@Nullable Object[]> enumerator() { return new CsvEnumerator<>(source, cancelFlag, false, filterValues, - CsvEnumerator.arrayConverter(fieldTypes, fields, false)); + CsvEnumerator.arrayConverter(fieldTypes, fields, false), ','); } }; } diff --git a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvScannableTable.java b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvScannableTable.java index 25d029505490..836af81373b7 100644 --- a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvScannableTable.java +++ b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvScannableTable.java @@ -58,7 +58,7 @@ public class CsvScannableTable extends CsvTable return new AbstractEnumerable<@Nullable Object[]>() { @Override public Enumerator<@Nullable Object[]> enumerator() { return new CsvEnumerator<>(source, cancelFlag, false, null, - CsvEnumerator.arrayConverter(fieldTypes, fields, false)); + CsvEnumerator.arrayConverter(fieldTypes, fields, false), ','); } }; } diff --git a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvStreamScannableTable.java b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvStreamScannableTable.java index a7947a2f9853..7c0d574cc7af 100644 --- a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvStreamScannableTable.java +++ b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvStreamScannableTable.java @@ -65,7 +65,7 @@ public class CsvStreamScannableTable extends CsvScannableTable return new AbstractEnumerable<@Nullable Object[]>() { @Override public Enumerator<@Nullable Object[]> enumerator() { return new CsvEnumerator<>(source, cancelFlag, true, null, - CsvEnumerator.arrayConverter(fieldTypes, fields, true)); + CsvEnumerator.arrayConverter(fieldTypes, fields, true), ','); } }; } diff --git a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTable.java b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTable.java index a882e5c8247c..aac56bb38ce4 100644 --- a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTable.java +++ b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTable.java @@ -51,7 +51,7 @@ public abstract class CsvTable extends AbstractTable { if (rowType == null) { rowType = CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source, - null, isStream()); + null, isStream(), ','); } return rowType; } @@ -61,7 +61,7 @@ public List getFieldTypes(RelDataTypeFactory typeFactory) { if (fieldTypes == null) { fieldTypes = new ArrayList<>(); CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source, - fieldTypes, isStream()); + fieldTypes, isStream(), ','); } return fieldTypes; } diff --git a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTranslatableTable.java b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTranslatableTable.java index 78632cfb43f8..1da1992b6b14 100644 --- a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTranslatableTable.java +++ b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTranslatableTable.java @@ -66,7 +66,8 @@ public Enumerable project(final DataContext root, source, cancelFlag, getFieldTypes(typeFactory), - ImmutableIntList.of(fields)); + ImmutableIntList.of(fields), + ','); } }; } diff --git a/file/src/main/java/org/apache/calcite/adapter/file/CsvEnumerator.java b/file/src/main/java/org/apache/calcite/adapter/file/CsvEnumerator.java index 012be6145085..f62433beab47 100644 --- a/file/src/main/java/org/apache/calcite/adapter/file/CsvEnumerator.java +++ b/file/src/main/java/org/apache/calcite/adapter/file/CsvEnumerator.java @@ -113,14 +113,15 @@ private static void clearTimeFormats() { .compile("\"decimal\\(([0-9]+),([0-9]+)\\)"); public CsvEnumerator(Source source, AtomicBoolean cancelFlag, - List fieldTypes, List fields) { + List fieldTypes, List fields, char separator) { //noinspection unchecked this(source, cancelFlag, false, null, - (RowConverter) converter(fieldTypes, fields)); + (RowConverter) converter(fieldTypes, fields), separator); } public CsvEnumerator(Source source, AtomicBoolean cancelFlag, boolean stream, - @Nullable String @Nullable [] filterValues, RowConverter rowConverter) { + @Nullable String @Nullable [] filterValues, RowConverter rowConverter, + char separator) { this.cancelFlag = cancelFlag; this.rowConverter = rowConverter; this.filterValues = @@ -128,9 +129,9 @@ public CsvEnumerator(Source source, AtomicBoolean cancelFlag, boolean stream, : ImmutableNullableList.copyOf(filterValues); try { if (stream) { - this.reader = new CsvStreamReader(source); + this.reader = new CsvStreamReader(source, separator); } else { - this.reader = openCsv(source); + this.reader = openCsv(source, separator); } this.reader.readNext(); // skip header row } catch (IOException e) { @@ -156,14 +157,15 @@ private static RowConverter converter(List fieldTypes, /** Deduces the names and types of a table's columns by reading the first line * of a CSV file. */ public static RelDataType deduceRowType(JavaTypeFactory typeFactory, - Source source, @Nullable List fieldTypes, Boolean stream) { + Source source, @Nullable List fieldTypes, Boolean stream, + char separator) { final List types = new ArrayList<>(); final List names = new ArrayList<>(); if (stream) { names.add(FileSchemaFactory.ROWTIME_COLUMN_NAME); types.add(typeFactory.createSqlType(SqlTypeName.TIMESTAMP)); } - try (CSVReader reader = openCsv(source)) { + try (CSVReader reader = openCsv(source, separator)) { String[] strings = reader.readNext(); if (strings == null) { strings = new String[]{"EmptyFileHasNoColumns:boolean"}; @@ -247,9 +249,9 @@ public static RelDataType deduceRowType(JavaTypeFactory typeFactory, return typeFactory.createStructType(Pair.zip(names, types)); } - static CSVReader openCsv(Source source) throws IOException { + static CSVReader openCsv(Source source, char separator) throws IOException { requireNonNull(source, "source"); - return new CSVReader(source.reader()); + return new CSVReader(source.reader(), separator); } @Override public E current() { diff --git a/file/src/main/java/org/apache/calcite/adapter/file/CsvStreamReader.java b/file/src/main/java/org/apache/calcite/adapter/file/CsvStreamReader.java index 54dd3837e27e..e9113c7d1ac1 100644 --- a/file/src/main/java/org/apache/calcite/adapter/file/CsvStreamReader.java +++ b/file/src/main/java/org/apache/calcite/adapter/file/CsvStreamReader.java @@ -53,9 +53,9 @@ class CsvStreamReader extends CSVReader implements Closeable { */ public static final long DEFAULT_MONITOR_DELAY = 2000; - CsvStreamReader(Source source) { + CsvStreamReader(Source source, char separator) { this(source, - CSVParser.DEFAULT_SEPARATOR, + separator, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES, @@ -106,7 +106,7 @@ private CsvStreamReader(Source source, char separator, char quoteChar, /** * Reads the next line from the buffer and converts to a string array. * - * @return a string array with each comma-separated element as a separate entry. + * @return a string array with each delimited element as a separate entry. * * @throws IOException if bad things happen during the read */ diff --git a/file/src/main/java/org/apache/calcite/adapter/file/CsvTable.java b/file/src/main/java/org/apache/calcite/adapter/file/CsvTable.java index 5ebb2b121865..4b4c718d41a5 100644 --- a/file/src/main/java/org/apache/calcite/adapter/file/CsvTable.java +++ b/file/src/main/java/org/apache/calcite/adapter/file/CsvTable.java @@ -37,13 +37,16 @@ public abstract class CsvTable extends AbstractTable { protected final Source source; protected final @Nullable RelProtoDataType protoRowType; + protected final char separator; private @Nullable RelDataType rowType; private @Nullable List fieldTypes; - /** Creates a CsvTable. */ - CsvTable(Source source, @Nullable RelProtoDataType protoRowType) { + /** Creates a CsvTable with a custom separator. */ + CsvTable(Source source, @Nullable RelProtoDataType protoRowType, + char separator) { this.source = source; this.protoRowType = protoRowType; + this.separator = separator; } @Override public RelDataType getRowType(RelDataTypeFactory typeFactory) { @@ -53,7 +56,7 @@ public abstract class CsvTable extends AbstractTable { if (rowType == null) { rowType = CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source, - null, isStream()); + null, isStream(), separator); } return rowType; } @@ -63,7 +66,7 @@ public List getFieldTypes(RelDataTypeFactory typeFactory) { if (fieldTypes == null) { fieldTypes = new ArrayList<>(); CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source, - fieldTypes, isStream()); + fieldTypes, isStream(), separator); } return fieldTypes; } diff --git a/file/src/main/java/org/apache/calcite/adapter/file/CsvTableFactory.java b/file/src/main/java/org/apache/calcite/adapter/file/CsvTableFactory.java index 82e636cb61fa..fa37d156532c 100644 --- a/file/src/main/java/org/apache/calcite/adapter/file/CsvTableFactory.java +++ b/file/src/main/java/org/apache/calcite/adapter/file/CsvTableFactory.java @@ -25,6 +25,8 @@ import org.apache.calcite.util.Source; import org.apache.calcite.util.Sources; +import au.com.bytecode.opencsv.CSVParser; + import org.checkerframework.checker.nullness.qual.Nullable; import java.io.File; @@ -50,6 +52,17 @@ public CsvTableFactory() { final Source source = Sources.file(base, fileName); final RelProtoDataType protoRowType = rowType != null ? RelDataTypeImpl.proto(rowType) : null; - return new CsvTranslatableTable(source, protoRowType); + final String separatorStr = (String) operand.get("separator"); + final char separator; + if (separatorStr == null) { + separator = CSVParser.DEFAULT_SEPARATOR; + } else if (separatorStr.length() == 1) { + separator = separatorStr.charAt(0); + } else { + throw new IllegalArgumentException( + "Invalid separator '" + separatorStr + + "'. Separator must be a single character."); + } + return new CsvTranslatableTable(source, protoRowType, separator); } } diff --git a/file/src/main/java/org/apache/calcite/adapter/file/CsvTranslatableTable.java b/file/src/main/java/org/apache/calcite/adapter/file/CsvTranslatableTable.java index ebfc1f2e7088..7f81defebe01 100644 --- a/file/src/main/java/org/apache/calcite/adapter/file/CsvTranslatableTable.java +++ b/file/src/main/java/org/apache/calcite/adapter/file/CsvTranslatableTable.java @@ -47,9 +47,10 @@ */ public class CsvTranslatableTable extends CsvTable implements QueryableTable, TranslatableTable { - /** Creates a CsvTable. */ - CsvTranslatableTable(Source source, @Nullable RelProtoDataType protoRowType) { - super(source, protoRowType); + /** Creates a CsvTranslatableTable with a custom separator. */ + CsvTranslatableTable(Source source, @Nullable RelProtoDataType protoRowType, + char separator) { + super(source, protoRowType, separator); } @Override public String toString() { @@ -65,7 +66,8 @@ public Enumerable project(final DataContext root, @Override public Enumerator enumerator() { JavaTypeFactory typeFactory = root.getTypeFactory(); return new CsvEnumerator<>(source, cancelFlag, - getFieldTypes(typeFactory), ImmutableIntList.of(fields)); + getFieldTypes(typeFactory), ImmutableIntList.of(fields), + separator); } }; } diff --git a/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java b/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java index b842030ad045..f66effb4a32e 100644 --- a/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java +++ b/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java @@ -23,6 +23,8 @@ import org.apache.calcite.util.Sources; import org.apache.calcite.util.Util; +import au.com.bytecode.opencsv.CSVParser; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -142,7 +144,8 @@ private static boolean addTable(ImmutableMap.Builder builder, } final Source sourceSansCsv = sourceSansGz.trimOrNull(".csv"); if (sourceSansCsv != null) { - final Table table = new CsvTranslatableTable(source, null); + final Table table = + new CsvTranslatableTable(source, null, CSVParser.DEFAULT_SEPARATOR); builder.put(Util.first(tableName, sourceSansCsv.path()), table); return true; } diff --git a/file/src/test/java/org/apache/calcite/adapter/file/FileAdapterTest.java b/file/src/test/java/org/apache/calcite/adapter/file/FileAdapterTest.java index 0d0fdc031c39..ad774f3964e6 100644 --- a/file/src/test/java/org/apache/calcite/adapter/file/FileAdapterTest.java +++ b/file/src/test/java/org/apache/calcite/adapter/file/FileAdapterTest.java @@ -332,6 +332,91 @@ private static void checkEmpty(ResultSet resultSet) { sql("model-with-custom-table", "select * from CUSTOM_TABLE.EMPS").ok(); } + /** Test case for + * [CALCITE-4460] + * Support custom delimiter when parsing CSV tables. + * + *

Reads a pipe-delimited file via CsvTableFactory with a custom + * separator. */ + @Test void testCsvCustomSeparatorPipe() { + final String sql = "select * from CUSTOM_SEPARATOR.PIPE_DEPTS"; + sql("custom-separator", sql) + .returns("DEPTNO=10; NAME=Sales", + "DEPTNO=20; NAME=Marketing", + "DEPTNO=30; NAME=Accounts", + "DEPTNO=40; NAME=tic|tac|toe") + .ok(); + } + + /** Test case for + * [CALCITE-4460] + * Support custom delimiter when parsing CSV tables. + * + *

Verifies quoted content is parsed correctly when it contains the custom + * separator character. */ + @Test void testCsvCustomSeparatorEscaping() { + final String sql = "select * from CUSTOM_SEPARATOR.PIPE_DEPTS " + + "where NAME = 'tic|tac|toe'"; + sql("custom-separator", sql) + .returns("DEPTNO=40; NAME=tic|tac|toe") + .ok(); + } + + /** Test case for + * [CALCITE-4460] + * Support custom delimiter when parsing CSV tables. + * + *

Verifies that a multi-character separator is rejected. */ + @Test void testCsvCustomSeparatorInvalidMultiChar() throws SQLException { + Properties info = new Properties(); + info.put("model", + "inline:" + + "{\n" + + " version: '1.0',\n" + + " defaultSchema: 'TEST',\n" + + " schemas: [\n" + + " {\n" + + " name: 'TEST',\n" + + " tables: [\n" + + " {\n" + + " name: 'BAD',\n" + + " type: 'custom',\n" + + " factory: 'org.apache.calcite.adapter.file.CsvTableFactory',\n" + + " operand: {\n" + + " file: 'sales-csv/DEPTS.csv',\n" + + " separator: '||'\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " ]\n" + + "}"); + try { + Connection connection = + DriverManager.getConnection("jdbc:calcite:", info); + connection.close(); + throw new AssertionError("expected error"); + } catch (RuntimeException e) { + Throwable cause = e; + while (cause.getCause() != null) { + cause = cause.getCause(); + } + assertThat(cause.getMessage(), + is("Invalid separator '||'. " + + "Separator must be a single character.")); + } + } + + /** Test case for + * [CALCITE-4460] + * Support custom delimiter when parsing CSV tables. + * + *

Verifies that omitting the separator defaults to comma. */ + @Test void testCsvDefaultSeparatorBackwardCompat() { + final String sql = "select * from CUSTOM_TABLE.EMPS"; + sql("model-with-custom-table", sql).ok(); + } + @Test void testPushDownProject() { final String sql = "explain plan for select * from EMPS"; final String expected = "PLAN=CsvTableScan(table=[[SALES, EMPS]], " diff --git a/file/src/test/resources/custom-separator.json b/file/src/test/resources/custom-separator.json new file mode 100644 index 000000000000..ebc500239431 --- /dev/null +++ b/file/src/test/resources/custom-separator.json @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +{ + "version": "1.0", + "defaultSchema": "CUSTOM_SEPARATOR", + "schemas": [ + { + "name": "CUSTOM_SEPARATOR", + "tables": [ + { + "name": "PIPE_DEPTS", + "type": "custom", + "factory": "org.apache.calcite.adapter.file.CsvTableFactory", + "operand": { + "file": "sales-csv/PIPE_DELIMITED.csv", + "separator": "|" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/file/src/test/resources/sales-csv/PIPE_DELIMITED.csv b/file/src/test/resources/sales-csv/PIPE_DELIMITED.csv new file mode 100644 index 000000000000..2a094fae300f --- /dev/null +++ b/file/src/test/resources/sales-csv/PIPE_DELIMITED.csv @@ -0,0 +1,5 @@ +DEPTNO:int|NAME:string +10|"Sales" +20|"Marketing" +30|"Accounts" +40|"tic|tac|toe" diff --git a/site/_docs/file_adapter.md b/site/_docs/file_adapter.md index a81255817992..dc31fec4134c 100644 --- a/site/_docs/file_adapter.md +++ b/site/_docs/file_adapter.md @@ -273,6 +273,26 @@ sqlline> select distinct deptno from depts; 3 rows selected (0.985 seconds) {% endhighlight %} +### CSV Custom Separator + +When using `CsvTableFactory` to define a table in a model, you can specify an +optional `separator` operand to use a custom delimiter. + +{% highlight json %} +{ + "name": "PIPE_DEPTS", + "type": "custom", + "factory": "org.apache.calcite.adapter.file.CsvTableFactory", + "operand": { + "file": "sales-csv/PIPE_DELIMITED.csv", + "separator": "|" + } +} +{% endhighlight %} + +The separator must be a single character. If not specified, it defaults to a +comma. + ## JSON files and model-free browsing Some files describe their own schema, and for these files, we do not need a model. For example, `DEPTS.json` has an integer `DEPTNO` column and a string `NAME` column: