Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions docs/source/contributor-guide/sql-file-tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,46 @@ Then write a similar test file for the `reverse` function, covering column argum
literal arguments, NULLs, empty strings, and multibyte characters.
```

## Capturing per-query output

Set the system property `comet.sqlFileTest.verboseOutput` to a file path and
the suite will append a markdown-formatted record of every query it runs,
including the SQL, the mode, the source file and line, and Spark's actual
result or error. This is useful for documenting Spark behavior across
versions or for cross-checking results against another engine.

```shell
rm -f /tmp/sql-file-tests.md
ARGS="-ea -Xmx4g -Xss4m -XX:+IgnoreUnrecognizedVMOptions \
--add-opens=java.base/java.lang=ALL-UNNAMED \
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
--add-opens=java.base/java.io=ALL-UNNAMED \
--add-opens=java.base/java.net=ALL-UNNAMED \
--add-opens=java.base/java.nio=ALL-UNNAMED \
--add-opens=java.base/java.util=ALL-UNNAMED \
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
--add-opens=java.base/sun.security.action=ALL-UNNAMED \
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
-Djdk.reflect.useDirectMethodHandle=false \
-Dcomet.sqlFileTest.verboseOutput=/tmp/sql-file-tests.md"

./mvnw test -Dsuites="org.apache.comet.CometSqlFileTestSuite encode" \
-Dtest=none -DargLine="$ARGS"
```

The JDK 17 `--add-opens` flags are copied from the pom's default
`extraJavaTestArgs`. Overriding `argLine` replaces that default entirely, so
all the module-opens entries must be reproduced alongside the new property.

Skipped files (e.g. those gated by `MinSparkVersion`) do not appear in the
output. Queries marked `ignore(...)` are not executed, so they are also
absent.

## Handling test failures

When a query fails due to a known Comet bug:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,72 @@
package org.apache.comet

import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path, Paths, StandardOpenOption}
import java.util.concurrent.atomic.AtomicBoolean

import org.scalactic.source.Position
import org.scalatest.Tag

import org.apache.spark.sql.CometTestBase
import org.apache.spark.SPARK_VERSION
import org.apache.spark.sql.{CometTestBase, DataFrame}
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper

/**
 * Writes per-query output to a markdown file when the system property
 * `comet.sqlFileTest.verboseOutput` is set to a file path. Useful for capturing Spark's actual
 * result for each query in the SQL test corpus, e.g. to document behavior across Spark versions
 * or to cross-check against a different engine.
 */
private object VerboseOutput {

  /** Destination file, read once from the system property at object initialization. */
  private val outputPath: Option[Path] =
    sys.props.get("comet.sqlFileTest.verboseOutput").map(Paths.get(_))

  /** Flips to true when the markdown header has been written. */
  private val initialized = new AtomicBoolean(false)

  /** True when verbose output capture is enabled via the system property. */
  def enabled: Boolean = outputPath.isDefined

  /**
   * Writes the file header (and creates parent directories) on the first call.
   *
   * Always invoked while holding the object monitor (see [[append]]): the
   * AtomicBoolean alone is not sufficient, because a caller that loses the
   * CAS race could otherwise append its record before the winner has
   * finished writing the header to disk.
   */
  private def ensureInitialized(): Unit = {
    outputPath.foreach { path =>
      if (initialized.compareAndSet(false, true)) {
        val header =
          s"""# SQL file test output
             |
             |Spark version: **$SPARK_VERSION**
             |
             |Each section below is one query from a SQL test file, showing
             |the SQL, the mode under which it ran, and Spark's actual output
             |or error.
             |
             |""".stripMargin
        Option(path.getParent).foreach(Files.createDirectories(_))
        Files.write(path, header.getBytes(StandardCharsets.UTF_8))
      }
    }
  }

  /**
   * Appends `content` verbatim to the output file; a no-op when capture is
   * disabled. Synchronized so that records emitted by concurrently running
   * tests cannot interleave mid-record, and so header initialization is
   * ordered before any append.
   */
  def append(content: String): Unit = synchronized {
    outputPath.foreach { path =>
      ensureInitialized()
      // CREATE alongside APPEND so a record still lands (instead of throwing
      // NoSuchFileException) if the output file was deleted externally
      // between records.
      Files.write(
        path,
        content.getBytes(StandardCharsets.UTF_8),
        StandardOpenOption.CREATE,
        StandardOpenOption.APPEND)
    }
  }

  /**
   * Renders a DataFrame's rows as one `" | "`-separated line per row, with
   * `NULL` standing in for null values, or `(no rows)` for an empty result.
   * Note this collects the full result to the driver.
   */
  def formatResult(df: DataFrame): String = {
    val rows = df.collect()
    if (rows.isEmpty) "(no rows)"
    else
      rows
        .map(r => r.toSeq.map(v => if (v == null) "NULL" else v.toString).mkString(" | "))
        .mkString("\n")
  }

  /**
   * Renders a throwable as `className: firstLineOfMessage`. Only the first
   * line of the message is kept, since Spark errors often embed multi-line
   * plans that would bloat the markdown output; a null message renders as an
   * empty string.
   */
  def formatError(t: Throwable): String = {
    val cls = t.getClass.getName
    val msg = Option(t.getMessage).getOrElse("").takeWhile(_ != '\n')
    s"$cls: $msg"
  }
}

class CometSqlFileTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {

override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit
Expand Down Expand Up @@ -79,6 +138,15 @@ class CometSqlFileTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {

private def runTestFile(relativePath: String, file: SqlTestFile): Unit = {
val allConfigs = file.configs ++ constantFoldingExcluded
if (VerboseOutput.enabled) {
val configDesc =
if (file.configs.isEmpty) ""
else
file.configs
.map { case (k, v) => s" - `$k` = `$v`" }
.mkString("\nConfigs:\n", "\n", "\n")
VerboseOutput.append(s"\n## `$relativePath`\n$configDesc\n")
}
withSQLConf(allConfigs: _*) {
withTable(file.tables: _*) {
file.records.foreach {
Expand All @@ -93,6 +161,35 @@ class CometSqlFileTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {
throw new RuntimeException(s"Error executing SQL '$sql' ${e.getMessage}", e)
}
case SqlQuery(sql, mode, line) =>
if (VerboseOutput.enabled) {
val modeLabel = mode match {
case CheckCoverageAndAnswer => "query"
case SparkAnswerOnly => "query spark_answer_only"
case WithTolerance(t) => s"query tolerance=$t"
case ExpectFallback(r) => s"query expect_fallback($r)"
case Ignore(r) => s"query ignore($r)"
case ExpectError(p) => s"query expect_error($p)"
}
val body =
try {
VerboseOutput.formatResult(spark.sql(sql))
} catch {
case e: Throwable => s"ERROR: ${VerboseOutput.formatError(e)}"
}
VerboseOutput.append(s"""### `$modeLabel`
|
|`$relativePath:$line`
|
|```sql
|$sql
|```
|
|```
|$body
|```
|
|""".stripMargin)
}
try {
val location = if (line > 0) s"$relativePath:$line" else relativePath
withClue(s"In SQL file $location, executing query:\n$sql\n") {
Expand Down
Loading