diff --git a/docs/source/contributor-guide/sql-file-tests.md b/docs/source/contributor-guide/sql-file-tests.md index bff8dc50df..fd9a19e46a 100644 --- a/docs/source/contributor-guide/sql-file-tests.md +++ b/docs/source/contributor-guide/sql-file-tests.md @@ -336,6 +336,46 @@ Then write a similar test file for the `reverse` function, covering column argum literal arguments, NULLs, empty strings, and multibyte characters. ``` +## Capturing per-query output + +Set the system property `comet.sqlFileTest.verboseOutput` to a file path to +have the suite append a markdown-formatted record of every query it runs, +including the SQL, mode, source file and line, and Spark's actual result or +error. This is useful for documenting Spark behavior across versions or for +cross-checking results against another engine. + +```shell +rm -f /tmp/sql-file-tests.md +ARGS="-ea -Xmx4g -Xss4m -XX:+IgnoreUnrecognizedVMOptions \ + --add-opens=java.base/java.lang=ALL-UNNAMED \ + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \ + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \ + --add-opens=java.base/java.io=ALL-UNNAMED \ + --add-opens=java.base/java.net=ALL-UNNAMED \ + --add-opens=java.base/java.nio=ALL-UNNAMED \ + --add-opens=java.base/java.util=ALL-UNNAMED \ + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \ + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \ + --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \ + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \ + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \ + --add-opens=java.base/sun.security.action=ALL-UNNAMED \ + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \ + -Djdk.reflect.useDirectMethodHandle=false \ + -Dcomet.sqlFileTest.verboseOutput=/tmp/sql-file-tests.md" + +./mvnw test -Dsuites="org.apache.comet.CometSqlFileTestSuite encode" \ + -Dtest=none -DargLine="$ARGS" +``` + +The JDK 17 `--add-opens` flags are copied from the pom's default +`extraJavaTestArgs`. 
Overriding `argLine` replaces that default entirely, so +all the module-opens entries must be reproduced alongside the new property. + +Skipped files (e.g. those gated by `MinSparkVersion`) do not appear in the +output. Queries marked `ignore(...)` are skipped by the test flow, but they +are still executed once here so that their Spark result is captured. + ## Handling test failures  When a query fails due to a known Comet bug: diff --git a/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala b/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala index 5a0b34e056..226863679a 100644 --- a/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala @@ -20,13 +20,72 @@ package org.apache.comet  import java.io.File +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Path, Paths, StandardOpenOption} +import java.util.concurrent.atomic.AtomicBoolean  import org.scalactic.source.Position import org.scalatest.Tag  -import org.apache.spark.sql.CometTestBase +import org.apache.spark.SPARK_VERSION +import org.apache.spark.sql.{CometTestBase, DataFrame} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper  +/** + * Writes per-query output to a markdown file when the system property + * `comet.sqlFileTest.verboseOutput` is set to a file path. Useful for capturing Spark's actual + * result for each query in the SQL test corpus, e.g. to document behavior across Spark versions + * or to cross-check against a different engine. 
+ */ +private object VerboseOutput { + private val outputPath: Option[Path] = + sys.props.get("comet.sqlFileTest.verboseOutput").map(Paths.get(_)) + private val initialized = new AtomicBoolean(false) + + def enabled: Boolean = outputPath.isDefined + + private def ensureInitialized(): Unit = { + outputPath.foreach { path => + if (initialized.compareAndSet(false, true)) { + val header = + s"""# SQL file test output + | + |Spark version: **$SPARK_VERSION** + | + |Each section below is one query from a SQL test file, showing + |the SQL, the mode under which it ran, and Spark's actual output + |or error. + | + |""".stripMargin + Option(path.getParent).foreach(Files.createDirectories(_)) + Files.write(path, header.getBytes(StandardCharsets.UTF_8)) + } + } + } + + def append(content: String): Unit = { + outputPath.foreach { path => + ensureInitialized() + Files.write(path, content.getBytes(StandardCharsets.UTF_8), StandardOpenOption.APPEND) + } + } + + def formatResult(df: DataFrame): String = { + val rows = df.collect() + if (rows.isEmpty) "(no rows)" + else + rows + .map(r => r.toSeq.map(v => if (v == null) "NULL" else v.toString).mkString(" | ")) + .mkString("\n") + } + + def formatError(t: Throwable): String = { + val cls = t.getClass.getName + val msg = Option(t.getMessage).getOrElse("").takeWhile(_ != '\n') + s"$cls: $msg" + } +} + class CometSqlFileTestSuite extends CometTestBase with AdaptiveSparkPlanHelper { override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit @@ -79,6 +138,15 @@ class CometSqlFileTestSuite extends CometTestBase with AdaptiveSparkPlanHelper { private def runTestFile(relativePath: String, file: SqlTestFile): Unit = { val allConfigs = file.configs ++ constantFoldingExcluded + if (VerboseOutput.enabled) { + val configDesc = + if (file.configs.isEmpty) "" + else + file.configs + .map { case (k, v) => s" - `$k` = `$v`" } + .mkString("\nConfigs:\n", "\n", "\n") + VerboseOutput.append(s"\n## 
`$relativePath`\n$configDesc\n") + } withSQLConf(allConfigs: _*) { withTable(file.tables: _*) { file.records.foreach { @@ -93,6 +161,35 @@ class CometSqlFileTestSuite extends CometTestBase with AdaptiveSparkPlanHelper { throw new RuntimeException(s"Error executing SQL '$sql' ${e.getMessage}", e) } case SqlQuery(sql, mode, line) => + if (VerboseOutput.enabled) { + val modeLabel = mode match { + case CheckCoverageAndAnswer => "query" + case SparkAnswerOnly => "query spark_answer_only" + case WithTolerance(t) => s"query tolerance=$t" + case ExpectFallback(r) => s"query expect_fallback($r)" + case Ignore(r) => s"query ignore($r)" + case ExpectError(p) => s"query expect_error($p)" + } + val body = + try { + VerboseOutput.formatResult(spark.sql(sql)) + } catch { + case e: Throwable => s"ERROR: ${VerboseOutput.formatError(e)}" + } + VerboseOutput.append(s"""### `$modeLabel` + | + |`$relativePath:$line` + | + |```sql + |$sql + |``` + | + |``` + |$body + |``` + | + |""".stripMargin) + } try { val location = if (line > 0) s"$relativePath:$line" else relativePath withClue(s"In SQL file $location, executing query:\n$sql\n") {