Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
c1e7e4e
feat: reorder row groups by statistics during sort pushdown
zhuqi-lucas Apr 13, 2026
8f0baba
test: add SLT tests for row group reorder by statistics
zhuqi-lucas Apr 13, 2026
f36cdec
test: add EXPLAIN assertions for row group reorder tests
zhuqi-lucas Apr 13, 2026
5b471d2
fix: use max statistics for DESC sort reorder
zhuqi-lucas Apr 13, 2026
0100fed
fix: prevent reorder+reverse double-reordering of row groups
zhuqi-lucas Apr 14, 2026
a18c746
fix: rebase conflicts and compilation errors
zhuqi-lucas Apr 16, 2026
b1784ab
refactor: introduce AccessPlanOptimizer trait for row group reordering
zhuqi-lucas Apr 16, 2026
1e8ecd2
chore: remove benchmark from this PR (tracked in #21582)
zhuqi-lucas Apr 16, 2026
9e4ac9a
fix: resolve doc link for AccessPlanOptimizer
zhuqi-lucas Apr 17, 2026
d128035
fix: restore benchmark files from upstream main
zhuqi-lucas Apr 17, 2026
09ae8b0
fix: compose reorder and reverse as sequential steps instead of mutua…
zhuqi-lucas Apr 18, 2026
be7d7b1
fix: generate scrambled+overlapping RGs for overlap benchmark
zhuqi-lucas Apr 18, 2026
88bdaac
feat: reorder files in shared work queue by statistics for TopK
zhuqi-lucas Apr 20, 2026
3996178
feat: initialize TopK dynamic filter threshold from parquet statistics
zhuqi-lucas Apr 18, 2026
bccc42b
feat: enable file reorder and RG reorder for all TopK queries
zhuqi-lucas Apr 20, 2026
e4e11b2
perf: move stats init before RG pruning so first file also benefits
zhuqi-lucas Apr 20, 2026
c94bdcc
fix: restrict RG reorder/reverse to sort pushdown path only
zhuqi-lucas Apr 20, 2026
54f4fd4
perf: move stats init before PruningPredicate build + fix CastExpr un…
zhuqi-lucas Apr 21, 2026
8fb7b00
fix: null-aware filter + restrict stats init to sort pushdown path
zhuqi-lucas Apr 21, 2026
18939c9
feat: enable stats init for ALL TopK queries + fix fuzz test tiebreaker
zhuqi-lucas Apr 21, 2026
80badc0
fix: restrict stats init to sort pushdown path to avoid over-pruning
zhuqi-lucas Apr 21, 2026
9d6b67b
fix: stats init only safe for sorted (non-overlapping) RGs
zhuqi-lucas Apr 21, 2026
5f4c54d
feat: enable stats init for pure TopK queries (no WHERE clause)
zhuqi-lucas Apr 21, 2026
a7b4e25
fix: stats init requires sort pushdown + no WHERE clause
zhuqi-lucas Apr 21, 2026
b122ae2
feat: TopK cumulative RG pruning after reorder (works with WHERE)
zhuqi-lucas Apr 21, 2026
ca94342
feat: enable RG reorder + cumulative prune for all TopK queries
zhuqi-lucas Apr 21, 2026
aad61ce
fix: only reverse/cumulate when reorder succeeds (prevents ClickBench…
zhuqi-lucas Apr 21, 2026
26d23b4
fix: escape brackets in doc comment to fix rustdoc link error
zhuqi-lucas Apr 21, 2026
7c42666
chore: remove benchmark and listing_table_partitions changes from thi…
zhuqi-lucas Apr 21, 2026
583f4db
refactor: remove stats init in favor of cumulative RG pruning + add S…
zhuqi-lucas Apr 21, 2026
ec941ff
fix: cumulative prune only without WHERE to avoid under-returning rows
zhuqi-lucas Apr 21, 2026
839ab5a
feat: restore stats init with fixes (GtEq + df.fetch() + type cast)
zhuqi-lucas Apr 21, 2026
a269ffd
fix: SortExec.fetch was 0 when create_filter was called
zhuqi-lucas Apr 21, 2026
ae9ebd7
perf: skip RG reorder when sort column not in file schema
zhuqi-lucas Apr 22, 2026
5c31674
fix: use slt:ignore for non-deterministic output_rows_skew metric
zhuqi-lucas Apr 22, 2026
2081071
Merge branch 'main' into feat/reorder-row-groups-by-stats
zhuqi-lucas Apr 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions datafusion/core/tests/fuzz_cases/topk_filter_pushdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,21 +305,36 @@ async fn test_fuzz_topk_filter_pushdown() {
}

let mut queries = vec![];
let all_columns = ["id", "name", "department"];

for limit in [1, 10] {
for num_order_by_columns in [1, 2, 3] {
for order_columns in ["id", "name", "department"]
.iter()
.combinations(num_order_by_columns)
{
for order_columns in all_columns.iter().combinations(num_order_by_columns) {
for orderings in order_columns
.iter()
.map(|col| orders.get(**col).unwrap())
.multi_cartesian_product()
{
// Add the remaining columns as ASC tiebreakers so that the
// ordering is fully deterministic. Without this, optimizations
// that change the row group (RG) read order (e.g.
// statistics-based reordering or pruning) may return a different
// but equally valid set of rows when the sort keys tie.
let used: Vec<&str> = order_columns.iter().map(|c| **c).collect();
let tiebreakers: Vec<String> = all_columns
.iter()
.filter(|c| !used.contains(*c))
.map(|c| format!("{c} ASC NULLS LAST"))
.collect();
let mut all_orderings: Vec<&str> =
orderings.iter().map(|s| s.as_str()).collect();
let tiebreaker_refs: Vec<&str> =
tiebreakers.iter().map(|s| s.as_str()).collect();
all_orderings.extend(tiebreaker_refs);

let query = format!(
"SELECT * FROM test_table ORDER BY {} LIMIT {}",
orderings.into_iter().join(", "),
all_orderings.join(", "),
limit
);
queries.push(query);
Expand Down
Loading
Loading