diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index e198df65d82..7d252fa94ab 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -645,6 +645,14 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) !smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM)) smgrcreate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, false); + /* + * Might have to re-open if smgrcreate triggered AcceptInvalidationMessages + * (via TablespaceCreateDbspace -> LockSharedObject for non-default + * tablespaces), which may have processed a pending SHAREDINVALSMGR_ID + * message and closed our smgr entry. + */ + RelationOpenSmgr(rel); + /* Invalidate cache so that smgrnblocks() asks the kernel. */ rel->rd_smgr->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber; vm_nblocks_now = smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM); diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 796b915156b..ee97e757115 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -633,6 +633,14 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) !smgrexists(rel->rd_smgr, FSM_FORKNUM)) smgrcreate(rel->rd_smgr, FSM_FORKNUM, false); + /* + * Might have to re-open if smgrcreate triggered AcceptInvalidationMessages + * (via TablespaceCreateDbspace -> LockSharedObject for non-default + * tablespaces), which may have processed a pending SHAREDINVALSMGR_ID + * message and closed our smgr entry. + */ + RelationOpenSmgr(rel); + /* Invalidate cache so that smgrnblocks() asks the kernel. */ rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber; fsm_nblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM); diff --git a/src/test/regress/expected/.gitignore b/src/test/regress/expected/.gitignore index c837ca324d5..f625061dbd6 100644 --- a/src/test/regress/expected/.gitignore +++ b/src/test/regress/expected/.gitignore @@ -70,3 +70,4 @@ /ao_unique_index_partition.out /bfv_copy.out /copy_encoding_error.out +/vacuum_fsm_nondefault_tablespace.out diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 3c8f7965b28..604616791c8 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -167,6 +167,7 @@ test: instr_in_shmem_verify # hold locks. test: partition_locking test: vacuum_gp +test: vacuum_fsm_nondefault_tablespace test: resource_queue_stat # background analyze may affect pgstat test: pg_stat diff --git a/src/test/regress/input/vacuum_fsm_nondefault_tablespace.source b/src/test/regress/input/vacuum_fsm_nondefault_tablespace.source new file mode 100644 index 00000000000..adce9ab77de --- /dev/null +++ b/src/test/regress/input/vacuum_fsm_nondefault_tablespace.source @@ -0,0 +1,54 @@ +-- Test: VACUUM on a table in a non-default tablespace does not crash on first run. +-- +-- Bug: SIGSEGV in fsm_extend() (freespace.c:637) when vacuuming a heap table +-- (or an AO table's aoseg auxiliary table) that resides in a non-default +-- tablespace for the very first time. +-- +-- Root cause: commit "Prevent CREATE TABLE from using dangling tablespace" +-- added TablespaceLockTuple() in TablespaceCreateDbspace() for non-default +-- tablespaces. That call reaches AcceptInvalidationMessages() via +-- LockSharedObject(), which processes a pending SHAREDINVALSMGR_ID message +-- that vm_extend() had queued via CacheInvalidateSmgr(), nullifying +-- rel->rd_smgr before fsm_extend() dereferences it at freespace.c:637. +-- +-- Fix: added RelationOpenSmgr(rel) after smgrcreate() in both fsm_extend() +-- (freespace.c) and vm_extend() (visibilitymap.c) so that rd_smgr is +-- re-opened if the sinval handler closed it. + +CREATE TABLESPACE fsm_ts_test LOCATION '@testtablespace@'; + +-- Case 1: plain heap table in non-default tablespace, first VACUUM. +-- Before the fix this crashed with SIGSEGV at freespace.c:637: +-- vm_extend() -> CacheInvalidateSmgr (queues SHAREDINVALSMGR_ID) +-- fsm_extend() -> smgrcreate -> TablespaceCreateDbspace +-- -> TablespaceLockTuple -> AcceptInvalidationMessages +-- -> processes SHAREDINVALSMGR_ID -> rel->rd_smgr = NULL +-- -> rel->rd_smgr->smgr_cached_nblocks[...] = ... SIGSEGV +CREATE TABLE fsm_ts_heap (id int, val text) + TABLESPACE fsm_ts_test + DISTRIBUTED BY (id); +INSERT INTO fsm_ts_heap SELECT i, repeat('x', 80) FROM generate_series(1, 500) i; +VACUUM ANALYZE fsm_ts_heap; +SELECT count(*) FROM fsm_ts_heap; +-- Second VACUUM must also succeed (FSM/VM now exist, different code path). +VACUUM ANALYZE fsm_ts_heap; +SELECT count(*) FROM fsm_ts_heap; +DROP TABLE fsm_ts_heap; + +-- Case 2: AO table in non-default tablespace. +-- The crash occurs inside the recursive vacuum of the aoseg auxiliary table +-- (which is also a heap table stored in the same non-default tablespace). +CREATE TABLE fsm_ts_ao (id int, val text) + USING ao_row + TABLESPACE fsm_ts_test + DISTRIBUTED BY (id); +INSERT INTO fsm_ts_ao SELECT i, repeat('y', 80) FROM generate_series(1, 500) i; +VACUUM ANALYZE fsm_ts_ao; +SELECT count(*) FROM fsm_ts_ao; +-- Second VACUUM must also succeed. +VACUUM ANALYZE fsm_ts_ao; +SELECT count(*) FROM fsm_ts_ao; +DROP TABLE fsm_ts_ao; + +-- Cleanup. +DROP TABLESPACE fsm_ts_test; diff --git a/src/test/regress/output/vacuum_fsm_nondefault_tablespace.source b/src/test/regress/output/vacuum_fsm_nondefault_tablespace.source new file mode 100644 index 00000000000..fc2ff245691 --- /dev/null +++ b/src/test/regress/output/vacuum_fsm_nondefault_tablespace.source @@ -0,0 +1,70 @@ +-- Test: VACUUM on a table in a non-default tablespace does not crash on first run. +-- +-- Bug: SIGSEGV in fsm_extend() (freespace.c:637) when vacuuming a heap table +-- (or an AO table's aoseg auxiliary table) that resides in a non-default +-- tablespace for the very first time. +-- +-- Root cause: commit "Prevent CREATE TABLE from using dangling tablespace" +-- added TablespaceLockTuple() in TablespaceCreateDbspace() for non-default +-- tablespaces. That call reaches AcceptInvalidationMessages() via +-- LockSharedObject(), which processes a pending SHAREDINVALSMGR_ID message +-- that vm_extend() had queued via CacheInvalidateSmgr(), nullifying +-- rel->rd_smgr before fsm_extend() dereferences it at freespace.c:637. +-- +-- Fix: added RelationOpenSmgr(rel) after smgrcreate() in both fsm_extend() +-- (freespace.c) and vm_extend() (visibilitymap.c) so that rd_smgr is +-- re-opened if the sinval handler closed it. +CREATE TABLESPACE fsm_ts_test LOCATION '@testtablespace@'; +-- Case 1: plain heap table in non-default tablespace, first VACUUM. +-- Before the fix this crashed with SIGSEGV at freespace.c:637: +-- vm_extend() -> CacheInvalidateSmgr (queues SHAREDINVALSMGR_ID) +-- fsm_extend() -> smgrcreate -> TablespaceCreateDbspace +-- -> TablespaceLockTuple -> AcceptInvalidationMessages +-- -> processes SHAREDINVALSMGR_ID -> rel->rd_smgr = NULL +-- -> rel->rd_smgr->smgr_cached_nblocks[...] = ... SIGSEGV +CREATE TABLE fsm_ts_heap (id int, val text) + TABLESPACE fsm_ts_test + DISTRIBUTED BY (id); +INSERT INTO fsm_ts_heap SELECT i, repeat('x', 80) FROM generate_series(1, 500) i; +VACUUM ANALYZE fsm_ts_heap; +SELECT count(*) FROM fsm_ts_heap; + count +------- + 500 +(1 row) + +-- Second VACUUM must also succeed (FSM/VM now exist, different code path). +VACUUM ANALYZE fsm_ts_heap; +SELECT count(*) FROM fsm_ts_heap; + count +------- + 500 +(1 row) + +DROP TABLE fsm_ts_heap; +-- Case 2: AO table in non-default tablespace. +-- The crash occurs inside the recursive vacuum of the aoseg auxiliary table +-- (which is also a heap table stored in the same non-default tablespace). +CREATE TABLE fsm_ts_ao (id int, val text) + USING ao_row + TABLESPACE fsm_ts_test + DISTRIBUTED BY (id); +INSERT INTO fsm_ts_ao SELECT i, repeat('y', 80) FROM generate_series(1, 500) i; +VACUUM ANALYZE fsm_ts_ao; +SELECT count(*) FROM fsm_ts_ao; + count +------- + 500 +(1 row) + +-- Second VACUUM must also succeed. +VACUUM ANALYZE fsm_ts_ao; +SELECT count(*) FROM fsm_ts_ao; + count +------- + 500 +(1 row) + +DROP TABLE fsm_ts_ao; +-- Cleanup. +DROP TABLESPACE fsm_ts_test; diff --git a/src/test/regress/sql/.gitignore b/src/test/regress/sql/.gitignore index 9b5f3660fa7..3a340338616 100644 --- a/src/test/regress/sql/.gitignore +++ b/src/test/regress/sql/.gitignore @@ -64,3 +64,4 @@ /ao_unique_index_partition.sql /bfv_copy.sql /copy_encoding_error.sql +/vacuum_fsm_nondefault_tablespace.sql