主要的autovacuum共享内存结构体,存储在shared memory中,同时WorkerInfo也会存储在其中.
* The main autovacuum shmem struct. On shared memory we store this main
* struct and the array of WorkerInfo structs. This struct keeps:
* 主要的autovacuum共享内存结构体,存储在shared memory中,同时WorkerInfo也会存储在其中.
* 该结构体包括:
* av_signal set by other processes to indicate various conditions
* 其他进程设置用于提示不同的条件
* av_launcherpid the PID of the autovacuum launcher
* autovacuum launcher的PID
* av_freeWorkers the WorkerInfo freelist
* WorkerInfo空闲链表
* av_runningWorkers the WorkerInfo non-free queue
* WorkerInfo非空闲队列
* av_startingWorker pointer to WorkerInfo currently being started (cleared by
* the worker itself as soon as it's up and running)
* av_startingWorker指向当前正在启动的WorkerInfo(worker启动并运行后由其自身清除)
* av_workItems work item array
* av_workItems 工作条目数组
* This struct is protected by AutovacuumLock, except for av_signal and parts
* of the worker list (see above).
* 除了av_signal和worker list的一部分信息,该数据结构通过AutovacuumLock保护
/*
 * Main autovacuum shared-memory struct.  Protected by AutovacuumLock,
 * except for av_signal and parts of the worker list.
 */
typedef struct
{
	/* set by other processes to indicate various conditions */
	sig_atomic_t av_signal[AutoVacNumSignals];
	/* PID of the autovacuum launcher */
	pid_t		av_launcherpid;
	/* WorkerInfo freelist */
	dlist_head	av_freeWorkers;
	/* WorkerInfo non-free queue */
	dlist_head	av_runningWorkers;
	/*
	 * WorkerInfo currently being started; cleared by the worker itself as
	 * soon as it is up and running.
	 */
	WorkerInfo	av_startingWorker;
	/* work item array */
	AutoVacuumWorkItem av_workItems[NUM_WORKITEMS];
} AutoVacuumShmemStruct;

/* Pointer to the struct above; it is stored in shared memory. */
static AutoVacuumShmemStruct *AutoVacuumShmem;
64 bit的事务ID
* A 64 bit value that contains an epoch and a TransactionId. This is
* wrapped in a struct to prevent implicit conversion to/from TransactionId.
* Not all values represent valid normal XIDs.
* 包含epoch和TransactionId的64 bit值.封装在结构体中避免与TransactionId之间的隐式转换.并不是所有的值都表示有效的普通xid。
typedef struct FullTransactionId
uint64 value;
} FullTransactionId;
/* struct to keep track of databases in worker */
typedef struct avw_dbase
Oid adw_datid;
char *adw_name;
TransactionId adw_frozenxid;
MultiXactId adw_minmulti;
PgStat_StatDBEntry *adw_entry;
} avw_dbase;
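* Structure to hold info passed by _beginthreadex() to the function it calls
* via its single allowed argument.
* (注: 下面这个含AH/slot成员的WorkerInfo看起来并不是autovacuum使用的WorkerInfo ——
* 本文中autovacuum代码通过WorkerInfo访问的是wi_links/wi_dboid/wi_proc/wi_launchtime等字段
* (见后文gdb的"p *worker"输出),对应的结构体是WorkerInfoData,此处定义可能引用有误,请核对.)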
typedef struct
ArchiveHandle *AH; /* master database connection */
ParallelSlot *slot; /* this worker's parallel slot */
} WorkerInfo;
* launch_worker
* Wrapper for starting a worker from the launcher. Besides actually starting
* it, update the database list to reflect the next time that another one will
* need to be started on the selected database. The actual database choice is
* left to do_start_worker.
* 从autovacuum launcher启动worker的封装器.
* 除了实际启动它之外,还要更新数据库链表,以反映下一次需要在选定的数据库上启动另一个worker时的情况。
* 实际的数据库选择留给do_start_worker。
* This routine is also expected to insert an entry into the database list if
* the selected database was previously absent from the list.
* 如果选定的数据库先前不在数据库链表中,这段例程还应当向链表中插入一个对应的条目.
static void
launch_worker(TimestampTz now)
Oid dbid;
dlist_iter iter;
dbid = do_start_worker();
if (OidIsValid(dbid))
bool found = false;
* Walk the database list and update the corresponding entry. If the
* database is not on the list, we'll recreate the list.
* 遍历数据库链表,更新相应的条目.
* 如果数据库不在链表中,重建链表.
dlist_foreach(iter, &DatabaseList)
avl_dbase *avdb = dlist_container(avl_dbase, adl_node, iter.cur);
if (avdb->adl_datid == dbid)
found = true;
* add autovacuum_naptime seconds to the current time, and use
* that as the new "next_worker" field for this database.
* 在当前时间上增加autovacuum_naptime,
* 并为该数据库使用该时间作为新的next_worker字段的值.
avdb->adl_next_worker =
TimestampTzPlusMilliseconds(now, autovacuum_naptime * 1000);
dlist_move_head(&DatabaseList, iter.cur);
* If the database was not present in the database list, we rebuild
* the list. It's possible that the database does not get into the
* list anyway, for example if it's a database that doesn't have a
* pgstat entry, but this is not a problem because we don't want to
* schedule workers regularly into those in any case.
* 如果数据库不在数据库链表中,重建链表.
* 有可能该数据库没有进入过链表中,比如,该数据库没有pgstat条目入口,
* 但这不是一个问题,因为我们不希望在任何情况调度到这些数据库上面.
if (!found)
* do_start_worker
* Bare-bones procedure for starting an autovacuum worker from the launcher.
* It determines what database to work on, sets up shared memory stuff and
* signals postmaster to start the worker. It fails gracefully if invoked when
* autovacuum_workers are already active.
* 启动autovacuum worker。
* 确定处理哪个库,配置共享内存并通知postmaster启动worker。
* 如调用时活动的worker数量已达到autovacuum_workers上限(没有空闲worker),则不启动worker并平稳返回。
* Return value is the OID of the database that the worker is going to process,
* or InvalidOid if no worker was actually started.
* 返回正在处理的数据库OID,如worker启动不成功,则返回InvalidOid。
static Oid
List *dblist;//数据库链表
ListCell *cell;//临时变量
//typedef uint32 TransactionId;
TransactionId xidForceLimit;//事务id,无符号32bit整型
MultiXactId multiForceLimit;//
bool for_xid_wrap;
bool for_multi_wrap;
avw_dbase *avdb;
TimestampTz current_time;//当前时间
bool skipit = false;//是否跳过?
Oid retval = InvalidOid;//返回的数据库OID
MemoryContext tmpcxt,
/* return quickly when there are no free workers */
LWLockAcquire(AutovacuumLock, LW_SHARED);
if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
return InvalidOid;
* Create and switch to a temporary context to avoid leaking the memory
* allocated for the database list.
* 内存上下文切换
tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
"Start worker tmp cxt",
oldcxt = MemoryContextSwitchTo(tmpcxt);
/* use fresh stats */
/* Get a list of databases */
dblist = get_database_list();
* Determine the oldest datfrozenxid/relfrozenxid that we will allow to
* pass without forcing a vacuum. (This limit can be tightened for
* particular tables, but not loosened.)
* 确定最老的datfrozenxid/relfrozenxid,用以确定是否需要强制vacuum
recentXid = ReadNewTransactionId();
xidForceLimit = recentXid - autovacuum_freeze_max_age;
/* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */
/* this can cause the limit to go backwards by 3, but that's OK */
//#define FirstNormalTransactionId ((TransactionId) 3)
if (xidForceLimit < FirstNormalTransactionId)
xidForceLimit -= FirstNormalTransactionId;
/* Also determine the oldest datminmxid we will consider. */
recentMulti = ReadNextMultiXactId();
multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
if (multiForceLimit < FirstMultiXactId)
multiForceLimit -= FirstMultiXactId;
* Choose a database to connect to. We pick the database that was least
* recently auto-vacuumed, or one that needs vacuuming to prevent Xid
* wraparound-related data loss. If any db at risk of Xid wraparound is
* found, we pick the one with oldest datfrozenxid, independently of
* autovacuum times; similarly we pick the one with the oldest datminmxid
* if any is in MultiXactId wraparound. Note that those in Xid wraparound
* danger are given more priority than those in multi wraparound danger.
* Note that a database with no stats entry is not considered, except for
* Xid wraparound purposes. The theory is that if no one has ever
* connected to it since the stats were last initialized, it doesn't need
* vacuuming.
* XXX This could be improved if we had more info about whether it needs
* vacuuming before connecting to it. Perhaps look through the pgstats
* data for the database's tables? One idea is to keep track of the
* number of new and dead tuples per database in pgstats. However it
* isn't clear how to construct a metric that measures that and not cause
* starvation for less busy databases.
* 选择一个DB.
* 算法:选择距上次autovacuum时间最久的DB,或者需要清理以防止XID回卷导致数据丢失的DB.
* 如果存在XID回卷风险的DB,那么选择datfrozenxid最老的DB,而不考虑其autovacuum时间;
* 类似地,如果存在MultiXactId回卷风险的DB,则选择datminmxid最老的DB.XID回卷风险的优先级高于MultiXactId回卷风险.
* 没有统计信息条目的DB不会被考虑(防止XID回卷的情况除外).
avdb = NULL;//待清理的DB
for_xid_wrap = false;//xid回卷
for_multi_wrap = false;
current_time = GetCurrentTimestamp();//当前时间
foreach(cell, dblist)//循环db链表
avw_dbase *tmp = lfirst(cell);
dlist_iter iter;
/* Check to see if this one is at risk of wraparound */
//TransactionIdPrecedes --- is id1 logically < id2?
if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
if (avdb == NULL ||
avdb = tmp;//选择较旧的那个
for_xid_wrap = true;
else if (for_xid_wrap)
continue; /* ignore not-at-risk DBs */
else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
if (avdb == NULL ||
MultiXactIdPrecedes(tmp->adw_minmulti, avdb->adw_minmulti))
avdb = tmp;
for_multi_wrap = true;
else if (for_multi_wrap)
continue; /* ignore not-at-risk DBs */
/* Find pgstat entry if any */
tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
* Skip a database with no pgstat entry; it means it hasn't seen any
* activity.
* 如无统计信息,跳过
if (!tmp->adw_entry)
* Also, skip a database that appears on the database list as having
* been processed recently (less than autovacuum_naptime seconds ago).
* We do this so that we don't select a database which we just
* selected, but that pgstat hasn't gotten around to updating the last
* autovacuum time yet.
* 跳过出现在数据库链表中已处理过的DB(先前小于autovacuum_naptime秒)
* 执行该操作是为了避免选择刚才才选择的DB
skipit = false;
dlist_reverse_foreach(iter, &DatabaseList)
avl_dbase *dbp = dlist_container(avl_dbase, adl_node, iter.cur);
if (dbp->adl_datid == tmp->adw_datid)
* Skip this database if its next_worker value falls between
* the current time and the current time plus naptime.
* 未超过时(naptime定义)
if (!TimestampDifferenceExceeds(dbp->adl_next_worker,
current_time, 0) &&
autovacuum_naptime * 1000))
skipit = true;
if (skipit)
* Remember the db with oldest autovac time. (If we are here, both
* tmp->entry and db->entry must be non-null.)
if (avdb == NULL ||
tmp->adw_entry->last_autovac_time < avdb->adw_entry->last_autovac_time)
avdb = tmp;
/* Found a database -- process it */
if (avdb != NULL)
WorkerInfo worker;
dlist_node *wptr;
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
* Get a worker entry from the freelist. We checked above, so there
* really should be a free slot.
* 从空闲链表中获取一个worker
wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
worker = dlist_container(WorkerInfoData, wi_links, wptr);
worker->wi_dboid = avdb->adw_datid;
worker->wi_proc = NULL;
worker->wi_launchtime = GetCurrentTimestamp();
AutoVacuumShmem->av_startingWorker = worker;
retval = avdb->adw_datid;
else if (skipit)
* If we skipped all databases on the list, rebuild it, because it
* probably contains a dropped database.
return retval;
* For callers that just need the XID part of the next transaction ID.
static inline TransactionId
return XidFromFullTransactionId(ReadNextFullTransactionId());
#define XidFromFullTransactionId(x) ((uint32) (x).value)
* Read nextFullXid but don't allocate it.
FullTransactionId fullXid;
LWLockAcquire(XidGenLock, LW_SHARED);
fullXid = ShmemVariableCache->nextFullXid;
return fullXid;
(gdb) handle SIGINT print nostop pass
SIGINT is used by the debugger.
Are you sure you want to change it? (y or n)
Please answer y or n.
SIGINT is used by the debugger.
Are you sure you want to change it? (y or n) y
Signal Stop Print Pass to program Description
SIGINT No Yes Yes Interrupt
(gdb) b autovacuum.c:launch_worker
Breakpoint 1 at 0x82f3e7: file autovacuum.c, line 1338.
(gdb) b autovacuum.c:783
Breakpoint 2 at 0x82e8f0: file autovacuum.c, line 783.
(gdb) c
[pg12@localhost test]$ psql -c "update tbl set id = 1;"
Expanded display is used automatically.
UPDATE 2000000
[pg12@localhost test]$ psql -c "update t1 set id = 1;"
Expanded display is used automatically.
UPDATE 20000
[pg12@localhost test]$ psql -c "update t2 set id = 1;"
Expanded display is used automatically.
UPDATE 10000
[pg12@localhost test]$ psql -c "select txid_current();"
Expanded display is used automatically.
(1 row)
60s后在gdb console中continue
Breakpoint 2, AutoVacLauncherMain (argc=0, argv=0x0) at autovacuum.c:783
783 if (dlist_is_empty(&DatabaseList))
(gdb) n
804 avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
(gdb) n
810 if (TimestampDifferenceExceeds(avdb->adl_next_worker,
812 launch_worker(current_time);
(gdb) p *avdb
$1 = {adl_datid = 16384, adl_next_worker = 628852948486950, adl_score = 0, adl_node = {
prev = 0xfd9880 <DatabaseList>, next = 0xfd9880 <DatabaseList>}}
(gdb) step
Breakpoint 1, launch_worker (now=628853296722794) at autovacuum.c:1338
1338 dbid = do_start_worker();
(gdb) step
do_start_worker () at autovacuum.c:1128
1128 bool skipit = false;
(gdb) n
1129 Oid retval = InvalidOid;
1134 LWLockAcquire(AutovacuumLock, LW_SHARED);
1135 if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
(gdb) p *AutoVacuumShmem
$2 = {av_signal = {0, 0}, av_launcherpid = 5476, av_freeWorkers = {head = {prev = 0x7f8ccf1a4938,
next = 0x7f8ccf1a49b8}}, av_runningWorkers = {head = {prev = 0x7f8ccf1a3520, next = 0x7f8ccf1a3520}},
av_startingWorker = 0x0, av_workItems = {{avw_type = AVW_BRINSummarizeRange, avw_used = false,
avw_active = false, avw_database = 0, avw_relation = 0, avw_blockNumber = 0} <repeats 256 times>}}
(gdb) n
1140 LWLockRelease(AutovacuumLock);
(gdb) p AutoVacuumShmem->av_runningWorkers
$3 = {head = {prev = 0x7f8ccf1a3520, next = 0x7f8ccf1a3520}}
(gdb) n
1146 tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
1149 oldcxt = MemoryContextSwitchTo(tmpcxt);
1152 autovac_refresh_stats();
(gdb) n
1155 dblist = get_database_list();
1162 recentXid = ReadNewTransactionId();
(gdb) p *dblist
$8 = {type = T_List, length = 5, head = 0x2382d48, tail = 0x2382f90}
(gdb) n
1163 xidForceLimit = recentXid - autovacuum_freeze_max_age;
(gdb) p recentXid
$9 = 2917
(gdb) p autovacuum_freeze_max_age
$10 = 200000000
(gdb) n
1166 if (xidForceLimit < FirstNormalTransactionId)
(gdb) p xidForceLimit
$11 = 4094970213
(gdb) p FirstNormalTransactionId
$12 = 3
(gdb) n
1170 recentMulti = ReadNextMultiXactId();
1171 multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
1172 if (multiForceLimit < FirstMultiXactId)
(gdb) p recentMulti
$13 = 1
(gdb) p MultiXactMemberFreezeThreshold()
$14 = 400000000
(gdb) n
1196 avdb = NULL;
1197 for_xid_wrap = false;
1198 for_multi_wrap = false;
1199 current_time = GetCurrentTimestamp();
1200 foreach(cell, dblist)
1202 avw_dbase *tmp = lfirst(cell);
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb) p *tmp --> 这是postgres数据库
$15 = {adw_datid = 13591, adw_name = 0x2382d20 "postgres", adw_frozenxid = 479, adw_minmulti = 1,
adw_entry = 0x0}
(gdb) n
1215 else if (for_xid_wrap)
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
1225 else if (for_multi_wrap)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
1235 if (!tmp->adw_entry)
1236 continue;
1200 foreach(cell, dblist)
1202 avw_dbase *tmp = lfirst(cell);
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb) p *tmp --> 这是testdb数据库
$16 = {adw_datid = 16384, adw_name = 0x2382de0 "testdb", adw_frozenxid = 531, adw_minmulti = 1, adw_entry = 0x0}
(gdb) p tmp->adw_frozenxid
$17 = 531
(gdb) p xidForceLimit
$18 = 4094970213
(gdb) n
1215 else if (for_xid_wrap)
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
1225 else if (for_multi_wrap)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
1235 if (!tmp->adw_entry)
1245 skipit = false;
1247 dlist_reverse_foreach(iter, &DatabaseList)
1249 avl_dbase *dbp = dlist_container(avl_dbase, adl_node, iter.cur);
1251 if (dbp->adl_datid == tmp->adw_datid)
1257 if (!TimestampDifferenceExceeds(dbp->adl_next_worker,
1267 if (skipit)
1274 if (avdb == NULL ||
1276 avdb = tmp;
(gdb) n
1200 foreach(cell, dblist)
1202 avw_dbase *tmp = lfirst(cell);
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
1215 else if (for_xid_wrap)
(gdb) p *tmp
$19 = {adw_datid = 1, adw_name = 0x2382e60 "template1", adw_frozenxid = 479, adw_minmulti = 1, adw_entry = 0x0}
(gdb) n
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
1225 else if (for_multi_wrap)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
1235 if (!tmp->adw_entry)
1236 continue; --> 没有统计信息的,忽略
1200 foreach(cell, dblist)
1202 avw_dbase *tmp = lfirst(cell);
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
1215 else if (for_xid_wrap)
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
1225 else if (for_multi_wrap)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
1235 if (!tmp->adw_entry)
1236 continue;
1200 foreach(cell, dblist)
1202 avw_dbase *tmp = lfirst(cell);
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
1215 else if (for_xid_wrap)
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
1225 else if (for_multi_wrap)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
1235 if (!tmp->adw_entry)
1236 continue;
1200 foreach(cell, dblist)
1280 if (avdb != NULL)
1285 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1291 wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
1293 worker = dlist_container(WorkerInfoData, wi_links, wptr);
(gdb) p *wptr
$20 = {prev = 0x7f8ccf1a3510, next = 0x7f8ccf1a4978}
(gdb) n
1294 worker->wi_dboid = avdb->adw_datid;
(gdb) p *worker
$21 = {wi_links = {prev = 0x7f8ccf1a3510, next = 0x7f8ccf1a4978}, wi_dboid = 0, wi_tableoid = 0, wi_proc = 0x0,
wi_launchtime = 0, wi_dobalance = false, wi_sharedrel = false, wi_cost_delay = 0, wi_cost_limit = 0,
wi_cost_limit_base = 0}
(gdb) n
1295 worker->wi_proc = NULL;
1296 worker->wi_launchtime = GetCurrentTimestamp();
1298 AutoVacuumShmem->av_startingWorker = worker;
1300 LWLockRelease(AutovacuumLock);
(gdb) p *AutoVacuumShmem
$22 = {av_signal = {0, 0}, av_launcherpid = 5476, av_freeWorkers = {head = {prev = 0x7f8ccf1a4938,
next = 0x7f8ccf1a4978}}, av_runningWorkers = {head = {prev = 0x7f8ccf1a3520, next = 0x7f8ccf1a3520}},
av_startingWorker = 0x7f8ccf1a49b8, av_workItems = {{avw_type = AVW_BRINSummarizeRange, avw_used = false,
avw_active = false, avw_database = 0, avw_relation = 0, avw_blockNumber = 0} <repeats 256 times>}}
(gdb) n
1304 retval = avdb->adw_datid;
Program received signal SIGUSR2, User defined signal 2.
do_start_worker () at autovacuum.c:1304
1304 retval = avdb->adw_datid;
avl_sigusr2_handler (postgres_signal_arg=32764) at autovacuum.c:1405
1405 {
PG Source Code