typedef struct RelationData
RelFileNode rd_node; /* relation physical identifier */
/* use "struct" here to avoid needing to include smgr.h: */
struct SMgrRelationData *rd_smgr; /* cached file handle, or NULL */
int rd_refcnt; /* reference count */
BackendId rd_backend; /* owning backend id, if temporary relation */
bool rd_islocaltemp; /* rel is a temp rel of this session */
bool rd_isnailed; /* rel is nailed in cache */
bool rd_isvalid; /* relcache entry is valid */
char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 =
* valid, 2 = temporarily forced */
bool rd_statvalid; /* is rd_statlist valid? */
* rd_createSubid is the ID of the highest subtransaction the rel has
* survived into; or zero if the rel was not created in the current top
* transaction. This can be now be relied on, whereas previously it could
* be "forgotten" in earlier releases. Likewise, rd_newRelfilenodeSubid is
* the ID of the highest subtransaction the relfilenode change has
* survived into, or zero if not changed in the current transaction (or we
* have forgotten changing it). rd_newRelfilenodeSubid can be forgotten
* when a relation has multiple new relfilenodes within a single
* transaction, with one of them occurring in a subsequently aborted
* subtransaction, e.g. BEGIN; TRUNCATE t; SAVEPOINT save; TRUNCATE t;
* ROLLBACK TO save; -- rd_newRelfilenode is now forgotten
SubTransactionId rd_createSubid; /* rel was created in current xact */
SubTransactionId rd_newRelfilenodeSubid; /* new relfilenode assigned in
* current xact */
Form_pg_class rd_rel; /* RELATION tuple */
TupleDesc rd_att; /* tuple descriptor */
Oid rd_id; /* relation's object id */
LockInfoData rd_lockInfo; /* lock mgr's info for locking relation */
RuleLock *rd_rules; /* rewrite rules */
MemoryContext rd_rulescxt; /* private memory cxt for rd_rules, if any */
TriggerDesc *trigdesc; /* Trigger info, or NULL if rel has none */
/* use "struct" here to avoid needing to include rowsecurity.h: */
struct RowSecurityDesc *rd_rsdesc; /* row security policies, or NULL */
/* data managed by RelationGetFKeyList: */
List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */
bool rd_fkeyvalid; /* true if list has been computed */
MemoryContext rd_partkeycxt; /* private memory cxt for the below */
struct PartitionKeyData *rd_partkey; /* partition key, or NULL */
MemoryContext rd_pdcxt; /* private context for partdesc */
struct PartitionDescData *rd_partdesc; /* partitions, or NULL */
List *rd_partcheck; /* partition CHECK quals */
/* data managed by RelationGetIndexList: */
List *rd_indexlist; /* list of OIDs of indexes on relation */
Oid rd_oidindex; /* OID of unique index on OID, if any */
Oid rd_pkindex; /* OID of primary key, if any */
Oid rd_replidindex; /* OID of replica identity index, if any */
/* data managed by RelationGetStatExtList: */
List *rd_statlist; /* list of OIDs of extended stats */
/* data managed by RelationGetIndexAttrBitmap: */
Bitmapset *rd_indexattr; /* columns used in non-projection indexes */
Bitmapset *rd_projindexattr; /* columns used in projection indexes */
Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */
Bitmapset *rd_pkattr; /* cols included in primary key */
Bitmapset *rd_idattr; /* included in replica identity index */
Bitmapset *rd_projidx; /* Oids of projection indexes */
PublicationActions *rd_pubactions; /* publication actions */
* rd_options is set whenever rd_rel is loaded into the relcache entry.
* Note that you can NOT look into rd_rel for this data. NULL means "use
* defaults".
bytea *rd_options; /* parsed pg_class.reloptions */
/* These are non-NULL only for an index relation: */
Form_pg_index rd_index; /* pg_index tuple describing this index */
/* use "struct" here to avoid needing to include htup.h: */
struct HeapTupleData *rd_indextuple; /* all of pg_index tuple */
* index access support info (used only for an index relation)
* Note: only default support procs for each opclass are cached, namely
* those with lefttype and righttype equal to the opclass's opcintype. The
* arrays are indexed by support function number, which is a sufficient
* identifier given that restriction.
* Note: rd_amcache is available for index AMs to cache private data about
* an index. This must be just a cache since it may get reset at any time
* (in particular, it will get reset by a relcache inval message for the
* index). If used, it must point to a single memory chunk palloc'd in
* rd_indexcxt. A relcache reset will include freeing that chunk and
* setting rd_amcache = NULL.
Oid rd_amhandler; /* OID of index AM's handler function */
MemoryContext rd_indexcxt; /* private memory cxt for this stuff */
/* use "struct" here to avoid needing to include amapi.h: */
struct IndexAmRoutine *rd_amroutine; /* index AM's API struct */
Oid *rd_opfamily; /* OIDs of op families for each index col */
Oid *rd_opcintype; /* OIDs of opclass declared input data types */
RegProcedure *rd_support; /* OIDs of support procedures */
FmgrInfo *rd_supportinfo; /* lookup info for support procedures */
int16 *rd_indoption; /* per-column AM-specific flags */
List *rd_indexprs; /* index expression trees, if any */
List *rd_indpred; /* index predicate tree, if any */
Oid *rd_exclops; /* OIDs of exclusion operators, if any */
Oid *rd_exclprocs; /* OIDs of exclusion ops' procs, if any */
uint16 *rd_exclstrats; /* exclusion ops' strategy numbers, if any */
void *rd_amcache; /* available for use by index AM */
Oid *rd_indcollation; /* OIDs of index collations */
* foreign-table support
* rd_fdwroutine must point to a single memory chunk palloc'd in
* CacheMemoryContext. It will be freed and reset to NULL on a relcache
* reset.
/* use "struct" here to avoid needing to include fdwapi.h: */
struct FdwRoutine *rd_fdwroutine; /* cached function pointers, or NULL */
* Hack for CLUSTER, rewriting ALTER TABLE, etc: when writing a new
* version of a table, we need to make any toast pointers inserted into it
* have the existing toast table's OID, not the OID of the transient toast
* table. If rd_toastoid isn't InvalidOid, it is the OID to place in
* toast pointers inserted into this rel. (Note it's set on the new
* version of the main heap, not the toast table itself.) This also
* causes toast_save_datum() to try to preserve toast value OIDs.
Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */
/* use "struct" here to avoid needing to include pgstat.h: */
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData;
/* Relation is a pointer to a RelationData relcache entry. */
typedef struct RelationData *Relation;
/*
 * Buffer identifiers.
 *
 * Zero is invalid, positive is the index of a shared buffer (1..NBuffers),
 * negative is the index of a local buffer (-1 .. -NLocBuffer).
 */
typedef int Buffer;

/* The reserved "no buffer" identifier. */
#define InvalidBuffer	0
struct HeapTupleHeaderData
HeapTupleFields t_heap;
DatumTupleFields t_datum;
} t_choice;
ItemPointerData t_ctid; /* current TID of this or newer tuple (or a
* speculative insertion token) */
/* Fields below here must match MinimalTupleData! */
uint16 t_infomask2; /* number of attributes + various flags */
uint16 t_infomask; /* various flag bits, see below */
uint8 t_hoff; /* sizeof header incl. bitmap, padding */
/* ^ - 23 bytes - ^ */
bits8 t_bits[FLEXIBLE_ARRAY_MEMBER]; /* bitmap of NULLs */
typedef struct ItemPointerData
BlockIdData ip_blkid;
OffsetNumber ip_posid;
} ItemPointerData;
typedef ItemPointerData *ItemPointer;
/*
 * BlockIdData
 *		Block identifier stored as two 16-bit halves.
 *
 * BlockIdSet stores the block number shifted right by 16 bits in bi_hi and
 * its low 16 bits in bi_lo.
 */
typedef struct BlockIdData
{
	uint16		bi_hi;			/* high 16 bits of the block number */
	uint16		bi_lo;			/* low 16 bits of the block number */
} BlockIdData;

typedef BlockIdData *BlockId;	/* block identifier */
typedef struct HeapTupleData
uint32 t_len; /* length of *t_data */
ItemPointerData t_self; /* SelfItemPointer */
Oid t_tableOid; /* table the tuple came from */
HeapTupleHeader t_data; /* -> tuple header and data */
} HeapTupleData;
typedef HeapTupleData *HeapTuple;
#define HEAPTUPLESIZE MAXALIGN(sizeof(HeapTupleData))
/*
 * HeapTupleHeaderIsSpeculative
 *		True when the offset number stored in the header's t_ctid equals
 *		SpecTokenOffsetNumber.
 *
 * `tup` is a pointer to a tuple header; it is evaluated once.
 */
#define HeapTupleHeaderIsSpeculative(tup) \
( \
	(ItemPointerGetOffsetNumberNoCheck(&(tup)->t_ctid) == SpecTokenOffsetNumber) \
)
/*
 * ItemPointerGetOffsetNumberNoCheck
 *		Returns the ip_posid field of an item pointer without validating
 *		the pointer.
 */
#define ItemPointerGetOffsetNumberNoCheck(pointer) \
( \
	(pointer)->ip_posid \
)
/* Returns the block that BufferGetBlock yields for `buffer`, cast to Page. */
#define BufferGetPage(buffer) ((Page)BufferGetBlock(buffer))
/*
 * BufferGetBlock
 *		Returns the in-memory block belonging to a buffer identifier.
 *
 * A local buffer (negative identifier) is looked up in
 * LocalBufferBlockPointers at index -(buffer) - 1.  Any other identifier
 * addresses slot (buffer - 1) of BufferBlocks, with slots BLCKSZ bytes apart.
 * The identifier is asserted valid first; `buffer` is evaluated more than
 * once, so it must have no side effects.
 */
#define BufferGetBlock(buffer) \
( \
	AssertMacro(BufferIsValid(buffer)), \
	BufferIsLocal(buffer) ? \
		LocalBufferBlockPointers[-(buffer) - 1] \
	: \
		(Block) (BufferBlocks + ((Size) ((buffer) - 1)) * BLCKSZ) \
)
/* True when the buffer identifier is negative, i.e. denotes a local buffer. */
#define BufferIsLocal(buffer) ((buffer) < 0)
typedef void *Block; /* generic pointer: may point to data of any type */
/* Pointer to Block pointers (a pointer to pointer); starts out NULL. */
Block *LocalBufferBlockPointers = NULL;
* BufferGetBlockNumber
* Returns the block number associated with a buffer.
* Note:
* Assumes that the buffer is valid and pinned, else the
* value may be obsolete immediately...
BufferGetBlockNumber(Buffer buffer)
BufferDesc *bufHdr;
if (BufferIsLocal(buffer))
bufHdr = GetLocalBufferDescriptor(-buffer - 1);
bufHdr = GetBufferDescriptor(buffer - 1);
/* pinned, so OK to read tag without spinlock */
return bufHdr->tag.blockNum;
/*
 * BlockIdSet
 *		Sets a block identifier to the specified value.
 *
 * The block number is split in two: bi_hi receives the value shifted right
 * by 16 bits (the high half) and bi_lo receives the value masked with 0xffff
 * (the low half).  `blockId` and `blockNumber` are evaluated more than once.
 *
 * No comment may follow a line-continuation backslash: the backslash must be
 * the last character on the line or the macro ends there.
 */
#define BlockIdSet(blockId, blockNumber) \
( \
	AssertMacro(PointerIsValid(blockId)), \
	(blockId)->bi_hi = (blockNumber) >> 16, \
	(blockId)->bi_lo = (blockNumber) & 0xffff \
)
/*
 * ItemPointerSet
 *		Sets a disk item pointer to the specified block and offset.
 *
 * The block number is stored through BlockIdSet into ip_blkid and the offset
 * number is assigned to ip_posid.  `pointer` is evaluated more than once.
 */
#define ItemPointerSet(pointer, blockNumber, offNum) \
( \
	AssertMacro(PointerIsValid(pointer)), \
	BlockIdSet(&((pointer)->ip_blkid), blockNumber), \
	(pointer)->ip_posid = offNum \
)
/*
 * PageGetItemId
 *		Returns an item identifier of a page.
 *
 * offsetNumber is 1-based: the result is the address of element
 * (offsetNumber - 1) of the page header's pd_linp array.
 */
#define PageGetItemId(page, offsetNumber) \
	((ItemId) (&((PageHeader) (page))->pd_linp[(offsetNumber) - 1]))
/*
 * PageGetItem
 *		Retrieves an item on the given page.
 *
 * The item's address is the page start plus the byte offset that
 * ItemIdGetOffset reports for itemId.  Both arguments are asserted valid
 * first and are evaluated more than once.
 *
 * Note:
 *		This does not change the status of any of the resources passed.
 *		The semantics may change in the future.
 */
#define PageGetItem(page, itemId) \
( \
	AssertMacro(PageIsValid(page)), \
	AssertMacro(ItemIdHasStorage(itemId)), \
	(Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \
)
/*
 * ItemIdGetOffset
 *		Returns the lp_off field of an item identifier, i.e. the item's
 *		byte offset that PageGetItem adds to the page start.
 */
#define ItemIdGetOffset(itemId) \
	((itemId)->lp_off)
* RelationPutHeapTuple - place tuple at specified page
* !!! EREPORT(ERROR) IS DISALLOWED HERE !!! Must PANIC on failure!!!
* Note - caller must hold BUFFER_LOCK_EXCLUSIVE on the buffer.
RelationPutHeapTuple(Relation relation,
Buffer buffer,
HeapTuple tuple,
bool token)
Page pageHeader;//页头
OffsetNumber offnum;//行偏移
* A tuple that's being inserted speculatively should already have its
* token set.
//TODO token & speculatively有待考究
Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));
/* Add the tuple to the page */
pageHeader = BufferGetPage(buffer);
offnum = PageAddItem(pageHeader, (Item) tuple->t_data,
tuple->t_len, InvalidOffsetNumber, false, true);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "failed to add tuple to page");
/* Update tuple->t_self to the actual position where it was stored */
ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);
* Insert the correct position into CTID of the stored tuple, too (unless
* this is a speculative insertion, in which case the token is held in
* CTID field instead)
if (!token)
ItemId itemId = PageGetItemId(pageHeader, offnum);
HeapTupleHeader item = (HeapTupleHeader) PageGetItem(pageHeader, itemId);
item->t_ctid = tuple->t_self;
testdb=# vacuum t_insert;
testdb=# checkpoint;
testdb=# select pg_backend_pid();
(1 row)
[root@localhost ~]# gdb -p 1582
GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-100.el7
testdb=# -- insert 1 row
testdb=# insert into t_insert values(10,'10','10','10');
(gdb) b RelationPutHeapTuple
Breakpoint 1 at 0x4cf492: file hio.c, line 51.
(gdb) p *relation
$5 = {rd_node = {spcNode = 1663, dbNode = 16477, relNode = 26731}, rd_smgr = 0x259db68, rd_refcnt = 1, rd_backend = -1, rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true,
rd_indexvalid = 0 '\000', rd_statvalid = false, rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_rel = 0x7fa9814589e8, rd_att = 0x7fa981458af8, rd_id = 26731, rd_lockInfo = {lockRelId = {
relId = 26731, dbId = 16477}}, rd_rules = 0x0, rd_rulescxt = 0x0, trigdesc = 0x0, rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkeycxt = 0x0, rd_partkey = 0x0, rd_pdcxt = 0x0,
rd_partdesc = 0x0, rd_partcheck = 0x0, rd_indexlist = 0x0, rd_oidindex = 0, rd_pkindex = 0, rd_replidindex = 0, rd_statlist = 0x0, rd_indexattr = 0x0, rd_projindexattr = 0x0, rd_keyattr = 0x0,
rd_pkattr = 0x0, rd_idattr = 0x0, rd_projidx = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_index = 0x0, rd_indextuple = 0x0, rd_amhandler = 0, rd_indexcxt = 0x0, rd_amroutine = 0x0,
rd_opfamily = 0x0, rd_opcintype = 0x0, rd_support = 0x0, rd_supportinfo = 0x0, rd_indoption = 0x0, rd_indexprs = 0x0, rd_indpred = 0x0, rd_exclops = 0x0, rd_exclprocs = 0x0, rd_exclstrats = 0x0,
rd_amcache = 0x0, rd_indcollation = 0x0, rd_fdwroutine = 0x0, rd_toastoid = 0, pgstat_info = 0x2591850}
(gdb) p buffer
$6 = 95
(gdb) p tuple
$7 = (HeapTuple) 0x2539a20
(gdb) p *tuple #note: HeapTuple
$8 = {t_len = 61, t_self = {ip_blkid = {bi_hi = 65535, bi_lo = 65535}, ip_posid = 0}, t_tableOid = 26731, t_data = 0x2539a38}
(gdb) p *tuple->t_data #note: HeapTupleHeader
$9 = {t_choice = {t_heap = {t_xmin = 1612851, t_xmax = 0, t_field3 = {t_cid = 0, t_xvac = 0}}, t_datum = {datum_len_ = 1612851, datum_typmod = 0, datum_typeid = 0}}, t_ctid = {ip_blkid = {
bi_hi = 65535, bi_lo = 65535}, ip_posid = 0}, t_infomask2 = 4, t_infomask = 2050, t_hoff = 24 '\030', t_bits = 0x2539a4f ""}
(gdb) p token
$10 = false
(gdb) p *(PageHeader)pageHeader
$11 = {pd_lsn = {xlogid = 1, xrecoff = 3677464616}, pd_checksum = 0, pd_flags = 5, pd_lower = 60, pd_upper = 7680, pd_special = 8192, pd_pagesize_version = 8196, pd_prune_xid = 0,
pd_linp = 0x7fa96957d318}
(gdb) next
56 if (offnum == InvalidOffsetNumber)
(gdb) p offnum #item 2 had been deleted; after VACUUM reclaimed it, the slot is available for reuse
$12 = 2
(gdb) p *itemId
$13 = {lp_off = 7616, lp_flags = 1, lp_len = 61}
(gdb) p *item
$14 = {t_choice = {t_heap = {t_xmin = 1612851, t_xmax = 0, t_field3 = {t_cid = 0, t_xvac = 0}}, t_datum = {datum_len_ = 1612851, datum_typmod = 0, datum_typeid = 0}}, t_ctid = {ip_blkid = {
bi_hi = 65535, bi_lo = 65535}, ip_posid = 0}, t_infomask2 = 4, t_infomask = 2050, t_hoff = 24 '\030', t_bits = 0x7fa96957f0d7 ""}
(gdb) next
74 }
(gdb) p *item
No symbol "item" in current context.
(gdb) p tuple->t_self
$15 = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 2} #block 0, offset 2
(gdb) c