PostgreSQL逻辑备份pg_dump
作者:mmseoamin日期:2023-12-20

一、原理分析

1、循环调用getopt_long解析命令行参数,将参数保存到static DumpOptions dopt;中 2、判断参数是否相容,不相容则退出:

  options -s/--schema-only and -a/--data-only cannot be used together
  options -c/--clean and -a/--data-only cannot be used together
  options --inserts/--column-inserts and -o/--oids cannot be used together
  option --if-exists requires option -c/--clean

3、调用CreateArchive打开输出文件,输出流为fout。该函数使用4个文件封装了4种不同的dump文件格式;新增导出文件类型时只需增加新文件,各自封装、相互独立,易于维护。

  CreateArchive->_allocAH:
    switch (AH->format){
      case archCustom:
        InitArchiveFmt_Custom(AH);
        break;
      case archNull:
        InitArchiveFmt_Null(AH);
        break;
      case archDirectory:
        InitArchiveFmt_Directory(AH);
        break;
      case archTar:
        InitArchiveFmt_Tar(AH);
        break;
      default:
        exit_horribly(modulename, "unrecognized file format \"%d\"\n", fmt);
    }

4、fout是一个重要的全局变量

5、调用ConnectDatabase连接数据库

6、调用setup_connection,在连接上执行一些SQL语句:

  SELECT pg_catalog.set_config('search_path', '', false);
  set client_encoding to '%s'//pg_dump -E指定
  SET ROLE %s//
  SET DATESTYLE = ISO;
  SET INTERVALSTYLE = POSTGRES;
  SET extra_float_digits TO 3;
  SET synchronize_seqscans TO off;
  SET statement_timeout = 0;
  SET lock_timeout = 0;
  SET idle_in_transaction_session_timeout = 0;
  SET row_security = off;
  BEGIN;
  SET TRANSACTION ISOLATION LEVEL REPEATABLE READ, READ ONLY;

7、为兼容低版本,根据服务器版本号决定一些变量取值

8、调用tblinfo = getSchemaData(fout, &numTables);决定导出哪些数据库对象。本函数又调用如下函数,值得关注哦。为了存储每个对象的元数据,这些函数会malloc申请空间,直到pg_dump进程结束才释放。

  extinfo = getExtensions(fout, &numExtensions);
  extinfoindex = buildIndexArray(extinfo, numExtensions, sizeof(ExtensionInfo));
  getExtensionMembership(fout, extinfo, numExtensions);
  nspinfo = getNamespaces(fout, &numNamespaces);
  nspinfoindex = buildIndexArray(nspinfo, numNamespaces, sizeof(NamespaceInfo));
  tblinfo = getTables(fout, &numTables);
  tblinfoindex = buildIndexArray(tblinfo, numTables, sizeof(TableInfo));
  getOwnedSeqs(fout, tblinfo, numTables);
  funinfo = getFuncs(fout, &numFuncs);
  funinfoindex = buildIndexArray(funinfo, numFuncs, sizeof(FuncInfo));
  typinfo = getTypes(fout, &numTypes);
  typinfoindex = buildIndexArray(typinfo, numTypes, sizeof(TypeInfo));
  getProcLangs(fout, &numProcLangs);
  getAggregates(fout, &numAggregates);
  oprinfo = getOperators(fout, &numOperators);
  oprinfoindex = buildIndexArray(oprinfo, numOperators, sizeof(OprInfo));
  getAccessMethods(fout, &numAccessMethods);
  getOpclasses(fout, &numOpclasses);
  getOpfamilies(fout, &numOpfamilies);
  getTSParsers(fout, &numTSParsers);
  getTSTemplates(fout, &numTSTemplates);
  getTSDictionaries(fout, &numTSDicts);
  getTSConfigurations(fout, &numTSConfigs);
  getForeignDataWrappers(fout, &numForeignDataWrappers);
  getForeignServers(fout, &numForeignServers);
  getDefaultACLs(fout, &numDefaultACLs);
  collinfo = getCollations(fout, &numCollations);
  collinfoindex = buildIndexArray(collinfo, numCollations, sizeof(CollInfo));
  getConversions(fout, &numConversions);
  getCasts(fout, &numCasts);
  getTransforms(fout, &numTransforms);
  inhinfo = getInherits(fout, &numInherits);
  getEventTriggers(fout, &numEventTriggers);
  processExtensionTables(fout, extinfo, numExtensions);
  flagInhTables(tblinfo, numTables, inhinfo, numInherits);
  getTableAttrs(fout, tblinfo, numTables);
  flagInhAttrs(fout->dopt, tblinfo, numTables);
  getIndexes(fout, tblinfo, numTables);
  getExtendedStatistics(fout);
  getConstraints(fout, tblinfo, numTables);
  getTriggers(fout, tblinfo, numTables);
  getRules(fout, &numRules);
  getPolicies(fout, tblinfo, numTables);
  getPublications(fout);
  getPublicationTables(fout, tblinfo, numTables);
  getSubscriptions(fout);

对于每个getXXXs函数都将执行下面流程,以getTables为例:

1)根据服务器版本号查询系统表,读出对象的元数据信息

2)malloc内存空间并将查询结果存放到对象的数据结构中,TableInfo

3)对于每条元数据信息,调用selectDumpableTable标记需要导出的表,如果-t指定导出表,遍历该列表,得到对应表并标记:DUMP_COMPONENT_ALL;-T指定排除的表,则标记tbinfo->dobj.dump = DUMP_COMPONENT_NONE

4)dumpIdMap[dobj->dumpId] = dobj;将导出表的元数据存放到dumpIdMap数组中

5)在导出表上执行LOCK TABLE %s IN ACCESS SHARE MODE

6)将所有元数据信息保存后,执行SET statement_timeout = 0保证语句不超时,能够一直执行下去

9、调用getTableData函数,获取表对应的数据。实际上,并不是表真正数据,而是为表数据建立一个“导出对象”,将来导出时,依据导出对象获取真实的数据再导出。虽然先把导出对象放到AH->toc链表上,真正导出时导出数据,不会占用大量内存空间,但是针对这些元数据,当表特别多的时候,由于不到进程退出不释放内存,占用内存还是非常可观的。

该函数调用makeTableDataInfo:

1)view、外部表、分区表子表(从父表导出)和unlogged permanent table不用导出

2)判断该表是否在导出时被指定排除

3)malloc一个TableDataInfo,保存表信息

    /*
     * Dumpable-object record standing for the data of one table: the common
     * DumpableObject header plus a link back to the table it belongs to and
     * the per-table options that shape the data dump.
     */
    typedef struct _tableDataInfo
    {
      DumpableObject dobj;    /* common dumpable-object header (catId, name, namespace) */
      TableInfo  *tdtable;    /* link to table to dump */
      bool    oids;      /* include OIDs in data? */
      char     *filtercond;    /* WHERE condition to limit rows dumped */
    } TableDataInfo;

4)设置tdinfo->dobj.catId.tableoid、tdinfo->dobj.catId.oid、tdinfo->dobj.name、tdinfo->dobj.namespace 等信息,并将dobj保存到dumpIdMap数组

10、如果需要导出大对象,调用getBlobs,同上也是保存到数组,并没有真正导出数据

11、调用getDependencies重新整理每个对象的依赖关系。

12、getDumpableObjects从dumpIdMap数组中获取dump对象

13、sortDumpableObjectsByTypeName、sortDataAndIndexObjectsBySize(如果是并行dump,需要按表大小排序)、sortDumpableObjects把所有对象重新排列:不同类型对象导出优先级依赖于dbObjectTypePriority数组;相同类型按名称排序

  /*
   * Sort priority for each dumpable object type, one entry per type in the
   * order named by the per-entry comments.  Some types share a value (for
   * example DO_TYPE and DO_SHELL_TYPE are both 5, DO_COLLATION and
   * DO_TRANSFORM are both 3).
   * NOTE(review): presumably indexed by the object-type enum, with lower
   * values dumped earlier -- confirm against the sort routine, not shown here.
   */
  static const int dbObjectTypePriority[] =
  {
    1,  /* DO_NAMESPACE */
    4,  /* DO_EXTENSION */
    5,  /* DO_TYPE */
    5,  /* DO_SHELL_TYPE */
    6,  /* DO_FUNC */
    7,  /* DO_AGG */
    8,  /* DO_OPERATOR */
    8,  /* DO_ACCESS_METHOD */
    9,  /* DO_OPCLASS */
    9,  /* DO_OPFAMILY */
    3,  /* DO_COLLATION */
    11,  /* DO_CONVERSION */
    18,  /* DO_TABLE */
    20,  /* DO_ATTRDEF */
    28,  /* DO_INDEX */
    29,  /* DO_STATSEXT */
    30,  /* DO_RULE */
    31,  /* DO_TRIGGER */
    27,  /* DO_CONSTRAINT */
    32,  /* DO_FK_CONSTRAINT */
    2,  /* DO_PROCLANG */
    10,  /* DO_CAST */
    23,  /* DO_TABLE_DATA */
    24,  /* DO_SEQUENCE_SET */
    19,  /* DO_DUMMY_TYPE */
    12,  /* DO_TSPARSER */
    14,  /* DO_TSDICT */
    13,  /* DO_TSTEMPLATE */
    15,  /* DO_TSCONFIG */
    16,  /* DO_FDW */
    17,  /* DO_FOREIGN_SERVER */
    32,  /* DO_DEFAULT_ACL */
    3,  /* DO_TRANSFORM */
    21,  /* DO_BLOB */
    25,  /* DO_BLOB_DATA */
    22,  /* DO_PRE_DATA_BOUNDARY */
    26,  /* DO_POST_DATA_BOUNDARY */
    33,  /* DO_EVENT_TRIGGER */
    38,  /* DO_REFRESH_MATVIEW */
    34,  /* DO_POLICY */
    35,  /* DO_PUBLICATION */
    36,  /* DO_PUBLICATION_REL */
    37  /* DO_SUBSCRIPTION */
  };

14、dumpEncoding、dumpStdStrings、dumpSearchPath导出编码信息,使用双向链表TOCEntry保存导出对象。例如:

  newToc->defn:"SET client_encoding='UTF8';\n"
  SET standard_conforming_strings='on';
  SELECT pg_catalog.set_config('search_path','',false);\n

15、dumpDatabase导出本连接对应的目的数据库信息,同样是newToc,newToc->defn:CREATE DATABASE yzs WITH TEMPLATE=template0 ENCODING='UTF8' LC_COLLATE='zh_CN.UTF-8' LC_CTYPE='zh_CN.UTF-8'

16、遍历所有对象,对于每个对象调用dumpDumpableObject,本函数调用一堆诸如dumpNamespace、dumpExtension等的函数,将其插入循环链表。

  for (i = 0; i < numObjs; i++)
    dumpDumpableObject(fout, dobjs[i]);

--------------------------以上所有导出,不真正导出数据----------------------------

17、遍历链表标记哪些对象Toc entry需要导出:ProcessArchiveRestoreOptions

18、如果导出格式是plain,则调用RestoreArchive,输出到文件显示的是SQL语句,不再是不可识别的二进制文件

19、关闭句柄释放资源CloseArchive,根据函数指针调用不同文件类型的_CloseArchive(导出数据到文件 RestoreArchive -> restore_toc_entry -> _printTocEntry)

二、不同格式的处理函数

-F, --format=c|d|t|p output file format (custom, directory, tar,plain text (default))

目前,pg_dump支持4种导出格式:

custom(pg_backup_custom.c):导出二进制格式的文件。包括文件头和文件体。文件体是一个链表,保存每个备份对象,每个可备份对象都有一套统一的结构表示,支持压缩

plain(pg_backup_null.c):把SQL脚本内容输出到标准输出,默认方式

directory(pg_backup_directory.c):导出包括备份一个主文件和一些辅助文件,主文件方式类似于custom文件格式,辅助文件是数据文件,每个辅助文件对应备份对象中的一个表,需要和-f一起使用

tar(pg_backup_tar.c):文件备份基本类似“directory”方式,但最后备份的所有文件都要归档到一个tar文件。文件最大大小为8GB(受限于tar file format)

PostgreSQL通过函数指针来实现这四种导出格式。在pg_backup_archive.h文件中有诸如下面的大量函数指针:

  /* Callback types that receive only the archive handle (close / reopen hooks, going by their names). */
  typedef void (*ClosePtrType) (ArchiveHandle *AH);
  typedef void (*ReopenPtrType) (ArchiveHandle *AH);
  /* Callback type that receives the archive handle together with one TOC entry. */
  typedef void (*ArchiveEntryPtrType) (ArchiveHandle *AH, TocEntry *te);

这些函数指针,在下面文件里分别初始化:

  pg_backup_custom.c->InitArchiveFmt_Custom(ArchiveHandle *AH)
  pg_backup_null.c->InitArchiveFmt_Null(ArchiveHandle *AH)
  pg_backup_directory.c->InitArchiveFmt_Directory(ArchiveHandle *AH)
  pg_backup_tar.c->InitArchiveFmt_Tar(ArchiveHandle *AH)

在数据结构ArchiveHandle中使用了大量函数指针,使得在初始化不同导出文件格式的Archive结构时,能为处理函数赋值为各自不同的处理函数。这样在pg_dump.c中只需要根据用户指定的文件格式的参数,就可以调用相应的处理函数。见第一部分的第3步。

概括地说,pg_dump导出的内容可以分为数据库对象的定义和数据。导出数据库对象的定义时,先通过查询系统表把对应的元数据信息读取出来,再把该对象的各类信息(包括其依赖对象的oid)置于一个链表上。而具体的数据,也就是每个数据表的数据,也被抽象为一个数据库对象,保存在此链表中。通过调节导出顺序,先导出数据库对象的定义,然后导出数据;至于数据,则根据链表中对应数据对象节点的信息,执行相应的SQL语句,从表中读出数据然后写出去。所以,在内存中只有链表上对象的定义,数据是边读边写出的,可以使用流式读出。

三、使用方法

三、使用方法

1)以目录格式导出,需要和-f一起使用。toc.dat保存所有可导出对象的信息(表定义等),其他文件是数据,以表的oid为命名,test是目录。

[postgres@localhost ~]$ pg_dump --format=d yzs -f test
[postgres@localhost ~]$ cd test
[postgres@localhost test]$ ll
total 8
-rw-rw-r--. 1 postgres postgres   31 Mar 23 06:07 3010.dat.gz
-rw-rw-r--. 1 postgres postgres 2124 Mar 23 06:07 toc.dat

2)导出SQL语句到test.sql中

[postgres@localhost ~]$ pg_dump --format=p yzs -f test.sql

3)以二进制格式输出

[postgres@localhost ~]$ pg_dump --format=c -f test yzs

4)以tar格式输出。与d格式不同在于多了一个restore.sql文件(plain格式文件),并将所有文件打包成一个文件

[postgres@localhost ~]$ pg_dump --format=t -f test yzs
[postgres@localhost ~]$ tar -xvf test
toc.dat
3010.dat
restore.sql

5)仅导出数据库结构(不指定库,默认是postgres)

pg_dump -s yzs -f 1.sql

6)导出时导出drop database和create database语句。需注意,导入时如有用户连接着该库,则drop语句执行失败

pg_dump -s yzs -C -c -f 1.txt

7、-t指定导出某些表,只导出temp开头的表等对象

pg_dump -t temp* -f 1.txt yzs

8、-n只导出指定的schema,可以多个-n;-N指定不导出的schema

Tips:

1.Fp和Fc区别

        导出类型有四种,sql,二进制,tar,目录。常用的是sql和二进制。Fp和Fc处理流程基本一致,写入方式和创建archive不同。toc链(要导出的对象和数据的元信息)在内存中,二进制可以直接把这些信息导出到文件中,sql格式导出需要借助RestoreArchive函数把sql语句输出到文本文件。

2.外键约束是否被dump

        主表t1,从表t2,单独备份t1只会有t1数据,单独备份t2除了有t2数据,还会有与t1有关的约束,但不会备份t1表。

3.pg_dump/pg_restore和pg_upgrade的区别

        pg_dump/pg_restore优点:稳定易操作  缺点:耗时,需停机

        pg_upgrade 优点:停机时间短

4.pg_dump中的拓扑排序

while (!TopoSort(objs, numObjs, ordering, &nOrdering)) // topologically sort the objects according to their dependencies
	findDependencyLoops(ordering, nOrdering, numObjs);  // eliminate dependency loops, then try again
/*
 * TopoSort -- order a dump list so that every dependency is honored
 *
 * Builds a permutation of the input objects that is consistent with all the
 * dependency constraints recorded in them (each constraint is one fact of a
 * partial ordering), while disturbing the incoming order no more than those
 * constraints require.
 *
 * objs[] holds the numObjs input objects; it is only read, never modified.
 *
 * The result is true when a consistent ordering was built, and false when
 * the constraints contradict one another.
 *
 * On true, ordering[] holds the sorted DumpableObject pointers and has the
 * same length as the input list.
 *
 * On false, ordering[] holds, in no particular order, *nOrdering pointers to
 * the objects that kept the sort from finishing.  Such an object is either
 * part of a dependency cycle itself or is depended on by something that is;
 * the second kind is harmless, but telling the two apart needs more
 * analysis than is done here.
 *
 * Space for ordering[] must be provided by the caller.
 */
static bool
TopoSort(DumpableObject **objs,
		 int numObjs,
		 DumpableObject **ordering, /* output argument */
		 int *nOrdering)		/* output argument */
{
	DumpId		maxDumpId = getMaxDumpId();
	int		   *readyHeap;		/* input indexes of items ready to emit */
	int		   *unmetCount;		/* per dumpId: "must come before" count */
	int		   *inputIndex;		/* per dumpId: position in objs[] */
	int			nReady = 0;		/* current number of heap entries */
	int			nLeft;			/* ordering[] slots still to be filled */
	int			pos;
	bool		sorted;

	/*
	 * The method is the topological sort from Knuth's Volume 1, with one
	 * twist: to keep the output as close to the input order as possible,
	 * whenever several items could be emitted next we want the one that sits
	 * highest in the original list.  So the set of ready items is kept in a
	 * heap of input indexes, used as a priority queue, rather than in Knuth's
	 * unordered linked list.  Heap insertions and removals cost O(log N)
	 * each, making the whole thing O(N log N) instead of O(N), which is
	 * still easily fast enough here.
	 */

	/* Value reported on success */
	*nOrdering = numObjs;

	/* Nothing to sort */
	if (numObjs <= 0)
		return true;

	/* Workspace for the heap described above */
	readyHeap = (int *) pg_malloc(numObjs * sizeof(int));

	/*
	 * Walk the constraints.  For every input item, count how many
	 * constraints say it has to precede something else; the count for the
	 * item with dumpId d lives in unmetCount[d] (in graph terms, the
	 * in-degree of each vertex).  Also record, per dumpId, where the item
	 * sits in the input.
	 */
	unmetCount = (int *) pg_malloc0((maxDumpId + 1) * sizeof(int));
	inputIndex = (int *) pg_malloc((maxDumpId + 1) * sizeof(int));

	for (pos = 0; pos < numObjs; pos++)
	{
		DumpableObject *cur = objs[pos];
		int			ownId = cur->dumpId;
		int			d;

		if (!(ownId > 0 && ownId <= maxDumpId))
			pg_fatal("invalid dumpId %d", ownId);
		inputIndex[ownId] = pos;

		for (d = 0; d < cur->nDeps; d++)
		{
			int			depId = cur->dependencies[d];

			if (!(depId > 0 && depId <= maxDumpId))
				pg_fatal("invalid dependency %d", depId);
			unmetCount[depId] += 1;
		}
	}

	/*
	 * Seed the heap with the input indexes of all items whose count is
	 * already zero.
	 *
	 * A heap requires heap[(j-1)/2] >= heap[j] for every j in
	 * 1..heapLength-1 (subscripts are 0-based here, whereas Knuth's text is
	 * 1-based).  Appending the indexes in decreasing order therefore yields
	 * a valid heap with no sift-up work, which is why this scan runs from
	 * the last input item down to the first.
	 */
	pos = numObjs - 1;
	while (pos >= 0)
	{
		if (unmetCount[objs[pos]->dumpId] == 0)
			readyHeap[nReady++] = pos;
		pos--;
	}

	/*
	 * Emit the objects, filling ordering[] from its end toward its start.
	 * Each round takes the largest input index out of the heap (the last
	 * item with no outstanding before-constraints), stores that object, and
	 * lowers the count of every item it was constrained against.  An item
	 * whose count reaches zero becomes a candidate and goes into the heap.
	 * The loop ends when the heap runs dry: success if every slot was
	 * filled, failure otherwise.
	 */
	nLeft = numObjs;
	while (nReady > 0)
	{
		/* The helper is given the heap length as it is before the removal */
		int			top = removeHeapElement(readyHeap, nReady);
		DumpableObject *cur = objs[top];
		int			d;

		nReady--;

		/* Store the chosen object in the next free slot from the back */
		ordering[--nLeft] = cur;

		/* Release everything this object was waiting on */
		for (d = 0; d < cur->nDeps; d++)
		{
			int			depId = cur->dependencies[d];

			unmetCount[depId]--;
			if (unmetCount[depId] == 0)
			{
				/* The helper is given the heap length before the insertion */
				addHeapElement(inputIndex[depId], readyHeap, nReady);
				nReady++;
			}
		}
	}

	/*
	 * On failure, hand back the objects that could not be emitted: exactly
	 * those whose count is still nonzero.
	 */
	sorted = (nLeft == 0);
	if (!sorted)
	{
		int			nStuck = 0;
		int			id;

		for (id = 1; id <= maxDumpId; id++)
		{
			if (unmetCount[id] != 0)
				ordering[nStuck++] = objs[inputIndex[id]];
		}
		*nOrdering = nStuck;
	}

	/* Release workspace */
	free(readyHeap);
	free(unmetCount);
	free(inputIndex);

	return sorted;
}