在创建连接套接字之前,数据库主进程Postmaster需要保证同一个数据目录下当前只有1个Postmaster在运行。如何保证呢?这是通过函数CreateLockFile来完成的:它在PGDATA数据库目录中创建锁文件postmaster.pid。每次postmaster运行时,都会在PGDATA目录中创建这个文件;创建时设置了标志位O_CREAT | O_EXCL,如果文件已经存在,open就会失败,因此"检查并创建"是一个原子操作。
数据库成功启动时会创建这个锁文件,数据库停止工作时会删除这个锁文件。下面看一下启动后该锁文件的内容:
(1)文件postmaster.pid
[wln@localhost linux]$ pg_ctl -V
pg_ctl (PostgreSQL) 9.3beta2
[wln@localhost linux]$ cat /home/wln/postgres9.3/data/postmaster.pid
28179
/home/wln/postgres9.3/data
1411423233
5432
/tmp
localhost
5432001 4947977
[wln@localhost linux]$ pg_ctl stop -m f
waiting for server to shut down....... done
server stopped
[wln@localhost linux]$ pg_ctl start
server starting
[wln@localhost linux]$ cat /home/wln/postgres9.3/data/postmaster.pid
16304 --对应进程 PID
/home/wln/postgres9.3/data --对应PGDATA
1411461916 --对应postmaster启动时间戳(time_t)
5432 --对应PGPORT
/tmp --对应第一个Unix套接字目录
localhost --对应第一个监听地址(listen_addresses,默认localhost)
5432001 4980745 --对应共享内存key和ID
[wln@localhost linux]$ ps ux | grep 16304 | grep -v grep
wln 16304 1.2 0.9 152576 10360 pts/3 S 16:45 0:00 /home/wln/postgres9.3/install/bin/postgres
/*
 * As of 9.1, the contents of the data-directory lock file are:
*
* line #
* 1 postmaster PID (or negative of a standalone backend's PID)
* 2 data directory path
* 3 postmaster start timestamp (time_t representation)
* 4 port number
* 5 first Unix socket directory path (empty if none)
* 6 first listen_address (IP address or "*"; empty if no TCP port)
* 7 shared memory key (not present on Windows)
*
* Lines 6 and up are added via AddToDataDirLockFile() after initial file
* creation.
*
* The socket lock file,if used,has the same contents as lines 1-5.
*/
/* 1-based line numbers of the individual fields in the data-directory lock file. */
#define LOCK_FILE_LINE_PID 1	/* postmaster PID (negative for a standalone backend) */
#define LOCK_FILE_LINE_DATA_DIR 2	/* data directory path */
#define LOCK_FILE_LINE_START_TIME 3	/* postmaster start timestamp (time_t) */
#define LOCK_FILE_LINE_PORT 4	/* port number */
#define LOCK_FILE_LINE_SOCKET_DIR 5	/* first Unix socket directory path (empty if none) */
#define LOCK_FILE_LINE_LISTEN_ADDR 6	/* first listen_address (empty if no TCP port) */
#define LOCK_FILE_LINE_SHMEM_KEY 7	/* shared memory key (not present on Windows) */
(2)/tmp下内容
[wln@localhost linux]$ cat /tmp/.s.PGsql.5432.lock
16304
/home/wln/postgres9.3/data
1411461916
5432
/tmp
[wln@localhost tmp]$ ll .s.PGsql.5432
srwxrwxrwx 1 wln wln 0 09-23 16:45 .s.PGsql.5432
[wln@localhost tmp]$ cat .s.PGsql.5432
cat: .s.PGsql.5432: 没有那个设备或地址
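--不知怎么报这个错误,该文件内容长度为0,那为什么要存在这个文件呢?不是已经有个.s.PGsql.5432.lock 文件了?
--说明:.s.PGsql.5432 不是普通文件,而是Unix域套接字(权限串 srwxrwxrwx 的首字符 s 表示socket),不能用cat按普通文件打开,所以报"没有那个设备或地址"(ENXIO),长度也显示为0。客户端本地连接时实际使用的就是这个套接字;.s.PGsql.5432.lock 则是保护该套接字的锁文件,记录持有者的PID等信息,二者作用不同。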
(3)函数CreateLockFile
<span style="font-size:14px;">/*
 * Create a lockfile.
 *
 * filename is the path name of the lockfile to create.
 * amPostmaster is used to determine how to encode the output PID
 * (positive for a postmaster, negative for a standalone backend).
 * socketDir is the Unix socket directory path to include (possibly empty).
 * isDDLock and refName are used to determine what error message to produce.
 *
 * Does not return on failure: every error path reports FATAL.
 */
static void
CreateLockFile(const char *filename, bool amPostmaster,
               const char *socketDir,
               bool isDDLock, const char *refName)
{
    int         fd;
    char        buffer[MAXPGPATH * 2 + 256];
    int         ntries;
    int         len;
    int         encoded_pid;
    pid_t       other_pid;
    pid_t       my_pid,
                my_p_pid,
                my_gp_pid;
    const char *envvar;

    /*
     * If the PID in the lockfile is our own PID or our parent's or
     * grandparent's PID, then the file must be stale (probably left over from
     * a previous system boot cycle).  We need to check this because of the
     * likelihood that a reboot will assign exactly the same PID as we had in
     * the previous reboot, or one that's only one or two counts larger and
     * hence the lockfile's PID now refers to an ancestor shell process.  We
     * allow pg_ctl to pass down its parent shell PID (our grandparent PID)
     * via the environment variable PG_GRANDPARENT_PID; this is so that
     * launching the postmaster via pg_ctl can be just as reliable as
     * launching it directly.  There is no provision for detecting
     * further-removed ancestor processes, but if the init script is written
     * carefully then all but the immediate parent shell will be root-owned
     * processes and so the kill test will fail with EPERM.  Note that we
     * cannot get a false negative this way, because an existing postmaster
     * would surely never launch a competing postmaster or pg_ctl process
     * directly.
     */
    my_pid = getpid();

#ifndef WIN32
    my_p_pid = getppid();
#else

    /*
     * Windows hasn't got getppid(), but doesn't need it since it's not using
     * real kill() either...
     */
    my_p_pid = 0;
#endif

    envvar = getenv("PG_GRANDPARENT_PID");
    if (envvar)
        my_gp_pid = atoi(envvar);
    else
        my_gp_pid = 0;

    /*
     * We need a loop here because of race conditions.  But don't loop forever
     * (for example, a non-writable $PGDATA directory might cause a failure
     * that won't go away).  100 tries seems like plenty.
     */
    for (ntries = 0;; ntries++)
    {
        /*
         * Try to create the lock file --- O_EXCL makes this atomic.
         *
         * Think not to make the file protection weaker than 0600.  See
         * comments below.
         */
        fd = open(filename, O_RDWR | O_CREAT | O_EXCL, 0600);
        if (fd >= 0)
            break;              /* Success; exit the retry loop */

        /*
         * Couldn't create the pid file. Probably it already exists.
         */
        if ((errno != EEXIST && errno != EACCES) || ntries > 100)
            ereport(FATAL,
                    (errcode_for_file_access(),
                     errmsg("could not create lock file \"%s\": %m",
                            filename)));

        /*
         * Read the file to get the old owner's PID.  Note race condition
         * here: file might have been deleted since we tried to create it.
         */
        fd = open(filename, O_RDONLY, 0600);
        if (fd < 0)
        {
            if (errno == ENOENT)
                continue;       /* race condition; try again */
            ereport(FATAL,
                    (errcode_for_file_access(),
                     errmsg("could not open lock file \"%s\": %m",
                            filename)));
        }
        if ((len = read(fd, buffer, sizeof(buffer) - 1)) < 0)
            ereport(FATAL,
                    (errcode_for_file_access(),
                     errmsg("could not read lock file \"%s\": %m",
                            filename)));
        close(fd);

        if (len == 0)
        {
            ereport(FATAL,
                    (errcode(ERRCODE_LOCK_FILE_EXISTS),
                     errmsg("lock file \"%s\" is empty", filename),
                     errhint("Either another server is starting, or the lock file is the remnant of a previous server startup crash.")));
        }

        buffer[len] = '\0';
        encoded_pid = atoi(buffer);

        /* if pid < 0, the pid is for postgres, not postmaster */
        other_pid = (pid_t) (encoded_pid < 0 ? -encoded_pid : encoded_pid);

        if (other_pid <= 0)
            elog(FATAL, "bogus data in lock file \"%s\": \"%s\"",
                 filename, buffer);

        /*
         * Check to see if the other process still exists
         *
         * Per discussion above, my_pid, my_p_pid, and my_gp_pid can be
         * ignored as false matches.
         *
         * Normally kill() will fail with ESRCH if the given PID doesn't
         * exist.
         *
         * We can treat the EPERM-error case as okay because that error
         * implies that the existing process has a different userid than we
         * do, which means it cannot be a competing postmaster.  A postmaster
         * cannot successfully attach to a data directory owned by a userid
         * other than its own.  (This is now checked directly in
         * checkDataDir(), but has been true for a long time because of the
         * restriction that the data directory isn't group- or
         * world-accessible.)  Also, since we create the lockfiles mode 600,
         * we'd have failed above if the lockfile belonged to another userid
         * --- which means that whatever process kill() is reporting about
         * isn't the one that made the lockfile.  (NOTE: this last
         * consideration is the only one that keeps us from blowing away a
         * Unix socket file belonging to an instance of Postgres being run by
         * someone else, at least on machines where /tmp hasn't got a
         * stickybit.)
         */
        if (other_pid != my_pid && other_pid != my_p_pid &&
            other_pid != my_gp_pid)
        {
            if (kill(other_pid, 0) == 0 ||
                (errno != ESRCH && errno != EPERM))
            {
                /* lockfile belongs to a live process */
                ereport(FATAL,
                        (errcode(ERRCODE_LOCK_FILE_EXISTS),
                         errmsg("lock file \"%s\" already exists",
                                filename),
                         isDDLock ?
                         (encoded_pid < 0 ?
                          errhint("Is another postgres (PID %d) running in data directory \"%s\"?",
                                  (int) other_pid, refName) :
                          errhint("Is another postmaster (PID %d) running in data directory \"%s\"?",
                                  (int) other_pid, refName)) :
                         (encoded_pid < 0 ?
                          errhint("Is another postgres (PID %d) using socket file \"%s\"?",
                                  (int) other_pid, refName) :
                          errhint("Is another postmaster (PID %d) using socket file \"%s\"?",
                                  (int) other_pid, refName))));
            }
        }

        /*
         * No, the creating process did not exist.  However, it could be that
         * the postmaster crashed (or more likely was kill -9'd by a clueless
         * admin) but has left orphan backends behind.  Check for this by
         * looking to see if there is an associated shmem segment that is
         * still in use.
         *
         * Note: because postmaster.pid is written in multiple steps, we might
         * not find the shmem ID values in it; we can't treat that as an
         * error.
         */
        if (isDDLock)
        {
            char       *ptr = buffer;
            unsigned long id1,
                        id2;
            int         lineno;

            /* Skip forward to the start of the shared-memory-key line. */
            for (lineno = 1; lineno < LOCK_FILE_LINE_SHMEM_KEY; lineno++)
            {
                if ((ptr = strchr(ptr, '\n')) == NULL)
                    break;
                ptr++;
            }

            if (ptr != NULL &&
                sscanf(ptr, "%lu %lu", &id1, &id2) == 2)
            {
                if (PGSharedMemoryIsInUse(id1, id2))
                    ereport(FATAL,
                            (errcode(ERRCODE_LOCK_FILE_EXISTS),
                             errmsg("pre-existing shared memory block "
                                    "(key %lu, ID %lu) is still in use",
                                    id1, id2),
                             errhint("If you're sure there are no old "
                                     "server processes still running, remove "
                                     "the shared memory block "
                                     "or just delete the file \"%s\".",
                                     filename)));
            }
        }

        /*
         * Looks like nobody's home.  Unlink the file and try again to create
         * it.  Need a loop because of possible race condition against other
         * would-be creators.
         */
        if (unlink(filename) < 0)
            ereport(FATAL,
                    (errcode_for_file_access(),
                     errmsg("could not remove old lock file \"%s\": %m",
                            filename),
                     errhint("The file seems accidentally left over, but "
                             "it could not be removed. Please remove the file "
                             "by hand and try again.")));
    }

    /*
     * Successfully created the file, now fill it.  See comment in miscadmin.h
     * about the contents.  Note that we write the same first five lines into
     * both datadir and socket lockfiles; although more stuff may get added to
     * the datadir lockfile later.
     */
    snprintf(buffer, sizeof(buffer), "%d\n%s\n%ld\n%d\n%s\n",
             amPostmaster ? (int) my_pid : -((int) my_pid),
             DataDir,
             (long) MyStartTime,
             PostPortNumber,
             socketDir);

    /*
     * In a standalone backend, the next line (LOCK_FILE_LINE_LISTEN_ADDR)
     * will never receive data, so fill it in as empty now.
     */
    if (isDDLock && !amPostmaster)
        strlcat(buffer, "\n", sizeof(buffer));

    /* Clear errno so a short write that leaves it unset can be detected. */
    errno = 0;
    if (write(fd, buffer, strlen(buffer)) != strlen(buffer))
    {
        int         save_errno = errno;

        close(fd);
        unlink(filename);
        /* if write didn't set errno, assume problem is no disk space */
        errno = save_errno ? save_errno : ENOSPC;
        ereport(FATAL,
                (errcode_for_file_access(),
                 errmsg("could not write lock file \"%s\": %m", filename)));
    }
    if (pg_fsync(fd) != 0)
    {
        int         save_errno = errno;

        close(fd);
        unlink(filename);
        errno = save_errno;
        ereport(FATAL,
                (errcode_for_file_access(),
                 errmsg("could not write lock file \"%s\": %m", filename)));
    }
    if (close(fd) != 0)
    {
        int         save_errno = errno;

        unlink(filename);
        errno = save_errno;
        ereport(FATAL,
                (errcode_for_file_access(),
                 errmsg("could not write lock file \"%s\": %m", filename)));
    }

    /*
     * Arrange to unlink the lock file(s) at proc_exit.  If this is the first
     * one, set up the on_proc_exit function to do it; then add this lock file
     * to the list of files to unlink.
     */
    if (lock_files == NIL)
        on_proc_exit(UnlinkLockFiles, 0);

    lock_files = lappend(lock_files, pstrdup(filename));
}</span>
(4)learn CreateLockFile
open(pathname,O_RDWR | O_CREAT | O_EXCL,0666); 如果pathname已经存在,则打开失败,返回-1,并把errno置为EEXIST
#include <stdio.h> #include <string.h> #include <errno.h> #include <fcntl.h> int main(void) { int fd; extern int errno; if((fd = open("/dev/dsp2",O_WRONLY)) < 0) { printf("errno=%d\n",errno); char * mesg = strerror(errno); printf("Mesg:%s\n",mesg); } exit(0); }[wln@localhost linux]$ ./erron2
errno=2
Mesg:No such file or directory
相关函数 raise,signal
表头文件 #include<sys/types.h>
#include<signal.h>
定义函数 int kill(pid_t pid,int sig);
函数说明
kill()可以用来送参数sig指定的信号给参数pid指定的进程。参数pid有几种情况:
pid>0 将信号传给进程识别码为pid 的进程。
pid=0 将信号传给和目前进程相同进程组的所有进程
pid=-1 将信号广播传送给系统内所有(有权限发送的)进程
pid<-1 将信号传给进程组识别码为pid绝对值的所有进程
参数sig代表的信号编号可参考附录D。sig为0时不发送任何信号,只做存在性和权限检查;CreateLockFile中的kill(other_pid,0)正是利用这一点来判断锁文件中记录的进程是否还存在
返回值 执行成功则返回0,如果有错误则返回-1。
错误代码 EINVAL 参数sig 不合法
ESRCH 参数pid 所指定的进程或进程组不存在
EPERM 权限不够无法传送信号给指定进程