[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [leafnode-list] lockfile_exists disaster, analysis of 1.9.19 and



krasel@xxxxxxxxxxxxxxxxxxxxxxxxxxxx (Cornelius Krasel) writes:

> Here is what I believe to be a fix. Patch your version of 1.9.19 with it:

Unsafe.

1/ may fail when locking actually succeeds.
2/ doesn't detect permission errors.

Proper fix against 2.0b8 attached.

I'm now going to have a drink with my girl-friend, so someone please
adjust this to 1.9.19.

I'm attaching the separated lockfile.c file for convenience.

-- 
Matthias Andree


--- leafnode-2.0b8/miscutil.c.lock	Wed Jun 13 19:40:19 2001
+++ leafnode-2.0b8/miscutil.c	Wed Jun 13 19:41:57 2001
@@ -221,54 +221,6 @@
 }
 
 /*
- * check if lockfile exists: if yes (and another instance of fetch is
- * still active) return 1; otherwise generate it. If this fails,
- * also return 1, otherwise return 0
- */
-int lockfile_exists( int silent, int block ) {
-    int fd;
-    int ret;
-    struct flock fl;
-
-    /* The file descriptor is closed by _exit(2). Very ugly. */
-    fd = open( lockfile, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR );
-    if ( fd == -1 ) {
-	if (!silent)
-	    printf( "Could not open lockfile %s for writing, "
-		    "abort program ...\n", lockfile );
-	syslog( LOG_ERR, "Could not open lockfile %s for writing: %m",
-		lockfile );
-	return 1;
-    }
-    fl.l_type = F_WRLCK;
-    fl.l_start = 0;
-    fl.l_whence = SEEK_SET;
-    fl.l_len = 0;
-    fl.l_pid = getpid();
-    ret = fcntl( fd, block ? F_SETLKW : F_SETLK, &fl );
-    if ( ret == -1 ) {
-	if ( errno == EACCES || errno == EAGAIN ) {
-	    if (!silent)
-		printf( "lockfile  %s exists, abort ...\n" ,lockfile );
-	    syslog( LOG_ERR, "lockfile %s exists, abort ...", lockfile);
-	    return 1;
-	} else {
-	    if (!silent)
-		printf( " locking %s failed: %s, abort ...\n"
-			, lockfile, strerror( errno ) );
-	    syslog( LOG_ERR, "locking %s failed: %m, abort", lockfile );
-	    return 1;
-	}
-    } else {
-	char pid[11];  /* Hope nobody has sizeof( pid_t ) > 4 */
-	memset( pid, 0, 11 ); 
-	snprintf( pid, 11, "%d\n", fl.l_pid );
-	write( fd, pid, strlen(pid) ); 
-	return 0;
-    }
-}
-
-/*
  * check whether "groupname" is represented in interesting.groups without
  * touching the file
  */
--- leafnode-2.0b8/lockfile.c.lock	Wed Jun 13 19:39:56 2001
+++ leafnode-2.0b8/lockfile.c	Wed Jun 13 20:12:54 2001
@@ -0,0 +1,246 @@
+/* 
+    lockfile.c - library module to safely create a lock file
+    Copyright (C) 2001  Matthias Andree <matthias.andree@xxxxxx>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "leafnode.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h> /* for kill */
+
+/*
+ * check if lockfile exists: if yes (and another instance of fetch is
+ * still active) return 1; otherwise generate it. If this fails, also
+ * return 1, otherwise return 0. return -1 for other errors.
+ *
+ * requires: atomic link
+ * features: NFS safe (but leafnode does not work distributed)
+ * stale detection by PID in lock file (may be fooled)
+ */
+
+/* get nlink count of filedes, 
+   log and return -1 in case of trouble */
+static nlink_t fd_st_nlink(const int fd) {
+    struct stat st;
+    
+    if (fstat(fd, &st)) {
+	syslog(LOG_ERR, "Cannot fstat %d: %m", fd);
+	return -1;
+    }
+
+    return st.st_nlink;
+}
+
+/* returns: 
+ * -1 for error
+ *  0 if lock file is stale and has been erased
+ *  1 if lock file is still in use or held by another host
+ */
+static int lock_is_stale(const char * const name) {
+    char buf[512];
+    int fd;
+    int len;
+    char *pid;
+    char *host;
+    char *tmp;
+    unsigned long npid; 
+
+    fd = open(name, O_RDONLY, 0);
+    if (fd < 0) { 
+	if (errno == ENOENT) {
+	    /* file has just disappeared, thus it's not stale */
+	    return 0;
+	} else {
+	    syslog(LOG_ERR, "cannot open %s for reading: %m", name);
+	    return -1;
+	}
+    }
+
+    if ((len = read(fd, buf, sizeof(buf)-1)) < 0) {
+	syslog(LOG_ERR, "read error on %s: %m", name);
+	(void)close(fd);
+	return -1;
+    }
+
+    if (close(fd) < 0) {
+	syslog(LOG_ERR, "read error on %s: %m", name);
+	return -1;
+    }	
+
+    /* read pid and host */
+    buf[len-1] = '\0'; 
+    pid = host = buf;
+    /* we expect a single \n here */
+    host += strcspn(host, "\n");
+    *(host++) = '\0'; 
+
+    /* kill trailing \n */
+    tmp = host;
+    tmp += strcspn(tmp, "\n");
+    *tmp = '\0';
+
+    npid = strtoul(pid, 0, 10);
+    if (npid == ULONG_MAX && errno == ERANGE) {
+	/* overflow error, should not happen, don't bother */
+	syslog(LOG_ERR, "bogus pid in %s: %m", name);
+	return -1;
+    }
+
+    if (strcasecmp(host, fqdn)) {
+	syslog(LOG_ERR, "lockfile held by pid %lu on host %s, we're %s", npid, host, fqdn);
+	return 0; /* other host holds the lock */
+    }
+
+    /* okay, we can see if there's still a process with that pid active */
+    if (kill(npid, 0) && errno == ESRCH) {
+	/* no such process, good */
+	if (!unlink(name)) {
+	    syslog(LOG_NOTICE, "erased stale pid %lu host %s lockfile %s",
+		   npid, host, name);
+	    return 1;
+	} else {
+	    syslog(LOG_ERR, "unable to erase stale pid %lu host %s lockfile %s", 
+		   npid, host, name);
+	    return 0;
+	}
+    }
+
+    /* there is a process active */
+    return 0;
+}
+
+/* safe mkstemp replacement, ensures the file is only read and writable
+ * by owner, some systems create these with 0777 or 0666 permissions. */
+int safe_mkstemp(char *template) {
+	mode_t oldmask;
+	int ret;
+
+	oldmask = umask(077);
+	ret = mkstemp(template);
+	(void)umask(oldmask);
+
+	return ret;
+}
+
+
+
+int lockfile_exists(const int silent, const int block) { /* silent is currently unused */
+/* returns: 
+ *  0: if locking succeeded
+ *  1: if locking failed because the lock is held by someone else and isn't stale
+ * -1: for other errors
+ */
+    char *l2, *pid;
+    int fd;
+    int have_lock = 0;
+    const char * const append = ".XXXXXXXXXX";
+
+    (void)silent; /* shut up compiler warning */
+
+    /* kill bogus fqdn */
+    if (!strchr(fqdn, '.') 
+	|| !strncasecmp(fqdn, "localhost", 9) 
+	|| !strncmp(fqdn, "127.0.0.", 8)
+	) 
+    {
+	syslog(LOG_CRIT, "Internal error: must not try to lock with local host name %s", fqdn);
+	return -1;
+    }
+
+    l2 = critmalloc(strlen(lockfile) + strlen(append) + 1, "lockfile_exists");
+    pid = critmalloc(strlen(fqdn) + sizeof(unsigned long) * 4 + 4, "lockfile_exists");
+
+    strcpy(l2, lockfile);
+    strcat(l2, append);
+ 
+    /* make a temporary file */
+    fd = safe_mkstemp(l2);
+    if (fd < 0) {
+	syslog(LOG_ERR, "mkstemp(%s) failed: %m", l2);
+	free(l2);
+	free(pid);
+	return -1;
+    }
+
+    /* write our PID and host into it (stale detection) */
+    sprintf(pid, "%lu\n%s\n", (unsigned long)getpid(), fqdn); /* safe, see malloc above */
+    if (write(fd, pid, strlen(pid)) < 0	|| fsync(fd) < 0)
+    {
+	syslog(LOG_ERR, "cannot write to %s: %m", l2);
+	if (unlink(l2)) syslog(LOG_ERR, "Cannot remove lock helper file %s: %m", l2);
+	free(l2);
+	free(pid);
+	return -1;
+    }
+
+    /* and try to finally lock */
+    while (!have_lock) {
+	if (0 == link(l2, lockfile)) {
+	    /* link succeeded. good. */
+	    have_lock = 1;
+	    break;
+	} else {
+	    if (2 == fd_st_nlink(fd)) {
+		/* link failed, but st_nlink has increased to 2, good. */
+		have_lock = 1;
+	    } else if (!block) {
+		/* don't bother to retry, we're in nonblocking mode */
+		break;
+	    } else {
+		int stale;
+		struct timeval tv = { 1, 0 };
+
+		/* Could not create link. Check if the lock file is stale. */
+		stale = lock_is_stale(lockfile);
+		
+		if (stale == -1) break;
+
+		if (stale == 1) continue;
+		
+		/* retry after a second, select does not interfere with alarm */
+		if (select(0, NULL, NULL, NULL, &tv) < 0) {
+		    /* must not happen */
+		    syslog(LOG_ERR, "lockfile_exists: select failed: %m");
+		    break;
+		}
+	    }
+	}
+    }
+
+    if (close(fd) < 0) {
+	syslog(LOG_ERR, "cannot write to %s: %m", l2);
+	have_lock = 0;
+    }
+
+    /* unlink l2, but just log if unable to unlink, ignore otherwise */
+    if (unlink(l2)) syslog(LOG_ERR, "Cannot remove lock helper file %s: %m", l2);
+    
+    /* clean up */
+    free(l2);
+    free(pid);
+
+    /* mind the return logic */
+    return have_lock ? 0 : 1;
+}
--- leafnode-2.0b8/Makefile.in.lock	Wed Jan 10 10:16:06 2001
+++ leafnode-2.0b8/Makefile.in	Wed Jun 13 19:40:53 2001
@@ -49,12 +49,12 @@
 SRCFILES = nntputil.c configutil.c xoverutil.c activutil.c miscutil.c \
 	   artutil.c filterutil.c localutil.c config.c \
 	   applyfilter.c checkgroups.c newsq.c nntpd.c fetchnews.c texpire.c \
-	   rnews.c \
+	   rnews.c lockfile.c \
 	   vsnprintf.c strdup.c mkstemp.c getaline.c getline.c critmem.c \
 	   inet_ntop.c
 HDRFILES = leafnode.h acconfig.h getline.h
 LIBFILES = nntputil.o configutil.o xoverutil.o activutil.o miscutil.o \
-	   artutil.o filterutil.o localutil.o \
+	   artutil.o filterutil.o localutil.o lockfile.o \
 	   vsnprintf.o strdup.o mkstemp.o getaline.o critmem.o inet_ntop.o
 OWNLIBS  = liblnutil.a @PCRELIB@
 PROGRAMS = leafnode rnews fetchnews texpire checkgroups applyfilter newsq


/* 
    lockfile.c - library module to safely create a lock file
    Copyright (C) 2001  Matthias Andree <matthias.andree@xxxxxx>

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "leafnode.h"

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h> /* for kill */

/*
 * check if lockfile exists: if yes (and another instance of fetch is
 * still active) return 1; otherwise generate it. If this fails, also
 * return 1, otherwise return 0. return -1 for other errors.
 *
 * requires: atomic link
 * features: NFS safe (but leafnode does not work distributed)
 * stale detection by PID in lock file (may be fooled)
 */

/* get nlink count of filedes, 
   log and return -1 in case of trouble */
static nlink_t fd_st_nlink(const int fd) {
    struct stat st;
    
    if (fstat(fd, &st)) {
	syslog(LOG_ERR, "Cannot fstat %d: %m", fd);
	return -1;
    }

    return st.st_nlink;
}

/* returns: 
 * -1 for error
 *  0 if lock file is stale and has been erased
 *  1 if lock file is still in use or held by another host
 */
static int lock_is_stale(const char * const name) {
    char buf[512];
    int fd;
    int len;
    char *pid;
    char *host;
    char *tmp;
    unsigned long npid; 

    fd = open(name, O_RDONLY, 0);
    if (fd < 0) { 
	if (errno == ENOENT) {
	    /* file has just disappeared, thus it's not stale */
	    return 0;
	} else {
	    syslog(LOG_ERR, "cannot open %s for reading: %m", name);
	    return -1;
	}
    }

    if ((len = read(fd, buf, sizeof(buf)-1)) < 0) {
	syslog(LOG_ERR, "read error on %s: %m", name);
	(void)close(fd);
	return -1;
    }

    if (close(fd) < 0) {
	syslog(LOG_ERR, "read error on %s: %m", name);
	return -1;
    }	

    /* read pid and host */
    buf[len-1] = '\0'; 
    pid = host = buf;
    /* we expect a single \n here */
    host += strcspn(host, "\n");
    *(host++) = '\0'; 

    /* kill trailing \n */
    tmp = host;
    tmp += strcspn(tmp, "\n");
    *tmp = '\0';

    npid = strtoul(pid, 0, 10);
    if (npid == ULONG_MAX && errno == ERANGE) {
	/* overflow error, should not happen, don't bother */
	syslog(LOG_ERR, "bogus pid in %s: %m", name);
	return -1;
    }

    if (strcasecmp(host, fqdn)) {
	syslog(LOG_ERR, "lockfile held by pid %lu on host %s, we're %s", npid, host, fqdn);
	return 0; /* other host holds the lock */
    }

    /* okay, we can see if there's still a process with that pid active */
    if (kill(npid, 0) && errno == ESRCH) {
	/* no such process, good */
	if (!unlink(name)) {
	    syslog(LOG_NOTICE, "erased stale pid %lu host %s lockfile %s",
		   npid, host, name);
	    return 1;
	} else {
	    syslog(LOG_ERR, "unable to erase stale pid %lu host %s lockfile %s", 
		   npid, host, name);
	    return 0;
	}
    }

    /* there is a process active */
    return 0;
}

/* safe mkstemp replacement, ensures the file is only read and writable
 * by owner, some systems create these with 0777 or 0666 permissions. */
int safe_mkstemp(char *template) {
	mode_t oldmask;
	int ret;

	oldmask = umask(077);
	ret = mkstemp(template);
	(void)umask(oldmask);

	return ret;
}



int lockfile_exists(const int silent, const int block) { /* silent is currently unused */
/* returns: 
 *  0: if locking succeeded
 *  1: if locking failed because the lock is held by someone else and isn't stale
 * -1: for other errors
 */
    char *l2, *pid;
    int fd;
    int have_lock = 0;
    const char * const append = ".XXXXXXXXXX";

    (void)silent; /* shut up compiler warning */

    /* kill bogus fqdn */
    if (!strchr(fqdn, '.') 
	|| !strncasecmp(fqdn, "localhost", 9) 
	|| !strncmp(fqdn, "127.0.0.", 8)
	) 
    {
	syslog(LOG_CRIT, "Internal error: must not try to lock with local host name %s", fqdn);
	return -1;
    }

    l2 = critmalloc(strlen(lockfile) + strlen(append) + 1, "lockfile_exists");
    pid = critmalloc(strlen(fqdn) + sizeof(unsigned long) * 4 + 4, "lockfile_exists");

    strcpy(l2, lockfile);
    strcat(l2, append);
 
    /* make a temporary file */
    fd = safe_mkstemp(l2);
    if (fd < 0) {
	syslog(LOG_ERR, "mkstemp(%s) failed: %m", l2);
	free(l2);
	free(pid);
	return -1;
    }

    /* write our PID and host into it (stale detection) */
    sprintf(pid, "%lu\n%s\n", (unsigned long)getpid(), fqdn); /* safe, see malloc above */
    if (write(fd, pid, strlen(pid)) < 0	|| fsync(fd) < 0)
    {
	syslog(LOG_ERR, "cannot write to %s: %m", l2);
	if (unlink(l2)) syslog(LOG_ERR, "Cannot remove lock helper file %s: %m", l2);
	free(l2);
	free(pid);
	return -1;
    }

    /* and try to finally lock */
    while (!have_lock) {
	if (0 == link(l2, lockfile)) {
	    /* link succeeded. good. */
	    have_lock = 1;
	    break;
	} else {
	    if (2 == fd_st_nlink(fd)) {
		/* link failed, but st_nlink has increased to 2, good. */
		have_lock = 1;
	    } else if (!block) {
		/* don't bother to retry, we're in nonblocking mode */
		break;
	    } else {
		int stale;
		struct timeval tv = { 1, 0 };

		/* Could not create link. Check if the lock file is stale. */
		stale = lock_is_stale(lockfile);
		
		if (stale == -1) break;

		if (stale == 1) continue;
		
		/* retry after a second, select does not interfere with alarm */
		if (select(0, NULL, NULL, NULL, &tv) < 0) {
		    /* must not happen */
		    syslog(LOG_ERR, "lockfile_exists: select failed: %m");
		    break;
		}
	    }
	}
    }

    if (close(fd) < 0) {
	syslog(LOG_ERR, "cannot write to %s: %m", l2);
	have_lock = 0;
    }

    /* unlink l2, but just log if unable to unlink, ignore otherwise */
    if (unlink(l2)) syslog(LOG_ERR, "Cannot remove lock helper file %s: %m", l2);
    
    /* clean up */
    free(l2);
    free(pid);

    /* mind the return logic */
    return have_lock ? 0 : 1;
}