/* Distributed Clearinghouses Checksum database cleaner * * --C-LICENSE-- * $Revision: 1.300 $ */ #include "srvr_defs.h" #include "dcc_ck.h" #include static DCC_EMSG dcc_emsg; static WF dbclean_wf; static WHITE_TBL dbclean_white_tbl; static DCC_CLNT_CTXT *ctxt; static DCC_OP_RESP aop_resp; static int flods_off; static int dccd_unlocked; /* dccd has been told to unlock */ static DCC_SRVR_NM server_def = DCC_SRVR_NM_DEF(0); static DCC_SRVR_NM server; static DCC_CLNT_ID server_id = DCC_ID_INVALID; #ifdef DCC_USE_DBCLEAN_F static DB_OPEN_MODES db_mode = DB_OPEN_WRITE | DB_OPEN_DCCD_DEFAULT; #else static DB_OPEN_MODES db_mode = DB_OPEN_MSYNC | DB_OPEN_DBCLEAN_DEFAULT; #endif static u_char cleardb; /* 1=clear the database */ static enum { NORMAL_MODE, /* started by cron */ REPAIR_MODE, /* server says: bad database */ QUICK_MODE, /* server says: too big for window */ HASH_MODE, /* server says: hash table full */ FAILSAFE_MODE, /* server says: no cron job */ DEL_MODE /* server says: record deletion */ } clean_mode = NORMAL_MODE; static u_char standalone; /* 1=don't talk to dccd */ static u_char keep_white; /* 1=do not rebuild whitelist */ static u_char dbclean_ssd_mode; static const char *homedir; static const char *db_dir; static const char *hash_dir; typedef struct { DCC_PATH link_dir; /* "" or /tmpfs */ DCC_PATH base; /* /var/dcc/dcc_db */ DCC_PATH next; /* /{var/dcc,tmpfs}/dcc_db */ DCC_PATH new; /* /{var/dcc,tmpfs}/dcc_db-new */ DCC_PATH prev; /* /{var/dcc,tmpfs}/dcc_db */ DCC_PATH prev_old; /* /{var/dcc,tmpfs}/dcc_db-old */ char prev_msg[sizeof(DCC_PATH)+sizeof(" -> ")+sizeof(DCC_PATH)]; } paths_t; static paths_t db_paths, hash_paths; static int old_db_fd = -1; static DB_HADDR prev_db_hash_used; static DB_PARMS prev_db_parms; static DB_PARMS new_db_parms; static DB_PTR old_db_pos, prev_db_csize, new_db_csize; static DB_PTR new_db_fsize; static u_int new_db_blksize; static FLOD_MMAPS new_flod_mmaps; static u_char adj_delay_pos; static DB_PTR min_confirm_pos; static 
u_char cur_db_created; static u_char new_db_created; static int new_db_fd = -1; static u_char new_hash_created; static int expire_secs = -1; static int def_expire_secs = DB_EXPIRE_SECS_DEF; static int expire_spamsecs = -1; static int def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF; static int have_expire_parms = 0; static double def_exp_ratio = 0.0; static DB_EX_SECS new_ex_secs; static time_t new_all_secs[DCC_DIM_CKS]; static DB_EX_TS new_all_ts, new_spam_ts; static DB_EX_TS ancient_ts, stale_ts; static DB_HADDR new_hash_len; static int expired_rcds, comp_rcds, obs_rcds, expired_cks; static int white_cks, kept_cks; static DCC_TS future_ts; #define RESTART_DELAY (60*5) #define SHORT_DELAY 30 static struct timeval clean_start; static struct timeval progress_rpt_last; /* when previous progress report */ static struct timeval progress_rpt_checked; /* when last checked */ static struct timeval progress_rpt_start; /* start of progress reporting */ #define REPORT_INTERVAL_SECS (5*60) #define REPORT_INTERVAL_FAST_SECS 10 #define UNLOCK_INTERVAL_USECS (DCC_US/2) static int progress_rpt_cnt; /* operations until next check */ static int progress_rpt_base; static u_char progress_rpt_started; /* 1=have started reporting progress */ static int progress_rpt_percent; static const u_char zero_rcd[DB_RCD_LEN_MAX] = {0}; static void get_paths(u_char db); static void write_new_flush(void); static void write_new_rcd(const void *, int); static void write_new_hdr(u_char); static void rename_bail(const char *, const char *); static u_char expire(DB_PTR); static u_char copy_db(void); static u_char catchup(DCC_EMSG *); static void parse_white(void); static void build_hash(void); static u_char persist_aop(DCC_AOPS, u_int32_t, int); static void dccd_new_db(const char *); static void finish(void); static void sigterm(int); static void usage(u_char die) { const char str[] = { "usage: [-dfFNPSVqu] [-i id]" " [-a [server-addr][,server-port]] [-h homedir]\n" " [-H hash-file-dir] [-D db-file-dir] [-G 
on]" " [-R mode] [-s hash-size]\n" " [-e seconds] [-E spamsecs] [-L ltype,facility.level]"}; static u_char complained; /* its important to try to run, so don't give up unless necessary */ if (die) { dcc_logbad(EX_USAGE, complained ? "giving up" : str); } else if (!complained) { dcc_error_msg("%s\ncontinuing", str); complained = 1; } } int main(int argc, char **argv) { u_char print_version = 0; typedef struct srvr_nm_port { /* -a hostname,port args */ struct srvr_nm_port *fwd; in_port_t port; char nm[DCC_MAXDOMAINLEN]; } SRVR_NM_PORT; SRVR_NM_PORT *nms_ports; SRVR_NM_PORT *new_nm_port, **old_nm_port; struct stat cur_db_sb; const char *cp; char *p; u_long l; int i; gettimeofday(&db_time, 0); clean_start = db_time; timeval2ts(&future_ts, &clean_start, 24*60*60); dcc_syslog_init(1, argv[0], 0); nms_ports = 0; /* this must match DBCLEAN_GETOPTS in cron-dccd.in */ while ((i = getopt(argc, argv, "64dfFNPSVqui:a:h:H:D:G:R:s:e:E:L:")) != -1) { switch (i) { case '6': case '4': /* obsolete with *.3.104, but cannot be deleted because * copied old versions of the cron script might copy * -6 or -4 from old dcc_conf files */ break; case 'd': if (db_debug++) ++dcc_clnt_debug; break; case 'f': db_mode &= ~(DB_OPEN_MSYNC | DB_OPEN_WRITE | DB_OPEN_DBCLEAN_DEFAULT); db_mode |= DB_OPEN_MSYNC; break; case 'F': db_mode &= ~(DB_OPEN_MSYNC | DB_OPEN_WRITE | DB_OPEN_DBCLEAN_DEFAULT); db_mode |= DB_OPEN_WRITE; break; case 'N': /* make a new, clear database */ cleardb = 1; standalone = 1; break; case 'P': if (have_expire_parms > 0) dcc_logbad(EX_USAGE, "do not use -P with -e or -E"); have_expire_parms = -1; break; case 'S': standalone = 1; break; case 'V': dcc_version_print(); print_version = 1; break; case 'q': trace_quiet = 1; break; case 'u': dbclean_ssd_mode = 1; break; case 'i': l = strtoul(optarg, &p, 10); if (*p != '\0' || !DCC_ID_SRVR_NORMAL(l)) { dcc_error_msg("invalid DCC ID \"-i %s\"", optarg); } else { server_id = l; } break; case 'a': new_nm_port = malloc(sizeof(*new_nm_port)); 
memset(new_nm_port, 0, sizeof(*new_nm_port)); cp = dcc_parse_nm_port(&dcc_emsg, optarg, 0, new_nm_port->nm, sizeof(new_nm_port->nm), &new_nm_port->port, 0, 0, 0, 0); if (!cp) { dcc_error_msg("%s", dcc_emsg.c); free(new_nm_port); break; } cp += strspn(cp, DCC_WHITESPACE); if (*cp != '\0') { dcc_error_msg("unrecognized port number in" "\"-a %s\"", optarg); free(new_nm_port); } else { old_nm_port = &nms_ports; while (*old_nm_port) { old_nm_port = &(*old_nm_port)->fwd; } *old_nm_port = new_nm_port; } break; case 'h': homedir = optarg; break; case 'H': #ifndef DCC_FSTATFS_COMPAT dcc_error_msg("dbclean -H usually needs a" " compatible fstatfs()"); #endif hash_dir = optarg; break; case 'D': #ifndef DCC_FSTATFS_COMPAT dcc_error_msg("dbclean -D usually needs a" " compatible fstatfs()"); #endif db_dir = optarg; break; case 'G': dcc_syslog_init(1, argv[0], " grey"); if (have_expire_parms > 0) dcc_logbad(EX_USAGE, "do not use -G with -e or -E"); if (strcasecmp(optarg, "on")) usage(0); /* be generous and allow -Gxxx */ grey_on = 1; have_expire_parms = -1; break; case 'R': if (!strcasecmp(optarg, "bad")) clean_mode = REPAIR_MODE; else if (!strcasecmp(optarg, "quick")) clean_mode = QUICK_MODE; else if (!strcasecmp(optarg, "hash")) clean_mode = HASH_MODE; else if (!strcasecmp(optarg, "failsafe")) clean_mode = FAILSAFE_MODE; else if (!strcasecmp(optarg, "del")) clean_mode = DEL_MODE; else dcc_logbad(EX_USAGE, "unrecognized repair mode -R %s", optarg); break; case 's': /* hash table size in entries */ new_hash_len = strtoul(optarg, &p, 0); if (*p != '\0' || new_hash_len < MIN_HASH_ENTRIES || new_hash_len > MAX_HASH_ENTRIES) dcc_logbad(EX_USAGE, "invalid database size \"%s\"", optarg); break; case 'e': /* expiration for non-bulk checksums */ if (grey_on) dcc_logbad(EX_USAGE, "do not use -e with -G"); if (have_expire_parms < 0) dcc_logbad(EX_USAGE, "-e cannot be used with -P"); have_expire_parms = 1; expire_secs = dcc_get_secs(optarg, 0, DB_EXPIRE_SECS_MIN, DB_EXPIRE_SECS_MAX, -1); if 
(expire_secs < 0) dcc_logbad(EX_USAGE, "invalid expiration seconds" " \"-e %s\"", optarg); break; case 'E': /* expiration for bulk checksums */ if (grey_on) dcc_logbad(EX_USAGE, "do not use -E with -G"); if (have_expire_parms < 0) dcc_logbad(EX_USAGE, "do not use -E with -P"); have_expire_parms = 1; expire_spamsecs = dcc_get_secs(optarg, 0, DB_EXPIRE_SECS_MIN, DB_EXPIRE_SECS_MAX, -1); if (expire_spamsecs < 0) dcc_logbad(EX_USAGE, "invalid spam expiration seconds" " \"-E %s\"", optarg); break; case 'L': dcc_parse_log_opt(optarg); break; default: usage(0); } } argc -= optind; argv += optind; if (argc != 0) usage(1); dcc_clnt_unthread_init(); /* move to the target directory * and set homedir for dcc_fnm2rel_good() */ if (!dcc_cdhome(&dcc_emsg, homedir, 0)) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); /* compute the database file names */ get_paths(1); get_paths(0); cp = ""; switch (clean_mode) { case NORMAL_MODE: cp = "cleaning"; break; case REPAIR_MODE: cp = "repairing"; break; case QUICK_MODE: cp = "quick cleaning"; break; case HASH_MODE: cp = "expanding hash table in"; break; case FAILSAFE_MODE: cp = "work around missing cron job for"; break; case DEL_MODE: cp = "clean up deletion in"; break; } quiet_trace_msg(DCC_VERSION" %s %s", cp, db_paths.base.c); /* see if we can talk to the server */ if (!standalone) { const ID_TBL *id_tbl; SRVR_NM_PORT *nm_port; /* we must have the server-ID to talk to the server */ if (server_id == DCC_ID_INVALID && !standalone) { if (print_version) exit(EX_OK); usage(1); } i = load_ids(&dcc_emsg, server_id, &id_tbl, 1, db_debug > 1); if (!id_tbl) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); /* merely complain about a sick file if we got a password */ if (i <= 0) dcc_error_msg("%s", dcc_emsg.c); nm_port = nms_ports; for (;;) { DCC_CLNT_FGS clnt_fgs; memcpy(&server, &server_def, sizeof(server)); server.port = DCC_GREY2PORT(grey_on); server.clnt_id = server_id; memcpy(server.passwd, id_tbl->cur_passwd, 
sizeof(server.passwd)); if (nm_port) { if (nm_port->nm[0] != '\0') memcpy(server.hostname, nm_port->nm, sizeof(server.hostname)); if (nm_port->port != 0) server.port = nm_port->port; } /* try hard to contact dccd */ clnt_fgs = DCC_CLNT_FG_SLOW; if (grey_on) clnt_fgs |= DCC_CLNT_FG_GREY; if ((0 != (ctxt = dcc_tmp_clnt_init(&dcc_emsg, 0, &server, 0, 0, clnt_fgs, 0))) || (0 != (ctxt = dcc_tmp_clnt_init(&dcc_emsg,0, &server, 0, 0, clnt_fgs, 0)))) { const DCC_SRVR_CLASS *class; /* after we find a working IP address, * ensure that we never try another */ class = DCC_GREY2CLASS(grey_on); dcc_ip2str(server.hostname, sizeof(server.hostname), &class->addrs[class->srvr_inx ].ip); dcc_ctxts_lock(); dcc_unmap_close_info(0); dcc_rel_ctxt(ctxt); dcc_ctxts_unlock(); ctxt = dcc_tmp_clnt_init(&dcc_emsg, 0, &server, 0, 0, clnt_fgs, 0); if (ctxt) break; /* start over if that failed */ nm_port = nms_ports; continue; } if (!nm_port) dcc_logbad(EX_DCC_RESTART, "initial contact: %s", dcc_emsg.c); /* try the next port and IP address we've been given * and eventually fall back on the default */ nm_port = nm_port->fwd; } } atexit(finish); signal(SIGHUP, sigterm); signal(SIGTERM, sigterm); signal(SIGINT, sigterm); #ifdef SIGXFSZ signal(SIGXFSZ, SIG_IGN); #endif /* exclude other instances of this program */ if (!lock_dbclean(&dcc_emsg, db_paths.base.c)) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s: dbclean already running?", dcc_emsg.c); /* create & the lock new database file */ unlink_whine(0, db_paths.new.c, 1); new_db_fd = dcc_lock_open(&dcc_emsg, db_paths.new.c, O_RDWR|O_CREAT, DCC_LOCK_OPEN_NOWAIT, DCC_LOCK_ALL_FILE, 0); if (new_db_fd == -1) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); if (0 > ftruncate(new_db_fd, 0)) dcc_logbad(EX_IOERR, "ftruncate(%s,0): %s", db_paths.new.c, ERROR_STR()); /* Look for an existing file and a hint about the new size. 
*/ old_db_fd = open(db_paths.base.c, O_RDONLY, 0); if (old_db_fd < 0) { if (errno != ENOENT) dcc_logbad(EX_IOERR, "open(%s): %s", db_paths.prev_msg, ERROR_STR()); /* empty a missing database */ prev_db_csize = 0; cleardb = 1; } else { if (0 > fstat(old_db_fd, &cur_db_sb)) dcc_logbad(EX_IOERR, "stat(%s): %s", db_paths.prev.c, ERROR_STR()); prev_db_csize = cur_db_sb.st_size; if (prev_db_csize == 0) cleardb = 1; /* empty an empty database */ close(old_db_fd); old_db_fd = -1; } new_db_blksize = get_db_blksize(prev_db_csize, 0); new_db_fsize = 0; new_db_created = 1; new_db_csize = DB_PTR_BASE; write_new_hdr(1); if (standalone) { u_char busy; /* Open and lock the current database to ensure * the daemon is not running. */ old_db_fd = dcc_lock_open(&dcc_emsg, db_paths.base.c, O_RDWR, DCC_LOCK_OPEN_NOWAIT, DCC_LOCK_ALL_FILE, &busy); if (busy) dcc_logbad(EX_USAGE, "database %s in use: %s", db_paths.prev_msg, dcc_emsg.c); if (cleardb && stat(db_paths.prev.c, &cur_db_sb) >= 0) { if (cur_db_sb.st_size != 0) dcc_logbad(EX_USAGE, "%s already exists", db_paths.prev_msg); cur_db_created = 1; } /* create and lock the current database if it did not exist * to ensure that the server daemon is not running */ if (old_db_fd < 0) { old_db_fd = dcc_lock_open(&dcc_emsg, db_paths.base.c, O_RDWR|O_CREAT, DCC_LOCK_OPEN_NOWAIT, DCC_LOCK_ALL_FILE, 0); if (old_db_fd < 0) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); cur_db_created = 1; } } else { /* Tell the daemon to start turning off the flooding * so we can adjust its positions in the flood map file * Try very hard to talk to it because releasing the database * can cause some UNIX flavors to stall dccd. */ ++flods_off; if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_SHUTDOWN, SHORT_DELAY)) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); } /* resolve whitelisted host names before locking the old database */ parse_white(); /* Tell the daemon to unlock the database between operations * and insist it stop flooding. 
*/ if (!standalone) { /* give the daemon a chance to stop pumping the floods */ for (;;) { if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_CHECK, SHORT_DELAY)) dcc_logbad(EX_UNAVAILABLE, "%s", dcc_emsg.c); i = flod_running(aop_resp.resp.val.string); if (i < 0) dcc_logbad(EX_PROTOCOL, "%s: unrecognized \"%s\"", dcc_aop2str(0, 0, DCC_AOP_FLOD, DCC_AOP_FLOD_CHECK), aop_resp.resp.val.string); if (i == 0) break; if (time(0) > clean_start.tv_sec+45) { if (flods_off < 2) { ++flods_off; if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_HALT, SHORT_DELAY)) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); continue; } if (time(0) > clean_start.tv_sec+60) dcc_logbad(EX_UNAVAILABLE, "failed to stop floods: %s", aop_resp.resp.val.string); } usleep(100*1000); } dccd_unlocked = 1; if (!persist_aop(DCC_AOP_DB_CLEAN, 0, SHORT_DELAY)) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); /* The daemon adds its own and removes our hold on flooding * when we tell it to unlock the database after every * operation. */ --flods_off; } if (cleardb) { quiet_trace_msg(DCC_VERSION" %s database %s %s", cur_db_created ? "creating" : "clearing", db_paths.next.c, hash_paths.next.c); } else if (clean_mode == REPAIR_MODE) { dcc_error_msg("explicit repair of %s", db_paths.prev_msg); } else { if (!db_open(&dcc_emsg, old_db_fd, db_paths.base.c, hash_paths.base.c, 0, DB_OPEN_RDONLY | (standalone ? DB_OPEN_LOCK_NOWAIT : DB_OPEN_LOCK_WAIT))) { /* If the hash table is sick, check timestamps only * as much as no hash table allows. * Then rebuild the hash table. 
*/ dcc_error_msg("%s; must repair", dcc_emsg.c); clean_mode = REPAIR_MODE; } else { if (db_debug) { quiet_trace_msg("%d old hash entries total," " %d or %d%% used in %s", ADJ_HLEN(db_hash_len), ADJ_HLEN(db_hash_used), (int)((ADJ_HLEN(db_hash_used) * 100.0) /ADJ_HLEN(db_hash_len)), hash_paths.prev.c); } prev_db_parms = db_parms; prev_db_hash_used = db_hash_used; /* save a handle on the old database to get * reports that arrive while we expire it */ old_db_fd = dup(db_fd); if (old_db_fd < 0) dcc_logbad(EX_OSERR, "dup(%s): %s", db_paths.prev.c, ERROR_STR()); /* read old and create new database files */ if (!expire(db_csize)) { prev_db_hash_used = 0; clean_mode = REPAIR_MODE; } } if (clean_mode == REPAIR_MODE) dcc_error_msg("repairing %s", db_paths.prev_msg); } /* Copy the current file with minimal expiring * if we are repairing the hash table (including now repairing * after encountering problems while expiring). */ if (clean_mode == REPAIR_MODE && !cleardb && !copy_db()) exit(EX_UNAVAILABLE); build_hash(); /* Push the checksums to the disk. Do it now with dccd running as * much as as it can despite our saturating the disk * to minimize failures by dccd to answer. */ if (!dbclean_flush(&dcc_emsg)) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); /* Copy any records from the old file to the new file that were * added to the old file while we were creating the new file. */ if (!cleardb && !catchup(&dcc_emsg)) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); /* We have the new database locked. * * Delete the current hash file and its optional symbolic link. * Install both new files and the optional symbolic links. 
*/ unlink_whine(0, hash_paths.prev.c, 1); unlink_whine(0, hash_paths.prev_old.c, 1); rename_bail(hash_paths.new.c, hash_paths.next.c); if (hash_paths.link_dir.c[0] != '\0') { unlink_whine(0, hash_paths.base.c, 1); if (0 > symlink(hash_paths.next.c, hash_paths.base.c)) dcc_error_msg("symlink(%s, %s): %s", hash_paths.next.c, hash_paths.base.c, ERROR_STR()); } new_hash_created = 0; if (db_hash_fd >= 0) strcpy(db_hash_nm.c, hash_paths.base.c); unlink_whine(0, db_paths.prev.c, 1); unlink_whine(0, db_paths.prev_old.c, 1); rename_bail(db_paths.new.c, db_paths.next.c); if (db_paths.link_dir.c[0] != '\0') { unlink_whine(0, db_paths.base.c, 1); if (0 > symlink(db_paths.next.c, db_paths.base.c)) dcc_error_msg("symlink(%s, %s): %s", db_paths.next.c, db_paths.base.c, ERROR_STR()); } new_db_created = 0; if (db_fd > 0) strcpy(db_nm.c, db_paths.base.c); cur_db_created = 0; if (cleardb) { flod_mmap_path_set(); unlink_whine(0, flod_mmap_path.c, 1); if (!db_close(DB_CLOSE)) exit(EX_UNAVAILABLE); exit(EX_OK); } /* if the daemon was not running, we're finished */ if (standalone) { /* install the flood positions if things are ok */ if (flod_mmaps) { memcpy(flod_mmaps, &new_flod_mmaps, sizeof(new_flod_mmaps)); flod_unmap(0, 0); } if (!db_close(DB_CLOSE)) exit(EX_UNAVAILABLE); exit(EX_OK); } /* tell the daemon to switch to the new database. This will leave * the daemon stuck waiting for us to unlock the new database. */ dccd_new_db("copy late arrivals"); /* install the flood positions if things are ok */ if (flod_mmaps) { memcpy(flod_mmaps, &new_flod_mmaps, sizeof(new_flod_mmaps)); flod_unmap(0, 0); } /* Copy any records from the old file to the new file in the * race to tell the daemon to switch to the new file. * The new file is still locked from build_hash(). * The daemon should be stuck waiting to open it in the * DCC_AOP_DB_NEW request via the preceding dccd_new_db(). 
* * Since the daemon has switched and probably cannot go back, * ignore any errors */ catchup(0); if (!db_close(DB_CLOSE)) exit(EX_UNAVAILABLE); /* finish() will be called via exit() to tell the daemon to resume * flooding if necessary. However, in the normal case, we removed * all counts against flooding before calling dccd_new_db() */ exit(EX_OK); } /* compute the old and new database or hash table file names */ static void get_paths(u_char db) { char arg0, arg; const char *arg_dir; paths_t *paths; DCC_PATH rel, prev, prev_link_dir; struct stat sb; int i; if (db) { paths = &db_paths; arg0 = arg = 'D'; arg_dir = db_dir; } else { paths = &hash_paths; arg0 = arg = 'H'; arg_dir = hash_dir; /* by default the hash table follows the database */ if (!arg_dir) { arg = 'D'; arg_dir = db_dir; } } memset(paths, 0, sizeof(*paths)); if (arg_dir && !dcc_fnm2abs(&paths->link_dir, arg_dir, 0)) dcc_error_msg("\"-%c %s\" is too long", arg, arg_dir); snprintf(rel.c, sizeof(rel.c), "%s%s", grey_on ? DB_GREY_NAME : DB_DCC_NAME, db ? "" : DB_HASH_SUFFIX); if (!dcc_fnm2abs(&paths->base, rel.c, 0)) dcc_logbad(EX_DATAERR, "impossibly long %s/%s", dcc_homedir.c, rel.c); paths->prev = paths->base; STRLCPY(paths->prev_msg, paths->base.c, sizeof(paths->prev_msg)); /* Find the previous file. */ memset(&prev_link_dir, 0, sizeof(prev_link_dir)); memset(&prev, 0, sizeof(prev)); if ((0 > stat(paths->base.c, &sb) ? 
(errno == ENOENT) : S_ISREG(sb.st_mode)) && 0 <= lstat(paths->base.c, &sb) && S_ISLNK(sb.st_mode) && 0 < readlink(paths->base.c, prev.c, sizeof(prev.c)) && 1 < (i = strlen(prev.c) - strlen(rel.c)) && prev.c[i-1] == '/' && !strcmp(rel.c, &prev.c[i])) { paths->prev = prev; memcpy(&prev_link_dir, &prev, i-1); snprintf(paths->prev_msg, sizeof(paths->prev_msg), "%s -> %s", paths->base.c, paths->prev.c); if (db_debug && (!arg_dir || strcmp(prev_link_dir.c, arg_dir))) quiet_trace_msg("infer previous \"-%c %s\"", arg0, prev_link_dir.c); } /* assume -H or -D if started by dccd */ if (paths->link_dir.c[0] == '\0' && clean_mode != NORMAL_MODE) paths->link_dir = prev_link_dir; if (!dcc_fnm2abs(&paths->prev_old, paths->prev.c, "-old")) dcc_logbad(EX_DATAERR, "impossibly long %s-old", paths->prev.c); if (paths->link_dir.c[0] != '\0' && 0 > stat(paths->link_dir.c, &sb)) { if (errno != ENOENT) { dcc_error_msg("-%c %s: %s", arg, paths->link_dir.c, ERROR_STR()); memset(&paths->link_dir, 0, sizeof(paths->link_dir)); } else if (0 > mkdir(paths->link_dir.c, 0755)) { dcc_error_msg("-%c %s mkdir(): %s", arg, paths->link_dir.c, ERROR_STR()); memset(&paths->link_dir, 0, sizeof(paths->link_dir)); } else if (0 > stat(paths->link_dir.c, &sb)) { memset(&paths->link_dir, 0, sizeof(paths->link_dir)); } } if (paths->link_dir.c[0] != '\0' && !S_ISDIR(sb.st_mode)) { dcc_error_msg("-%c %s: not a directory", arg, paths->link_dir.c); memset(&paths->link_dir, 0, sizeof(paths->link_dir)); } if (paths->link_dir.c[0] != '\0' && 0 > access(paths->link_dir.c, W_OK)) { dcc_error_msg("-%c %s: %s", arg, paths->link_dir.c, ERROR_STR()); memset(&paths->link_dir, 0, sizeof(paths->link_dir)); } if (paths->link_dir.c[0] != '\0' && snprintf(paths->next.c, ISZ(paths->next), "%s/%s", paths->link_dir.c, rel.c) >= ISZ(paths->next)) { dcc_error_msg("%s/%s is too long", paths->link_dir.c, rel.c); memset(&paths->link_dir, 0, sizeof(paths->link_dir)); } if (paths->link_dir.c[0] != '\0' && !dcc_fnm2abs(&paths->new, 
paths->next.c, "-new")) { dcc_error_msg("%s-new is too long", paths->next.c); memset(&paths->link_dir, 0, sizeof(paths->link_dir)); } if (paths->link_dir.c[0] == '\0') { paths->next = paths->base; if (!dcc_fnm2abs(&paths->new, paths->next.c, "-new")) { dcc_logbad(EX_DATAERR, "impossibly long %s-new", paths->new.c); } } } /* adjust output flood positions */ static DB_PTR /* next position to adjust */ adj_mmap(void) { FLOD_MMAP *mp; DB_PTR delta, new_pos; delta = new_db_csize - old_db_pos; new_pos = 0; for (mp = new_flod_mmaps.mmaps; mp <= LAST(new_flod_mmaps.mmaps); ++mp) { /* do nothing to marks we have already adjusted */ if (!(mp->flags & FLODMAP_FG_MARK)) continue; if (mp->confirm_pos > old_db_pos) { /* note the next mark that will need adjusting * but do not adjust it yet */ if (new_pos == 0 || new_pos > mp->confirm_pos) new_pos = mp->confirm_pos; } else { /* adjust marks not past the current position */ mp->confirm_pos += delta; mp->flags &= ~FLODMAP_FG_MARK; } } /* adjust the delay position if we just passed it */ if (adj_delay_pos) { if (new_flod_mmaps.delay_pos > old_db_pos) { /* note the next mark that will need adjusting */ if (new_pos == 0 || new_pos > new_flod_mmaps.delay_pos) new_pos = new_flod_mmaps.delay_pos; } else { new_flod_mmaps.delay_pos += delta; /* do it only once */ adj_delay_pos = 0; } } return new_pos; /* return next postion to adjust */ } /* find a checksum */ static const char * /* 0=broken database */ get_ck(DB_RCD_CK **ckp, /* point this to the checksum */ DCC_CK_TYPES type, const DCC_SUM *sum, DB_ST *hash_st, DB_ST *rcd2_st) /* put the record or garbage here */ { DB_FOUND db_result; /* We must lock the file to keep the daemon from changing the * internal hash table links. 
*/
	if (!DB_IS_LOCKED()
	    && 0 > db_lock())
		return 0;

	/* The result is 0 after a system error and otherwise a string
	 * naming what the lookup found, with "" meaning the checksum itself.
	 * Callers in this file then test *ckp for null; presumably
	 * db_lookup() clears it when nothing is found -- confirm there. */
	db_result = db_lookup(&dcc_emsg, type, sum, hash_st, rcd2_st, ckp);
	switch (db_result) {
	case DB_FOUND_SYSERR:
		dcc_error_msg("hash lookup for \"%s\" from "L_HxPAT": %s",
			      DB_TYPE2STR(type), old_db_pos, dcc_emsg.c);
		break;
	case DB_FOUND_IT:
		return "";
	case DB_FOUND_EMPTY:
		return "empty";
	case DB_FOUND_CHAIN:
		return "chain";
	case DB_FOUND_INTRUDER:
		return "intruder";
	}
	/* a system error or an unrecognized lookup result */
	return 0;
}



/* Check the leading report for a not recent checksum.
 *	type	    type of the checksum being considered
 *	rcd	    the record containing it
 *	rcd_ck	    the checksum within rcd
 *	hash_st	    passed through to the hash lookup in get_ck()
 *	rcd2_st	    where the leading record is mapped
 * The answer is
 *	1   keep it, because its own total reaches db_tholds[type] and it
 *	    is not older than new_spam_ts[type], or because the leader's
 *	    total reaches the threshold and the leader or the target
 *	    is not older than new_spam_ts[type]
 *	0   expire it
 *	-1  the lookup failed or a hash chain is broken */
static int				/* -1=broken database 0=expire 1=keep */
ck_lead(DCC_CK_TYPES type,
	const DB_RCD *rcd,		/* the record under consideration */
	const DB_RCD_CK *rcd_ck,
	DB_ST *hash_st,
	DB_ST *rcd2_st)
{
	DB_RCD_CK *lead_ck;
	DB_PTR prev;
	const char *ck_result;

	/* If the total for the checksum in the target record is so large
	 * that it ensures that the total will be large,
	 * and if the record is not ancient,
	 * then we do not need to spend time looking for the leader */
	if (DB_TGTS_CK(rcd_ck) >= db_tholds[type]
	    && !ts_older_ts(&rcd->ts, &new_spam_ts[type]))
		return 1;

	ck_result = get_ck(&lead_ck, type, &rcd_ck->sum, hash_st, rcd2_st);
	if (!ck_result)
		return -1;
	/* the target itself is in the old database,
	 * so finding no leader at all means the hash table is bad */
	if (!lead_ck) {
		dcc_error_msg("no leader for %s %s at "L_HxPAT": %s",
			      DB_TYPE2STR(type),
			      ck2str_err(type, &rcd_ck->sum, 0),
			      old_db_pos, ck_result);
		return -1;
	}

	/* Servers with the same name but differing server-IDs
	 * are in a single hash chain.
	 * Follow the chain backwards until reaching a record
	 * with the server-ID of the target. */
	while (type == DCC_CK_SRVR_ID
	       && rcd->srvr_id != rcd2_st->d.r->srvr_id) {
		prev = DB_PTR_EX(lead_ck->prev);
		if (prev == DB_PTR_NULL) {
			dcc_error_msg("null hash chain link"
				      " for %s %s %d at "L_HxPAT
				      " starting from %s %d at "L_HxPAT,
				      DB_TYPE2STR(type),
				      ck2str_err(type, &rcd_ck->sum, 0),
				      rcd2_st->d.r->srvr_id, rcd2_st->s.rptr,
				      ck2str_err(type, &lead_ck->sum, 0),
				      rcd->srvr_id, old_db_pos);
			return -1;
		}
		/* presumably this also points rcd2_st->d.r at the
		 * record containing the checksum -- confirm */
		lead_ck = db_map_rcd_ck(&dcc_emsg, rcd2_st, prev, type);
		if (!lead_ck) {
			dcc_error_msg("leader checksum for %s %s %d"
				      " at "L_HxPAT": %s",
				      DB_TYPE2STR(type),
				      ck2str_err(type, &rcd_ck->sum, 0),
				      rcd->srvr_id, old_db_pos, dcc_emsg.c);
			return -1;
		}
	}

	/* We know the target report is not recent.
	 * Forget the target report if the leader's total is trivial. */
	if (DB_TGTS_CK(lead_ck) < db_tholds[type])
		return 0;

	/* Forget the target if both the target and the leader are ancient.
	 * The leader might not be the newest checksum, but it usually is.
	 * The target might be the leader. */
	if (ts_older_ts(&rcd2_st->d.r->ts, &new_spam_ts[type])
	    && ts_older_ts(&rcd->ts, &new_spam_ts[type]))
		return 0;

	return 1;
}



/* Decide whether to keep a reputation checksum.
 *	type	    DCC_CK_REP_TOTAL or otherwise treated as the bulk type
 *	rcd	    the record containing the checksum
 *	rcd_ck	    the checksum within rcd
 *	rcd_ck_lim  end of the checksums in rcd
 *	hash_st, rcd2_st    passed through to get_ck() */
static int				/* -1=broken database 0=expire 1=keep */
ck_lead_rep(DCC_CK_TYPES type,
	    const DB_RCD *rcd,		/* the record under consideration */
	    const DB_RCD_CK *rcd_ck,
	    const DB_RCD_CK *rcd_ck_lim,
	    DB_ST *hash_st,
	    DB_ST *rcd2_st)
{
	DB_RCD_CK *lead_ck;
	DCC_TGTS bulk_tgts, total_tgts;
	const char *ck_result;

	/* expire all reputation checksums older than 30 days */
	if (ts_older_ts(&rcd->ts, &new_spam_ts[type]))
		return 0;

	/* Keep reputation checksums
	 *	that are not ancient
	 *	and if the reputation is not zero
	 *	    or if the total is large enough to be exculpatory
	 * This must be consistent with the summarizing in the server.
*
	 * We do not need to check the leader if the count
	 * in the target record is so large that it ensures
	 * that the leader's value will be non-trivial */
	if (type == DCC_CK_REP_TOTAL) {
		if (DB_TGTS_CK(rcd_ck) >= DCC_REP_TGTS_TOTAL_SIGNIF)
			return 1;
		/* the next checksum is often DCC_CK_REP_BULK */
		if (rcd_ck+1 < rcd_ck_lim
		    && DB_CK_TYPE(rcd_ck+1) == DCC_CK_REP_BULK
		    && DB_TGTS_CK(rcd_ck+1) >= DCC_REP_TGTS_BULK_SIGNIF)
			return 1;
	} else {
		if (DB_TGTS_CK(rcd_ck) >= DCC_REP_TGTS_BULK_SIGNIF)
			return 1;
		/* because the caller remembers our answer,
		 * and because a TOTAL checksum if present precedes a bulk
		 * checksum, we know that there is no TOTAL checksum in this
		 * record */
	}

	/* The known totals up to the target are not enough.
	 * We must check one or both leaders, starting with the bulk leader. */
	ck_result = get_ck(&lead_ck, DCC_CK_REP_BULK, &rcd_ck->sum,
			   hash_st, rcd2_st);
	if (!ck_result)
		return -1;
	if (!lead_ck) {
		/* the target is itself a bulk checksum,
		 * so a missing bulk leader means a bad database */
		if (type == DCC_CK_REP_BULK) {
			dcc_error_msg("no leader for %s %s at "L_HxPAT": %s",
				      DB_TYPE2STR(DCC_CK_REP_BULK),
				      ck2str_err(DCC_CK_REP_BULK,
						 &rcd_ck->sum, 0),
				      old_db_pos, ck_result);
			return -1;
		}
		/* with no reports of bulk mail,
		 * we know the reputation is zero */
		return 0;
	}
	bulk_tgts = DB_TGTS_CK(lead_ck);
	if (bulk_tgts >= DCC_REP_TGTS_BULK_SIGNIF)
		return 1;

	/* the bulk total is not decisive, so look at the total leader */
	ck_result = get_ck(&lead_ck, DCC_CK_REP_TOTAL, &rcd_ck->sum,
			   hash_st, rcd2_st);
	if (!ck_result)
		return -1;
	if (!lead_ck) {
		/* the target is itself a total checksum,
		 * so a missing total leader means a bad database */
		if (type == DCC_CK_REP_TOTAL) {
			dcc_error_msg("no leader for %s %s at "L_HxPAT": %s",
				      DB_TYPE2STR(DCC_CK_REP_TOTAL),
				      ck2str_err(DCC_CK_REP_TOTAL,
						 &rcd_ck->sum, 0),
				      old_db_pos, ck_result);
			return -1;
		}
		return 0;		/* we know the reputation is 0 */
	}
	total_tgts = DB_TGTS_CK(lead_ck);
	if (total_tgts >= DCC_REP_TGTS_TOTAL_SIGNIF)
		return 1;

	/* keep it if there are enough reports to compute a reputation
	 * and that reputation is not zero */
	if (total_tgts >= DCC_REP_TGTS_TOTAL_MIN
	    && get_reputation(bulk_tgts, total_tgts) != 0)
		return 1;

	return 0;
}



/* Start a new series of progress reports.
 * Restart the clocks from the current time, go back to checking every
 * 100 operations, and forget that any report has been made. */
static void
report_progress_init(void)
{
	gettimeofday(&db_time, 0);
	/* only the seconds of the starting time are used */
	progress_rpt_start.tv_sec = db_time.tv_sec;
	progress_rpt_checked = db_time;
	progress_rpt_last = db_time;
	progress_rpt_base = 100;
	progress_rpt_cnt = progress_rpt_base;
	progress_rpt_started = 0;
}



/* Report progress if it is time.
 *	final	    1=last call of this series
 *	s1, s2	    text before the counts and after the total in the message
 *	done, total amount of work finished and expected
 *	scale	    divisor applied to done and total in the message;
 *		    it is used without a check and so must not be 0
 * Also retune progress_rpt_cnt, the number of operations until the
 * next call, and give up if quick cleaning has taken more than 15 minutes.
 * The result is the time since the previous call as computed
 * by tv_diff2us(). */
static time_t				/* us since last check */
report_progress(u_char final, const char *s1, const char *s2,
		DB_PTR done, DB_PTR total, DB_PTR scale)
{
	time_t reported_us, checked_us, secs, interval;
	double percent;

	/* nothing to do counts as finished */
	if (!total)
		percent = 100.0;
	else
		percent = (done*100.0)/total;

	gettimeofday(&db_time, 0);
	checked_us = tv_diff2us(&db_time, &progress_rpt_checked);
	progress_rpt_checked = db_time;
	/* Check frequently enough to report or unlock the database.
	 * Adjust the number of operations until the next check
	 * based on the time spent on the previous and keep it
	 * between 100 and 10,000. */
	if (checked_us > 0)
		progress_rpt_base = ((progress_rpt_base * 0.5 * DCC_US
				      * min(REPORT_INTERVAL_FAST_SECS*DCC_US,
					    UNLOCK_INTERVAL_USECS))
				     / checked_us);
	else
		progress_rpt_base = 100;
	if (progress_rpt_base < 100)
		progress_rpt_base = 100;
	if (progress_rpt_base > 10*1000)
		progress_rpt_base = 10*1000;
	progress_rpt_cnt = progress_rpt_base;

	/* report more often when debugging */
	interval = ((db_debug > 1)
		    ?
REPORT_INTERVAL_FAST_SECS : REPORT_INTERVAL_SECS); /* try not to start reporting progress at the end */ if (!progress_rpt_started && (total*1.0 - done*1.0) / progress_rpt_base <= interval*1.0) return checked_us; reported_us = tv_diff2us(&db_time, &progress_rpt_last); if (reported_us >= interval * DCC_US || (final && progress_rpt_percent != 100)) { progress_rpt_started = 1; progress_rpt_percent = percent; secs = db_time.tv_sec - progress_rpt_start.tv_sec; secs -= secs % interval; progress_rpt_last.tv_sec = progress_rpt_start.tv_sec + secs; if (db_debug > 1) quiet_trace_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%" " db_mmaps=%d hash=%d", s1, done/scale, total/scale, s2, progress_rpt_percent, db_stats.db_mmaps, db_stats.hash_mmaps); else quiet_trace_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%", s1, done/scale, total/scale, s2, progress_rpt_percent); } if (clean_mode == QUICK_MODE && !final) { if (db_time.tv_sec > clean_start.tv_sec + 15*60) dcc_logbad(EX_UNAVAILABLE, "quick cleaning too slow"); } return checked_us; } /* delete old, less fuzzy checksums in the new record */ static void trim_old_fuz(DB_RCD *new, DB_RCD_CK **end_ck) { DB_RCD_CK *rcd_ck; DCC_CK_TYPES type; int len; rcd_ck = new->cks; while (rcd_ck < *end_ck) { type = DB_CK_TYPE(rcd_ck); if (!ts_older_ts(&new->ts, &new_all_ts[type])) { ++rcd_ck; continue; } ++obs_rcds; new->fgs_num_cks = (DB_NUM_CKS(new) - 1) | DB_RCD_FG_TRIM; --*end_ck; len = (char *)*end_ck - (char *)rcd_ck; if (len == 0) return; memmove(rcd_ck, rcd_ck+1, len); } } static void adj_def_expire(void) { double new_dbsize, new_dbsize1, day_rate, db_ratio; int spam_secs, secs; char new_dbsize_buf[20], csize_buf[20], old_csize_buf[20]; char day_rate_buf[20]; /* do this only once */ if (def_exp_ratio != 0.0) return; /* Compute the ratio of size of the database 24 hours from now * to the size of the window. Assume: * - We will receive about the same number of reports in the next * 24 hours as the last 24. 
This is a good assumption for * weekdays, but as much as 30% wrong about weekends. * - Dbclean will be run once per day at the current time. * - The size of the database is a linear function of expiration * duration. This is tenuous when the spam expiration duration * is less than 1 day. * Use the maximum of two guesses for tomorrow's database size. * One guess is the current size, base on assuming that * we will use roughly the same expiration durations and * so the database will grow to about size it now has. * The other guess uses the previous database size and the * avarage data rate. It compensates for short term changes * in the rate and for running dbclean more than once per day. */ new_dbsize = db_parms.db_csize; size2str(csize_buf, sizeof(csize_buf), new_dbsize, 1); new_dbsize1 = db_parms.old_db_csize; size2str(old_csize_buf, sizeof(old_csize_buf), new_dbsize1, 1); day_rate = db_add_rate(&db_parms, 0, 0); if (day_rate >= 0.0) day_rate *= (24*60*60); size2str(day_rate_buf, sizeof(day_rate_buf), day_rate, 1); /* without information, be pessimistic and assume 1.4 GByte/day */ if (day_rate <= 0.0 && !grey_on) day_rate = 1.4*1024.0*1024.0*1024.0; if (day_rate > 0.0) { new_dbsize1 += day_rate; if (new_dbsize < new_dbsize1) new_dbsize = new_dbsize1; } size2str(new_dbsize_buf, sizeof(new_dbsize_buf), new_dbsize, 1); if (db_debug) quiet_trace_msg("predict new_dbsize=%s from db_csize=%s" " old_db_csize=%s rate=%s", new_dbsize_buf, csize_buf, old_csize_buf, day_rate_buf); /* Assume there will be 20% as many bytes used in the hash table * as in the database */ new_dbsize *= 1.2; /* we cannot adjust the defaults * - the first time dbclean is run * - if the previous run used a larger than default value * - there is no need to reduce the default because the predicted * maximum size is smaller than the target maximum */ spam_secs = db_parms.ex_secs[DCC_CK_FUZ2].spam; if (spam_secs != 0 && spam_secs <= DB_EXPIRE_SPAMSECS_DEF && new_dbsize > db_max_byte && (db_ratio = 
(db_max_byte / new_dbsize)) < 1.0
        && ((def_exp_ratio = (spam_secs*db_ratio)/DB_EXPIRE_SPAMSECS_DEF)
            <= 0.99)) {
        /* change the two durations together and so with same errors */
        def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF * def_exp_ratio;
        def_expire_secs = DB_EXPIRE_SECS_DEF * def_exp_ratio;
        /* round down to whole hours but not below the minimum */
        def_expire_secs -= def_expire_secs % (60*60);
        if (def_expire_secs < DB_EXPIRE_SECS_DEF_MIN)
            def_expire_secs = DB_EXPIRE_SECS_DEF_MIN;
        /* round down to whole days but not below the minimum */
        def_expire_spamsecs -= def_expire_spamsecs % (24*60*60);
        if (def_expire_spamsecs < DB_EXPIRE_SPAMSECS_DEF_MIN)
            def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF_MIN;
#if DCC_DB_MIN_MBYTE == 0 && !defined(DCC_HAVE_PHYSMEM)
        if (def_expire_secs == DB_EXPIRE_SECS_DEF_MIN
            || def_expire_spamsecs == DB_EXPIRE_SPAMSECS_DEF_MIN)
            quiet_trace_msg("cannot determine physical RAM; rebuild"
                            " with ./configure with-db-memory");
#endif
        return;
    }

    /* no reduction is needed */
    def_exp_ratio = 1.0;

    /* if the defaults do not need to be reduced now but they
     * were reduced before, then relax them gently */
    if (spam_secs < DB_EXPIRE_SPAMSECS_DEF) {
        /* use the ages of the previous expiration timestamps */
        secs = (clean_start.tv_sec
                - ts2secs(&db_parms.ex_spam[DCC_CK_FUZ2]));
        if (secs > 0 && secs < DB_EXPIRE_SPAMSECS_DEF)
            def_expire_spamsecs = secs;
        secs = (clean_start.tv_sec
                - ts2secs(&db_parms.ex_all[DCC_CK_FUZ2]));
        if (secs > 0 && secs < DB_EXPIRE_SECS_DEF)
            def_expire_secs = secs;
    }
}



/* Decide how expire() should give up on a damaged old database.
 * Returns 1 in REPAIR_MODE, which makes EXPIRE_BAIL() jump to
 * copy_failed and finish with what has been copied.
 * Otherwise the flood map is unmapped, the database is closed, and 0 is
 * returned, which makes EXPIRE_BAIL() return 0 from expire(). */
static u_char
expire_fail(void)
{
    /* Only stop copying the old database if we are repairing. */
    if (clean_mode == REPAIR_MODE)
        return 1;

    flod_unmap(0, 0);
    db_close(DB_CLOSE);
    return 0;
}



/* copy the existing database, discard junk and old entries
 *
 * old_db_csize is the position in the old database at which copying
 * stops; records are read starting at DB_PTR_BASE.
 *
 * The steps are:
 *	- settle expire_secs and expire_spamsecs, using adj_def_expire()
 *	    for a value that is still negative
 *	- compute the durations in new_ex_secs[] and the timestamps in
 *	    new_spam_ts[], new_all_ts[], stale_ts[] and ancient_ts[]
 *	- write the new header and copy the flooding positions
 *	- examine every record and drop, trim or split it, and write
 *	    what remains with write_new_rcd()
 *	- fix the flooding positions, close the old database, rewrite
 *	    the header, and report the totals
 *
 * Returns the value of db_close() after a complete copy or after a
 * failure in REPAIR_MODE.  EXPIRE_BAIL() returns 0 after a failure in
 * any other mode. */
static u_char                           /* 1=done 0=database broken */
expire(DB_PTR old_db_csize)
{
/* give up on the old database as directed by expire_fail() */
#define EXPIRE_BAIL() {if (expire_fail()) goto copy_failed; return 0;}
    DCC_TS ts;
    u_char emptied, reduced_defaults, whines;
    u_char old_ok[DCC_DIM_CKS];
    DB_RCD rcd_buf, new;
    DB_ST *rcd_st, *hash_st, *rcd2_st;
    const DB_RCD *rcd;
    const DB_RCD_CK *rcd_ck, *rcd_ck_lim, *rcd_ck2;
    DB_RCD_CK *new_ck;
    DCC_TGTS tgts_raw, ck_tgts;
    u_char expire_rcd;                  /* 1=expire entire record */
    u_char split_ok;                    /* 1=split ok because not floodable */
    u_char obs_lvl;
    int rcd_num_cks, new_num_cks, nokeep_num_cks;
    DB_PTR rcd_prev, min_delay_pos, next_adj_pos;
    FLOD_MMAP *mp;
    DCC_CK_TYPES prev_type, type, type2;
    int rcd_len;
    struct stat sb;
    time_t need_unlock;
    DB_PTR rep_prev_db_pos = DB_PTR_NULL;
    int rep_prev_lead = 0;
    int i;

    /* pick the durations, preferring values that are already set
     * over the adjusted defaults */
    reduced_defaults = 0;
    if (expire_secs < 0) {
        adj_def_expire();
        if (def_expire_secs > expire_spamsecs && expire_spamsecs > 0) {
            expire_secs = expire_spamsecs;
        } else {
            if (def_expire_secs != DB_EXPIRE_SECS_DEF
                && def_exp_ratio != 1.0)
                reduced_defaults = 1;
            expire_secs = def_expire_secs;
        }
    }
    if (expire_spamsecs < 0) {
        adj_def_expire();
        if (def_expire_spamsecs < expire_secs) {
            expire_spamsecs = expire_secs;
        } else {
            if (def_expire_spamsecs != DB_EXPIRE_SPAMSECS_DEF
                && def_exp_ratio != 1.0)
                reduced_defaults = 1;
            expire_spamsecs = def_expire_spamsecs;
        }
    }
    if (expire_spamsecs > 0 && expire_spamsecs < expire_secs)
        dcc_logbad(EX_USAGE,
                   "spam expiration -E must be longer than -e");

    expired_rcds = 0;
    expired_cks = 0;
    kept_cks = white_cks;
    need_unlock = 0;
    report_progress_init();

    /* Compute timestamps for records we keep.
     * Use the values from the previous use of dbclean as defaults
     * unless they are bogus */
    memset(old_ok, 0, sizeof(old_ok));
    secs2ts(&ts, clean_start.tv_sec);
    for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
        DB_EX_SEC *th = &db_parms.ex_secs[type];

        if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type))
            continue;
        if (DCC_CK_IS_REP(grey_on, type))
            continue;
        if (th->spam <= 0 || th->spam > DB_EXPIRE_SECS_MAX)
            continue;
        if (th->all <= 0 || th->all > th->spam)
            continue;
        /* the old timestamps must not be after the start of cleaning */
        if (ts_newer_ts(&db_parms.ex_spam[type], &ts))
            continue;
        if (ts_newer_ts(&db_parms.ex_all[type], &ts))
            continue;
        old_ok[type] = 1;       /* old values for this type are ok */
    }

    for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
        DB_EX_SEC *new_th = &new_ex_secs[type];
        int old_all = db_parms.ex_secs[type].all;
        int old_spam = db_parms.ex_secs[type].spam;

        if (type == DCC_CK_SRVR_ID) {
            /* keep server-ID declarations */
            new_th->all = DB_EXPIRE_SERVER_ID;
            new_th->spam = DB_EXPIRE_SERVER_ID;
        } else if (grey_on) {
            if (old_ok[type]) {
                /* This is the path by which the dccd -G
                 * parameters are used. */
                new_th->all = old_all;
                new_th->spam = old_spam;
            } else if (DCC_CK_IS_GREY_TRIPLE(1, type)) {
                new_th->all = DEF_GREY_WINDOW;
                new_th->spam = DEF_GREY_WHITE;
            } else if (DCC_CK_IS_GREY_MSG(1, type)
                       || type == DCC_CK_BODY) {
                new_th->all = DEF_GREY_WINDOW;
                new_th->spam = DEF_GREY_WINDOW;
            } else {
                new_th->all = 1;
                new_th->spam = 1;
            }
        } else if (DCC_CK_IS_REP(grey_on, type)) {
            /* reputations need consistent expirations
             * but allow "never" */
            if (expire_secs >= DB_EXPIRE_SECS_MAX) {
                new_th->all = DB_EXPIRE_SECS_MAX;
                new_th->spam = DB_EXPIRE_SECS_MAX;
            } else {
                new_th->all = DB_EXPIRE_REP_SECS_DEF;
                new_th->spam = DB_EXPIRE_REP_SPAMSECS_DEF;
            }
        } else if (have_expire_parms < 0
                   && old_ok[type]
                   && (db_parms.flags & DB_PARM_FG_EXP_SET)) {
            /* use the old durations if they are valid
             * and we have no expiration parameters */
            new_th->all = old_all;
            new_th->spam = old_spam;
        } else {
            new_th->all = expire_secs;
            new_th->spam = (DCC_CK_LONG_TERM(type)
                            ? expire_spamsecs
                            : expire_secs);
            /* mention reduced defaults once */
            if (reduced_defaults) {
                quiet_trace_msg("adjust default by"
                                " %4.2f to -e%dhours"
                                " -E%ddays",
                                def_exp_ratio,
                                expire_secs/(60*60),
                                expire_spamsecs / (24*60*60));
                reduced_defaults = 0;
            }
        }

        /* compute oldest timestamp for this type of checksum,
         * without going crazy with "-Enever" */
        secs2ts(&new_spam_ts[type],
                clean_start.tv_sec - min(clean_start.tv_sec,
                                         new_th->spam));
        new_all_secs[type] = clean_start.tv_sec
                             - min(clean_start.tv_sec, new_th->all);
        secs2ts(&new_all_ts[type], new_all_secs[type]);
        secs2ts(&stale_ts[type],
                clean_start.tv_sec - min(clean_start.tv_sec,
                                         max(DB_EXPIRE_SECS_DEF,
                                             new_th->all)));
        secs2ts(&ancient_ts[type],
                clean_start.tv_sec - min(clean_start.tv_sec,
                                         2*new_th->spam));
    }

    /* put the timestamps into the new file */
    write_new_hdr(1);

    /* if we are running as root,
     * don't change the owner of the database */
    if (getuid() == 0) {
        if (0 > fstat(old_db_fd, &sb))
            dcc_logbad(EX_IOERR, "fstat(%s): %s",
                       db_paths.base.c, ERROR_STR());
        if (0 > fchown(new_db_fd, sb.st_uid, sb.st_gid))
            dcc_logbad(EX_IOERR, "fchown(%s,%d,%d): %s",
                       db_paths.new.c, (int)sb.st_uid, (int)sb.st_gid,
                       ERROR_STR());
    }

    if (DB_PTR_BASE != lseek(old_db_fd, DB_PTR_BASE, SEEK_SET))
        dcc_logbad(EX_IOERR, "lseek(%s,%d): %s",
                   db_paths.prev.c, DB_PTR_BASE, ERROR_STR());
    read_rcd_invalidate(0);

    /* start the new flooding positions from a copy of the old ones
     * and mark the entries that have a peer */
    flod_mmap(0, &db_parms.sn, 0, 1);
    if (flod_mmaps)
        memcpy(&new_flod_mmaps, flod_mmaps, sizeof(new_flod_mmaps));
    min_confirm_pos = DB_PTR_NULL;
    min_delay_pos = DB_PTR_NULL;
    next_adj_pos = DB_PTR_BASE;
    for (mp = new_flod_mmaps.mmaps;
         mp <= LAST(new_flod_mmaps.mmaps);
         ++mp) {
        if (mp->rem_hostname[0] == '\0') {
            mp->flags &= ~FLODMAP_FG_MARK;
        } else {
            mp->flags |= FLODMAP_FG_MARK;
        }
    }
    adj_delay_pos = (new_flod_mmaps.delay_pos != 0) ? 1 : 0;

    /* emptied stays 1 unless the whole old file is copied */
    emptied = 1;
    /* limit on the number of complaints about bad records */
    whines = 20;
    timeval2ts(&new_flod_mmaps.sn, &clean_start, 0);
    /* the mapped record states are not used when repairing */
    if (clean_mode != REPAIR_MODE) {
        rcd_st = GET_DB_ST();
        hash_st = GET_DB_ST();
        rcd2_st = GET_DB_ST();
    } else {
        rcd_st = 0;
        hash_st = 0;
        rcd2_st = 0;
    }

    /* copy the old file to the new,
     * discarding and compressing old data as we go */
    for (old_db_pos = DB_PTR_BASE;
         old_db_pos < old_db_csize;
         old_db_pos += rcd_len) {
        if (--progress_rpt_cnt <= 0)
            need_unlock += report_progress(0, " processed", "MBytes",
                                           old_db_pos, old_db_csize,
                                           1024*1024);

        if (old_db_pos == next_adj_pos)
            next_adj_pos = adj_mmap();

        if (clean_mode != REPAIR_MODE) {
            /* read the record by mapping if not repairing */
            if (!db_map_rcd(0, rcd_st, old_db_pos, &rcd_len))
                EXPIRE_BAIL();
            rcd = rcd_st->d.r;
        } else {
            rcd_len = read_rcd(0, &rcd_buf, old_db_fd, old_db_pos,
                               db_paths.prev.c);
            if (rcd_len <= 0) {
                if (rcd_len == 0)
                    dcc_error_msg("unexpected EOF in %s at "
                                  L_HxPAT" instead of "
                                  L_HxPAT,
                                  db_paths.prev.c,
                                  old_db_pos,
                                  old_db_csize);
                /* give up and ask our neighbors to rewind */
                EXPIRE_BAIL();
            }
            rcd = &rcd_buf;
        }

        if (old_db_pos + rcd_len > old_db_csize) {
            dcc_error_msg("bad record length %d at "L_HxPAT" in %s",
                          rcd_len, old_db_pos, db_paths.prev.c);
            EXPIRE_BAIL();
        }

        /* skip end-of-page padding */
        if (rcd_len == sizeof(*rcd)-sizeof(rcd->cks)) {
            if (memcmp(rcd, zero_rcd, rcd_len)) {
                dcc_error_msg("non-zero end-of-page padding"
                              " at "L_HxPAT" in %s",
                              old_db_pos, db_paths.prev.c);
                EXPIRE_BAIL();
            }
            if (old_db_pos % sys_pagesize
                < sys_pagesize - DB_RCD_LEN_MAX) {
                dcc_error_msg("stray end-of-page padding"
                              " at "L_HxPAT" in %s",
                              old_db_pos, db_paths.prev.c);
                EXPIRE_BAIL();
            }
            continue;
        }

        /* Start the new record from the fixed part of the old one.
         * NOTE(review): DB_RCD_FG_MASK is defined elsewhere.  The code
         * below counts checksums into new.fgs_num_cks from new.cks[0]
         * and tests flags of the new record, so this presumably clears
         * the count and keeps the flags -- confirm. */
        memcpy(&new, rcd, sizeof(new)-sizeof(new.cks));
        new.fgs_num_cks &= ~DB_RCD_FG_MASK;
        if (DB_RCD_ID(rcd) == DCC_ID_WHITE) {
            /* skip whitelist entries if whitelist source is ok */
            if (!keep_white)
                continue;
            /* refresh whitelist entries if source is bad */
            timeval2ts(&new.ts, &clean_start, 0);
        }
        rcd_num_cks = DB_NUM_CKS(rcd);

        /* expire or throw away deleted reports */
        tgts_raw = DB_TGTS_RCD_RAW(&new);
        if (tgts_raw == 0) {
            ++expired_rcds;
            expired_cks += rcd_num_cks;
            continue;
        }

        if (tgts_raw > DCC_TGTS_MAX_DB) {
            dcc_error_msg("report at "L_HxPAT
                          " with bogus target count"
                          " %#x in %s",
                          old_db_pos, tgts_raw, db_paths.prev.c);
            EXPIRE_BAIL();
        }

        if (ts_newer_ts(&new.ts, &future_ts)) {
            if (whines > 0)
                dcc_error_msg("discarding report at "L_HxPAT
                              " from the future %s%s",
                              old_db_pos,
                              ts2str_err(&new.ts),
                              --whines == 0
                              ? "; stop complaining"
                              : "");
            ++expired_rcds;
            expired_cks += rcd_num_cks;
            continue;
        }

        expire_rcd = 1;         /* assume record will be deleted */
        obs_lvl = 0;
        split_ok = 1;           /* assume it cannot be flooded */
        nokeep_num_cks = 0;
        new_ck = new.cks;
        rcd_ck = rcd->cks;
        rcd_ck_lim = &rcd->cks[rcd_num_cks];
        /* decide the fate of each checksum in the old record */
        for (prev_type = DCC_CK_INVALID;
             rcd_ck < rcd_ck_lim;
             prev_type = type, ++rcd_ck) {
            type = DB_CK_TYPE(rcd_ck);
            if (!DCC_CK_TYPE_DB_OK(type)) {
                dcc_error_msg("%s checksum at "L_HxPAT" in %s",
                              DB_TYPE2STR(type),
                              old_db_pos, db_paths.prev.c);
                EXPIRE_BAIL();
            }

            /* A pointer of DB_PTR_BAD implies that dbclean
             * or the system crashed before the cleaned database
             * reached the disk and that we cannot trust any
             * of the pointers and so must repair.
             * A garbage pointer implies that the database is
             * trash. */
            rcd_prev = DB_PTR_EX(rcd_ck->prev);
            if (rcd_prev == DB_PTR_NULL) {
                ;
            } else if (DB_PTR_IS_BAD(rcd_prev, old_db_pos)) {
                if (whines > 0)
                    dcc_error_msg("invalid link "L_HxPAT
                                  " at " L_HxPAT" in %s%s",
                                  rcd_prev,
                                  old_db_pos, db_paths.prev.c,
                                  --whines == 0
                                  ? "; stop complaining"
                                  : "");
                if (clean_mode != REPAIR_MODE
                    || rcd_prev != DB_PTR_BAD)
                    EXPIRE_BAIL();
            }

            if (type <= prev_type
                && prev_type != DCC_CK_FLOD_PATH
                && clean_mode != REPAIR_MODE) {
                dcc_error_msg("out of order %s"
                              " checksum at "L_HxPAT" in %s",
                              DB_TYPE2STR(type),
                              old_db_pos, db_paths.prev.c);
                EXPIRE_BAIL();
            }

            if (tgts_raw == DCC_TGTS_REP_ADJ) {
                if (!DCC_CK_IS_REP(grey_on, type)) {
                    dcc_error_msg("discarding unauthorized"
                                  " reputation adjustment"
                                  " for %s at "L_HxPAT
                                  " in %s",
                                  DB_TYPE2STR(type),
                                  old_db_pos, db_paths.prev.c);
                    ++expired_cks;
                    new.fgs_num_cks = (DB_NUM_CKS(&new)
                                       | DB_RCD_FG_TRIM);
                    continue;
                }
                /* discard useless reputation scaling if the
                 * back-pointer reliably marks it useless */
                if (clean_mode != REPAIR_MODE
                    && DB_PTR_EX(rcd_ck->prev) == DB_PTR_NULL) {
                    ++expired_cks;
                    continue;
                }
            }

            /* Silently discard junk from other servers,
             * provided it is junk by default */
            if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type)
                && DB_GLOBAL_NOKEEP(grey_on, type)
                && type != DCC_CK_FLOD_PATH
                && type != DCC_CK_SRVR_ID
                && DB_RCD_ID(&new) != DCC_ID_WHITE) {
                ++expired_cks;
                continue;
            }

            /* Keep paths except on old records or records that
             * have been trimmed or compressed.
             * Never remove paths from server-ID declarations. */
            if (type == DCC_CK_FLOD_PATH) {
                if (DB_RCD_TRIMMED(&new)
                    || DB_RCD_ID(&new) == DCC_ID_COMP)
                    continue;
                /* forget line number on old whitelist entry */
                if (DB_RCD_ID(&new) == DCC_ID_WHITE)
                    continue;
                /* Look ahead at the checksums after the path.
                 * NOTE(review): rcd_ck2 is examined before it is
                 * compared with rcd_ck_lim, which assumes that a path
                 * is never the last checksum of a record -- confirm. */
                rcd_ck2 = rcd_ck+1;
                for (;;) {
                    type2 = DB_CK_TYPE(rcd_ck2);
                    if (type2 == DCC_CK_SRVR_ID
                        || !ts_older_ts(&new.ts, &new_all_ts[type2])) {
                        /* keep this path since this report
                         * is a server-ID declaration
                         * or not old */
                        *new_ck = *rcd_ck;
                        ++new_ck;
                        ++new.fgs_num_cks;
                        ++nokeep_num_cks;
                        break;
                    }
                    if (++rcd_ck2 >= rcd_ck_lim) {
                        /* we are discarding this path */
                        new.fgs_num_cks = (DB_NUM_CKS(&new)
                                           | DB_RCD_FG_TRIM);
                        break;
                    }
                }
                continue;
            }

            if (!ts_older_ts(&new.ts, &new_all_ts[type])) {
                /* This report is recent.  However, junk
                 * does not make the report needed */
                if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type)
                    && DB_RCD_ID(&new) != DCC_ID_WHITE) {
                    ++nokeep_num_cks;
                } else if (DB_CK_JUNK(rcd_ck)) {
                    /* This checksum is obsolete.
                     * If it has the highest level of
                     * fuzziness, then it controls whether
                     * the whole report is needed. */
                    if (obs_lvl < db_ck_fuzziness[type]) {
                        obs_lvl = db_ck_fuzziness[type];
                        expire_rcd = 1;
                    }
                } else {
                    /* This checksum is not obsolete.
                     * If it is at least as fuzzy as any
                     * other checksum, then it can say
                     * the report is needed */
                    if (obs_lvl <= db_ck_fuzziness[type]) {
                        obs_lvl = db_ck_fuzziness[type];
                        expire_rcd = 0;
                        split_ok = 0;   /* might flood it */
                    }
                    /* note 1st plausible delay and
                     * flooding positions */
                    if (min_delay_pos == DB_PTR_NULL
                        && DB_RCD_DELAY(&new))
                        min_delay_pos = new_db_csize;
                    if (min_confirm_pos == DB_PTR_NULL
                        && !DB_RCD_TRIMMED(&new))
                        min_confirm_pos = new_db_csize;
                }

            } else {
                /* This checksum is not recent,
                 * but it might not be old enough to expire.
                 *
                 * Throw away delete requests
                 * and useless checksums from other servers */
                if (tgts_raw == DCC_TGTS_DEL
                    || DB_TEST_NOKEEP(db_parms.nokeep_cks, type)) {
                    ++expired_cks;
                    new.fgs_num_cks = (DB_NUM_CKS(&new)
                                       | DB_RCD_FG_TRIM);
                    continue;
                }

                /* Throw away old obsolete checksums
                 * and entire reports if the fuzziest
                 * checksum is obsolete.
                 * A checksum is obsolete if it was marked
                 * obsolete or if its total is spam
                 * or if it should have
                 * been expired before.
                 * An old report of a less fuzzy but still
                 * common checksum that is not compressible
                 * with new reports can otherwise never
                 * expire. */
                if (DB_CK_JUNK(rcd_ck)
                    || (clean_mode != REPAIR_MODE
                        && ts_older_ts(&new.ts, &ancient_ts[type]))) {
                    if (obs_lvl < db_ck_fuzziness[type]) {
                        obs_lvl = db_ck_fuzziness[type];
                        expire_rcd = 1;
                    }
                    ++expired_cks;
                    new.fgs_num_cks = (DB_NUM_CKS(&new)
                                       | DB_RCD_FG_TRIM);
                    continue;
                }

                /* old summaries are unneeded, because
                 * they have already been flooded.
                 * They do not contribute to local counts */
                if (DB_RCD_SUMRY(&new)
                    && ts_older_ts(&new.ts, &stale_ts[type]))
                    continue;

                /* Discard this checksum if its ultimate total
                 * is low or ancient
                 * or if it reaches spam after this report.
                 * To determine the ultimate total, we must
                 * have a hash table to find the newest record,
                 * which contains the final total */
                if (clean_mode != REPAIR_MODE) {
                    if (DCC_CK_IS_REP(grey_on, type)) {
                        /* use the answer for the previous
                         * reputation checksum if known */
                        if (rep_prev_db_pos == old_db_pos) {
                            i = rep_prev_lead;
                        } else {
                            rep_prev_db_pos = old_db_pos;
                            i = ck_lead_rep(type, rcd_st->d.r,
                                            rcd_ck, rcd_ck_lim,
                                            hash_st, rcd2_st);
                            rep_prev_lead = i;
                        }
                    } else {
                        i = ck_lead(type, rcd_st->d.r, rcd_ck,
                                    hash_st, rcd2_st);
                    }
                    /* negative means failure and 0 means unneeded */
                    if (i < 0)
                        EXPIRE_BAIL();
                    if (!i) {
                        ++expired_cks;
                        new.fgs_num_cks = (DB_NUM_CKS(&new)
                                           | DB_RCD_FG_TRIM);
                        continue;
                    }
                }

                /* We did not delete this checksum and so
                 * it might be fuzzy enough to control whether
                 * the entire record should be expired.
                 * NOTE(review): split_ok is set back to 1 here even if
                 * a recent checksum earlier in this record cleared it
                 * because the record might be flooded -- confirm that
                 * this is intended. */
                if (obs_lvl <= db_ck_fuzziness[type]) {
                    expire_rcd = 0;
                    split_ok = 1;
                    /* If this is the fuzziest checksum we
                     * have seen, then preceding and so
                     * less fuzzy checksums are unneeded
                     * if they are old.
                     * Assume that checksums are ordered
                     * in the record by fuzziness. */
                    if (obs_lvl < db_ck_fuzziness[type]) {
                        obs_lvl = db_ck_fuzziness[type];
                        if (obs_lvl != DCC_CK_FUZ_LVL_REP
                            && !grey_on)
                            trim_old_fuz(&new, &new_ck);
                    }
                }
            }

            /* Keep this checksum if we decide the whole report
             * is needed, and unless we trim it in favor of a
             * later checksum. */
            *new_ck = *rcd_ck;
            new_ck->prev = DB_PTR_CP(DB_PTR_BAD);
            ++new_ck;
            ++new.fgs_num_cks;
        }

        /* Occasionally let the daemon work with the old file.
         * We are copying from the old database to the new dcc_db file.
         * If we are repairing, we are reading the old dcc_db file with
         * read(); otherwise we have opened the database with
         * db_open(DB_OPEN_RDONLY) and are using mmap(). */
        if (need_unlock >= UNLOCK_INTERVAL_USECS) {
            need_unlock = 0;
            if (!standalone && !db_unlock())
                EXPIRE_BAIL();
        }

        /* if none of its checksums are needed,
         * then discard the entire record */
        if (expire_rcd) {
            expired_cks += DB_NUM_CKS(&new);
            ++expired_rcds;
            continue;
        }

        new_num_cks = DB_NUM_CKS(&new);
        kept_cks += new_num_cks - nokeep_num_cks;

        /* Put the new record into the new file.
         *
         * If all of the record is recent, if it contains 1 checksum,
         * or if all of its totals are the same, then simply add it.
         *
         * Otherwise, split it into records of identical counts
         * to allow compression or combining with other records. */
        if (new_num_cks > 1
            && (split_ok
                || DB_RCD_ID(&new) == DCC_ID_COMP
                || DB_RCD_TRIMMED(&new))) {
            for (;;) {
                /* skip the checksums that have the same total
                 * as the first checksum to leave them with the
                 * original new report */
                new_ck = new.cks;
                ck_tgts = DB_TGTS_CK(new_ck);
                for (i = 1; i < new_num_cks; ++i) {
                    ++new_ck;
                    if (DB_TGTS_CK(new_ck) != ck_tgts)
                        break;
                }
                /* stop when all remaining totals are the same */
                if (new_num_cks <= i)
                    break;

                new_num_cks -= i;

                /* write the checksums with the common total */
                new.srvr_id = DCC_ID_COMP;
                new.fgs_num_cks = i;
                write_new_rcd(&new,
                              sizeof(new) - sizeof(new.cks)
                              + i*sizeof(new.cks[0]));

                /* handle the remaining checksums */
                new.fgs_num_cks = new_num_cks;
                memmove(&new.cks[0], &new.cks[i],
                        new_num_cks*sizeof(new.cks[0]));
            }
        }

        /* write the rest (or all) of the new record */
        write_new_rcd(&new,
                      sizeof(new) - sizeof(new.cks)
                      + new_num_cks*sizeof(new.cks[0]));
    }
    /* the whole file was copied, so only -N can leave it emptied */
    emptied = cleardb;

copy_failed:;
    /* Act as if we copied the entire file on failure. */
    old_db_pos = old_db_csize;

    if (clean_mode != REPAIR_MODE) {
        free_db_st(rcd_st);
        free_db_st(hash_st);
        free_db_st(rcd2_st);
    }

    /* notice if there are no summarizable or floodable reports */
    if (min_delay_pos == DB_PTR_NULL)
        min_delay_pos = new_db_csize;
    if (min_confirm_pos == DB_PTR_NULL)
        min_confirm_pos = new_db_csize;
    /* do final adjustment of the flooding positions */
    adj_mmap();
    /* force flooding positions to be right if the system crashed with
     * the flod.map file on the disk more up to date and so after the
     * database file on the disk */
    for (mp = new_flod_mmaps.mmaps;
         mp <= LAST(new_flod_mmaps.mmaps);
         ++mp) {
        if (mp->rem_hostname[0] != '\0') {
            if (mp->confirm_pos > new_db_csize)
                mp->confirm_pos = new_db_csize;
            else if (mp->confirm_pos < min_confirm_pos)
                mp->confirm_pos = min_confirm_pos;
        }
    }
    if (new_flod_mmaps.delay_pos < min_delay_pos
        || new_flod_mmaps.delay_pos > new_db_csize)
        new_flod_mmaps.delay_pos = min_delay_pos;

    /* We are finished with the old file. */
    i = db_close(DB_CLOSE);

    write_new_hdr(emptied);

    report_progress(1, " processed", "MBytes",
                    old_db_pos, old_db_csize, 1024*1024);
    if (grey_on)
        quiet_trace_msg("expired %d records and %d checksums in %s",
                        expired_rcds, expired_cks, db_paths.base.c);
    else
        quiet_trace_msg("expired %d records and %d checksums,"
                        " obsoleted %d checksums in %s",
                        expired_rcds, expired_cks, obs_rcds,
                        db_paths.base.c);
    return i;
}



/* Convert the expiration durations of an old database header to the
 * current form by copying the all and spam values of each of the
 * DCC_DIM_CKS entries. */
static void
copy_v5_ex_secs(DB_EX_SECS ex_secs, const DB_V5_EX_SECS v5_ex_secs)
{
    int i;

    for (i = 0; i < DCC_DIM_CKS; ++i) {
        ex_secs[i].all = v5_ex_secs[i].all;
        ex_secs[i].spam = v5_ex_secs[i].spam;
    }
}



/* copy the database copy while doing minimal expiring
 *
 * The header of the old database is read and, if it has one of the
 * older recognized versions, converted into prev_db_parms.  The
 * expiration settings of the old database are then placed in db_parms
 * and the records are copied by expire(), whose result is returned. */
static u_char
copy_db(void)
{
#ifdef DB_VERSION5_STR
    static DB_VERSION_BUF old_version5 = DB_VERSION5_STR;
#endif
#ifdef DB_VERSION4_STR
    static DB_VERSION_BUF old_version4 = DB_VERSION4_STR;
#endif
#ifdef DB_VERSION3_STR
    static DB_VERSION_BUF old_version3 = DB_VERSION3_STR;
#endif
    /* the old header viewed as each of the recognized versions */
    union {
        DB_HDR hdr;
#ifdef DB_VERSION5_STR
        DB_V5_PARMS v5;
#endif
#ifdef DB_VERSION4_STR
        DB_V4_PARMS v4;
#endif
#ifdef DB_VERSION3_STR
        DB_V3_PARMS v3;
#endif
    } old_db;
    struct timeval sn;

    /* do not lock the old database because the daemon must continue
     * to answer requests */
    if (old_db_fd < 0) {
        old_db_fd = open(db_paths.base.c, O_RDONLY, 0);
        if (old_db_fd == -1)
            dcc_logbad(EX_IOERR, "open(%s): %s",
                       db_paths.prev_msg, ERROR_STR());
    }

    if (!read_db_hdr(&dcc_emsg, &old_db.hdr,
                     old_db_fd, db_paths.prev_msg))
        dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c);

    /* a header of the current version is used as it is */
    if (!memcmp(old_db.hdr.p.version, db_version_buf,
                sizeof(old_db.hdr.p.version))) {
        prev_db_parms = old_db.hdr.p;

#ifdef DB_VERSION5_STR
    /* Convert a version 5 header field by field.
     * Fields that are not copied stay zero. */
    } else if (!memcmp(old_db.v5.version, old_version5,
                       sizeof(old_db.v5.version))) {
        memset(&prev_db_parms, 0, sizeof(prev_db_parms));
        memcpy(prev_db_parms.version, db_version_buf,
               sizeof(prev_db_parms.version));
        prev_db_parms.db_csize = old_db.v5.db_csize;
        prev_db_parms.blksize = old_db.v5.blksize;
        prev_db_parms.sn = old_db.v5.sn;
        prev_db_parms.cleared = old_db.v5.cleared;
        prev_db_parms.cleaned = old_db.v5.cleaned;
        prev_db_parms.cleaned_cron = old_db.v5.cleaned_cron;
        memcpy(prev_db_parms.ex_spam, old_db.v5.ex_spam,
               sizeof(prev_db_parms.ex_spam));
        /* NOTE(review): ex_all is filled from the old ex_spam
         * timestamps.  Confirm whether old_db.v5.ex_all exists and was
         * intended here. */
        memcpy(prev_db_parms.ex_all, old_db.v5.ex_spam,
               sizeof(prev_db_parms.ex_all));
        copy_v5_ex_secs(prev_db_parms.ex_secs, old_db.v5.ex_secs);
        prev_db_parms.nokeep_cks = old_db.v5.nokeep_cks;
        prev_db_parms.flags = old_db.v5.flags;
        prev_db_parms.old_db_csize = old_db.v5.old_db_csize;
        prev_db_parms.db_added = old_db.v5.db_added;
        prev_db_parms.hash_used = old_db.v5.hash_used;
        prev_db_parms.old_hash_used = old_db.v5.old_hash_used;
        prev_db_parms.hash_added = old_db.v5.hash_added;
        prev_db_parms.rate_secs = old_db.v5.rate_secs;
        prev_db_parms.last_rate_sec = old_db.v5.last_rate_sec;
        prev_db_parms.old_kept_cks = old_db.v5.old_kept_cks;
        prev_db_parms.min_confirm_pos = old_db.v5.min_confirm_pos;
        prev_db_parms.failsafe_cleanings = old_db.v5.failsafe_cleanings;
#endif
#ifdef DB_VERSION4_STR
    /* Convert a version 4 header.  Unlike version 5, min_confirm_pos
     * and failsafe_cleanings are not copied. */
    } else if (!memcmp(old_db.v4.version, old_version4,
                       sizeof(old_db.v4.version))) {
        memset(&prev_db_parms, 0, sizeof(prev_db_parms));
        memcpy(prev_db_parms.version, db_version_buf,
               sizeof(prev_db_parms.version));
        prev_db_parms.db_csize = old_db.v4.db_csize;
        prev_db_parms.blksize = old_db.v4.blksize;
        prev_db_parms.sn = old_db.v4.sn;
        prev_db_parms.cleared = old_db.v4.cleared;
        prev_db_parms.cleaned = old_db.v4.cleaned;
        prev_db_parms.cleaned_cron = old_db.v4.cleaned_cron;
        memcpy(prev_db_parms.ex_spam, old_db.v4.ex_spam,
               sizeof(prev_db_parms.ex_spam));
        /* NOTE(review): ex_all is filled from the old ex_spam
         * timestamps.  Confirm whether old_db.v4.ex_all exists and was
         * intended here. */
        memcpy(prev_db_parms.ex_all, old_db.v4.ex_spam,
               sizeof(prev_db_parms.ex_all));
        copy_v5_ex_secs(prev_db_parms.ex_secs, old_db.v4.ex_secs);
        prev_db_parms.nokeep_cks = old_db.v4.nokeep_cks;
        prev_db_parms.flags = old_db.v4.flags;
        prev_db_parms.old_db_csize = old_db.v4.old_db_csize;
        prev_db_parms.db_added = old_db.v4.db_added;
        prev_db_parms.hash_used = old_db.v4.hash_used;
        prev_db_parms.old_hash_used = old_db.v4.old_hash_used;
        prev_db_parms.hash_added = old_db.v4.hash_added;
        prev_db_parms.rate_secs = old_db.v4.rate_secs;
        prev_db_parms.last_rate_sec = old_db.v4.last_rate_sec;
        prev_db_parms.old_kept_cks = old_db.v4.old_kept_cks;
#endif
#ifdef DB_VERSION3_STR
    /* Convert a version 3 header.  Only ex_spam is copied, so ex_all
     * stays zero, and the flags are translated one at a time. */
    } else if (!memcmp(old_db.v3.version, old_version3,
                       sizeof(old_db.v3.version))) {
        memset(&prev_db_parms, 0, sizeof(prev_db_parms));
        memcpy(prev_db_parms.version, db_version_buf,
               sizeof(prev_db_parms.version));
        prev_db_parms.db_csize = old_db.v3.db_csize;
        prev_db_parms.blksize = old_db.v3.blksize;
        prev_db_parms.sn = old_db.v3.sn;
        memcpy(prev_db_parms.ex_spam, old_db.v3.ex_spam,
               sizeof(prev_db_parms.ex_spam));
        copy_v5_ex_secs(prev_db_parms.ex_secs, old_db.v3.ex_secs);
        prev_db_parms.nokeep_cks = old_db.v3.nokeep_cks;
        if (old_db.v3.flags & DB_PARM_V3_FG_GREY)
            prev_db_parms.flags |= DB_PARM_FG_GREY;
        if (old_db.v3.flags & DB_PARM_V3_FG_NEED_RWD)
            prev_db_parms.flags |= DB_PARM_FG_NEED_RWD;
        prev_db_parms.old_db_csize = old_db.v3.old_db_csize;
        prev_db_parms.db_added = old_db.v3.db_added;
prev_db_parms.hash_used = old_db.v3.hash_used;
        prev_db_parms.old_hash_used = old_db.v3.old_hash_used;
        prev_db_parms.hash_added = old_db.v3.hash_added;
        prev_db_parms.rate_secs = old_db.v3.rate_secs;
        prev_db_parms.last_rate_sec = old_db.v3.last_rate_sec;
        prev_db_parms.old_kept_cks = old_db.v3.old_kept_cks;

        /* The times of clearing and cleaning are derived from the
         * serial number and moved back by 2 days and 1 day when the
         * old self-cleaning flag is set. */
        ts2timeval(&sn, &prev_db_parms.sn);
        prev_db_parms.cleared = sn.tv_sec;
        prev_db_parms.cleaned = sn.tv_sec;
        if (old_db.v3.flags & DB_PARM_V3_FG_SELF_CLEAN2) {
            prev_db_parms.cleared -= 2*24*60*60;
            prev_db_parms.cleaned -= 24*60*60;
        }
#endif

    } else {
        /* not a version that can be converted */
        dcc_logbad(EX_IOERR, "%s has the wrong magic \"%.*s\"",
                   db_paths.prev_msg,
                   ISZ(DB_VERSION_BUF), old_db.hdr.p.version);
    }

    /* carry the serial number, cleaning times, expirations and flags
     * of the old database into the parameters of the new database */
    db_parms.sn = prev_db_parms.sn;
    db_parms.cleared = prev_db_parms.cleared;
    db_parms.cleaned = prev_db_parms.cleaned;
    db_parms.cleaned_cron = prev_db_parms.cleaned_cron;
    memcpy(db_parms.ex_all, prev_db_parms.ex_all,
           sizeof(db_parms.ex_all));
    memcpy(db_parms.ex_spam, prev_db_parms.ex_spam,
           sizeof(db_parms.ex_spam));
    memcpy(&db_parms.ex_secs, &prev_db_parms.ex_secs,
           sizeof(db_parms.ex_secs));
    db_parms.nokeep_cks = prev_db_parms.nokeep_cks;
    db_parms.flags = prev_db_parms.flags;

    set_db_tholds(db_parms.nokeep_cks);

    return expire(prev_db_parms.db_csize);
}



/* Copy any records from the old file to the new file that were
 * added to the old file while we were creating the new file.
 *
 * Copying starts at old_db_pos and is repeated until a pass over the
 * old file finds no more records.
 *	emsg	receives the message for a failure
 * Returns 1 if every late record was copied and the new database was
 * flushed, or 0 after any failure. */
static u_char
catchup(DCC_EMSG *emsg)
{
    DB_HDR old_db_hdr;
    DB_RCD rcd;
    int rcd_len;
    u_char result;
    int count, old_count;

    /* Because dccd knows dbclean is running, dccd will have been
     * keeping its header block more accurate than usual.
*/ result = 1; count = 0; do { old_count = count; if (!read_db_hdr(&dcc_emsg, &old_db_hdr, old_db_fd, db_paths.prev.c)) { emsg = 0; result = 0; break; } if (old_db_hdr.p.db_csize < old_db_pos) { dcc_error_msg("%s mysteriously truncated", db_paths.prev.c); result = 0; break; } if ((off_t)old_db_pos != lseek(old_db_fd, old_db_pos, SEEK_SET)) { dcc_pemsg(EX_IOERR, emsg, "lseek(%s, "L_HxPAT"): %s", db_paths.prev.c, old_db_pos, ERROR_STR()); emsg = 0; result = 0; break; } read_rcd_invalidate(0); while (old_db_pos < old_db_hdr.p.db_csize) { rcd_len = read_rcd(emsg, &rcd, old_db_fd, old_db_pos, db_paths.prev.c); if (rcd_len <= 0) { if (rcd_len == 0) dcc_pemsg(EX_IOERR, emsg, "premature EOF in %s" " at "L_HxPAT " instead of "L_HxPAT, db_paths.prev.c, old_db_pos, old_db_hdr.p.db_csize); emsg = 0; result = 0; break; } /* If something bad happens, we may not be able to * go back to the old file. Carry on to get as much * data as we can although we know the dccd daemon * may croak when we release it */ if (!db_add_rcd(emsg, &rcd, 0)) { emsg = 0; result = 0; break; } old_db_pos += rcd_len; ++count; } } while (result && old_count != count); if (count > 0 && db_debug >= 1) quiet_trace_msg("copied %d late reports%s", count, result ? 
"" : " with problems"); if (!dbclean_flush(emsg)) result = 0; return result; } /* try to compress an old report with a predecessor */ static void compress_old(DB_ST *rcd_st, DB_ST *rcd2_st) { DB_PTR prev; DB_RCD *cur_rcd, *prev_rcd; DB_RCD_CK *cur_ck, *prev_ck; int cur_ck_num, prev_ck_num; DCC_TGTS cur_tgts, prev_tgts; DCC_CK_TYPES cur_type, prev_type; time_t cur_secs, prev_secs; int retries = 0; cur_rcd = rcd_st->d.r; /* can't compress with whitelisting, reputation adjustment * or other special values */ cur_tgts = DB_TGTS_RCD_RAW(cur_rcd); if (cur_tgts > DCC_TGTS_TOO_MANY) return; cur_secs = ts2secs(&cur_rcd->ts); /* Before spending the time to map a preceding checksum, * find at least one checksum worth keeping and that might * be combined or compressed with its predecessor. */ prev = DCC_CK_INVALID; for (cur_ck_num = DB_NUM_CKS(cur_rcd), cur_ck = cur_rcd->cks; cur_ck_num != 0; --cur_ck_num, ++cur_ck) { if (DB_CK_JUNK(cur_ck)) continue; cur_type = DB_CK_TYPE(cur_ck); /* cannot compressed server-ID assertions because that * changes the server-ID */ if (cur_type == DCC_CK_SRVR_ID) return; if (DB_TEST_NOKEEP(db_parms.nokeep_cks, cur_type)) continue; /* all of the checksums in the current record must be old */ if (cur_secs >= new_all_secs[cur_type]) return; /* note the first, probably least fuzzy candidate */ if (prev == DB_PTR_NULL) prev = DB_PTR_EX(cur_ck->prev); } again:; if (prev == DB_PTR_NULL) return; /* Check that the current and previous records are old * and contain the same useful checksums. 
*/ if (!db_map_rcd(&dcc_emsg, rcd2_st, prev, 0)) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); prev_rcd = rcd2_st->d.r; prev_secs = 0; prev_ck_num = DB_NUM_CKS(prev_rcd); prev_ck = prev_rcd->cks; cur_ck_num = DB_NUM_CKS(cur_rcd); cur_ck = cur_rcd->cks; for (;;) { /* we must run out of checksums in the two reports at the * same time */ if (prev_ck_num == 0 || cur_ck_num == 0) { if (prev_ck_num == cur_ck_num) break; return; } /* ignore paths and other junk */ if (DB_CK_JUNK(prev_ck)) { --prev_ck_num; ++prev_ck; continue; } prev_type = DB_CK_TYPE(prev_ck); if (DB_TEST_NOKEEP(db_parms.nokeep_cks, prev_type)) { --prev_ck_num; ++prev_ck; continue; } if (DB_CK_JUNK(cur_ck)) { --cur_ck_num; ++cur_ck; continue; } cur_type = DB_CK_TYPE(cur_ck); if (DB_TEST_NOKEEP(db_parms.nokeep_cks, cur_type)) { --cur_ck_num; ++cur_ck; continue; } /* because the checksums are ordered, we know to * give up at the first mismatch */ if (cur_type != prev_type || memcmp(&cur_ck->sum, &prev_ck->sum, sizeof(cur_ck->sum))) return; if (prev_secs == 0) prev_secs = ts2secs(&prev_rcd->ts); if (prev_secs >= new_all_secs[cur_type]) { /* This previous record is new enough to be valuable * and so the current record is out of order. * It must have been delayed among the floods. * Try to compress it with a preceding record. */ if (++retries > 4) return; prev = DB_PTR_EX(prev_ck->prev); goto again; } /* Expirations change reputations, so do not compress * reputation reports from different eras. * Check only the first of the 1 or 2 reputation checksums * in the two records, because they share the timestamps of * their records. * Start 1-week eras at midnight Sunday in phase with the * weekly variation in spam. */ if (DCC_CK_IS_REP(grey_on, cur_type) && ((cur_secs + DCC_EPOCH_WEEK) / (7*24*60*60) != (prev_secs + DCC_EPOCH_WEEK) / (7*24*60*60))) { if (cur_secs > prev_secs) return; /* Start again with preceding record if * this pair is out of order and cannot * be compressed. 
This can be necessary around * midnight at the ends of eras. */ if (++retries > 4) return; prev = DB_PTR_EX(prev_ck->prev); goto again; } --prev_ck_num; ++prev_ck; --cur_ck_num; ++cur_ck; } /* The current and previous records are compatible. * Add the count of the previous record to the current record * and mark the previous record useless. * The individual totals in the current record are already correct, * so postpone worrying about the deleted record. */ if (cur_tgts < DCC_TGTS_TOO_MANY) { prev_tgts = DB_TGTS_RCD(prev_rcd); /* can't compress with whitelisting, reputation adjustment * or other special values */ if (prev_tgts > DCC_TGTS_TOO_MANY || prev_tgts == 0) return; if (prev_tgts == DCC_TGTS_TOO_MANY) { cur_tgts = DCC_TGTS_TOO_MANY; } else { cur_tgts += prev_tgts; if (cur_tgts > DCC_TGTS_TOO_MANY) cur_tgts = DCC_TGTS_TOO_MANY; } DB_TGTS_RCD_SET(cur_rcd, cur_tgts); } /* Mark the previous record to be deleted next time. */ DB_TGTS_RCD_SET(prev_rcd, 0); DIRTY_RCD(rcd2_st, 0); cur_rcd->srvr_id = DCC_ID_COMP; cur_rcd->fgs_num_cks = DB_NUM_CKS(cur_rcd); /* use the newest timestamp */ if (ts_older_ts(&cur_rcd->ts, &prev_rcd->ts)) cur_rcd->ts = prev_rcd->ts; DIRTY_RCD(rcd_st, 0); ++comp_rcds; } /* write a parsed whitelist checksum * This does not detect duplicate entries */ static int white_write(DCC_EMSG *emsg, WF *wf, DCC_CK_TYPES type, DCC_SUM *sum, DCC_TGTS tgts) { DB_RCD rcd; int rcd_len; char buf[30]; DCC_FNM_LNO_BUF fnm_buf; /* ignore checksums that clients are never supposed to send * to the server or for some other reason cannot be whitelisted */ switch (type) { case DCC_CK_INVALID: case DCC_CK_ENV_TO: case DCC_CK_G_MSG_R_TOTAL: case DCC_CK_G_TRIPLE_R_BULK: case DCC_CK_SRVR_ID: dcc_pemsg(EX_DATAERR, emsg, "%s checksum cannot be used%s", type2str_err(type, 0, 0, grey_on), wf_fnm_lno(&fnm_buf, wf)); return 0; case DCC_CK_IP: case DCC_CK_ENV_FROM: case DCC_CK_FROM: case DCC_CK_MESSAGE_ID: case DCC_CK_RECEIVED: case DCC_CK_SUB: case DCC_CK_BODY: case 
DCC_CK_FUZ1: case DCC_CK_FUZ2: break; /* these are ok */ } if (tgts == DCC_TGTS_OK_MX || tgts == DCC_TGTS_OK_MXDCC) { tgts = DCC_TGTS_OK; } else if (tgts == DCC_TGTS_SUBMIT_CLIENT) { if (db_debug > 1) quiet_trace_msg("\"%s\" ignored%s", tgts2str(buf, sizeof(buf), tgts, 0), wf_fnm_lno(&fnm_buf, wf)); return 1; } /* Greylist whitelist entries cannot involve blacklisting. * They use DCC_TGTS_GREY_WHITE to signal whitelisting */ if (grey_on) { /* ignore anything except whitelisting */ if (tgts != DCC_TGTS_OK) { dcc_pemsg(EX_DATAERR, emsg, "\"%s\" ignored%s", tgts2str(buf, sizeof(buf), tgts, 0), wf_fnm_lno(&fnm_buf, wf)); return 0; } tgts = DCC_TGTS_GREY_WHITE; } memset(&rcd, 0, sizeof(rcd)); timeval2ts(&rcd.ts, &clean_start, 0); rcd.srvr_id = DCC_ID_WHITE; DB_TGTS_RCD_SET(&rcd, tgts); rcd.cks[0].type_fgs = DCC_CK_FLOD_PATH; memcpy(rcd.cks[0].sum.b, &wf->lno, sizeof(wf->lno)); rcd.cks[0].sum.b[sizeof(wf->lno)] = wf->fno; rcd.cks[1].type_fgs = type; rcd.cks[1].sum = *sum; rcd_len = sizeof(rcd) - sizeof(rcd.cks) + 2*sizeof(rcd.cks[0]); rcd.fgs_num_cks = 2; write_new_rcd(&rcd, rcd_len); ++white_cks; return 1; } #define MAX_IP_RANGE_LEN (1<<16) /* fix dcc.man if this changes */ static int /* 1=ok, 0=bad entry, -1=fatal */ white_range(DCC_EMSG *emsg, WF *wf, const DCC_IP_RANGE *range, DCC_TGTS tgts) { u_int range_len; struct in6_addr addr; DCC_SUM sum; DCC_FNM_LNO_BUF fnm_buf; int result; /* Allow only class-B sized blocks of addresses, * because server whitelist entries for an address block * require one checksum per IP address in the block. * A line in a server whitelist file specifying a * class-B or MAX_IP_RANGE_LEN address block requires adding * 65,536 checksums to the server database. * Instead, use client whiteclnt block entries. 
*/ range_len = len_ip_range(range); if (range_len > MAX_IP_RANGE_LEN) { dcc_pemsg(EX_NOHOST, emsg, "address block too large%s", wf_fnm_lno(&fnm_buf, wf)); return 0; } result = 0; addr = range->lo; while (range_len-- != 0) { ipv6tock(&sum, &addr); result = white_write(emsg, wf, DCC_CK_IP, &sum, tgts); if (result <= 0) return result; inc_ip6(&addr); } return 1; } /* Add the whitelist of certified non-spam and non-spammers * and otherwise start the database */ static void parse_white(void) { DCC_CK_TYPES type; int white_fd; white_cks = 0; if (!keep_white) { memset(&dbclean_white_tbl, 0,sizeof(dbclean_white_tbl)); for (type = 0; type <= DCC_CK_TYPE_LAST; ++type) dbclean_white_tbl.hdr.tholds_rej.t[type] = THOLD_UNSET; wf_init(&dbclean_wf, 0); dcc_fnm2rel_good(&dbclean_wf.ascii_nm, WHITELIST_NM(grey_on), 0); dbclean_wf.wtbl = &dbclean_white_tbl; white_fd = open(dbclean_wf.ascii_nm.c, O_RDONLY, 0); if (white_fd < 0) { /* worry only if the file exists but can't be used */ if (errno != ENOENT) { dcc_error_msg("open(%s): %s", dbclean_wf.ascii_nm.c, ERROR_STR()); keep_white = 1; } } else { if (0 > parse_whitefile(0, &dbclean_wf, white_fd, white_write, white_range)) keep_white = 1; if (0 > close(white_fd)) dcc_error_msg("close(%s): %s", dbclean_wf.ascii_nm.c, ERROR_STR()); } } if (keep_white) { /* If the whitelist was bad, purge the new database of * the bad new whitelist. 
We will use the existing * whitelist */ write_new_flush(); new_db_csize = DB_PTR_BASE; if (0 > ftruncate(new_db_fd, DB_PTR_BASE)) dcc_logbad(EX_IOERR, "truncate(%s,%d): %s", db_paths.new.c, DB_PTR_BASE, ERROR_STR()); new_db_fsize = DB_PTR_BASE; white_cks = 0; } /* update the counts in the database file */ write_new_hdr(1); } /* check for conflicts in the whitelist file */ static void check_white(const DB_RCD *rcd, DB_ST *rcd2_st) { static int msgs; static int prev_lno1, prev_lno2; static int prev_fno1, prev_fno2; const DB_RCD_CK *rcd_ck, *prev_ck; int lno1, lno2; int fno1, fno2; DCC_TGTS tgts1, tgts2; char tgts1_buf[30], tgts2_buf[30]; const char *fname1, *fname2; DCC_CK_TYPES type; DB_PTR prev; /* don't check if we have already complained enough */ if (msgs > 20) return; rcd_ck = rcd->cks; /* it is pointless without line numbers, which are lacking only * if we saved the old whitelist entries because the file is * broken */ if (DB_NUM_CKS(rcd) != 2 || DB_CK_TYPE(rcd_ck) != DCC_CK_FLOD_PATH) return; /* conflict is impossible with a single line */ ++rcd_ck; prev = DB_PTR_EX(rcd_ck->prev); if (prev == DB_PTR_NULL) return; type = DB_CK_TYPE(rcd_ck); prev_ck = db_map_rcd_ck(&dcc_emsg, rcd2_st, prev, type); if (!prev_ck) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); tgts1 = DB_TGTS_RCD(rcd2_st->d.r); tgts2 = DB_TGTS_RCD(rcd); if (tgts1 == tgts2) return; /* no conflict */ memcpy(&lno1, rcd2_st->d.r->cks[0].sum.b, sizeof(lno1)); fno1 = rcd2_st->d.r->cks[0].sum.b[sizeof(lno1)]; memcpy(&lno2, rcd->cks[0].sum.b, sizeof(lno2)); fno2 = rcd->cks[0].sum.b[sizeof(lno2)]; if (lno1 == prev_lno1 && fno1 == prev_fno1 && lno2 == prev_lno2 && fno2 == prev_fno2) return; fname1 = wf_fnm(&dbclean_wf, fno1); fname2 = wf_fnm(&dbclean_wf, fno2); if (fname1 == fname2) { fname1 = ""; } else { fname1 = dcc_path2fnm(fname1); } dcc_error_msg("\"%s\" in line %d%s%s conflicts with \"%s\"" " in line %d of %s", tgts2str(tgts1_buf, sizeof(tgts1_buf), tgts1, grey_on), lno1, *fname1 != '\0' ? 
" of " : "", fname1, tgts2str(tgts2_buf, sizeof(tgts2_buf), tgts2, grey_on), lno2, fname2); ++msgs; prev_lno1 = lno1; prev_fno1 = fno1; prev_lno2 = lno2; prev_fno2 = fno2; } /* rebuild the hash table and the totals and links within the database file * finish with the file locked */ static void build_hash(void) { DB_ST *rcd_st, *rcd2_st; DB_PTR rcd_pos; int rcd_len; int rcd_cks, rcd_sums; DB_PTR rcds, sums; const DB_RCD_CK *rcd_ck; DB_HADDR guess_hash_len; double db_rate, hash_ratio; if (!db_buf_init(0, new_db_blksize)) dcc_logbad(EX_SOFTWARE, "bad new blksize"); if (new_hash_len == 0) { /* Try to choose a hash table size now so that when it * is next time to rebuild after 24 hours of incoming * checksums, the alpha or load factor will still be 0.9. * We probably ran 24 hours ago, so the old hash size * is an estimate of the size tomorrow. * * Guess the number of distinct checksums added * tomorrow based on the current average rate */ db_rate = db_add_rate(&new_db_parms, 1, 0); if (db_rate > 0.0) { /* Increase the average rate by 10% to account * for the 30% decrease often seen on weekends. 
*/ guess_hash_len = db_rate * 1.1 * 24*60*60; /* predict # of distinct checksums in current data */ hash_ratio = prev_db_parms.old_kept_cks; if (hash_ratio == 0.0) { hash_ratio = 1.0; } else { hash_ratio = (ADJ_HLEN(prev_db_parms .old_hash_used) / hash_ratio); if (hash_ratio > 1.0 || hash_ratio < 0.3) hash_ratio = 1.0; } guess_hash_len += (kept_cks * hash_ratio) + white_cks; if (db_debug) quiet_trace_msg("old hash size %d" " predict %d from db_rate %.1f" " hash_ratio %.1f=%d/%d" " kept %d white %d", prev_db_hash_used, guess_hash_len, db_rate, hash_ratio, ADJ_HLEN(prev_db_parms .old_hash_used), prev_db_parms.old_kept_cks, kept_cks, white_cks); } else { /* guess if we do not have a good measure * of the recent rate */ guess_hash_len = kept_cks+white_cks; guess_hash_len += guess_hash_len/5; } /* Double the hash size if it was exhausted */ new_hash_len = prev_db_hash_used; if (new_hash_len <= prev_db_parms.old_hash_used) new_hash_len = 2 * prev_db_parms.old_hash_used; if (new_hash_len < guess_hash_len) new_hash_len = guess_hash_len; /* load factor 0.9 */ new_hash_len += new_hash_len/10 + MIN_HASH_ENTRIES; if (new_hash_len > db_max_hash_entries) { quiet_trace_msg("default hash size %d" " > maximum %d entries", new_hash_len, db_max_hash_entries); new_hash_len = db_max_hash_entries; } if (new_hash_len < MIN_HASH_ENTRIES) new_hash_len = MIN_HASH_ENTRIES; } /* Open and lock the new database */ unlink_whine(0, hash_paths.new.c, 1); new_hash_created = 1; if (!db_open(&dcc_emsg, -1, db_paths.new.c, hash_paths.new.c, new_hash_len, DB_OPEN_LOCK_NOWAIT | db_mode)) { dcc_error_msg("%s", dcc_emsg.c); dcc_logbad(emsg_ex_code(&dcc_emsg), "could not start database %s", db_paths.new.c); } /* guess which checksums we will keep so that we can count them */ if (prev_db_parms.nokeep_cks != 0) db_parms.nokeep_cks = prev_db_parms.nokeep_cks; /* add every record in the database file to the hash table and * fix its accumulated counts and reverse links */ comp_rcds = 0; sums = 0; rcds = 0; 
report_progress_init(); rcd_st = GET_DB_ST(); rcd2_st = GET_DB_ST(); for (rcd_pos = DB_PTR_BASE; rcd_pos < db_csize; rcd_pos += rcd_len) { if (--progress_rpt_cnt <= 0) { report_progress(0, " hash rebuilt", "checksums", sums, kept_cks, 1); } if (!db_map_rcd(&dcc_emsg, rcd_st, rcd_pos, &rcd_len)) { dcc_error_msg("%s", dcc_emsg.c); dcc_logbad(emsg_ex_code(&dcc_emsg), "hash build failed reading" " record at "L_HxPAT, rcd_pos); } /* skip end of block padding */ if (rcd_st->d.r->fgs_num_cks == 0) continue; ++rcds; /* count the checksums we'll link in this record */ rcd_cks = DB_NUM_CKS(rcd_st->d.r); rcd_sums = 0; for (rcd_ck = rcd_st->d.r->cks; rcd_ck < &rcd_st->d.r->cks[rcd_cks]; ++rcd_ck) { if (!DB_TEST_NOKEEP(db_parms.nokeep_cks, DB_CK_TYPE(rcd_ck))) ++rcd_sums; } sums += rcd_sums; if (!db_link_rcd(&dcc_emsg, rcd_st)) { dcc_logbad(emsg_ex_code(&dcc_emsg), "relinking record at "L_HxPAT": %s", rcd_pos, dcc_emsg.c); } DIRTY_RCD(rcd_st, 0); /* check for conflicts in the whitelist file */ if (DB_RCD_ID(rcd_st->d.r) == DCC_ID_WHITE) check_white(rcd_st->d.r, rcd2_st); else compress_old(rcd_st, rcd2_st); } free_db_st(rcd_st); free_db_st(rcd2_st); report_progress(1, " hash rebuilt", "checksums", sums, kept_cks, 1); db_parms.old_hash_used = db_hash_used; db_parms.old_kept_cks = kept_cks; db_parms.hash_used = db_hash_used; db_parms.old_db_csize = db_csize; if (!db_flush_parms(&dcc_emsg)) dcc_logbad(emsg_ex_code(&dcc_emsg), "%s", dcc_emsg.c); quiet_trace_msg("hashed "L_DPAT" records containing "L_DPAT" checksums," " compressed %d records", rcds, sums, comp_rcds); quiet_trace_msg("%d hash entries total, %d or %d%% used", ADJ_HLEN(db_hash_len), ADJ_HLEN(db_hash_used), (int)((ADJ_HLEN(db_hash_used)*100.0) / ADJ_HLEN(db_hash_len))); } /* use a large buffer to encourage the file system to avoid fragmentation */ static union { u_char c[DB_MIN_MIN_MBYTE*1024*1024]; DB_HDR hdr; } write_new_db_buf; static u_int write_new_db_buflen = 0; static DB_PTR write_new_base; static void 
write_new_flush(void) { int i; if (write_new_db_buflen != 0) { if (write_new_base != (DB_PTR)lseek(new_db_fd, write_new_base, SEEK_SET)) { dcc_logbad(EX_IOERR, "lseek(%s, 0): %s", db_paths.new.c, ERROR_STR()); } i = write(new_db_fd, &write_new_db_buf, write_new_db_buflen); if ((DB_PTR)i != write_new_db_buflen) { if (i < 0) dcc_logbad(EX_IOERR, "write(%s): %s", db_paths.new.c, ERROR_STR()); else dcc_logbad(EX_IOERR, "write(%s)=%d instead of %d", db_paths.new.c, i, write_new_db_buflen); } if (new_db_fsize < write_new_base+(DB_PTR)i) new_db_fsize = write_new_base+i; } write_new_base = new_db_csize; write_new_db_buflen = 0; } static void write_new_buf(const void *buf, int buflen) { if (write_new_db_buflen + buflen > ISZ(write_new_db_buf)) write_new_flush(); memcpy(&write_new_db_buf.c[write_new_db_buflen], buf, buflen); write_new_db_buflen += buflen; } /* add a record to the new file */ static void write_new_rcd(const void *buf, int buflen) { DB_PTR new_blk_num; int pad, i; /* pad accross block boundaries */ new_blk_num = DB_PTR2BLK_NUM(new_db_csize + buflen, new_db_blksize); if (new_blk_num != DB_PTR2BLK_NUM(new_db_csize, new_db_blksize)) { pad = new_blk_num*new_db_blksize - new_db_csize; pad = (((pad + DB_RCD_HDR_LEN-1) / DB_RCD_HDR_LEN) * DB_RCD_HDR_LEN); do { i = sizeof(zero_rcd); if (i > pad) i = pad; write_new_buf(zero_rcd, i); pad -= i; new_db_csize += i; } while (pad != 0); } write_new_buf(buf, buflen); new_db_csize += buflen; } /* write the magic string at the head of the database file */ static void write_new_hdr(u_char emptied) { DB_HDR *new; time_t new_rate_secs; DCC_CK_TYPES type; int i; write_new_flush(); memset(&write_new_db_buf, 0, sizeof(write_new_db_buf)); write_new_base = 0; if (new_db_fsize > ISZ(DB_HDR) || new_db_blksize == 0) { write_new_db_buflen = sizeof(DB_HDR); } else { write_new_db_buflen = new_db_blksize; if (write_new_db_buflen > ISZ(write_new_db_buf)) write_new_db_buflen = ISZ(write_new_db_buf); } new = &write_new_db_buf.hdr; memset(new, 0, 
sizeof(*new)); memcpy(new->p.version, db_version_buf, sizeof(new->p.version)); timeval2ts(&new->p.sn, &clean_start, 0); if (emptied) { new->p.cleared = clean_start.tv_sec; } else { if (TIME_T(prev_db_parms.cleared) < clean_start.tv_sec && prev_db_parms.cleared >= 30*365*24*60*60) { /* after 2000 and before now */ new->p.cleared = prev_db_parms.cleared; } else { new->p.cleared = clean_start.tv_sec; new->p.flags |= DB_PARM_FG_NO_CLR; } switch (clean_mode) { case NORMAL_MODE: new->p.cleaned = clean_start.tv_sec; new->p.cleaned_cron = new->p.cleaned; new->p.failsafe_cleanings = 0; break; case FAILSAFE_MODE: new->p.cleaned = clean_start.tv_sec; new->p.cleaned_cron = prev_db_parms.cleaned_cron; new->p.failsafe_cleanings = 1+(prev_db_parms .failsafe_cleanings); break; case REPAIR_MODE: case QUICK_MODE: case HASH_MODE: case DEL_MODE: new->p.cleaned = prev_db_parms.cleaned; new->p.cleaned_cron = prev_db_parms.cleaned_cron; new->p.failsafe_cleanings = (prev_db_parms .failsafe_cleanings); break; } } if (grey_on) new->p.flags |= DB_PARM_FG_GREY; if (dbclean_ssd_mode) new->p.flags |= DB_PARM_FG_SSD; if (emptied || (prev_db_parms.flags & DB_PARM_FG_NEED_RWD)) new->p.flags |= DB_PARM_FG_NEED_RWD; if (have_expire_parms > 0 || (have_expire_parms < 0 && (prev_db_parms.flags & DB_PARM_FG_EXP_SET))) new->p.flags |= DB_PARM_FG_EXP_SET; if (prev_db_parms.flags & DB_PARM_FG_NO_CLR) new->p.flags |= DB_PARM_FG_NO_CLR; new->p.nokeep_cks = (emptied || prev_db_parms.nokeep_cks == 0 ? 
def_nokeep_cks() : prev_db_parms.nokeep_cks); new->p.blksize = new_db_blksize; new->p.db_csize = new_db_csize; /* update the traffic counts */ if (!emptied && prev_db_parms.db_csize != 0 && prev_db_parms.db_csize >= prev_db_parms.old_db_csize && prev_db_parms.hash_used != 0 && prev_db_parms.hash_used >= prev_db_parms.old_hash_used) { if (prev_db_parms.rate_secs > 0 && prev_db_parms.rate_secs <= DB_MAX_RATE_SECS) { new->p.db_added = prev_db_parms.db_added; new->p.hash_added = prev_db_parms.hash_added; new->p.rate_secs = prev_db_parms.rate_secs; } if (prev_db_parms.prev_rate_secs > 0 && prev_db_parms.prev_rate_secs <= DB_MAX_RATE_SECS) { new->p.prev_db_added = prev_db_parms.prev_db_added; new->p.prev_hash_added = prev_db_parms.prev_hash_added; new->p.prev_rate_secs = prev_db_parms.prev_rate_secs; } new->p.last_rate_sec = clean_start.tv_sec; new_rate_secs = clean_start.tv_sec - ts2secs(&prev_db_parms.sn); if (new_rate_secs > 0 && new_rate_secs <= DB_MAX_RATE_SECS) { new_rate_secs += new->p.rate_secs; new->p.db_added += (prev_db_parms.db_csize - prev_db_parms.old_db_csize); new->p.hash_added += (prev_db_parms.hash_used - prev_db_parms.old_hash_used); new->p.rate_secs = new_rate_secs; if (new_rate_secs >= DB_NEW_RATE_SECS) { new->p.prev_db_added = new->p.db_added; new->p.prev_hash_added = new->p.hash_added; new->p.prev_rate_secs = new->p.rate_secs; new->p.db_added = 0; new->p.hash_added = 0; new->p.rate_secs = 0; } } } for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) { if (new_ex_secs[type].all != 0) { new->p.ex_secs[type].all = new_ex_secs[type].all; new->p.ex_secs[type].spam = new_ex_secs[type].spam; new->p.ex_all[type] = new_all_ts[type]; new->p.ex_spam[type] = new_spam_ts[type]; } else { new->p.ex_secs[type].all = def_expire_secs; new->p.ex_secs[type].spam = (DCC_CK_LONG_TERM(type) ? 
def_expire_spamsecs : def_expire_secs); } } new->p.min_confirm_pos = min_confirm_pos; new_db_parms = new->p; /* ensure that the last block of the file is complete */ for (;;) { write_new_flush(); if (new_db_blksize == 0) break; i = new_db_fsize % new_db_blksize; if (i == 0) break; write_new_db_buflen = new_db_blksize - i; if (write_new_db_buflen > ISZ(write_new_db_buf)) write_new_db_buflen = ISZ(write_new_db_buf); memset(&write_new_db_buf, 0, write_new_db_buflen); write_new_base = new_db_fsize; } } static void rename_bail(const char *from, const char *to) { if (0 > rename(from, to)) dcc_logbad(EX_IOERR, "rename(%s, %s): %s", from, to, ERROR_STR()); } /* try for a long time or until the server hears */ static u_char /* 1=ok, 0=failed */ persist_aop(DCC_AOPS aop, u_int32_t val1, int secs) /* try for this long */ { DCC_CLNT_FGS clnt_fgs; clnt_fgs = DCC_CLNT_FG_NO_FAIL; if (grey_on) clnt_fgs |= DCC_CLNT_FG_GREY; return dcc_aop_persist(&dcc_emsg, ctxt, clnt_fgs, db_debug != 0, aop, val1, secs, &aop_resp); } /* tell the daemon to switch to the new database */ static void dccd_new_db(const char *msg) { /* Send a round of NOPs and ask about status to ensure the server * has dealt with requests that arrived while we had the database * locked and otherwise caught up. We want to try to ensure that * the server is listening when we re-open the database so that * it does not leave flooding off. * On some systems with lame mmap() support including BSD/OS, the * the daemon can stall for minutes in close(). If that or something * else makes the daemon stall, this can appear to fail. 
*/ if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_LIST, RESTART_DELAY)) dcc_error_msg("%s: %s; continuing", msg, dcc_emsg.c); dccd_unlocked = 0; if (!persist_aop(DCC_AOP_DB_NEW, 0, RESTART_DELAY)) { /* This cannot be a fatal error, * lest we leave the database broken */ dcc_error_msg("%s: %s; continuing", msg, dcc_emsg.c); } } static void finish(void) { DB_CLOSE_MODE bailing = DB_CLOSE; /* Delete created but not installed files. */ if (new_db_created) { unlink_whine(0, db_paths.new.c, 1); new_db_created = 0; bailing = DB_CLOSE_DISCARD_ALL; } /* We don't really know if a new hash file was created, * so don't worry about problems */ if (new_hash_created) { unlink_whine(0, hash_paths.new.c, 1); new_hash_created = 0; bailing = DB_CLOSE_DISCARD_ALL; } if (cur_db_created) { unlink_whine(0, db_paths.base.c, 1); unlink_whine(0, db_paths.next.c, 1); unlink_whine(0, hash_paths.base.c, 1); unlink_whine(0, hash_paths.next.c, 1); cur_db_created = 0; bailing = DB_CLOSE_DISCARD_ALL; } if (new_db_fd >= 0) { if (0 > close(new_db_fd)) dcc_error_msg("close(%s): %s", db_paths.new.c, ERROR_STR()); new_db_fd = -1; } if (old_db_fd >= 0) { if (0 > close(old_db_fd)) dcc_error_msg("close(%s): %s", db_paths.base.c, ERROR_STR()); old_db_fd = -1; } flod_unmap(0, 0); /* release the daemon, but if the database is still open, it's bad */ db_close(bailing); /* tell the daemon to switch databases */ if (dccd_unlocked) dccd_new_db("finish"); while (flods_off > 0) { --flods_off; if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_RESUME, RESTART_DELAY)) dcc_error_msg("%s", dcc_emsg.c); } unlock_dbclean(); } /* terminate with a signal */ static void DCC_NORET sigterm(int s) { dcc_error_msg("interrupted by signal %d", s); exit(s+EX_DCC_SIGNAL); }