/*
* pwalk.c Parallel Walk a file system and report file meta data
Copyright (C) 2013 John F Dey
Copyright (C) 2013-2016 OSNEXUS Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* pwalk is inspired by du but designed to be used with large
* file systems ( > 10 million files )
*
* History: dir.c, walk.c, walkv2,3,4, pwalkfs.c
*
* Example of using the directory call, opendir and readdir to
* simulate ls.
*
* 1997.03.20 John Dey Although this is the first documented date for
* this file I have versions that date from 1988.
* 2002.09.04 John Dey walk the directory and gather stats
* 2002.09.06 John Dey make to look like du -a
* 2004.07.06 John Dey add -a and -k
* 2008.04.01 John Dey CSV output for database use
* 2009.04.12 John Dey v3.1
* replaced constants with "FILENAME_MAX",
* Size of directory is size of all files in directory plus itself
* Added printStat function
* print file count on line with directories
* 2009.05.18 check for control characters and double quotes in file names;
* escape the double quotes and print bad file names to stderr
* 2009.12.30 size for dir should just be dir size; Fix; count returns 0
* for normal files and count of just the local directory; Previously count
* return the recursive file count for the whole tree.
*
2010.01.08 john dey; New field to output: file name extension.
Extension is defined as the last part of the name after a Dot "."
if no dot is found extension is empty ""
new feature: accepts multiple directory names as cmd line argument
This line of code has been replaced
if ( f.st_mode & S_IFDIR && (f.st_mode & S_IFMT != S_IFLNK) ) {
With this new line of code:
if ( S_ISDIR(f.st_mode) ) { Or I could have done: if ( (f.st_mode & S_IFDIR) == S_IFDIR )
2010.01.11 John Dey
Complete re-write of walkv4 transforming it into pwalk.
pwalk is a threaded version of walkv4.
pwalk will call fileDir as a new thread until MAXTHRDS is reached.
2010.02.01 pwalk v1 did not detach nor did it join the threads; v2
fixes this shortcoming;
2010.03.24 john dey; New physical hardware is available to run pwalk.
16 threads are only using about 20% CPU with 10% IO wait. Based on this
the thread count will be doubled to 32.
2010.11.29 Add mutex for printStat
2012.10.09 --NoSnap flag added. ignore directories that have the name
.snapshot
2013.08.02 john f dey; Add GNU license, --version flag added
2013.10.15 steve umbehocker; added verbose --debug spam mode
added compare and copy capabilities
added retain= option to backup only recently modified files
added customizable maxthreads
added summary total display at end (disable with --nototals)
updated and expanded usage section
fixed segfault in walker
2013.10.16 steve umbehocker; added --log mode logging
added purge mode hooks
added dryrun/force flags
2013.10.21 steve umbehocker; added --status logging
added purge w/ retain
2013.10.22 steve umbehocker; added check so purge cannot compare against empty dir
added execute operation logic to do cp/unlink operations outside
of the mutex so that they're parallelized
fixed sync/copy to create empty leaf directories
2014.11.21 steve umbehocker; added fix for files with spaces and special chars
2015.06.12 steve umbehocker; added fix for --help and --version
2015.06.18 osnexus eng; merging dot fix from john
2015.07.07 osnexus eng; fix to skip invalid dirs without exiting
2015.07.07 osnexus eng; added --exclude= option
2015.07.07 osnexus eng; fix to add thread cleanup on err condition
2015.10.14 osnexus eng; adding .qsnaps to --nosnaps option
2016.10.24 osnexus eng; added check to copy only regular files
2016.10.24 osnexus eng; added fix to deal with filenames containing "$" chars
2016.10.24 osnexus eng; added fix to use raw path with stat() and escaped paths with shell commands
2016.10.24 osnexus eng; added stderr redirect to /dev/null for cp/rsync/mv commands
2016.10.24 osnexus eng; fixed rsync usage for copying directory permissions
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <utime.h>
#include <time.h>
#include <errno.h>
#include <pthread.h>
#include <unistd.h>
/* Keep THRD_DEBUG undefined: the "Ending" trace in fileDir() is only compiled when it is defined. */
#undef THRD_DEBUG
/* Version banner printed by printVersion() and printHelp(). */
static char *Version = "v4.1.1 - 10/26/2016 - (c) 2013 John F Dey and (c) 2013-2016 OSNEXUS - GPLv2 License";
/* Program name used as a prefix in usage/version output. */
static char *whoami = "pwalk";
//char *errFile = ">> pwalk.err 2>&1 ";
/* Shell redirection suffix appended to the mkdir/rsync/cp/mv command lines. */
char *errFile = "> /dev/null 2>&1 ";
/*
 * Run-time options. All of these are set while main() parses the command
 * line and are read by the worker code afterwards.
 */
int EXCLUDE_DIRS = 0; /* number of exclude names registered; when > 0 fileDir() skips matching directories */
int TARGETDIR = 0; /* if set copy the files to the target path */
int COMPAREDIR = 0; /* if set compare against comparedir to find files that no longer exist there, then purge */
int DEBUG_MODE = 0; /* verbose output */
int CMP_ATIME = 0; /* indicates that compare operations should look at atime */
int CMP_MTIME = 1; /* compare operations look at mtime (on by default, --nomtime clears it) */
int CMP_CTIME = 0; /* compare operations look at ctime (--ctime sets it) */
int SHOWTOTALS = 1; /* show totals at the end via atexit() */
int DOTS = 0; /* print little dots to show progress */
int QUIET = 0; /* quiet gives the best backup performance */
int PURGE = 0; /* --purge: printStat() routes files to purgeFile() */
int LOG = 0; /* --backuplog=: logPath() appends to logfile */
int NODIRPERMS = 0; /* --nodirperms: skip the rsync step that copies directory permissions */
unsigned int DU_MODE = 0; /* --du: printTotals() prints DU_TOTAL */
int STATUS = 0; /* --status=: periodic status written to statusfile */
int RETENTION_DAYS = 60; /* NOTE(review): not referenced anywhere in the visible code */
size_t MAXTHRDS = 32; /* number of thread slots; main() clamps --maxthreads to 2..64 */
size_t RETAIN = 0; /* retention window in seconds (--retain=DAYS * 24 * 3600); 0 = not set */
size_t DRYRUN = 0; /* --dryrun: report but do not execute copy/mkdir/purge operations */
size_t FORCE = 0; /* --force: required (together with !DRYRUN) before purge actually unlinks */
int MOVE_MODE = 0; /* --move: use mv instead of cp */
unsigned long long DU_TOTAL = 4096L; /* running sum of st_size for --du; starts at 4096 (presumably the root directory itself - confirm) */
char * excludeDirs[1024]; /* directory base names to skip; entries are strdup()'d in main() */
char excludeDirData[8192]; /* scratch copy of the --excludedirs= value that strtok() splits in place */
size_t excludeDirCount = 0; /* number of valid entries in excludeDirs */
/*
 * Deferred operation handed from printStat()/purgeFile() to execOperation().
 * OP_COPY: opCommand holds a shell command line passed to system().
 * OP_DELETE: opCommand holds a path passed to unlink().
 */
typedef enum operationType { OP_NONE, OP_COPY, OP_DELETE } operationType_t;
/* Pending operation and its argument; fileDir() reads and resets them while holding mutexPrintStat. */
operationType_t opType;
char opCommand[FILENAME_MAX + FILENAME_MAX + 1];
/*
 * Per-invocation argument of fileDir(). One element of the fdslot array is
 * used per thread; recursive (non-threaded) calls use a stack copy instead.
 */
typedef struct fileData {
char dname[FILENAME_MAX + 1]; /* full path and basename; fileDir() appends entry names to it in place */
int THRDslot; /* slot ID 0 - MAXTHRDS; -1 marks a free slot in fdslot */
int THRDid; /* unique ID increments with each new THRD */
int flag; /* 0 if thread; recursion > 0 */
pthread_t thread_id; /* system assigned */
pthread_attr_t tattr; /* initialised detached in main(); only fdslot[0].tattr is passed to pthread_create */
} fileData_t;
/* Thread bookkeeping; ThreadCNT and the fdslot[].THRDslot fields are updated under mutexFD. */
int ThreadCNT = 0; /* ThreadCNT < MAXTHRDS */
int totalTHRDS = 0; /* total number of thread IDs handed out so far */
struct fileData *fdslot; /* array of MAXTHRDS slots allocated in main() */
pthread_mutex_t mutexLog; /* serialises appends to logfile in logPath() */
pthread_mutex_t mutexCounter; /* guards the counters bumped via incrementCounter() */
pthread_mutex_t mutexFD; /* guards ThreadCNT and slot allocation */
pthread_mutex_t mutexPrintStat; /* serialises printStat() and the opType/opCommand hand-off */
pthread_cond_t condFD; /* signalled by cleanupThread() when ThreadCNT drops to 0 */
/* Paths taken from the command line (--targetdir=, --comparedir=, --backuplog=, --status=). */
char targetdir[FILENAME_MAX + 1];
char comparedir[FILENAME_MAX + 1];
char logfile[FILENAME_MAX + 1];
char statusfile[FILENAME_MAX + 1];
int rootdirlen = 0; /* length of the root directory argument without a trailing slash */
time_t currenttime = 0; /* start time captured in main(); basis for elapsed time and retention checks */
time_t laststatustime = 0; /* time of the last "running" status write */
/*
 * Add one to the counter that `counter` points at.
 * The update is done while holding mutexCounter so that concurrent walker
 * threads do not lose increments.
 */
void incrementCounter(size_t *counter)
{
    pthread_mutex_lock(&mutexCounter);
    *counter += 1;
    pthread_mutex_unlock(&mutexCounter);
}
/*
 * Counters for summing up and printing totals at the end.
 * They are incremented through the increment*() wrappers below and printed
 * by updateStatus() and printTotals().
 */
typedef struct counterReport
{
size_t scanned; /* entries passed to printStat() */
size_t copied; /* copy/mkdir operations scheduled */
size_t modified; /* copies where the target already existed */
size_t created; /* copies/mkdirs where the target did not exist */
size_t error; /* entries that could not be processed */
size_t skipped; /* entries deliberately left alone */
size_t deleted; /* entries selected for purge */
} counterReport_t;
/* Single process-wide instance, zero-initialised. */
counterReport_t report = { 0, 0, 0, 0, 0, 0, 0 };
void incrementScanned(){ incrementCounter(&report.scanned); }
void incrementCopied(){ incrementCounter(&report.copied); }
void incrementModified(){ incrementCounter(&report.modified); }
void incrementCreated(){ incrementCounter(&report.created); }
void incrementError(){ incrementCounter(&report.error); }
void incrementDeleted(){ incrementCounter(&report.deleted); }
void incrementSkipped(){ incrementCounter(&report.skipped); }
/*
 * Write the version banner and the short license notice to stderr.
 */
void
printVersion()
{
    fprintf(stderr, "%s version %s\n", whoami, Version);
    fprintf(stderr, "%s Copyright (C) 2013-2016 John F Dey & Steve Umbehocker\n", whoami);
    /* The remaining lines carry no format arguments, so emit them as one literal. */
    fputs(" * pwalk comes with ABSOLUTELY NO WARRANTY;\n"
          " * This is free software, you can redistribute it and/or modify it under the terms of the GNU General Public License\n"
          " * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.\n",
          stderr);
}
/*
 * Write the full usage text to stderr.
 * Lines that embed the program name go through fprintf(); runs of fixed text
 * are emitted as a single concatenated literal with fputs().
 */
void
printHelp()
{
    fprintf(stderr, "%s version %s\n\n", whoami, Version);
    fputs("Usage : \n", stderr);
    fprintf(stderr, "%s --help --version\n", whoami);
    fputs(" Common Args :\n"
          " --dryrun : use this to test commands\n"
          " without making any changes to the system\n"
          " --maxthreads=N : indicates the number of threads (default=32)\n"
          " --nototals : disables printing of totals after the scan\n"
          " --dots : prints a dot and total every 1000 files scanned.\n"
          " --quiet : no chatter, speeds up the scan.\n"
          " --nosnap : Ignore directories with name .snapshot and .qsnaps\n"
          " --excludedirs=DIR1,DIR2,... : Ignore directories with specified names.\n"
          " --debug : Verbose debug spam\n"
          " Output Format : CSV\n"
          " Fields : DateStamp,\"inode\",\"filename\",\"fileExtension\",\"UID\",\n"
          " \"GID\",\"st_size\",\"st_blocks\",\"st_mode\",\"atime\",\n"
          " \"mtime\",\"ctime\",\"File Count\",\"Directory Size\"\n"
          "\n"
          "Walk Usage : \n",
          stderr);
    fprintf(stderr, "%s SOURCEDIR\n", whoami);
    fputs(" Command Args :\n"
          " SOURCEDIR : Fully qualified path to the directory to walk\n"
          "\n"
          "Copy/Backup Usage : \n",
          stderr);
    fprintf(stderr, "%s --targetdir=TARGETDIR SOURCEDIR\n", whoami);
    fprintf(stderr, "%s --retain=30 --targetdir=TARGETDIR SOURCEDIR\n", whoami);
    fputs(" Command Args :\n"
          " --move : move files rather than copy\n"
          " --nodirperms : don't copy directory permissions\n"
          " --targetdir : copy files to specified TARGETDIR\n"
          " --atime : copy if access time change (default=no atime)\n"
          " --backuplog=LOGFILE : log all files that were copied.\n"
          " --status=STATUSFILE : write periodic status updates to specified file\n"
          " --retain : copy if file ctime or mtime within retention period\n"
          " specified in days. eg: --retain=60\n"
          " --nomtime : ignore mtime (default=use mtime)\n"
          " SOURCEDIR : Fully qualified path to the directory to walk\n"
          "\n"
          "Delete/Purge Usage : \n",
          stderr);
    fprintf(stderr, "%s --purge [--force] --comparedir=COMPAREDIR PURGEDIR\n", whoami);
    fprintf(stderr, "%s --purge [--force] --retain=N PURGEDIR\n", whoami);
    fputs(" Command Args :\n"
          " --comparedir=DIR : compare against this dir but dont touch any files\n"
          " in it. comparedir is usually the SOURCEDIR from\n"
          " a prior copy/sync stage.\n"
          " --purge : WARNING: this deletes files older than the\n"
          " retain period -OR- if retain is not specified\n"
          " --comparedir is required. The comparedir is\n"
          " compared against the specified dir and any files\n"
          " not found in the comparedir are purged.\n"
          " --force : NOTE: default is a *dry-run* for purge, you must\n"
          " specify --force option to actually purge files\n"
          " --atime : keep if access time within retain period\n"
          " --retain : keep if file ctime or mtime within retention period\n"
          " specified in days. eg: --retain=60\n"
          "\n",
          stderr);
}
/*
 * Check whether a directory base name is on the exclude list.
 *
 * dirName: base name of the directory (no path components).
 * Returns 1 when dirName equals one of the first excludeDirCount entries of
 * excludeDirs, 0 otherwise (including when the list is empty).
 */
int excludeDir(const char *dirName)
{
    size_t idx;

    for (idx = 0; idx < excludeDirCount; ++idx) {
        if (strcmp(excludeDirs[idx], dirName) == 0)
            return 1;
    }
    return 0;
}
/*
 * Rewrite the status file with the current counters.
 *
 * message: free-form state text written on the "status:" line.
 *
 * The file named by `statusfile` is truncated and rewritten on every call.
 * Failure to open it is ignored on purpose: status reporting is best effort
 * and must not interrupt the walk. Elapsed time is measured from
 * `currenttime`.
 */
void
updateStatus(char *message)
{
    time_t now;
    time(&now);
    size_t elapsed = (size_t)difftime(now, currenttime);
    size_t hours = elapsed / 3600;
    size_t minutes = (elapsed % 3600) / 60;
    size_t seconds = elapsed % 60;

    FILE *statusFp = fopen(statusfile, "w");
    if (statusFp == NULL)
        return;

    /* %zu is the conversion that matches size_t; %lu is only right where size_t happens to be unsigned long. */
    fprintf(statusFp, " status: %s\n", message);
    fprintf(statusFp, " scanned: %zu\n", report.scanned);
    fprintf(statusFp, " copied: %zu\n", report.copied);
    fprintf(statusFp, " modified: %zu\n", report.modified);
    fprintf(statusFp, " created: %zu\n", report.created);
    fprintf(statusFp, " skipped: %zu\n", report.skipped);
    fprintf(statusFp, " deleted: %zu\n", report.deleted);
    fprintf(statusFp, " errors: %zu\n", report.error);
    fprintf(statusFp, " elapsed: %02zu:%02zu:%02zu\n", hours, minutes, seconds);
    fclose(statusFp);
}
/*
 * Progress indicator for --dots mode: prints the running total whenever the
 * scanned count is a multiple of 1000 and a single '.' on the other
 * multiples of 100. Does nothing when DOTS is off.
 */
void printDot()
{
    if (!DOTS)
        return;

    if (report.scanned % 1000 == 0)
        printf("%zu", report.scanned); /* size_t needs %zu, not %ld */
    else if (report.scanned % 100 == 0)
        printf(".");
}
/*
 * Periodically refresh the status file while the walk is running.
 * The clock is only consulted on every 256th scanned entry, and the file is
 * rewritten only when at least 10 seconds have passed since the last write.
 */
void updateRunningStatus()
{
    time_t stamp;
    size_t sinceLast;

    if (!STATUS)
        return;
    if (report.scanned % 256 != 0)
        return;

    time(&stamp);
    sinceLast = (size_t)difftime(stamp, laststatustime);
    if (sinceLast < 10)
        return;

    laststatustime = stamp;
    updateStatus("running");
}
void
printTotals()
{
if(STATUS)
{
updateStatus("completed");
}
if(DU_MODE)
{
fprintf(stderr, "TOTAL: %llu\n", DU_TOTAL);
}
if(!SHOWTOTALS)
return;
time_t now;
time(&now);
size_t elapsed = (size_t)difftime(now, currenttime);
size_t hours = elapsed / 3600;
size_t minutes = (elapsed % 3600) / 60;
size_t seconds = elapsed % 60;
fprintf(stderr, "\n\n%12s %12s %12s %12s %12s %12s %12s %12s\n", "Scanned", "Copied", "Modified", "Created", "Deleted", "Skipped", "Errors", "Elapsed (HH:MM:SS)");
fprintf(stderr, "%12lu %12lu %12lu %12lu %12lu %12lu %12lu %02lu:%02lu:%02lu\n", report.scanned, report.copied, report.modified, report.created, report.deleted, report.skipped, report.error, hours, minutes, seconds);
}
/*
 * Copy fullPath into dirPath and cut it back to its directory part.
 *
 * dirPath:   destination buffer of at least bufferLen bytes.
 * fullPath:  NUL-terminated path; NULL is treated as an empty string.
 * bufferLen: capacity of dirPath in bytes, including the terminator.
 *
 * The copy is truncated to bufferLen - 1 characters and is always
 * NUL-terminated. Trailing characters are then removed until the string ends
 * in '/' or only its first character is left (so "/a/b/c" becomes "/a/b/" and
 * a path without any slash is reduced to its first character).
 *
 * The previous version ignored bufferLen, zero-padded 2 * FILENAME_MAX bytes
 * into the destination and indexed dirPath[-1] for an empty input.
 */
void
getDirPath(char *dirPath, char *fullPath, size_t bufferLen)
{
    size_t len;

    if (dirPath == NULL || bufferLen == 0)
        return;
    if (fullPath == NULL)
    {
        dirPath[0] = '\0';
        return;
    }

    len = strlen(fullPath);
    if (len >= bufferLen)
        len = bufferLen - 1;
    memcpy(dirPath, fullPath, len);
    dirPath[len] = '\0';

    /* Strip the base name: stop at the last '/' or at index 0. */
    while (len > 1 && dirPath[len - 1] != '/')
    {
        len--;
        dirPath[len] = '\0';
    }
}
/*
 * Append one line to the backup log.
 *
 * targetPath: path to record; nothing is written when it is NULL.
 * mode:       optional tag; when non-empty the line is "mode: path",
 *             otherwise just "path".
 *
 * Does nothing unless LOG is set and a log file name was given. The log file
 * is opened in append mode for each call while holding mutexLog.
 */
void
logPath(char *targetPath, char *mode)
{
    FILE *out;

    if (!LOG || (logfile[0] == 0))
        return;

    pthread_mutex_lock(&mutexLog);
    out = fopen(logfile, "a");
    if (out == NULL)
    {
        pthread_mutex_unlock(&mutexLog);
        return;
    }

    if (targetPath != NULL)
    {
        if (mode == NULL || strlen(mode) == 0)
            fprintf(out, "%s\n", targetPath);
        else
            fprintf(out, "%s: %s\n", mode, targetPath);
    }
    fclose(out);
    pthread_mutex_unlock(&mutexLog);
}
/*
 * Report whether a directory has at least one visible entry.
 *
 * targetPath: directory to inspect.
 * Returns 1 as soon as an entry whose name does not start with '.' is found;
 * returns 0 when there is none or when the directory cannot be opened.
 * Entries beginning with '.' (including "." and "..") are not counted.
 */
int
dirContainsFiles(char *targetPath)
{
    struct dirent *entry;
    int hasVisible = 0;
    DIR *dir = opendir(targetPath);

    if (dir == NULL)
        return 0;

    while (!hasVisible && (entry = readdir(dir)) != NULL)
        hasVisible = (entry->d_name[0] != '.');

    closedir(dir);
    return hasVisible;
}
/*
 * Print the CSV record for one entry to stdout.
 *
 * f:          stat data of the entry.
 * sourcePath: path text to print in the file-name column.
 * exten:      file extension, or NULL for none (printed as "").
 * fileCnt:    value of the file-count column.
 * dirSz:      value of the directory-size column.
 *
 * Nothing is printed in --dots or --quiet mode.
 *
 * The record is written with a single printf() call. The previous version
 * formatted it with sprintf() into a 2 * FILENAME_MAX stack buffer, which a
 * long path plus the numeric columns could overflow.
 */
void logLine(struct stat *f, const char *sourcePath, const char *exten, const long fileCnt, const long dirSz)
{
    if (DOTS || QUIET)
        return;

    printf("\"%ld\",\"%s\",\"%s\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%07o\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%ld\"\n",
           (long)f->st_ino, sourcePath, (exten) ? exten : "", (long)f->st_uid,
           (long)f->st_gid, (long)f->st_size, (long)f->st_blocks, (int)f->st_mode,
           (long)f->st_atime, (long)f->st_mtime, (long)f->st_ctime, fileCnt, dirSz);
}
/*
 * Debug trace for directory creation: prints the inode number from `f` and
 * the given path to stdout. Only active when DEBUG_MODE is set and neither
 * --dots nor --quiet is in effect.
 *
 * Written directly with printf(); the previous sprintf() into a fixed stack
 * buffer could overflow for paths close to 2 * FILENAME_MAX.
 */
void logDirLine(struct stat *f, const char *sourcePath)
{
    if (DOTS || QUIET || !DEBUG_MODE)
        return;

    printf("INFO: CHECKING/CREATING DIR: \"%ld\",\"%s\"\n", (long)f->st_ino, sourcePath);
}
/*
 * Print "INFO: COMPARING [source] [target]" to stdout (no trailing newline,
 * as before).
 *
 * The two paths together can be longer than the 2 * FILENAME_MAX stack buffer
 * the previous sprintf() wrote into, so the text is now printed directly.
 */
void logCompareLine(const char *sourcePath, const char *targetPath)
{
    printf("INFO: COMPARING [%s] [%s]", sourcePath, targetPath);
}
/*
 * Decide whether one entry should be purged and, if so, schedule the delete.
 *
 * f:             lstat data of the entry.
 * sourcePath:    shell-escaped path, used for messages only.
 * sourcePathRaw: unescaped path, the one handed to unlink().
 * targetPath:    shell-escaped counterpart under comparedir (messages only).
 * targetPathRaw: unescaped counterpart under comparedir, used with stat().
 *
 * Returns -1 for directories (never purged), 0 otherwise.
 *
 * Two modes:
 *  - compare mode (COMPAREDIR set, RETAIN not set): a regular file is
 *    selected when its counterpart cannot be stat()'d.
 *  - retain mode (RETAIN set): a file is selected when every enabled
 *    timestamp comparison (ctime/mtime/atime) is older than RETAIN seconds.
 *
 * A selected file is counted as deleted even on a dry run. The delete itself
 * is only scheduled (opType/opCommand) when !DRYRUN && FORCE, so the caller
 * can run it outside the critical section.
 *
 * Fix: retain mode used to store the shell-escaped path in opCommand. That
 * string goes to unlink(), not to a shell, so names containing '"' or '$'
 * were never removed. Both modes now store the raw path.
 */
int purgeFile(struct stat *f, char *sourcePath, char *sourcePathRaw, char *targetPath, char *targetPathRaw)
{
    const int liveRun = (!DRYRUN && FORCE);
    const char *action = liveRun ? "UNLINK" : "dryrun";

    if (S_ISDIR(f->st_mode))
    {
        if (DEBUG_MODE) printf("INFO: Purge skipping DIR: '%s'\n", sourcePath);
        incrementSkipped();
        return -1;
    }

    if (!RETAIN && COMPAREDIR)
    {
        if (DEBUG_MODE)
            printf("comparing: \n\t%s :: %s\n\t%s :: %s\n", sourcePath, targetPath, sourcePathRaw, targetPathRaw);

        struct stat statTarget = { 0 };
        if (stat(targetPathRaw, &statTarget) == -1 && S_ISREG(f->st_mode))
        {
            incrementDeleted();
            if (DEBUG_MODE)
                printf("INFO: PURGE [%s] : %s\n", action, sourcePath);
            if (liveRun)
            {
                opType = OP_DELETE;
                snprintf(opCommand, sizeof opCommand, "%s", sourcePathRaw);
                if (DOTS)
                    printf("!");
            }
        }
        else
            incrementSkipped();
        return 0;
    }

    if (RETAIN && (currenttime > f->st_ctime) && (
        (!CMP_CTIME || (difftime(currenttime, f->st_ctime) > RETAIN)) &&
        (!CMP_MTIME || (difftime(currenttime, f->st_mtime) > RETAIN)) &&
        (!CMP_ATIME || (difftime(currenttime, f->st_atime) > RETAIN))))
    {
        incrementDeleted();
        if (DEBUG_MODE)
        {
            printf("INFO: PURGE [%s] (%zu days old): %s\n", action, (size_t)difftime(currenttime, f->st_mtime) / (3600 * 24), sourcePath);
        }
        if (liveRun)
        {
            /* Only record the path here; the unlink runs outside the
             * critical section so deletes stay parallel. */
            snprintf(opCommand, sizeof opCommand, "%s", sourcePathRaw);
            opType = OP_DELETE;
            if (DOTS)
                printf("!");
        }
    }
    else
        incrementSkipped();
    return 0;
}
/*
 * Produce a copy of fname that is safe to place between double quotes in a
 * shell command line.
 *
 * fname:      NUL-terminated input path.
 * sourcePath: output buffer. No size is passed in, so the caller must provide
 *             room for the worst case of 2 * strlen(fname) + 1 bytes.
 *
 * Inside double quotes the shell still interprets '"', '$', '`' and '\', so
 * each of them is prefixed with a backslash. The previous version escaped
 * only '"' and '$'; a file name containing a backtick could therefore run a
 * command substitution when the path was embedded in the cp/mv/mkdir/rsync
 * command lines.
 */
void getProcessedSourcePath(const char *fname, char *sourcePath)
{
    const char *in;
    char *out = sourcePath;

    for (in = fname; *in != '\0'; in++)
    {
        switch (*in)
        {
        case '"':
        case '$':
        case '`':
        case '\\':
            *out++ = '\\';
            break;
        default:
            break;
        }
        *out++ = *in;
    }
    *out = '\0';
}
void getTargetPathFromSource(const char *sourcePath, char *targetPath)
{
if(TARGETDIR)
{
//Add a trailing / to the base path if it's not there.
size_t targetdirLen = strlen(targetdir);
if((targetdirLen + 1 < FILENAME_MAX + FILENAME_MAX) && targetdir[targetdirLen - 1] != '/')
{
targetdir[targetdirLen] = '/';
targetdir[targetdirLen + 1] = 0;
}
size_t offset = rootdirlen + 1;
strncpy(targetPath, targetdir, FILENAME_MAX + FILENAME_MAX);
strncat(targetPath, &sourcePath[offset], FILENAME_MAX + FILENAME_MAX);
}
else if(COMPAREDIR)
{
//Add a trailing / to the base path if it's not there.
size_t comparedirLen = strlen(comparedir);
if((comparedirLen + 1 < FILENAME_MAX + FILENAME_MAX) && comparedir[comparedirLen - 1] != '/')
{
comparedir[comparedirLen] = '/';
comparedir[comparedirLen + 1] = 0;
}
size_t offset = rootdirlen + 1;
strncpy(targetPath, comparedir, FILENAME_MAX + FILENAME_MAX);
strncat(targetPath, &sourcePath[offset], FILENAME_MAX + FILENAME_MAX);
}
//else { } //standard PWALK
}
/*
 * Make sure the directory that corresponds to `fname` exists under the
 * target directory, creating it (and copying its permissions) if needed.
 *
 * fname: raw (unescaped) path of a source directory.
 * Returns 0. Terminates the process with exit(-1) when fname is not a
 * directory.
 *
 * When the target is missing, the created/copied counters are bumped (also on
 * a dry run) and, unless DRYRUN is set, `mkdir` is run through system(), the
 * source's atime/mtime are applied, and - unless NODIRPERMS - rsync copies
 * the directory's permissions, ownership, ACLs and extended attributes.
 *
 * Fixes over the previous version:
 *  - the utime() argument was garbled in the source; it is `&times`;
 *  - utime() was given the shell-escaped path; system calls need the raw one;
 *  - the "updating DIR perms" debug line printed the stale mkdir command;
 *  - a command that does not fit its buffer is now rejected instead of being
 *    run truncated;
 *  - a failing mkdir is counted as an error.
 */
int makeTargetDirPath(const char *fname)
{
    char sourcePath[FILENAME_MAX + FILENAME_MAX];
    char targetPath[FILENAME_MAX + FILENAME_MAX];
    char targetPathRaw[FILENAME_MAX + FILENAME_MAX];
    /* Room for two escaped paths plus the fixed command text. */
    char command[4 * FILENAME_MAX + 128];
    int written;

    targetPath[0] = '\0';
    targetPathRaw[0] = '\0';
    getProcessedSourcePath(fname, sourcePath);
    getTargetPathFromSource(fname, targetPathRaw);
    getTargetPathFromSource(sourcePath, targetPath);

    /* A failed stat() leaves the zeroed struct in place, which then fails
     * the S_ISDIR tests below. */
    struct stat statSource = { 0 };
    stat(fname, &statSource);
    struct stat statTarget = { 0 };
    stat(targetPathRaw, &statTarget);

    if (!S_ISDIR(statSource.st_mode))
    {
        fprintf(stderr, "ERR: source '%s' is not a directory.", sourcePath);
        exit(-1);
    }

    logDirLine(&statTarget, targetPath);

    if (S_ISDIR(statTarget.st_mode))
    {
        if (DEBUG_MODE)
            printf("INFO: verified DIR exists: %s\n", fname);
        return 0;
    }

    incrementCreated();
    incrementCopied();

    written = snprintf(command, sizeof command, "mkdir \"%s\" %s", targetPath, errFile);
    if (written < 0 || (size_t)written >= sizeof command)
    {
        fprintf(stderr, "ERR: mkdir command too long for '%s'\n", targetPath);
        incrementError();
        return 0;
    }
    if (DEBUG_MODE)
        printf("INFO: making DIR: %s\n", command);
    if (!DRYRUN)
    {
        if (system(command) != 0)
            incrementError();

        struct utimbuf times;
        times.actime = statSource.st_atime;
        times.modtime = statSource.st_mtime;
        utime(targetPathRaw, &times);
    }

    /* preserve the permissions for the directories */
    if (!NODIRPERMS)
    {
        written = snprintf(command, sizeof command, "rsync -ptgo -A -X -d --no-recursive --exclude=* \"%s/\" \"%s\" %s", sourcePath, targetPath, errFile);
        if (written < 0 || (size_t)written >= sizeof command)
        {
            fprintf(stderr, "ERR: rsync command too long for '%s'\n", targetPath);
            incrementError();
            return 0;
        }
        if (DEBUG_MODE)
            printf("INFO: updating DIR perms via rsync: %s\n", command);
        if (!DRYRUN)
            system(command);
    }
    return 0;
}
/*
 * Report one entry and decide what, if anything, has to be done with it.
 *
 * fname:   raw path of the entry.
 * exten:   file extension or NULL.
 * f:       lstat data of the entry.
 * fileCnt: number of entries (directories only, 0 for files).
 * dirSz:   byte total of the directory's entries (0 for files).
 *
 * Must be called with mutexPrintStat held: it writes the shared
 * opType/opCommand pair that the caller picks up afterwards.
 *
 * Steps: print the CSV line, update status/progress, count the entry; stop
 * for directories; in purge mode delegate to purgeFile(); in copy mode
 * (TARGETDIR) compare the source with the target and, if a copy is needed and
 * the retention window allows it, schedule a cp/mv command (OP_COPY).
 *
 * Fixes over the previous version:
 *  - every source/target comparison compared `f` with itself (for example
 *    f->st_size != f->st_size), so an existing target was never refreshed;
 *    the comparisons now use the target's stat data;
 *  - the mtime branch reported a change but did not request the copy;
 *  - the "target is a directory / not a regular file" checks tested the
 *    source instead of the target;
 *  - the utime() argument was garbled (`&times`), used the shell-escaped path
 *    and ignored --dryrun;
 *  - a command line that does not fit the buffer is rejected rather than run
 *    truncated;
 *  - two unused path buffers were removed and size_t is printed with %zu.
 */
void
printStat(char *fname, char *exten, struct stat *f, long fileCnt, long dirSz)
{
    char sourcePath[FILENAME_MAX + FILENAME_MAX];
    char targetPath[FILENAME_MAX + FILENAME_MAX];
    char targetPathRaw[FILENAME_MAX + FILENAME_MAX];

    /* getTargetPathFromSource() leaves its output untouched in plain walk mode. */
    targetPath[0] = '\0';
    targetPathRaw[0] = '\0';
    getProcessedSourcePath(fname, sourcePath);
    getTargetPathFromSource(fname, targetPathRaw);
    getTargetPathFromSource(sourcePath, targetPath);
    opType = OP_NONE;

    logLine(f, sourcePath, exten, fileCnt, dirSz);
    updateRunningStatus();
    printDot();
    incrementScanned();

    struct stat statSource = { 0 };
    stat(fname, &statSource);
    if (S_ISDIR(statSource.st_mode))
    {
        return;
    }

    if (PURGE)
    {
        purgeFile(f, sourcePath, fname, targetPath, targetPathRaw);
        return;
    }

    /* Only copy mode has anything left to do. */
    if (!TARGETDIR)
        return;

    struct stat statTarget = { 0 };
    int doCopy = 0;
    int doCompare = 0;

    if (stat(targetPathRaw, &statTarget) == -1 || MOVE_MODE)
    {
        if (DEBUG_MODE) fprintf(stderr, "INFO: File '%s' doesn't exist at target, doing copy '%s'\n", sourcePath, targetPath);
        doCopy = 1;
    }
    else if (S_ISDIR(statTarget.st_mode))
    {
        if (DEBUG_MODE) fprintf(stderr, "ERR: Target is a directory '%s'\n", targetPath);
        incrementError();
        return;
    }
    else if (!S_ISREG(statTarget.st_mode))
    {
        if (DEBUG_MODE) fprintf(stderr, "ERR: Target not regular file '%s'\n", targetPath);
        incrementError();
        return;
    }
    else
    {
        doCompare = 1;
    }

    if (doCompare)
    {
        if (DEBUG_MODE)
        {
            logCompareLine(sourcePath, targetPath);
        }
        if (f->st_mode != statTarget.st_mode)
        {
            if (DEBUG_MODE) printf("INFO: File has MODE CHANGE, copying\n");
            doCopy = 1;
        }
        else if (f->st_size != statTarget.st_size)
        {
            if (DEBUG_MODE) printf("INFO: File has SIZE CHANGE, copying\n");
            doCopy = 1;
        }
        /* The target always has a different ctime, so equality is useless
         * here: recopy only when the source changed after the target. */
        else if (CMP_CTIME && (statTarget.st_ctime < f->st_ctime))
        {
            if (DEBUG_MODE) printf("INFO: CTIME on target is older than source, copying\n");
            doCopy = 1;
        }
        else if (CMP_MTIME && (f->st_mtime != statTarget.st_mtime))
        {
            if (DEBUG_MODE) printf("INFO: File has MTIME CHANGE, copying\n");
            doCopy = 1;
        }
        else
        {
            if (DEBUG_MODE) printf("INFO: Detected no file changes, copy not required.\n");
        }

        if (!doCopy && CMP_ATIME && (f->st_atime != statTarget.st_atime))
        {
            if (DEBUG_MODE) printf("INFO: File has only ATIME CHANGE, updating timestamps\n");
            if (!DRYRUN)
            {
                struct utimbuf times;
                times.actime = f->st_atime;
                times.modtime = f->st_mtime;
                utime(targetPathRaw, &times);
            }
        }
    }

    if (!doCopy)
        return;

    if (RETAIN)
    {
        if ((currenttime < f->st_ctime) ||
            (CMP_CTIME && (difftime(currenttime, f->st_ctime) < RETAIN)) ||
            (CMP_MTIME && (difftime(currenttime, f->st_mtime) < RETAIN)) ||
            (CMP_ATIME && (difftime(currenttime, f->st_atime) < RETAIN)))
        {
            if (DEBUG_MODE)
                printf("INFO: INSIDE RETAIN (%zu days old): %s\n", (size_t)difftime(currenttime, f->st_mtime) / (3600 * 24), sourcePath);
        }
        else
        {
            if (DEBUG_MODE)
                printf("INFO: OUTSIDE RETAIN (%zu days old), skipping: %s\n", (size_t)difftime(currenttime, f->st_mtime) / (3600 * 24), sourcePath);
            incrementSkipped();
            return;
        }
    }

    char command[FILENAME_MAX + FILENAME_MAX];
    int written;
    if (MOVE_MODE)
        written = snprintf(command, sizeof command, "mv \"%s\" \"%s\" %s", sourcePath, targetPath, errFile);
    else
        written = snprintf(command, sizeof command, "cp --no-dereference --preserve=all \"%s\" \"%s\" %s", sourcePath, targetPath, errFile);
    if (written < 0 || (size_t)written >= sizeof command)
    {
        /* Never hand a truncated command line to the shell. */
        fprintf(stderr, "ERR: copy command too long, skipping '%s'\n", sourcePath);
        incrementError();
        return;
    }

    if (doCompare)
        incrementModified();
    else
        incrementCreated();

    if (DEBUG_MODE)
        printf("INFO: [%s]\n", command);
    if (!DRYRUN)
    {
        /* Only store the command; the caller runs it outside the critical
         * section so copies stay parallel. */
        incrementCopied();
        opType = OP_COPY;
        snprintf(opCommand, sizeof opCommand, "%s", command);
    }
    if (LOG)
        logPath(targetPath, "");
}
/*
 * Carry out an operation that printStat()/purgeFile() scheduled.
 *
 * doOperation: OP_COPY runs doCommand through system();
 *              OP_DELETE passes doCommand to unlink() as a path;
 *              anything else is a no-op.
 * doCommand:   command line or path, depending on doOperation.
 *
 * Return values of system()/unlink() are not inspected.
 */
void execOperation(operationType_t doOperation, char *doCommand)
{
    switch (doOperation)
    {
    case OP_COPY:
        system(doCommand);
        break;
    case OP_DELETE:
        unlink(doCommand);
        break;
    default:
        break;
    }
}
/*
 * Finish a fileDir() invocation that runs as its own thread.
 *
 * For fd == NULL or a recursive call (flag != 0) this returns immediately.
 * Otherwise, under mutexFD, the live-thread count is decremented, condFD is
 * signalled when the count reaches zero, and the slot is marked free. The
 * calling thread then ends via pthread_exit(), so in that case the function
 * does not return.
 */
void cleanupThread(struct fileData *fd)
{
    if (fd == NULL || fd->flag != 0)
        return; /* not a thread instance of fileDir */

    pthread_mutex_lock(&mutexFD);
    ThreadCNT -= 1;
    if (ThreadCNT == 0)
        pthread_cond_signal(&condFD);
    fd->THRDslot = -1;
    pthread_mutex_unlock(&mutexFD);

    pthread_exit(EXIT_SUCCESS);
}
/*
* Open a directory and read the conents. Call stat with each
* file name.
*
* Recursively call self for each sub dir.
*
* print inode meta data for each file, one line per file in CSV format
*/
void
*fileDir(void *arg)
{
char *s, *t, *u, *dot, *end_dname;
char fname[FILENAME_MAX + 1];
int slot, id, found;
DIR *dirp;
long localCnt = 0; /* number of files in a specific directory */
unsigned long long localSz = 0; /* byte cnt of files in the local directory 2010.07 */
struct dirent *d;
struct stat f;
struct fileData *fd, local;
fd = (struct fileData *) arg;
if(DEBUG_MODE)
printf("Start %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname);
if(fd == NULL)
{
fprintf(stderr, "ERR: invalid directory specified, skipping.\n");
return NULL;
}
if((dirp = opendir(fd->dname)) == NULL)
{
fprintf(stderr, "ERR: unable to open directory '%s', skipping.\n", fd->dname);
cleanupThread(fd);
return NULL;
}
/* find the end of fs->name and put '/' at the end <end_dname>
points to char after '/' */
s = fd->dname + strlen(fd->dname);
*s++ = '/';
end_dname = s;
while((d = readdir(dirp)) != NULL) {
if(strcmp(".", d->d_name) == 0)
continue;
if(strcmp("..", d->d_name) == 0)
continue;
localCnt++;
s = d->d_name;
t = end_dname;
while(*s)
*t++ = *s++;
*t = '\0';
if(lstat(fd->dname, &f) == -1) {
fprintf(stderr, "ERR: lstat failed: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname);
continue;
}
if(!S_ISDIR(f.st_mode) && !S_ISREG(f.st_mode))
{
fprintf(stderr, "ERR: skipping non-dir, non-reg file: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname);
if(LOG && DEBUG_MODE)
logPath(fd->dname, "NOTREG");
continue;
}
/* Follow Sub dirs recursivly but don't follow links */
localSz += f.st_size;
DU_TOTAL += f.st_size;
if(S_ISDIR(f.st_mode))
{
if(TARGETDIR)
{
makeTargetDirPath(fd->dname);
}
if(EXCLUDE_DIRS > 0 && excludeDir(d->d_name)) {
if(DEBUG_MODE) fprintf(stderr, "INFO: skipping directory: %s\n", d->d_name);
continue;
}
pthread_mutex_lock(&mutexFD);
if(ThreadCNT < MAXTHRDS) {
ThreadCNT++;
id = totalTHRDS++;
slot = 0; found = -1;
while(slot < MAXTHRDS) {
if(fdslot[slot].THRDslot == -1) {
found = slot;
break;
}
slot++;
}
if(found == -1)
fprintf(stderr, "ERR: SlotE %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, "no available threads");
else
fdslot[slot].THRDslot = slot;
}
else
slot = -1;
pthread_mutex_unlock(&mutexFD);
if(slot != -1) {
strcpy(fdslot[slot].dname, (const char*)fd->dname);
fdslot[slot].THRDid = id;
fdslot[slot].flag = 0;
if(pthread_create(&fdslot[slot].thread_id, &fdslot[0].tattr,
fileDir, (void*)&fdslot[slot])) {
/* thread create failed, undo slot */
pthread_mutex_lock(&mutexFD);
/* this won't go to zero here, so no need cond_signal */
ThreadCNT--;
fdslot[slot].THRDslot = -1;
pthread_mutex_unlock(&mutexFD);
goto local_call;
}
}
else {
local_call:
strcpy(local.dname, (const char*)fd->dname);
local.THRDslot = fd->THRDslot;
local.THRDid = fd->THRDid;
local.flag = fd->flag + 1;
fileDir((void*)&local);
}
}
else
{
s = end_dname + 1; dot = NULL;
while(*s)
{
if(*s == '.')
dot = s + 1;
s++;
}
pthread_mutex_lock(&mutexPrintStat);
printStat(fd->dname, dot, &f, (long)0, (long)0);
operationType_t doOperation = opType;
opType = OP_NONE;
char doCommand[FILENAME_MAX + FILENAME_MAX + 1];
strncpy(doCommand, opCommand, FILENAME_MAX + FILENAME_MAX);
pthread_mutex_unlock(&mutexPrintStat);
execOperation(doOperation, doCommand);
}
}
closedir(dirp);
*--end_dname = '\0';
#ifdef THRD_DEBUG
printf("Ending %2d%5d %2d<%s>\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname);
#endif /* THRD_DEBUG */
if(lstat(fd->dname, &f) == -1) {
fprintf(stderr, "ERR: lstat failed: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname);
}
else {
s = end_dname - 1; dot = NULL;
while(*s != '/' && (s != fd->dname))
{
if(*s == '.') { dot = s + 1; break; }
s--;
}
if(s + 1 == dot) /* Dot file is not an extension Exp: /.bashrc */
dot = NULL;
pthread_mutex_lock(&mutexPrintStat);
printStat(fd->dname, dot, &f, localCnt, localSz);
operationType_t doOperation = opType;
opType = OP_NONE;
char doCommand[FILENAME_MAX + FILENAME_MAX + 1];
strncpy(doCommand, opCommand, FILENAME_MAX + FILENAME_MAX);
pthread_mutex_unlock(&mutexPrintStat);
execOperation(doOperation, doCommand);
}
cleanupThread(fd);
/* return ; */
}
int
main(int argc, char* argv[])
{
int error, i;
char *s, *c;
void *status;
if(argc < 2 || argv == NULL)
{
printHelp();
exit(EXIT_FAILURE);
}
argc--; argv++;
while(argc > 1)
{
if(!strncmp(*argv, "--maxthreads=", 13))
{
MAXTHRDS = (size_t)atoi(&((*argv)[strlen("--maxthreads=")]));
if(MAXTHRDS > 64) MAXTHRDS = 64;
if(MAXTHRDS <= 2) MAXTHRDS = 2;
if(DEBUG_MODE) printf("INFO: MAXTHRDS='%lu'\n", MAXTHRDS);
}
else if(!strncmp(*argv, "--retain=", 9))
{
RETAIN = (size_t)atoi(&((*argv)[strlen("--retain=")])) * 24 * 3600;
if(DEBUG_MODE) printf("INFO: RETAIN='%lu'\n", RETAIN);
}
else if(!strcmp(*argv, "--move"))
MOVE_MODE = 1;
else if(!strcmp(*argv, "--debug"))
DEBUG_MODE = 1;
else if(!strcmp(*argv, "--du"))
{
DU_MODE = 1;
SHOWTOTALS = 0;
QUIET = 1;
}
else if(!strcmp(*argv, "--nototals"))
SHOWTOTALS = 0;
else if(!strcmp(*argv, "--noatime"))
CMP_ATIME = 0;
else if(!strcmp(*argv, "--atime"))
CMP_ATIME = 1;
else if(!strcmp(*argv, "--nomtime"))
CMP_MTIME = 0;
else if(!strcmp(*argv, "--mtime"))
CMP_MTIME = 1;
else if(!strcmp(*argv, "--noctime"))
CMP_CTIME = 0;
else if(!strcmp(*argv, "--ctime"))
CMP_CTIME = 1;
else if(!strncmp(*argv, "--excludedirs=", 14))
{
memset(excludeDirData, 0, sizeof(excludeDirData));
strncpy(excludeDirData, &((*argv)[strlen("--excludedirs=")]), sizeof(excludeDirData));
if(strlen(excludeDirData) == 0)
{
fprintf(stderr, "ERR: Invalid exclude file name.\n");
}
else
{
char *token = strtok(excludeDirData, ",");
while(token != NULL && excludeDirCount < 1023)
{
excludeDirs[excludeDirCount] = strdup(token);
if(DEBUG_MODE) printf("INFO: Adding exclude dir '%s'\n", token);
excludeDirCount++;
EXCLUDE_DIRS++;
token = strtok(NULL, ",");
}
}
}
else if(!strcmp(*argv, "--NoSnap") || !strcmp(*argv, "--nosnap"))
{
excludeDirs[excludeDirCount] = strdup(".snapshots");
if(DEBUG_MODE) printf("INFO: Adding exclude dir '.snapshots'\n");
excludeDirCount++;
EXCLUDE_DIRS++;
excludeDirs[excludeDirCount] = strdup(".qsnaps");
if(DEBUG_MODE) printf("INFO: Adding exclude dir '.snapshots'\n");
excludeDirCount++;
EXCLUDE_DIRS++;
}
else if(!strcmp(*argv, "--dryrun"))
DRYRUN = 1;
else if(!strcmp(*argv, "--force"))
FORCE = 1;
else if(!strcmp(*argv, "--nodirperms"))
NODIRPERMS = 1;
else if(!strcmp(*argv, "--purge"))
{
PURGE = 1;
}
else if(!strncmp(*argv, "--backuplog=", 12))
{
LOG = 1;
memset(logfile, 0, sizeof(logfile));
strncpy(logfile, &((*argv)[strlen("--backuplog=")]), sizeof(logfile));
if(strlen(logfile) == 0)
{
fprintf(stderr, "ERR: Invalid log file name.");
LOG = 0;
}
if(DEBUG_MODE) printf("INFO: LOGFILE='%s'\n", logfile);
}
else if(!strncmp(*argv, "--status=", 9))
{
STATUS = 1;
memset(statusfile, 0, sizeof(statusfile));
strncpy(statusfile, &((*argv)[strlen("--status=")]), sizeof(statusfile));
if(strlen(statusfile) == 0)
{
fprintf(stderr, "ERR: Invalid status log file name.");
STATUS = 0;
}
if(DEBUG_MODE) printf("INFO: STATUS='%s'\n", statusfile);
}
else if(!strncmp(*argv, "--targetdir=", 12))
{
TARGETDIR = 1;
memset(targetdir, 0, sizeof(targetdir));
strncpy(targetdir, &((*argv)[strlen("--targetdir=")]), sizeof(targetdir));
if(DEBUG_MODE)
printf("INFO: TARGETDIR='%s'\n", targetdir);
}
else if(!strncmp(*argv, "--comparedir=", 13))
{
COMPAREDIR = 1;
memset(comparedir, 0, sizeof(comparedir));
strncpy(comparedir, &((*argv)[strlen("--comparedir=")]), sizeof(comparedir));
if(DEBUG_MODE) printf("INFO: COMPAREDIR='%s'\n", comparedir);
if(!dirContainsFiles(comparedir))
{
printf("ERR: Cannot run purge operation against a comparison directory with zero files '%s'\n", comparedir);
exit(EXIT_FAILURE);
}
}
else if(!strcmp(*argv, "--dots"))
DOTS = 1;
else if(!strcmp(*argv, "--quiet"))
QUIET = 1;
argc--;
argv++;
}
if(!strcmp(*argv, "--help"))
{
printHelp();
exit(0);
}
if(!strcmp(*argv, "--version") || !strcmp(*argv, "-v"))
{
printVersion();
exit(0);
}
time(¤ttime);
fdslot = malloc(sizeof(struct fileData) * MAXTHRDS);
for(i = 0; i < MAXTHRDS; i++)
{
fdslot[i].THRDslot = -1;
if((error = pthread_attr_init(&fdslot[i].tattr)))
fprintf(stderr, "Failed to create pthread attr: %s\n",
strerror(error));
else if((error = pthread_attr_setdetachstate(&fdslot[i].tattr,
PTHREAD_CREATE_DETACHED)
))
fprintf(stderr, "failed to set attribute detached: %s\n",
strerror(error));
}
pthread_mutex_init(&mutexFD, NULL);
pthread_cond_init(&condFD, NULL);
strcpy(fdslot[0].dname, (const char*)*argv);
struct stat statRoot = { 0 };
if(stat(fdslot[0].dname, &statRoot) == -1 || !S_ISDIR(statRoot.st_mode))
{
fprintf(stderr, "ERR: Invalid directory specification '%s'\n", fdslot[0].dname);
}
rootdirlen = strlen(fdslot[0].dname);
//Trim the trailing slash so we don't get // in the path
if(fdslot[0].dname[strlen(fdslot[0].dname) - 1] == '/')
{
fdslot[0].dname[strlen(fdslot[0].dname) - 1] = '\0';
rootdirlen--;
}
fdslot[0].THRDslot = ThreadCNT++;
fdslot[0].THRDid = totalTHRDS++;
fdslot[0].flag = 0;
if(TARGETDIR)
{
makeTargetDirPath(fdslot[0].dname);
}
if((error = pthread_create(&(fdslot[0].thread_id), &fdslot[0].tattr, fileDir,
(void*)&fdslot[0]))) {
fprintf(stderr, "failed to start a thread: %s\n", strerror(error));
exit(error);
}
if(SHOWTOTALS || STATUS || DU_MODE)
atexit(printTotals);
pthread_mutex_lock(&mutexFD);
if(ThreadCNT > 0)
pthread_cond_wait(&condFD, &mutexFD);
pthread_mutex_unlock(&mutexFD);
pthread_exit(NULL);
}