Difference between revisions of "Pwalk.c"
From OSNEXUS Online Documentation Site
m |
m |
||
(2 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
<pre> | <pre> | ||
/* | /* | ||
− | * pwalk.c Parallel Walk a file system and report file meta data | + | * pwalk.c Parallel Walk a file system and report file meta data |
− | Copyright (C) 2013 John F Dey | + | Copyright (C) 2013 John F Dey |
− | Copyright (C) 2013 | + | Copyright (C) 2013-2016 OSNEXUS Corporation |
− | This program is free software; you can redistribute it and/or | + | This program is free software; you can redistribute it and/or |
− | modify it under the terms of the GNU General Public License | + | modify it under the terms of the GNU General Public License |
− | as published by the Free Software Foundation; either version 2 | + | as published by the Free Software Foundation; either version 2 |
− | of the License, or (at your option) any later version. | + | of the License, or (at your option) any later version. |
− | This program is distributed in the hope that it will be useful, | + | This program is distributed in the hope that it will be useful, |
− | but WITHOUT ANY WARRANTY; without even the implied warranty of | + | but WITHOUT ANY WARRANTY; without even the implied warranty of |
− | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
− | GNU General Public License for more details. | + | GNU General Public License for more details. |
− | You should have received a copy of the GNU General Public License | + | You should have received a copy of the GNU General Public License |
− | along with this program; if not, write to the Free Software | + | along with this program; if not, write to the Free Software |
− | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | + | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
− | * pwalk is inspired by du but designed to be used with large | + | * pwalk is inspired by du but designed to be used with large |
− | * file systems ( > 10 million files ) | + | * file systems ( > 10 million files ) |
− | * | + | * |
* History: dir.c, walk.c, walkv2,3,4, pwalkfs.c | * History: dir.c, walk.c, walkv2,3,4, pwalkfs.c | ||
* | * | ||
Line 29: | Line 29: | ||
* | * | ||
* 1997.03.20 John Dey Although this is the first documented date for | * 1997.03.20 John Dey Although this is the first documented date for | ||
− | * this file I have versions that date from 1988. | + | * this file I have versions that date from 1988. |
* 2002.09.04 John Dey walk the directory and gather stats | * 2002.09.04 John Dey walk the directory and gather stats | ||
* 2002.09.06 John Dey make to look like du -a | * 2002.09.06 John Dey make to look like du -a | ||
− | * 2004.07.06 John Dey add -a and -k | + | * 2004.07.06 John Dey add -a and -k |
* 2008.04.01 John Dey CSV output for database use | * 2008.04.01 John Dey CSV output for database use | ||
* 2009.04.12 John Dey v3.1 | * 2009.04.12 John Dey v3.1 | ||
− | * replaced constants with "FILENAME_MAX", | + | * replaced constants with "FILENAME_MAX", |
* Size of directory is size of all files in directory plus itself | * Size of directory is size of all files in directory plus itself | ||
* Added printStat function | * Added printStat function | ||
* print file count on line with directories | * print file count on line with directories | ||
− | * 2009.05.18 check for control characters and double quotes in file names; | + | * 2009.05.18 check for control characters and double quotes in file names; |
* escape the double quotes and print bad file names to stderr | * escape the double quotes and print bad file names to stderr | ||
− | * 2009.12.30 size for dir should just be dir size; Fix; count returns 0 | + | * 2009.12.30 size for dir should just be dir size; Fix; count returns 0 |
* for normal files and count of just the local directory; Previously count | * for normal files and count of just the local directory; Previously count | ||
− | * return the recursive file count for the whole tree. | + | * return the recursive file count for the whole tree. |
* | * | ||
− | + | 2010.01.08 john dey; New field to output: file name extension. | |
− | + | Extension is defined as the last part of the name after a Dot "." | |
− | + | if no dot is found extension is empty "" | |
− | + | new feature: accepts multiple directory names as cmd line argument | |
− | + | This line of code has been replaced | |
− | + | if ( f.st_mode & S_IFDIR && (f.st_mode & S_IFMT != S_IFLNK) ) { | |
− | + | With this new line of code: | |
− | + | if ( S_ISDIR(f.st_mode) ) { Or I could have done: if ( (f.st_mode & S_IFDIR) == S_IFDIR ) | |
− | + | 2010.01.11 John Dey | |
− | + | Complete re-write of walkv4 transforming it into pwalk. | |
− | + | pwalk is a threaded version of walkv4. | |
− | + | pwalk will call fileDir as a new thread until MAXTHRDS is reached. | |
− | + | 2010.02.01 pwalk v1 did not detach nor did it join the threads; v2 | |
− | + | fixes this shortcoming; | |
− | + | 2010.03.24 john dey; New physical hardware is available to run pwalk. | |
− | + | 16 threads are only using about 20% CPU with 10% IO wait. Based on this | |
− | + | the thread count will be doubled to 32. | |
− | + | 2010.11.29 Add mutex for printStat | |
− | + | 2012.10.09 --NoSnap flag added. ignore directories that have the name | |
− | + | .snapshot | |
− | + | 2013.08.02 john f dey; Add GNU license, --version flag added | |
− | + | 2013.10.15 steve umbehocker; added verbose --debug spam mode | |
− | + | added compare and copy capabilities | |
− | + | added retain= option to backup only recently modified files | |
− | + | added customizable maxthreads | |
− | + | added summary total display at end (disable with --nototals) | |
− | + | updated and expanded usage section | |
− | + | fixed segfault in walker | |
− | + | 2013.10.16 steve umbehocker; added --log mode logging | |
− | + | added purge mode hooks | |
− | + | added dryrun/force flags | |
− | + | 2013.10.21 steve umbehocker; added --status logging | |
− | + | added purge w/ retain | |
− | + | 2013.10.22 steve umbehocker; added check so purge cannot compare against empty dir | |
− | + | added execute operation logic to do cp/unlink operations outside | |
− | + | of the mutex so that they're parallelized | |
− | + | fixed sync/copy to create empty leaf directories | |
− | + | 2014.11.21 steve umbehocker; added fix for files with spaces and special chars | |
− | + | 2015.06.12 steve umbehocker; added fix for --help and --version | |
− | + | 2015.06.18 osnexus eng; merging dot fix from john | |
− | */ | + | 2015.07.07 osnexus eng; fix to skip invalid dirs without exiting |
+ | 2015.07.07 osnexus eng; added --exclude= option | ||
+ | 2015.07.07 osnexus eng; fix to add thread cleanup on err condition | ||
+ | 2015.10.14 osnexus eng; adding .qsnaps to --nosnaps option | ||
+ | 2016.10.24 osnexus eng; added check to copy only regular files | ||
+ | 2016.10.24 osnexus eng; added fix to deal with filenames containing "$" chars | ||
+ | 2016.10.24 osnexus eng; added fix to use raw path with stat() and escaped paths with shell commands | ||
+ | 2016.10.24 osnexus eng; added stderr redirect to /dev/null for cp/rsync/mv commands | ||
+ | 2016.10.24 osnexus eng; fixed rsync usage for copying directory permissions | ||
+ | */ | ||
#include <stdio.h> | #include <stdio.h> | ||
Line 99: | Line 108: | ||
#include <errno.h> | #include <errno.h> | ||
#include <pthread.h> | #include <pthread.h> | ||
+ | #include <unistd.h> | ||
#undef THRD_DEBUG | #undef THRD_DEBUG | ||
− | static char *Version = " | + | static char *Version = "v4.1.1 - 10/26/2016 - (c) 2013 John F Dey and (c) 2013-2016 OSNEXUS - GPLv2 License"; |
static char *whoami = "pwalk"; | static char *whoami = "pwalk"; | ||
− | + | //char *errFile = ">> pwalk.err 2>&1 "; | |
− | int | + | char *errFile = "> /dev/null 2>&1 "; |
− | int TARGETDIR =0; /* if set copy the files to the target path */ | + | int EXCLUDE_DIRS = 0; /* if set ignore comma delimited specified directories like those called .snapshot */ |
− | int COMPAREDIR =0; /* if set diff the files to the compare dir to determine where the holee are then purge */ | + | int TARGETDIR = 0; /* if set copy the files to the target path */ |
− | int DEBUG_MODE =0; /* verbose output */ | + | int COMPAREDIR = 0; /* if set diff the files to the compare dir to determine where the holee are then purge */ |
− | int CMP_ATIME =0; /* indiates that compare operations should look at atime */ | + | int DEBUG_MODE = 0; /* verbose output */ |
− | int CMP_MTIME =1; | + | int CMP_ATIME = 0; /* indiates that compare operations should look at atime */ |
− | int CMP_CTIME =0; | + | int CMP_MTIME = 1; |
− | int SHOWTOTALS =1; /* show totals at the end via atexit() */ | + | int CMP_CTIME = 0; |
− | int DOTS =0; /* print little dots to show progress */ | + | int SHOWTOTALS = 1; /* show totals at the end via atexit() */ |
− | int QUIET =0; /* quiet gives the best backup performance */ | + | int DOTS = 0; /* print little dots to show progress */ |
− | int PURGE =0; | + | int QUIET = 0; /* quiet gives the best backup performance */ |
− | int LOG =0; | + | int PURGE = 0; |
− | int NODIRPERMS =0; | + | int LOG = 0; |
− | unsigned int DU_MODE =0; | + | int NODIRPERMS = 0; |
− | int STATUS =0; | + | unsigned int DU_MODE = 0; |
− | int RETENTION_DAYS =60; | + | int STATUS = 0; |
− | size_t MAXTHRDS =32; | + | int RETENTION_DAYS = 60; |
− | size_t RETAIN =0; | + | size_t MAXTHRDS = 32; |
− | size_t DRYRUN =0; | + | size_t RETAIN = 0; |
− | size_t FORCE =0; | + | size_t DRYRUN = 0; |
− | int MOVE_MODE =0; | + | size_t FORCE = 0; |
− | unsigned long long DU_TOTAL =4096L; | + | int MOVE_MODE = 0; |
+ | unsigned long long DU_TOTAL = 4096L; | ||
+ | char * excludeDirs[1024]; | ||
+ | char excludeDirData[8192]; | ||
+ | size_t excludeDirCount = 0; | ||
typedef enum operationType { OP_NONE, OP_COPY, OP_DELETE } operationType_t; | typedef enum operationType { OP_NONE, OP_COPY, OP_DELETE } operationType_t; | ||
operationType_t opType; | operationType_t opType; | ||
− | char opCommand[FILENAME_MAX+FILENAME_MAX+1]; | + | char opCommand[FILENAME_MAX + FILENAME_MAX + 1]; |
− | struct fileData { | + | typedef struct fileData { |
− | char dname[FILENAME_MAX+1]; /* full path and basename */ | + | char dname[FILENAME_MAX + 1]; /* full path and basename */ |
int THRDslot; /* slot ID 0 - MAXTHRDS */ | int THRDslot; /* slot ID 0 - MAXTHRDS */ | ||
int THRDid; /* unique ID increaments with each new THRD */ | int THRDid; /* unique ID increaments with each new THRD */ | ||
Line 139: | Line 153: | ||
pthread_t thread_id; /* system assigned */ | pthread_t thread_id; /* system assigned */ | ||
pthread_attr_t tattr; | pthread_attr_t tattr; | ||
− | + | } fileData_t; | |
− | int ThreadCNT | + | int ThreadCNT = 0; /* ThreadCNT < MAXTHRDS */ |
− | int totalTHRDS =0; | + | int totalTHRDS = 0; |
struct fileData *fdslot; | struct fileData *fdslot; | ||
+ | pthread_mutex_t mutexLog; | ||
+ | pthread_mutex_t mutexCounter; | ||
pthread_mutex_t mutexFD; | pthread_mutex_t mutexFD; | ||
pthread_mutex_t mutexPrintStat; | pthread_mutex_t mutexPrintStat; | ||
+ | pthread_cond_t condFD; | ||
− | char targetdir[FILENAME_MAX+1]; | + | char targetdir[FILENAME_MAX + 1]; |
− | char comparedir[FILENAME_MAX+1]; | + | char comparedir[FILENAME_MAX + 1]; |
− | char logfile[FILENAME_MAX+1]; | + | char logfile[FILENAME_MAX + 1]; |
− | char statusfile[FILENAME_MAX+1]; | + | char statusfile[FILENAME_MAX + 1]; |
− | int rootdirlen =0; | + | int rootdirlen = 0; |
+ | time_t currenttime = 0; | ||
+ | time_t laststatustime = 0; | ||
+ | |||
+ | void incrementCounter(size_t *counter) | ||
+ | { | ||
+ | pthread_mutex_lock(&mutexCounter); | ||
+ | (*counter)++; | ||
+ | pthread_mutex_unlock(&mutexCounter); | ||
+ | } | ||
/* counters for summing up and printing totals at the end */ | /* counters for summing up and printing totals at the end */ | ||
/*
 * Totals printed by the status file and by the end-of-run summary.
 * Each field is only ever bumped through its increment*() wrapper below,
 * which serialises the update via incrementCounter().
 */
typedef struct counterReport
{
    size_t scanned;
    size_t copied;
    size_t modified;
    size_t created;

    size_t error;
    size_t skipped;
    size_t deleted;
} counterReport_t;

/* Every counter starts at zero. */
counterReport_t report = { 0 };

void incrementScanned()
{
    incrementCounter(&report.scanned);
}

void incrementCopied()
{
    incrementCounter(&report.copied);
}

void incrementModified()
{
    incrementCounter(&report.modified);
}

void incrementCreated()
{
    incrementCounter(&report.created);
}

void incrementError()
{
    incrementCounter(&report.error);
}

void incrementDeleted()
{
    incrementCounter(&report.deleted);
}

void incrementSkipped()
{
    incrementCounter(&report.skipped);
}
void | void | ||
− | printVersion( ) | + | printVersion() |
{ | { | ||
− | + | fprintf(stderr, "%s version %s\n", whoami, Version); | |
− | + | fprintf(stderr, "%s Copyright (C) 2013-2016 John F Dey & Steve Umbehocker\n", whoami); | |
− | + | fprintf(stderr, " * pwalk comes with ABSOLUTELY NO WARRANTY;\n"); | |
− | + | fprintf(stderr, " * This is free software, you can redistribute it and/or modify it under the terms of the GNU General Public License\n"); | |
− | + | fprintf(stderr, " * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.\n"); | |
} | } | ||
void | void | ||
− | printHelp( ) | + | printHelp() |
{ | { | ||
− | + | fprintf(stderr, "%s version %s\n\n", whoami, Version); | |
− | + | fprintf(stderr, "Usage : \n"); | |
− | + | fprintf(stderr, "%s --help --version\n", whoami); | |
− | + | fprintf(stderr, " Common Args :\n"); | |
− | + | fprintf(stderr, " --dryrun : use this to test commands\n"); | |
− | + | fprintf(stderr, " without making any changes to the system\n"); | |
− | + | fprintf(stderr, " --maxthreads=N : indicates the number of threads (default=32)\n"); | |
− | + | fprintf(stderr, " --nototals : disables printing of totals after the scan\n"); | |
− | + | fprintf(stderr, " --dots : prints a dot and total every 1000 files scanned.\n"); | |
− | + | fprintf(stderr, " --quiet : no chatter, speeds up the scan.\n"); | |
− | + | fprintf(stderr, " --nosnap : Ignore directories with name .snapshot and .qsnaps\n"); | |
− | + | fprintf(stderr, " --excludedirs=DIR1,DIR2,... : Ignore directories with specified names.\n"); | |
− | + | fprintf(stderr, " --debug : Verbose debug spam\n"); | |
− | + | fprintf(stderr, " Output Format : CSV\n"); | |
− | + | fprintf(stderr, " Fields : DateStamp,\"inode\",\"filename\",\"fileExtension\",\"UID\",\n"); | |
− | + | fprintf(stderr, " \"GID\",\"st_size\",\"st_blocks\",\"st_mode\",\"atime\",\n"); | |
− | + | fprintf(stderr, " \"mtime\",\"ctime\",\"File Count\",\"Directory Size\"\n"); | |
− | + | fprintf(stderr, "\n"); | |
− | + | fprintf(stderr, "Walk Usage : \n"); | |
− | + | fprintf(stderr, "%s SOURCEDIR\n", whoami); | |
− | + | fprintf(stderr, " Command Args :\n"); | |
− | + | fprintf(stderr, " SOURCEDIR : Fully qualified path to the directory to walk\n"); | |
− | + | fprintf(stderr, "\n"); | |
− | + | fprintf(stderr, "Copy/Backup Usage : \n"); | |
− | + | fprintf(stderr, "%s --targetdir=TARGETDIR SOURCEDIR\n", whoami); | |
− | + | fprintf(stderr, "%s --retain=30 --targetdir=TARGETDIR SOURCEDIR\n", whoami); | |
− | + | fprintf(stderr, " Command Args :\n"); | |
− | + | fprintf(stderr, " --move : move files rather than copy\n"); | |
− | + | fprintf(stderr, " --nodirperms : don't copy directory permissions\n"); | |
− | + | fprintf(stderr, " --targetdir : copy files to specified TARGETDIR\n"); | |
− | + | fprintf(stderr, " --atime : copy if access time change (default=no atime)\n"); | |
− | + | fprintf(stderr, " --backuplog=LOGFILE : log all files that were copied.\n"); | |
− | + | fprintf(stderr, " --status=STATUSFILE : write periodic status updates to specified file\n"); | |
− | + | fprintf(stderr, " --retain : copy if file ctime or mtime within retention period\n"); | |
− | + | fprintf(stderr, " specified in days. eg: --retain=60\n"); | |
− | + | fprintf(stderr, " --nomtime : ignore mtime (default=use mtime)\n"); | |
− | + | fprintf(stderr, " SOURCEDIR : Fully qualified path to the directory to walk\n"); | |
− | + | fprintf(stderr, "\n"); | |
− | + | fprintf(stderr, "Delete/Purge Usage : \n"); | |
− | + | fprintf(stderr, "%s --purge [--force] --comparedir=COMPAREDIR PURGEDIR\n", whoami); | |
− | + | fprintf(stderr, "%s --purge [--force] --retain=N PURGEDIR\n", whoami); | |
− | + | fprintf(stderr, " Command Args :\n"); | |
− | + | fprintf(stderr, " --comparedir=DIR : compare against this dir but dont touch any files\n"); | |
− | + | fprintf(stderr, " in it. comparedir is usually the SOURCEDIR from\n"); | |
− | + | fprintf(stderr, " a prior copy/sync stage.\n"); | |
− | + | fprintf(stderr, " --purge : WARNING: this deletes files older than the\n"); | |
− | + | fprintf(stderr, " retain period -OR- if retain is not specified\n"); | |
− | + | fprintf(stderr, " --comparedir is required. The comparedir is\n"); | |
− | + | fprintf(stderr, " compared against the specified dir and any files\n"); | |
− | + | fprintf(stderr, " not found in the comparedir are purged.\n"); | |
− | + | fprintf(stderr, " --force : NOTE: default is a *dry-run* for purge, you must\n"); | |
− | + | fprintf(stderr, " specify --force option to actually purge files\n"); | |
− | + | fprintf(stderr, " --atime : keep if access time within retain period\n"); | |
− | + | fprintf(stderr, " --retain : keep if file ctime or mtime within retention period\n"); | |
− | + | fprintf(stderr, " specified in days. eg: --retain=60\n"); | |
− | + | fprintf(stderr, "\n"); | |
+ | } | ||
+ | |||
+ | int excludeDir(const char *dirName) | ||
+ | { | ||
+ | if(excludeDirCount == 0) | ||
+ | return 0; | ||
+ | int i = 0; | ||
+ | for(i = 0; i < excludeDirCount; i++) | ||
+ | { | ||
+ | if(!strcmp(excludeDirs[i], dirName)) | ||
+ | return 1; | ||
+ | } | ||
+ | return 0; | ||
} | } | ||
Line 241: | Line 292: | ||
time(&now); | time(&now); | ||
size_t elapsed = (size_t)difftime(now, currenttime); | size_t elapsed = (size_t)difftime(now, currenttime); | ||
− | size_t hours = elapsed/3600; | + | size_t hours = elapsed / 3600; |
− | size_t minutes = (elapsed%3600)/60; | + | size_t minutes = (elapsed % 3600) / 60; |
− | size_t seconds = elapsed%60; | + | size_t seconds = elapsed % 60; |
FILE *statusFp = fopen(statusfile, "w"); | FILE *statusFp = fopen(statusfile, "w"); | ||
if(statusFp != NULL) | if(statusFp != NULL) | ||
{ | { | ||
fprintf(statusFp, " status: %s\n", message); | fprintf(statusFp, " status: %s\n", message); | ||
− | fprintf(statusFp, " scanned: %lu\n", | + | fprintf(statusFp, " scanned: %lu\n", report.scanned); |
− | fprintf(statusFp, " copied: %lu\n", | + | fprintf(statusFp, " copied: %lu\n", report.copied); |
− | fprintf(statusFp, " modified: %lu\n", | + | fprintf(statusFp, " modified: %lu\n", report.modified); |
− | fprintf(statusFp, " created: %lu\n", | + | fprintf(statusFp, " created: %lu\n", report.created); |
− | fprintf(statusFp, " skipped: %lu\n", | + | fprintf(statusFp, " skipped: %lu\n", report.skipped); |
− | fprintf(statusFp, " deleted: %lu\n", | + | fprintf(statusFp, " deleted: %lu\n", report.deleted); |
− | fprintf(statusFp, " errors: %lu\n", | + | fprintf(statusFp, " errors: %lu\n", report.error); |
fprintf(statusFp, " elapsed: %02lu:%02lu:%02lu\n", hours, minutes, seconds); | fprintf(statusFp, " elapsed: %02lu:%02lu:%02lu\n", hours, minutes, seconds); | ||
fclose(statusFp); | fclose(statusFp); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | void printDot() | ||
+ | { | ||
+ | if(DOTS) | ||
+ | { | ||
+ | if(report.scanned % 1000 == 0) | ||
+ | printf("%ld", report.scanned); | ||
+ | else if(report.scanned % 100 == 0) | ||
+ | printf("."); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | void updateRunningStatus() | ||
+ | { | ||
+ | if(STATUS && (report.scanned % 256 == 0)) | ||
+ | { | ||
+ | time_t now; | ||
+ | time(&now); | ||
+ | size_t elapsed = (size_t)difftime(now, laststatustime); | ||
+ | if(elapsed >= 10) | ||
+ | { | ||
+ | laststatustime = now; | ||
+ | updateStatus("running"); | ||
+ | } | ||
} | } | ||
} | } | ||
void | void | ||
− | printTotals( ) | + | printTotals() |
{ | { | ||
− | + | if(STATUS) | |
− | + | { | |
− | + | updateStatus("completed"); | |
− | + | } | |
− | + | if(DU_MODE) | |
− | + | { | |
− | + | fprintf(stderr, "TOTAL: %llu\n", DU_TOTAL); | |
− | + | } | |
− | + | if(!SHOWTOTALS) | |
− | + | return; | |
− | + | time_t now; | |
− | + | time(&now); | |
− | + | size_t elapsed = (size_t)difftime(now, currenttime); | |
− | + | size_t hours = elapsed / 3600; | |
− | + | size_t minutes = (elapsed % 3600) / 60; | |
− | + | size_t seconds = elapsed % 60; | |
− | + | fprintf(stderr, "\n\n%12s %12s %12s %12s %12s %12s %12s %12s\n", "Scanned", "Copied", "Modified", "Created", "Deleted", "Skipped", "Errors", "Elapsed (HH:MM:SS)"); | |
− | + | fprintf(stderr, "%12lu %12lu %12lu %12lu %12lu %12lu %12lu %02lu:%02lu:%02lu\n", report.scanned, report.copied, report.modified, report.created, report.deleted, report.skipped, report.error, hours, minutes, seconds); | |
} | } | ||
− | void | + | void |
getDirPath(char *dirPath, char *fullPath, size_t bufferLen) | getDirPath(char *dirPath, char *fullPath, size_t bufferLen) | ||
{ | { | ||
− | strncpy(dirPath, fullPath, FILENAME_MAX+FILENAME_MAX); | + | strncpy(dirPath, fullPath, FILENAME_MAX + FILENAME_MAX); |
− | size_t pos = strlen(dirPath)-1; | + | size_t pos = strlen(dirPath) - 1; |
while(pos > 0 && dirPath[pos] != '/') | while(pos > 0 && dirPath[pos] != '/') | ||
{ | { | ||
− | dirPath[pos]=0; | + | dirPath[pos] = 0; |
pos--; | pos--; | ||
} | } | ||
Line 298: | Line 375: | ||
void | void | ||
− | logPath(char *targetPath) | + | logPath(char *targetPath, char *mode) |
{ | { | ||
if(!LOG || (logfile[0] == 0)) | if(!LOG || (logfile[0] == 0)) | ||
return; | return; | ||
− | + | pthread_mutex_lock(&mutexLog); | |
FILE *fp = fopen(logfile, "a"); | FILE *fp = fopen(logfile, "a"); | ||
if(fp != NULL) | if(fp != NULL) | ||
{ | { | ||
− | fprintf(fp, "%s\n", targetPath); | + | if(targetPath != NULL) |
+ | { | ||
+ | if(mode != NULL && strlen(mode)) | ||
+ | fprintf(fp, "%s: %s\n", mode, targetPath); | ||
+ | else | ||
+ | fprintf(fp, "%s\n", targetPath); | ||
+ | } | ||
fclose(fp); | fclose(fp); | ||
} | } | ||
+ | pthread_mutex_unlock(&mutexLog); | ||
} | } | ||
Line 314: | Line 398: | ||
dirContainsFiles(char *targetPath) | dirContainsFiles(char *targetPath) | ||
{ | { | ||
− | + | struct dirent *ep = NULL; | |
− | + | DIR *dp = opendir(targetPath); | |
− | + | if(dp != NULL) | |
− | + | ||
− | + | ||
{ | { | ||
− | if(ep->d_name[0] == '.') | + | while(ep = readdir(dp)) |
− | + | { | |
− | + | if(ep->d_name[0] == '.') | |
+ | continue; | ||
+ | //fprintf(stderr, "INFO: directory entry '%s'", ep->d_name); | ||
+ | closedir(dp); | ||
+ | return 1; | ||
+ | } | ||
closedir(dp); | closedir(dp); | ||
− | |||
} | } | ||
− | + | return 0; | |
− | + | ||
− | + | ||
} | } | ||
− | + | void logLine(struct stat *f, const char *sourcePath, const char *exten, const long fileCnt, const long dirSz) | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
{ | { | ||
− | + | if(!DOTS && !QUIET) | |
− | + | { | |
− | + | char outFile[FILENAME_MAX + FILENAME_MAX]; | |
− | + | sprintf(outFile, "\"%ld\",\"%s\",\"%s\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%07o\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%ld\"\n", | |
− | + | (long)f->st_ino, sourcePath, (exten) ? exten : "", (long)f->st_uid, | |
+ | (long)f->st_gid, (long)f->st_size, (long)f->st_blocks, (int)f->st_mode, | ||
+ | (long)f->st_atime, (long)f->st_mtime, (long)f->st_ctime, fileCnt, dirSz); | ||
+ | fputs(outFile, stdout); | ||
+ | } | ||
+ | } | ||
− | + | void logDirLine(struct stat *f, const char *sourcePath) | |
− | + | { | |
− | + | if(!DOTS && !QUIET && DEBUG_MODE) | |
+ | { | ||
+ | char outFile[FILENAME_MAX + FILENAME_MAX]; | ||
+ | sprintf(outFile, "INFO: CHECKING/CREATING DIR: \"%ld\",\"%s\"\n", (long)f->st_ino, sourcePath); | ||
+ | fputs(outFile, stdout); | ||
+ | } | ||
+ | } | ||
/*
 * Print "INFO: COMPARING [<sourcePath>] [<targetPath>]" to stdout
 * (no trailing newline, as before).
 *
 * The two paths are written directly instead of being formatted into a
 * 2*FILENAME_MAX stack buffer first: each path alone may be that long,
 * so the old sprintf() could write past the end of the buffer.
 */
void logCompareLine(const char *sourcePath, const char *targetPath)
{
    printf("INFO: COMPARING [%s] [%s]", sourcePath, targetPath);
}
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | int purgeFile(struct stat *f, char *sourcePath, char *sourcePathRaw, char *targetPath, char *targetPathRaw) | |
− | + | { | |
− | + | if(S_ISDIR(f->st_mode)) | |
− | + | { | |
− | + | if(DEBUG_MODE) printf("INFO: Purge skipping DIR: '%s'\n", sourcePath); | |
− | + | incrementSkipped(); | |
+ | return -1; | ||
+ | } | ||
+ | if(!RETAIN && COMPAREDIR) | ||
+ | { | ||
+ | if(DEBUG_MODE) | ||
+ | printf("comparing: \n\t%s :: %s\n\t%s :: %s\n", sourcePath, targetPath, sourcePathRaw, targetPathRaw); | ||
+ | struct stat statTarget = { 0 }; | ||
+ | if(stat(targetPathRaw, &statTarget) == -1 && S_ISREG(f->st_mode)) | ||
+ | { | ||
+ | incrementDeleted(); | ||
+ | if(DEBUG_MODE) | ||
+ | printf("INFO: PURGE [%s] : %s\n", (DRYRUN || !FORCE) ? "dryrun" : "UNLINK", sourcePath); | ||
+ | if(!DRYRUN && FORCE) | ||
+ | { | ||
+ | opType = OP_DELETE; | ||
+ | strncpy(opCommand, sourcePathRaw, FILENAME_MAX + FILENAME_MAX); | ||
+ | if(DOTS) | ||
+ | printf("!"); | ||
+ | } | ||
+ | } | ||
+ | else | ||
+ | incrementSkipped(); | ||
+ | return 0; | ||
+ | } | ||
− | + | if(RETAIN && (currenttime > f->st_ctime) && ( | |
− | + | (!CMP_CTIME || (difftime(currenttime, f->st_ctime) > RETAIN)) && | |
− | + | (!CMP_MTIME || (difftime(currenttime, f->st_mtime) > RETAIN)) && | |
− | + | (!CMP_ATIME || (difftime(currenttime, f->st_atime) > RETAIN)))) | |
− | + | { | |
− | + | incrementDeleted(); | |
− | + | if(DEBUG_MODE) | |
− | + | { | |
+ | printf("INFO: PURGE [%s] (%lu days old): %s\n", (DRYRUN || !FORCE) ? "dryrun" : "UNLINK", (size_t)difftime(currenttime, f->st_mtime) / (3600 * 24), sourcePath); | ||
+ | } | ||
+ | if(!DRYRUN && FORCE) | ||
+ | { | ||
+ | //we want to just save the path so we can do the purge outside of this criticial section | ||
+ | //this way our delete is parallelized | ||
+ | strncpy(opCommand, sourcePath, FILENAME_MAX + FILENAME_MAX); | ||
+ | opType = OP_DELETE; | ||
+ | if(DOTS) | ||
+ | printf("!"); | ||
+ | } | ||
+ | } | ||
+ | else | ||
+ | incrementSkipped(); | ||
+ | return 0; | ||
+ | } | ||
/*
 * Copy `fname` into `sourcePath`, putting a backslash in front of every
 * character that is still special inside a double-quoted shell word:
 * '"', '$', '`' and '\'.
 *
 * Previously only '"' and '$' were escaped. A name containing a backtick
 * would trigger command substitution, and a name containing a backslash
 * followed by a quote would terminate the quoted word early, when the
 * result is embedded in a "..." argument of a shell command.
 *
 * fname:      NUL-terminated input path
 * sourcePath: output buffer; no size is passed in, so it must hold up to
 *             2 * strlen(fname) + 1 bytes (every character escaped)
 */
void getProcessedSourcePath(const char *fname, char *sourcePath)
{
    const char *in = fname;
    char *out = sourcePath;

    for(; *in != '\0'; ++in)
    {
        switch(*in)
        {
        case '"':
        case '$':
        case '`':
        case '\\':
            *out++ = '\\';
            break;
        default:
            break;
        }
        *out++ = *in;
    }
    *out = '\0';
}
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | void getTargetPathFromSource(const char *sourcePath, char *targetPath) | |
+ | { | ||
+ | if(TARGETDIR) | ||
+ | { | ||
+ | //Add a trailing / to the base path if it's not there. | ||
+ | size_t targetdirLen = strlen(targetdir); | ||
+ | if((targetdirLen + 1 < FILENAME_MAX + FILENAME_MAX) && targetdir[targetdirLen - 1] != '/') | ||
+ | { | ||
+ | targetdir[targetdirLen] = '/'; | ||
+ | targetdir[targetdirLen + 1] = 0; | ||
+ | } | ||
+ | size_t offset = rootdirlen + 1; | ||
+ | strncpy(targetPath, targetdir, FILENAME_MAX + FILENAME_MAX); | ||
+ | strncat(targetPath, &sourcePath[offset], FILENAME_MAX + FILENAME_MAX); | ||
+ | } | ||
+ | else if(COMPAREDIR) | ||
+ | { | ||
+ | //Add a trailing / to the base path if it's not there. | ||
+ | size_t comparedirLen = strlen(comparedir); | ||
+ | if((comparedirLen + 1 < FILENAME_MAX + FILENAME_MAX) && comparedir[comparedirLen - 1] != '/') | ||
+ | { | ||
+ | comparedir[comparedirLen] = '/'; | ||
+ | comparedir[comparedirLen + 1] = 0; | ||
+ | } | ||
+ | size_t offset = rootdirlen + 1; | ||
+ | strncpy(targetPath, comparedir, FILENAME_MAX + FILENAME_MAX); | ||
+ | strncat(targetPath, &sourcePath[offset], FILENAME_MAX + FILENAME_MAX); | ||
+ | } | ||
+ | //else { } //standard PWALK | ||
+ | } | ||
− | + | int makeTargetDirPath(const char *fname) | |
− | + | { | |
− | + | char sourcePath[FILENAME_MAX + FILENAME_MAX]; | |
− | + | char targetPath[FILENAME_MAX + FILENAME_MAX]; | |
− | + | char targetPathRaw[FILENAME_MAX + FILENAME_MAX]; | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | getProcessedSourcePath(fname, sourcePath); | |
− | + | getTargetPathFromSource(fname, targetPathRaw); | |
+ | getTargetPathFromSource(sourcePath, targetPath); | ||
− | + | struct stat statSource = { 0 }; | |
− | + | stat(fname, &statSource); | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | struct stat statTarget = { 0 }; | |
− | + | stat(targetPathRaw, &statTarget); | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | if(!S_ISDIR(statSource.st_mode)) | |
− | + | { | |
− | + | fprintf(stderr, "ERR: source '%s' is not a directory.", sourcePath); | |
+ | exit(-1); | ||
+ | } | ||
− | + | logDirLine(&statTarget, targetPath); | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | if(!S_ISDIR(statTarget.st_mode)) | |
− | + | { | |
− | + | incrementCreated(); | |
− | + | incrementCopied(); | |
+ | char command[FILENAME_MAX + FILENAME_MAX]; | ||
+ | command[0] = '\0'; | ||
+ | snprintf(command, FILENAME_MAX + FILENAME_MAX, "mkdir \"%s\" %s", targetPath, errFile); | ||
− | + | if(DEBUG_MODE) | |
− | + | printf("INFO: making DIR: %s\n", command); | |
+ | if(!DRYRUN) | ||
+ | { | ||
+ | system(command); | ||
+ | struct utimbuf times; | ||
+ | times.actime = statSource.st_atime; | ||
+ | times.modtime = statSource.st_mtime; | ||
+ | utime(targetPath, ×); | ||
+ | } | ||
− | + | //preserve the permissions for the directories | |
− | + | if(!NODIRPERMS) | |
− | + | { | |
− | + | if(DEBUG_MODE) | |
− | + | printf("INFO: updating DIR perms via rsync: %s\n", command); | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | snprintf(command, FILENAME_MAX + FILENAME_MAX, "rsync -ptgo -A -X -d --no-recursive --exclude=* \"%s/\" \"%s\" %s", sourcePath, targetPath, errFile); | |
− | + | if(!DRYRUN) | |
− | + | system(command); | |
− | + | /* | |
− | + | //NOTE: This old technique doesn't copy the extended attributes of the directory | |
− | + | snprintf(command, FILENAME_MAX + FILENAME_MAX, "chown --reference=\"%s\" \"%s\" %s", sourcePath, targetPath, errFile); | |
− | + | if(!DRYRUN) | |
− | + | system(command); | |
− | + | snprintf(command, FILENAME_MAX + FILENAME_MAX, "chmod --reference=\"%s\" \"%s\" %s", sourcePath, targetPath, errFile); | |
− | + | if(!DRYRUN) | |
+ | system(command); | ||
+ | */ | ||
+ | } | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | if(DEBUG_MODE) | ||
+ | printf("INFO: verified DIR exists: %s\n", fname); | ||
+ | } | ||
+ | return 0; | ||
+ | } | ||
− | + | /* | |
− | + | * this needs to be in a crital secion (and it is!) | |
− | + | */ | |
− | + | void | |
− | + | printStat(char *fname, char *exten, struct stat *f, long fileCnt, long dirSz) | |
− | + | { | |
− | + | char sourcePath[FILENAME_MAX + FILENAME_MAX]; | |
− | + | char sourcePathDir[FILENAME_MAX + FILENAME_MAX]; | |
− | + | char targetPath[FILENAME_MAX + FILENAME_MAX]; | |
− | + | char targetPathDir[FILENAME_MAX + FILENAME_MAX]; | |
− | + | char targetPathRaw[FILENAME_MAX + FILENAME_MAX]; | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | getProcessedSourcePath(fname, sourcePath); | |
− | + | getTargetPathFromSource(fname, targetPathRaw); | |
− | + | getTargetPathFromSource(sourcePath, targetPath); | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | opType = OP_NONE; | |
− | + | logLine(f, sourcePath, exten, fileCnt, dirSz); | |
− | + | updateRunningStatus(); | |
− | + | printDot(); | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | incrementScanned(); | |
− | + | ||
− | + | ||
− | + | struct stat statSource = { 0 }; | |
− | + | stat(fname, &statSource); | |
− | + | if(S_ISDIR(statSource.st_mode)) | |
− | + | { | |
− | + | return; | |
− | + | } | |
− | + | if(PURGE) | |
− | + | { | |
− | + | purgeFile(f, sourcePath, fname, targetPath, targetPathRaw); | |
− | + | return; | |
− | + | } | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | //If a target dir was specified then we need to do file copy operations | |
− | + | if(TARGETDIR) | |
− | + | { | |
− | + | struct stat statTarget = { 0 }; | |
− | + | char doCopy = 0; | |
− | + | char doCompare = 0; | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | if(stat(targetPathRaw, &statTarget) == -1 || MOVE_MODE) | |
− | + | { | |
− | + | if(DEBUG_MODE)fprintf(stderr, "INFO: File '%s' doesn't exist at target, doing copy '%s'\n", sourcePath, targetPath); | |
− | + | doCopy = 1; | |
− | + | } | |
− | + | else if(S_ISDIR(f->st_mode)) | |
− | + | { | |
− | + | if(DEBUG_MODE)fprintf(stderr, "ERR: Target is a directory '%s'\n", targetPath); | |
− | + | incrementError(); | |
+ | return; | ||
+ | } | ||
+ | else if(!S_ISREG(f->st_mode)) | ||
+ | { | ||
+ | if(DEBUG_MODE)fprintf(stderr, "ERR: Target not regular file '%s'\n", targetPath); | ||
+ | incrementError(); | ||
+ | return; | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | //printf("target exists '%s'", targetPath); | ||
+ | doCompare = 1; | ||
+ | } | ||
− | + | if(doCompare) | |
− | + | { | |
− | + | if(DEBUG_MODE) | |
− | + | { | |
− | + | logCompareLine(sourcePath, targetPath); | |
− | + | } | |
− | + | if(MOVE_MODE) | |
− | + | { | |
− | + | if(DEBUG_MODE) printf("INFO: File has MOVE MODE, copying\n"); | |
− | + | doCopy = 1; | |
− | + | } | |
− | + | else if(f->st_mode != f->st_mode) | |
− | + | { | |
− | + | if(DEBUG_MODE) printf("INFO: File has MODE CHANGE, copying\n"); | |
− | + | doCopy = 1; | |
− | + | } | |
− | + | else if(f->st_size != f->st_size) | |
− | + | { | |
− | + | if(DEBUG_MODE) printf("INFO: File has SIZE CHANGE, copying\n"); | |
− | + | doCopy = 1; | |
+ | } | ||
+ | // the target file will always have a different create time so we don't | ||
+ | // want to use == for ctime compare, rather we just want to see if the source | ||
+ | // was created after the target, if so, need to recopy | ||
+ | else if(CMP_CTIME && (f->st_ctime < f->st_ctime)) | ||
+ | { | ||
+ | if(DEBUG_MODE) printf("INFO: CTIME on target is older than source, copying\n"); | ||
+ | doCopy = 1; | ||
+ | } | ||
+ | else if(CMP_MTIME && (f->st_mtime != f->st_mtime)) | ||
+ | { | ||
+ | if(DEBUG_MODE) printf("INFO: File has MTIME CHANGE, copying\n"); | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | if(DEBUG_MODE) printf("INFO: Detected no file changes, copy not required.\n"); | ||
+ | } | ||
+ | if(!doCopy && CMP_ATIME && (f->st_atime != f->st_atime)) | ||
+ | { | ||
+ | if(DEBUG_MODE) printf("INFO: File has only ATIME CHANGE, updating timestamps\n"); | ||
+ | struct utimbuf times; | ||
+ | times.actime = f->st_atime; | ||
+ | times.modtime = f->st_mtime; | ||
+ | utime(targetPath, ×); | ||
+ | } | ||
+ | } | ||
− | + | if(doCopy) | |
− | + | { | |
− | + | if(RETAIN) | |
− | + | { | |
− | + | if((currenttime < f->st_ctime) || | |
− | + | (CMP_CTIME && (difftime(currenttime, f->st_ctime) < RETAIN)) || | |
− | + | (CMP_MTIME && (difftime(currenttime, f->st_mtime) < RETAIN)) || | |
− | + | (CMP_ATIME && (difftime(currenttime, f->st_atime) < RETAIN))) | |
− | + | { | |
− | + | if(DEBUG_MODE) | |
− | + | printf("INFO: INSIDE RETAIN (%lu days old): %s\n", (size_t)difftime(currenttime, f->st_mtime) / (3600 * 24), sourcePath); | |
− | + | } | |
− | + | else | |
− | + | { | |
− | + | if(DEBUG_MODE) | |
− | + | printf("INFO: OUTSIDE RETAIN (%lu days old), skipping: %s\n", (size_t)difftime(currenttime, f->st_mtime) / (3600 * 24), sourcePath); | |
− | + | incrementSkipped(); | |
− | + | return; | |
− | + | } | |
− | + | } | |
− | + | ||
− | + | if(doCompare) | |
− | + | incrementModified(); | |
− | + | else | |
+ | incrementCreated(); | ||
+ | char command[FILENAME_MAX + FILENAME_MAX]; | ||
+ | command[0] = '\0'; | ||
+ | if(MOVE_MODE) | ||
+ | snprintf(command, FILENAME_MAX + FILENAME_MAX, "mv \"%s\" \"%s\" %s", sourcePath, targetPath, errFile); | ||
+ | else | ||
+ | snprintf(command, FILENAME_MAX + FILENAME_MAX, "cp --no-dereference --preserve=all \"%s\" \"%s\" %s", sourcePath, targetPath, errFile); | ||
+ | if(DEBUG_MODE) | ||
+ | printf("INFO: [%s]\n", command); | ||
+ | if(!DRYRUN) | ||
+ | { | ||
+ | //we want to just save the command so we can do the copy outside of this criticial section | ||
+ | //this way our copy op is parallelized | ||
+ | incrementCopied(); | ||
+ | opType = OP_COPY; | ||
+ | strncpy(opCommand, command, FILENAME_MAX + FILENAME_MAX); | ||
+ | } | ||
+ | if(LOG) | ||
+ | logPath(targetPath, ""); | ||
+ | } | ||
+ | } | ||
} | } | ||
Line 688: | Line 785: | ||
unlink(doCommand); | unlink(doCommand); | ||
} | } | ||
+ | |||
+ | void cleanupThread(struct fileData *fd) | ||
+ | { | ||
+ | if((fd != NULL) && (fd->flag == 0)) { /* this instance of fileDir is a thread */ | ||
+ | pthread_mutex_lock(&mutexFD); | ||
+ | if((--ThreadCNT) == 0) | ||
+ | pthread_cond_signal(&condFD); | ||
+ | fd->THRDslot = -1; | ||
+ | pthread_mutex_unlock(&mutexFD); | ||
+ | pthread_exit(EXIT_SUCCESS); | ||
+ | } | ||
+ | } | ||
+ | |||
/* | /* | ||
* Open a directory and read the contents.  Call stat with each | * Open a directory and read the contents.  Call stat with each | ||
− | * file name. | + | * file name. |
* | * | ||
− | * Recursively call self for each sub dir. | + | * Recursively call self for each sub dir. |
* | * | ||
* print inode meta data for each file, one line per file in CSV format | * print inode meta data for each file, one line per file in CSV format | ||
*/ | */ | ||
void | void | ||
− | *fileDir( void *arg ) | + | *fileDir(void *arg) |
{ | { | ||
char *s, *t, *u, *dot, *end_dname; | char *s, *t, *u, *dot, *end_dname; | ||
− | char fname[FILENAME_MAX+1]; | + | char fname[FILENAME_MAX + 1]; |
int slot, id, found; | int slot, id, found; | ||
DIR *dirp; | DIR *dirp; | ||
− | long localCnt =0; /* number of files in a specific directory */ | + | long localCnt = 0; /* number of files in a specific directory */ |
− | unsigned long long localSz | + | unsigned long long localSz = 0; /* byte cnt of files in the local directory 2010.07 */ |
struct dirent *d; | struct dirent *d; | ||
struct stat f; | struct stat f; | ||
Line 711: | Line 821: | ||
fd = (struct fileData *) arg; | fd = (struct fileData *) arg; | ||
− | + | if(DEBUG_MODE) | |
− | printf( "Start %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname ); | + | printf("Start %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); |
− | + | ||
− | if ( (dirp = opendir( fd->dname )) == NULL ) { | + | if(fd == NULL) |
− | + | { | |
+ | fprintf(stderr, "ERR: invalid directory specified, skipping.\n"); | ||
+ | return NULL; | ||
+ | } | ||
+ | if((dirp = opendir(fd->dname)) == NULL) | ||
+ | { | ||
+ | fprintf(stderr, "ERR: unable to open directory '%s', skipping.\n", fd->dname); | ||
+ | cleanupThread(fd); | ||
+ | return NULL; | ||
} | } | ||
/* find the end of fs->name and put '/' at the end <end_dname> | /* find the end of fs->name and put '/' at the end <end_dname> | ||
Line 722: | Line 840: | ||
*s++ = '/'; | *s++ = '/'; | ||
end_dname = s; | end_dname = s; | ||
− | while ( (d = readdir( dirp )) != NULL ) { | + | while((d = readdir(dirp)) != NULL) { |
− | if ( strcmp(".",d->d_name) == 0 ) continue; | + | if(strcmp(".", d->d_name) == 0) |
− | if ( strcmp("..",d->d_name) == 0 ) continue; | + | continue; |
+ | if(strcmp("..", d->d_name) == 0) | ||
+ | continue; | ||
localCnt++; | localCnt++; | ||
s = d->d_name; | s = d->d_name; | ||
t = end_dname; | t = end_dname; | ||
− | while ( *s ) | + | while(*s) |
*t++ = *s++; | *t++ = *s++; | ||
− | *t = '\0'; | + | *t = '\0'; |
− | if ( lstat ( fd->dname, &f ) == -1 ) { | + | if(lstat(fd->dname, &f) == -1) { |
− | fprintf( stderr, " | + | fprintf(stderr, "ERR: lstat failed: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); |
continue; | continue; | ||
− | } | + | } |
+ | if(!S_ISDIR(f.st_mode) && !S_ISREG(f.st_mode)) | ||
+ | { | ||
+ | fprintf(stderr, "ERR: skipping non-dir, non-reg file: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); | ||
+ | if(LOG && DEBUG_MODE) | ||
+ | logPath(fd->dname, "NOTREG"); | ||
+ | continue; | ||
+ | } | ||
/* Follow Sub dirs recursivly but don't follow links */ | /* Follow Sub dirs recursivly but don't follow links */ | ||
localSz += f.st_size; | localSz += f.st_size; | ||
DU_TOTAL += f.st_size; | DU_TOTAL += f.st_size; | ||
− | if ( S_ISDIR(f.st_mode) ) { | + | if(S_ISDIR(f.st_mode)) |
− | if ( | + | { |
− | + | if(TARGETDIR) | |
− | + | { | |
+ | makeTargetDirPath(fd->dname); | ||
} | } | ||
− | pthread_mutex_lock (&mutexFD); | + | |
− | if ( ThreadCNT < MAXTHRDS ) { | + | if(EXCLUDE_DIRS > 0 && excludeDir(d->d_name)) { |
+ | if(DEBUG_MODE) fprintf(stderr, "INFO: skipping directory: %s\n", d->d_name); | ||
+ | continue; | ||
+ | } | ||
+ | |||
+ | pthread_mutex_lock(&mutexFD); | ||
+ | if(ThreadCNT < MAXTHRDS) { | ||
ThreadCNT++; | ThreadCNT++; | ||
id = totalTHRDS++; | id = totalTHRDS++; | ||
− | slot =0; found = -1; | + | slot = 0; found = -1; |
− | while ( slot < MAXTHRDS ) { | + | while(slot < MAXTHRDS) { |
− | if ( fdslot[slot].THRDslot == -1 ) { | + | if(fdslot[slot].THRDslot == -1) { |
found = slot; | found = slot; | ||
break; | break; | ||
Line 755: | Line 889: | ||
slot++; | slot++; | ||
} | } | ||
− | if ( found == -1 ) | + | if(found == -1) |
− | + | fprintf(stderr, "ERR: SlotE %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, "no available threads"); | |
else | else | ||
− | + | fdslot[slot].THRDslot = slot; | |
− | } else | + | } |
+ | else | ||
slot = -1; | slot = -1; | ||
− | pthread_mutex_unlock (&mutexFD); | + | pthread_mutex_unlock(&mutexFD); |
− | if ( slot != -1 ) { | + | if(slot != -1) { |
− | strcpy( fdslot[slot].dname, (const char*)fd->dname ); | + | strcpy(fdslot[slot].dname, (const char*)fd->dname); |
fdslot[slot].THRDid = id; | fdslot[slot].THRDid = id; | ||
fdslot[slot].flag = 0; | fdslot[slot].flag = 0; | ||
− | pthread_create( &fdslot[slot].thread_id, &fdslot[0].tattr, | + | if(pthread_create(&fdslot[slot].thread_id, &fdslot[0].tattr, |
− | + | fileDir, (void*)&fdslot[slot])) { | |
− | } else { | + | /* thread create failed, undo slot */ |
− | strcpy( local.dname, (const char*)fd->dname ); | + | pthread_mutex_lock(&mutexFD); |
+ | /* this won't go to zero here, so no need cond_signal */ | ||
+ | ThreadCNT--; | ||
+ | fdslot[slot].THRDslot = -1; | ||
+ | pthread_mutex_unlock(&mutexFD); | ||
+ | goto local_call; | ||
+ | } | ||
+ | } | ||
+ | else { | ||
+ | local_call: | ||
+ | strcpy(local.dname, (const char*)fd->dname); | ||
local.THRDslot = fd->THRDslot; | local.THRDslot = fd->THRDslot; | ||
local.THRDid = fd->THRDid; | local.THRDid = fd->THRDid; | ||
local.flag = fd->flag + 1; | local.flag = fd->flag + 1; | ||
− | fileDir( (void*) &local ); | + | fileDir((void*)&local); |
+ | } | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | s = end_dname + 1; dot = NULL; | ||
+ | while(*s) | ||
+ | { | ||
+ | if(*s == '.') | ||
+ | dot = s + 1; | ||
+ | s++; | ||
} | } | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | pthread_mutex_lock(&mutexPrintStat); | |
− | + | printStat(fd->dname, dot, &f, (long)0, (long)0); | |
− | + | operationType_t doOperation = opType; | |
− | + | opType = OP_NONE; | |
− | + | char doCommand[FILENAME_MAX + FILENAME_MAX + 1]; | |
− | + | strncpy(doCommand, opCommand, FILENAME_MAX + FILENAME_MAX); | |
− | + | pthread_mutex_unlock(&mutexPrintStat); | |
− | + | execOperation(doOperation, doCommand); | |
} | } | ||
} | } | ||
− | closedir( dirp ); | + | closedir(dirp); |
*--end_dname = '\0'; | *--end_dname = '\0'; | ||
#ifdef THRD_DEBUG | #ifdef THRD_DEBUG | ||
− | printf( "Ending %2d%5d %2d<%s>\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname ); | + | printf("Ending %2d%5d %2d<%s>\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); |
#endif /* THRD_DEBUG */ | #endif /* THRD_DEBUG */ | ||
− | if ( lstat ( fd->dname, &f ) == -1 ) { | + | if(lstat(fd->dname, &f) == -1) { |
− | fprintf( stderr, " | + | fprintf(stderr, "ERR: lstat failed: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); |
} | } | ||
else { | else { | ||
s = end_dname - 1; dot = NULL; | s = end_dname - 1; dot = NULL; | ||
− | while ( *s != '/' && (s != fd->dname)) | + | while(*s != '/' && (s != fd->dname)) |
{ | { | ||
− | if (*s == '.') { dot = s+1; break; } | + | if(*s == '.') { dot = s + 1; break; } |
− | s--; | + | s--; |
} | } | ||
− | if ( s+1 == dot ) /* Dot file is not an extension Exp: /.bashrc */ | + | if(s + 1 == dot) /* Dot file is not an extension Exp: /.bashrc */ |
dot = NULL; | dot = NULL; | ||
− | pthread_mutex_lock (&mutexPrintStat); | + | pthread_mutex_lock(&mutexPrintStat); |
− | printStat( fd->dname, dot, &f, localCnt, localSz ); | + | printStat(fd->dname, dot, &f, localCnt, localSz); |
− | + | operationType_t doOperation = opType; | |
− | + | opType = OP_NONE; | |
− | + | char doCommand[FILENAME_MAX + FILENAME_MAX + 1]; | |
− | + | strncpy(doCommand, opCommand, FILENAME_MAX + FILENAME_MAX); | |
− | pthread_mutex_unlock (&mutexPrintStat); | + | pthread_mutex_unlock(&mutexPrintStat); |
execOperation(doOperation, doCommand); | execOperation(doOperation, doCommand); | ||
} | } | ||
− | + | cleanupThread(fd); | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
/* return ; */ | /* return ; */ | ||
} | } | ||
− | |||
int | int | ||
− | main( int argc, char* argv[] ) | + | main(int argc, char* argv[]) |
{ | { | ||
int error, i; | int error, i; | ||
char *s, *c; | char *s, *c; | ||
void *status; | void *status; | ||
− | if ( argc < 2 || argv == NULL) | + | if(argc < 2 || argv == NULL) |
{ | { | ||
− | printHelp( ); | + | printHelp(); |
− | exit( EXIT_FAILURE ); | + | exit(EXIT_FAILURE); |
} | } | ||
argc--; argv++; | argc--; argv++; | ||
while(argc > 1) | while(argc > 1) | ||
{ | { | ||
− | if (!strncmp( *argv, "--maxthreads=", 13 )) | + | if(!strncmp(*argv, "--maxthreads=", 13)) |
{ | { | ||
− | + | MAXTHRDS = (size_t)atoi(&((*argv)[strlen("--maxthreads=")])); | |
− | + | if(MAXTHRDS > 64) MAXTHRDS = 64; | |
− | + | if(MAXTHRDS <= 2) MAXTHRDS = 2; | |
− | + | if(DEBUG_MODE) printf("INFO: MAXTHRDS='%lu'\n", MAXTHRDS); | |
} | } | ||
− | else if (!strncmp( *argv, "--retain=", 9 )) | + | else if(!strncmp(*argv, "--retain=", 9)) |
{ | { | ||
− | + | RETAIN = (size_t)atoi(&((*argv)[strlen("--retain=")])) * 24 * 3600; | |
− | + | if(DEBUG_MODE) printf("INFO: RETAIN='%lu'\n", RETAIN); | |
} | } | ||
− | else if ( !strcmp( *argv, "--move" ) ) | + | else if(!strcmp(*argv, "--move")) |
− | + | MOVE_MODE = 1; | |
− | else if ( !strcmp( *argv, "--debug" ) ) | + | else if(!strcmp(*argv, "--debug")) |
− | + | DEBUG_MODE = 1; | |
− | else if ( !strcmp( *argv, "--du" ) ) | + | else if(!strcmp(*argv, "--du")) |
{ | { | ||
− | + | DU_MODE = 1; | |
− | + | SHOWTOTALS = 0; | |
− | + | QUIET = 1; | |
} | } | ||
− | else if ( !strcmp( *argv, "--nototals" ) ) | + | else if(!strcmp(*argv, "--nototals")) |
− | + | SHOWTOTALS = 0; | |
− | else if ( !strcmp( *argv, "--noatime" ) ) | + | else if(!strcmp(*argv, "--noatime")) |
− | + | CMP_ATIME = 0; | |
− | else if ( !strcmp( *argv, "--atime" ) ) | + | else if(!strcmp(*argv, "--atime")) |
− | + | CMP_ATIME = 1; | |
− | else if ( !strcmp( *argv, "--nomtime" ) ) | + | else if(!strcmp(*argv, "--nomtime")) |
− | + | CMP_MTIME = 0; | |
− | else if ( !strcmp( *argv, "--mtime" ) ) | + | else if(!strcmp(*argv, "--mtime")) |
− | + | CMP_MTIME = 1; | |
− | else if ( !strcmp( *argv, "--noctime" ) ) | + | else if(!strcmp(*argv, "--noctime")) |
− | + | CMP_CTIME = 0; | |
− | else if ( !strcmp( *argv, "--ctime" ) ) | + | else if(!strcmp(*argv, "--ctime")) |
− | + | CMP_CTIME = 1; | |
− | else if ( ! | + | else if(!strncmp(*argv, "--excludedirs=", 14)) |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
{ | { | ||
− | + | memset(excludeDirData, 0, sizeof(excludeDirData)); | |
+ | strncpy(excludeDirData, &((*argv)[strlen("--excludedirs=")]), sizeof(excludeDirData)); | ||
+ | if(strlen(excludeDirData) == 0) | ||
+ | { | ||
+ | fprintf(stderr, "ERR: Invalid exclude file name.\n"); | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | char *token = strtok(excludeDirData, ","); | ||
+ | while(token != NULL && excludeDirCount < 1023) | ||
+ | { | ||
+ | excludeDirs[excludeDirCount] = strdup(token); | ||
+ | if(DEBUG_MODE) printf("INFO: Adding exclude dir '%s'\n", token); | ||
+ | excludeDirCount++; | ||
+ | EXCLUDE_DIRS++; | ||
+ | token = strtok(NULL, ","); | ||
+ | } | ||
+ | } | ||
} | } | ||
− | else if ( ! | + | else if(!strcmp(*argv, "--NoSnap") || !strcmp(*argv, "--nosnap")) |
{ | { | ||
− | + | excludeDirs[excludeDirCount] = strdup(".snapshots"); | |
− | + | if(DEBUG_MODE) printf("INFO: Adding exclude dir '.snapshots'\n"); | |
− | + | excludeDirCount++; | |
− | + | EXCLUDE_DIRS++; | |
− | + | excludeDirs[excludeDirCount] = strdup(".qsnaps"); | |
− | + | if(DEBUG_MODE) printf("INFO: Adding exclude dir '.snapshots'\n"); | |
− | + | excludeDirCount++; | |
− | + | EXCLUDE_DIRS++; | |
− | + | ||
} | } | ||
− | else if ( ! | + | else if(!strcmp(*argv, "--dryrun")) |
+ | DRYRUN = 1; | ||
+ | else if(!strcmp(*argv, "--force")) | ||
+ | FORCE = 1; | ||
+ | else if(!strcmp(*argv, "--nodirperms")) | ||
+ | NODIRPERMS = 1; | ||
+ | else if(!strcmp(*argv, "--purge")) | ||
{ | { | ||
− | + | PURGE = 1; | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
} | } | ||
− | else if ( !strncmp( *argv, "-- | + | else if(!strncmp(*argv, "--backuplog=", 12)) |
{ | { | ||
− | + | LOG = 1; | |
− | + | memset(logfile, 0, sizeof(logfile)); | |
− | + | strncpy(logfile, &((*argv)[strlen("--backuplog=")]), sizeof(logfile)); | |
− | + | if(strlen(logfile) == 0) | |
− | + | { | |
+ | fprintf(stderr, "ERR: Invalid log file name."); | ||
+ | LOG = 0; | ||
+ | } | ||
+ | if(DEBUG_MODE) printf("INFO: LOGFILE='%s'\n", logfile); | ||
} | } | ||
− | else if ( !strncmp( *argv, "-- | + | else if(!strncmp(*argv, "--status=", 9)) |
{ | { | ||
− | + | STATUS = 1; | |
− | + | memset(statusfile, 0, sizeof(statusfile)); | |
− | + | strncpy(statusfile, &((*argv)[strlen("--status=")]), sizeof(statusfile)); | |
− | + | if(strlen(statusfile) == 0) | |
− | + | { | |
− | + | fprintf(stderr, "ERR: Invalid status log file name."); | |
− | + | STATUS = 0; | |
− | + | } | |
− | + | if(DEBUG_MODE) printf("INFO: STATUS='%s'\n", statusfile); | |
+ | } | ||
+ | else if(!strncmp(*argv, "--targetdir=", 12)) | ||
+ | { | ||
+ | TARGETDIR = 1; | ||
+ | memset(targetdir, 0, sizeof(targetdir)); | ||
+ | strncpy(targetdir, &((*argv)[strlen("--targetdir=")]), sizeof(targetdir)); | ||
+ | if(DEBUG_MODE) | ||
+ | printf("INFO: TARGETDIR='%s'\n", targetdir); | ||
+ | } | ||
+ | else if(!strncmp(*argv, "--comparedir=", 13)) | ||
+ | { | ||
+ | COMPAREDIR = 1; | ||
+ | memset(comparedir, 0, sizeof(comparedir)); | ||
+ | strncpy(comparedir, &((*argv)[strlen("--comparedir=")]), sizeof(comparedir)); | ||
+ | if(DEBUG_MODE) printf("INFO: COMPAREDIR='%s'\n", comparedir); | ||
+ | if(!dirContainsFiles(comparedir)) | ||
+ | { | ||
+ | printf("ERR: Cannot run purge operation against a comparison directory with zero files '%s'\n", comparedir); | ||
+ | exit(EXIT_FAILURE); | ||
+ | } | ||
} | } | ||
− | else if ( !strcmp( *argv, "--dots" ) ) | + | else if(!strcmp(*argv, "--dots")) |
− | + | DOTS = 1; | |
− | else if ( !strcmp( *argv, "--quiet" ) ) | + | else if(!strcmp(*argv, "--quiet")) |
− | + | QUIET = 1; | |
− | argc--; argv++; | + | argc--; |
+ | argv++; | ||
} | } | ||
− | if (!strcmp( *argv, "--help" )) | + | if(!strcmp(*argv, "--help")) |
{ | { | ||
− | + | printHelp(); | |
− | + | exit(0); | |
} | } | ||
− | if (!strcmp( *argv, "--version" ) || !strcmp( *argv, "-v" )) | + | if(!strcmp(*argv, "--version") || !strcmp(*argv, "-v")) |
{ | { | ||
− | + | printVersion(); | |
− | + | exit(0); | |
} | } | ||
Line 959: | Line 1,130: | ||
fdslot = malloc(sizeof(struct fileData) * MAXTHRDS); | fdslot = malloc(sizeof(struct fileData) * MAXTHRDS); | ||
− | + | for(i = 0; i < MAXTHRDS; i++) | |
− | for ( i=0; i<MAXTHRDS; i++ ) | + | |
{ | { | ||
fdslot[i].THRDslot = -1; | fdslot[i].THRDslot = -1; | ||
− | if ( (error = pthread_attr_init( &fdslot[i].tattr )) ) | + | if((error = pthread_attr_init(&fdslot[i].tattr))) |
− | fprintf( stderr, "Failed to create pthread attr: %s\n", | + | fprintf(stderr, "Failed to create pthread attr: %s\n", |
− | + | strerror(error)); | |
− | else if ( (error = pthread_attr_setdetachstate( &fdslot[i].tattr, | + | else if((error = pthread_attr_setdetachstate(&fdslot[i].tattr, |
− | + | PTHREAD_CREATE_DETACHED) | |
− | + | )) | |
− | fprintf( stderr, "failed to set attribute detached: %s\n", | + | fprintf(stderr, "failed to set attribute detached: %s\n", |
− | + | strerror(error)); | |
} | } | ||
pthread_mutex_init(&mutexFD, NULL); | pthread_mutex_init(&mutexFD, NULL); | ||
− | strcpy( fdslot[0].dname, (const char*) *argv ); | + | pthread_cond_init(&condFD, NULL); |
+ | strcpy(fdslot[0].dname, (const char*)*argv); | ||
+ | struct stat statRoot = { 0 }; | ||
+ | if(stat(fdslot[0].dname, &statRoot) == -1 || !S_ISDIR(statRoot.st_mode)) | ||
+ | { | ||
+ | fprintf(stderr, "ERR: Invalid directory specification '%s'\n", fdslot[0].dname); | ||
+ | } | ||
+ | |||
rootdirlen = strlen(fdslot[0].dname); | rootdirlen = strlen(fdslot[0].dname); | ||
//Trim the trailing slash so we don't get // in the path | //Trim the trailing slash so we don't get // in the path | ||
− | if(fdslot[0].dname[strlen(fdslot[0].dname)-1] == '/') | + | if(fdslot[0].dname[strlen(fdslot[0].dname) - 1] == '/') |
{ | { | ||
− | fdslot[0].dname[strlen(fdslot[0].dname)-1] = '\0'; | + | fdslot[0].dname[strlen(fdslot[0].dname) - 1] = '\0'; |
rootdirlen--; | rootdirlen--; | ||
} | } | ||
Line 984: | Line 1,161: | ||
fdslot[0].THRDid = totalTHRDS++; | fdslot[0].THRDid = totalTHRDS++; | ||
fdslot[0].flag = 0; | fdslot[0].flag = 0; | ||
− | pthread_create( &(fdslot[0].thread_id), &fdslot[0].tattr, fileDir, | + | |
− | + | if(TARGETDIR) | |
+ | { | ||
+ | makeTargetDirPath(fdslot[0].dname); | ||
+ | } | ||
+ | |||
+ | if((error = pthread_create(&(fdslot[0].thread_id), &fdslot[0].tattr, fileDir, | ||
+ | (void*)&fdslot[0]))) { | ||
+ | fprintf(stderr, "failed to start a thread: %s\n", strerror(error)); | ||
+ | exit(error); | ||
+ | } | ||
if(SHOWTOTALS || STATUS || DU_MODE) | if(SHOWTOTALS || STATUS || DU_MODE) | ||
atexit(printTotals); | atexit(printTotals); | ||
− | pthread_exit( NULL ); | + | |
+ | pthread_mutex_lock(&mutexFD); | ||
+ | if(ThreadCNT > 0) | ||
+ | pthread_cond_wait(&condFD, &mutexFD); | ||
+ | pthread_mutex_unlock(&mutexFD); | ||
+ | pthread_exit(NULL); | ||
} | } | ||
</pre> | </pre> |
Latest revision as of 23:16, 25 October 2016
/*
 *  pwalk.c  Parallel Walk a file system and report file meta data
 *
 *  Copyright (C) 2013 John F Dey
 *  Copyright (C) 2013-2016 OSNEXUS Corporation
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 *  pwalk is inspired by du but designed to be used with large
 *  file systems ( > 10 million files )
 *
 *  History: dir.c, walk.c, walkv2,3,4, pwalkfs.c
 *
 *  Example of using the directory call, opendir and readdir to
 *  simulate ls.
 *
 *  1997.03.20 John Dey Although this is the first documented date for
 *             this file I have versions that date from 1988.
 *  2002.09.04 John Dey walk the directory and gather stats
 *  2002.09.06 John Dey make to look like du -a
 *  2004.07.06 John Dey add -a and -k
 *  2008.04.01 John Dey CSV output for database use
 *  2009.04.12 John Dey v3.1
 *             replaced constants with "FILENAME_MAX",
 *             Size of directory is size of all files in directory plus itself
 *             Added printStat function
 *             print file count on line with directories
 *  2009.05.18 check for control characters and double quotes in file names;
 *             escape the double quotes and print bad file names to stderr
 *  2009.12.30 size for dir should just be dir size; Fix; count returns 0
 *             for normal files and count of just the local directory; Previously
 *             count returned the recursive file count for the whole tree.
 *  2010.01.08 john dey; New field to output: file name extension.
 *             Extension is defined as the last part of the name after a Dot "."
 *             if no dot is found extension is empty ""
 *             new feature: accepts multiple directory names as cmd line argument
 *             This line of code has been replaced
 *                 if ( f.st_mode & S_IFDIR && (f.st_mode & S_IFMT != S_IFLNK) ) {
 *             With this new line of code:
 *                 if ( S_ISDIR(f.st_mode) ) {
 *             Or I could have done:
 *                 if ( (f.st_mode & S_IFDIR) == S_IFDIR )
 *  2010.01.11 John Dey Complete re-write of walkv4 transforming it into pwalk.
 *             pwalk is a threaded version of walkv4. pwalk will call fileDir
 *             as a new thread until MAXTHRDS is reached.
 *  2010.02.01 pwalk v1 did not detach nor did it join the threads;
 *             v2 fixes this shortcoming;
 *  2010.03.24 john dey; New physical hardware is available to run pwalk.
 *             16 threads are only using about 20% CPU with 10% IO wait.
 *             Based on this the thread count will be doubled to 32.
 *  2010.11.29 Add mutex for printStat
 *  2012.10.09 --NoSnap flag added. ignore directories that have the name .snapshot
 *  2013.08.02 john f dey; Add GNU license, --version flag added
 *  2013.10.15 steve umbehocker;
 *             added verbose --debug spam mode
 *             added compare and copy capabilities
 *             added retain= option to backup only recently modified files
 *             added customizable maxthreads
 *             added summary total display at end (disable with --nototals)
 *             updated and expanded usage section
 *             fixed segfault in walker
 *  2013.10.16 steve umbehocker;
 *             added --log mode logging
 *             added purge mode hooks
 *             added dryrun/force flags
 *  2013.10.21 steve umbehocker;
 *             added --status logging
 *             added purge w/ retain
 *  2013.10.22 steve umbehocker;
 *             added check so purge cannot compare against empty dir
 *             added execute operation logic to do cp/unlink operations outside
 *             of the mutex so that they're parallelized
 *             fixed sync/copy to create empty leaf directories
 *  2014.11.21 steve umbehocker; added fix for files with spaces and special chars
 *  2015.06.12 steve umbehocker; added fix for --help and --version
 *  2015.06.18 osnexus eng; merging dot fix from john
 *  2015.07.07 osnexus eng; fix to skip invalid dirs without exiting
 *  2015.07.07 osnexus eng; added --exclude= option
 *  2015.07.07 osnexus eng; fix to add thread cleanup on err condition
 *  2015.10.14 osnexus eng; adding .qsnaps to --nosnaps option
 *  2016.10.24 osnexus eng; added check to copy only regular files
 *  2016.10.24 osnexus eng; added fix to deal with filenames containing "$" chars
 *  2016.10.24 osnexus eng; added fix to use raw path with stat() and escaped
 *             paths with shell commands
 *  2016.10.24 osnexus eng; added stderr redirect to /dev/null for cp/rsync/mv commands
 *  2016.10.24 osnexus eng; fixed rsync usage for copying directory permissions
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <utime.h>
#include <time.h>
#include <errno.h>
#include <pthread.h>
#include <unistd.h>

#undef THRD_DEBUG

static char *Version = "v4.1.1 - 10/26/2016 - (c) 2013 John F Dey and (c) 2013-2016 OSNEXUS - GPLv2 License";
static char *whoami = "pwalk";

/* Redirection suffix appended to every shell command that pwalk runs. */
//char *errFile = ">> pwalk.err 2>&1 ";
char *errFile = "> /dev/null 2>&1 ";

int EXCLUDE_DIRS = 0;       /* if set ignore comma delimited specified directories like those called .snapshot */
int TARGETDIR = 0;          /* if set copy the files to the target path */
int COMPAREDIR = 0;         /* if set diff the files to the compare dir to determine where the holes are then purge */
int DEBUG_MODE = 0;         /* verbose output */
int CMP_ATIME = 0;          /* indicates that compare operations should look at atime */
int CMP_MTIME = 1;
int CMP_CTIME = 0;
int SHOWTOTALS = 1;         /* show totals at the end via atexit() */
int DOTS = 0;               /* print little dots to show progress */
int QUIET = 0;              /* quiet gives the best backup performance */
int PURGE = 0;
int LOG = 0;
int NODIRPERMS = 0;
unsigned int DU_MODE = 0;
int STATUS = 0;
int RETENTION_DAYS = 60;
size_t MAXTHRDS = 32;
size_t RETAIN = 0;
size_t DRYRUN = 0;
size_t FORCE = 0;
int MOVE_MODE = 0;
unsigned long long DU_TOTAL = 4096L;

char *excludeDirs[1024];
char excludeDirData[8192];
size_t excludeDirCount = 0;
typedef enum operationType { OP_NONE, OP_COPY, OP_DELETE } operationType_t; operationType_t opType; char opCommand[FILENAME_MAX + FILENAME_MAX + 1]; typedef struct fileData { char dname[FILENAME_MAX + 1]; /* full path and basename */ int THRDslot; /* slot ID 0 - MAXTHRDS */ int THRDid; /* unique ID increaments with each new THRD */ int flag; /* 0 if thread; recursion > 0 */ pthread_t thread_id; /* system assigned */ pthread_attr_t tattr; } fileData_t; int ThreadCNT = 0; /* ThreadCNT < MAXTHRDS */ int totalTHRDS = 0; struct fileData *fdslot; pthread_mutex_t mutexLog; pthread_mutex_t mutexCounter; pthread_mutex_t mutexFD; pthread_mutex_t mutexPrintStat; pthread_cond_t condFD; char targetdir[FILENAME_MAX + 1]; char comparedir[FILENAME_MAX + 1]; char logfile[FILENAME_MAX + 1]; char statusfile[FILENAME_MAX + 1]; int rootdirlen = 0; time_t currenttime = 0; time_t laststatustime = 0; void incrementCounter(size_t *counter) { pthread_mutex_lock(&mutexCounter); (*counter)++; pthread_mutex_unlock(&mutexCounter); } /* counters for summing up and printing totals at the end */ typedef struct counterReport { size_t scanned; size_t copied; size_t modified; size_t created; size_t error; size_t skipped; size_t deleted; } counterReport_t; counterReport_t report = { 0, 0, 0, 0, 0, 0, 0 }; void incrementScanned(){ incrementCounter(&report.scanned); } void incrementCopied(){ incrementCounter(&report.copied); } void incrementModified(){ incrementCounter(&report.modified); } void incrementCreated(){ incrementCounter(&report.created); } void incrementError(){ incrementCounter(&report.error); } void incrementDeleted(){ incrementCounter(&report.deleted); } void incrementSkipped(){ incrementCounter(&report.skipped); } void printVersion() { fprintf(stderr, "%s version %s\n", whoami, Version); fprintf(stderr, "%s Copyright (C) 2013-2016 John F Dey & Steve Umbehocker\n", whoami); fprintf(stderr, " * pwalk comes with ABSOLUTELY NO WARRANTY;\n"); fprintf(stderr, " * This is free software, you can 
redistribute it and/or modify it under the terms of the GNU General Public License\n"); fprintf(stderr, " * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.\n"); } void printHelp() { fprintf(stderr, "%s version %s\n\n", whoami, Version); fprintf(stderr, "Usage : \n"); fprintf(stderr, "%s --help --version\n", whoami); fprintf(stderr, " Common Args :\n"); fprintf(stderr, " --dryrun : use this to test commands\n"); fprintf(stderr, " without making any changes to the system\n"); fprintf(stderr, " --maxthreads=N : indicates the number of threads (default=32)\n"); fprintf(stderr, " --nototals : disables printing of totals after the scan\n"); fprintf(stderr, " --dots : prints a dot and total every 1000 files scanned.\n"); fprintf(stderr, " --quiet : no chatter, speeds up the scan.\n"); fprintf(stderr, " --nosnap : Ignore directories with name .snapshot and .qsnaps\n"); fprintf(stderr, " --excludedirs=DIR1,DIR2,... : Ignore directories with specified names.\n"); fprintf(stderr, " --debug : Verbose debug spam\n"); fprintf(stderr, " Output Format : CSV\n"); fprintf(stderr, " Fields : DateStamp,\"inode\",\"filename\",\"fileExtension\",\"UID\",\n"); fprintf(stderr, " \"GID\",\"st_size\",\"st_blocks\",\"st_mode\",\"atime\",\n"); fprintf(stderr, " \"mtime\",\"ctime\",\"File Count\",\"Directory Size\"\n"); fprintf(stderr, "\n"); fprintf(stderr, "Walk Usage : \n"); fprintf(stderr, "%s SOURCEDIR\n", whoami); fprintf(stderr, " Command Args :\n"); fprintf(stderr, " SOURCEDIR : Fully qualified path to the directory to walk\n"); fprintf(stderr, "\n"); fprintf(stderr, "Copy/Backup Usage : \n"); fprintf(stderr, "%s --targetdir=TARGETDIR SOURCEDIR\n", whoami); fprintf(stderr, "%s --retain=30 --targetdir=TARGETDIR SOURCEDIR\n", whoami); fprintf(stderr, " Command Args :\n"); fprintf(stderr, " --move : move files rather than copy\n"); fprintf(stderr, " --nodirperms : don't copy directory permissions\n"); fprintf(stderr, " 
--targetdir : copy files to specified TARGETDIR\n"); fprintf(stderr, " --atime : copy if access time change (default=no atime)\n"); fprintf(stderr, " --backuplog=LOGFILE : log all files that were copied.\n"); fprintf(stderr, " --status=STATUSFILE : write periodic status updates to specified file\n"); fprintf(stderr, " --retain : copy if file ctime or mtime within retention period\n"); fprintf(stderr, " specified in days. eg: --retain=60\n"); fprintf(stderr, " --nomtime : ignore mtime (default=use mtime)\n"); fprintf(stderr, " SOURCEDIR : Fully qualified path to the directory to walk\n"); fprintf(stderr, "\n"); fprintf(stderr, "Delete/Purge Usage : \n"); fprintf(stderr, "%s --purge [--force] --comparedir=COMPAREDIR PURGEDIR\n", whoami); fprintf(stderr, "%s --purge [--force] --retain=N PURGEDIR\n", whoami); fprintf(stderr, " Command Args :\n"); fprintf(stderr, " --comparedir=DIR : compare against this dir but dont touch any files\n"); fprintf(stderr, " in it. comparedir is usually the SOURCEDIR from\n"); fprintf(stderr, " a prior copy/sync stage.\n"); fprintf(stderr, " --purge : WARNING: this deletes files older than the\n"); fprintf(stderr, " retain period -OR- if retain is not specified\n"); fprintf(stderr, " --comparedir is required. The comparedir is\n"); fprintf(stderr, " compared against the specified dir and any files\n"); fprintf(stderr, " not found in the comparedir are purged.\n"); fprintf(stderr, " --force : NOTE: default is a *dry-run* for purge, you must\n"); fprintf(stderr, " specify --force option to actually purge files\n"); fprintf(stderr, " --atime : keep if access time within retain period\n"); fprintf(stderr, " --retain : keep if file ctime or mtime within retention period\n"); fprintf(stderr, " specified in days. 
eg: --retain=60\n"); fprintf(stderr, "\n"); } int excludeDir(const char *dirName) { if(excludeDirCount == 0) return 0; int i = 0; for(i = 0; i < excludeDirCount; i++) { if(!strcmp(excludeDirs[i], dirName)) return 1; } return 0; } void updateStatus(char *message) { time_t now; time(&now); size_t elapsed = (size_t)difftime(now, currenttime); size_t hours = elapsed / 3600; size_t minutes = (elapsed % 3600) / 60; size_t seconds = elapsed % 60; FILE *statusFp = fopen(statusfile, "w"); if(statusFp != NULL) { fprintf(statusFp, " status: %s\n", message); fprintf(statusFp, " scanned: %lu\n", report.scanned); fprintf(statusFp, " copied: %lu\n", report.copied); fprintf(statusFp, " modified: %lu\n", report.modified); fprintf(statusFp, " created: %lu\n", report.created); fprintf(statusFp, " skipped: %lu\n", report.skipped); fprintf(statusFp, " deleted: %lu\n", report.deleted); fprintf(statusFp, " errors: %lu\n", report.error); fprintf(statusFp, " elapsed: %02lu:%02lu:%02lu\n", hours, minutes, seconds); fclose(statusFp); } } void printDot() { if(DOTS) { if(report.scanned % 1000 == 0) printf("%ld", report.scanned); else if(report.scanned % 100 == 0) printf("."); } } void updateRunningStatus() { if(STATUS && (report.scanned % 256 == 0)) { time_t now; time(&now); size_t elapsed = (size_t)difftime(now, laststatustime); if(elapsed >= 10) { laststatustime = now; updateStatus("running"); } } } void printTotals() { if(STATUS) { updateStatus("completed"); } if(DU_MODE) { fprintf(stderr, "TOTAL: %llu\n", DU_TOTAL); } if(!SHOWTOTALS) return; time_t now; time(&now); size_t elapsed = (size_t)difftime(now, currenttime); size_t hours = elapsed / 3600; size_t minutes = (elapsed % 3600) / 60; size_t seconds = elapsed % 60; fprintf(stderr, "\n\n%12s %12s %12s %12s %12s %12s %12s %12s\n", "Scanned", "Copied", "Modified", "Created", "Deleted", "Skipped", "Errors", "Elapsed (HH:MM:SS)"); fprintf(stderr, "%12lu %12lu %12lu %12lu %12lu %12lu %12lu %02lu:%02lu:%02lu\n", report.scanned, report.copied, 
report.modified, report.created, report.deleted, report.skipped, report.error, hours, minutes, seconds); } void getDirPath(char *dirPath, char *fullPath, size_t bufferLen) { strncpy(dirPath, fullPath, FILENAME_MAX + FILENAME_MAX); size_t pos = strlen(dirPath) - 1; while(pos > 0 && dirPath[pos] != '/') { dirPath[pos] = 0; pos--; } } void logPath(char *targetPath, char *mode) { if(!LOG || (logfile[0] == 0)) return; pthread_mutex_lock(&mutexLog); FILE *fp = fopen(logfile, "a"); if(fp != NULL) { if(targetPath != NULL) { if(mode != NULL && strlen(mode)) fprintf(fp, "%s: %s\n", mode, targetPath); else fprintf(fp, "%s\n", targetPath); } fclose(fp); } pthread_mutex_unlock(&mutexLog); } int dirContainsFiles(char *targetPath) { struct dirent *ep = NULL; DIR *dp = opendir(targetPath); if(dp != NULL) { while(ep = readdir(dp)) { if(ep->d_name[0] == '.') continue; //fprintf(stderr, "INFO: directory entry '%s'", ep->d_name); closedir(dp); return 1; } closedir(dp); } return 0; } void logLine(struct stat *f, const char *sourcePath, const char *exten, const long fileCnt, const long dirSz) { if(!DOTS && !QUIET) { char outFile[FILENAME_MAX + FILENAME_MAX]; sprintf(outFile, "\"%ld\",\"%s\",\"%s\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%07o\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%ld\"\n", (long)f->st_ino, sourcePath, (exten) ? 
exten : "", (long)f->st_uid, (long)f->st_gid, (long)f->st_size, (long)f->st_blocks, (int)f->st_mode, (long)f->st_atime, (long)f->st_mtime, (long)f->st_ctime, fileCnt, dirSz); fputs(outFile, stdout); } } void logDirLine(struct stat *f, const char *sourcePath) { if(!DOTS && !QUIET && DEBUG_MODE) { char outFile[FILENAME_MAX + FILENAME_MAX]; sprintf(outFile, "INFO: CHECKING/CREATING DIR: \"%ld\",\"%s\"\n", (long)f->st_ino, sourcePath); fputs(outFile, stdout); } } void logCompareLine(const char *sourcePath, const char *targetPath) { char outFile[FILENAME_MAX + FILENAME_MAX]; sprintf(outFile, "INFO: COMPARING [%s] [%s]", sourcePath, targetPath); fputs(outFile, stdout); } int purgeFile(struct stat *f, char *sourcePath, char *sourcePathRaw, char *targetPath, char *targetPathRaw) { if(S_ISDIR(f->st_mode)) { if(DEBUG_MODE) printf("INFO: Purge skipping DIR: '%s'\n", sourcePath); incrementSkipped(); return -1; } if(!RETAIN && COMPAREDIR) { if(DEBUG_MODE) printf("comparing: \n\t%s :: %s\n\t%s :: %s\n", sourcePath, targetPath, sourcePathRaw, targetPathRaw); struct stat statTarget = { 0 }; if(stat(targetPathRaw, &statTarget) == -1 && S_ISREG(f->st_mode)) { incrementDeleted(); if(DEBUG_MODE) printf("INFO: PURGE [%s] : %s\n", (DRYRUN || !FORCE) ? "dryrun" : "UNLINK", sourcePath); if(!DRYRUN && FORCE) { opType = OP_DELETE; strncpy(opCommand, sourcePathRaw, FILENAME_MAX + FILENAME_MAX); if(DOTS) printf("!"); } } else incrementSkipped(); return 0; } if(RETAIN && (currenttime > f->st_ctime) && ( (!CMP_CTIME || (difftime(currenttime, f->st_ctime) > RETAIN)) && (!CMP_MTIME || (difftime(currenttime, f->st_mtime) > RETAIN)) && (!CMP_ATIME || (difftime(currenttime, f->st_atime) > RETAIN)))) { incrementDeleted(); if(DEBUG_MODE) { printf("INFO: PURGE [%s] (%lu days old): %s\n", (DRYRUN || !FORCE) ? 
"dryrun" : "UNLINK", (size_t)difftime(currenttime, f->st_mtime) / (3600 * 24), sourcePath); } if(!DRYRUN && FORCE) { //we want to just save the path so we can do the purge outside of this criticial section //this way our delete is parallelized strncpy(opCommand, sourcePath, FILENAME_MAX + FILENAME_MAX); opType = OP_DELETE; if(DOTS) printf("!"); } } else incrementSkipped(); return 0; } void getProcessedSourcePath(const char *fname, char *sourcePath) { const char *s = fname; char *t; t = sourcePath; while(*s) { if(*s == '"') *t++ = '\\'; if(*s == '$') *t++ = '\\'; *t++ = *s++; } *t++ = *s++; } void getTargetPathFromSource(const char *sourcePath, char *targetPath) { if(TARGETDIR) { //Add a trailing / to the base path if it's not there. size_t targetdirLen = strlen(targetdir); if((targetdirLen + 1 < FILENAME_MAX + FILENAME_MAX) && targetdir[targetdirLen - 1] != '/') { targetdir[targetdirLen] = '/'; targetdir[targetdirLen + 1] = 0; } size_t offset = rootdirlen + 1; strncpy(targetPath, targetdir, FILENAME_MAX + FILENAME_MAX); strncat(targetPath, &sourcePath[offset], FILENAME_MAX + FILENAME_MAX); } else if(COMPAREDIR) { //Add a trailing / to the base path if it's not there. 
size_t comparedirLen = strlen(comparedir); if((comparedirLen + 1 < FILENAME_MAX + FILENAME_MAX) && comparedir[comparedirLen - 1] != '/') { comparedir[comparedirLen] = '/'; comparedir[comparedirLen + 1] = 0; } size_t offset = rootdirlen + 1; strncpy(targetPath, comparedir, FILENAME_MAX + FILENAME_MAX); strncat(targetPath, &sourcePath[offset], FILENAME_MAX + FILENAME_MAX); } //else { } //standard PWALK } int makeTargetDirPath(const char *fname) { char sourcePath[FILENAME_MAX + FILENAME_MAX]; char targetPath[FILENAME_MAX + FILENAME_MAX]; char targetPathRaw[FILENAME_MAX + FILENAME_MAX]; getProcessedSourcePath(fname, sourcePath); getTargetPathFromSource(fname, targetPathRaw); getTargetPathFromSource(sourcePath, targetPath); struct stat statSource = { 0 }; stat(fname, &statSource); struct stat statTarget = { 0 }; stat(targetPathRaw, &statTarget); if(!S_ISDIR(statSource.st_mode)) { fprintf(stderr, "ERR: source '%s' is not a directory.", sourcePath); exit(-1); } logDirLine(&statTarget, targetPath); if(!S_ISDIR(statTarget.st_mode)) { incrementCreated(); incrementCopied(); char command[FILENAME_MAX + FILENAME_MAX]; command[0] = '\0'; snprintf(command, FILENAME_MAX + FILENAME_MAX, "mkdir \"%s\" %s", targetPath, errFile); if(DEBUG_MODE) printf("INFO: making DIR: %s\n", command); if(!DRYRUN) { system(command); struct utimbuf times; times.actime = statSource.st_atime; times.modtime = statSource.st_mtime; utime(targetPath, ×); } //preserve the permissions for the directories if(!NODIRPERMS) { if(DEBUG_MODE) printf("INFO: updating DIR perms via rsync: %s\n", command); snprintf(command, FILENAME_MAX + FILENAME_MAX, "rsync -ptgo -A -X -d --no-recursive --exclude=* \"%s/\" \"%s\" %s", sourcePath, targetPath, errFile); if(!DRYRUN) system(command); /* //NOTE: This old technique doesn't copy the extended attributes of the directory snprintf(command, FILENAME_MAX + FILENAME_MAX, "chown --reference=\"%s\" \"%s\" %s", sourcePath, targetPath, errFile); if(!DRYRUN) system(command); 
snprintf(command, FILENAME_MAX + FILENAME_MAX, "chmod --reference=\"%s\" \"%s\" %s", sourcePath, targetPath, errFile); if(!DRYRUN) system(command); */ } } else { if(DEBUG_MODE) printf("INFO: verified DIR exists: %s\n", fname); } return 0; } /* * this needs to be in a crital secion (and it is!) */ void printStat(char *fname, char *exten, struct stat *f, long fileCnt, long dirSz) { char sourcePath[FILENAME_MAX + FILENAME_MAX]; char sourcePathDir[FILENAME_MAX + FILENAME_MAX]; char targetPath[FILENAME_MAX + FILENAME_MAX]; char targetPathDir[FILENAME_MAX + FILENAME_MAX]; char targetPathRaw[FILENAME_MAX + FILENAME_MAX]; getProcessedSourcePath(fname, sourcePath); getTargetPathFromSource(fname, targetPathRaw); getTargetPathFromSource(sourcePath, targetPath); opType = OP_NONE; logLine(f, sourcePath, exten, fileCnt, dirSz); updateRunningStatus(); printDot(); incrementScanned(); struct stat statSource = { 0 }; stat(fname, &statSource); if(S_ISDIR(statSource.st_mode)) { return; } if(PURGE) { purgeFile(f, sourcePath, fname, targetPath, targetPathRaw); return; } //If a target dir was specified then we need to do file copy operations if(TARGETDIR) { struct stat statTarget = { 0 }; char doCopy = 0; char doCompare = 0; if(stat(targetPathRaw, &statTarget) == -1 || MOVE_MODE) { if(DEBUG_MODE)fprintf(stderr, "INFO: File '%s' doesn't exist at target, doing copy '%s'\n", sourcePath, targetPath); doCopy = 1; } else if(S_ISDIR(f->st_mode)) { if(DEBUG_MODE)fprintf(stderr, "ERR: Target is a directory '%s'\n", targetPath); incrementError(); return; } else if(!S_ISREG(f->st_mode)) { if(DEBUG_MODE)fprintf(stderr, "ERR: Target not regular file '%s'\n", targetPath); incrementError(); return; } else { //printf("target exists '%s'", targetPath); doCompare = 1; } if(doCompare) { if(DEBUG_MODE) { logCompareLine(sourcePath, targetPath); } if(MOVE_MODE) { if(DEBUG_MODE) printf("INFO: File has MOVE MODE, copying\n"); doCopy = 1; } else if(f->st_mode != f->st_mode) { if(DEBUG_MODE) printf("INFO: File 
has MODE CHANGE, copying\n"); doCopy = 1; } else if(f->st_size != f->st_size) { if(DEBUG_MODE) printf("INFO: File has SIZE CHANGE, copying\n"); doCopy = 1; } // the target file will always have a different create time so we don't // want to use == for ctime compare, rather we just want to see if the source // was created after the target, if so, need to recopy else if(CMP_CTIME && (f->st_ctime < f->st_ctime)) { if(DEBUG_MODE) printf("INFO: CTIME on target is older than source, copying\n"); doCopy = 1; } else if(CMP_MTIME && (f->st_mtime != f->st_mtime)) { if(DEBUG_MODE) printf("INFO: File has MTIME CHANGE, copying\n"); } else { if(DEBUG_MODE) printf("INFO: Detected no file changes, copy not required.\n"); } if(!doCopy && CMP_ATIME && (f->st_atime != f->st_atime)) { if(DEBUG_MODE) printf("INFO: File has only ATIME CHANGE, updating timestamps\n"); struct utimbuf times; times.actime = f->st_atime; times.modtime = f->st_mtime; utime(targetPath, ×); } } if(doCopy) { if(RETAIN) { if((currenttime < f->st_ctime) || (CMP_CTIME && (difftime(currenttime, f->st_ctime) < RETAIN)) || (CMP_MTIME && (difftime(currenttime, f->st_mtime) < RETAIN)) || (CMP_ATIME && (difftime(currenttime, f->st_atime) < RETAIN))) { if(DEBUG_MODE) printf("INFO: INSIDE RETAIN (%lu days old): %s\n", (size_t)difftime(currenttime, f->st_mtime) / (3600 * 24), sourcePath); } else { if(DEBUG_MODE) printf("INFO: OUTSIDE RETAIN (%lu days old), skipping: %s\n", (size_t)difftime(currenttime, f->st_mtime) / (3600 * 24), sourcePath); incrementSkipped(); return; } } if(doCompare) incrementModified(); else incrementCreated(); char command[FILENAME_MAX + FILENAME_MAX]; command[0] = '\0'; if(MOVE_MODE) snprintf(command, FILENAME_MAX + FILENAME_MAX, "mv \"%s\" \"%s\" %s", sourcePath, targetPath, errFile); else snprintf(command, FILENAME_MAX + FILENAME_MAX, "cp --no-dereference --preserve=all \"%s\" \"%s\" %s", sourcePath, targetPath, errFile); if(DEBUG_MODE) printf("INFO: [%s]\n", command); if(!DRYRUN) { //we want to 
just save the command so we can do the copy outside of this criticial section //this way our copy op is parallelized incrementCopied(); opType = OP_COPY; strncpy(opCommand, command, FILENAME_MAX + FILENAME_MAX); } if(LOG) logPath(targetPath, ""); } } } void execOperation(operationType_t doOperation, char *doCommand) { if(doOperation == OP_COPY) system(doCommand); if(doOperation == OP_DELETE) unlink(doCommand); } void cleanupThread(struct fileData *fd) { if((fd != NULL) && (fd->flag == 0)) { /* this instance of fileDir is a thread */ pthread_mutex_lock(&mutexFD); if((--ThreadCNT) == 0) pthread_cond_signal(&condFD); fd->THRDslot = -1; pthread_mutex_unlock(&mutexFD); pthread_exit(EXIT_SUCCESS); } } /* * Open a directory and read the conents. Call stat with each * file name. * * Recursively call self for each sub dir. * * print inode meta data for each file, one line per file in CSV format */ void *fileDir(void *arg) { char *s, *t, *u, *dot, *end_dname; char fname[FILENAME_MAX + 1]; int slot, id, found; DIR *dirp; long localCnt = 0; /* number of files in a specific directory */ unsigned long long localSz = 0; /* byte cnt of files in the local directory 2010.07 */ struct dirent *d; struct stat f; struct fileData *fd, local; fd = (struct fileData *) arg; if(DEBUG_MODE) printf("Start %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); if(fd == NULL) { fprintf(stderr, "ERR: invalid directory specified, skipping.\n"); return NULL; } if((dirp = opendir(fd->dname)) == NULL) { fprintf(stderr, "ERR: unable to open directory '%s', skipping.\n", fd->dname); cleanupThread(fd); return NULL; } /* find the end of fs->name and put '/' at the end <end_dname> points to char after '/' */ s = fd->dname + strlen(fd->dname); *s++ = '/'; end_dname = s; while((d = readdir(dirp)) != NULL) { if(strcmp(".", d->d_name) == 0) continue; if(strcmp("..", d->d_name) == 0) continue; localCnt++; s = d->d_name; t = end_dname; while(*s) *t++ = *s++; *t = '\0'; if(lstat(fd->dname, &f) == -1) 
{ fprintf(stderr, "ERR: lstat failed: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); continue; } if(!S_ISDIR(f.st_mode) && !S_ISREG(f.st_mode)) { fprintf(stderr, "ERR: skipping non-dir, non-reg file: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); if(LOG && DEBUG_MODE) logPath(fd->dname, "NOTREG"); continue; } /* Follow Sub dirs recursivly but don't follow links */ localSz += f.st_size; DU_TOTAL += f.st_size; if(S_ISDIR(f.st_mode)) { if(TARGETDIR) { makeTargetDirPath(fd->dname); } if(EXCLUDE_DIRS > 0 && excludeDir(d->d_name)) { if(DEBUG_MODE) fprintf(stderr, "INFO: skipping directory: %s\n", d->d_name); continue; } pthread_mutex_lock(&mutexFD); if(ThreadCNT < MAXTHRDS) { ThreadCNT++; id = totalTHRDS++; slot = 0; found = -1; while(slot < MAXTHRDS) { if(fdslot[slot].THRDslot == -1) { found = slot; break; } slot++; } if(found == -1) fprintf(stderr, "ERR: SlotE %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, "no available threads"); else fdslot[slot].THRDslot = slot; } else slot = -1; pthread_mutex_unlock(&mutexFD); if(slot != -1) { strcpy(fdslot[slot].dname, (const char*)fd->dname); fdslot[slot].THRDid = id; fdslot[slot].flag = 0; if(pthread_create(&fdslot[slot].thread_id, &fdslot[0].tattr, fileDir, (void*)&fdslot[slot])) { /* thread create failed, undo slot */ pthread_mutex_lock(&mutexFD); /* this won't go to zero here, so no need cond_signal */ ThreadCNT--; fdslot[slot].THRDslot = -1; pthread_mutex_unlock(&mutexFD); goto local_call; } } else { local_call: strcpy(local.dname, (const char*)fd->dname); local.THRDslot = fd->THRDslot; local.THRDid = fd->THRDid; local.flag = fd->flag + 1; fileDir((void*)&local); } } else { s = end_dname + 1; dot = NULL; while(*s) { if(*s == '.') dot = s + 1; s++; } pthread_mutex_lock(&mutexPrintStat); printStat(fd->dname, dot, &f, (long)0, (long)0); operationType_t doOperation = opType; opType = OP_NONE; char doCommand[FILENAME_MAX + FILENAME_MAX + 1]; strncpy(doCommand, opCommand, FILENAME_MAX + 
FILENAME_MAX); pthread_mutex_unlock(&mutexPrintStat); execOperation(doOperation, doCommand); } } closedir(dirp); *--end_dname = '\0'; #ifdef THRD_DEBUG printf("Ending %2d%5d %2d<%s>\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); #endif /* THRD_DEBUG */ if(lstat(fd->dname, &f) == -1) { fprintf(stderr, "ERR: lstat failed: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname); } else { s = end_dname - 1; dot = NULL; while(*s != '/' && (s != fd->dname)) { if(*s == '.') { dot = s + 1; break; } s--; } if(s + 1 == dot) /* Dot file is not an extension Exp: /.bashrc */ dot = NULL; pthread_mutex_lock(&mutexPrintStat); printStat(fd->dname, dot, &f, localCnt, localSz); operationType_t doOperation = opType; opType = OP_NONE; char doCommand[FILENAME_MAX + FILENAME_MAX + 1]; strncpy(doCommand, opCommand, FILENAME_MAX + FILENAME_MAX); pthread_mutex_unlock(&mutexPrintStat); execOperation(doOperation, doCommand); } cleanupThread(fd); /* return ; */ } int main(int argc, char* argv[]) { int error, i; char *s, *c; void *status; if(argc < 2 || argv == NULL) { printHelp(); exit(EXIT_FAILURE); } argc--; argv++; while(argc > 1) { if(!strncmp(*argv, "--maxthreads=", 13)) { MAXTHRDS = (size_t)atoi(&((*argv)[strlen("--maxthreads=")])); if(MAXTHRDS > 64) MAXTHRDS = 64; if(MAXTHRDS <= 2) MAXTHRDS = 2; if(DEBUG_MODE) printf("INFO: MAXTHRDS='%lu'\n", MAXTHRDS); } else if(!strncmp(*argv, "--retain=", 9)) { RETAIN = (size_t)atoi(&((*argv)[strlen("--retain=")])) * 24 * 3600; if(DEBUG_MODE) printf("INFO: RETAIN='%lu'\n", RETAIN); } else if(!strcmp(*argv, "--move")) MOVE_MODE = 1; else if(!strcmp(*argv, "--debug")) DEBUG_MODE = 1; else if(!strcmp(*argv, "--du")) { DU_MODE = 1; SHOWTOTALS = 0; QUIET = 1; } else if(!strcmp(*argv, "--nototals")) SHOWTOTALS = 0; else if(!strcmp(*argv, "--noatime")) CMP_ATIME = 0; else if(!strcmp(*argv, "--atime")) CMP_ATIME = 1; else if(!strcmp(*argv, "--nomtime")) CMP_MTIME = 0; else if(!strcmp(*argv, "--mtime")) CMP_MTIME = 1; else if(!strcmp(*argv, 
"--noctime")) CMP_CTIME = 0; else if(!strcmp(*argv, "--ctime")) CMP_CTIME = 1; else if(!strncmp(*argv, "--excludedirs=", 14)) { memset(excludeDirData, 0, sizeof(excludeDirData)); strncpy(excludeDirData, &((*argv)[strlen("--excludedirs=")]), sizeof(excludeDirData)); if(strlen(excludeDirData) == 0) { fprintf(stderr, "ERR: Invalid exclude file name.\n"); } else { char *token = strtok(excludeDirData, ","); while(token != NULL && excludeDirCount < 1023) { excludeDirs[excludeDirCount] = strdup(token); if(DEBUG_MODE) printf("INFO: Adding exclude dir '%s'\n", token); excludeDirCount++; EXCLUDE_DIRS++; token = strtok(NULL, ","); } } } else if(!strcmp(*argv, "--NoSnap") || !strcmp(*argv, "--nosnap")) { excludeDirs[excludeDirCount] = strdup(".snapshots"); if(DEBUG_MODE) printf("INFO: Adding exclude dir '.snapshots'\n"); excludeDirCount++; EXCLUDE_DIRS++; excludeDirs[excludeDirCount] = strdup(".qsnaps"); if(DEBUG_MODE) printf("INFO: Adding exclude dir '.snapshots'\n"); excludeDirCount++; EXCLUDE_DIRS++; } else if(!strcmp(*argv, "--dryrun")) DRYRUN = 1; else if(!strcmp(*argv, "--force")) FORCE = 1; else if(!strcmp(*argv, "--nodirperms")) NODIRPERMS = 1; else if(!strcmp(*argv, "--purge")) { PURGE = 1; } else if(!strncmp(*argv, "--backuplog=", 12)) { LOG = 1; memset(logfile, 0, sizeof(logfile)); strncpy(logfile, &((*argv)[strlen("--backuplog=")]), sizeof(logfile)); if(strlen(logfile) == 0) { fprintf(stderr, "ERR: Invalid log file name."); LOG = 0; } if(DEBUG_MODE) printf("INFO: LOGFILE='%s'\n", logfile); } else if(!strncmp(*argv, "--status=", 9)) { STATUS = 1; memset(statusfile, 0, sizeof(statusfile)); strncpy(statusfile, &((*argv)[strlen("--status=")]), sizeof(statusfile)); if(strlen(statusfile) == 0) { fprintf(stderr, "ERR: Invalid status log file name."); STATUS = 0; } if(DEBUG_MODE) printf("INFO: STATUS='%s'\n", statusfile); } else if(!strncmp(*argv, "--targetdir=", 12)) { TARGETDIR = 1; memset(targetdir, 0, sizeof(targetdir)); strncpy(targetdir, 
&((*argv)[strlen("--targetdir=")]), sizeof(targetdir)); if(DEBUG_MODE) printf("INFO: TARGETDIR='%s'\n", targetdir); } else if(!strncmp(*argv, "--comparedir=", 13)) { COMPAREDIR = 1; memset(comparedir, 0, sizeof(comparedir)); strncpy(comparedir, &((*argv)[strlen("--comparedir=")]), sizeof(comparedir)); if(DEBUG_MODE) printf("INFO: COMPAREDIR='%s'\n", comparedir); if(!dirContainsFiles(comparedir)) { printf("ERR: Cannot run purge operation against a comparison directory with zero files '%s'\n", comparedir); exit(EXIT_FAILURE); } } else if(!strcmp(*argv, "--dots")) DOTS = 1; else if(!strcmp(*argv, "--quiet")) QUIET = 1; argc--; argv++; } if(!strcmp(*argv, "--help")) { printHelp(); exit(0); } if(!strcmp(*argv, "--version") || !strcmp(*argv, "-v")) { printVersion(); exit(0); } time(¤ttime); fdslot = malloc(sizeof(struct fileData) * MAXTHRDS); for(i = 0; i < MAXTHRDS; i++) { fdslot[i].THRDslot = -1; if((error = pthread_attr_init(&fdslot[i].tattr))) fprintf(stderr, "Failed to create pthread attr: %s\n", strerror(error)); else if((error = pthread_attr_setdetachstate(&fdslot[i].tattr, PTHREAD_CREATE_DETACHED) )) fprintf(stderr, "failed to set attribute detached: %s\n", strerror(error)); } pthread_mutex_init(&mutexFD, NULL); pthread_cond_init(&condFD, NULL); strcpy(fdslot[0].dname, (const char*)*argv); struct stat statRoot = { 0 }; if(stat(fdslot[0].dname, &statRoot) == -1 || !S_ISDIR(statRoot.st_mode)) { fprintf(stderr, "ERR: Invalid directory specification '%s'\n", fdslot[0].dname); } rootdirlen = strlen(fdslot[0].dname); //Trim the trailing slash so we don't get // in the path if(fdslot[0].dname[strlen(fdslot[0].dname) - 1] == '/') { fdslot[0].dname[strlen(fdslot[0].dname) - 1] = '\0'; rootdirlen--; } fdslot[0].THRDslot = ThreadCNT++; fdslot[0].THRDid = totalTHRDS++; fdslot[0].flag = 0; if(TARGETDIR) { makeTargetDirPath(fdslot[0].dname); } if((error = pthread_create(&(fdslot[0].thread_id), &fdslot[0].tattr, fileDir, (void*)&fdslot[0]))) { fprintf(stderr, "failed to start a 
thread: %s\n", strerror(error)); exit(error); } if(SHOWTOTALS || STATUS || DU_MODE) atexit(printTotals); pthread_mutex_lock(&mutexFD); if(ThreadCNT > 0) pthread_cond_wait(&condFD, &mutexFD); pthread_mutex_unlock(&mutexFD); pthread_exit(NULL); }