Pwalk.c

From OSNEXUS Online Documentation Site
Revision as of 16:15, 16 July 2015 by Qadmin (Talk | contribs)

Jump to: navigation, search
/*
 *  pwalk.c  Parallel walk of a file system, reporting file metadata

Copyright (C) 2013 John F Dey 
Copyright (C) 2013 Steve Umbehocker 

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

 *  pwalk is inspired by du but designed to be used with large 
 *  file systems ( > 10 million files ) 
 *  
 *  History: dir.c, walk.c, walkv2,3,4, pwalkfs.c
 *
 *  Example of using the directory call, opendir and readdir to
 *  simulate ls.
 *
 *  1997.03.20 John Dey Although this is the first documented date for
 *             this file I have versions that date from 1988. 
 *  2002.09.04 John Dey walk the directory and gather stats
 *  2002.09.06 John Dey make to look like du -a
 *  2004.07.06 John Dey add -a and -k 
 *  2008.04.01 John Dey CSV output for database use
 *  2009.04.12 John Dey v3.1
 *  replaced constants with "FILENAME_MAX", 
 *  Size of directory is size of all files in directory plus itself
 *  Added printStat function
 *  print file count on line with directories
 *  2009.05.18 check for control characters and double quotes in file names;
 *  escape the double quotes and print bad file names to stderr
 *  2009.12.30 size for dir should just be dir size; Fix; count returns 0 
 *  for normal files and count of just the local directory; Previously count
 *  return the recursive file count for the whole tree. 
 *
    2010.01.08 john dey; New field to output: file name extension. 
    Extension is defined as the last part of the name after a Dot "." 
    if no dot is found extension is empty ""
    new feature: accepts multiple directory names as cmd line arguments

    This line of code has been replaced
     if ( f.st_mode & S_IFDIR && (f.st_mode & S_IFMT != S_IFLNK) ) {
    With this new line of code:
     if ( S_ISDIR(f.st_mode)  ) { Or I could have done: if ( (f.st_mode & S_IFDIR) == S_IFDIR )
   2010.01.11  John Dey
   Complete re-write of walkv4 transforming it into pwalk.
   pwalk is a threaded version of walkv4.
   pwalk will call fileDir as a new thread until MAXTHRDS is reached.
   2010.02.01 pwalk v1 did not detach nor did it join the threads; v2
   fixes this shortcoming;

   2010.03.24 john dey; New physical hardware is available to run pwalk.
   16 threads are only using about 20% CPU with 10% IO wait. Based on this
   the thread count will be doubled to 32.
   2010.11.29 Add mutex for printStat
   2012.10.09 --NoSnap flag added.  ignore directories that have the name
              .snapshot
   2013.08.02 john f dey; Add GNU license, --version flag added
   2013.10.15 steve umbehocker; added verbose --debug spam mode
                added compare and copy capabilities
                added retain= option to backup only recently modified files
                added customizable maxthreads
                added summary total display at end (disable with --nototals)
                updated and expanded usage section
                fixed segfault in walker
   2013.10.16 steve umbehocker; added --log mode logging
                added purge mode hooks
                added dryrun/force flags
   2013.10.21 steve umbehocker; added --status logging
                added purge w/ retain
   2013.10.22 steve umbehocker; added check so purge cannot compare against empty dir
                added execute operation logic to do cp/unlink operations outside
                of the mutex so that they're parallelized
                fixed sync/copy to create empty leaf directories
   2014.11.21 steve umbehocker; added fix for files with spaces and special chars
   2015.06.12 steve umbehocker; added fix for --help and --version
   2015.06.18 osnexus eng; merging dot fix from john
   2015.07.07 osnexus eng; fix to skip invalid dirs without exiting
   2015.07.07 osnexus eng; added --exclude= option
   2015.07.07 osnexus eng; fix to add thread cleanup on err condition
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <utime.h>
#include <time.h>
#include <errno.h>
#include <pthread.h>

#undef THRD_DEBUG 

/* Version banner and program name printed by printVersion() and printHelp(). */
static char *Version = "v3.17.1 - 07/16/2015 - John F Dey john@fuzzdog.com, Steve U. eng@osnexus.com";
static char *whoami = "pwalk";

/*
 * Run-time options.  Most of them are plain 0/1 switches; presumably they
 * are all filled in by the option parsing in main() (only part of that
 * parsing is visible here -- TODO confirm for the rest).
 */
int EXCLUDE_DIRS =0; /* if set, fileDir() skips directories whose name matches an excludeDirs[] entry (e.g. .snapshot) */
int TARGETDIR =0; /* if set copy the files to the target path */
int COMPAREDIR =0; /* if set, purge mode compares each file against the compare dir and purges files missing there */
int DEBUG_MODE =0; /* verbose output */
int CMP_ATIME =0; /* indicates that compare operations should look at atime */
int CMP_MTIME =1; /* compare operations look at mtime (on by default) */
int CMP_CTIME =0; /* compare operations look at ctime */
int SHOWTOTALS =1; /* show totals at the end via atexit() */
int DOTS =0; /* print little dots to show progress */
int QUIET =0; /* quiet gives the best backup performance */
int PURGE =0; /* if set, printStat() takes the purge (delete) path instead of the copy path */
int LOG =0; /* if set, logPath() appends every copied target path to logfile */
int NODIRPERMS =0; /* if set, the rsync command that copies directory permissions is not built */
unsigned int DU_MODE =0; /* if set, printTotals() prints DU_TOTAL */
int STATUS =0; /* if set, status snapshots are written to statusfile */
int RETENTION_DAYS =60; /* NOTE(review): not referenced in the visible part of the file -- confirm it is still used */
size_t MAXTHRDS =32; /* upper bound for ThreadCNT; main() clamps --maxthreads to 2..64 */
size_t RETAIN =0; /* retention period in seconds (main() converts --retain days with *24*3600); 0 = not set */
size_t DRYRUN =0; /* if set, no mkdir/copy/unlink is carried out */
size_t FORCE =0; /* purge only unlinks files when this is set (and DRYRUN is not) */
int MOVE_MODE =0; /* if set, files are transferred with mv instead of cp */
unsigned long long DU_TOTAL =4096L; /* running sum of st_size over all entries seen by fileDir(); starts at 4096 */
char * excludeDirs[1024]; /* directory names tested by excludeDir() */
char excludeDirData[8192]; /* presumably the backing storage the excludeDirs[] pointers refer to -- TODO confirm */
size_t excludeDirCount = 0; /* number of valid entries in excludeDirs[] */

/*
 * Deferred operation requested by printStat().  printStat() runs inside the
 * mutexPrintStat critical section and only records what has to be done;
 * fileDir() copies opType/opCommand while still holding the lock and then
 * calls execOperation() after releasing it.
 */
typedef enum operationType { OP_NONE, OP_COPY, OP_DELETE } operationType_t;
operationType_t opType;
/* OP_COPY: shell command line to run; OP_DELETE: path handed to unlink() */
char opCommand[FILENAME_MAX+FILENAME_MAX+1];

/*
 * Per-directory work item handed to fileDir(), either as the argument of a
 * new thread (an entry of fdslot[]) or as a local copy for plain recursion.
 */
typedef struct fileData {
    char dname[FILENAME_MAX+1];  /* full path and basename; fileDir() appends each entry name in place */
    int  THRDslot;              /* index into fdslot[] (0 .. MAXTHRDS-1); -1 marks a free slot */
    int  THRDid;                /* unique ID, increments with each new thread */
    int  flag;                  /* 0 if running as a thread; > 0 gives the recursion depth inside one */
    pthread_t thread_id;        /* system assigned */
    pthread_attr_t tattr;       /* fileDir() always passes fdslot[0].tattr to pthread_create() */
    } fileData_t;

int ThreadCNT  =0; /* number of thread slots currently claimed; ThreadCNT < MAXTHRDS */
int totalTHRDS =0; /* running counter that supplies the next THRDid */
struct fileData *fdslot; /* slot array indexed 0 .. MAXTHRDS-1 (allocation is not in the visible part of the file) */
pthread_mutex_t mutexFD; /* guards ThreadCNT, totalTHRDS and the THRDslot fields */
pthread_mutex_t mutexPrintStat; /* serialises printStat() and the opType/opCommand hand-over */

char targetdir[FILENAME_MAX+1]; /* base directory files are copied to; printStat() appends a trailing '/' */
char comparedir[FILENAME_MAX+1]; /* base directory purge compares against; printStat() appends a trailing '/' */
char logfile[FILENAME_MAX+1]; /* file logPath() appends to */
char statusfile[FILENAME_MAX+1]; /* file updateStatus() rewrites */
int rootdirlen =0; /* printStat() skips rootdirlen+1 characters of a source path to get the part below the root */

/* counters for summing up and printing totals at the end */
size_t scancount =0;
size_t copycount =0;
size_t modifiedcount =0;
size_t createdcount =0;
size_t errorcount =0;
size_t skippedcount =0;
size_t deletedcount =0;
time_t currenttime =0; /* reference time for the elapsed-time and retention calculations */
time_t laststatustime =0; /* time of the last periodic "running" status update */

/*
 *  Write the program name, the version string and the licence notice
 *  to stderr.
 */
void
printVersion( )
{
   /* fixed licence lines that need no formatting */
   static const char *const notice[] = {
      " * pwalk comes with ABSOLUTELY NO WARRANTY;\n",
      " * This is free software, you can redistribute it and/or modify it under the terms of the GNU General Public License\n",
      " * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.\n"
   };
   size_t line;

   fprintf( stderr, "%s version %s\n", whoami, Version );
   fprintf( stderr, "%s Copyright (C) 2013 John F Dey & Steve Umbehocker\n", whoami );
   for ( line = 0; line < sizeof notice / sizeof notice[0]; line++ )
      fputs( notice[line], stderr );
}

/*
 *  Write the usage text to stderr: the version banner, the options shared
 *  by all modes, the CSV field list, and one section each for the walk,
 *  copy/backup and delete/purge modes.
 *
 *  NOTE(review): the field list starts with "DateStamp", but the record
 *  printStat() writes starts with the inode -- confirm which is intended.
 */
void
printHelp( )
{
   fprintf( stderr, "%s version %s\n\n", whoami, Version );
   fprintf( stderr, "Usage : \n");
   fprintf( stderr, "%s --help --version\n", whoami );
   /* options shared by all modes */
   fprintf( stderr, "       Common Args :\n");
   fprintf( stderr, "         --dryrun        : use this to test commands\n" );
   fprintf( stderr, "                           without making any changes to the system\n" );
   fprintf( stderr, "         --maxthreads=N  : indicates the number of threads (default=32)\n" );
   fprintf( stderr, "         --nototals      : disables printing of totals after the scan\n" );
   fprintf( stderr, "         --dots          : prints a dot and total every 1000 files scanned.\n" );
   fprintf( stderr, "         --quiet         : no chatter, speeds up the scan.\n" );
   fprintf( stderr, "         --nosnap        : Ignore directories with name .snapshot\n");
   fprintf( stderr, "         --excludedirs=DIR1,DIR2,... : Ignore directories with specified names.\n");
   fprintf( stderr, "         --debug         : Verbose debug spam\n");
   fprintf( stderr, "       Output Format : CSV\n" );
   fprintf( stderr, "               Fields : DateStamp,\"inode\",\"filename\",\"fileExtension\",\"UID\",\n");
   fprintf( stderr, "                        \"GID\",\"st_size\",\"st_blocks\",\"st_mode\",\"atime\",\n" );
   fprintf( stderr, "                        \"mtime\",\"ctime\",\"File Count\",\"Directory Size\"\n");
   fprintf( stderr, "\n" );
   /* plain walk: report only */
   fprintf( stderr, "Walk Usage : \n");
   fprintf( stderr, "%s SOURCEDIR\n", whoami); 
   fprintf( stderr, "       Command Args :\n");
   fprintf( stderr, "         SOURCEDIR       : Fully qualified path to the directory to walk\n");
   fprintf( stderr, "\n" );
   /* copy / backup mode */
   fprintf( stderr, "Copy/Backup Usage : \n");
   fprintf( stderr, "%s --targetdir=TARGETDIR SOURCEDIR\n", whoami); 
   fprintf( stderr, "%s --retain=30 --targetdir=TARGETDIR SOURCEDIR\n", whoami); 
   fprintf( stderr, "       Command Args :\n" );
   fprintf( stderr, "         --move              : move files rather than copy\n" );
   fprintf( stderr, "         --nodirperms        : don't copy directory permissions\n" );
   fprintf( stderr, "         --targetdir         : copy files to specified TARGETDIR\n" );
   fprintf( stderr, "         --atime             : copy if access time change (default=no atime)\n" );
   fprintf( stderr, "         --backuplog=LOGFILE : log all files that were copied.\n" );
   fprintf( stderr, "         --status=STATUSFILE : write periodic status updates to specified file\n" );
   fprintf( stderr, "         --retain            : copy if file ctime or mtime within retention period\n" );
   fprintf( stderr, "                               specified in days. eg: --retain=60\n" );
   fprintf( stderr, "         --nomtime           : ignore mtime (default=use mtime)\n" );
   fprintf( stderr, "         SOURCEDIR           : Fully qualified path to the directory to walk\n");
   fprintf( stderr, "\n" );
   /* delete / purge mode */
   fprintf( stderr, "Delete/Purge Usage : \n");
   fprintf( stderr, "%s --purge [--force] --comparedir=COMPAREDIR PURGEDIR\n", whoami); 
   fprintf( stderr, "%s --purge [--force] --retain=N PURGEDIR\n", whoami); 
   fprintf( stderr, "       Command Args :\n" );
   fprintf( stderr, "         --comparedir=DIR : compare against this dir but dont touch any files\n" );
   fprintf( stderr, "                            in it. comparedir is usually the SOURCEDIR from\n" );
   fprintf( stderr, "                            a prior copy/sync stage.\n" );
   fprintf( stderr, "         --purge          : WARNING: this deletes files older than the\n" );
   fprintf( stderr, "                            retain period -OR- if retain is not specified\n" );
   fprintf( stderr, "                            --comparedir is required. The comparedir is\n" );
   fprintf( stderr, "                            compared against the specified dir and any files\n" );
   fprintf( stderr, "                            not found in the comparedir are purged.\n" );
   fprintf( stderr, "         --force          : NOTE: default is a *dry-run* for purge, you must\n");
   fprintf( stderr, "                            specify --force option to actually purge files\n" );
   fprintf( stderr, "         --atime          : keep if access time within retain period\n" );
   fprintf( stderr, "         --retain         : keep if file ctime or mtime within retention period\n" );
   fprintf( stderr, "                            specified in days. eg: --retain=60\n" );
   fprintf( stderr, "\n");
}

/*
 *  Tell whether a directory name is on the exclude list.
 *
 *  dirName  bare directory name (fileDir() passes d_name, not a full path)
 *
 *  Returns 1 when dirName equals one of the first excludeDirCount entries
 *  of excludeDirs[], 0 otherwise (also for a NULL name or an empty list).
 */
int excludeDir(const char *dirName)
{
    size_t i;   /* same type as excludeDirCount: no signed/unsigned comparison */

    if(dirName == NULL)
        return 0;

    for(i = 0; i < excludeDirCount; i++)
    {
        /* an unset entry cannot match and must not reach strcmp() */
        if(excludeDirs[i] != NULL && strcmp(excludeDirs[i], dirName) == 0)
            return 1;
    }
    return 0;
}

/*
 *  Rewrite the status file with a snapshot of the counters.
 *
 *  message  short state text written on the "status:" line
 *           (the callers in this file pass "running" and "completed")
 *
 *  The file named by statusfile is truncated and rewritten on every call.
 *  If it cannot be opened the update is silently skipped.
 */
void
updateStatus(char *message)
{
    time_t now;
    time(&now);
    size_t elapsed = (size_t)difftime(now, currenttime);
    size_t hours = elapsed/3600;
    size_t minutes = (elapsed%3600)/60;
    size_t seconds = elapsed%60;
    FILE *statusFp = fopen(statusfile, "w");
    if(statusFp == NULL)
        return;

    /* the counters are size_t, so they need %zu; %lu only happens to
       work where size_t and unsigned long have the same width */
    fprintf(statusFp, "   status: %s\n", message);
    fprintf(statusFp, "  scanned: %zu\n", scancount);
    fprintf(statusFp, "   copied: %zu\n", copycount);
    fprintf(statusFp, " modified: %zu\n", modifiedcount);
    fprintf(statusFp, "  created: %zu\n", createdcount);
    fprintf(statusFp, "  skipped: %zu\n", skippedcount);
    fprintf(statusFp, "  deleted: %zu\n", deletedcount);
    fprintf(statusFp, "   errors: %zu\n", errorcount);
    fprintf(statusFp, "  elapsed: %02zu:%02zu:%02zu\n", hours, minutes, seconds);
    fclose(statusFp);
}

/*
 *  End-of-run report.
 *
 *  Writes the final "completed" status file when STATUS is set, prints
 *  the byte total when DU_MODE is set, and, unless SHOWTOTALS is off,
 *  prints a table of the counters and the elapsed time to stderr.
 */
void
printTotals( )
{
   if(STATUS)
   {
       updateStatus("completed");
   }
   if(DU_MODE)
   {
       fprintf(stderr, "TOTAL: %llu\n", DU_TOTAL);
   }
   if(!SHOWTOTALS)
       return;

   time_t now;
   time(&now);
   size_t elapsed = (size_t)difftime(now, currenttime);
   size_t hours = elapsed/3600;
   size_t minutes = (elapsed%3600)/60;
   size_t seconds = elapsed%60;

   /* the counters are size_t, so they need %zu rather than %lu */
   fprintf( stderr, "\n\n%12s %12s %12s %12s %12s %12s %12s      %12s\n",            "Scanned", "Copied", "Modified",     "Created",   "Deleted",    "Skipped",     "Errors", "Elapsed (HH:MM:SS)");
   fprintf( stderr,   "%12zu %12zu %12zu %12zu %12zu %12zu %12zu     %02zu:%02zu:%02zu\n", scancount, copycount, modifiedcount, createdcount, deletedcount, skippedcount, errorcount, hours, minutes, seconds);
}

/*
 *  Copy fullPath into dirPath and cut it back to its directory part.
 *
 *  dirPath    destination buffer
 *  fullPath   NUL-terminated path to copy
 *  bufferLen  size of dirPath in bytes; the result never exceeds it and
 *             is always NUL-terminated (a longer path is truncated first)
 *
 *  Characters are removed from the end until a '/' is the last one, so
 *  the result keeps its trailing slash: "/a/b/c.txt" -> "/a/b/".
 *  The first character is never removed, so a path without any '/'
 *  is reduced to its first character and an empty path stays empty.
 */
void 
getDirPath(char *dirPath, char *fullPath, size_t bufferLen)
{
    size_t len;

    if(dirPath == NULL || bufferLen == 0)
        return;
    if(fullPath == NULL)
    {
        dirPath[0] = '\0';
        return;
    }

    len = strlen(fullPath);
    if(len >= bufferLen)
        len = bufferLen - 1;
    memmove(dirPath, fullPath, len);
    dirPath[len] = '\0';

    /* len > 1 keeps the first character and also makes the empty
       string safe: there is no strlen()-1 that could wrap around */
    while(len > 1 && dirPath[len-1] != '/')
    {
        len--;
        dirPath[len] = '\0';
    }
}

/*
 *  Append one path, followed by a newline, to the log file.
 *
 *  Nothing is written unless LOG is set and a log file name has been
 *  configured; a log file that cannot be opened is silently ignored.
 */
void
logPath(char *targetPath)
{
    FILE *logFp;

    if(LOG && logfile[0] != 0)
    {
        logFp = fopen(logfile, "a");
        if(logFp == NULL)
            return;
        fprintf(logFp, "%s\n", targetPath);
        fclose(logFp);
    }
}

/*
 *  Tell whether a directory holds at least one entry whose name does not
 *  start with a dot.
 *
 *  Returns 1 as soon as such an entry is seen, 0 when there is none or
 *  when the directory cannot be opened.
 *
 *  NOTE(review): every name starting with '.' is ignored, not only "."
 *  and "..", so a directory holding nothing but hidden files counts as
 *  empty -- confirm that this is intended.
 */
int
dirContainsFiles(char *targetPath)
{
  int hasVisibleEntry = 0;
  struct dirent *entry;
  DIR *dir = opendir(targetPath);

  if (dir == NULL)
    return 0;

  while (!hasVisibleEntry && (entry = readdir(dir)) != NULL)
  {
    if (entry->d_name[0] != '.')
      hasVisibleEntry = 1;
  }
  closedir(dir);
  return hasVisibleEntry;
}

/*
 *  this needs to be in a crital secion  (and it is!)
 */
void
printStat( char *fname, char *exten, struct stat *f, long fileCnt, long dirSz )
{
   char sourcePath[FILENAME_MAX+FILENAME_MAX];
   char sourcePathDir[FILENAME_MAX+FILENAME_MAX];
   char targetPath[FILENAME_MAX+FILENAME_MAX];
   char targetPathDir[FILENAME_MAX+FILENAME_MAX];
   char outFile[FILENAME_MAX+FILENAME_MAX];

   char *s, *t = sourcePath;
   int cnt = 0;
   char Sep=',';  /* this was added to help with debugging */

   opType = OP_NONE;
   cnt =0;
   /* fix bad file name is moved inside printStat to make it thread safe */
   s = fname;
   while ( *s ) {
      if ( *s == '"' )
         *t++ = '\\';
      //if ( *s < 32 ) {
      //   s++;
      //   cnt++;
      //} else
         *t++ = *s++;
   }
   *t++ = *s++;

   char badfile=0;
   if ( cnt )
   {
      fprintf( stderr, "Bad File: %s\n", fname );
      badfile=1;
   }

   if(!DOTS && !QUIET)
   {
     sprintf ( outFile, "\"%ld\",\"%s\",\"%s\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%07o\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%ld\"\n",
      (long)f->st_ino, sourcePath, (exten)? exten:"", (long)f->st_uid,
      (long)f->st_gid, (long)f->st_size, (long)f->st_blocks, (int)f->st_mode,
      (long)f->st_atime, (long)f->st_mtime, (long)f->st_ctime, fileCnt, dirSz );
     fputs( outFile, stdout );
   }

   if(STATUS && (scancount%256==0))
   {
       time_t now;
       time(&now);
       size_t elapsed = (size_t)difftime(now, laststatustime);
       if(elapsed >= 10)
       {
           laststatustime = now;
           updateStatus("running");
       }
   }

   if(DOTS)
   {
     if(scancount%1000==0)
         printf("%ld", scancount);
     else if(scancount%100==0)
         printf(".");
   }

   scancount++;

   if(PURGE)
   {
       if(S_ISDIR(f->st_mode))
       {
           if(DEBUG_MODE) printf("INFO: Purge skipping DIR: '%s'\n", sourcePath);
           skippedcount++;
           return;
       }
       if(!RETAIN && COMPAREDIR)
       {
           //Add a trailing / to the base path if it's not there.
           size_t comparedirLen = strlen(comparedir);
           if((comparedirLen+1 < FILENAME_MAX+FILENAME_MAX) && comparedir[comparedirLen-1] != '/')
           {
               comparedir[comparedirLen] = '/';
               comparedir[comparedirLen+1] = 0;
           }
           //Build the full path to the target file
           size_t offset = rootdirlen+1;
           strncpy(targetPath, comparedir, FILENAME_MAX+FILENAME_MAX);
           strncat(targetPath, &sourcePath[offset], FILENAME_MAX+FILENAME_MAX);

           //Build the full path to the target directory by trimming from the end of the full file path
           getDirPath(targetPathDir, targetPath, FILENAME_MAX+FILENAME_MAX);

           if(DEBUG_MODE) 
              printf("comparing: %s : %s\n", targetPath, sourcePath);
           struct stat statTarget = {0};
           if(stat(targetPath, &statTarget) == -1 && S_ISREG(f->st_mode))
           {
               deletedcount++;
               if(DEBUG_MODE)
                   printf("INFO: PURGE [%s] : %s\n", (DRYRUN || !FORCE) ? "dryrun" : "UNLINK", sourcePath);
               if(!DRYRUN && FORCE)
               {
                   opType = OP_DELETE;
                   strncpy(opCommand, sourcePath, FILENAME_MAX+FILENAME_MAX);
                   if(DOTS)
                       printf("!");
               }
           }
           else
               skippedcount++;
           return;
       }

       if(RETAIN && (currenttime > f->st_ctime) && (
          (!CMP_CTIME || (difftime(currenttime, f->st_ctime) > RETAIN)) &&
          (!CMP_MTIME || (difftime(currenttime, f->st_mtime) > RETAIN)) &&
          (!CMP_ATIME || (difftime(currenttime, f->st_atime) > RETAIN))))
       {
           deletedcount++;
           if(DEBUG_MODE)
           {
               printf("INFO: PURGE [%s] (%lu days old): %s\n", (DRYRUN || !FORCE) ? "dryrun" : "UNLINK", (size_t)difftime(currenttime, f->st_mtime)/(3600*24), sourcePath);
           }
           if(!DRYRUN && FORCE)
           {
               //we want to just save the path so we can do the purge outside of this criticial section
               //this way our delete is parallelized
               strncpy(opCommand, sourcePath, FILENAME_MAX+FILENAME_MAX);
               opType = OP_DELETE;
               if(DOTS)
                   printf("!");
           }
       }
       else
           skippedcount++;
       return;
   }

   //If a target dir was specified then we need to do file copy operations
   if(TARGETDIR && !badfile)
   {

       //Add a trailing / to the base path if it's not there.
       size_t targetdirLen = strlen(targetdir);
       if((targetdirLen+1 < FILENAME_MAX+FILENAME_MAX) && targetdir[targetdirLen-1] != '/')
       {
           targetdir[targetdirLen] = '/';
           targetdir[targetdirLen+1] = 0;
       }

       //Build the full path to the target file
       size_t offset = rootdirlen+1;
       strncpy(targetPath, targetdir, FILENAME_MAX+FILENAME_MAX);
       strncat(targetPath, &sourcePath[offset], FILENAME_MAX+FILENAME_MAX);

       //Build the full path to the target directory by trimming from the end of the full file path
       getDirPath(targetPathDir, targetPath, FILENAME_MAX+FILENAME_MAX);

       struct stat statTargetDir = {0};
       if(stat(targetPathDir, &statTargetDir) == -1 || !S_ISDIR(statTargetDir.st_mode))
       {
           getDirPath(sourcePathDir, sourcePath, FILENAME_MAX+FILENAME_MAX);
           struct stat statSourceDir = {0};
           if(!stat(sourcePathDir, &statSourceDir) && S_ISDIR(statSourceDir.st_mode))
           {
               char command[FILENAME_MAX+FILENAME_MAX];
               command[0]='\0';
               snprintf(command, FILENAME_MAX+FILENAME_MAX, "mkdir -p \"%s\"", targetPathDir);

               if(DEBUG_MODE)
                   printf("INFO: making DIR: %s\n", command);
               if(!DRYRUN)
               {
                   system(command);
                   struct utimbuf times;
                   times.actime = statSourceDir.st_atime;
                   times.modtime = statSourceDir.st_mtime;
                   utime(targetPathDir, &times);
               }

               //preserve the permissions for the directories
               if(!NODIRPERMS)
                   snprintf(command, FILENAME_MAX+FILENAME_MAX, "rsync  -dAXogtpx --no-recursive \"%s/\" \"%s\"", sourcePathDir, targetPathDir);
               if(DEBUG_MODE)
                   printf("INFO: updating DIR perms: %s\n", command);
               if(!DRYRUN)
               {
                   system(command);
               }
           }
           else
           {
               fprintf(stderr, "ERR: Failed to create target directory '%s'\n", targetPathDir);
               errorcount++;
               return;
           }
       }

       if(S_ISDIR(f->st_mode))
       {
           struct stat statTargetDir = {0};
           if(stat(targetPath, &statTargetDir) == -1)
           {
               char command[FILENAME_MAX+FILENAME_MAX];
               command[0]='\0';

               snprintf(command, FILENAME_MAX+FILENAME_MAX, "mkdir -p \"%s\"", targetPath);
               if(DEBUG_MODE)
                   printf("INFO: making DIR: %s\n", command);
               if(!DRYRUN)
               {
                   system(command);
                   struct utimbuf times;
                   times.actime = f->st_atime;
                   times.modtime = f->st_mtime;
                   utime(targetPath, &times);
               }
               //preserve the permissions for the directories
               if(!NODIRPERMS)
                   snprintf(command, FILENAME_MAX+FILENAME_MAX, "rsync  -dAXogtpx --no-recursive \"%s/\" \"%s\"", sourcePath, targetPath);
               if(DEBUG_MODE)
                   printf("INFO: updating DIR perms: %s\n", command);
               if(!DRYRUN)
               {
                   system(command);
               }
               createdcount++;
               if(DEBUG_MODE) 
                   printf("INFO: Created DIR: '%s'\n", sourcePath);
           }
           return;
       }

       struct stat statTarget = {0};
       char doCopy = 0;
       char doCompare = 0;

       if(stat(targetPath, &statTarget) == -1 || MOVE_MODE)
       {
           doCopy = 1;
       }
       else if(S_ISDIR(statTarget.st_mode))
       {
           fprintf(stderr, "ERR: Target is a directory '%s'\n", targetPath);
           errorcount++;
       }
       else if(!S_ISREG(statTarget.st_mode))
       {
           fprintf(stderr, "ERR: Target not regular file '%s'\n", targetPath);
           errorcount++;
       }
       else
       {
           doCompare = 1;
       }

       if(doCompare)
       {
           if(DEBUG_MODE)
           {
               sprintf ( outFile, "INFO: comparing with [\"%ld\",\"%s\",\"%s\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%07o\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%ld\"]\n",
                 (long)statTarget.st_ino, targetPath, (exten)? exten:"", (long)statTarget.st_uid,
                 (long)statTarget.st_gid, (long)statTarget.st_size, (long)statTarget.st_blocks, (int)statTarget.st_mode,
                 (long)statTarget.st_atime, (long)statTarget.st_mtime, (long)statTarget.st_ctime, fileCnt, dirSz );
               fputs( outFile, stdout );
           }
           if(MOVE_MODE){ doCopy=1; }
           else if(statTarget.st_mode != f->st_mode)
           {
               if(DEBUG_MODE) printf("INFO: File has MODE CHANGE, copying\n");
               doCopy = 1;
           }
           else if(statTarget.st_size != f->st_size)
           {
               if(DEBUG_MODE) printf("INFO: File has SIZE CHANGE, copying\n");
               doCopy = 1;
           }
           //  the target file will always have a different create time so we don't 
           //  want to use == for ctime compare, rather we just want to see if the source
           //  was created after the target, if so, need to recopy
           else if(CMP_CTIME && (statTarget.st_ctime < f->st_ctime))
           {
               if(DEBUG_MODE) printf("INFO: CTIME on target is older than source, copying\n");
               doCopy = 1;
           }
           else if(CMP_MTIME && (statTarget.st_mtime != f->st_mtime))
           {
               if(DEBUG_MODE) printf("INFO: File has MTIME CHANGE, copying\n");
               doCopy = 1;
           }

           if(!doCopy && CMP_ATIME && (statTarget.st_atime != f->st_atime))
           {
               if(DEBUG_MODE) printf("INFO: File has only ATIME CHANGE, updating timestamps\n");
               struct utimbuf times;
               times.actime = f->st_atime;
               times.modtime = f->st_mtime;
               utime(targetPath, &times);
           }
       }

       if(doCopy)
       {
           if(RETAIN)
           {
               if((currenttime < f->st_ctime) || 
                  (CMP_CTIME && (difftime(currenttime, f->st_ctime) < RETAIN)) ||
                  (CMP_MTIME && (difftime(currenttime, f->st_mtime) < RETAIN)) ||
                  (CMP_ATIME && (difftime(currenttime, f->st_atime) < RETAIN)))
               {
                   if(DEBUG_MODE)
                       printf("INFO: INSIDE RETAIN (%lu days old): %s\n", (size_t)difftime(currenttime, f->st_mtime)/(3600*24), sourcePath);
               }
               else
               {
                   if(DEBUG_MODE)
                        printf("INFO: OUTSIDE RETAIN (%lu days old), skipping: %s\n", (size_t)difftime(currenttime, f->st_mtime)/(3600*24), sourcePath);
                   skippedcount++;
                   return;
               }
           }

           copycount++;
           if(doCompare)
               modifiedcount++;
           else
               createdcount++;
           char command[FILENAME_MAX+FILENAME_MAX];
           command[0]='\0';
           if(MOVE_MODE)
               snprintf(command, FILENAME_MAX+FILENAME_MAX, "mv \"%s\" \"%s\"", sourcePath, targetPath);
           else
               snprintf(command, FILENAME_MAX+FILENAME_MAX, "cp --no-dereference --preserve=all \"%s\" \"%s\"", sourcePath, targetPath);
           if(DEBUG_MODE) 
               printf("INFO: %s\n", command);
           if(!DRYRUN)
           {
               //we want to just save the command so we can do the copy outside of this criticial section
               //this way our copy op is parallelized
               opType = OP_COPY;
               strncpy(opCommand, command, FILENAME_MAX+FILENAME_MAX);
           }
           if(LOG)
               logPath(targetPath);
       }
   }
}

/*
 *  Carry out the operation printStat() requested.
 *
 *  doOperation  OP_COPY   - doCommand is a shell command line, run via system()
 *               OP_DELETE - doCommand is a path, removed via unlink()
 *               OP_NONE   - nothing to do
 *  doCommand    command line or path, see above
 *
 *  fileDir() calls this after releasing mutexPrintStat so that copies and
 *  deletes of different threads run in parallel.  For that reason failures
 *  are only reported on stderr; the shared error counter is not touched
 *  because it is not protected here.
 */
void execOperation(operationType_t doOperation, char *doCommand)
{
    if(doOperation == OP_COPY)
    {
        int rc = system(doCommand);
        if(rc != 0)
            fprintf(stderr, "ERR: command failed (status %d): %s\n", rc, doCommand);
    }
    else if(doOperation == OP_DELETE)
    {
        if(unlink(doCommand) == -1)
            fprintf(stderr, "ERR: unable to delete '%s' (errno %d)\n", doCommand, errno);
    }
}

/*
 *  Give a thread slot back and end the calling thread.
 *
 *  Only acts when fd belongs to a thread (flag == 0): the slot is marked
 *  free and the thread counter is decremented under mutexFD, then the
 *  thread exits, so in that case this function does not return.
 *  For a NULL fd or a recursive call (flag > 0) it simply returns.
 */
void cleanupThread(struct fileData *fd)
{
    if ( fd == NULL || fd->flag != 0 )
        return;   /* plain recursion inside a thread: no slot to release */

    pthread_mutex_lock ( &mutexFD );
    fd->THRDslot = -1;
    ThreadCNT -= 1;
    pthread_mutex_unlock ( &mutexFD );

    pthread_exit( EXIT_SUCCESS );
}


/*
 *  Open a directory and read the conents.  Call stat with each
 *  file name. 
 *
 *  Recursively call self for each sub dir. 
 *
 *  print inode meta data for each file, one line per file in CSV format
 */
void
*fileDir( void *arg ) 
{
    char *s, *t, *u, *dot, *end_dname;
    char fname[FILENAME_MAX+1];
    int  slot, id, found;
    DIR *dirp;
    long localCnt =0; /* number of files in a specific directory */
    unsigned long long localSz  =0; /* byte cnt of files in the local directory 2010.07 */
    struct dirent *d;
    struct stat f;
    struct fileData *fd, local;

    fd = (struct fileData *) arg;
#ifdef THRD_DEBUG
    printf( "Start %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname );
#endif /* THRD_DEBUG */
    if (fd == NULL)
    {
        fprintf( stderr, "ERROR: invalid directory specified, skipping.\n");
        return;
    }
    if((dirp = opendir( fd->dname )) == NULL )
    {
        fprintf( stderr, "ERROR: unable to open directory '%s', skipping.\n", fd->dname);
        cleanupThread(fd);
        return;
    }
    /* find the end of fs->name and put '/' at the end <end_dname>
       points to char after '/' */
    s = fd->dname + strlen(fd->dname);
    *s++ = '/';
    end_dname = s;
    while ( (d = readdir( dirp )) != NULL ) {
        if ( strcmp(".",d->d_name) == 0 ) continue;
        if ( strcmp("..",d->d_name) == 0 ) continue;
        localCnt++;
        s = d->d_name;
        t = end_dname;
        while ( *s ) 
            *t++ = *s++;
        *t = '\0'; 
        if ( lstat ( fd->dname, &f ) == -1 ) {
            fprintf( stderr, "ERROR: lstat failed: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname );
            continue;
        } 
        /* Follow Sub dirs recursivly but don't follow links */
        localSz += f.st_size;
        DU_TOTAL += f.st_size;
        if ( S_ISDIR(f.st_mode) ) {
            if (EXCLUDE_DIRS && excludeDir( d->d_name ) ) {
               if(DEBUG_MODE) fprintf( stderr, "INFO: skipping directory: %s\n", d->d_name );
               continue;
            }
            pthread_mutex_lock (&mutexFD);
            if ( ThreadCNT < MAXTHRDS ) {
                ThreadCNT++;
                id = totalTHRDS++;
                slot =0; found = -1;
                while ( slot < MAXTHRDS ) {
                    if ( fdslot[slot].THRDslot == -1 ) {
                        found = slot;
                        break;
                    }
                    slot++;
                }
                if ( found == -1 )
                   fprintf( stderr, "ERROR: SlotE %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, "no available threads" );
                else
                   fdslot[slot].THRDslot = slot;
            } else 
                slot = -1;
            pthread_mutex_unlock (&mutexFD);
            if ( slot != -1 ) {
                strcpy( fdslot[slot].dname, (const char*)fd->dname );
                fdslot[slot].THRDid = id;
                fdslot[slot].flag = 0;
                pthread_create( &fdslot[slot].thread_id, &fdslot[0].tattr, 
                                fileDir, (void*)&fdslot[slot] );
            } else {
                strcpy( local.dname, (const char*)fd->dname );
                local.THRDslot = fd->THRDslot;
                local.THRDid = fd->THRDid;
                local.flag = fd->flag + 1;
                fileDir( (void*) &local );
            }
        } else {
           s = end_dname + 1; dot = NULL;
           while ( *s ) 
           { 
               if (*s == '.') 
                   dot = s+1; 
               s++; 
           }

           pthread_mutex_lock (&mutexPrintStat);
           printStat( fd->dname, dot, &f, (long)0, (long)0 );
             operationType_t doOperation = opType;
             opType = OP_NONE;
             char doCommand[FILENAME_MAX+FILENAME_MAX+1];
             strncpy(doCommand, opCommand, FILENAME_MAX+FILENAME_MAX);
           pthread_mutex_unlock (&mutexPrintStat);
           execOperation(doOperation, doCommand);
        }
    }
    closedir( dirp );
    *--end_dname = '\0';
#ifdef THRD_DEBUG
    printf( "Ending %2d%5d %2d<%s>\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname );
#endif /* THRD_DEBUG */
    if ( lstat ( fd->dname, &f ) == -1 ) {
        fprintf( stderr, "ERROR: lstat failed: %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname );
    }
    else {
        s = end_dname - 1; dot = NULL;
        while ( *s != '/' && (s != fd->dname)) 
        {
            if (*s == '.') { dot = s+1; break; }
            s--; 
        }
        if ( s+1 == dot ) /* Dot file is not an extension Exp: /.bashrc */
            dot = NULL;

        pthread_mutex_lock (&mutexPrintStat);
        printStat( fd->dname, dot, &f, localCnt, localSz );
          operationType_t doOperation = opType;
          opType = OP_NONE;
          char doCommand[FILENAME_MAX+FILENAME_MAX+1];
          strncpy(doCommand, opCommand, FILENAME_MAX+FILENAME_MAX);
        pthread_mutex_unlock (&mutexPrintStat);
        execOperation(doOperation, doCommand);
    }
    cleanupThread(fd);
    /* return ; */
}

int
main( int argc, char* argv[] )
{
    int error, i;
    char *s, *c;
    void *status;
    if ( argc < 2 || argv == NULL)
    {
        printHelp( );
        exit( EXIT_FAILURE );
    }
    argc--; argv++;
    while(argc > 1)
    {
        if (!strncmp( *argv, "--maxthreads=", 13 ))
        {
           MAXTHRDS = (size_t)atoi(&((*argv)[strlen("--maxthreads=")]));
           if(MAXTHRDS > 64) MAXTHRDS = 64;
           if(MAXTHRDS <= 2) MAXTHRDS = 2;
           if(DEBUG_MODE) printf("INFO: MAXTHRDS='%lu'\n", MAXTHRDS);
        }
        else if (!strncmp( *argv, "--retain=", 9 ))
        {
           RETAIN = (size_t)atoi(&((*argv)[strlen("--retain=")]))*24*3600;
           if(DEBUG_MODE) printf("INFO: RETAIN='%lu'\n", RETAIN);
        }
        else if ( !strcmp( *argv, "--move" ) )
           MOVE_MODE = 1;
        else if ( !strcmp( *argv, "--debug" ) )
           DEBUG_MODE = 1;
        else if ( !strcmp( *argv, "--du" ) )
        {
           DU_MODE = 1;
           SHOWTOTALS = 0;
           QUIET = 1;
        }
        else if ( !strcmp( *argv, "--nototals" ) )
           SHOWTOTALS = 0;
        else if ( !strcmp( *argv, "--noatime" ) )
           CMP_ATIME = 0;
        else if ( !strcmp( *argv, "--atime" ) )
           CMP_ATIME = 1;
        else if ( !strcmp( *argv, "--nomtime" ) )
           CMP_MTIME = 0;
        else if ( !strcmp( *argv, "--mtime" ) )
           CMP_MTIME = 1;
        else if ( !strcmp( *argv, "--noctime" ) )
           CMP_CTIME = 0;
        else if ( !strcmp( *argv, "--ctime" ) )
           CMP_CTIME = 1;
        else if ( !strncmp( *argv, "--excludedirs=", 14 ) )
        {
           memset(excludeDirData, 0, sizeof(excludeDirData));
           strncpy(excludeDirData, &((*argv)[strlen("--excludedirs=")]), sizeof(excludeDirData));
           if(strlen(excludeDirData) == 0)
           {
               fprintf( stderr, "ERR: Invalid exclude file name.\n");
           }
           else
           {
               char *token = strtok(excludeDirData, ",");
               while(token != NULL && excludeDirCount < 1023)
               {
                   excludeDirs[excludeDirCount] = strdup(token);
                   if(DEBUG_MODE) printf("INFO: Adding exclude dir '%s'\n", token);
                   excludeDirCount++;
                   token = strtok(NULL, ",");
               }
               EXCLUDE_DIRS = 1;
           }
        }
        else if ( !strcmp( *argv, "--NoSnap" ) || !strcmp( *argv, "--nosnap" ) )
        {
           excludeDirs[excludeDirCount] = strdup(".snapshots");
           if(DEBUG_MODE) printf("INFO: Adding exclude dir '.snapshots'\n");
           excludeDirCount++;
           EXCLUDE_DIRS = 1;
        }
        else if ( !strcmp( *argv, "--dryrun" ) )
           DRYRUN = 1;
        else if ( !strcmp( *argv, "--force" ) )
           FORCE = 1;
        else if ( !strcmp( *argv, "--nodirperms" ) )
           NODIRPERMS = 1;
        else if ( !strcmp( *argv, "--purge" ) )
        {
           PURGE = 1;
        }
        else if ( !strncmp( *argv, "--backuplog=", 12 ) )
        {
           LOG = 1; 
           memset(logfile, 0, sizeof(logfile));
           strncpy(logfile, &((*argv)[strlen("--backuplog=")]), sizeof(logfile));
           if(strlen(logfile) == 0)
           {
               fprintf( stderr, "ERR: Invalid log file name.");
               LOG=0;
           }
           if(DEBUG_MODE) printf("INFO: LOGFILE='%s'\n", logfile);
        }
        else if ( !strncmp( *argv, "--status=", 9 ) )
        {
           STATUS = 1; 
           memset(statusfile, 0, sizeof(statusfile));
           strncpy(statusfile, &((*argv)[strlen("--status=")]), sizeof(statusfile));
           if(strlen(statusfile) == 0)
           {
               fprintf( stderr, "ERR: Invalid status log file name.");
               STATUS=0;
           }
           if(DEBUG_MODE) printf("INFO: STATUS='%s'\n", statusfile);
        }
        else if ( !strncmp( *argv, "--targetdir=", 12 ) )
        {
           TARGETDIR = 1; 
           memset(targetdir, 0, sizeof(targetdir));
           strncpy(targetdir, &((*argv)[strlen("--targetdir=")]), sizeof(targetdir));
           if(DEBUG_MODE) 
               printf("INFO: TARGETDIR='%s'\n", targetdir);
        }
        else if ( !strncmp( *argv, "--comparedir=", 13 ) )
        {
           COMPAREDIR = 1; 
           memset(comparedir, 0, sizeof(comparedir));
           strncpy(comparedir, &((*argv)[strlen("--comparedir=")]), sizeof(comparedir));
           if(DEBUG_MODE) printf("INFO: COMPAREDIR='%s'\n", comparedir);
           if(!dirContainsFiles(comparedir))
           {
               printf("ERR: Cannot run purge operation against a comparison directory with no files '%s'\n", comparedir);
               exit( EXIT_FAILURE );
           }
        }
        else if ( !strcmp( *argv, "--dots" ) )
           DOTS = 1;
        else if ( !strcmp( *argv, "--quiet" ) )
           QUIET = 1;
        argc--; argv++;
    }

    if (!strcmp( *argv, "--help" ))
    {
       printHelp( );
       exit( 0 );
    }
    if (!strcmp( *argv, "--version" ) || !strcmp( *argv, "-v" ))
    {
       printVersion( );
       exit( 0 );
    }

    time(&currenttime);

    fdslot = malloc(sizeof(struct fileData) * MAXTHRDS);

    for ( i=0; i<MAXTHRDS; i++ )
    {
        fdslot[i].THRDslot = -1;
        if ( (error = pthread_attr_init( &fdslot[i].tattr )) )
            fprintf( stderr, "Failed to create pthread attr: %s\n",
                             strerror(error));
        else if ( (error = pthread_attr_setdetachstate( &fdslot[i].tattr,
                             PTHREAD_CREATE_DETACHED)
                  ) )
            fprintf( stderr, "failed to set attribute detached: %s\n",
                             strerror(error));
    }
    pthread_mutex_init(&mutexFD, NULL);
    strcpy( fdslot[0].dname, (const char*) *argv );
    rootdirlen = strlen(fdslot[0].dname);
    //Trim the trailing slash so we don't get // in the path
    if(fdslot[0].dname[strlen(fdslot[0].dname)-1] == '/')
    {
        fdslot[0].dname[strlen(fdslot[0].dname)-1] = '\0';
        rootdirlen--;
    }
    fdslot[0].THRDslot = ThreadCNT++;
    fdslot[0].THRDid = totalTHRDS++;
    fdslot[0].flag = 0;
    pthread_create( &(fdslot[0].thread_id), &fdslot[0].tattr, fileDir,
                    (void*)&fdslot[0] );

    if(SHOWTOTALS || STATUS || DU_MODE)
        atexit(printTotals);
    pthread_exit( NULL );
}