Pwalk.c

From OSNEXUS Online Documentation Site
Revision as of 09:33, 18 June 2015 by Qadmin (Talk | contribs)

Jump to: navigation, search

/*

*  pwalk.c  Parallel Walk a file system and report file meta data

Copyright (C) 2013 John F Dey Copyright (C) 2013 Steve Umbehocker

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

*  pwalk is inspired by du but designed to be used with large
*  file systems ( > 10 million files )
*
*  History: dir.c, walk.c, walkv2,3,4, pwalkfs.c
*
*  Example of using the directory call, opendir and readdir to
*  simulate ls.
*
*  1997.03.20 John Dey Although this is the first documented date for
*             this file I have versions that date from 1988.
*  2002.09.04 John Dey walk the directory and gather stats
*  2002.09.06 John Dey make to look like du -a
*  2004.07.06 John Dey add -a and -k
*  2008.04.01 John Dey CSV output for database use
*  2009.04.12 John Dey v3.1
*  replaced constants with "FILENAME_MAX",
*  Size of directory is size of all files in directory plus itself
*  Added printStat function
*  print file count on line with directories
*  2009.05.18 check for control characters and double quotes in file names;
*  escape the double quotes and print bad file names to stderr
*  2009.12.30 size for dir should just be dir size; Fix; count returns 0
*  for normal files and count of just the local directory; Previously count
*  return the recursive file count for the whole tree.
*
   2010.01.08 john dey; New field to output: file name extension.
   Extension is defined as the last part of the name after a Dot "."
   if no dot is found extension is empty ""
   new feature: accepts multiple directory names as cmd line argument
   This line of code has been replaced
    if ( f.st_mode & S_IFDIR && (f.st_mode & S_IFMT != S_IFLNK) ) {
   With this new line of code:
    if ( S_ISDIR(f.st_mode)  ) { Or I could have done: if ( (f.st_mode & S_IFDIR) == S_IFDIR )
  2010.01.11  John Dey
  Complete re-write of walkv4 transforming it into pwalk.
  pwalk is a threaded version of walkv4.
  pwalk will call fileDir as a new thread until MAXTHRDS is reached.
  2010.02.01 pwalk v1 did not detach nor did it join the threads; v2
  fixes this shortcoming;
  2010.03.24 john dey; New physical hardware is available to run pwalk.
  16 threads are only using about 20% CPU with 10% IO wait. Based on this
  the thread count will be doubled to 32.
  2010.11.29 Add mutex for printStat
  2012.10.09 --NoSnap flag added.  ignore directories that have the name
             .snapshot
  2013.08.02 john f dey; Add GNU license, --version flag added
  2013.10.15 steve umbehocker; added verbose --debug spam mode
               added compare and copy capabilities
               added retain= option to backup only recently modified files
               added customizable maxthreads
               added summary total display at end (disable with --nototals)
               updated and expanded usage section
               fixed segfault in walker
  2013.10.16 steve umbehocker; added --log mode logging
               added purge mode hooks
               added dryrun/force flags
  2013.10.21 steve umbehocker; added --status logging
               added purge w/ retain
  2013.10.22 steve umbehocker; added check so purge cannot compare against empty dir
               added execute operation logic to do cp/unlink operations outside
               of the mutex so that they're parallelized
               fixed sync/copy to create empty leaf directories
  2014.11.21 steve umbehocker; added fix for files with spaces and special chars
  2015.06.12 steve umbehocker; added fix for --help and --version
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <utime.h>
#include <time.h>
#include <errno.h>
#include <pthread.h>

#undef THRD_DEBUG

/* Program name and release banner; both are printed by printVersion()
   and printHelp(). */
static char *whoami = "pwalk";
static char *Version =
    "v3.16 - 06/12/2015 - John F Dey john@fuzzdog.com, Steve U. eng@osnexus.com";

/*
 *  Run-time options.  The values below are the defaults; main() may
 *  override them while parsing the command line.
 */
int SKIP_SNAPSHOTS = 0;      /* if set ignore directories called .snapshot */
int TARGETDIR      = 0;      /* if set copy the files to the target path */
int COMPAREDIR     = 0;      /* if set diff the files against the compare dir
                                to determine where the holes are, then purge */
int DEBUG_MODE     = 0;      /* verbose output */
int CMP_ATIME      = 0;      /* indicates that compare operations should look at atime */
int CMP_MTIME      = 1;      /* as above, for mtime (on by default) */
int CMP_CTIME      = 0;      /* as above, for ctime */
int SHOWTOTALS     = 1;      /* show totals at the end via atexit() */
int DOTS           = 0;      /* print little dots to show progress */
int QUIET          = 0;      /* quiet gives the best backup performance */
int PURGE          = 0;      /* set by --purge; printStat() then selects files to delete */
int LOG            = 0;      /* set by --backuplog=; enables logPath() */
int NODIRPERMS     = 0;      /* set by --nodirperms */
unsigned int DU_MODE = 0;    /* set by --du; printTotals() then prints DU_TOTAL */
int STATUS         = 0;      /* when set, updateStatus() writes the status file */
int RETENTION_DAYS = 60;
size_t MAXTHRDS    = 32;     /* upper bound on concurrently running walker threads */
size_t RETAIN      = 0;      /* retention period in seconds (--retain=DAYS * 24 * 3600) */
size_t DRYRUN      = 0;      /* set by --dryrun */
size_t FORCE       = 0;      /* set by --force; purge only unlinks when this is set */
int MOVE_MODE      = 0;      /* set by --move; use mv instead of cp */
unsigned long long DU_TOTAL = 4096L;   /* running byte total printed in --du mode */

/* Action that printStat() queues for the entry it has just examined;
   fileDir() picks it up and hands it to execOperation(). */
typedef enum operationType {
    OP_NONE,
    OP_COPY,
    OP_DELETE
} operationType_t;

/* The queued action and its argument: a shell command for OP_COPY,
   a file path for OP_DELETE. */
operationType_t opType;
char opCommand[FILENAME_MAX + FILENAME_MAX + 1];

/* State of one directory walker; fileDir() receives a pointer to one of
   these as its thread argument. */
struct fileData {
    /* full path and basename */
    char dname[FILENAME_MAX+1];

    /* slot ID 0 - MAXTHRDS */
    int THRDslot;

    /* unique ID, increments with each new THRD */
    int THRDid;

    /* 0 if thread; recursion > 0 */
    int flag;

    /* system assigned */
    pthread_t thread_id;

    pthread_attr_t tattr;
};

/* Walker-thread bookkeeping. */
int ThreadCNT  = 0;               /* ThreadCNT < MAXTHRDS */
int totalTHRDS = 0;               /* source of the THRDid given to each new thread */
struct fileData *fdslot;          /* thread slots; fileDir() scans indexes 0 .. MAXTHRDS-1 */
pthread_mutex_t mutexFD;          /* taken around ThreadCNT / slot changes in fileDir() */
pthread_mutex_t mutexPrintStat;   /* taken around every printStat() call in fileDir() */

/* Path buffers shared by the whole program. */
char targetdir[FILENAME_MAX+1];    /* base directory printStat() copies into */
char comparedir[FILENAME_MAX+1];   /* base directory printStat() compares against when purging */
char logfile[FILENAME_MAX+1];      /* file logPath() appends to */
char statusfile[FILENAME_MAX+1];   /* file updateStatus() rewrites */

/* printStat() skips rootdirlen+1 characters of a source path to get the
   part that is appended to targetdir / comparedir. */
int rootdirlen = 0;

/* counters for summing up and printing totals at the end */
size_t scancount     = 0;
size_t copycount     = 0;
size_t modifiedcount = 0;
size_t createdcount  = 0;
size_t errorcount    = 0;
size_t skippedcount  = 0;
size_t deletedcount  = 0;

/* Reference times: elapsed time and file ages are measured against
   currenttime; laststatustime throttles the periodic status update. */
time_t currenttime    = 0;
time_t laststatustime = 0;

/* Write the version banner and the licence notice to stderr. */
void printVersion( ) {

   /* banner: program name, release string, copyright holders */
   fprintf( stderr,
            "%s version %s\n"
            "%s Copyright (C) 2013 John F Dey & Steve Umbehocker\n",
            whoami, Version, whoami );

   /* fixed GPL notice */
   fputs( " * pwalk comes with ABSOLUTELY NO WARRANTY;\n"
          " * This is free software, you can redistribute it and/or modify it under the terms of the GNU General Public License\n"
          " * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.\n",
          stderr );

}

/*
 *  Write the usage text to stderr.  The text has four sections: common
 *  arguments, plain walk, copy/backup and delete/purge.  Lines that embed
 *  the program name go through fprintf; the fixed text in between is
 *  written with fputs.
 */
void printHelp( ) {

   /* ---- banner and common arguments ---- */
   fprintf( stderr, "%s version %s\n\n", whoami, Version );
   fprintf( stderr, "Usage : \n%s --help --version\n", whoami );
   fputs( "          Common Args :\n"
          "             --dryrun : use this to test commands\n"
          "                        without making any changes to the system\n"
          "       --maxthreads=N : indicates the number of threads (default=32)\n"
          "           --nototals : disables printing of totals after the scan\n"
          "               --dots : prints a dot and total every 1000 files scanned.\n"
          "              --quiet : no chatter, speeds up the scan.\n"
          "             --nosnap : Ignore directories with name .snapshot\n"
          "              --debug : Verbose debug spam\n"
          "        Output Format : CSV\n"
          "               Fields : DateStamp,\"inode\",\"filename\",\"fileExtension\",\"UID\",\n"
          "                        \"GID\",\"st_size\",\"st_blocks\",\"st_mode\",\"atime\",\n"
          "                        \"mtime\",\"ctime\",\"File Count\",\"Directory Size\"\n"
          "\n",
          stderr );

   /* ---- plain walk ---- */
   fputs( "Walk Usage : \n", stderr );
   fprintf( stderr, "%s SOURCEDIR\n", whoami );
   fputs( "         Command Args :\n"
          "            SOURCEDIR : Fully qualified path to the directory to walk\n"
          "\n",
          stderr );

   /* ---- copy / backup ---- */
   fputs( "Copy/Backup Usage : \n", stderr );
   fprintf( stderr,
            "%s --targetdir=TARGETDIR SOURCEDIR\n"
            "%s --retain=30 --targetdir=TARGETDIR SOURCEDIR\n",
            whoami, whoami );
   /* a "--noctime" help line used to sit between --retain and --nomtime;
      it was already disabled and is still not printed */
   fputs( "         Command Args :\n"
          "               --move : move files rather than copy\n"
          "         --nodirperms : don't copy directory permissions\n"
          "          --targetdir : copy files to specified TARGETDIR\n"
          "              --atime : copy if access time change (default=no atime)\n"
          "  --backuplog=LOGFILE : log all files that were copied.\n"
          "  --status=STATUSFILE : write periodic status updates to specified file\n"
          "             --retain : copy if file ctime or mtime within retention period\n"
          "                        specified in days. eg: --retain=60\n"
          "            --nomtime : ignore mtime (default=use mtime)\n"
          "            SOURCEDIR : Fully qualified path to the directory to walk\n"
          "\n",
          stderr );

   /* ---- delete / purge ---- */
   fputs( "Delete/Purge Usage : \n", stderr );
   fprintf( stderr,
            "%s --purge [--force] --comparedir=COMPAREDIR PURGEDIR\n"
            "%s --purge [--force] --retain=N PURGEDIR\n",
            whoami, whoami );
   fputs( "         Command Args :\n"
          "         --comparedir : compare against this dir but dont touch any files\n"
          "                        in it. comparedir is usually the SOURCEDIR from\n"
          "                        a prior copy/sync stage.\n"
          "              --purge : !!WARNING!! this deletes files older than the\n"
          "                        retain period -OR- if retain is not specified\n"
          "                        --comparedir is required. The comparedir is\n"
          "                        compared against the specified dir and any files\n"
          "                        not found in the comparedir are purged.\n"
          "              --force : !NOTE! default is a *dry-run* for purge, you must\n"
          "                        specify --force option to actually purge files\n"
          "              --atime : keep if access time within retain period\n"
          "             --retain : keep if file ctime or mtime within retention period\n"
          "                        specified in days. eg: --retain=60\n"
          "\n",
          stderr );

}

/*
 *  Rewrite the status file (statusfile) with the current counters.
 *
 *  message : short state label written on the "status:" line; the
 *            visible callers pass "running" and "completed".
 *
 *  The file is opened with mode "w", so each call replaces the previous
 *  contents.  If it cannot be opened the update is skipped silently.
 *  Elapsed time is measured from currenttime.
 */
void updateStatus(char *message) {

    time_t now;
    time(&now);

    /* difftime() can be negative if the clock stepped backwards; converting
       a negative double to size_t is undefined, so clamp at zero. */
    double delta = difftime(now, currenttime);
    size_t elapsed = (delta > 0) ? (size_t)delta : 0;
    size_t hours = elapsed/3600;
    size_t minutes = (elapsed%3600)/60;
    size_t seconds = elapsed%60;

    FILE *statusFp = fopen(statusfile, "w");
    if(statusFp == NULL)
        return;

    /* every counter is a size_t, so the matching conversion is %zu */
    fprintf(statusFp, "   status: %s\n", message);
    fprintf(statusFp, "  scanned: %zu\n", scancount);
    fprintf(statusFp, "   copied: %zu\n", copycount);
    fprintf(statusFp, " modified: %zu\n", modifiedcount);
    fprintf(statusFp, "  created: %zu\n", createdcount);
    fprintf(statusFp, "  skipped: %zu\n", skippedcount);
    fprintf(statusFp, "  deleted: %zu\n", deletedcount);
    fprintf(statusFp, "   errors: %zu\n", errorcount);
    fprintf(statusFp, "  elapsed: %02zu:%02zu:%02zu\n", hours, minutes, seconds);
    fclose(statusFp);

}

/*
 *  Print the end-of-run summary.
 *
 *   - with STATUS set, the status file is refreshed one last time with
 *     the label "completed";
 *   - with DU_MODE set, the accumulated byte total goes to stderr;
 *   - unless SHOWTOTALS is cleared, a table of all counters plus the
 *     elapsed time (measured from currenttime) goes to stderr.
 */
void printTotals( ) {

   if(STATUS)
   {
       updateStatus("completed");
   }
   if(DU_MODE)
   {
       fprintf(stderr, "TOTAL: %llu\n", DU_TOTAL);
   }
   if(!SHOWTOTALS)
       return;

   time_t now;
   time(&now);

   /* clamp: a negative difftime() must not be converted to size_t */
   double delta = difftime(now, currenttime);
   size_t elapsed = (delta > 0) ? (size_t)delta : 0;
   size_t hours = elapsed/3600;
   size_t minutes = (elapsed%3600)/60;
   size_t seconds = elapsed%60;

   fprintf( stderr, "\n\n%12s %12s %12s %12s %12s %12s %12s      %12s\n",
            "Scanned", "Copied", "Modified", "Created", "Deleted", "Skipped", "Errors", "Elapsed (HH:MM:SS)");
   /* counters are size_t, hence %zu rather than %lu */
   fprintf( stderr, "%12zu %12zu %12zu %12zu %12zu %12zu %12zu     %02zu:%02zu:%02zu\n",
            scancount, copycount, modifiedcount, createdcount, deletedcount, skippedcount, errorcount,
            hours, minutes, seconds);

}

/*
 *  Copy fullPath into dirPath and cut it back to its directory part,
 *  keeping the trailing '/'.
 *
 *  dirPath   : destination buffer
 *  fullPath  : NUL-terminated source path
 *  bufferLen : capacity of dirPath in bytes; at most bufferLen-1
 *              characters are copied and the result is always
 *              NUL-terminated
 *
 *  Examples: "/a/b/c.txt" -> "/a/b/",  "/a/b/" -> "/a/b/",  "/file" -> "/".
 *  The first character is never removed, so a path without any '/'
 *  ("abc") is reduced to its first character ("a").  An empty path
 *  stays empty.
 */
void getDirPath(char *dirPath, char *fullPath, size_t bufferLen) {

    if(dirPath == NULL || bufferLen == 0)
        return;
    if(fullPath == NULL)
    {
        dirPath[0] = '\0';
        return;
    }

    /* bounded copy that always leaves room for the terminator */
    size_t len = strlen(fullPath);
    if(len >= bufferLen)
        len = bufferLen - 1;
    memmove(dirPath, fullPath, len);
    dirPath[len] = '\0';

    /* strip trailing characters up to, but not including, the last '/' */
    while(len > 1 && dirPath[len-1] != '/')
    {
        len--;
        dirPath[len] = '\0';
    }

}

/*
 *  Append one line holding targetPath to the log file.
 *  Does nothing when LOG is off, when no log file name is set, or when
 *  the log file cannot be opened for appending.
 */
void logPath(char *targetPath) {

    FILE *out;

    if(LOG && logfile[0] != 0)
    {
        out = fopen(logfile, "a");
        if(out == NULL)
            return;
        fprintf(out, "%s\n", targetPath);
        fclose(out);
    }

}

/*
 *  Report whether the directory targetPath holds at least one entry whose
 *  name does not begin with '.'.
 *
 *  Returns 1 as soon as such an entry is found.  Returns 0 when the
 *  directory cannot be opened or when every entry name starts with '.'
 *  (this covers "." and "..", and also any other dot-prefixed name).
 */
int dirContainsFiles(char *targetPath) {

    DIR *dir = opendir(targetPath);
    int hasVisible = 0;

    if (dir == NULL)
        return 0;

    for (;;)
    {
        struct dirent *entry = readdir(dir);
        if (entry == NULL)
            break;
        if (entry->d_name[0] != '.')
        {
            hasVisible = 1;
            break;
        }
    }

    closedir(dir);
    return hasVisible;

}

/*

*  this needs to be in a crital secion  (and it is!)
*/

void printStat( char *fname, char *exten, struct stat *f, long fileCnt, long dirSz ) {

  char sourcePath[FILENAME_MAX+FILENAME_MAX];
  char sourcePathDir[FILENAME_MAX+FILENAME_MAX];
  char targetPath[FILENAME_MAX+FILENAME_MAX];
  char targetPathDir[FILENAME_MAX+FILENAME_MAX];
  char outFile[FILENAME_MAX+FILENAME_MAX];
  char *s, *t = sourcePath;
  int cnt = 0;
  char Sep=',';  /* this was added to help with debugging */
  opType = OP_NONE;
  cnt =0;
  /* fix bad file name is moved inside printStat to make it thread safe */
  s = fname;
  while ( *s ) {
     if ( *s == '"' )
        *t++ = '\\';
     //if ( *s < 32 ) {
     //   s++;
     //   cnt++;
     //} else
        *t++ = *s++;
  }
  *t++ = *s++;
  char badfile=0;
  if ( cnt )
  {
     fprintf( stderr, "Bad File: %s\n", fname );
     badfile=1;
  }
  if(!DOTS && !QUIET)
  {
    sprintf ( outFile, "\"%ld\",\"%s\",\"%s\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%07o\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%ld\"\n",
     (long)f->st_ino, sourcePath, (exten)? exten:"", (long)f->st_uid,
     (long)f->st_gid, (long)f->st_size, (long)f->st_blocks, (int)f->st_mode,
     (long)f->st_atime, (long)f->st_mtime, (long)f->st_ctime, fileCnt, dirSz );
    fputs( outFile, stdout );
  }
  if(STATUS && (scancount%256==0))
  {
      time_t now;
      time(&now);
      size_t elapsed = (size_t)difftime(now, laststatustime);
      if(elapsed >= 10)
      {
          laststatustime = now;
          updateStatus("running");
      }
  }
  if(DOTS)
  {
    if(scancount%1000==0)
        printf("%ld", scancount);
    else if(scancount%100==0)
        printf(".");
  }
  scancount++;
  if(PURGE)
  {
      if(S_ISDIR(f->st_mode))
      {
          if(DEBUG_MODE) printf("INFO: Purge skipping DIR: '%s'\n", sourcePath);
          skippedcount++;
          return;
      }
      if(!RETAIN && COMPAREDIR)
      {
          //Add a trailing / to the base path if it's not there.
          size_t comparedirLen = strlen(comparedir);
          if((comparedirLen+1 < FILENAME_MAX+FILENAME_MAX) && comparedir[comparedirLen-1] != '/')
          {
              comparedir[comparedirLen] = '/';
              comparedir[comparedirLen+1] = 0;
          }
          //Build the full path to the target file
          size_t offset = rootdirlen+1;
          strncpy(targetPath, comparedir, FILENAME_MAX+FILENAME_MAX);
          strncat(targetPath, &sourcePath[offset], FILENAME_MAX+FILENAME_MAX);
          //Build the full path to the target directory by trimming from the end of the full file path
          getDirPath(targetPathDir, targetPath, FILENAME_MAX+FILENAME_MAX);
          if(DEBUG_MODE)
             printf("comparing: %s : %s\n", targetPath, sourcePath);
          struct stat statTarget = {0};
          if(stat(targetPath, &statTarget) == -1 && S_ISREG(f->st_mode))
          {
              deletedcount++;
              if(DEBUG_MODE)
                  printf("INFO: PURGE [%s] : %s\n", (DRYRUN || !FORCE) ? "dryrun" : "UNLINK", sourcePath);
              if(!DRYRUN && FORCE)
              {
                  opType = OP_DELETE;
                  strncpy(opCommand, sourcePath, FILENAME_MAX+FILENAME_MAX);
                  if(DOTS)
                      printf("!");
              }
          }
          else
              skippedcount++;
          return;
      }
      if(RETAIN && (currenttime > f->st_ctime) && (
         (!CMP_CTIME || (difftime(currenttime, f->st_ctime) > RETAIN)) &&
         (!CMP_MTIME || (difftime(currenttime, f->st_mtime) > RETAIN)) &&
         (!CMP_ATIME || (difftime(currenttime, f->st_atime) > RETAIN))))
      {
          deletedcount++;
          if(DEBUG_MODE)
          {
              printf("INFO: PURGE [%s] (%lu days old): %s\n", (DRYRUN || !FORCE) ? "dryrun" : "UNLINK", (size_t)difftime(currenttime, f->st_mtime)/(3600*24), sourcePath);
          }
          if(!DRYRUN && FORCE)
          {
              //we want to just save the path so we can do the purge outside of this criticial section
              //this way our delete is parallelized
              strncpy(opCommand, sourcePath, FILENAME_MAX+FILENAME_MAX);
              opType = OP_DELETE;
              if(DOTS)
                  printf("!");
          }
      }
      else
          skippedcount++;
      return;
  }
  //If a target dir was specified then we need to do file copy operations
  if(TARGETDIR && !badfile)
  {
      //Add a trailing / to the base path if it's not there.
      size_t targetdirLen = strlen(targetdir);
      if((targetdirLen+1 < FILENAME_MAX+FILENAME_MAX) && targetdir[targetdirLen-1] != '/')
      {
          targetdir[targetdirLen] = '/';
          targetdir[targetdirLen+1] = 0;
      }
      //Build the full path to the target file
      size_t offset = rootdirlen+1;
      strncpy(targetPath, targetdir, FILENAME_MAX+FILENAME_MAX);
      strncat(targetPath, &sourcePath[offset], FILENAME_MAX+FILENAME_MAX);
      //Build the full path to the target directory by trimming from the end of the full file path
      getDirPath(targetPathDir, targetPath, FILENAME_MAX+FILENAME_MAX);
      struct stat statTargetDir = {0};
      if(stat(targetPathDir, &statTargetDir) == -1 || !S_ISDIR(statTargetDir.st_mode))
      {
          getDirPath(sourcePathDir, sourcePath, FILENAME_MAX+FILENAME_MAX);
          struct stat statSourceDir = {0};
          if(!stat(sourcePathDir, &statSourceDir) && S_ISDIR(statSourceDir.st_mode))
          {
              char command[FILENAME_MAX+FILENAME_MAX];
              command[0]='\0';
              snprintf(command, FILENAME_MAX+FILENAME_MAX, "mkdir -p \"%s\"", targetPathDir);
              if(DEBUG_MODE)
                  printf("INFO: making DIR: %s\n", command);
              if(!DRYRUN)
              {
                  system(command);
                  struct utimbuf times;
                  times.actime = statSourceDir.st_atime;
                  times.modtime = statSourceDir.st_mtime;
                  utime(targetPathDir, &times);
              }
              //preserve the permissions for the directories
              if(!NODIRPERMS)
                  snprintf(command, FILENAME_MAX+FILENAME_MAX, "rsync  -dAXogtpx --no-recursive \"%s/\" \"%s\"", sourcePathDir, targetPathDir);
              if(DEBUG_MODE)
                  printf("INFO: updating DIR perms: %s\n", command);
              if(!DRYRUN)
              {
                  system(command);
              }
          }
          else
          {
              fprintf(stderr, "ERR: Failed to create target directory '%s'\n", targetPathDir);
              errorcount++;
              return;
          }
      }
      if(S_ISDIR(f->st_mode))
      {
          struct stat statTargetDir = {0};
          if(stat(targetPath, &statTargetDir) == -1)
          {
              char command[FILENAME_MAX+FILENAME_MAX];
              command[0]='\0';
              snprintf(command, FILENAME_MAX+FILENAME_MAX, "mkdir -p \"%s\"", targetPath);
              if(DEBUG_MODE)
                  printf("INFO: making DIR: %s\n", command);
              if(!DRYRUN)
              {
                  system(command);
                  struct utimbuf times;
                  times.actime = f->st_atime;
                  times.modtime = f->st_mtime;
                  utime(targetPath, &times);
              }
              //preserve the permissions for the directories
              if(!NODIRPERMS)
                  snprintf(command, FILENAME_MAX+FILENAME_MAX, "rsync  -dAXogtpx --no-recursive \"%s/\" \"%s\"", sourcePath, targetPath);
              if(DEBUG_MODE)
                  printf("INFO: updating DIR perms: %s\n", command);
              if(!DRYRUN)
              {
                  system(command);
              }
              createdcount++;
              if(DEBUG_MODE)
                  printf("INFO: Created DIR: '%s'\n", sourcePath);
          }
          return;
      }
      struct stat statTarget = {0};
      char doCopy = 0;
      char doCompare = 0;
      if(stat(targetPath, &statTarget) == -1 || MOVE_MODE)
      {
          doCopy = 1;
      }
      else if(S_ISDIR(statTarget.st_mode))
      {
          fprintf(stderr, "ERR: Target is a directory '%s'\n", targetPath);
          errorcount++;
      }
      else if(!S_ISREG(statTarget.st_mode))
      {
          fprintf(stderr, "ERR: Target not regular file '%s'\n", targetPath);
          errorcount++;
      }
      else
      {
          doCompare = 1;
      }
      if(doCompare)
      {
          if(DEBUG_MODE)
          {
              sprintf ( outFile, "INFO: comparing with [\"%ld\",\"%s\",\"%s\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%07o\",\"%ld\",\"%ld\",\"%ld\",\"%ld\",\"%ld\"]\n",
                (long)statTarget.st_ino, targetPath, (exten)? exten:"", (long)statTarget.st_uid,
                (long)statTarget.st_gid, (long)statTarget.st_size, (long)statTarget.st_blocks, (int)statTarget.st_mode,
                (long)statTarget.st_atime, (long)statTarget.st_mtime, (long)statTarget.st_ctime, fileCnt, dirSz );
              fputs( outFile, stdout );
          }
          if(MOVE_MODE){ doCopy=1; }
          else if(statTarget.st_mode != f->st_mode)
          {
              if(DEBUG_MODE) printf("INFO: File has MODE CHANGE, copying\n");
              doCopy = 1;
          }
          else if(statTarget.st_size != f->st_size)
          {
              if(DEBUG_MODE) printf("INFO: File has SIZE CHANGE, copying\n");
              doCopy = 1;
          }
          //  the target file will always have a different create time so we don't
          //  want to use == for ctime compare, rather we just want to see if the source
          //  was created after the target, if so, need to recopy
          else if(CMP_CTIME && (statTarget.st_ctime < f->st_ctime))
          {
              if(DEBUG_MODE) printf("INFO: CTIME on target is older than source, copying\n");
              doCopy = 1;
          }
          else if(CMP_MTIME && (statTarget.st_mtime != f->st_mtime))
          {
              if(DEBUG_MODE) printf("INFO: File has MTIME CHANGE, copying\n");
              doCopy = 1;
          }
          if(!doCopy && CMP_ATIME && (statTarget.st_atime != f->st_atime))
          {
              if(DEBUG_MODE) printf("INFO: File has only ATIME CHANGE, updating timestamps\n");
              struct utimbuf times;
              times.actime = f->st_atime;
              times.modtime = f->st_mtime;
              utime(targetPath, &times);
          }
      }
      if(doCopy)
      {
          if(RETAIN)
          {
              if((currenttime < f->st_ctime) ||
                 (CMP_CTIME && (difftime(currenttime, f->st_ctime) < RETAIN)) ||
                 (CMP_MTIME && (difftime(currenttime, f->st_mtime) < RETAIN)) ||
                 (CMP_ATIME && (difftime(currenttime, f->st_atime) < RETAIN)))
              {
                  if(DEBUG_MODE)
                      printf("INFO: INSIDE RETAIN (%lu days old): %s\n", (size_t)difftime(currenttime, f->st_mtime)/(3600*24), sourcePath);
              }
              else
              {
                  if(DEBUG_MODE)
                       printf("INFO: OUTSIDE RETAIN (%lu days old), skipping: %s\n", (size_t)difftime(currenttime, f->st_mtime)/(3600*24), sourcePath);
                  skippedcount++;
                  return;
              }
          }
          copycount++;
          if(doCompare)
              modifiedcount++;
          else
              createdcount++;
          char command[FILENAME_MAX+FILENAME_MAX];
          command[0]='\0';
          if(MOVE_MODE)
              snprintf(command, FILENAME_MAX+FILENAME_MAX, "mv \"%s\" \"%s\"", sourcePath, targetPath);
          else
              snprintf(command, FILENAME_MAX+FILENAME_MAX, "cp --no-dereference --preserve=all \"%s\" \"%s\"", sourcePath, targetPath);
          if(DEBUG_MODE)
              printf("INFO: %s\n", command);
          if(!DRYRUN)
          {
              //we want to just save the command so we can do the copy outside of this criticial section
              //this way our copy op is parallelized
              opType = OP_COPY;
              strncpy(opCommand, command, FILENAME_MAX+FILENAME_MAX);
          }
          if(LOG)
              logPath(targetPath);
      }
  }

}

/*
 *  Carry out the action that printStat() queued for one entry.
 *
 *  doOperation : OP_COPY   - doCommand is a shell command, run via system()
 *                OP_DELETE - doCommand is a file path, removed via unlink()
 *                OP_NONE   - nothing to do
 *  doCommand   : the caller's private copy of the queued command or path
 *
 *  fileDir() calls this after releasing mutexPrintStat so that copies and
 *  deletes of different threads overlap.  Failures are reported on stderr
 *  instead of being dropped; the shared error counter is not touched here
 *  because no lock is held at this point.
 */
void execOperation(operationType_t doOperation, char *doCommand) {

    if(doOperation == OP_COPY)
    {
        int rc = system(doCommand);
        if(rc != 0)
            fprintf(stderr, "ERR: command failed (status %d): %s\n", rc, doCommand);
    }
    else if(doOperation == OP_DELETE)
    {
        if(unlink(doCommand) == -1)
            fprintf(stderr, "ERR: unlink '%s' failed: %s\n", doCommand, strerror(errno));
    }

}

void *fileDir( void *arg );

/* Run printStat() for one entry under mutexPrintStat, then perform the
   copy/delete it queued after the mutex has been released, so that the
   slow operations of different threads run in parallel.  The command
   buffer lives only in this frame and does not pile up on the stack
   while fileDir() recurses. */
static void reportEntry( char *path, char *exten, struct stat *st, long fileCnt, long dirSz )
{
    char doCommand[FILENAME_MAX+FILENAME_MAX+1];
    operationType_t doOperation;

    pthread_mutex_lock( &mutexPrintStat );
    printStat( path, exten, st, fileCnt, dirSz );
    doOperation = opType;
    opType = OP_NONE;
    if ( doOperation != OP_NONE )
        snprintf( doCommand, sizeof doCommand, "%s", opCommand );
    pthread_mutex_unlock( &mutexPrintStat );

    if ( doOperation != OP_NONE )
        execOperation( doOperation, doCommand );
}

/* Try to reserve a free thread slot.  Returns the slot index and stores
   the new thread ID in *id, or returns -1 when the thread limit is
   reached or no slot is free.  The counters only change when a slot was
   really reserved. */
static int claimThreadSlot( struct fileData *fd, int *id )
{
    int slot = -1;

    pthread_mutex_lock( &mutexFD );
    if ( ThreadCNT < MAXTHRDS ) {
        for ( size_t i = 0; i < MAXTHRDS; i++ ) {
            if ( fdslot[i].THRDslot == -1 ) {
                slot = (int)i;
                break;
            }
        }
        if ( slot == -1 )
            fprintf( stderr, "SlotE %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, "no available threads" );
        else {
            ThreadCNT++;
            *id = totalTHRDS++;
            fdslot[slot].THRDslot = slot;
        }
    }
    pthread_mutex_unlock( &mutexFD );
    return slot;
}

/* Give a thread slot back. */
static void releaseThreadSlot( struct fileData *slotData )
{
    pthread_mutex_lock( &mutexFD );
    --ThreadCNT;
    slotData->THRDslot = -1;
    pthread_mutex_unlock( &mutexFD );
}

/* Walk the directory named by fd->dname: report every entry, descend
   into sub directories, then report the directory itself.  fd->dname is
   used as scratch space for the entry paths and holds the directory path
   again on return. */
static void walkDirectory( struct fileData *fd )
{
    struct fileData local;
    struct dirent *d;
    struct stat f;
    long localCnt = 0;               /* number of files in a specific directory */
    unsigned long long localSz = 0;  /* byte cnt of files in the local directory 2010.07 */
    size_t baseLen = strlen( fd->dname );
    DIR *dirp = NULL;
    char *dot;

    /* an unreadable or over-long directory is reported and skipped; it
       must not take the whole walk down */
    if ( baseLen + 2 > sizeof fd->dname )
        fprintf( stderr, "ERR: path too long, not walked: %s\n", fd->dname );
    else if ( (dirp = opendir( fd->dname )) == NULL )
        fprintf( stderr, "ERR: cannot open directory '%s': %s\n", fd->dname, strerror( errno ) );

    if ( dirp != NULL ) {
        /* put '/' at the end of the path; end_dname points to the char
           after the '/', room is what is left there incl. terminator */
        char *end_dname = fd->dname + baseLen + 1;
        size_t room = sizeof fd->dname - (baseLen + 1);
        fd->dname[baseLen] = '/';
        *end_dname = '\0';

        while ( (d = readdir( dirp )) != NULL ) {
            size_t nameLen;
            if ( strcmp( ".", d->d_name ) == 0 || strcmp( "..", d->d_name ) == 0 )
                continue;
            localCnt++;
            nameLen = strlen( d->d_name );
            if ( nameLen >= room ) {
                *end_dname = '\0';
                fprintf( stderr, "ERR: path too long, skipped: %s%s\n", fd->dname, d->d_name );
                continue;
            }
            memcpy( end_dname, d->d_name, nameLen + 1 );
            if ( lstat ( fd->dname, &f ) == -1 ) {
                fprintf( stderr, "error %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname );
                continue;
            }
            localSz += f.st_size;

            /* Follow sub dirs recursively but don't follow links */
            if ( S_ISDIR(f.st_mode) ) {
                int slot, id = 0;
                if ( SKIP_SNAPSHOTS && !strcmp( ".snapshot", d->d_name ) ) {
                    fprintf( stderr, "SnapShot: %s\n", d->d_name );
                    continue;
                }
                slot = claimThreadSlot( fd, &id );
                if ( slot != -1 ) {
                    strcpy( fdslot[slot].dname, (const char*)fd->dname );
                    fdslot[slot].THRDid = id;
                    fdslot[slot].flag = 0;
                    if ( pthread_create( &fdslot[slot].thread_id, &fdslot[0].tattr,
                                         fileDir, (void*)&fdslot[slot] ) != 0 ) {
                        /* no thread: free the slot, walk it ourselves */
                        releaseThreadSlot( &fdslot[slot] );
                        slot = -1;
                    }
                }
                if ( slot == -1 ) {
                    strcpy( local.dname, (const char*)fd->dname );
                    local.THRDslot = fd->THRDslot;
                    local.THRDid = fd->THRDid;
                    local.flag = fd->flag + 1;
                    fileDir( (void*) &local );
                }
            } else {
                /* extension = text after the last '.', not counting a
                   '.' that is the first character of the name */
                dot = strrchr( end_dname + 1, '.' );
                if ( dot != NULL )
                    dot++;
                reportEntry( fd->dname, dot, &f, (long)0, (long)0 );
            }
        }
        closedir( dirp );
        fd->dname[baseLen] = '\0';   /* back to the bare directory path */
    }

    /* the byte total is shared by all threads: add this directory's
       share in one step, under the mutex */
    pthread_mutex_lock( &mutexPrintStat );
    DU_TOTAL += localSz;
    pthread_mutex_unlock( &mutexPrintStat );

    if ( lstat ( fd->dname, &f ) == -1 ) {
        fprintf( stderr, "ERROR %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname );
        return;
    }

    /* extension of the directory's own name, same rule as for files: the
       part after the last '.', and none for names like /.R */
    {
        char *base = strrchr( fd->dname, '/' );
        base = ( base != NULL ) ? base + 1 : fd->dname;
        dot = ( *base != '\0' ) ? strrchr( base + 1, '.' ) : NULL;
        if ( dot != NULL )
            dot++;
    }
    reportEntry( fd->dname, dot, &f, localCnt, (long)localSz );
}

/*
 *  Open a directory and read the contents.  Call lstat with each
 *  file name.
 *
 *  Recursively call self for each sub dir; while fewer than MAXTHRDS
 *  threads are running, a sub dir is handed to a new thread instead.
 *
 *  print inode meta data for each file, one line per file in CSV format
 *
 *  arg points to the struct fileData describing the directory.  With
 *  flag == 0 this call is the body of a thread: it releases its slot and
 *  ends in pthread_exit().  Otherwise it is a plain recursive call and
 *  returns NULL.
 */
void *fileDir( void *arg )
{
    struct fileData *fd = (struct fileData *) arg;

#ifdef THRD_DEBUG
    printf( "Start %2d%5d %2d %s\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname );
#endif /* THRD_DEBUG */

    walkDirectory( fd );

#ifdef THRD_DEBUG
    printf( "Ending %2d%5d %2d<%s>\n", fd->THRDslot, fd->THRDid, fd->flag, fd->dname );
#endif /* THRD_DEBUG */

    if ( fd->flag == 0 ) { /* this instance of fileDir is a thread */
        releaseThreadSlot( fd );
        pthread_exit( NULL );
    }
    return NULL;
}


int main( int argc, char* argv[] ) {

   int error, i;
   char *s, *c;
   void *status;
   if ( argc < 2 || argv == NULL)
   {
       printHelp( );
       exit( EXIT_FAILURE );
   }
   argc--; argv++;
   while(argc > 1)
   {
       if (!strncmp( *argv, "--maxthreads=", 13 ))
       {
          MAXTHRDS = (size_t)atoi(&((*argv)[strlen("--maxthreads=")]));
          if(MAXTHRDS > 64) MAXTHRDS = 64;
          if(MAXTHRDS <= 2) MAXTHRDS = 2;
          if(DEBUG_MODE) printf("INFO: MAXTHRDS='%lu'\n", MAXTHRDS);
       }
       else if (!strncmp( *argv, "--retain=", 9 ))
       {
          RETAIN = (size_t)atoi(&((*argv)[strlen("--retain=")]))*24*3600;
          if(DEBUG_MODE) printf("INFO: RETAIN='%lu'\n", RETAIN);
       }
       else if ( !strcmp( *argv, "--move" ) )
          MOVE_MODE = 1;
       else if ( !strcmp( *argv, "--debug" ) )
          DEBUG_MODE = 1;
       else if ( !strcmp( *argv, "--du" ) )
       {
          DU_MODE = 1;
          SHOWTOTALS = 0;
          QUIET = 1;
       }
       else if ( !strcmp( *argv, "--nototals" ) )
          SHOWTOTALS = 0;
       else if ( !strcmp( *argv, "--noatime" ) )
          CMP_ATIME = 0;
       else if ( !strcmp( *argv, "--atime" ) )
          CMP_ATIME = 1;
       else if ( !strcmp( *argv, "--nomtime" ) )
          CMP_MTIME = 0;
       else if ( !strcmp( *argv, "--mtime" ) )
          CMP_MTIME = 1;
       else if ( !strcmp( *argv, "--noctime" ) )
          CMP_CTIME = 0;
       else if ( !strcmp( *argv, "--ctime" ) )
          CMP_CTIME = 1;
       else if ( !strcmp( *argv, "--NoSnap" ) || !strcmp( *argv, "--nosnap" ) )
          SKIP_SNAPSHOTS = 1;
       else if ( !strcmp( *argv, "--dryrun" ) )
          DRYRUN = 1;
       else if ( !strcmp( *argv, "--force" ) )
          FORCE = 1;
       else if ( !strcmp( *argv, "--nodirperms" ) )
          NODIRPERMS = 1;
       else if ( !strcmp( *argv, "--purge" ) )
       {
          PURGE = 1;
       }
       else if ( !strncmp( *argv, "--backuplog=", 12 ) )
       {
          LOG = 1;
          memset(logfile, 0, sizeof(logfile));
          strncpy(logfile, &((*argv)[strlen("--backuplog=")]), sizeof(logfile));
          if(strlen(logfile) == 0)
          {
              fprintf( stderr, "ERR: Invalid log file name.");
              LOG=0;
          }
          if(DEBUG_MODE) printf("INFO: LOGFILE='%s'\n", logfile);
       }
       else if ( !strncmp( *argv, "--status=", 9 ) )
       {
          STATUS = 1;
          memset(statusfile, 0, sizeof(statusfile));
          strncpy(statusfile, &((*argv)[strlen("--status=")]), sizeof(statusfile));
          if(strlen(statusfile) == 0)
          {
              fprintf( stderr, "ERR: Invalid status log file name.");
              STATUS=0;
          }
          if(DEBUG_MODE) printf("INFO: STATUS='%s'\n", statusfile);
       }
       else if ( !strncmp( *argv, "--targetdir=", 12 ) )
       {
          TARGETDIR = 1;
          memset(targetdir, 0, sizeof(targetdir));
          strncpy(targetdir, &((*argv)[strlen("--targetdir=")]), sizeof(targetdir));
          if(DEBUG_MODE)
              printf("INFO: TARGETDIR='%s'\n", targetdir);
       }
       else if ( !strncmp( *argv, "--comparedir=", 13 ) )
       {
          COMPAREDIR = 1;
          memset(comparedir, 0, sizeof(comparedir));
          strncpy(comparedir, &((*argv)[strlen("--comparedir=")]), sizeof(comparedir));
          if(DEBUG_MODE) printf("INFO: COMPAREDIR='%s'\n", comparedir);
          if(!dirContainsFiles(comparedir))
          {
              printf("ERR: Cannot run purge operation against a comparison directory with no files '%s'\n", comparedir);
              exit( EXIT_FAILURE );
          }
       }
       else if ( !strcmp( *argv, "--dots" ) )
          DOTS = 1;
       else if ( !strcmp( *argv, "--quiet" ) )
          QUIET = 1;
       argc--; argv++;
   }
   if (!strcmp( *argv, "--help" ))
   {
      printHelp( );
      exit( 0 );
   }
   if (!strcmp( *argv, "--version" ) || !strcmp( *argv, "-v" ))
   {
      printVersion( );
      exit( 0 );
   }
   time(&currenttime);
   fdslot = malloc(sizeof(struct fileData) * MAXTHRDS);
   for ( i=0; i<MAXTHRDS; i++ )
   {
       fdslot[i].THRDslot = -1;
       if ( (error = pthread_attr_init( &fdslot[i].tattr )) )
           fprintf( stderr, "Failed to create pthread attr: %s\n",
                            strerror(error));
       else if ( (error = pthread_attr_setdetachstate( &fdslot[i].tattr,
                            PTHREAD_CREATE_DETACHED)
                 ) )
           fprintf( stderr, "failed to set attribute detached: %s\n",
                            strerror(error));
   }
   pthread_mutex_init(&mutexFD, NULL);
   strcpy( fdslot[0].dname, (const char*) *argv );
   rootdirlen = strlen(fdslot[0].dname);
   //Trim the trailing slash so we don't get // in the path
   if(fdslot[0].dname[strlen(fdslot[0].dname)-1] == '/')
   {
       fdslot[0].dname[strlen(fdslot[0].dname)-1] = '\0';
       rootdirlen--;
   }
   fdslot[0].THRDslot = ThreadCNT++;
   fdslot[0].THRDid = totalTHRDS++;
   fdslot[0].flag = 0;
   pthread_create( &(fdslot[0].thread_id), &fdslot[0].tattr, fileDir,
                   (void*)&fdslot[0] );
   if(SHOWTOTALS || STATUS || DU_MODE)
       atexit(printTotals);
   pthread_exit( NULL );

}