#!/usr/bin/perl -w
########################################################################
#
# Copyright 2004 Tim U.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# 02111-1307 USA
#
########################################################################
#
# Usage:
#   split-dir [options] src-dir size[suffix] [dest-dir-prefix]
#
#   src-dir         - Source directory to copy files from.
#   size            - Size in [suffix]-bytes (MB by default)
#   suffix          - Size suffix, similar to dd.
#   dest-dir-prefix - Prefix of the numbered destination directories
#                     (defaults to "<src-dir>-").
#
# Options:
#   -h  Hard link the files (default).
#   -c  Copy the files.
#   -m  Move the files.
#
########################################################################

use strict;
use warnings;

#-------- Config Variables ---------

my $DEFAULT_MULT = 'M';         # size suffix assumed when none is given
my $DEFAULT_OP   = 'hardlink';  # operation used when no option is given

#----- End of Config Variables -----

# Size suffix => number of bytes it stands for.
my %SIZE_MULTS = (
    'T' => 1099511627776,   # 2^40
    't' => 1000000000000,
    'G' => 1073741824,      # 2^30
    'g' => 1000000000,
    'M' => 1048576,         # 2^20
    'm' => 1000000,
    'K' => 1024,            # 2^10
    'k' => 1000,
    'b' => 512,
    'w' => 2,
    'c' => 1,
);

# Command line option letter => operation name.
my %OPT_TO_OP = (
    'h' => 'hardlink',
    'm' => 'move',
    'c' => 'copy',
);

my (
    $source_dir,        # Dir which has files to be copied from.
    $dest_prefix,       # Prefix to be used for the destination
                        # directories.
    $dir_num,           # The current dest dir number (starting from one).
    $totalsize,         # Total size of files in current dest dir.
    $max_destdir_size,  # Maximum bytes to store in destination
                        # directories.
    $op,                # Do a {hardlink,move,copy} on the files.
);

$op = $DEFAULT_OP;

# main
processArgs();
splitDirectory();
exit(0);

########################################################################
#
# processArgs - Process command line arguments
#
# Dies with a usage message when the number of positional arguments
# is wrong, and with a specific message for an unknown option, a
# malformed size or an unknown size suffix.
#
# Global variables used:
#   %OPT_TO_OP
#   $op
#   $source_dir
#   $dest_prefix
#   $max_destdir_size
#   %SIZE_MULTS
#   $DEFAULT_MULT
#
########################################################################
sub processArgs
{
    # Consume the optional operation flag first, so that the count
    # check below only ever sees the positional arguments.
    if(@ARGV && $ARGV[0] =~ /^-(.*)$/)
    {
        my $opt = $1;
        if(!exists $OPT_TO_OP{$opt})
        {
            die "Invalid option: -$opt\n";
        }
        $op = $OPT_TO_OP{$opt};
        shift @ARGV;
    }

    if(@ARGV < 2 || @ARGV > 3)
    {
        die("Usage:\n" .
            "  $0 [-h|-c|-m] src-dir size[suffix] [dest-dir-prefix]\n"
           );
    }

    $source_dir = $ARGV[0];
    $source_dir =~ s,/*$,,;     # get rid of trailing slash(es)
    my $size_str = $ARGV[1];
    $dest_prefix = ( defined $ARGV[2] ) ? $ARGV[2] : "${source_dir}-";

    # parse size, convert to bytes; the suffix is optional
    my ($size, $size_suffix) =
        ($size_str =~ /^(\d+(?:\.\d*)?|\.\d+)([A-Za-z]?)$/);
    if(!defined $size)
    {
        die "Invalid size '$size_str'\n";
    }
    if($size_suffix eq '')
    {
        $size_suffix = $DEFAULT_MULT;
    }
    if(!exists $SIZE_MULTS{$size_suffix})
    {
        die "Invalid size suffix '$size_suffix'\n";
    }
    $max_destdir_size = $size * $SIZE_MULTS{$size_suffix};
}

########################################################################
#
# splitDirectory - Copy files from $source_dir into destination
#                  directories of no more than $max_destdir_size
#                  bytes.
#
# Global variables used:
#   $source_dir
#   $dest_prefix
#   $dir_num
#   $totalsize
#   $max_destdir_size
#
########################################################################
sub splitDirectory
{
    # start processing files, recursing dirs
    $dir_num   = 1;
    $totalsize = 0;     # so the summary is defined even for an empty tree
    recurseDir($source_dir);

    my $destdir_base = makeSeqName($dest_prefix, $dir_num);
    print("*** $destdir_base " .
          " (last one) Size: $totalsize/$max_destdir_size\n");
}

########################################################################
#
# recurseDir - Recursively processes all files under given directory
#              and copy them to the destination directories.
#
# Entries are handled in sorted order.  A directory that cannot be
# opened is reported and skipped.  Empty directories are recreated in
# the current destination directory and given the source's stats.
#
# NOTE(review): -d follows symbolic links, so a link pointing back up
# the tree would recurse without end - confirm whether that matters.
#
# Parameters:
#   $_[0]   Given directory.
#
# Global variables used:
#   $dest_prefix
#   $dir_num
#   $max_destdir_size
#   $totalsize
#   $source_dir
#   $op
#
########################################################################
sub recurseDir
{
    my ($dir) = @_;

    print("*** Processing $dir/\n");

    my $dirhandle;
    if(!opendir($dirhandle, $dir))
    {
        print("$0: $dir: $!\n");
        return;
    }
    # Read the whole listing and close the handle before recursing, so
    # a deep tree does not keep one handle open per level.
    my @entries = sort grep { $_ ne '.' && $_ ne '..' } readdir($dirhandle);
    closedir($dirhandle);

    foreach my $file (@entries)
    {
        my $src_file = "$dir/$file";

        if(-d $src_file)
        {
            recurseDir($src_file);
            next;
        }

        # -s yields undef when the entry cannot be stat'ed (e.g. a
        # dangling symlink); count such entries as zero bytes.
        my $filesize = -s $src_file;
        $filesize = 0 if !$filesize;

        # Move on to the next destination directory when this file
        # would overflow the current one.  A directory that holds no
        # data yet is never abandoned, so a single file larger than
        # the limit does not leave an empty, skipped directory behind.
        if($totalsize > 0 && $totalsize + $filesize > $max_destdir_size)
        {
            my $full_dir = makeSeqName($dest_prefix, $dir_num);
            print("*** Filled $full_dir. Size: " .
                  $totalsize .
                  "/$max_destdir_size\n");
            $totalsize = 0;
            $dir_num++;
        }
        $totalsize += $filesize;

        my $destdir   = _destDirFor($dir);
        my $dest_file = "$destdir/$file";
        #print("+ $dest_file\n");
        mkdirPath($destdir);

        if($op eq 'hardlink')
        {
            link($src_file, $dest_file) ||
                die("link($src_file, $dest_file): $!\n");
        }
        elsif($op eq 'move')
        {
            rename($src_file, $dest_file) ||
                die("rename($src_file, $dest_file): $!\n");
        }
        elsif($op eq 'copy')
        {
            archiveFile($src_file, $dest_file);
        }
        else
        {
            die("Invalid op '$op'\n");
        }
    }

    # handle empty directories
    if(!@entries)
    {
        my $destdir = _destDirFor($dir);
        mkdirPath($destdir);
        copyStats($dir, $destdir);
    }
}

########################################################################
#
# _destDirFor - Map a directory below $source_dir to the matching
#               directory inside the current destination directory.
#
# Parameters:
#   $_[0]   Directory at or below $source_dir.
#
# Returns:
#   The destination path, without trailing slashes.
#
# Global variables used:
#   $source_dir
#   $dest_prefix
#   $dir_num
#
########################################################################
sub _destDirFor
{
    my ($dir) = @_;

    # cut off $source_dir/ ; \Q..\E keeps regex metacharacters in the
    # directory name from being interpreted as a pattern
    my $graft_path_dir = $dir;
    $graft_path_dir =~ s,^\Q$source_dir\E/*,,;

    my $destdir = makeSeqName($dest_prefix, $dir_num) . "/$graft_path_dir";
    $destdir =~ s,/*$,,;        # get rid of trailing slash(es)
    return $destdir;
}

########################################################################
#
# archiveFile - Like the command "cp -a src_file dest_file"
#
# Copies the contents with copyFile() and then the ownership,
# permissions and timestamps with copyStats().
#
# Parameters:
#   $_[0]   Source File
#   $_[1]   Destination File
#
########################################################################
sub archiveFile
{
    my ($src_file, $dest_file) = @_;

    copyFile($src_file, $dest_file);
    copyStats($src_file, $dest_file);
}

########################################################################
#
# copyFile - Copies a file from one location to another.
#
# The data is copied in binary mode, 64 KiB at a time.  Dies when
# either file cannot be opened, or when reading, writing or flushing
# the destination fails.
#
# Parameters:
#   $_[0]   Source File
#   $_[1]   Destination File
#
########################################################################
sub copyFile
{
    my ($src_file, $dest_file) = @_;

    open(my $src_fh,  '<', $src_file)  or die "$src_file: $!\n";
    open(my $dest_fh, '>', $dest_file) or die "$dest_file: $!\n";
    binmode($src_fh);
    binmode($dest_fh);

    my ($buffer, $chars_read);
    while(($chars_read = read($src_fh, $buffer, 65536)))
    {
        print {$dest_fh} $buffer or die "$dest_file: $!\n";
    }
    # read() gives 0 at end of file and undef on error
    if(!defined $chars_read)
    {
        die("read() failed: $!\n");
    }

    close($src_fh);
    # buffered write errors (e.g. disk full) only surface at close
    close($dest_fh) or die "$dest_file: $!\n";
}

########################################################################
#
# copyStats - Copies a file's stats (uid/gid/perms/times) to another
#             file.
#
# When the source cannot be stat'ed a warning is printed and the
# destination is left untouched.
#
# Parameters:
#   $_[0]   Source File
#   $_[1]   Destination File
#
# Returns:
#   True when the permissions and the timestamps were both applied,
#   false otherwise.
#
########################################################################
sub copyStats
{
    my ($src_file, $dest_file) = @_;

    my @fstats = stat($src_file);
    if(!@fstats)
    {
        warn("$0: stat($src_file): $!\n");
        return 0;
    }
    my $perms = $fstats[2] & 07777;
    my ($uid, $gid, $atime, $mtime) = @fstats[4, 5, 8, 9];

    # Ownership first: changing the owner may clear the setuid/setgid
    # bits, so the mode has to be applied afterwards.  The result is
    # deliberately ignored - an unprivileged user usually cannot give
    # files away.
    chown($uid, $gid, $dest_file);

    my $ok = 1;
    if(!chmod($perms, $dest_file))
    {
        warn("$0: chmod($dest_file): $!\n");
        $ok = 0;
    }
    if(!utime($atime, $mtime, $dest_file))
    {
        warn("$0: utime($dest_file): $!\n");
        $ok = 0;
    }
    return $ok;
}

########################################################################
#
# mkdirPath - Like the command "mkdir -p /path/to/create"
#
# Creates every missing component of the path.  Repeated slashes are
# ignored, an empty path is a no-op, and a component that already
# exists as a directory is left alone.  Dies when a component cannot
# be created.
#
# Parameters:
#   $_[0]   Path to create.
#
########################################################################
sub mkdirPath
{
    my ($target) = @_;

    return if !defined $target || $target eq '';

    # Absolute paths are built up from "/", relative ones from nothing.
    my $path = ($target =~ m,^/,) ? '/' : '';

    foreach my $part ( grep { length } split(m,/+,, $target) )
    {
        $path .= ($path eq '' || $path =~ m,/$,) ? $part : "/$part";
        next if -d $path;
        if(!mkdir($path))
        {
            my $err = $!;
            # tolerate the directory having appeared in the meantime
            die "mkdir($path): $err\n" unless -d $path;
        }
    }
}

########################################################################
#
# makeSeqName - Return the current name in the sequence.
#
# Parameters:
#   $_[0]   Prefix
#   $_[1]   Sequence Number
#
# Returns:
#   The prefix followed by the sequence number, zero-padded to at
#   least two digits (e.g. "photos-03").
#
########################################################################
sub makeSeqName
{
    # lexical parameters; nothing is written to package globals
    my ($prefix, $num) = @_;

    return sprintf('%s%02d', $prefix, $num);
}