#!/usr/bin/perl
# Author: Kevin P. Inscoe
# File: split.pl
# Date of creation: May 18, 2006.
# Warranty: None expressed or implied.
# License: The Open Software License. V1.1
#          http://www.opensource.org/licenses/osl.php
#
# OSI Certified Open Source Software. http://www.opensource.org/licenses/
#
# Prerequisites: Perl 5.6 (minimum); three-argument open and lexical
# filehandles are used so that odd file names cannot change the open mode.
#
# The purpose of this program is to take large input files and split them
# into smaller files by size, in sequential order, each ending in a
# zero-padded .nnn suffix (file.001, file.002, ...).
#
# The input is copied line by line and a line is never broken in two. A new
# piece is started once the current piece has grown past the size limit, so
# a piece may exceed the limit by at most one line.
#
# Usage: split.pl [-v] [-b size[m|g]] file
#
#   -b  Split at this number of bytes; an "m" suffix means megabytes
#       (x 1,000,000) and a "g" suffix means gigabytes (x 1,000,000,000).
#       Default is 100mb files.
#   -v  Turns on logging.
#
# Options must come before the file name.

# Requirements
require 5.006;
use strict;
use warnings;
use Getopt::Std;

# Default to 100 mb files.
my $DEFAULT_SIZEBREAK = 100_000_000;

my $usage = "Usage: $0 [-v] [-b size (m/g)] file\n";

# Get our parameters. -v is a boolean flag, -b takes an argument.
# Keep a copy of the raw arguments because getopts() consumes @ARGV and the
# verbose flag is only known after parsing.
my @raw_args = @ARGV;
my %opt;
getopts('vb:', \%opt) or die $usage;
@ARGV >= 1 or die $usage;

my $verbose = $opt{v};
print "Raw parameters: @raw_args\n" if $verbose;

# Use -b if it was given, otherwise the default set above.
my $sizebreak = $DEFAULT_SIZEBREAK;
if (defined $opt{b}) {
    print "Split size is $opt{b}\n" if $verbose;
    $sizebreak = parse_size($opt{b});
    defined $sizebreak
        or die "$0: invalid split size '$opt{b}'\n$usage";
}
print "Size break is $sizebreak;\n" if $verbose;

# The input file is the last remaining argument.
my $filename = $ARGV[-1];
print "Input file is $filename\n" if $verbose;

# Does the file exist?
-e $filename
    or die "$0 file not found - $filename, aborting...\n";

# Split, then print the summary report.
my ($recs, $files, $bytecount) = split_file($filename, $sizebreak);
print "\n$recs lines were processed into $files file(s), $bytecount total bytes.\n";

exit 0;

# parse_size($spec)
#
# Convert a -b size specification into a number of bytes.
#   $spec - a non-negative decimal number, optionally followed by "m"
#           (megabytes) or "g" (gigabytes), in either letter case.
# Returns the size in whole bytes, or nothing (undef in scalar context)
# when $spec is malformed or works out to less than one byte.
sub parse_size {
    my ($spec) = @_;

    return unless defined $spec;
    return unless $spec =~ /\A(\d+(?:\.\d+)?)([mMgG]?)\z/;

    my ($number, $unit) = ($1, lc $2);
    my $bytes = $unit eq 'm' ? $number * 1_000_000
              : $unit eq 'g' ? $number * 1_000_000_000
              :                $number;
    $bytes = int $bytes;

    # A zero limit would put every line in its own file; treat it as invalid.
    return unless $bytes >= 1;
    return $bytes;
}

# open_piece($filename, $ctr)
#
# Create (or truncate) output piece number $ctr of $filename.
# Returns the write handle and the name of the piece; dies if the piece
# cannot be opened.
sub open_piece {
    my ($filename, $ctr) = @_;

    my $newfile = $filename . "." . pad_zeros($ctr);
    open my $out, '>', $newfile
        or die "$0: cannot open $newfile for writing: $!\n";
    binmode $out;    # copy bytes verbatim, no newline translation

    return ($out, $newfile);
}

# split_file($filename, $sizebreak)
#
# Copy $filename line by line into numbered pieces ($filename.001, ...),
# starting a new piece once the current one has grown past $sizebreak bytes.
# An empty input still produces one (empty) piece.
# Returns (lines read, pieces written, total bytes written); dies on any
# open, write or close failure.
sub split_file {
    my ($filename, $sizebreak) = @_;

    open my $in, '<', $filename
        or die "$0: cannot open $filename for reading: $!\n";
    binmode $in;     # so length() below counts the bytes actually on disk

    my $ctr = 1;
    my ($out, $newfile) = open_piece($filename, $ctr);

    my $recs        = 0;
    my $bytecount   = 0;
    my $currentsize = 0;

    while (defined(my $line = <$in>)) {
        # Roll over only after the limit has been passed, so the line that
        # crosses the limit stays in the piece it was written to.
        if ($currentsize > $sizebreak) {
            close $out or die "$0: error closing $newfile: $!\n";
            $ctr++;
            ($out, $newfile) = open_piece($filename, $ctr);
            $currentsize = 0;
        }

        print {$out} $line
            or die "$0: error writing $newfile: $!\n";

        # Every line counts towards the totals, including the one that
        # opens a new piece.
        my $size = length $line;
        $recs++;
        $currentsize += $size;
        $bytecount   += $size;
    }

    # Buffered write errors only surface at close, so check it.
    close $out or die "$0: error closing $newfile: $!\n";
    close $in  or die "$0: error closing $filename: $!\n";

    return ($recs, $ctr, $bytecount);
}

# pad_zeros($unpadded)
#
# Left-pad a piece counter with zeros to at least three digits
# (7 -> "007", 42 -> "042", 123 -> "123").
# Counters longer than three digits are returned unpadded after a warning,
# because the pieces will then no longer sort in order by name.
# Dies if the counter is missing or empty.
sub pad_zeros {
    my ($unpadded) = @_;

    (defined $unpadded && length $unpadded)
        or die "ERROR: empty counter at pad_zeros()!\n";

    warn "ERROR: too many files!\n" if length($unpadded) > 3;

    return sprintf '%03d', $unpadded;
}