#!/usr/bin/perl
#
# findnotin.pl -- finds lines that are NOT in the first file but
#              :  ARE in the second file.
#
# Description: This script finds any line which exists in the second file,
#            : but NOT in the first file.  It does not consider how many
#            : times a line occurs, only whether a line of text appears in
#            : the second file but not in the first.  This is useful for
#            : comparing configuration files without all the clutter of
#            : diff.  For example, you don't care how many extra blank
#            : lines one file has over the other, you want to find what
#            : the critical differences are.
#
# Usage      : findnotin.pl file1 file2 [bare]
#            : Any third argument switches to "bare" output: the NOTIN
#            : lines are printed as-is, without the "NOTIN file1 " prefix.
#
# Output     : One line per distinct NOTIN line, in the order of first
#            : occurrence in the second file.  Blank (whitespace-only)
#            : lines are never reported.
#
# Exit status: 0 on success, 1 on a usage error or when the inputs are
#            : too large; die()s if either file cannot be opened.

use strict;
use warnings;

# Upper bound, in bytes, on the combined size of both input files.  The set
# of distinct lines of the first file is held in memory, so very large
# inputs are refused up front.
my $MAX_COMBINED_BYTES = 99_999_999;

if (@ARGV < 2) {
    print "Finds whats NOT in file1 but IS in file2\n";
    print "\nUsage : $0 file1 file2\n\n";
    print "Optional third argument prints bare NOTIN lines only.\n";
    print "The third argument can be anything.\n";
    print "NOTIN lines are printed in the order they occur in the second\n";
    print "file. The number of times a line occurs is not considered.\n";
    print "Blank lines are ignored.\n";
    exit(1);
}

my ($file1, $file2) = @ARGV[0, 1];

# The mere presence of a third argument selects bare output; its value is
# never inspected.
my $cleanout = defined($ARGV[2]) ? 1 : 0;

# -s yields undef for a file that cannot be stat'ed; count that as zero here
# and let the open() below report the real error.
my $size1 = (-s $file1) || 0;
my $size2 = (-s $file2) || 0;

if (($size1 + $size2) > $MAX_COMBINED_BYTES) {
    print "One or both files are too large to process\n";
    print "Combined size of both files must be less than 100MB\n";
    exit(1);
}

# Pass 1: remember every distinct line of the first file.  Lines are read one
# at a time and stored without their line terminator, so that a final line
# lacking a trailing newline still compares equal to the same text elsewhere.
my %isinfile1;
open(my $in1, '<', $file1) or die "Can't open $file1 for input: $!";
while (my $line = <$in1>) {
    chomp $line;
    $isinfile1{$line} = 1;
}
close($in1);

# Pass 2: stream the second file and report each line that the first file
# does not contain.  %alreadyprinted guarantees that a NOTIN line is reported
# only once, at its first occurrence, no matter how often it repeats.
my $prefix = $cleanout ? '' : "NOTIN $file1 ";
my %alreadyprinted;

open(my $in2, '<', $file2) or die "Can't open $file2 for input: $!";
while (my $line = <$in2>) {
    chomp $line;
    next unless $line =~ /\S/;           # blank lines are ignored
    next if exists $isinfile1{$line};    # present in file1: not a NOTIN line
    next if $alreadyprinted{$line}++;    # already reported once
    print "$prefix$line\n";
}
close($in2);

exit(0);