#!/usr/bin/perl
# findnotin.pl -- shell: finds lines that are NOT in the first file but
#                      : ARE in the second file.
#
#  Description: This script finds any line which exists in the second file,
#             : but NOT in the first file. It does not consider how many
#             : times a line occurs, only whether a line of text appears in
#             : the second file but NOT in the first file. This is useful for
#             : comparing configuration files without all the clutter of diff.
#             : For example, you don't care how many extra blank lines one
#             : file has over the other, you want to find what the critical
#             : differences are.

use strict;
use warnings;

# Main script body.
#
# Arguments (from @ARGV):
#   file1 - reference file; lines present here are never reported.
#   file2 - file to examine; each distinct non-blank line that does not
#           occur in file1 is reported once, in order of first appearance.
#   any third argument - print the bare lines instead of prefixing each
#           one with "NOTIN <file1> ".
#
# Exit status: 0 on success, 1 on a usage error or when the inputs are too
# large; dies with the system error text if either file cannot be opened.

# Largest combined size, in bytes, of the two input files that will be
# accepted (one byte short of 100 MB).
my $max_combined_bytes = 99_999_999;

if (@ARGV < 2) {
    print "Finds whats NOT in file1 but IS in file2\n";
    print "\nUsage : $0 file1 file2\n\n";
    print "Optional third argument prints bare NOTIN lines only.\n";
    print "The third argument can be anything.\n";
    print "NOTIN lines are printed in the order they occur in the second\n";
    print "file. The number of times a line occurs is not considered.\n";
    print "Blank lines are ignored.\n";
    exit(1);
}

my ($file1, $file2) = @ARGV[0, 1];

# Only the presence of a third argument matters, not its value.
my $cleanout = defined($ARGV[2]) ? 1 : 0;

# Refuse inputs whose combined size exceeds the limit, because every distinct
# line of file1 is held in memory. A file that cannot be stat'ed counts as
# zero bytes here; the open() below reports the real error for it.
my $combined_bytes = 0;
foreach my $path ($file1, $file2) {
    my $bytes = (stat($path))[7];
    $combined_bytes += $bytes if defined $bytes;
}

if ($combined_bytes > $max_combined_bytes) {
    print "One or both files are too large to process\n";
    # The threshold above is just under 100 MB, so the message says so.
    print "Combined size of both files must be less than 100MB\n";
    exit(1);
}

# Record every line of the first file as a hash key, reading one line at a
# time. Line terminators are removed so that a final line lacking a trailing
# newline still matches the same text elsewhere.
my %in_file1;
open(my $in1, '<', $file1) or die "Can't open $file1 for input: $!\n";
while (my $line = <$in1>) {
    chomp $line;
    $in_file1{$line} = 1;
}
close($in1);

# Stream the second file and report each line that is absent from the first
# file. %already_printed ensures a repeated line is reported only once, and
# reports therefore appear in the order the lines first occur in file2.
my $prefix = $cleanout ? '' : "NOTIN $file1 ";
my %already_printed;

open(my $in2, '<', $file2) or die "Can't open $file2 for input: $!\n";
while (my $line = <$in2>) {
    chomp $line;

    # Blank (empty or whitespace-only) lines are ignored, as the usage
    # text promises.
    next unless $line =~ /\S/;

    next if exists $in_file1{$line};
    next if $already_printed{$line}++;

    print "$prefix$line\n";
}
close($in2);

exit(0);
