#!/usr/bin/perl
# findin.pl -- shell: finds lines that are in the first file AND ALSO in the
#                      : second file.
#
#  Description: This script finds any line which exists in the first file,
#             : AND IN the second file. It does not consider how many
#             : times, but only if a line of text appears in both files.
#             : It could be named findinboth.pl, but just findin.pl is long 
#             : enough. This is useful if you want to make sure that both
#             : files contain all the same lines, but you don't care how 
#             : many times. For example comparing two different versions
#             : of a configuration file.
#             : Results are displayed in the order they occur in the second
#             : file, so reversing the command line filenames can change
#             : the order of output, although the same lines are displayed.
#             : Optional third argument displays just the matching lines
#             : and nothing else. The third argument doesn't have to be
#             : anything in particular; its mere existence requests clean
#             : output.


# Both file names are mandatory. Without them, print the usage text on
# standard output and leave with a failure status.
if (scalar(@ARGV) < 2) {
    print "  Finds whats IN file1 AND IN file2\n"
        . "  Usage : $0 file1 file2\n"
        . "  findin.pl ignores blank lines. By default, findin.pl displays\n"
        . "  lines with a prefix including the filename. Any third CLI argument\n"
        . "  turns off the prefix and displays the lines exactly as they are\n"
        . "  in the files.\n";
    exit(1);
}

# The first two arguments are the files to compare.
($file1, $file2) = @ARGV[0, 1];

# Any third argument, whatever its value, selects clean (prefix-free) output.
$cleanout = defined($ARGV[2]) ? 1 : 0;

# Both files are read into memory in full, so refuse to continue when their
# combined size reaches 50MB.
#
# Only the size field (index 7) of stat() is needed. If stat() fails, for
# example because the file does not exist, the size is treated as 0 and the
# failure is reported when the file is opened.
my $max_total_bytes = 49999999;
my $size1 = (stat($file1))[7] || 0;
my $size2 = (stat($file2))[7] || 0;

if (($size1 + $size2) > $max_total_bytes) {
    print "One or both files are too large to process\n";
    print "Combined size of both files must be less than 50MB\n";
    exit(1);
}



# Current local time, kept available in case a date needs to be printed.
# $today is formatted as "YYYYMMDD HH:MM:SS".
@wkdays = qw(Sun Mon Tue Wed Thu Fri Sat);
($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(time);
$mon  = $mon + 1;        # localtime() months are 0-based
$year = $year + 1900;    # localtime() years count from 1900
$today = sprintf("%04d%02d%02d %02d:%02d:%02d", $year, $mon, $mday, $hour, $min, $sec);


# Read both files into memory, one array element per line (newline kept).
#
# Three-argument open() is used so that a file name beginning with '>' or
# ending with '|' is treated as a plain name and never as a mode or command.
# The system error ($!) is included in the failure message.
#
# If the last line of a file has no trailing newline, one is appended so that
# it compares equal to the same text appearing mid-file in the other file,
# and so that every line printed later ends with a newline.
open(my $in1, '<', $file1) or die "Can't open $file1 for input: $!";
@file1 = <$in1>;
close($in1);
$file1[-1] .= "\n" if @file1 && $file1[-1] !~ /\n\z/;

open(my $in2, '<', $file2) or die "Can't open $file2 for input: $!";
@file2 = <$in2>;
close($in2);
$file2[-1] .= "\n" if @file2 && $file2[-1] !~ /\n\z/;

# Index every line of the first file: each line becomes a key of %isinfile1
# with the value 1, giving a quick membership test later on.
@isinfile1{@file1} = (1) x scalar(@file1);



# Walk the second file in order and report every line that also occurs in the
# first file. %alreadyprinted remembers what has been reported, so a line that
# is repeated in the second file is printed only once.
#
# Blank lines (empty or whitespace-only) are skipped, as the usage text
# promises. The two output modes differ only in the "ISINBOTH " prefix, so a
# single loop serves both: with a third command line argument the prefix is
# empty and lines are printed exactly as they appear in the file.
my $prefix = $cleanout ? '' : 'ISINBOTH ';
my %alreadyprinted;

foreach my $line (@file2) {
    next unless $line =~ /\S/;               # ignore blank lines
    next unless exists $isinfile1{$line};    # not present in the first file
    next if $alreadyprinted{$line}++;        # already reported once
    print "$prefix$line";
}
exit(0);
