#!/usr/bin/perl
# findin.pl -- shell: finds lines that are in the first file AND ALSO in the
#            : second file.
#
# Description: This script finds any line which exists in the first file,
#            : AND IN the second file. It does not consider how many
#            : times, but only if a line of text appears in both files.
#            : It could be named findinboth.pl, but just findin.pl is long
#            : enough. This is useful if you want to make sure that both
#            : files contain all the same lines, but you don't care how
#            : many times. For example comparing two different versions
#            : of a configuration file.
#            : Results are displayed in the order they occur in the second
#            : file, so reversing the command line filenames can change
#            : the order of output, although the same lines are displayed.
#            : Each common line is displayed once, prefixed with
#            : "ISINBOTH ". An optional third argument displays just the
#            : matching lines and nothing else. The third argument doesn't
#            : have to be anything in particular, just its existence says
#            : produce clean output.
#            : Blank lines (empty or whitespace-only) are ignored, and
#            : lines are compared without their trailing newline, so a
#            : final line that lacks a newline still matches.
#
# Exit status: 0 on success; 1 on a usage error or when the files are too
#            : large; die()s if either file cannot be opened.

use strict;
use warnings;

# Combined size (in bytes) of both input files must not exceed this value.
my $MAX_COMBINED_BYTES = 49999999;

if (@ARGV < 2) {
    print " Finds whats IN file1 AND IN file2\n";
    print " Usage : $0 file1 file2\n";
    print " findin.pl ignores blank lines. By default, findin.pl displays\n";
    print " lines with a prefix including the filename. Any third CLI argument\n";
    print " turns off the prefix and displays the lines exactly as they are\n";
    print " in the files.\n";
    exit(1);
}

my $file1 = $ARGV[0];
my $file2 = $ARGV[1];

# Any third argument at all switches to clean (prefix-free) output.
my $cleanout = defined($ARGV[2]) ? 1 : 0;

# -s yields undef for a missing file; treat that as size 0 here and let the
# open() below report the real error.
my $size1 = (-s $file1) || 0;
my $size2 = (-s $file2) || 0;

if (($size1 + $size2) > $MAX_COMBINED_BYTES) {
    print "One or both files are too large to process\n";
    print "Combined size of both files must be less than 50MB\n";
    exit(1);
}

# Remember every non-blank line of the first file as a hash key.
my %isinfile1;

open(my $in1, '<', $file1) or die "Can't open $file1 for input: $!";
while (my $line = <$in1>) {
    chomp $line;
    next unless $line =~ /\S/;    # ignore blank lines
    $isinfile1{$line} = 1;
}
close($in1);

# Go through the second file in order. A line is printed when it was seen in
# the first file and has not been printed yet; %alreadyprinted makes sure a
# line repeated in the second file is reported only once.
my $prefix = $cleanout ? '' : 'ISINBOTH ';
my %alreadyprinted;

open(my $in2, '<', $file2) or die "Can't open $file2 for input: $!";
while (my $line = <$in2>) {
    chomp $line;
    next unless $line =~ /\S/;    # ignore blank lines
    next unless exists $isinfile1{$line};
    next if $alreadyprinted{$line}++;
    print "$prefix$line\n";
}
close($in2);

exit(0);