discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/parse.pl

#!/usr/bin/perl

################################################################################################
#who:  Kevin Palmer, KP Web Design
#what: perl script which searches a given file for a given string, highlights all matches of
#      that string, and prints the highlighted text to the screen
#last edited: 10/25/01
#why: Slowtwitch Search Engine
################################################################################################

# The CGI.pm module is by far the best CGI module for Perl
use CGI qw(:standard);

print header(-expires=>"$expire");

my $client=new CGI();

$text = $client->url_param('text');
$url = $client->url_param('url'); #file url
$my_url=$client->self_url();

$file = "/home/slowtwitch/slowtwitch.com/www"  . substr($url,25); # relative file path from /cgi-bin (cut 'http://www.slowtwitch.com' from url)
$path = substr($url,0,rindex($url, '/'));  #file path minus filename

unless(-e "$file") { die "File does not exist: $file\n"; }
open(FILE,"<$file") or die "Cannot open $file.\n";

while(<FILE>) {
  my $line="$_";

   #replace HTML codes for non-enlish characters with english "equivalents"
#   $line =~ s/\&auml\;/a/gi;
#   $line =~ s/\&uuml\;/u/gi;

   #if "unreadable EOL" character exists, split page at "EOLs" to yield "lines"
   if ($line =~ /\r/) {
	@lines = split(\r,$line);
	foreach $x (@lines) {

          $x =~ s/$text/<span style=\"border: 1px dashed \#666; padding-left: 5px; padding-right: 5px; background-color: \#DDD\">$&<\/span>/gi;  #highlight search text

          ####edit relative image and file paths for correctness from cgi-bin/

          ##edit image paths so image links work properly from cgi-bin/
          if ($x =~ m/src=\"\w\w\w\w/i) {	#no path, just filename (assumes all filenames are at least 4 characters long)
            if ($& ne "src=\"http") {		#not an absolute link (assumes all absolute links are to outside domains)
              my @parts = split(/\"/, $&);
              $x =~ s/src=\"\w\w\w\w/$parts[0]\"$path\/$parts[1]/gi;
            }
          }
          $x =~ s/src=\"\.\./src=\"$path\/\.\./gi;	# parent path (relative link)
          $x =~ s/src=\"\//src=\"$path\//gi;		# absolute virtual path

          ##edit anchor paths so links work properly from cgi-bin/

           $x =~ s/<a href=\"/$&$path\//gi;  #edit all links, then go back later for mistakes
	   $x =~ s/<a href=\"$path\/http/<a href="http/gi;		# absolute link (href="http://...")
           $x =~ s/<a href=\"$path\/\#/<a href=\"$my_url\#/gi;		# internal anchor (href="#...")
           $x =~ s/<a href=\"$path\/\//<a href=\"$path\//gi;		# absolute virtual path (href="/...")
#	   $x =~ s/<a href=\"$path\/\.\./<a href=\"$path\/\.\./gi;	# parent path (relative link) (href="../...")

	}
	$line = join(\r,@lines);
   }
   else { #no "unreadable EOL" characters present

        $line =~ s/$text/<span style=\"border: 1px dashed \#666; padding-left: 5px; padding-right: 5px; background-color: \#DDD\">$&<\/span>/gi;  #highlight search text

        ####edit relative image and file paths for correctness from cgi-bin/

        ##edit image paths so image links work properly from cgi-bin/
        if ($line =~ m/src=\"\w\w\w\w/i) {	#no path, just filename (assumes all filenames are at least 4 characters long)
          if ($& ne "src=\"http") {	#not an absolute link (assumes all absolute links are to outside domains)
            my @parts = split(/\"/, $&);
            $line =~ s/src=\"\w\w\w\w/$parts[0]\"$path\/$parts[1]/gi;
          }
        }
        $line =~ s/src=\"\.\./src=\"$path\/\.\./gi;	# parent path (relative link)
        $line =~ s/src=\"\//src=\"$path\//gi;		# absolute virtual path

           $line =~ s/<a href=\"/$&$path\//gi;  #edit all links, then go back later for mistakes
	   $line =~ s/<a href=\"$path\/http/<a href="http/gi;		# absolute link (href="http://...")
           $line =~ s/<a href=\"$path\/\#/<a href=\"$my_url\#/gi;	# internal anchor (href="#...")
           $line =~ s/<a href=\"$path\/\//<a href=\"$path\//gi;		# absolute virtual path (href="/...")
#	   $line =~ s/<a href=\"$path\/\.\./<a href=\"$path\/\.\./gi;	# parent path (relative link) (href="../...")

   }

   print $line;

} #endwhile

close FILE;

1;