discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/parse.pl
2024-06-17 21:49:12 +10:00

95 lines
4.1 KiB
Perl
Executable File

#!/usr/bin/perl
################################################################################################
#who: Kevin Palmer, KP Web Design
#what: perl script which searches a given file for a given string, highlights all matches of
# that string, and prints the highlighted text to the screen
#last edited: 10/25/01
#why: Slowtwitch Search Engine
################################################################################################
# The CGI.pm module is by far the best CGI module for Perl
use CGI qw(:standard);
print header(-expires=>"$expire");
my $client=new CGI();
$text = $client->url_param('text');
$url = $client->url_param('url'); #file url
$my_url=$client->self_url();
$file = "/home/slowtwitch/slowtwitch.com/www" . substr($url,25); # relative file path from /cgi-bin (cut 'http://www.slowtwitch.com' from url)
$path = substr($url,0,rindex($url, '/')); #file path minus filename
unless(-e "$file") { die "File does not exist: $file\n"; }
open(FILE,"<$file") or die "Cannot open $file.\n";
while(<FILE>) {
my $line="$_";
#replace HTML codes for non-enlish characters with english "equivalents"
# $line =~ s/\&auml\;/a/gi;
# $line =~ s/\&uuml\;/u/gi;
#if "unreadable EOL" character exists, split page at "EOLs" to yield "lines"
if ($line =~ /\r/) {
@lines = split(\r,$line);
foreach $x (@lines) {
$x =~ s/$text/<span style=\"border: 1px dashed \#666; padding-left: 5px; padding-right: 5px; background-color: \#DDD\">$&<\/span>/gi; #highlight search text
####edit relative image and file paths for correctness from cgi-bin/
##edit image paths so image links work properly from cgi-bin/
if ($x =~ m/src=\"\w\w\w\w/i) { #no path, just filename (assumes all filenames are at least 4 characters long)
if ($& ne "src=\"http") { #not an absolute link (assumes all absolute links are to outside domains)
my @parts = split(/\"/, $&);
$x =~ s/src=\"\w\w\w\w/$parts[0]\"$path\/$parts[1]/gi;
}
}
$x =~ s/src=\"\.\./src=\"$path\/\.\./gi; # parent path (relative link)
$x =~ s/src=\"\//src=\"$path\//gi; # absolute virtual path
##edit anchor paths so links work properly from cgi-bin/
$x =~ s/<a href=\"/$&$path\//gi; #edit all links, then go back later for mistakes
$x =~ s/<a href=\"$path\/http/<a href="http/gi; # absolute link (href="http://...")
$x =~ s/<a href=\"$path\/\#/<a href=\"$my_url\#/gi; # internal anchor (href="#...")
$x =~ s/<a href=\"$path\/\//<a href=\"$path\//gi; # absolute virtual path (href="/...")
# $x =~ s/<a href=\"$path\/\.\./<a href=\"$path\/\.\./gi; # parent path (relative link) (href="../...")
}
$line = join(\r,@lines);
}
else { #no "unreadable EOL" characters present
$line =~ s/$text/<span style=\"border: 1px dashed \#666; padding-left: 5px; padding-right: 5px; background-color: \#DDD\">$&<\/span>/gi; #highlight search text
####edit relative image and file paths for correctness from cgi-bin/
##edit image paths so image links work properly from cgi-bin/
if ($line =~ m/src=\"\w\w\w\w/i) { #no path, just filename (assumes all filenames are at least 4 characters long)
if ($& ne "src=\"http") { #not an absolute link (assumes all absolute links are to outside domains)
my @parts = split(/\"/, $&);
$line =~ s/src=\"\w\w\w\w/$parts[0]\"$path\/$parts[1]/gi;
}
}
$line =~ s/src=\"\.\./src=\"$path\/\.\./gi; # parent path (relative link)
$line =~ s/src=\"\//src=\"$path\//gi; # absolute virtual path
$line =~ s/<a href=\"/$&$path\//gi; #edit all links, then go back later for mistakes
$line =~ s/<a href=\"$path\/http/<a href="http/gi; # absolute link (href="http://...")
$line =~ s/<a href=\"$path\/\#/<a href=\"$my_url\#/gi; # internal anchor (href="#...")
$line =~ s/<a href=\"$path\/\//<a href=\"$path\//gi; # absolute virtual path (href="/...")
# $line =~ s/<a href=\"$path\/\.\./<a href=\"$path\/\.\./gi; # parent path (relative link) (href="../...")
}
print $line;
} #endwhile
close FILE;
1;