discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/articles/GT/RDF.pm

156 lines
4.8 KiB
Perl
Raw Normal View History

2024-06-17 11:49:12 +00:00
# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::RDF
# Author : Scott Beck
# CVS Info : 087,071,086,086,085
# $Id: RDF.pm,v 1.2 2001/04/11 02:37:12 alex Exp $
#
# Copyright (c) 2000 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description: An RDF parser.
#
package GT::RDF;
use GT::Base;
use strict;
use vars qw/$DEBUG @ISA $TAG $ERRORS/;
@ISA = qw(GT::Base);
$DEBUG = 0;
$TAG = 'Topic|ExternalPage';
$ERRORS = {};
sub init {
my $self = shift;
my $opt = {};
if (@_ == 1) {
$self->io (shift()) or return;
}
else {
if (ref $_[0] eq 'HASH') { $opt = shift }
elsif (defined ($_[0]) and not @_ % 2) { $opt = {@_} }
exists ($opt->{io}) or return $self->error ("BADARGS", "FATAL", 'CLASS->new (%opt) %opt must contain the key io and it must be either a file handle or a path to a file.');
$self->io ($opt->{io});
}
$self->{io} || return $self->error ("BADARGS", "FATAL", 'CLASS->new (\\*FH) -or- CLASS->new (%opts). You must define in input. Either a file or a file handle');
return $self;
}
sub io {
my ($self, $io) = @_;
if (ref $io eq 'GLOB') {
$self->{io} = $io;
}
elsif (-e $io) {
my $fh = \do { local *FH; *FH };
open $fh, $io or return $self->error ("OPENREAD", "FATAL", $!);
$self->{io} = $fh;
}
else {
return $self->error ("BADARGS", "FATAL", '$obj->io (\*FH) -or- $obj->io ("/path/to/file")');
}
}
sub parse {
my $self = shift;
my $io = $self->{io};
while (1) {
$self->{name} = '';
$self->{attribs} = {};
$self->{tags} = [];
my $parse;
if ($self->{buffer} =~ s,(<($TAG).*?</\2[^>]*?>),$parse = $1; '',oes) {
my @tokens = grep !/^\s*$/, split /(<[^>]+?>)/, $parse;
my $start = shift (@tokens);
# Discard closing tag
pop (@tokens);
# Get the start tag and its attributes
$start =~ /^<($TAG)\s*(.*[^\/])>$/os;
$self->{name} = $1;
my $attr = $2;
if ($attr) {
my @tmp = split (/"/, $attr);
my $ret = {};
my $last = '';
for (0 .. $#tmp) {
if (!$_ % 2) {
$tmp[$_] =~ s/^\s+|=$//g;
$last = $tmp[$_];
$ret->{$last} = '';
}
else {
$ret->{$last} = $tmp[$_];
}
}
$self->{attribs} = $ret;
}
# Parse the remaining tags.
my $last_entry;
for (@tokens) {
if (/^<([^\/\s]+)\s*(.*?[^\/])?>$/s) {
my $tag = $1;
my $attr = $2;
my $ret = {};
if ($attr) {
my @tmp = split (/"/, $attr);
my $last = '';
for (0 .. $#tmp) {
if (!$_ % 2) {
$tmp[$_] =~ s/^\s+|=$//g;
$last = $tmp[$_];
$ret->{$last} = '';
}
else {
$ret->{$last} = $tmp[$_];
}
}
}
$last_entry = { name => $tag, attribs => $ret };
push (@{$self->{tags}}, $last_entry);
}
elsif (/^<([^\s\/]+)\s*(.*?)\/>$/s) {
my $tag = $1;
my $attr = $2;
my $ret = {};
if ($attr) {
my @tmp = split (/"/, $attr);
my $last = '';
for (0 .. $#tmp) {
if (!$_ % 2) {
$tmp[$_] =~ s/^\s+|=$//g;
$last = $tmp[$_];
$ret->{$last} = '';
}
else {
$ret->{$last} = $tmp[$_];
}
}
}
my $entry = { name => $tag, attribs => $ret };
push (@{$self->{tags}}, $entry);
}
elsif (/^([^<]+)$/ and $last_entry) {
$last_entry->{data} = $1;
}
}
return $self;
}
# No match
else {
my $tmp;
read ($io, $tmp, 3072) or last;
$self->{buffer} .= $tmp;
}
}
return;
}