156 lines
4.8 KiB
Perl
156 lines
4.8 KiB
Perl
|
# ==================================================================
|
||
|
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||
|
#
|
||
|
# GT::RDF
|
||
|
# Author : Scott Beck
|
||
|
# CVS Info : 087,071,086,086,085
|
||
|
# $Id: RDF.pm,v 1.2 2001/04/11 02:37:12 alex Exp $
|
||
|
#
|
||
|
# Copyright (c) 2000 Gossamer Threads Inc. All Rights Reserved.
|
||
|
# ==================================================================
|
||
|
#
|
||
|
# Description: An RDF parser.
|
||
|
#
|
||
|
|
||
|
package GT::RDF;
|
||
|
|
||
|
use GT::Base;
|
||
|
use strict;
|
||
|
use vars qw/$DEBUG @ISA $TAG $ERRORS/;
|
||
|
|
||
|
@ISA = qw(GT::Base);
|
||
|
$DEBUG = 0;
|
||
|
$TAG = 'Topic|ExternalPage';
|
||
|
$ERRORS = {};
|
||
|
|
||
|
sub init {
|
||
|
my $self = shift;
|
||
|
my $opt = {};
|
||
|
if (@_ == 1) {
|
||
|
$self->io (shift()) or return;
|
||
|
}
|
||
|
else {
|
||
|
if (ref $_[0] eq 'HASH') { $opt = shift }
|
||
|
elsif (defined ($_[0]) and not @_ % 2) { $opt = {@_} }
|
||
|
exists ($opt->{io}) or return $self->error ("BADARGS", "FATAL", 'CLASS->new (%opt) %opt must contain the key io and it must be either a file handle or a path to a file.');
|
||
|
$self->io ($opt->{io});
|
||
|
}
|
||
|
$self->{io} || return $self->error ("BADARGS", "FATAL", 'CLASS->new (\\*FH) -or- CLASS->new (%opts). You must define in input. Either a file or a file handle');
|
||
|
return $self;
|
||
|
}
|
||
|
|
||
|
sub io {
|
||
|
my ($self, $io) = @_;
|
||
|
if (ref $io eq 'GLOB') {
|
||
|
$self->{io} = $io;
|
||
|
}
|
||
|
elsif (-e $io) {
|
||
|
my $fh = \do { local *FH; *FH };
|
||
|
open $fh, $io or return $self->error ("OPENREAD", "FATAL", $!);
|
||
|
$self->{io} = $fh;
|
||
|
}
|
||
|
else {
|
||
|
return $self->error ("BADARGS", "FATAL", '$obj->io (\*FH) -or- $obj->io ("/path/to/file")');
|
||
|
}
|
||
|
}
|
||
|
|
||
|
sub parse {
|
||
|
my $self = shift;
|
||
|
|
||
|
my $io = $self->{io};
|
||
|
|
||
|
while (1) {
|
||
|
$self->{name} = '';
|
||
|
$self->{attribs} = {};
|
||
|
$self->{tags} = [];
|
||
|
my $parse;
|
||
|
if ($self->{buffer} =~ s,(<($TAG).*?</\2[^>]*?>),$parse = $1; '',oes) {
|
||
|
my @tokens = grep !/^\s*$/, split /(<[^>]+?>)/, $parse;
|
||
|
my $start = shift (@tokens);
|
||
|
|
||
|
# Discard closing tag
|
||
|
pop (@tokens);
|
||
|
|
||
|
# Get the start tag and its attributes
|
||
|
$start =~ /^<($TAG)\s*(.*[^\/])>$/os;
|
||
|
$self->{name} = $1;
|
||
|
my $attr = $2;
|
||
|
if ($attr) {
|
||
|
my @tmp = split (/"/, $attr);
|
||
|
my $ret = {};
|
||
|
my $last = '';
|
||
|
for (0 .. $#tmp) {
|
||
|
if (!$_ % 2) {
|
||
|
$tmp[$_] =~ s/^\s+|=$//g;
|
||
|
$last = $tmp[$_];
|
||
|
$ret->{$last} = '';
|
||
|
}
|
||
|
else {
|
||
|
$ret->{$last} = $tmp[$_];
|
||
|
}
|
||
|
}
|
||
|
$self->{attribs} = $ret;
|
||
|
}
|
||
|
|
||
|
# Parse the remaining tags.
|
||
|
my $last_entry;
|
||
|
for (@tokens) {
|
||
|
if (/^<([^\/\s]+)\s*(.*?[^\/])?>$/s) {
|
||
|
my $tag = $1;
|
||
|
my $attr = $2;
|
||
|
my $ret = {};
|
||
|
if ($attr) {
|
||
|
my @tmp = split (/"/, $attr);
|
||
|
my $last = '';
|
||
|
for (0 .. $#tmp) {
|
||
|
if (!$_ % 2) {
|
||
|
$tmp[$_] =~ s/^\s+|=$//g;
|
||
|
$last = $tmp[$_];
|
||
|
$ret->{$last} = '';
|
||
|
}
|
||
|
else {
|
||
|
$ret->{$last} = $tmp[$_];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
$last_entry = { name => $tag, attribs => $ret };
|
||
|
push (@{$self->{tags}}, $last_entry);
|
||
|
}
|
||
|
elsif (/^<([^\s\/]+)\s*(.*?)\/>$/s) {
|
||
|
my $tag = $1;
|
||
|
my $attr = $2;
|
||
|
my $ret = {};
|
||
|
if ($attr) {
|
||
|
my @tmp = split (/"/, $attr);
|
||
|
my $last = '';
|
||
|
for (0 .. $#tmp) {
|
||
|
if (!$_ % 2) {
|
||
|
$tmp[$_] =~ s/^\s+|=$//g;
|
||
|
$last = $tmp[$_];
|
||
|
$ret->{$last} = '';
|
||
|
}
|
||
|
else {
|
||
|
$ret->{$last} = $tmp[$_];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
my $entry = { name => $tag, attribs => $ret };
|
||
|
push (@{$self->{tags}}, $entry);
|
||
|
}
|
||
|
elsif (/^([^<]+)$/ and $last_entry) {
|
||
|
$last_entry->{data} = $1;
|
||
|
}
|
||
|
}
|
||
|
return $self;
|
||
|
}
|
||
|
# No match
|
||
|
else {
|
||
|
my $tmp;
|
||
|
read ($io, $tmp, 3072) or last;
|
||
|
$self->{buffer} .= $tmp;
|
||
|
}
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|