# ================================================================== # Gossamer Threads Module Library - http://gossamer-threads.com/ # # GT::RDF # Author : Scott Beck # CVS Info : # $Id: RDF.pm,v 1.2 2001/04/11 02:37:12 alex Exp $ # # Copyright (c) 2000 Gossamer Threads Inc. All Rights Reserved. # ================================================================== # # Description: An RDF parser. # package GT::RDF; use GT::Base; use strict; use vars qw/$DEBUG @ISA $TAG $ERRORS/; @ISA = qw(GT::Base); $DEBUG = 0; $TAG = 'Topic|ExternalPage'; $ERRORS = {}; sub init { my $self = shift; my $opt = {}; if (@_ == 1) { $self->io (shift()) or return; } else { if (ref $_[0] eq 'HASH') { $opt = shift } elsif (defined ($_[0]) and not @_ % 2) { $opt = {@_} } exists ($opt->{io}) or return $self->error ("BADARGS", "FATAL", 'CLASS->new (%opt) %opt must contain the key io and it must be either a file handle or a path to a file.'); $self->io ($opt->{io}); } $self->{io} || return $self->error ("BADARGS", "FATAL", 'CLASS->new (\\*FH) -or- CLASS->new (%opts). You must define in input. Either a file or a file handle'); return $self; } sub io { my ($self, $io) = @_; if (ref $io eq 'GLOB') { $self->{io} = $io; } elsif (-e $io) { my $fh = \do { local *FH; *FH }; open $fh, $io or return $self->error ("OPENREAD", "FATAL", $!); $self->{io} = $fh; } else { return $self->error ("BADARGS", "FATAL", '$obj->io (\*FH) -or- $obj->io ("/path/to/file")'); } } sub parse { my $self = shift; my $io = $self->{io}; while (1) { $self->{name} = ''; $self->{attribs} = {}; $self->{tags} = []; my $parse; if ($self->{buffer} =~ s,(<($TAG).*?]*?>),$parse = $1; '',oes) { my @tokens = grep !/^\s*$/, split /(<[^>]+?>)/, $parse; my $start = shift (@tokens); # Discard closing tag pop (@tokens); # Get the start tag and its attributes $start =~ /^<($TAG)\s*(.*[^\/])>$/os; $self->{name} = $1; my $attr = $2; if ($attr) { my @tmp = split (/"/, $attr); my $ret = {}; my $last = ''; for (0 .. $#tmp) { if (!$_ % 2) { $tmp[$_] =~ s/^\s+|=$//g; $last = $tmp[$_]; $ret->{$last} = ''; } else { $ret->{$last} = $tmp[$_]; } } $self->{attribs} = $ret; } # Parse the remaining tags. my $last_entry; for (@tokens) { if (/^<([^\/\s]+)\s*(.*?[^\/])?>$/s) { my $tag = $1; my $attr = $2; my $ret = {}; if ($attr) { my @tmp = split (/"/, $attr); my $last = ''; for (0 .. $#tmp) { if (!$_ % 2) { $tmp[$_] =~ s/^\s+|=$//g; $last = $tmp[$_]; $ret->{$last} = ''; } else { $ret->{$last} = $tmp[$_]; } } } $last_entry = { name => $tag, attribs => $ret }; push (@{$self->{tags}}, $last_entry); } elsif (/^<([^\s\/]+)\s*(.*?)\/>$/s) { my $tag = $1; my $attr = $2; my $ret = {}; if ($attr) { my @tmp = split (/"/, $attr); my $last = ''; for (0 .. $#tmp) { if (!$_ % 2) { $tmp[$_] =~ s/^\s+|=$//g; $last = $tmp[$_]; $ret->{$last} = ''; } else { $ret->{$last} = $tmp[$_]; } } } my $entry = { name => $tag, attribs => $ret }; push (@{$self->{tags}}, $entry); } elsif (/^([^<]+)$/ and $last_entry) { $last_entry->{data} = $1; } } return $self; } # No match else { my $tmp; read ($io, $tmp, 3072) or last; $self->{buffer} .= $tmp; } } return; }