534 lines
26 KiB
Perl
534 lines
26 KiB
Perl
# ==================================================================
|
|
# Gossamer Links - enhanced directory management system
|
|
#
|
|
# Website : http://gossamer-threads.com/
|
|
# Support : http://gossamer-threads.com/scripts/support/
|
|
# CVS Info : 087,071,086,086,085
|
|
# Revision : $Id: RDFS2.pm,v 1.20 2005/04/07 19:34:41 brewt Exp $
|
|
#
|
|
# Copyright (c) 2001 Gossamer Threads Inc. All Rights Reserved.
|
|
# Redistribution in part or in whole strictly prohibited. Please
|
|
# see LICENSE file for full details.
|
|
# ==================================================================
|
|
|
|
package Links::Import::RDFS2;
|
|
|
|
use 5.004_04;
|
|
use strict;
|
|
use vars qw/$Warning_Code $Critical_Code $Mild_Code $Print_Out/;
|
|
|
|
use DBI;
|
|
use GT::SQL;
|
|
use GT::RDF;
|
|
|
|
sub critical {
|
|
$Critical_Code->(@_);
|
|
}
|
|
|
|
sub warning {
|
|
$Warning_Code->(@_);
|
|
}
|
|
|
|
sub mild_warning {
|
|
ref $Mild_Code eq 'CODE' and $Mild_Code->(@_);
|
|
}
|
|
|
|
sub import_print {
|
|
if (ref $Print_Out eq 'CODE') {
|
|
$Print_Out->(@_);
|
|
}
|
|
else {
|
|
print @_;
|
|
}
|
|
}
|
|
|
|
# Takes 3-4 arguments: hash reference, 2 or 3 code refs
|
|
# The hash reference is the options hash for an import.
|
|
# The first code reference will be called when a warning occurs.
|
|
# The second code reference will be called when a critical error occurs.
|
|
# If provided, the third code reference will be called when a mild warning occurs
|
|
sub import {
|
|
my $opt = shift;
|
|
return if ref $opt ne 'HASH';
|
|
{
|
|
my $warning = shift;
|
|
return if ref $warning ne 'CODE';
|
|
$Warning_Code = $warning;
|
|
|
|
my $critical = shift;
|
|
return if ref $critical ne 'CODE';
|
|
$Critical_Code = $critical;
|
|
|
|
my $mild = shift;
|
|
$Mild_Code = $mild if ref $mild eq 'CODE';
|
|
|
|
my $output = shift;
|
|
$Print_Out = $output if ref $output eq 'CODE';
|
|
}
|
|
|
|
my $DB = new GT::SQL(def_path => $$opt{destination}, subclass => 0);
|
|
my $e_prefix = $DB->prefix;
|
|
my $e_dbh;
|
|
{
|
|
my $table = $DB->table("Links");
|
|
$table->connect();
|
|
$e_dbh = $table->{driver}->connect();
|
|
}
|
|
|
|
my $Links_counter;
|
|
my $Category_counter;
|
|
my $odbc = 0;
|
|
if (($DB->table('Links')->{connect}{driver} || "") eq "ODBC") {
|
|
$odbc = 1;
|
|
# Set max read properties for DBI.
|
|
$e_dbh->{LongReadLen} = 1000000;
|
|
}
|
|
|
|
if ($$opt{clear_tables}) {
|
|
# Delete everything from all tables, EXCEPT for the `admin' user from the Users table.
|
|
# Also ignore --rdf-user if specified.
|
|
if ($$opt{rdf_user}) {
|
|
my $sth = $e_dbh->prepare("SELECT COUNT(*) FROM ${e_prefix}Users WHERE Username = ?") or critical "Unable to prepare query `SELECT COUNT(*) FROM ${e_prefix}Users WHERE Username = ?': ".$e_dbh->errstr;
|
|
$sth->execute($$opt{rdf_user}) or critical "Unable to execute query `SELECT COUNT(*) FROM ${e_prefix}Users WHERE Username = ?': ".$sth->errstr;
|
|
if ($sth->fetchrow_array) {
|
|
$e_dbh->do("DELETE FROM ${e_prefix}Users WHERE Username <> 'admin' AND Username <> ".$e_dbh->quote($$opt{rdf_user}));
|
|
}
|
|
else {
|
|
critical "The rdf username that you specified ($$opt{rdf_user}) does not exist. Please create the user.";
|
|
}
|
|
}
|
|
else {
|
|
$e_dbh->do("DELETE FROM ${e_prefix}Users WHERE Username <> 'admin'") or critical "Unable to delete all existing users: ".$e_dbh->errstr;
|
|
}
|
|
for (qw/Links Category CatLinks CatRelations Category_Score_List
|
|
Category_Word_List ClickTrack Editors EmailMailings EmailTemplates
|
|
Links_Score_List Links_Word_List MailingIndex MailingList
|
|
MailingListIndex Verify/) {
|
|
$e_dbh->do("DELETE FROM $e_prefix$_");
|
|
}
|
|
unless ($$opt{straight_import}) {
|
|
$Links_counter = $Category_counter = 0;
|
|
}
|
|
}
|
|
else {
|
|
my $sth = $e_dbh->prepare("SELECT MAX(ID) FROM ${e_prefix}Links") or critical "Unable to prepare query `SELECT MAX(ID) FROM ${e_prefix}Links': ".$e_dbh->errstr;
|
|
$sth->execute() or critical "Unable to execute query `SELECT MAX(ID) FROM ${e_prefix}Links': ".$sth->errstr;
|
|
$Links_counter = $sth->fetchrow_array;
|
|
$sth->finish();
|
|
|
|
$sth = $e_dbh->prepare("SELECT MAX(ID) FROM ${e_prefix}Category") or critical "Unable to prepare query `SELECT MAX(ID) FROM ${e_prefix}Category': ".$e_dbh->errstr;
|
|
$sth->execute() or critical "Unable to execute query `SELECT MAX(ID) FROM ${e_prefix}Category': ".$sth->errstr;
|
|
$Category_counter = $sth->fetchrow_array;
|
|
$sth->finish();
|
|
}
|
|
|
|
my $gzip = $$opt{with_gzip};
|
|
my $need_gz = -B $$opt{source}; # Tests if the file is binary.
|
|
|
|
my $is_win = $^O =~ /win/i;
|
|
|
|
my $gzip_filename = $is_win ? "gzip.exe" : "gzip";
|
|
|
|
if (not $gzip and $need_gz) {
|
|
# Try to find gzip
|
|
my $dir_sep = $is_win ? ";" : ":";
|
|
my @locations = split /$dir_sep/, $ENV{PATH};
|
|
for (@locations) {
|
|
-x "$_/$gzip_filename" and $gzip = "$_/$gzip_filename", last;
|
|
}
|
|
$gzip or critical "\nUnable to locate gzip (Searched @locations). Please specify with --with-gzip=\"/path/to/gzip\"";
|
|
}
|
|
elsif ($gzip and -d $gzip) {
|
|
if (-x "$gzip/$gzip_filename") {
|
|
$gzip = "$gzip/$gzip_filename";
|
|
}
|
|
else {
|
|
critical "\nThe directory $gzip does not contain a valid gzip program";
|
|
}
|
|
}
|
|
if ($need_gz) {
|
|
-x $gzip or critical "\nUnable to find an executable gzip";
|
|
}
|
|
|
|
my $rdf = \do { local *FH; *FH };
|
|
|
|
if ($$opt{with_gzip} or $need_gz) {
|
|
import_print "\nOpening uncompressed stream from gzip compressed file `$$opt{source}' ...\n";
|
|
open $rdf, " $gzip -c -d $$opt{source} |" or critical "Unable to open `$gzip -c -d $$opt{source} |': $!";
|
|
}
|
|
else {
|
|
import_print "\nOpening stream from non-compressed file `$$opt{source}' ...\n";
|
|
open $rdf, "<$$opt{source}" or critical "Unable to open $$opt{source}: $!";
|
|
}
|
|
|
|
import_print "Stream opened.\n";
|
|
# Do the import
|
|
{
|
|
my $count_cats_sth = $e_dbh->prepare("SELECT COUNT(*) FROM ${e_prefix}Category WHERE Full_Name = ?") or critical "Unable to prepare query `SELECT COUNT(*) FROM ${e_prefix}Category WHERE Full_Name = ?': ".$e_dbh->errstr;
|
|
my $cat_id_sth = $e_dbh->prepare("SELECT ID FROM ${e_prefix}Category WHERE Full_Name = ?") or critical "Unable to prepare query `SELECT ID FROM ${e_prefix}Category WHERE Full_Name = ?': ".$e_dbh->errstr;
|
|
|
|
my $cat_ins_sth = $odbc
|
|
? $e_dbh->prepare("SET IDENTITY_INSERT ${e_prefix}Category ON; INSERT INTO ${e_prefix}Category (ID, Name, Full_Name, FatherID, Number_of_Links, Has_New_Links, Has_Changed_Links) VALUES (?, ?, ?, ?, 0, 'No', 'No')")
|
|
: $e_dbh->prepare("INSERT INTO ${e_prefix}Category (ID, Name, Full_Name, FatherID, Number_of_Links, Has_New_Links, Has_Changed_Links) VALUES (?, ?, ?, ?, 0, 'No', 'No')");
|
|
$cat_ins_sth or critical "Unable to prepare query `INSERT INTO ${e_prefix}Category (ID, Name, Full_Name, FatherID) VALUES (?, ?, ?, ?)': ".$e_dbh->errstr;
|
|
|
|
my $sub_cats_sth = $e_dbh->prepare("UPDATE ${e_prefix}Category SET FatherID = ? WHERE Full_Name LIKE ? AND Full_Name NOT LIKE ?") or critical "Unable to prepare query `UPDATE ${e_prefix}Category SET FatherID = ? WHERE Full_Name LIKE ? AND Full_Name NOT LIKE ?': ".$e_dbh->errstr;
|
|
my $get_num_links_sth = $e_dbh->prepare("SELECT COUNT(*) FROM ${e_prefix}CatLinks WHERE CategoryID = ?") or critical "Unable to prepare query `SELECT COUNT(*) FROM ${e_prefix}CatLinks WHERE CategoryID = ?': ".$e_dbh->errstr;
|
|
my $num_links_sth = $e_dbh->prepare("UPDATE ${e_prefix}Category SET Number_of_Links = ? WHERE ID = ?") or critical "Unable to prepare query `UPDATE ${e_prefix}Category SET Number_of_Links = ? WHERE ID = ?': ".$e_dbh->errstr;
|
|
|
|
my $insert_link_sth = $odbc
|
|
? $e_dbh->prepare("SET IDENTITY_INSERT ${e_prefix}Links ON; INSERT INTO ${e_prefix}Links (ID, Title, URL, Add_Date, Mod_Date, Description, LinkOwner, Status, Votes, Rating, Hits) VALUES (?, ?, ?, ?, ?, ?, ".$e_dbh->quote($$opt{rdf_user} or "admin").",0,0,0,0)")
|
|
: $e_dbh->prepare("INSERT INTO ${e_prefix}Links (ID, Title, URL, Add_Date, Mod_Date, Description, LinkOwner, Status, Votes, Rating, Hits) VALUES (?, ?, ?, ?, ?, ?, ".$e_dbh->quote($$opt{rdf_user} or "admin").",0,0,0,0)");
|
|
$insert_link_sth or critical "Unable to prepare query `INSERT INTO ${e_prefix}Links (Title, URL, Add_Date, Mod_Date, Description, LinkOwner) VALUES (?, ?, ?, ?, ?, ?)': ".$e_dbh->errstr;
|
|
|
|
my $link_exists_sth = $e_dbh->prepare("SELECT COUNT(*) FROM ${e_prefix}Links, ${e_prefix}CatLinks WHERE URL = ? AND CategoryID = ? AND ID = LinkID") or critical "Unable to prepare query `SELECT * FROM Links, CatLinks WHERE URL = ? AND CategoryID = ? AND ID = LinkID': ".$e_dbh->errstr;
|
|
|
|
my $cat_links_sth = $e_dbh->prepare("INSERT INTO ${e_prefix}CatLinks (LinkID, CategoryID) VALUES (?, ?)") or critical "Unable to prepare query `INSERT INTO ${e_prefix}CatLinks (LinkID, CategoryID) VALUES (?, ?)': ".$e_dbh->errstr;
|
|
|
|
my $count_links_sth = $e_dbh->prepare("SELECT COUNT(*) FROM ${e_prefix}CatLinks WHERE CategoryID = ?") or critical "Unable to prepare query `SELECT COUNT(*) FROM ${e_prefix}CatLinks WHERE CategoryID = ?': ".$e_dbh->errstr;
|
|
|
|
import_print "\nImporting from $$opt{source}\n";
|
|
my $links_imported = 0;
|
|
my $cats_imported = 0;
|
|
my %cat_needs_num; # $cat_needs_num{cat_id} = 1 if the category needs to have its Number_Of_Links updated
|
|
|
|
my $base_cat = $$opt{rdf_destination};
|
|
my $base_cat_id;
|
|
if (defined $base_cat) {
|
|
$base_cat =~ s|//+|/|g; # Remove doubled (and more) slashes
|
|
$base_cat =~ s|^/+||; # Remove any leading slashes
|
|
$base_cat =~ s|/+$||; # And any trailing ones
|
|
}
|
|
else {
|
|
$base_cat = "";
|
|
}
|
|
if (length $base_cat) {
|
|
$count_cats_sth->execute($base_cat) or critical "Unable to execute query `SELECT COUNT(*) FROM ${e_prefix}Category WHERE Full_Name = ?': ".$count_cats_sth->errstr;
|
|
if ($count_cats_sth->fetchrow_array) {
|
|
$cat_id_sth->execute($base_cat) or critical "Unable to execute query `SELECT ID FROM ${e_prefix}Category WHERE Full_Name = ?': ".$cat_id_sth->errstr;
|
|
$base_cat_id = $cat_id_sth->fetchrow_array;
|
|
}
|
|
else { # Category doesn't exist
|
|
my @missing_cats = $base_cat;
|
|
mild_warning "$base_cat does not exist and is being created";
|
|
my $fn = $base_cat;
|
|
while ($fn =~ s[/[^/]*\Z][]) {
|
|
$count_cats_sth->execute($fn) or critical "Unable to execute query `SELECT COUNT(*) FROM ${e_prefix}Category WHERE Full_Name = ?': ".$count_cats_sth->errstr;
|
|
if ($count_cats_sth->fetchrow_array) { # It exists
|
|
last;
|
|
}
|
|
else {
|
|
unshift @missing_cats, $fn;
|
|
mild_warning "$fn is needed for base category $base_cat and does not exist. It will be created";
|
|
}
|
|
}
|
|
for (@missing_cats) {
|
|
my ($name) = m[([^/]+)\Z];
|
|
my ($father_full) = m[\A(.*)/];
|
|
my $father_id;
|
|
if ($father_full) {
|
|
$cat_id_sth->execute($father_full) or critical "Unable to execute query `SELECT ID FROM ${e_prefix}Category WHERE Full_Name = ?': ".$cat_id_sth->errstr;
|
|
$father_id = $cat_id_sth->fetchrow_array;
|
|
}
|
|
else { # Must be the root category
|
|
$father_id = 0;
|
|
}
|
|
$cat_ins_sth->execute(++$Category_counter,$name,$_,$father_id) or critical "Unable to create missing category $_: ".$cat_ins_sth->errstr;
|
|
$cats_imported++;
|
|
$sub_cats_sth->execute($Category_counter,"$_/%","$_/%/%") or critical "Unable to execute query `UPDATE ${e_prefix}Category SET FatherID = ? WHERE Full_Name LIKE ? and Full_Name NOT LIKE ?': ".$sub_cats_sth->errstr;
|
|
}
|
|
$base_cat_id = $Category_counter; # The last one inserted will be the category import base
|
|
}
|
|
}
|
|
else {
|
|
$base_cat_id = 0;
|
|
}
|
|
my $cat = $$opt{rdf_category};
|
|
|
|
|
|
# -------------------------------------------------------------------
|
|
# Main code, get a parser object and start parsing!
|
|
#
|
|
# New for 2.2.0 - XML::Parser-based code, which should be significantly faster.
|
|
# It should, however, still be considered experimental.
|
|
#
|
|
if ($$opt{xml_parser}) {
|
|
require XML::Parser;
|
|
my (%links, %want, %current, @in);
|
|
my $last_status = -1;
|
|
my $insert_cat = sub {
|
|
my $cat_name = $current{category};
|
|
|
|
# If we are under ODBC we need to reset the sth handle to avoid a "Invalid Cursor State" error.
|
|
$odbc and $count_cats_sth->finish;
|
|
|
|
# Check to make sure we haven't seen this category before
|
|
# Set $found to the category ID.
|
|
$count_cats_sth->execute($cat_name) or critical "Execute: $DBI::errstr";
|
|
if ($count_cats_sth->fetchrow_array) {
|
|
$cat_id_sth->finish if ($odbc); # Need to reset cursor under odbc.
|
|
$cat_id_sth->execute($cat_name) or critical "Execute: $DBI::errstr";
|
|
$current{cat_id} = $cat_id_sth->fetchrow_array;
|
|
}
|
|
else {
|
|
my ($father_name, $short_name) = $current{category} =~ m|(?:(.*)/)?([^/]+)$|;
|
|
my $father_id;
|
|
if ($father_name) {
|
|
$cat_id_sth->finish if ($odbc); # Need to reset cursor under odbc.
|
|
$cat_id_sth->execute($father_name) or critical "Execute: $DBI::errstr";
|
|
$father_id = $cat_id_sth->fetchrow_array;
|
|
}
|
|
else {
|
|
$father_id = 0;
|
|
}
|
|
|
|
$cat_ins_sth->execute(++$Category_counter, $short_name, $current{category}, $father_id) or critical "Execute: $DBI::errstr";
|
|
$cats_imported++;
|
|
$current{cat_id} = $Category_counter;
|
|
}
|
|
|
|
for my $link (keys %links) {
|
|
# Either append, or insert new link.
|
|
if ($$opt{rdf_update}) {
|
|
$link_exists_sth->execute($link, $current{cat_id}) or critical "Execute: $DBI::errstr";
|
|
next if $link_exists_sth->fetchrow;
|
|
}
|
|
# Title can only be 100 characters (ODBC fatals about data that is too long).
|
|
my $title = substr($links{$link}->{title} || $link, 0, 100);
|
|
$insert_link_sth->execute(++$Links_counter, $title, $link, $$opt{rdf_add_date}, $$opt{rdf_add_date}, $links{$link}->{description} || "") or critical "Execute: $DBI::errstr";
|
|
$cat_links_sth->execute($Links_counter, $current{cat_id}) or critical "Execute: $DBI::errstr";
|
|
$cat_needs_num{$current{cat_id}} = 1;
|
|
$links_imported++;
|
|
}
|
|
%links = ();
|
|
return scalar keys %links;
|
|
};
|
|
|
|
my $parser = XML::Parser->new(
|
|
Handlers => {
|
|
Start => sub {
|
|
my ($parser, $tag, %attr) = @_;
|
|
if ($tag eq 'Topic') {
|
|
{
|
|
my $disp_topic = $attr{'r:id'};
|
|
substr($disp_topic, 30) = '...' if length $disp_topic > 33;
|
|
my $padding = " " x (33 - length $disp_topic);
|
|
$disp_topic = "(L:$links_imported, C:$cats_imported) $disp_topic$padding";
|
|
import_print("\r$disp_topic");
|
|
}
|
|
if ($current{category}) {
|
|
my $cat_count = $insert_cat->();
|
|
import_print "$cat_count ";
|
|
}
|
|
my $dmoz_cat = $attr{'r:id'};
|
|
if ($dmoz_cat =~ /^\Q$cat\E(.*)/) {
|
|
my $topic = $base_cat . '/' . $1;
|
|
$topic =~ s|/{2,}|/|g;
|
|
$topic =~ s|^/||;
|
|
$topic =~ s|/$||;
|
|
$topic =~ y|_| |;
|
|
$current{category} = $topic;
|
|
}
|
|
else {
|
|
delete $current{category};
|
|
import_print "skipping";
|
|
}
|
|
}
|
|
elsif ($tag eq 'link' and $in[-1] eq 'Topic' and $current{category} and $attr{'r:resource'}) {
|
|
$links{$attr{'r:resource'}} = {};
|
|
}
|
|
elsif ($tag eq 'ExternalPage' and $current{category} and $attr{about}) {
|
|
$current{ExternalPage} = $attr{about};
|
|
}
|
|
elsif ($tag eq 'd:Title' and $in[-1] eq 'ExternalPage' and $current{ExternalPage}) {
|
|
$want{title} = \$links{$current{ExternalPage}}->{title};
|
|
}
|
|
elsif ($tag eq 'd:Description' and $in[-1] eq 'ExternalPage' and $current{ExternalPage}) {
|
|
$want{description} = \$links{$current{ExternalPage}}->{description};
|
|
}
|
|
|
|
push @in, $tag;
|
|
},
|
|
End => sub {
|
|
my ($parser, $tag) = @_;
|
|
pop @in;
|
|
if ($tag eq 'd:Description') {
|
|
delete $want{description};
|
|
}
|
|
elsif ($tag eq 'd:Title') {
|
|
delete $want{title};
|
|
}
|
|
},
|
|
Char => sub {
|
|
my ($parser, $text) = @_;
|
|
if ($want{title}) {
|
|
${$want{title}} = $text;
|
|
}
|
|
elsif ($want{description}) {
|
|
${$want{description}} = $text;
|
|
}
|
|
}
|
|
}
|
|
);
|
|
|
|
$parser->parse($rdf, ProtocolEncoding => 'ISO-8859-1');
|
|
}
|
|
else {
|
|
my $parse = GT::RDF->new (io => $rdf);
|
|
my ($found, $was_found, %links);
|
|
my $cat_count = 0;
|
|
my $first = 0;
|
|
|
|
while ($parse->parse) {
|
|
|
|
# Either we have a topic, external page or unknown.
|
|
if ($parse->{name} eq 'Topic') {
|
|
|
|
# Add extra links that did not have external page info.
|
|
if (defined $found) {
|
|
foreach my $link (keys %links) {
|
|
if ($$opt{rdf_update}) {
|
|
$link_exists_sth->execute($parse->{attribs}{about},$found) or critical "Execute: $DBI::errstr";
|
|
unless ($link_exists_sth->fetchrow_array) {
|
|
$insert_link_sth->execute(++$Links_counter, $link, $link, $$opt{rdf_add_date}, $$opt{rdf_add_date}, "") or critical "Execute: $DBI::errstr";
|
|
$cat_links_sth->execute($Links_counter, $found) or critical "Execute: $DBI::errstr";
|
|
$cat_needs_num{$found} = 1;
|
|
$links_imported++;
|
|
}
|
|
}
|
|
else {
|
|
$insert_link_sth->execute(++$Links_counter, $link, $link, $$opt{rdf_add_date}, $$opt{rdf_add_date}, "") or critical "Execute: $DBI::errstr";
|
|
$cat_links_sth->execute($Links_counter, $found) or critical "Execute: $DBI::errstr";
|
|
$cat_needs_num{$found} = 1;
|
|
$links_imported++;
|
|
}
|
|
}
|
|
import_print "$cat_count ";
|
|
}
|
|
else {
|
|
import_print "skipping" if $first++;
|
|
}
|
|
|
|
# Clear out our links hash, and set found to undef.
|
|
$cat_count = 0;
|
|
%links = ();
|
|
$was_found = $found;
|
|
$found = undef;
|
|
|
|
# We've finished a topic, start a new one.
|
|
my $dmoz_cat = $parse->{attribs}{'r:id'};
|
|
if ($dmoz_cat =~ /^\Q$cat\E(.*)/) {
|
|
my $topic = $base_cat . '/' . $1;
|
|
$topic =~ s|//+|/|g;
|
|
$topic =~ s|^/||;
|
|
$topic =~ s|/$||;
|
|
$topic =~ s|_| |g;
|
|
if ($topic) {
|
|
|
|
# If we are under ODBC we need to reset the sth handle to avoid a "Invalid Cursor State" error.
|
|
$odbc and ($count_cats_sth->finish);
|
|
|
|
# Check to make sure we haven't seen this category before
|
|
# Set $found to the category ID.
|
|
$count_cats_sth->execute($topic) or critical "Execute: $DBI::errstr";
|
|
if ($count_cats_sth->fetchrow_array) {
|
|
$cat_id_sth->finish if ($odbc); # Need to reset cursor under odbc.
|
|
$cat_id_sth->execute($topic) or critical "Execute: $DBI::errstr";
|
|
$found = $cat_id_sth->fetchrow_array;
|
|
}
|
|
else {
|
|
my ($short_name) = $topic =~ m|([^/]+)$|;
|
|
my ($father_name) = $topic =~ m|(.*)/|;
|
|
my $father_id;
|
|
if ($father_name) {
|
|
$cat_id_sth->finish if ($odbc); # Need to reset cursor under odbc.
|
|
$cat_id_sth->execute($father_name) or critical "Execute: $DBI::errstr";
|
|
$father_id = $cat_id_sth->fetchrow_array;
|
|
}
|
|
else {
|
|
$father_id = 0;
|
|
}
|
|
|
|
$cat_ins_sth->execute(++$Category_counter, $short_name, $topic, $father_id) or critical "Execute: $DBI::errstr";
|
|
$cats_imported++;
|
|
$found = $Category_counter;
|
|
}
|
|
}
|
|
}
|
|
{
|
|
my $disp_topic = $parse->{attribs}{'r:id'};
|
|
substr($disp_topic, 30) = '...' if length $disp_topic > 33;
|
|
my $padding = " " x (33 - length $disp_topic);
|
|
$disp_topic = "(L:$links_imported, C:$cats_imported) $disp_topic$padding";
|
|
import_print("\r$disp_topic");
|
|
}
|
|
if (defined $found) {
|
|
for (@{$parse->{tags}}) {
|
|
next unless ($_->{attribs}{'r:resource'});
|
|
$links{$_->{attribs}{'r:resource'}} = 1;
|
|
$cat_count++;
|
|
}
|
|
}
|
|
else {
|
|
return 1 if $was_found;
|
|
}
|
|
}
|
|
elsif (defined ($found) and $parse->{name} eq 'ExternalPage') {
|
|
|
|
# Remove from our simple link list.
|
|
delete $links{$parse->{attribs}{about}};
|
|
|
|
# Insert with description or title if it does not exist and we are not overwritting
|
|
my ($title, $desc);
|
|
for (@{$parse->{tags}}) {
|
|
if ($_->{name} eq 'd:Title' and $_->{data}) { $title = $_->{data} }
|
|
elsif ($_->{name} eq 'd:Description' and $_->{data}) { $desc = $_->{data} }
|
|
}
|
|
$title ||= $parse->{attribs}{about};
|
|
$desc ||= '';
|
|
|
|
# Either append, or insert new link.
|
|
if ($$opt{rdf_update}) {
|
|
$link_exists_sth->execute ($parse->{attribs}{about},$found) or critical "Execute: $DBI::errstr";
|
|
unless ($link_exists_sth->fetchrow_array) {
|
|
# Title can only be 100 characters (ODBC fatals about data that is too long).
|
|
$title = substr($title, 0, 100);
|
|
$insert_link_sth->execute(++$Links_counter, $title, $parse->{attribs}{about}, $$opt{rdf_add_date}, $$opt{rdf_add_date}, $desc) or critical "Execute: $DBI::errstr";
|
|
$cat_links_sth->execute($Links_counter, $found) or critical "Execute: $DBI::errstr";
|
|
$cat_needs_num{$found} = 1;
|
|
$links_imported++;
|
|
}
|
|
}
|
|
else {
|
|
# Title can only be 100 characters (ODBC fatals about data that is too long).
|
|
$title = substr($title, 0, 100);
|
|
$insert_link_sth->execute(++$Links_counter, $title, $parse->{attribs}{about}, $$opt{rdf_add_date}, $$opt{rdf_add_date}, $desc) or critical "Execute: $DBI::errstr";
|
|
$cat_links_sth->execute($Links_counter, $found) or critical "Execute: $DBI::errstr";
|
|
$cat_needs_num{$found} = 1;
|
|
$links_imported++;
|
|
}
|
|
}
|
|
elsif (defined $found) {
|
|
require GT::Dumper;
|
|
critical "STRANGE TAG: " . GT::Dumper::Dumper($parse) . "\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
# Now we have to go through the categories to update each one's Number_Of_Links
|
|
for (keys %cat_needs_num) {
|
|
$count_links_sth->execute($_) or critical "Unable to count links for Category ID $_: ".$count_links_sth->errstr;
|
|
my $links = $count_links_sth->fetchrow_array;
|
|
$count_links_sth->finish if ($odbc);
|
|
$num_links_sth->execute($links,$_) or critical "Unable to update number of links for Category ID $_: ".$num_links_sth->errstr;
|
|
}
|
|
}
|
|
import_print "\nNOTE: You must run Rebuild Cat. tree, Repair Tables, and Rebuild Search after performing an import!\n";
|
|
|
|
$e_dbh->disconnect;
|
|
1;
|
|
}
|
|
|
|
|
|
'"I am lying," said the man. Was he?';
|