240 lines
6.9 KiB
Perl
240 lines
6.9 KiB
Perl
# ====================================================================
|
|
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
|
#
|
|
# GT::SQL::Search::LUCENE::Indexer
|
|
# Author: Scott Beck
|
|
# CVS Info : 087,071,086,086,085
|
|
# $Id: Indexer.pm,v 1.2 2006/12/07 22:42:16 aki Exp $
|
|
#
|
|
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
|
# ====================================================================
|
|
#
|
|
|
|
package GT::SQL::Search::LUCENE::Indexer;
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Preamble information related to the object
|
|
use strict;
|
|
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
|
|
use Lucene;
|
|
use GT::SQL::Search::Base::Indexer;
|
|
use GT::TempFile;
|
|
# Inherit from the generic search indexer base class.
@ISA = qw/ GT::SQL::Search::Base::Indexer /;
|
|
# Debugging is disabled by default.
$DEBUG = 0;
|
|
# Version string built from the CVS Revision keyword: major, then the minor
# number zero-padded to three digits (revision 1.2 becomes "1.002").
$VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;
|
|
# Error codes raised by this module, mapped to sprintf-style message templates.
$ERRORS = {
|
|
INDEX_CORRUPT => 'Could not create an Indexer, this probably means your index is corrupted and you should rebuild it. The error was: %s',
|
|
DELETE_FAILED => 'Could not delete some records: %s'
|
|
};
|
|
# NOTE(review): presumably names the package whose error table/handler is
# used when reporting errors -- confirm against the GT base class.
$ERROR_MESSAGE = 'GT::SQL';
|
|
|
|
sub load {
# ------------------------------------------------------------------------------
# Constructor alias. Called as a class method; every remaining argument is
# forwarded untouched to new() and whatever new() returns is handed back.
#
    my $class = shift @_;
    return $class->new(@_);
}
|
|
|
|
sub _get_path {
# ------------------------------------------------------------------------------
# Returns the directory used for this table's index:
#   <GT::TempFile temp dir>/<table name>
# The result is passed through a catch-all regex capture to untaint it. Since
# "." does not match a newline, anything from the first newline on is dropped.
#
    my ($self) = @_;

    my $table_name = $self->{table}->name;
    my $tmp_root   = GT::TempFile::find_tmpdir();

    # XXX untaint: /(.*)/ always matches, so the capture is always assigned.
    my ($index_dir) = "$tmp_root/$table_name" =~ /(.*)/;
    return $index_dir;
}
|
|
|
|
sub _get_store {
# ------------------------------------------------------------------------------
# Returns the Lucene FSDirectory for this table's index path.
#   $create - handed straight through as the second argument of
#             Lucene::Store::FSDirectory->getDirectory.
#
    my $self   = shift;
    my $create = shift;

    return Lucene::Store::FSDirectory->getDirectory(scalar $self->_get_path, $create);
}
|
|
|
|
sub _get_indexer {
# ------------------------------------------------------------------------------
# Returns a Lucene IndexWriter for this table's index.
#
#   $create - passed to both _get_store and the IndexWriter constructor.
#             In this file add_search_driver and reindex_all pass 1,
#             post_add_record passes 0.
#
# Returns the result of $self->error(...) instead of a writer when:
#   - the table has no weighted columns      (NOWEIGHTS,     WARN)
#   - the table has no primary key           (NOPRIMARYKEY,  WARN)
#   - the store or the writer cannot be built (INDEX_CORRUPT, WARN)
#
    my ($self, $create) = @_;

    # The weights are only checked for presence here; they are not used.
    my %weights = $self->{table}->_weight_cols() or return $self->error(NOWEIGHTS => 'WARN');

    my ($pk) = $self->{table}->pk;
    if (!$pk) {
        return $self->error('NOPRIMARYKEY', 'WARN');
    }

    # Opening the directory can fail just like constructing the writer, so
    # both are guarded: callers test the return value and are not prepared
    # for an exception. The trailing 1 makes success detectable even if $@
    # gets clobbered while the eval unwinds.
    my $iw;
    my $built = eval {
        my $analyzer = Lucene::Analysis::Standard::StandardAnalyzer->new;
        my $store    = $self->_get_store($create);
        $iw = Lucene::Index::IndexWriter->new($store, $analyzer, $create);
        1;
    };
    if (!$built) {
        return $self->error('INDEX_CORRUPT', 'WARN', "$@");
    }
    return $iw;
}
|
|
|
|
sub drop_search_driver {
# ------------------------------------------------------------------------------
# Removes the on-disk index directory for this table. The result of deldir
# is not inspected; this always returns 1.
#
# NOTE(review): this loads File::Tools, while the other helper modules used by
# this file live under GT:: -- confirm whether GT::File::Tools was intended.
#
    my ($self) = @_;

    my $index_dir = $self->_get_path;

    require File::Tools;
    File::Tools::deldir($index_dir);

    return 1;
}
|
|
|
|
sub add_search_driver {
# ------------------------------------------------------------------------------
# Creates the index by requesting an indexer in create mode. Returns 1 on
# success and nothing (bare return) when no indexer could be obtained.
#
    my ($self) = @_;

    return unless $self->_get_indexer(1);
    return 1;
}
|
|
|
|
sub post_create_table {
# ------------------------------------------------------------------------------
# Hook run after the table is created: builds the index by delegating to
# add_search_driver and returns its result.
#
# The complete @_ is forwarded, so the object itself is passed again as the
# first argument after the invocant.
#
    my $self = $_[0];
    return $self->add_search_driver(@_);
}
|
|
|
|
sub post_drop_table {
# -------------------------------------------------------
# Hook run after the table is dropped: removes the index by delegating to
# drop_search_driver and returns its result.
#
# The complete @_ is forwarded, so the object itself is passed again as the
# first argument after the invocant.
#
    my $self = $_[0];
    return $self->drop_search_driver(@_);
}
|
|
|
|
|
|
sub post_add_record {
# -------------------------------------------------------
# Adds a single record to the index.
#
#   $rec         - hash ref of column name => value for the record
#   $insert_sth  - optional statement handle; when the table reports ai() as
#                  true and a handle is given, its insert_id is used as the
#                  primary key value instead of $rec->{<pk>}
#   $no_optimize - when true, the index is not optimized after the add
#
# Returns nothing if the table has no weighted columns. If no indexer can be
# obtained, returns an empty list when $self->{_debug} is set and 1 otherwise.
# Returns 1 after the document has been written.
#
    my ($self, $rec, $insert_sth, $no_optimize) = @_;

    my $tbl = $self->{table} or $self->error('NODRIVER', 'FATAL');
    my %boost_for = $tbl->_weight_cols() or return;

    my $writer = $self->_get_indexer(0);
    if (!$writer) {
        return $self->{_debug} ? () : 1;
    }

    my $doc  = Lucene::Document->new;
    my ($pk) = $self->{table}->pk;

    # The primary key is stored as a Keyword field below, never as a
    # weighted text field.
    delete $boost_for{$pk};

    foreach my $column (keys %boost_for) {
        my $field = Lucene::Document::Field->UnStored($column, $rec->{$column});
        $field->setBoost($boost_for{$column});
        $doc->add($field);
    }

    my @pk_value = ($tbl->ai && $insert_sth) ? $insert_sth->insert_id : $rec->{$pk};
    $doc->add(Lucene::Document::Field->Keyword($pk, @pk_value));

    $writer->addDocument($doc);
    $writer->optimize unless $no_optimize;
    $writer->close;
    undef $writer;

    return 1;
}
|
|
|
|
sub reindex_all {
# -------------------------------------------------------
# Rebuilds the whole index from the rows of $table.
#
#   $table - table object that is paged through (select_options / select)
#   $opts  - hash ref of options:
#              tick - when non-zero, print the running count every <tick>
#                     records and a newline every 10*<tick> records
#              max  - page size for the LIMIT clause (defaults to 5000)
#              cond - condition passed to select (defaults to {})
#
# If the initial (create mode) indexer cannot be obtained, returns an empty
# list when $self->{_debug} is set and 1 otherwise. Returns nothing if there
# are no weighted columns or the final indexer cannot be opened. Returns 1
# once every page has been indexed and the index optimized.
#
    my ($self, $table, $opts) = @_;

    my $tick = $opts->{tick} || 0;
    my $max  = $opts->{max}  || 5000;    # never false, thanks to the default

    # Opening in create mode clobbers the old index; close it straight away,
    # each record opens its own writer in post_add_record.
    my $indexer = $self->_get_indexer(1);
    if (!$indexer) {
        return $self->{_debug} ? () : 1;
    }
    $indexer->close;
    undef $indexer;

    my %weights = $self->{table}->_weight_cols() or return;
    my @columns = keys %weights;
    my ($pk)    = $self->{table}->pk();

    # Walk the table one page at a time until a page comes back empty.
    my $count = 0;
    for (my $page = 0; ; $page++) {
        my $offset = $page * $max;
        $table->select_options("LIMIT $offset,$max");

        my $cond = $opts->{cond} || {};
        my $sth  = $table->select($cond, [$pk, @columns]);

        my $saw_rows = 0;
        while (my $rec = $sth->fetchrow_hashref()) {
            # Optimizing is deferred to the single pass at the end.
            $self->post_add_record($rec, undef, 1);
            $saw_rows = 1;

            next unless $tick;
            $count++;
            print "$count " unless $count % $tick;
            print "\n"      unless $count % ($tick * 10);
        }
        last unless $saw_rows;
    }

    $indexer = $self->_get_indexer(0) or return;
    $indexer->optimize;
    $indexer->close;
    undef $indexer;

    return 1;
}
|
|
|
|
sub pre_delete_record {
# -------------------------------------------------------
# Removes from the index every document whose primary key matches a row
# selected by $where.
#
#   $where - condition passed to the table's select
#
# Failures are only reported when $self->{_debug} is set: the result of
# $self->error(...) is returned with INDEX_CORRUPT if the reader cannot be
# opened, or DELETE_FAILED if any individual delete threw. In every other
# case (including failures with _debug off) the return value is 1.
#
    my $self  = shift;
    my $where = shift;

    my $tbl  = $self->{table} or $self->error('NODRIVER', 'FATAL');
    my ($pk) = $tbl->pk();
    my $sth  = $tbl->select($where, [$pk]);

    my $reader = eval { Lucene::Index::IndexReader->open($self->_get_store(0)) };
    if ($@) {
        return 1 unless $self->{_debug};
        return $self->error('INDEX_CORRUPT', 'WARN', "$@");
    }

    # Keep going after a failed delete and gather the messages, so a single
    # bad record does not stop the rest from being removed.
    my @failures;
    while (my ($item_id) = $sth->fetchrow) {
        my $term = Lucene::Index::Term->new($pk => $item_id);
        eval { $reader->deleteDocuments($term) };
        push @failures, "$@" if $@;
    }
    $reader->close;
    undef $reader;

    return 1 unless @failures && $self->{_debug};
    return $self->error('DELETE_FAILED', 'WARN', join(", ", @failures));
}
|
|
|
|
sub post_update_record {
# -------------------------------------------------------
# Refreshes the index after rows have been updated: the old index entries
# for the rows matching $where_cond are deleted, then each matching row is
# read back from the table and indexed again.
#
#   $set_cond   - accepted for interface compatibility; not used here
#   $where_cond - condition identifying the updated rows
#   $tmp        - accepted for interface compatibility; not used here
#
# If the delete step reports failure (returns false), nothing is re-added and
# the return value is an empty list when $self->{_debug} is set, 1 otherwise.
# Returns 1 after the matching rows have been re-indexed.
#
    my ( $self, $set_cond, $where_cond, $tmp ) = @_;

    # Delete the previous entries. A `return` placed inside eval {} would only
    # leave the eval block, so the outcome is captured and acted on out here.
    # An exception during the delete is still swallowed (best effort) and the
    # re-index proceeds.
    my $deleted = eval { $self->pre_delete_record($where_cond) };
    if (!$@ && !$deleted) {
        return $self->{_debug} ? () : 1;
    }

    # Index the new state of every matching record.
    my $tbl = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
    my ($pk) = $tbl->pk();
    my %weights = $self->{table}->_weight_cols();
    my @weight_list = keys %weights;

    my $q = $tbl->select($where_cond, [$pk, @weight_list]);
    while (my $href = $q->fetchrow_hashref) {
        $self->post_add_record($href);
    }

    return 1;
}
|
|
|
|
sub reindex_record {
# -------------------------------------------------------
# Re-indexes one record: its existing index entries are removed with
# delete_record and the record is then indexed afresh with index_record,
# whose result is returned.
#
# NOTE(review): delete_record and index_record are not defined in this file;
# presumably they are inherited from the base indexer -- confirm.
#
    my $self = shift;
    my $rec  = shift;

    $self->delete_record($rec);
    return $self->index_record($rec);
}
|
|
|
|
1;
|