First pass at adding key files
This commit is contained in:
@ -0,0 +1,239 @@
|
||||
# ====================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::SQL::Search::LUCENE::Indexer
|
||||
# Author: Scott Beck
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: Indexer.pm,v 1.2 2006/12/07 22:42:16 aki Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ====================================================================
|
||||
#
|
||||
|
||||
package GT::SQL::Search::LUCENE::Indexer;
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Preamble information related to the object
|
||||
use strict;
|
||||
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
|
||||
use Lucene;
|
||||
use GT::SQL::Search::Base::Indexer;
|
||||
use GT::TempFile;
|
||||
# Inherit from the shared search indexer base class.
@ISA = qw/ GT::SQL::Search::Base::Indexer /;

# Package debug flag, initialised to 0.
$DEBUG = 0;

# Version string built from the CVS revision keyword: "<major>.<minor>" with
# the minor part zero-padded to three digits.
$VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;

# Error codes raised by this driver, each with a sprintf-style message
# template taking one argument.
$ERRORS = {
    INDEX_CORRUPT => 'Could not create an Indexer, this probably means your index is corrupted and you should rebuild it. The error was: %s',
    DELETE_FAILED => 'Could not delete some records: %s'
};

# NOTE(review): presumably names the package whose error table is used for
# codes not listed in $ERRORS above (NOWEIGHTS, NOPRIMARYKEY, NODRIVER are
# raised in this file but not defined here) -- confirm against GT::Base.
$ERROR_MESSAGE = 'GT::SQL';
|
||||
|
||||
sub load {
# ------------------------------------------------------------------------------
# Alternate constructor: forwards every argument unchanged to new() on the
# invoking class and returns whatever new() returns.
#
    return shift->new(@_);
}
|
||||
|
||||
sub _get_path {
# ------------------------------------------------------------------------------
# Returns the location of this table's index on disk:
# "<temp dir from GT::TempFile::find_tmpdir>/<table name>".
#
    my ($self) = @_;

    my $table_name = $self->{table}->name;
    my $base_dir   = GT::TempFile::find_tmpdir();
    my $path       = "$base_dir/$table_name";

    # XXX untaint: the pattern accepts anything, so this only launders the
    # value through a capture rather than validating it.
    ($path) = $path =~ /(.*)/;

    return $path;
}
|
||||
|
||||
sub _get_store {
# ------------------------------------------------------------------------------
# Returns the Lucene FSDirectory object for this table's index path.
#
#   $create - passed straight through as the second argument of
#             Lucene::Store::FSDirectory->getDirectory.
#
    my $self   = shift;
    my $create = shift;

    my $index_dir = $self->_get_path;
    return Lucene::Store::FSDirectory->getDirectory($index_dir, $create);
}
|
||||
|
||||
sub _get_indexer {
# ------------------------------------------------------------------------------
# Returns a Lucene IndexWriter for this table's index, or the result of
# $self->error(...) when one cannot be produced.
#
#   $create - forwarded to _get_store() and to the IndexWriter constructor.
#             In this file add_search_driver and reindex_all pass 1 for a
#             fresh index; the other callers pass 0.
#
# Errors (all raised at WARN level):
#   NOWEIGHTS     - the table reports no weighted columns
#   NOPRIMARYKEY  - the table reports no primary key
#   INDEX_CORRUPT - Lucene threw while opening the directory or the writer
#
    my ($self, $create) = @_;

    # Only the presence of weighted columns matters here, so count them
    # instead of building a hash that is never read.
    my $weight_count = () = $self->{table}->_weight_cols();
    return $self->error(NOWEIGHTS => 'WARN') unless $weight_count;

    my ($pk) = $self->{table}->pk;
    if (!$pk) {
        return $self->error('NOPRIMARYKEY', 'WARN');
    }

    # Every Lucene call lives inside the eval so that a failure to open the
    # directory is reported as INDEX_CORRUPT too, rather than escaping as an
    # exception to callers that expect a false return.
    my $writer;
    my $ok = eval {
        my $analyzer = Lucene::Analysis::Standard::StandardAnalyzer->new;
        my $store    = $self->_get_store($create);
        $writer = Lucene::Index::IndexWriter->new($store, $analyzer, $create);
        1;
    };
    if (!$ok) {
        my $reason = $@ || 'unknown error';
        return $self->error('INDEX_CORRUPT', 'WARN', "$reason");
    }

    return $writer;
}
|
||||
|
||||
sub drop_search_driver {
# ------------------------------------------------------------------------------
# Removes this table's index directory from disk. Always returns 1; the
# result of deldir() is not inspected.
#
    my ($self) = @_;

    my $index_dir = $self->_get_path;

    # NOTE(review): File::Tools is not referenced anywhere else in this file;
    # confirm it is the module that provides deldir() here (as opposed to a
    # GT::-namespaced equivalent).
    require File::Tools;
    File::Tools::deldir($index_dir);

    return 1;
}
|
||||
|
||||
sub add_search_driver {
# ------------------------------------------------------------------------------
# Creates the index by requesting a writer in create mode. Returns 1 when a
# writer was obtained, and nothing (undef / empty list) otherwise. The writer
# itself is discarded without an explicit close.
#
    my ($self) = @_;

    return $self->_get_indexer(1) ? 1 : ();
}
|
||||
|
||||
sub post_create_table {
# ------------------------------------------------------------------------------
# Table-creation hook: builds the search index by delegating to
# add_search_driver(). The full argument list, including the invocant, is
# passed along unchanged.
#
    my $self = $_[0];
    return $self->add_search_driver(@_);
}
|
||||
|
||||
sub post_drop_table {
# -------------------------------------------------------
# Table-drop hook: removes the search index by delegating to
# drop_search_driver(). The full argument list, including the invocant, is
# passed along unchanged.
#
    my $self = $_[0];
    return $self->drop_search_driver(@_);
}
|
||||
|
||||
|
||||
sub post_add_record {
# -------------------------------------------------------
# Indexes a single record.
#
#   $rec         - hash ref of column name => value for the record
#   $insert_sth  - optional statement handle; when the table reports an
#                  auto-increment key ($tbl->ai) and this is supplied, the
#                  document key comes from $insert_sth->insert_id instead of
#                  $rec->{<primary key>}
#   $no_optimize - true to skip optimizing the index after the add
#
# Returns 1 on success. Returns nothing when the table has no weighted
# columns. When no index writer can be obtained, returns nothing if
# $self->{_debug} is set and 1 otherwise.
#
    my ($self, $rec, $insert_sth, $no_optimize) = @_;

    my $tbl = $self->{table} or $self->error('NODRIVER', 'FATAL');
    my %weights = $tbl->_weight_cols() or return;

    my $indexer = $self->_get_indexer(0) or return $self->{_debug} ? () : 1;

    # The primary key is stored as its own keyword field below, so it is not
    # also indexed as weighted text.
    my ($pk) = $tbl->pk;
    delete $weights{$pk};

    my $doc = Lucene::Document->new;
    for my $column_name (keys %weights) {
        # A NULL column arrives as undef; index it as an empty string rather
        # than handing undef to the Lucene binding.
        my $value = $rec->{$column_name};
        $value = '' unless defined $value;

        my $field = Lucene::Document::Field->UnStored($column_name, $value);
        $field->setBoost($weights{$column_name});
        $doc->add($field);
    }

    $doc->add(
        Lucene::Document::Field->Keyword(
            $pk,
            ($tbl->ai && $insert_sth ? $insert_sth->insert_id : $rec->{$pk})
        )
    );

    $indexer->addDocument($doc);
    $indexer->optimize if !$no_optimize;
    $indexer->close;
    undef $indexer;

    return 1;
}
|
||||
|
||||
sub reindex_all {
# -------------------------------------------------------
# Rebuilds the whole index from the rows of $table.
#
#   $table - table object to read from (select_options / select)
#   $opts  - hash ref of options:
#              tick - when true, print a running count every <tick> records
#                     and a newline every <tick>*10 records
#              max  - page size for the LIMIT clause; any false value
#                     (including 0) falls back to 5000
#              cond - condition passed to select(); defaults to {}
#
# An empty index is created first (replacing any existing one), rows are
# then added page by page through post_add_record() with optimization
# suppressed, and the index is optimized once at the end.
#
# Returns 1 on success. If the initial writer cannot be created, returns
# nothing when $self->{_debug} is set and 1 otherwise. Returns nothing when
# there are no weighted columns or the final writer cannot be opened.
#
    my ($self, $table, $opts) = @_;

    my $tick = $opts->{tick} || 0;
    my $max  = $opts->{max}  || 5000;

    # Opening in create mode clobbers the old index; the writer is closed
    # again straight away because post_add_record() opens its own.
    my $indexer = $self->_get_indexer(1) or return $self->{_debug} ? () : 1;
    $indexer->close;
    undef $indexer;

    my %weights = $self->{table}->_weight_cols() or return;
    my @weight_list = keys %weights;
    my ($pk) = $self->{table}->pk();

    my $page  = 0;    # zero-based page number
    my $count = 0;    # records processed; only maintained when $tick is set

    PAGE: while (1) {
        if ($max) {
            my $offset = $page * $max;
            $table->select_options("LIMIT $offset,$max");
        }
        my $cond = $opts->{cond} || {};
        my $sth  = $table->select($cond, [$pk, @weight_list]);

        my $saw_rows = 0;
        while (my $rec = $sth->fetchrow_hashref) {
            $self->post_add_record($rec, undef, 1);
            $saw_rows = 1;
            next unless $tick;

            $count++;
            print "$count " unless $count % $tick;
            print "\n"      unless $count % ($tick * 10);
        }

        # An empty page means the previous one was the last.
        last PAGE unless $saw_rows;
        $page++;

        # $max is always true given the default above, so this guard does
        # not fire; it is kept to mirror the unpaged case.
        last PAGE unless $max;
    }

    $indexer = $self->_get_indexer(0) or return;
    $indexer->optimize;
    $indexer->close;
    undef $indexer;

    return 1;
}
|
||||
|
||||
sub pre_delete_record {
# -------------------------------------------------------
# Removes the index entries of every record matching $where.
#
#   $where - condition handed to the table's select() to find the primary
#            keys of the affected records
#
# Returns 1 on success. Problems are only reported when $self->{_debug} is
# set: then the result of $self->error(...) is returned (INDEX_CORRUPT when
# the index cannot be opened, DELETE_FAILED when individual deletions
# threw). Without _debug, failures are ignored and 1 is returned.
#
    my $self  = shift;
    my $where = shift;

    my $tbl = $self->{table} or $self->error('NODRIVER', 'FATAL');
    my ($pk) = $tbl->pk();
    my $sth  = $tbl->select($where, [$pk]);

    my $reader = eval { Lucene::Index::IndexReader->open($self->_get_store(0)); };
    if ($@) {
        return 1 unless $self->{_debug};
        return $self->error('INDEX_CORRUPT', 'WARN', "$@");
    }

    # Keep going after a failed deletion so one bad record does not block
    # the rest; the messages are collected and reported together.
    my @failures;
    while (my ($id) = $sth->fetchrow) {
        my $term = Lucene::Index::Term->new($pk => $id);
        eval { $reader->deleteDocuments($term); };
        push @failures, "$@" if $@;
    }
    $reader->close;
    undef $reader;

    return 1 unless @failures && $self->{_debug};
    return $self->error('DELETE_FAILED', 'WARN', join(", ", @failures));
}
|
||||
|
||||
sub post_update_record {
# -------------------------------------------------------
# Re-indexes the records matched by $where_cond after an update: their old
# index entries are deleted, then each row is selected again and added back
# through post_add_record().
#
#   $set_cond   - accepted for interface compatibility; not used here
#   $where_cond - condition identifying the updated records
#   $tmp        - accepted for interface compatibility; not used here
#
# Returns 1 on success. If pre_delete_record() reports failure by returning
# false, nothing is re-added and the sub returns nothing when
# $self->{_debug} is set and 1 otherwise.
#
    my ($self, $set_cond, $where_cond, $tmp) = @_;

    # Delete the previous entries. The call stays inside eval so that an
    # exception is swallowed and re-adding is still attempted. The result is
    # checked OUTSIDE the eval because a `return` placed inside an eval block
    # only leaves that block, not this sub.
    my $deleted = eval { $self->pre_delete_record($where_cond) };
    if (!$@ && !$deleted) {
        return $self->{_debug} ? () : 1;
    }

    # Add the records back with their current column values.
    my $tbl = $self->{table} or $self->error('NODRIVER', 'FATAL');
    my ($pk) = $tbl->pk();
    my %weights = $tbl->_weight_cols();
    my @weight_list = keys %weights;

    my $q = $tbl->select($where_cond, [$pk, @weight_list]);
    while (my $href = $q->fetchrow_hashref) {
        $self->post_add_record($href);
    }

    return 1;
}
|
||||
|
||||
sub reindex_record {
# -------------------------------------------------------
# Reindexes one record: removes whatever is currently indexed for it, then
# indexes it again. Returns the result of index_record().
#
# NOTE(review): delete_record() and index_record() are not defined in this
# file; presumably they come from the base class -- confirm.
#
    my $self   = shift;
    my $record = shift;

    $self->delete_record($record);
    return $self->index_record($record);
}
|
||||
|
||||
1;
|
Reference in New Issue
Block a user