# ====================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
#   GT::SQL::Search::LUCENE::Indexer
#   Author: Scott Beck
#   CVS Info : 087,071,086,086,085
#   $Id: Indexer.pm,v 1.2 2006/12/07 22:42:16 aki Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
# ====================================================================
#

package GT::SQL::Search::LUCENE::Indexer;
# ------------------------------------------------------------------------------
# Preamble information related to the object

use strict;
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
use Lucene;
use GT::SQL::Search::Base::Indexer;
use GT::TempFile;

@ISA = qw/ GT::SQL::Search::Base::Indexer /;
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;
$ERRORS = {
    INDEX_CORRUPT => 'Could not create an Indexer, this probably means your index is corrupted and you should rebuild it. The error was: %s',
    DELETE_FAILED => 'Could not delete some records: %s'
};
$ERROR_MESSAGE = 'GT::SQL';

sub load {
# ------------------------------------------------------------------------------
# Class-method constructor alias: forwards all arguments to new().
#
    my $class = shift;
    return $class->new(@_);
}

sub _get_path {
# ------------------------------------------------------------------------------
# Returns the filesystem path of the Lucene index for this table:
# "<tmpdir>/<table name>", where tmpdir comes from GT::TempFile::find_tmpdir().
#
    my $self   = shift;
    my $name   = $self->{table}->name;
    my $tmpdir = GT::TempFile::find_tmpdir();
    my $path   = $tmpdir . '/' . $name;

    # XXX untaint: blanket capture so the path can be used under taint mode.
    $path = $1 if $path =~ /(.*)/;
    return $path;
}

sub _get_store {
# ------------------------------------------------------------------------------
# Opens the Lucene FSDirectory for this table's index path.
#   $create - passed through to getDirectory(); callers in this file pass 1
#             when (re)building the index and 0 otherwise.
# May throw if the directory cannot be opened; callers should wrap in eval.
#
    my ($self, $create) = @_;
    my $path = $self->_get_path;
    return Lucene::Store::FSDirectory->getDirectory($path, $create);
}

sub _get_indexer {
# ------------------------------------------------------------------------------
# Returns a Lucene::Index::IndexWriter for this table's index.
#   $create - true to create/clobber the index, false to open an existing one.
# Returns the result of $self->error(...) (presumably false - confirm against
# the base class) when the table has no weighted columns, has no primary key,
# or the store/writer cannot be opened.
#
    my ($self, $create) = @_;

    # Only used as a guard: a table without weighted columns is not indexed.
    my %weights = $self->{table}->_weight_cols()
        or return $self->error(NOWEIGHTS => 'WARN');

    my ($pk) = $self->{table}->pk;
    if (!$pk) {
        return $self->error('NOPRIMARYKEY', 'WARN');
    }

    my $analyzer = Lucene::Analysis::Standard::StandardAnalyzer->new;

    # Open the store inside the eval as well: a missing or unreadable index
    # directory should surface as INDEX_CORRUPT rather than an uncaught die
    # (pre_delete_record already treats _get_store failures this way).
    my $iw;
    eval {
        my $store = $self->_get_store($create);
        $iw = Lucene::Index::IndexWriter->new($store, $analyzer, $create);
    };
    if ($@) {
        return $self->error('INDEX_CORRUPT', 'WARN', "$@");
    }
    return $iw;
}

sub drop_search_driver {
# ------------------------------------------------------------------------------
# Removes the on-disk index directory for this table. Always returns 1.
#
    my $self = shift;
    my $path = $self->_get_path;

    require File::Tools;
    File::Tools::deldir($path);

    return 1;
}

sub add_search_driver {
# ------------------------------------------------------------------------------
# Creates a fresh index by opening an IndexWriter in create mode.
# Returns 1 on success, nothing if the writer could not be created.
#
    my $self = shift;
    $self->_get_indexer(1) or return;
    return 1;
}

sub post_create_table {
# ------------------------------------------------------------------------------
# creates the index tables..
#
    return $_[0]->add_search_driver(@_);
}

sub post_drop_table {
# -------------------------------------------------------
# Remove the index tables.
#
    return $_[0]->drop_search_driver(@_);
}

sub post_add_record {
# -------------------------------------------------------
# indexes a single record
#   $rec         - hash ref of column name => value for the record.
#   $insert_sth  - optional statement handle; when the table is
#                  auto-increment its insert_id is used as the key.
#   $no_optimize - true to skip optimize() (used for bulk reindexing).
# Returns 1 on success; nothing if the table has no weighted columns; if no
# writer could be opened returns () in debug mode and 1 otherwise.
#
    my ($self, $rec, $insert_sth, $no_optimize) = @_;

    my $tbl     = $self->{table} or $self->error('NODRIVER', 'FATAL');
    my %weights = $tbl->_weight_cols() or return;
    my $indexer = $self->_get_indexer(0) or return $self->{_debug} ? () : 1;

    my $doc  = Lucene::Document->new;
    my ($pk) = $self->{table}->pk;

    # The primary key is stored as a Keyword field below, not as searchable text.
    delete $weights{$pk};

    for my $column_name (keys %weights) {
        # Treat a missing/NULL column as empty text rather than passing undef
        # into the Lucene binding.
        my $value = defined $rec->{$column_name} ? $rec->{$column_name} : '';
        my $field = Lucene::Document::Field->UnStored($column_name, $value);
        $field->setBoost($weights{$column_name});
        $doc->add($field);
    }

    my $key = ($tbl->ai && $insert_sth) ? $insert_sth->insert_id : $rec->{$pk};
    $doc->add(Lucene::Document::Field->Keyword($pk, $key));

    $indexer->addDocument($doc);
    $indexer->optimize if !$no_optimize;
    $indexer->close;
    undef $indexer;

    return 1;
}

sub reindex_all {
# -------------------------------------------------------
# Rebuilds the whole index from the given table.
#   $table - table object to select records from.
#   $opts  - optional hash ref:
#              tick => print a progress counter every N records (default 0/off)
#              max  => page size for the LIMIT clause (default 5000)
#              cond => condition passed to select() (default {})
# Returns 1 on success.
#
    my $self  = shift;
    my $table = shift;
    my $opts  = shift;

    my $tick = $opts->{tick} || 0;
    my $max  = $opts->{max}  || 5000;

    # Opening in create mode clobbers the old index.
    my $indexer = $self->_get_indexer(1) or return $self->{_debug} ? () : 1;
    $indexer->close;
    undef $indexer;

    my %weights     = $self->{table}->_weight_cols() or return;
    my @weight_list = keys %weights;
    my ($pk)        = $self->{table}->pk();
    my $cond        = $opts->{cond} || {};

    # Go through the table and index each field.
    my $iterations = 1;
    my $count      = 0;
    while (1) {
        if ($max) {
            my $offset = ($iterations - 1) * $max;
            $table->select_options("LIMIT $offset,$max");
        }
        my $sth = $table->select($cond, [$pk, @weight_list]);

        # Stays true if this page returned no rows, which ends the loop.
        my $done = 1;
        while (my $rec = $sth->fetchrow_hashref()) {
            # Skip per-record optimize; a single optimize runs at the end.
            $self->post_add_record($rec, undef, 1);
            $done = 0;
            if ($tick) {
                $count++;
                $count % $tick or (print "$count ");
                $count % ($tick*10) or (print "\n");
            }
        }
        last if $done;
        $iterations++;
        last if !$max;
    }

    $indexer = $self->_get_indexer(0) or return;
    $indexer->optimize;
    $indexer->close;
    undef $indexer;

    return 1;
}

sub pre_delete_record {
# -------------------------------------------------------
# Delete a records index values.
#   $where - condition selecting the records whose index entries are removed.
# Failures are only reported (via $self->error) in debug mode; otherwise the
# method returns 1 so a broken index does not block the delete.
#
    my ($self, $where) = @_;

    my $tbl  = $self->{table} or $self->error('NODRIVER', 'FATAL');
    my ($pk) = $tbl->pk();
    my $q    = $tbl->select($where, [$pk]);

    my $reader = eval {
        Lucene::Index::IndexReader->open($self->_get_store(0));
    };
    if ($@) {
        return $self->{_debug} ? $self->error('INDEX_CORRUPT', 'WARN', "$@") : 1;
    }

    # Keep going after a failed delete and report all failures together.
    my @errors;
    while (my ($item_id) = $q->fetchrow) {
        my $t = Lucene::Index::Term->new($pk => $item_id);
        eval {
            $reader->deleteDocuments($t);
        };
        if ($@) {
            push @errors, "$@";
        }
    }
    $reader->close;
    undef $reader;

    if (@errors) {
        return $self->{_debug} ? $self->error('DELETE_FAILED', 'WARN', join(", ", @errors)) : 1;
    }
    return 1;
}

sub post_update_record {
# -------------------------------------------------------
# Re-indexes the records matched by $where_cond after an update: removes
# their old index entries, then indexes the current rows.
#   $set_cond and $tmp are accepted but not used here.
# Returns 1; in debug mode returns () if removing the old entries failed.
#
    my ($self, $set_cond, $where_cond, $tmp) = @_;

    # delete the previous record
    # A `return` inside eval {} only leaves the eval, so capture the result
    # and return from the sub explicitly. Exceptions are still swallowed and
    # the re-add below is still attempted in that case.
    my $deleted = eval { $self->pre_delete_record($where_cond) };
    if (!$@ and !$deleted) {
        return $self->{_debug} ? () : 1;
    }

    #
    # the new record
    my $tbl         = $self->{table} or $self->error('NODRIVER', 'FATAL');
    my ($pk)        = $tbl->pk();
    my %weights     = $self->{table}->_weight_cols();
    my @weight_list = keys %weights;
    my $q           = $tbl->select($where_cond, [$pk, @weight_list]);
    while (my $href = $q->fetchrow_hashref) {
        $self->post_add_record($href);
    }

    return 1;
}

sub reindex_record {
# -------------------------------------------------------
# reindexes a record. basically deletes all associated records from current db and does an index.
# it's safe to use this
# NOTE(review): delete_record/index_record are not defined in this file;
# presumably inherited from GT::SQL::Search::Base::Indexer - confirm.
#
    my ($self, $rec) = @_;
    $self->delete_record($rec);
    $self->index_record($rec);
}

1;