First pass at adding key files

This commit is contained in:
dsainty
2024-06-17 21:49:12 +10:00
commit aa25e9347f
1274 changed files with 392549 additions and 0 deletions

View File

@ -0,0 +1,239 @@
# ====================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::SQL::Search::LUCENE::Indexer
# Author: Scott Beck
# CVS Info : 087,071,086,086,085
# $Id: Indexer.pm,v 1.2 2006/12/07 22:42:16 aki Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ====================================================================
#
package GT::SQL::Search::LUCENE::Indexer;
# ------------------------------------------------------------------------------
# Preamble information related to the object
use strict;
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
use Lucene;
use GT::SQL::Search::Base::Indexer;
use GT::TempFile;
@ISA = qw/ GT::SQL::Search::Base::Indexer /;
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;
$ERRORS = {
INDEX_CORRUPT => 'Could not create an Indexer, this probably means your index is corrupted and you should rebuild it. The error was: %s',
DELETE_FAILED => 'Could not delete some records: %s'
};
$ERROR_MESSAGE = 'GT::SQL';
sub load {
my $class = shift;
return $class->new(@_)
}
sub _get_path {
my $self = shift;
my $name = $self->{table}->name;
my $tmpdir = GT::TempFile::find_tmpdir();
my $path = $tmpdir . '/' . $name;
$path = $1 if $path =~ /(.*)/; # XXX untaint
return $path;
}
sub _get_store {
my ($self, $create) = @_;
my $path = $self->_get_path;
return Lucene::Store::FSDirectory->getDirectory($path, $create);
}
sub _get_indexer {
my ($self, $create) = @_;
my %weights = $self->{table}->_weight_cols() or return $self->error(NOWEIGHTS => 'WARN');
my ($pk) = $self->{table}->pk;
if (!$pk) {
return $self->error('NOPRIMARYKEY','WARN');
}
my $analyzer = new Lucene::Analysis::Standard::StandardAnalyzer;
my $store = $self->_get_store($create);
my $iw;
eval { $iw = new Lucene::Index::IndexWriter($store, $analyzer, $create); };
if ($@) {
return $self->error('INDEX_CORRUPT', 'WARN', "$@");
}
return $iw;
}
sub drop_search_driver {
# ------------------------------------------------------------------------------
my $self = shift;
my $path = $self->_get_path;
require File::Tools;
File::Tools::deldir($path);
return 1;
}
sub add_search_driver {
# ------------------------------------------------------------------------------
my $self = shift;
$self->_get_indexer(1) or return;
return 1;
}
sub post_create_table {
# ------------------------------------------------------------------------------
# creates the index tables..
#
return $_[0]->add_search_driver(@_);
}
sub post_drop_table {
# -------------------------------------------------------
# Remove the index tables.
#
return $_[0]->drop_search_driver(@_);
}
sub post_add_record {
# -------------------------------------------------------
# indexes a single record
my ($self, $rec, $insert_sth, $no_optimize) = @_;
my $tbl = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
my %weights = $tbl->_weight_cols() or return;
my $indexer = $self->_get_indexer(0) or return $self->{_debug} ? () : 1;
my $doc = new Lucene::Document;
my ($pk) = $self->{table}->pk;
delete $weights{$pk};
for my $column_name (keys %weights) {
my $field = Lucene::Document::Field->UnStored($column_name, $rec->{$column_name});
$field->setBoost($weights{$column_name});
$doc->add($field);
}
$doc->add(Lucene::Document::Field->Keyword($pk, ($tbl->ai && $insert_sth ? $insert_sth->insert_id : $rec->{$pk})));
$indexer->addDocument($doc);
$indexer->optimize if !$no_optimize;
$indexer->close;
undef $indexer;
return 1;
}
sub reindex_all {
# -------------------------------------------------------
my $self = shift;
my $table = shift;
my $opts = shift;
my $tick = $opts->{tick} || 0;
my $max = $opts->{max} || 5000;
my $indexer = $self->_get_indexer(1) or return $self->{_debug} ? () : 1; # clobbers the old one
$indexer->close;
undef $indexer;
my %weights = $self->{table}->_weight_cols() or return;
my @weight_list = keys %weights;
my ($pk) = $self->{table}->pk();
# Go through the table and index each field.
my $iterations = 1;
my $count = 0;
while (1) {
if ($max) {
my $offset = ($iterations-1) * $max;
$table->select_options("LIMIT $offset,$max");
}
my $cond = $opts->{cond} || {};
my $sth = $table->select($cond, [$pk, @weight_list]);
my $done = 1;
while (my $rec = $sth->fetchrow_hashref() ) {
$self->post_add_record($rec, undef, 1);
$done = 0;
if ($tick) {
$count++;
$count % $tick or (print "$count ");
$count % ($tick*10) or (print "\n");
}
}
last if $done;
$iterations++;
last if !$max;
}
$indexer = $self->_get_indexer(0) or return;
$indexer->optimize;
$indexer->close;
undef $indexer;
return 1;
}
sub pre_delete_record {
# -------------------------------------------------------
# Delete a records index values.
#
my ($self, $where) = @_;
my $tbl = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
my ($pk) = $tbl->pk();
my $q = $tbl->select($where, [$pk]);
my $reader = eval { Lucene::Index::IndexReader->open($self->_get_store(0)); };
if ($@) {
return $self->{_debug} ? $self->error('INDEX_CORRUPT', 'WARN', "$@") : 1;
}
my @errors;
while (my ($item_id) = $q->fetchrow) {
my $t = new Lucene::Index::Term($pk => $item_id);
eval { $reader->deleteDocuments($t); };
if ($@) {
push @errors, "$@";
}
}
$reader->close;
undef $reader;
if (@errors) {
return $self->{_debug} ? $self->error('DELETE_FAILED', 'WARN', join(", ", @errors)) : 1;
}
return 1;
}
sub post_update_record {
# -------------------------------------------------------
my ( $self, $set_cond, $where_cond, $tmp ) = @_;
# delete the previous record
eval {
$self->pre_delete_record($where_cond) or return $self->{_debug} ? () : 1;
};
#
# the new record
my $tbl = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
my ($pk) = $tbl->pk();
my %weights = $self->{table}->_weight_cols();
my @weight_list = keys %weights;
my $q = $tbl->select($where_cond, [$pk, @weight_list]);
while (my $href = $q->fetchrow_hashref) {
$self->post_add_record($href);
}
return 1;
}
sub reindex_record {
# -------------------------------------------------------
# reindexes a record. basically deletes all associated records from current db abnd does an index.
# it's safe to use this
my ($self, $rec) = @_;
$self->delete_record($rec);
$self->index_record($rec);
}
1;

View File

@ -0,0 +1,206 @@
NAME
Lucene -- API to the C++ port of the Lucene search engine
SYNOPSIS
Initialize/Empty Lucene index
my $analyzer = new Lucene::Analysis::Standard::StandardAnalyzer();
my $store = Lucene::Store::FSDirectory->getDirectory("/home/lucene", 1);
my $tmp_writer = new Lucene::Index::IndexWriter($store, $analyzer, 1);
$tmp_writer->close;
undef $tmp_writer;
Choose your Analyzer (string tokenizer)
# lowercases text and splits it at non-letter characters
my $analyzer = Lucene::Analysis::SimpleAnalyzer();
# same as before and removes stop words
my $analyzer = Lucene::Analysis::StopAnalyzer();
# splits text at whitespace characters
my $analyzer = Lucene::Analysis::WhitespaceAnalyzer();
# lowercases text, tokenized it based on a grammer that
# leaves named authorities intact (e-mails, company names,
# web hostnames, IP addresses, etc) and removed stop words
my $analyzer = Lucene::Analysis::Standard::StandardAnalyzer();
Choose your Store (storage engine)
# in-memory storage
my $store = new Lucene::Store::RAMDirectory();
# disk-based storage
my $store = Lucene::Store::FSDirectory->getDirectory("/home/lucene", 0);
Open and configure an IndexWriter
my $writer = new Lucene::Index::IndexWriter($store, $analyzer, 0);
# optional settings for power users
$writer->setMergeFactor(100);
$writer->setUseCompoundFile(0);
$writer->setMaxFieldLength(255);
$writer->setMinMergeDocs(10);
$writer->setMaxMergeDocs(100);
Create Documents and add Fields
my $doc = new Lucene::Document;
# field gets analyzed, indexed and stored
$doc->add(Lucene::Document::Field->Text("content", $content));
# field gets indexed and stored
$doc->add(Lucene::Document::Field->Keyword("isbn", $isbn));
# field gets just stored
$doc->add(Lucene::Document::Field->UnIndexed("sales_rank", $sales_rank));
# field gets analyzed and indexed
$doc->add(Lucene::Document::Field->UnStored("categories", $categories));
Add Documents to an IndexWriter
$writer->addDocument($doc);
Optimize your index and close the IndexWriter
$writer->optimize();
$writer->close();
undef $writer;
Delete Documents
my $reader = Lucene::Index::IndexReader->open($store);
my $term = new Lucene::Index::Term("isbn", $isbn);
$reader->deleteDocuments($term);
$reader->close();
undef $reader;
Query index
# initalize searcher and parser
my $analyzer = Lucene::Analysis::SimpleAnalyzer();
my $store = Lucene::Store::FSDirectory->getDirectory("/home/lucene", 0);
my $searcher = new Lucene::Search::IndexSearcher($store);
my $parser = new Lucene::QueryParser("default_field", $analyzer);
# build a query on the default field
my $query = $parser->parse("perl");
# build a query on another field
my $query = $parser->parse("title:cookbook");
# define a sort on one field
my $sortfield = new Lucene::Search::SortField("unixtime");
my $reversed_sortfield = new Lucene::Search::SortField("unixtime", 1);
my $sort = new Lucene::Search::Sort($sortfield);
# define a sort on two fields
my $sort = new Lucene::Search::Sort($sortfield1, $sortfield2);
# use Lucene's INDEXORDER or RELEVANCE sort
my $sort = Lucene::Search::Sort->INDEXORDER;
my $sort = Lucene::Search::Sort->RELEVANCE;
# query index and get results
my $hits = $searcher->search($query);
my $sorted_hits = $searcher->search($query, $sort);
# get number of results
my $num_hits = $hits->length();
# get fields and ranking score for each hit
for (my $i = 0; $i < $num_hits; $i++) {
my $doc = $hits->doc($i);
my $score = $hits->score($i);
my $title = $doc->get("title");
my $isbn = $doc->get("isbn");
}
# free memory and close searcher
undef $hits;
undef $query;
undef $parser;
undef $analyzer;
$searcher->close();
undef $fsdir;
undef $searcher;
}
Close your Store
$store->close;
undef $store;
DESCRIPTION
Like it or not Apache Lucene has become the de-facto standard for
open-source high-performance search. It has a large user-base, is well
documented and has plenty of committers. Unfortunately Apache Lucene is
entirely written in Java and therefore of relatively little use for perl
programmers. Fortunately in the recent years a group of C++ programmers
led by Ben van Klinken decided to port Java Lucene to C++.
The purpose of the module is to export the C++ Lucene API to perl and at
the same time be as close as possible to the original Java API. This has
the combined advantage of providing perl programmers with a
well-documented API and giving them access to a C++ search engine
library that is supposedly faster than the original.
CHARACTER SUPPORT
Currently only ISO 8859-1 (Latin-1) characters are supported. Obviously
this included all ASCII characters.
INDEX COMPATIBLITY
For the moment indices produced by this module are not compatible with
those from Apache Lucene. The reason for this is that this module uses
1-byte character encoding as opposed to 2-byte (widechar) encoding with
Apache Lucene.
INSTALLATION
This module requires the clucene library to be installed. The best way
to get it is to go to the following page
http://sourceforge.net/projects/clucene/
and download the latest STABLE clucene-core version. Currently it is
clucene-core-0.9.15. Make sure you compile it in ASCII mode and install
it in your standard library path.
On a Linux platform this goes as follows:
wget http://kent.dl.sourceforge.net/sourceforge/clucene/clucene-core-0.9.15.tar.gz
cd clucene-core-0.9.15
./autogen.sh
./configure --disable-debug --prefix=/usr --exec-prefix=/usr --enable-ascii
make
make check
(as root) make install
To install the perl module itself, run the following commands:
perl Makefile.PL
make
make test
(as root) make install
AUTHOR
Thomas Busch <tbusch at cpan dot org>
COPYRIGHT AND LICENSE
Copyright (c) 2006 Thomas Busch
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
SEE ALSO
Plucene - a pure-Perl implementation of Lucene
KinoSearch - a search engine library inspired by Lucene
DISCLAIMER OF WARRANTY
BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
NECESSARY SERVICING, REPAIR, OR CORRECTION.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE
TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.

View File

@ -0,0 +1,115 @@
# ====================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::SQL::Search::STH
# Author: Aki Mimoto
# CVS Info : 087,071,086,086,085
# CVS Info : 087,071,086,086,085
# $Id: STH.pm,v 1.1 2006/12/07 07:04:51 aki Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ====================================================================
#
package GT::SQL::Search::LUCENE::STH;
#--------------------------------------------------------------------------------
use strict;
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE /;
require GT::SQL::Search::Base::STH;
@ISA = ('GT::SQL::Search::STH');
$ATTRIBS = {
'db_sort' => 1,
'hits' => undef
};
$ERROR_MESSAGE = 'GT::SQL::Search::STH';
sub init {
#--------------------------------------------------------------------------------
# GT::SQL::Search::STH expects a full set of results in $self->{results}. For
# Lucene the only time a full set of results is there is when we are sorting
# on a field that is not weighted, otherwise the results in $self->{results} is
# the proper page and number of results.
my $self = shift;
$self->set(@_);
--$self->{nh} if $self->{nh};
# Here we allow hits to override our concept of rows. This is only useful
# when !$self->{db_sort}
$self->{rows} = $self->{hits}
? $self->{hits}
: $self->{results}
? scalar(keys %{$self->{results}})
: 0;
if ($self->{db_sort}) {
$self->get_db_sorted_results;
}
else {
$self->get_sorted_results;
}
}
sub get_sorted_results {
# Just sorts the results out of $self->{results} which should have been setup
# by a search driver
my ($self) = @_;
my $results = $self->{results};
$self->{index} = 0;
$self->{max_index} = $self->{mh} - 1;
$self->{'order'} = [ sort {
( $results->{$b} || return 0 ) <=> ( $results->{$a} || 0 )
} keys %{$results} ];
$self->debug_dumper( "Results will be presented in the following order: ", $self->{'order'} ) if ($self->{_debug});
}
sub get_db_sorted_results {
# This assumes $self->{results} has a full result set, i.e. without any LIMIT
# It then selects the result set using the SQL driver to do the sorting. This
# is for Search modules which can not handle their own sorting
my ($self) = @_;
my $sb = GT::SQL::Search::Base::Search->clean_sb($self->{sb}, $self->{so});
$self->{index} = $self->{nh} * $self->{mh} || 0;
$self->{max_index} = $self->{index} + $self->{mh} - 1; # index + mh is the first record which should not be returned.
if ($self->{max_index} > $self->{rows}) {
$self->{max_index} = $self->{rows};
$self->{rows} = $self->{rows} - $self->{index};
$self->{rows} < 0 ? $self->{rows} = 0 : 0;
}
else {
$self->{rows} = $self->{mh};
}
my ( $table, $pk ) = $self->_table_info();
my ( $query, $where, $st, $limit );
$where = qq!$pk in(! . join( ",", keys %{$self->{results}}) . ')';
$self->{mh} and $limit = qq!LIMIT $self->{index}, $self->{rows}!;
$query = qq!
SELECT $pk
FROM $table
WHERE $where
$sb
$limit
!;
$self->debug( "Row fetch query: $query" ) if ($self->{_debug});
my $sth = $self->{table}->{driver}->prepare( $query );
$sth->execute();
$self->{index} = 0;
$self->{max_hits} = $self->{rows};
# Fetch the results in sorted order
my $order = $sth->fetchall_arrayref();
$sth->finish();
$self->{'order'} = [ map { $_->[0] } @{$order} ];
}
1;

View File

@ -0,0 +1,260 @@
# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::Search::LUCENE::Search
# Author : Scott Beck
# CVS Info : 087,071,086,086,085
# $Id: Search.pm,v 1.2 2006/12/07 22:42:16 aki Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Class used to search indexed tables.
#
package GT::SQL::Search::LUCENE::Search;
# ------------------------------------------------------------------------------
use strict;
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS $ERRORS $ERROR_MESSAGE /;
use Lucene;
use GT::TempFile;
use GT::SQL::Search::LUCENE::STH;
use GT::SQL::Search::Base::Search;
@ISA = qw( GT::SQL::Search::Base::Search );
# ------------------------------------------------------------------------------
# Preamble information related to the object
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;
$ERRORS = {
SEARCH_ERROR => "Error searching: %s",
QUERY_ERROR => "Query error: %s"
};
$ERROR_MESSAGE = 'GT::SQL';
sub load {
shift;
return GT::SQL::Search::LUCENE::Search->new(@_)
}
sub _get_path {
my $self = shift;
my $name = $self->{table}->name;
my $tmpdir = GT::TempFile::find_tmpdir();
my $path = $tmpdir . '/' . $name;
$path = $1 if $path =~ /(.*)/; # XXX untaint
return $path;
}
sub _get_store {
my ($self, $create) = @_;
my $path = $self->_get_path;
return Lucene::Store::FSDirectory->getDirectory($path, $create);
}
sub query {
# --------------------------------------------------
# Returns a sth based on a query
#
# Options:
# - paging
# mh : max hits
# nh : number hit (or page of hits)
#
# - searching
# ww : whole word
# ma : 1 => OR match, 0 => AND match, undefined => QUERY
# substring : search for substrings of words
# bool : 'and' => and search, 'or' => or search, '' => regular query
# query : the string of things to ask for
#
# - filtering
# field_name : value # Find all rows with field_name = value
# field_name : ">value" # Find all rows with field_name > value.
# field_name : "<value" # Find all rows with field_name < value.
# field_name-gt : value # Find all rows with field_name > value.
# field_name-lt : value # Find all rows with field_name < value.
#
# Parameters:
# ( $CGI ) : a single cgi object
# ( $HASH ) : a hash of the parameters
#
my $self = shift;
# create an easily accessible argument hash
my $args = $self->common_param(@_);
my $tbl = $self->{table};
# see if we can setup the filtering constraints
my $filter = { %$args };
my $query = delete $args->{query} || $self->{query} || '';
my $ftr_cond;
# parse query
$self->debug( "Search Query: $query" ) if ($self->{_debug});
my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
$self->{rejected_keywords} = $rejected;
# setup the additional input parameters
$query_struct = $self->_preset_options( $query_struct, $args );
# now sort into distinct buckets
my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
$self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
# with the buckets, it's now possible to create a query string
# that can be passed directly into the Lucene search.
my $query_string = '';
foreach my $search_type ( keys %$buckets ) {
my $bucket = $buckets->{$search_type};
foreach my $token ( keys %$bucket ) {
next unless $token;
my $properties = $bucket->{$token} or next;
$token =~ s/(["()])/\\$1/g;
$token =~ s/\b(or|and)\b/ /g;
my $e = ' ';
# handle boolean operations
$properties->{mode} ||= '';
if ( $properties->{mode} eq 'must' ) {
$e .= '+';
}
elsif ( $properties->{mode} eq 'cannot' ) {
$e .= '-';
}
# deal with phrase vs keyword
if ( $properties->{phrase} ) {
$e .= '"' . $token . '"' unless $token =~ /^"|"$/;
}
else {
$e .= $token;
# substring match
if ($properties->{mode} ne 'substring') {
$e .= '*' if $properties->{substring};
}
}
$query_string .= $e;
}
}
# calculate the cursor constraints
foreach my $k (qw( nh mh so sb )) {
next if defined $args->{$k};
$args->{$k} = $self->{$k} || '';
}
$args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
$args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
$args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ ) ? $1 : 'score';
# Score is the default
$args->{so} = (defined $args->{so} and $args->{so} =~ /^(asc|desc)(?:end)?$/i) ? lc($1) : 'asc';
my %weights = $tbl->_weight_cols();
my @sortfields;
my $do_mysql_sort = 0;
for (ref($args->{sb}) eq 'ARRAY' ? @{$args->{sb}} : $args->{sb}) {
if (!exists $weights{$_}) {
$do_mysql_sort = 1 if $_ ne 'score';
next;
}
push @sortfields, new Lucene::Search::SortField($_, $args->{so} ne 'asc');
}
my $sort = @sortfields ? new Lucene::Search::Sort(@sortfields) : Lucene::Search::Sort->RELEVANCE;
my $store = $self->_get_store(0);
my $analyzer = new Lucene::Analysis::Standard::StandardAnalyzer;
my $searcher = eval { new Lucene::Search::IndexSearcher($store); };
if ($@) {
$self->{_debug} and $self->error('SEARCH_ERROR', 'WARN', "$@");
return $self->sth({}, 0); # no hits
}
# Random default field, it's not used
my $parser = new Lucene::MultiFieldQueryParser((keys %weights)[0], $analyzer);
my $pquery = eval { $parser->parse($query_string, [keys %weights], $analyzer); };
if ($@) {
$self->{_debug} and $self->error('QUERY_ERROR', 'WARN', "$@");
return $self->sth({}, 0); # no hits
}
my $hits = $searcher->search($pquery, $sort);
my $num_hits = $hits->length;
## Setup a limit only if there is no callback. The callback argument requires a full results list
my ($offset, $max_hits) = (0, $num_hits);
unless ($self->{callback} or $do_mysql_sort) {
$offset = ( $args->{nh} - 1 ) * $args->{mh};
$max_hits = $offset + $args->{mh};
}
$max_hits = $num_hits if $max_hits > $num_hits;
my ($pk) = $self->{table}->pk;
my @indexes;
my $results = {};
for (my $i= $offset; $i < $max_hits; ++$i) {
my $doc = $hits->doc($i);
my $value = $doc->get($pk);
my $score = $hits->score($i);
$results->{$value} = $score;
}
# now handle filters
my $cols = $self->{'table'}->cols();
my %filters = map {
(my $tmp = $_) =~ s/-[lg]t$//;
$cols->{$tmp} ? ($_ => $args->{$_}) : ()
} keys %{$args};
if (keys %filters) {
$self->debug( "Creating Filters: ", \%filters ) if ($self->{_debug});
$results = $self->filter(\%filters, $results);
}
elsif ($self->{filter}) {
$self->debug( "Filtering results", $self->{filter} ) if ($self->{_debug});
$results = $self->_filter_query( $self->{filter}, $results );
}
else {
$self->debug( "No filters being used.") if ($self->{_debug});
}
# now this query should probably clear the filters once it's been used, so i'll dothat here
$self->{filter} = undef;
# now run through a callback function if needed.
if ($self->{callback}) {
unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
$self->{_debug} and $self->error ('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
return $self->sth({}, 0); # no hits
}
$self->debug_dumper ("Running results through callback. Had: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
$results = $self->{callback}->($self, $results);
$self->debug_dumper ("New result set: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
}
$self->{rows} = $num_hits;
return $self->sth($results, $do_mysql_sort);
}
sub sth {
#--------------------------------------------------------------------------------
my ($self, $results, $db_sort) = @_;
my $sth = GT::SQL::Search::LUCENE::STH->new(
'results' => $results,
'hits' => $self->{rows},
'db' => $self->{table}->{driver},
'db_sort' => $db_sort,
# pass the following attributes down to the STH handler
map({ ($_ => $self->{$_}) } qw/ table sb so score_col score_sort nh mh rows _debug /)
);
return $sth;
}
1;