First pass at adding key files

2024-06-17 21:49:12 +10:00
commit aa25e9347f
1274 changed files with 392549 additions and 0 deletions
--- a/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/Indexer.pm
+++ b/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/Indexer.pm
@ -0,0 +1,239 @@
+# ====================================================================
+# Gossamer Threads Module Library - http://gossamer-threads.com/
+#
+#   GT::SQL::Search::LUCENE::Indexer
+#   Author: Scott Beck
+#   CVS Info : 087,071,086,086,085      
+#   $Id: Indexer.pm,v 1.2 2006/12/07 22:42:16 aki Exp $
+#
+# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
+# ====================================================================
+#
+
+package GT::SQL::Search::LUCENE::Indexer;
+
+# ------------------------------------------------------------------------------
+# Preamble information related to the object
+use strict;
+use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
+use Lucene;
+use GT::SQL::Search::Base::Indexer;
+use GT::TempFile;
+@ISA = qw/ GT::SQL::Search::Base::Indexer /;
+$DEBUG   = 0;
+$VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;
+$ERRORS = {
+    INDEX_CORRUPT => 'Could not create an Indexer, this probably means your index is corrupted and you should rebuild it. The error was: %s',
+    DELETE_FAILED => 'Could not delete some records: %s'
+};
+$ERROR_MESSAGE = 'GT::SQL';
+
+sub load {
+    my $class = shift;
+    return $class->new(@_)
+}
+
+sub _get_path {
+    my $self = shift;
+    my $name    = $self->{table}->name;
+    my $tmpdir  = GT::TempFile::find_tmpdir();
+    my $path = $tmpdir . '/' . $name;
+    $path = $1 if $path =~ /(.*)/; # XXX untaint
+    return $path;
+}
+
+sub _get_store {
+    my ($self, $create) = @_;
+    my $path = $self->_get_path;
+    return Lucene::Store::FSDirectory->getDirectory($path, $create);
+}
+
+sub _get_indexer {
+    my ($self, $create) = @_;
+    my %weights = $self->{table}->_weight_cols() or return $self->error(NOWEIGHTS => 'WARN');
+
+    my ($pk) = $self->{table}->pk;
+    if (!$pk) {
+        return $self->error('NOPRIMARYKEY','WARN');
+    }
+    my $analyzer = new Lucene::Analysis::Standard::StandardAnalyzer;
+    my $store = $self->_get_store($create);
+
+    my $iw;
+    eval { $iw = new Lucene::Index::IndexWriter($store, $analyzer, $create); };
+    if ($@) {
+        return $self->error('INDEX_CORRUPT', 'WARN', "$@");
+    }
+    return $iw;
+}
+
+sub drop_search_driver {
+# ------------------------------------------------------------------------------
+    my $self    = shift;
+    my $path = $self->_get_path;
+    require File::Tools;
+    File::Tools::deldir($path);
+    return 1;
+}
+
+sub add_search_driver {
+# ------------------------------------------------------------------------------
+    my $self    = shift;
+    $self->_get_indexer(1) or return;
+    return 1;
+}
+
+sub post_create_table {
+# ------------------------------------------------------------------------------
+# creates the index tables..
+#
+    return $_[0]->add_search_driver(@_);
+}
+
+sub post_drop_table {
+# -------------------------------------------------------
+# Remove the index tables.
+#
+    return $_[0]->drop_search_driver(@_);
+}
+
+
+sub post_add_record {
+# -------------------------------------------------------
+# indexes a single record
+    my ($self, $rec, $insert_sth, $no_optimize) = @_;
+
+    my $tbl     = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
+    my %weights = $tbl->_weight_cols() or return;
+
+    my $indexer = $self->_get_indexer(0) or return $self->{_debug} ? () : 1;
+    my $doc = new Lucene::Document;
+    my ($pk) = $self->{table}->pk;
+    delete $weights{$pk};
+    for my $column_name (keys %weights) {
+        my $field = Lucene::Document::Field->UnStored($column_name, $rec->{$column_name});
+        $field->setBoost($weights{$column_name});
+        $doc->add($field);
+    }
+    $doc->add(Lucene::Document::Field->Keyword($pk, ($tbl->ai && $insert_sth ? $insert_sth->insert_id : $rec->{$pk})));
+    $indexer->addDocument($doc);
+    $indexer->optimize if !$no_optimize;
+    $indexer->close;
+    undef $indexer;
+    return 1;
+}
+
+sub reindex_all {
+# -------------------------------------------------------
+    my $self    = shift;
+    my $table   = shift;
+    my $opts    = shift;
+    my $tick    = $opts->{tick} || 0;
+    my $max     = $opts->{max}  || 5000;
+
+    my $indexer = $self->_get_indexer(1) or return $self->{_debug} ? () : 1; # clobbers the old one
+    $indexer->close;
+    undef $indexer;
+
+    my %weights     = $self->{table}->_weight_cols() or return;
+    my @weight_list = keys %weights;
+    my ($pk)    = $self->{table}->pk();
+
+# Go through the table and index each field.
+    my $iterations = 1;
+    my $count = 0;
+
+    while (1) {
+        if ($max) {
+            my $offset = ($iterations-1) * $max;
+            $table->select_options("LIMIT $offset,$max");
+        }
+        my $cond     = $opts->{cond} || {};
+        my $sth      = $table->select($cond, [$pk, @weight_list]);
+        my $done     = 1;
+
+        while (my $rec = $sth->fetchrow_hashref() ) {
+            $self->post_add_record($rec, undef, 1);
+            $done = 0;
+            if ($tick) {
+                $count++;
+                $count % $tick      or (print "$count ");
+                $count % ($tick*10) or (print "\n");
+            }
+        }
+        last if $done;
+        $iterations++;
+        last if !$max;
+    }
+    $indexer = $self->_get_indexer(0) or return;
+    $indexer->optimize;
+    $indexer->close;
+    undef $indexer;
+    return 1;
+}
+
+sub pre_delete_record {
+# -------------------------------------------------------
+# Delete a records index values.
+#
+    my ($self, $where) = @_; 
+
+    my $tbl = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
+    my ($pk) = $tbl->pk();
+    my $q = $tbl->select($where, [$pk]);
+
+    my $reader = eval { Lucene::Index::IndexReader->open($self->_get_store(0)); };
+    if ($@) {
+        return $self->{_debug} ? $self->error('INDEX_CORRUPT', 'WARN', "$@") : 1;
+    }
+
+    my @errors;
+    while (my ($item_id) = $q->fetchrow) {
+        my $t =  new Lucene::Index::Term($pk => $item_id);
+        eval { $reader->deleteDocuments($t); };
+        if ($@) {
+            push @errors, "$@";
+        }
+    }
+    $reader->close;
+    undef $reader;
+    if (@errors) {
+        return $self->{_debug} ? $self->error('DELETE_FAILED', 'WARN', join(", ", @errors)) : 1;
+    }
+    return 1;
+}
+
+sub post_update_record {
+# -------------------------------------------------------
+    my ( $self, $set_cond, $where_cond, $tmp ) = @_;
+
+# delete the previous record
+    eval {
+        $self->pre_delete_record($where_cond) or return $self->{_debug} ? () : 1;
+    };
+#
+# the new record
+    my $tbl = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
+    my ($pk) = $tbl->pk();
+    my %weights = $self->{table}->_weight_cols();
+    my @weight_list = keys %weights;
+    my $q = $tbl->select($where_cond, [$pk, @weight_list]);
+    while (my $href = $q->fetchrow_hashref) {
+        $self->post_add_record($href);
+    }
+
+    return 1;
+
+}
+
+sub reindex_record {
+# -------------------------------------------------------
+# reindexes a record. basically deletes all associated records from current db abnd does an index.
+# it's safe to use this
+    my ($self, $rec) = @_;
+
+    $self->delete_record($rec);
+    $self->index_record($rec);
+}
+
+1;
--- a/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/Lucene.txt
+++ b/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/Lucene.txt
@ -0,0 +1,206 @@
+NAME
+    Lucene -- API to the C++ port of the Lucene search engine
+
+SYNOPSIS
+  Initialize/Empty Lucene index
+      my $analyzer = new Lucene::Analysis::Standard::StandardAnalyzer();
+      my $store = Lucene::Store::FSDirectory->getDirectory("/home/lucene", 1);
+
+      my $tmp_writer = new Lucene::Index::IndexWriter($store, $analyzer, 1);
+      $tmp_writer->close;
+      undef $tmp_writer;
+
+  Choose your Analyzer (string tokenizer)
+      # lowercases text and splits it at non-letter characters 
+      my $analyzer = Lucene::Analysis::SimpleAnalyzer();
+      # same as before and removes stop words
+      my $analyzer = Lucene::Analysis::StopAnalyzer();
+      # splits text at whitespace characters
+      my $analyzer = Lucene::Analysis::WhitespaceAnalyzer();
+      # lowercases text, tokenized it based on a grammer that 
+      # leaves named authorities intact (e-mails, company names,
+      # web hostnames, IP addresses, etc) and removed stop words
+      my $analyzer = Lucene::Analysis::Standard::StandardAnalyzer();
+
+  Choose your Store (storage engine)
+      # in-memory storage
+      my $store = new Lucene::Store::RAMDirectory();
+      # disk-based storage
+      my $store = Lucene::Store::FSDirectory->getDirectory("/home/lucene", 0);
+
+  Open and configure an IndexWriter
+      my $writer = new Lucene::Index::IndexWriter($store, $analyzer, 0);
+      # optional settings for power users
+      $writer->setMergeFactor(100);
+      $writer->setUseCompoundFile(0);
+      $writer->setMaxFieldLength(255);
+      $writer->setMinMergeDocs(10);
+      $writer->setMaxMergeDocs(100);
+
+  Create Documents and add Fields
+      my $doc = new Lucene::Document;
+      # field gets analyzed, indexed and stored
+      $doc->add(Lucene::Document::Field->Text("content", $content));
+      # field gets indexed and stored
+      $doc->add(Lucene::Document::Field->Keyword("isbn", $isbn));
+      # field gets just stored
+      $doc->add(Lucene::Document::Field->UnIndexed("sales_rank", $sales_rank));
+      # field gets analyzed and indexed 
+      $doc->add(Lucene::Document::Field->UnStored("categories", $categories));
+
+  Add Documents to an IndexWriter
+      $writer->addDocument($doc);
+
+  Optimize your index and close the IndexWriter
+      $writer->optimize();
+      $writer->close();
+      undef $writer;
+
+  Delete Documents
+      my $reader = Lucene::Index::IndexReader->open($store);
+      my $term = new Lucene::Index::Term("isbn", $isbn);
+      $reader->deleteDocuments($term);
+      $reader->close();
+      undef $reader;
+
+  Query index
+      # initalize searcher and parser
+      my $analyzer = Lucene::Analysis::SimpleAnalyzer();
+      my $store = Lucene::Store::FSDirectory->getDirectory("/home/lucene", 0);
+      my $searcher = new Lucene::Search::IndexSearcher($store);
+      my $parser = new Lucene::QueryParser("default_field", $analyzer);
+
+      # build a query on the default field
+      my $query = $parser->parse("perl");
+
+      # build a query on another field
+      my $query = $parser->parse("title:cookbook");
+
+      # define a sort on one field
+      my $sortfield = new Lucene::Search::SortField("unixtime"); 
+      my $reversed_sortfield = new Lucene::Search::SortField("unixtime", 1);
+      my $sort = new Lucene::Search::Sort($sortfield);
+
+      # define a sort on two fields
+      my $sort = new Lucene::Search::Sort($sortfield1, $sortfield2);
+
+      # use Lucene's INDEXORDER or RELEVANCE sort
+      my $sort = Lucene::Search::Sort->INDEXORDER;
+      my $sort = Lucene::Search::Sort->RELEVANCE;
+
+      # query index and get results
+      my $hits = $searcher->search($query);
+      my $sorted_hits = $searcher->search($query, $sort);
+
+      # get number of results
+      my $num_hits = $hits->length();
+
+      # get fields and ranking score for each hit
+      for (my $i = 0; $i < $num_hits; $i++) {
+        my $doc = $hits->doc($i);
+        my $score = $hits->score($i);
+        my $title = $doc->get("title");
+        my $isbn = $doc->get("isbn");
+      }
+
+      # free memory and close searcher
+      undef $hits;
+      undef $query;
+      undef $parser;
+      undef $analyzer;
+      $searcher->close();
+      undef $fsdir;
+      undef $searcher;
+    }
+
+  Close your Store
+      $store->close;
+      undef $store;
+
+DESCRIPTION
+    Like it or not Apache Lucene has become the de-facto standard for
+    open-source high-performance search. It has a large user-base, is well
+    documented and has plenty of committers. Unfortunately Apache Lucene is
+    entirely written in Java and therefore of relatively little use for perl
+    programmers. Fortunately in the recent years a group of C++ programmers
+    led by Ben van Klinken decided to port Java Lucene to C++.
+
+    The purpose of the module is to export the C++ Lucene API to perl and at
+    the same time be as close as possible to the original Java API. This has
+    the combined advantage of providing perl programmers with a
+    well-documented API and giving them access to a C++ search engine
+    library that is supposedly faster than the original.
+
+CHARACTER SUPPORT
+    Currently only ISO 8859-1 (Latin-1) characters are supported. Obviously
+    this included all ASCII characters.
+
+INDEX COMPATIBLITY
+    For the moment indices produced by this module are not compatible with
+    those from Apache Lucene. The reason for this is that this module uses
+    1-byte character encoding as opposed to 2-byte (widechar) encoding with
+    Apache Lucene.
+
+INSTALLATION
+    This module requires the clucene library to be installed. The best way
+    to get it is to go to the following page
+
+        http://sourceforge.net/projects/clucene/
+
+    and download the latest STABLE clucene-core version. Currently it is
+    clucene-core-0.9.15. Make sure you compile it in ASCII mode and install
+    it in your standard library path.
+
+    On a Linux platform this goes as follows:
+
+        wget http://kent.dl.sourceforge.net/sourceforge/clucene/clucene-core-0.9.15.tar.gz
+        cd clucene-core-0.9.15
+        ./autogen.sh
+        ./configure --disable-debug --prefix=/usr --exec-prefix=/usr --enable-ascii
+        make
+        make check
+        (as root) make install
+
+    To install the perl module itself, run the following commands:
+
+        perl Makefile.PL
+        make
+        make test
+        (as root) make install
+
+AUTHOR
+    Thomas Busch <tbusch at cpan dot org>
+
+COPYRIGHT AND LICENSE
+    Copyright (c) 2006 Thomas Busch
+
+    This library is free software; you can redistribute it and/or modify it
+    under the same terms as Perl itself.
+
+SEE ALSO
+    Plucene - a pure-Perl implementation of Lucene
+
+    KinoSearch - a search engine library inspired by Lucene
+
+DISCLAIMER OF WARRANTY
+    BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+    FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+    OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+    PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+    EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+    ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
+    YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+    NECESSARY SERVICING, REPAIR, OR CORRECTION.
+
+    IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+    WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+    REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE
+    TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR
+    CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+    SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+    RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+    FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+    SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+    DAMAGES.
+
--- a/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/STH.pm
+++ b/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/STH.pm
@ -0,0 +1,115 @@
+# ====================================================================
+# Gossamer Threads Module Library - http://gossamer-threads.com/
+#
+#   GT::SQL::Search::STH
+#   Author: Aki Mimoto
+#   CVS Info : 087,071,086,086,085      
+#   CVS Info : 087,071,086,086,085      
+#   $Id: STH.pm,v 1.1 2006/12/07 07:04:51 aki Exp $
+#
+# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
+# ====================================================================
+#
+
+package GT::SQL::Search::LUCENE::STH;
+#--------------------------------------------------------------------------------
+use strict;
+use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE /;
+require GT::SQL::Search::Base::STH;
+
+@ISA = ('GT::SQL::Search::STH');
+$ATTRIBS = {
+    'db_sort' => 1,
+    'hits'    => undef
+};
+$ERROR_MESSAGE = 'GT::SQL::Search::STH';
+
+
+
+sub init {
+#--------------------------------------------------------------------------------
+# GT::SQL::Search::STH expects a full set of results in $self->{results}. For
+# Lucene the only time a full set of results is there is when we are sorting
+# on a field that is not weighted, otherwise the results in $self->{results} is
+# the proper page and number of results.
+    my $self    = shift;
+
+    $self->set(@_);
+
+    --$self->{nh} if $self->{nh};
+
+    # Here we allow hits to override our concept of rows. This is only useful
+    # when !$self->{db_sort}
+    $self->{rows} = $self->{hits}
+        ? $self->{hits}
+        : $self->{results}
+            ? scalar(keys %{$self->{results}})
+            : 0;
+
+    if ($self->{db_sort}) {
+        $self->get_db_sorted_results;
+    }
+    else {
+        $self->get_sorted_results;
+    }
+}
+
+sub get_sorted_results {
+# Just sorts the results out of $self->{results} which should have been setup
+# by a search driver
+    my ($self) = @_;
+    my $results = $self->{results};
+    $self->{index} = 0;
+    $self->{max_index} = $self->{mh} - 1;
+    $self->{'order'}  = [ sort { 
+                                    ( $results->{$b} || return 0 ) <=> ( $results->{$a} || 0 )
+                              } keys %{$results} ];
+    $self->debug_dumper( "Results will be presented in the following order: ", $self->{'order'} ) if ($self->{_debug});
+}
+
+sub get_db_sorted_results {
+# This assumes $self->{results} has a full result set, i.e. without any LIMIT
+# It then selects the result set using the SQL driver to do the sorting. This
+# is for Search modules which can not handle their own sorting
+    my ($self) = @_;
+
+    my $sb = GT::SQL::Search::Base::Search->clean_sb($self->{sb}, $self->{so});
+    $self->{index}  = $self->{nh} * $self->{mh} || 0;
+    $self->{max_index} = $self->{index} + $self->{mh} - 1; # index + mh is the first record which should not be returned.
+    if ($self->{max_index} > $self->{rows}) {
+        $self->{max_index}  = $self->{rows};
+        $self->{rows}       = $self->{rows} - $self->{index};
+        $self->{rows} < 0 ? $self->{rows} = 0 : 0;
+    }
+
+    else {
+        $self->{rows}       = $self->{mh};
+    }
+    my ( $table, $pk ) = $self->_table_info();
+    my ( $query, $where, $st, $limit );
+
+    $where      = qq!$pk in(! . join( ",", keys %{$self->{results}}) . ')';
+    $self->{mh} and $limit = qq!LIMIT $self->{index}, $self->{rows}!;
+    $query      = qq!
+        SELECT $pk
+        FROM   $table
+        WHERE  $where
+        $sb
+        $limit
+    !;
+    $self->debug( "Row fetch query: $query" ) if ($self->{_debug});
+    my $sth        = $self->{table}->{driver}->prepare( $query );
+    $sth->execute();
+
+    $self->{index}    = 0;
+    $self->{max_hits} = $self->{rows};
+
+    # Fetch the results in sorted order
+    my $order = $sth->fetchall_arrayref();
+    $sth->finish();
+
+    $self->{'order'}  = [ map { $_->[0] } @{$order} ];
+}
+
+1;
+
--- a/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/Search.pm
+++ b/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/Search.pm
@ -0,0 +1,260 @@
+# ==================================================================
+# Gossamer Threads Module Library - http://gossamer-threads.com/
+#
+#   GT::Search::LUCENE::Search
+#   Author  : Scott Beck
+#   CVS Info : 087,071,086,086,085      
+#   $Id: Search.pm,v 1.2 2006/12/07 22:42:16 aki Exp $
+#
+# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
+# ==================================================================
+#
+# Description:
+#   Class used to search indexed tables.
+#
+
+package GT::SQL::Search::LUCENE::Search;
+# ------------------------------------------------------------------------------
+use strict;
+use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS $ERRORS $ERROR_MESSAGE /;
+use Lucene;
+use GT::TempFile;
+use GT::SQL::Search::LUCENE::STH;
+use GT::SQL::Search::Base::Search;
+@ISA = qw( GT::SQL::Search::Base::Search );
+
+# ------------------------------------------------------------------------------
+# Preamble information related to the object
+
+$DEBUG   = 0;
+$VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;
+$ERRORS = {
+    SEARCH_ERROR => "Error searching: %s",
+    QUERY_ERROR  => "Query error: %s"
+};
+$ERROR_MESSAGE = 'GT::SQL';
+
+sub load {
+    shift;
+    return GT::SQL::Search::LUCENE::Search->new(@_)
+}
+
+sub _get_path {
+    my $self = shift;
+    my $name    = $self->{table}->name;
+    my $tmpdir  = GT::TempFile::find_tmpdir();
+    my $path = $tmpdir . '/' . $name;
+    $path = $1 if $path =~ /(.*)/; # XXX untaint
+    return $path;
+}
+
+sub _get_store {
+    my ($self, $create) = @_;
+    my $path = $self->_get_path;
+    return Lucene::Store::FSDirectory->getDirectory($path, $create);
+}
+
+sub query {
+# --------------------------------------------------
+# Returns a sth based on a query
+#
+# Options:
+#        - paging
+#           mh            : max hits
+#           nh            : number hit (or page of hits)
+#
+#        - searching
+#           ww            : whole word
+#           ma            : 1 => OR match, 0 => AND match, undefined => QUERY
+#           substring     : search for substrings of words
+#           bool          : 'and' => and search, 'or' => or search, '' => regular query
+#           query         : the string of things to ask for
+#
+#        - filtering
+#           field_name    : value       # Find all rows with field_name = value
+#           field_name    : ">value"    # Find all rows with field_name > value.
+#           field_name    : "<value"    # Find all rows with field_name < value.
+#           field_name-gt : value       # Find all rows with field_name > value.
+#           field_name-lt : value       # Find all rows with field_name < value.
+#
+# Parameters:
+#        ( $CGI ) : a single cgi object
+#        ( $HASH ) : a hash of the parameters
+#
+    my $self = shift;
+
+# create an easily accessible argument hash
+    my $args = $self->common_param(@_);
+    my $tbl = $self->{table};
+
+# see if we can setup the filtering constraints
+    my $filter = { %$args }; 
+    my $query  = delete $args->{query} || $self->{query} || '';
+    my $ftr_cond;
+
+# parse query
+    $self->debug( "Search Query: $query" ) if ($self->{_debug});
+
+    my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
+
+    $self->{rejected_keywords} = $rejected;
+
+# setup the additional input parameters
+    $query_struct = $self->_preset_options( $query_struct, $args );
+
+# now sort into distinct buckets
+    my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
+    $self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
+
+# with the buckets, it's now possible to create a query string
+# that can be passed directly into the Lucene search.
+    my $query_string = '';
+
+    foreach my $search_type ( keys %$buckets ) {
+        my $bucket = $buckets->{$search_type};
+        foreach my $token ( keys %$bucket ) {
+            next unless $token;
+            my $properties = $bucket->{$token} or next;
+            $token =~ s/(["()])/\\$1/g;
+            $token =~ s/\b(or|and)\b/ /g;
+
+            my $e = ' ';
+
+# handle boolean operations
+            $properties->{mode} ||= '';
+            if ( $properties->{mode} eq 'must' ) {
+                $e .= '+';
+            }
+            elsif ( $properties->{mode} eq 'cannot' ) {
+                $e .= '-';
+            }
+
+# deal with phrase vs keyword
+            if ( $properties->{phrase} ) {
+                $e .= '"' . $token . '"' unless $token =~ /^"|"$/;
+            }
+            else {
+                $e .= $token;
+
+# substring match
+                if ($properties->{mode} ne 'substring') {
+                    $e .= '*' if $properties->{substring};
+                }
+            }
+
+            $query_string .= $e;
+        }
+    }
+
+# calculate the cursor constraints
+    foreach my $k (qw( nh mh so sb )) {
+        next if defined $args->{$k};
+        $args->{$k} = $self->{$k} || '';
+    }
+    $args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
+    $args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
+    $args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ )  ? $1 : 'score';
+
+    # Score is the default
+    $args->{so} = (defined $args->{so} and $args->{so} =~ /^(asc|desc)(?:end)?$/i) ? lc($1) : 'asc';
+
+    my %weights = $tbl->_weight_cols();
+    my @sortfields;
+    my $do_mysql_sort = 0;
+    for (ref($args->{sb}) eq 'ARRAY' ? @{$args->{sb}} : $args->{sb}) {
+        if (!exists $weights{$_}) {
+            $do_mysql_sort = 1 if $_ ne 'score';
+            next;
+        }
+        push @sortfields, new Lucene::Search::SortField($_, $args->{so} ne 'asc');
+    }
+    my $sort = @sortfields ? new Lucene::Search::Sort(@sortfields) : Lucene::Search::Sort->RELEVANCE;
+    my $store = $self->_get_store(0);
+    my $analyzer = new Lucene::Analysis::Standard::StandardAnalyzer;
+    my $searcher = eval { new Lucene::Search::IndexSearcher($store); };
+    if ($@) {
+        $self->{_debug} and $self->error('SEARCH_ERROR', 'WARN', "$@");
+        return $self->sth({}, 0); # no hits
+    }
+    # Random default field, it's not used
+    my $parser = new Lucene::MultiFieldQueryParser((keys %weights)[0], $analyzer);
+    my $pquery = eval { $parser->parse($query_string, [keys %weights], $analyzer); };
+    if ($@) {
+        $self->{_debug} and $self->error('QUERY_ERROR', 'WARN', "$@");
+        return $self->sth({}, 0); # no hits
+    }
+    my $hits = $searcher->search($pquery, $sort);
+    my $num_hits = $hits->length;
+
+## Setup a limit only if there is no callback. The callback argument requires a full results list
+    my ($offset, $max_hits) = (0, $num_hits);
+    unless ($self->{callback} or $do_mysql_sort) {
+        $offset = ( $args->{nh} - 1 ) * $args->{mh};
+        $max_hits = $offset + $args->{mh};
+    }
+    $max_hits = $num_hits if $max_hits > $num_hits;
+    my ($pk) = $self->{table}->pk;
+    my @indexes;
+    my $results = {};
+    for (my $i= $offset; $i < $max_hits; ++$i) {
+        my $doc = $hits->doc($i);
+        my $value = $doc->get($pk);
+        my $score = $hits->score($i);
+        $results->{$value} = $score;
+    }
+
+# now handle filters
+    my $cols    = $self->{'table'}->cols();
+    my %filters = map {
+        (my $tmp = $_) =~ s/-[lg]t$//;
+        $cols->{$tmp} ? ($_ => $args->{$_}) : ()
+    } keys %{$args};
+
+    if (keys %filters) {
+        $self->debug( "Creating Filters: ", \%filters ) if ($self->{_debug});
+        $results = $self->filter(\%filters, $results);
+    }
+    elsif ($self->{filter}) {
+        $self->debug( "Filtering results", $self->{filter} ) if ($self->{_debug});
+        $results = $self->_filter_query( $self->{filter}, $results );
+    }
+    else {
+        $self->debug( "No filters being used.") if ($self->{_debug});
+    }
+
+# now this query should probably clear the filters once it's been used, so i'll dothat here
+    $self->{filter} = undef;
+
+# now run through a callback function if needed.
+    if ($self->{callback}) {
+        unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
+            $self->{_debug} and $self->error ('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
+            return $self->sth({}, 0); # no hits
+        }
+        $self->debug_dumper ("Running results through callback. Had: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
+        $results = $self->{callback}->($self, $results);
+        $self->debug_dumper ("New result set: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
+    }
+
+    $self->{rows} = $num_hits;
+
+    return $self->sth($results, $do_mysql_sort);
+}
+
+sub sth {
+#--------------------------------------------------------------------------------
+    my ($self, $results, $db_sort) = @_;
+
+    my $sth = GT::SQL::Search::LUCENE::STH->new(
+        'results' => $results,
+        'hits'    => $self->{rows},
+        'db'      => $self->{table}->{driver},
+        'db_sort' => $db_sort,
+# pass the following attributes down to the STH handler
+        map({ ($_ => $self->{$_}) } qw/ table sb so score_col score_sort nh mh rows _debug /)
+    );
+
+    return $sth;
+}
+
+1;