First pass at adding key files

2024-06-17 21:49:12 +10:00
commit aa25e9347f
1274 changed files with 392549 additions and 0 deletions
--- a/site/slowtwitch.com/cgi-bin/articles/admin/GT/SQL/Search.pm
+++ b/site/slowtwitch.com/cgi-bin/articles/admin/GT/SQL/Search.pm
@ -0,0 +1,585 @@
+# ==================================================================
+# Gossamer Threads Module Library - http://gossamer-threads.com/
+#
+#   GT::SQL::Search
+#   Author  : Aki Mimoto
+#   CVS Info : 087,071,086,086,085      
+#   $Id: Search.pm,v 1.62 2008/09/23 23:55:26 brewt Exp $
+#
+# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
+# ==================================================================
+#
+# Description:
+#   highlevel class for searching, works with GT::SQL::Indexer
+#
+
+package GT::SQL::Search;
+#--------------------------------------------------------------------------------
+
+# pragmas
+use strict;
+use vars qw/@ISA $ERRORS $ERROR_MESSAGE $VERSION/;
+
+# includes
+use GT::Base;
+use GT::AutoLoader;
+
+# variables
+$VERSION       = sprintf "%d.%03d", q$Revision: 1.62 $ =~ /(\d+)\.(\d+)/;
+@ISA           = qw(GT::Base);
+$ERROR_MESSAGE = 'GT::SQL';
+$ERRORS        = {
+    UNKNOWNDRIVER => 'Unknown driver requested: %s',
+    NOTABLE       => 'Cannot find reference to table object'
+};
+
+sub load_search {
+#--------------------------------------------------------------------------------
+# checks if there is driver for this current database and if so, loads that
+# instead (since it would be faster)
+#
+    my $class  = shift;
+    my $opts   = ref $_[0] ? $_[0] : {@_};
+    $opts->{mode} = 'Search';
+    my $driver = $class->load_driver( $opts ) or return;
+    my $pkg    = "GT::SQL::Search::${driver}::Search";
+    return $pkg->load(@_);
+}
+
+sub load_indexer {
+#--------------------------------------------------------------------------------
+# checks if there is driver for this current database and if so, loads that
+# instead (since it would be faster)
+#
+    my $class  = shift;
+    my $opts   = ref $_[0] ? $_[0] : {@_};
+    $opts->{mode} = 'Indexer';
+    my $driver = $class->load_driver( $opts ) or return;
+    my $pkg    = "GT::SQL::Search::${driver}::Indexer";
+
+    return $pkg->load(@_);
+}
+
+sub driver_ok {
+#--------------------------------------------------------------------------------
+# checks to see if a particular driver is allowed on this system
+#
+    my $class  = shift;
+    my $driver = uc shift or return;
+    my $opts   = ref $_[0] ? $_[0] : {@_};
+    my $mode   = $opts->{mode} || 'Indexer';
+    my $tbl    = $opts->{table} or return GT::SQL::Search->error( 'NOTABLE', 'FATAL' );
+    my $pkg = 'GT::SQL::Search::' . $driver . '::' . $mode;
+
+    eval { require "GT/SQL/Search/$driver/$mode.pm" };
+    $@ and return GT::SQL::Search->error('UNKNOWNDRIVER', 'WARN', $driver);
+    return $pkg->can('ok') ? $pkg->ok($tbl) : 1;
+}
+
+sub load_driver {
+#--------------------------------------------------------------------------------
+# Loads a driver into memory.
+#
+    my $class  = shift;
+    my $opts   = ref $_[0] ? $_[0] : {@_};
+    my $tbl    = $opts->{table};
+    my $mode   = $opts->{mode} || 'Indexer';
+    my $driver = uc($opts->{driver} || $tbl->{schema}->{search_driver} || 'NONINDEXED');
+
+    require "GT/SQL/Search/$driver/$mode.pm";
+    return $driver;
+}
+
+sub available_drivers {
+#--------------------------------------------------------------------------------
+# Returns a list of available drivers.
+#
+    my $class  = shift;
+
+    (my $path   = $INC{'GT/SQL/Search.pm'}) =~ s/\.pm$//;
+    opendir DHANDLE, $path or return $class->fatal(CANTOPENDIR => $path, "$!");
+    my @arr;
+    for my $driver_name (readdir DHANDLE) {
+        next if $driver_name =~ y/a-z//;
+	next if $driver_name eq 'LUCENE';
+        -f "$path/$driver_name/Search.pm"  and -r _ or next;
+        -f "$path/$driver_name/Indexer.pm" and -r _ or next;
+        my $loaded = eval {
+            require "GT/SQL/Search/$driver_name/Search.pm";
+            require "GT/SQL/Search/$driver_name/Indexer.pm";
+        };
+        push @arr, $driver_name if $loaded;
+    }
+    closedir DHANDLE;
+    return wantarray ? @arr : \@arr;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+GT::SQL::Search - internal driver for searching
+
+=head1 SYNOPSIS
+
+This implements the query string based searching scheme for GT::SQL.  Driver
+based, it is designed to take advantage of the different indexing schemes
+available on different database engines.  
+
+=head1 DESCRIPTION
+
+Instead of describing how Search.pm is interfaced* this will describe how a
+driver should be structured and how a new driver can be implemented.
+
+* as it is never accessed directly by the programmer as it was designed to be
+called through the functions GT::SQL::Table::query and GT::SQL::Table::query_sth
+
+=head2 Drivers
+
+A driver has two parts. The Indexer and the Search packages are the most
+important. Howserver, for any driver in the search, there must exist a directory
+with the name of the driver in ALL CAPS. For exampel, MYSQL for MySQL, POSTGRES
+for Postgres. Within each driver directory, The Indexer and Search portions of
+the driver contains all the information required for initializing the database
+table and searching the database.
+
+The Indexing package of the driver handles all the data that is manipulated in
+the database and also the initializes and the database for indexing.
+
+The Search package handles the queries and retrieves results for the eventual
+consumption by the calling program.
+
+Drivers are simply subclasses of the base driver module, GT::SQL::Search::Base
+and operate by overriding certain key functions.
+
+The next few sections will cover how to create a search driver, and assumes a
+fair bit of familiarity with GT::SQL.
+
+=head2 Structure of an Indexing Driver
+
+The following is an absolutely simple skeleton driver that does nothing and but
+called "CUSTOM". Found in the CUSTOM directory, this is the search package, and
+would be call Search.pm in the GT/SQL/Search/CUSTOM library directory.
+
+    package GT::SQL::Search::CUSTOM::Search;
+    #------------------------------------------
+        use strict;
+        use vars qw/ @ISA /;
+        use GT::SQL::Search::Base::Search;
+        @ISA = qw( GT::SQL::Search::Base::Search );
+    
+    sub load { my $package_name = shift; return GT::SQL::Search::CUSTOM::Search->new(@_) };
+    
+    # overrides would go here
+    
+    1;
+
+For the indexer, another file, Indexer.pm would be found in the
+GT/SQL/Search/CUSTOM directory.
+
+    package GT::SQL::Search::CUSTOM::Indexer;
+    #------------------------------------------
+    
+        use strict;
+        use vars qw/ @ISA /;
+        use GT::SQL::Search::Base;
+        @ISA = qw/ GT::SQL::Search::Base::Indexer /;
+    
+    sub load { my $package_name = shift; return GT::SQL::Search::CUSTOM::Indexer->new(@_) };
+    
+    # overrides would go here
+    
+    1;
+
+The almost empty subs that immediately return with a value are functions that
+can be overridden to do special tasks. More will be detailed later.
+
+The Driver has been split into two packages. The original package name,
+GT::SQL::Search::Nothing, houses the Search package.
+GT::SQL::Search::Nothing::Indexer is the Indexing portion of the seach system.
+"::Indexer" must be appended to the orginial search name for the indexer.
+
+Each of the override functions are triggered at points just before and after a
+major event occurs in GT::SQL. Depending on the type of actions you require, you
+pick and chose which events you'd like your driver to attach to.
+
+=head2 Structure of Indexing Driver
+
+The Indexer is responsible for creating all the indexes, maintaining them and
+when the table is dropped, removing all the associated indexes.
+
+The following header must be defined for the Indexer.
+GT::SQL::Search::Base::Indexer is the superclass that our driver inherits from.
+
+    package GT::SQL::Search::CUSTOM::Indexer;
+    #------------------------------------------
+    
+        use strict;
+        use vars qw/ @ISA /;
+        use GT::Base;
+        use GT::SQL::Search::Base::Indexer;
+        @ISA = qw/ GT::SQL::Search::Base::Indexer /;
+
+In addition to the header, the following function must be defined.
+GT::SQL::Search::Driver::Indexer::load creates the new object and allows for
+special preinitialization that must occur. You can also create another driver
+silently (such as defaulting to INTERNAL after a version check fails).
+
+    sub load { my $package_name = shift; return GT::SQL::Search::CUSTOM::Indexer->new(@_) };
+
+Finally, there are the overrides. None of the override functions need be defined
+in your driver. Any calls made to undefined methods will silently fallback to
+the superclass driver's methods. When a method has been overridden, the function
+must return a true value when it is successful, otherwise the action will fail
+and an error generated.
+
+Whenever a object is created it will receive one property $self->{table} which
+is the table that is being worked upon. This property is available in all the
+method calls and is required for methods such as _create_table and
+_drop_search_driver methods.
+
+When a table is first created or when a table is destroyed the following two
+functions are called. They are not passed any special values, however, these are
+all class methods and $self->{table} will be a reference to the current table in
+use.
+
+This set of overrides are used by GT::SQL::Creator when the ::create method is
+called. They are called just prior and then after the create table sql query has
+been executed.
+
+=over 2
+
+=item pre_create_table
+
+=item post_create_table
+
+These functions receive no special parameters. They will receive the data to the
+table in the $self->{table} property.
+
+=back
+
+This next set of functions take place in GT::SQL::Editor.
+
+=over 2
+
+=item drop_search_driver
+
+This method receives no special parameters but is responsible for removing all
+indexes and "things" associated with the indexing schema.
+
+=item add_search_driver
+
+Receives no extra parameters. Creates all indexes and does all actions required
+to initialize indexing scheme.
+
+=item pre_add_column
+
+=item post_add_column
+
+The previous two functions are called just before and after a new column is
+added.
+
+pre_add_column accepts $name (of column), $col (hashref of column attributes).
+The method will only be called if the column has a weight associated with it.
+The function must return a non-zero value if successful. Note that the returned
+value will be passed into the post_add_column so temporary values can be passed
+through if required.
+
+post_add_column accepts $name (of column), $col (hashref of column attributes),
+$results (of pre_add_column). This method is called just after the column has
+been inserted into the database.
+
+=item pre_delete_column
+
+=item post_delete_column
+
+These previous functions are called just before and after the sql for a old
+column is deleted. They must remove all objects and "things" associated with a
+particular column's index.
+
+pre_delete_column accepts $name (of column), $col (hashref of column
+attributes). The method will only be called if the column has a weight
+associated with it. The function must return a non-zero value if successful.
+Note that the returned value will be passed into the post_delete_column so
+temporary values can be passed through if required.
+
+post_delete_column accepts $name (of column), $col (hashref of column
+attributes), $results (of pre_add_column). This method is called just after the
+column has been dropped from the database.
+
+=item pre_drop_table
+
+=item post_drop_table
+
+The two previous methods are used before and after the table is dropped. The
+methods must remove any tables or "things" related to indexing from the table.
+
+pre_drop_table receives no arguments. It can find a copy of the current table
+and columns associated in $self->{table}.
+
+post_drop_table receives one argument, which is the result of the
+pre_drop_table.
+
+=back 
+
+The following set of functions take place in GT::SQL::Table
+
+=over 2
+
+=item pre_add_record
+
+=item post_add_record
+
+Called just before and after an insert occurs. These functions take the record
+and indexes them as required.
+
+pre_add_record will receive one argument, $rec, hashref, which is the record
+that will be inserted into the database. Table information can be found by
+accessing $self->{table} Much like the other functions, on success the result
+will be cached and fed into the post_add_record function.
+
+post_add_record receives $rec, a hashref to describing the new result, the $sth
+of the insert query, and the result of the pre_add_record method. The result
+from $sth->insert_id if there is a ai field will be the new unique primary key.
+
+=item pre_update_record
+
+=item post_update_record
+
+Intercepts the update request before and just after the sql query is executed.
+This override has the potential of being rather messy. More than one record can
+be modified in this action and the indexer must work a lot to ensure the
+database is up to snuff.
+
+pre_update_record receives two parameters, $set_cond, $where_cond. $set_cond is
+a hashref containing the new values that must be set, and $where_cond is a
+GT::SQL::Condition object selecting records to update. The result once again, is
+cached and if undef is considered an error.
+
+post_update_record takes the same parameters as pre_update_record, except one
+extra paremeter, the result of pre_update_record.
+
+=item pre_delete_record
+
+=item post_delete_record
+
+Called just before and after the deletion request for records are called.
+
+pre_delete_record, has only one parameter, $where, a GT::SQL::Condition object
+telling which records to delete. The results of this method are passed to
+post_delete_record.
+
+post_delete_record, has one addition parameter to pre_delete_record and like
+most post_ methods, is the result of the pre_delete_record method.
+
+=item pre_delete_all_records
+
+=item post_delete_all_records
+
+These two functions are quite simple, but they are different from drop search
+driver in that though the records are all dropped, the framework for all the
+indexing is not dropped as well.
+
+Neither function is passed any special data, except for post_delete_all_records
+which receives the rsults of the pre_delete_all_records method.
+
+=item reindex_all
+
+This function is sometimes called by the user to refresh the index. The
+motivation for this, in the case of the INTERNAL driver, is sometimes due to
+outside manipulation of the database tables, the index can become
+non-representative of the data in the tables. This method is to force the
+indexing system to fix errors that have passed.
+
+=item ok
+
+This function is called by GT::SQL::Search as a package method,
+GT::SQL::Search::EXAMPLE::Indexer->ok( $table ); and is passed a table object
+reference. What this function must do is to return a true or false value that
+tells the search system if this driver can be used. The MYSQL driver has a good
+example for this, it tests to ensure that the mysql database system version is
+at least 3.23.23.
+
+=back 
+
+=head2 Structure of a Search Driver
+
+The Searcher is responsible for only one thing, to return results from a query
+search. You can override the parser, however, subclassing the following methods
+will have full parsing for all things such as +/-, string parsing and substring
+matching.
+
+The structures passed into the methods get a little complicated so beware!
+
+ALL the following functions receive two parameters, the first is a search
+parameters detailing the words/phrases to search for, the second parameter is
+the current result set of IDs => scores.
+
+There are two types of search parameters, one for words and the other for
+phrases. The structure is a little messy so I'll detail them here.
+
+For words, the structure is like the following:
+
+    $word_search = {
+        'word' => {
+            substring => '1', # set to 1 if this is substring match
+            phrase    => 0,   # not a phrase
+            keyword   => 1,   # is a keyword
+            mode      => '',  # can also be must, cannot to mean +/-
+        },
+        'word2' => ...
+    }
+
+For phrases the structure will become:
+
+    $phrase_search => {
+        'phrase' => {
+            substring => undef # never required
+            phrase    => [
+                'word1',
+                'word2',
+                'word3',
+                ...
+            ],              # for searching by indiv word if required
+            keyword   => 0, # not a keyword
+            mode      => ''    # can also be must, cannot
+        },
+        'phrase2' => ...
+    }
+
+Based on these structures, hopefully it will be easy enough to build whatever is
+required to grab the appropriate records.
+
+Finally, the second item passed in will be a hash filled with ID => score values
+of search results. They look something like this:
+
+    $results = {
+        1 => 56,
+        2 => 31,
+        4 => 6
+    }
+
+It is important for all the methods to take the results and return the results,
+as the result set will be daisychained down like a set to be operated on by
+various searching schemes.
+
+At the end of the query, the results in this set will be sorted and returned to
+the user as an sth.
+
+Operations on this set are preformed by the following five methods. 
+
+=over 2
+
+=item _query
+
+This method is called just after all the query string has been parsed and put
+into their proper buckets. This method is overridden by the INTERNAL driver to
+decide it wants to switch to the NONINDEX driver for better performance.
+
+Two parameters are passed in, ( $input, $buckets ). $input is a hash that
+contains all the form/cgi parameters passed to the $tbl->query function and
+$buckets is s the structure that is created after the query string is parsed.
+You may also call $self->SUPER::_query( $input, $buckets ) to pass the request
+along normally.
+
+You must return undef or an STH from this function.
+
+=item _union_query
+
+This method takes a $word_search and does a simple match query. If it finds
+records with any of the words included, it will append the results to the list.
+Passed in is the $results and it must return the altered results set.
+
+This method must also implement substring searching.
+
+=item _phrase_query
+
+Just like the union_query, however it searches based on phrases.
+
+=item _phrase_intersect_query
+
+This takes a $phrase_search and a $result as parameters. This method must look
+to find results that are found within the current result set that have the
+passed phrases as well. However, if there are no results found, this method can
+look for more results.
+
+=item _intersect_query
+
+Takes two parameters, a $word_search, and $results. Just like the
+_phrase_intersect query, if there are results already, tries to whittle away the
+result set. If there are no results, tries to look for results that have all the
+keywords in a record.
+
+This method must also implement substring searching.
+
+=item _disjoin_query
+
+Takes two parameters, a $word_search, and $results. This will look through the
+result set and remove all matches to any of the keywords.
+
+This method must also implement substring searching.
+
+=item _phrase_disjoin_query
+
+Two parameters, $phrase_search and $results are passed to this method. This does
+the exact same thing as _disjoin_query but it looks for phrases.
+
+=item query
+
+If you choose to override this method, you will have full control of the query.
+
+This method accepts a $CGI or a $HASH object and performs the following
+
+  Options:
+         - paging
+            mh            : max hits
+            nh            : number hit (or page of hits)
+            sb            : column to sort by (default is by score)
+
+         - searching
+            ww            : whole word
+            ma            : 1 => OR match, 0 => AND match, undefined => QUERY
+            substring     : search for substrings of words
+            bool          : 'and' => and search, 'or' => or search, '' => regular query
+            query         : the string of things to ask for 
+
+         - filtering
+            field_name    : value       # Find all rows with field_name = value
+            field_name    : ">value"    # Find all rows with field_name > value.
+            field_name    : "<value"    # Find all rows with field_name < value.
+            field_name-gt : value       # Find all rows with field_name > value.
+            field_name-lt : value       # Find all rows with field_name < value.
+
+The function must return a STH object. However, you may find useful the
+GT::SQL::Search::STH object, which will automatically handle mh, nh, and
+alternative sorting requests. All you will have to do is
+
+    sub query { ... your code ... return $self->sth( $results ); }
+
+Where results is a hashref containing primarykeyvalue => scorevalues.
+
+=item alternate_driver_query
+
+There is no reason to override this method, however, if you would like to use
+another driver's search instead of the current, this method will let you do so. 
+
+Accepting 2 parameters, ( $drivername, $input ), where $drivername is the name
+of the driver you'd like to use and $input is the parameters passed to the
+method. Returned is an $sth value (undef if an error has occurred). This method
+was used in the INTERNAL driver to shunt to NONINDEXED if it found the search
+would take too long.
+
+=back
+
+=head1 COPYRIGHT
+
+Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
+http://www.gossamer-threads.com/
+
+=head1 VERSION
+
+Revision: $Id: Search.pm,v 1.62 2008/09/23 23:55:26 brewt Exp $
+
+=cut