First pass at adding key files

This commit is contained in:
dsainty
2024-06-17 21:49:12 +10:00
commit aa25e9347f
1274 changed files with 392549 additions and 0 deletions

View File

@ -0,0 +1,187 @@
# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::Search::MYSQL::Indexer
# Author : Aki Mimoto
# CVS Info : 087,071,086,086,085
# $Id: Indexer.pm,v 1.17 2004/08/28 03:53:49 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Class used to search indexed tables.
#
package GT::SQL::Search::MYSQL::Indexer;
# ------------------------------------------------------------------------------
use strict;
use vars qw/@ISA $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
use GT::SQL::Search::Base::Indexer;
@ISA = qw/GT::SQL::Search::Base::Indexer/;
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.17 $ =~ /(\d+)\.(\d+)/;
$ERRORS = {
NOTFROMWEB => 'There are far too many records in table %s for create/destroy of this indexing scheme from the web. Please use alternative method.',
MYSQLNONSUPPORT => 'Driver MYSQL requires MySQL version 3.23.23 or greater. Currently MySQL version: %s'
};
@$GT::SQL::ERRORS{ keys %$ERRORS } = values %$ERRORS;
$ERROR_MESSAGE = 'GT::SQL';
sub load {
my $class = shift;
return $class->new(@_);
}
sub ok {
# ------------------------------------------------------------------------------
my ($class, $tbl) = @_;
unless (uc $tbl->{connect}->{driver} eq 'MYSQL') {
return $class->error ('MYSQLNONSUPPORT', 'WARN', $tbl->{connect}->{driver});
}
my $sth = $tbl->do_query(qq!SELECT VERSION()!);
my $version = $sth->fetchrow;
my ($maj, $min) = split (/\./, $version);
unless ($maj > 3 or ($maj == 3 and $min >= 23)) {
return $class->error(MYSQLNONSUPPORT => WARN => $version);
}
return 1;
}
sub drop_search_driver {
# ------------------------------------------------------------------------------
my $self = shift;
$self->too_much() and return;
my $tbl = $self->{table} or return;
$tbl->connect();
my %weights = $tbl->weight() or return;
my $tblname = $tbl->name();
# Group the fulltext columns by value of the weight
my %cols_grouped;
foreach ( keys %weights ) {
my $val = $weights{$_} or next;
push @{$cols_grouped{$val}}, $_;
}
# Drop unified fulltext columns if required
if ( keys %cols_grouped > 1 ) {
$cols_grouped{-1} = [ grep { $weights{$_} } keys %weights ];
}
# For each value grouped column set create a full text
# column
foreach my $v ( keys %cols_grouped ) {
my $ft_name = 'ft_'.join("_", sort @{$cols_grouped{$v}});
my $res = eval {
$tbl->do_query(qq!
ALTER TABLE $tblname
DROP INDEX $ft_name
!);
};
# Break on errors that can't be handled
if ( $@ ) {
next if $@ !~ /exist/i;
$self->warn( "$@" );
return;
}
}
return 1;
}
sub add_search_driver {
# ------------------------------------------------------------------------------
my $self = shift;
$self->too_much() and return;
my $tbl = $self->{table} or return $self->error(BADARGS => FATAL => "table must be passed into add_search_driver.");
my %weights = $tbl->weight() or return $self->error(NOWEIGHTS => 'WARN');
my $tblname = $tbl->name() or return $self->error(BADARGS => FATAL => "table does not have a name?");
# group the fulltext columns by value of the weight
my %cols_grouped;
foreach ( keys %weights ) {
my $val = $weights{$_} or next;
push @{$cols_grouped{$val}}, $_;
}
# Create unified fulltext columns if required
if ( keys %cols_grouped > 1 ) {
$cols_grouped{-1} = [ grep { $weights{$_} } keys %weights ];
}
# for each value grouped column set create a full text
# column
foreach my $v ( keys %cols_grouped ) {
my $cols = join(",", sort @{$cols_grouped{$v}});
my $ft_name = 'ft_'.join("_", sort @{$cols_grouped{$v}});
my $res = eval {
$tbl->do_query(qq!
ALTER TABLE $tblname
ADD FULLTEXT $ft_name ( $cols )
!);
};
# break on errors that can't be handled
if ( $@ ) {
next if $@ =~ /duplicate/i;
$self->warn( "$@" );
return;
}
}
return 1;
}
sub too_much {
# ------------------------------------------------------------------------------
# returns true if there are too many records to be used on the Web
#
if ( $ENV{REQUEST_METHOD} ) {
my $self = shift;
my $tbl = $self->{table};
if ( $tbl->count() > 5000 ) {
$self->error( 'NOTFROMWEB', 'WARN', $tbl->name() );
return 1
}
}
return;
}
sub post_create_table {
# ------------------------------------------------------------------------------
shift->add_search_driver(@_);
}
sub reindex_all {
# ------------------------------------------------------------------------------
# this will drop all the fulltext columns and reindex all of them. This should
# not be required unless the user changes the weights on one of their columns.
# Unfortunately, this method is not particularly smart and risks not dropping
# certain index columns and reindexes even when it's not required. It must be
# recoded at a future date, but as this action won't happen frequently and will
# rarely affect the user, it is not a priority.
#
my $self = shift;
$self->drop_search_driver;
$self->add_search_driver;
}
1;

View File

@ -0,0 +1,51 @@
# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::Search::MYSQL::Search
# Author : Aki Mimoto
# CVS Info : 087,071,086,086,085
# $Id: Search.pm,v 1.14 2004/01/13 01:35:19 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Class used to search indexed tables.
#
package GT::SQL::Search::MYSQL::Search;
# ------------------------------------------------------------------------------
use strict;
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
use GT::SQL::Search::Base::Search;
@ISA = qw( GT::SQL::Search::Base::Search );
# ------------------------------------------------------------------------------
# Preamble information related to the object
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.14 $ =~ /(\d+)\.(\d+)/;
$ATTRIBS = {
min_word_size => 4
};
sub load {
# --------------------------------------------------
my $self = shift;
my $opts = $self->common_param( @_ );
# determine which mysql search variant to use.
my $tbl = $opts->{table};
my $ver_sth = $tbl->do_query( 'SELECT VERSION()' );
my $version = $ver_sth->fetchrow_array();
my ( $maj, $min ) = split /\./, $version;
my $pkg = 'GT::SQL::Search::MYSQL::';
$pkg .= $maj > 3 ? 'VER4' : 'VER3';
eval "require $pkg";
return $pkg->new(@_)
}
1;

View File

@ -0,0 +1,178 @@
# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::Search::MYSQL::VER3
# Author : Aki Mimoto
# CVS Info : 087,071,086,086,085
# $Id: VER3.pm,v 1.3 2004/08/28 03:53:49 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Class used to search indexed tables.
#
package GT::SQL::Search::MYSQL::VER3;
# ------------------------------------------------------------------------------
use strict;
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
use GT::SQL::Search::Base::Search;
@ISA = qw( GT::SQL::Search::Base::Search );
# ------------------------------------------------------------------------------
# Preamble information related to the object
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.3 $ =~ /(\d+)\.(\d+)/;
$ATTRIBS = {
min_word_size => 4
};
sub _phrase_query {
# ------------------------------------------------------------------------------
my $self = shift;
my $phrases = shift or return $_[0];
my $results = shift || {};
foreach my $phrase ( values %{$phrases} ) {
$self->debug_dumper( "Unioning: ", $phrase ) if ($self->{_debug});
my $tmp = {};
foreach my $keyword ( @{ $phrase->{phrase}|| [] } ) {
$tmp = $self->_get_words ( [ $keyword ], $tmp, 'intersect' );
keys %$tmp or return {};
}
foreach my $key ( keys %$tmp ) { $results->{$key} += $tmp->{$key} }
}
return $results;
}
sub _get_phrase {
# ------------------------------------------------------------------------------
# one day change this so it does words properly
return _get_words(@_);
}
sub _union_query {
# ------------------------------------------------------------------------------
return _get_words(@_);
}
sub _intersect_query {
# ------------------------------------------------------------------------------
my ( $self, $keywords, $results ) = @_;
$keywords or return $results;
foreach my $keyword ( keys %{ $keywords || {} } ) {
$results = $self->_get_words ( [ $keyword ], $results, 'intersect' );
keys %$results or return {};
}
return $results;
}
sub _phrase_intersect_query {
# ------------------------------------------------------------------------------
my $self = shift;
my $phrases = shift or return $_[0];
my $results = shift || {};
my $tmp = $self->_phrase_query ( $phrases, $results );
keys %$results or return $tmp;
foreach my $key ( keys %$results ) {
if ( $tmp->{$key} ) {
$results->{$key} += $tmp->{$key};
}
else {
delete $results->{$key}
}
}
return $results;
}
sub _disjoin_query {
# ------------------------------------------------------------------------------
my $self = shift;
my $words = shift or return shift;
my $results = shift || {};
$results = $self->_get_words([ keys %{$words || {}} ], $results, 'disjoin' );
return $results;
}
sub _phrase_disjoin_query {
# ------------------------------------------------------------------------------
my $self = shift;
my $phrases = shift or return shift;
my $results = shift || {};
my $tmp = $self->_phrase_query ( $phrases, $results );
keys %$results or return $tmp;
foreach my $key ( keys %$results ) {
$tmp->{$key} and delete $results->{$key};
}
}
sub _get_words {
# ------------------------------------------------------------------------------
my $self = shift;
my $words = shift or return $_[0] || {};
my $results = shift || {};
my $mode = lc shift;
my $tbl = $self->{table} or $self->error( 'BADARGS', 'FATAL', 'Must have table object defined' );
my $tname = $tbl->name();
my $wordlist = join " ", ( ref $words ? ( ref $words eq 'HASH' ? keys %$words : @$words ) : $words ) ;
my ($pk) = $tbl->pk;
my %weights = $tbl->_weight_cols();
my $cols = join(",", keys %weights);
my $qwrds = quotemeta( $wordlist );
my $where = ( $results and keys %$results )
? ("AND $pk IN(" . join(',', keys %$results) . ")")
: '';
my $query = qq!
SELECT $pk, MATCH($cols) AGAINST ('$qwrds') AS SCORE
FROM $tname
WHERE MATCH($cols) AGAINST ('$qwrds')
$where
!;
my $sth = $tbl->do_query( $query ) or return;
if ( $mode eq 'disjoin' ) {
while ( my $result = $sth->fetchrow ) {
delete $results->{$result};
}
}
elsif ( $mode eq 'intersect' ) {
my $tmp = {};
while ( my $aref = $sth->fetchrow_arrayref ) {
$tmp->{$aref->[0]} = $aref->[1];
}
if ( $results and keys %$results ) {
while (my ($id, $score) = each %$results) {
if (not defined $tmp->{$id}) {
delete $results->{$id};
next;
}
$results->{$id} += $score;
}
}
else {
$results = $tmp;
}
}
else {
while ( my $aref = $sth->fetchrow_arrayref ) {
$results->{$aref->[0]} += $aref->[1];
}
}
return $results;
}
1;

View File

@ -0,0 +1,355 @@
# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::Search::MYSQL::VER4
# Author : Aki Mimoto
# CVS Info : 087,071,086,086,085
# $Id: VER4.pm,v 1.9 2004/01/13 01:35:19 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Class used to search indexed tables.
#
package GT::SQL::Search::MYSQL::VER4;
# ------------------------------------------------------------------------------
use strict;
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS /;
use GT::SQL::Search::Base::Search;
@ISA = qw( GT::SQL::Search::Base::Search );
# ------------------------------------------------------------------------------
# Preamble information related to the object
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
$STOPWORDS = { map { $_ => 1 } qw/
a's able about above according accordingly across actually after
afterwards again against ain't all allow allows almost alone
along already also although always am among amongst an and another
any anybody anyhow anyone anything anyway anyways anywhere apart
appear appreciate appropriate are aren't around as aside ask asking
associated at available away awfully be became because become becomes
becoming been before beforehand behind being believe below beside
besides best better between beyond both brief but by c'mon c's came
can can't cannot cant cause causes certain certainly changes clearly
co com come comes concerning consequently consider considering
contain containing contains corresponding could couldn't course currently
definitely described despite did didn't different do does doesn't
doing don't done down downwards during each edu eg eight either else
elsewhere enough entirely especially et etc even ever every everybody
everyone everything everywhere ex exactly example except far few
fifth first five followed following follows for former formerly
forth four from further furthermore get gets getting given gives
go goes going gone got gotten greetings had hadn't happens hardly
has hasn't have haven't having he he's hello help hence her here
here's hereafter hereby herein hereupon hers herself hi him himself
his hither hopefully how howbeit however i'd i'll i'm i've ie if ignored
immediate in inasmuch inc indeed indicate indicated indicates inner
insofar instead into inward is isn't it it'd it'll it's its itself
just keep keeps kept know knows known last lately later latter latterly
least less lest let let's like liked likely little look looking looks
ltd mainly many may maybe me mean meanwhile merely might more
moreover most mostly much must my myself name namely nd near nearly
necessary need needs neither never nevertheless new next nine no
nobody non none noone nor normally not nothing novel now nowhere
obviously of off often oh ok okay old on once one ones only onto
or other others otherwise ought our ours ourselves out outside over
overall own particular particularly per perhaps placed please plus
possible presumably probably provides que quite qv rather rd re
really reasonably regarding regardless regards relatively respectively
right said same saw say saying says second secondly see seeing seem
seemed seeming seems seen self selves sensible sent serious seriously
seven several shall she should shouldn't since six so some somebody
somehow someone something sometime sometimes somewhat somewhere
soon sorry specified specify specifying still sub such sup sure
t's take taken tell tends th than thank thanks thanx that that's
thats the their theirs them themselves then thence there there's
thereafter thereby therefore therein theres thereupon these they
they'd they'll they're they've think third this thorough thoroughly
those though three through throughout thru thus to together too
took toward towards tried tries truly try trying twice two un
under unfortunately unless unlikely until unto up upon us use used
useful uses using usually value various very via viz vs want wants
was wasn't way we we'd we'll we're we've welcome well went were
weren't what what's whatever when whence whenever where where's
whereafter whereas whereby wherein whereupon wherever whether
which while whither who who's whoever whole whom whose why will
willing wish with within without won't wonder would would wouldn't
yes yet you you'd you'll you're you've your yours yourself
yourselves zero
/ };
$ATTRIBS = {
min_word_size => 4,
stopwords => $STOPWORDS,
};
sub query {
# --------------------------------------------------
# Returns a sth based on a query
#
# Options:
# - paging
# mh : max hits
# nh : number hit (or page of hits)
#
# - searching
# ww : whole word
# ma : 1 => OR match, 0 => AND match, undefined => QUERY
# substring : search for substrings of words
# bool : 'and' => and search, 'or' => or search, '' => regular query
# query : the string of things to ask for
#
# - filtering
# field_name : value # Find all rows with field_name = value
# field_name : ">value" # Find all rows with field_name > value.
# field_name : "<value" # Find all rows with field_name < value.
# field_name-gt : value # Find all rows with field_name > value.
# field_name-lt : value # Find all rows with field_name < value.
#
# Parameters:
# ( $CGI ) : a single cgi object
# ( $HASH ) : a hash of the parameters
#
my $self = shift;
# create an easily accessible argument hash
my $args = $self->common_param(@_);
# see if we can setup the filtering constraints
my $filter = { %$args };
my $query = delete $args->{query} || $self->{query} || '';
my $ftr_cond;
# parse query
$self->debug( "Search Query: $query" ) if ($self->{_debug});
my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
$self->{rejected_keywords} = $rejected;
# setup the additional input parameters
$query_struct = $self->_preset_options( $query_struct, $args );
# now sort into distinct buckets
my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
$self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
# with the buckets, it's now possible to create a query string
# that can be passed directly into the FULLTEXT search.
my $query_string = '';
foreach my $search_type ( keys %$buckets ) {
my $bucket = $buckets->{$search_type};
foreach my $token ( keys %$bucket ) {
next unless $token;
my $properties = $bucket->{$token} or next;
my $e = ' ';
# handle boolean operations
$properties->{mode} ||= '';
if ( $properties->{mode} eq 'must' ) {
$e .= '+';
}
elsif ( $properties->{mode} eq 'cannot' ) {
$e .= '-';
}
# deal with phrase vs keyword
if ( $properties->{phrase} ) {
$e .= '"' . quotemeta( $token ) . '"';
}
else {
$e .= quotemeta $token;
# substring match
$e .= '*' if $properties->{substring};
}
$query_string .= $e;
}
}
# start building the GT::SQL::COndition object that will allow us to
# to retreive the data
require GT::SQL::Condition;
my $tbl = $self->{table};
my $constraints = GT::SQL::Condition->new;
# create the GT::SQL::Condition object that will become the filtering
# constraints
my $filt = $self->{filter};
if ( $filt and ref $filt eq 'HASH' ) {
foreach my $fkey ( keys %$filt ) {
next if exists $args->{$fkey};
$args->{$fkey} = $filt->{$fkey};
}
}
if ( my $filter_cond = $tbl->build_query_cond( $args ) ) {
$constraints->add( $filter_cond );
}
# if the cached filter object is a Condition object, append
# it to the filter set
if ( $filt and UNIVERSAL::isa( $filt, 'GT::SQL::Condition' ) ) {
$constraints->add( $filt );
}
# create our fulltext query condition
my %weights = $tbl->_weight_cols();
my $cols = join(",", keys %weights);
if ( $query_string ) {
$constraints->add( GT::SQL::Condition->new(
"MATCH( $cols )",
"AGAINST",
\"('$query_string' IN BOOLEAN MODE)" ) );
}
# calculate the cursor constraints
foreach my $k (qw( nh mh so sb )) {
next if defined $args->{$k};
$args->{$k} = $self->{$k} || '';
}
$args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
$args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
$args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ ) ? $1 : 'score';
# if the sorting method is "score" the order is forced to "descend" (as there
# is almost no reason to order by worst matches)
# if the storing key is not "score", the default order will be "ascend"
$args->{so} =
$args->{sb} eq 'score' ? 'desc' : # comment out this entire line to prevent "descend" order forcing
( (defined $args->{so} and $args->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : 'asc' );
# check that sb is not dangerous
my $sb = $self->clean_sb($args->{sb}, $args->{so});
$self->debug_dumper( "About to query. Constraint object: ", $constraints) if ($self->{_debug});
# Setup a limit only if there is no callback. The callback argument requires a full results list
unless ( $self->{callback} ) {
my $offset = ( $args->{nh} - 1 ) * $args->{mh};
$tbl->select_options($sb) if ($sb);
$tbl->select_options("LIMIT $offset, $args->{mh}");
}
my $sth;
# if the weights are all the same value, the query can be optimized
# to use just one MATCH AGAINST argument. However, if the weights
# are different, each element must be sectioned and queried separately
# with the weight value multipler
# check to see if all the weight values are the same.
my $base_weight;
my $weights_same = 1;
foreach ( values %weights ) {
$base_weight ||= $_ or next; # init and skip 0s
next if $base_weight == $_;
$weights_same = 0;
last;
}
# multiplex the action
my $result_cols = $self->{callback} ? ($tbl->pk)[0] : '*';
unless ( $query_string ) {
$sth = $tbl->select( [ $result_cols ], $constraints ) or return;
}
elsif ( $weights_same ) {
$sth = $tbl->select( [ $result_cols, "MATCH($cols) AGAINST ('$query_string' IN BOOLEAN MODE) AS score" ], $constraints )
or return;
}
else {
# group the multiplier counts
my %column_multiplier;
foreach ( keys %weights ) {
push @{$column_multiplier{$weights{$_}}}, $_;
}
my @search_parameters;
foreach my $val ( keys %column_multiplier ) {
next unless $val;
my $cols_ar = $column_multiplier{ $val } or next;
my $search_cols = join ",", @$cols_ar;
if ( $val > 1 ) {
push @search_parameters, "( ( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) ) * $val )";
}
else {
push @search_parameters, "( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) )";
}
}
my $search_sql = "( " . join( " + ", @search_parameters ) . " ) AS score";
$sth = $tbl->select( [ $result_cols, $search_sql ], $constraints )
or return;
}
# If we have a callback, we fetch the primary key => score and pass that hash into
# the filter.
if ($self->{callback}) {
unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
return $self->error('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
}
my %results = map { $_->[0] => $_->[1] } @{$sth->fetchall_arrayref};
$self->debug_dumper("Running results through callback. Had: " . scalar (keys %results) . " results.", \%results) if ($self->{_debug});
my $filtered = $self->{callback}->($self, \%results) || {};
$self->debug_dumper("New result set: " . scalar (keys %$filtered) . " results.", $filtered) if ($self->{_debug});
$self->{rows} = scalar keys %$filtered;
return $self->sth($filtered);
}
# count the number of hits. create a query for this purpose only if we are required to.
$self->{rows} = $sth->rows();
if (($args->{nh} > 1) or ($self->{rows} == $args->{mh})) {
$self->{rows} = $tbl->count($constraints);
}
return $sth;
}
sub clean_sb {
# -------------------------------------------------------------------------------
# Convert the sort by, sort order into an sql string.
#
my ($class, $sb, $so) = @_;
my $output = '';
return $output unless ($sb);
if ($sb and not ref $sb) {
if ($sb =~ /^[\w\s,]+$/) {
if ($sb =~ /\s(?:asc|desc)/i) {
$output = 'ORDER BY ' . $sb;
}
else {
$output = 'ORDER BY ' . $sb . ' ' . $so;
}
}
else {
$class->error('BADSB', 'WARN', $sb);
}
}
elsif (ref $sb eq 'ARRAY') {
foreach ( @$sb ) {
/^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
}
$output = 'ORDER BY ' . join(',', @$sb);
}
return $output;
}
1;