First pass at adding key files
This commit is contained in:
@ -0,0 +1,187 @@
|
||||
# ==================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::Search::MYSQL::Indexer
|
||||
# Author : Aki Mimoto
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: Indexer.pm,v 1.17 2004/08/28 03:53:49 jagerman Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ==================================================================
|
||||
#
|
||||
# Description:
|
||||
# Class used to search indexed tables.
|
||||
#
|
||||
|
||||
package GT::SQL::Search::MYSQL::Indexer;
|
||||
# ------------------------------------------------------------------------------
|
||||
use strict;
|
||||
use vars qw/@ISA $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
|
||||
use GT::SQL::Search::Base::Indexer;
|
||||
@ISA = qw/GT::SQL::Search::Base::Indexer/;
|
||||
$DEBUG = 0;
|
||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.17 $ =~ /(\d+)\.(\d+)/;
|
||||
|
||||
$ERRORS = {
|
||||
NOTFROMWEB => 'There are far too many records in table %s for create/destroy of this indexing scheme from the web. Please use alternative method.',
|
||||
MYSQLNONSUPPORT => 'Driver MYSQL requires MySQL version 3.23.23 or greater. Currently MySQL version: %s'
|
||||
};
|
||||
|
||||
@$GT::SQL::ERRORS{ keys %$ERRORS } = values %$ERRORS;
|
||||
|
||||
$ERROR_MESSAGE = 'GT::SQL';
|
||||
|
||||
sub load {
|
||||
my $class = shift;
|
||||
return $class->new(@_);
|
||||
}
|
||||
|
||||
sub ok {
|
||||
# ------------------------------------------------------------------------------
|
||||
my ($class, $tbl) = @_;
|
||||
unless (uc $tbl->{connect}->{driver} eq 'MYSQL') {
|
||||
return $class->error ('MYSQLNONSUPPORT', 'WARN', $tbl->{connect}->{driver});
|
||||
}
|
||||
my $sth = $tbl->do_query(qq!SELECT VERSION()!);
|
||||
my $version = $sth->fetchrow;
|
||||
my ($maj, $min) = split (/\./, $version);
|
||||
unless ($maj > 3 or ($maj == 3 and $min >= 23)) {
|
||||
return $class->error(MYSQLNONSUPPORT => WARN => $version);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub drop_search_driver {
|
||||
# ------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
|
||||
$self->too_much() and return;
|
||||
|
||||
my $tbl = $self->{table} or return;
|
||||
$tbl->connect();
|
||||
|
||||
my %weights = $tbl->weight() or return;
|
||||
my $tblname = $tbl->name();
|
||||
|
||||
# Group the fulltext columns by value of the weight
|
||||
my %cols_grouped;
|
||||
foreach ( keys %weights ) {
|
||||
my $val = $weights{$_} or next;
|
||||
push @{$cols_grouped{$val}}, $_;
|
||||
}
|
||||
|
||||
# Drop unified fulltext columns if required
|
||||
if ( keys %cols_grouped > 1 ) {
|
||||
$cols_grouped{-1} = [ grep { $weights{$_} } keys %weights ];
|
||||
}
|
||||
|
||||
# For each value grouped column set create a full text
|
||||
# column
|
||||
foreach my $v ( keys %cols_grouped ) {
|
||||
|
||||
my $ft_name = 'ft_'.join("_", sort @{$cols_grouped{$v}});
|
||||
|
||||
my $res = eval {
|
||||
$tbl->do_query(qq!
|
||||
ALTER TABLE $tblname
|
||||
DROP INDEX $ft_name
|
||||
!);
|
||||
};
|
||||
|
||||
# Break on errors that can't be handled
|
||||
if ( $@ ) {
|
||||
next if $@ !~ /exist/i;
|
||||
$self->warn( "$@" );
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub add_search_driver {
|
||||
# ------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
|
||||
$self->too_much() and return;
|
||||
|
||||
my $tbl = $self->{table} or return $self->error(BADARGS => FATAL => "table must be passed into add_search_driver.");
|
||||
my %weights = $tbl->weight() or return $self->error(NOWEIGHTS => 'WARN');
|
||||
my $tblname = $tbl->name() or return $self->error(BADARGS => FATAL => "table does not have a name?");
|
||||
|
||||
# group the fulltext columns by value of the weight
|
||||
my %cols_grouped;
|
||||
foreach ( keys %weights ) {
|
||||
my $val = $weights{$_} or next;
|
||||
push @{$cols_grouped{$val}}, $_;
|
||||
}
|
||||
|
||||
# Create unified fulltext columns if required
|
||||
if ( keys %cols_grouped > 1 ) {
|
||||
$cols_grouped{-1} = [ grep { $weights{$_} } keys %weights ];
|
||||
}
|
||||
|
||||
# for each value grouped column set create a full text
|
||||
# column
|
||||
foreach my $v ( keys %cols_grouped ) {
|
||||
|
||||
my $cols = join(",", sort @{$cols_grouped{$v}});
|
||||
my $ft_name = 'ft_'.join("_", sort @{$cols_grouped{$v}});
|
||||
|
||||
my $res = eval {
|
||||
$tbl->do_query(qq!
|
||||
ALTER TABLE $tblname
|
||||
ADD FULLTEXT $ft_name ( $cols )
|
||||
!);
|
||||
};
|
||||
|
||||
# break on errors that can't be handled
|
||||
if ( $@ ) {
|
||||
next if $@ =~ /duplicate/i;
|
||||
$self->warn( "$@" );
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
sub too_much {
|
||||
# ------------------------------------------------------------------------------
|
||||
# returns true if there are too many records to be used on the Web
|
||||
#
|
||||
if ( $ENV{REQUEST_METHOD} ) {
|
||||
my $self = shift;
|
||||
my $tbl = $self->{table};
|
||||
if ( $tbl->count() > 5000 ) {
|
||||
$self->error( 'NOTFROMWEB', 'WARN', $tbl->name() );
|
||||
return 1
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
sub post_create_table {
|
||||
# ------------------------------------------------------------------------------
|
||||
shift->add_search_driver(@_);
|
||||
}
|
||||
|
||||
sub reindex_all {
|
||||
# ------------------------------------------------------------------------------
|
||||
# this will drop all the fulltext columns and reindex all of them. This should
|
||||
# not be required unless the user changes the weights on one of their columns.
|
||||
# Unfortunately, this method is not particularly smart and risks not dropping
|
||||
# certain index columns and reindexes even when it's not required. It must be
|
||||
# recoded at a future date, but as this action won't happen frequently and will
|
||||
# rarely affect the user, it is not a priority.
|
||||
#
|
||||
my $self = shift;
|
||||
|
||||
$self->drop_search_driver;
|
||||
$self->add_search_driver;
|
||||
}
|
||||
|
||||
1;
|
@ -0,0 +1,51 @@
|
||||
# ==================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::Search::MYSQL::Search
|
||||
# Author : Aki Mimoto
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: Search.pm,v 1.14 2004/01/13 01:35:19 jagerman Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ==================================================================
|
||||
#
|
||||
# Description:
|
||||
# Class used to search indexed tables.
|
||||
#
|
||||
|
||||
package GT::SQL::Search::MYSQL::Search;
|
||||
# ------------------------------------------------------------------------------
|
||||
use strict;
|
||||
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
|
||||
use GT::SQL::Search::Base::Search;
|
||||
@ISA = qw( GT::SQL::Search::Base::Search );
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Preamble information related to the object
|
||||
|
||||
$DEBUG = 0;
|
||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.14 $ =~ /(\d+)\.(\d+)/;
|
||||
$ATTRIBS = {
|
||||
min_word_size => 4
|
||||
};
|
||||
|
||||
sub load {
|
||||
# --------------------------------------------------
|
||||
my $self = shift;
|
||||
my $opts = $self->common_param( @_ );
|
||||
|
||||
# determine which mysql search variant to use.
|
||||
my $tbl = $opts->{table};
|
||||
my $ver_sth = $tbl->do_query( 'SELECT VERSION()' );
|
||||
my $version = $ver_sth->fetchrow_array();
|
||||
|
||||
my ( $maj, $min ) = split /\./, $version;
|
||||
|
||||
my $pkg = 'GT::SQL::Search::MYSQL::';
|
||||
$pkg .= $maj > 3 ? 'VER4' : 'VER3';
|
||||
|
||||
eval "require $pkg";
|
||||
return $pkg->new(@_)
|
||||
}
|
||||
|
||||
1;
|
178
site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER3.pm
Normal file
178
site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER3.pm
Normal file
@ -0,0 +1,178 @@
|
||||
# ==================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::Search::MYSQL::VER3
|
||||
# Author : Aki Mimoto
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: VER3.pm,v 1.3 2004/08/28 03:53:49 jagerman Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ==================================================================
|
||||
#
|
||||
# Description:
|
||||
# Class used to search indexed tables.
|
||||
#
|
||||
|
||||
package GT::SQL::Search::MYSQL::VER3;
|
||||
# ------------------------------------------------------------------------------
|
||||
use strict;
|
||||
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
|
||||
use GT::SQL::Search::Base::Search;
|
||||
@ISA = qw( GT::SQL::Search::Base::Search );
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Preamble information related to the object
|
||||
|
||||
$DEBUG = 0;
|
||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.3 $ =~ /(\d+)\.(\d+)/;
|
||||
$ATTRIBS = {
|
||||
min_word_size => 4
|
||||
};
|
||||
|
||||
sub _phrase_query {
|
||||
# ------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my $phrases = shift or return $_[0];
|
||||
my $results = shift || {};
|
||||
|
||||
foreach my $phrase ( values %{$phrases} ) {
|
||||
$self->debug_dumper( "Unioning: ", $phrase ) if ($self->{_debug});
|
||||
|
||||
my $tmp = {};
|
||||
foreach my $keyword ( @{ $phrase->{phrase}|| [] } ) {
|
||||
$tmp = $self->_get_words ( [ $keyword ], $tmp, 'intersect' );
|
||||
keys %$tmp or return {};
|
||||
}
|
||||
foreach my $key ( keys %$tmp ) { $results->{$key} += $tmp->{$key} }
|
||||
|
||||
}
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
sub _get_phrase {
|
||||
# ------------------------------------------------------------------------------
|
||||
# one day change this so it does words properly
|
||||
return _get_words(@_);
|
||||
}
|
||||
|
||||
sub _union_query {
|
||||
# ------------------------------------------------------------------------------
|
||||
return _get_words(@_);
|
||||
}
|
||||
|
||||
sub _intersect_query {
|
||||
# ------------------------------------------------------------------------------
|
||||
my ( $self, $keywords, $results ) = @_;
|
||||
$keywords or return $results;
|
||||
|
||||
foreach my $keyword ( keys %{ $keywords || {} } ) {
|
||||
$results = $self->_get_words ( [ $keyword ], $results, 'intersect' );
|
||||
keys %$results or return {};
|
||||
}
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
sub _phrase_intersect_query {
|
||||
# ------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my $phrases = shift or return $_[0];
|
||||
my $results = shift || {};
|
||||
|
||||
my $tmp = $self->_phrase_query ( $phrases, $results );
|
||||
keys %$results or return $tmp;
|
||||
foreach my $key ( keys %$results ) {
|
||||
if ( $tmp->{$key} ) {
|
||||
$results->{$key} += $tmp->{$key};
|
||||
}
|
||||
else {
|
||||
delete $results->{$key}
|
||||
}
|
||||
}
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
sub _disjoin_query {
|
||||
# ------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my $words = shift or return shift;
|
||||
my $results = shift || {};
|
||||
|
||||
$results = $self->_get_words([ keys %{$words || {}} ], $results, 'disjoin' );
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
sub _phrase_disjoin_query {
|
||||
# ------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my $phrases = shift or return shift;
|
||||
my $results = shift || {};
|
||||
|
||||
my $tmp = $self->_phrase_query ( $phrases, $results );
|
||||
keys %$results or return $tmp;
|
||||
foreach my $key ( keys %$results ) {
|
||||
$tmp->{$key} and delete $results->{$key};
|
||||
}
|
||||
}
|
||||
|
||||
sub _get_words {
|
||||
# ------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my $words = shift or return $_[0] || {};
|
||||
my $results = shift || {};
|
||||
my $mode = lc shift;
|
||||
|
||||
my $tbl = $self->{table} or $self->error( 'BADARGS', 'FATAL', 'Must have table object defined' );
|
||||
my $tname = $tbl->name();
|
||||
my $wordlist = join " ", ( ref $words ? ( ref $words eq 'HASH' ? keys %$words : @$words ) : $words ) ;
|
||||
my ($pk) = $tbl->pk;
|
||||
|
||||
my %weights = $tbl->_weight_cols();
|
||||
my $cols = join(",", keys %weights);
|
||||
my $qwrds = quotemeta( $wordlist );
|
||||
my $where = ( $results and keys %$results )
|
||||
? ("AND $pk IN(" . join(',', keys %$results) . ")")
|
||||
: '';
|
||||
my $query = qq!
|
||||
SELECT $pk, MATCH($cols) AGAINST ('$qwrds') AS SCORE
|
||||
FROM $tname
|
||||
WHERE MATCH($cols) AGAINST ('$qwrds')
|
||||
$where
|
||||
!;
|
||||
my $sth = $tbl->do_query( $query ) or return;
|
||||
|
||||
if ( $mode eq 'disjoin' ) {
|
||||
while ( my $result = $sth->fetchrow ) {
|
||||
delete $results->{$result};
|
||||
}
|
||||
}
|
||||
elsif ( $mode eq 'intersect' ) {
|
||||
my $tmp = {};
|
||||
while ( my $aref = $sth->fetchrow_arrayref ) {
|
||||
$tmp->{$aref->[0]} = $aref->[1];
|
||||
}
|
||||
if ( $results and keys %$results ) {
|
||||
while (my ($id, $score) = each %$results) {
|
||||
if (not defined $tmp->{$id}) {
|
||||
delete $results->{$id};
|
||||
next;
|
||||
}
|
||||
$results->{$id} += $score;
|
||||
}
|
||||
}
|
||||
else {
|
||||
$results = $tmp;
|
||||
}
|
||||
}
|
||||
else {
|
||||
while ( my $aref = $sth->fetchrow_arrayref ) {
|
||||
$results->{$aref->[0]} += $aref->[1];
|
||||
}
|
||||
}
|
||||
return $results;
|
||||
}
|
||||
|
||||
1;
|
355
site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER4.pm
Normal file
355
site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER4.pm
Normal file
@ -0,0 +1,355 @@
|
||||
# ==================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::Search::MYSQL::VER4
|
||||
# Author : Aki Mimoto
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: VER4.pm,v 1.9 2004/01/13 01:35:19 jagerman Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ==================================================================
|
||||
#
|
||||
# Description:
|
||||
# Class used to search indexed tables.
|
||||
#
|
||||
|
||||
package GT::SQL::Search::MYSQL::VER4;
|
||||
# ------------------------------------------------------------------------------
|
||||
use strict;
|
||||
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS /;
|
||||
use GT::SQL::Search::Base::Search;
|
||||
@ISA = qw( GT::SQL::Search::Base::Search );
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Preamble information related to the object
|
||||
|
||||
$DEBUG = 0;
|
||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
|
||||
$STOPWORDS = { map { $_ => 1 } qw/
|
||||
|
||||
a's able about above according accordingly across actually after
|
||||
afterwards again against ain't all allow allows almost alone
|
||||
along already also although always am among amongst an and another
|
||||
any anybody anyhow anyone anything anyway anyways anywhere apart
|
||||
appear appreciate appropriate are aren't around as aside ask asking
|
||||
associated at available away awfully be became because become becomes
|
||||
becoming been before beforehand behind being believe below beside
|
||||
besides best better between beyond both brief but by c'mon c's came
|
||||
can can't cannot cant cause causes certain certainly changes clearly
|
||||
co com come comes concerning consequently consider considering
|
||||
contain containing contains corresponding could couldn't course currently
|
||||
definitely described despite did didn't different do does doesn't
|
||||
doing don't done down downwards during each edu eg eight either else
|
||||
elsewhere enough entirely especially et etc even ever every everybody
|
||||
everyone everything everywhere ex exactly example except far few
|
||||
fifth first five followed following follows for former formerly
|
||||
forth four from further furthermore get gets getting given gives
|
||||
go goes going gone got gotten greetings had hadn't happens hardly
|
||||
has hasn't have haven't having he he's hello help hence her here
|
||||
here's hereafter hereby herein hereupon hers herself hi him himself
|
||||
his hither hopefully how howbeit however i'd i'll i'm i've ie if ignored
|
||||
immediate in inasmuch inc indeed indicate indicated indicates inner
|
||||
insofar instead into inward is isn't it it'd it'll it's its itself
|
||||
just keep keeps kept know knows known last lately later latter latterly
|
||||
least less lest let let's like liked likely little look looking looks
|
||||
ltd mainly many may maybe me mean meanwhile merely might more
|
||||
moreover most mostly much must my myself name namely nd near nearly
|
||||
necessary need needs neither never nevertheless new next nine no
|
||||
nobody non none noone nor normally not nothing novel now nowhere
|
||||
obviously of off often oh ok okay old on once one ones only onto
|
||||
or other others otherwise ought our ours ourselves out outside over
|
||||
overall own particular particularly per perhaps placed please plus
|
||||
possible presumably probably provides que quite qv rather rd re
|
||||
really reasonably regarding regardless regards relatively respectively
|
||||
right said same saw say saying says second secondly see seeing seem
|
||||
seemed seeming seems seen self selves sensible sent serious seriously
|
||||
seven several shall she should shouldn't since six so some somebody
|
||||
somehow someone something sometime sometimes somewhat somewhere
|
||||
soon sorry specified specify specifying still sub such sup sure
|
||||
t's take taken tell tends th than thank thanks thanx that that's
|
||||
thats the their theirs them themselves then thence there there's
|
||||
thereafter thereby therefore therein theres thereupon these they
|
||||
they'd they'll they're they've think third this thorough thoroughly
|
||||
those though three through throughout thru thus to together too
|
||||
took toward towards tried tries truly try trying twice two un
|
||||
under unfortunately unless unlikely until unto up upon us use used
|
||||
useful uses using usually value various very via viz vs want wants
|
||||
was wasn't way we we'd we'll we're we've welcome well went were
|
||||
weren't what what's whatever when whence whenever where where's
|
||||
whereafter whereas whereby wherein whereupon wherever whether
|
||||
which while whither who who's whoever whole whom whose why will
|
||||
willing wish with within without won't wonder would would wouldn't
|
||||
yes yet you you'd you'll you're you've your yours yourself
|
||||
yourselves zero
|
||||
|
||||
/ };
|
||||
|
||||
$ATTRIBS = {
|
||||
min_word_size => 4,
|
||||
stopwords => $STOPWORDS,
|
||||
};
|
||||
|
||||
sub query {
|
||||
# --------------------------------------------------
|
||||
# Returns a sth based on a query
|
||||
#
|
||||
# Options:
|
||||
# - paging
|
||||
# mh : max hits
|
||||
# nh : number hit (or page of hits)
|
||||
#
|
||||
# - searching
|
||||
# ww : whole word
|
||||
# ma : 1 => OR match, 0 => AND match, undefined => QUERY
|
||||
# substring : search for substrings of words
|
||||
# bool : 'and' => and search, 'or' => or search, '' => regular query
|
||||
# query : the string of things to ask for
|
||||
#
|
||||
# - filtering
|
||||
# field_name : value # Find all rows with field_name = value
|
||||
# field_name : ">value" # Find all rows with field_name > value.
|
||||
# field_name : "<value" # Find all rows with field_name < value.
|
||||
# field_name-gt : value # Find all rows with field_name > value.
|
||||
# field_name-lt : value # Find all rows with field_name < value.
|
||||
#
|
||||
# Parameters:
|
||||
# ( $CGI ) : a single cgi object
|
||||
# ( $HASH ) : a hash of the parameters
|
||||
#
|
||||
my $self = shift;
|
||||
|
||||
# create an easily accessible argument hash
|
||||
my $args = $self->common_param(@_);
|
||||
|
||||
# see if we can setup the filtering constraints
|
||||
my $filter = { %$args };
|
||||
my $query = delete $args->{query} || $self->{query} || '';
|
||||
my $ftr_cond;
|
||||
|
||||
# parse query
|
||||
$self->debug( "Search Query: $query" ) if ($self->{_debug});
|
||||
my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
|
||||
|
||||
$self->{rejected_keywords} = $rejected;
|
||||
|
||||
# setup the additional input parameters
|
||||
$query_struct = $self->_preset_options( $query_struct, $args );
|
||||
|
||||
# now sort into distinct buckets
|
||||
my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
|
||||
$self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
|
||||
|
||||
# with the buckets, it's now possible to create a query string
|
||||
# that can be passed directly into the FULLTEXT search.
|
||||
my $query_string = '';
|
||||
|
||||
foreach my $search_type ( keys %$buckets ) {
|
||||
my $bucket = $buckets->{$search_type};
|
||||
foreach my $token ( keys %$bucket ) {
|
||||
next unless $token;
|
||||
my $properties = $bucket->{$token} or next;
|
||||
|
||||
my $e = ' ';
|
||||
|
||||
# handle boolean operations
|
||||
$properties->{mode} ||= '';
|
||||
if ( $properties->{mode} eq 'must' ) {
|
||||
$e .= '+';
|
||||
}
|
||||
elsif ( $properties->{mode} eq 'cannot' ) {
|
||||
$e .= '-';
|
||||
}
|
||||
|
||||
# deal with phrase vs keyword
|
||||
if ( $properties->{phrase} ) {
|
||||
$e .= '"' . quotemeta( $token ) . '"';
|
||||
}
|
||||
else {
|
||||
$e .= quotemeta $token;
|
||||
|
||||
# substring match
|
||||
$e .= '*' if $properties->{substring};
|
||||
}
|
||||
|
||||
$query_string .= $e;
|
||||
}
|
||||
}
|
||||
|
||||
# start building the GT::SQL::COndition object that will allow us to
|
||||
# to retreive the data
|
||||
|
||||
require GT::SQL::Condition;
|
||||
my $tbl = $self->{table};
|
||||
my $constraints = GT::SQL::Condition->new;
|
||||
|
||||
# create the GT::SQL::Condition object that will become the filtering
|
||||
# constraints
|
||||
my $filt = $self->{filter};
|
||||
|
||||
if ( $filt and ref $filt eq 'HASH' ) {
|
||||
foreach my $fkey ( keys %$filt ) {
|
||||
next if exists $args->{$fkey};
|
||||
$args->{$fkey} = $filt->{$fkey};
|
||||
}
|
||||
}
|
||||
|
||||
if ( my $filter_cond = $tbl->build_query_cond( $args ) ) {
|
||||
$constraints->add( $filter_cond );
|
||||
}
|
||||
|
||||
# if the cached filter object is a Condition object, append
|
||||
# it to the filter set
|
||||
if ( $filt and UNIVERSAL::isa( $filt, 'GT::SQL::Condition' ) ) {
|
||||
$constraints->add( $filt );
|
||||
}
|
||||
|
||||
# create our fulltext query condition
|
||||
my %weights = $tbl->_weight_cols();
|
||||
my $cols = join(",", keys %weights);
|
||||
if ( $query_string ) {
|
||||
$constraints->add( GT::SQL::Condition->new(
|
||||
"MATCH( $cols )",
|
||||
"AGAINST",
|
||||
\"('$query_string' IN BOOLEAN MODE)" ) );
|
||||
}
|
||||
|
||||
# calculate the cursor constraints
|
||||
foreach my $k (qw( nh mh so sb )) {
|
||||
next if defined $args->{$k};
|
||||
$args->{$k} = $self->{$k} || '';
|
||||
}
|
||||
$args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
|
||||
$args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
|
||||
$args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ ) ? $1 : 'score';
|
||||
|
||||
# if the sorting method is "score" the order is forced to "descend" (as there
|
||||
# is almost no reason to order by worst matches)
|
||||
# if the storing key is not "score", the default order will be "ascend"
|
||||
$args->{so} =
|
||||
$args->{sb} eq 'score' ? 'desc' : # comment out this entire line to prevent "descend" order forcing
|
||||
( (defined $args->{so} and $args->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : 'asc' );
|
||||
|
||||
# check that sb is not dangerous
|
||||
my $sb = $self->clean_sb($args->{sb}, $args->{so});
|
||||
|
||||
$self->debug_dumper( "About to query. Constraint object: ", $constraints) if ($self->{_debug});
|
||||
|
||||
# Setup a limit only if there is no callback. The callback argument requires a full results list
|
||||
unless ( $self->{callback} ) {
|
||||
my $offset = ( $args->{nh} - 1 ) * $args->{mh};
|
||||
$tbl->select_options($sb) if ($sb);
|
||||
$tbl->select_options("LIMIT $offset, $args->{mh}");
|
||||
}
|
||||
|
||||
my $sth;
|
||||
|
||||
# if the weights are all the same value, the query can be optimized
|
||||
# to use just one MATCH AGAINST argument. However, if the weights
|
||||
# are different, each element must be sectioned and queried separately
|
||||
# with the weight value multipler
|
||||
|
||||
# check to see if all the weight values are the same.
|
||||
my $base_weight;
|
||||
my $weights_same = 1;
|
||||
foreach ( values %weights ) {
|
||||
$base_weight ||= $_ or next; # init and skip 0s
|
||||
next if $base_weight == $_;
|
||||
$weights_same = 0;
|
||||
last;
|
||||
}
|
||||
|
||||
# multiplex the action
|
||||
my $result_cols = $self->{callback} ? ($tbl->pk)[0] : '*';
|
||||
|
||||
unless ( $query_string ) {
|
||||
$sth = $tbl->select( [ $result_cols ], $constraints ) or return;
|
||||
}
|
||||
elsif ( $weights_same ) {
|
||||
$sth = $tbl->select( [ $result_cols, "MATCH($cols) AGAINST ('$query_string' IN BOOLEAN MODE) AS score" ], $constraints )
|
||||
or return;
|
||||
}
|
||||
else {
|
||||
|
||||
# group the multiplier counts
|
||||
my %column_multiplier;
|
||||
foreach ( keys %weights ) {
|
||||
push @{$column_multiplier{$weights{$_}}}, $_;
|
||||
}
|
||||
|
||||
my @search_parameters;
|
||||
foreach my $val ( keys %column_multiplier ) {
|
||||
next unless $val;
|
||||
|
||||
my $cols_ar = $column_multiplier{ $val } or next;
|
||||
my $search_cols = join ",", @$cols_ar;
|
||||
|
||||
if ( $val > 1 ) {
|
||||
push @search_parameters, "( ( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) ) * $val )";
|
||||
}
|
||||
else {
|
||||
push @search_parameters, "( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) )";
|
||||
}
|
||||
}
|
||||
|
||||
my $search_sql = "( " . join( " + ", @search_parameters ) . " ) AS score";
|
||||
|
||||
$sth = $tbl->select( [ $result_cols, $search_sql ], $constraints )
|
||||
or return;
|
||||
}
|
||||
|
||||
# If we have a callback, we fetch the primary key => score and pass that hash into
|
||||
# the filter.
|
||||
if ($self->{callback}) {
|
||||
unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
|
||||
return $self->error('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
|
||||
}
|
||||
my %results = map { $_->[0] => $_->[1] } @{$sth->fetchall_arrayref};
|
||||
|
||||
$self->debug_dumper("Running results through callback. Had: " . scalar (keys %results) . " results.", \%results) if ($self->{_debug});
|
||||
my $filtered = $self->{callback}->($self, \%results) || {};
|
||||
$self->debug_dumper("New result set: " . scalar (keys %$filtered) . " results.", $filtered) if ($self->{_debug});
|
||||
|
||||
$self->{rows} = scalar keys %$filtered;
|
||||
return $self->sth($filtered);
|
||||
}
|
||||
|
||||
# count the number of hits. create a query for this purpose only if we are required to.
|
||||
$self->{rows} = $sth->rows();
|
||||
if (($args->{nh} > 1) or ($self->{rows} == $args->{mh})) {
|
||||
$self->{rows} = $tbl->count($constraints);
|
||||
}
|
||||
return $sth;
|
||||
}
|
||||
|
||||
sub clean_sb {
|
||||
# -------------------------------------------------------------------------------
|
||||
# Convert the sort by, sort order into an sql string.
|
||||
#
|
||||
my ($class, $sb, $so) = @_;
|
||||
my $output = '';
|
||||
|
||||
return $output unless ($sb);
|
||||
|
||||
if ($sb and not ref $sb) {
|
||||
if ($sb =~ /^[\w\s,]+$/) {
|
||||
if ($sb =~ /\s(?:asc|desc)/i) {
|
||||
$output = 'ORDER BY ' . $sb;
|
||||
}
|
||||
else {
|
||||
$output = 'ORDER BY ' . $sb . ' ' . $so;
|
||||
}
|
||||
}
|
||||
else {
|
||||
$class->error('BADSB', 'WARN', $sb);
|
||||
}
|
||||
}
|
||||
elsif (ref $sb eq 'ARRAY') {
|
||||
foreach ( @$sb ) {
|
||||
/^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
|
||||
}
|
||||
$output = 'ORDER BY ' . join(',', @$sb);
|
||||
}
|
||||
return $output;
|
||||
}
|
||||
|
||||
1;
|
Reference in New Issue
Block a user