356 lines
14 KiB
Perl
356 lines
14 KiB
Perl
# ==================================================================
|
|
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
|
#
|
|
# GT::Search::MYSQL::VER4
|
|
# Author : Aki Mimoto
|
|
# CVS Info : 087,071,086,086,085
|
|
# $Id: VER4.pm,v 1.9 2004/01/13 01:35:19 jagerman Exp $
|
|
#
|
|
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
|
# ==================================================================
|
|
#
|
|
# Description:
|
|
# Class used to search indexed tables.
|
|
#
|
|
|
|
package GT::SQL::Search::MYSQL::VER4;
|
|
# ------------------------------------------------------------------------------
|
|
use strict;
|
|
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS /;
|
|
use GT::SQL::Search::Base::Search;
|
|
@ISA = qw( GT::SQL::Search::Base::Search );
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Preamble information related to the object
|
|
|
|
$DEBUG = 0;
|
|
$VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
|
|
$STOPWORDS = { map { $_ => 1 } qw/
|
|
|
|
a's able about above according accordingly across actually after
|
|
afterwards again against ain't all allow allows almost alone
|
|
along already also although always am among amongst an and another
|
|
any anybody anyhow anyone anything anyway anyways anywhere apart
|
|
appear appreciate appropriate are aren't around as aside ask asking
|
|
associated at available away awfully be became because become becomes
|
|
becoming been before beforehand behind being believe below beside
|
|
besides best better between beyond both brief but by c'mon c's came
|
|
can can't cannot cant cause causes certain certainly changes clearly
|
|
co com come comes concerning consequently consider considering
|
|
contain containing contains corresponding could couldn't course currently
|
|
definitely described despite did didn't different do does doesn't
|
|
doing don't done down downwards during each edu eg eight either else
|
|
elsewhere enough entirely especially et etc even ever every everybody
|
|
everyone everything everywhere ex exactly example except far few
|
|
fifth first five followed following follows for former formerly
|
|
forth four from further furthermore get gets getting given gives
|
|
go goes going gone got gotten greetings had hadn't happens hardly
|
|
has hasn't have haven't having he he's hello help hence her here
|
|
here's hereafter hereby herein hereupon hers herself hi him himself
|
|
his hither hopefully how howbeit however i'd i'll i'm i've ie if ignored
|
|
immediate in inasmuch inc indeed indicate indicated indicates inner
|
|
insofar instead into inward is isn't it it'd it'll it's its itself
|
|
just keep keeps kept know knows known last lately later latter latterly
|
|
least less lest let let's like liked likely little look looking looks
|
|
ltd mainly many may maybe me mean meanwhile merely might more
|
|
moreover most mostly much must my myself name namely nd near nearly
|
|
necessary need needs neither never nevertheless new next nine no
|
|
nobody non none noone nor normally not nothing novel now nowhere
|
|
obviously of off often oh ok okay old on once one ones only onto
|
|
or other others otherwise ought our ours ourselves out outside over
|
|
overall own particular particularly per perhaps placed please plus
|
|
possible presumably probably provides que quite qv rather rd re
|
|
really reasonably regarding regardless regards relatively respectively
|
|
right said same saw say saying says second secondly see seeing seem
|
|
seemed seeming seems seen self selves sensible sent serious seriously
|
|
seven several shall she should shouldn't since six so some somebody
|
|
somehow someone something sometime sometimes somewhat somewhere
|
|
soon sorry specified specify specifying still sub such sup sure
|
|
t's take taken tell tends th than thank thanks thanx that that's
|
|
thats the their theirs them themselves then thence there there's
|
|
thereafter thereby therefore therein theres thereupon these they
|
|
they'd they'll they're they've think third this thorough thoroughly
|
|
those though three through throughout thru thus to together too
|
|
took toward towards tried tries truly try trying twice two un
|
|
under unfortunately unless unlikely until unto up upon us use used
|
|
useful uses using usually value various very via viz vs want wants
|
|
was wasn't way we we'd we'll we're we've welcome well went were
|
|
weren't what what's whatever when whence whenever where where's
|
|
whereafter whereas whereby wherein whereupon wherever whether
|
|
which while whither who who's whoever whole whom whose why will
|
|
willing wish with within without won't wonder would would wouldn't
|
|
yes yet you you'd you'll you're you've your yours yourself
|
|
yourselves zero
|
|
|
|
/ };
|
|
|
|
$ATTRIBS = {
|
|
min_word_size => 4,
|
|
stopwords => $STOPWORDS,
|
|
};
|
|
|
|
sub query {
|
|
# --------------------------------------------------
|
|
# Returns a sth based on a query
|
|
#
|
|
# Options:
|
|
# - paging
|
|
# mh : max hits
|
|
# nh : number hit (or page of hits)
|
|
#
|
|
# - searching
|
|
# ww : whole word
|
|
# ma : 1 => OR match, 0 => AND match, undefined => QUERY
|
|
# substring : search for substrings of words
|
|
# bool : 'and' => and search, 'or' => or search, '' => regular query
|
|
# query : the string of things to ask for
|
|
#
|
|
# - filtering
|
|
# field_name : value # Find all rows with field_name = value
|
|
# field_name : ">value" # Find all rows with field_name > value.
|
|
# field_name : "<value" # Find all rows with field_name < value.
|
|
# field_name-gt : value # Find all rows with field_name > value.
|
|
# field_name-lt : value # Find all rows with field_name < value.
|
|
#
|
|
# Parameters:
|
|
# ( $CGI ) : a single cgi object
|
|
# ( $HASH ) : a hash of the parameters
|
|
#
|
|
my $self = shift;
|
|
|
|
# create an easily accessible argument hash
|
|
my $args = $self->common_param(@_);
|
|
|
|
# see if we can setup the filtering constraints
|
|
my $filter = { %$args };
|
|
my $query = delete $args->{query} || $self->{query} || '';
|
|
my $ftr_cond;
|
|
|
|
# parse query
|
|
$self->debug( "Search Query: $query" ) if ($self->{_debug});
|
|
my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
|
|
|
|
$self->{rejected_keywords} = $rejected;
|
|
|
|
# setup the additional input parameters
|
|
$query_struct = $self->_preset_options( $query_struct, $args );
|
|
|
|
# now sort into distinct buckets
|
|
my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
|
|
$self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
|
|
|
|
# with the buckets, it's now possible to create a query string
|
|
# that can be passed directly into the FULLTEXT search.
|
|
my $query_string = '';
|
|
|
|
foreach my $search_type ( keys %$buckets ) {
|
|
my $bucket = $buckets->{$search_type};
|
|
foreach my $token ( keys %$bucket ) {
|
|
next unless $token;
|
|
my $properties = $bucket->{$token} or next;
|
|
|
|
my $e = ' ';
|
|
|
|
# handle boolean operations
|
|
$properties->{mode} ||= '';
|
|
if ( $properties->{mode} eq 'must' ) {
|
|
$e .= '+';
|
|
}
|
|
elsif ( $properties->{mode} eq 'cannot' ) {
|
|
$e .= '-';
|
|
}
|
|
|
|
# deal with phrase vs keyword
|
|
if ( $properties->{phrase} ) {
|
|
$e .= '"' . quotemeta( $token ) . '"';
|
|
}
|
|
else {
|
|
$e .= quotemeta $token;
|
|
|
|
# substring match
|
|
$e .= '*' if $properties->{substring};
|
|
}
|
|
|
|
$query_string .= $e;
|
|
}
|
|
}
|
|
|
|
# start building the GT::SQL::COndition object that will allow us to
|
|
# to retreive the data
|
|
|
|
require GT::SQL::Condition;
|
|
my $tbl = $self->{table};
|
|
my $constraints = GT::SQL::Condition->new;
|
|
|
|
# create the GT::SQL::Condition object that will become the filtering
|
|
# constraints
|
|
my $filt = $self->{filter};
|
|
|
|
if ( $filt and ref $filt eq 'HASH' ) {
|
|
foreach my $fkey ( keys %$filt ) {
|
|
next if exists $args->{$fkey};
|
|
$args->{$fkey} = $filt->{$fkey};
|
|
}
|
|
}
|
|
|
|
if ( my $filter_cond = $tbl->build_query_cond( $args ) ) {
|
|
$constraints->add( $filter_cond );
|
|
}
|
|
|
|
# if the cached filter object is a Condition object, append
|
|
# it to the filter set
|
|
if ( $filt and UNIVERSAL::isa( $filt, 'GT::SQL::Condition' ) ) {
|
|
$constraints->add( $filt );
|
|
}
|
|
|
|
# create our fulltext query condition
|
|
my %weights = $tbl->_weight_cols();
|
|
my $cols = join(",", keys %weights);
|
|
if ( $query_string ) {
|
|
$constraints->add( GT::SQL::Condition->new(
|
|
"MATCH( $cols )",
|
|
"AGAINST",
|
|
\"('$query_string' IN BOOLEAN MODE)" ) );
|
|
}
|
|
|
|
# calculate the cursor constraints
|
|
foreach my $k (qw( nh mh so sb )) {
|
|
next if defined $args->{$k};
|
|
$args->{$k} = $self->{$k} || '';
|
|
}
|
|
$args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
|
|
$args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
|
|
$args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ ) ? $1 : 'score';
|
|
|
|
# if the sorting method is "score" the order is forced to "descend" (as there
|
|
# is almost no reason to order by worst matches)
|
|
# if the storing key is not "score", the default order will be "ascend"
|
|
$args->{so} =
|
|
$args->{sb} eq 'score' ? 'desc' : # comment out this entire line to prevent "descend" order forcing
|
|
( (defined $args->{so} and $args->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : 'asc' );
|
|
|
|
# check that sb is not dangerous
|
|
my $sb = $self->clean_sb($args->{sb}, $args->{so});
|
|
|
|
$self->debug_dumper( "About to query. Constraint object: ", $constraints) if ($self->{_debug});
|
|
|
|
# Setup a limit only if there is no callback. The callback argument requires a full results list
|
|
unless ( $self->{callback} ) {
|
|
my $offset = ( $args->{nh} - 1 ) * $args->{mh};
|
|
$tbl->select_options($sb) if ($sb);
|
|
$tbl->select_options("LIMIT $offset, $args->{mh}");
|
|
}
|
|
|
|
my $sth;
|
|
|
|
# if the weights are all the same value, the query can be optimized
|
|
# to use just one MATCH AGAINST argument. However, if the weights
|
|
# are different, each element must be sectioned and queried separately
|
|
# with the weight value multipler
|
|
|
|
# check to see if all the weight values are the same.
|
|
my $base_weight;
|
|
my $weights_same = 1;
|
|
foreach ( values %weights ) {
|
|
$base_weight ||= $_ or next; # init and skip 0s
|
|
next if $base_weight == $_;
|
|
$weights_same = 0;
|
|
last;
|
|
}
|
|
|
|
# multiplex the action
|
|
my $result_cols = $self->{callback} ? ($tbl->pk)[0] : '*';
|
|
|
|
unless ( $query_string ) {
|
|
$sth = $tbl->select( [ $result_cols ], $constraints ) or return;
|
|
}
|
|
elsif ( $weights_same ) {
|
|
$sth = $tbl->select( [ $result_cols, "MATCH($cols) AGAINST ('$query_string' IN BOOLEAN MODE) AS score" ], $constraints )
|
|
or return;
|
|
}
|
|
else {
|
|
|
|
# group the multiplier counts
|
|
my %column_multiplier;
|
|
foreach ( keys %weights ) {
|
|
push @{$column_multiplier{$weights{$_}}}, $_;
|
|
}
|
|
|
|
my @search_parameters;
|
|
foreach my $val ( keys %column_multiplier ) {
|
|
next unless $val;
|
|
|
|
my $cols_ar = $column_multiplier{ $val } or next;
|
|
my $search_cols = join ",", @$cols_ar;
|
|
|
|
if ( $val > 1 ) {
|
|
push @search_parameters, "( ( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) ) * $val )";
|
|
}
|
|
else {
|
|
push @search_parameters, "( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) )";
|
|
}
|
|
}
|
|
|
|
my $search_sql = "( " . join( " + ", @search_parameters ) . " ) AS score";
|
|
|
|
$sth = $tbl->select( [ $result_cols, $search_sql ], $constraints )
|
|
or return;
|
|
}
|
|
|
|
# If we have a callback, we fetch the primary key => score and pass that hash into
|
|
# the filter.
|
|
if ($self->{callback}) {
|
|
unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
|
|
return $self->error('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
|
|
}
|
|
my %results = map { $_->[0] => $_->[1] } @{$sth->fetchall_arrayref};
|
|
|
|
$self->debug_dumper("Running results through callback. Had: " . scalar (keys %results) . " results.", \%results) if ($self->{_debug});
|
|
my $filtered = $self->{callback}->($self, \%results) || {};
|
|
$self->debug_dumper("New result set: " . scalar (keys %$filtered) . " results.", $filtered) if ($self->{_debug});
|
|
|
|
$self->{rows} = scalar keys %$filtered;
|
|
return $self->sth($filtered);
|
|
}
|
|
|
|
# count the number of hits. create a query for this purpose only if we are required to.
|
|
$self->{rows} = $sth->rows();
|
|
if (($args->{nh} > 1) or ($self->{rows} == $args->{mh})) {
|
|
$self->{rows} = $tbl->count($constraints);
|
|
}
|
|
return $sth;
|
|
}
|
|
|
|
sub clean_sb {
|
|
# -------------------------------------------------------------------------------
|
|
# Convert the sort by, sort order into an sql string.
|
|
#
|
|
my ($class, $sb, $so) = @_;
|
|
my $output = '';
|
|
|
|
return $output unless ($sb);
|
|
|
|
if ($sb and not ref $sb) {
|
|
if ($sb =~ /^[\w\s,]+$/) {
|
|
if ($sb =~ /\s(?:asc|desc)/i) {
|
|
$output = 'ORDER BY ' . $sb;
|
|
}
|
|
else {
|
|
$output = 'ORDER BY ' . $sb . ' ' . $so;
|
|
}
|
|
}
|
|
else {
|
|
$class->error('BADSB', 'WARN', $sb);
|
|
}
|
|
}
|
|
elsif (ref $sb eq 'ARRAY') {
|
|
foreach ( @$sb ) {
|
|
/^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
|
|
}
|
|
$output = 'ORDER BY ' . join(',', @$sb);
|
|
}
|
|
return $output;
|
|
}
|
|
|
|
1;
|