discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER4.pm
2024-06-17 21:49:12 +10:00

356 lines
14 KiB
Perl

# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::Search::MYSQL::VER4
# Author : Aki Mimoto
# CVS Info : 087,071,086,086,085
# $Id: VER4.pm,v 1.9 2004/01/13 01:35:19 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Class used to search indexed tables.
#
package GT::SQL::Search::MYSQL::VER4;
# ------------------------------------------------------------------------------
use strict;
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS /;
use GT::SQL::Search::Base::Search;
@ISA = qw( GT::SQL::Search::Base::Search );
# ------------------------------------------------------------------------------
# Preamble information related to the object
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
$STOPWORDS = { map { $_ => 1 } qw/
a's able about above according accordingly across actually after
afterwards again against ain't all allow allows almost alone
along already also although always am among amongst an and another
any anybody anyhow anyone anything anyway anyways anywhere apart
appear appreciate appropriate are aren't around as aside ask asking
associated at available away awfully be became because become becomes
becoming been before beforehand behind being believe below beside
besides best better between beyond both brief but by c'mon c's came
can can't cannot cant cause causes certain certainly changes clearly
co com come comes concerning consequently consider considering
contain containing contains corresponding could couldn't course currently
definitely described despite did didn't different do does doesn't
doing don't done down downwards during each edu eg eight either else
elsewhere enough entirely especially et etc even ever every everybody
everyone everything everywhere ex exactly example except far few
fifth first five followed following follows for former formerly
forth four from further furthermore get gets getting given gives
go goes going gone got gotten greetings had hadn't happens hardly
has hasn't have haven't having he he's hello help hence her here
here's hereafter hereby herein hereupon hers herself hi him himself
his hither hopefully how howbeit however i'd i'll i'm i've ie if ignored
immediate in inasmuch inc indeed indicate indicated indicates inner
insofar instead into inward is isn't it it'd it'll it's its itself
just keep keeps kept know knows known last lately later latter latterly
least less lest let let's like liked likely little look looking looks
ltd mainly many may maybe me mean meanwhile merely might more
moreover most mostly much must my myself name namely nd near nearly
necessary need needs neither never nevertheless new next nine no
nobody non none noone nor normally not nothing novel now nowhere
obviously of off often oh ok okay old on once one ones only onto
or other others otherwise ought our ours ourselves out outside over
overall own particular particularly per perhaps placed please plus
possible presumably probably provides que quite qv rather rd re
really reasonably regarding regardless regards relatively respectively
right said same saw say saying says second secondly see seeing seem
seemed seeming seems seen self selves sensible sent serious seriously
seven several shall she should shouldn't since six so some somebody
somehow someone something sometime sometimes somewhat somewhere
soon sorry specified specify specifying still sub such sup sure
t's take taken tell tends th than thank thanks thanx that that's
thats the their theirs them themselves then thence there there's
thereafter thereby therefore therein theres thereupon these they
they'd they'll they're they've think third this thorough thoroughly
those though three through throughout thru thus to together too
took toward towards tried tries truly try trying twice two un
under unfortunately unless unlikely until unto up upon us use used
useful uses using usually value various very via viz vs want wants
was wasn't way we we'd we'll we're we've welcome well went were
weren't what what's whatever when whence whenever where where's
whereafter whereas whereby wherein whereupon wherever whether
which while whither who who's whoever whole whom whose why will
willing wish with within without won't wonder would would wouldn't
yes yet you you'd you'll you're you've your yours yourself
yourselves zero
/ };
$ATTRIBS = {
min_word_size => 4,
stopwords => $STOPWORDS,
};
sub query {
# --------------------------------------------------
# Returns a sth based on a query
#
# Options:
# - paging
# mh : max hits
# nh : number hit (or page of hits)
#
# - searching
# ww : whole word
# ma : 1 => OR match, 0 => AND match, undefined => QUERY
# substring : search for substrings of words
# bool : 'and' => and search, 'or' => or search, '' => regular query
# query : the string of things to ask for
#
# - filtering
# field_name : value # Find all rows with field_name = value
# field_name : ">value" # Find all rows with field_name > value.
# field_name : "<value" # Find all rows with field_name < value.
# field_name-gt : value # Find all rows with field_name > value.
# field_name-lt : value # Find all rows with field_name < value.
#
# Parameters:
# ( $CGI ) : a single cgi object
# ( $HASH ) : a hash of the parameters
#
my $self = shift;
# create an easily accessible argument hash
my $args = $self->common_param(@_);
# see if we can setup the filtering constraints
my $filter = { %$args };
my $query = delete $args->{query} || $self->{query} || '';
my $ftr_cond;
# parse query
$self->debug( "Search Query: $query" ) if ($self->{_debug});
my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
$self->{rejected_keywords} = $rejected;
# setup the additional input parameters
$query_struct = $self->_preset_options( $query_struct, $args );
# now sort into distinct buckets
my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
$self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
# with the buckets, it's now possible to create a query string
# that can be passed directly into the FULLTEXT search.
my $query_string = '';
foreach my $search_type ( keys %$buckets ) {
my $bucket = $buckets->{$search_type};
foreach my $token ( keys %$bucket ) {
next unless $token;
my $properties = $bucket->{$token} or next;
my $e = ' ';
# handle boolean operations
$properties->{mode} ||= '';
if ( $properties->{mode} eq 'must' ) {
$e .= '+';
}
elsif ( $properties->{mode} eq 'cannot' ) {
$e .= '-';
}
# deal with phrase vs keyword
if ( $properties->{phrase} ) {
$e .= '"' . quotemeta( $token ) . '"';
}
else {
$e .= quotemeta $token;
# substring match
$e .= '*' if $properties->{substring};
}
$query_string .= $e;
}
}
# start building the GT::SQL::COndition object that will allow us to
# to retreive the data
require GT::SQL::Condition;
my $tbl = $self->{table};
my $constraints = GT::SQL::Condition->new;
# create the GT::SQL::Condition object that will become the filtering
# constraints
my $filt = $self->{filter};
if ( $filt and ref $filt eq 'HASH' ) {
foreach my $fkey ( keys %$filt ) {
next if exists $args->{$fkey};
$args->{$fkey} = $filt->{$fkey};
}
}
if ( my $filter_cond = $tbl->build_query_cond( $args ) ) {
$constraints->add( $filter_cond );
}
# if the cached filter object is a Condition object, append
# it to the filter set
if ( $filt and UNIVERSAL::isa( $filt, 'GT::SQL::Condition' ) ) {
$constraints->add( $filt );
}
# create our fulltext query condition
my %weights = $tbl->_weight_cols();
my $cols = join(",", keys %weights);
if ( $query_string ) {
$constraints->add( GT::SQL::Condition->new(
"MATCH( $cols )",
"AGAINST",
\"('$query_string' IN BOOLEAN MODE)" ) );
}
# calculate the cursor constraints
foreach my $k (qw( nh mh so sb )) {
next if defined $args->{$k};
$args->{$k} = $self->{$k} || '';
}
$args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
$args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
$args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ ) ? $1 : 'score';
# if the sorting method is "score" the order is forced to "descend" (as there
# is almost no reason to order by worst matches)
# if the storing key is not "score", the default order will be "ascend"
$args->{so} =
$args->{sb} eq 'score' ? 'desc' : # comment out this entire line to prevent "descend" order forcing
( (defined $args->{so} and $args->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : 'asc' );
# check that sb is not dangerous
my $sb = $self->clean_sb($args->{sb}, $args->{so});
$self->debug_dumper( "About to query. Constraint object: ", $constraints) if ($self->{_debug});
# Setup a limit only if there is no callback. The callback argument requires a full results list
unless ( $self->{callback} ) {
my $offset = ( $args->{nh} - 1 ) * $args->{mh};
$tbl->select_options($sb) if ($sb);
$tbl->select_options("LIMIT $offset, $args->{mh}");
}
my $sth;
# if the weights are all the same value, the query can be optimized
# to use just one MATCH AGAINST argument. However, if the weights
# are different, each element must be sectioned and queried separately
# with the weight value multipler
# check to see if all the weight values are the same.
my $base_weight;
my $weights_same = 1;
foreach ( values %weights ) {
$base_weight ||= $_ or next; # init and skip 0s
next if $base_weight == $_;
$weights_same = 0;
last;
}
# multiplex the action
my $result_cols = $self->{callback} ? ($tbl->pk)[0] : '*';
unless ( $query_string ) {
$sth = $tbl->select( [ $result_cols ], $constraints ) or return;
}
elsif ( $weights_same ) {
$sth = $tbl->select( [ $result_cols, "MATCH($cols) AGAINST ('$query_string' IN BOOLEAN MODE) AS score" ], $constraints )
or return;
}
else {
# group the multiplier counts
my %column_multiplier;
foreach ( keys %weights ) {
push @{$column_multiplier{$weights{$_}}}, $_;
}
my @search_parameters;
foreach my $val ( keys %column_multiplier ) {
next unless $val;
my $cols_ar = $column_multiplier{ $val } or next;
my $search_cols = join ",", @$cols_ar;
if ( $val > 1 ) {
push @search_parameters, "( ( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) ) * $val )";
}
else {
push @search_parameters, "( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) )";
}
}
my $search_sql = "( " . join( " + ", @search_parameters ) . " ) AS score";
$sth = $tbl->select( [ $result_cols, $search_sql ], $constraints )
or return;
}
# If we have a callback, we fetch the primary key => score and pass that hash into
# the filter.
if ($self->{callback}) {
unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
return $self->error('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
}
my %results = map { $_->[0] => $_->[1] } @{$sth->fetchall_arrayref};
$self->debug_dumper("Running results through callback. Had: " . scalar (keys %results) . " results.", \%results) if ($self->{_debug});
my $filtered = $self->{callback}->($self, \%results) || {};
$self->debug_dumper("New result set: " . scalar (keys %$filtered) . " results.", $filtered) if ($self->{_debug});
$self->{rows} = scalar keys %$filtered;
return $self->sth($filtered);
}
# count the number of hits. create a query for this purpose only if we are required to.
$self->{rows} = $sth->rows();
if (($args->{nh} > 1) or ($self->{rows} == $args->{mh})) {
$self->{rows} = $tbl->count($constraints);
}
return $sth;
}
sub clean_sb {
# -------------------------------------------------------------------------------
# Convert the sort by, sort order into an sql string.
#
my ($class, $sb, $so) = @_;
my $output = '';
return $output unless ($sb);
if ($sb and not ref $sb) {
if ($sb =~ /^[\w\s,]+$/) {
if ($sb =~ /\s(?:asc|desc)/i) {
$output = 'ORDER BY ' . $sb;
}
else {
$output = 'ORDER BY ' . $sb . ' ' . $so;
}
}
else {
$class->error('BADSB', 'WARN', $sb);
}
}
elsif (ref $sb eq 'ARRAY') {
foreach ( @$sb ) {
/^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
}
$output = 'ORDER BY ' . join(',', @$sb);
}
return $output;
}
1;