First pass at adding key files
This commit is contained in:
355
site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER4.pm
Normal file
355
site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER4.pm
Normal file
@ -0,0 +1,355 @@
|
||||
# ==================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::Search::MYSQL::VER4
|
||||
# Author : Aki Mimoto
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: VER4.pm,v 1.9 2004/01/13 01:35:19 jagerman Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ==================================================================
|
||||
#
|
||||
# Description:
|
||||
# Class used to search indexed tables.
|
||||
#
|
||||
|
||||
package GT::SQL::Search::MYSQL::VER4;
|
||||
# ------------------------------------------------------------------------------
|
||||
use strict;
|
||||
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS /;
|
||||
use GT::SQL::Search::Base::Search;
|
||||
@ISA = qw( GT::SQL::Search::Base::Search );
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Preamble information related to the object
|
||||
|
||||
$DEBUG = 0;
|
||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
|
||||
$STOPWORDS = { map { $_ => 1 } qw/
|
||||
|
||||
a's able about above according accordingly across actually after
|
||||
afterwards again against ain't all allow allows almost alone
|
||||
along already also although always am among amongst an and another
|
||||
any anybody anyhow anyone anything anyway anyways anywhere apart
|
||||
appear appreciate appropriate are aren't around as aside ask asking
|
||||
associated at available away awfully be became because become becomes
|
||||
becoming been before beforehand behind being believe below beside
|
||||
besides best better between beyond both brief but by c'mon c's came
|
||||
can can't cannot cant cause causes certain certainly changes clearly
|
||||
co com come comes concerning consequently consider considering
|
||||
contain containing contains corresponding could couldn't course currently
|
||||
definitely described despite did didn't different do does doesn't
|
||||
doing don't done down downwards during each edu eg eight either else
|
||||
elsewhere enough entirely especially et etc even ever every everybody
|
||||
everyone everything everywhere ex exactly example except far few
|
||||
fifth first five followed following follows for former formerly
|
||||
forth four from further furthermore get gets getting given gives
|
||||
go goes going gone got gotten greetings had hadn't happens hardly
|
||||
has hasn't have haven't having he he's hello help hence her here
|
||||
here's hereafter hereby herein hereupon hers herself hi him himself
|
||||
his hither hopefully how howbeit however i'd i'll i'm i've ie if ignored
|
||||
immediate in inasmuch inc indeed indicate indicated indicates inner
|
||||
insofar instead into inward is isn't it it'd it'll it's its itself
|
||||
just keep keeps kept know knows known last lately later latter latterly
|
||||
least less lest let let's like liked likely little look looking looks
|
||||
ltd mainly many may maybe me mean meanwhile merely might more
|
||||
moreover most mostly much must my myself name namely nd near nearly
|
||||
necessary need needs neither never nevertheless new next nine no
|
||||
nobody non none noone nor normally not nothing novel now nowhere
|
||||
obviously of off often oh ok okay old on once one ones only onto
|
||||
or other others otherwise ought our ours ourselves out outside over
|
||||
overall own particular particularly per perhaps placed please plus
|
||||
possible presumably probably provides que quite qv rather rd re
|
||||
really reasonably regarding regardless regards relatively respectively
|
||||
right said same saw say saying says second secondly see seeing seem
|
||||
seemed seeming seems seen self selves sensible sent serious seriously
|
||||
seven several shall she should shouldn't since six so some somebody
|
||||
somehow someone something sometime sometimes somewhat somewhere
|
||||
soon sorry specified specify specifying still sub such sup sure
|
||||
t's take taken tell tends th than thank thanks thanx that that's
|
||||
thats the their theirs them themselves then thence there there's
|
||||
thereafter thereby therefore therein theres thereupon these they
|
||||
they'd they'll they're they've think third this thorough thoroughly
|
||||
those though three through throughout thru thus to together too
|
||||
took toward towards tried tries truly try trying twice two un
|
||||
under unfortunately unless unlikely until unto up upon us use used
|
||||
useful uses using usually value various very via viz vs want wants
|
||||
was wasn't way we we'd we'll we're we've welcome well went were
|
||||
weren't what what's whatever when whence whenever where where's
|
||||
whereafter whereas whereby wherein whereupon wherever whether
|
||||
which while whither who who's whoever whole whom whose why will
|
||||
willing wish with within without won't wonder would would wouldn't
|
||||
yes yet you you'd you'll you're you've your yours yourself
|
||||
yourselves zero
|
||||
|
||||
/ };
|
||||
|
||||
$ATTRIBS = {
|
||||
min_word_size => 4,
|
||||
stopwords => $STOPWORDS,
|
||||
};
|
||||
|
||||
sub query {
|
||||
# --------------------------------------------------
|
||||
# Returns a sth based on a query
|
||||
#
|
||||
# Options:
|
||||
# - paging
|
||||
# mh : max hits
|
||||
# nh : number hit (or page of hits)
|
||||
#
|
||||
# - searching
|
||||
# ww : whole word
|
||||
# ma : 1 => OR match, 0 => AND match, undefined => QUERY
|
||||
# substring : search for substrings of words
|
||||
# bool : 'and' => and search, 'or' => or search, '' => regular query
|
||||
# query : the string of things to ask for
|
||||
#
|
||||
# - filtering
|
||||
# field_name : value # Find all rows with field_name = value
|
||||
# field_name : ">value" # Find all rows with field_name > value.
|
||||
# field_name : "<value" # Find all rows with field_name < value.
|
||||
# field_name-gt : value # Find all rows with field_name > value.
|
||||
# field_name-lt : value # Find all rows with field_name < value.
|
||||
#
|
||||
# Parameters:
|
||||
# ( $CGI ) : a single cgi object
|
||||
# ( $HASH ) : a hash of the parameters
|
||||
#
|
||||
my $self = shift;
|
||||
|
||||
# create an easily accessible argument hash
|
||||
my $args = $self->common_param(@_);
|
||||
|
||||
# see if we can setup the filtering constraints
|
||||
my $filter = { %$args };
|
||||
my $query = delete $args->{query} || $self->{query} || '';
|
||||
my $ftr_cond;
|
||||
|
||||
# parse query
|
||||
$self->debug( "Search Query: $query" ) if ($self->{_debug});
|
||||
my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
|
||||
|
||||
$self->{rejected_keywords} = $rejected;
|
||||
|
||||
# setup the additional input parameters
|
||||
$query_struct = $self->_preset_options( $query_struct, $args );
|
||||
|
||||
# now sort into distinct buckets
|
||||
my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
|
||||
$self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
|
||||
|
||||
# with the buckets, it's now possible to create a query string
|
||||
# that can be passed directly into the FULLTEXT search.
|
||||
my $query_string = '';
|
||||
|
||||
foreach my $search_type ( keys %$buckets ) {
|
||||
my $bucket = $buckets->{$search_type};
|
||||
foreach my $token ( keys %$bucket ) {
|
||||
next unless $token;
|
||||
my $properties = $bucket->{$token} or next;
|
||||
|
||||
my $e = ' ';
|
||||
|
||||
# handle boolean operations
|
||||
$properties->{mode} ||= '';
|
||||
if ( $properties->{mode} eq 'must' ) {
|
||||
$e .= '+';
|
||||
}
|
||||
elsif ( $properties->{mode} eq 'cannot' ) {
|
||||
$e .= '-';
|
||||
}
|
||||
|
||||
# deal with phrase vs keyword
|
||||
if ( $properties->{phrase} ) {
|
||||
$e .= '"' . quotemeta( $token ) . '"';
|
||||
}
|
||||
else {
|
||||
$e .= quotemeta $token;
|
||||
|
||||
# substring match
|
||||
$e .= '*' if $properties->{substring};
|
||||
}
|
||||
|
||||
$query_string .= $e;
|
||||
}
|
||||
}
|
||||
|
||||
# start building the GT::SQL::COndition object that will allow us to
|
||||
# to retreive the data
|
||||
|
||||
require GT::SQL::Condition;
|
||||
my $tbl = $self->{table};
|
||||
my $constraints = GT::SQL::Condition->new;
|
||||
|
||||
# create the GT::SQL::Condition object that will become the filtering
|
||||
# constraints
|
||||
my $filt = $self->{filter};
|
||||
|
||||
if ( $filt and ref $filt eq 'HASH' ) {
|
||||
foreach my $fkey ( keys %$filt ) {
|
||||
next if exists $args->{$fkey};
|
||||
$args->{$fkey} = $filt->{$fkey};
|
||||
}
|
||||
}
|
||||
|
||||
if ( my $filter_cond = $tbl->build_query_cond( $args ) ) {
|
||||
$constraints->add( $filter_cond );
|
||||
}
|
||||
|
||||
# if the cached filter object is a Condition object, append
|
||||
# it to the filter set
|
||||
if ( $filt and UNIVERSAL::isa( $filt, 'GT::SQL::Condition' ) ) {
|
||||
$constraints->add( $filt );
|
||||
}
|
||||
|
||||
# create our fulltext query condition
|
||||
my %weights = $tbl->_weight_cols();
|
||||
my $cols = join(",", keys %weights);
|
||||
if ( $query_string ) {
|
||||
$constraints->add( GT::SQL::Condition->new(
|
||||
"MATCH( $cols )",
|
||||
"AGAINST",
|
||||
\"('$query_string' IN BOOLEAN MODE)" ) );
|
||||
}
|
||||
|
||||
# calculate the cursor constraints
|
||||
foreach my $k (qw( nh mh so sb )) {
|
||||
next if defined $args->{$k};
|
||||
$args->{$k} = $self->{$k} || '';
|
||||
}
|
||||
$args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
|
||||
$args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
|
||||
$args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ ) ? $1 : 'score';
|
||||
|
||||
# if the sorting method is "score" the order is forced to "descend" (as there
|
||||
# is almost no reason to order by worst matches)
|
||||
# if the storing key is not "score", the default order will be "ascend"
|
||||
$args->{so} =
|
||||
$args->{sb} eq 'score' ? 'desc' : # comment out this entire line to prevent "descend" order forcing
|
||||
( (defined $args->{so} and $args->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : 'asc' );
|
||||
|
||||
# check that sb is not dangerous
|
||||
my $sb = $self->clean_sb($args->{sb}, $args->{so});
|
||||
|
||||
$self->debug_dumper( "About to query. Constraint object: ", $constraints) if ($self->{_debug});
|
||||
|
||||
# Setup a limit only if there is no callback. The callback argument requires a full results list
|
||||
unless ( $self->{callback} ) {
|
||||
my $offset = ( $args->{nh} - 1 ) * $args->{mh};
|
||||
$tbl->select_options($sb) if ($sb);
|
||||
$tbl->select_options("LIMIT $offset, $args->{mh}");
|
||||
}
|
||||
|
||||
my $sth;
|
||||
|
||||
# if the weights are all the same value, the query can be optimized
|
||||
# to use just one MATCH AGAINST argument. However, if the weights
|
||||
# are different, each element must be sectioned and queried separately
|
||||
# with the weight value multipler
|
||||
|
||||
# check to see if all the weight values are the same.
|
||||
my $base_weight;
|
||||
my $weights_same = 1;
|
||||
foreach ( values %weights ) {
|
||||
$base_weight ||= $_ or next; # init and skip 0s
|
||||
next if $base_weight == $_;
|
||||
$weights_same = 0;
|
||||
last;
|
||||
}
|
||||
|
||||
# multiplex the action
|
||||
my $result_cols = $self->{callback} ? ($tbl->pk)[0] : '*';
|
||||
|
||||
unless ( $query_string ) {
|
||||
$sth = $tbl->select( [ $result_cols ], $constraints ) or return;
|
||||
}
|
||||
elsif ( $weights_same ) {
|
||||
$sth = $tbl->select( [ $result_cols, "MATCH($cols) AGAINST ('$query_string' IN BOOLEAN MODE) AS score" ], $constraints )
|
||||
or return;
|
||||
}
|
||||
else {
|
||||
|
||||
# group the multiplier counts
|
||||
my %column_multiplier;
|
||||
foreach ( keys %weights ) {
|
||||
push @{$column_multiplier{$weights{$_}}}, $_;
|
||||
}
|
||||
|
||||
my @search_parameters;
|
||||
foreach my $val ( keys %column_multiplier ) {
|
||||
next unless $val;
|
||||
|
||||
my $cols_ar = $column_multiplier{ $val } or next;
|
||||
my $search_cols = join ",", @$cols_ar;
|
||||
|
||||
if ( $val > 1 ) {
|
||||
push @search_parameters, "( ( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) ) * $val )";
|
||||
}
|
||||
else {
|
||||
push @search_parameters, "( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) )";
|
||||
}
|
||||
}
|
||||
|
||||
my $search_sql = "( " . join( " + ", @search_parameters ) . " ) AS score";
|
||||
|
||||
$sth = $tbl->select( [ $result_cols, $search_sql ], $constraints )
|
||||
or return;
|
||||
}
|
||||
|
||||
# If we have a callback, we fetch the primary key => score and pass that hash into
|
||||
# the filter.
|
||||
if ($self->{callback}) {
|
||||
unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
|
||||
return $self->error('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
|
||||
}
|
||||
my %results = map { $_->[0] => $_->[1] } @{$sth->fetchall_arrayref};
|
||||
|
||||
$self->debug_dumper("Running results through callback. Had: " . scalar (keys %results) . " results.", \%results) if ($self->{_debug});
|
||||
my $filtered = $self->{callback}->($self, \%results) || {};
|
||||
$self->debug_dumper("New result set: " . scalar (keys %$filtered) . " results.", $filtered) if ($self->{_debug});
|
||||
|
||||
$self->{rows} = scalar keys %$filtered;
|
||||
return $self->sth($filtered);
|
||||
}
|
||||
|
||||
# count the number of hits. create a query for this purpose only if we are required to.
|
||||
$self->{rows} = $sth->rows();
|
||||
if (($args->{nh} > 1) or ($self->{rows} == $args->{mh})) {
|
||||
$self->{rows} = $tbl->count($constraints);
|
||||
}
|
||||
return $sth;
|
||||
}
|
||||
|
||||
sub clean_sb {
|
||||
# -------------------------------------------------------------------------------
|
||||
# Convert the sort by, sort order into an sql string.
|
||||
#
|
||||
my ($class, $sb, $so) = @_;
|
||||
my $output = '';
|
||||
|
||||
return $output unless ($sb);
|
||||
|
||||
if ($sb and not ref $sb) {
|
||||
if ($sb =~ /^[\w\s,]+$/) {
|
||||
if ($sb =~ /\s(?:asc|desc)/i) {
|
||||
$output = 'ORDER BY ' . $sb;
|
||||
}
|
||||
else {
|
||||
$output = 'ORDER BY ' . $sb . ' ' . $so;
|
||||
}
|
||||
}
|
||||
else {
|
||||
$class->error('BADSB', 'WARN', $sb);
|
||||
}
|
||||
}
|
||||
elsif (ref $sb eq 'ARRAY') {
|
||||
foreach ( @$sb ) {
|
||||
/^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
|
||||
}
|
||||
$output = 'ORDER BY ' . join(',', @$sb);
|
||||
}
|
||||
return $output;
|
||||
}
|
||||
|
||||
1;
|
Reference in New Issue
Block a user