First pass at adding key files
This commit is contained in:
@ -0,0 +1,82 @@
|
||||
# ==================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::SQL::Search::Base::Common
|
||||
# Author : Aki Mimoto
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: Common.pm,v 1.8 2004/10/13 21:45:02 aki Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ==================================================================
|
||||
#
|
||||
# Description:
|
||||
# Base classes upon which all search drivers are based
|
||||
#
|
||||
package GT::SQL::Search::Base::Common;
|
||||
|
||||
use strict;
|
||||
use Exporter;
|
||||
use vars qw/ @ISA @EXPORT $STOPWORDS /;
|
||||
|
||||
@ISA = qw( Exporter );
|
||||
@EXPORT = qw( &_tokenize &_check_word $STOPWORDS );
|
||||
|
||||
$STOPWORDS = { map { $_ => 1 } qw/
|
||||
of about or all several also she among since an some and such are than
|
||||
as that at the be them because there been these between they both this
|
||||
but those by to do toward during towards each upon either for from was
|
||||
had were has what have when he where her which his while however with if
|
||||
within in would into you your is it its many more most must on re it
|
||||
test not above add am pm jan january feb february mar march apr april
|
||||
may jun june jul july aug august sep sept september oct october nov
|
||||
november dec december find & > < we http com www inc other
|
||||
including
|
||||
/ };
|
||||
|
||||
sub _tokenize {
|
||||
#--------------------------------------------------------------------------------
|
||||
# takes a strings and chops it up into little bits
|
||||
my $self = shift;
|
||||
my $text = shift;
|
||||
my ( @words, $i, %rejected, $word, $code );
|
||||
|
||||
# split on any non-word (includes accents) characters
|
||||
@words = split /[^\w\x80-\xFF\-]+/, lc $text;
|
||||
$self->debug_dumper( "Words: ", \@words ) if ($self->{_debug});
|
||||
|
||||
# drop all words that are too small, etc.
|
||||
$i = 0;
|
||||
while ( $i <= $#words ) {
|
||||
$word = $words[ $i ];
|
||||
if ((exists $self->{stopwords}{$word} and ($code = 'STOPWORD')) or
|
||||
(length($word) < $self->{min_word_size} and $code = 'TOOSMALL' ) or
|
||||
(length($word) > $self->{max_word_size} and $code = 'TOOBIG')) {
|
||||
splice( @words, $i, 1 );
|
||||
$rejected{$word} = $self->{'rejections'}->{$code};
|
||||
}
|
||||
else {
|
||||
$i++; # Words ok.
|
||||
}
|
||||
}
|
||||
$self->debug_dumper( "Accepted Words: ", \@words ) if ($self->{_debug});
|
||||
$self->debug_dumper( "Rejected Words: ", \%rejected ) if ($self->{_debug});
|
||||
|
||||
return ( \@words, \%rejected );
|
||||
}
|
||||
|
||||
sub _check_word {
|
||||
#--------------------------------------------------------------------------------
|
||||
# Returns an error code if it is an invalid word, otherwise returns nothing.
|
||||
#
|
||||
my $self = shift;
|
||||
my $word = shift;
|
||||
my $code;
|
||||
if ((exists $self->{stopwords}{$word} and ($code = 'STOPWORD')) or
|
||||
(length($word) < $self->{min_word_size} and $code = 'TOOSMALL' ) or
|
||||
(length($word) > $self->{max_word_size} and $code = 'TOOBIG')) {
|
||||
return $code;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
1;
|
@ -0,0 +1,78 @@
|
||||
# ====================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::SQL::Search::Base::Indexer
|
||||
# Author: Aki Mimoto
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: Indexer.pm,v 1.4 2004/01/13 01:35:19 jagerman Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ====================================================================
|
||||
#
|
||||
# Description:
|
||||
#
|
||||
#
|
||||
|
||||
package GT::SQL::Search::Base::Indexer;
|
||||
|
||||
use strict;
|
||||
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
|
||||
use GT::Base;
|
||||
use GT::SQL::Search::Base::Common;
|
||||
|
||||
#--------------------------------------------------------------------------------
|
||||
# Preamble information related to the object
|
||||
|
||||
$DEBUG = 0;
|
||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.4 $ =~ /(\d+)\.(\d+)/;
|
||||
@ISA = qw/GT::Base GT::SQL::Search::Base::Common/;
|
||||
$ATTRIBS = {
|
||||
driver => undef,
|
||||
stopwords => $STOPWORDS,
|
||||
rejections => {
|
||||
STOPWORD => "is a stopword",
|
||||
TOOSMALL => "is too small a word",
|
||||
TOOBIG => "is too big a word"
|
||||
},
|
||||
table => '',
|
||||
init => 0,
|
||||
debug => 0,
|
||||
min_word_size => 3,
|
||||
max_word_size => 50,
|
||||
};
|
||||
|
||||
sub drop_search_driver { 1 }
|
||||
sub add_search_driver { 1 }
|
||||
|
||||
# found in GT::SQL::Creator
|
||||
sub pre_create_table { 1 }
|
||||
sub post_create_table { 1 }
|
||||
|
||||
# GT::SQL::Editor
|
||||
sub pre_add_column { 1 }
|
||||
sub post_add_column { 1 }
|
||||
|
||||
sub pre_delete_column { 1 }
|
||||
sub post_delete_column { 1 }
|
||||
|
||||
sub pre_drop_table { 1 }
|
||||
sub post_drop_table { 1 }
|
||||
|
||||
# GT::SQL::Table
|
||||
sub pre_add_record { 1 }
|
||||
sub post_add_record { 1 }
|
||||
|
||||
sub pre_update_record { 1 }
|
||||
sub post_update_record { 1 }
|
||||
|
||||
sub pre_delete_record { 1 }
|
||||
sub post_delete_record { 1 }
|
||||
|
||||
sub pre_delete_all_records { 1 }
|
||||
sub post_delete_all_records { 1 }
|
||||
|
||||
sub driver_ok { 1 }
|
||||
|
||||
sub reindex_all { 1 }
|
||||
|
||||
1;
|
287
site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/Base/STH.pm
Normal file
287
site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/Base/STH.pm
Normal file
@ -0,0 +1,287 @@
|
||||
# ====================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::SQL::Search::STH
|
||||
# Author: Aki Mimoto
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: STH.pm,v 1.25 2004/08/28 03:53:46 jagerman Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ====================================================================
|
||||
#
|
||||
|
||||
package GT::SQL::Search::STH;
|
||||
#--------------------------------------------------------------------------------
|
||||
use strict;
|
||||
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE /;
|
||||
use GT::Base;
|
||||
|
||||
@ISA = ('GT::Base');
|
||||
$ATTRIBS = {
|
||||
'_debug' => 0,
|
||||
'sth' => undef,
|
||||
'results' => {},
|
||||
'db' => undef,
|
||||
'table' => undef,
|
||||
'index' => 0,
|
||||
'order' => [],
|
||||
'sb' => 'score',
|
||||
'so' => '',
|
||||
'score_col' => 'SCORE',
|
||||
'score_sort'=> 0,
|
||||
'nh' => 0,
|
||||
'mh' => 0
|
||||
};
|
||||
$ERROR_MESSAGE = 'GT::SQL';
|
||||
$ERRORS = {
|
||||
BADSB => 'Invalid character found in so: "%s"',
|
||||
};
|
||||
|
||||
sub init {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
|
||||
# setup the options
|
||||
$self->set(@_);
|
||||
|
||||
# correct a few of the values
|
||||
--$self->{nh} if $self->{nh};
|
||||
|
||||
my $sth;
|
||||
my $results = $self->{results};
|
||||
$self->{rows} = scalar( $results ? keys %{$results} : 0 );
|
||||
|
||||
# if we have asked to have sorting by another column (non score), create the part of the query that handles taht
|
||||
$self->debug( "Setting up sorting for GT::SQL::Search::STH" ) if ($self->{_debug});
|
||||
my $sb;
|
||||
|
||||
# clean up the sort by columns.
|
||||
unless ($self->{'score_sort'}) {
|
||||
$sb = GT::SQL::Search::Base::Search->clean_sb($self->{sb}, $self->{so});
|
||||
}
|
||||
|
||||
# setup the max hits and the offsets
|
||||
$self->{index} = $self->{nh} * $self->{mh} || 0;
|
||||
$self->{max_index} = $self->{index} + $self->{mh} - 1; # index + mh is the first record which should not be returned.
|
||||
|
||||
if ( $self->{max_index} > $self->{rows} ) {
|
||||
$self->{max_index} = $self->{rows};
|
||||
$self->{rows} = $self->{rows} - $self->{index};
|
||||
$self->{rows} < 0 ? $self->{rows} = 0 : 0;
|
||||
}
|
||||
|
||||
else {
|
||||
$self->{rows} = $self->{mh};
|
||||
}
|
||||
|
||||
# if we are sorting by another column, handle that
|
||||
if ( $sb and (keys %{$self->{results}})) {
|
||||
my ( $table, $pk ) = $self->_table_info();
|
||||
my ( $query, $where, $st, $limit );
|
||||
|
||||
$where = qq!$pk in(! . join( ",", keys %{$self->{results}}) . ')';
|
||||
$self->{mh} and $limit = qq!LIMIT $self->{index}, $self->{rows}!;
|
||||
$query = qq!
|
||||
SELECT $pk
|
||||
FROM $table
|
||||
WHERE $where
|
||||
$sb
|
||||
$limit
|
||||
!;
|
||||
$self->debug( "Row fetch query: $query" ) if ($self->{_debug});
|
||||
$sth = $self->{table}->{driver}->prepare( $query );
|
||||
$sth->execute();
|
||||
|
||||
# fix the counts
|
||||
$self->{index} = 0;
|
||||
$self->{max_hits} = $self->{rows};
|
||||
|
||||
# now return them
|
||||
my $order = $sth->fetchall_arrayref();
|
||||
$sth->finish();
|
||||
|
||||
$self->{'order'} = [ map { $_->[0] } @{$order} ];
|
||||
}
|
||||
else {
|
||||
$self->{'order'} = [ sort {
|
||||
( $results->{$b} || return 0 ) <=> ( $results->{$a} || 0 )
|
||||
} keys %{$results} ];
|
||||
$self->debug_dumper( "Results will be presented in the following order: ", $self->{'order'} ) if ($self->{_debug});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
sub cache_results {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
|
||||
my $results = $self->{'results'};
|
||||
my ($sth, @records, $i, %horder, @order, $in_list);
|
||||
my $table = $self->{table};
|
||||
my $tname = $table->name();
|
||||
my ($pk) = $self->{table}->pk;
|
||||
|
||||
use GT::SQL::Condition;
|
||||
|
||||
# we know what we're doing here so shut off warns (complains about uninit'd values in range
|
||||
# if thee aren't enough elements in the order array)
|
||||
my $w = $^W; $^W = 0;
|
||||
@order = grep $_, @{$self->{'order'} || []}[$self->{index}..$self->{max_index}] or return [];
|
||||
$^W = $w;
|
||||
|
||||
$i = 0; %horder = ( map { ( $_ => $i++) } @order );
|
||||
$in_list = join ( ",", @order );
|
||||
my $query = qq|
|
||||
SELECT *
|
||||
FROM
|
||||
$tname
|
||||
WHERE
|
||||
$pk IN($in_list)
|
||||
|;
|
||||
|
||||
# the following is left commented out as...
|
||||
# if $tbl->select is used $table->hits() will not
|
||||
# return an accurate count of the number of all the hits. instead, will return
|
||||
# a value up to mh. $tbl->hits() is important because the value is used
|
||||
# in toolbar calculations
|
||||
#
|
||||
# $sth = $table->select( GT::SQL::Condition->new( $pk, 'IN', \"($in_list)" ) );
|
||||
$sth = $table->do_query( $query );
|
||||
|
||||
while ( my $href = $sth->fetchrow_hashref() ) {
|
||||
$records[$horder{$href->{$pk}}] = \%$href
|
||||
}
|
||||
|
||||
return \@records;
|
||||
|
||||
}
|
||||
|
||||
sub fetchrow_array {
|
||||
#--------------------------------------------------------------------------------
|
||||
return @{ $_[0]->fetchrow_arrayref() || [] };
|
||||
}
|
||||
|
||||
sub fetchrow_arrayref {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my $records = $self->{cache} ||= $self->cache_results;
|
||||
my $href = shift @$records or return;
|
||||
return $self->_hash_to_array($href);
|
||||
}
|
||||
|
||||
sub fetchrow_hashref {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
|
||||
my $results = $self->{'results'};
|
||||
my $records = $self->{cache} ||= $self->cache_results;
|
||||
my $table = $self->{table};
|
||||
my ($pk) = $self->{table}->pk;
|
||||
|
||||
my $href = shift @$records or return;
|
||||
|
||||
$href->{$self->{'score_col'}} = ( $self->{score} = $results->{$href->{$pk}} );
|
||||
|
||||
return $href;
|
||||
|
||||
}
|
||||
|
||||
sub fetchall_hashref {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my @results;
|
||||
while (my $res = $self->fetchrow_hashref) {
|
||||
push @results, $res;
|
||||
}
|
||||
return \@results;
|
||||
}
|
||||
|
||||
sub fetchall_list {
|
||||
#--------------------------------------------------------------------------------
|
||||
return { map { @$_ } @{shift->fetchall_arrayref} }
|
||||
}
|
||||
|
||||
sub fetchall_arrayref {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
|
||||
$self->{order} or return [];
|
||||
my $results = $self->{results};
|
||||
my ($pk) = $self->{table}->pk;
|
||||
my $scol = $self->{score_col};
|
||||
|
||||
|
||||
if (!$self->{allref_cache}) {
|
||||
$self->{allref_cache} ||= $self->cache_results;
|
||||
|
||||
for my $i ( 0 .. $#{$self->{allref_cache}} ) {
|
||||
my $element = $self->{allref_cache}->[$i];
|
||||
if ( $_[0] eq 'HASH' ) {
|
||||
$element->{$scol} = $results->{$element->{$pk}};
|
||||
}
|
||||
else {
|
||||
$element->{$scol} = $self->_hash_to_array( $element->{$scol} );
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
my $records = $self->{allref_cache};
|
||||
|
||||
return $records;
|
||||
}
|
||||
|
||||
sub score {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
return $self->{score};
|
||||
}
|
||||
|
||||
sub _hash_to_array {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my $href = shift or return;
|
||||
|
||||
my $results = $self->{'results'};
|
||||
my $table = $self->{table};
|
||||
my $cols = $table->cols();
|
||||
my $ordercols = ( $self->{ordercols} ||= [ sort { $cols->{$a}->{pos} <=> $cols->{$b}->{pos} } keys %$cols ] );
|
||||
my ($pk) = $self->{table}->pk;
|
||||
my $aref = [ map { $href->{$_} } @$ordercols, ( $self->{score} = $results->{$href->{$pk}} ) ];
|
||||
|
||||
return $aref;
|
||||
}
|
||||
|
||||
sub rows {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
return $self->{rows};
|
||||
}
|
||||
|
||||
sub _table_info {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my $table = $self->{table}->name() or return $self->error('NOSCHEMA', 'FATAL');
|
||||
my ($pk) = $self->{table}->pk;
|
||||
return ( $table, $pk );
|
||||
}
|
||||
|
||||
sub DESTROY {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
$self->{'sth'} and $self->{'sth'}->finish();
|
||||
}
|
||||
|
||||
sub debug_dumper {
|
||||
#--------------------------------------------------------------------------------
|
||||
# calls debug but also dumps all the messages
|
||||
my $self = shift;
|
||||
my $message = shift;
|
||||
my $level = ref $_[0] ? 1 : shift;
|
||||
|
||||
if ( $self->{_debug} >= $level ) {
|
||||
require GT::Dumper;
|
||||
$self->debug( $message . join( "", map { GT::Dumper::Dumper($_) } @_ )) if ($self->{_debug});
|
||||
}
|
||||
}
|
||||
|
||||
1;
|
@ -0,0 +1,572 @@
|
||||
# ==================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::SQL::Search::Base
|
||||
# Author : Aki Mimoto
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: Search.pm,v 1.25 2004/08/28 03:53:46 jagerman Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ==================================================================
|
||||
#
|
||||
# Description:
|
||||
# Base classes upon which all search drivers are based
|
||||
#
|
||||
|
||||
package GT::SQL::Search::Base::Search;
|
||||
|
||||
|
||||
use strict;
|
||||
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
|
||||
use GT::Base;
|
||||
use GT::SQL::Search::Base::Common;
|
||||
@ISA = qw( GT::Base GT::SQL::Search::Base::Common);
|
||||
|
||||
#--------------------------------------------------------------------------------
|
||||
# Preamble information related to the object
|
||||
|
||||
$DEBUG = 0;
|
||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.25 $ =~ /(\d+)\.(\d+)/;
|
||||
@ISA = qw/ GT::Base /;
|
||||
|
||||
$ATTRIBS = {
|
||||
'stopwords' => $STOPWORDS,
|
||||
'mh' => 25,
|
||||
'nh' => 1,
|
||||
'ww' => undef,
|
||||
'ma' => undef,
|
||||
'bool' => undef,
|
||||
'substring' => 0,
|
||||
'query' => '',
|
||||
'sb' => 'score',
|
||||
'so' => '',
|
||||
'score_col' => 'SCORE',
|
||||
'score_sort'=> 0,
|
||||
'debug' => 0,
|
||||
'_debug' => 0,
|
||||
|
||||
# query related
|
||||
'db' => undef,
|
||||
'table' => undef,
|
||||
'filter' => undef,
|
||||
'callback' => undef,
|
||||
|
||||
# strict matching of indexed words, accents on words do count
|
||||
'sm' => 0,
|
||||
'min_word_size' => 3,
|
||||
'max_word_size' => 50,
|
||||
};
|
||||
|
||||
sub init {
|
||||
#--------------------------------------------------------------------------------
|
||||
# Initialises the Search object
|
||||
#
|
||||
my $self = shift;
|
||||
my $input = $self->common_param(@_);
|
||||
|
||||
$self->set($input);
|
||||
|
||||
# now handle filters...,
|
||||
my $tbl = $self->{table};
|
||||
my $cols = $tbl->cols();
|
||||
my %filters = map {
|
||||
(my $tmp = $_) =~ s/-[lg]t$//;
|
||||
exists $cols->{$tmp} ? ($_ => $input->{$_}) : ()
|
||||
} keys %{$input};
|
||||
|
||||
if ( keys %filters ) {
|
||||
$self->debug_dumper( "INIT: Creating Filters: ", \%filters ) if ( $self->{_debug} );
|
||||
$self->filter(\%filters);
|
||||
}
|
||||
|
||||
$self->{table}->connect;
|
||||
}
|
||||
|
||||
sub query {
|
||||
#--------------------------------------------------------------------------------
|
||||
# Returns a sth based on a query
|
||||
#
|
||||
# Options:
|
||||
# - paging
|
||||
# mh : max hits
|
||||
# nh : number hit (or page of hits)
|
||||
#
|
||||
# - searching
|
||||
# ww : whole word
|
||||
# ma : 1 => OR match, 0 => AND match, undefined => QUERY
|
||||
# substring : search for substrings of words
|
||||
# bool : 'and' => and search, 'or' => or search, '' => regular query
|
||||
# query : the string of things to ask for
|
||||
#
|
||||
# - filtering
|
||||
# field_name : value # Find all rows with field_name = value
|
||||
# field_name : ">value" # Find all rows with field_name > value.
|
||||
# field_name : "<value" # Find all rows with field_name < value.
|
||||
# field_name-gt : value # Find all rows with field_name > value.
|
||||
# field_name-lt : value # Find all rows with field_name < value.
|
||||
#
|
||||
# Parameters:
|
||||
# ( $CGI ) : a single cgi object
|
||||
# ( $HASH ) : a hash of the parameters
|
||||
#
|
||||
my $self = shift;
|
||||
# find out what sort of a parameter we're dealing with
|
||||
my $input = $self->common_param(@_);
|
||||
|
||||
# add additional parameters if required
|
||||
foreach my $parameter ( keys %{$ATTRIBS} ) {
|
||||
if ( not exists $input->{$parameter} ) {
|
||||
$input->{$parameter} = $self->{$parameter};
|
||||
}
|
||||
}
|
||||
|
||||
# parse query...,
|
||||
$self->debug( "Search Query: $$input{query}", 1 ) if ($self->{_debug});
|
||||
my ( $query, $rejected ) = $self->_parse_query_string( $input->{'query'} );
|
||||
|
||||
$self->{'rejected_keywords'} = $rejected;
|
||||
|
||||
# setup the additional input parameters
|
||||
$query = $self->_preset_options( $query, $input );
|
||||
|
||||
$self->debug_dumper( "Set the pre-options: ", $query ) if ($self->{_debug});
|
||||
|
||||
# now sort into distinct buckets
|
||||
my $buckets = &_create_buckets( $query );
|
||||
$self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
|
||||
|
||||
return $self->_query($input, $buckets);
|
||||
}
|
||||
|
||||
sub _query {
|
||||
#--------------------------------------------------------------------------------
|
||||
my ( $self, $input, $buckets ) = @_;
|
||||
|
||||
# now handle the separate possibilities
|
||||
my $results = {};
|
||||
|
||||
# query can have phrases
|
||||
$results = $self->_phrase_query( $buckets->{phrases}, $results );
|
||||
$self->debug_dumper("Did phrase union query. Current result set and scores: ", $results ) if ($self->{_debug});
|
||||
|
||||
# query have keywords
|
||||
$results = $self->_union_query( $buckets->{keywords}, $results );
|
||||
$self->debug_dumper("Did keyword union query. Current result set and scores: ", $results ) if ($self->{_debug});
|
||||
|
||||
# query must have phrases
|
||||
$results = $self->_phrase_intersect_query( $buckets->{phrases_must}, $results );
|
||||
$self->debug_dumper("Did phrase intersect query. Current result set and scores: ", $results ) if ($self->{_debug});
|
||||
|
||||
# query must have keywords
|
||||
$results = $self->_intersect_query( $buckets->{keywords_must}, $results );
|
||||
$self->debug_dumper("Did keyword intersect query. Current result set and scores: ", $results ) if ($self->{_debug});
|
||||
|
||||
# query cannot have keywords
|
||||
$results = $self->_disjoin_query( $buckets->{keywords_cannot}, $results );
|
||||
$self->debug_dumper("Did keyword disjoin query. Current result set and scores: ", $results ) if ($self->{_debug});
|
||||
|
||||
# query cannot have phrases
|
||||
$results = $self->_phrase_disjoin_query( $buckets->{phrases_cannot}, $results);
|
||||
$self->debug_dumper("Did phrase disjoin query. Current result set and scores: ", $results ) if ($self->{_debug});
|
||||
|
||||
# now handle filters
|
||||
my $cols = $self->{'table'}->cols();
|
||||
my %filters = map {
|
||||
(my $tmp = $_) =~ s/-[lg]t$//;
|
||||
$cols->{$tmp} ? ($_ => $input->{$_}) : ()
|
||||
} keys %{$input};
|
||||
|
||||
if (keys %filters) {
|
||||
$self->debug( "Creating Filters: ", \%filters ) if ($self->{_debug});
|
||||
$results = $self->filter(\%filters, $results);
|
||||
}
|
||||
elsif ($self->{filter}) {
|
||||
$self->debug( "Filtering results", $self->{filter} ) if ($self->{_debug});
|
||||
$results = $self->_filter_query( $self->{filter}, $results );
|
||||
}
|
||||
else {
|
||||
$self->debug( "No filters being used.") if ($self->{_debug});
|
||||
}
|
||||
|
||||
# now this query should probably clear the filters once it's been used, so i'll dothat here
|
||||
$self->{filter} = undef;
|
||||
|
||||
# now run through a callback function if needed.
|
||||
if ($self->{callback}) {
|
||||
unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
|
||||
return $self->error ('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
|
||||
}
|
||||
$self->debug_dumper ("Running results through callback. Had: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
|
||||
$results = $self->{callback}->($self, $results);
|
||||
$self->debug_dumper ("New result set: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
|
||||
}
|
||||
|
||||
# so how many hits did we get?
|
||||
$self->{table}->{last_hits} = ( $self->{rows} = scalar($results ? keys %{$results} : ()) );
|
||||
|
||||
# and now create a search sth object to handle all this
|
||||
return $self->sth( $results );
|
||||
}
|
||||
|
||||
sub sth {
|
||||
#--------------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
my $results = shift;
|
||||
|
||||
require GT::SQL::Search::Base::STH;
|
||||
my $sth = GT::SQL::Search::STH->new(
|
||||
'results' => $results,
|
||||
'db' => $self->{table}->{driver},
|
||||
# pass the following attributes down to the STH handler
|
||||
map({ ($_ => $self->{$_}) } qw/ table sb so score_col score_sort nh mh rows _debug /)
|
||||
);
|
||||
|
||||
return $sth;
|
||||
}
|
||||
|
||||
sub rows {
|
||||
#--------------------------------------------------------------------------------
|
||||
# after a query is run, returns the number of rows
|
||||
my $self = shift;
|
||||
return $self->{rows} || 0;
|
||||
}
|
||||
|
||||
sub _add_filters {
|
||||
#--------------------------------------------------------------------------------
|
||||
# creates the filter object
|
||||
my $self = shift;
|
||||
my $filter;
|
||||
|
||||
# find out how we're calling the parameters
|
||||
if ( ref $_[0] eq 'GT::SQL::Condition' ) {
|
||||
$filter = shift;
|
||||
}
|
||||
elsif ( ref $_[0] eq 'HASH' ) {
|
||||
|
||||
|
||||
# setup the query condition using the build_query condition method
|
||||
# build the condition object
|
||||
my %opts = %{ shift() || {} };
|
||||
delete $opts{query};
|
||||
|
||||
$filter = $self->{table}->build_query_cond( \%opts, $self->{table}{schema}{cols} );
|
||||
|
||||
}
|
||||
else {
|
||||
return $self->error ('BADARGS', 'FATAL', "Invalid argument: $_[0] passed to _add_filter");
|
||||
}
|
||||
|
||||
# Use ref, as someone can pass in filter => 1 and mess things up.
|
||||
|
||||
ref $self->{filter} ? $self->{filter}->add ($filter) : ($self->{filter} = $filter);
|
||||
$self->debug_dumper( "Filters: ", $self->{filter} ) if ($self->{_debug});
|
||||
|
||||
return $self->{filter};
|
||||
|
||||
}
|
||||
|
||||
sub _preset_options {
|
||||
#--------------------------------------------------------------------------------
|
||||
# sets up word parameters
|
||||
my $self = shift;
|
||||
my $query = shift or return;
|
||||
my $input = shift or return $query;
|
||||
|
||||
# whole word searching
|
||||
if ( defined $input->{'ww'} or defined $self->{'ww'}) {
|
||||
if ( defined $input->{'ww'} ? $input->{'ww'} : $self->{'ww'} ) {
|
||||
for ( keys %{$query} ) { $query->{$_}->{mode} = 'substring'; }
|
||||
}
|
||||
}
|
||||
|
||||
# substring searching
|
||||
if ( defined $input->{'substring'} or defined $self->{'substring'}) {
|
||||
if ( defined $input->{'substring'} ? $input->{'substring'} : $self->{'substring'} ) {
|
||||
for ( keys %{$query} ) { $query->{$_}->{'substring'} = 1; }
|
||||
}
|
||||
}
|
||||
|
||||
if ( defined $input->{'ma'} or defined $self->{'ma'} ) {
|
||||
# each keyword must be included
|
||||
if ( defined $input->{'ma'} ? $input->{'ma'} : $self->{'ma'} ) {
|
||||
for ( keys %{$query} ) {
|
||||
next if $query->{$_}->{mode} eq 'cannot';
|
||||
$query->{$_}->{mode} = 'must';
|
||||
}
|
||||
}
|
||||
# each word can be included but is not necessary
|
||||
else {
|
||||
for ( keys %{$query} ) { $query->{$_}->{mode} = 'can'; }
|
||||
}
|
||||
}
|
||||
|
||||
# some more and or searches, only if user hasn't put +word -word
|
||||
if ( lc( $input->{'bool'} || $self->{'bool'} ) eq 'and' ) {
|
||||
unless ($input->{query} =~ /(?:^|\s)[+-]\w/) {
|
||||
for ( keys %{$query} ) {
|
||||
next if $query->{$_}->{mode} eq 'cannot';
|
||||
$query->{$_}->{mode} = 'must';
|
||||
}
|
||||
}
|
||||
}
|
||||
elsif ( lc( $input->{'bool'} || $self->{'bool'} ) eq 'or' ) {
|
||||
unless ($input->{query} =~ /(?:^|\s)[+-]\w/) {
|
||||
for ( keys %{$query} ) { $query->{$_}->{mode} = 'can'; }
|
||||
}
|
||||
}
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
sub _phrase_query { $_[1] }
|
||||
sub _union_query { $_[1] }
|
||||
sub _phrase_intersect_query { $_[1] }
|
||||
sub _intersect_query { $_[1] }
|
||||
sub _disjoin_query { $_[1] }
|
||||
sub _phrase_disjoin_query { $_[1] }
|
||||
|
||||
sub filter {
|
||||
#--------------------------------------------------------------------------------
|
||||
# adds a filter
|
||||
#
|
||||
my $self = shift;
|
||||
|
||||
# add filters..,
|
||||
my $filters = $self->_add_filters( shift );
|
||||
my $results = shift;
|
||||
|
||||
# see if we need to execute a search, otherwise just return the current filterset
|
||||
defined $results or return $results;
|
||||
|
||||
# start doing the filter stuff
|
||||
return $self->_filter_query( $filters, $results );
|
||||
}
|
||||
|
||||
sub _parse_query_string {
|
||||
#------------------------------------------------------------
|
||||
# from Mastering Regular Expressions altered a fair bit
|
||||
# takes a space delimited string and breaks it up.
|
||||
#
|
||||
my $self = shift;
|
||||
my $text = shift;
|
||||
|
||||
my %words = ();
|
||||
my %reject = ();
|
||||
my %mode = (
|
||||
'+' => 'must',
|
||||
'-' => 'cannot',
|
||||
'<' => 'greater',
|
||||
'>' => 'less'
|
||||
);
|
||||
|
||||
# work on the individual elements
|
||||
my @new = ();
|
||||
while ( $text =~ m{
|
||||
# the first part groups the phrase inside the quotes.
|
||||
# see explanation of this pattern in MRE
|
||||
([+-]?"[^\"\\]*(?:\\.[^\"\\]*)*)" ?
|
||||
| (\+?[\w\x80-\xFF\-\*]+),?
|
||||
| ' '
|
||||
}gx ) {
|
||||
|
||||
my $match = lc $+;
|
||||
|
||||
# strip out buffering spaces
|
||||
$match =~ s/^\s+//; $match =~ s/\s+$//;
|
||||
|
||||
# don't bother trying if there is nothing there
|
||||
next unless $match;
|
||||
|
||||
# find out the searching mode
|
||||
my ($mode, $substring, $phrase);
|
||||
if (my $m = $mode{substr($match,0,1)}) {
|
||||
$match = substr($match,1);
|
||||
$mode = $m;
|
||||
}
|
||||
|
||||
# do we need to substring match?
|
||||
if ( substr( $match, -1, 1 ) eq "*" ) {
|
||||
$match = substr($match,0,length($match)-1);
|
||||
$substring = 1;
|
||||
}
|
||||
|
||||
# find out if we're dealing with a phrase
|
||||
if ( substr($match,0,1) eq '"' ) {
|
||||
$self->debug( "Dealing with a phrase: $match" ) if ($self->{_debug});
|
||||
|
||||
$match = substr($match,1);
|
||||
|
||||
# however, we want to make sure it's a phrase and not something else
|
||||
my ( $word_list, $rejected ) = $self->_tokenize( $match );
|
||||
$self->debug_dumper( "Phrase reduced to: ", $word_list ) if ($self->{_debug});
|
||||
$self->debug_dumper( "Phrase words rejected: ", $rejected ) if ($self->{_debug});
|
||||
my $word_count = @$word_list;
|
||||
|
||||
if ( $word_count > 1 ) { $phrase = $word_list } # ok, standard phrase
|
||||
elsif ($word_count == 1) { $match = $word_list->[0] } # just a standard word, don't worry about searching by phrase
|
||||
}
|
||||
|
||||
# make sure we can use this word
|
||||
if ( not $phrase and my $code = $self->_check_word( $match, $self->{stopwords} ) ) {
|
||||
$reject{ $match } = $code;
|
||||
next;
|
||||
}
|
||||
|
||||
# now, see if we should toss this word
|
||||
$words{$match} = {
|
||||
mode => $mode,
|
||||
phrase => $phrase,
|
||||
substring => $substring,
|
||||
keyword => not $phrase,
|
||||
};
|
||||
}
|
||||
|
||||
# words is a hashref of:
|
||||
# {
|
||||
# word => {
|
||||
# paramaters => 'values'
|
||||
# },
|
||||
# word1 => {
|
||||
# ...
|
||||
# },
|
||||
# ...
|
||||
# }
|
||||
#
|
||||
return( \%words, \%reject );
|
||||
|
||||
}
|
||||
|
||||
|
||||
sub _filter_query {
|
||||
#--------------------------------------------------------------------------------
|
||||
# get the results from the filter
|
||||
#
|
||||
my $self = shift;
|
||||
my $filters = shift;
|
||||
my $results = shift or return {};
|
||||
keys %{$results} or return $results;
|
||||
|
||||
my $table = $self->{table};
|
||||
my $tname = $table->name();
|
||||
|
||||
# setup the where clause
|
||||
my $where = $filters->sql() or return $results;
|
||||
my ($pk) = $table->pk;
|
||||
$where .= qq! AND $pk IN (! . join(',', keys %$results) . ')';
|
||||
|
||||
# now do the filter
|
||||
my $query = qq!
|
||||
SELECT $pk
|
||||
FROM
|
||||
$tname
|
||||
WHERE
|
||||
$where
|
||||
!;
|
||||
$self->debug( "Filter Query: $query" ) if ($self->{_debug});
|
||||
my $sth = $self->{table}->{driver}->prepare($query);
|
||||
$sth->execute();
|
||||
|
||||
# get all the results
|
||||
my $aref = $sth->fetchall_arrayref;
|
||||
return {
|
||||
map {
|
||||
$_->[0] => $results->{$_->[0]}
|
||||
} @$aref
|
||||
};
|
||||
}
|
||||
|
||||
sub _create_buckets {
|
||||
#------------------------------------------------------------
|
||||
# takes the output from _parse_query_string and creates a
|
||||
# bucket hash of all the different types of searching
|
||||
# possible
|
||||
my $query = shift or return;
|
||||
|
||||
my %buckets;
|
||||
|
||||
# put each word in the appropriate hash bucket
|
||||
foreach my $parameter ( keys %{$query} ) {
|
||||
|
||||
my $word_data = $query->{$parameter};
|
||||
|
||||
# the following is slower, however, done that way to be syntatically legible
|
||||
if ( $word_data->{'phrase'} and ( defined $word_data->{'mode'} and ($word_data->{'mode'} =~ /(must|cannot)/ ))) {
|
||||
$buckets{"phrases_$1"}->{$parameter} = $word_data;
|
||||
}
|
||||
elsif ( $word_data->{'phrase'} ) {
|
||||
$buckets{'phrases'}->{$parameter} = $word_data;
|
||||
}
|
||||
elsif ( $word_data->{'keyword'} and ( defined $word_data->{'mode'} and ($word_data->{'mode'}=~ /(must|cannot|greater|less)/) )) {
|
||||
$buckets{"keywords_$1"}->{$parameter} = $word_data;
|
||||
}
|
||||
else {
|
||||
$buckets{'keywords'}->{$parameter} = $word_data;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return \%buckets;
|
||||
}
|
||||
|
||||
sub alternate_driver_query {
|
||||
#--------------------------------------------------------------------------------
|
||||
my ( $self, $drivername, $input ) = @_;
|
||||
|
||||
$drivername = uc $drivername;
|
||||
require GT::SQL::Search;
|
||||
my $driver = GT::SQL::Search->load_search({ %$input, table => $self->{table}, driver => $drivername });
|
||||
my $sth = $driver->query( $input );
|
||||
foreach ( qw( rows rejected_keywords ) ) { $self->{$_} = $driver->{$_}; }
|
||||
return $sth;
|
||||
|
||||
}
|
||||
|
||||
sub clean_sb {
|
||||
# -------------------------------------------------------------------------------
|
||||
# Convert the sort by, sort order into an sql string.
|
||||
#
|
||||
my ($class, $sb, $so) = @_;
|
||||
my $output = '';
|
||||
|
||||
return $output unless ($sb);
|
||||
|
||||
# Remove score attribute, used only for internal indexes.
|
||||
$sb =~ s/^\s*score\b//;
|
||||
$sb =~ s/,?\s*\bscore\b//;
|
||||
|
||||
if ($sb and not ref $sb) {
|
||||
if ($sb =~ /^[\w\s,]+$/) {
|
||||
if ($sb =~ /\s(?:asc|desc)/i) {
|
||||
$output = 'ORDER BY ' . $sb;
|
||||
}
|
||||
else {
|
||||
$output = 'ORDER BY ' . $sb . ' ' . $so;
|
||||
}
|
||||
}
|
||||
else {
|
||||
$class->error('BADSB', 'WARN', $sb);
|
||||
}
|
||||
}
|
||||
elsif (ref $sb eq 'ARRAY') {
|
||||
foreach ( @$sb ) {
|
||||
/^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
|
||||
}
|
||||
$output = 'ORDER BY ' . join(',', @$sb);
|
||||
}
|
||||
return $output;
|
||||
}
|
||||
|
||||
sub debug_dumper {
|
||||
#--------------------------------------------------------------------------------
|
||||
# calls debug but also dumps all the messages
|
||||
my $self = shift;
|
||||
my $message = shift;
|
||||
my $level = ref $_[0] ? 1 : defined $_[0] ? shift : 0;
|
||||
|
||||
if ( $self->{_debug} >= $level ) {
|
||||
require GT::Dumper;
|
||||
$self->debug( $message . join( "", map { GT::Dumper::Dumper($_) } @_ )) if ($self->{_debug});
|
||||
}
|
||||
}
|
||||
|
||||
1;
|
Reference in New Issue
Block a user