First pass at adding key files

This commit is contained in:
dsainty
2024-06-17 21:49:12 +10:00
commit aa25e9347f
1274 changed files with 392549 additions and 0 deletions

View File

@ -0,0 +1,82 @@
# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::SQL::Search::Base::Common
# Author : Aki Mimoto
# CVS Info :
# $Id: Common.pm,v 1.8 2004/10/13 21:45:02 aki Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Base classes upon which all search drivers are based
#
package GT::SQL::Search::Base::Common;
use strict;
use Exporter;
use vars qw/ @ISA @EXPORT $STOPWORDS /;
@ISA = qw( Exporter );
@EXPORT = qw( &_tokenize &_check_word $STOPWORDS );
$STOPWORDS = { map { $_ => 1 } qw/
of about or all several also she among since an some and such are than
as that at the be them because there been these between they both this
but those by to do toward during towards each upon either for from was
had were has what have when he where her which his while however with if
within in would into you your is it its many more most must on re it
test not above add am pm jan january feb february mar march apr april
may jun june jul july aug august sep sept september oct october nov
november dec december find &amp &gt &lt we http com www inc other
including
/ };
sub _tokenize {
#--------------------------------------------------------------------------------
# takes a strings and chops it up into little bits
my $self = shift;
my $text = shift;
my ( @words, $i, %rejected, $word, $code );
# split on any non-word (includes accents) characters
@words = split /[^\w\x80-\xFF\-]+/, lc $text;
$self->debug_dumper( "Words: ", \@words ) if ($self->{_debug});
# drop all words that are too small, etc.
$i = 0;
while ( $i <= $#words ) {
$word = $words[ $i ];
if ((exists $self->{stopwords}{$word} and ($code = 'STOPWORD')) or
(length($word) < $self->{min_word_size} and $code = 'TOOSMALL' ) or
(length($word) > $self->{max_word_size} and $code = 'TOOBIG')) {
splice( @words, $i, 1 );
$rejected{$word} = $self->{'rejections'}->{$code};
}
else {
$i++; # Words ok.
}
}
$self->debug_dumper( "Accepted Words: ", \@words ) if ($self->{_debug});
$self->debug_dumper( "Rejected Words: ", \%rejected ) if ($self->{_debug});
return ( \@words, \%rejected );
}
sub _check_word {
#--------------------------------------------------------------------------------
# Returns an error code if it is an invalid word, otherwise returns nothing.
#
my $self = shift;
my $word = shift;
my $code;
if ((exists $self->{stopwords}{$word} and ($code = 'STOPWORD')) or
(length($word) < $self->{min_word_size} and $code = 'TOOSMALL' ) or
(length($word) > $self->{max_word_size} and $code = 'TOOBIG')) {
return $code;
}
return;
}
1;

View File

@ -0,0 +1,78 @@
# ====================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::SQL::Search::Base::Indexer
# Author: Aki Mimoto
# CVS Info :
# $Id: Indexer.pm,v 1.4 2004/01/13 01:35:19 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ====================================================================
#
# Description:
#
#
package GT::SQL::Search::Base::Indexer;
use strict;
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
use GT::Base;
use GT::SQL::Search::Base::Common;
#--------------------------------------------------------------------------------
# Preamble information related to the object
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.4 $ =~ /(\d+)\.(\d+)/;
@ISA = qw/GT::Base GT::SQL::Search::Base::Common/;
$ATTRIBS = {
driver => undef,
stopwords => $STOPWORDS,
rejections => {
STOPWORD => "is a stopword",
TOOSMALL => "is too small a word",
TOOBIG => "is too big a word"
},
table => '',
init => 0,
debug => 0,
min_word_size => 3,
max_word_size => 50,
};
sub drop_search_driver { 1 }
sub add_search_driver { 1 }
# found in GT::SQL::Creator
sub pre_create_table { 1 }
sub post_create_table { 1 }
# GT::SQL::Editor
sub pre_add_column { 1 }
sub post_add_column { 1 }
sub pre_delete_column { 1 }
sub post_delete_column { 1 }
sub pre_drop_table { 1 }
sub post_drop_table { 1 }
# GT::SQL::Table
sub pre_add_record { 1 }
sub post_add_record { 1 }
sub pre_update_record { 1 }
sub post_update_record { 1 }
sub pre_delete_record { 1 }
sub post_delete_record { 1 }
sub pre_delete_all_records { 1 }
sub post_delete_all_records { 1 }
sub driver_ok { 1 }
sub reindex_all { 1 }
1;

View File

@ -0,0 +1,287 @@
# ====================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::SQL::Search::STH
# Author: Aki Mimoto
# CVS Info :
# $Id: STH.pm,v 1.25 2004/08/28 03:53:46 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ====================================================================
#
package GT::SQL::Search::STH;
#--------------------------------------------------------------------------------
use strict;
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE /;
use GT::Base;
@ISA = ('GT::Base');
$ATTRIBS = {
'_debug' => 0,
'sth' => undef,
'results' => {},
'db' => undef,
'table' => undef,
'index' => 0,
'order' => [],
'sb' => 'score',
'so' => '',
'score_col' => 'SCORE',
'score_sort'=> 0,
'nh' => 0,
'mh' => 0
};
$ERROR_MESSAGE = 'GT::SQL';
$ERRORS = {
BADSB => 'Invalid character found in so: "%s"',
};
sub init {
#--------------------------------------------------------------------------------
my $self = shift;
# setup the options
$self->set(@_);
# correct a few of the values
--$self->{nh} if $self->{nh};
my $sth;
my $results = $self->{results};
$self->{rows} = scalar( $results ? keys %{$results} : 0 );
# if we have asked to have sorting by another column (non score), create the part of the query that handles taht
$self->debug( "Setting up sorting for GT::SQL::Search::STH" ) if ($self->{_debug});
my $sb;
# clean up the sort by columns.
unless ($self->{'score_sort'}) {
$sb = GT::SQL::Search::Base::Search->clean_sb($self->{sb}, $self->{so});
}
# setup the max hits and the offsets
$self->{index} = $self->{nh} * $self->{mh} || 0;
$self->{max_index} = $self->{index} + $self->{mh} - 1; # index + mh is the first record which should not be returned.
if ( $self->{max_index} > $self->{rows} ) {
$self->{max_index} = $self->{rows};
$self->{rows} = $self->{rows} - $self->{index};
$self->{rows} < 0 ? $self->{rows} = 0 : 0;
}
else {
$self->{rows} = $self->{mh};
}
# if we are sorting by another column, handle that
if ( $sb and (keys %{$self->{results}})) {
my ( $table, $pk ) = $self->_table_info();
my ( $query, $where, $st, $limit );
$where = qq!$pk in(! . join( ",", keys %{$self->{results}}) . ')';
$self->{mh} and $limit = qq!LIMIT $self->{index}, $self->{rows}!;
$query = qq!
SELECT $pk
FROM $table
WHERE $where
$sb
$limit
!;
$self->debug( "Row fetch query: $query" ) if ($self->{_debug});
$sth = $self->{table}->{driver}->prepare( $query );
$sth->execute();
# fix the counts
$self->{index} = 0;
$self->{max_hits} = $self->{rows};
# now return them
my $order = $sth->fetchall_arrayref();
$sth->finish();
$self->{'order'} = [ map { $_->[0] } @{$order} ];
}
else {
$self->{'order'} = [ sort {
( $results->{$b} || return 0 ) <=> ( $results->{$a} || 0 )
} keys %{$results} ];
$self->debug_dumper( "Results will be presented in the following order: ", $self->{'order'} ) if ($self->{_debug});
}
}
sub cache_results {
#--------------------------------------------------------------------------------
my $self = shift;
my $results = $self->{'results'};
my ($sth, @records, $i, %horder, @order, $in_list);
my $table = $self->{table};
my $tname = $table->name();
my ($pk) = $self->{table}->pk;
use GT::SQL::Condition;
# we know what we're doing here so shut off warns (complains about uninit'd values in range
# if thee aren't enough elements in the order array)
my $w = $^W; $^W = 0;
@order = grep $_, @{$self->{'order'} || []}[$self->{index}..$self->{max_index}] or return [];
$^W = $w;
$i = 0; %horder = ( map { ( $_ => $i++) } @order );
$in_list = join ( ",", @order );
my $query = qq|
SELECT *
FROM
$tname
WHERE
$pk IN($in_list)
|;
# the following is left commented out as...
# if $tbl->select is used $table->hits() will not
# return an accurate count of the number of all the hits. instead, will return
# a value up to mh. $tbl->hits() is important because the value is used
# in toolbar calculations
#
# $sth = $table->select( GT::SQL::Condition->new( $pk, 'IN', \"($in_list)" ) );
$sth = $table->do_query( $query );
while ( my $href = $sth->fetchrow_hashref() ) {
$records[$horder{$href->{$pk}}] = \%$href
}
return \@records;
}
sub fetchrow_array {
#--------------------------------------------------------------------------------
return @{ $_[0]->fetchrow_arrayref() || [] };
}
sub fetchrow_arrayref {
#--------------------------------------------------------------------------------
my $self = shift;
my $records = $self->{cache} ||= $self->cache_results;
my $href = shift @$records or return;
return $self->_hash_to_array($href);
}
sub fetchrow_hashref {
#--------------------------------------------------------------------------------
my $self = shift;
my $results = $self->{'results'};
my $records = $self->{cache} ||= $self->cache_results;
my $table = $self->{table};
my ($pk) = $self->{table}->pk;
my $href = shift @$records or return;
$href->{$self->{'score_col'}} = ( $self->{score} = $results->{$href->{$pk}} );
return $href;
}
sub fetchall_hashref {
#--------------------------------------------------------------------------------
my $self = shift;
my @results;
while (my $res = $self->fetchrow_hashref) {
push @results, $res;
}
return \@results;
}
sub fetchall_list {
#--------------------------------------------------------------------------------
return { map { @$_ } @{shift->fetchall_arrayref} }
}
sub fetchall_arrayref {
#--------------------------------------------------------------------------------
my $self = shift;
$self->{order} or return [];
my $results = $self->{results};
my ($pk) = $self->{table}->pk;
my $scol = $self->{score_col};
if (!$self->{allref_cache}) {
$self->{allref_cache} ||= $self->cache_results;
for my $i ( 0 .. $#{$self->{allref_cache}} ) {
my $element = $self->{allref_cache}->[$i];
if ( $_[0] eq 'HASH' ) {
$element->{$scol} = $results->{$element->{$pk}};
}
else {
$element->{$scol} = $self->_hash_to_array( $element->{$scol} );
}
};
}
my $records = $self->{allref_cache};
return $records;
}
sub score {
#--------------------------------------------------------------------------------
my $self = shift;
return $self->{score};
}
sub _hash_to_array {
#--------------------------------------------------------------------------------
my $self = shift;
my $href = shift or return;
my $results = $self->{'results'};
my $table = $self->{table};
my $cols = $table->cols();
my $ordercols = ( $self->{ordercols} ||= [ sort { $cols->{$a}->{pos} <=> $cols->{$b}->{pos} } keys %$cols ] );
my ($pk) = $self->{table}->pk;
my $aref = [ map { $href->{$_} } @$ordercols, ( $self->{score} = $results->{$href->{$pk}} ) ];
return $aref;
}
sub rows {
#--------------------------------------------------------------------------------
my $self = shift;
return $self->{rows};
}
sub _table_info {
#--------------------------------------------------------------------------------
my $self = shift;
my $table = $self->{table}->name() or return $self->error('NOSCHEMA', 'FATAL');
my ($pk) = $self->{table}->pk;
return ( $table, $pk );
}
sub DESTROY {
#--------------------------------------------------------------------------------
my $self = shift;
$self->{'sth'} and $self->{'sth'}->finish();
}
sub debug_dumper {
#--------------------------------------------------------------------------------
# calls debug but also dumps all the messages
my $self = shift;
my $message = shift;
my $level = ref $_[0] ? 1 : shift;
if ( $self->{_debug} >= $level ) {
require GT::Dumper;
$self->debug( $message . join( "", map { GT::Dumper::Dumper($_) } @_ )) if ($self->{_debug});
}
}
1;

View File

@ -0,0 +1,572 @@
# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::SQL::Search::Base
# Author : Aki Mimoto
# CVS Info :
# $Id: Search.pm,v 1.25 2004/08/28 03:53:46 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Base classes upon which all search drivers are based
#
package GT::SQL::Search::Base::Search;
use strict;
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
use GT::Base;
use GT::SQL::Search::Base::Common;
@ISA = qw( GT::Base GT::SQL::Search::Base::Common);
#--------------------------------------------------------------------------------
# Preamble information related to the object
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.25 $ =~ /(\d+)\.(\d+)/;
@ISA = qw/ GT::Base /;
$ATTRIBS = {
'stopwords' => $STOPWORDS,
'mh' => 25,
'nh' => 1,
'ww' => undef,
'ma' => undef,
'bool' => undef,
'substring' => 0,
'query' => '',
'sb' => 'score',
'so' => '',
'score_col' => 'SCORE',
'score_sort'=> 0,
'debug' => 0,
'_debug' => 0,
# query related
'db' => undef,
'table' => undef,
'filter' => undef,
'callback' => undef,
# strict matching of indexed words, accents on words do count
'sm' => 0,
'min_word_size' => 3,
'max_word_size' => 50,
};
sub init {
#--------------------------------------------------------------------------------
# Initialises the Search object
#
my $self = shift;
my $input = $self->common_param(@_);
$self->set($input);
# now handle filters...,
my $tbl = $self->{table};
my $cols = $tbl->cols();
my %filters = map {
(my $tmp = $_) =~ s/-[lg]t$//;
exists $cols->{$tmp} ? ($_ => $input->{$_}) : ()
} keys %{$input};
if ( keys %filters ) {
$self->debug_dumper( "INIT: Creating Filters: ", \%filters ) if ( $self->{_debug} );
$self->filter(\%filters);
}
$self->{table}->connect;
}
sub query {
#--------------------------------------------------------------------------------
# Returns a sth based on a query
#
# Options:
# - paging
# mh : max hits
# nh : number hit (or page of hits)
#
# - searching
# ww : whole word
# ma : 1 => OR match, 0 => AND match, undefined => QUERY
# substring : search for substrings of words
# bool : 'and' => and search, 'or' => or search, '' => regular query
# query : the string of things to ask for
#
# - filtering
# field_name : value # Find all rows with field_name = value
# field_name : ">value" # Find all rows with field_name > value.
# field_name : "<value" # Find all rows with field_name < value.
# field_name-gt : value # Find all rows with field_name > value.
# field_name-lt : value # Find all rows with field_name < value.
#
# Parameters:
# ( $CGI ) : a single cgi object
# ( $HASH ) : a hash of the parameters
#
my $self = shift;
# find out what sort of a parameter we're dealing with
my $input = $self->common_param(@_);
# add additional parameters if required
foreach my $parameter ( keys %{$ATTRIBS} ) {
if ( not exists $input->{$parameter} ) {
$input->{$parameter} = $self->{$parameter};
}
}
# parse query...,
$self->debug( "Search Query: $$input{query}", 1 ) if ($self->{_debug});
my ( $query, $rejected ) = $self->_parse_query_string( $input->{'query'} );
$self->{'rejected_keywords'} = $rejected;
# setup the additional input parameters
$query = $self->_preset_options( $query, $input );
$self->debug_dumper( "Set the pre-options: ", $query ) if ($self->{_debug});
# now sort into distinct buckets
my $buckets = &_create_buckets( $query );
$self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
return $self->_query($input, $buckets);
}
sub _query {
#--------------------------------------------------------------------------------
my ( $self, $input, $buckets ) = @_;
# now handle the separate possibilities
my $results = {};
# query can have phrases
$results = $self->_phrase_query( $buckets->{phrases}, $results );
$self->debug_dumper("Did phrase union query. Current result set and scores: ", $results ) if ($self->{_debug});
# query have keywords
$results = $self->_union_query( $buckets->{keywords}, $results );
$self->debug_dumper("Did keyword union query. Current result set and scores: ", $results ) if ($self->{_debug});
# query must have phrases
$results = $self->_phrase_intersect_query( $buckets->{phrases_must}, $results );
$self->debug_dumper("Did phrase intersect query. Current result set and scores: ", $results ) if ($self->{_debug});
# query must have keywords
$results = $self->_intersect_query( $buckets->{keywords_must}, $results );
$self->debug_dumper("Did keyword intersect query. Current result set and scores: ", $results ) if ($self->{_debug});
# query cannot have keywords
$results = $self->_disjoin_query( $buckets->{keywords_cannot}, $results );
$self->debug_dumper("Did keyword disjoin query. Current result set and scores: ", $results ) if ($self->{_debug});
# query cannot have phrases
$results = $self->_phrase_disjoin_query( $buckets->{phrases_cannot}, $results);
$self->debug_dumper("Did phrase disjoin query. Current result set and scores: ", $results ) if ($self->{_debug});
# now handle filters
my $cols = $self->{'table'}->cols();
my %filters = map {
(my $tmp = $_) =~ s/-[lg]t$//;
$cols->{$tmp} ? ($_ => $input->{$_}) : ()
} keys %{$input};
if (keys %filters) {
$self->debug( "Creating Filters: ", \%filters ) if ($self->{_debug});
$results = $self->filter(\%filters, $results);
}
elsif ($self->{filter}) {
$self->debug( "Filtering results", $self->{filter} ) if ($self->{_debug});
$results = $self->_filter_query( $self->{filter}, $results );
}
else {
$self->debug( "No filters being used.") if ($self->{_debug});
}
# now this query should probably clear the filters once it's been used, so i'll dothat here
$self->{filter} = undef;
# now run through a callback function if needed.
if ($self->{callback}) {
unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
return $self->error ('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
}
$self->debug_dumper ("Running results through callback. Had: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
$results = $self->{callback}->($self, $results);
$self->debug_dumper ("New result set: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
}
# so how many hits did we get?
$self->{table}->{last_hits} = ( $self->{rows} = scalar($results ? keys %{$results} : ()) );
# and now create a search sth object to handle all this
return $self->sth( $results );
}
sub sth {
#--------------------------------------------------------------------------------
my $self = shift;
my $results = shift;
require GT::SQL::Search::Base::STH;
my $sth = GT::SQL::Search::STH->new(
'results' => $results,
'db' => $self->{table}->{driver},
# pass the following attributes down to the STH handler
map({ ($_ => $self->{$_}) } qw/ table sb so score_col score_sort nh mh rows _debug /)
);
return $sth;
}
sub rows {
#--------------------------------------------------------------------------------
# after a query is run, returns the number of rows
my $self = shift;
return $self->{rows} || 0;
}
sub _add_filters {
#--------------------------------------------------------------------------------
# creates the filter object
my $self = shift;
my $filter;
# find out how we're calling the parameters
if ( ref $_[0] eq 'GT::SQL::Condition' ) {
$filter = shift;
}
elsif ( ref $_[0] eq 'HASH' ) {
# setup the query condition using the build_query condition method
# build the condition object
my %opts = %{ shift() || {} };
delete $opts{query};
$filter = $self->{table}->build_query_cond( \%opts, $self->{table}{schema}{cols} );
}
else {
return $self->error ('BADARGS', 'FATAL', "Invalid argument: $_[0] passed to _add_filter");
}
# Use ref, as someone can pass in filter => 1 and mess things up.
ref $self->{filter} ? $self->{filter}->add ($filter) : ($self->{filter} = $filter);
$self->debug_dumper( "Filters: ", $self->{filter} ) if ($self->{_debug});
return $self->{filter};
}
sub _preset_options {
#--------------------------------------------------------------------------------
# sets up word parameters
my $self = shift;
my $query = shift or return;
my $input = shift or return $query;
# whole word searching
if ( defined $input->{'ww'} or defined $self->{'ww'}) {
if ( defined $input->{'ww'} ? $input->{'ww'} : $self->{'ww'} ) {
for ( keys %{$query} ) { $query->{$_}->{mode} = 'substring'; }
}
}
# substring searching
if ( defined $input->{'substring'} or defined $self->{'substring'}) {
if ( defined $input->{'substring'} ? $input->{'substring'} : $self->{'substring'} ) {
for ( keys %{$query} ) { $query->{$_}->{'substring'} = 1; }
}
}
if ( defined $input->{'ma'} or defined $self->{'ma'} ) {
# each keyword must be included
if ( defined $input->{'ma'} ? $input->{'ma'} : $self->{'ma'} ) {
for ( keys %{$query} ) {
next if $query->{$_}->{mode} eq 'cannot';
$query->{$_}->{mode} = 'must';
}
}
# each word can be included but is not necessary
else {
for ( keys %{$query} ) { $query->{$_}->{mode} = 'can'; }
}
}
# some more and or searches, only if user hasn't put +word -word
if ( lc( $input->{'bool'} || $self->{'bool'} ) eq 'and' ) {
unless ($input->{query} =~ /(?:^|\s)[+-]\w/) {
for ( keys %{$query} ) {
next if $query->{$_}->{mode} eq 'cannot';
$query->{$_}->{mode} = 'must';
}
}
}
elsif ( lc( $input->{'bool'} || $self->{'bool'} ) eq 'or' ) {
unless ($input->{query} =~ /(?:^|\s)[+-]\w/) {
for ( keys %{$query} ) { $query->{$_}->{mode} = 'can'; }
}
}
return $query;
}
sub _phrase_query { $_[1] }
sub _union_query { $_[1] }
sub _phrase_intersect_query { $_[1] }
sub _intersect_query { $_[1] }
sub _disjoin_query { $_[1] }
sub _phrase_disjoin_query { $_[1] }
sub filter {
#--------------------------------------------------------------------------------
# adds a filter
#
my $self = shift;
# add filters..,
my $filters = $self->_add_filters( shift );
my $results = shift;
# see if we need to execute a search, otherwise just return the current filterset
defined $results or return $results;
# start doing the filter stuff
return $self->_filter_query( $filters, $results );
}
sub _parse_query_string {
#------------------------------------------------------------
# from Mastering Regular Expressions altered a fair bit
# takes a space delimited string and breaks it up.
#
my $self = shift;
my $text = shift;
my %words = ();
my %reject = ();
my %mode = (
'+' => 'must',
'-' => 'cannot',
'<' => 'greater',
'>' => 'less'
);
# work on the individual elements
my @new = ();
while ( $text =~ m{
# the first part groups the phrase inside the quotes.
# see explanation of this pattern in MRE
([+-]?"[^\"\\]*(?:\\.[^\"\\]*)*)" ?
| (\+?[\w\x80-\xFF\-\*]+),?
| ' '
}gx ) {
my $match = lc $+;
# strip out buffering spaces
$match =~ s/^\s+//; $match =~ s/\s+$//;
# don't bother trying if there is nothing there
next unless $match;
# find out the searching mode
my ($mode, $substring, $phrase);
if (my $m = $mode{substr($match,0,1)}) {
$match = substr($match,1);
$mode = $m;
}
# do we need to substring match?
if ( substr( $match, -1, 1 ) eq "*" ) {
$match = substr($match,0,length($match)-1);
$substring = 1;
}
# find out if we're dealing with a phrase
if ( substr($match,0,1) eq '"' ) {
$self->debug( "Dealing with a phrase: $match" ) if ($self->{_debug});
$match = substr($match,1);
# however, we want to make sure it's a phrase and not something else
my ( $word_list, $rejected ) = $self->_tokenize( $match );
$self->debug_dumper( "Phrase reduced to: ", $word_list ) if ($self->{_debug});
$self->debug_dumper( "Phrase words rejected: ", $rejected ) if ($self->{_debug});
my $word_count = @$word_list;
if ( $word_count > 1 ) { $phrase = $word_list } # ok, standard phrase
elsif ($word_count == 1) { $match = $word_list->[0] } # just a standard word, don't worry about searching by phrase
}
# make sure we can use this word
if ( not $phrase and my $code = $self->_check_word( $match, $self->{stopwords} ) ) {
$reject{ $match } = $code;
next;
}
# now, see if we should toss this word
$words{$match} = {
mode => $mode,
phrase => $phrase,
substring => $substring,
keyword => not $phrase,
};
}
# words is a hashref of:
# {
# word => {
# paramaters => 'values'
# },
# word1 => {
# ...
# },
# ...
# }
#
return( \%words, \%reject );
}
sub _filter_query {
#--------------------------------------------------------------------------------
# get the results from the filter
#
my $self = shift;
my $filters = shift;
my $results = shift or return {};
keys %{$results} or return $results;
my $table = $self->{table};
my $tname = $table->name();
# setup the where clause
my $where = $filters->sql() or return $results;
my ($pk) = $table->pk;
$where .= qq! AND $pk IN (! . join(',', keys %$results) . ')';
# now do the filter
my $query = qq!
SELECT $pk
FROM
$tname
WHERE
$where
!;
$self->debug( "Filter Query: $query" ) if ($self->{_debug});
my $sth = $self->{table}->{driver}->prepare($query);
$sth->execute();
# get all the results
my $aref = $sth->fetchall_arrayref;
return {
map {
$_->[0] => $results->{$_->[0]}
} @$aref
};
}
sub _create_buckets {
#------------------------------------------------------------
# takes the output from _parse_query_string and creates a
# bucket hash of all the different types of searching
# possible
my $query = shift or return;
my %buckets;
# put each word in the appropriate hash bucket
foreach my $parameter ( keys %{$query} ) {
my $word_data = $query->{$parameter};
# the following is slower, however, done that way to be syntatically legible
if ( $word_data->{'phrase'} and ( defined $word_data->{'mode'} and ($word_data->{'mode'} =~ /(must|cannot)/ ))) {
$buckets{"phrases_$1"}->{$parameter} = $word_data;
}
elsif ( $word_data->{'phrase'} ) {
$buckets{'phrases'}->{$parameter} = $word_data;
}
elsif ( $word_data->{'keyword'} and ( defined $word_data->{'mode'} and ($word_data->{'mode'}=~ /(must|cannot|greater|less)/) )) {
$buckets{"keywords_$1"}->{$parameter} = $word_data;
}
else {
$buckets{'keywords'}->{$parameter} = $word_data;
}
}
return \%buckets;
}
sub alternate_driver_query {
#--------------------------------------------------------------------------------
my ( $self, $drivername, $input ) = @_;
$drivername = uc $drivername;
require GT::SQL::Search;
my $driver = GT::SQL::Search->load_search({ %$input, table => $self->{table}, driver => $drivername });
my $sth = $driver->query( $input );
foreach ( qw( rows rejected_keywords ) ) { $self->{$_} = $driver->{$_}; }
return $sth;
}
sub clean_sb {
# -------------------------------------------------------------------------------
# Convert the sort by, sort order into an sql string.
#
my ($class, $sb, $so) = @_;
my $output = '';
return $output unless ($sb);
# Remove score attribute, used only for internal indexes.
$sb =~ s/^\s*score\b//;
$sb =~ s/,?\s*\bscore\b//;
if ($sb and not ref $sb) {
if ($sb =~ /^[\w\s,]+$/) {
if ($sb =~ /\s(?:asc|desc)/i) {
$output = 'ORDER BY ' . $sb;
}
else {
$output = 'ORDER BY ' . $sb . ' ' . $so;
}
}
else {
$class->error('BADSB', 'WARN', $sb);
}
}
elsif (ref $sb eq 'ARRAY') {
foreach ( @$sb ) {
/^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
}
$output = 'ORDER BY ' . join(',', @$sb);
}
return $output;
}
sub debug_dumper {
#--------------------------------------------------------------------------------
# calls debug but also dumps all the messages
my $self = shift;
my $message = shift;
my $level = ref $_[0] ? 1 : defined $_[0] ? shift : 0;
if ( $self->{_debug} >= $level ) {
require GT::Dumper;
$self->debug( $message . join( "", map { GT::Dumper::Dumper($_) } @_ )) if ($self->{_debug});
}
}
1;