discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/NONINDEXED/Search.pm
2024-06-17 21:49:12 +10:00

258 lines
9.0 KiB
Perl

# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::SQL::Search::NONINDEXED::Search
# Author : Alex Krohn
# CVS Info : 087,071,086,086,085
# $Id: Search.pm,v 1.30 2006/08/09 06:58:39 brewt Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Nonindex search system
#
package GT::SQL::Search::NONINDEXED::Search;
# ==================================================================
use strict;
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG/;
use GT::SQL::Search::Base::Search;
use GT::SQL::Condition;
@ISA = qw( GT::SQL::Search::Base::Search );
$DEBUG = 0;
$VERSION = sprintf "%d.%03d", q$Revision: 1.30 $ =~ /(\d+)\.(\d+)/;
$ATTRIBS = {
# parse based on latin characters
latin_query_parse => 0
};
sub load {
shift;
return GT::SQL::Search::NONINDEXED::Search->new(@_)
}
sub query {
#--------------------------------------------------------------------------------
# Returns a sth based on a query
#
# Options:
# - paging
# mh : max hits
# nh : number hit (or page of hits)
#
# - searching
# ww : whole word
# ma : 1 => OR match, 0 => AND match, undefined => QUERY
# substring : search for substrings of words
# bool : 'and' => and search, 'or' => or search, '' => regular query
# query : the string of things to ask for
#
# - filtering
# field_name : value # Find all rows with field_name = value
# field_name : ">value" # Find all rows with field_name > value.
# field_name : "<value" # Find all rows with field_name < value.
# field_name-gt : value # Find all rows with field_name > value.
# field_name-lt : value # Find all rows with field_name < value.
#
# Parameters:
# ( $CGI ) : a single cgi object
# ( $HASH ) : a hash of the parameters
#
my $self = shift;
# find out what sort of a parameter we're dealing with
my $input = $self->common_param(@_);
# add additional parameters if required
foreach my $parameter ( keys %{$ATTRIBS} ) {
if ( not exists $input->{$parameter} ) {
$input->{$parameter} = $self->{$parameter};
}
}
# parse query
$self->debug( "Search Query: $$input{query}", 1 ) if ($self->{_debug});
my ( $query, $rejected ) = $self->_parse_query_string( $input->{'query'} );
$self->{rejected_keywords} = $rejected;
# setup the additional input parameters
$query = $self->_preset_options( $query, $input );
$self->debug( "Set the pre-options: ", $query ) if ($self->{_debug});
# now sort into distinct buckets
my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query );
$self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
require GT::SQL::Condition;
my $query_condition = new GT::SQL::Condition;
# now handle the separate possibilities
# the union
my $union_cond = $self->_get_condition( $buckets->{keywords}, $buckets->{phrases} );
$query_condition->add(GT::SQL::Condition->new(@$union_cond, 'OR')) if $union_cond;
# the intersect
my $intersect_cond = $self->_get_condition( $buckets->{keywords_must}, $buckets->{phrases_must} );
$query_condition->add(GT::SQL::Condition->new(@$intersect_cond)) if $intersect_cond;
# the disjoin
my $disjoin_cond = $self->_get_condition( $buckets->{keywords_cannot}, $buckets->{phrases_cannot} );
$query_condition->add(GT::SQL::Condition->new(@$disjoin_cond, 'OR')->not) if $disjoin_cond;
# now handle filters
my $cols = $self->{'table'}->cols();
my %filters = map {
(my $column = $_) =~ s/-[lg]t$//;
exists $cols->{$column}
? ($_ => $input->{$_})
: ()
} keys %{$input};
# if there was no query nor filter return nothing.
keys %$query or keys %filters or return $self->sth({});
if (keys %filters) {
$self->debug( "Creating Filters: ", \%filters ) if ($self->{_debug});
$self->_add_filters( \%filters );
$query_condition = GT::SQL::Condition->new( keys %$query ? $query_condition : (), $self->{filter} );
}
elsif ($self->{filter} and keys %{$self->{filter}} ) {
$self->debug( "Filtering results", $self->{filter} ) if ($self->{_debug});
$query_condition = GT::SQL::Condition->new( keys %$query ? $query_condition : (), $self->{filter} );
}
else {
$self->debug( "No filters being used.") if ($self->{_debug});
}
# now this query should probably clear the filters once it's been used, so i'll do that here
$self->{filter} = undef;
my $tbl = $self->{table};
my ($pk) = $tbl->pk;
# now run through a callback function if needed.
if ($self->{callback}) {
# Warning: this slows things a heck of a lot.
unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
return $self->error ('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
}
my $sth = $tbl->select( [ $pk ], $query_condition );
my $results = {};
while (my $result = $sth->fetchrow) {
$results->{$result} = undef;
}
$self->debug_dumper("Running results through callback. Had: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
$results = $self->{callback}->($self, $results);
$self->debug_dumper("New result set: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
$self->{rows} = scalar($results ? keys %{$results} : ());
return $self->sth( $results );
}
# and now create a search sth object to handle all this
$input->{nh} = (defined $input->{nh} and $input->{nh} =~ /^(\d+)$/) ? $1 : 1;
$input->{mh} = (defined $input->{mh} and $input->{mh} =~ /^(\d+)$/) ? $1 : 25;
$input->{so} = (defined $input->{so} and $input->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : '';
# check that sb is not dangerous
my $sb = $self->clean_sb($input->{sb}, $input->{so});
my $offset = ( $input->{nh} - 1 ) * $input->{mh};
$tbl->select_options($sb) if ($sb);
$tbl->select_options("LIMIT $offset, $input->{mh}");
my $sth = $tbl->select( $query_condition ) or return;
# so how many hits did we get?
$self->{rows} = $sth->rows();
if (($input->{nh} > 1) or ($self->{rows} == $input->{mh})) {
$self->{rows} = $tbl->count($query_condition);
}
return $sth;
}
sub _get_condition {
#-------------------------------------------------------------------------------
my ( $self, $keywords, $phrases ) = @_;
my @list = ( keys %$keywords, keys %$phrases );
my $tbl = $self->{table} or return $self->error( 'NODRIVER', 'FATAL' );
my @cond = ();
my %tmp = $tbl->weight();
my @weights = keys %tmp or return;
foreach my $element ( @list ) {
my @where = ();
foreach my $cols ( @weights ) {
push @where, [$cols, 'LIKE', "%$element%"]; # Condition does quoting by default.
}
push @cond, GT::SQL::Condition->new(@where, 'OR');
}
@cond or return;
return \@cond;
}
sub _parse_query_string {
#------------------------------------------------------------
# Parses a query string '+foo -"bar this" alpha' into a hash of
# words and modes.
#
my ($self, $text) = @_;
my %modes = (
'+' => 'must',
'-' => 'cannot',
'<' => 'greater',
'>' => 'less'
);
# Latin will break up on actual words and punctuation.
if ($self->{latin_query_parse}) {
return $self->SUPER::_parse_query_string( $text );
}
else {
my $words = {};
my @terms;
my $i = 0;
foreach my $term (split /"/, $text) {
push @terms, ($i++ % 2 ? $term : split ' ', $term);
}
for (my $i = 0; $i < @terms; $i++) {
my $word = $terms[$i];
$word =~ s/^\s*|\s*$//g;
next if ($word eq '');
if ($i < $#terms) {
($word eq '-') and ($word = '-' . $terms[++$i]);
($word eq '+') and ($word = '+' . $terms[++$i]);
}
$word =~ s/^([<>+-])//;
my $mode = ($1 and $modes{$1} or 'can');
my $substring = ($word =~ s/\*$//) || 0;
if ($word =~ /\s/) {
$words->{$word} = {
mode => $mode,
phrase => 1,
substring => $substring,
keyword => 0,
};
}
elsif ($word) {
$words->{$word} = {
mode => $mode,
phrase => 0,
substring => $substring,
keyword => 1,
};
}
}
return $words;
}
}
1;