# ================================================================== # Gossamer Threads Module Library - http://gossamer-threads.com/ # # GT::Search::MYSQL::VER4 # Author : Aki Mimoto # CVS Info : 087,071,086,086,085 # $Id: VER4.pm,v 1.9 2004/01/13 01:35:19 jagerman Exp $ # # Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved. # ================================================================== # # Description: # Class used to search indexed tables. # package GT::SQL::Search::MYSQL::VER4; # ------------------------------------------------------------------------------ use strict; use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS /; use GT::SQL::Search::Base::Search; @ISA = qw( GT::SQL::Search::Base::Search ); # ------------------------------------------------------------------------------ # Preamble information related to the object $DEBUG = 0; $VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/; $STOPWORDS = { map { $_ => 1 } qw/ a's able about above according accordingly across actually after afterwards again against ain't all allow allows almost alone along already also although always am among amongst an and another any anybody anyhow anyone anything anyway anyways anywhere apart appear appreciate appropriate are aren't around as aside ask asking associated at available away awfully be became because become becomes becoming been before beforehand behind being believe below beside besides best better between beyond both brief but by c'mon c's came can can't cannot cant cause causes certain certainly changes clearly co com come comes concerning consequently consider considering contain containing contains corresponding could couldn't course currently definitely described despite did didn't different do does doesn't doing don't done down downwards during each edu eg eight either else elsewhere enough entirely especially et etc even ever every everybody everyone everything everywhere ex exactly example except far few fifth first five followed following follows for former formerly forth four from further furthermore get gets getting given gives go goes going gone got gotten greetings had hadn't happens hardly has hasn't have haven't having he he's hello help hence her here here's hereafter hereby herein hereupon hers herself hi him himself his hither hopefully how howbeit however i'd i'll i'm i've ie if ignored immediate in inasmuch inc indeed indicate indicated indicates inner insofar instead into inward is isn't it it'd it'll it's its itself just keep keeps kept know knows known last lately later latter latterly least less lest let let's like liked likely little look looking looks ltd mainly many may maybe me mean meanwhile merely might more moreover most mostly much must my myself name namely nd near nearly necessary need needs neither never nevertheless new next nine no nobody non none noone nor normally not nothing novel now nowhere obviously of off often oh ok okay old on once one ones only onto or other others otherwise ought our ours ourselves out outside over overall own particular particularly per perhaps placed please plus possible presumably probably provides que quite qv rather rd re really reasonably regarding regardless regards relatively respectively right said same saw say saying says second secondly see seeing seem seemed seeming seems seen self selves sensible sent serious seriously seven several shall she should shouldn't since six so some somebody somehow someone something sometime sometimes somewhat somewhere soon sorry specified specify specifying still sub such sup sure t's take taken tell tends th than thank thanks thanx that that's thats the their theirs them themselves then thence there there's thereafter thereby therefore therein theres thereupon these they they'd they'll they're they've think third this thorough thoroughly those though three through throughout thru thus to together too took toward towards tried tries truly try trying twice two un under unfortunately unless unlikely until unto up upon us use used useful uses using usually value various very via viz vs want wants was wasn't way we we'd we'll we're we've welcome well went were weren't what what's whatever when whence whenever where where's whereafter whereas whereby wherein whereupon wherever whether which while whither who who's whoever whole whom whose why will willing wish with within without won't wonder would would wouldn't yes yet you you'd you'll you're you've your yours yourself yourselves zero / }; $ATTRIBS = { min_word_size => 4, stopwords => $STOPWORDS, }; sub query { # -------------------------------------------------- # Returns a sth based on a query # # Options: # - paging # mh : max hits # nh : number hit (or page of hits) # # - searching # ww : whole word # ma : 1 => OR match, 0 => AND match, undefined => QUERY # substring : search for substrings of words # bool : 'and' => and search, 'or' => or search, '' => regular query # query : the string of things to ask for # # - filtering # field_name : value # Find all rows with field_name = value # field_name : ">value" # Find all rows with field_name > value. # field_name : " value. # field_name-lt : value # Find all rows with field_name < value. # # Parameters: # ( $CGI ) : a single cgi object # ( $HASH ) : a hash of the parameters # my $self = shift; # create an easily accessible argument hash my $args = $self->common_param(@_); # see if we can setup the filtering constraints my $filter = { %$args }; my $query = delete $args->{query} || $self->{query} || ''; my $ftr_cond; # parse query $self->debug( "Search Query: $query" ) if ($self->{_debug}); my ( $query_struct, $rejected ) = $self->_parse_query_string( $query ); $self->{rejected_keywords} = $rejected; # setup the additional input parameters $query_struct = $self->_preset_options( $query_struct, $args ); # now sort into distinct buckets my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct ); $self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug}); # with the buckets, it's now possible to create a query string # that can be passed directly into the FULLTEXT search. my $query_string = ''; foreach my $search_type ( keys %$buckets ) { my $bucket = $buckets->{$search_type}; foreach my $token ( keys %$bucket ) { next unless $token; my $properties = $bucket->{$token} or next; my $e = ' '; # handle boolean operations $properties->{mode} ||= ''; if ( $properties->{mode} eq 'must' ) { $e .= '+'; } elsif ( $properties->{mode} eq 'cannot' ) { $e .= '-'; } # deal with phrase vs keyword if ( $properties->{phrase} ) { $e .= '"' . quotemeta( $token ) . '"'; } else { $e .= quotemeta $token; # substring match $e .= '*' if $properties->{substring}; } $query_string .= $e; } } # start building the GT::SQL::COndition object that will allow us to # to retreive the data require GT::SQL::Condition; my $tbl = $self->{table}; my $constraints = GT::SQL::Condition->new; # create the GT::SQL::Condition object that will become the filtering # constraints my $filt = $self->{filter}; if ( $filt and ref $filt eq 'HASH' ) { foreach my $fkey ( keys %$filt ) { next if exists $args->{$fkey}; $args->{$fkey} = $filt->{$fkey}; } } if ( my $filter_cond = $tbl->build_query_cond( $args ) ) { $constraints->add( $filter_cond ); } # if the cached filter object is a Condition object, append # it to the filter set if ( $filt and UNIVERSAL::isa( $filt, 'GT::SQL::Condition' ) ) { $constraints->add( $filt ); } # create our fulltext query condition my %weights = $tbl->_weight_cols(); my $cols = join(",", keys %weights); if ( $query_string ) { $constraints->add( GT::SQL::Condition->new( "MATCH( $cols )", "AGAINST", \"('$query_string' IN BOOLEAN MODE)" ) ); } # calculate the cursor constraints foreach my $k (qw( nh mh so sb )) { next if defined $args->{$k}; $args->{$k} = $self->{$k} || ''; } $args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1; $args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25; $args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ ) ? $1 : 'score'; # if the sorting method is "score" the order is forced to "descend" (as there # is almost no reason to order by worst matches) # if the storing key is not "score", the default order will be "ascend" $args->{so} = $args->{sb} eq 'score' ? 'desc' : # comment out this entire line to prevent "descend" order forcing ( (defined $args->{so} and $args->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : 'asc' ); # check that sb is not dangerous my $sb = $self->clean_sb($args->{sb}, $args->{so}); $self->debug_dumper( "About to query. Constraint object: ", $constraints) if ($self->{_debug}); # Setup a limit only if there is no callback. The callback argument requires a full results list unless ( $self->{callback} ) { my $offset = ( $args->{nh} - 1 ) * $args->{mh}; $tbl->select_options($sb) if ($sb); $tbl->select_options("LIMIT $offset, $args->{mh}"); } my $sth; # if the weights are all the same value, the query can be optimized # to use just one MATCH AGAINST argument. However, if the weights # are different, each element must be sectioned and queried separately # with the weight value multipler # check to see if all the weight values are the same. my $base_weight; my $weights_same = 1; foreach ( values %weights ) { $base_weight ||= $_ or next; # init and skip 0s next if $base_weight == $_; $weights_same = 0; last; } # multiplex the action my $result_cols = $self->{callback} ? ($tbl->pk)[0] : '*'; unless ( $query_string ) { $sth = $tbl->select( [ $result_cols ], $constraints ) or return; } elsif ( $weights_same ) { $sth = $tbl->select( [ $result_cols, "MATCH($cols) AGAINST ('$query_string' IN BOOLEAN MODE) AS score" ], $constraints ) or return; } else { # group the multiplier counts my %column_multiplier; foreach ( keys %weights ) { push @{$column_multiplier{$weights{$_}}}, $_; } my @search_parameters; foreach my $val ( keys %column_multiplier ) { next unless $val; my $cols_ar = $column_multiplier{ $val } or next; my $search_cols = join ",", @$cols_ar; if ( $val > 1 ) { push @search_parameters, "( ( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) ) * $val )"; } else { push @search_parameters, "( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) )"; } } my $search_sql = "( " . join( " + ", @search_parameters ) . " ) AS score"; $sth = $tbl->select( [ $result_cols, $search_sql ], $constraints ) or return; } # If we have a callback, we fetch the primary key => score and pass that hash into # the filter. if ($self->{callback}) { unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') { return $self->error('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!"); } my %results = map { $_->[0] => $_->[1] } @{$sth->fetchall_arrayref}; $self->debug_dumper("Running results through callback. Had: " . scalar (keys %results) . " results.", \%results) if ($self->{_debug}); my $filtered = $self->{callback}->($self, \%results) || {}; $self->debug_dumper("New result set: " . scalar (keys %$filtered) . " results.", $filtered) if ($self->{_debug}); $self->{rows} = scalar keys %$filtered; return $self->sth($filtered); } # count the number of hits. create a query for this purpose only if we are required to. $self->{rows} = $sth->rows(); if (($args->{nh} > 1) or ($self->{rows} == $args->{mh})) { $self->{rows} = $tbl->count($constraints); } return $sth; } sub clean_sb { # ------------------------------------------------------------------------------- # Convert the sort by, sort order into an sql string. # my ($class, $sb, $so) = @_; my $output = ''; return $output unless ($sb); if ($sb and not ref $sb) { if ($sb =~ /^[\w\s,]+$/) { if ($sb =~ /\s(?:asc|desc)/i) { $output = 'ORDER BY ' . $sb; } else { $output = 'ORDER BY ' . $sb . ' ' . $so; } } else { $class->error('BADSB', 'WARN', $sb); } } elsif (ref $sb eq 'ARRAY') { foreach ( @$sb ) { /^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next; } $output = 'ORDER BY ' . join(',', @$sb); } return $output; } 1;