First pass at adding key files
This commit is contained in:
		@@ -0,0 +1,187 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MYSQL::Indexer
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Indexer.pm,v 1.17 2004/08/28 03:53:49 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MYSQL::Indexer;
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
use strict;
 | 
			
		||||
use vars qw/@ISA $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
 | 
			
		||||
use GT::SQL::Search::Base::Indexer;
 | 
			
		||||
@ISA = qw/GT::SQL::Search::Base::Indexer/;
 | 
			
		||||
$DEBUG   = 0;
 | 
			
		||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.17 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
 | 
			
		||||
$ERRORS = {
 | 
			
		||||
    NOTFROMWEB      => 'There are far too many records in table %s for create/destroy of this indexing scheme from the web. Please use alternative method.',
 | 
			
		||||
    MYSQLNONSUPPORT => 'Driver MYSQL requires MySQL version 3.23.23 or greater. Currently MySQL version: %s'
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@$GT::SQL::ERRORS{ keys %$ERRORS } = values %$ERRORS;
 | 
			
		||||
 | 
			
		||||
$ERROR_MESSAGE = 'GT::SQL';
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    my $class = shift;
 | 
			
		||||
    return $class->new(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub ok {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my ($class, $tbl) = @_;
 | 
			
		||||
    unless (uc $tbl->{connect}->{driver} eq 'MYSQL') {
 | 
			
		||||
        return $class->error ('MYSQLNONSUPPORT', 'WARN', $tbl->{connect}->{driver});
 | 
			
		||||
    }
 | 
			
		||||
    my $sth = $tbl->do_query(qq!SELECT VERSION()!);
 | 
			
		||||
    my $version = $sth->fetchrow;
 | 
			
		||||
    my ($maj, $min) = split (/\./, $version);
 | 
			
		||||
    unless ($maj > 3 or ($maj == 3 and $min >= 23)) {
 | 
			
		||||
        return $class->error(MYSQLNONSUPPORT => WARN => $version);
 | 
			
		||||
    }
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub drop_search_driver {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
    $self->too_much() and return;
 | 
			
		||||
 | 
			
		||||
    my $tbl = $self->{table} or return;
 | 
			
		||||
    $tbl->connect();
 | 
			
		||||
 | 
			
		||||
    my %weights = $tbl->weight() or return;
 | 
			
		||||
    my $tblname = $tbl->name();
 | 
			
		||||
 | 
			
		||||
# Group the fulltext columns by value of the weight
 | 
			
		||||
    my %cols_grouped;
 | 
			
		||||
    foreach ( keys %weights ) {
 | 
			
		||||
        my $val = $weights{$_} or next;
 | 
			
		||||
        push @{$cols_grouped{$val}}, $_;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# Drop unified fulltext columns if required
 | 
			
		||||
    if ( keys %cols_grouped > 1 ) {
 | 
			
		||||
        $cols_grouped{-1} = [ grep { $weights{$_} } keys %weights ];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# For each value grouped column set create a full text 
 | 
			
		||||
# column
 | 
			
		||||
    foreach my $v ( keys %cols_grouped ) {
 | 
			
		||||
 | 
			
		||||
        my $ft_name = 'ft_'.join("_", sort @{$cols_grouped{$v}});
 | 
			
		||||
 | 
			
		||||
        my $res     = eval {
 | 
			
		||||
            $tbl->do_query(qq!
 | 
			
		||||
                ALTER TABLE $tblname
 | 
			
		||||
                DROP INDEX $ft_name
 | 
			
		||||
            !);
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
# Break on errors that can't be handled
 | 
			
		||||
        if ( $@ ) {
 | 
			
		||||
            next if $@ !~ /exist/i;
 | 
			
		||||
            $self->warn( "$@" );
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub add_search_driver {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
    $self->too_much() and return;
 | 
			
		||||
 | 
			
		||||
    my $tbl     = $self->{table} or return $self->error(BADARGS   => FATAL => "table must be passed into add_search_driver.");
 | 
			
		||||
    my %weights = $tbl->weight() or return $self->error(NOWEIGHTS => 'WARN');
 | 
			
		||||
    my $tblname = $tbl->name()   or return $self->error(BADARGS   => FATAL => "table does not have a name?");
 | 
			
		||||
 | 
			
		||||
# group the fulltext columns by value of the weight
 | 
			
		||||
    my %cols_grouped;
 | 
			
		||||
    foreach ( keys %weights ) {
 | 
			
		||||
        my $val = $weights{$_} or next;
 | 
			
		||||
        push @{$cols_grouped{$val}}, $_;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# Create unified fulltext columns if required
 | 
			
		||||
    if ( keys %cols_grouped > 1 ) {
 | 
			
		||||
        $cols_grouped{-1} = [ grep { $weights{$_} } keys %weights ];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# for each value grouped column set create a full text 
 | 
			
		||||
# column
 | 
			
		||||
    foreach my $v ( keys %cols_grouped ) {
 | 
			
		||||
 | 
			
		||||
        my $cols    = join(",", sort @{$cols_grouped{$v}});
 | 
			
		||||
        my $ft_name = 'ft_'.join("_", sort @{$cols_grouped{$v}});
 | 
			
		||||
 | 
			
		||||
        my $res     = eval {
 | 
			
		||||
            $tbl->do_query(qq!
 | 
			
		||||
                ALTER TABLE $tblname
 | 
			
		||||
                ADD FULLTEXT $ft_name ( $cols )
 | 
			
		||||
            !);
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
# break on errors that can't be handled
 | 
			
		||||
        if ( $@ ) {
 | 
			
		||||
            next if $@ =~ /duplicate/i;
 | 
			
		||||
            $self->warn( "$@" );
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return 1;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub too_much {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# returns true if there are too many records to be used on the Web
 | 
			
		||||
#
 | 
			
		||||
    if ( $ENV{REQUEST_METHOD} ) {
 | 
			
		||||
        my $self = shift;
 | 
			
		||||
        my $tbl = $self->{table};
 | 
			
		||||
        if ( $tbl->count() > 5000 ) {
 | 
			
		||||
            $self->error( 'NOTFROMWEB', 'WARN', $tbl->name() );
 | 
			
		||||
            return 1
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_create_table {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    shift->add_search_driver(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub reindex_all {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# this will drop all the fulltext columns and reindex all of them. This should
 | 
			
		||||
# not be required unless the user changes the weights on one of their columns.
 | 
			
		||||
# Unfortunately, this method is not particularly smart and risks not dropping
 | 
			
		||||
# certain index columns and reindexes even when it's not required. It must be
 | 
			
		||||
# recoded at a future date, but as this action won't happen frequently and will 
 | 
			
		||||
# rarely affect the user, it is not a priority.
 | 
			
		||||
#
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
    $self->drop_search_driver;
 | 
			
		||||
    $self->add_search_driver;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,51 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MYSQL::Search
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Search.pm,v 1.14 2004/01/13 01:35:19 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MYSQL::Search;
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
 | 
			
		||||
    use GT::SQL::Search::Base::Search;
 | 
			
		||||
    @ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG   = 0;
 | 
			
		||||
    $VERSION = sprintf "%d.%03d", q$Revision: 1.14 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
        min_word_size => 4
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
# --------------------------------------------------
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
    my $opts = $self->common_param( @_ ); 
 | 
			
		||||
 | 
			
		||||
# determine which mysql search variant to use.
 | 
			
		||||
    my $tbl     = $opts->{table};
 | 
			
		||||
    my $ver_sth = $tbl->do_query( 'SELECT VERSION()' );
 | 
			
		||||
    my $version = $ver_sth->fetchrow_array();
 | 
			
		||||
 | 
			
		||||
    my ( $maj, $min ) = split /\./, $version;
 | 
			
		||||
 | 
			
		||||
    my $pkg = 'GT::SQL::Search::MYSQL::';
 | 
			
		||||
    $pkg   .= $maj > 3 ? 'VER4' : 'VER3';
 | 
			
		||||
 | 
			
		||||
    eval "require $pkg"; 
 | 
			
		||||
    return $pkg->new(@_)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,178 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MYSQL::VER3
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: VER3.pm,v 1.3 2004/08/28 03:53:49 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MYSQL::VER3;
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
 | 
			
		||||
    use GT::SQL::Search::Base::Search;
 | 
			
		||||
    @ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG   = 0;
 | 
			
		||||
    $VERSION = sprintf "%d.%03d", q$Revision: 1.3 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
        min_word_size => 4
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub _phrase_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $phrases = shift or return $_[0];
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    foreach my $phrase ( values %{$phrases} ) {
 | 
			
		||||
        $self->debug_dumper( "Unioning: ", $phrase ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
        my $tmp = {};
 | 
			
		||||
        foreach my $keyword ( @{ $phrase->{phrase}|| [] } ) {
 | 
			
		||||
            $tmp = $self->_get_words ( [ $keyword ], $tmp, 'intersect' );
 | 
			
		||||
            keys %$tmp or return {};
 | 
			
		||||
        }
 | 
			
		||||
        foreach my $key ( keys %$tmp ) { $results->{$key} += $tmp->{$key} }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_phrase {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# one day change this so it does words properly
 | 
			
		||||
    return _get_words(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _union_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    return _get_words(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _intersect_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my ( $self, $keywords, $results ) = @_;
 | 
			
		||||
    $keywords or return $results;
 | 
			
		||||
 | 
			
		||||
    foreach my $keyword ( keys %{ $keywords || {} } ) {
 | 
			
		||||
        $results = $self->_get_words ( [ $keyword ], $results, 'intersect' );
 | 
			
		||||
        keys %$results or return {};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _phrase_intersect_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $phrases = shift or return $_[0];
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    my $tmp = $self->_phrase_query ( $phrases, $results );
 | 
			
		||||
    keys %$results or return $tmp;
 | 
			
		||||
    foreach my $key ( keys %$results ) {
 | 
			
		||||
        if ( $tmp->{$key} ) {
 | 
			
		||||
            $results->{$key} += $tmp->{$key};
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            delete $results->{$key}
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _disjoin_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $words   = shift or return shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    $results = $self->_get_words([ keys %{$words || {}} ], $results, 'disjoin' );
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _phrase_disjoin_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $phrases = shift or return shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    my $tmp = $self->_phrase_query ( $phrases, $results );
 | 
			
		||||
    keys %$results or return $tmp;
 | 
			
		||||
    foreach my $key ( keys %$results ) {
 | 
			
		||||
        $tmp->{$key} and delete $results->{$key};
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_words {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self     = shift;
 | 
			
		||||
    my $words    = shift or return $_[0] || {};
 | 
			
		||||
    my $results  = shift || {};
 | 
			
		||||
    my $mode     = lc shift;
 | 
			
		||||
 | 
			
		||||
    my $tbl      = $self->{table} or $self->error( 'BADARGS', 'FATAL', 'Must have table object defined' );
 | 
			
		||||
    my $tname    = $tbl->name();
 | 
			
		||||
    my $wordlist = join " ", ( ref $words ? ( ref $words eq 'HASH' ? keys %$words : @$words ) : $words ) ;
 | 
			
		||||
    my ($pk)     = $tbl->pk;
 | 
			
		||||
 | 
			
		||||
    my %weights  = $tbl->_weight_cols();
 | 
			
		||||
    my $cols     = join(",", keys %weights);
 | 
			
		||||
    my $qwrds    = quotemeta( $wordlist );
 | 
			
		||||
    my $where    = ( $results and keys %$results )
 | 
			
		||||
        ? ("AND $pk IN(" . join(',', keys %$results) . ")")
 | 
			
		||||
        : '';
 | 
			
		||||
    my $query = qq!
 | 
			
		||||
        SELECT $pk, MATCH($cols) AGAINST ('$qwrds') AS SCORE
 | 
			
		||||
        FROM  $tname
 | 
			
		||||
        WHERE MATCH($cols) AGAINST ('$qwrds')
 | 
			
		||||
        $where
 | 
			
		||||
    !;
 | 
			
		||||
    my $sth = $tbl->do_query( $query ) or return;
 | 
			
		||||
 | 
			
		||||
    if ( $mode eq 'disjoin' ) {
 | 
			
		||||
        while ( my $result = $sth->fetchrow ) {
 | 
			
		||||
            delete $results->{$result};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    elsif ( $mode eq 'intersect' ) {
 | 
			
		||||
        my $tmp = {};
 | 
			
		||||
        while ( my $aref = $sth->fetchrow_arrayref ) {
 | 
			
		||||
            $tmp->{$aref->[0]} = $aref->[1];
 | 
			
		||||
        }
 | 
			
		||||
        if ( $results and keys %$results ) {
 | 
			
		||||
            while (my ($id, $score) = each %$results) {
 | 
			
		||||
                if (not defined $tmp->{$id}) {
 | 
			
		||||
                    delete $results->{$id};
 | 
			
		||||
                    next;
 | 
			
		||||
                }
 | 
			
		||||
                $results->{$id} += $score;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            $results = $tmp;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        while ( my $aref = $sth->fetchrow_arrayref ) {
 | 
			
		||||
            $results->{$aref->[0]} += $aref->[1];
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,355 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MYSQL::VER4
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: VER4.pm,v 1.9 2004/01/13 01:35:19 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MYSQL::VER4;
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS /;
 | 
			
		||||
    use GT::SQL::Search::Base::Search;
 | 
			
		||||
    @ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG   = 0;
 | 
			
		||||
    $VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
    $STOPWORDS = { map { $_ => 1 } qw/
 | 
			
		||||
 | 
			
		||||
          a's able about above according accordingly across actually after
 | 
			
		||||
          afterwards again against ain't all allow allows almost alone
 | 
			
		||||
          along already also although always am among amongst an and another
 | 
			
		||||
          any anybody anyhow anyone anything anyway anyways anywhere apart
 | 
			
		||||
          appear appreciate appropriate are aren't around as aside ask asking
 | 
			
		||||
          associated at available away awfully be became because become becomes
 | 
			
		||||
          becoming been before beforehand behind being believe below beside
 | 
			
		||||
          besides best better between beyond both brief but by c'mon c's came
 | 
			
		||||
          can can't cannot cant cause causes certain certainly changes clearly
 | 
			
		||||
          co com come comes concerning consequently consider considering
 | 
			
		||||
          contain containing contains corresponding could couldn't course currently
 | 
			
		||||
          definitely described despite did didn't different do does doesn't
 | 
			
		||||
          doing don't done down downwards during each edu eg eight either else
 | 
			
		||||
          elsewhere enough entirely especially et etc even ever every everybody
 | 
			
		||||
          everyone everything everywhere ex exactly example except far few
 | 
			
		||||
          fifth first five followed following follows for former formerly
 | 
			
		||||
          forth four from further furthermore get gets getting given gives
 | 
			
		||||
          go goes going gone got gotten greetings had hadn't happens hardly
 | 
			
		||||
          has hasn't have haven't having he he's hello help hence her here
 | 
			
		||||
          here's hereafter hereby herein hereupon hers herself hi him himself
 | 
			
		||||
          his hither hopefully how howbeit however i'd i'll i'm i've ie if ignored
 | 
			
		||||
          immediate in inasmuch inc indeed indicate indicated indicates inner
 | 
			
		||||
          insofar instead into inward is isn't it it'd it'll it's its itself
 | 
			
		||||
          just keep keeps kept know knows known last lately later latter latterly
 | 
			
		||||
          least less lest let let's like liked likely little look looking looks
 | 
			
		||||
          ltd mainly many may maybe me mean meanwhile merely might more
 | 
			
		||||
          moreover most mostly much must my myself name namely nd near nearly
 | 
			
		||||
          necessary need needs neither never nevertheless new next nine no
 | 
			
		||||
          nobody non none noone nor normally not nothing novel now nowhere
 | 
			
		||||
          obviously of off often oh ok okay old on once one ones only onto
 | 
			
		||||
          or other others otherwise ought our ours ourselves out outside over
 | 
			
		||||
          overall own particular particularly per perhaps placed please plus
 | 
			
		||||
          possible presumably probably provides que quite qv rather rd re
 | 
			
		||||
          really reasonably regarding regardless regards relatively respectively
 | 
			
		||||
          right said same saw say saying says second secondly see seeing seem
 | 
			
		||||
          seemed seeming seems seen self selves sensible sent serious seriously
 | 
			
		||||
          seven several shall she should shouldn't since six so some somebody
 | 
			
		||||
          somehow someone something sometime sometimes somewhat somewhere
 | 
			
		||||
          soon sorry specified specify specifying still sub such sup sure
 | 
			
		||||
          t's take taken tell tends th than thank thanks thanx that that's
 | 
			
		||||
          thats the their theirs them themselves then thence there there's
 | 
			
		||||
          thereafter thereby therefore therein theres thereupon these they
 | 
			
		||||
          they'd they'll they're they've think third this thorough thoroughly
 | 
			
		||||
          those though three through throughout thru thus to together too
 | 
			
		||||
          took toward towards tried tries truly try trying twice two un
 | 
			
		||||
          under unfortunately unless unlikely until unto up upon us use used
 | 
			
		||||
          useful uses using usually value various very via viz vs want wants
 | 
			
		||||
          was wasn't way we we'd we'll we're we've welcome well went were
 | 
			
		||||
          weren't what what's whatever when whence whenever where where's
 | 
			
		||||
          whereafter whereas whereby wherein whereupon wherever whether
 | 
			
		||||
          which while whither who who's whoever whole whom whose why will
 | 
			
		||||
          willing wish with within without won't wonder would would wouldn't
 | 
			
		||||
          yes yet you you'd you'll you're you've your yours yourself
 | 
			
		||||
          yourselves zero
 | 
			
		||||
                
 | 
			
		||||
    / };
 | 
			
		||||
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
        min_word_size => 4,
 | 
			
		||||
        stopwords => $STOPWORDS,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub query {
 | 
			
		||||
# --------------------------------------------------
 | 
			
		||||
# Returns a sth based on a query
 | 
			
		||||
#
 | 
			
		||||
# Options:
 | 
			
		||||
#        - paging
 | 
			
		||||
#           mh            : max hits
 | 
			
		||||
#           nh            : number hit (or page of hits)
 | 
			
		||||
#
 | 
			
		||||
#        - searching
 | 
			
		||||
#           ww            : whole word
 | 
			
		||||
#           ma            : 1 => OR match, 0 => AND match, undefined => QUERY
 | 
			
		||||
#           substring     : search for substrings of words
 | 
			
		||||
#           bool          : 'and' => and search, 'or' => or search, '' => regular query
 | 
			
		||||
#           query         : the string of things to ask for
 | 
			
		||||
#
 | 
			
		||||
#        - filtering
 | 
			
		||||
#           field_name    : value       # Find all rows with field_name = value
 | 
			
		||||
#           field_name    : ">value"    # Find all rows with field_name > value.
 | 
			
		||||
#           field_name    : "<value"    # Find all rows with field_name < value.
 | 
			
		||||
#           field_name-gt : value       # Find all rows with field_name > value.
 | 
			
		||||
#           field_name-lt : value       # Find all rows with field_name < value.
 | 
			
		||||
#
 | 
			
		||||
# Parameters:
 | 
			
		||||
#        ( $CGI ) : a single cgi object
 | 
			
		||||
#        ( $HASH ) : a hash of the parameters
 | 
			
		||||
#
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
# create an easily accessible argument hash
 | 
			
		||||
    my $args = $self->common_param(@_);
 | 
			
		||||
 | 
			
		||||
# see if we can setup the filtering constraints
 | 
			
		||||
    my $filter = { %$args }; 
 | 
			
		||||
    my $query  = delete $args->{query} || $self->{query} || '';
 | 
			
		||||
    my $ftr_cond;
 | 
			
		||||
 | 
			
		||||
# parse query
 | 
			
		||||
    $self->debug( "Search Query: $query" ) if ($self->{_debug});
 | 
			
		||||
    my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
 | 
			
		||||
 | 
			
		||||
    $self->{rejected_keywords} = $rejected;
 | 
			
		||||
 | 
			
		||||
# setup the additional input parameters
 | 
			
		||||
    $query_struct = $self->_preset_options( $query_struct, $args );
 | 
			
		||||
 | 
			
		||||
# now sort into distinct buckets
 | 
			
		||||
    my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
 | 
			
		||||
    $self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# with the buckets, it's now possible to create a query string
 | 
			
		||||
# that can be passed directly into the FULLTEXT search.
 | 
			
		||||
    my $query_string = '';
 | 
			
		||||
 | 
			
		||||
    foreach my $search_type ( keys %$buckets ) {
 | 
			
		||||
        my $bucket = $buckets->{$search_type};
 | 
			
		||||
        foreach my $token ( keys %$bucket ) {
 | 
			
		||||
            next unless $token;
 | 
			
		||||
            my $properties = $bucket->{$token} or next;
 | 
			
		||||
 | 
			
		||||
            my $e = ' ';
 | 
			
		||||
 | 
			
		||||
# handle boolean operations
 | 
			
		||||
            $properties->{mode} ||= '';
 | 
			
		||||
            if ( $properties->{mode} eq 'must' ) {
 | 
			
		||||
                $e .= '+';
 | 
			
		||||
            }
 | 
			
		||||
            elsif ( $properties->{mode} eq 'cannot' ) {
 | 
			
		||||
                $e .= '-';
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
# deal with phrase vs keyword
 | 
			
		||||
            if ( $properties->{phrase} ) {
 | 
			
		||||
                $e .= '"' . quotemeta( $token ) . '"';
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                $e .= quotemeta $token;
 | 
			
		||||
 | 
			
		||||
# substring match
 | 
			
		||||
                $e .= '*' if $properties->{substring};
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            $query_string .= $e;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# start building the GT::SQL::COndition object that will allow us to
 | 
			
		||||
# to retreive the data
 | 
			
		||||
 | 
			
		||||
    require GT::SQL::Condition;
 | 
			
		||||
    my $tbl = $self->{table};
 | 
			
		||||
    my $constraints = GT::SQL::Condition->new;
 | 
			
		||||
 | 
			
		||||
# create the GT::SQL::Condition object that will become the filtering
 | 
			
		||||
# constraints
 | 
			
		||||
    my $filt = $self->{filter};
 | 
			
		||||
 | 
			
		||||
    if ( $filt and ref $filt eq 'HASH' ) {
 | 
			
		||||
        foreach my $fkey ( keys %$filt ) {
 | 
			
		||||
            next if exists $args->{$fkey};
 | 
			
		||||
            $args->{$fkey} = $filt->{$fkey};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if ( my $filter_cond = $tbl->build_query_cond( $args ) ) {
 | 
			
		||||
        $constraints->add( $filter_cond );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# if the cached filter object is a Condition object, append
 | 
			
		||||
# it to the filter set
 | 
			
		||||
    if ( $filt and  UNIVERSAL::isa( $filt, 'GT::SQL::Condition' ) ) {
 | 
			
		||||
        $constraints->add( $filt );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# create our fulltext query condition
 | 
			
		||||
    my %weights = $tbl->_weight_cols();
 | 
			
		||||
    my $cols = join(",", keys %weights);
 | 
			
		||||
    if ( $query_string ) {
 | 
			
		||||
        $constraints->add( GT::SQL::Condition->new( 
 | 
			
		||||
                                "MATCH( $cols )", 
 | 
			
		||||
                                "AGAINST", 
 | 
			
		||||
                                \"('$query_string' IN BOOLEAN MODE)" ) );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# calculate the cursor constraints
 | 
			
		||||
    foreach my $k (qw( nh mh so sb )) {
 | 
			
		||||
        next if defined $args->{$k};
 | 
			
		||||
        $args->{$k} = $self->{$k} || '';
 | 
			
		||||
    }
 | 
			
		||||
    $args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
 | 
			
		||||
    $args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
 | 
			
		||||
    $args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ )  ? $1 : 'score';
 | 
			
		||||
 | 
			
		||||
# if the sorting method is "score" the order is forced to "descend" (as there
 | 
			
		||||
# is almost no reason to order by worst matches) 
 | 
			
		||||
# if the storing key is not "score", the default order will be "ascend"
 | 
			
		||||
    $args->{so} = 
 | 
			
		||||
            $args->{sb} eq 'score' ? 'desc' : # comment out this entire line to prevent "descend" order forcing
 | 
			
		||||
                    ( (defined $args->{so} and $args->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : 'asc' );
 | 
			
		||||
 | 
			
		||||
# check that sb is not dangerous
 | 
			
		||||
    my $sb = $self->clean_sb($args->{sb}, $args->{so});
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "About to query. Constraint object: ", $constraints) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# Setup a limit only if there is no callback. The callback argument requires a full results list
 | 
			
		||||
    unless ( $self->{callback} ) {
 | 
			
		||||
        my $offset = ( $args->{nh} - 1 ) * $args->{mh};
 | 
			
		||||
        $tbl->select_options($sb) if ($sb);
 | 
			
		||||
        $tbl->select_options("LIMIT $offset, $args->{mh}");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    my $sth;
 | 
			
		||||
 | 
			
		||||
# if the weights are all the same value, the query can be optimized
 | 
			
		||||
# to use just one MATCH AGAINST argument. However, if the weights
 | 
			
		||||
# are different, each element must be sectioned and queried separately
 | 
			
		||||
# with the weight value multipler
 | 
			
		||||
 | 
			
		||||
# check to see if all the weight values are the same.
 | 
			
		||||
    my $base_weight; 
 | 
			
		||||
    my $weights_same = 1;
 | 
			
		||||
    foreach ( values %weights ) {
 | 
			
		||||
        $base_weight ||= $_ or next; # init and skip 0s
 | 
			
		||||
        next if $base_weight == $_;
 | 
			
		||||
        $weights_same = 0;
 | 
			
		||||
        last;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# multiplex the action
 | 
			
		||||
    my $result_cols = $self->{callback} ? ($tbl->pk)[0] : '*';
 | 
			
		||||
 | 
			
		||||
    unless ( $query_string ) {
 | 
			
		||||
        $sth = $tbl->select( [ $result_cols ], $constraints ) or return;
 | 
			
		||||
    }
 | 
			
		||||
    elsif ( $weights_same ) {
 | 
			
		||||
        $sth = $tbl->select( [ $result_cols, "MATCH($cols) AGAINST ('$query_string' IN BOOLEAN MODE) AS score" ], $constraints )
 | 
			
		||||
                        or return;
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
 | 
			
		||||
# group the multiplier counts
 | 
			
		||||
        my %column_multiplier;
 | 
			
		||||
        foreach ( keys %weights ) {
 | 
			
		||||
            push @{$column_multiplier{$weights{$_}}}, $_;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        my @search_parameters;
 | 
			
		||||
        foreach my $val ( keys %column_multiplier ) {
 | 
			
		||||
            next unless $val;
 | 
			
		||||
 | 
			
		||||
            my $cols_ar = $column_multiplier{ $val } or next;
 | 
			
		||||
            my $search_cols = join ",", @$cols_ar;
 | 
			
		||||
 | 
			
		||||
            if ( $val > 1 ) {
 | 
			
		||||
                push @search_parameters, "( ( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) ) * $val )";
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                push @search_parameters, "( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) )";
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        my $search_sql = "( " . join( " + ", @search_parameters ) . " ) AS score";
 | 
			
		||||
 | 
			
		||||
        $sth = $tbl->select( [ $result_cols, $search_sql ], $constraints )
 | 
			
		||||
                        or return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# If we have a callback, we fetch the primary key => score and pass that hash into 
 | 
			
		||||
# the filter. 
 | 
			
		||||
    if ($self->{callback}) {
 | 
			
		||||
        unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
 | 
			
		||||
            return $self->error('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
 | 
			
		||||
        }
 | 
			
		||||
        my %results = map { $_->[0] => $_->[1] } @{$sth->fetchall_arrayref};
 | 
			
		||||
 | 
			
		||||
        $self->debug_dumper("Running results through callback. Had: " . scalar (keys %results) . " results.", \%results) if ($self->{_debug});
 | 
			
		||||
        my $filtered = $self->{callback}->($self, \%results) || {};
 | 
			
		||||
        $self->debug_dumper("New result set: " . scalar (keys %$filtered) . " results.", $filtered) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
        $self->{rows} = scalar keys %$filtered;
 | 
			
		||||
        return $self->sth($filtered);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# count the number of hits. create a query for this purpose only if we are required to.
 | 
			
		||||
    $self->{rows} = $sth->rows();
 | 
			
		||||
    if (($args->{nh} > 1) or ($self->{rows} == $args->{mh})) {
 | 
			
		||||
        $self->{rows} = $tbl->count($constraints);
 | 
			
		||||
    } 
 | 
			
		||||
    return $sth;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub clean_sb {
 | 
			
		||||
# -------------------------------------------------------------------------------
 | 
			
		||||
# Convert the sort by, sort order into an sql string.
 | 
			
		||||
#
 | 
			
		||||
    my ($class, $sb, $so) = @_;
 | 
			
		||||
    my $output = '';
 | 
			
		||||
    
 | 
			
		||||
    return $output unless ($sb);
 | 
			
		||||
 | 
			
		||||
    if ($sb and not ref $sb) {
 | 
			
		||||
        if ($sb =~ /^[\w\s,]+$/)  {
 | 
			
		||||
            if ($sb =~ /\s(?:asc|desc)/i) {
 | 
			
		||||
                $output = 'ORDER BY ' . $sb;
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                $output = 'ORDER BY ' . $sb . ' ' . $so;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            $class->error('BADSB', 'WARN', $sb);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    elsif (ref $sb eq 'ARRAY') {
 | 
			
		||||
        foreach ( @$sb ) {
 | 
			
		||||
            /^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
 | 
			
		||||
        }
 | 
			
		||||
        $output = 'ORDER BY ' . join(',', @$sb);
 | 
			
		||||
    }
 | 
			
		||||
    return $output;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
		Reference in New Issue
	
	Block a user