First pass at adding key files
This commit is contained in:
		@@ -0,0 +1,82 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Search::Base::Common
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Common.pm,v 1.8 2004/10/13 21:45:02 aki Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Base classes upon which all search drivers are based
 | 
			
		||||
#
 | 
			
		||||
package GT::SQL::Search::Base::Common;
 | 
			
		||||
 | 
			
		||||
use strict;
 | 
			
		||||
use Exporter;
 | 
			
		||||
use vars qw/ @ISA @EXPORT $STOPWORDS /;
 | 
			
		||||
 | 
			
		||||
    @ISA = qw( Exporter );
 | 
			
		||||
    @EXPORT = qw( &_tokenize &_check_word $STOPWORDS );
 | 
			
		||||
 | 
			
		||||
    $STOPWORDS = { map { $_ => 1 } qw/
 | 
			
		||||
        of about or all several also she among since an some and such are than
 | 
			
		||||
        as that at the be them because there been these between they both this
 | 
			
		||||
        but those by to do toward during towards each upon either for from was
 | 
			
		||||
        had were has what have when he where her which his while however with if
 | 
			
		||||
        within in would into you your is it its many more most must on re it
 | 
			
		||||
        test not above add am pm jan january feb february mar march apr april
 | 
			
		||||
        may jun june jul july aug august sep sept september oct october nov
 | 
			
		||||
        november dec december find & > < we http com www inc other
 | 
			
		||||
        including 
 | 
			
		||||
    / };
 | 
			
		||||
 | 
			
		||||
sub _tokenize {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# takes a strings and chops it up into little bits
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $text    = shift;
 | 
			
		||||
    my ( @words, $i, %rejected, $word, $code );
 | 
			
		||||
 | 
			
		||||
# split on any non-word (includes accents) characters
 | 
			
		||||
    @words = split /[^\w\x80-\xFF\-]+/, lc $text;
 | 
			
		||||
    $self->debug_dumper( "Words: ", \@words ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# drop all words that are too small, etc.
 | 
			
		||||
    $i = 0;
 | 
			
		||||
    while ( $i <= $#words ) {
 | 
			
		||||
        $word = $words[ $i ];
 | 
			
		||||
        if ((exists $self->{stopwords}{$word}   and ($code = 'STOPWORD')) or
 | 
			
		||||
            (length($word) < $self->{min_word_size} and $code = 'TOOSMALL' )  or
 | 
			
		||||
            (length($word) > $self->{max_word_size} and $code = 'TOOBIG')) {
 | 
			
		||||
                splice( @words, $i, 1 );
 | 
			
		||||
                $rejected{$word}    = $self->{'rejections'}->{$code};
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            $i++;   # Words ok.
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    $self->debug_dumper( "Accepted Words: ", \@words  )   if ($self->{_debug});
 | 
			
		||||
    $self->debug_dumper( "Rejected Words: ", \%rejected ) if ($self->{_debug});
 | 
			
		||||
    
 | 
			
		||||
    return ( \@words, \%rejected );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _check_word {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# Returns an error code if it is an invalid word, otherwise returns nothing.
 | 
			
		||||
#
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
    my $word = shift;
 | 
			
		||||
    my $code;
 | 
			
		||||
    if ((exists $self->{stopwords}{$word}      and ($code = 'STOPWORD')) or
 | 
			
		||||
        (length($word) < $self->{min_word_size} and $code = 'TOOSMALL' )  or
 | 
			
		||||
        (length($word) > $self->{max_word_size} and $code = 'TOOBIG')) {
 | 
			
		||||
            return $code;
 | 
			
		||||
    }
 | 
			
		||||
    return;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,78 @@
 | 
			
		||||
# ====================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Search::Base::Indexer
 | 
			
		||||
#   Author: Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Indexer.pm,v 1.4 2004/01/13 01:35:19 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ====================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::Base::Indexer;
 | 
			
		||||
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
 | 
			
		||||
    use GT::Base;
 | 
			
		||||
    use GT::SQL::Search::Base::Common;
 | 
			
		||||
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG   = 0;
 | 
			
		||||
    $VERSION = sprintf "%d.%03d", q$Revision: 1.4 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
    @ISA     = qw/GT::Base GT::SQL::Search::Base::Common/;
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
        driver    => undef,
 | 
			
		||||
        stopwords => $STOPWORDS,
 | 
			
		||||
        rejections   => {        
 | 
			
		||||
            STOPWORD => "is a stopword",
 | 
			
		||||
            TOOSMALL => "is too small a word",
 | 
			
		||||
            TOOBIG   => "is too big a word"
 | 
			
		||||
        },
 | 
			
		||||
        table     => '',
 | 
			
		||||
        init      => 0,
 | 
			
		||||
        debug     => 0,
 | 
			
		||||
        min_word_size => 3,
 | 
			
		||||
        max_word_size => 50,             
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub drop_search_driver { 1 }
 | 
			
		||||
sub add_search_driver { 1 }
 | 
			
		||||
 | 
			
		||||
# found in GT::SQL::Creator
 | 
			
		||||
sub pre_create_table { 1 }
 | 
			
		||||
sub post_create_table { 1 }
 | 
			
		||||
 | 
			
		||||
# GT::SQL::Editor
 | 
			
		||||
sub pre_add_column  { 1 }
 | 
			
		||||
sub post_add_column { 1 }
 | 
			
		||||
 | 
			
		||||
sub pre_delete_column  { 1 }
 | 
			
		||||
sub post_delete_column { 1 }
 | 
			
		||||
 | 
			
		||||
sub pre_drop_table { 1 }
 | 
			
		||||
sub post_drop_table { 1 }
 | 
			
		||||
 | 
			
		||||
# GT::SQL::Table
 | 
			
		||||
sub pre_add_record { 1 }
 | 
			
		||||
sub post_add_record { 1 }
 | 
			
		||||
 | 
			
		||||
sub pre_update_record { 1 }
 | 
			
		||||
sub post_update_record { 1 }
 | 
			
		||||
 | 
			
		||||
sub pre_delete_record { 1 }
 | 
			
		||||
sub post_delete_record { 1 }
 | 
			
		||||
 | 
			
		||||
sub pre_delete_all_records { 1 }
 | 
			
		||||
sub post_delete_all_records { 1 }
 | 
			
		||||
 | 
			
		||||
sub driver_ok { 1 }
 | 
			
		||||
 | 
			
		||||
sub reindex_all { 1 }
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
							
								
								
									
										287
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/Base/STH.pm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										287
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/Base/STH.pm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,287 @@
 | 
			
		||||
# ====================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Search::STH
 | 
			
		||||
#   Author: Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: STH.pm,v 1.25 2004/08/28 03:53:46 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ====================================================================
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::STH;
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE /;
 | 
			
		||||
    use GT::Base;
 | 
			
		||||
 | 
			
		||||
    @ISA    = ('GT::Base');
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
                '_debug'    => 0,
 | 
			
		||||
                'sth'       => undef,
 | 
			
		||||
                'results'   => {},
 | 
			
		||||
                'db'        => undef,
 | 
			
		||||
                'table'     => undef,
 | 
			
		||||
                'index'     => 0,
 | 
			
		||||
                'order'     => [],
 | 
			
		||||
                'sb'        => 'score',
 | 
			
		||||
                'so'        => '',
 | 
			
		||||
                'score_col' => 'SCORE',
 | 
			
		||||
                'score_sort'=> 0,
 | 
			
		||||
                'nh'        => 0,
 | 
			
		||||
                'mh'        => 0
 | 
			
		||||
    };
 | 
			
		||||
    $ERROR_MESSAGE = 'GT::SQL';
 | 
			
		||||
    $ERRORS        = {
 | 
			
		||||
        BADSB => 'Invalid character found in so: "%s"',
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub init {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
 | 
			
		||||
# setup the options
 | 
			
		||||
    $self->set(@_);
 | 
			
		||||
 | 
			
		||||
# correct a few of the values
 | 
			
		||||
    --$self->{nh} if $self->{nh};
 | 
			
		||||
 | 
			
		||||
    my $sth;
 | 
			
		||||
    my $results = $self->{results};
 | 
			
		||||
    $self->{rows}   = scalar( $results ? keys %{$results} : 0 );
 | 
			
		||||
 | 
			
		||||
# if we have asked to have sorting by another column (non score), create the part of the query that handles taht
 | 
			
		||||
    $self->debug( "Setting up sorting for GT::SQL::Search::STH" ) if ($self->{_debug});
 | 
			
		||||
    my $sb;
 | 
			
		||||
 | 
			
		||||
# clean up the sort by columns.
 | 
			
		||||
    unless ($self->{'score_sort'}) {
 | 
			
		||||
        $sb = GT::SQL::Search::Base::Search->clean_sb($self->{sb}, $self->{so});
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# setup the max hits and the offsets
 | 
			
		||||
    $self->{index}  = $self->{nh} * $self->{mh} || 0;
 | 
			
		||||
    $self->{max_index} = $self->{index} + $self->{mh} - 1; # index + mh is the first record which should not be returned.
 | 
			
		||||
 | 
			
		||||
    if ( $self->{max_index} > $self->{rows} ) {
 | 
			
		||||
        $self->{max_index}  = $self->{rows};
 | 
			
		||||
        $self->{rows}       = $self->{rows} - $self->{index};
 | 
			
		||||
        $self->{rows} < 0 ? $self->{rows} = 0 : 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    else {
 | 
			
		||||
        $self->{rows}       = $self->{mh};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# if we are sorting by another column, handle that
 | 
			
		||||
    if ( $sb and (keys %{$self->{results}})) {
 | 
			
		||||
        my ( $table, $pk ) = $self->_table_info();
 | 
			
		||||
        my ( $query, $where, $st, $limit );
 | 
			
		||||
 | 
			
		||||
        $where      = qq!$pk in(! . join( ",", keys %{$self->{results}}) . ')';
 | 
			
		||||
        $self->{mh} and $limit = qq!LIMIT $self->{index}, $self->{rows}!;
 | 
			
		||||
        $query      = qq!
 | 
			
		||||
            SELECT $pk
 | 
			
		||||
            FROM   $table
 | 
			
		||||
            WHERE  $where
 | 
			
		||||
            $sb
 | 
			
		||||
            $limit
 | 
			
		||||
        !;
 | 
			
		||||
        $self->debug( "Row fetch query: $query" ) if ($self->{_debug});
 | 
			
		||||
        $sth        = $self->{table}->{driver}->prepare( $query );
 | 
			
		||||
        $sth->execute();
 | 
			
		||||
 | 
			
		||||
# fix the counts
 | 
			
		||||
        $self->{index}    = 0;
 | 
			
		||||
        $self->{max_hits} = $self->{rows};
 | 
			
		||||
 | 
			
		||||
# now return them
 | 
			
		||||
        my $order         = $sth->fetchall_arrayref();
 | 
			
		||||
        $sth->finish();
 | 
			
		||||
 | 
			
		||||
        $self->{'order'}  = [ map { $_->[0] } @{$order} ];
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        $self->{'order'}  = [ sort { 
 | 
			
		||||
                                        ( $results->{$b} || return 0 ) <=> ( $results->{$a} || 0 )
 | 
			
		||||
                                  } keys %{$results} ];
 | 
			
		||||
        $self->debug_dumper( "Results will be presented in the following order: ", $self->{'order'} ) if ($self->{_debug});
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub cache_results {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
 | 
			
		||||
    my $results = $self->{'results'};
 | 
			
		||||
    my ($sth, @records, $i, %horder, @order, $in_list);
 | 
			
		||||
    my $table   = $self->{table};
 | 
			
		||||
    my $tname   = $table->name();
 | 
			
		||||
    my ($pk)    = $self->{table}->pk;
 | 
			
		||||
 | 
			
		||||
    use GT::SQL::Condition;
 | 
			
		||||
 | 
			
		||||
# we know what we're doing here so shut off warns (complains about uninit'd values in range
 | 
			
		||||
# if thee aren't enough elements in the order array)
 | 
			
		||||
    my $w     = $^W; $^W = 0;
 | 
			
		||||
    @order    = grep $_, @{$self->{'order'} || []}[$self->{index}..$self->{max_index}] or return [];
 | 
			
		||||
    $^W       = $w;
 | 
			
		||||
 | 
			
		||||
    $i        = 0; %horder  = ( map { ( $_ => $i++) } @order );
 | 
			
		||||
    $in_list  = join ( ",", @order );
 | 
			
		||||
    my $query = qq|
 | 
			
		||||
        SELECT * 
 | 
			
		||||
        FROM
 | 
			
		||||
            $tname
 | 
			
		||||
        WHERE
 | 
			
		||||
            $pk IN($in_list)
 | 
			
		||||
    |;
 | 
			
		||||
 | 
			
		||||
# the following is left commented out as...
 | 
			
		||||
# if $tbl->select is used $table->hits() will not
 | 
			
		||||
# return an accurate count of the number of all the hits. instead, will return
 | 
			
		||||
# a value up to mh. $tbl->hits() is important because the value is used
 | 
			
		||||
# in toolbar calculations
 | 
			
		||||
#
 | 
			
		||||
#    $sth     = $table->select( GT::SQL::Condition->new( $pk, 'IN', \"($in_list)" ) );
 | 
			
		||||
    $sth = $table->do_query( $query );
 | 
			
		||||
 | 
			
		||||
    while ( my $href = $sth->fetchrow_hashref() ) { 
 | 
			
		||||
        $records[$horder{$href->{$pk}}] = \%$href
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return \@records;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub fetchrow_array {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    return @{ $_[0]->fetchrow_arrayref() || [] };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub fetchrow_arrayref {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $records = $self->{cache} ||= $self->cache_results;
 | 
			
		||||
    my $href    = shift @$records or return;
 | 
			
		||||
    return $self->_hash_to_array($href);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub fetchrow_hashref {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
 | 
			
		||||
    my $results = $self->{'results'};
 | 
			
		||||
    my $records = $self->{cache} ||= $self->cache_results;
 | 
			
		||||
    my $table   = $self->{table};
 | 
			
		||||
    my ($pk)    = $self->{table}->pk;
 | 
			
		||||
 | 
			
		||||
    my $href    = shift @$records or return;
 | 
			
		||||
 | 
			
		||||
    $href->{$self->{'score_col'}} = ( $self->{score} = $results->{$href->{$pk}} );
 | 
			
		||||
 | 
			
		||||
    return $href;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub fetchall_hashref {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
    my @results;
 | 
			
		||||
    while (my $res = $self->fetchrow_hashref) {
 | 
			
		||||
        push @results, $res;
 | 
			
		||||
    }
 | 
			
		||||
    return \@results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub fetchall_list {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    return { map { @$_ } @{shift->fetchall_arrayref} }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub fetchall_arrayref {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
 | 
			
		||||
    $self->{order} or return [];
 | 
			
		||||
    my $results = $self->{results};
 | 
			
		||||
    my ($pk)    = $self->{table}->pk;
 | 
			
		||||
    my $scol    = $self->{score_col};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    if (!$self->{allref_cache}) {
 | 
			
		||||
        $self->{allref_cache} ||= $self->cache_results;
 | 
			
		||||
 | 
			
		||||
        for my $i ( 0 .. $#{$self->{allref_cache}} ) {
 | 
			
		||||
            my $element = $self->{allref_cache}->[$i];
 | 
			
		||||
            if ( $_[0] eq 'HASH' ) {
 | 
			
		||||
                    $element->{$scol} = $results->{$element->{$pk}};
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                    $element->{$scol} = $self->_hash_to_array( $element->{$scol} );
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    my $records = $self->{allref_cache};
 | 
			
		||||
 | 
			
		||||
    return $records;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub score {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    return $self->{score};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _hash_to_array {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $href    = shift or return;
 | 
			
		||||
 | 
			
		||||
    my $results = $self->{'results'};
 | 
			
		||||
    my $table   = $self->{table};
 | 
			
		||||
    my $cols    = $table->cols();
 | 
			
		||||
    my $ordercols = ( $self->{ordercols} ||= [ sort { $cols->{$a}->{pos} <=> $cols->{$b}->{pos} } keys %$cols ] );
 | 
			
		||||
    my ($pk)    = $self->{table}->pk;
 | 
			
		||||
    my $aref    = [ map { $href->{$_} } @$ordercols, ( $self->{score} = $results->{$href->{$pk}} ) ];
 | 
			
		||||
 | 
			
		||||
    return $aref;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub rows {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    return $self->{rows};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _table_info {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $table   = $self->{table}->name() or return $self->error('NOSCHEMA', 'FATAL');
 | 
			
		||||
    my ($pk)    = $self->{table}->pk;
 | 
			
		||||
    return ( $table, $pk );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub DESTROY {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    $self->{'sth'} and $self->{'sth'}->finish();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub debug_dumper {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# calls debug but also dumps all the messages
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $message = shift;
 | 
			
		||||
    my $level   = ref $_[0] ? 1 : shift;
 | 
			
		||||
 | 
			
		||||
    if ( $self->{_debug} >= $level ) {
 | 
			
		||||
        require GT::Dumper;
 | 
			
		||||
        $self->debug( $message . join( "", map { GT::Dumper::Dumper($_) } @_ )) if ($self->{_debug});
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,572 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Search::Base
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Search.pm,v 1.25 2004/08/28 03:53:46 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Base classes upon which all search drivers are based
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::Base::Search;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
 | 
			
		||||
    use GT::Base;
 | 
			
		||||
    use GT::SQL::Search::Base::Common;
 | 
			
		||||
    @ISA = qw( GT::Base GT::SQL::Search::Base::Common);
 | 
			
		||||
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG      = 0;
 | 
			
		||||
    $VERSION    = sprintf "%d.%03d", q$Revision: 1.25 $ =~ /(\d+)\.(\d+)/;  
 | 
			
		||||
    @ISA        = qw/ GT::Base /;
 | 
			
		||||
 | 
			
		||||
    $ATTRIBS    = {
 | 
			
		||||
        'stopwords' => $STOPWORDS,
 | 
			
		||||
        'mh'        => 25,
 | 
			
		||||
        'nh'        => 1,
 | 
			
		||||
        'ww'        => undef,
 | 
			
		||||
        'ma'        => undef,
 | 
			
		||||
        'bool'      => undef,
 | 
			
		||||
        'substring' => 0,
 | 
			
		||||
        'query'     => '',
 | 
			
		||||
        'sb'        => 'score',
 | 
			
		||||
        'so'        => '',
 | 
			
		||||
        'score_col' => 'SCORE',
 | 
			
		||||
        'score_sort'=> 0,
 | 
			
		||||
        'debug'     => 0,
 | 
			
		||||
        '_debug'    => 0,
 | 
			
		||||
 | 
			
		||||
# query related
 | 
			
		||||
        'db'        => undef,
 | 
			
		||||
        'table'     => undef,
 | 
			
		||||
        'filter'    => undef,
 | 
			
		||||
        'callback'  => undef,
 | 
			
		||||
 | 
			
		||||
# strict matching of indexed words, accents on words do count
 | 
			
		||||
        'sm'        => 0,
 | 
			
		||||
        'min_word_size' => 3,
 | 
			
		||||
        'max_word_size' => 50,             
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub init {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# Initialises the Search object
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $input   = $self->common_param(@_);
 | 
			
		||||
 | 
			
		||||
    $self->set($input);
 | 
			
		||||
 | 
			
		||||
# now handle filters...,
 | 
			
		||||
    my $tbl     = $self->{table};
 | 
			
		||||
    my $cols    = $tbl->cols();
 | 
			
		||||
    my %filters = map {
 | 
			
		||||
        (my $tmp = $_) =~ s/-[lg]t$//;
 | 
			
		||||
        exists $cols->{$tmp} ? ($_ => $input->{$_}) : ()
 | 
			
		||||
    } keys %{$input};
 | 
			
		||||
 | 
			
		||||
    if ( keys %filters ) {
 | 
			
		||||
        $self->debug_dumper( "INIT: Creating Filters: ", \%filters ) if ( $self->{_debug} );
 | 
			
		||||
        $self->filter(\%filters);   
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    $self->{table}->connect;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub query {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# Returns a sth based on a query
 | 
			
		||||
#
 | 
			
		||||
# Options:
 | 
			
		||||
#        - paging
 | 
			
		||||
#           mh            : max hits
 | 
			
		||||
#           nh            : number hit (or page of hits)
 | 
			
		||||
#
 | 
			
		||||
#        - searching
 | 
			
		||||
#           ww            : whole word
 | 
			
		||||
#           ma            : 1 => OR match, 0 => AND match, undefined => QUERY
 | 
			
		||||
#           substring     : search for substrings of words
 | 
			
		||||
#           bool          : 'and' => and search, 'or' => or search, '' => regular query
 | 
			
		||||
#           query         : the string of things to ask for 
 | 
			
		||||
#
 | 
			
		||||
#        - filtering
 | 
			
		||||
#           field_name    : value       # Find all rows with field_name = value
 | 
			
		||||
#           field_name    : ">value"    # Find all rows with field_name > value.
 | 
			
		||||
#           field_name    : "<value"    # Find all rows with field_name < value.
 | 
			
		||||
#           field_name-gt : value       # Find all rows with field_name > value.
 | 
			
		||||
#           field_name-lt : value       # Find all rows with field_name < value.
 | 
			
		||||
#
 | 
			
		||||
# Parameters:
 | 
			
		||||
#        ( $CGI ) : a single cgi object
 | 
			
		||||
#        ( $HASH ) : a hash of the parameters
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
# find out what sort of a parameter we're dealing with
 | 
			
		||||
    my $input   = $self->common_param(@_);
 | 
			
		||||
 | 
			
		||||
# add additional parameters if required
 | 
			
		||||
    foreach my $parameter ( keys %{$ATTRIBS} ) {
 | 
			
		||||
        if ( not exists $input->{$parameter} ) {
 | 
			
		||||
            $input->{$parameter} = $self->{$parameter};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# parse query...,
 | 
			
		||||
    $self->debug( "Search Query: $$input{query}", 1 ) if ($self->{_debug});
 | 
			
		||||
    my ( $query, $rejected ) = $self->_parse_query_string( $input->{'query'} );
 | 
			
		||||
 | 
			
		||||
    $self->{'rejected_keywords'} = $rejected;
 | 
			
		||||
 | 
			
		||||
# setup the additional input parameters
 | 
			
		||||
    $query = $self->_preset_options( $query, $input );
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "Set the pre-options: ", $query ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# now sort into distinct buckets
 | 
			
		||||
    my $buckets = &_create_buckets( $query );
 | 
			
		||||
    $self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
    return $self->_query($input, $buckets);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _query {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my ( $self, $input, $buckets ) = @_;
 | 
			
		||||
 | 
			
		||||
# now handle the separate possibilities
 | 
			
		||||
    my $results = {};
 | 
			
		||||
 | 
			
		||||
# query can have phrases
 | 
			
		||||
    $results = $self->_phrase_query( $buckets->{phrases}, $results );
 | 
			
		||||
    $self->debug_dumper("Did phrase union query. Current result set and scores: ", $results ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# query have keywords
 | 
			
		||||
    $results = $self->_union_query( $buckets->{keywords}, $results );
 | 
			
		||||
    $self->debug_dumper("Did keyword union query. Current result set and scores: ", $results ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# query must have phrases
 | 
			
		||||
    $results = $self->_phrase_intersect_query( $buckets->{phrases_must}, $results );
 | 
			
		||||
    $self->debug_dumper("Did phrase intersect query. Current result set and scores: ", $results ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# query must have keywords
 | 
			
		||||
    $results = $self->_intersect_query( $buckets->{keywords_must}, $results );
 | 
			
		||||
    $self->debug_dumper("Did keyword intersect query. Current result set and scores: ", $results ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# query cannot have keywords
 | 
			
		||||
    $results = $self->_disjoin_query( $buckets->{keywords_cannot}, $results );
 | 
			
		||||
    $self->debug_dumper("Did keyword disjoin query. Current result set and scores: ", $results ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# query cannot have phrases
 | 
			
		||||
    $results = $self->_phrase_disjoin_query( $buckets->{phrases_cannot}, $results);
 | 
			
		||||
    $self->debug_dumper("Did phrase disjoin query. Current result set and scores: ", $results ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# now handle filters
 | 
			
		||||
    my $cols    = $self->{'table'}->cols();
 | 
			
		||||
    my %filters = map {
 | 
			
		||||
        (my $tmp = $_) =~ s/-[lg]t$//;
 | 
			
		||||
        $cols->{$tmp} ? ($_ => $input->{$_}) : ()
 | 
			
		||||
    } keys %{$input};
 | 
			
		||||
 | 
			
		||||
    if (keys %filters) {
 | 
			
		||||
        $self->debug( "Creating Filters: ", \%filters ) if ($self->{_debug});
 | 
			
		||||
        $results = $self->filter(\%filters, $results);
 | 
			
		||||
    }
 | 
			
		||||
    elsif ($self->{filter}) {
 | 
			
		||||
        $self->debug( "Filtering results", $self->{filter} ) if ($self->{_debug});
 | 
			
		||||
        $results = $self->_filter_query( $self->{filter}, $results );
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        $self->debug( "No filters being used.") if ($self->{_debug});
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# now this query should probably clear the filters once it's been used, so i'll dothat here
 | 
			
		||||
    $self->{filter} = undef;
 | 
			
		||||
 | 
			
		||||
# now run through a callback function if needed.
 | 
			
		||||
    if ($self->{callback}) {
 | 
			
		||||
        unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
 | 
			
		||||
            return $self->error ('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
 | 
			
		||||
        }
 | 
			
		||||
        $self->debug_dumper ("Running results through callback. Had: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
 | 
			
		||||
        $results = $self->{callback}->($self, $results);
 | 
			
		||||
        $self->debug_dumper ("New result set: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# so how many hits did we get?
 | 
			
		||||
    $self->{table}->{last_hits} = ( $self->{rows} = scalar($results ? keys %{$results} : ()) );
 | 
			
		||||
 | 
			
		||||
# and now create a search sth object to handle all this
 | 
			
		||||
    return $self->sth( $results );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub sth {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $results = shift;
 | 
			
		||||
 | 
			
		||||
    require GT::SQL::Search::Base::STH;
 | 
			
		||||
    my $sth = GT::SQL::Search::STH->new(
 | 
			
		||||
        'results' => $results,
 | 
			
		||||
        'db'      => $self->{table}->{driver},
 | 
			
		||||
# pass the following attributes down to the STH handler
 | 
			
		||||
        map({ ($_ => $self->{$_}) } qw/ table sb so score_col score_sort nh mh rows _debug /)
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    return $sth;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub rows {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# after a query is run, returns the number of rows
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    return $self->{rows} || 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _add_filters {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# creates the filter object
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $filter;
 | 
			
		||||
 | 
			
		||||
# find out how we're calling the parameters
 | 
			
		||||
    if ( ref $_[0] eq 'GT::SQL::Condition' ) {
 | 
			
		||||
        $filter = shift;
 | 
			
		||||
    }
 | 
			
		||||
    elsif ( ref $_[0] eq 'HASH' ) {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# setup the query condition using the build_query condition method
 | 
			
		||||
# build the condition object
 | 
			
		||||
        my %opts = %{ shift() || {} };
 | 
			
		||||
        delete $opts{query};
 | 
			
		||||
 | 
			
		||||
        $filter = $self->{table}->build_query_cond( \%opts, $self->{table}{schema}{cols}  );
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        return $self->error ('BADARGS', 'FATAL', "Invalid argument: $_[0] passed to _add_filter");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# Use ref, as someone can pass in filter => 1 and mess things up.
 | 
			
		||||
 | 
			
		||||
    ref $self->{filter} ? $self->{filter}->add ($filter) : ($self->{filter} = $filter);
 | 
			
		||||
    $self->debug_dumper( "Filters: ", $self->{filter} ) if ($self->{_debug});
 | 
			
		||||
    
 | 
			
		||||
    return $self->{filter};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _preset_options {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# sets up word parameters
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $query   = shift or return;
 | 
			
		||||
    my $input   = shift or return $query;
 | 
			
		||||
 | 
			
		||||
# whole word searching
 | 
			
		||||
    if ( defined $input->{'ww'} or defined $self->{'ww'}) {
 | 
			
		||||
        if ( defined $input->{'ww'} ? $input->{'ww'} : $self->{'ww'} ) {
 | 
			
		||||
            for ( keys %{$query} ) { $query->{$_}->{mode} = 'substring'; }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# substring searching
 | 
			
		||||
    if ( defined $input->{'substring'} or defined $self->{'substring'}) {
 | 
			
		||||
        if ( defined $input->{'substring'} ? $input->{'substring'} : $self->{'substring'} ) {
 | 
			
		||||
            for ( keys %{$query} ) { $query->{$_}->{'substring'} = 1; }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if ( defined $input->{'ma'} or defined $self->{'ma'} ) {
 | 
			
		||||
# each keyword must be included
 | 
			
		||||
        if ( defined $input->{'ma'} ? $input->{'ma'} : $self->{'ma'} ) {
 | 
			
		||||
            for ( keys %{$query} ) { 
 | 
			
		||||
                next if $query->{$_}->{mode} eq 'cannot';
 | 
			
		||||
                $query->{$_}->{mode} = 'must'; 
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
# each word can be included but is not necessary
 | 
			
		||||
        else {
 | 
			
		||||
            for ( keys %{$query} ) { $query->{$_}->{mode} = 'can'; }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# some more and or searches, only if user hasn't put +word -word
 | 
			
		||||
    if ( lc( $input->{'bool'} || $self->{'bool'} ) eq 'and' ) {
 | 
			
		||||
        unless ($input->{query} =~ /(?:^|\s)[+-]\w/) {
 | 
			
		||||
            for ( keys %{$query} ) { 
 | 
			
		||||
                next if $query->{$_}->{mode} eq 'cannot';
 | 
			
		||||
                $query->{$_}->{mode} = 'must'; 
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    elsif ( lc( $input->{'bool'} || $self->{'bool'} ) eq 'or' ) {
 | 
			
		||||
        unless ($input->{query} =~ /(?:^|\s)[+-]\w/) {
 | 
			
		||||
            for ( keys %{$query} ) { $query->{$_}->{mode} = 'can'; }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $query;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _phrase_query { $_[1] }
 | 
			
		||||
sub _union_query { $_[1] }
 | 
			
		||||
sub _phrase_intersect_query { $_[1] }
 | 
			
		||||
sub _intersect_query { $_[1] }
 | 
			
		||||
sub _disjoin_query { $_[1] }
 | 
			
		||||
sub _phrase_disjoin_query { $_[1] }
 | 
			
		||||
 | 
			
		||||
sub filter {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# adds a filter
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
 | 
			
		||||
# add filters..,
 | 
			
		||||
    my $filters = $self->_add_filters( shift );
 | 
			
		||||
    my $results = shift;
 | 
			
		||||
 | 
			
		||||
# see if we need to execute a search, otherwise just return the current filterset
 | 
			
		||||
    defined $results or return $results;
 | 
			
		||||
 | 
			
		||||
# start doing the filter stuff
 | 
			
		||||
    return $self->_filter_query( $filters, $results );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _parse_query_string {
 | 
			
		||||
#------------------------------------------------------------
 | 
			
		||||
# from Mastering Regular Expressions altered a fair bit
 | 
			
		||||
# takes a space delimited string and breaks it up.
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $text    = shift;
 | 
			
		||||
 | 
			
		||||
    my %words   = ();
 | 
			
		||||
    my %reject  = ();
 | 
			
		||||
    my %mode    = ( 
 | 
			
		||||
        '+' => 'must',
 | 
			
		||||
        '-' => 'cannot',
 | 
			
		||||
        '<' => 'greater',
 | 
			
		||||
        '>' => 'less'
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
# work on the individual elements
 | 
			
		||||
    my @new = ();
 | 
			
		||||
    while ( $text =~ m{
 | 
			
		||||
                # the first part groups the phrase inside the quotes.
 | 
			
		||||
                # see explanation of this pattern in MRE
 | 
			
		||||
                ([+-]?"[^\"\\]*(?:\\.[^\"\\]*)*)" ?
 | 
			
		||||
                |  (\+?[\w\x80-\xFF\-\*]+),?
 | 
			
		||||
                | ' '
 | 
			
		||||
            }gx ) {
 | 
			
		||||
 | 
			
		||||
        my $match   = lc $+;
 | 
			
		||||
 | 
			
		||||
# strip out buffering spaces
 | 
			
		||||
        $match =~ s/^\s+//; $match =~ s/\s+$//;
 | 
			
		||||
 | 
			
		||||
# don't bother trying if there is nothing there
 | 
			
		||||
        next unless $match;
 | 
			
		||||
 | 
			
		||||
# find out the searching mode
 | 
			
		||||
        my ($mode, $substring, $phrase);
 | 
			
		||||
        if (my $m = $mode{substr($match,0,1)}) {
 | 
			
		||||
            $match = substr($match,1); 
 | 
			
		||||
            $mode = $m;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# do we need to substring match?
 | 
			
		||||
        if ( substr( $match, -1, 1 ) eq "*" ) {
 | 
			
		||||
            $match = substr($match,0,length($match)-1);
 | 
			
		||||
            $substring = 1;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# find out if we're dealing with a phrase
 | 
			
		||||
        if ( substr($match,0,1) eq '"' ) {
 | 
			
		||||
            $self->debug( "Dealing with a phrase: $match" ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
            $match = substr($match,1); 
 | 
			
		||||
 | 
			
		||||
# however, we want to make sure it's a phrase and not something else
 | 
			
		||||
            my ( $word_list, $rejected ) = $self->_tokenize( $match );
 | 
			
		||||
            $self->debug_dumper( "Phrase reduced to: ", $word_list ) if ($self->{_debug});
 | 
			
		||||
            $self->debug_dumper( "Phrase words rejected: ", $rejected ) if ($self->{_debug});
 | 
			
		||||
            my $word_count = @$word_list;
 | 
			
		||||
 | 
			
		||||
            if ( $word_count > 1 )   { $phrase = $word_list } # ok, standard phrase
 | 
			
		||||
            elsif ($word_count == 1) { $match = $word_list->[0] } # just a standard word, don't worry about searching by phrase
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# make sure we can use this word
 | 
			
		||||
        if ( not $phrase and my $code = $self->_check_word( $match, $self->{stopwords} ) ) {
 | 
			
		||||
            $reject{ $match } = $code; 
 | 
			
		||||
            next;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# now, see if we should toss this word  
 | 
			
		||||
        $words{$match}  = {
 | 
			
		||||
            mode      => $mode,
 | 
			
		||||
            phrase    => $phrase,
 | 
			
		||||
            substring => $substring,
 | 
			
		||||
            keyword   => not $phrase,
 | 
			
		||||
        };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# words is a hashref of:
 | 
			
		||||
#   {
 | 
			
		||||
#       word => {
 | 
			
		||||
#           paramaters => 'values'
 | 
			
		||||
#       },
 | 
			
		||||
#       word1 => {
 | 
			
		||||
#           ...
 | 
			
		||||
#       },
 | 
			
		||||
#       ...
 | 
			
		||||
#    }
 | 
			
		||||
#
 | 
			
		||||
    return( \%words, \%reject );
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
sub _filter_query {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# get the results from the filter
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $filters = shift;
 | 
			
		||||
    my $results = shift or return {};
 | 
			
		||||
    keys %{$results} or return $results;
 | 
			
		||||
 | 
			
		||||
    my $table = $self->{table};
 | 
			
		||||
    my $tname = $table->name();
 | 
			
		||||
 | 
			
		||||
# setup the where clause
 | 
			
		||||
    my $where = $filters->sql() or return $results;
 | 
			
		||||
    my ($pk)  = $table->pk;
 | 
			
		||||
    $where .= qq! AND $pk IN (! . join(',', keys %$results) . ')';
 | 
			
		||||
 | 
			
		||||
# now do the filter
 | 
			
		||||
    my $query = qq!
 | 
			
		||||
        SELECT $pk
 | 
			
		||||
        FROM
 | 
			
		||||
            $tname
 | 
			
		||||
        WHERE
 | 
			
		||||
            $where
 | 
			
		||||
    !;
 | 
			
		||||
    $self->debug( "Filter Query: $query" ) if ($self->{_debug});
 | 
			
		||||
    my $sth = $self->{table}->{driver}->prepare($query);
 | 
			
		||||
    $sth->execute();
 | 
			
		||||
 | 
			
		||||
# get all the results
 | 
			
		||||
    my $aref = $sth->fetchall_arrayref;
 | 
			
		||||
    return {
 | 
			
		||||
        map {
 | 
			
		||||
            $_->[0] => $results->{$_->[0]}
 | 
			
		||||
        } @$aref
 | 
			
		||||
    };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _create_buckets {
 | 
			
		||||
#------------------------------------------------------------
 | 
			
		||||
# takes the output from _parse_query_string and creates a
 | 
			
		||||
# bucket hash of all the different types of searching
 | 
			
		||||
# possible
 | 
			
		||||
    my $query   = shift or return;
 | 
			
		||||
 | 
			
		||||
    my %buckets;
 | 
			
		||||
 | 
			
		||||
# put each word in the appropriate hash bucket
 | 
			
		||||
    foreach my $parameter ( keys %{$query} ) {
 | 
			
		||||
 | 
			
		||||
        my $word_data = $query->{$parameter};
 | 
			
		||||
 | 
			
		||||
# the following is slower, however, done that way to be syntatically legible
 | 
			
		||||
        if ( $word_data->{'phrase'} and ( defined $word_data->{'mode'} and ($word_data->{'mode'} =~ /(must|cannot)/ ))) {
 | 
			
		||||
            $buckets{"phrases_$1"}->{$parameter} = $word_data;
 | 
			
		||||
        }
 | 
			
		||||
        elsif ( $word_data->{'phrase'} ) {
 | 
			
		||||
            $buckets{'phrases'}->{$parameter} = $word_data;
 | 
			
		||||
        }
 | 
			
		||||
        elsif ( $word_data->{'keyword'} and ( defined $word_data->{'mode'} and ($word_data->{'mode'}=~ /(must|cannot|greater|less)/) )) {
 | 
			
		||||
            $buckets{"keywords_$1"}->{$parameter} = $word_data;
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            $buckets{'keywords'}->{$parameter} = $word_data;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return \%buckets;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub alternate_driver_query {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my ( $self, $drivername, $input ) = @_;
 | 
			
		||||
 | 
			
		||||
    $drivername = uc $drivername;
 | 
			
		||||
    require GT::SQL::Search;
 | 
			
		||||
    my $driver = GT::SQL::Search->load_search({ %$input, table => $self->{table}, driver => $drivername });
 | 
			
		||||
    my $sth    = $driver->query( $input );
 | 
			
		||||
    foreach ( qw( rows rejected_keywords ) ) { $self->{$_} = $driver->{$_}; }
 | 
			
		||||
    return $sth;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub clean_sb {
 | 
			
		||||
# -------------------------------------------------------------------------------
 | 
			
		||||
# Convert the sort by, sort order into an sql string.
 | 
			
		||||
#
 | 
			
		||||
    my ($class, $sb, $so) = @_;
 | 
			
		||||
    my $output = '';
 | 
			
		||||
    
 | 
			
		||||
    return $output unless ($sb);
 | 
			
		||||
 | 
			
		||||
# Remove score attribute, used only for internal indexes.
 | 
			
		||||
    $sb =~ s/^\s*score\b//;
 | 
			
		||||
    $sb =~ s/,?\s*\bscore\b//;
 | 
			
		||||
    
 | 
			
		||||
    if ($sb and not ref $sb) {
 | 
			
		||||
        if ($sb =~ /^[\w\s,]+$/)  {
 | 
			
		||||
            if ($sb =~ /\s(?:asc|desc)/i) {
 | 
			
		||||
                $output = 'ORDER BY ' . $sb;
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                $output = 'ORDER BY ' . $sb . ' ' . $so;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            $class->error('BADSB', 'WARN', $sb);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    elsif (ref $sb eq 'ARRAY') {
 | 
			
		||||
        foreach ( @$sb ) {
 | 
			
		||||
            /^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
 | 
			
		||||
        }
 | 
			
		||||
        $output = 'ORDER BY ' . join(',', @$sb);
 | 
			
		||||
    }
 | 
			
		||||
    return $output;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub debug_dumper {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# calls debug but also dumps all the messages
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $message = shift;
 | 
			
		||||
    my $level   = ref $_[0] ? 1 : defined $_[0] ? shift : 0;
 | 
			
		||||
 | 
			
		||||
    if ( $self->{_debug} >= $level ) {
 | 
			
		||||
        require GT::Dumper;
 | 
			
		||||
        $self->debug( $message . join( "", map { GT::Dumper::Dumper($_) } @_ )) if ($self->{_debug});
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,411 @@
 | 
			
		||||
# ====================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Search::INTERNAL::Indexer
 | 
			
		||||
#   Author: Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Indexer.pm,v 1.11 2004/01/13 01:35:19 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ====================================================================
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::INTERNAL::Indexer;
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/@ISA $ATTRIBS $VERSION $DEBUG /;
 | 
			
		||||
    use GT::SQL::Search::Base::Indexer;
 | 
			
		||||
    @ISA = qw/ GT::SQL::Search::Base::Indexer /;
 | 
			
		||||
    $DEBUG   = 0;
 | 
			
		||||
    $VERSION = sprintf "%d.%03d", q$Revision: 1.11 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    shift;
 | 
			
		||||
    return GT::SQL::Search::INTERNAL::Indexer->new(@_)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub drop_search_driver {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $table = $self->{table}->name;
 | 
			
		||||
    my $rc1 = $self->{table}->do_query(qq!DROP TABLE $table! ."_Word_List");
 | 
			
		||||
    my $rc2 = $self->{table}->do_query(qq!DROP TABLE $table! ."_Score_List");
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub add_search_driver {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $name    = $self->{table}->name;
 | 
			
		||||
 | 
			
		||||
# first create the table that handles the words.
 | 
			
		||||
    my $creator = $self->{table}->creator ( $name . "_Word_List" );
 | 
			
		||||
    $creator->cols(
 | 
			
		||||
        Word_ID => {
 | 
			
		||||
            pos      => 1,
 | 
			
		||||
            type     => 'int',
 | 
			
		||||
            not_null => 1,
 | 
			
		||||
            unsigned => 1
 | 
			
		||||
        },
 | 
			
		||||
        Word => {
 | 
			
		||||
            pos     => 2,
 | 
			
		||||
            type    => 'varchar',
 | 
			
		||||
            not_null=> 1,
 | 
			
		||||
            size    => '50'
 | 
			
		||||
        },
 | 
			
		||||
        Frequency => {
 | 
			
		||||
            pos     => 3,
 | 
			
		||||
            type    => 'int',
 | 
			
		||||
            not_null=> 1
 | 
			
		||||
        }
 | 
			
		||||
    );
 | 
			
		||||
    $creator->pk('Word_ID');
 | 
			
		||||
    $creator->ai('Word_ID');
 | 
			
		||||
    $creator->unique({ $name . "_wordndx" => ['Word'] });
 | 
			
		||||
    $creator->create('force') or return;
 | 
			
		||||
 | 
			
		||||
# now create the handler for scores
 | 
			
		||||
    $creator = $self->{table}->creator( $name . '_Score_List' );
 | 
			
		||||
    $creator->cols(
 | 
			
		||||
        Word_ID => {
 | 
			
		||||
            pos      => 1,
 | 
			
		||||
            type     => 'int',
 | 
			
		||||
            not_null => 1,
 | 
			
		||||
            unsigned => 1
 | 
			
		||||
        },
 | 
			
		||||
        Item_ID => {
 | 
			
		||||
            pos      => 2,
 | 
			
		||||
            type     => 'int',
 | 
			
		||||
            not_null => 1,
 | 
			
		||||
            unsigned => 1
 | 
			
		||||
        },
 | 
			
		||||
        Score => {
 | 
			
		||||
            pos      => 3,
 | 
			
		||||
            type     => 'int',
 | 
			
		||||
            not_null => 1
 | 
			
		||||
        },
 | 
			
		||||
        Word_Pos => {
 | 
			
		||||
            pos      => 4,
 | 
			
		||||
            type     => 'int',
 | 
			
		||||
            not_null => 1
 | 
			
		||||
        }
 | 
			
		||||
    );
 | 
			
		||||
    $creator->index({ 'wndx' => ['Word_ID', 'Item_ID', 'Score'], 'itndx' => ['Item_ID'] });
 | 
			
		||||
    $creator->create('force') or return;
 | 
			
		||||
    return 1;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_create_table {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# creates the index tables..
 | 
			
		||||
#
 | 
			
		||||
    return $_[0]->add_search_driver(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_drop_table {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
# Remove the index tables.
 | 
			
		||||
#
 | 
			
		||||
    return $_[0]->drop_search_driver(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub init_queries {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
# Pre-load all our queries.
 | 
			
		||||
#
 | 
			
		||||
    my $self       = shift;
 | 
			
		||||
    my $queries    = shift;
 | 
			
		||||
 | 
			
		||||
    my $driver     = $self->{table}->{driver} or return $self->error ('NODRIVER', 'FATAL');
 | 
			
		||||
    my $table_name = $self->{table}->name()   or return $self->error('NOSCHEMA', 'FATAL');
 | 
			
		||||
    my $wtable     = $table_name . '_Word_List';
 | 
			
		||||
    my $seq        = $wtable . '_seq';
 | 
			
		||||
    my $stable     = $table_name . '_Score_List';
 | 
			
		||||
 | 
			
		||||
    my %ai_queries = (
 | 
			
		||||
        ins_word_ORACLE => "INSERT INTO $wtable (Word_ID, Word, Frequency) VALUES ($seq.NEXTVAL, ?, ?)",
 | 
			
		||||
        ins_word_PG     => "INSERT INTO $wtable (Word_ID, Word, Frequency) VALUES (NEXTVAL('$seq'), ?, ?)",
 | 
			
		||||
        ins_word        => "INSERT INTO $wtable (Word, Frequency) VALUES (?, ?)"
 | 
			
		||||
    );
 | 
			
		||||
    my %queries = (
 | 
			
		||||
        upd_word  => "UPDATE $wtable SET Frequency = ? WHERE  Word_ID = ?",
 | 
			
		||||
        sel_word  => "SELECT Word_ID,Word,Frequency FROM $wtable WHERE  Word = ?",
 | 
			
		||||
        sel_freq  => "SELECT Frequency FROM $wtable WHERE  Word_ID = ?",
 | 
			
		||||
        del_word  => "DELETE FROM $wtable WHERE  Word_ID = ?",
 | 
			
		||||
        mod_word  => "UPDATE $wtable SET Frequency = Frequency - ? WHERE Word_ID = ?",
 | 
			
		||||
        ins_scor  => "INSERT INTO $stable (Word_ID, Item_ID, Score, Word_Pos) VALUES (?, ?, ?, ?)",
 | 
			
		||||
        item_cnt  => "SELECT Word_ID, COUNT(*) FROM $stable WHERE Item_ID = ? GROUP BY Word_ID",
 | 
			
		||||
        scr_del   => "DELETE FROM $stable WHERE Item_ID = ?",
 | 
			
		||||
        dump_word => "DELETE FROM $wtable",
 | 
			
		||||
        dump_scor => "DELETE FROM $stable"
 | 
			
		||||
    );
 | 
			
		||||
    my $type = uc $self->{table}->{connect}->{driver};
 | 
			
		||||
    $self->{ins_word} = $driver->prepare($ai_queries{"ins_word_$type"} || $ai_queries{"ins_word"});
 | 
			
		||||
 | 
			
		||||
# check to see if the table exist
 | 
			
		||||
    $self->{table}->new_table( $wtable ) or return $self->error('CANTPREPARE','WARN', 'Loading of table', $GT::SQL::error);
 | 
			
		||||
    $self->{table}->new_table( $stable ) or return $self->error('CANTPREPARE','WARN', 'Loading of table', $GT::SQL::error);
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    if ($type eq 'MYSQL') {
 | 
			
		||||
        foreach my $query (keys %queries) {
 | 
			
		||||
            $self->{$query} = $driver->prepare_raw ($queries{$query}) or return $self->error ('CANTPREPARE', 'WARN', $query, $GT::SQL::error);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        foreach my $query (keys %queries) {
 | 
			
		||||
            $self->{$query} = $driver->prepare ($queries{$query}) or return $self->error ('CANTPREPARE', 'WARN', $query, $GT::SQL::error);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_add_record {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
# indexes a single record
 | 
			
		||||
    my ($self, $rec, $insert_sth ) = @_;
 | 
			
		||||
 | 
			
		||||
# Only continue if we have weights and a primary key.
 | 
			
		||||
    my $tbl     = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    my %weights = $tbl->_weight_cols() or return;
 | 
			
		||||
    my ($pk)    = $tbl->pk();
 | 
			
		||||
    my $item_id = ( $tbl->ai() and $insert_sth ) ? $insert_sth->insert_id() : $rec->{$pk};
 | 
			
		||||
    my $index   = 0;
 | 
			
		||||
 | 
			
		||||
    $self->{init} or $self->init_queries;
 | 
			
		||||
 | 
			
		||||
# Go through each column and index it.
 | 
			
		||||
    foreach my $column ( keys %weights ) {
 | 
			
		||||
        my ($word_list, $rejected) = $self->_tokenize( $rec->{$column} );
 | 
			
		||||
        $word_list or next;
 | 
			
		||||
 | 
			
		||||
# Build a hash of word => frequency.
 | 
			
		||||
        my %words;
 | 
			
		||||
        foreach my $word (@{$word_list}) {
 | 
			
		||||
            $words{$word}++;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# Add the words in, or update frequency.
 | 
			
		||||
        my %word_ids = ();
 | 
			
		||||
        while (my ($word, $freq) = each %words) {
 | 
			
		||||
            $self->{sel_word}->execute($word) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
            my $word_r = $self->{sel_word}->fetchrow_arrayref; # Word_ID, Word, Frequency
 | 
			
		||||
            if ($word_r) {
 | 
			
		||||
                $word_r->[2] += $freq;
 | 
			
		||||
                $word_ids{$word} = $word_r->[0];
 | 
			
		||||
                $self->{upd_word}->execute ($word_r->[2], $word_r->[0]) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                $self->{ins_word}->execute ($word, $words{$word}) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
                $word_ids{$word} = $self->{ins_word}->insert_id();
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
# now that we have the word ids, insert each of the word-points
 | 
			
		||||
        my $weight = $weights{$column};
 | 
			
		||||
        foreach my $word ( @{$word_list} ) {
 | 
			
		||||
            $self->{ins_scor}->execute ($word_ids{$word}, $item_id, $weight, $index++) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
        }
 | 
			
		||||
        $index++;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub reindex_all {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $table   = shift;
 | 
			
		||||
    my $opts    = shift;
 | 
			
		||||
    my $tick    = $opts->{tick} || 0;
 | 
			
		||||
    my $max     = $opts->{max}  || 5000;
 | 
			
		||||
 | 
			
		||||
    my %weights     = $self->{table}->_weight_cols() or return;
 | 
			
		||||
    my @weight_list = keys %weights;
 | 
			
		||||
    my @weight_arr  = map { $weights{$_} } @weight_list;
 | 
			
		||||
    my ($pk)    = $self->{table}->pk();
 | 
			
		||||
    my $index   = 0;
 | 
			
		||||
    my $word_id = 1;
 | 
			
		||||
    $self->{init} or $self->init_queries;
 | 
			
		||||
    
 | 
			
		||||
# first nuke the current index
 | 
			
		||||
    $self->dump_index();
 | 
			
		||||
 | 
			
		||||
# Go through the table and index each field.
 | 
			
		||||
    my $iterations = 1;
 | 
			
		||||
    my $count = 0;
 | 
			
		||||
 | 
			
		||||
    while (1) {
 | 
			
		||||
        if ($max) {
 | 
			
		||||
            my $offset = ($iterations-1) * $max;
 | 
			
		||||
            $table->select_options ( "LIMIT $offset,$max");
 | 
			
		||||
        }
 | 
			
		||||
        my $cond     = $opts->{cond} || {};
 | 
			
		||||
        my $sth      = $table->select($cond, [ $pk, @weight_list] );
 | 
			
		||||
        my $done     = 1;
 | 
			
		||||
 | 
			
		||||
        while ( my $arrayref = $sth->fetchrow_arrayref() ) {
 | 
			
		||||
# the primary key value
 | 
			
		||||
            my $i       = 0;
 | 
			
		||||
            my $item_id = $arrayref->[($i++)];
 | 
			
		||||
            $index      = 0;
 | 
			
		||||
            $done       = 0;
 | 
			
		||||
 | 
			
		||||
# start going through the record data
 | 
			
		||||
            foreach my $weight ( @weight_arr ) {
 | 
			
		||||
                my ($word_list, $junk) = $self->_tokenize( $arrayref->[$i++]  );
 | 
			
		||||
                $word_list or next;
 | 
			
		||||
 | 
			
		||||
# Build a hash of word => frequency.
 | 
			
		||||
                my %words;
 | 
			
		||||
                foreach my $word (@{$word_list}) {
 | 
			
		||||
                    $words{$word}++;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
# Add the words in, or update frequency.
 | 
			
		||||
                my %word_ids = ();
 | 
			
		||||
                while (my ($word, $freq) = each %words) {
 | 
			
		||||
                    $self->{sel_word}->execute($word) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
                    my $word_r = $self->{sel_word}->fetchrow_arrayref; # WordID,Word,Freq
 | 
			
		||||
                    if ($word_r) {
 | 
			
		||||
                        $word_r->[2] += $freq;
 | 
			
		||||
                        $word_ids{$word} = $word_r->[0];
 | 
			
		||||
                        $self->{upd_word}->execute ($word_r->[2], $word_r->[0]) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
                    }
 | 
			
		||||
                    else {
 | 
			
		||||
                        $self->{ins_word}->execute ($word, $words{$word}) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
                        $word_ids{$word} = $self->{ins_word}->insert_id();
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
# now that we have the word ids, insert each of the word-points
 | 
			
		||||
                foreach my $word ( @{$word_list} ) {
 | 
			
		||||
                    $self->{ins_scor}->execute ($word_ids{$word}, $item_id, $weight, $index++) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
                }
 | 
			
		||||
                $index++;
 | 
			
		||||
            }
 | 
			
		||||
            if ($tick) {
 | 
			
		||||
                $count++;
 | 
			
		||||
                $count % $tick      or (print "$count ");
 | 
			
		||||
                $count % ($tick*10) or (print "\n");
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        return if ($done);
 | 
			
		||||
        $iterations++;
 | 
			
		||||
        return if (! $max);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub pre_delete_record {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
# Delete a records index values.
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $where   = shift; 
 | 
			
		||||
 | 
			
		||||
    my $tbl     = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    my %weights = $tbl->_weight_cols() or return;
 | 
			
		||||
    my ($pk)    = $tbl->pk();
 | 
			
		||||
    my $q       = $tbl->select( $where, [ $pk ] );
 | 
			
		||||
 | 
			
		||||
    while ( my $aref = $q->fetchrow_arrayref() ) {
 | 
			
		||||
        my $item_id = $aref->[0] or next;
 | 
			
		||||
        my @weight_list = keys %weights;
 | 
			
		||||
        my $index   = 0;
 | 
			
		||||
        $self->{init} or $self->init_queries;
 | 
			
		||||
 | 
			
		||||
    # Get a frequency count for each word 
 | 
			
		||||
        $self->{item_cnt}->execute($item_id) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
 | 
			
		||||
    # Now go through and either decrement the freq, or remove the entry.
 | 
			
		||||
        while ( my ($word_id, $frequency) = $self->{item_cnt}->fetchrow_array() ) {
 | 
			
		||||
            $self->{sel_freq}->execute($word_id) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
            $self->debug( "Deleting frequencies for $word_id. decreasing by $frequency" ) if ($self->{_debug});
 | 
			
		||||
            if (my $freq = $self->{sel_freq}->fetchrow_arrayref) {
 | 
			
		||||
                if ($freq->[0] == $frequency) {
 | 
			
		||||
                    $self->{del_word}->execute($word_id) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
                }
 | 
			
		||||
                else {
 | 
			
		||||
                    $self->{mod_word}->execute($frequency, $word_id) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    # Remove the listings from the scores table.
 | 
			
		||||
        $self->{scr_del}->execute($item_id) or return $self->error ('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
    }
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_update_record {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
    my ( $self, $set_cond, $where_cond, $tmp ) = @_;
 | 
			
		||||
 | 
			
		||||
# delete the previous record
 | 
			
		||||
    $self->pre_delete_record( $where_cond ) or return;
 | 
			
		||||
#
 | 
			
		||||
# the new record
 | 
			
		||||
    my $tbl = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    my $q   = $tbl->select( $where_cond );
 | 
			
		||||
    while ( my $href = $q->fetchrow_hashref() ) {
 | 
			
		||||
        $self->post_add_record( $href );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return 1;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub reindex_record {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
# reindexes a record. basically deletes all associated records from current db abnd does an index.
 | 
			
		||||
# it's safe to use this
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $rec     = shift;
 | 
			
		||||
 | 
			
		||||
    $self->delete_record($rec);
 | 
			
		||||
    $self->index_record($rec);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub dump_index {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    $self->{init} or $self->init_queries;
 | 
			
		||||
 | 
			
		||||
    $self->{dump_word}->execute() or $self->error('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
    $self->{dump_scor}->execute() or $self->error('CANTEXECUTE', 'WARN', $DBI::errstr);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
sub debug_dumper {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# calls debug but also dumps all the messages
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $message = shift;
 | 
			
		||||
    my $level   = ref $_[0] ? 1 : shift;
 | 
			
		||||
 | 
			
		||||
    if ( $self->{_debug} >= $level ) {
 | 
			
		||||
        require GT::Dumper;
 | 
			
		||||
        $self->debug( $message . join( "", map { GT::Dumper::Dumper($_) } @_ ));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub DESTROY {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Calls finish on init queries.
 | 
			
		||||
#
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
    return unless ($self->{init});
 | 
			
		||||
    $self->{upd_word}->finish;
 | 
			
		||||
#   $self->{ins_word}->finish; will get finished automatically
 | 
			
		||||
    $self->{sel_word}->finish;
 | 
			
		||||
    $self->{sel_freq}->finish;
 | 
			
		||||
    $self->{del_word}->finish;
 | 
			
		||||
    $self->{mod_word}->finish;
 | 
			
		||||
    $self->{ins_scor}->finish;
 | 
			
		||||
    $self->{item_cnt}->finish;
 | 
			
		||||
    $self->{scr_del}->finish;
 | 
			
		||||
    $self->{dump_word}->finish;
 | 
			
		||||
    $self->{dump_scor}->finish;
 | 
			
		||||
    $self->{init} = 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,604 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Indexer
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Search.pm,v 1.18 2004/08/28 03:53:47 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to make changes to tables and create tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::INTERNAL::Search;
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/@ISA $VERSION $DEBUG $ATTRIBS /;
 | 
			
		||||
    use GT::SQL::Search::Base::Search;
 | 
			
		||||
    @ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG   = 0;
 | 
			
		||||
    $VERSION = sprintf "%d.%03d", q$Revision: 1.18 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
# the max number of links that can be handled by UNION before it should simply 
 | 
			
		||||
# shunt the searching pipe to NONINDEXED system
 | 
			
		||||
        'union_shunt_threshold'  => '5000',
 | 
			
		||||
        'phrase_shunt_threshold' => '1000',
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
################################################################################
 | 
			
		||||
# Internal functions
 | 
			
		||||
################################################################################
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    shift;
 | 
			
		||||
    return GT::SQL::Search::INTERNAL::Search->new(@_)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# this just checks to ensure that the words are not all search keywords
 | 
			
		||||
#
 | 
			
		||||
    my ( $self, $input, $buckets ) = @_;
 | 
			
		||||
 | 
			
		||||
# calculate wordids and frequencies
 | 
			
		||||
    foreach ( keys %$buckets ) {
 | 
			
		||||
        $buckets->{$_} = $self->get_wordids( $buckets->{$_}, ( /phrase/ ? "phrases" : "keywords" ) );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# the following is a bit tricky and will be replaced however, if the number 
 | 
			
		||||
# of results from a union is more than the maximum shunt value, it will 
 | 
			
		||||
# simply do a nonindexed query
 | 
			
		||||
    if ( $buckets->{keywords} ) {
 | 
			
		||||
        my $rec       = _count_frequencies( $buckets->{keywords} );
 | 
			
		||||
        my $count     = 0;
 | 
			
		||||
        foreach ( values %$rec ) { $count +=  $_; }
 | 
			
		||||
        if ($count > $self->{union_shunt_threshold}) {
 | 
			
		||||
            $self->debug_dumper("Too many results using internal search, falling back to nonindexed. Counts are: ", $rec) if ($self->{_debug});
 | 
			
		||||
            return $self->alternate_driver_query( 'NONINDEXED', $input );
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Now test the phrases. Just due to how the phrase searching works, the queries
 | 
			
		||||
# can grow in size extremely rapidly, and slowdown the search. So the limit for
 | 
			
		||||
# phrase searching is separate as it requires a different cutoff value than
 | 
			
		||||
# the keyword search which is usually much lower!
 | 
			
		||||
    if ($buckets->{phrases}) {
 | 
			
		||||
        foreach my $phrase ( keys %{$buckets->{phrases} || {} } ) {
 | 
			
		||||
            my $rec       = _count_frequencies( $buckets->{phrases}->{$phrase}->{word_info} );
 | 
			
		||||
            my ( $count ) = sort values %$rec; # Get smallest frequency.
 | 
			
		||||
            if ( $count > $self->{phrase_shunt_threshold} ) {
 | 
			
		||||
                $self->debug_dumper("Too many results using internal search, falling back to nonindexed. Counts are: ", $rec) if ($self->{_debug});
 | 
			
		||||
                return $self->alternate_driver_query( 'NONINDEXED', $input );
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    if ($buckets->{phrases_must}) {
 | 
			
		||||
        foreach my $phrase ( keys %{$buckets->{phrases_must} || {} } ) {
 | 
			
		||||
            my $rec       = _count_frequencies( $buckets->{phrases_must}->{$phrase}->{word_info} );
 | 
			
		||||
            my ( $count ) = sort values %$rec; # Get smallest frequency.
 | 
			
		||||
            if ( $count > $self->{phrase_shunt_threshold} ) {
 | 
			
		||||
                $self->debug_dumper("Too many results using internal search, falling back to nonindexed. Counts are: ", $rec) if ($self->{_debug});
 | 
			
		||||
                return $self->alternate_driver_query( 'NONINDEXED', $input );
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return $self->SUPER::_query( $input, $buckets );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _count_frequencies {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $word_info = shift;
 | 
			
		||||
    my $rec       = {};
 | 
			
		||||
    foreach my $word ( keys %$word_info ) {
 | 
			
		||||
        my $freq  = 0;
 | 
			
		||||
        foreach ( values %{$word_info->{$word}->{word_info}} ) {
 | 
			
		||||
            $freq += $_;
 | 
			
		||||
        }
 | 
			
		||||
        $rec->{$word} = $freq;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $rec;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _table_names {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# return the table names
 | 
			
		||||
#
 | 
			
		||||
    my $self    =  shift;
 | 
			
		||||
    my $table   = $self->{table}->name() or return $self->error('NOSCHEMA', 'FATAL');
 | 
			
		||||
    my $wtable  = $table . '_Word_List';
 | 
			
		||||
    my $stable  = $table . '_Score_List';
 | 
			
		||||
 | 
			
		||||
    return ( $table, $wtable, $stable);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _word_infos {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# get the word ids and frequencies 
 | 
			
		||||
#
 | 
			
		||||
    my $self       = shift;
 | 
			
		||||
    my $word_infos = shift;
 | 
			
		||||
 | 
			
		||||
    my $rec        = {};
 | 
			
		||||
 | 
			
		||||
    foreach my $word ( keys %$word_infos ) {
 | 
			
		||||
        my $wi     = $word_infos->{$word}->{word_info};
 | 
			
		||||
        $rec->{$word} = [ map { [ $_, $wi->{$_} ] } keys %$wi ];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $rec;
 | 
			
		||||
    
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _union_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Takes a list of words and gets all words that match
 | 
			
		||||
# returns { itemid -> score } of hits that match
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $words   = shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
    my ( $query, $where, $db, $word_infos );
 | 
			
		||||
    my ( $table, $wtable, $stable) = $self->_table_names();
 | 
			
		||||
 | 
			
		||||
    $db         = $self->{table}->{driver} or return $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    $word_infos = $self->_word_infos( $words ) or return $results;
 | 
			
		||||
 | 
			
		||||
    return $results unless (keys %{$word_infos});
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "Getting words: ", $words) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# build the where clause
 | 
			
		||||
    my @word_ids;
 | 
			
		||||
    foreach my $word_synonym_list  ( values %$word_infos ) {
 | 
			
		||||
        next unless ( $word_synonym_list );
 | 
			
		||||
        foreach my $word_id ( @{$word_synonym_list }) {
 | 
			
		||||
            next unless ( ref $word_id eq 'ARRAY' ); # ensure it's a reference
 | 
			
		||||
            push @word_ids, $word_id->[0]; # we need to shed the word quantities
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results unless ( @word_ids );
 | 
			
		||||
    $where = 'Word_ID IN(' . join(",", @word_ids) . ")";
 | 
			
		||||
 | 
			
		||||
# build the query
 | 
			
		||||
    $query = qq!
 | 
			
		||||
        SELECT Item_ID, SUM(Score)
 | 
			
		||||
            FROM $stable
 | 
			
		||||
            WHERE
 | 
			
		||||
                $where
 | 
			
		||||
            GROUP BY Item_ID
 | 
			
		||||
    !;
 | 
			
		||||
 | 
			
		||||
    $self->debug( "Union Query: $query" ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# prepare the query
 | 
			
		||||
    my $sth = $db->prepare( $query ) or return;
 | 
			
		||||
    $sth->execute() or return;
 | 
			
		||||
 | 
			
		||||
# get the results
 | 
			
		||||
    my %word_infos = $sth->fetchall_list;
 | 
			
		||||
 | 
			
		||||
# merge the current result set into found
 | 
			
		||||
    foreach my $item ( keys %{$results} ) {
 | 
			
		||||
        $word_infos{$item} += $results->{$item};
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    return \%word_infos;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _intersect_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Takes a list of words and gets all words that match all the keywords
 | 
			
		||||
# returns { itemid -> score } of hits that match
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $words   = shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    $words or return $results;
 | 
			
		||||
    keys %{$words} or return $results;
 | 
			
		||||
 | 
			
		||||
    my ( $query, $where, $db, $word_infos, $word_hits );
 | 
			
		||||
    my ( $table, $wtable, $stable) = $self->_table_names();
 | 
			
		||||
 | 
			
		||||
# have we left any of our words out?
 | 
			
		||||
    $db         = $self->{table}->{driver} or return $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    $word_infos = $self->_word_infos( $words ) or return {};
 | 
			
		||||
    if ( keys %{$word_infos} < keys %{$words} ) {
 | 
			
		||||
        return {};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "Keyword Intersect words: ", $word_infos ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# take the words and get a hash of the word scores
 | 
			
		||||
    foreach my $word ( keys %{$word_infos} ) {
 | 
			
		||||
 | 
			
		||||
        my $total_freq = 0;
 | 
			
		||||
        foreach my $word_synonyms ( @{$word_infos->{$word}} ) {
 | 
			
		||||
            $total_freq += $word_synonyms->[1];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        $word_hits->{$word} = $total_freq or return;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# so now, sort out the words from lowest frequency to highest frequency
 | 
			
		||||
    my @search_order = sort { $word_hits->{$a} <=> $word_hits->{$b} } keys %{$word_hits};
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "Searching words in this order: ", \@search_order) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# find out how we're going to handle the searching, if the first elements
 | 
			
		||||
 | 
			
		||||
################################################################################
 | 
			
		||||
### The following part is for smaller intersect subsets
 | 
			
		||||
################################################################################
 | 
			
		||||
    my $intersect = $results;
 | 
			
		||||
    foreach my $word ( @search_order ) {
 | 
			
		||||
 | 
			
		||||
# setup the where clause to get all the words associated
 | 
			
		||||
        my $where   = "Word_ID in(" . join(",", map( { $_->[0] } @{$word_infos->{$word}} )) . ")";
 | 
			
		||||
 | 
			
		||||
# setup the intersect for the previous if required. for iterative intersecting
 | 
			
		||||
        if ( keys %{$intersect} ) {
 | 
			
		||||
            $where  .= " AND Item_ID in(" . join(",",keys %{$intersect}) . ")";
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# make the database engine work a little bit
 | 
			
		||||
        $query = qq!
 | 
			
		||||
            SELECT Item_ID, SUM(Score) AS Score
 | 
			
		||||
            FROM $stable
 | 
			
		||||
            WHERE
 | 
			
		||||
                $where
 | 
			
		||||
            GROUP BY Item_ID
 | 
			
		||||
        !;
 | 
			
		||||
        $self->debug( "Intersect Query: $query" ) if ($self->{_debug});
 | 
			
		||||
        my $intersect_sth = $db->prepare( $query );
 | 
			
		||||
 | 
			
		||||
        $intersect_sth->execute();
 | 
			
		||||
 | 
			
		||||
# get a list of all the matches
 | 
			
		||||
        my $matches = $intersect_sth->fetchall_arrayref();
 | 
			
		||||
 | 
			
		||||
        $self->debug_dumper( "Matches found for $word: ", $matches ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# go through all the matches and intersect them
 | 
			
		||||
        my %tmp = ();
 | 
			
		||||
        foreach my $row ( @{$matches} ) {
 | 
			
		||||
            my ( $itemid, $score ) = @{$row};
 | 
			
		||||
            $intersect->{$itemid} ||= 0;
 | 
			
		||||
            $tmp{ $itemid } = $intersect->{$itemid} + $score;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# inform the system of that development
 | 
			
		||||
        %tmp or return;
 | 
			
		||||
        $intersect = \%tmp;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $intersect;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _disjoin_query {
 | 
			
		||||
#------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $words   = shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
    $words or return $results;
 | 
			
		||||
 | 
			
		||||
    my ( $query, $where, $db, $word_infos, $word_hits );
 | 
			
		||||
    my ( $table, $wtable, $stable) = $self->_table_names();
 | 
			
		||||
 | 
			
		||||
    $db = $self->{table}->{driver} or return $results;
 | 
			
		||||
 | 
			
		||||
# have we left any of our words out?
 | 
			
		||||
    $word_infos = $self->_word_infos( $words ) or return $results;
 | 
			
		||||
#   if ( keys %{$word_infos} < keys %{$words} ) {
 | 
			
		||||
#       return $results;
 | 
			
		||||
#   }
 | 
			
		||||
 | 
			
		||||
# take the words and get a hash of the word scores
 | 
			
		||||
    foreach my $word ( keys %{$word_infos} ) {
 | 
			
		||||
        my $total_freq = 0;
 | 
			
		||||
        foreach my $word_synonyms ( $word_infos->{$word} ) {
 | 
			
		||||
            $total_freq += ( $word_synonyms->[0] || 0 );
 | 
			
		||||
        }
 | 
			
		||||
# if the value is null this mean there is actually no results, whoops!
 | 
			
		||||
        $total_freq and $word_hits->{$word} = $total_freq;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# so now, sort out the words from lowest frequency to highest frequency
 | 
			
		||||
    my @search_order = sort { $word_hits->{$b} <=> $word_hits->{$b} } keys %{$word_hits};
 | 
			
		||||
    $self->debug_dumper( "Disjoining words in the following order: ", \@search_order) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
################################################################################
 | 
			
		||||
### This following part is for smaller disjoin presets
 | 
			
		||||
################################################################################
 | 
			
		||||
    foreach my $word ( @search_order ) {
 | 
			
		||||
 | 
			
		||||
# setup the where clause to get all the words associated
 | 
			
		||||
        my $where = "Word_ID in(" . join(",", map( { $_->[0] } @{$word_infos->{$word}} )) . ")";
 | 
			
		||||
 | 
			
		||||
# setup the intersect for the previous if required. for iterative intersecting
 | 
			
		||||
        if ( keys %{$results} ) {
 | 
			
		||||
            $where .= " AND Item_ID in(" . join(",", keys %{$results}) . ")";
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# make the database engine work a little bit
 | 
			
		||||
        $query = qq!
 | 
			
		||||
            SELECT Item_ID
 | 
			
		||||
            FROM $stable
 | 
			
		||||
            WHERE
 | 
			
		||||
                $where
 | 
			
		||||
            GROUP BY Item_ID
 | 
			
		||||
        !;
 | 
			
		||||
        $self->debug($query) if ($self->{_debug});
 | 
			
		||||
        my $intersect_sth = $db->prepare( $query );
 | 
			
		||||
 | 
			
		||||
        $intersect_sth->execute();
 | 
			
		||||
 | 
			
		||||
# get a list of all the matches
 | 
			
		||||
        my $matches = $intersect_sth->fetchall_arrayref();
 | 
			
		||||
 | 
			
		||||
# strip the matches from the current result set
 | 
			
		||||
        foreach my $word ( map { $_->[0] } @{$matches}) {
 | 
			
		||||
            delete $results->{$word};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _phrase_disjoin_query {
 | 
			
		||||
#------------------------------------------------------------
 | 
			
		||||
# subtracts the found phrases from the list
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $phrases = shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
    $phrases or return $results;
 | 
			
		||||
 | 
			
		||||
    foreach my $phrase ( values %{$phrases} ) {
 | 
			
		||||
        my $temp = $self->_get_phrase( $phrase->{'phrase'}, $phrase->{word_info} );
 | 
			
		||||
 | 
			
		||||
# perform disjoin
 | 
			
		||||
        foreach my $itemid ( keys %{$temp} ) {
 | 
			
		||||
            $self->debug( "Deleting $itemid from list" ) if ($self->{_debug});
 | 
			
		||||
            delete $results->{$itemid};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _phrase_intersect_query {
 | 
			
		||||
#------------------------------------------------------------
 | 
			
		||||
# intersects phrases together
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $phrases = shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    $phrases or return $results;
 | 
			
		||||
 | 
			
		||||
    foreach my $phrase ( values %{$phrases} ) {
 | 
			
		||||
        my $temp = $self->_get_phrase( $phrase->{'phrase'}, $phrase->{word_info} );
 | 
			
		||||
 | 
			
		||||
# perform intersect
 | 
			
		||||
        foreach my $itemid ( keys %{$temp} ) {
 | 
			
		||||
            $temp->{$itemid} += $results->{$itemid} || 0;
 | 
			
		||||
        }
 | 
			
		||||
        $results = $temp;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _phrase_query {
 | 
			
		||||
#------------------------------------------------------------
 | 
			
		||||
# this is a phrase union query
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $phrases = shift or return;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    foreach my $phrase ( values %{$phrases} ) {
 | 
			
		||||
        $self->debug_dumper( "Unioning: ", $phrase ) if ($self->{_debug});
 | 
			
		||||
        $results = $self->_get_phrase( $phrase->{'phrase'}, $phrase->{word_info}, $results );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_phrase {
 | 
			
		||||
#------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $wordlist= shift;
 | 
			
		||||
    my $word_info = shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    $wordlist or return $results;
 | 
			
		||||
 | 
			
		||||
    my ( $query, $where, $db, $word_infos, %word_hits );
 | 
			
		||||
    my ( $table, $wtable, $stable) = $self->_table_names();
 | 
			
		||||
    my ($pk) = $self->{table}->pk;
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "Getting words: ", $wordlist ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# get all the word ids that we want to handle   
 | 
			
		||||
    $db         = $self->{table}->{driver} or return $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    $word_infos = $self->_word_infos( $word_info ) or return;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "Word infos: ", $word_infos ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# take the words and get a hash of the word scores
 | 
			
		||||
    foreach my $word ( keys %{$word_infos} ) {
 | 
			
		||||
 | 
			
		||||
        @{$word_infos->{$word} || []} or return;
 | 
			
		||||
 | 
			
		||||
        my $total_freq = 0;
 | 
			
		||||
        foreach my $word_synonyms ( @{$word_infos->{$word}} ) {
 | 
			
		||||
            $total_freq += $word_synonyms->[1];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# if the value is null this mean there is actually no results, whoops!
 | 
			
		||||
        $word_hits{$word} = $total_freq;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "With synonyms tallied: ",  \%word_hits ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# so now, setup the order of search
 | 
			
		||||
    my $i = 0;
 | 
			
		||||
    my %word_order = map { $_ => $i++ } @{$wordlist};
 | 
			
		||||
    my @search_order = sort { $word_hits{$a} <=> $word_hits{$b} } keys %word_hits;
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "Word search order: ", \@search_order ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
################################################################################
 | 
			
		||||
### This following part is for smaller phrases
 | 
			
		||||
################################################################################
 | 
			
		||||
# start getting words in order of their frequency
 | 
			
		||||
    my %matches = ();
 | 
			
		||||
    my $index = 0;
 | 
			
		||||
    foreach my $word ( @search_order ) {
 | 
			
		||||
 | 
			
		||||
# setup the where clause for the individual words, firstly
 | 
			
		||||
        if ( keys %matches ) {
 | 
			
		||||
            my $vector  = $word_order{$word} - $index;
 | 
			
		||||
            $where = '(';
 | 
			
		||||
            $where =
 | 
			
		||||
                '(' .
 | 
			
		||||
                join(
 | 
			
		||||
                    " OR ",
 | 
			
		||||
                    map(
 | 
			
		||||
                        "Item_ID = $_ AND Word_Pos IN(" . join(",", map $_->[0] + $vector, @{$matches{$_}}) . ')',
 | 
			
		||||
                        keys %matches
 | 
			
		||||
                    )
 | 
			
		||||
                ) .
 | 
			
		||||
                ") AND ";
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            $where = '';
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        $where .= "Word_ID IN(" . ( join ",", map { $_->[0] || () } @{$word_infos->{$word}} or return $results ) . ')';
 | 
			
		||||
 | 
			
		||||
        $query = qq!
 | 
			
		||||
            SELECT 
 | 
			
		||||
                Item_ID, Score, Word_Pos
 | 
			
		||||
            FROM 
 | 
			
		||||
                $stable 
 | 
			
		||||
            WHERE
 | 
			
		||||
                $where
 | 
			
		||||
        !;
 | 
			
		||||
 | 
			
		||||
        $self->debug( "Phrase get for '$word': " . $query ) if ($self->{_debug});
 | 
			
		||||
        my $sth = $db->prepare( $query );
 | 
			
		||||
        $sth->execute();
 | 
			
		||||
 | 
			
		||||
        %matches = ();
 | 
			
		||||
 | 
			
		||||
        while (my $hit = $sth->fetchrow_arrayref) {
 | 
			
		||||
            push @{$matches{$hit->[0]}}, [ $hit->[2], $hit->[1] ];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# If there are no values stored in %matches, it means that for
 | 
			
		||||
# this keyword, there have been no hits based upon position.
 | 
			
		||||
# In that case, terminate and return a null result
 | 
			
		||||
        keys %matches or last;
 | 
			
		||||
 | 
			
		||||
# where were we in the string?
 | 
			
		||||
        $index = $word_order{$word};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# now tally up all the scores and merge the new records in
 | 
			
		||||
    foreach my $itemid ( keys %matches ) {
 | 
			
		||||
        my $score = 0;
 | 
			
		||||
        foreach my $sub_total ( @{$matches{$itemid}} ) {
 | 
			
		||||
            $score += $sub_total->[1];
 | 
			
		||||
        }
 | 
			
		||||
        $results->{$itemid} += $score;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub get_wordids {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Get a list of words 
 | 
			
		||||
#
 | 
			
		||||
    my $self     = shift;
 | 
			
		||||
    my $elements = shift or return;
 | 
			
		||||
    my $mode     = lc shift || 'keywords';
 | 
			
		||||
 | 
			
		||||
    if ( $mode eq 'keywords' ) {
 | 
			
		||||
        $elements = $self->_get_wordid($elements);
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        foreach my $phrase ( keys %$elements ) {
 | 
			
		||||
            my $results = $self->_get_wordid({
 | 
			
		||||
                map { ($_ => { substring => 0 }) } @{$elements->{$phrase}->{phrase}}
 | 
			
		||||
            });
 | 
			
		||||
 | 
			
		||||
            $elements->{$phrase}->{word_info} = $results;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $elements;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_wordid {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Get a list of words 
 | 
			
		||||
#
 | 
			
		||||
    my $self  = shift;
 | 
			
		||||
    my $words = shift;
 | 
			
		||||
    my $tbl   = $self->{table};
 | 
			
		||||
    
 | 
			
		||||
    my ( $table, $wtable, $stable) = $self->_table_names();
 | 
			
		||||
 | 
			
		||||
    foreach my $word ( keys %$words ) {
 | 
			
		||||
        my $query =
 | 
			
		||||
            qq!SELECT Word_ID, Frequency FROM $wtable WHERE Word LIKE '! .
 | 
			
		||||
            quotemeta($word) .
 | 
			
		||||
            ( $words->{$word}->{substring} ? '%' : '' ) .
 | 
			
		||||
            "'";
 | 
			
		||||
        my $sth = $tbl->do_query($query) or next;
 | 
			
		||||
        my $tmp = { $sth->fetchall_list };
 | 
			
		||||
 | 
			
		||||
        $words->{$word}->{word_info} = $tmp;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $words;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
##
 | 
			
		||||
# Internal Use
 | 
			
		||||
# $self->_cgi_to_hash ($in);
 | 
			
		||||
# --------------------------
 | 
			
		||||
#   Creates a hash ref from a cgi object.
 | 
			
		||||
##
 | 
			
		||||
sub _cgi_to_hash {
 | 
			
		||||
    my ($self, $cgi) = @_;
 | 
			
		||||
    $cgi and UNIVERSAL::can($cgi, 'param') or return $self->error(NODRIVER => 'FATAL');
 | 
			
		||||
    my @keys = $cgi->param;
 | 
			
		||||
    my $result = {};
 | 
			
		||||
    foreach my $key (@keys) {
 | 
			
		||||
        my @values = $cgi->param($key);
 | 
			
		||||
        if (@values == 1) { $result->{$key} = $values[0] }
 | 
			
		||||
        else              { $result->{$key} = \@values   }
 | 
			
		||||
    }
 | 
			
		||||
    return $result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,239 @@
 | 
			
		||||
# ====================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Search::LUCENE::Indexer
 | 
			
		||||
#   Author: Scott Beck
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Indexer.pm,v 1.2 2006/12/07 22:42:16 aki Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ====================================================================
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::LUCENE::Indexer;
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
use strict;
 | 
			
		||||
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
 | 
			
		||||
use Lucene;
 | 
			
		||||
use GT::SQL::Search::Base::Indexer;
 | 
			
		||||
use GT::TempFile;
 | 
			
		||||
@ISA = qw/ GT::SQL::Search::Base::Indexer /;
 | 
			
		||||
$DEBUG   = 0;
 | 
			
		||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
$ERRORS = {
 | 
			
		||||
    INDEX_CORRUPT => 'Could not create an Indexer, this probably means your index is corrupted and you should rebuild it. The error was: %s',
 | 
			
		||||
    DELETE_FAILED => 'Could not delete some records: %s'
 | 
			
		||||
};
 | 
			
		||||
$ERROR_MESSAGE = 'GT::SQL';
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    my $class = shift;
 | 
			
		||||
    return $class->new(@_)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_path {
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
    my $name    = $self->{table}->name;
 | 
			
		||||
    my $tmpdir  = GT::TempFile::find_tmpdir();
 | 
			
		||||
    my $path = $tmpdir . '/' . $name;
 | 
			
		||||
    $path = $1 if $path =~ /(.*)/; # XXX untaint
 | 
			
		||||
    return $path;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_store {
 | 
			
		||||
    my ($self, $create) = @_;
 | 
			
		||||
    my $path = $self->_get_path;
 | 
			
		||||
    return Lucene::Store::FSDirectory->getDirectory($path, $create);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_indexer {
 | 
			
		||||
    my ($self, $create) = @_;
 | 
			
		||||
    my %weights = $self->{table}->_weight_cols() or return $self->error(NOWEIGHTS => 'WARN');
 | 
			
		||||
 | 
			
		||||
    my ($pk) = $self->{table}->pk;
 | 
			
		||||
    if (!$pk) {
 | 
			
		||||
        return $self->error('NOPRIMARYKEY','WARN');
 | 
			
		||||
    }
 | 
			
		||||
    my $analyzer = new Lucene::Analysis::Standard::StandardAnalyzer;
 | 
			
		||||
    my $store = $self->_get_store($create);
 | 
			
		||||
 | 
			
		||||
    my $iw;
 | 
			
		||||
    eval { $iw = new Lucene::Index::IndexWriter($store, $analyzer, $create); };
 | 
			
		||||
    if ($@) {
 | 
			
		||||
        return $self->error('INDEX_CORRUPT', 'WARN', "$@");
 | 
			
		||||
    }
 | 
			
		||||
    return $iw;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub drop_search_driver {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $path = $self->_get_path;
 | 
			
		||||
    require File::Tools;
 | 
			
		||||
    File::Tools::deldir($path);
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub add_search_driver {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    $self->_get_indexer(1) or return;
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_create_table {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# creates the index tables..
 | 
			
		||||
#
 | 
			
		||||
    return $_[0]->add_search_driver(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_drop_table {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
# Remove the index tables.
 | 
			
		||||
#
 | 
			
		||||
    return $_[0]->drop_search_driver(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
sub post_add_record {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
# indexes a single record
 | 
			
		||||
    my ($self, $rec, $insert_sth, $no_optimize) = @_;
 | 
			
		||||
 | 
			
		||||
    my $tbl     = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    my %weights = $tbl->_weight_cols() or return;
 | 
			
		||||
 | 
			
		||||
    my $indexer = $self->_get_indexer(0) or return $self->{_debug} ? () : 1;
 | 
			
		||||
    my $doc = new Lucene::Document;
 | 
			
		||||
    my ($pk) = $self->{table}->pk;
 | 
			
		||||
    delete $weights{$pk};
 | 
			
		||||
    for my $column_name (keys %weights) {
 | 
			
		||||
        my $field = Lucene::Document::Field->UnStored($column_name, $rec->{$column_name});
 | 
			
		||||
        $field->setBoost($weights{$column_name});
 | 
			
		||||
        $doc->add($field);
 | 
			
		||||
    }
 | 
			
		||||
    $doc->add(Lucene::Document::Field->Keyword($pk, ($tbl->ai && $insert_sth ? $insert_sth->insert_id : $rec->{$pk})));
 | 
			
		||||
    $indexer->addDocument($doc);
 | 
			
		||||
    $indexer->optimize if !$no_optimize;
 | 
			
		||||
    $indexer->close;
 | 
			
		||||
    undef $indexer;
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub reindex_all {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $table   = shift;
 | 
			
		||||
    my $opts    = shift;
 | 
			
		||||
    my $tick    = $opts->{tick} || 0;
 | 
			
		||||
    my $max     = $opts->{max}  || 5000;
 | 
			
		||||
 | 
			
		||||
    my $indexer = $self->_get_indexer(1) or return $self->{_debug} ? () : 1; # clobbers the old one
 | 
			
		||||
    $indexer->close;
 | 
			
		||||
    undef $indexer;
 | 
			
		||||
 | 
			
		||||
    my %weights     = $self->{table}->_weight_cols() or return;
 | 
			
		||||
    my @weight_list = keys %weights;
 | 
			
		||||
    my ($pk)    = $self->{table}->pk();
 | 
			
		||||
 | 
			
		||||
# Go through the table and index each field.
 | 
			
		||||
    my $iterations = 1;
 | 
			
		||||
    my $count = 0;
 | 
			
		||||
 | 
			
		||||
    while (1) {
 | 
			
		||||
        if ($max) {
 | 
			
		||||
            my $offset = ($iterations-1) * $max;
 | 
			
		||||
            $table->select_options("LIMIT $offset,$max");
 | 
			
		||||
        }
 | 
			
		||||
        my $cond     = $opts->{cond} || {};
 | 
			
		||||
        my $sth      = $table->select($cond, [$pk, @weight_list]);
 | 
			
		||||
        my $done     = 1;
 | 
			
		||||
 | 
			
		||||
        while (my $rec = $sth->fetchrow_hashref() ) {
 | 
			
		||||
            $self->post_add_record($rec, undef, 1);
 | 
			
		||||
            $done = 0;
 | 
			
		||||
            if ($tick) {
 | 
			
		||||
                $count++;
 | 
			
		||||
                $count % $tick      or (print "$count ");
 | 
			
		||||
                $count % ($tick*10) or (print "\n");
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        last if $done;
 | 
			
		||||
        $iterations++;
 | 
			
		||||
        last if !$max;
 | 
			
		||||
    }
 | 
			
		||||
    $indexer = $self->_get_indexer(0) or return;
 | 
			
		||||
    $indexer->optimize;
 | 
			
		||||
    $indexer->close;
 | 
			
		||||
    undef $indexer;
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub pre_delete_record {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
# Delete a records index values.
 | 
			
		||||
#
 | 
			
		||||
    my ($self, $where) = @_; 
 | 
			
		||||
 | 
			
		||||
    my $tbl = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    my ($pk) = $tbl->pk();
 | 
			
		||||
    my $q = $tbl->select($where, [$pk]);
 | 
			
		||||
 | 
			
		||||
    my $reader = eval { Lucene::Index::IndexReader->open($self->_get_store(0)); };
 | 
			
		||||
    if ($@) {
 | 
			
		||||
        return $self->{_debug} ? $self->error('INDEX_CORRUPT', 'WARN', "$@") : 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    my @errors;
 | 
			
		||||
    while (my ($item_id) = $q->fetchrow) {
 | 
			
		||||
        my $t =  new Lucene::Index::Term($pk => $item_id);
 | 
			
		||||
        eval { $reader->deleteDocuments($t); };
 | 
			
		||||
        if ($@) {
 | 
			
		||||
            push @errors, "$@";
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    $reader->close;
 | 
			
		||||
    undef $reader;
 | 
			
		||||
    if (@errors) {
 | 
			
		||||
        return $self->{_debug} ? $self->error('DELETE_FAILED', 'WARN', join(", ", @errors)) : 1;
 | 
			
		||||
    }
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_update_record {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
    my ( $self, $set_cond, $where_cond, $tmp ) = @_;
 | 
			
		||||
 | 
			
		||||
# delete the previous record
 | 
			
		||||
    eval {
 | 
			
		||||
        $self->pre_delete_record($where_cond) or return $self->{_debug} ? () : 1;
 | 
			
		||||
    };
 | 
			
		||||
#
 | 
			
		||||
# the new record
 | 
			
		||||
    my $tbl = $self->{table} or $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    my ($pk) = $tbl->pk();
 | 
			
		||||
    my %weights = $self->{table}->_weight_cols();
 | 
			
		||||
    my @weight_list = keys %weights;
 | 
			
		||||
    my $q = $tbl->select($where_cond, [$pk, @weight_list]);
 | 
			
		||||
    while (my $href = $q->fetchrow_hashref) {
 | 
			
		||||
        $self->post_add_record($href);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return 1;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub reindex_record {
 | 
			
		||||
# -------------------------------------------------------
 | 
			
		||||
# reindexes a record. basically deletes all associated records from current db abnd does an index.
 | 
			
		||||
# it's safe to use this
 | 
			
		||||
    my ($self, $rec) = @_;
 | 
			
		||||
 | 
			
		||||
    $self->delete_record($rec);
 | 
			
		||||
    $self->index_record($rec);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,206 @@
 | 
			
		||||
NAME
 | 
			
		||||
    Lucene -- API to the C++ port of the Lucene search engine
 | 
			
		||||
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
  Initialize/Empty Lucene index
 | 
			
		||||
      my $analyzer = new Lucene::Analysis::Standard::StandardAnalyzer();
 | 
			
		||||
      my $store = Lucene::Store::FSDirectory->getDirectory("/home/lucene", 1);
 | 
			
		||||
 | 
			
		||||
      my $tmp_writer = new Lucene::Index::IndexWriter($store, $analyzer, 1);
 | 
			
		||||
      $tmp_writer->close;
 | 
			
		||||
      undef $tmp_writer;
 | 
			
		||||
 | 
			
		||||
  Choose your Analyzer (string tokenizer)
 | 
			
		||||
      # lowercases text and splits it at non-letter characters 
 | 
			
		||||
      my $analyzer = Lucene::Analysis::SimpleAnalyzer();
 | 
			
		||||
      # same as before and removes stop words
 | 
			
		||||
      my $analyzer = Lucene::Analysis::StopAnalyzer();
 | 
			
		||||
      # splits text at whitespace characters
 | 
			
		||||
      my $analyzer = Lucene::Analysis::WhitespaceAnalyzer();
 | 
			
		||||
      # lowercases text, tokenized it based on a grammer that 
 | 
			
		||||
      # leaves named authorities intact (e-mails, company names,
 | 
			
		||||
      # web hostnames, IP addresses, etc) and removed stop words
 | 
			
		||||
      my $analyzer = Lucene::Analysis::Standard::StandardAnalyzer();
 | 
			
		||||
 | 
			
		||||
  Choose your Store (storage engine)
 | 
			
		||||
      # in-memory storage
 | 
			
		||||
      my $store = new Lucene::Store::RAMDirectory();
 | 
			
		||||
      # disk-based storage
 | 
			
		||||
      my $store = Lucene::Store::FSDirectory->getDirectory("/home/lucene", 0);
 | 
			
		||||
 | 
			
		||||
  Open and configure an IndexWriter
 | 
			
		||||
      my $writer = new Lucene::Index::IndexWriter($store, $analyzer, 0);
 | 
			
		||||
      # optional settings for power users
 | 
			
		||||
      $writer->setMergeFactor(100);
 | 
			
		||||
      $writer->setUseCompoundFile(0);
 | 
			
		||||
      $writer->setMaxFieldLength(255);
 | 
			
		||||
      $writer->setMinMergeDocs(10);
 | 
			
		||||
      $writer->setMaxMergeDocs(100);
 | 
			
		||||
 | 
			
		||||
  Create Documents and add Fields
 | 
			
		||||
      my $doc = new Lucene::Document;
 | 
			
		||||
      # field gets analyzed, indexed and stored
 | 
			
		||||
      $doc->add(Lucene::Document::Field->Text("content", $content));
 | 
			
		||||
      # field gets indexed and stored
 | 
			
		||||
      $doc->add(Lucene::Document::Field->Keyword("isbn", $isbn));
 | 
			
		||||
      # field gets just stored
 | 
			
		||||
      $doc->add(Lucene::Document::Field->UnIndexed("sales_rank", $sales_rank));
 | 
			
		||||
      # field gets analyzed and indexed 
 | 
			
		||||
      $doc->add(Lucene::Document::Field->UnStored("categories", $categories));
 | 
			
		||||
 | 
			
		||||
  Add Documents to an IndexWriter
 | 
			
		||||
      $writer->addDocument($doc);
 | 
			
		||||
 | 
			
		||||
  Optimize your index and close the IndexWriter
 | 
			
		||||
      $writer->optimize();
 | 
			
		||||
      $writer->close();
 | 
			
		||||
      undef $writer;
 | 
			
		||||
 | 
			
		||||
  Delete Documents
 | 
			
		||||
      my $reader = Lucene::Index::IndexReader->open($store);
 | 
			
		||||
      my $term = new Lucene::Index::Term("isbn", $isbn);
 | 
			
		||||
      $reader->deleteDocuments($term);
 | 
			
		||||
      $reader->close();
 | 
			
		||||
      undef $reader;
 | 
			
		||||
 | 
			
		||||
  Query index
 | 
			
		||||
      # initalize searcher and parser
 | 
			
		||||
      my $analyzer = Lucene::Analysis::SimpleAnalyzer();
 | 
			
		||||
      my $store = Lucene::Store::FSDirectory->getDirectory("/home/lucene", 0);
 | 
			
		||||
      my $searcher = new Lucene::Search::IndexSearcher($store);
 | 
			
		||||
      my $parser = new Lucene::QueryParser("default_field", $analyzer);
 | 
			
		||||
 | 
			
		||||
      # build a query on the default field
 | 
			
		||||
      my $query = $parser->parse("perl");
 | 
			
		||||
 | 
			
		||||
      # build a query on another field
 | 
			
		||||
      my $query = $parser->parse("title:cookbook");
 | 
			
		||||
 | 
			
		||||
      # define a sort on one field
 | 
			
		||||
      my $sortfield = new Lucene::Search::SortField("unixtime"); 
 | 
			
		||||
      my $reversed_sortfield = new Lucene::Search::SortField("unixtime", 1);
 | 
			
		||||
      my $sort = new Lucene::Search::Sort($sortfield);
 | 
			
		||||
 | 
			
		||||
      # define a sort on two fields
 | 
			
		||||
      my $sort = new Lucene::Search::Sort($sortfield1, $sortfield2);
 | 
			
		||||
 | 
			
		||||
      # use Lucene's INDEXORDER or RELEVANCE sort
 | 
			
		||||
      my $sort = Lucene::Search::Sort->INDEXORDER;
 | 
			
		||||
      my $sort = Lucene::Search::Sort->RELEVANCE;
 | 
			
		||||
 | 
			
		||||
      # query index and get results
 | 
			
		||||
      my $hits = $searcher->search($query);
 | 
			
		||||
      my $sorted_hits = $searcher->search($query, $sort);
 | 
			
		||||
 | 
			
		||||
      # get number of results
 | 
			
		||||
      my $num_hits = $hits->length();
 | 
			
		||||
 | 
			
		||||
      # get fields and ranking score for each hit
 | 
			
		||||
      for (my $i = 0; $i < $num_hits; $i++) {
 | 
			
		||||
        my $doc = $hits->doc($i);
 | 
			
		||||
        my $score = $hits->score($i);
 | 
			
		||||
        my $title = $doc->get("title");
 | 
			
		||||
        my $isbn = $doc->get("isbn");
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      # free memory and close searcher
 | 
			
		||||
      undef $hits;
 | 
			
		||||
      undef $query;
 | 
			
		||||
      undef $parser;
 | 
			
		||||
      undef $analyzer;
 | 
			
		||||
      $searcher->close();
 | 
			
		||||
      undef $fsdir;
 | 
			
		||||
      undef $searcher;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  Close your Store
 | 
			
		||||
      $store->close;
 | 
			
		||||
      undef $store;
 | 
			
		||||
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
    Like it or not Apache Lucene has become the de-facto standard for
 | 
			
		||||
    open-source high-performance search. It has a large user-base, is well
 | 
			
		||||
    documented and has plenty of committers. Unfortunately Apache Lucene is
 | 
			
		||||
    entirely written in Java and therefore of relatively little use for perl
 | 
			
		||||
    programmers. Fortunately in the recent years a group of C++ programmers
 | 
			
		||||
    led by Ben van Klinken decided to port Java Lucene to C++.
 | 
			
		||||
 | 
			
		||||
    The purpose of the module is to export the C++ Lucene API to perl and at
 | 
			
		||||
    the same time be as close as possible to the original Java API. This has
 | 
			
		||||
    the combined advantage of providing perl programmers with a
 | 
			
		||||
    well-documented API and giving them access to a C++ search engine
 | 
			
		||||
    library that is supposedly faster than the original.
 | 
			
		||||
 | 
			
		||||
CHARACTER SUPPORT
 | 
			
		||||
    Currently only ISO 8859-1 (Latin-1) characters are supported. Obviously
 | 
			
		||||
    this included all ASCII characters.
 | 
			
		||||
 | 
			
		||||
INDEX COMPATIBLITY
 | 
			
		||||
    For the moment indices produced by this module are not compatible with
 | 
			
		||||
    those from Apache Lucene. The reason for this is that this module uses
 | 
			
		||||
    1-byte character encoding as opposed to 2-byte (widechar) encoding with
 | 
			
		||||
    Apache Lucene.
 | 
			
		||||
 | 
			
		||||
INSTALLATION
 | 
			
		||||
    This module requires the clucene library to be installed. The best way
 | 
			
		||||
    to get it is to go to the following page
 | 
			
		||||
 | 
			
		||||
        http://sourceforge.net/projects/clucene/
 | 
			
		||||
 | 
			
		||||
    and download the latest STABLE clucene-core version. Currently it is
 | 
			
		||||
    clucene-core-0.9.15. Make sure you compile it in ASCII mode and install
 | 
			
		||||
    it in your standard library path.
 | 
			
		||||
 | 
			
		||||
    On a Linux platform this goes as follows:
 | 
			
		||||
 | 
			
		||||
        wget http://kent.dl.sourceforge.net/sourceforge/clucene/clucene-core-0.9.15.tar.gz
 | 
			
		||||
        cd clucene-core-0.9.15
 | 
			
		||||
        ./autogen.sh
 | 
			
		||||
        ./configure --disable-debug --prefix=/usr --exec-prefix=/usr --enable-ascii
 | 
			
		||||
        make
 | 
			
		||||
        make check
 | 
			
		||||
        (as root) make install
 | 
			
		||||
 | 
			
		||||
    To install the perl module itself, run the following commands:
 | 
			
		||||
 | 
			
		||||
        perl Makefile.PL
 | 
			
		||||
        make
 | 
			
		||||
        make test
 | 
			
		||||
        (as root) make install
 | 
			
		||||
 | 
			
		||||
AUTHOR
 | 
			
		||||
    Thomas Busch <tbusch at cpan dot org>
 | 
			
		||||
 | 
			
		||||
COPYRIGHT AND LICENSE
 | 
			
		||||
    Copyright (c) 2006 Thomas Busch
 | 
			
		||||
 | 
			
		||||
    This library is free software; you can redistribute it and/or modify it
 | 
			
		||||
    under the same terms as Perl itself.
 | 
			
		||||
 | 
			
		||||
SEE ALSO
 | 
			
		||||
    Plucene - a pure-Perl implementation of Lucene
 | 
			
		||||
 | 
			
		||||
    KinoSearch - a search engine library inspired by Lucene
 | 
			
		||||
 | 
			
		||||
DISCLAIMER OF WARRANTY
 | 
			
		||||
    BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
 | 
			
		||||
    FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
 | 
			
		||||
    OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
 | 
			
		||||
    PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
 | 
			
		||||
    EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 | 
			
		||||
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
 | 
			
		||||
    ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
 | 
			
		||||
    YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
 | 
			
		||||
    NECESSARY SERVICING, REPAIR, OR CORRECTION.
 | 
			
		||||
 | 
			
		||||
    IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 | 
			
		||||
    WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
 | 
			
		||||
    REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE
 | 
			
		||||
    TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR
 | 
			
		||||
    CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
 | 
			
		||||
    SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
 | 
			
		||||
    RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
 | 
			
		||||
    FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
 | 
			
		||||
    SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 | 
			
		||||
    DAMAGES.
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										115
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/STH.pm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/LUCENE/STH.pm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,115 @@
 | 
			
		||||
# ====================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Search::STH
 | 
			
		||||
#   Author: Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: STH.pm,v 1.1 2006/12/07 07:04:51 aki Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ====================================================================
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::LUCENE::STH;
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
use strict;
 | 
			
		||||
use vars qw/@ISA $ATTRIBS $VERSION $DEBUG $ERRORS $ERROR_MESSAGE /;
 | 
			
		||||
require GT::SQL::Search::Base::STH;
 | 
			
		||||
 | 
			
		||||
@ISA = ('GT::SQL::Search::STH');
 | 
			
		||||
$ATTRIBS = {
 | 
			
		||||
    'db_sort' => 1,
 | 
			
		||||
    'hits'    => undef
 | 
			
		||||
};
 | 
			
		||||
$ERROR_MESSAGE = 'GT::SQL::Search::STH';
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
sub init {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# GT::SQL::Search::STH expects a full set of results in $self->{results}. For
 | 
			
		||||
# Lucene the only time a full set of results is there is when we are sorting
 | 
			
		||||
# on a field that is not weighted, otherwise the results in $self->{results} is
 | 
			
		||||
# the proper page and number of results.
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
 | 
			
		||||
    $self->set(@_);
 | 
			
		||||
 | 
			
		||||
    --$self->{nh} if $self->{nh};
 | 
			
		||||
 | 
			
		||||
    # Here we allow hits to override our concept of rows. This is only useful
 | 
			
		||||
    # when !$self->{db_sort}
 | 
			
		||||
    $self->{rows} = $self->{hits}
 | 
			
		||||
        ? $self->{hits}
 | 
			
		||||
        : $self->{results}
 | 
			
		||||
            ? scalar(keys %{$self->{results}})
 | 
			
		||||
            : 0;
 | 
			
		||||
 | 
			
		||||
    if ($self->{db_sort}) {
 | 
			
		||||
        $self->get_db_sorted_results;
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        $self->get_sorted_results;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub get_sorted_results {
 | 
			
		||||
# Just sorts the results out of $self->{results} which should have been setup
 | 
			
		||||
# by a search driver
 | 
			
		||||
    my ($self) = @_;
 | 
			
		||||
    my $results = $self->{results};
 | 
			
		||||
    $self->{index} = 0;
 | 
			
		||||
    $self->{max_index} = $self->{mh} - 1;
 | 
			
		||||
    $self->{'order'}  = [ sort { 
 | 
			
		||||
                                    ( $results->{$b} || return 0 ) <=> ( $results->{$a} || 0 )
 | 
			
		||||
                              } keys %{$results} ];
 | 
			
		||||
    $self->debug_dumper( "Results will be presented in the following order: ", $self->{'order'} ) if ($self->{_debug});
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub get_db_sorted_results {
 | 
			
		||||
# This assumes $self->{results} has a full result set, i.e. without any LIMIT
 | 
			
		||||
# It then selects the result set using the SQL driver to do the sorting. This
 | 
			
		||||
# is for Search modules which can not handle their own sorting
 | 
			
		||||
    my ($self) = @_;
 | 
			
		||||
 | 
			
		||||
    my $sb = GT::SQL::Search::Base::Search->clean_sb($self->{sb}, $self->{so});
 | 
			
		||||
    $self->{index}  = $self->{nh} * $self->{mh} || 0;
 | 
			
		||||
    $self->{max_index} = $self->{index} + $self->{mh} - 1; # index + mh is the first record which should not be returned.
 | 
			
		||||
    if ($self->{max_index} > $self->{rows}) {
 | 
			
		||||
        $self->{max_index}  = $self->{rows};
 | 
			
		||||
        $self->{rows}       = $self->{rows} - $self->{index};
 | 
			
		||||
        $self->{rows} < 0 ? $self->{rows} = 0 : 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    else {
 | 
			
		||||
        $self->{rows}       = $self->{mh};
 | 
			
		||||
    }
 | 
			
		||||
    my ( $table, $pk ) = $self->_table_info();
 | 
			
		||||
    my ( $query, $where, $st, $limit );
 | 
			
		||||
 | 
			
		||||
    $where      = qq!$pk in(! . join( ",", keys %{$self->{results}}) . ')';
 | 
			
		||||
    $self->{mh} and $limit = qq!LIMIT $self->{index}, $self->{rows}!;
 | 
			
		||||
    $query      = qq!
 | 
			
		||||
        SELECT $pk
 | 
			
		||||
        FROM   $table
 | 
			
		||||
        WHERE  $where
 | 
			
		||||
        $sb
 | 
			
		||||
        $limit
 | 
			
		||||
    !;
 | 
			
		||||
    $self->debug( "Row fetch query: $query" ) if ($self->{_debug});
 | 
			
		||||
    my $sth        = $self->{table}->{driver}->prepare( $query );
 | 
			
		||||
    $sth->execute();
 | 
			
		||||
 | 
			
		||||
    $self->{index}    = 0;
 | 
			
		||||
    $self->{max_hits} = $self->{rows};
 | 
			
		||||
 | 
			
		||||
    # Fetch the results in sorted order
 | 
			
		||||
    my $order = $sth->fetchall_arrayref();
 | 
			
		||||
    $sth->finish();
 | 
			
		||||
 | 
			
		||||
    $self->{'order'}  = [ map { $_->[0] } @{$order} ];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,260 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::LUCENE::Search
 | 
			
		||||
#   Author  : Scott Beck
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Search.pm,v 1.2 2006/12/07 22:42:16 aki Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::LUCENE::Search;
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
use strict;
 | 
			
		||||
use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS $ERRORS $ERROR_MESSAGE /;
 | 
			
		||||
use Lucene;
 | 
			
		||||
use GT::TempFile;
 | 
			
		||||
use GT::SQL::Search::LUCENE::STH;
 | 
			
		||||
use GT::SQL::Search::Base::Search;
 | 
			
		||||
@ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
$DEBUG   = 0;
 | 
			
		||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
$ERRORS = {
 | 
			
		||||
    SEARCH_ERROR => "Error searching: %s",
 | 
			
		||||
    QUERY_ERROR  => "Query error: %s"
 | 
			
		||||
};
 | 
			
		||||
$ERROR_MESSAGE = 'GT::SQL';
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    shift;
 | 
			
		||||
    return GT::SQL::Search::LUCENE::Search->new(@_)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_path {
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
    my $name    = $self->{table}->name;
 | 
			
		||||
    my $tmpdir  = GT::TempFile::find_tmpdir();
 | 
			
		||||
    my $path = $tmpdir . '/' . $name;
 | 
			
		||||
    $path = $1 if $path =~ /(.*)/; # XXX untaint
 | 
			
		||||
    return $path;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_store {
 | 
			
		||||
    my ($self, $create) = @_;
 | 
			
		||||
    my $path = $self->_get_path;
 | 
			
		||||
    return Lucene::Store::FSDirectory->getDirectory($path, $create);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub query {
 | 
			
		||||
# --------------------------------------------------
 | 
			
		||||
# Returns a sth based on a query
 | 
			
		||||
#
 | 
			
		||||
# Options:
 | 
			
		||||
#        - paging
 | 
			
		||||
#           mh            : max hits
 | 
			
		||||
#           nh            : number hit (or page of hits)
 | 
			
		||||
#
 | 
			
		||||
#        - searching
 | 
			
		||||
#           ww            : whole word
 | 
			
		||||
#           ma            : 1 => OR match, 0 => AND match, undefined => QUERY
 | 
			
		||||
#           substring     : search for substrings of words
 | 
			
		||||
#           bool          : 'and' => and search, 'or' => or search, '' => regular query
 | 
			
		||||
#           query         : the string of things to ask for
 | 
			
		||||
#
 | 
			
		||||
#        - filtering
 | 
			
		||||
#           field_name    : value       # Find all rows with field_name = value
 | 
			
		||||
#           field_name    : ">value"    # Find all rows with field_name > value.
 | 
			
		||||
#           field_name    : "<value"    # Find all rows with field_name < value.
 | 
			
		||||
#           field_name-gt : value       # Find all rows with field_name > value.
 | 
			
		||||
#           field_name-lt : value       # Find all rows with field_name < value.
 | 
			
		||||
#
 | 
			
		||||
# Parameters:
 | 
			
		||||
#        ( $CGI ) : a single cgi object
 | 
			
		||||
#        ( $HASH ) : a hash of the parameters
 | 
			
		||||
#
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
# create an easily accessible argument hash
 | 
			
		||||
    my $args = $self->common_param(@_);
 | 
			
		||||
    my $tbl = $self->{table};
 | 
			
		||||
 | 
			
		||||
# see if we can setup the filtering constraints
 | 
			
		||||
    my $filter = { %$args }; 
 | 
			
		||||
    my $query  = delete $args->{query} || $self->{query} || '';
 | 
			
		||||
    my $ftr_cond;
 | 
			
		||||
 | 
			
		||||
# parse query
 | 
			
		||||
    $self->debug( "Search Query: $query" ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
    my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
 | 
			
		||||
 | 
			
		||||
    $self->{rejected_keywords} = $rejected;
 | 
			
		||||
 | 
			
		||||
# setup the additional input parameters
 | 
			
		||||
    $query_struct = $self->_preset_options( $query_struct, $args );
 | 
			
		||||
 | 
			
		||||
# now sort into distinct buckets
 | 
			
		||||
    my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
 | 
			
		||||
    $self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# with the buckets, it's now possible to create a query string
 | 
			
		||||
# that can be passed directly into the Lucene search.
 | 
			
		||||
    my $query_string = '';
 | 
			
		||||
 | 
			
		||||
    foreach my $search_type ( keys %$buckets ) {
 | 
			
		||||
        my $bucket = $buckets->{$search_type};
 | 
			
		||||
        foreach my $token ( keys %$bucket ) {
 | 
			
		||||
            next unless $token;
 | 
			
		||||
            my $properties = $bucket->{$token} or next;
 | 
			
		||||
            $token =~ s/(["()])/\\$1/g;
 | 
			
		||||
            $token =~ s/\b(or|and)\b/ /g;
 | 
			
		||||
 | 
			
		||||
            my $e = ' ';
 | 
			
		||||
 | 
			
		||||
# handle boolean operations
 | 
			
		||||
            $properties->{mode} ||= '';
 | 
			
		||||
            if ( $properties->{mode} eq 'must' ) {
 | 
			
		||||
                $e .= '+';
 | 
			
		||||
            }
 | 
			
		||||
            elsif ( $properties->{mode} eq 'cannot' ) {
 | 
			
		||||
                $e .= '-';
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
# deal with phrase vs keyword
 | 
			
		||||
            if ( $properties->{phrase} ) {
 | 
			
		||||
                $e .= '"' . $token . '"' unless $token =~ /^"|"$/;
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                $e .= $token;
 | 
			
		||||
 | 
			
		||||
# substring match
 | 
			
		||||
                if ($properties->{mode} ne 'substring') {
 | 
			
		||||
                    $e .= '*' if $properties->{substring};
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            $query_string .= $e;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# calculate the cursor constraints
 | 
			
		||||
    foreach my $k (qw( nh mh so sb )) {
 | 
			
		||||
        next if defined $args->{$k};
 | 
			
		||||
        $args->{$k} = $self->{$k} || '';
 | 
			
		||||
    }
 | 
			
		||||
    $args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
 | 
			
		||||
    $args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
 | 
			
		||||
    $args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ )  ? $1 : 'score';
 | 
			
		||||
 | 
			
		||||
    # Score is the default
 | 
			
		||||
    $args->{so} = (defined $args->{so} and $args->{so} =~ /^(asc|desc)(?:end)?$/i) ? lc($1) : 'asc';
 | 
			
		||||
 | 
			
		||||
    my %weights = $tbl->_weight_cols();
 | 
			
		||||
    my @sortfields;
 | 
			
		||||
    my $do_mysql_sort = 0;
 | 
			
		||||
    for (ref($args->{sb}) eq 'ARRAY' ? @{$args->{sb}} : $args->{sb}) {
 | 
			
		||||
        if (!exists $weights{$_}) {
 | 
			
		||||
            $do_mysql_sort = 1 if $_ ne 'score';
 | 
			
		||||
            next;
 | 
			
		||||
        }
 | 
			
		||||
        push @sortfields, new Lucene::Search::SortField($_, $args->{so} ne 'asc');
 | 
			
		||||
    }
 | 
			
		||||
    my $sort = @sortfields ? new Lucene::Search::Sort(@sortfields) : Lucene::Search::Sort->RELEVANCE;
 | 
			
		||||
    my $store = $self->_get_store(0);
 | 
			
		||||
    my $analyzer = new Lucene::Analysis::Standard::StandardAnalyzer;
 | 
			
		||||
    my $searcher = eval { new Lucene::Search::IndexSearcher($store); };
 | 
			
		||||
    if ($@) {
 | 
			
		||||
        $self->{_debug} and $self->error('SEARCH_ERROR', 'WARN', "$@");
 | 
			
		||||
        return $self->sth({}, 0); # no hits
 | 
			
		||||
    }
 | 
			
		||||
    # Random default field, it's not used
 | 
			
		||||
    my $parser = new Lucene::MultiFieldQueryParser((keys %weights)[0], $analyzer);
 | 
			
		||||
    my $pquery = eval { $parser->parse($query_string, [keys %weights], $analyzer); };
 | 
			
		||||
    if ($@) {
 | 
			
		||||
        $self->{_debug} and $self->error('QUERY_ERROR', 'WARN', "$@");
 | 
			
		||||
        return $self->sth({}, 0); # no hits
 | 
			
		||||
    }
 | 
			
		||||
    my $hits = $searcher->search($pquery, $sort);
 | 
			
		||||
    my $num_hits = $hits->length;
 | 
			
		||||
 | 
			
		||||
## Setup a limit only if there is no callback. The callback argument requires a full results list
 | 
			
		||||
    my ($offset, $max_hits) = (0, $num_hits);
 | 
			
		||||
    unless ($self->{callback} or $do_mysql_sort) {
 | 
			
		||||
        $offset = ( $args->{nh} - 1 ) * $args->{mh};
 | 
			
		||||
        $max_hits = $offset + $args->{mh};
 | 
			
		||||
    }
 | 
			
		||||
    $max_hits = $num_hits if $max_hits > $num_hits;
 | 
			
		||||
    my ($pk) = $self->{table}->pk;
 | 
			
		||||
    my @indexes;
 | 
			
		||||
    my $results = {};
 | 
			
		||||
    for (my $i= $offset; $i < $max_hits; ++$i) {
 | 
			
		||||
        my $doc = $hits->doc($i);
 | 
			
		||||
        my $value = $doc->get($pk);
 | 
			
		||||
        my $score = $hits->score($i);
 | 
			
		||||
        $results->{$value} = $score;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# now handle filters
 | 
			
		||||
    my $cols    = $self->{'table'}->cols();
 | 
			
		||||
    my %filters = map {
 | 
			
		||||
        (my $tmp = $_) =~ s/-[lg]t$//;
 | 
			
		||||
        $cols->{$tmp} ? ($_ => $args->{$_}) : ()
 | 
			
		||||
    } keys %{$args};
 | 
			
		||||
 | 
			
		||||
    if (keys %filters) {
 | 
			
		||||
        $self->debug( "Creating Filters: ", \%filters ) if ($self->{_debug});
 | 
			
		||||
        $results = $self->filter(\%filters, $results);
 | 
			
		||||
    }
 | 
			
		||||
    elsif ($self->{filter}) {
 | 
			
		||||
        $self->debug( "Filtering results", $self->{filter} ) if ($self->{_debug});
 | 
			
		||||
        $results = $self->_filter_query( $self->{filter}, $results );
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        $self->debug( "No filters being used.") if ($self->{_debug});
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# now this query should probably clear the filters once it's been used, so i'll dothat here
 | 
			
		||||
    $self->{filter} = undef;
 | 
			
		||||
 | 
			
		||||
# now run through a callback function if needed.
 | 
			
		||||
    if ($self->{callback}) {
 | 
			
		||||
        unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
 | 
			
		||||
            $self->{_debug} and $self->error ('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
 | 
			
		||||
            return $self->sth({}, 0); # no hits
 | 
			
		||||
        }
 | 
			
		||||
        $self->debug_dumper ("Running results through callback. Had: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
 | 
			
		||||
        $results = $self->{callback}->($self, $results);
 | 
			
		||||
        $self->debug_dumper ("New result set: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    $self->{rows} = $num_hits;
 | 
			
		||||
 | 
			
		||||
    return $self->sth($results, $do_mysql_sort);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub sth {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my ($self, $results, $db_sort) = @_;
 | 
			
		||||
 | 
			
		||||
    my $sth = GT::SQL::Search::LUCENE::STH->new(
 | 
			
		||||
        'results' => $results,
 | 
			
		||||
        'hits'    => $self->{rows},
 | 
			
		||||
        'db'      => $self->{table}->{driver},
 | 
			
		||||
        'db_sort' => $db_sort,
 | 
			
		||||
# pass the following attributes down to the STH handler
 | 
			
		||||
        map({ ($_ => $self->{$_}) } qw/ table sb so score_col score_sort nh mh rows _debug /)
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    return $sth;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,98 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MSSQL::Indexer
 | 
			
		||||
#   Author: Alex Krohn
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Indexer.pm,v 1.6 2004/08/28 03:53:48 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Supports MS SQL full text indexer on MS SQL 2000 only.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MSSQL::Indexer;
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/@ISA $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
 | 
			
		||||
    use GT::SQL::Search::Base::Indexer;
 | 
			
		||||
    @ISA = qw/ GT::SQL::Search::Base::Indexer /;
 | 
			
		||||
    $DEBUG      = 0;
 | 
			
		||||
    $VERSION    = sprintf "%d.%03d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
 | 
			
		||||
    $ERRORS = {
 | 
			
		||||
        NOTFROMWEB      => 'There are far too many records in table %s for create/destroy of this indexing scheme from the web. Please use alternative method.',
 | 
			
		||||
        MSSQLNONSUPPORT => 'You must be using MS SQL 2000 in order to use full text indexing. Current Database: %s',
 | 
			
		||||
        CREATEINDEX     => 'Problem Creating Full Text Index: %s'
 | 
			
		||||
    };
 | 
			
		||||
    $ERROR_MESSAGE = 'GT::SQL';
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    my $class = shift;
 | 
			
		||||
    return $class->new(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub ok {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my ($class, $tbl) = @_;
 | 
			
		||||
    unless (uc $tbl->{connect}->{driver} eq 'ODBC') {
 | 
			
		||||
        return $class->error ('MSSQLNONSUPPORT', 'WARN', $tbl->{connect}->{driver});
 | 
			
		||||
    }
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub drop_search_driver {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $table   = $self->{table};
 | 
			
		||||
    my $name    = $table->name;
 | 
			
		||||
    my $cat     = $name . '_ctlg';
 | 
			
		||||
 | 
			
		||||
    my $res = eval {
 | 
			
		||||
        $table->do_query(" sp_fulltext_table '$name', 'drop' ");
 | 
			
		||||
        $table->do_query(" sp_fulltext_catalog '$cat', 'drop' ");
 | 
			
		||||
        1;
 | 
			
		||||
    };
 | 
			
		||||
    $res ? return 1 : return;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub add_search_driver {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $table   = $self->{table};
 | 
			
		||||
    my $name    = $table->name;
 | 
			
		||||
    my $cat     = $name . '_ctlg';
 | 
			
		||||
    my %weights = $table->weight;
 | 
			
		||||
    my ($pk)    = $table->pk;
 | 
			
		||||
 | 
			
		||||
# Enable a database for full text indexing
 | 
			
		||||
    $table->do_query(" sp_fulltext_database 'enable' ") or $self->error('CREATEINDEX', 'FATAL', $GT::SQL::error);
 | 
			
		||||
# Create a full text catalog to store the data.
 | 
			
		||||
    $table->do_query(" sp_fulltext_catalog '$cat', 'create' ") or $self->error('CREATEINDEX', 'WARN', $GT::SQL::error);
 | 
			
		||||
# Make a unique index on primary key (not sure why it isn't by default.
 | 
			
		||||
    $table->do_query(" create unique index PK_$name on $name ($pk) ");
 | 
			
		||||
# Mark this table as using the full text catalog created
 | 
			
		||||
    $table->do_query(" sp_fulltext_table '$name', 'create', '$cat', 'PK_$name' ") or $self->error('CREATEINDEX', 'WARN', $GT::SQL::error);
 | 
			
		||||
# Specify which columns are to be indexed
 | 
			
		||||
    foreach my $col (keys %weights) {
 | 
			
		||||
        if ($weights{$col}) {
 | 
			
		||||
            $table->do_query(" sp_fulltext_column '$name', '$col', 'add' ") or $self->error('CREATEINDEX', 'WARN', $GT::SQL::error);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
# Must have a timestamp field.
 | 
			
		||||
    $table->do_query(" alter table $name add timestamp ");
 | 
			
		||||
# Build the index.
 | 
			
		||||
    $table->do_query(" sp_fulltext_table '$name', 'start_change_tracking' ")        or $self->error('CREATEINDEX', 'WARN', $GT::SQL::error);
 | 
			
		||||
    $table->do_query(" sp_fulltext_table '$name', 'start_background_updateindex' ") or $self->error('CREATEINDEX', 'WARN', $GT::SQL::error);
 | 
			
		||||
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_create_table {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    shift->add_search_driver(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,179 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MSSQL::Search
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Search.pm,v 1.9 2004/08/28 03:53:48 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MSSQL::Search;
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
 | 
			
		||||
    use GT::SQL::Search::Base::Search;
 | 
			
		||||
    @ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG      = 0;
 | 
			
		||||
    $VERSION    = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
    $ATTRIBS    = {
 | 
			
		||||
        min_word_size => 2,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    shift;
 | 
			
		||||
    return GT::SQL::Search::MSSQL::Search->new(@_)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub query {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# overruns the usual query system with the mssql version
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
 | 
			
		||||
# Find out what sort of a parameter we're dealing with
 | 
			
		||||
    my $input   = $self->common_param(@_);
 | 
			
		||||
 | 
			
		||||
# Add additional parameters if required
 | 
			
		||||
    foreach my $parameter ( keys %{$ATTRIBS} ) {
 | 
			
		||||
        if ( not exists $input->{$parameter} ) {
 | 
			
		||||
            $input->{$parameter} = $self->{$parameter};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# Parse query...,
 | 
			
		||||
    my ( $query, $rejected )     = $self->_parse_query_string( $input->{'query'} );
 | 
			
		||||
    $self->{'rejected_keywords'} = $rejected;
 | 
			
		||||
 | 
			
		||||
# Setup the additional input parameters
 | 
			
		||||
    $query = $self->_preset_options( $query, $input );
 | 
			
		||||
 | 
			
		||||
# Now sort into distinct buckets
 | 
			
		||||
    my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query );
 | 
			
		||||
    my $tbl     = $self->{table} or $self->error( 'BADARGS', 'FATAL', 'Must have table object defined' );
 | 
			
		||||
    my $string  = $self->_string ($buckets);
 | 
			
		||||
 | 
			
		||||
    return $self->sth({}) unless ($string =~ /\w/);
 | 
			
		||||
 | 
			
		||||
    my $table_name = $tbl->name();
 | 
			
		||||
    my ($pk)       = $tbl->pk;
 | 
			
		||||
 | 
			
		||||
# create the filter
 | 
			
		||||
    my $filter_sql = $self->{filter} ? "WHERE ( " . $self->{filter}->sql . ' )' : '';
 | 
			
		||||
 | 
			
		||||
# If we have a callback, we need all results.
 | 
			
		||||
    if ($self->{callback}) {
 | 
			
		||||
        $query = qq!
 | 
			
		||||
            SELECT $pk, K.RANK
 | 
			
		||||
            FROM $table_name AS T INNER JOIN
 | 
			
		||||
                CONTAINSTABLE ( $table_name, *,
 | 
			
		||||
                    '$string'
 | 
			
		||||
                ) AS K
 | 
			
		||||
                ON T.$pk = K.[KEY]
 | 
			
		||||
            $filter_sql
 | 
			
		||||
        !;
 | 
			
		||||
        my %results   = $tbl->do_query($query)->fetchall_list;
 | 
			
		||||
        my $results   = $self->{callback}->($self, \%results);
 | 
			
		||||
        $self->{rows} = $results ? scalar keys %$results : 0;
 | 
			
		||||
        return $self->sth($results);
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        my $mh = (defined $input->{nh} and $input->{nh} =~ /^(\d+)$/) ? $1 : 1;
 | 
			
		||||
        my $nh = (defined $input->{mh} and $input->{mh} =~ /^(\d+)$/) ? $1 : 25;
 | 
			
		||||
# First get the total.
 | 
			
		||||
        $query = qq!
 | 
			
		||||
            SELECT COUNT(*)
 | 
			
		||||
            FROM $table_name AS T INNER JOIN
 | 
			
		||||
                CONTAINSTABLE ( $table_name, *,
 | 
			
		||||
                    '$string'
 | 
			
		||||
                ) AS K
 | 
			
		||||
                ON T.$pk = K.[KEY]
 | 
			
		||||
            $filter_sql
 | 
			
		||||
        !;
 | 
			
		||||
        my ($count) = $tbl->do_query($query)->fetchrow;
 | 
			
		||||
 | 
			
		||||
# Now get results.
 | 
			
		||||
        $query = qq!
 | 
			
		||||
            SELECT $pk, K.RANK
 | 
			
		||||
            FROM $table_name AS T INNER JOIN
 | 
			
		||||
                CONTAINSTABLE ( $table_name, *,
 | 
			
		||||
                    '$string'
 | 
			
		||||
                ) AS K
 | 
			
		||||
                ON T.$pk = K.[KEY]
 | 
			
		||||
            $filter_sql
 | 
			
		||||
            ORDER BY K.RANK DESC
 | 
			
		||||
        !;
 | 
			
		||||
        my %results   = $tbl->do_query($query)->fetchall_list;
 | 
			
		||||
        $self->{rows} = $count;
 | 
			
		||||
        return $self->sth(\%results);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _string {
 | 
			
		||||
# -------------------------------------------------------------------
 | 
			
		||||
# Returns the string to use for containstable.
 | 
			
		||||
#
 | 
			
		||||
    my ($self, $buckets) = @_;
 | 
			
		||||
 | 
			
		||||
# union
 | 
			
		||||
    my $tmp_bucket = $buckets->{keywords};
 | 
			
		||||
    my $union_request_str = join(
 | 
			
		||||
        " or ",
 | 
			
		||||
        map(
 | 
			
		||||
            qq!"$_"!,
 | 
			
		||||
            keys %{$buckets->{phrases}}
 | 
			
		||||
        ),
 | 
			
		||||
        map(
 | 
			
		||||
            ($tmp_bucket->{$_}->{substring} ? "$_*" : $_),
 | 
			
		||||
            keys %$tmp_bucket
 | 
			
		||||
        )
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
# intersect
 | 
			
		||||
    $tmp_bucket = $buckets->{keywords_must};
 | 
			
		||||
    my $intersect_request_str = join(
 | 
			
		||||
        " and ",
 | 
			
		||||
        map(
 | 
			
		||||
            qq!"$_"!,
 | 
			
		||||
            keys %{$buckets->{phrases_must}}
 | 
			
		||||
        ),
 | 
			
		||||
        map(
 | 
			
		||||
            ($tmp_bucket->{$_}->{substring} ? "$_*" : $_),
 | 
			
		||||
            keys %$tmp_bucket
 | 
			
		||||
        )
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
# disjoin
 | 
			
		||||
    $tmp_bucket = $buckets->{keywords_cannot};
 | 
			
		||||
    my $disjoin_request_str = join(
 | 
			
		||||
        " and ",
 | 
			
		||||
        map(
 | 
			
		||||
            qq!"$_"!,
 | 
			
		||||
            keys %{$buckets->{phrases_cannot}}
 | 
			
		||||
        ),
 | 
			
		||||
        map(
 | 
			
		||||
            ($tmp_bucket->{$_}->{substring} ? "$_*" : $_),
 | 
			
		||||
            keys %$tmp_bucket
 | 
			
		||||
        )
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
# now build the query
 | 
			
		||||
    my $tmp_request_str = join(
 | 
			
		||||
        " and ",
 | 
			
		||||
        ($union_request_str     ?     "( $union_request_str )"     : ()),
 | 
			
		||||
        ($intersect_request_str ?     "( $intersect_request_str )" : ()),
 | 
			
		||||
        ($disjoin_request_str   ? "NOT ( $disjoin_request_str )"   : ())
 | 
			
		||||
    );
 | 
			
		||||
    return $tmp_request_str;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,187 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MYSQL::Indexer
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Indexer.pm,v 1.17 2004/08/28 03:53:49 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MYSQL::Indexer;
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
use strict;
 | 
			
		||||
use vars qw/@ISA $VERSION $DEBUG $ERRORS $ERROR_MESSAGE/;
 | 
			
		||||
use GT::SQL::Search::Base::Indexer;
 | 
			
		||||
@ISA = qw/GT::SQL::Search::Base::Indexer/;
 | 
			
		||||
$DEBUG   = 0;
 | 
			
		||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.17 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
 | 
			
		||||
$ERRORS = {
 | 
			
		||||
    NOTFROMWEB      => 'There are far too many records in table %s for create/destroy of this indexing scheme from the web. Please use alternative method.',
 | 
			
		||||
    MYSQLNONSUPPORT => 'Driver MYSQL requires MySQL version 3.23.23 or greater. Currently MySQL version: %s'
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@$GT::SQL::ERRORS{ keys %$ERRORS } = values %$ERRORS;
 | 
			
		||||
 | 
			
		||||
$ERROR_MESSAGE = 'GT::SQL';
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    my $class = shift;
 | 
			
		||||
    return $class->new(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub ok {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my ($class, $tbl) = @_;
 | 
			
		||||
    unless (uc $tbl->{connect}->{driver} eq 'MYSQL') {
 | 
			
		||||
        return $class->error ('MYSQLNONSUPPORT', 'WARN', $tbl->{connect}->{driver});
 | 
			
		||||
    }
 | 
			
		||||
    my $sth = $tbl->do_query(qq!SELECT VERSION()!);
 | 
			
		||||
    my $version = $sth->fetchrow;
 | 
			
		||||
    my ($maj, $min) = split (/\./, $version);
 | 
			
		||||
    unless ($maj > 3 or ($maj == 3 and $min >= 23)) {
 | 
			
		||||
        return $class->error(MYSQLNONSUPPORT => WARN => $version);
 | 
			
		||||
    }
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub drop_search_driver {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
    $self->too_much() and return;
 | 
			
		||||
 | 
			
		||||
    my $tbl = $self->{table} or return;
 | 
			
		||||
    $tbl->connect();
 | 
			
		||||
 | 
			
		||||
    my %weights = $tbl->weight() or return;
 | 
			
		||||
    my $tblname = $tbl->name();
 | 
			
		||||
 | 
			
		||||
# Group the fulltext columns by value of the weight
 | 
			
		||||
    my %cols_grouped;
 | 
			
		||||
    foreach ( keys %weights ) {
 | 
			
		||||
        my $val = $weights{$_} or next;
 | 
			
		||||
        push @{$cols_grouped{$val}}, $_;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# Drop unified fulltext columns if required
 | 
			
		||||
    if ( keys %cols_grouped > 1 ) {
 | 
			
		||||
        $cols_grouped{-1} = [ grep { $weights{$_} } keys %weights ];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# For each value grouped column set create a full text 
 | 
			
		||||
# column
 | 
			
		||||
    foreach my $v ( keys %cols_grouped ) {
 | 
			
		||||
 | 
			
		||||
        my $ft_name = 'ft_'.join("_", sort @{$cols_grouped{$v}});
 | 
			
		||||
 | 
			
		||||
        my $res     = eval {
 | 
			
		||||
            $tbl->do_query(qq!
 | 
			
		||||
                ALTER TABLE $tblname
 | 
			
		||||
                DROP INDEX $ft_name
 | 
			
		||||
            !);
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
# Break on errors that can't be handled
 | 
			
		||||
        if ( $@ ) {
 | 
			
		||||
            next if $@ !~ /exist/i;
 | 
			
		||||
            $self->warn( "$@" );
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub add_search_driver {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
    $self->too_much() and return;
 | 
			
		||||
 | 
			
		||||
    my $tbl     = $self->{table} or return $self->error(BADARGS   => FATAL => "table must be passed into add_search_driver.");
 | 
			
		||||
    my %weights = $tbl->weight() or return $self->error(NOWEIGHTS => 'WARN');
 | 
			
		||||
    my $tblname = $tbl->name()   or return $self->error(BADARGS   => FATAL => "table does not have a name?");
 | 
			
		||||
 | 
			
		||||
# group the fulltext columns by value of the weight
 | 
			
		||||
    my %cols_grouped;
 | 
			
		||||
    foreach ( keys %weights ) {
 | 
			
		||||
        my $val = $weights{$_} or next;
 | 
			
		||||
        push @{$cols_grouped{$val}}, $_;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# Create unified fulltext columns if required
 | 
			
		||||
    if ( keys %cols_grouped > 1 ) {
 | 
			
		||||
        $cols_grouped{-1} = [ grep { $weights{$_} } keys %weights ];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# for each value grouped column set create a full text 
 | 
			
		||||
# column
 | 
			
		||||
    foreach my $v ( keys %cols_grouped ) {
 | 
			
		||||
 | 
			
		||||
        my $cols    = join(",", sort @{$cols_grouped{$v}});
 | 
			
		||||
        my $ft_name = 'ft_'.join("_", sort @{$cols_grouped{$v}});
 | 
			
		||||
 | 
			
		||||
        my $res     = eval {
 | 
			
		||||
            $tbl->do_query(qq!
 | 
			
		||||
                ALTER TABLE $tblname
 | 
			
		||||
                ADD FULLTEXT $ft_name ( $cols )
 | 
			
		||||
            !);
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
# break on errors that can't be handled
 | 
			
		||||
        if ( $@ ) {
 | 
			
		||||
            next if $@ =~ /duplicate/i;
 | 
			
		||||
            $self->warn( "$@" );
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return 1;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub too_much {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# returns true if there are too many records to be used on the Web
 | 
			
		||||
#
 | 
			
		||||
    if ( $ENV{REQUEST_METHOD} ) {
 | 
			
		||||
        my $self = shift;
 | 
			
		||||
        my $tbl = $self->{table};
 | 
			
		||||
        if ( $tbl->count() > 5000 ) {
 | 
			
		||||
            $self->error( 'NOTFROMWEB', 'WARN', $tbl->name() );
 | 
			
		||||
            return 1
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub post_create_table {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    shift->add_search_driver(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub reindex_all {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# this will drop all the fulltext columns and reindex all of them. This should
 | 
			
		||||
# not be required unless the user changes the weights on one of their columns.
 | 
			
		||||
# Unfortunately, this method is not particularly smart and risks not dropping
 | 
			
		||||
# certain index columns and reindexes even when it's not required. It must be
 | 
			
		||||
# recoded at a future date, but as this action won't happen frequently and will 
 | 
			
		||||
# rarely affect the user, it is not a priority.
 | 
			
		||||
#
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
    $self->drop_search_driver;
 | 
			
		||||
    $self->add_search_driver;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,51 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MYSQL::Search
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Search.pm,v 1.14 2004/01/13 01:35:19 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MYSQL::Search;
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
 | 
			
		||||
    use GT::SQL::Search::Base::Search;
 | 
			
		||||
    @ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG   = 0;
 | 
			
		||||
    $VERSION = sprintf "%d.%03d", q$Revision: 1.14 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
        min_word_size => 4
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
# --------------------------------------------------
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
    my $opts = $self->common_param( @_ ); 
 | 
			
		||||
 | 
			
		||||
# determine which mysql search variant to use.
 | 
			
		||||
    my $tbl     = $opts->{table};
 | 
			
		||||
    my $ver_sth = $tbl->do_query( 'SELECT VERSION()' );
 | 
			
		||||
    my $version = $ver_sth->fetchrow_array();
 | 
			
		||||
 | 
			
		||||
    my ( $maj, $min ) = split /\./, $version;
 | 
			
		||||
 | 
			
		||||
    my $pkg = 'GT::SQL::Search::MYSQL::';
 | 
			
		||||
    $pkg   .= $maj > 3 ? 'VER4' : 'VER3';
 | 
			
		||||
 | 
			
		||||
    eval "require $pkg"; 
 | 
			
		||||
    return $pkg->new(@_)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
							
								
								
									
										178
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER3.pm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										178
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER3.pm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,178 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MYSQL::VER3
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: VER3.pm,v 1.3 2004/08/28 03:53:49 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MYSQL::VER3;
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD /;
 | 
			
		||||
    use GT::SQL::Search::Base::Search;
 | 
			
		||||
    @ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG   = 0;
 | 
			
		||||
    $VERSION = sprintf "%d.%03d", q$Revision: 1.3 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
        min_word_size => 4
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub _phrase_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $phrases = shift or return $_[0];
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    foreach my $phrase ( values %{$phrases} ) {
 | 
			
		||||
        $self->debug_dumper( "Unioning: ", $phrase ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
        my $tmp = {};
 | 
			
		||||
        foreach my $keyword ( @{ $phrase->{phrase}|| [] } ) {
 | 
			
		||||
            $tmp = $self->_get_words ( [ $keyword ], $tmp, 'intersect' );
 | 
			
		||||
            keys %$tmp or return {};
 | 
			
		||||
        }
 | 
			
		||||
        foreach my $key ( keys %$tmp ) { $results->{$key} += $tmp->{$key} }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_phrase {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# one day change this so it does words properly
 | 
			
		||||
    return _get_words(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _union_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    return _get_words(@_);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _intersect_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my ( $self, $keywords, $results ) = @_;
 | 
			
		||||
    $keywords or return $results;
 | 
			
		||||
 | 
			
		||||
    foreach my $keyword ( keys %{ $keywords || {} } ) {
 | 
			
		||||
        $results = $self->_get_words ( [ $keyword ], $results, 'intersect' );
 | 
			
		||||
        keys %$results or return {};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _phrase_intersect_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $phrases = shift or return $_[0];
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    my $tmp = $self->_phrase_query ( $phrases, $results );
 | 
			
		||||
    keys %$results or return $tmp;
 | 
			
		||||
    foreach my $key ( keys %$results ) {
 | 
			
		||||
        if ( $tmp->{$key} ) {
 | 
			
		||||
            $results->{$key} += $tmp->{$key};
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            delete $results->{$key}
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _disjoin_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $words   = shift or return shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    $results = $self->_get_words([ keys %{$words || {}} ], $results, 'disjoin' );
 | 
			
		||||
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _phrase_disjoin_query {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $phrases = shift or return shift;
 | 
			
		||||
    my $results = shift || {};
 | 
			
		||||
 | 
			
		||||
    my $tmp = $self->_phrase_query ( $phrases, $results );
 | 
			
		||||
    keys %$results or return $tmp;
 | 
			
		||||
    foreach my $key ( keys %$results ) {
 | 
			
		||||
        $tmp->{$key} and delete $results->{$key};
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_words {
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    my $self     = shift;
 | 
			
		||||
    my $words    = shift or return $_[0] || {};
 | 
			
		||||
    my $results  = shift || {};
 | 
			
		||||
    my $mode     = lc shift;
 | 
			
		||||
 | 
			
		||||
    my $tbl      = $self->{table} or $self->error( 'BADARGS', 'FATAL', 'Must have table object defined' );
 | 
			
		||||
    my $tname    = $tbl->name();
 | 
			
		||||
    my $wordlist = join " ", ( ref $words ? ( ref $words eq 'HASH' ? keys %$words : @$words ) : $words ) ;
 | 
			
		||||
    my ($pk)     = $tbl->pk;
 | 
			
		||||
 | 
			
		||||
    my %weights  = $tbl->_weight_cols();
 | 
			
		||||
    my $cols     = join(",", keys %weights);
 | 
			
		||||
    my $qwrds    = quotemeta( $wordlist );
 | 
			
		||||
    my $where    = ( $results and keys %$results )
 | 
			
		||||
        ? ("AND $pk IN(" . join(',', keys %$results) . ")")
 | 
			
		||||
        : '';
 | 
			
		||||
    my $query = qq!
 | 
			
		||||
        SELECT $pk, MATCH($cols) AGAINST ('$qwrds') AS SCORE
 | 
			
		||||
        FROM  $tname
 | 
			
		||||
        WHERE MATCH($cols) AGAINST ('$qwrds')
 | 
			
		||||
        $where
 | 
			
		||||
    !;
 | 
			
		||||
    my $sth = $tbl->do_query( $query ) or return;
 | 
			
		||||
 | 
			
		||||
    if ( $mode eq 'disjoin' ) {
 | 
			
		||||
        while ( my $result = $sth->fetchrow ) {
 | 
			
		||||
            delete $results->{$result};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    elsif ( $mode eq 'intersect' ) {
 | 
			
		||||
        my $tmp = {};
 | 
			
		||||
        while ( my $aref = $sth->fetchrow_arrayref ) {
 | 
			
		||||
            $tmp->{$aref->[0]} = $aref->[1];
 | 
			
		||||
        }
 | 
			
		||||
        if ( $results and keys %$results ) {
 | 
			
		||||
            while (my ($id, $score) = each %$results) {
 | 
			
		||||
                if (not defined $tmp->{$id}) {
 | 
			
		||||
                    delete $results->{$id};
 | 
			
		||||
                    next;
 | 
			
		||||
                }
 | 
			
		||||
                $results->{$id} += $score;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            $results = $tmp;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        while ( my $aref = $sth->fetchrow_arrayref ) {
 | 
			
		||||
            $results->{$aref->[0]} += $aref->[1];
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return $results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
							
								
								
									
										355
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER4.pm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										355
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/SQL/Search/MYSQL/VER4.pm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,355 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Search::MYSQL::VER4
 | 
			
		||||
#   Author  : Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: VER4.pm,v 1.9 2004/01/13 01:35:19 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Class used to search indexed tables.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::MYSQL::VER4;
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/ @ISA $ATTRIBS $VERSION $DEBUG $AUTOLOAD $STOPWORDS /;
 | 
			
		||||
    use GT::SQL::Search::Base::Search;
 | 
			
		||||
    @ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
# ------------------------------------------------------------------------------
 | 
			
		||||
# Preamble information related to the object
 | 
			
		||||
 | 
			
		||||
    $DEBUG   = 0;
 | 
			
		||||
    $VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
 | 
			
		||||
    $STOPWORDS = { map { $_ => 1 } qw/
 | 
			
		||||
 | 
			
		||||
          a's able about above according accordingly across actually after
 | 
			
		||||
          afterwards again against ain't all allow allows almost alone
 | 
			
		||||
          along already also although always am among amongst an and another
 | 
			
		||||
          any anybody anyhow anyone anything anyway anyways anywhere apart
 | 
			
		||||
          appear appreciate appropriate are aren't around as aside ask asking
 | 
			
		||||
          associated at available away awfully be became because become becomes
 | 
			
		||||
          becoming been before beforehand behind being believe below beside
 | 
			
		||||
          besides best better between beyond both brief but by c'mon c's came
 | 
			
		||||
          can can't cannot cant cause causes certain certainly changes clearly
 | 
			
		||||
          co com come comes concerning consequently consider considering
 | 
			
		||||
          contain containing contains corresponding could couldn't course currently
 | 
			
		||||
          definitely described despite did didn't different do does doesn't
 | 
			
		||||
          doing don't done down downwards during each edu eg eight either else
 | 
			
		||||
          elsewhere enough entirely especially et etc even ever every everybody
 | 
			
		||||
          everyone everything everywhere ex exactly example except far few
 | 
			
		||||
          fifth first five followed following follows for former formerly
 | 
			
		||||
          forth four from further furthermore get gets getting given gives
 | 
			
		||||
          go goes going gone got gotten greetings had hadn't happens hardly
 | 
			
		||||
          has hasn't have haven't having he he's hello help hence her here
 | 
			
		||||
          here's hereafter hereby herein hereupon hers herself hi him himself
 | 
			
		||||
          his hither hopefully how howbeit however i'd i'll i'm i've ie if ignored
 | 
			
		||||
          immediate in inasmuch inc indeed indicate indicated indicates inner
 | 
			
		||||
          insofar instead into inward is isn't it it'd it'll it's its itself
 | 
			
		||||
          just keep keeps kept know knows known last lately later latter latterly
 | 
			
		||||
          least less lest let let's like liked likely little look looking looks
 | 
			
		||||
          ltd mainly many may maybe me mean meanwhile merely might more
 | 
			
		||||
          moreover most mostly much must my myself name namely nd near nearly
 | 
			
		||||
          necessary need needs neither never nevertheless new next nine no
 | 
			
		||||
          nobody non none noone nor normally not nothing novel now nowhere
 | 
			
		||||
          obviously of off often oh ok okay old on once one ones only onto
 | 
			
		||||
          or other others otherwise ought our ours ourselves out outside over
 | 
			
		||||
          overall own particular particularly per perhaps placed please plus
 | 
			
		||||
          possible presumably probably provides que quite qv rather rd re
 | 
			
		||||
          really reasonably regarding regardless regards relatively respectively
 | 
			
		||||
          right said same saw say saying says second secondly see seeing seem
 | 
			
		||||
          seemed seeming seems seen self selves sensible sent serious seriously
 | 
			
		||||
          seven several shall she should shouldn't since six so some somebody
 | 
			
		||||
          somehow someone something sometime sometimes somewhat somewhere
 | 
			
		||||
          soon sorry specified specify specifying still sub such sup sure
 | 
			
		||||
          t's take taken tell tends th than thank thanks thanx that that's
 | 
			
		||||
          thats the their theirs them themselves then thence there there's
 | 
			
		||||
          thereafter thereby therefore therein theres thereupon these they
 | 
			
		||||
          they'd they'll they're they've think third this thorough thoroughly
 | 
			
		||||
          those though three through throughout thru thus to together too
 | 
			
		||||
          took toward towards tried tries truly try trying twice two un
 | 
			
		||||
          under unfortunately unless unlikely until unto up upon us use used
 | 
			
		||||
          useful uses using usually value various very via viz vs want wants
 | 
			
		||||
          was wasn't way we we'd we'll we're we've welcome well went were
 | 
			
		||||
          weren't what what's whatever when whence whenever where where's
 | 
			
		||||
          whereafter whereas whereby wherein whereupon wherever whether
 | 
			
		||||
          which while whither who who's whoever whole whom whose why will
 | 
			
		||||
          willing wish with within without won't wonder would would wouldn't
 | 
			
		||||
          yes yet you you'd you'll you're you've your yours yourself
 | 
			
		||||
          yourselves zero
 | 
			
		||||
                
 | 
			
		||||
    / };
 | 
			
		||||
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
        min_word_size => 4,
 | 
			
		||||
        stopwords => $STOPWORDS,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub query {
 | 
			
		||||
# --------------------------------------------------
 | 
			
		||||
# Returns a sth based on a query
 | 
			
		||||
#
 | 
			
		||||
# Options:
 | 
			
		||||
#        - paging
 | 
			
		||||
#           mh            : max hits
 | 
			
		||||
#           nh            : number hit (or page of hits)
 | 
			
		||||
#
 | 
			
		||||
#        - searching
 | 
			
		||||
#           ww            : whole word
 | 
			
		||||
#           ma            : 1 => OR match, 0 => AND match, undefined => QUERY
 | 
			
		||||
#           substring     : search for substrings of words
 | 
			
		||||
#           bool          : 'and' => and search, 'or' => or search, '' => regular query
 | 
			
		||||
#           query         : the string of things to ask for
 | 
			
		||||
#
 | 
			
		||||
#        - filtering
 | 
			
		||||
#           field_name    : value       # Find all rows with field_name = value
 | 
			
		||||
#           field_name    : ">value"    # Find all rows with field_name > value.
 | 
			
		||||
#           field_name    : "<value"    # Find all rows with field_name < value.
 | 
			
		||||
#           field_name-gt : value       # Find all rows with field_name > value.
 | 
			
		||||
#           field_name-lt : value       # Find all rows with field_name < value.
 | 
			
		||||
#
 | 
			
		||||
# Parameters:
 | 
			
		||||
#        ( $CGI ) : a single cgi object
 | 
			
		||||
#        ( $HASH ) : a hash of the parameters
 | 
			
		||||
#
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
# create an easily accessible argument hash
 | 
			
		||||
    my $args = $self->common_param(@_);
 | 
			
		||||
 | 
			
		||||
# see if we can setup the filtering constraints
 | 
			
		||||
    my $filter = { %$args }; 
 | 
			
		||||
    my $query  = delete $args->{query} || $self->{query} || '';
 | 
			
		||||
    my $ftr_cond;
 | 
			
		||||
 | 
			
		||||
# parse query
 | 
			
		||||
    $self->debug( "Search Query: $query" ) if ($self->{_debug});
 | 
			
		||||
    my ( $query_struct, $rejected ) = $self->_parse_query_string( $query );
 | 
			
		||||
 | 
			
		||||
    $self->{rejected_keywords} = $rejected;
 | 
			
		||||
 | 
			
		||||
# setup the additional input parameters
 | 
			
		||||
    $query_struct = $self->_preset_options( $query_struct, $args );
 | 
			
		||||
 | 
			
		||||
# now sort into distinct buckets
 | 
			
		||||
    my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query_struct );
 | 
			
		||||
    $self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# with the buckets, it's now possible to create a query string
 | 
			
		||||
# that can be passed directly into the FULLTEXT search.
 | 
			
		||||
    my $query_string = '';
 | 
			
		||||
 | 
			
		||||
    foreach my $search_type ( keys %$buckets ) {
 | 
			
		||||
        my $bucket = $buckets->{$search_type};
 | 
			
		||||
        foreach my $token ( keys %$bucket ) {
 | 
			
		||||
            next unless $token;
 | 
			
		||||
            my $properties = $bucket->{$token} or next;
 | 
			
		||||
 | 
			
		||||
            my $e = ' ';
 | 
			
		||||
 | 
			
		||||
# handle boolean operations
 | 
			
		||||
            $properties->{mode} ||= '';
 | 
			
		||||
            if ( $properties->{mode} eq 'must' ) {
 | 
			
		||||
                $e .= '+';
 | 
			
		||||
            }
 | 
			
		||||
            elsif ( $properties->{mode} eq 'cannot' ) {
 | 
			
		||||
                $e .= '-';
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
# deal with phrase vs keyword
 | 
			
		||||
            if ( $properties->{phrase} ) {
 | 
			
		||||
                $e .= '"' . quotemeta( $token ) . '"';
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                $e .= quotemeta $token;
 | 
			
		||||
 | 
			
		||||
# substring match
 | 
			
		||||
                $e .= '*' if $properties->{substring};
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            $query_string .= $e;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# start building the GT::SQL::COndition object that will allow us to
 | 
			
		||||
# to retreive the data
 | 
			
		||||
 | 
			
		||||
    require GT::SQL::Condition;
 | 
			
		||||
    my $tbl = $self->{table};
 | 
			
		||||
    my $constraints = GT::SQL::Condition->new;
 | 
			
		||||
 | 
			
		||||
# create the GT::SQL::Condition object that will become the filtering
 | 
			
		||||
# constraints
 | 
			
		||||
    my $filt = $self->{filter};
 | 
			
		||||
 | 
			
		||||
    if ( $filt and ref $filt eq 'HASH' ) {
 | 
			
		||||
        foreach my $fkey ( keys %$filt ) {
 | 
			
		||||
            next if exists $args->{$fkey};
 | 
			
		||||
            $args->{$fkey} = $filt->{$fkey};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if ( my $filter_cond = $tbl->build_query_cond( $args ) ) {
 | 
			
		||||
        $constraints->add( $filter_cond );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# if the cached filter object is a Condition object, append
 | 
			
		||||
# it to the filter set
 | 
			
		||||
    if ( $filt and  UNIVERSAL::isa( $filt, 'GT::SQL::Condition' ) ) {
 | 
			
		||||
        $constraints->add( $filt );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# create our fulltext query condition
 | 
			
		||||
    my %weights = $tbl->_weight_cols();
 | 
			
		||||
    my $cols = join(",", keys %weights);
 | 
			
		||||
    if ( $query_string ) {
 | 
			
		||||
        $constraints->add( GT::SQL::Condition->new( 
 | 
			
		||||
                                "MATCH( $cols )", 
 | 
			
		||||
                                "AGAINST", 
 | 
			
		||||
                                \"('$query_string' IN BOOLEAN MODE)" ) );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# calculate the cursor constraints
 | 
			
		||||
    foreach my $k (qw( nh mh so sb )) {
 | 
			
		||||
        next if defined $args->{$k};
 | 
			
		||||
        $args->{$k} = $self->{$k} || '';
 | 
			
		||||
    }
 | 
			
		||||
    $args->{nh} = (defined $args->{nh} and $args->{nh} =~ /^(\d+)$/) ? $1 : 1;
 | 
			
		||||
    $args->{mh} = (defined $args->{mh} and $args->{mh} =~ /^(\d+)$/) ? $1 : 25;
 | 
			
		||||
    $args->{sb} = (defined $args->{sb} and $args->{sb} =~ /^([\w ]+)$/ )  ? $1 : 'score';
 | 
			
		||||
 | 
			
		||||
# if the sorting method is "score" the order is forced to "descend" (as there
 | 
			
		||||
# is almost no reason to order by worst matches) 
 | 
			
		||||
# if the storing key is not "score", the default order will be "ascend"
 | 
			
		||||
    $args->{so} = 
 | 
			
		||||
            $args->{sb} eq 'score' ? 'desc' : # comment out this entire line to prevent "descend" order forcing
 | 
			
		||||
                    ( (defined $args->{so} and $args->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : 'asc' );
 | 
			
		||||
 | 
			
		||||
# check that sb is not dangerous
 | 
			
		||||
    my $sb = $self->clean_sb($args->{sb}, $args->{so});
 | 
			
		||||
 | 
			
		||||
    $self->debug_dumper( "About to query. Constraint object: ", $constraints) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# Setup a limit only if there is no callback. The callback argument requires a full results list
 | 
			
		||||
    unless ( $self->{callback} ) {
 | 
			
		||||
        my $offset = ( $args->{nh} - 1 ) * $args->{mh};
 | 
			
		||||
        $tbl->select_options($sb) if ($sb);
 | 
			
		||||
        $tbl->select_options("LIMIT $offset, $args->{mh}");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    my $sth;
 | 
			
		||||
 | 
			
		||||
# if the weights are all the same value, the query can be optimized
 | 
			
		||||
# to use just one MATCH AGAINST argument. However, if the weights
 | 
			
		||||
# are different, each element must be sectioned and queried separately
 | 
			
		||||
# with the weight value multipler
 | 
			
		||||
 | 
			
		||||
# check to see if all the weight values are the same.
 | 
			
		||||
    my $base_weight; 
 | 
			
		||||
    my $weights_same = 1;
 | 
			
		||||
    foreach ( values %weights ) {
 | 
			
		||||
        $base_weight ||= $_ or next; # init and skip 0s
 | 
			
		||||
        next if $base_weight == $_;
 | 
			
		||||
        $weights_same = 0;
 | 
			
		||||
        last;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# multiplex the action
 | 
			
		||||
    my $result_cols = $self->{callback} ? ($tbl->pk)[0] : '*';
 | 
			
		||||
 | 
			
		||||
    unless ( $query_string ) {
 | 
			
		||||
        $sth = $tbl->select( [ $result_cols ], $constraints ) or return;
 | 
			
		||||
    }
 | 
			
		||||
    elsif ( $weights_same ) {
 | 
			
		||||
        $sth = $tbl->select( [ $result_cols, "MATCH($cols) AGAINST ('$query_string' IN BOOLEAN MODE) AS score" ], $constraints )
 | 
			
		||||
                        or return;
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
 | 
			
		||||
# group the multiplier counts
 | 
			
		||||
        my %column_multiplier;
 | 
			
		||||
        foreach ( keys %weights ) {
 | 
			
		||||
            push @{$column_multiplier{$weights{$_}}}, $_;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        my @search_parameters;
 | 
			
		||||
        foreach my $val ( keys %column_multiplier ) {
 | 
			
		||||
            next unless $val;
 | 
			
		||||
 | 
			
		||||
            my $cols_ar = $column_multiplier{ $val } or next;
 | 
			
		||||
            my $search_cols = join ",", @$cols_ar;
 | 
			
		||||
 | 
			
		||||
            if ( $val > 1 ) {
 | 
			
		||||
                push @search_parameters, "( ( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) ) * $val )";
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                push @search_parameters, "( MATCH($search_cols) AGAINST ('$query_string' IN BOOLEAN MODE) )";
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        my $search_sql = "( " . join( " + ", @search_parameters ) . " ) AS score";
 | 
			
		||||
 | 
			
		||||
        $sth = $tbl->select( [ $result_cols, $search_sql ], $constraints )
 | 
			
		||||
                        or return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# If we have a callback, we fetch the primary key => score and pass that hash into 
 | 
			
		||||
# the filter. 
 | 
			
		||||
    if ($self->{callback}) {
 | 
			
		||||
        unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
 | 
			
		||||
            return $self->error('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
 | 
			
		||||
        }
 | 
			
		||||
        my %results = map { $_->[0] => $_->[1] } @{$sth->fetchall_arrayref};
 | 
			
		||||
 | 
			
		||||
        $self->debug_dumper("Running results through callback. Had: " . scalar (keys %results) . " results.", \%results) if ($self->{_debug});
 | 
			
		||||
        my $filtered = $self->{callback}->($self, \%results) || {};
 | 
			
		||||
        $self->debug_dumper("New result set: " . scalar (keys %$filtered) . " results.", $filtered) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
        $self->{rows} = scalar keys %$filtered;
 | 
			
		||||
        return $self->sth($filtered);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# count the number of hits. create a query for this purpose only if we are required to.
 | 
			
		||||
    $self->{rows} = $sth->rows();
 | 
			
		||||
    if (($args->{nh} > 1) or ($self->{rows} == $args->{mh})) {
 | 
			
		||||
        $self->{rows} = $tbl->count($constraints);
 | 
			
		||||
    } 
 | 
			
		||||
    return $sth;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub clean_sb {
 | 
			
		||||
# -------------------------------------------------------------------------------
 | 
			
		||||
# Convert the sort by, sort order into an sql string.
 | 
			
		||||
#
 | 
			
		||||
    my ($class, $sb, $so) = @_;
 | 
			
		||||
    my $output = '';
 | 
			
		||||
    
 | 
			
		||||
    return $output unless ($sb);
 | 
			
		||||
 | 
			
		||||
    if ($sb and not ref $sb) {
 | 
			
		||||
        if ($sb =~ /^[\w\s,]+$/)  {
 | 
			
		||||
            if ($sb =~ /\s(?:asc|desc)/i) {
 | 
			
		||||
                $output = 'ORDER BY ' . $sb;
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                $output = 'ORDER BY ' . $sb . ' ' . $so;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            $class->error('BADSB', 'WARN', $sb);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    elsif (ref $sb eq 'ARRAY') {
 | 
			
		||||
        foreach ( @$sb ) {
 | 
			
		||||
            /^[\w\s,]+$/ or $class->error( 'BADSB', 'WARN', $sb ), next;
 | 
			
		||||
        }
 | 
			
		||||
        $output = 'ORDER BY ' . join(',', @$sb);
 | 
			
		||||
    }
 | 
			
		||||
    return $output;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,25 @@
 | 
			
		||||
# ====================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Search::NONINDEXED::Indexer
 | 
			
		||||
#   Author: Aki Mimoto
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Indexer.pm,v 1.3 2004/01/13 01:35:20 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ====================================================================
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::NONINDEXED::Indexer;
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/@ISA $DEBUG/;
 | 
			
		||||
    use GT::SQL::Search::Base::Indexer;
 | 
			
		||||
    @ISA = qw/ GT::SQL::Search::Base::Indexer /;
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    shift;
 | 
			
		||||
    return GT::SQL::Search::NONINDEXED::Indexer->new(@_)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
@@ -0,0 +1,257 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::SQL::Search::NONINDEXED::Search
 | 
			
		||||
#   Author  : Alex Krohn
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Search.pm,v 1.30 2006/08/09 06:58:39 brewt Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Nonindex search system
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::SQL::Search::NONINDEXED::Search;
 | 
			
		||||
# ==================================================================
 | 
			
		||||
    use strict;
 | 
			
		||||
    use vars qw/@ISA $ATTRIBS $VERSION $DEBUG/;
 | 
			
		||||
    use GT::SQL::Search::Base::Search;
 | 
			
		||||
    use GT::SQL::Condition;
 | 
			
		||||
    @ISA = qw( GT::SQL::Search::Base::Search );
 | 
			
		||||
 | 
			
		||||
    $DEBUG      = 0;
 | 
			
		||||
    $VERSION    = sprintf "%d.%03d", q$Revision: 1.30 $ =~ /(\d+)\.(\d+)/;  
 | 
			
		||||
    $ATTRIBS    = {
 | 
			
		||||
# parse based on latin characters
 | 
			
		||||
        latin_query_parse => 0
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
sub load {
 | 
			
		||||
    shift;
 | 
			
		||||
    return GT::SQL::Search::NONINDEXED::Search->new(@_)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub query {
 | 
			
		||||
#--------------------------------------------------------------------------------
 | 
			
		||||
# Returns a sth based on a query
 | 
			
		||||
#
 | 
			
		||||
# Options:
 | 
			
		||||
#        - paging
 | 
			
		||||
#           mh            : max hits
 | 
			
		||||
#           nh            : number hit (or page of hits)
 | 
			
		||||
#
 | 
			
		||||
#        - searching
 | 
			
		||||
#           ww            : whole word
 | 
			
		||||
#           ma            : 1 => OR match, 0 => AND match, undefined => QUERY
 | 
			
		||||
#           substring     : search for substrings of words
 | 
			
		||||
#           bool          : 'and' => and search, 'or' => or search, '' => regular query
 | 
			
		||||
#           query         : the string of things to ask for
 | 
			
		||||
#
 | 
			
		||||
#        - filtering
 | 
			
		||||
#           field_name    : value       # Find all rows with field_name = value
 | 
			
		||||
#           field_name    : ">value"    # Find all rows with field_name > value.
 | 
			
		||||
#           field_name    : "<value"    # Find all rows with field_name < value.
 | 
			
		||||
#           field_name-gt : value       # Find all rows with field_name > value.
 | 
			
		||||
#           field_name-lt : value       # Find all rows with field_name < value.
 | 
			
		||||
#
 | 
			
		||||
# Parameters:
 | 
			
		||||
#        ( $CGI ) : a single cgi object
 | 
			
		||||
#        ( $HASH ) : a hash of the parameters
 | 
			
		||||
#
 | 
			
		||||
    my $self = shift;
 | 
			
		||||
 | 
			
		||||
# find out what sort of a parameter we're dealing with
 | 
			
		||||
    my $input = $self->common_param(@_);
 | 
			
		||||
 | 
			
		||||
# add additional parameters if required
 | 
			
		||||
    foreach my $parameter ( keys %{$ATTRIBS} ) {
 | 
			
		||||
        if ( not exists $input->{$parameter} ) {
 | 
			
		||||
            $input->{$parameter} = $self->{$parameter};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# parse query
 | 
			
		||||
    $self->debug( "Search Query: $$input{query}", 1 ) if ($self->{_debug});
 | 
			
		||||
    my ( $query, $rejected ) = $self->_parse_query_string( $input->{'query'} );
 | 
			
		||||
 | 
			
		||||
    $self->{rejected_keywords} = $rejected;
 | 
			
		||||
 | 
			
		||||
# setup the additional input parameters
 | 
			
		||||
    $query = $self->_preset_options( $query, $input );
 | 
			
		||||
 | 
			
		||||
    $self->debug( "Set the pre-options: ", $query ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
# now sort into distinct buckets
 | 
			
		||||
    my $buckets = GT::SQL::Search::Base::Search::_create_buckets( $query );
 | 
			
		||||
    $self->debug_dumper( "Created Buckets for querying: ", $buckets ) if ($self->{_debug});
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    require GT::SQL::Condition;
 | 
			
		||||
    my $query_condition = new GT::SQL::Condition;
 | 
			
		||||
 | 
			
		||||
# now handle the separate possibilities
 | 
			
		||||
# the union
 | 
			
		||||
    my $union_cond     = $self->_get_condition( $buckets->{keywords},        $buckets->{phrases} );
 | 
			
		||||
    $query_condition->add(GT::SQL::Condition->new(@$union_cond, 'OR')) if $union_cond;
 | 
			
		||||
# the intersect
 | 
			
		||||
    my $intersect_cond = $self->_get_condition( $buckets->{keywords_must},   $buckets->{phrases_must} );
 | 
			
		||||
    $query_condition->add(GT::SQL::Condition->new(@$intersect_cond)) if $intersect_cond;
 | 
			
		||||
 | 
			
		||||
# the disjoin
 | 
			
		||||
    my $disjoin_cond   = $self->_get_condition( $buckets->{keywords_cannot}, $buckets->{phrases_cannot} );
 | 
			
		||||
    $query_condition->add(GT::SQL::Condition->new(@$disjoin_cond, 'OR')->not) if $disjoin_cond;
 | 
			
		||||
 | 
			
		||||
# now handle filters
 | 
			
		||||
    my $cols    = $self->{'table'}->cols();
 | 
			
		||||
    my %filters = map {
 | 
			
		||||
        (my $column = $_) =~ s/-[lg]t$//;
 | 
			
		||||
        exists $cols->{$column}
 | 
			
		||||
            ? ($_ => $input->{$_})
 | 
			
		||||
            : ()
 | 
			
		||||
    } keys %{$input};
 | 
			
		||||
 | 
			
		||||
# if there was no query nor filter return nothing.
 | 
			
		||||
    keys %$query or keys %filters or return $self->sth({});
 | 
			
		||||
 | 
			
		||||
    if (keys %filters) {
 | 
			
		||||
        $self->debug( "Creating Filters: ", \%filters ) if ($self->{_debug});
 | 
			
		||||
        $self->_add_filters( \%filters );
 | 
			
		||||
        $query_condition = GT::SQL::Condition->new( keys %$query ? $query_condition : (), $self->{filter} );
 | 
			
		||||
    }
 | 
			
		||||
    elsif ($self->{filter} and keys %{$self->{filter}} ) {
 | 
			
		||||
        $self->debug( "Filtering results", $self->{filter} ) if ($self->{_debug});
 | 
			
		||||
        $query_condition = GT::SQL::Condition->new( keys %$query ? $query_condition : (), $self->{filter} );
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        $self->debug( "No filters being used.") if ($self->{_debug});
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# now this query should probably clear the filters once it's been used, so i'll do that here
 | 
			
		||||
    $self->{filter} = undef;
 | 
			
		||||
 | 
			
		||||
    my $tbl  = $self->{table};
 | 
			
		||||
    my ($pk) = $tbl->pk;
 | 
			
		||||
 | 
			
		||||
# now run through a callback function if needed.
 | 
			
		||||
    if ($self->{callback}) {
 | 
			
		||||
 | 
			
		||||
# Warning: this slows things a heck of a lot.
 | 
			
		||||
        unless (ref $self->{callback} and ref $self->{callback} eq 'CODE') {
 | 
			
		||||
            return $self->error ('BADARGS', 'FATAL', "callback '$self->{callback}' must be a code ref!");
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        my $sth     = $tbl->select( [ $pk ], $query_condition );
 | 
			
		||||
        my $results = {};
 | 
			
		||||
        while (my $result = $sth->fetchrow) {
 | 
			
		||||
            $results->{$result} = undef;
 | 
			
		||||
        }
 | 
			
		||||
        $self->debug_dumper("Running results through callback. Had: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
 | 
			
		||||
        $results = $self->{callback}->($self, $results);
 | 
			
		||||
        $self->debug_dumper("New result set: " . scalar (keys %$results) . " results.", $results) if ($self->{_debug});
 | 
			
		||||
        $self->{rows} = scalar($results ? keys %{$results} : ());
 | 
			
		||||
 | 
			
		||||
        return $self->sth( $results );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# and now create a search sth object to handle all this
 | 
			
		||||
    $input->{nh} = (defined $input->{nh} and $input->{nh} =~ /^(\d+)$/) ? $1 : 1;
 | 
			
		||||
    $input->{mh} = (defined $input->{mh} and $input->{mh} =~ /^(\d+)$/) ? $1 : 25;
 | 
			
		||||
    $input->{so} = (defined $input->{so} and $input->{so} =~ /^(asc(?:end)?|desc(?:end)?)$/i) ? $1 : '';
 | 
			
		||||
 | 
			
		||||
# check that sb is not dangerous
 | 
			
		||||
    my $sb = $self->clean_sb($input->{sb}, $input->{so});
 | 
			
		||||
 | 
			
		||||
    my $offset = ( $input->{nh} - 1 ) * $input->{mh};
 | 
			
		||||
    $tbl->select_options($sb) if ($sb);
 | 
			
		||||
    $tbl->select_options("LIMIT $offset, $input->{mh}");
 | 
			
		||||
    my $sth = $tbl->select( $query_condition ) or return;
 | 
			
		||||
 | 
			
		||||
# so how many hits did we get?
 | 
			
		||||
    $self->{rows} = $sth->rows();
 | 
			
		||||
    if (($input->{nh} > 1) or ($self->{rows} == $input->{mh})) {
 | 
			
		||||
        $self->{rows} = $tbl->count($query_condition);
 | 
			
		||||
    }
 | 
			
		||||
    return $sth;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _get_condition {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
    my ( $self, $keywords, $phrases ) = @_;
 | 
			
		||||
 | 
			
		||||
    my @list = ( keys %$keywords, keys %$phrases );
 | 
			
		||||
 | 
			
		||||
    my $tbl     = $self->{table} or return $self->error( 'NODRIVER', 'FATAL' );
 | 
			
		||||
    my @cond    = ();
 | 
			
		||||
    my %tmp     = $tbl->weight();
 | 
			
		||||
    my @weights = keys  %tmp or return;
 | 
			
		||||
    foreach my $element ( @list ) {
 | 
			
		||||
        my @where = ();
 | 
			
		||||
        foreach my $cols ( @weights ) {
 | 
			
		||||
            push @where, [$cols, 'LIKE', "%$element%"]; # Condition does quoting by default.
 | 
			
		||||
        }
 | 
			
		||||
        push @cond, GT::SQL::Condition->new(@where, 'OR');
 | 
			
		||||
    }
 | 
			
		||||
    @cond or return;
 | 
			
		||||
 | 
			
		||||
    return \@cond;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _parse_query_string {
 | 
			
		||||
#------------------------------------------------------------
 | 
			
		||||
# Parses a query string '+foo -"bar this" alpha' into a hash of
 | 
			
		||||
# words and modes.
 | 
			
		||||
#
 | 
			
		||||
    my ($self, $text) = @_;
 | 
			
		||||
    my %modes = (
 | 
			
		||||
        '+' => 'must',
 | 
			
		||||
        '-' => 'cannot',
 | 
			
		||||
        '<' => 'greater',
 | 
			
		||||
        '>' => 'less'
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
# Latin will break up on actual words and punctuation.
 | 
			
		||||
    if ($self->{latin_query_parse}) {
 | 
			
		||||
        return $self->SUPER::_parse_query_string( $text );
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        my $words = {};
 | 
			
		||||
        my @terms;
 | 
			
		||||
        my $i = 0;
 | 
			
		||||
        foreach my $term (split /"/, $text) {
 | 
			
		||||
            push @terms, ($i++ % 2 ? $term : split ' ', $term);
 | 
			
		||||
        }
 | 
			
		||||
        for (my $i = 0; $i < @terms; $i++) {
 | 
			
		||||
            my $word = $terms[$i];
 | 
			
		||||
            $word =~ s/^\s*|\s*$//g;
 | 
			
		||||
            next if ($word eq '');
 | 
			
		||||
            if ($i < $#terms) {
 | 
			
		||||
                ($word eq '-') and ($word = '-' . $terms[++$i]);
 | 
			
		||||
                ($word eq '+') and ($word = '+' . $terms[++$i]);
 | 
			
		||||
            }
 | 
			
		||||
            $word         =~ s/^([<>+-])//;
 | 
			
		||||
            my $mode      = ($1 and $modes{$1} or 'can');
 | 
			
		||||
            my $substring = ($word =~ s/\*$//) || 0;
 | 
			
		||||
            if ($word =~ /\s/) {
 | 
			
		||||
                $words->{$word} = {
 | 
			
		||||
                    mode      => $mode,
 | 
			
		||||
                    phrase    => 1,
 | 
			
		||||
                    substring => $substring,
 | 
			
		||||
                    keyword   => 0,
 | 
			
		||||
                };
 | 
			
		||||
            }
 | 
			
		||||
            elsif ($word) {
 | 
			
		||||
                $words->{$word} = {
 | 
			
		||||
                    mode      => $mode,
 | 
			
		||||
                    phrase    => 0,
 | 
			
		||||
                    substring => $substring,
 | 
			
		||||
                    keyword   => 1,
 | 
			
		||||
                };
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        return $words;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
		Reference in New Issue
	
	Block a user