First pass at adding key files
This commit is contained in:
		
							
								
								
									
										213
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/Text/Tools.pm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										213
									
								
								site/slowtwitch.com/cgi-bin/articles/GT/Text/Tools.pm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,213 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
 | 
			
		||||
#
 | 
			
		||||
#   GT::Text::Tools
 | 
			
		||||
#   Author  : Scott Beck
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: Tools.pm,v 1.9 2005/06/09 23:42:16 brewt Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description: A general purpose text parsing module.
 | 
			
		||||
#
 | 
			
		||||
package GT::Text::Tools;
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Pragmas
 | 
			
		||||
use strict;
 | 
			
		||||
 | 
			
		||||
# Internal mules
 | 
			
		||||
use bases 'GT::Base' => '';
 | 
			
		||||
 | 
			
		||||
sub linesplit {
 | 
			
		||||
# --------------------------------------------------------------------
 | 
			
		||||
# my @words = GT::Text::Tools->linesplit($regex, $line);
 | 
			
		||||
# ------------------------------------------------------
 | 
			
		||||
#   Splits $line by $regex outside of quotes ['"]
 | 
			
		||||
#   If regex is false defaults to \s+.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
    # Ganged and modified from Text::ParseWords
 | 
			
		||||
    local $^W;
 | 
			
		||||
 | 
			
		||||
    my ($class, $delimiter, $line) = @_;
 | 
			
		||||
    $delimiter ||= '\s+';
 | 
			
		||||
    $delimiter =~ s/(\s)/\\$1/g;
 | 
			
		||||
    my ($quote, $quoted, $unquoted, $delim, $word, @pieces);
 | 
			
		||||
 | 
			
		||||
    while (length($line)) {
 | 
			
		||||
 | 
			
		||||
        ($quote, $quoted, undef, $unquoted, $delim, undef) =
 | 
			
		||||
            $line =~ m/^(["'])                          # a $quote
 | 
			
		||||
                            ((?:\\.|(?!\1)[^\\])*)      # and $quoted text
 | 
			
		||||
                            \1                          # followed by the same quote
 | 
			
		||||
                            ([\000-\377]*)              # and the rest
 | 
			
		||||
                        |                               # --OR--
 | 
			
		||||
                            ^((?:\\.|[^\\"'])*?)        # an $unquoted text
 | 
			
		||||
                            (\Z(?!\n)|(?:$delimiter)|(?!^)(?=["']))  
 | 
			
		||||
                                                        # plus EOL, delimiter, or quote
 | 
			
		||||
                            ([\000-\377]*)              # the rest
 | 
			
		||||
                  /x;              # extended layout
 | 
			
		||||
        return () unless ( $quote || length($unquoted) || length($delim));
 | 
			
		||||
 | 
			
		||||
        $line = $+;
 | 
			
		||||
 | 
			
		||||
        $quoted = "$quote$quoted$quote";
 | 
			
		||||
        $word .= defined $quote ? $quoted : $unquoted;
 | 
			
		||||
 | 
			
		||||
        if (length($delim)) {
 | 
			
		||||
            push(@pieces, $word);
 | 
			
		||||
            undef $word;
 | 
			
		||||
        }
 | 
			
		||||
        if (!length($line)) {
 | 
			
		||||
            push(@pieces, $word);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return (@pieces);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub linewrap {
 | 
			
		||||
# --------------------------------------------------------------------
 | 
			
		||||
# GT::Text::Tools->linewrap( $string, $number, {
 | 
			
		||||
#     nowrap          => $regexs,
 | 
			
		||||
#     eol             => "\n",
 | 
			
		||||
#     max_line_length => 50000
 | 
			
		||||
# });
 | 
			
		||||
# ----------------------------------------------
 | 
			
		||||
#   linewrap takes a string, a number of characters per line and a
 | 
			
		||||
#   hash ref of options. String will be wrapped to the number of 
 | 
			
		||||
#   characters specified on spaces.
 | 
			
		||||
#   The following options apply:
 | 
			
		||||
#       nowrap          => array ref of regexes that if matched, will
 | 
			
		||||
#                          not be wrapped.
 | 
			
		||||
#       eol             => What to wrap the lines with, defaults to 
 | 
			
		||||
#                          \n.
 | 
			
		||||
#       eol_match       => What to use to match eol characters; defaults to
 | 
			
		||||
#                          \r?\n
 | 
			
		||||
#       max_line_length => The maximum length a line can be that will
 | 
			
		||||
#                          be wrapped on a space. Any line reaching
 | 
			
		||||
#                          this length will be wrapped without
 | 
			
		||||
#                          looking for spaces. Defaults to 50_000, set
 | 
			
		||||
#                          to non-true value to avoid this affect.
 | 
			
		||||
#
 | 
			
		||||
    my ($class, $string, $i, $opts) = @_;
 | 
			
		||||
    my $max_len = exists($opts->{max_line_length}) ? $opts->{max_line_length} : 50_000;
 | 
			
		||||
    my $regexs  = $opts->{nowrap} || [];
 | 
			
		||||
    my $nl      = $opts->{eol}    || "\n";
 | 
			
		||||
    my $eolre   = $opts->{eol_match} || "\r?\n";
 | 
			
		||||
    $regexs     = (ref($regexs) eq 'ARRAY') ? $regexs : [$regexs || ()];
 | 
			
		||||
    my @t       = split /$eolre/, $string;
 | 
			
		||||
    my $r       = "";
 | 
			
		||||
    while (@t) {
 | 
			
		||||
        my $match = 0;
 | 
			
		||||
        if (length $t[0] <= $i) {
 | 
			
		||||
            $r .= shift(@t) . $nl;
 | 
			
		||||
            $match = 1;
 | 
			
		||||
        }
 | 
			
		||||
        elsif ($t[0] =~ /^\s*$/) {
 | 
			
		||||
            my $spaces = shift @t;
 | 
			
		||||
# Keep the string of spaces unless it's too long (don't bother wrapping them)
 | 
			
		||||
            $r .= (length $spaces <= $i ? $spaces : '') . $nl;
 | 
			
		||||
            $match = 1;
 | 
			
		||||
        }
 | 
			
		||||
        elsif ($max_len and length $t[0] > $max_len) { # Line is too long.
 | 
			
		||||
            my $line = shift @t;
 | 
			
		||||
            while ($line) {
 | 
			
		||||
                $r .= substr($line, 0, $i) . $nl;
 | 
			
		||||
                substr($line, 0, $i) = '';
 | 
			
		||||
            }
 | 
			
		||||
            $match = 1;
 | 
			
		||||
        }
 | 
			
		||||
        elsif (@{$regexs}) {
 | 
			
		||||
            my $regex = join('|', @{$regexs});
 | 
			
		||||
            if ($t[0] =~ m/$regex/) {
 | 
			
		||||
                my $eos = ''; # Store any incomplete lines
 | 
			
		||||
                while ($t[0] =~ s/^(.*?)(\s?)((?:$regex)\s?)//) {
 | 
			
		||||
                    my $pre = _wrap($i, $nl, $eos . $1);
 | 
			
		||||
                    $eos    = '';
 | 
			
		||||
                    my $s   = $2 || '';
 | 
			
		||||
                    my $mat = $3;
 | 
			
		||||
 | 
			
		||||
                    if (!length($pre) or $pre =~ /$nl$/) {
 | 
			
		||||
                        $r .= $pre;
 | 
			
		||||
                        if (length $mat > $i) {
 | 
			
		||||
                            $r .= $mat . $nl;
 | 
			
		||||
                        }
 | 
			
		||||
                        else {
 | 
			
		||||
                            $eos = $mat;
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    else {
 | 
			
		||||
                        $pre =~ s/($nl|^)(.*?)$//;
 | 
			
		||||
                        $r .= $pre . $1;
 | 
			
		||||
                        my $leftover = $2;
 | 
			
		||||
 | 
			
		||||
                        if (length($leftover . $s . $mat) <= $i) {
 | 
			
		||||
                            $eos = $leftover . $s . $mat;
 | 
			
		||||
                        }
 | 
			
		||||
                        else {
 | 
			
		||||
                            $r .= $leftover . $nl;
 | 
			
		||||
                            if (length $mat > $i) {
 | 
			
		||||
                                $r .= $mat . $nl;
 | 
			
		||||
                            }
 | 
			
		||||
                            else {
 | 
			
		||||
                                $eos = $mat;
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                $eos .= $t[0] if length $t[0];
 | 
			
		||||
                if (length $eos) {
 | 
			
		||||
                    $r .= _wrap($i, $nl, $eos) . $nl;
 | 
			
		||||
                }
 | 
			
		||||
                shift(@t);
 | 
			
		||||
                $match = 1;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        next if $match;
 | 
			
		||||
        $r .= _wrap($i, $nl, shift(@t) || '') . $nl;
 | 
			
		||||
    }
 | 
			
		||||
    return $r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _wrap {
 | 
			
		||||
# --------------------------------------------------------------------
 | 
			
		||||
# _wrap($length, $newline, $string);
 | 
			
		||||
# ----------------------------
 | 
			
		||||
#   Internal method called by linewrap() to wrap a line.
 | 
			
		||||
#
 | 
			
		||||
    my ($i, $e);
 | 
			
		||||
    $i = $e = shift;
 | 
			
		||||
    my $nl  = shift;
 | 
			
		||||
    my $r;
 | 
			
		||||
    defined $_[0] or return '';
 | 
			
		||||
    if (length $_[0] < $i) { return $_[0]; }
 | 
			
		||||
    while (@_) {
 | 
			
		||||
        defined($_[0]) or last;
 | 
			
		||||
        if ($_[0] =~ /^(.{$i})\s(.+)$/) {
 | 
			
		||||
            shift() and $r .= $1 . $nl;
 | 
			
		||||
            $i = $e;
 | 
			
		||||
            if (defined($2) and length($2) <= $e) {
 | 
			
		||||
                $r .= $2;
 | 
			
		||||
                $r .= $nl if length($2) == $e;
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                unshift(@_, $2);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        elsif ($i-- == 0) {
 | 
			
		||||
            $i = $e;
 | 
			
		||||
            shift() =~ /^(.{$i})(.+)$/ and $r .= $1 . $nl;
 | 
			
		||||
            if (defined($2) and length($2) <= $e) {
 | 
			
		||||
                $r .= $2;
 | 
			
		||||
                $r .= $nl if length($2) == $e;
 | 
			
		||||
            }
 | 
			
		||||
            else {
 | 
			
		||||
                unshift(@_, $2)
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return defined($r) ? $r : '';
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
		Reference in New Issue
	
	Block a user