# ================================================================== # Gossamer Threads Module Library - http://gossamer-threads.com/ # # GT::Text::Tools # Author : Scott Beck # CVS Info : # $Id: Tools.pm,v 1.9 2005/06/09 23:42:16 brewt Exp $ # # Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved. # ================================================================== # # Description: A general purpose text parsing module. # package GT::Text::Tools; # ================================================================== # Pragmas use strict; # Internal mules use bases 'GT::Base' => ''; sub linesplit { # -------------------------------------------------------------------- # my @words = GT::Text::Tools->linesplit($regex, $line); # ------------------------------------------------------ # Splits $line by $regex outside of quotes ['"] # If regex is false defaults to \s+. # # Ganged and modified from Text::ParseWords local $^W; my ($class, $delimiter, $line) = @_; $delimiter ||= '\s+'; $delimiter =~ s/(\s)/\\$1/g; my ($quote, $quoted, $unquoted, $delim, $word, @pieces); while (length($line)) { ($quote, $quoted, undef, $unquoted, $delim, undef) = $line =~ m/^(["']) # a $quote ((?:\\.|(?!\1)[^\\])*) # and $quoted text \1 # followed by the same quote ([\000-\377]*) # and the rest | # --OR-- ^((?:\\.|[^\\"'])*?) # an $unquoted text (\Z(?!\n)|(?:$delimiter)|(?!^)(?=["'])) # plus EOL, delimiter, or quote ([\000-\377]*) # the rest /x; # extended layout return () unless ( $quote || length($unquoted) || length($delim)); $line = $+; $quoted = "$quote$quoted$quote"; $word .= defined $quote ? $quoted : $unquoted; if (length($delim)) { push(@pieces, $word); undef $word; } if (!length($line)) { push(@pieces, $word); } } return (@pieces); } sub linewrap { # -------------------------------------------------------------------- # GT::Text::Tools->linewrap( $string, $number, { # nowrap => $regexs, # eol => "\n", # max_line_length => 50000 # }); # ---------------------------------------------- # linewrap takes a string, a number of characters per line and a # hash ref of options. String will be wrapped to the number of # characters specified on spaces. # The following options apply: # nowrap => array ref of regexes that if matched, will # not be wrapped. # eol => What to wrap the lines with, defaults to # \n. # eol_match => What to use to match eol characters; defaults to # \r?\n # max_line_length => The maximum length a line can be that will # be wrapped on a space. Any line reaching # this length will be wrapped without # looking for spaces. Defaults to 50_000, set # to non-true value to avoid this affect. # my ($class, $string, $i, $opts) = @_; my $max_len = exists($opts->{max_line_length}) ? $opts->{max_line_length} : 50_000; my $regexs = $opts->{nowrap} || []; my $nl = $opts->{eol} || "\n"; my $eolre = $opts->{eol_match} || "\r?\n"; $regexs = (ref($regexs) eq 'ARRAY') ? $regexs : [$regexs || ()]; my @t = split /$eolre/, $string; my $r = ""; while (@t) { my $match = 0; if (length $t[0] <= $i) { $r .= shift(@t) . $nl; $match = 1; } elsif ($t[0] =~ /^\s*$/) { my $spaces = shift @t; # Keep the string of spaces unless it's too long (don't bother wrapping them) $r .= (length $spaces <= $i ? $spaces : '') . $nl; $match = 1; } elsif ($max_len and length $t[0] > $max_len) { # Line is too long. my $line = shift @t; while ($line) { $r .= substr($line, 0, $i) . $nl; substr($line, 0, $i) = ''; } $match = 1; } elsif (@{$regexs}) { my $regex = join('|', @{$regexs}); if ($t[0] =~ m/$regex/) { my $eos = ''; # Store any incomplete lines while ($t[0] =~ s/^(.*?)(\s?)((?:$regex)\s?)//) { my $pre = _wrap($i, $nl, $eos . $1); $eos = ''; my $s = $2 || ''; my $mat = $3; if (!length($pre) or $pre =~ /$nl$/) { $r .= $pre; if (length $mat > $i) { $r .= $mat . $nl; } else { $eos = $mat; } } else { $pre =~ s/($nl|^)(.*?)$//; $r .= $pre . $1; my $leftover = $2; if (length($leftover . $s . $mat) <= $i) { $eos = $leftover . $s . $mat; } else { $r .= $leftover . $nl; if (length $mat > $i) { $r .= $mat . $nl; } else { $eos = $mat; } } } } $eos .= $t[0] if length $t[0]; if (length $eos) { $r .= _wrap($i, $nl, $eos) . $nl; } shift(@t); $match = 1; } } next if $match; $r .= _wrap($i, $nl, shift(@t) || '') . $nl; } return $r; } sub _wrap { # -------------------------------------------------------------------- # _wrap($length, $newline, $string); # ---------------------------- # Internal method called by linewrap() to wrap a line. # my ($i, $e); $i = $e = shift; my $nl = shift; my $r; defined $_[0] or return ''; if (length $_[0] < $i) { return $_[0]; } while (@_) { defined($_[0]) or last; if ($_[0] =~ /^(.{$i})\s(.+)$/) { shift() and $r .= $1 . $nl; $i = $e; if (defined($2) and length($2) <= $e) { $r .= $2; $r .= $nl if length($2) == $e; } else { unshift(@_, $2); } } elsif ($i-- == 0) { $i = $e; shift() =~ /^(.{$i})(.+)$/ and $r .= $1 . $nl; if (defined($2) and length($2) <= $e) { $r .= $2; $r .= $nl if length($2) == $e; } else { unshift(@_, $2) } } } return defined($r) ? $r : ''; } 1;