First pass at adding key files
This commit is contained in:
213
site/slowtwitch.com/cgi-bin/articles/GT/Text/Tools.pm
Normal file
213
site/slowtwitch.com/cgi-bin/articles/GT/Text/Tools.pm
Normal file
@ -0,0 +1,213 @@
|
||||
# ==================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::Text::Tools
|
||||
# Author : Scott Beck
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: Tools.pm,v 1.9 2005/06/09 23:42:16 brewt Exp $
|
||||
#
|
||||
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ==================================================================
|
||||
#
|
||||
# Description: A general purpose text parsing module.
|
||||
#
|
||||
package GT::Text::Tools;
|
||||
# ==================================================================
|
||||
# Pragmas
|
||||
use strict;
|
||||
|
||||
# Internal mules
|
||||
use bases 'GT::Base' => '';
|
||||
|
||||
sub linesplit {
|
||||
# --------------------------------------------------------------------
|
||||
# my @words = GT::Text::Tools->linesplit($regex, $line);
|
||||
# ------------------------------------------------------
|
||||
# Splits $line by $regex outside of quotes ['"]
|
||||
# If regex is false defaults to \s+.
|
||||
#
|
||||
|
||||
# Ganged and modified from Text::ParseWords
|
||||
local $^W;
|
||||
|
||||
my ($class, $delimiter, $line) = @_;
|
||||
$delimiter ||= '\s+';
|
||||
$delimiter =~ s/(\s)/\\$1/g;
|
||||
my ($quote, $quoted, $unquoted, $delim, $word, @pieces);
|
||||
|
||||
while (length($line)) {
|
||||
|
||||
($quote, $quoted, undef, $unquoted, $delim, undef) =
|
||||
$line =~ m/^(["']) # a $quote
|
||||
((?:\\.|(?!\1)[^\\])*) # and $quoted text
|
||||
\1 # followed by the same quote
|
||||
([\000-\377]*) # and the rest
|
||||
| # --OR--
|
||||
^((?:\\.|[^\\"'])*?) # an $unquoted text
|
||||
(\Z(?!\n)|(?:$delimiter)|(?!^)(?=["']))
|
||||
# plus EOL, delimiter, or quote
|
||||
([\000-\377]*) # the rest
|
||||
/x; # extended layout
|
||||
return () unless ( $quote || length($unquoted) || length($delim));
|
||||
|
||||
$line = $+;
|
||||
|
||||
$quoted = "$quote$quoted$quote";
|
||||
$word .= defined $quote ? $quoted : $unquoted;
|
||||
|
||||
if (length($delim)) {
|
||||
push(@pieces, $word);
|
||||
undef $word;
|
||||
}
|
||||
if (!length($line)) {
|
||||
push(@pieces, $word);
|
||||
}
|
||||
}
|
||||
return (@pieces);
|
||||
}
|
||||
|
||||
sub linewrap {
|
||||
# --------------------------------------------------------------------
|
||||
# GT::Text::Tools->linewrap( $string, $number, {
|
||||
# nowrap => $regexs,
|
||||
# eol => "\n",
|
||||
# max_line_length => 50000
|
||||
# });
|
||||
# ----------------------------------------------
|
||||
# linewrap takes a string, a number of characters per line and a
|
||||
# hash ref of options. String will be wrapped to the number of
|
||||
# characters specified on spaces.
|
||||
# The following options apply:
|
||||
# nowrap => array ref of regexes that if matched, will
|
||||
# not be wrapped.
|
||||
# eol => What to wrap the lines with, defaults to
|
||||
# \n.
|
||||
# eol_match => What to use to match eol characters; defaults to
|
||||
# \r?\n
|
||||
# max_line_length => The maximum length a line can be that will
|
||||
# be wrapped on a space. Any line reaching
|
||||
# this length will be wrapped without
|
||||
# looking for spaces. Defaults to 50_000, set
|
||||
# to non-true value to avoid this affect.
|
||||
#
|
||||
my ($class, $string, $i, $opts) = @_;
|
||||
my $max_len = exists($opts->{max_line_length}) ? $opts->{max_line_length} : 50_000;
|
||||
my $regexs = $opts->{nowrap} || [];
|
||||
my $nl = $opts->{eol} || "\n";
|
||||
my $eolre = $opts->{eol_match} || "\r?\n";
|
||||
$regexs = (ref($regexs) eq 'ARRAY') ? $regexs : [$regexs || ()];
|
||||
my @t = split /$eolre/, $string;
|
||||
my $r = "";
|
||||
while (@t) {
|
||||
my $match = 0;
|
||||
if (length $t[0] <= $i) {
|
||||
$r .= shift(@t) . $nl;
|
||||
$match = 1;
|
||||
}
|
||||
elsif ($t[0] =~ /^\s*$/) {
|
||||
my $spaces = shift @t;
|
||||
# Keep the string of spaces unless it's too long (don't bother wrapping them)
|
||||
$r .= (length $spaces <= $i ? $spaces : '') . $nl;
|
||||
$match = 1;
|
||||
}
|
||||
elsif ($max_len and length $t[0] > $max_len) { # Line is too long.
|
||||
my $line = shift @t;
|
||||
while ($line) {
|
||||
$r .= substr($line, 0, $i) . $nl;
|
||||
substr($line, 0, $i) = '';
|
||||
}
|
||||
$match = 1;
|
||||
}
|
||||
elsif (@{$regexs}) {
|
||||
my $regex = join('|', @{$regexs});
|
||||
if ($t[0] =~ m/$regex/) {
|
||||
my $eos = ''; # Store any incomplete lines
|
||||
while ($t[0] =~ s/^(.*?)(\s?)((?:$regex)\s?)//) {
|
||||
my $pre = _wrap($i, $nl, $eos . $1);
|
||||
$eos = '';
|
||||
my $s = $2 || '';
|
||||
my $mat = $3;
|
||||
|
||||
if (!length($pre) or $pre =~ /$nl$/) {
|
||||
$r .= $pre;
|
||||
if (length $mat > $i) {
|
||||
$r .= $mat . $nl;
|
||||
}
|
||||
else {
|
||||
$eos = $mat;
|
||||
}
|
||||
}
|
||||
else {
|
||||
$pre =~ s/($nl|^)(.*?)$//;
|
||||
$r .= $pre . $1;
|
||||
my $leftover = $2;
|
||||
|
||||
if (length($leftover . $s . $mat) <= $i) {
|
||||
$eos = $leftover . $s . $mat;
|
||||
}
|
||||
else {
|
||||
$r .= $leftover . $nl;
|
||||
if (length $mat > $i) {
|
||||
$r .= $mat . $nl;
|
||||
}
|
||||
else {
|
||||
$eos = $mat;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
$eos .= $t[0] if length $t[0];
|
||||
if (length $eos) {
|
||||
$r .= _wrap($i, $nl, $eos) . $nl;
|
||||
}
|
||||
shift(@t);
|
||||
$match = 1;
|
||||
}
|
||||
}
|
||||
next if $match;
|
||||
$r .= _wrap($i, $nl, shift(@t) || '') . $nl;
|
||||
}
|
||||
return $r;
|
||||
}
|
||||
|
||||
sub _wrap {
|
||||
# --------------------------------------------------------------------
|
||||
# _wrap($length, $newline, $string);
|
||||
# ----------------------------
|
||||
# Internal method called by linewrap() to wrap a line.
|
||||
#
|
||||
my ($i, $e);
|
||||
$i = $e = shift;
|
||||
my $nl = shift;
|
||||
my $r;
|
||||
defined $_[0] or return '';
|
||||
if (length $_[0] < $i) { return $_[0]; }
|
||||
while (@_) {
|
||||
defined($_[0]) or last;
|
||||
if ($_[0] =~ /^(.{$i})\s(.+)$/) {
|
||||
shift() and $r .= $1 . $nl;
|
||||
$i = $e;
|
||||
if (defined($2) and length($2) <= $e) {
|
||||
$r .= $2;
|
||||
$r .= $nl if length($2) == $e;
|
||||
}
|
||||
else {
|
||||
unshift(@_, $2);
|
||||
}
|
||||
}
|
||||
elsif ($i-- == 0) {
|
||||
$i = $e;
|
||||
shift() =~ /^(.{$i})(.+)$/ and $r .= $1 . $nl;
|
||||
if (defined($2) and length($2) <= $e) {
|
||||
$r .= $2;
|
||||
$r .= $nl if length($2) == $e;
|
||||
}
|
||||
else {
|
||||
unshift(@_, $2)
|
||||
}
|
||||
}
|
||||
}
|
||||
return defined($r) ? $r : '';
|
||||
}
|
||||
|
||||
1;
|
Reference in New Issue
Block a user