214 lines
7.3 KiB
Perl
214 lines
7.3 KiB
Perl
# ==================================================================
|
|
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
|
#
|
|
# GT::Text::Tools
|
|
# Author : Scott Beck
|
|
# CVS Info :
|
|
# $Id: Tools.pm,v 1.9 2005/06/09 23:42:16 brewt Exp $
|
|
#
|
|
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
|
# ==================================================================
|
|
#
|
|
# Description: A general purpose text parsing module.
|
|
#
|
|
package GT::Text::Tools;
|
|
# ==================================================================
|
|
# Pragmas
|
|
use strict;
|
|
|
|
# Internal mules
|
|
use bases 'GT::Base' => '';
|
|
|
|
sub linesplit {
|
|
# --------------------------------------------------------------------
|
|
# my @words = GT::Text::Tools->linesplit($regex, $line);
|
|
# ------------------------------------------------------
|
|
# Splits $line by $regex outside of quotes ['"]
|
|
# If regex is false defaults to \s+.
|
|
#
|
|
|
|
# Ganged and modified from Text::ParseWords
|
|
local $^W;
|
|
|
|
my ($class, $delimiter, $line) = @_;
|
|
$delimiter ||= '\s+';
|
|
$delimiter =~ s/(\s)/\\$1/g;
|
|
my ($quote, $quoted, $unquoted, $delim, $word, @pieces);
|
|
|
|
while (length($line)) {
|
|
|
|
($quote, $quoted, undef, $unquoted, $delim, undef) =
|
|
$line =~ m/^(["']) # a $quote
|
|
((?:\\.|(?!\1)[^\\])*) # and $quoted text
|
|
\1 # followed by the same quote
|
|
([\000-\377]*) # and the rest
|
|
| # --OR--
|
|
^((?:\\.|[^\\"'])*?) # an $unquoted text
|
|
(\Z(?!\n)|(?:$delimiter)|(?!^)(?=["']))
|
|
# plus EOL, delimiter, or quote
|
|
([\000-\377]*) # the rest
|
|
/x; # extended layout
|
|
return () unless ( $quote || length($unquoted) || length($delim));
|
|
|
|
$line = $+;
|
|
|
|
$quoted = "$quote$quoted$quote";
|
|
$word .= defined $quote ? $quoted : $unquoted;
|
|
|
|
if (length($delim)) {
|
|
push(@pieces, $word);
|
|
undef $word;
|
|
}
|
|
if (!length($line)) {
|
|
push(@pieces, $word);
|
|
}
|
|
}
|
|
return (@pieces);
|
|
}
|
|
|
|
sub linewrap {
|
|
# --------------------------------------------------------------------
|
|
# GT::Text::Tools->linewrap( $string, $number, {
|
|
# nowrap => $regexs,
|
|
# eol => "\n",
|
|
# max_line_length => 50000
|
|
# });
|
|
# ----------------------------------------------
|
|
# linewrap takes a string, a number of characters per line and a
|
|
# hash ref of options. String will be wrapped to the number of
|
|
# characters specified on spaces.
|
|
# The following options apply:
|
|
# nowrap => array ref of regexes that if matched, will
|
|
# not be wrapped.
|
|
# eol => What to wrap the lines with, defaults to
|
|
# \n.
|
|
# eol_match => What to use to match eol characters; defaults to
|
|
# \r?\n
|
|
# max_line_length => The maximum length a line can be that will
|
|
# be wrapped on a space. Any line reaching
|
|
# this length will be wrapped without
|
|
# looking for spaces. Defaults to 50_000, set
|
|
# to non-true value to avoid this affect.
|
|
#
|
|
my ($class, $string, $i, $opts) = @_;
|
|
my $max_len = exists($opts->{max_line_length}) ? $opts->{max_line_length} : 50_000;
|
|
my $regexs = $opts->{nowrap} || [];
|
|
my $nl = $opts->{eol} || "\n";
|
|
my $eolre = $opts->{eol_match} || "\r?\n";
|
|
$regexs = (ref($regexs) eq 'ARRAY') ? $regexs : [$regexs || ()];
|
|
my @t = split /$eolre/, $string;
|
|
my $r = "";
|
|
while (@t) {
|
|
my $match = 0;
|
|
if (length $t[0] <= $i) {
|
|
$r .= shift(@t) . $nl;
|
|
$match = 1;
|
|
}
|
|
elsif ($t[0] =~ /^\s*$/) {
|
|
my $spaces = shift @t;
|
|
# Keep the string of spaces unless it's too long (don't bother wrapping them)
|
|
$r .= (length $spaces <= $i ? $spaces : '') . $nl;
|
|
$match = 1;
|
|
}
|
|
elsif ($max_len and length $t[0] > $max_len) { # Line is too long.
|
|
my $line = shift @t;
|
|
while ($line) {
|
|
$r .= substr($line, 0, $i) . $nl;
|
|
substr($line, 0, $i) = '';
|
|
}
|
|
$match = 1;
|
|
}
|
|
elsif (@{$regexs}) {
|
|
my $regex = join('|', @{$regexs});
|
|
if ($t[0] =~ m/$regex/) {
|
|
my $eos = ''; # Store any incomplete lines
|
|
while ($t[0] =~ s/^(.*?)(\s?)((?:$regex)\s?)//) {
|
|
my $pre = _wrap($i, $nl, $eos . $1);
|
|
$eos = '';
|
|
my $s = $2 || '';
|
|
my $mat = $3;
|
|
|
|
if (!length($pre) or $pre =~ /$nl$/) {
|
|
$r .= $pre;
|
|
if (length $mat > $i) {
|
|
$r .= $mat . $nl;
|
|
}
|
|
else {
|
|
$eos = $mat;
|
|
}
|
|
}
|
|
else {
|
|
$pre =~ s/($nl|^)(.*?)$//;
|
|
$r .= $pre . $1;
|
|
my $leftover = $2;
|
|
|
|
if (length($leftover . $s . $mat) <= $i) {
|
|
$eos = $leftover . $s . $mat;
|
|
}
|
|
else {
|
|
$r .= $leftover . $nl;
|
|
if (length $mat > $i) {
|
|
$r .= $mat . $nl;
|
|
}
|
|
else {
|
|
$eos = $mat;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
$eos .= $t[0] if length $t[0];
|
|
if (length $eos) {
|
|
$r .= _wrap($i, $nl, $eos) . $nl;
|
|
}
|
|
shift(@t);
|
|
$match = 1;
|
|
}
|
|
}
|
|
next if $match;
|
|
$r .= _wrap($i, $nl, shift(@t) || '') . $nl;
|
|
}
|
|
return $r;
|
|
}
|
|
|
|
sub _wrap {
|
|
# --------------------------------------------------------------------
|
|
# _wrap($length, $newline, $string);
|
|
# ----------------------------
|
|
# Internal method called by linewrap() to wrap a line.
|
|
#
|
|
my ($i, $e);
|
|
$i = $e = shift;
|
|
my $nl = shift;
|
|
my $r;
|
|
defined $_[0] or return '';
|
|
if (length $_[0] < $i) { return $_[0]; }
|
|
while (@_) {
|
|
defined($_[0]) or last;
|
|
if ($_[0] =~ /^(.{$i})\s(.+)$/) {
|
|
shift() and $r .= $1 . $nl;
|
|
$i = $e;
|
|
if (defined($2) and length($2) <= $e) {
|
|
$r .= $2;
|
|
$r .= $nl if length($2) == $e;
|
|
}
|
|
else {
|
|
unshift(@_, $2);
|
|
}
|
|
}
|
|
elsif ($i-- == 0) {
|
|
$i = $e;
|
|
shift() =~ /^(.{$i})(.+)$/ and $r .= $1 . $nl;
|
|
if (defined($2) and length($2) <= $e) {
|
|
$r .= $2;
|
|
$r .= $nl if length($2) == $e;
|
|
}
|
|
else {
|
|
unshift(@_, $2)
|
|
}
|
|
}
|
|
}
|
|
return defined($r) ? $r : '';
|
|
}
|
|
|
|
1;
|