truncate.pm
package truncate;
#
# This module truncates an htmlized bit of text
# and closes the tags on truncation.
#
# Written by Jackie Hamilton (kira@cgi101.com)
use HTML::Entities;
use HTML::Parser;
@ISA = qw(Exporter);
@EXPORT = qw(truncate);
# A list of the tags to allow.
my %ok_tags = ( i=>1, b=>1, a=>1 );
sub start {
# the start function is called by HTML::Parser at the opening of
# every html tag. aka <a> <img> <p> <br> etc
# tag is what's inside the <>, $text is the complete tag with <>'s.
#
my ( $tag, $text ) = @_;
if ($truncated) { return; } # if we already truncated, end now
unless (exists $ok_tags{$tag}) { return; }
unless ($tag eq "p" or $tag eq "br" or $tag eq "hr" or $tag eq "li") {
$open_tags{$tag}++; # store a count of all open tags
}
$truncstr .= $text; # append the tag onto the output text
}
sub end {
# just like start, except its called for closing tags like </a> </b> etc
my($tag, $text) = @_;
if ($truncated) { return; }
unless (exists $ok_tags{$tag}) { return; }
$open_tags{$tag}--;
$truncstr .= $text;
}
sub mytext {
# called by HTML::Parser for any non-tag entities (aka plain text)
my ($text) = @_;
if ($truncated) { return; }
my $strlen = length($text);
# some data uses non-standard characters, so we're going to use
# HTML::Entities to encode these into HTML for proper display
$text = encode_entities($text);
if (($strlen + $pos) > $maxlen) { # it'll be too long. truncate it.
$truncated = 1;
my $diff = $maxlen - $pos;
my $break = index($text, " ", $diff);
$truncstr .= substr($text, 0, $break) . " . . . " . &closetags;
} else { # append the text in full if its not too long.
$truncstr .= $text;
}
if ($text !~ /^\s*$/) {
$pos += $strlen;
}
}
sub truncate {
my($text, $len) = @_;
our %open_tags = (); # store counters of all open tags
our $pos = 0; # keep track of how long the non-tag text is
our $maxlen = $len; # need an "our" var for this to share
our $truncstr = ""; # the output text
our $truncated = 0; # truncated already? true or false
my $parser = HTML::Parser->new(api_version=>3,
start_h => [\&start, "tagname,text"],
end_h => [\&end, "tagname,text"],
text_h => [\&mytext, "dtext"]);
$parser->parse($text);
$parser->eof;
return $truncstr;
}
sub closetags { # close all of the open tags.
my $closetags = "";
foreach my $i (keys %open_tags) {
if ($open_tags{$i} > 0) {
$closetags .= qq(</$i>);
}
}
return $closetags;
}
1;