truncate.pm


package truncate;
#
# This module truncates an htmlized bit of text
# and closes the tags on truncation.
#
# Written by Jackie Hamilton (kira@cgi101.com)

use HTML::Entities;

use HTML::Parser;
@ISA = qw(Exporter);
@EXPORT = qw(truncate);

# A list of the tags to allow.
my %ok_tags = ( i=>1, b=>1, a=>1 );

sub start {
# the start function is called by HTML::Parser at the opening of
# every html tag. aka <a> <img> <p> <br> etc
# tag is what's inside the <>, $text is the complete tag with <>'s.
#
   my ( $tag, $text ) = @_;
   if ($truncated) { return; }  # if we already truncated, end now

   unless (exists $ok_tags{$tag}) { return; }

   unless ($tag eq "p" or $tag eq "br" or $tag eq "hr" or $tag eq "li") {
     $open_tags{$tag}++;        # store a count of all open tags
   }
   $truncstr .= $text;          # append the tag onto the output text
}

sub end {
# just like start, except its called for closing tags like </a> </b> etc
   my($tag, $text) = @_;
   if ($truncated) { return; }
   unless (exists $ok_tags{$tag}) { return; }

   $open_tags{$tag}--;
   $truncstr .= $text;
}

sub mytext {
# called by HTML::Parser for any non-tag entities (aka plain text)
   my ($text) = @_;
   if ($truncated) { return; }
   my $strlen = length($text);

# some data uses non-standard characters, so we're going to use
# HTML::Entities to encode these into HTML for proper display
   $text = encode_entities($text);

   if (($strlen + $pos) > $maxlen) {    # it'll be too long. truncate it.
      $truncated = 1;
      my $diff = $maxlen - $pos;
      my $break = index($text, " ", $diff);
      $truncstr .= substr($text, 0, $break) . " . . . " . &closetags;
   } else {     # append the text in full if its not too long.
      $truncstr .= $text;
   }
   if ($text !~ /^\s*$/) {
      $pos += $strlen;
   }
}

sub truncate {
    my($text, $len) = @_;
    
    our %open_tags = ();         # store counters of all open tags       
    our $pos = 0;                # keep track of how long the non-tag text is
    our $maxlen = $len;          # need an "our" var for this to share
    our $truncstr = "";          # the output text
    our $truncated = 0;          # truncated already? true or false
    my $parser = HTML::Parser->new(api_version=>3,
        start_h => [\&start, "tagname,text"],
        end_h => [\&end, "tagname,text"],
        text_h => [\&mytext, "dtext"]);
    $parser->parse($text);
    $parser->eof;
    return $truncstr;
}

sub closetags {                 # close all of the open tags.
    my $closetags = "";
    foreach my $i (keys %open_tags) {
       if ($open_tags{$i} > 0) {
          $closetags .= qq(</$i>);
       }
    }
    return $closetags;
}

1;