package HTML::FormatText;

# ABSTRACT: Format HTML as plaintext


use 5.006_001;
use strict;
use warnings;

# We now use Smart::Comments in place of the old DEBUG framework.
# this should be commented out in release versions....
##use Smart::Comments;

use base 'HTML::Formatter';

our $VERSION = '2.12'; # VERSION
our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY

# ------------------------------------------------------------------------
# Return the default attribute list for a new formatter: whatever the
# parent class supplies, followed by this class's margin settings (which
# therefore win if the parent also defines lm or rm).
sub default_values {
    my $self = shift;

    return (
        $self->SUPER::default_values(),
        lm => 3,     # left margin
        rm => 72,    # right margin (actually, maximum text width)
    );
}

# ------------------------------------------------------------------------
# Apply margin options from the hash reference $hash to the formatter.
#
# Recognised keys are 'lm' / 'leftmargin' and 'rm' / 'rightmargin'.  When
# both spellings of a margin are given, the long spelling wins because it
# is read last.  Recognised keys are removed from $hash; any key left over
# is reported as unknown.
#
# Returns $self on success.  If the resulting width (rm - lm) is less than
# 1 the margins are left untouched and a bare return is performed.
# All diagnostics are emitted only when $^W is true.
sub configure {
    my ( $self, $hash ) = @_;

    # Start from the current settings so that a partial update works.
    my $lm = $self->{lm};
    my $rm = $self->{rm};

    $lm = delete $hash->{lm}          if exists $hash->{lm};
    $lm = delete $hash->{leftmargin}  if exists $hash->{leftmargin};
    $rm = delete $hash->{rm}          if exists $hash->{rm};
    $rm = delete $hash->{rightmargin} if exists $hash->{rightmargin};

    my $width = $rm - $lm;
    if ( $width < 1 ) {
        warn "Bad margins, ignored" if $^W;
        return;
    }
    if ( $width < 20 ) {
        warn "Page probably too narrow" if $^W;
    }

    # Everything still present in the hash was not understood.
    for ( keys %$hash ) {
        warn "Unknown configure option '$_'" if $^W;
    }

    $self->{lm} = $lm;
    $self->{rm} = $rm;
    return $self;
}

# ------------------------------------------------------------------------
# Prepare the formatter for a new document: run the parent class's begin
# hook, then reset the text-layout state kept by this class.
sub begin {
    my $self = shift;

    $self->SUPER::begin;

    $self->{curpos} = 0;    # current output position.
    $self->{maxpos} = 0;    # highest value of $pos (used by header underliner)
    $self->{hspace} = 0;    # horizontal space pending flag
}

# ------------------------------------------------------------------------
# Finish the document by collecting a final newline.  Returns whatever
# collect() returns.
sub end {
    my $self = shift;

    return $self->collect("\n");
}

# ------------------------------------------------------------------------
# Called when a heading of the given $level opens.  Requests vertical
# space before the heading (more for more important headings: the amount
# is 1 + 0.4 per level above 6) and restarts the width tracking used by
# header_end() to size the underline.  Always returns 1.
sub header_start {
    my ( $self, $level ) = @_;

    $self->vspace( 1 + ( 6 - $level ) * 0.4 );
    $self->{maxpos} = 0;
    return 1;
}

# ------------------------------------------------------------------------
# Called when a heading of the given $level closes.  Level 1 and level 2
# headings are underlined with '=' and '-' respectively; the underline is
# as wide as the text emitted since header_start() reset maxpos, measured
# from the left margin.  Every heading is followed by vertical space.
# Always returns 1.
sub header_end {
    my ( $self, $level ) = @_;

    if ( $level <= 2 ) {
        my $line;
        $line = '=' if $level == 1;
        $line = '-' if $level == 2;

        # An empty heading leaves maxpos at 0, i.e. left of the margin.
        # Clamp so that the repetition count is never negative (which
        # would trigger a "Negative repeat count" warning).
        my $width = $self->{maxpos} - $self->{lm};
        $width = 0 if $width < 0;

        $self->vspace(0);
        $self->out( $line x $width );
    }
    $self->vspace(1);
    return 1;
}

# ------------------------------------------------------------------------
# Emit a list bullet: hand the marker to the parent class with a single
# space appended.
sub bullet {
    my ( $self, $marker ) = @_;

    return $self->SUPER::bullet( $marker . ' ' );
}

# ------------------------------------------------------------------------
# Render a horizontal rule: a row of '-' spanning the distance between
# the current left and right margins, with vertical space requested
# before and after it.
sub hr_start {
    my $self = shift;

    # The margins move via adjust_lm()/adjust_rm(); if they have crossed,
    # clamp to zero so the repetition count is never negative.
    my $width = $self->{rm} - $self->{lm};
    $width = 0 if $width < 0;

    $self->vspace(1);
    $self->out( '-' x $width );
    $self->vspace(1);
}

# ------------------------------------------------------------------------
# Emit a chunk of preformatted text.  Any pending vertical space is
# flushed first, then every line of the chunk is prefixed with the
# left-margin indent and collected verbatim (no wrapping).
sub pre_out {
    my ( $self, $pre ) = @_;

    # should really handle bold/italic etc.
    if ( defined $self->{vspace} ) {

        # Blank lines are only produced once something has been output,
        # so a document never starts with empty lines.
        if ( $self->{out} ) {
            $self->nl() while $self->{vspace}-- >= 0;
        }

        # The request is consumed either way (as in out()).  Leaving it
        # pending when nothing had been output yet would make the *next*
        # pre_out() call insert blank lines in the middle of the block.
        $self->{vspace} = undef;
    }

    my $indent = ' ' x $self->{lm};
    $pre =~ s/^/$indent/mg;
    $self->collect($pre);
    $self->{out}++;
}

# ------------------------------------------------------------------------
# Emit one piece of flowing text (normally a single word).
#
# Whitespace-only input produces no output; it merely records that a
# horizontal space is pending.  Otherwise any pending vertical space is
# flushed, a pending horizontal space becomes either a blank or a line
# break depending on whether the text still fits, and the text itself is
# collected while curpos/maxpos are updated.
sub out {
    my ( $self, $text ) = @_;

    # Non-breaking space (\xA0) becomes a plain space; soft hyphen (\xAD)
    # is dropped.
    $text =~ tr/\xA0\xAD/ /d;

    if ( $text =~ /^\s*$/ ) {
        $self->{hspace} = 1;
        return;
    }

    if ( defined $self->{vspace} ) {

        # Only emit blank lines once something has been output.
        if ( $self->{out} ) {
            $self->nl while $self->{vspace}-- >= 0;
        }
        $self->goto_lm;
        $self->{vspace} = undef;
        $self->{hspace} = 0;
    }

    if ( $self->{hspace} ) {

        # NOTE(review): this test does not count the separating blank, so
        # a line can reach rm + 1 characters.  Left as is to keep existing
        # output stable.
        if ( $self->{curpos} + length($text) > $self->{rm} ) {

            # word will not fit on line; do a line break
            $self->nl;
            $self->goto_lm;
        }
        else {

            # word fits on line; use a space
            $self->collect(' ');
            ++$self->{curpos};
        }
        $self->{hspace} = 0;
    }

    $self->collect($text);
    my $pos = $self->{curpos} += length $text;
    $self->{maxpos} = $pos if $self->{maxpos} < $pos;
    $self->{'out'}++;
}

# ------------------------------------------------------------------------
# Pad the current line with blanks up to the left margin.  Does nothing
# when the output position is already at or beyond the margin.
sub goto_lm {
    my $self = shift;

    my $pos = $self->{curpos};
    my $lm  = $self->{lm};
    if ( $pos < $lm ) {
        $self->{curpos} = $lm;
        $self->collect( " " x ( $lm - $pos ) );
    }
}

# ------------------------------------------------------------------------
# Start a new output line: count it as output, move the output position
# back to column 0 and collect a newline.
sub nl {
    my $self = shift;

    $self->{'out'}++;
    $self->{curpos} = 0;
    $self->collect("\n");
}

# ------------------------------------------------------------------------
# Shift the left margin by $delta columns (negative moves it left), then
# pad the current line up to the new margin if necessary.
sub adjust_lm {
    my ( $self, $delta ) = @_;

    $self->{lm} += $delta;
    $self->goto_lm;
}

# ------------------------------------------------------------------------
# Shift the right margin by $delta columns (negative moves it left) and
# return the new right margin.
sub adjust_rm {
    my ( $self, $delta ) = @_;

    return $self->{rm} += $delta;
}


1;

__END__

=pod

=for test_synopsis 1;
__END__

=for stopwords latin1 leftmargin lm plaintext rightmargin rm CPAN homepage

=head1 NAME

HTML::FormatText - Format HTML as plaintext

=head1 VERSION

version 2.12

=head1 SYNOPSIS

    use HTML::TreeBuilder;
    $tree = HTML::TreeBuilder->new->parse_file("test.html");

    use HTML::FormatText;
    $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);
    print $formatter->format($tree);

or, more simply:

    use HTML::FormatText;
    my $string = HTML::FormatText->format_file(
        'test.html',
        leftmargin => 0, rightmargin => 50
        );

=head1 DESCRIPTION

HTML::FormatText is a formatter that outputs plain latin1 text. All character
attributes (bold/italic/underline) are ignored. Formatting of HTML tables and
forms is not implemented.

HTML::FormatText is built on L<HTML::Formatter> and documentation for that
module applies to this - especially L<HTML::Formatter/new>,
L<HTML::Formatter/format_file> and L<HTML::Formatter/format_string>.

You might specify the following parameters when constructing the formatter:

=over 4

=item I<leftmargin> (alias I<lm>)

The column of the left margin. The default is 3.

=item I<rightmargin> (alias I<rm>)

The column of the right margin. The default is 72.

=back

=head1 SEE ALSO

L<HTML::Formatter>

=head1 INSTALLATION

See perlmodinstall for information and options on installing Perl modules.

=head1 BUGS AND LIMITATIONS

You can make new bug reports, and view existing ones, through the
web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Format>.

=head1 AVAILABILITY

The project homepage is L<https://metacpan.org/release/HTML-Format>.

The latest version of this module is available from the Comprehensive Perl
Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
site near you, or see L<https://metacpan.org/module/HTML::Format/>.

=head1 AUTHORS

=over 4

=item *

Nigel Metheringham <nigelm@cpan.org>

=item *

Sean M Burke <sburke@cpan.org>

=item *

Gisle Aas <gisle@ActiveState.com>

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by Nigel Metheringham, 2002-2005 Sean M Burke, 1999-2002 Gisle Aas.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut