Hello community, here is the log from the commit of package perl-HTML-Tree checked in at Thu Dec 14 02:04:10 CET 2006. -------- --- perl-HTML-Tree/perl-HTML-Tree.changes 2006-08-10 11:03:20.000000000 +0200 +++ /mounts/work_src_done/STABLE/perl-HTML-Tree/perl-HTML-Tree.changes 2006-12-13 15:38:40.000000000 +0100 @@ -1,0 +2,15 @@ +Wed Dec 13 15:36:38 CET 2006 - kssingvo@suse.de + +- update to 3.23 + * fix for as_html was not proper, and broken behavior should never + be modified. + * HTML::Element::as_XML now only escapes five characters, instead + of escaping everything but alphanumerics and spaces. + * A string comparison was commented to use lc() on both sides, but + didn't. + * Added several new tests and enhanced others. + * Fixed description of HTML::Element::all_attr_names. + * Fixed example code in HTML::Element::push_content. + * Fixed description of HTML::Element::as_HTML. + +------------------------------------------------------------------- Old: ---- HTML-Tree-3.21.tar.bz2 New: ---- HTML-Tree-3.23.tar.bz2 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ perl-HTML-Tree.spec ++++++ --- /var/tmp/diff_new_pack.SSOYYd/_old 2006-12-14 02:03:24.000000000 +0100 +++ /var/tmp/diff_new_pack.SSOYYd/_new 2006-12-14 02:03:24.000000000 +0100 @@ -1,5 +1,5 @@ # -# spec file for package perl-HTML-Tree (Version 3.21) +# spec file for package perl-HTML-Tree (Version 3.23) # # Copyright (c) 2006 SUSE LINUX Products GmbH, Nuernberg, Germany. 
# This file and all modifications and additions to the pristine @@ -17,9 +17,9 @@ Requires: perl = %{perl_version}, perl-HTML-Parser, perl-HTML-Tagset Autoreqprov: on Summary: Modules for representing, creating, and extracting information from HTML syntax trees -Version: 3.21 +Version: 3.23 Release: 1 -Source: http://search.cpan.org/CPAN/authors/id/P/PE/PETDANCE/HTML-Tree-%{version}.tar.bz2 +Source: http://search.cpan.org/CPAN/authors/id/P/PE/PETEK/HTML-Tree-%{version}.tar.bz2 URL: http://search.cpan.org/CPAN/authors/id/P/PE/PETEK/ BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -62,6 +62,18 @@ %doc README Changes %changelog -n perl-HTML-Tree +* Wed Dec 13 2006 - kssingvo@suse.de +- update to 3.23 + * fix for as_html was not proper, and broken behavior should never + be modified. + * HTML::Element::as_XML now only escapes five characters, instead + of escaping everything but alphanumerics and spaces. + * A string comparison was commented to use lc() on both sides, but + didn't. + * Added several new tests and enhanced others. + * Fixed description of HTML::Element::all_attr_names. + * Fixed example code in HTML::Element::push_content. + * Fixed description of HTML::Element::as_HTML. * Thu Aug 10 2006 - kssingvo@suse.de - update to 3.21 * Updated HTML::Parser requirement to 3.46 to fix a bug in ++++++ HTML-Tree-3.21.tar.bz2 -> HTML-Tree-3.23.tar.bz2 ++++++ diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/Changes new/HTML-Tree-3.23/Changes --- old/HTML-Tree-3.21/Changes 2006-08-07 02:10:50.000000000 +0200 +++ new/HTML-Tree-3.23/Changes 2006-11-12 18:11:05.000000000 +0100 @@ -1,5 +1,39 @@ Changelog for HTML-Tree +3.23 Sun Nov 12 11:09:31 CST 2006 + [THINGS THAT MAY BREAK YOUR CODE OR TESTS] + * Mark-Jason Dominus points out that the fix for as_html was not + proper, and broken behavior should never be codified. 
Fixed + as_html so an empty string doesn't encode entites, instead of + blaming the behavior on HTML::Entities. (RT 18571) + +3.22 Sat Nov 11 21:23:22 CST 2006 + [THINGS THAT MAY BREAK YOUR CODE OR TESTS] + * HTML::Element::as_XML now only escapes five characters, instead + of escaping everything but alphanumerics and spaces. This is + more in line with the XML spec, and will no longer escape wide + characters as two (or more) entities. Resolves RT 14260. Thanks + to Carl Franks and somewhere [at] confuzzled.lu for assistance. + + [FIXES] + * A string comparison was commented to use lc() on both sides, but + didn't. This caused HTML::Element::look_down to not properly find + elements in certain cases. Thanks to Andrew Suhachov. (RT 21114) + + [TESTS] + * Added several new tests and enhanced others. Thanks to Rocco + Caputo for t/attributes.t, and several others for providing + test cases in their RT bugs. + + [DOCUMENTATION] + * Fixed description of HTML::Element::all_attr_names. Thanks + to dsteinbrunner [at] pobox.com for catching it. + * Fixed example code in HTML::Element::push_content. Thanks + to dsteinbrunner [at] pobox.com for catching it. (RT 21293) + * Fixed description of HTML::Element::as_HTML. Thanks to + Mark-Jason Dominus for catching it. 
(RT 18569) + + 3.21 Sun Aug 6 19:10:00 CDT 2006 [FIXES] * Updated HTML::Parser requirement to 3.46 to fix a bug in diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/MANIFEST new/HTML-Tree-3.23/MANIFEST --- old/HTML-Tree-3.21/MANIFEST 2006-08-04 06:50:11.000000000 +0200 +++ new/HTML-Tree-3.23/MANIFEST 2006-11-12 18:18:35.000000000 +0100 @@ -1,10 +1,5 @@ Changes -MANIFEST -Makefile.PL -README -TODO htmltree - lib/HTML/AsSubs.pm lib/HTML/Element.pm lib/HTML/Element/traverse.pm @@ -14,19 +9,26 @@ lib/HTML/Tree/AboutTrees.pod lib/HTML/Tree/Scanning.pod lib/HTML/TreeBuilder.pm - +Makefile.PL +MANIFEST +README t/00system.t t/assubs.t +t/attributes.t +t/body.t t/building.t +t/children.t t/clonei.t t/construct_tree.t -t/construct_tree.t t/doctype.t -t/oldparse.html +t/escape.t t/oldparse.html t/oldparse.t -t/parsefile.t t/parse.t +t/parsefile.t t/pod.t t/split.t t/tag-rendering.t +t/unicode.t +TODO +META.yml Module meta-data (added by MakeMaker) diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/META.yml new/HTML-Tree-3.23/META.yml --- old/HTML-Tree-3.21/META.yml 1970-01-01 01:00:00.000000000 +0100 +++ new/HTML-Tree-3.23/META.yml 2006-11-12 18:18:35.000000000 +0100 @@ -0,0 +1,12 @@ +# http://module-build.sourceforge.net/META-spec.html +#XXXXXXX This is a prototype!!! It will change in the future!!! 
XXXXX# +name: HTML-Tree +version: 3.23 +version_from: lib/HTML/Tree.pm +installdirs: site +requires: + HTML::Parser: 3.46 + HTML::Tagset: 3.02 + +distribution_type: module +generated_by: ExtUtils::MakeMaker version 6.30 diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/htmltree new/HTML-Tree-3.23/htmltree --- old/HTML-Tree-3.21/htmltree 2006-08-04 06:50:11.000000000 +0200 +++ new/HTML-Tree-3.23/htmltree 2006-08-08 00:19:07.000000000 +0200 @@ -1,5 +1,5 @@ #!/usr/local/bin/perl -# Time-stamp: "2000-10-02 14:48:15 MDT" sburke@cpan.org +# Time-stamp: "2000-10-02 14:48:15 MDT" # # Parse the given HTML file(s) and dump the parse tree # Usage: diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/lib/HTML/Element.pm new/HTML-Tree-3.23/lib/HTML/Element.pm --- old/HTML-Tree-3.21/lib/HTML/Element.pm 2006-08-07 01:47:54.000000000 +0200 +++ new/HTML-Tree-3.23/lib/HTML/Element.pm 2006-11-12 18:13:33.000000000 +0100 @@ -6,12 +6,12 @@ =head1 VERSION -Version 3.21 +Version 3.23 =cut use vars qw( $VERSION ); -$VERSION = '3.21'; +$VERSION = '3.23'; =head1 SYNOPSIS @@ -586,7 +586,7 @@ Like all_attr, but only returns the names of the attributes. -Example output of C<< $h->all_attr() >> : +Example output of C<< $h->all_attr_names() >> : C<'_parent', '_tag', 'lang', '_content', >. =cut @@ -715,8 +715,7 @@ $body->push_content( ['br'], ['ul', - map ['li', $_] - qw(Peaches Apples Pears Mangos) + map ['li', $_], qw(Peaches Apples Pears Mangos) ] ); @@ -1456,7 +1455,7 @@ the entities to encode. For compatibility with previous versions, specify C<'E<lt>E<gt>&'> here. If omitted or undef, I<all> unsafe characters are encoded as HTML entities. See L<HTML::Entities> for -details. +details. If passed an empty string, no entities are encoded. 
If $indent_char is specified and defined, the HTML to be output is intented, using the string you specify (which you probably should @@ -1480,8 +1479,6 @@ #my $indent_on = defined($indent) && length($indent); my @html = (); - undef($entities) unless defined($entities) and length($entities); - $omissible_map ||= \%HTML::Element::optionalEndTag; my $empty_element_map = $self->_empty_element_map; @@ -1550,11 +1547,15 @@ $last_tag_tightenable = 0; # I guess this is right HTML::Entities::encode_entities($node, $entities) # That does magic things if $entities is undef. - unless $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} }; - # To keep from amp-escaping children of script et al. - # That doesn't deal with descendants; but then, CDATA - # parents shouldn't /have/ descendants other than a - # text children (or comments?) + unless ( + (defined($entities) && !length($entities)) + # If there's no entity to encode, don't call it + || $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} } + # To keep from amp-escaping children of script et al. + # That doesn't deal with descendants; but then, CDATA + # parents shouldn't /have/ descendants other than a + # text children (or comments?) + ); if($nonindentable_ancestors) { push @html, $node; # say no go } else { @@ -1597,11 +1598,15 @@ # simple text content HTML::Entities::encode_entities($node, $entities) # That does magic things if $entities is undef. - unless $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} }; - # To keep from amp-escaping children of script et al. - # That doesn't deal with descendants; but then, CDATA - # parents shouldn't /have/ descendants other than a - # text children (or comments?) + unless ( + (defined($entities) && !length($entities)) + # If there's no entity to encode, don't call it + || $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} } + # To keep from amp-escaping children of script et al. 
+ # That doesn't deal with descendants; but then, CDATA + # parents shouldn't /have/ descendants other than a + # text children (or comments?) + ); push(@html, $node); } 1; # keep traversing @@ -1720,9 +1725,19 @@ sub _xml_escape { # DESTRUCTIVE (a.k.a. "in-place") + # Five required escapes: http://www.w3.org/TR/2006/REC-xml11-20060816/#syntax + # We allow & if it's part of a valid escape already: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-references foreach my $x (@_) { - $x =~ s<([^\x20\x21\x23\x27-\x3b\x3d\x3F-\x5B\x5D-\x7E])> - <'&#'.(ord($1)).';'>seg; + $x =~ s/( # Escape... + < | # Less than, or + > | # Greater than, or + ' | # Single quote, or + " | # Double quote, or + &(?! # An ampersand that isn't followed by... + (\#\d+; | # A hash mark, digits and semicolon, or + \#x[\da-f]+; | # A hash mark, "x", hex digits and semicolon, or + [A-Za-z0-9]+; )) # alphanums (not underscore, hence not \w) and a semicolon + )/'&#'.ord($1).";"/sgex; # And replace them with their XML digit counterpart } return; } @@ -1827,7 +1842,8 @@ entities. See L<HTML::Entities> for details. If you specify some value for C<$entities>, remember to include the double-quote character in it. (Previous versions of this module would basically behave as if -C<'&"E<gt>'> were specified for C<$entities>.) +C<'&"E<gt>'> were specified for C<$entities>.) If C<$entities> is +an empty string, no entity is escaped. =cut @@ -1874,7 +1890,7 @@ $val = $val->{text}; } else { - HTML::Entities::encode_entities($val, $entities); + HTML::Entities::encode_entities($val, $entities) unless (defined($entities) && !length($entities)); } $val = qq{"$val"}; @@ -2753,7 +2769,7 @@ ? $val !~ $c->[1] : ( ref $val ne $c->[2] # have unequal ref values => fail - or lc($val) ne $c->[1] + or lc($val) ne lc($c->[1]) # have unequal lc string values => fail )) ) @@ -3825,6 +3841,8 @@ Original authors: Gisle Aas, Sean Burke and Andy Lester. +Thanks to Mark-Jason Dominus for a POD suggestion. 
+ =cut 1; diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/lib/HTML/Tree.pm new/HTML-Tree-3.23/lib/HTML/Tree.pm --- old/HTML-Tree-3.21/lib/HTML/Tree.pm 2006-08-07 02:05:20.000000000 +0200 +++ new/HTML-Tree-3.23/lib/HTML/Tree.pm 2006-11-12 18:13:41.000000000 +0100 @@ -6,12 +6,15 @@ =head1 VERSION -Version 3.21 +3.23 =cut +# HTML::Tree is basically just a happy alias to HTML::TreeBuilder. +use HTML::TreeBuilder (); + use vars qw( $VERSION ); -$VERSION = '3.21'; +$VERSION = 3.23; =head1 SYNOPSIS @@ -30,9 +33,6 @@ =cut -# HTML::Tree is basically just a happy alias to HTML::TreeBuilder. -use HTML::TreeBuilder (); - sub new { shift; unshift @_, 'HTML::TreeBuilder'; goto &HTML::TreeBuilder::new; diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/lib/HTML/TreeBuilder.pm new/HTML-Tree-3.23/lib/HTML/TreeBuilder.pm --- old/HTML-Tree-3.21/lib/HTML/TreeBuilder.pm 2006-08-07 01:52:44.000000000 +0200 +++ new/HTML-Tree-3.23/lib/HTML/TreeBuilder.pm 2006-11-12 18:13:46.000000000 +0100 @@ -4,7 +4,7 @@ use integer; # vroom vroom! use Carp (); use vars qw(@ISA $VERSION $DEBUG); -$VERSION = '3.21'; +$VERSION = '3.23'; #--------------------------------------------------------------------------- # Make a 'DEBUG' constant... diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/t/attributes.t new/HTML-Tree-3.23/t/attributes.t --- old/HTML-Tree-3.21/t/attributes.t 1970-01-01 01:00:00.000000000 +0100 +++ new/HTML-Tree-3.23/t/attributes.t 2006-11-12 01:02:13.000000000 +0100 @@ -0,0 +1,28 @@ +#!/usr/bin/perl + +# HTML::TreeBuilder invokes HTML::Entities::decode on the contents of +# HREF attributes. Some CGI-based sites use lang=en or such for +# internationalization. When this parameter is after an ampersand, +# the resulting &lang is decoded, breaking the link. "sub" is another +# popular one. 
+ +# Test provided by Rocco Caputo + +use warnings; +use strict; + +use Test::More tests => 1; +use HTML::TreeBuilder; + +my $tb = HTML::TreeBuilder->new(); +$tb->parse( + "<a href='http://wherever/moo.cgi?xyz=123&lang=en'>Test</a>" +); + +my @links = $tb->look_down( sub { $_[0]->tag eq "a" } ); +my $href = $links[0]->attr("href"); + +ok($href =~ /lang/, "href should contain 'lang' (is: $href)"); + +exit; + diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/t/body.t new/HTML-Tree-3.23/t/body.t --- old/HTML-Tree-3.21/t/body.t 1970-01-01 01:00:00.000000000 +0100 +++ new/HTML-Tree-3.23/t/body.t 2006-11-12 18:08:03.000000000 +0100 @@ -0,0 +1,82 @@ +#!/usr/bin/perl + +use warnings; +use strict; + +use Test::More tests => 11; + +BEGIN { + use_ok('HTML::TreeBuilder'); +} + +EMPTY: { + my $root = HTML::TreeBuilder->new(); + $root->implicit_body_p_tag(1); + $root->xml_mode(1); + $root->parse(''); + $root->eof(); + + is($root->as_HTML(),"<html><head></head><body></body></html>\n"); +} + +BR_ONLY: { + my $root = HTML::TreeBuilder->new(); + $root->implicit_body_p_tag(1); + $root->xml_mode(1); + $root->parse('<br />'); + $root->eof(); + + is($root->as_HTML(),"<html><head></head><body><p><br /></body></html>\n"); +} + +TEXT_ONLY: { + my $root = HTML::TreeBuilder->new(); + $root->implicit_body_p_tag(1); + $root->xml_mode(1); + $root->parse('text'); + $root->eof(); + + is($root->as_HTML(),"<html><head></head><body><p>text</body></html>\n"); +} + +EMPTY_TABLE: { + my $root = HTML::TreeBuilder->new(); + $root->implicit_body_p_tag(1); + $root->xml_mode(1); + $root->parse('<table></table>'); + $root->eof(); + + is($root->as_HTML(),"<html><head></head><body><table></table></body></html>\n"); +} + +ESCAPES: { + my $root = HTML::TreeBuilder->new(); + my $escape = 'This ſoftware has ſome bugs'; + my $html = $root->parse($escape)->eof->elementify(); + TODO: { + local $TODO = 'HTML::Parser::parse mucks with our escapes'; + 
is($html->as_HTML(),"<html><head></head><body>$escape</body></html>\n"); + } +} + +OTHER_LANGUAGES: { + my $root = HTML::TreeBuilder->new(); + my $escape = 'Gebühr vor Ort von € 30,- pro Woche'; # RT 14212 + my $html = $root->parse($escape)->eof; + is($html->as_HTML(),"<html><head></head><body>Gebühr vor Ort von € 30,- pro Woche</body></html>\n"); +} + +RT_18570: { + my $root = HTML::TreeBuilder->new(); + my $escape = 'This ∼ is a twiddle'; + my $html = $root->parse($escape)->eof->elementify(); + is($html->as_HTML(),"<html><head></head><body>$escape</body></html>\n"); +} + +RT_18571: { + my $root = HTML::TreeBuilder->new(); + my $html = $root->parse('<b>$self->escape</b>')->eof->elementify(); + is($html->as_HTML(),"<html><head></head><body><b>\$self->escape</b></body></html>\n"); + is($html->as_HTML(''),"<html><head></head><body><b>\$self->escape</b></body></html>\n"); + is($html->as_HTML("\0"),"<html><head></head><body><b>\$self->escape</b></body></html>\n"); # 3.22 compatability +} diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/t/children.t new/HTML-Tree-3.23/t/children.t --- old/HTML-Tree-3.21/t/children.t 1970-01-01 01:00:00.000000000 +0100 +++ new/HTML-Tree-3.23/t/children.t 2006-11-12 03:26:06.000000000 +0100 @@ -0,0 +1,25 @@ +#!/usr/bin/perl + +# RT 21114 test case. Thanks Andrew Suhachov for finding it. + +use warnings; +use strict; + +use Test::More tests=>4; + +BEGIN { + use_ok('HTML::TreeBuilder'); +} + +my $root = HTML::TreeBuilder->new(); +my $escape = '<table><tr><td>One</td><td>Two</td></tr><tr><td>Three</td><td>Four</td></tr></table>'; +my $html = $root->parse($escape)->eof; + +my $child = $root->look_down( _tag=>'tr', sub { my $tr = shift; $tr->look_down( _tag=>'td', _parent=>$tr) ? 1 : 0; } ); +isa_ok( $child, 'HTML::Element', "Child found"); + +my @children = $root->look_down( _tag=>'tr', sub { my $tr = shift; $tr->look_down( _tag=>'td', _parent=>$tr) ? 
1 : 0; } ); +cmp_ok( scalar(@children), '==', '2', "2 total children found"); + +my $none = $root->look_down( _tag=>'tr', sub { 0 } ); +ok(!defined($none),'No children found'); diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/t/escape.t new/HTML-Tree-3.23/t/escape.t --- old/HTML-Tree-3.21/t/escape.t 1970-01-01 01:00:00.000000000 +0100 +++ new/HTML-Tree-3.23/t/escape.t 2006-11-11 21:39:42.000000000 +0100 @@ -0,0 +1,57 @@ +#!/usr/bin/perl + +# Tests that the following translations take place, and none other: +# +# & => & (aka &) +# < => < (aka <) +# > => > (aka >) +# ' => ' (aka ') +# " => " (aka ") +# +# Further tests that already-escaped things are not further escaped. +# +# Escapes are defined in the XML spec: +# http://www.w3.org/TR/2006/REC-xml11-20060816/#dt-escape + + +BEGIN { + %translations = ( + 'x > 3' => 'x > 3', + 'x < 3' => 'x < 3', + '< 3 >' => '< 3 >', + "he's" => "he's", + "he’s" => "he’s", # MS "smart" quotes don't get escaped (single) + '"his"' => '"his"', + '‘his’' => '‘his’', # MS "smart" quotes don't get escaped (single) + '“his”' => '“his”', # MS "smart" quotes don't get escaped (double) + '1&2' => '1&2', + '1&2' => '1&2', + '1&2' => '1&2', + '1& 2' => '1& 2', + '1& 2' => '1& 2', + 'abc' => 'abc', + 'número' => 'número', + '⇓' => '⇓', + 'Œ' => 'Œ', + '²' => '²', + '&no_go;' => '&no_go;', + + 'This ſoftware has ſome bugs' => 'This ſoftware has ſome bugs', # RT 18568 + ); + + $tests = keys(%translations) + 1; +} + +use Test::More tests => $tests; + +BEGIN { + + use_ok('HTML::Element'); +} + +foreach my $orig (keys %translations) { + $new = $orig; + HTML::Element::_xml_escape($new); + is($new,$translations{$orig},"Properly escaped: $orig"); +} + diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/t/pod.t new/HTML-Tree-3.23/t/pod.t --- old/HTML-Tree-3.21/t/pod.t 2006-08-04 06:50:10.000000000 +0200 +++ new/HTML-Tree-3.23/t/pod.t 2006-08-07 
02:29:40.000000000 +0200 @@ -1,6 +1,6 @@ -#!perl -Tw +#!perl -T + use Test::More; -use strict ; -eval "use Test::Pod 1.00"; -plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; +eval "use Test::Pod 1.14"; +plan skip_all => "Test::Pod 1.14 required for testing POD" if $@; all_pod_files_ok(); diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/HTML-Tree-3.21/t/unicode.t new/HTML-Tree-3.23/t/unicode.t --- old/HTML-Tree-3.21/t/unicode.t 1970-01-01 01:00:00.000000000 +0100 +++ new/HTML-Tree-3.23/t/unicode.t 2006-11-11 18:33:08.000000000 +0100 @@ -0,0 +1,115 @@ +#!perl -w +# -*-Perl-*- +# Time-stamp: "2003-09-15 01:45:14 ADT" + +use strict; +use Test::More; +my $DEBUG = 2; + +BEGIN { + # Make sure we've got Unicode support: + eval "use v5.8.0; utf8::is_utf8('x');"; + if ($@) { + plan skip_all => "Perl 5.8.0 or newer required for Unicode tests"; + exit; + } + + plan tests => 11; + binmode STDOUT, ":utf8"; +} # end BEGIN + +use Encode; +use HTML::TreeBuilder; + +print "#Using Encode version v", $Encode::VERSION || "?", "\n"; +print "#Using HTML::TreeBuilder version v$HTML::TreeBuilder::VERSION\n"; +print "#Using HTML::Element version v$HTML::Element::VERSION\n"; +print "#Using HTML::Parser version v", $HTML::Parser::VERSION || "?", "\n"; +print "#Using HTML::Entities version v", $HTML::Entities::VERSION || "?", "\n"; +print "#Using HTML::Tagset version v", $HTML::Tagset::VERSION || "?", "\n"; +print "# Running under perl version $] for $^O", + (chr(65) eq 'A') ? 
"\n" : " in a non-ASCII world\n"; +print "# Win32::BuildNumber ", &Win32::BuildNumber(), "\n" + if defined(&Win32::BuildNumber) and defined &Win32::BuildNumber(); +print "# MacPerl verison $MacPerl::Version\n" + if defined $MacPerl::Version; +printf + "# Current time local: %s\n# Current time GMT: %s\n", + scalar(localtime($^T)), scalar(gmtime($^T)); + +ok 1; + +ok same('<p>&nbsp;</p>', decode('latin1', "<p>\xA0</p>")); + +ok !same('<p></p>', decode('latin1', "<p>\xA0</p>"), 1); +ok !same('<p> </p>', decode('latin1', "<p>\xA0</p>"), 1); + +ok same('<p>&nbsp;&nbsp;&nbsp;</p>', decode('latin1', "<p>\xA0\xA0\xA0</p>")); +ok same("<p>\xA0\xA0\xA0</p>", decode('latin1', "<p>\xA0\xA0\xA0</p>")); + +ok !same('<p></p>', decode('latin1', "<p>\xA0\xA0\xA0</p>"), 1); +ok !same('<p> </p>', decode('latin1', "<p>\xA0\xA0\xA0</p>"), 1); + +ok same('<p>&nbsp;&nbsp;&mdash;&nbsp;&nbsp;</p>', + "<p>\xA0\xA0\x{2014}\xA0\xA0</p>"); + +ok same('<p>&nbsp;&nbsp;XXmdashXX&nbsp;&nbsp;</p>', + "<p>\xA0\xA0\x{2014}\xA0\xA0</p>", + 0, sub { $_[0] =~ s/XXmdashXX/\x{2014}/ }); + +ok same('<p>&nbsp;<b>bold</b>&nbsp;&nbsp;</p>', + decode('latin1', "<p>\xA0<b>bold</b>\xA0\xA0</p>")); + +sub same { + my($code1, $code2, $flip, $fixup) = @_; + my $t1 = HTML::TreeBuilder->new; + my $t2 = HTML::TreeBuilder->new; + + if(ref $code1) { $t1->implicit_tags(0); $code1 = $$code1 } + if(ref $code2) { $t2->implicit_tags(0); $code2 = $$code2 } + + $t1->parse($code1); $t1->eof; + $t2->parse($code2); $t2->eof; + + my $out1 = $t1->as_XML; + my $out2 = $t2->as_XML; + + $fixup->($out1, $out2) if $fixup; + + my $rv = ($out1 eq $out2); + + #print $rv? "RV TRUE\n" : "RV FALSE\n"; + #print $flip? "FLIP TRUE\n" : "FLIP FALSE\n"; + + if($flip ? (!$rv) : $rv) { + if($DEBUG > 2) { + print + "In1 $code1\n", + "In2 $code2\n", + "Out1 $out1\n", + "Out2 $out2\n", + "\n\n"; + } + } else { + local $_; + foreach my $line ( + '', + "The following failure is at " . join(' : ' ,caller), + "Explanation of failure: " . ($flip ? 
'same' : 'different') + . 
" parse trees!", + sprintf("Input code 1 (utf8=%d):", utf8::is_utf8($code1)), $code1, + sprintf("Input code 2 (utf8=%d):", utf8::is_utf8($code2)), $code2, + "Output tree (as XML) 1:", $out1, + "Output tree (as XML) 2:", $out2, + ) { + $_ = $line; + s/\n/\n# /g; + print "# $_\n"; + } + } + + $t1->delete; + $t2->delete; + + return $rv; +} # end same ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Remember to have fun... --------------------------------------------------------------------- To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org For additional commands, e-mail: opensuse-commit+help@opensuse.org