Hello community,
here is the log from the commit of package perl-XML-Twig for openSUSE:Factory
checked in at Mon Mar 7 15:09:29 CET 2011.
--------
--- perl-XML-Twig/perl-XML-Twig.changes 2010-12-16 13:56:09.000000000 +0100
+++ /mounts/work_src_done/STABLE/perl-XML-Twig/perl-XML-Twig.changes 2011-02-28 00:55:32.000000000 +0100
@@ -1,0 +2,15 @@
+Sun Feb 27 23:43:46 UTC 2011 - pascal.bleser@opensuse.org
+
+- update to 3.38:
+ * fixed: RT#65865: _ should be allowed at the start on an XML name
+ * removed: making att and class lvalues created problems: in certain context
+ they made regular calls to the method create empty attributes. I
+ could find no satisfactory fix,they were either incompletes, or to complex
+ for often used methods. So att and class are back to being regular, non
+ l-value methods. latt and lclass are the l-value versions.
+ * added: documented the -html option for xml_grep, that allows processing
+ HTML input
+ * added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert HTML
+ to XML
+
+-------------------------------------------------------------------
calling whatdependson for head-i586
Old:
----
XML-Twig-3.37.tar.bz2
New:
----
XML-Twig-3.38.tar.bz2
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ perl-XML-Twig.spec ++++++
--- /var/tmp/diff_new_pack.cYmafR/_old 2011-03-07 15:07:45.000000000 +0100
+++ /var/tmp/diff_new_pack.cYmafR/_new 2011-03-07 15:07:45.000000000 +0100
@@ -1,7 +1,7 @@
#
-# spec file for package perl-XML-Twig (Version 3.37 )
+# spec file for package perl-XML-Twig
#
-# Copyright (c) 2010 SUSE LINUX Products GmbH, Nuernberg, Germany.
+# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -19,7 +19,7 @@
Name: perl-XML-Twig
-Version: 3.37
+Version: 3.38
Release: 1
Requires: perl-XML-Parser
BuildRequires: perl-XML-Parser
@@ -27,8 +27,9 @@
AutoReqProv: on
Group: Development/Libraries/Perl
License: Artistic
-Url: http://cpan.org/modules/by-module/XML/
-Summary: Tree interface to XML documents allowing processing chunk by chunk of huge documents
+Url: http://search.cpan.org/dist/XML::Twig
+Summary: Tree interface to XML documents
+# http://search.cpan.org/CPAN/authors/id/M/MI/MIROD/XML-Twig-%{version}.tar.gz
Source: XML-Twig-%{version}.tar.bz2
BuildRoot: %{_tmppath}/%{name}-%{version}-build
%{perl_requires}
@@ -61,25 +62,31 @@
%setup -q -n XML-Twig-%{version}
%build
-perl Makefile.PL
-make
+%__perl Makefile.PL
+%__make
%check
-make test
+%__make test
%install
%perl_make_install
%perl_process_packlist
%clean
-rm -rf $RPM_BUILD_ROOT
+%{?buildroot:%__rm -rf "%{buildroot}"}
%files
%defattr(-,root,root)
%doc Changes README
-%doc %{_mandir}/man?/*
-%{perl_vendorlib}/XML
-%{perl_vendorarch}/auto/XML
-%{_bindir}/xml_*
+%dir %{perl_vendorlib}/XML
+%{perl_vendorlib}/XML/Twig.pm
+%{perl_vendorlib}/XML/Twig
+%{_bindir}/xml_grep
+%{_bindir}/xml_merge
+%{_bindir}/xml_pp
+%{_bindir}/xml_spellcheck
+%{_bindir}/xml_split
+%doc %{_mandir}/man1/xml_*.1%{ext_man}
+%doc %{_mandir}/man3/XML::Twig.%{perl_man3ext}%{ext_man}
%changelog
++++++ XML-Twig-3.37.tar.bz2 -> XML-Twig-3.38.tar.bz2 ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/Changes new/XML-Twig-3.38/Changes
--- old/XML-Twig-3.37/Changes 2010-10-09 07:45:32.000000000 +0200
+++ new/XML-Twig-3.38/Changes 2011-02-27 07:40:45.000000000 +0100
@@ -1,11 +1,38 @@
$Id: /xmltwig/trunk/Changes 33 2008-04-30T08:03:41.004487Z mrodrigu $
CHANGES
+version 3.38
+date: 2011-07-27
+# minor maintenance release
+fixed: RT 65865: _ should be allowed at the start on an XML name
+ https://rt.cpan.org/Ticket/Display.html?id=65865
+ reported by Steve Prokopowich
+removed: making att and class lvalues created problems: in certain
+ context they made regular calls to the method create empty
+� attributes. I could find no satisfactory fix,they were either
+ incompletes, or to complex for often used methods. So att and
+ class are back to being regular, non l-value methods.
+ latt and lclass are the l-value versions.
+added: documented the -html option for xml_grep, that allows processing
+ HTML input
+added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert
+ HTML to XML
+
version 3.37
date: 2010-10-08
# minor maintenance release
fixed: more tests fixed for HTML::TreeBuilder, hopefully
will pass now
+removed: making att and class lvalues created problems: in certain
+ context they made regular calls to the method create empty
+� attributes. I could find no satisfactory fix,they were either
+ incompletes, or to complex for often used methods. So att and
+ class are back to being regular, non l-value methods.
+ latt and lclass are the l-value versions.
+added: documented the -html option for xml_grep, that allows processing
+ HTML input
+added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert
+ HTML to XML
version 3.36
date: 2010-10-07
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/META.yml new/XML-Twig-3.38/META.yml
--- old/XML-Twig-3.37/META.yml 2010-10-10 07:52:02.000000000 +0200
+++ new/XML-Twig-3.38/META.yml 2011-02-27 07:42:00.000000000 +0100
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: XML-Twig
-version: 3.37
+version: 3.38
abstract: XML, The Perl Way
author:
- Michel Rodriguez
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/Twig.pm new/XML-Twig-3.38/Twig.pm
--- old/XML-Twig-3.37/Twig.pm 2010-10-10 07:52:01.000000000 +0200
+++ new/XML-Twig-3.38/Twig.pm 2011-02-27 07:41:59.000000000 +0100
@@ -38,16 +38,16 @@
# xml name (leading # allowed)
# first line is for perl 5.005, second line for modern perl, that accept character classes
-my $REG_NAME = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*)}; # does not work for leading non-ascii letters
- $REG_NAME = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*)}; # > perl 5.5
+my $REG_NAME = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*)}; # does not work for leading non-ascii letters
+ $REG_NAME = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*)}; # > perl 5.5
# name or wildcard (* or '') (leading # allowed)
-my $REG_NAME_W = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # does not work for leading non-ascii letters
- $REG_NAME_W = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 5.5
+my $REG_NAME_W = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # does not work for leading non-ascii letters
+ $REG_NAME_W = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 5.5
# name or wildcard (* or '') (leading # allowed) with optional class
-my $REG_NAME_WC = q{(?(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # does not work for leading non-ascii letters
- $REG_NAME_WC = q{(?:(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # > perl 5.5
+my $REG_NAME_WC = q{(?(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # does not work for leading non-ascii letters
+ $REG_NAME_WC = q{(?:(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # > perl 5.5
my $REG_REGEXP = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)}; # regexp
@@ -97,7 +97,7 @@
BEGIN
{
-$VERSION = '3.37';
+$VERSION = '3.38';
use XML::Parser;
my $needVersion = '2.23';
@@ -783,6 +783,13 @@
return $@ ? $t->_reset_twig_after_error : $t;
}
+sub parseurl_html
+ { my $t= shift;
+ _use( 'LWP::Simple') or croak "missing LWP::Simple";
+ $t->parse_html( LWP::Simple::get( shift()), @_);
+ }
+
+
# uses eval to catch the parser's death
sub safe_parse_html
{ my $t= shift;
@@ -847,7 +854,7 @@
if( _is_well_formed_xml( $_[0]))
{ $t->parse( @_) }
else
- { my $html= _html2xml( $_[0]);
+ { my $html= $t->{use_tidy} ? _tidy_html( $_[0]) : _html2xml( $_[0]);
if( _is_well_formed_xml( $html))
{ $t->parse( $html); }
else
@@ -913,7 +920,7 @@
wrap => 0,
break_before_br => 0,
};
-
+ $options ||= {};
my $tidy_options= { %$TIDY_DEFAULTS, %$options};
my $tidy = HTML::Tidy->new( $tidy_options);
$tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is TIDY_ERROR, not clean
@@ -1366,7 +1373,7 @@
sub _set_regexp_handler
{ my( $handlers, $path, $handler, $prev_handler)= @_;
# if the expression was a regexp it is now a string (it was stringified when it became a hash key)
- if( $path=~ m{^\(\?([xism]*)(?:-[xism]*)?:(.*)\)$})
+ if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$})
{ my $regexp= qr/(?$1:$2)/; # convert it back into a regexp
my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{_tag} =~ $regexp ) };
my $handler_data= { tag=> '*', score => { type => $REGEXP_TRIGGER} , trigger => $sub,
@@ -5522,9 +5529,12 @@
}
# methods dealing with the class attribute, convenient if you work with xhtml
-sub class
+sub class { $_[0]->{att}->{class}; }
+# lvalue version of class. separate from class to avoid problem like RT#
+sub lclass
:lvalue # > perl 5.5
-{ my( $elt)= @_; $elt->{'att'}->{'class'}; }
+ { $_[0]->{att}->{class}; }
+
sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); }
# adds a class to an element
@@ -5611,9 +5621,12 @@
return $elt;
}
-sub att
+sub att { $_[0]->{att}->{$_[1]}; }
+# lvalue version of att. separate from class to avoid problem like RT#
+sub latt
:lvalue # > perl 5.5
-{ $_[0]->{att}->{$_[1]}; }
+ { $_[0]->{att}->{$_[1]}; }
+
sub del_att
{ my $elt= shift;
while( @_) { delete $elt->{'att'}->{shift()}; }
@@ -7327,22 +7340,25 @@
my $replacement_string;
my $is_string= _is_string( $replace);
foreach my $text_elt ($elt->descendants_or_self( $TEXT))
- { if( $is_string)
+ {
+ if( $is_string)
{ my $text= $text_elt->text;
$text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx;
$text_elt->set_text( $text);
}
else
- {
+ {
no utf8; # = perl 5.6
my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace));
my $text= $text_elt->text;
my $pos=0; # used to skip text that was previously matched
+ my $found_hit;
while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg))
- { my $match_start = length( $pre_match_string);
- my $match = $text_elt->split_at( $match_start + $pos);
+ { $found_hit=1;
+ my $match_start = length( $pre_match_string);
+ my $match = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt;
my $match_length = length( $match_string);
- my $post_match = $match->split_at( $match_length);
+ my $post_match = $match->split_at( $match_length);
$replace_sub->( $match, @var);
# merge previous text with current one
my $next_sibling;
@@ -7357,21 +7373,12 @@
# go to next
$text_elt= $post_match;
$text= $post_match->text;
- # merge last text element with next one if needed,
- # the match will be against the non-matched text,
- # so $pos is used to skip the merged part
- my $prev_sibling;
- if( ($prev_sibling= $post_match->{prev_sibling})
- && ($XML::Twig::index2gi[$post_match->{'gi'}] eq $XML::Twig::index2gi[$prev_sibling->{'gi'}])
- )
- { $pos= length( $prev_sibling->text);
- $prev_sibling->merge_text( $post_match);
- }
# if the match is at the end of the text an empty #PCDATA is left: remove it
if( !$text_elt->text) { $text_elt->delete; }
}
+ if( $found_hit) { $text_elt->normalize; } # in case consecutive #PCDATA have been created
}
}
@@ -7392,22 +7399,22 @@
sub _install_replace_sub
{ my $replace_exp= shift;
my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp;
- my $sub= q{ my( $match, @var)= @_; unshift @var, undef; my $new; };
+ my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;};
my( $gi, $exp);
foreach my $item (@item)
- { if( $item=~ m{^&elt\s*\(([^)]*)\)})
- { $exp= $1;
- }
+ { next if ! length $item;
+ if( $item=~ m{^&elt\s*\(([^)]*)\)})
+ { $exp= $1; }
elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)})
{ $exp= " '#ENT' => $1"; }
else
{ $exp= qq{ '#PCDATA' => "$item"}; }
- $exp=~ s{\$(\d)}{\$var[$1]}g; # replace references to matches
+ $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches
$sub.= qq{ \$new= \$match->new( $exp); };
- $sub .= q{ $new->paste( before => $match); };
+ $sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;};
}
$sub .= q{ $match->delete; };
- #$sub=~ s/;/;\n/g;
+ #$sub=~ s/;/;\n/g; warn "subs: $sub";
my $coderef= eval "sub { $NO_WARNINGS; $sub }";
if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); }
return $coderef;
@@ -8440,6 +8447,7 @@
} # end of block containing package globals ($pretty_print, $quotes, keep_encoding...)
+# merges consecutive #PCDATAs in am element
sub normalize
{ my( $elt)= @_;
my @descendants= $elt->descendants( $PCDATA);
@@ -10486,13 +10494,15 @@
get get an extra CDATA section inside ( <!-- foo --> becomes
<!-- <![CDATA[ foo ]]> -->
-=item parsefile_html
+=item parsefile_html ($file)
parse an HTML file (by converting it to XML using HTML::TreeBuilder, which
-needs to be available). The file is loaded completely in memory and converted
-to XML before being parsed.
+needs to be available, or HTML::Tidy if the C option was used).
+The file is loaded completely in memory and converted to XML before being parsed.
+
+=item parseurl_html ($url $optional_user_agent)
-B<Alpha>: implementation, and thus generated XML could change.
+parse an URL as html the same way C> does
=item safe_parseurl_html ($url $optional_user_agent)
@@ -11546,7 +11556,11 @@
Return the value of attribute C<$att> or C<undef>
-this method is an lvalue, so you can do C<< $elt->{'att'}->{'foo'}= 'bar' >>
+=item latt ($att)
+
+Return the value of attribute C<$att> or C<undef>
+
+this method is an lvalue, so you can do C<< $elt->latt( 'foo')= 'bar' >> or C<< $elt->latt( 'foo')++; >>
=item set_att ($att, $att_value)
@@ -12665,7 +12679,10 @@
attribute are quite convenient when dealing with XHTML, or plain XML that
will eventually be displayed using CSS)
-this method is an lvalue, so you can do C<< $elt->class= "foo" >>
+=item lclass
+
+same as class, except that
+this method is an lvalue, so you can do C<< $elt->lclass= "foo" >>
=item set_class ($class)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/Twig_pm.slow new/XML-Twig-3.38/Twig_pm.slow
--- old/XML-Twig-3.37/Twig_pm.slow 2010-10-09 18:17:09.000000000 +0200
+++ new/XML-Twig-3.38/Twig_pm.slow 2011-02-26 09:00:28.000000000 +0100
@@ -38,16 +38,16 @@
# xml name (leading # allowed)
# first line is for perl 5.005, second line for modern perl, that accept character classes
-my $REG_NAME = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*)}; # does not work for leading non-ascii letters
- $REG_NAME = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*)}; # > perl 5.5
+my $REG_NAME = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*)}; # does not work for leading non-ascii letters
+ $REG_NAME = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*)}; # > perl 5.5
# name or wildcard (* or '') (leading # allowed)
-my $REG_NAME_W = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # does not work for leading non-ascii letters
- $REG_NAME_W = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 5.5
+my $REG_NAME_W = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # does not work for leading non-ascii letters
+ $REG_NAME_W = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 5.5
# name or wildcard (* or '') (leading # allowed) with optional class
-my $REG_NAME_WC = q{(?(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # does not work for leading non-ascii letters
- $REG_NAME_WC = q{(?:(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # > perl 5.5
+my $REG_NAME_WC = q{(?(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # does not work for leading non-ascii letters
+ $REG_NAME_WC = q{(?:(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # > perl 5.5
my $REG_REGEXP = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)}; # regexp
@@ -97,7 +97,7 @@
BEGIN
{
-$VERSION = '3.37';
+$VERSION = '3.38';
use XML::Parser;
my $needVersion = '2.23';
@@ -783,6 +783,13 @@
return $@ ? $t->_reset_twig_after_error : $t;
}
+sub parseurl_html
+ { my $t= shift;
+ _use( 'LWP::Simple') or croak "missing LWP::Simple";
+ $t->parse_html( LWP::Simple::get( shift()), @_);
+ }
+
+
# uses eval to catch the parser's death
sub safe_parse_html
{ my $t= shift;
@@ -847,7 +854,7 @@
if( _is_well_formed_xml( $_[0]))
{ $t->parse( @_) }
else
- { my $html= _html2xml( $_[0]);
+ { my $html= $t->{use_tidy} ? _tidy_html( $_[0]) : _html2xml( $_[0]);
if( _is_well_formed_xml( $html))
{ $t->parse( $html); }
else
@@ -913,7 +920,7 @@
wrap => 0,
break_before_br => 0,
};
-
+ $options ||= {};
my $tidy_options= { %$TIDY_DEFAULTS, %$options};
my $tidy = HTML::Tidy->new( $tidy_options);
$tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is TIDY_ERROR, not clean
@@ -1366,7 +1373,7 @@
sub _set_regexp_handler
{ my( $handlers, $path, $handler, $prev_handler)= @_;
# if the expression was a regexp it is now a string (it was stringified when it became a hash key)
- if( $path=~ m{^\(\?([xism]*)(?:-[xism]*)?:(.*)\)$})
+ if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$})
{ my $regexp= qr/(?$1:$2)/; # convert it back into a regexp
my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{_tag} =~ $regexp ) };
my $handler_data= { tag=> '*', score => { type => $REGEXP_TRIGGER} , trigger => $sub,
@@ -5523,9 +5530,12 @@
}
# methods dealing with the class attribute, convenient if you work with xhtml
-sub class
+sub class { $_[0]->{att}->{class}; }
+# lvalue version of class. separate from class to avoid problem like RT#
+sub lclass
:lvalue # > perl 5.5
-{ my( $elt)= @_; $elt->att( 'class'); }
+ { $_[0]->{att}->{class}; }
+
sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); }
# adds a class to an element
@@ -5612,9 +5622,12 @@
return $elt;
}
-sub att
+sub att { $_[0]->{att}->{$_[1]}; }
+# lvalue version of att. separate from class to avoid problem like RT#
+sub latt
:lvalue # > perl 5.5
-{ $_[0]->{att}->{$_[1]}; }
+ { $_[0]->{att}->{$_[1]}; }
+
sub del_att
{ my $elt= shift;
while( @_) { delete $elt->{'att'}->{shift()}; }
@@ -7328,22 +7341,25 @@
my $replacement_string;
my $is_string= _is_string( $replace);
foreach my $text_elt ($elt->descendants_or_self( $TEXT))
- { if( $is_string)
+ {
+ if( $is_string)
{ my $text= $text_elt->text;
$text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx;
$text_elt->set_text( $text);
}
else
- {
+ {
no utf8; # = perl 5.6
my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace));
my $text= $text_elt->text;
my $pos=0; # used to skip text that was previously matched
+ my $found_hit;
while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg))
- { my $match_start = length( $pre_match_string);
- my $match = $text_elt->split_at( $match_start + $pos);
+ { $found_hit=1;
+ my $match_start = length( $pre_match_string);
+ my $match = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt;
my $match_length = length( $match_string);
- my $post_match = $match->split_at( $match_length);
+ my $post_match = $match->split_at( $match_length);
$replace_sub->( $match, @var);
# merge previous text with current one
my $next_sibling;
@@ -7358,21 +7374,12 @@
# go to next
$text_elt= $post_match;
$text= $post_match->text;
- # merge last text element with next one if needed,
- # the match will be against the non-matched text,
- # so $pos is used to skip the merged part
- my $prev_sibling;
- if( ($prev_sibling= $post_match->_prev_sibling)
- && ($post_match->gi eq $prev_sibling->gi)
- )
- { $pos= length( $prev_sibling->text);
- $prev_sibling->merge_text( $post_match);
- }
# if the match is at the end of the text an empty #PCDATA is left: remove it
if( !$text_elt->text) { $text_elt->delete; }
}
+ if( $found_hit) { $text_elt->normalize; } # in case consecutive #PCDATA have been created
}
}
@@ -7393,22 +7400,22 @@
sub _install_replace_sub
{ my $replace_exp= shift;
my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp;
- my $sub= q{ my( $match, @var)= @_; unshift @var, undef; my $new; };
+ my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;};
my( $gi, $exp);
foreach my $item (@item)
- { if( $item=~ m{^&elt\s*\(([^)]*)\)})
- { $exp= $1;
- }
+ { next if ! length $item;
+ if( $item=~ m{^&elt\s*\(([^)]*)\)})
+ { $exp= $1; }
elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)})
{ $exp= " '#ENT' => $1"; }
else
{ $exp= qq{ '#PCDATA' => "$item"}; }
- $exp=~ s{\$(\d)}{\$var[$1]}g; # replace references to matches
+ $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches
$sub.= qq{ \$new= \$match->new( $exp); };
- $sub .= q{ $new->paste( before => $match); };
+ $sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;};
}
$sub .= q{ $match->delete; };
- #$sub=~ s/;/;\n/g;
+ #$sub=~ s/;/;\n/g; warn "subs: $sub";
my $coderef= eval "sub { $NO_WARNINGS; $sub }";
if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); }
return $coderef;
@@ -8442,6 +8449,7 @@
} # end of block containing package globals ($pretty_print, $quotes, keep_encoding...)
+# merges consecutive #PCDATAs in am element
sub normalize
{ my( $elt)= @_;
my @descendants= $elt->descendants( $PCDATA);
@@ -10488,13 +10496,15 @@
get get an extra CDATA section inside ( <!-- foo --> becomes
<!-- <![CDATA[ foo ]]> -->
-=item parsefile_html
+=item parsefile_html ($file)
parse an HTML file (by converting it to XML using HTML::TreeBuilder, which
-needs to be available). The file is loaded completely in memory and converted
-to XML before being parsed.
+needs to be available, or HTML::Tidy if the C option was used).
+The file is loaded completely in memory and converted to XML before being parsed.
+
+=item parseurl_html ($url $optional_user_agent)
-B<Alpha>: implementation, and thus generated XML could change.
+parse an URL as html the same way C> does
=item safe_parseurl_html ($url $optional_user_agent)
@@ -11548,7 +11558,11 @@
Return the value of attribute C<$att> or C<undef>
-this method is an lvalue, so you can do C<< $elt->att( 'foo')= 'bar' >>
+=item latt ($att)
+
+Return the value of attribute C<$att> or C<undef>
+
+this method is an lvalue, so you can do C<< $elt->latt( 'foo')= 'bar' >> or C<< $elt->latt( 'foo')++; >>
=item set_att ($att, $att_value)
@@ -12667,7 +12681,10 @@
attribute are quite convenient when dealing with XHTML, or plain XML that
will eventually be displayed using CSS)
-this method is an lvalue, so you can do C<< $elt->class= "foo" >>
+=item lclass
+
+same as class, except that
+this method is an lvalue, so you can do C<< $elt->lclass= "foo" >>
=item set_class ($class)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/t/test_3_36.t new/XML-Twig-3.38/t/test_3_36.t
--- old/XML-Twig-3.37/t/test_3_36.t 2010-10-09 09:39:36.000000000 +0200
+++ new/XML-Twig-3.38/t/test_3_36.t 2011-02-25 11:28:06.000000000 +0100
@@ -12,7 +12,7 @@
use XML::Twig;
-my $TMAX=66;
+my $TMAX=67;
print "1..$TMAX\n";
{ my $doc=q{<d><s id="s1"><t>title 1</t><s id="s2"><t>title 2</t></s><s id="s3"></s></s><s id="s4"></s></d>};
@@ -84,19 +84,22 @@
{ if( $] >= 5.006)
{ my $t= XML::Twig->parse( q{<d><e/></d>});
- $t->first_elt( 'e')->att( 'a')= 'b';
+ $t->first_elt( 'e')->latt( 'a')= 'b';
is( $t->sprint, q{<d><e a="b"/></d>}, 'lvalued attribute (no attributes)');
- $t->first_elt( 'e')->att( 'c')= 'd';
+ $t->first_elt( 'e')->latt( 'c')= 'd';
is( $t->sprint, q{<d><e a="b" c="d"/></d>}, 'lvalued attribute (attributes)');
- $t->first_elt( 'e')->att( 'c')= '';
+ $t->first_elt( 'e')->latt( 'c')= '';
is( $t->sprint, q{<d><e a="b" c=""/></d>}, 'lvalued attribute (modifying existing attributes)');
- $t->root->class= 'foo';
+ $t->root->lclass= 'foo';
is( $t->sprint, q{<d class="foo"><e a="b" c=""/></d>}, 'lvalued class (new class)');
- $t->root->class=~ s{fo}{tot};
+ $t->root->lclass=~ s{fo}{tot};
is( $t->sprint, q{<d class="toto"><e a="b" c=""/></d>}, 'lvalued class (modify class)');
+ $t= XML::Twig->parse( '<d a="1"/>');
+ $t->root->latt( 'a')++;
+ is( $t->sprint, '<d a="2"/>', '++ on attribute');
}
else
- { skip( 5 => "cannot use lvalued attributes with perl $]"); }
+ { skip( 6 => "cannot use lvalued attributes with perl $]"); }
}
# used for all HTML parsing tests with HTML::Tidy
@@ -112,7 +115,6 @@
}
else
{
-
my $doc= '<html><head><title>a title</title></head><body>par 1<p>par 2<br>after the break</body></html>';
my $t= XML::Twig->new( use_tidy => 1)->parse_html( $doc);
my $inner= '<ul><li>foo</li><li>bar</li></ul>';
@@ -240,7 +242,7 @@
{ if( XML::Twig::_use( 'HTML::TreeBuilder'))
{ my $html_with_Amp= XML::Twig->new->parse_html( '<html><head></head><body>&Amp;</body></html>')->sprint;
- if( $HTML::TreeBuilder::VERSION < 4.00)
+ if( $HTML::TreeBuilder::VERSION <= 3.23)
{ is( $html_with_Amp, '<html><head></head><body>&</body></html>', '&Amp; used in html (fixed HTB < 4.00)'); }
else
{ is( $html_with_Amp, '<html><head></head><body>&Amp;</body></html>', '&Amp; used in html (NOT fixed HTB > r.00)'); }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/t/tools.pm new/XML-Twig-3.38/t/tools.pm
--- old/XML-Twig-3.37/t/tools.pm 2010-09-21 19:43:40.000000000 +0200
+++ new/XML-Twig-3.38/t/tools.pm 2011-02-25 11:00:45.000000000 +0100
@@ -392,6 +392,13 @@
return $perl;
}
+# scrubs xhtml generated by tools from likely to change bits
+sub scrub_xhtml
+ { my( $html)= @_;
+ $html=~ s{<!DOCTYPE[^>]*>}{}; # scrub doctype
+ $html=~ s{\s*xmlns="[^"]*"}{}; # scrup namespace declaration
+ return $html;
+ }
__END__
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/tools/xml_grep/xml_grep new/XML-Twig-3.38/tools/xml_grep/xml_grep
--- old/XML-Twig-3.37/tools/xml_grep/xml_grep 2010-09-20 18:51:37.000000000 +0200
+++ new/XML-Twig-3.38/tools/xml_grep/xml_grep 2011-02-26 08:51:37.000000000 +0100
@@ -5,14 +5,15 @@
use Pod::Usage;
use XML::Twig;
-my $VERSION="0.8";
+my $VERSION="0.9";
# options (all used globally in the script)
my( $help, $man, @roots, @paths, $files,
$count, $nb_results, $nb_results_per_file,
$encoding, @exclude,
$wrap, $nowrap, $descr, $group, $pretty_print, $version, $text_only, $date,
- $html, $verbose, $strict
+ $html, $tidy,
+ $verbose, $strict
);
# used to check if the wrapping tags need to be output
@@ -44,6 +45,7 @@
'date!' => \$date,
'strict' => \$strict,
'html' => \$html,
+ 'tidy' => \$tidy,
'verbose' => \$verbose,
) or pod2usage(2);
@@ -113,7 +115,7 @@
{ create_regular_handlers( \%options, \@roots, \@paths);
}
-
+if( $tidy) { $html= 1; $options{use_tidy}= 1; }
$options{pretty_print} = $pretty_print if( $pretty_print);
@@ -418,12 +420,20 @@
if the option is used but no style is given then 'C<indented>' is used
-short form for this arggument is B<-s>
+short form for this argument is B<-s>
=item B<--text_only>
Displays the text of the results, one by line.
+=item B<--html>
+
+Allow HTML input, files are converted using HTML::TreeBuilder
+
+=item B<--Tidy>
+
+Allow HTML input, files are converted using HTML::Tidy
+
=back
=head2 Condition Syntax
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Remember to have fun...
--
To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org
For additional commands, e-mail: opensuse-commit+help@opensuse.org