commit perl-Text-CSV for openSUSE:Factory
Hello community, here is the log from the commit of package perl-Text-CSV for openSUSE:Factory checked in at 2013-07-31 17:24:40 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/perl-Text-CSV (Old) and /work/SRC/openSUSE:Factory/.perl-Text-CSV.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "perl-Text-CSV" Changes: -------- --- /work/SRC/openSUSE:Factory/perl-Text-CSV/perl-Text-CSV.changes 2012-02-14 13:09:40.000000000 +0100 +++ /work/SRC/openSUSE:Factory/.perl-Text-CSV.new/perl-Text-CSV.changes 2013-07-31 17:24:41.000000000 +0200 @@ -1,0 +2,13 @@ +Sat Jul 27 11:58:55 UTC 2013 - coolo@suse.com + +- updated to 1.32 + - fix t/rt71_pp.t + - fix handling UTF8 in parse method. + - fix getline with allow_loose_quotes (rt#83705) + - add allow_unquoted_escape + - fix parsing escapted sep char (found in Text::CSV_XS rt#81295) + * TODO: Updating documents and adding diag_verbose in the next version + - catch up Text::CSV_XS 0.99 + (except for diag_verbose and allow_unquoted_escape) + +------------------------------------------------------------------- Old: ---- Text-CSV-1.21.tar.gz New: ---- Text-CSV-1.32.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ perl-Text-CSV.spec ++++++ --- /var/tmp/diff_new_pack.itrNop/_old 2013-07-31 17:24:42.000000000 +0200 +++ /var/tmp/diff_new_pack.itrNop/_new 2013-07-31 17:24:42.000000000 +0200 @@ -1,7 +1,7 @@ # # spec file for package perl-Text-CSV # -# Copyright (c) 2012 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany. 
# # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,20 +17,19 @@ Name: perl-Text-CSV -Version: 1.21 +Version: 1.32 Release: 0 %define cpan_name Text-CSV -Summary: Comma-separated values manipulator (using XS or PurePerl) -License: GPL-1.0+ or Artistic-1.0 +Summary: comma-separated values manipulator (using XS or PurePerl) +License: Artistic-1.0 or GPL-1.0+ Group: Development/Libraries/Perl Url: http://search.cpan.org/dist/Text-CSV/ -Source: http://www.cpan.org/authors/id/M/MA/MAKAMAKA/Text-CSV-%{version}.tar.gz +Source: http://www.cpan.org/authors/id/M/MA/MAKAMAKA/%{cpan_name}-%{version}.tar.gz +BuildArch: noarch +BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: perl BuildRequires: perl-macros -BuildRequires: perl(IO::Handle) -Requires: perl(IO::Handle) -BuildRoot: %{_tmppath}/%{name}-%{version}-build -BuildArch: noarch +#BuildRequires: perl(Text::CSV) %{perl_requires} %description @@ -60,11 +59,8 @@ %perl_process_packlist %perl_gen_filelist -%clean -%{__rm} -rf %{buildroot} - %files -f %{name}.files -%defattr(644,root,root,755) +%defattr(-,root,root,755) %doc Changes README %changelog ++++++ Text-CSV-1.21.tar.gz -> Text-CSV-1.32.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/Changes new/Text-CSV-1.32/Changes --- old/Text-CSV-1.21/Changes 2010-12-27 04:55:59.000000000 +0100 +++ new/Text-CSV-1.32/Changes 2013-06-13 08:36:38.000000000 +0200 @@ -1,5 +1,20 @@ Revision history for Perl extension Text::CSV. +1.32 + - fix t/rt71_pp.t + +1.31 Thu Jun 13 14:06:49 2013 + - fix handling UTF8 in parse method. 
+ - fix getline with allow_loose_quotes (rt#83705) + - add allow_unquoted_escape + - fix parsing escapted sep char (found in Text::CSV_XS rt#81295) + + * TODO: Updating documents and adding diag_verbose in the next version + +1.30 Tue Jun 11 00:06:02 2013 + - catch up Text::CSV_XS 0.99 + (except for diag_verbose and allow_unquoted_escape) + 1.21 Mon Dec 27 12:35:35 2010 - updated the compatibility for Text::CSV_XS version 0.80 * added getline_all() and getaline_hr_all() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/MANIFEST new/Text-CSV-1.32/MANIFEST --- old/Text-CSV-1.21/MANIFEST 2010-12-27 04:45:58.000000000 +0100 +++ new/Text-CSV-1.32/MANIFEST 2013-06-13 08:47:00.000000000 +0200 @@ -37,3 +37,4 @@ t/util.pl Extra test utilities META.yml Module meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/META.json new/Text-CSV-1.32/META.json --- old/Text-CSV-1.21/META.json 1970-01-01 01:00:00.000000000 +0100 +++ new/Text-CSV-1.32/META.json 2013-06-13 08:47:00.000000000 +0200 @@ -0,0 +1,48 @@ +{ + "abstract" : "comma-separated values manipulator (using XS or PurePerl)", + "author" : [ + "Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>" + ], + "dynamic_config" : 1, + "generated_by" : "ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.130880", + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "Text-CSV", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "IO::Handle" : "0", + "Test::Harness" : "0", + "Test::More" : "0" + } + } + }, + "release_status" : "stable", 
+ "resources" : { + "repository" : { + "url" : "http://github.com/makamaka/Text-CSV" + } + }, + "version" : "1.32" +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/META.yml new/Text-CSV-1.32/META.yml --- old/Text-CSV-1.21/META.yml 2010-12-27 04:59:11.000000000 +0100 +++ new/Text-CSV-1.32/META.yml 2013-06-13 08:47:00.000000000 +0200 @@ -1,26 +1,26 @@ ---- #YAML:1.0 -name: Text-CSV -version: 1.21 -abstract: comma-separated values manipulator (using XS or PurePerl) +--- +abstract: 'comma-separated values manipulator (using XS or PurePerl)' author: - - Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt> -license: perl -distribution_type: module -configure_requires: - ExtUtils::MakeMaker: 0 + - 'Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>' build_requires: - ExtUtils::MakeMaker: 0 + ExtUtils::MakeMaker: 0 +configure_requires: + ExtUtils::MakeMaker: 0 +dynamic_config: 1 +generated_by: 'ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.130880' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: 1.4 +name: Text-CSV +no_index: + directory: + - t + - inc requires: - IO::Handle: 0 - Test::Harness: 0 - Test::More: 0 + IO::Handle: 0 + Test::Harness: 0 + Test::More: 0 resources: - repository: http://github.com/makamaka/Text-CSV -no_index: - directory: - - t - - inc -generated_by: ExtUtils::MakeMaker version 6.56 -meta-spec: - url: http://module-build.sourceforge.net/META-spec-v1.4.html - version: 1.4 + repository: http://github.com/makamaka/Text-CSV +version: 1.32 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/README new/Text-CSV-1.32/README --- old/Text-CSV-1.21/README 2010-12-27 04:41:17.000000000 +0100 +++ new/Text-CSV-1.32/README 2013-06-10 17:12:22.000000000 +0200 @@ -1,4 +1,4 @@ -Text::CSV version 1.21 +Text::CSV version 1.30 ======================== comma-separated values 
manipulator @@ -31,7 +31,7 @@ COPYRIGHT AND LICENSE Copyright (C) 1997 Alan Citterman. All rights reserved. - Copyright (C) 2007-2010 Makamaka Hannyaharamitu. All rights reserved. + Copyright (C) 2007-2013 Makamaka Hannyaharamitu. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/lib/Text/CSV.pm new/Text-CSV-1.32/lib/Text/CSV.pm --- old/Text-CSV-1.21/lib/Text/CSV.pm 2010-12-27 04:40:39.000000000 +0100 +++ new/Text-CSV-1.32/lib/Text/CSV.pm 2013-06-13 08:37:27.000000000 +0200 @@ -6,14 +6,14 @@ use vars qw( $VERSION $DEBUG ); BEGIN { - $VERSION = '1.21'; + $VERSION = '1.32'; $DEBUG = 0; } # if use CSV_XS, requires version my $Module_XS = 'Text::CSV_XS'; my $Module_PP = 'Text::CSV_PP'; -my $XS_Version = '0.80'; +my $XS_Version = '0.99'; my $Is_Dynamic = 0; @@ -27,6 +27,7 @@ keep_meta_info allow_loose_quotes allow_loose_escapes verbatim meta_info is_quoted is_binary eof getline print parse combine fields string error_diag error_input status blank_is_undef empty_is_undef getline_hr column_names bind_columns auto_diag quote_space quote_null getline_all getline_hr_all + is_missing quote_binary record_number print_hr PV IV NV /; # @@ -287,9 +288,10 @@ =head1 VERSION - 1.21 + 1.32 -This module is compatible with Text::CSV_XS B<0.80> and later. +This module is compatible with Text::CSV_XS B<0.99> and later. +(except for diag_verbose and allow_unquoted_escape) =head2 Embedded newlines @@ -569,6 +571,12 @@ binary mode (the C<{ binary =E<gt> 1 }> is set). The default is true. You can prevent NULL escapes by setting this attribute to 0. +=item quote_binary + +By default, all "unsafe" bytes inside a string cause the combined field to +be quoted. By setting this attribute to 0, you can disable that trigger for +bytes >= 0x7f. 
+ =item keep_meta_info By default, the parsing of input lines is as simple and fast as @@ -928,6 +936,24 @@ This returns a true value if the data in the indicated column contained any byte in the range [\x00-\x08,\x10-\x1F,\x7F-\xFF] +=head2 is_missing + + my $missing = $csv->is_missing ($column_idx); + +Where C<$column_idx> is the (zero-based) index of the column in the last +result of L</getline_hr>. + + while (my $hr = $csv->getline_hr ($fh)) { + $csv->is_missing (0) and next; # This was an empty line + } + +When using L</getline_hr> for parsing, it is impossible to tell if the +fields are C<undef> because they where not filled in the CSV stream or +because they were not read at all, as B<all> the fields defined by +L</column_names> are set in the hash-ref. If you still need to know if all +fields in each row are provided, you should enable C<keep_meta_info> so you +can check the flags. + =head2 status $status = $csv->status (); @@ -978,6 +1004,14 @@ When called as a class method or a direct function call, the error diag is that of the last C<new ()> call. +=head2 record_number + + $recno = $csv->record_number (); + +Returns the records parsed by this csv instance. This value should be more +accurate than C<$.> when embedded newlines come in play. Records written by +this instance are not counted. + =head2 SetDiag $csv->SetDiag (0); @@ -1133,12 +1167,12 @@ Text::CSV_PP: -Copyright (C) 2005-2010 Makamaka Hannyaharamitu. +Copyright (C) 2005-2013 Makamaka Hannyaharamitu. Text:CSV_XS: -Copyright (C) 2007-2010 H.Merijn Brand for PROCURA B.V. +Copyright (C) 2007-2013 H.Merijn Brand for PROCURA B.V. Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved. Portions Copyright (C) 1997 Alan Citterman. All rights reserved. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/lib/Text/CSV_PP.pm new/Text-CSV-1.32/lib/Text/CSV_PP.pm --- old/Text-CSV-1.21/lib/Text/CSV_PP.pm 2010-12-27 04:40:58.000000000 +0100 +++ new/Text-CSV-1.32/lib/Text/CSV_PP.pm 2013-06-13 07:06:01.000000000 +0200 @@ -11,7 +11,7 @@ use vars qw($VERSION); use Carp (); -$VERSION = '1.29'; +$VERSION = '1.31'; sub PV { 0 } sub IV { 1 } @@ -19,6 +19,7 @@ sub IS_QUOTED () { 0x0001; } sub IS_BINARY () { 0x0002; } +sub IS_MISSING () { 0x0010; } my $ERRORS = { @@ -60,6 +61,8 @@ 3006 => "EHR - bind_columns () did not pass enough refs for parsed fields", 3007 => "EHR - bind_columns needs refs to writable scalars", 3008 => "EHR - unexpected error in bound fields", + 3009 => "EHR - print_hr () called before column_names ()", + 3010 => "EHR - print_hr () called with invalid arguments", 0 => "", }; @@ -78,6 +81,7 @@ keep_meta_info => 0, allow_loose_quotes => 0, allow_loose_escapes => 0, + allow_unquoted_escape => 0, allow_whitespace => 0, chomp_verbatim => 0, types => undef, @@ -87,8 +91,11 @@ auto_diag => 0, quote_space => 1, quote_null => 1, + quote_binary => 1, + diag_verbose => 0, _EOF => 0, + _RECNO => 0, _STATUS => undef, _FIELDS => undef, _FFLAGS => undef, @@ -106,10 +113,12 @@ $INC{'bytes.pm'} = 1 unless $INC{'bytes.pm'}; # dummy no strict 'refs'; *{"utf8::is_utf8"} = sub { 0; }; + *{"utf8::decode"} = sub { }; } elsif ( $] < 5.008 ) { no strict 'refs'; *{"utf8::is_utf8"} = sub { 0; }; + *{"utf8::decode"} = sub { }; } elsif ( !defined &utf8::is_utf8 ) { require Encode; @@ -244,6 +253,11 @@ return $context ? 
@diag : $diagobj; } + +sub record_number { + return shift->{_RECNO}; +} + ################################################################################ # string ################################################################################ @@ -273,8 +287,8 @@ $self->{_STRING} = ''; $self->{_STATUS} = 0; - my ($always_quote, $binary, $quot, $sep, $esc, $empty_is_undef, $quote_space, $quote_null) - = @{$self}{qw/always_quote binary quote_char sep_char escape_char empty_is_undef quote_space quote_null/}; + my ($always_quote, $binary, $quot, $sep, $esc, $empty_is_undef, $quote_space, $quote_null, $quote_binary ) + = @{$self}{qw/always_quote binary quote_char sep_char escape_char empty_is_undef quote_space quote_null quote_binary/}; if(!defined $quot){ $quot = ''; } @@ -312,7 +326,7 @@ if( $binary and $quote_null ){ use bytes; - $must_be_quoted++ if ( $column =~ s/\0/${esc}0/g || $column =~ /[\x00-\x1f\x7f-\xa0]/ ); + $must_be_quoted++ if ( $column =~ s/\0/${esc}0/g || ($quote_binary && $column =~ /[\x00-\x1f\x7f-\xa0]/) ); } if($always_quote or $must_be_quoted){ @@ -340,9 +354,9 @@ return 0 if(!defined $line); - my ($binary, $quot, $sep, $esc, $types, $keep_meta_info, $allow_whitespace, $eol, $blank_is_undef, $empty_is_undef) + my ($binary, $quot, $sep, $esc, $types, $keep_meta_info, $allow_whitespace, $eol, $blank_is_undef, $empty_is_undef, $unquot_esc) = @{$self}{ - qw/binary quote_char sep_char escape_char types keep_meta_info allow_whitespace eol blank_is_undef empty_is_undef/ + qw/binary quote_char sep_char escape_char types keep_meta_info allow_whitespace eol blank_is_undef empty_is_undef allow_unquoted_escape/ }; $sep = ',' unless (defined $sep); @@ -357,15 +371,19 @@ my $re_split = $self->{_re_split}->{$quot}->{$esc}->{$sep} ||= _make_regexp_split_column($esc, $quot, $sep); my $re_quoted = $self->{_re_quoted}->{$quot} ||= qr/^\Q$quot\E(.*)\Q$quot\E$/s; my $re_in_quot_esp1 = $self->{_re_in_quot_esp1}->{$esc} ||= qr/\Q$esc\E(.)/; - my $re_in_quot_esp2 = 
$self->{_re_in_quot_esp2}->{$quot}->{$esc} ||= qr/[\Q$quot$esc\E0]/; + my $re_in_quot_esp2 = $self->{_re_in_quot_esp2}->{$quot}->{$esc} ||= qr/[\Q$quot$esc$sep\E0]/; my $re_quot_char = $self->{_re_quot_char}->{$quot} ||= qr/\Q$quot\E/; - my $re_esc = $self->{_re_esc}->{$quot}->{$esc} ||= qr/\Q$esc\E(\Q$quot\E|\Q$esc\E|0)/; + my $re_esc = $self->{_re_esc}->{$quot}->{$esc} ||= qr/\Q$esc\E(\Q$quot\E|\Q$esc\E|\Q$sep\E|0)/; my $re_invalid_quot = $self->{_re_invalid_quot}->{$quot}->{$esc} ||= qr/^$re_quot_char|[^\Q$re_esc\E]$re_quot_char/; if ($allow_whitespace) { $re_split = $self->{_re_split_allow_sp}->{$quot}->{$esc}->{$sep} ||= _make_regexp_split_column_allow_sp($esc, $quot, $sep); } + if ($unquot_esc) { + $re_split = $self->{_re_split_allow_unqout_esc}->{$quot}->{$esc}->{$sep} + ||= _make_regexp_split_column_allow_unqout_esc($esc, $quot, $sep); + } my $palatable = 1; my @part = (); @@ -445,7 +463,8 @@ $palatable = 0; last; } - else { + + unless ($self->{allow_loose_quotes}) { $col =~ s/\Q$esc\E(.)/$1/g; } } @@ -522,10 +541,20 @@ $col = undef; } + if ( $unquot_esc ) { + $col =~ s/\Q$esc\E(.)/$1/g; + } + + } + + utf8::encode($col) if $utf8; + if ( defined $col && _is_valid_utf8($col) ) { + utf8::decode($col); } push @part,$col; push @{$meta_flag}, $flag if ($keep_meta_info); + $self->{ _RECNO }++; $i++; } @@ -552,11 +581,36 @@ return qr/([^\Q$sep\E]*)\Q$sep\E/s; } - qr/( + return qr/( \Q$quot\E [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc\E0][^\Q$quot$esc\E]*)* \Q$quot\E | # or + \Q$quot\E + (?:\Q$esc\E[\Q$quot$esc$sep\E0]|[^\Q$quot$esc$sep\E])* + \Q$quot\E + | # or + [^\Q$sep\E]* + ) + \Q$sep\E + /xs; +} + + +sub _make_regexp_split_column_allow_unqout_esc { + my ($esc, $quot, $sep) = @_; + + return qr/( + \Q$quot\E + [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc\E0][^\Q$quot$esc\E]*)* + \Q$quot\E + | # or + \Q$quot\E + (?:\Q$esc\E[\Q$quot$esc$sep\E0]|[^\Q$quot$esc$sep\E])* + \Q$quot\E + | # or + (?:\Q$esc\E[\Q$quot$esc$sep\E0]|[^\Q$quot$esc$sep\E])* + | # or [^\Q$sep\E]* ) 
\Q$sep\E @@ -581,7 +635,7 @@ qr/$ws* ( \Q$quot\E - [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc\E0][^\Q$quot$esc\E]*)* + [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc$sep\E0][^\Q$quot$esc\E]*)* \Q$quot\E | # or [^\Q$sep\E]*? @@ -607,6 +661,13 @@ $io->print( $self->_string ) or $self->_set_error_diag(2200); } + +sub print_hr { + my ($self, $io, $hr) = @_; + $self->{_COLUMN_NAMES} or $self->_set_error_diag(3009); + ref $hr eq "HASH" or $self->_set_error_diag(3010); + $self->print ($io, [ map { $hr->{$_} } $self->column_names ]); +} ################################################################################ # getline ################################################################################ @@ -639,7 +700,10 @@ LOOP: { my $is_continued = scalar(my @list = $line =~ /$re/g) % 2; # if line is valid, quot is even - if ( $line =~ /${re}0/ ) { # null suspicion case + if ( $self->{allow_loose_quotes } ) { + $is_continued = 0; + } + elsif ( $line =~ /${re}0/ ) { # null suspicion case $is_continued = $line =~ qr/ ^ ( @@ -704,7 +768,6 @@ return []; } - ################################################################################ # getline_all ################################################################################ @@ -750,6 +813,10 @@ my $fr = $self->getline( $io ) or return undef; + if ( ref $self->{_FFLAGS} ) { + $self->{_FFLAGS}[$_] = IS_MISSING for ($#{$fr} + 1) .. $#{$self->{_COLUMN_NAMES}}; + } + @hr{ @{ $self->{_COLUMN_NAMES} } } = @$fr; \%hr; @@ -857,6 +924,13 @@ return if( $_[1] =~ /\D/ or $_[1] < 0 or $_[1] > $#{ $_[0]->{_FFLAGS} } ); $_[0]->{_FFLAGS}->[$_[1]] & IS_BINARY ? 1 : 0; } + +sub is_missing { + my ($self, $idx, $val) = @_; + ref $self->{_FFLAGS} && + $idx >= 0 && $idx < @{$self->{_FFLAGS}} or return; + $self->{_FFLAGS}[$idx] & IS_MISSING ? 1 : 0; +} ################################################################################ # _check_type # take an arg as scalar referrence. 
@@ -895,7 +969,8 @@ BEGIN { for my $method ( qw/always_quote binary keep_meta_info allow_loose_quotes allow_loose_escapes - verbatim blank_is_undef empty_is_undef auto_diag quote_space quote_null/ ) { + verbatim blank_is_undef empty_is_undef quote_space quote_null + quote_binary allow_unquoted_escape/ ) { eval qq| sub $method { \$_[0]->{$method} = defined \$_[1] ? \$_[1] : 0 if (\@_ > 1); @@ -971,6 +1046,41 @@ Carp::croak( $_[0]->error_diag . '' ); } +sub auto_diag { + my $self = shift; + if (@_) { + my $v = shift; + !defined $v || $v eq "" and $v = 0; + $v =~ m/^[0-9]/ or $v = $v ? 1 : 0; # default for true/false + $self->{auto_diag} = $v; + } + $self->{auto_diag}; +} + +sub diag_verbose { + my $self = shift; + if (@_) { + my $v = shift; + !defined $v || $v eq "" and $v = 0; + $v =~ m/^[0-9]/ or $v = $v ? 1 : 0; # default for true/false + $self->{diag_verbose} = $v; + } + $self->{diag_verbose}; +} + +sub _is_valid_utf8 { + return ( $_[0] =~ /^(?: + [\x00-\x7F] + |[\xC2-\xDF][\x80-\xBF] + |[\xE0][\xA0-\xBF][\x80-\xBF] + |[\xE1-\xEC][\x80-\xBF][\x80-\xBF] + |[\xED][\x80-\x9F][\x80-\xBF] + |[\xEE-\xEF][\x80-\xBF][\x80-\xBF] + |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] + |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] + |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] + )+$/x ) ? 1 : 0; +} ################################################################################ package Text::CSV::ErrorDiag; @@ -1042,9 +1152,10 @@ =head1 VERSION - 1.29 + 1.31 -This module is compatible with Text::CSV_XS B<0.80> and later. +This module is compatible with Text::CSV_XS B<0.99>. +(except for diag_verbose and allow_unquoted_escape) =head2 Unicode (UTF8) @@ -1764,7 +1875,7 @@ =head1 COPYRIGHT AND LICENSE -Copyright 2005-2010 by Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt> +Copyright 2005-2013 by Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt> This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/t/10_base.t new/Text-CSV-1.32/t/10_base.t --- old/Text-CSV-1.21/t/10_base.t 2008-04-12 12:52:19.000000000 +0200 +++ new/Text-CSV-1.32/t/10_base.t 2013-06-10 17:12:22.000000000 +0200 @@ -3,7 +3,7 @@ use strict; $^W = 1; # use warnings core since 5.6 -use Test::More tests => 61; +use Test::More tests => 64; BEGIN { $ENV{PERL_TEXT_CSV} = 0; @@ -80,6 +80,10 @@ is (($csv->fields ())[2], "", "Hi! - fields () - field 3"); ok ( $csv->status (), "status ()"); +ok ( $csv->parse (""), "Empty line"); +is ( scalar $csv->fields (), 1, "Empty - count"); +is (($csv->fields ())[0], "", "One empty field"); + # Are Integers and Reals quoted? # # Important: Do not modify these tests unless you have a good diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/t/12_acc.t new/Text-CSV-1.32/t/12_acc.t --- old/Text-CSV-1.21/t/12_acc.t 2010-03-16 09:10:18.000000000 +0100 +++ new/Text-CSV-1.32/t/12_acc.t 2013-06-13 07:02:23.000000000 +0200 @@ -3,7 +3,7 @@ use strict; $^W = 1; # use warnings core since 5.6 -use Test::More tests => 113; +use Test::More tests => 133; BEGIN { $ENV{PERL_TEXT_CSV} = 0; @@ -23,13 +23,17 @@ is ($csv->keep_meta_info, 0, "keep_meta_info"); is ($csv->allow_loose_quotes, 0, "allow_loose_quotes"); is ($csv->allow_loose_escapes, 0, "allow_loose_escapes"); +is ($csv->allow_unquoted_escape, 0, "allow_unquoted_escape"); is ($csv->allow_whitespace, 0, "allow_whitespace"); is ($csv->blank_is_undef, 0, "blank_is_undef"); is ($csv->empty_is_undef, 0, "empty_is_undef"); is ($csv->auto_diag, 0, "auto_diag"); +is ($csv->diag_verbose, 0, "diag_verbose"); is ($csv->verbatim, 0, "verbatim"); is ($csv->quote_space, 1, "quote_space"); is ($csv->quote_null, 1, "quote_null"); +is ($csv->quote_binary, 1, "quote_binary"); +is ($csv->record_number, 0, "record_number"); is ($csv->binary (1), 1, "binary (1)"); my @fld = ( 'txt =, 
"Hi!"', "Yes", "", 2, undef, "1.09", "\r", undef ); @@ -47,20 +51,34 @@ is ($csv->always_quote (1), 1, "always_quote (1)"); is ($csv->allow_loose_quotes (1), 1, "allow_loose_quotes (1)"); is ($csv->allow_loose_escapes (1), 1, "allow_loose_escapes (1)"); +is ($csv->allow_unquoted_escape (1), 1, "allow_unquoted_escape (1)"); is ($csv->allow_whitespace (1), 1, "allow_whitespace (1)"); is ($csv->blank_is_undef (1), 1, "blank_is_undef (1)"); is ($csv->empty_is_undef (1), 1, "empty_is_undef (1)"); is ($csv->auto_diag (1), 1, "auto_diag (1)"); +is ($csv->auto_diag (2), 2, "auto_diag (2)"); +is ($csv->auto_diag (9), 9, "auto_diag (9)"); +is ($csv->auto_diag ("true"), 1, "auto_diag (\"true\")"); +is ($csv->auto_diag (undef), 0, "auto_diag (undef)"); +is ($csv->auto_diag (""), 0, "auto_diag (\"\")"); +is ($csv->diag_verbose (1), 1, "diag_verbose (1)"); +is ($csv->diag_verbose (2), 2, "diag_verbose (2)"); +is ($csv->diag_verbose (9), 9, "diag_verbose (9)"); +is ($csv->diag_verbose ("true"), 1, "diag_verbose (\"true\")"); +is ($csv->diag_verbose (undef), 0, "diag_verbose (undef)"); +is ($csv->diag_verbose (""), 0, "diag_verbose (\"\")"); is ($csv->verbatim (1), 1, "verbatim (1)"); is ($csv->quote_space (1), 1, "quote_space (1)"); is ($csv->quote_null (1), 1, "quote_null (1)"); +is ($csv->quote_binary (1), 1, "quote_binary (1)"); is ($csv->escape_char ("\\"), "\\", "escape_char (\\)"); ok ($csv->combine (@fld), "combine"); is ($csv->string, qq{=txt \\=, "Hi!"=;=Yes=;==;=2=;;=1.09=;=\r=;\r}, "string"); -is ($csv->quote_space (0), 0, "quote_space (1)"); -is ($csv->quote_null (0), 0, "quote_null (1)"); +is ($csv->quote_space (0), 0, "quote_space (0)"); +is ($csv->quote_null (0), 0, "quote_null (0)"); +is ($csv->quote_binary (0), 0, "quote_binary (0)"); # Funny settings, all three translate to \0 internally ok ($csv = Text::CSV->new ({ @@ -73,7 +91,9 @@ is ($csv->escape_char, undef, "escape_char undef"); ok ($csv->parse ("foo"), "parse (foo)"); $csv->sep_char (","); +is 
($csv->record_number, 1, "record_number"); ok ($csv->parse ("foo"), "parse (foo)"); +is ($csv->record_number, 2, "record_number"); ok (!$csv->parse ("foo,foo\0bar"), "parse (foo)"); $csv->escape_char ("\\"); ok (!$csv->parse ("foo,foo\0bar"), "parse (foo)"); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/t/46_eol_si.t new/Text-CSV-1.32/t/46_eol_si.t --- old/Text-CSV-1.21/t/46_eol_si.t 2010-09-25 10:33:01.000000000 +0200 +++ new/Text-CSV-1.32/t/46_eol_si.t 2013-06-10 17:15:33.000000000 +0200 @@ -211,3 +211,4 @@ $/ = $def_rs; 1; + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/t/50_utf8.t new/Text-CSV-1.32/t/50_utf8.t --- old/Text-CSV-1.21/t/50_utf8.t 2008-10-18 04:40:01.000000000 +0200 +++ new/Text-CSV-1.32/t/50_utf8.t 2013-06-13 06:32:33.000000000 +0200 @@ -6,11 +6,11 @@ use Test::More; BEGIN { - if ($] < 5.008) { + if ($] < 5.008001) { plan skip_all => "UTF8 tests useless in this ancient perl version"; } else { - plan tests => 67; + plan tests => 91; } } @@ -32,7 +32,7 @@ # 0D = \r 3B = ; foreach my $test ( # Space-like characters - [ "\x{0000A0}", "U+0000A0 NO-BRAK SPACE" ], + [ "\x{0000A0}", "U+0000A0 NO-BREAK SPACE" ], [ "\x{00200B}", "U+00200B ZERO WIDTH SPACE" ], # Some characters with possible problems in the code point [ "\x{000122}", "U+000122 LATIN CAPITAL LETTER G WITH CEDILLA" ], @@ -58,9 +58,7 @@ my @out = $csv->fields; # Cannot use is_deeply (), because of the binary content is (scalar @in, scalar @out, "fields $msg"); - for (0 .. $#in) { - is_binary ($in[$_], $out[$_], "field $_ $msg"); - } + is_binary ($in[$_], $out[$_], "field $_ $msg") for 0 .. 
$#in; } # Test if the UTF8 part is accepted, but the \n is not @@ -68,7 +66,6 @@ is ($csv->binary, 0, "bin flag still unset"); is ($csv->error_diag + 0, 2021, "Error 2021"); -# As all utf tests are skipped for older pers, It's safe to use 3-arg open this way my $file = "files/utf8.csv"; SKIP: { open my $fh, "<:encoding(utf8)", $file or @@ -87,3 +84,50 @@ $csv->combine (@$row); ok (utf8::valid ($csv->string), "Combined string is valid utf8"); } + +# Test quote_binary +$csv->always_quote (0); +$csv->quote_space (0); +$csv->quote_binary (0); +ok ($csv->combine (" ", 1, "\x{20ac} "), "Combine"); +is ($csv->string, qq{ ,1,\x{20ac} }, "String 0-0"); +$csv->quote_binary (1); +ok ($csv->combine (" ", 1, "\x{20ac} "), "Combine"); +is ($csv->string, qq{ ,1,"\x{20ac} "}, "String 0-1"); + +$csv->quote_space (1); +$csv->quote_binary (0); +ok ($csv->combine (" ", 1, "\x{20ac} "), "Combine"); +is ($csv->string, qq{" ",1,"\x{20ac} "}, "String 1-0"); +ok ($csv->quote_binary (1), "quote binary on"); +ok ($csv->combine (" ", 1, "\x{20ac} "), "Combine"); +is ($csv->string, qq{" ",1,"\x{20ac} "}, "String 1-1"); + +open my $fh, ">:encoding(utf-8)", "_50test.csv"; +print $fh "euro\n\x{20ac}\neuro\n"; +close $fh; +open $fh, "<:encoding(utf-8)", "_50test.csv"; + +SKIP: { + my $out = ""; + my $isutf8 = $] < 5.008001 ? 
+ sub { !$_[0]; } : # utf8::is_utf8 () not available in 5.8.0 + sub { utf8::is_utf8 ($out); }; + ok ($csv->auto_diag (1), "auto diag"); + ok ($csv->binary (1), "set binary"); + ok ($csv->bind_columns (\$out), "bind"); + ok ($csv->getline ($fh), "parse"); + is ($csv->is_binary (0), 0, "not binary"); + is ($out, "euro", "euro"); + ok (!$isutf8->(1), "not utf8"); + ok ($csv->getline ($fh), "parse"); + is ($csv->is_binary (0), 1, "is binary"); + is ($out, "\x{20ac}", "euro"); + ok ($isutf8->(0), "is utf8"); + ok ($csv->getline ($fh), "parse"); + is ($csv->is_binary (0), 0, "not binary"); + is ($out, "euro", "euro"); + ok (!$isutf8->(1), "not utf8"); + close $fh; + unlink "_50test.csv"; + } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/t/51_utf8.t new/Text-CSV-1.32/t/51_utf8.t --- old/Text-CSV-1.21/t/51_utf8.t 2010-12-27 04:52:07.000000000 +0100 +++ new/Text-CSV-1.32/t/51_utf8.t 2013-06-10 17:12:22.000000000 +0200 @@ -6,8 +6,7 @@ use Test::More; BEGIN { - $ENV{PERL_TEXT_CSV} = 0; - $] < 5.008 and + $] < 5.008001 and plan skip_all => "UTF8 tests useless in this ancient perl version"; } @@ -44,10 +43,11 @@ [ "bytes up :encoding(UTF-8)", ":encoding(UTF-8)", $bytes_up, "utf8", "no warn", ], ); - plan tests => 1 + 6 * @tests; + plan tests => 7 + 6 * @tests; } BEGIN { + $ENV{PERL_TEXT_CSV} = 0; require_ok "Text::CSV"; plan skip_all => "Cannot load Text::CSV" if $@; require "t/util.pl"; @@ -94,3 +94,28 @@ is (warned ($c_warn), warned ($p_warn), "$test against Perl warning"); is (warned ($c_warn), $expect_w, "$test against expected warning"); } + +# Test automatic upgrades for valid UTF-8 +{ my $data = join "\n" => ( + "1,aap,3", # No diac + "1,a\x{e1}p,3", # a_ACUTE in ISO-8859-1 + "1,a\x{c4}\x{83}p,3", # a_BREVE in UTF-8 + ) x 2; + my @expect = ("aap", "a\341p", "a\x{0103}p") x 2; + + my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 }); + + foreach my $bc (undef, 3) { + my @data; + open my $fh, "<", \$data; + 
$bc and $csv->bind_columns (\my ($f1, $f2, $f3)); + is (scalar $csv->bind_columns, $bc, "Columns_bound?"); + while (my $row = $csv->getline ($fh)) { + push @data, $bc ? $f2 : $row->[1]; + } + close $fh; + is_deeply (\@data, \@expect, "Set and reset UTF-8 ".($bc?"no bind":"bind_columns")); + is_deeply ([ map { utf8::is_utf8 ($_) } @data ], + [ "", "", 1, "", "", 1 ], "UTF8 flags"); + } + } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/t/70_rt.t new/Text-CSV-1.32/t/70_rt.t --- old/Text-CSV-1.21/t/70_rt.t 2010-12-27 04:34:59.000000000 +0100 +++ new/Text-CSV-1.32/t/70_rt.t 2013-06-13 06:43:01.000000000 +0200 @@ -4,7 +4,7 @@ $^W = 1; #use Test::More "no_plan"; - use Test::More tests => 438; + use Test::More tests => 453; BEGIN { $ENV{PERL_TEXT_CSV} = 0; @@ -368,6 +368,110 @@ } } +{ # http://rt.cpan.org/Ticket/Display.html?id=74216 + $rt = "74216"; # setting 'eol' affects global input record separator + + open FH, ">$csv_file"; + print FH @{$input{$rt}}; + close FH; + + my $slurp_check = sub { + open FH, "<$csv_file"; + is (scalar @{[<FH>]}, 4); + close FH; + }; + + $slurp_check->(); + + my $crlf = "\015\012"; + open FH, ">_$csv_file"; + print FH "a,b,c" . $crlf . "1,2,3" . 
$crlf; + close FH; + open FH, "<_$csv_file"; + my $csv = Text::CSV->new ({ eol => $crlf }); + is_deeply ($csv->getline (*FH), [qw( a b c )]); + close FH; + unlink "_$csv_file"; + + $slurp_check->(); + + { local $/ = "\n"; + $slurp_check->(); + } + } + +SKIP: { # http://rt.cpan.org/Ticket/Display.html?id=74220 + $] < 5.008002 and skip "UTF8 unreliable in perl $]", 7; + + $rt = "74220"; # Text::CSV can be made to produce bad strings + my $csv = Text::CSV->new ({ binary => 1 }); + + my $ax = chr (0xfa); + my $bx = "foo"; + + # We set the UTF-8 flag on a string with no funny characters + utf8::upgrade ($bx); + is ($bx, "foo", "no funny characters in the string"); + + ok (utf8::valid ($ax), "first string correct in Perl"); + ok (utf8::valid ($bx), "second string correct in Perl"); + + ok ($csv->combine ($ax, $bx), "combine ()"); + ok (my $foo = $csv->string (), "string ()"); + + ok (utf8::valid ($foo), "is combined string correct inside Perl?"); + is ($foo, qq{\xfa,foo}, "expected result"); + } + +=pod + +SKIP: { # http://rt.cpan.org/Ticket/Display.html?id=80680 + skip "skip tests for XS, this tests too long to PP", 20000; + + (eval { require Encode; $Encode::VERSION } || "0.00") =~ m{^([0-9.]+)}; + $1 < 2.47 and skip "Encode is too old for these tests", 20000; + $] < 5.008008 and skip "UTF8+Encode unreliable in perl $]", 20000; + + $rt = "80680"; # Text::CSV produces garbage on some data + + my $csv = Text::CSV->new ({ binary => 1 }); + my $txt = "\x{415}\x{43a}\x{438}\x{43d}\x{431}\x{443}\x{440}\x{433}\x{2116}"; + BIG_LOOP: foreach my $n (1 .. 5000) { + foreach my $e (0 .. 3) { + + my $data = ("a" x $e) . 
($txt x $n); + my $enc = Encode::encode ("UTF-8", $data); + my $exp = qq{1,"$enc"}; + my $out = ""; + open my $fh, ">:encoding(utf-8)", \$out; + $csv->print ($fh, [ 1, $data ]); + close $fh; + + my $l = length ($out); + if ($out eq $exp) { + ok (1, "Buffer boundary check $n/$e ($l)"); + next; + } + + is ($out, $exp, "Data $n/$e ($l)"); + last BIG_LOOP; + } + } + } + +=cut + +{ # http://rt.cpan.org/Ticket/Display.html?id=81295 + $rt = 81295; # escaped sep_char discarded when only item in unquoted field + my $csv = Text::CSV->new ({ escape_char => "\\", auto_diag => 1 }); + ok ($csv->parse ($input{$rt}[0]), "parse without allow_unquoted_escape"); + is_deeply ([ $csv->fields ], [ 1, ",", 3 ], "escaped sep in quoted field"); + $csv->allow_unquoted_escape (1); + ok ($csv->parse ($input{$rt}[1]), "parse with allow_unquoted_escape"); + is_deeply ([ $csv->fields ], [ 1, ",", 3 ], "escaped sep in unquoted field"); + } + + __END__ «24386» - \t doesn't work in _XS, works in _PP VIN StockNumber Year Make Model MD Engine EngineSize Transmission DriveTrain Trim BodyStyle CityFuel HWYFuel Mileage Color InteriorColor InternetPrice RetailPrice Notes ShortReview Certified NewUsed Image_URLs Equipment @@ -412,6 +516,16 @@ --------------090302050909040309030109-- «58356» - Incorrect CSV generated if "quote_space => 0" «61525» - eol not working for values other than "\n"? 
+«74216» - setting 'eol' affects global input record separator +1,2 +3,4 +5,6 +7,8 +«74330» - Text::CSV can be made to produce bad strings +«80680» - Text::CSV produces garbage on some data +«81295» - escaped sep_char discarded when only item in unquoted field +1,"\,",3 +1,\,,3 «x1001» - Lines starting with "0" (Ruslan Dautkhanov) "0","A" "0","A" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/t/71_pp.t new/Text-CSV-1.32/t/71_pp.t --- old/Text-CSV-1.21/t/71_pp.t 2010-10-20 06:39:08.000000000 +0200 +++ new/Text-CSV-1.32/t/71_pp.t 2013-06-13 08:29:31.000000000 +0200 @@ -5,7 +5,7 @@ use strict; $^W = 1; -use Test::More tests => 88; +use Test::More tests => 99; BEGIN { $ENV{PERL_TEXT_CSV} = $ARGV[0] || 0; } @@ -302,3 +302,52 @@ unlink( '__test.csv' ); } + +{ # https://rt.cpan.org/Ticket/Display.html?id=83705 + +my $csv = Text::CSV->new( + { + binary => 1, + allow_loose_escapes => 1, + allow_loose_quotes => 1, + sep_char => q{;}, + escape_char => q{"}, + quote_char => q{"} + } +); + +$csv->parse(q{"6RE";"EINKAUF";"5";"";"2,5" HD"}); +is_deeply([$csv->fields], ["6RE","EINKAUF","5","",'2,5" HD']); + +my $csv_dump = q{"6RE";"EINKAUF";"5";"";"2,5" HD" +"LIDL";"-2"}; + +open( FH, '>__test.csv' ) or die $!; +print FH $csv_dump; +close FH; + +open FH, '<__test.csv'; + +is_deeply( $csv->getline(*FH), ["6RE","EINKAUF","5","",'2,5" HD'] ); +is_deeply( $csv->getline(*FH), ['LIDL','-2'] ); + +close FH; + +unlink( '__test.csv' ); + +} + +{ # imported from t/70_rt.t +my $csv = Text::CSV->new ({ escape_char => "\\", auto_diag => 1 }); + +ok( $csv->parse(q{1,"\,",3}) ); +is_deeply ([ $csv->fields ], [ 1, ",", 3 ], "escaped sep in quoted field"); +ok( $csv->parse(q{1,"2\,4",3}) ); +is_deeply ([ $csv->fields ], [ 1, "2,4", 3 ], "escaped sep in quoted field"); + +$csv->allow_unquoted_escape(1); +ok( $csv->parse(q{1,\,,3}) ); +is_deeply ([ $csv->fields ], [ 1, ",", 3 ], "escaped sep in quoted field"); +ok( $csv->parse(q{1,2\,4,3}) 
); +is_deeply ([ $csv->fields ], [ 1, "2,4", 3 ], "escaped sep in quoted field"); +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/Text-CSV-1.21/t/75_hashref.t new/Text-CSV-1.32/t/75_hashref.t --- old/Text-CSV-1.21/t/75_hashref.t 2010-06-18 05:00:28.000000000 +0200 +++ new/Text-CSV-1.32/t/75_hashref.t 2013-06-10 17:12:22.000000000 +0200 @@ -4,7 +4,7 @@ $^W = 1; #use Test::More "no_plan"; - use Test::More tests => 68; + use Test::More tests => 75; BEGIN { $ENV{PERL_TEXT_CSV} = 0; @@ -123,4 +123,17 @@ close FH; +open FH, ">_75test.csv"; +$hr = { c_foo => 1, foo => "poison", zebra => "Of course" }; +is ($csv->column_names (undef), undef, "reset column headers"); +ok ($csv->column_names (sort keys %$hr), "set column names"); +ok ($csv->eol ("\n"), "set eol for output"); +ok ($csv->print (*FH, [ $csv->column_names ]), "print header"); +ok ($csv->print_hr (*FH, $hr), "print_hr"); +close FH; +open FH, "<_75test.csv"; +ok ($csv->column_names ($csv->getline (*FH)), "get column names"); +is_deeply ($csv->getline_hr (*FH), $hr, "compare to written hr"); +close FH; + unlink "_75test.csv"; -- To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org For additional commands, e-mail: opensuse-commit+help@opensuse.org
participants (1)
-
root@hilbert.suse.de