commit perl-URI-Find for openSUSE:Factory
Hello community, here is the log from the commit of package perl-URI-Find for openSUSE:Factory checked in at 2014-09-17 17:25:56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/perl-URI-Find (Old) and /work/SRC/openSUSE:Factory/.perl-URI-Find.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "perl-URI-Find" Changes: -------- --- /work/SRC/openSUSE:Factory/perl-URI-Find/perl-URI-Find.changes 2011-12-22 10:54:49.000000000 +0100 +++ /work/SRC/openSUSE:Factory/.perl-URI-Find.new/perl-URI-Find.changes 2014-09-17 17:26:16.000000000 +0200 @@ -1,0 +2,38 @@ +Mon Sep 15 15:44:32 UTC 2014 - coolo@suse.com + +- updated to 20140709 + New Features + * The "git" scheme is supported. (Schwern) + * svn, ssh and svn+ssh schemes are supported. [rt.cpan.org 57490] (Schwern) + * Added a --schemeless option to urifind. (Schwern) + + Bug Fixes + * http:// is no longer matched [rt.cpan.org 63283] (Schwern) + + Backwards Incompatibilities + * Previously, URIs stringified to their canonical version. Now + they stringify as written. This results in less loss of + information. For example. "Blah HTTP:://FOO.COM" previously + would stringify as "http://foo.com/" and now it will stringify + as "HTTP://FOO.COM". To restore the old behavior you can call + $uri->canonical. (Schwern) + + Distribution Changes + * No longer using URI::URL. (Schwern) + * Now requires URI 1.60 for Unicode support. (Schwern) + + + 20140702 Wed Jul 2 13:41:47 PDT 2014 + New Features + * IDNA (aka Unicode) domains are now supported. [github 3] (GwenDragon) + * The list of TLDs for schemeless matching has been updated. [github 3] (GwenDragon) + + Bug Fixes + * Handle balanced [], {} and quotes in addition to (). [rt.cpan.org 85053] (Schwern) + * Don't mangle IPv6 URLs. [rt.cpan.org 85053] (Schwern) + * Schemeless is more accurate about two letter TLDs. [github 3] (GwenDragon) + + Distribution Changes + * Switched the issue tracker to Github. (Schwern) + +------------------------------------------------------------------- Old: ---- URI-Find-20111103.tar.gz New: ---- URI-Find-20140709.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ perl-URI-Find.spec ++++++ --- /var/tmp/diff_new_pack.IuVx3G/_old 2014-09-17 17:26:17.000000000 +0200 +++ /var/tmp/diff_new_pack.IuVx3G/_new 2014-09-17 17:26:17.000000000 +0200 @@ -1,7 +1,7 @@ # # spec file for package perl-URI-Find # -# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,7 +17,7 @@ Name: perl-URI-Find -Version: 20111103 +Version: 20140709 Release: 0 %define cpan_name URI-Find Summary: Find URIs in arbitrary text @@ -31,16 +31,13 @@ BuildRequires: perl-macros BuildRequires: perl(Module::Build) >= 0.30 BuildRequires: perl(Test::More) >= 0.88 -BuildRequires: perl(URI) >= 1.00 -BuildRequires: perl(URI::URL) >= 5.00 -#BuildRequires: perl(URI::Find) -Requires: perl(URI) >= 1.00 -Requires: perl(URI::URL) >= 5.00 +BuildRequires: perl(URI) >= 1.60 +Requires: perl(URI) >= 1.60 %{perl_requires} %description This module does one thing: Finds URIs and URLs in plain text. It finds -them quickly and it finds them *all* (or what URI::URL considers a URI to +them quickly and it finds them *all* (or what URI.pm considers a URI to be.) It only finds URIs which include a scheme (http:// or the like), for something a bit less strict have a look at URI::Find::Schemeless. ++++++ URI-Find-20111103.tar.gz -> URI-Find-20140709.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/Build.PL new/URI-Find-20140709/Build.PL --- old/URI-Find-20111103/Build.PL 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/Build.PL 2014-07-10 01:32:18.000000000 +0200 @@ -17,9 +17,8 @@ }, requires => { - perl => '5.6.0', - URI => '1.00', - URI::URL => '5.00', + perl => '5.8.9', + URI => '1.60', }, license => 'perl', @@ -29,8 +28,8 @@ meta_merge => { resources => { homepage => 'http://search.cpan.org/dist/URI-Find', - bugtracker => 'http://rt.cpan.org/Public/Dist/Display.html?Name=URI-Find', - repository => 'http://github.com/schwern/uri-find/tree/master', + bugtracker => 'http://github.com/schwern/URI-Find/issues/', + repository => 'http://github.com/schwern/URI-Find/', } }, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/Changes new/URI-Find-20140709/Changes --- old/URI-Find-20111103/Changes 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/Changes 2014-07-10 01:32:18.000000000 +0200 @@ -1,3 +1,39 @@ +20140709 Wed Jul 9 16:28:37 PDT 2014 + New Features + * The "git" scheme is supported. (Schwern) + * svn, ssh and svn+ssh schemes are supported. [rt.cpan.org 57490] (Schwern) + * Added a --schemeless option to urifind. (Schwern) + + Bug Fixes + * http:// is no longer matched [rt.cpan.org 63283] (Schwern) + + Backwards Incompatibilities + * Previously, URIs stringified to their canonical version. Now + they stringify as written. This results in less loss of + information. For example. "Blah HTTP:://FOO.COM" previously + would stringify as "http://foo.com/" and now it will stringify + as "HTTP://FOO.COM". To restore the old behavior you can call + $uri->canonical. (Schwern) + + Distribution Changes + * No longer using URI::URL. (Schwern) + * Now requires URI 1.60 for Unicode support. (Schwern) + + +20140702 Wed Jul 2 13:41:47 PDT 2014 + New Features + * IDNA (aka Unicode) domains are now supported. [github 3] (GwenDragon) + * The list of TLDs for schemeless matching has been updated. [github 3] (GwenDragon) + + Bug Fixes + * Handle balanced [], {} and quotes in addition to (). [rt.cpan.org 85053] (Schwern) + * Don't mangle IPv6 URLs. [rt.cpan.org 85053] (Schwern) + * Schemeless is more accurate about two letter TLDs. [github 3] (GwenDragon) + + Distribution Changes + * Switched the issue tracker to Github. (Schwern) + + 20111103 Thu Nov 3 12:14:21 PDT 2011 Bug Fixes * URI::URL::strict will no longer leak out of find() if the callback diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/INSTALL new/URI-Find-20140709/INSTALL --- old/URI-Find-20111103/INSTALL 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/INSTALL 2014-07-10 01:32:18.000000000 +0200 @@ -22,7 +22,7 @@ WHAT VERSION OF PERL DO I NEED? - perl 5.6.0 or higher + perl 5.8.9 or higher WHAT MODULES DO I NEED? @@ -30,11 +30,10 @@ To build, test and install the module you need: Module::Build 0.30 or higher - Test::More 0.82 or higher + Test::More 0.88 or higher To run the module you need: - URI.pm 1.00 or higher - URI::URL 5.00 or higher + URI 1.60 or higher They can all be found on http://search.cpan.org/ or by running your CPAN shell. \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/MANIFEST new/URI-Find-20140709/MANIFEST --- old/URI-Find-20111103/MANIFEST 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/MANIFEST 2014-07-10 01:32:18.000000000 +0200 @@ -15,9 +15,7 @@ t/is_schemed.t t/load-schemeless.t t/rfc3986_appendix_c.t -t/strict_leak.t t/urifind/find.t t/urifind/pod.t t/urifind/sciencenews TODO -SIGNATURE Added here by Module::Build diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/MANIFEST.SKIP new/URI-Find-20140709/MANIFEST.SKIP --- old/URI-Find-20111103/MANIFEST.SKIP 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/MANIFEST.SKIP 2014-07-10 01:32:18.000000000 +0200 @@ -60,4 +60,8 @@ # Avoid patches and diff files lying around \.patch$ -\.diff$ \ No newline at end of file +\.diff$ + + +# Don't ship the Travis config. +^\.travis\.yml$ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/META.json new/URI-Find-20140709/META.json --- old/URI-Find-20111103/META.json 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/META.json 2014-07-10 01:32:18.000000000 +0200 @@ -4,7 +4,7 @@ "Michael G Schwern <schwern@pobox.com>" ], "dynamic_config" : 1, - "generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.112621", + "generated_by" : "Module::Build version 0.4205", "license" : [ "perl_5" ], @@ -27,34 +27,33 @@ }, "runtime" : { "requires" : { - "URI" : "1.00", - "URI::URL" : "5.00", - "perl" : "v5.6.0" + "URI" : "1.60", + "perl" : "v5.8.9" } } }, "provides" : { "URI::Find" : { "file" : "lib/URI/Find.pm", - "version" : "20111103" + "version" : "20140709" }, "URI::Find::Schemeless" : { "file" : "lib/URI/Find/Schemeless.pm", - "version" : "20111103" + "version" : "20140709" } }, "release_status" : "stable", "resources" : { "bugtracker" : { - "web" : "http://rt.cpan.org/Public/Dist/Display.html?Name=URI-Find" + "web" : "http://github.com/schwern/URI-Find/issues/" }, "homepage" : "http://search.cpan.org/dist/URI-Find", "license" : [ "http://dev.perl.org/licenses/" ], "repository" : { - "url" : "http://github.com/schwern/uri-find/tree/master" + "url" : "http://github.com/schwern/URI-Find/" } }, - "version" : "20111103" + "version" : "20140709" } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/META.yml new/URI-Find-20140709/META.yml --- old/URI-Find-20111103/META.yml 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/META.yml 2014-07-10 01:32:18.000000000 +0200 @@ -3,31 +3,30 @@ author: - 'Michael G Schwern <schwern@pobox.com>' build_requires: - Module::Build: 0.30 - Test::More: 0.88 + Module::Build: '0.30' + Test::More: '0.88' configure_requires: - Module::Build: 0.30 + Module::Build: '0.30' dynamic_config: 1 -generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.112621' +generated_by: 'Module::Build version 0.4205, CPAN::Meta::Converter version 2.141520' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html - version: 1.4 + version: '1.4' name: URI-Find provides: URI::Find: file: lib/URI/Find.pm - version: 20111103 + version: '20140709' URI::Find::Schemeless: file: lib/URI/Find/Schemeless.pm - version: 20111103 + version: '20140709' requires: - URI: 1.00 - URI::URL: 5.00 - perl: v5.6.0 + URI: '1.60' + perl: v5.8.9 resources: - bugtracker: http://rt.cpan.org/Public/Dist/Display.html?Name=URI-Find + bugtracker: http://github.com/schwern/URI-Find/issues/ homepage: http://search.cpan.org/dist/URI-Find license: http://dev.perl.org/licenses/ - repository: http://github.com/schwern/uri-find/tree/master -version: 20111103 + repository: http://github.com/schwern/URI-Find/ +version: '20140709' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/SIGNATURE new/URI-Find-20140709/SIGNATURE --- old/URI-Find-20111103/SIGNATURE 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/SIGNATURE 1970-01-01 01:00:00.000000000 +0100 @@ -1,45 +0,0 @@ -This file contains message digests of all files listed in MANIFEST, -signed via the Module::Signature module, version 0.68. - -To verify the content in this distribution, first make sure you have -Module::Signature installed, then type: - - % cpansign -v - -It will check each file's integrity, as well as the signature's -validity. If "==> Signature verified OK! <==" is not displayed, -the distribution may already have been compromised, and you should -not run its Makefile.PL or Build.PL. - ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - -SHA1 4980e7e39e8e37b06249f58f54e7dee8055be6cc Build.PL -SHA1 03a01e797601036531f85a24245ca2cfe5d9d413 Changes -SHA1 3ca0307f0585442c160041b8d8f3472359735108 INSTALL -SHA1 90bf8962c7a2396772f77235465ee2fc36b1cb99 MANIFEST -SHA1 fe61e38edb3e406107b3953fd0b9b10dd7b85823 MANIFEST.SKIP -SHA1 6edfdc3c57b4a3a4f68b6515af705c35809d4d08 META.json -SHA1 a754caa1786c7111a0d98b66a2af0b08afef1b19 META.yml -SHA1 fc72946137d28d945b8c7027b4a49a001ec49cd1 README -SHA1 7dc0589de524cbd4c983c5cd6e9da58fd474b34a TODO -SHA1 a08831e8073d4182201c06d2db1c25247b70de9d bin/urifind -SHA1 091ecea74f29c23dae2c2c0edb01a62e86333d9c lib/URI/Find.pm -SHA1 26a9b46160b195bfead3edf5ae76d6c770b0b76c lib/URI/Find/Schemeless.pm -SHA1 c0d4570cd9b83c474cb3761a206edc139653f7f7 t/Find.t -SHA1 789861fdf1ab6465748f8cacafcef27ee70a7ac9 t/filter.t -SHA1 1071febaa25419c5cdb2580bf87ee81834e70132 t/html.t -SHA1 2c057ac42eb47f6b7da78c3b7ebb20b94f33e719 t/is_schemed.t -SHA1 a0fdf62d822e769d80b229bb88f1a013f6ab0964 t/load-schemeless.t -SHA1 0c5eb1bda18407bdf26b8831a08cad4a14938082 t/rfc3986_appendix_c.t -SHA1 8fd6bf632cfd8a654a1edc06ffa2ea4fbdd4647b t/strict_leak.t -SHA1 6aaf29926da83d3c369cf28969bd6c48df8deff4 t/urifind/find.t -SHA1 9ff9e4e6fef205eebead0f792da79dedd61b4b7e t/urifind/pod.t -SHA1 0cb2627de8403934f9893ed2e86145e7c372402c t/urifind/sciencenews ------BEGIN PGP SIGNATURE----- -Version: GnuPG v2.0.18 (Darwin) - -iEYEARECAAYFAk6y6EsACgkQWMohlhD1QydrhwCfd3gNydcSbENAcdiG7aPCPD+D -QgwAnieFByYg0uQKTzSA7o5ITOXRy+gP -=IcdT ------END PGP SIGNATURE----- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/bin/urifind new/URI-Find-20140709/bin/urifind --- old/URI-Find-20111103/bin/urifind 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/bin/urifind 2014-07-10 01:32:18.000000000 +0200 @@ -7,12 +7,13 @@ use strict; -our $VERSION = 20111103; +our $VERSION = 20140709; use File::Basename qw(basename); use Getopt::Long qw(GetOptions); use IO::File; use URI::Find; +use URI::Find::Schemeless; # What to do, and how my $help = 0; @@ -25,19 +26,23 @@ my @pats = (); my @schemes = (); my $dump = 0; +my $schemeless = 0; Getopt::Long::Configure(qw{no_ignore_case bundling}); -GetOptions('s!' => \$sort, - 'u!' => \$unique, - 'p!' => \$prefix, - 'n!' => \$noprefix, - 'r!' => \$reverse, - 'h!' => \$help, - 'v!' => \$version, - 'd!' => sub { $dump = 1 }, - 'D!' => sub { $dump = 2 }, - 'P=s@' => \@pats, - 'S=s@' => \@schemes); +GetOptions( + 's!' => \$sort, + 'u!' => \$unique, + 'p!' => \$prefix, + 'n!' => \$noprefix, + 'r!' => \$reverse, + 'h!' => \$help, + 'v!' => \$version, + 'd!' => sub { $dump = 1 }, + 'D!' => sub { $dump = 2 }, + 'P=s@' => \@pats, + 'S=s@' => \@schemes, + 'schemeless!' => \$schemeless, +); if ($help || $version) { my $prog = basename($0); @@ -122,7 +127,8 @@ $name = $argv; } - my $finder = URI::Find->new(sub { push @uris => [ $name, $_[0] ] }); + my $class = $schemeless ? "URI::Find::Schemeless" : "URI::Find"; + my $finder = $class->new(sub { push @uris => [ $name, $_[0] ] }); $finder->find(\$data); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/lib/URI/Find/Schemeless.pm new/URI-Find-20140709/lib/URI/Find/Schemeless.pm --- old/URI-Find-20111103/lib/URI/Find/Schemeless.pm 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/lib/URI/Find/Schemeless.pm 2014-07-10 01:32:18.000000000 +0200 @@ -12,9 +12,9 @@ use URI::Find (); use vars qw($VERSION); -$VERSION = 20111103; +$VERSION = 20140709; -my($dnsSet) = 'A-Za-z0-9-'; +my($dnsSet) = '\p{isAlpha}A-Za-z0-9-'; # extended for IDNA domains my($cruftSet) = __PACKAGE__->cruft_set . '<>?}'; @@ -87,7 +87,8 @@ @_ == 1 || __PACKAGE__->badinvo; my($self) = shift; - # Updated from http://www.iana.org/domains/root/db/ + use utf8; + # Updated from http://www.iana.org/domains/root/db/ with new TLDs my $plain = join '|', qw( AERO ARPA @@ -110,9 +111,660 @@ PRO TEL TRAVEL + ac + academy + accountants + active + actor + ad + ae + aero + af + ag + agency + ai + airforce + al + am + an + ao + aq + ar + archi + army + arpa + as + asia + associates + at + attorney + au + audio + autos + aw + ax + axa + az + ba + bar + bargains + bayern + bb + bd + be + beer + berlin + best + bf + bg + bh + bi + bid + bike + bio + biz + bj + bl + black + blackfriday + blue + bm + bmw + bn + bo + boutique + bq + br + brussels + bs + bt + build + builders + buzz + bv + bw + by + bz + bzh + ca + cab + camera + camp + capetown + capital + cards + care + career + careers + cash + cat + catering + cc + cd + center + ceo + cf + cg + ch + cheap + christmas + church + ci + citic + ck + cl + claims + cleaning + clinic + clothing + club + cm + cn + co + codes + coffee + college + cologne + com + community + company + computer + condos + construction + consulting + contractors + cooking + cool + coop + country + cr + credit + creditcard + cruises + cu + cv + cw + cx + cy + cz + dance + dating + de + degree + democrat + dental + dentist + desi + diamonds + digital + directory + discount + dj + dk + dm + dnp + do + domains + durban + dz + ec + edu + education + ee + eg + eh + email + engineer + engineering + enterprises + equipment + er + es + estate + et + eu + eus + events + exchange + expert + exposed + fail + farm + feedback + fi + finance + financial + fish + fishing + fitness + fj + fk + flights + florist + fm + fo + foo + foundation + fr + frogans + fund + furniture + futbol + ga + gal + gallery + gb + gd + ge + gf + gg + gh + gi + gift + gives + gl + glass + global + globo + gm + gmo + gn + gop + gov + gp + gq + gr + graphics + gratis + green + gripe + gs + gt + gu + guide + guitars + guru + gw + gy + hamburg + haus + hiphop + hiv + hk + hm + hn + holdings + holiday + homes + horse + host + house + hr + ht + hu + id + ie + il + im + immobilien + in + industries + info + ink + institute + insure + int + international + investments + io + iq + ir + is + it + je + jetzt + jm + jo + jobs + joburg + jp + juegos + kaufen + ke + kg + kh + ki + kim + kitchen + kiwi + km + kn + koeln + kp + kr + kred + kw + ky + kz + la + land + lawyer + lb + lc + lease + li + life + lighting + limited + limo + link + lk + loans + london + lotto + lr + ls + lt + lu + luxe + luxury + lv + ly + ma + maison + management + mango + market + marketing + mc + md + me + media + meet + menu + mf + mg + mh + miami + mil + mini + mk + ml + mm + mn + mo + mobi + moda + moe + monash + mortgage + moscow + motorcycles + mp + mq + mr + ms + mt + mu + museum + mv + mw + mx + my + mz + na + nagoya + name + navy + nc + ne + net + neustar + nf + ng + nhk + ni + ninja + nl + no + np + nr + nu + nyc + nz + okinawa + om + onl + org + organic + ovh + pa + paris + partners + parts + pe + pf + pg + ph + photo + photography + photos + physio + pics + pictures + pink + pk + pl + plumbing + pm + pn + post + pr + press + pro + productions + properties + ps + pt + pub + pw + py + qa + qpon + quebec + re + recipes + red + rehab + reise + reisen + ren + rentals + repair + report + republican + rest + reviews + rich + rio + ro + rocks + rodeo + rs + ru + ruhr + rw + ryukyu + sa + saarland + sb + sc + schule + scot + sd + se + services + sexy + sg + sh + shiksha + shoes + si + singles + sj + sk + sl + sm + sn + so + social + software + sohu + solar + solutions + soy + space + sr + ss + st + su + supplies + supply + support + surf + surgery + sv + sx + sy + systems + sz + tattoo + tax + tc + td + technology + tel + tf + tg + th + tienda + tips + tirol + tj + tk + tl + tm + tn + to + today + tokyo + tools + town + toys + tp + tr + trade + training + travel + tt + tv + tw + tz + ua + ug + uk + um + university + uno + us + uy + uz + va + vacations + vc + ve + vegas + ventures + versicherung + vet + vg + vi + viajes + villas + vision + vlaanderen + vn + vodka + vote + voting + voto + voyage + vu + wang + watch + webcam + website + wed + wf + wien + wiki + works + ws + wtc + wtf + 测试 + परीक्षा + 集团 + 在线 + 한국 + ভারত + موقع + বাংলা + 公益 + 公司 + 移动 + 我爱你 + москва + испытание + қаз + онлайн + сайт + срб + 테스트 + орг + 삼성 + சிங்கப்பூர் + 商标 + 商城 + дети + мкд + טעסט + 中文网 + 中信 + 中国 + 中國 + భారత్ + ලංකා + 測試 + ભારત + भारत + آزمایشی + பரிட்சை + संगठन + 网络 + укр + 香港 + δοκιμή + إختبار + 台湾 + 台灣 + мон + الجزائر + عمان + ایران + امارات + بازار + پاکستان + الاردن + بھارت + المغرب + السعودية + سودان + مليسيا + شبكة + გე + 机构 + 组织机构 + ไทย + سورية + рф + تونس + みんな + 世界 + ਭਾਰਤ + 网址 + 游戏 + مصر + قطر + இலங்கை + இந்தியா + 新加坡 + فلسطين + テスト + 政务 + xxx + xyz + yachts + ye + yokohama + yt + za + zm + zone + zw ); - - return qr/(?:[a-z]{2}|$plain)/i; + + return qr/(?:$plain)/i; } =head1 AUTHOR diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/lib/URI/Find.pm new/URI-Find-20140709/lib/URI/Find.pm --- old/URI-Find-20111103/lib/URI/Find.pm 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/lib/URI/Find.pm 2014-07-10 01:32:18.000000000 +0200 @@ -10,21 +10,28 @@ use base qw(Exporter); use vars qw($VERSION @EXPORT); -$VERSION = 20111103; +$VERSION = 20140709; @EXPORT = qw(find_uris); use constant YES => (1==1); use constant NO => !YES; use Carp qw(croak); -use URI::URL; require URI; +my $reserved = q(;/?:@&=+$,[]); +my $mark = q(-_.!~*'()); +my $unreserved = "A-Za-z0-9\Q$mark\E"; +my $uric = quotemeta($reserved) . '\p{isAlpha}' . $unreserved . "%"; + # URI scheme pattern without the non-alpha numerics. # Those are extremely uncommon and interfere with the match. -my($schemeRe) = qr/[a-zA-Z][a-zA-Z0-9]*/; -my($uricSet) = $URI::uric; +my($schemeRe) = qr/[a-zA-Z][a-zA-Z0-9\+]*/; +my($uricSet) = $uric; # use new set + +# Some schemes which URI.pm does not explicitly support. +my $extraSchemesRe = qr{^(?:git|svn|ssh|svn\+ssh)$}; # We need to avoid picking up 'HTTP::Request::Common' so we have a # subset of uric without a colon ("I have no colon and yet I must poop") @@ -32,7 +39,7 @@ $uricCheat =~ tr/://d; # Identifying characters accidentally picked up with a URI. -my($cruftSet) = q{]),.'";}; #'# +my($cruftSet) = q{])\},.'";}; #'# =head1 NAME @@ -49,10 +56,10 @@ =head1 DESCRIPTION -This module does one thing: Finds URIs and URLs in plain text. It finds -them quickly and it finds them B<all> (or what URI::URL considers a URI -to be.) It only finds URIs which include a scheme (http:// or the -like), for something a bit less strict have a look at +This module does one thing: Finds URIs and URLs in plain text. It +finds them quickly and it finds them B<all> (or what URI.pm considers +a URI to be.) It only finds URIs which include a scheme (http:// or +the like), for something a bit less strict have a look at L<URI::Find::Schemeless|URI::Find::Schemeless>. For a command-line interface, L<urifind> is provided. @@ -68,10 +75,9 @@ Creates a new URI::Find object. &callback is a function which is called on each URI found. It is -passed two arguments, the first is a URI::URL object representing the -URI found. The second is the original text of the URI found. The -return value of the callback will replace the original URI in the -text. +passed two arguments, the first is a URI object representing the URI +found. The second is the original text of the URI found. The return +value of the callback will replace the original URI in the text. =cut @@ -120,7 +126,7 @@ $self->{_uris_found} = 0; # Yes, evil. Basically, look for something vaguely resembling a URL, - # then hand it off to URI::URL for examination. If it passes, throw + # then hand it off to URI for examination. If it passes, throw # it to a callback and put the result in its place. local $SIG{__DIE__} = 'DEFAULT'; my $uri_cand; @@ -310,6 +316,14 @@ =cut +my %balanced_cruft = ( + '(' => ')', + '{' => '}', + '[' => ']', + '"' => '"', + q['] => q['], +); + sub decruft { @_ == 2 || __PACKAGE__->badinvo; my($self, $orig_match) = @_; @@ -326,11 +340,8 @@ $cruft =~ s/^;//; } - my $opening = $orig_match =~ tr/(/(/; - my $closing = $orig_match =~ tr/)/)/; - if ( $cruft =~ /\)$/ && $opening == ( $closing + 1 ) ) { - $orig_match .= ')'; - $cruft =~ s/\)$//; + while( my($open, $close) = each %balanced_cruft ) { + $self->recruft_balanced(\$orig_match, \$cruft, $open, $close); } $self->{end_cruft} = $cruft if $cruft; @@ -339,6 +350,23 @@ return $orig_match; } + +sub recruft_balanced { + my $self = shift; + my($orig_match, $cruft, $open, $close) = @_; + + my $open_count = () = $$orig_match =~ m{\Q$open}g; + my $close_count = () = $$orig_match =~ m{\Q$close}g; + + if ( $$cruft =~ /\Q$close\E$/ && $open_count == ( $close_count + 1 ) ) { + $$orig_match .= $close; + $$cruft =~ s/\Q$close\E$//; + } + + return; +} + + =item B<recruft> my $uri = $self->recruft($uri); @@ -492,15 +520,18 @@ $uri =~ $self->schemeless_uri_re and $uri !~ /^<?$schemeRe:/; - # Set strict to avoid bogus schemes - my $old_strict = URI::URL::strict(1); - eval { - $uri = URI::URL->new($uri); - }; + $uri = URI->new($uri); - # And restore it - URI::URL::strict($old_strict); + # Throw out anything with an invalid scheme. + my $has_invalid_scheme = $uri->isa("URI::_foreign") && + $uri->scheme !~ $extraSchemesRe; + + # Toss out things like http:// but keep file:/// + my $is_empty = $uri =~ m{^$schemeRe://$}; + + undef $uri if $has_invalid_scheme || $is_empty; + }; if($@ || !defined $uri) { # leave everything untouched, its not a URI. return NO; @@ -539,8 +570,7 @@ =head1 SEE ALSO -L<urifind>, L<URI::Find::Schemeless>, L<URI::URL>, L<URI>, -RFC 3986 Appendix C +L<urifind>, L<URI::Find::Schemeless>, L<URI>, RFC 3986 Appendix C =cut diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/t/Find.t new/URI-Find-20140709/t/Find.t --- old/URI-Find-20111103/t/Find.t 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/t/Find.t 2014-07-10 01:32:18.000000000 +0200 @@ -2,6 +2,7 @@ use strict; +use open ':std', ':encoding(utf8)'; use Test::More 'no_plan'; use_ok 'URI::Find'; @@ -53,29 +54,28 @@ my %Tests; BEGIN { my $all = join '', keys %Run; - - # ARGH! URI::URL is inconsistant in how it normalizes URLs! - # HTTP URLs get a trailing slash, FTP and gopher do not. + + use utf8; %Tests = ( 'Something something something.travel and stuff' - => [[ S => 'http://something.travel/' ]], - '<URL:http://www.perl.com>' => 'http://www.perl.com/', + => [[ S => 'http://something.travel' ]], + '<URL:http://www.perl.com>' => 'http://www.perl.com', '<ftp://ftp.site.org>' => 'ftp://ftp.site.org', '<ftp.site.org>' => [[ S => 'ftp://ftp.site.org' ]], 'Make sure "http://www.foo.com" is caught' => - 'http://www.foo.com/', - 'http://www.foo.com' => 'http://www.foo.com/', - 'www.foo.com' => [[ S => 'http://www.foo.com/' ]], + 'http://www.foo.com', + 'http://www.foo.com' => 'http://www.foo.com', + 'www.foo.com' => [[ S => 'http://www.foo.com' ]], 'ftp.foo.com' => [[ S => 'ftp://ftp.foo.com' ]], 'gopher://moo.foo.com' => 'gopher://moo.foo.com', 'I saw this site, http://www.foo.com, and its really neat!' - => 'http://www.foo.com/', + => 'http://www.foo.com', 'Foo Industries (at http://www.foo.com)' - => 'http://www.foo.com/', + => 'http://www.foo.com', 'Oh, dear. Another message from Dejanews. http://www.deja.com/%5BST_rn=ps%5D/qs.xp?ST=PS&svcclass=dnyr&QRY=lwall&defaultOp=AND&DBS=1&OP=dnquery.xp&LNG=ALL&subjects=&groups=&authors=&fromdate=&todate=&showsort=score&maxhits=25 How fun.' => 'http://www.deja.com/%5BST_rn=ps%5D/qs.xp?ST=PS&svcclass=dnyr&QRY=lwall&defaultOp=AND&DBS=1&OP=dnquery.xp&LNG=ALL&subjects=&groups=&authors=&fromdate=&todate=&showsort=score&maxhits=25', 'Hmmm, Storyserver from news.com. http://news.cnet.com/news/0-1004-200-1537811.html?tag=st.ne.1002.thed.1004-2... How nice.' - => [[S => 'http://news.com/'], + => [[S => 'http://news.com'], [$all => 'http://news.cnet.com/news/0-1004-200-1537811.html?tag=st.ne.1002.thed.1004-2...']], '$html = get("http://www.perl.com/");' => 'http://www.perl.com/', q|my $url = url('http://www.perl.com/cgi-bin/cpan_mod');| @@ -83,36 +83,65 @@ 'http://www.perl.org/support/online_support.html#mail' => 'http://www.perl.org/support/online_support.html#mail', 'irc.lightning.net irc.mcs.net' - => [[S => 'http://irc.lightning.net/'], - [S => 'http://irc.mcs.net/']], - 'foo.bar.xx/~baz/', - => [[S => 'http://foo.bar.xx/~baz/']], + => [[S => 'http://irc.lightning.net'], + [S => 'http://irc.mcs.net']], + 'foo.bar.xx/~baz/' => [], 'foo.bar.xx/~baz/ abcd.efgh.mil, none.such/asdf/ hi.there.org' - => [[S => 'http://foo.bar.xx/~baz/'], - [S => 'http://abcd.efgh.mil/'], - [S => 'http://hi.there.org/']], + => [[S => 'http://abcd.efgh.mil'], + [S => 'http://hi.there.org']], 'foo:<1.2.3.4>' - => [[S => 'http://1.2.3.4/']], + => [[S => 'http://1.2.3.4']], 'mail.eserv.com.au? failed before ? designated end' - => [[S => 'http://mail.eserv.com.au/']], + => [[S => 'http://mail.eserv.com.au']], 'foo.info/himom ftp.bar.biz' => [[S => 'http://foo.info/himom'], [S => 'ftp://ftp.bar.biz']], - '(http://round.com)' => 'http://round.com/', - '[http://square.com]' => 'http://square.com/', - '{http://brace.com}' => 'http://brace.com/', - '<http://angle.com>' => 'http://angle.com/', - '(round.com)' => [[S => 'http://round.com/' ]], - '[square.com]' => [[S => 'http://square.com/' ]], - '{brace.com}' => [[S => 'http://brace.com/' ]], - '<angle.com>' => [[S => 'http://angle.com/' ]], - '<x>intag.com</x>' => [[S => 'http://intag.com/' ]], + '(http://round.com)' => 'http://round.com', + '[http://square.com]' => 'http://square.com', + '{http://brace.com}' => 'http://brace.com', + '<http://angle.com>' => 'http://angle.com', + '(round.com)' => [[S => 'http://round.com' ]], + '[square.com]' => [[S => 'http://square.com' ]], + '{brace.com}' => [[S => 'http://brace.com' ]], + '<angle.com>' => [[S => 'http://angle.com' ]], + '<x>intag.com</x>' => [[S => 'http://intag.com' ]], '[mailto:somebody@company.ext]' => 'mailto:somebody@company.ext', - 'HTtp://MIXED-Case.Com' => 'http://mixed-case.com/', + 'HTtp://MIXED-Case.Com' => 'HTtp://MIXED-Case.Com', "The technology of magnetic energy has become so powerful an entire ". "house can...http://bit.ly/8yEdeb" => "http://bit.ly/8yEdeb", 'http://www.foo.com/bar((baz)blah)' => 'http://www.foo.com/bar((baz)blah)', + 'https://[2607:5300:60:1509::228d:413a]' => 'https://[2607:5300:60:1509::228d:413a]', + '[https://[2607:5300:60:1509::228d:413a]]' => 'https://[2607:5300:60:1509::228d:413a]', + + # Tests for file: + "origin file:///Users/schwern/devel/URI-Find/ (fetch)" + => 'file:///Users/schwern/devel/URI-Find/', + "This is how you express the root path file:/// as a URL" + => 'file:///', + + # Tests for git: + 'GwenDragon git://github.com/GwenDragon/uri-find.git (fetch)' + => 'git://github.com/GwenDragon/uri-find.git', + + # Tests for svn+ssh: + "URLs like svn+ssh://example.net aren't found" + => 'svn+ssh://example.net', + + # Tests for IDNA domains + 'http://müller.de' => 'http://xn--mller-kva.de', + 'http://موقع.وزارة-الاتصالات.مصر' => 'http://xn--4gbrim.xn----ymcbaaajlc6dj7bxne2c.xn--wgbh1c', + 'http://правительство.рф' => 'http://xn--80aealotwbjpid2k.xn--p1ai', + 'http://北京大学.中國' => 'http://xn--1lq90ic7fzpc.xn--fiqz9s', + 'http://北京大学.cn' => 'http://xn--1lq90ic7fzpc.cn', + + # Test new TLDs + 'http://my.test.transport' => 'http://my.test.transport', + 'http://regierung.bayern' => 'http://regierung.bayern', + 'http://kaiser-senf.gmbh/shop/' => 'http://kaiser-senf.gmbh/shop/', + 'Have vacation in lovely Bavaria and visit tourist.in.bayern to go to King Ludwig New Schwanstein. For political information see website regierung.bayern to get more.' + => [[S => 'http://tourist.in.bayern' ], [S => 'http://regierung.bayern' ]], + 'The mießlich-österlich-mück.ag was established in 2032 by M. Ostrich.' => [[S => 'http://xn--mielich-sterlich-mck-dwb52cye.ag' ]], # False tests 'HTTP::Request::Common' => [], @@ -125,6 +154,9 @@ 'x comp.ai.nat-lang libdb.so.3 x' => [], 'x comp.ai.nat-lang libdb.so.3 x' => [], 'www.marselisl www.info@skive-hallerne.dk' => [], + 'bogusscheme://foo.com/' => [], + 'http:' => [], + 'http://' => [], # XXX broken # q{$url = 'http://'.rand(1000000).'@anonymizer.com/'.$url;} # => [], @@ -197,13 +229,3 @@ while( my($text, $rspec_list) = each %Tests ) { run $text, @$rspec_list; } - -# We used to turn URI::URL strict on and leave it on. - -for my $val (0, 1) { - URI::URL::strict($val); - my $f = URI::Find->new(sub { }); - my $t = "foo"; - $f->find(\$t); - is $val, URI::URL::strict(), "URI::URL::strict $val"; -} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/t/strict_leak.t new/URI-Find-20140709/t/strict_leak.t --- old/URI-Find-20111103/t/strict_leak.t 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/t/strict_leak.t 1970-01-01 01:00:00.000000000 +0100 @@ -1,35 +0,0 @@ -#!/usr/bin/env perl -w - -# Test that URI::URL::strict does not remain on if a callback or filter dies. -# rt.cpan.org 71153 - -use strict; -use warnings; - -use Test::More; - -use URI::Find; - -note "with a dying callback"; { - my $text = "Foo http://example.com bar"; - my $finder = URI::Find->new( sub { die; } ); - - URI::URL::strict(0); - ok !URI::URL::strict(); - ok !eval { $finder->find(\$text); 1 }; - ok !URI::URL::strict(); -} - - -note "with a dying filter"; { - my $text = "Foo http://example.com bar"; - my $finder = URI::Find->new( sub {} ); - - URI::URL::strict(0); - ok !URI::URL::strict(); - ok !eval { $finder->find(\$text, sub { die; }); 1 }; - ok !URI::URL::strict(); -} - - -done_testing; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/t/urifind/find.t new/URI-Find-20140709/t/urifind/find.t --- old/URI-Find-20111103/t/urifind/find.t 2011-11-03 20:15:23.000000000 +0100 +++ new/URI-Find-20140709/t/urifind/find.t 2014-07-10 01:32:18.000000000 +0200 @@ -5,8 +5,6 @@ use Test::More; use File::Spec; -plan tests => 13; - ok(my $ifile = File::Spec->catfile(qw(t urifind sciencenews)), "Test file found"); my $urifind = File::Spec->catfile(qw(blib script urifind)); @@ -42,3 +40,28 @@ @data = `$^X $urifind -S http -P \.org $ifile`; is(@data, 8, "Correct number elements when invoked with -P \.org -S http"); + +@data = `$^X $urifind --schemeless $ifile`; +chomp @data; +is_deeply \@data, [map { "$_" } qw( + http://66.33.90.123 + http://efwd.dnsix.com + mailto:eletter@lists.sciencenews.org + mailto:eletter-help@lists.sciencenews.org + mailto:eletter-unsubscribe@lists.sciencenews.org + mailto:eletter-subscribe@lists.sciencenews.org + http://www.sciencenews.org + http://www.sciencenews.org/20030705/fob1.asp + http://www.sciencenews.org/20030705/fob5.asp + http://www.sciencenews.org/20030705/bob8.asp + http://www.sciencenews.org/20030705/mathtrek.asp + http://www.sciencenews.org/20030705/food.asp + http://www.sciencenews.org + http://www.sciencenews.org/20030705/toc.asp + http://www.sciencenews.org + http://www.sciencenews.org/20030705/fob2.asp + http://www.sciencenews.org/20030705/fob3.asp + http:/% +)]; + +done_testing; -- To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org For additional commands, e-mail: opensuse-commit+help@opensuse.org
participants (1)
-
root@hilbert.suse.de