Hello community,
here is the log from the commit of package perl-URI-Find for openSUSE:Factory checked in at 2014-09-17 17:25:56
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/perl-URI-Find (Old)
and /work/SRC/openSUSE:Factory/.perl-URI-Find.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "perl-URI-Find"
Changes:
--------
--- /work/SRC/openSUSE:Factory/perl-URI-Find/perl-URI-Find.changes 2011-12-22 10:54:49.000000000 +0100
+++ /work/SRC/openSUSE:Factory/.perl-URI-Find.new/perl-URI-Find.changes 2014-09-17 17:26:16.000000000 +0200
@@ -1,0 +2,38 @@
+Mon Sep 15 15:44:32 UTC 2014 - coolo@suse.com
+
+- updated to 20140709
+ New Features
+ * The "git" scheme is supported. (Schwern)
+ * svn, ssh and svn+ssh schemes are supported. [rt.cpan.org 57490] (Schwern)
+ * Added a --schemeless option to urifind. (Schwern)
+
+ Bug Fixes
+ * http:// is no longer matched [rt.cpan.org 63283] (Schwern)
+
+ Backwards Incompatibilities
+ * Previously, URIs stringified to their canonical version. Now
+ they stringify as written. This results in less loss of
+ information. For example, "Blah HTTP://FOO.COM" previously
+ would stringify as "http://foo.com/" and now it will stringify
+ as "HTTP://FOO.COM". To restore the old behavior you can call
+ $uri->canonical. (Schwern)
+
+ Distribution Changes
+ * No longer using URI::URL. (Schwern)
+ * Now requires URI 1.60 for Unicode support. (Schwern)
+
+
+ 20140702 Wed Jul 2 13:41:47 PDT 2014
+ New Features
+ * IDNA (aka Unicode) domains are now supported. [github 3] (GwenDragon)
+ * The list of TLDs for schemeless matching has been updated. [github 3] (GwenDragon)
+
+ Bug Fixes
+ * Handle balanced [], {} and quotes in addition to (). [rt.cpan.org 85053] (Schwern)
+ * Don't mangle IPv6 URLs. [rt.cpan.org 85053] (Schwern)
+ * Schemeless is more accurate about two letter TLDs. [github 3] (GwenDragon)
+
+ Distribution Changes
+ * Switched the issue tracker to Github. (Schwern)
+
+-------------------------------------------------------------------
Old:
----
URI-Find-20111103.tar.gz
New:
----
URI-Find-20140709.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ perl-URI-Find.spec ++++++
--- /var/tmp/diff_new_pack.IuVx3G/_old 2014-09-17 17:26:17.000000000 +0200
+++ /var/tmp/diff_new_pack.IuVx3G/_new 2014-09-17 17:26:17.000000000 +0200
@@ -1,7 +1,7 @@
#
# spec file for package perl-URI-Find
#
-# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany.
+# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -17,7 +17,7 @@
Name: perl-URI-Find
-Version: 20111103
+Version: 20140709
Release: 0
%define cpan_name URI-Find
Summary: Find URIs in arbitrary text
@@ -31,16 +31,13 @@
BuildRequires: perl-macros
BuildRequires: perl(Module::Build) >= 0.30
BuildRequires: perl(Test::More) >= 0.88
-BuildRequires: perl(URI) >= 1.00
-BuildRequires: perl(URI::URL) >= 5.00
-#BuildRequires: perl(URI::Find)
-Requires: perl(URI) >= 1.00
-Requires: perl(URI::URL) >= 5.00
+BuildRequires: perl(URI) >= 1.60
+Requires: perl(URI) >= 1.60
%{perl_requires}
%description
This module does one thing: Finds URIs and URLs in plain text. It finds
-them quickly and it finds them *all* (or what URI::URL considers a URI to
+them quickly and it finds them *all* (or what URI.pm considers a URI to
be.) It only finds URIs which include a scheme (http:// or the like), for
something a bit less strict have a look at URI::Find::Schemeless.
++++++ URI-Find-20111103.tar.gz -> URI-Find-20140709.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/Build.PL new/URI-Find-20140709/Build.PL
--- old/URI-Find-20111103/Build.PL 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/Build.PL 2014-07-10 01:32:18.000000000 +0200
@@ -17,9 +17,8 @@
},
requires => {
- perl => '5.6.0',
- URI => '1.00',
- URI::URL => '5.00',
+ perl => '5.8.9',
+ URI => '1.60',
},
license => 'perl',
@@ -29,8 +28,8 @@
meta_merge => {
resources => {
homepage => 'http://search.cpan.org/dist/URI-Find',
- bugtracker => 'http://rt.cpan.org/Public/Dist/Display.html?Name=URI-Find',
- repository => 'http://github.com/schwern/uri-find/tree/master',
+ bugtracker => 'http://github.com/schwern/URI-Find/issues/',
+ repository => 'http://github.com/schwern/URI-Find/',
}
},
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/Changes new/URI-Find-20140709/Changes
--- old/URI-Find-20111103/Changes 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/Changes 2014-07-10 01:32:18.000000000 +0200
@@ -1,3 +1,39 @@
+20140709 Wed Jul 9 16:28:37 PDT 2014
+ New Features
+ * The "git" scheme is supported. (Schwern)
+ * svn, ssh and svn+ssh schemes are supported. [rt.cpan.org 57490] (Schwern)
+ * Added a --schemeless option to urifind. (Schwern)
+
+ Bug Fixes
+ * http:// is no longer matched [rt.cpan.org 63283] (Schwern)
+
+ Backwards Incompatibilities
+ * Previously, URIs stringified to their canonical version. Now
+ they stringify as written. This results in less loss of
+ information. For example, "Blah HTTP://FOO.COM" previously
+ would stringify as "http://foo.com/" and now it will stringify
+ as "HTTP://FOO.COM". To restore the old behavior you can call
+ $uri->canonical. (Schwern)
+
+ Distribution Changes
+ * No longer using URI::URL. (Schwern)
+ * Now requires URI 1.60 for Unicode support. (Schwern)
+
+
+20140702 Wed Jul 2 13:41:47 PDT 2014
+ New Features
+ * IDNA (aka Unicode) domains are now supported. [github 3] (GwenDragon)
+ * The list of TLDs for schemeless matching has been updated. [github 3] (GwenDragon)
+
+ Bug Fixes
+ * Handle balanced [], {} and quotes in addition to (). [rt.cpan.org 85053] (Schwern)
+ * Don't mangle IPv6 URLs. [rt.cpan.org 85053] (Schwern)
+ * Schemeless is more accurate about two letter TLDs. [github 3] (GwenDragon)
+
+ Distribution Changes
+ * Switched the issue tracker to Github. (Schwern)
+
+
20111103 Thu Nov 3 12:14:21 PDT 2011
Bug Fixes
* URI::URL::strict will no longer leak out of find() if the callback
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/INSTALL new/URI-Find-20140709/INSTALL
--- old/URI-Find-20111103/INSTALL 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/INSTALL 2014-07-10 01:32:18.000000000 +0200
@@ -22,7 +22,7 @@
WHAT VERSION OF PERL DO I NEED?
- perl 5.6.0 or higher
+ perl 5.8.9 or higher
WHAT MODULES DO I NEED?
@@ -30,11 +30,10 @@
To build, test and install the module you need:
Module::Build 0.30 or higher
- Test::More 0.82 or higher
+ Test::More 0.88 or higher
To run the module you need:
- URI.pm 1.00 or higher
- URI::URL 5.00 or higher
+ URI 1.60 or higher
They can all be found on http://search.cpan.org/ or by running your CPAN shell.
\ No newline at end of file
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/MANIFEST new/URI-Find-20140709/MANIFEST
--- old/URI-Find-20111103/MANIFEST 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/MANIFEST 2014-07-10 01:32:18.000000000 +0200
@@ -15,9 +15,7 @@
t/is_schemed.t
t/load-schemeless.t
t/rfc3986_appendix_c.t
-t/strict_leak.t
t/urifind/find.t
t/urifind/pod.t
t/urifind/sciencenews
TODO
-SIGNATURE Added here by Module::Build
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/MANIFEST.SKIP new/URI-Find-20140709/MANIFEST.SKIP
--- old/URI-Find-20111103/MANIFEST.SKIP 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/MANIFEST.SKIP 2014-07-10 01:32:18.000000000 +0200
@@ -60,4 +60,8 @@
# Avoid patches and diff files lying around
\.patch$
-\.diff$
\ No newline at end of file
+\.diff$
+
+
+# Don't ship the Travis config.
+^\.travis\.yml$
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/META.json new/URI-Find-20140709/META.json
--- old/URI-Find-20111103/META.json 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/META.json 2014-07-10 01:32:18.000000000 +0200
@@ -4,7 +4,7 @@
"Michael G Schwern "
],
"dynamic_config" : 1,
- "generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.112621",
+ "generated_by" : "Module::Build version 0.4205",
"license" : [
"perl_5"
],
@@ -27,34 +27,33 @@
},
"runtime" : {
"requires" : {
- "URI" : "1.00",
- "URI::URL" : "5.00",
- "perl" : "v5.6.0"
+ "URI" : "1.60",
+ "perl" : "v5.8.9"
}
}
},
"provides" : {
"URI::Find" : {
"file" : "lib/URI/Find.pm",
- "version" : "20111103"
+ "version" : "20140709"
},
"URI::Find::Schemeless" : {
"file" : "lib/URI/Find/Schemeless.pm",
- "version" : "20111103"
+ "version" : "20140709"
}
},
"release_status" : "stable",
"resources" : {
"bugtracker" : {
- "web" : "http://rt.cpan.org/Public/Dist/Display.html?Name=URI-Find"
+ "web" : "http://github.com/schwern/URI-Find/issues/"
},
"homepage" : "http://search.cpan.org/dist/URI-Find",
"license" : [
"http://dev.perl.org/licenses/"
],
"repository" : {
- "url" : "http://github.com/schwern/uri-find/tree/master"
+ "url" : "http://github.com/schwern/URI-Find/"
}
},
- "version" : "20111103"
+ "version" : "20140709"
}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/META.yml new/URI-Find-20140709/META.yml
--- old/URI-Find-20111103/META.yml 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/META.yml 2014-07-10 01:32:18.000000000 +0200
@@ -3,31 +3,30 @@
author:
- 'Michael G Schwern '
build_requires:
- Module::Build: 0.30
- Test::More: 0.88
+ Module::Build: '0.30'
+ Test::More: '0.88'
configure_requires:
- Module::Build: 0.30
+ Module::Build: '0.30'
dynamic_config: 1
-generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.112621'
+generated_by: 'Module::Build version 0.4205, CPAN::Meta::Converter version 2.141520'
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
- version: 1.4
+ version: '1.4'
name: URI-Find
provides:
URI::Find:
file: lib/URI/Find.pm
- version: 20111103
+ version: '20140709'
URI::Find::Schemeless:
file: lib/URI/Find/Schemeless.pm
- version: 20111103
+ version: '20140709'
requires:
- URI: 1.00
- URI::URL: 5.00
- perl: v5.6.0
+ URI: '1.60'
+ perl: v5.8.9
resources:
- bugtracker: http://rt.cpan.org/Public/Dist/Display.html?Name=URI-Find
+ bugtracker: http://github.com/schwern/URI-Find/issues/
homepage: http://search.cpan.org/dist/URI-Find
license: http://dev.perl.org/licenses/
- repository: http://github.com/schwern/uri-find/tree/master
-version: 20111103
+ repository: http://github.com/schwern/URI-Find/
+version: '20140709'
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/SIGNATURE new/URI-Find-20140709/SIGNATURE
--- old/URI-Find-20111103/SIGNATURE 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/SIGNATURE 1970-01-01 01:00:00.000000000 +0100
@@ -1,45 +0,0 @@
-This file contains message digests of all files listed in MANIFEST,
-signed via the Module::Signature module, version 0.68.
-
-To verify the content in this distribution, first make sure you have
-Module::Signature installed, then type:
-
- % cpansign -v
-
-It will check each file's integrity, as well as the signature's
-validity. If "==> Signature verified OK! <==" is not displayed,
-the distribution may already have been compromised, and you should
-not run its Makefile.PL or Build.PL.
-
------BEGIN PGP SIGNED MESSAGE-----
-Hash: SHA1
-
-SHA1 4980e7e39e8e37b06249f58f54e7dee8055be6cc Build.PL
-SHA1 03a01e797601036531f85a24245ca2cfe5d9d413 Changes
-SHA1 3ca0307f0585442c160041b8d8f3472359735108 INSTALL
-SHA1 90bf8962c7a2396772f77235465ee2fc36b1cb99 MANIFEST
-SHA1 fe61e38edb3e406107b3953fd0b9b10dd7b85823 MANIFEST.SKIP
-SHA1 6edfdc3c57b4a3a4f68b6515af705c35809d4d08 META.json
-SHA1 a754caa1786c7111a0d98b66a2af0b08afef1b19 META.yml
-SHA1 fc72946137d28d945b8c7027b4a49a001ec49cd1 README
-SHA1 7dc0589de524cbd4c983c5cd6e9da58fd474b34a TODO
-SHA1 a08831e8073d4182201c06d2db1c25247b70de9d bin/urifind
-SHA1 091ecea74f29c23dae2c2c0edb01a62e86333d9c lib/URI/Find.pm
-SHA1 26a9b46160b195bfead3edf5ae76d6c770b0b76c lib/URI/Find/Schemeless.pm
-SHA1 c0d4570cd9b83c474cb3761a206edc139653f7f7 t/Find.t
-SHA1 789861fdf1ab6465748f8cacafcef27ee70a7ac9 t/filter.t
-SHA1 1071febaa25419c5cdb2580bf87ee81834e70132 t/html.t
-SHA1 2c057ac42eb47f6b7da78c3b7ebb20b94f33e719 t/is_schemed.t
-SHA1 a0fdf62d822e769d80b229bb88f1a013f6ab0964 t/load-schemeless.t
-SHA1 0c5eb1bda18407bdf26b8831a08cad4a14938082 t/rfc3986_appendix_c.t
-SHA1 8fd6bf632cfd8a654a1edc06ffa2ea4fbdd4647b t/strict_leak.t
-SHA1 6aaf29926da83d3c369cf28969bd6c48df8deff4 t/urifind/find.t
-SHA1 9ff9e4e6fef205eebead0f792da79dedd61b4b7e t/urifind/pod.t
-SHA1 0cb2627de8403934f9893ed2e86145e7c372402c t/urifind/sciencenews
------BEGIN PGP SIGNATURE-----
-Version: GnuPG v2.0.18 (Darwin)
-
-iEYEARECAAYFAk6y6EsACgkQWMohlhD1QydrhwCfd3gNydcSbENAcdiG7aPCPD+D
-QgwAnieFByYg0uQKTzSA7o5ITOXRy+gP
-=IcdT
------END PGP SIGNATURE-----
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/bin/urifind new/URI-Find-20140709/bin/urifind
--- old/URI-Find-20111103/bin/urifind 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/bin/urifind 2014-07-10 01:32:18.000000000 +0200
@@ -7,12 +7,13 @@
use strict;
-our $VERSION = 20111103;
+our $VERSION = 20140709;
use File::Basename qw(basename);
use Getopt::Long qw(GetOptions);
use IO::File;
use URI::Find;
+use URI::Find::Schemeless;
# What to do, and how
my $help = 0;
@@ -25,19 +26,23 @@
my @pats = ();
my @schemes = ();
my $dump = 0;
+my $schemeless = 0;
Getopt::Long::Configure(qw{no_ignore_case bundling});
-GetOptions('s!' => \$sort,
- 'u!' => \$unique,
- 'p!' => \$prefix,
- 'n!' => \$noprefix,
- 'r!' => \$reverse,
- 'h!' => \$help,
- 'v!' => \$version,
- 'd!' => sub { $dump = 1 },
- 'D!' => sub { $dump = 2 },
- 'P=s@' => \@pats,
- 'S=s@' => \@schemes);
+GetOptions(
+ 's!' => \$sort,
+ 'u!' => \$unique,
+ 'p!' => \$prefix,
+ 'n!' => \$noprefix,
+ 'r!' => \$reverse,
+ 'h!' => \$help,
+ 'v!' => \$version,
+ 'd!' => sub { $dump = 1 },
+ 'D!' => sub { $dump = 2 },
+ 'P=s@' => \@pats,
+ 'S=s@' => \@schemes,
+ 'schemeless!' => \$schemeless,
+);
if ($help || $version) {
my $prog = basename($0);
@@ -122,7 +127,8 @@
$name = $argv;
}
- my $finder = URI::Find->new(sub { push @uris => [ $name, $_[0] ] });
+ my $class = $schemeless ? "URI::Find::Schemeless" : "URI::Find";
+ my $finder = $class->new(sub { push @uris => [ $name, $_[0] ] });
$finder->find(\$data);
}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/lib/URI/Find/Schemeless.pm new/URI-Find-20140709/lib/URI/Find/Schemeless.pm
--- old/URI-Find-20111103/lib/URI/Find/Schemeless.pm 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/lib/URI/Find/Schemeless.pm 2014-07-10 01:32:18.000000000 +0200
@@ -12,9 +12,9 @@
use URI::Find ();
use vars qw($VERSION);
-$VERSION = 20111103;
+$VERSION = 20140709;
-my($dnsSet) = 'A-Za-z0-9-';
+my($dnsSet) = '\p{isAlpha}A-Za-z0-9-'; # extended for IDNA domains
my($cruftSet) = __PACKAGE__->cruft_set . '<>?}';
@@ -87,7 +87,8 @@
@_ == 1 || __PACKAGE__->badinvo;
my($self) = shift;
- # Updated from http://www.iana.org/domains/root/db/
+ use utf8;
+ # Updated from http://www.iana.org/domains/root/db/ with new TLDs
my $plain = join '|', qw(
AERO
ARPA
@@ -110,9 +111,660 @@
PRO
TEL
TRAVEL
+ ac
+ academy
+ accountants
+ active
+ actor
+ ad
+ ae
+ aero
+ af
+ ag
+ agency
+ ai
+ airforce
+ al
+ am
+ an
+ ao
+ aq
+ ar
+ archi
+ army
+ arpa
+ as
+ asia
+ associates
+ at
+ attorney
+ au
+ audio
+ autos
+ aw
+ ax
+ axa
+ az
+ ba
+ bar
+ bargains
+ bayern
+ bb
+ bd
+ be
+ beer
+ berlin
+ best
+ bf
+ bg
+ bh
+ bi
+ bid
+ bike
+ bio
+ biz
+ bj
+ bl
+ black
+ blackfriday
+ blue
+ bm
+ bmw
+ bn
+ bo
+ boutique
+ bq
+ br
+ brussels
+ bs
+ bt
+ build
+ builders
+ buzz
+ bv
+ bw
+ by
+ bz
+ bzh
+ ca
+ cab
+ camera
+ camp
+ capetown
+ capital
+ cards
+ care
+ career
+ careers
+ cash
+ cat
+ catering
+ cc
+ cd
+ center
+ ceo
+ cf
+ cg
+ ch
+ cheap
+ christmas
+ church
+ ci
+ citic
+ ck
+ cl
+ claims
+ cleaning
+ clinic
+ clothing
+ club
+ cm
+ cn
+ co
+ codes
+ coffee
+ college
+ cologne
+ com
+ community
+ company
+ computer
+ condos
+ construction
+ consulting
+ contractors
+ cooking
+ cool
+ coop
+ country
+ cr
+ credit
+ creditcard
+ cruises
+ cu
+ cv
+ cw
+ cx
+ cy
+ cz
+ dance
+ dating
+ de
+ degree
+ democrat
+ dental
+ dentist
+ desi
+ diamonds
+ digital
+ directory
+ discount
+ dj
+ dk
+ dm
+ dnp
+ do
+ domains
+ durban
+ dz
+ ec
+ edu
+ education
+ ee
+ eg
+ eh
+ email
+ engineer
+ engineering
+ enterprises
+ equipment
+ er
+ es
+ estate
+ et
+ eu
+ eus
+ events
+ exchange
+ expert
+ exposed
+ fail
+ farm
+ feedback
+ fi
+ finance
+ financial
+ fish
+ fishing
+ fitness
+ fj
+ fk
+ flights
+ florist
+ fm
+ fo
+ foo
+ foundation
+ fr
+ frogans
+ fund
+ furniture
+ futbol
+ ga
+ gal
+ gallery
+ gb
+ gd
+ ge
+ gf
+ gg
+ gh
+ gi
+ gift
+ gives
+ gl
+ glass
+ global
+ globo
+ gm
+ gmo
+ gn
+ gop
+ gov
+ gp
+ gq
+ gr
+ graphics
+ gratis
+ green
+ gripe
+ gs
+ gt
+ gu
+ guide
+ guitars
+ guru
+ gw
+ gy
+ hamburg
+ haus
+ hiphop
+ hiv
+ hk
+ hm
+ hn
+ holdings
+ holiday
+ homes
+ horse
+ host
+ house
+ hr
+ ht
+ hu
+ id
+ ie
+ il
+ im
+ immobilien
+ in
+ industries
+ info
+ ink
+ institute
+ insure
+ int
+ international
+ investments
+ io
+ iq
+ ir
+ is
+ it
+ je
+ jetzt
+ jm
+ jo
+ jobs
+ joburg
+ jp
+ juegos
+ kaufen
+ ke
+ kg
+ kh
+ ki
+ kim
+ kitchen
+ kiwi
+ km
+ kn
+ koeln
+ kp
+ kr
+ kred
+ kw
+ ky
+ kz
+ la
+ land
+ lawyer
+ lb
+ lc
+ lease
+ li
+ life
+ lighting
+ limited
+ limo
+ link
+ lk
+ loans
+ london
+ lotto
+ lr
+ ls
+ lt
+ lu
+ luxe
+ luxury
+ lv
+ ly
+ ma
+ maison
+ management
+ mango
+ market
+ marketing
+ mc
+ md
+ me
+ media
+ meet
+ menu
+ mf
+ mg
+ mh
+ miami
+ mil
+ mini
+ mk
+ ml
+ mm
+ mn
+ mo
+ mobi
+ moda
+ moe
+ monash
+ mortgage
+ moscow
+ motorcycles
+ mp
+ mq
+ mr
+ ms
+ mt
+ mu
+ museum
+ mv
+ mw
+ mx
+ my
+ mz
+ na
+ nagoya
+ name
+ navy
+ nc
+ ne
+ net
+ neustar
+ nf
+ ng
+ nhk
+ ni
+ ninja
+ nl
+ no
+ np
+ nr
+ nu
+ nyc
+ nz
+ okinawa
+ om
+ onl
+ org
+ organic
+ ovh
+ pa
+ paris
+ partners
+ parts
+ pe
+ pf
+ pg
+ ph
+ photo
+ photography
+ photos
+ physio
+ pics
+ pictures
+ pink
+ pk
+ pl
+ plumbing
+ pm
+ pn
+ post
+ pr
+ press
+ pro
+ productions
+ properties
+ ps
+ pt
+ pub
+ pw
+ py
+ qa
+ qpon
+ quebec
+ re
+ recipes
+ red
+ rehab
+ reise
+ reisen
+ ren
+ rentals
+ repair
+ report
+ republican
+ rest
+ reviews
+ rich
+ rio
+ ro
+ rocks
+ rodeo
+ rs
+ ru
+ ruhr
+ rw
+ ryukyu
+ sa
+ saarland
+ sb
+ sc
+ schule
+ scot
+ sd
+ se
+ services
+ sexy
+ sg
+ sh
+ shiksha
+ shoes
+ si
+ singles
+ sj
+ sk
+ sl
+ sm
+ sn
+ so
+ social
+ software
+ sohu
+ solar
+ solutions
+ soy
+ space
+ sr
+ ss
+ st
+ su
+ supplies
+ supply
+ support
+ surf
+ surgery
+ sv
+ sx
+ sy
+ systems
+ sz
+ tattoo
+ tax
+ tc
+ td
+ technology
+ tel
+ tf
+ tg
+ th
+ tienda
+ tips
+ tirol
+ tj
+ tk
+ tl
+ tm
+ tn
+ to
+ today
+ tokyo
+ tools
+ town
+ toys
+ tp
+ tr
+ trade
+ training
+ travel
+ tt
+ tv
+ tw
+ tz
+ ua
+ ug
+ uk
+ um
+ university
+ uno
+ us
+ uy
+ uz
+ va
+ vacations
+ vc
+ ve
+ vegas
+ ventures
+ versicherung
+ vet
+ vg
+ vi
+ viajes
+ villas
+ vision
+ vlaanderen
+ vn
+ vodka
+ vote
+ voting
+ voto
+ voyage
+ vu
+ wang
+ watch
+ webcam
+ website
+ wed
+ wf
+ wien
+ wiki
+ works
+ ws
+ wtc
+ wtf
+ 测试
+ परीक्षा
+ 集团
+ 在线
+ 한국
+ ভারত
+ موقع
+ বাংলা
+ 公益
+ 公司
+ 移动
+ 我爱你
+ москва
+ испытание
+ қаз
+ онлайн
+ сайт
+ срб
+ 테스트
+ орг
+ 삼성
+ சிங்கப்பூர்
+ 商标
+ 商城
+ дети
+ мкд
+ טעסט
+ 中文网
+ 中信
+ 中国
+ 中國
+ భారత్
+ ලංකා
+ 測試
+ ભારત
+ भारत
+ آزمایشی
+ பரிட்சை
+ संगठन
+ 网络
+ укр
+ 香港
+ δοκιμή
+ إختبار
+ 台湾
+ 台灣
+ мон
+ الجزائر
+ عمان
+ ایران
+ امارات
+ بازار
+ پاکستان
+ الاردن
+ بھارت
+ المغرب
+ السعودية
+ سودان
+ مليسيا
+ شبكة
+ გე
+ 机构
+ 组织机构
+ ไทย
+ سورية
+ рф
+ تونس
+ みんな
+ 世界
+ ਭਾਰਤ
+ 网址
+ 游戏
+ مصر
+ قطر
+ இலங்கை
+ இந்தியா
+ 新加坡
+ فلسطين
+ テスト
+ 政务
+ xxx
+ xyz
+ yachts
+ ye
+ yokohama
+ yt
+ za
+ zm
+ zone
+ zw
);
-
- return qr/(?:[a-z]{2}|$plain)/i;
+
+ return qr/(?:$plain)/i;
}
=head1 AUTHOR
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/lib/URI/Find.pm new/URI-Find-20140709/lib/URI/Find.pm
--- old/URI-Find-20111103/lib/URI/Find.pm 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/lib/URI/Find.pm 2014-07-10 01:32:18.000000000 +0200
@@ -10,21 +10,28 @@
use base qw(Exporter);
use vars qw($VERSION @EXPORT);
-$VERSION = 20111103;
+$VERSION = 20140709;
@EXPORT = qw(find_uris);
use constant YES => (1==1);
use constant NO => !YES;
use Carp qw(croak);
-use URI::URL;
require URI;
+my $reserved = q(;/?:@&=+$,[]);
+my $mark = q(-_.!~*'());
+my $unreserved = "A-Za-z0-9\Q$mark\E";
+my $uric = quotemeta($reserved) . '\p{isAlpha}' . $unreserved . "%";
+
# URI scheme pattern without the non-alpha numerics.
# Those are extremely uncommon and interfere with the match.
-my($schemeRe) = qr/[a-zA-Z][a-zA-Z0-9]*/;
-my($uricSet) = $URI::uric;
+my($schemeRe) = qr/[a-zA-Z][a-zA-Z0-9\+]*/;
+my($uricSet) = $uric; # use new set
+
+# Some schemes which URI.pm does not explicitly support.
+my $extraSchemesRe = qr{^(?:git|svn|ssh|svn\+ssh)$};
# We need to avoid picking up 'HTTP::Request::Common' so we have a
# subset of uric without a colon ("I have no colon and yet I must poop")
@@ -32,7 +39,7 @@
$uricCheat =~ tr/://d;
# Identifying characters accidentally picked up with a URI.
-my($cruftSet) = q{]),.'";}; #'#
+my($cruftSet) = q{])\},.'";}; #'#
=head1 NAME
@@ -49,10 +56,10 @@
=head1 DESCRIPTION
-This module does one thing: Finds URIs and URLs in plain text. It finds
-them quickly and it finds them B<all> (or what URI::URL considers a URI
-to be.) It only finds URIs which include a scheme (http:// or the
-like), for something a bit less strict have a look at
+This module does one thing: Finds URIs and URLs in plain text. It
+finds them quickly and it finds them B<all> (or what URI.pm considers
+a URI to be.) It only finds URIs which include a scheme (http:// or
+the like), for something a bit less strict have a look at
L<URI::Find::Schemeless|URI::Find::Schemeless>.
For a command-line interface, L<urifind> is provided.
@@ -68,10 +75,9 @@
Creates a new URI::Find object.
&callback is a function which is called on each URI found. It is
-passed two arguments, the first is a URI::URL object representing the
-URI found. The second is the original text of the URI found. The
-return value of the callback will replace the original URI in the
-text.
+passed two arguments, the first is a URI object representing the URI
+found. The second is the original text of the URI found. The return
+value of the callback will replace the original URI in the text.
=cut
@@ -120,7 +126,7 @@
$self->{_uris_found} = 0;
# Yes, evil. Basically, look for something vaguely resembling a URL,
- # then hand it off to URI::URL for examination. If it passes, throw
+ # then hand it off to URI for examination. If it passes, throw
# it to a callback and put the result in its place.
local $SIG{__DIE__} = 'DEFAULT';
my $uri_cand;
@@ -310,6 +316,14 @@
=cut
+my %balanced_cruft = (
+ '(' => ')',
+ '{' => '}',
+ '[' => ']',
+ '"' => '"',
+ q['] => q['],
+);
+
sub decruft {
@_ == 2 || __PACKAGE__->badinvo;
my($self, $orig_match) = @_;
@@ -326,11 +340,8 @@
$cruft =~ s/^;//;
}
- my $opening = $orig_match =~ tr/(/(/;
- my $closing = $orig_match =~ tr/)/)/;
- if ( $cruft =~ /\)$/ && $opening == ( $closing + 1 ) ) {
- $orig_match .= ')';
- $cruft =~ s/\)$//;
+ while( my($open, $close) = each %balanced_cruft ) {
+ $self->recruft_balanced(\$orig_match, \$cruft, $open, $close);
}
$self->{end_cruft} = $cruft if $cruft;
@@ -339,6 +350,23 @@
return $orig_match;
}
+
+sub recruft_balanced {
+ my $self = shift;
+ my($orig_match, $cruft, $open, $close) = @_;
+
+ my $open_count = () = $$orig_match =~ m{\Q$open}g;
+ my $close_count = () = $$orig_match =~ m{\Q$close}g;
+
+ if ( $$cruft =~ /\Q$close\E$/ && $open_count == ( $close_count + 1 ) ) {
+ $$orig_match .= $close;
+ $$cruft =~ s/\Q$close\E$//;
+ }
+
+ return;
+}
+
+
=item B<recruft>
my $uri = $self->recruft($uri);
@@ -492,15 +520,18 @@
$uri =~ $self->schemeless_uri_re and
$uri !~ /^$schemeRe:/;
- # Set strict to avoid bogus schemes
- my $old_strict = URI::URL::strict(1);
-
eval {
- $uri = URI::URL->new($uri);
- };
+ $uri = URI->new($uri);
- # And restore it
- URI::URL::strict($old_strict);
+ # Throw out anything with an invalid scheme.
+ my $has_invalid_scheme = $uri->isa("URI::_foreign") &&
+ $uri->scheme !~ $extraSchemesRe;
+
+ # Toss out things like http:// but keep file:///
+ my $is_empty = $uri =~ m{^$schemeRe://$};
+
+ undef $uri if $has_invalid_scheme || $is_empty;
+ };
if($@ || !defined $uri) { # leave everything untouched, its not a URI.
return NO;
@@ -539,8 +570,7 @@
=head1 SEE ALSO
-L<urifind>, L<URI::Find::Schemeless>, L<URI::URL>, L<URI>,
-RFC 3986 Appendix C
+L<urifind>, L<URI::Find::Schemeless>, L<URI>, RFC 3986 Appendix C
=cut
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/t/Find.t new/URI-Find-20140709/t/Find.t
--- old/URI-Find-20111103/t/Find.t 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/t/Find.t 2014-07-10 01:32:18.000000000 +0200
@@ -2,6 +2,7 @@
use strict;
+use open ':std', ':encoding(utf8)';
use Test::More 'no_plan';
use_ok 'URI::Find';
@@ -53,29 +54,28 @@
my %Tests;
BEGIN {
my $all = join '', keys %Run;
-
- # ARGH! URI::URL is inconsistant in how it normalizes URLs!
- # HTTP URLs get a trailing slash, FTP and gopher do not.
+
+ use utf8;
%Tests = (
'Something something something.travel and stuff'
- => [[ S => 'http://something.travel/' ]],
- 'URL:http://www.perl.com' => 'http://www.perl.com/',
+ => [[ S => 'http://something.travel' ]],
+ 'URL:http://www.perl.com' => 'http://www.perl.com',
'ftp://ftp.site.org' => 'ftp://ftp.site.org',
'<ftp.site.org>' => [[ S => 'ftp://ftp.site.org' ]],
'Make sure "http://www.foo.com" is caught' =>
- 'http://www.foo.com/',
- 'http://www.foo.com' => 'http://www.foo.com/',
- 'www.foo.com' => [[ S => 'http://www.foo.com/' ]],
+ 'http://www.foo.com',
+ 'http://www.foo.com' => 'http://www.foo.com',
+ 'www.foo.com' => [[ S => 'http://www.foo.com' ]],
'ftp.foo.com' => [[ S => 'ftp://ftp.foo.com' ]],
'gopher://moo.foo.com' => 'gopher://moo.foo.com',
'I saw this site, http://www.foo.com, and its really neat!'
- => 'http://www.foo.com/',
+ => 'http://www.foo.com',
'Foo Industries (at http://www.foo.com)'
- => 'http://www.foo.com/',
+ => 'http://www.foo.com',
'Oh, dear. Another message from Dejanews. http://www.deja.com/%5BST_rn=ps%5D/qs.xp?ST=PS&svcclass=dnyr&QRY=lwall&defaultOp=AND&DBS=1&OP=dnquery.xp&LNG=ALL&subjects=&groups=&authors=&fromdate=&todate=&showsort=score&maxhits=25 How fun.'
=> 'http://www.deja.com/%5BST_rn=ps%5D/qs.xp?ST=PS&svcclass=dnyr&QRY=lwall&defaultOp=AND&DBS=1&OP=dnquery.xp&LNG=ALL&subjects=&groups=&authors=&fromdate=&todate=&showsort=score&maxhits=25',
'Hmmm, Storyserver from news.com. http://news.cnet.com/news/0-1004-200-1537811.html?tag=st.ne.1002.thed.1004-2... How nice.'
- => [[S => 'http://news.com/'],
+ => [[S => 'http://news.com'],
[$all => 'http://news.cnet.com/news/0-1004-200-1537811.html?tag=st.ne.1002.thed.1004-2...']],
'$html = get("http://www.perl.com/");' => 'http://www.perl.com/',
q|my $url = url('http://www.perl.com/cgi-bin/cpan_mod');|
@@ -83,36 +83,65 @@
'http://www.perl.org/support/online_support.html#mail'
=> 'http://www.perl.org/support/online_support.html#mail',
'irc.lightning.net irc.mcs.net'
- => [[S => 'http://irc.lightning.net/'],
- [S => 'http://irc.mcs.net/']],
- 'foo.bar.xx/~baz/',
- => [[S => 'http://foo.bar.xx/~baz/']],
+ => [[S => 'http://irc.lightning.net'],
+ [S => 'http://irc.mcs.net']],
+ 'foo.bar.xx/~baz/' => [],
'foo.bar.xx/~baz/ abcd.efgh.mil, none.such/asdf/ hi.there.org'
- => [[S => 'http://foo.bar.xx/~baz/'],
- [S => 'http://abcd.efgh.mil/'],
- [S => 'http://hi.there.org/']],
+ => [[S => 'http://abcd.efgh.mil'],
+ [S => 'http://hi.there.org']],
'foo:<1.2.3.4>'
- => [[S => 'http://1.2.3.4/']],
+ => [[S => 'http://1.2.3.4']],
'mail.eserv.com.au? failed before ? designated end'
- => [[S => 'http://mail.eserv.com.au/']],
+ => [[S => 'http://mail.eserv.com.au']],
'foo.info/himom ftp.bar.biz'
=> [[S => 'http://foo.info/himom'],
[S => 'ftp://ftp.bar.biz']],
- '(http://round.com)' => 'http://round.com/',
- '[http://square.com]' => 'http://square.com/',
- '{http://brace.com}' => 'http://brace.com/',
- '<http://angle.com>' => 'http://angle.com/',
- '(round.com)' => [[S => 'http://round.com/' ]],
- '[square.com]' => [[S => 'http://square.com/' ]],
- '{brace.com}' => [[S => 'http://brace.com/' ]],
- '<angle.com>' => [[S => 'http://angle.com/' ]],
- '<x>intag.com</x>' => [[S => 'http://intag.com/' ]],
+ '(http://round.com)' => 'http://round.com',
+ '[http://square.com]' => 'http://square.com',
+ '{http://brace.com}' => 'http://brace.com',
+ '<http://angle.com>' => 'http://angle.com',
+ '(round.com)' => [[S => 'http://round.com' ]],
+ '[square.com]' => [[S => 'http://square.com' ]],
+ '{brace.com}' => [[S => 'http://brace.com' ]],
+ '<angle.com>' => [[S => 'http://angle.com' ]],
+ '<x>intag.com</x>' => [[S => 'http://intag.com' ]],
'[mailto:somebody@company.ext]' => 'mailto:somebody@company.ext',
- 'HTtp://MIXED-Case.Com' => 'http://mixed-case.com/',
+ 'HTtp://MIXED-Case.Com' => 'HTtp://MIXED-Case.Com',
"The technology of magnetic energy has become so powerful an entire ".
"house can...http://bit.ly/8yEdeb"
=> "http://bit.ly/8yEdeb",
'http://www.foo.com/bar((baz)blah)' => 'http://www.foo.com/bar((baz)blah)',
+ 'https://[2607:5300:60:1509::228d:413a]' => 'https://[2607:5300:60:1509::228d:413a]',
+ '[https://[2607:5300:60:1509::228d:413a]]' => 'https://[2607:5300:60:1509::228d:413a]',
+
+ # Tests for file:
+ "origin file:///Users/schwern/devel/URI-Find/ (fetch)"
+ => 'file:///Users/schwern/devel/URI-Find/',
+ "This is how you express the root path file:/// as a URL"
+ => 'file:///',
+
+ # Tests for git:
+ 'GwenDragon git://github.com/GwenDragon/uri-find.git (fetch)'
+ => 'git://github.com/GwenDragon/uri-find.git',
+
+ # Tests for svn+ssh:
+ "URLs like svn+ssh://example.net aren't found"
+ => 'svn+ssh://example.net',
+
+ # Tests for IDNA domains
+ 'http://müller.de' => 'http://xn--mller-kva.de',
+ 'http://موقع.وزارة-الاتصالات.مصر' => 'http://xn--4gbrim.xn----ymcbaaajlc6dj7bxne2c.xn--wgbh1c',
+ 'http://правительство.рф' => 'http://xn--80aealotwbjpid2k.xn--p1ai',
+ 'http://北京大学.中國' => 'http://xn--1lq90ic7fzpc.xn--fiqz9s',
+ 'http://北京大学.cn' => 'http://xn--1lq90ic7fzpc.cn',
+
+ # Test new TLDs
+ 'http://my.test.transport' => 'http://my.test.transport',
+ 'http://regierung.bayern' => 'http://regierung.bayern',
+ 'http://kaiser-senf.gmbh/shop/' => 'http://kaiser-senf.gmbh/shop/',
+ 'Have vacation in lovely Bavaria and visit tourist.in.bayern to go to King Ludwig New Schwanstein. For political information see website regierung.bayern to get more.'
+ => [[S => 'http://tourist.in.bayern' ], [S => 'http://regierung.bayern' ]],
+ 'The mießlich-österlich-mück.ag was established in 2032 by M. Ostrich.' => [[S => 'http://xn--mielich-sterlich-mck-dwb52cye.ag' ]],
# False tests
'HTTP::Request::Common' => [],
@@ -125,6 +154,9 @@
'x comp.ai.nat-lang libdb.so.3 x' => [],
'x comp.ai.nat-lang libdb.so.3 x' => [],
'www.marselisl www.info@skive-hallerne.dk' => [],
+ 'bogusscheme://foo.com/' => [],
+ 'http:' => [],
+ 'http://' => [],
# XXX broken
# q{$url = 'http://'.rand(1000000).'@anonymizer.com/'.$url;}
# => [],
@@ -197,13 +229,3 @@
while( my($text, $rspec_list) = each %Tests ) {
run $text, @$rspec_list;
}
-
-# We used to turn URI::URL strict on and leave it on.
-
-for my $val (0, 1) {
- URI::URL::strict($val);
- my $f = URI::Find->new(sub { });
- my $t = "foo";
- $f->find(\$t);
- is $val, URI::URL::strict(), "URI::URL::strict $val";
-}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/t/strict_leak.t new/URI-Find-20140709/t/strict_leak.t
--- old/URI-Find-20111103/t/strict_leak.t 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/t/strict_leak.t 1970-01-01 01:00:00.000000000 +0100
@@ -1,35 +0,0 @@
-#!/usr/bin/env perl -w
-
-# Test that URI::URL::strict does not remain on if a callback or filter dies.
-# rt.cpan.org 71153
-
-use strict;
-use warnings;
-
-use Test::More;
-
-use URI::Find;
-
-note "with a dying callback"; {
- my $text = "Foo http://example.com bar";
- my $finder = URI::Find->new( sub { die; } );
-
- URI::URL::strict(0);
- ok !URI::URL::strict();
- ok !eval { $finder->find(\$text); 1 };
- ok !URI::URL::strict();
-}
-
-
-note "with a dying filter"; {
- my $text = "Foo http://example.com bar";
- my $finder = URI::Find->new( sub {} );
-
- URI::URL::strict(0);
- ok !URI::URL::strict();
- ok !eval { $finder->find(\$text, sub { die; }); 1 };
- ok !URI::URL::strict();
-}
-
-
-done_testing;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/URI-Find-20111103/t/urifind/find.t new/URI-Find-20140709/t/urifind/find.t
--- old/URI-Find-20111103/t/urifind/find.t 2011-11-03 20:15:23.000000000 +0100
+++ new/URI-Find-20140709/t/urifind/find.t 2014-07-10 01:32:18.000000000 +0200
@@ -5,8 +5,6 @@
use Test::More;
use File::Spec;
-plan tests => 13;
-
ok(my $ifile = File::Spec->catfile(qw(t urifind sciencenews)),
"Test file found");
my $urifind = File::Spec->catfile(qw(blib script urifind));
@@ -42,3 +40,28 @@
@data = `$^X $urifind -S http -P \.org $ifile`;
is(@data, 8, "Correct number elements when invoked with -P \.org -S http");
+
+@data = `$^X $urifind --schemeless $ifile`;
+chomp @data;
+is_deeply \@data, [map { "$_" } qw(
+ http://66.33.90.123
+ http://efwd.dnsix.com
+ mailto:eletter@lists.sciencenews.org
+ mailto:eletter-help@lists.sciencenews.org
+ mailto:eletter-unsubscribe@lists.sciencenews.org
+ mailto:eletter-subscribe@lists.sciencenews.org
+ http://www.sciencenews.org
+ http://www.sciencenews.org/20030705/fob1.asp
+ http://www.sciencenews.org/20030705/fob5.asp
+ http://www.sciencenews.org/20030705/bob8.asp
+ http://www.sciencenews.org/20030705/mathtrek.asp
+ http://www.sciencenews.org/20030705/food.asp
+ http://www.sciencenews.org
+ http://www.sciencenews.org/20030705/toc.asp
+ http://www.sciencenews.org
+ http://www.sciencenews.org/20030705/fob2.asp
+ http://www.sciencenews.org/20030705/fob3.asp
+ http:/%
+)];
+
+done_testing;
--
To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org
For additional commands, e-mail: opensuse-commit+help@opensuse.org