Hello community,
here is the log from the commit of package python-urlgrabber
checked in at Sun Aug 6 21:58:16 CEST 2006.
--------
--- python-urlgrabber/python-urlgrabber.changes 2006-05-25 14:27:40.000000000 +0200
+++ python-urlgrabber/python-urlgrabber.changes 2006-08-04 17:28:05.000000000 +0200
@@ -1,0 +2,9 @@
+Fri Aug 4 17:25:18 CEST 2006 - cthiel@suse.de
+
+- update to version 2.9.10
+ * Make keepalive, byteranges, etc. work with https.
+ * Fixed a minor error reporting bug due to changes in python 2.4.
+ * Catch read errors after the file has been opened.
+- removed obsolete urlgrabber-read-error.patch
+
+-------------------------------------------------------------------
Old:
----
urlgrabber-2.9.9.tar.bz2
urlgrabber-read-error.patch
New:
----
urlgrabber-2.9.10.tar.bz2
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-urlgrabber.spec ++++++
--- /var/tmp/diff_new_pack.Ta0nNT/_old 2006-08-06 21:58:11.000000000 +0200
+++ /var/tmp/diff_new_pack.Ta0nNT/_new 2006-08-06 21:58:11.000000000 +0200
@@ -1,5 +1,5 @@
#
-# spec file for package python-urlgrabber (Version 2.9.9)
+# spec file for package python-urlgrabber (Version 2.9.10)
#
# Copyright (c) 2006 SUSE LINUX Products GmbH, Nuernberg, Germany.
# This file and all modifications and additions to the pristine
@@ -12,15 +12,14 @@
Name: python-urlgrabber
BuildRequires: python-devel
-Version: 2.9.9
+Version: 2.9.10
Release: 1
Summary: A high-level cross-protocol url-grabber
Group: Development/Libraries/Python
License: LGPL
URL: http://linux.duke.edu/projects/urlgrabber/
Source: urlgrabber-%{version}.tar.bz2
-Patch: %{name}-%{version}.patch
-Patch1: urlgrabber-read-error.patch
+Patch: %{name}-2.9.9.patch
BuildRoot: %{_tmppath}/%{name}-%{version}-build
%py_requires
@@ -40,7 +39,6 @@
%prep
%setup -q -n urlgrabber-%{version}
%patch
-%patch1
%build
export CFLAGS="$RPM_OPT_FLAGS"
@@ -59,6 +57,12 @@
%{py_sitedir}/urlgrabber
%changelog -n python-urlgrabber
+* Fri Aug 04 2006 - cthiel@suse.de
+- update to version 2.9.10
+ * Make keepalive, byteranges, etc. work with https.
+ * Fixed a minor error reporting bug due to changes in python 2.4.
+ * Catch read errors after the file has been opened.
+- removed obsolete urlgrabber-read-error.patch
* Thu May 25 2006 - cthiel@suse.de
- update to version 2.9.9
* Added tests to make sure that the "quote" option works as advertised
++++++ urlgrabber-2.9.9.tar.bz2 -> urlgrabber-2.9.10.tar.bz2 ++++++
diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/urlgrabber-2.9.9/ChangeLog new/urlgrabber-2.9.10/ChangeLog
--- old/urlgrabber-2.9.9/ChangeLog 2006-03-02 22:06:42.000000000 +0100
+++ new/urlgrabber-2.9.10/ChangeLog 2006-07-20 22:17:32.000000000 +0200
@@ -1,3 +1,35 @@
+2006-07-20 Michael D. Stenner
+
+ * urlgrabber/__init__.py:
+
+ release 2.9.10
+
+2006-07-20 Michael D. Stenner
+
+ * urlgrabber/: byterange.py, grabber.py, keepalive.py:
+
+ Added patch from James Bowes to make keepalive, byteranges, etc.
+ work with https (oops!)
+
+2006-04-04 Michael D. Stenner
+
+ * urlgrabber/keepalive.py:
+
+ Fixed a minor error reporting bug due to changes in python 2.4.
+
+2006-03-22 Michael D. Stenner
+
+ * urlgrabber/grabber.py:
+
+ Patch from Jeremy Katz to catch read errors after the file has been
+ opened.
+
+2006-03-02 Michael D. Stenner
+
+ * ChangeLog:
+
+ updated ChangeLog
+
2006-03-02 Michael D. Stenner
* urlgrabber/__init__.py:
diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/urlgrabber-2.9.9/PKG-INFO new/urlgrabber-2.9.10/PKG-INFO
--- old/urlgrabber-2.9.9/PKG-INFO 2006-03-02 22:06:52.000000000 +0100
+++ new/urlgrabber-2.9.10/PKG-INFO 2006-07-20 22:17:42.000000000 +0200
@@ -1,6 +1,6 @@
Metadata-Version: 1.0
Name: urlgrabber
-Version: 2.9.9
+Version: 2.9.10
Summary: A high-level cross-protocol url-grabber
Home-page: http://linux.duke.edu/projects/urlgrabber/
Author: Michael D. Stenner, Ryan Tomayko
diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/urlgrabber-2.9.9/urlgrabber/__init__.py new/urlgrabber-2.9.10/urlgrabber/__init__.py
--- old/urlgrabber-2.9.9/urlgrabber/__init__.py 2006-03-02 22:06:35.000000000 +0100
+++ new/urlgrabber-2.9.10/urlgrabber/__init__.py 2006-07-20 22:17:24.000000000 +0200
@@ -14,7 +14,7 @@
# Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko
-# $Id: __init__.py,v 1.17 2006/03/02 21:06:35 mstenner Exp $
+# $Id: __init__.py,v 1.18 2006/07/20 20:17:24 mstenner Exp $
"""A high-level cross-protocol url-grabber.
@@ -44,8 +44,8 @@
automatically switching mirrors if there is a failure.
"""
-__version__ = '2.9.9'
-__date__ = '2006/03/02'
+__version__ = '2.9.10'
+__date__ = '2006/07/20'
__author__ = 'Michael D. Stenner , ' \
'Ryan Tomayko '
__url__ = 'http://linux.duke.edu/projects/urlgrabber/'
diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/urlgrabber-2.9.9/urlgrabber/byterange.py new/urlgrabber-2.9.10/urlgrabber/byterange.py
--- old/urlgrabber-2.9.9/urlgrabber/byterange.py 2005-10-22 23:57:28.000000000 +0200
+++ new/urlgrabber-2.9.10/urlgrabber/byterange.py 2006-07-20 22:15:58.000000000 +0200
@@ -17,7 +17,7 @@
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-# $Id: byterange.py,v 1.11 2005/10/22 21:57:28 mstenner Exp $
+# $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $
import os
import stat
@@ -71,6 +71,15 @@
# HTTP's Range Not Satisfiable error
raise RangeError('Requested Range Not Satisfiable')
+class HTTPSRangeHandler(HTTPRangeHandler):
+ """ Range Header support for HTTPS. """
+
+ def https_error_206(self, req, fp, code, msg, hdrs):
+ return self.http_error_206(req, fp, code, msg, hdrs)
+
+ def https_error_416(self, req, fp, code, msg, hdrs):
+ self.http_error_416(req, fp, code, msg, hdrs)
+
class RangeableFileObject:
"""File object wrapper to enable raw range handling.
This was implemented primarily for handling range
diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/urlgrabber-2.9.9/urlgrabber/grabber.py new/urlgrabber-2.9.10/urlgrabber/grabber.py
--- old/urlgrabber-2.9.9/urlgrabber/grabber.py 2006-03-02 21:56:57.000000000 +0100
+++ new/urlgrabber-2.9.10/urlgrabber/grabber.py 2006-07-20 22:15:58.000000000 +0200
@@ -364,7 +364,7 @@
"""
-# $Id: grabber.py,v 1.45 2006/03/02 20:56:57 mstenner Exp $
+# $Id: grabber.py,v 1.47 2006/07/20 20:15:58 mstenner Exp $
import os
import os.path
@@ -402,24 +402,25 @@
# This is a convenient way to make keepalive optional.
# Just rename the module so it can't be imported.
import keepalive
- from keepalive import HTTPHandler
+ from keepalive import HTTPHandler, HTTPSHandler
except ImportError, msg:
- keepalive_handler = None
+ keepalive_handlers = ()
else:
- keepalive_handler = HTTPHandler()
+ keepalive_handlers = (HTTPHandler(), HTTPSHandler())
try:
# add in range support conditionally too
import byterange
- from byterange import HTTPRangeHandler, FileRangeHandler, \
- FTPRangeHandler, range_tuple_normalize, range_tuple_to_header, \
- RangeError
+ from byterange import HTTPRangeHandler, HTTPSRangeHandler, \
+ FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \
+ range_tuple_to_header, RangeError
except ImportError, msg:
range_handlers = ()
RangeError = None
have_range = 0
else:
- range_handlers = (HTTPRangeHandler(), FileRangeHandler(), FTPRangeHandler())
+ range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(),
+ FileRangeHandler(), FTPRangeHandler())
have_range = 1
@@ -454,7 +455,7 @@
global DEBUG
DEBUG = DBOBJ
- if keepalive_handler and keepalive.DEBUG is None:
+ if keepalive_handlers and keepalive.DEBUG is None:
keepalive.DEBUG = DBOBJ
if have_range and byterange.DEBUG is None:
byterange.DEBUG = DBOBJ
@@ -582,7 +583,8 @@
def close_all():
"""close any open keepalive connections"""
- if keepalive_handler: keepalive_handler.close_all()
+ for handler in keepalive_handlers:
+ handler.close_all()
def urlgrab(url, filename=None, **kwargs):
"""grab the file at <url> and make a local copy at <filename>
@@ -1012,7 +1014,7 @@
return self.opts.opener
elif self._opener is None:
handlers = []
- need_keepalive_handler = (keepalive_handler and self.opts.keepalive)
+ need_keepalive_handler = (keepalive_handlers and self.opts.keepalive)
need_range_handler = (range_handlers and \
(self.opts.range or self.opts.reget))
# if you specify a ProxyHandler when creating the opener
@@ -1043,7 +1045,7 @@
# -------------------------------------------------------
if need_keepalive_handler:
- handlers.append( keepalive_handler )
+ handlers.extend( keepalive_handlers )
if need_range_handler:
handlers.extend( range_handlers )
handlers.append( auth_handler )
@@ -1247,6 +1249,8 @@
raise URLGrabError(4, _('Socket Error: %s') % (e, ))
except TimeoutError, e:
raise URLGrabError(12, _('Timeout: %s') % (e, ))
+ except IOError, e:
+ raise URLGrabError(4, _('IOError: %s') %(e,))
newsize = len(new)
if not newsize: break # no more to read
diff -urN --exclude=CVS --exclude=.cvsignore --exclude=.svn --exclude=.svnignore old/urlgrabber-2.9.9/urlgrabber/keepalive.py new/urlgrabber-2.9.10/urlgrabber/keepalive.py
--- old/urlgrabber-2.9.9/urlgrabber/keepalive.py 2005-10-22 23:57:28.000000000 +0200
+++ new/urlgrabber-2.9.10/urlgrabber/keepalive.py 2006-07-20 22:15:58.000000000 +0200
@@ -99,7 +99,7 @@
"""
-# $Id: keepalive.py,v 1.13 2005/10/22 21:57:28 mstenner Exp $
+# $Id: keepalive.py,v 1.15 2006/07/20 20:15:58 mstenner Exp $
import urllib2
import httplib
@@ -172,7 +172,7 @@
else:
return dict(self._hostmap)
-class HTTPHandler(urllib2.HTTPHandler):
+class KeepAliveHandler:
def __init__(self):
self._cm = ConnectionManager()
@@ -207,9 +207,6 @@
self._cm.remove(connection)
#### Transaction Execution
- def http_open(self, req):
- return self.do_open(HTTPConnection, req)
-
def do_open(self, http_class, req):
host = req.get_host()
if not host:
@@ -249,12 +246,14 @@
r._url = req.get_full_url()
r._connection = h
r.code = r.status
+ r.headers = r.msg
+ r.msg = r.reason
if r.status == 200 or not HANDLE_ERRORS:
return r
else:
- return self.parent.error('http', req, r, r.status, r.reason, r.msg)
-
+ return self.parent.error('http', req, r,
+ r.status, r.msg, r.headers)
def _reuse_connection(self, h, req, host):
"""start the transaction with a re-used connection
@@ -322,6 +321,20 @@
if req.has_data():
h.send(data)
+class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
+ def __init__(self):
+ KeepAliveHandler.__init__(self)
+
+ def http_open(self, req):
+ return self.do_open(HTTPConnection, req)
+
+class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
+ def __init__(self):
+ KeepAliveHandler.__init__(self)
+
+ def https_open(self, req):
+ return self.do_open(HTTPSConnection, req)
+
class HTTPResponse(httplib.HTTPResponse):
# we need to subclass HTTPResponse in order to
# 1) add readline() and readlines() methods
@@ -371,7 +384,7 @@
self.close()
def info(self):
- return self.msg
+ return self.headers
def geturl(self):
return self._url
@@ -423,6 +436,9 @@
class HTTPConnection(httplib.HTTPConnection):
# use the modified response class
response_class = HTTPResponse
+
+class HTTPSConnection(httplib.HTTPSConnection):
+ response_class = HTTPResponse
#########################################################################
##### TEST FUNCTIONS
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Remember to have fun...