commit python-beautifulsoup4 for openSUSE:Factory
Hello community, here is the log from the commit of package python-beautifulsoup4 for openSUSE:Factory checked in at 2017-07-30 11:20:10 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-beautifulsoup4 (Old) and /work/SRC/openSUSE:Factory/.python-beautifulsoup4.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "python-beautifulsoup4" Sun Jul 30 11:20:10 2017 rev:21 rq:509648 version:4.6.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-beautifulsoup4/python-beautifulsoup4.changes 2017-06-13 16:06:47.885730844 +0200 +++ /work/SRC/openSUSE:Factory/.python-beautifulsoup4.new/python-beautifulsoup4.changes 2017-07-30 11:20:13.749924234 +0200 @@ -1,0 +2,14 @@ +Wed Jul 5 06:28:31 UTC 2017 - dmueller@suse.com + +- update to 4.6.0: + * Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for + getting the value of an attribute, but which always returns a list, + whether or not the attribute is a multi-value attribute. [bug=1678589] + * Improved the handling of empty-element tags like <br> when using the + html.parser parser. [bug=1676935] + * HTML parsers treat all HTML4 and HTML5 empty element tags (aka void + element tags) correctly. [bug=1656909] + * Namespace prefix is preserved when an XML tag is copied. Thanks + to Vikas for a patch and test. [bug=1685172] + +------------------------------------------------------------------- Old: ---- beautifulsoup4-4.5.3.tar.gz New: ---- beautifulsoup4-4.6.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-beautifulsoup4.spec ++++++ --- /var/tmp/diff_new_pack.DwsiWF/_old 2017-07-30 11:20:14.313844728 +0200 +++ /var/tmp/diff_new_pack.DwsiWF/_new 2017-07-30 11:20:14.313844728 +0200 @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-beautifulsoup4 -Version: 4.5.3 +Version: 4.6.0 Release: 0 Summary: HTML/XML Parser for Quick-Turnaround Applications Like Screen-Scraping License: MIT ++++++ beautifulsoup4-4.5.3.tar.gz -> beautifulsoup4-4.6.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/NEWS.txt new/beautifulsoup4-4.6.0/NEWS.txt --- old/beautifulsoup4-4.5.3/NEWS.txt 2017-01-02 16:00:18.000000000 +0100 +++ new/beautifulsoup4-4.6.0/NEWS.txt 2017-05-07 15:49:34.000000000 +0200 @@ -1,3 +1,21 @@ += 4.6.0 (20170507) = + +* Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for + getting the value of an attribute, but which always returns a list, + whether or not the attribute is a multi-value attribute. [bug=1678589] + +* It's now possible to use a tag's namespace prefix when searching, + e.g. soup.find('namespace:tag') [bug=1655332] + +* Improved the handling of empty-element tags like <br> when using the + html.parser parser. [bug=1676935] + +* HTML parsers treat all HTML4 and HTML5 empty element tags (aka void + element tags) correctly. [bug=1656909] + +* Namespace prefix is preserved when an XML tag is copied. Thanks + to Vikas for a patch and test. [bug=1685172] + = 4.5.3 (20170102) = * Fixed foster parenting when html5lib is the tree builder. Thanks to diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/PKG-INFO new/beautifulsoup4-4.6.0/PKG-INFO --- old/beautifulsoup4-4.5.3/PKG-INFO 2017-01-02 16:08:01.000000000 +0100 +++ new/beautifulsoup4-4.6.0/PKG-INFO 2017-05-07 15:52:33.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: beautifulsoup4 -Version: 4.5.3 +Version: 4.6.0 Summary: Screen-scraping library Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/ Author: Leonard Richardson diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/beautifulsoup4.egg-info/PKG-INFO new/beautifulsoup4-4.6.0/beautifulsoup4.egg-info/PKG-INFO --- old/beautifulsoup4-4.5.3/beautifulsoup4.egg-info/PKG-INFO 2017-01-02 16:08:01.000000000 +0100 +++ new/beautifulsoup4-4.6.0/beautifulsoup4.egg-info/PKG-INFO 2017-05-07 15:52:32.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: beautifulsoup4 -Version: 4.5.3 +Version: 4.6.0 Summary: Screen-scraping library Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/ Author: Leonard Richardson diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/beautifulsoup4.egg-info/SOURCES.txt new/beautifulsoup4-4.6.0/beautifulsoup4.egg-info/SOURCES.txt --- old/beautifulsoup4-4.5.3/beautifulsoup4.egg-info/SOURCES.txt 2017-01-02 16:08:01.000000000 +0100 +++ new/beautifulsoup4-4.6.0/beautifulsoup4.egg-info/SOURCES.txt 2017-05-07 15:52:33.000000000 +0200 @@ -13,7 +13,6 @@ beautifulsoup4.egg-info/dependency_links.txt beautifulsoup4.egg-info/requires.txt beautifulsoup4.egg-info/top_level.txt -bs4/1631353.py bs4/__init__.py bs4/dammit.py bs4/diagnose.py diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/bs4/1631353.py new/beautifulsoup4-4.6.0/bs4/1631353.py --- old/beautifulsoup4-4.5.3/bs4/1631353.py 2016-12-10 20:12:55.000000000 +0100 +++ new/beautifulsoup4-4.6.0/bs4/1631353.py 1970-01-01 01:00:00.000000000 +0100 @@ -1,5 +0,0 @@ -doc = """<script> -h=window.location.protocol+"//",r='<body onload="'; -</script>""" -from bs4.diagnose import diagnose -diagnose(doc) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/bs4/__init__.py new/beautifulsoup4-4.6.0/bs4/__init__.py --- old/beautifulsoup4-4.5.3/bs4/__init__.py 2017-01-02 15:57:54.000000000 +0100 +++ new/beautifulsoup4-4.6.0/bs4/__init__.py 2017-05-07 15:48:18.000000000 +0200 @@ -21,7 +21,7 @@ # found in the LICENSE file. __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.5.3" +__version__ = "4.6.0" __copyright__ = "Copyright (c) 2004-2017 Leonard Richardson" __license__ = "MIT" @@ -82,7 +82,7 @@ ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' - NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n" def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, @@ -215,8 +215,8 @@ markup = markup.encode("utf8") warnings.warn( '"%s" looks like a filename, not markup. You should' - 'probably open this file and pass the filehandle into' - 'Beautiful Soup.' % markup) + ' probably open this file and pass the filehandle into' + ' Beautiful Soup.' % markup) self._check_markup_is_url(markup) for (self.markup, self.original_encoding, self.declared_html_encoding, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/bs4/builder/__init__.py new/beautifulsoup4-4.6.0/bs4/builder/__init__.py --- old/beautifulsoup4-4.5.3/bs4/builder/__init__.py 2016-07-20 02:28:09.000000000 +0200 +++ new/beautifulsoup4-4.6.0/bs4/builder/__init__.py 2017-05-06 19:31:05.000000000 +0200 @@ -232,8 +232,13 @@ """ preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags - empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) + empty_element_tags = set([ + # These are from HTML5. + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', + + # These are from HTML4, removed in HTML5. + 'spacer', 'frame' + ]) # The HTML standard defines these attributes as containing a # space-separated list of values, not a single value. That is, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/bs4/builder/_htmlparser.py new/beautifulsoup4-4.6.0/bs4/builder/_htmlparser.py --- old/beautifulsoup4-4.5.3/bs4/builder/_htmlparser.py 2016-07-17 21:10:15.000000000 +0200 +++ new/beautifulsoup4-4.6.0/bs4/builder/_htmlparser.py 2017-05-07 13:08:16.000000000 +0200 @@ -52,7 +52,31 @@ HTMLPARSER = 'html.parser' class BeautifulSoupHTMLParser(HTMLParser): - def handle_starttag(self, name, attrs): + + def __init__(self, *args, **kwargs): + HTMLParser.__init__(self, *args, **kwargs) + + # Keep a list of empty-element tags that were encountered + # without an explicit closing tag. If we encounter a closing tag + # of this type, we'll associate it with one of those entries. + # + # This isn't a stack because we don't care about the + # order. It's a list of closing tags we've already handled and + # will ignore, assuming they ever show up. + self.already_closed_empty_element = [] + + def handle_startendtag(self, name, attrs): + # This is only called when the markup looks like + # <tag/>. + + # is_startend() tells handle_starttag not to close the tag + # just because its name matches a known empty-element tag. We + # know that this is an empty-element tag and we want to call + # handle_endtag ourselves. + tag = self.handle_starttag(name, attrs, handle_empty_element=False) + self.handle_endtag(name) + + def handle_starttag(self, name, attrs, handle_empty_element=True): # XXX namespace attr_dict = {} for key, value in attrs: @@ -62,10 +86,34 @@ value = '' attr_dict[key] = value attrvalue = '""' - self.soup.handle_starttag(name, None, None, attr_dict) - - def handle_endtag(self, name): - self.soup.handle_endtag(name) + #print "START", name + tag = self.soup.handle_starttag(name, None, None, attr_dict) + if tag and tag.is_empty_element and handle_empty_element: + # Unlike other parsers, html.parser doesn't send separate end tag + # events for empty-element tags. (It's handled in + # handle_startendtag, but only if the original markup looked like + # <tag/>.) + # + # So we need to call handle_endtag() ourselves. Since we + # know the start event is identical to the end event, we + # don't want handle_endtag() to cross off any previous end + # events for tags of this name. + self.handle_endtag(name, check_already_closed=False) + + # But we might encounter an explicit closing tag for this tag + # later on. If so, we want to ignore it. + self.already_closed_empty_element.append(name) + + def handle_endtag(self, name, check_already_closed=True): + #print "END", name + if check_already_closed and name in self.already_closed_empty_element: + # This is a redundant end tag for an empty-element tag. + # We've already called handle_endtag() for it, so just + # check it off the list. + # print "ALREADY CLOSED", name + self.already_closed_empty_element.remove(name) + else: + self.soup.handle_endtag(name) def handle_data(self, data): self.soup.handle_data(data) @@ -169,6 +217,7 @@ warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e + parser.already_closed_empty_element = [] # Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some # 3.2.3 code. This ensures they don't treat markup like <p></p> as a diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/bs4/element.py new/beautifulsoup4-4.6.0/bs4/element.py --- old/beautifulsoup4-4.5.3/bs4/element.py 2016-12-20 00:21:26.000000000 +0100 +++ new/beautifulsoup4-4.6.0/bs4/element.py 2017-05-07 14:15:39.000000000 +0200 @@ -131,8 +131,8 @@ # to methods like encode() and prettify(): # # "html" - All Unicode characters with corresponding HTML entities - # are converted to those entities on output. - # "minimal" - Bare ampersands and angle brackets are converted to + # are converted to those entities on output. + # "minimal" - Bare ampersands and angle brackets are converted to # XML entities: & < > # None - The null formatter. Unicode characters are never # converted to entities. This is not recommended, but it's @@ -535,9 +535,16 @@ return ResultSet(strainer, result) elif isinstance(name, basestring): # Optimization to find all tags with a given name. + if name.count(':') == 1: + # This is a name with a prefix. + prefix, name = name.split(':', 1) + else: + prefix = None result = (element for element in generator if isinstance(element, Tag) - and element.name == name) + and element.name == name + and (prefix is None or element.prefix == prefix) + ) return ResultSet(strainer, result) results = ResultSet(strainer) while True: @@ -863,7 +870,7 @@ Its contents are a copy of the old Tag's contents. """ clone = type(self)(None, self.builder, self.name, self.namespace, - self.nsprefix, self.attrs, is_xml=self._is_xml) + self.prefix, self.attrs, is_xml=self._is_xml) for attr in ('can_be_empty_element', 'hidden'): setattr(clone, attr, getattr(self, attr)) for child in self.contents: @@ -985,6 +992,13 @@ attribute.""" return self.attrs.get(key, default) + def get_attribute_list(self, key, default=None): + """The same as get(), but always returns a list.""" + value = self.get(key, default) + if not isinstance(value, list): + value = [value] + return value + def has_attr(self, key): return key in self.attrs @@ -1698,7 +1712,7 @@ "I don't know how to match against a %s" % markup.__class__) return found - def _matches(self, markup, match_against): + def _matches(self, markup, match_against, already_tried=None): # print u"Matching %s against %s" % (markup, match_against) result = False if isinstance(markup, list) or isinstance(markup, tuple): @@ -1713,7 +1727,7 @@ if self._matches(' '.join(markup), match_against): return True return False - + if match_against is True: # True matches any non-None value. return markup is not None @@ -1723,6 +1737,7 @@ # Custom callables take the tag as an argument, but all # other ways of matching match the tag name as a string. + original_markup = markup if isinstance(markup, Tag): markup = markup.name @@ -1733,18 +1748,51 @@ # None matches None, False, an empty string, an empty list, and so on. return not match_against - if isinstance(match_against, unicode): + if (hasattr(match_against, '__iter__') + and not isinstance(match_against, basestring)): + # We're asked to match against an iterable of items. + # The markup must be match at least one item in the + # iterable. We'll try each one in turn. + # + # To avoid infinite recursion we need to keep track of + # items we've already seen. + if not already_tried: + already_tried = set() + for item in match_against: + if item.__hash__: + key = item + else: + key = id(item) + if key in already_tried: + continue + else: + already_tried.add(key) + if self._matches(original_markup, item, already_tried): + return True + else: + return False + + # Beyond this point we might need to run the test twice: once against + # the tag's name and once against its prefixed name. + match = False + + if not match and isinstance(match_against, unicode): # Exact string match - return markup == match_against + match = markup == match_against - if hasattr(match_against, 'match'): + if not match and hasattr(match_against, 'search'): # Regexp match return match_against.search(markup) - if hasattr(match_against, '__iter__'): - # The markup must be an exact match against something - # in the iterable. - return markup in match_against + if (not match + and isinstance(original_markup, Tag) + and original_markup.prefix): + # Try the whole thing again with the prefixed tag name. + return self._matches( + original_markup.prefix + ':' + original_markup.name, match_against + ) + + return match class ResultSet(list): @@ -1753,3 +1801,8 @@ def __init__(self, source, result=()): super(ResultSet, self).__init__(result) self.source = source + + def __getattr__(self, key): + raise AttributeError( + "ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key + ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/bs4/testing.py new/beautifulsoup4-4.6.0/bs4/testing.py --- old/beautifulsoup4-4.5.3/bs4/testing.py 2016-07-30 13:54:51.000000000 +0200 +++ new/beautifulsoup4-4.6.0/bs4/testing.py 2017-05-07 14:16:59.000000000 +0200 @@ -69,6 +69,18 @@ markup in these tests, there's not much room for interpretation. """ + def test_empty_element_tags(self): + """Verify that all HTML4 and HTML5 empty element (aka void element) tags + are handled correctly. + """ + for name in [ + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', + 'spacer', 'frame' + ]: + soup = self.soup("") + new_tag = soup.new_tag(name) + self.assertEqual(True, new_tag.is_empty_element) + def test_pickle_and_unpickle_identity(self): # Pickling a tree, then unpickling it, yields a tree identical # to the original. @@ -330,6 +342,13 @@ self.assertEqual("p", soup.p.name) self.assertConnectedness(soup) + def test_empty_element_tags(self): + """Verify consistent handling of empty-element tags, + no matter how they come in through the markup. + """ + self.assertSoupEquals('<br/><br/><br/>', "<br/><br/><br/>") + self.assertSoupEquals('<br /><br /><br />', "<br/><br/><br/>") + def test_head_tag_between_head_and_body(self): "Prevent recurrence of a bug in the html5lib treebuilder." content = """<html><head></head> @@ -669,6 +688,40 @@ soup = self.soup(markup) self.assertEqual(unicode(soup.foo), markup) + def test_find_by_prefixed_name(self): + doc = """<?xml version="1.0" encoding="utf-8"?> +<Document xmlns="http://example.com/ns0" + xmlns:ns1="http://example.com/ns1" + xmlns:ns2="http://example.com/ns2" + <ns1:tag>foo</ns1:tag> + <ns1:tag>bar</ns1:tag> + <ns2:tag key="value">baz</ns2:tag> +</Document> +""" + soup = self.soup(doc) + + # There are three <tag> tags. + self.assertEqual(3, len(soup.find_all('tag'))) + + # But two of them are ns1:tag and one of them is ns2:tag. + self.assertEqual(2, len(soup.find_all('ns1:tag'))) + self.assertEqual(1, len(soup.find_all('ns2:tag'))) + + self.assertEqual(1, len(soup.find_all('ns2:tag', key='value'))) + self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag']))) + + def test_copy_tag_preserves_namespace(self): + xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<w:document xmlns:w="http://example.com/ns0"/>""" + + soup = self.soup(xml) + tag = soup.document + duplicate = copy.copy(tag) + + # The two tags have the same namespace prefix. + self.assertEqual(tag.prefix, duplicate.prefix) + + class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): """Smoke test for a tree builder that supports HTML5.""" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/bs4/tests/test_htmlparser.py new/beautifulsoup4-4.6.0/bs4/tests/test_htmlparser.py --- old/beautifulsoup4-4.5.3/bs4/tests/test_htmlparser.py 2015-06-28 21:53:42.000000000 +0200 +++ new/beautifulsoup4-4.6.0/bs4/tests/test_htmlparser.py 2017-05-07 03:30:50.000000000 +0200 @@ -29,4 +29,6 @@ loaded = pickle.loads(dumped) self.assertTrue(isinstance(loaded.builder, type(tree.builder))) - + def test_redundant_empty_element_closing_tags(self): + self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>") + self.assertSoupEquals('</br></br></br>', "") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/bs4/tests/test_tree.py new/beautifulsoup4-4.6.0/bs4/tests/test_tree.py --- old/beautifulsoup4-4.5.3/bs4/tests/test_tree.py 2016-07-27 03:22:19.000000000 +0200 +++ new/beautifulsoup4-4.6.0/bs4/tests/test_tree.py 2017-05-07 03:38:18.000000000 +0200 @@ -1,3 +1,4 @@ + # -*- coding: utf-8 -*- """Tests for Beautiful Soup's tree traversal methods. @@ -234,6 +235,7 @@ self.assertEqual('1', r3.string) self.assertEqual('3', r4.string) + class TestFindAllByAttribute(TreeTest): def test_find_all_by_attribute_name(self): @@ -1284,6 +1286,10 @@ soup = self.soup("<a class='foo\tbar'>") self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode()) + def test_get_attribute_list(self): + soup = self.soup("<a id='abc def'>") + self.assertEqual(['abc def'], soup.a.get_attribute_list('id')) + def test_accept_charset(self): soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">') self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset']) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/doc/source/index.rst new/beautifulsoup4-4.6.0/doc/source/index.rst --- old/beautifulsoup4-4.5.3/doc/source/index.rst 2016-12-19 23:43:28.000000000 +0100 +++ new/beautifulsoup4-4.6.0/doc/source/index.rst 2017-05-07 03:37:18.000000000 +0200 @@ -402,13 +402,13 @@ ``headers``, and ``accesskey``. Beautiful Soup presents the value(s) of a multi-valued attribute as a list:: - css_soup = BeautifulSoup('<p class="body strikeout"></p>') - css_soup.p['class'] - # ["body", "strikeout"] - css_soup = BeautifulSoup('<p class="body"></p>') css_soup.p['class'] # ["body"] + + css_soup = BeautifulSoup('<p class="body strikeout"></p>') + css_soup.p['class'] + # ["body", "strikeout"] If an attribute `looks` like it has more than one value, but it's not a multi-valued attribute as defined by any version of the HTML @@ -428,6 +428,12 @@ print(rel_soup.p) # <p>Back to the <a rel="index contents">homepage</a></p> +You can use ```get_attribute_list`` to get a value that's always a list, +string, whether or not it's a multi-valued atribute + + id_soup.p.get_attribute_list('id') + # ["my id"] + If you parse a document as XML, there are no multi-valued attributes:: xml_soup = BeautifulSoup('<p class="body strikeout"></p>', 'xml') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/beautifulsoup4-4.5.3/setup.py new/beautifulsoup4-4.6.0/setup.py --- old/beautifulsoup4-4.5.3/setup.py 2017-01-02 15:57:45.000000000 +0100 +++ new/beautifulsoup4-4.6.0/setup.py 2017-05-07 15:49:03.000000000 +0200 @@ -5,7 +5,7 @@ setup( name="beautifulsoup4", - version = "4.5.3", + version = "4.6.0", author="Leonard Richardson", author_email='leonardr@segfault.org', url="http://www.crummy.com/software/BeautifulSoup/bs4/",
participants (1)
-
root@hilbert.suse.de