Hello community, here is the log from the commit of package python3-html2text for openSUSE:Factory checked in at 2016-07-05 09:53:05 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python3-html2text (Old) and /work/SRC/openSUSE:Factory/.python3-html2text.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "python3-html2text" Changes: -------- --- /work/SRC/openSUSE:Factory/python3-html2text/python3-html2text.changes 2016-05-25 21:24:20.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.python3-html2text.new/python3-html2text.changes 2016-07-05 09:53:14.000000000 +0200 @@ -1,0 +2,9 @@ +Tue Jul 5 01:11:57 UTC 2016 - arun@gmx.de + +- update to version 2016.5.29: + * Fix #125: --pad_tables now pads table cells to make them look + nice. + * Fix #114: Break does not interrupt blockquotes + * Deprecation warnings for URL retrieval. + +------------------------------------------------------------------- @@ -6 +14,0 @@ - Old: ---- html2text-2016.4.2.tar.gz New: ---- html2text-2016.5.29.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python3-html2text.spec ++++++ --- /var/tmp/diff_new_pack.7CzrT1/_old 2016-07-05 09:53:15.000000000 +0200 +++ /var/tmp/diff_new_pack.7CzrT1/_new 2016-07-05 09:53:15.000000000 +0200 @@ -17,7 +17,7 @@ Name: python3-html2text -Version: 2016.4.2 +Version: 2016.5.29 Release: 0 Url: https://github.com/Alir3z4/html2text/ Summary: Turn HTML into equivalent Markdown-structured text ++++++ html2text-2016.4.2.tar.gz -> html2text-2016.5.29.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/ChangeLog.rst new/html2text-2016.5.29/ChangeLog.rst --- old/html2text-2016.4.2/ChangeLog.rst 2016-04-01 23:57:46.000000000 +0200 +++ new/html2text-2016.5.29/ChangeLog.rst 2016-05-29 18:12:58.000000000 +0200 @@ -1,3 +1,12 @@ +2016.5.29 +========= +---- + +* Fix #125: --pad_tables now pads table cells to make them look nice. +* Fix #114: Break does not interrupt blockquotes +* Deprecation warnings for URL retrieval. + + 2016.4.2 ========= ---- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/PKG-INFO new/html2text-2016.5.29/PKG-INFO --- old/html2text-2016.4.2/PKG-INFO 2016-04-02 00:00:15.000000000 +0200 +++ new/html2text-2016.5.29/PKG-INFO 2016-05-29 18:16:19.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: html2text -Version: 2016.4.2 +Version: 2016.5.29 Summary: Turn HTML into equivalent Markdown-structured text. Home-page: https://github.com/Alir3z4/html2text/ Author: Alireza Savand @@ -107,7 +107,6 @@ Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.0 -Classifier: Programming Language :: Python :: 3.1 Classifier: Programming Language :: Python :: 3.2 Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/html2text/__init__.py new/html2text-2016.5.29/html2text/__init__.py --- old/html2text-2016.4.2/html2text/__init__.py 2016-04-01 23:58:02.000000000 +0200 +++ new/html2text-2016.5.29/html2text/__init__.py 2016-05-29 18:14:12.000000000 +0200 @@ -27,10 +27,11 @@ list_numbering_start, dumb_css_parser, escape_md_section, - skipwrap + skipwrap, + pad_tables_in_text ) -__version__ = (2016, 4, 2) +__version__ = (2016, 5, 29) # TODO: @@ -77,6 +78,7 @@ self.hide_strikethrough = False # covered in cli self.mark_code = config.MARK_CODE self.wrap_links = config.WRAP_LINKS # covered in cli + self.pad_tables = config.PAD_TABLES # covered in cli self.tag_callback = None if out is None: # pragma: no cover @@ -130,7 +132,11 @@ def handle(self, data): self.feed(data) self.feed("") - return self.optwrap(self.close()) + markdown = self.optwrap(self.close()) + if self.pad_tables: + return pad_tables_in_text(markdown) + else: + return markdown def outtextf(self, s): self.outtextlist.append(s) @@ -142,23 +148,20 @@ try: nochr = unicode('') + unicode_character = unichr except NameError: nochr = str('') + unicode_character = chr self.pbr() self.o('', 0, 'end') outtext = nochr.join(self.outtextlist) + if self.unicode_snob: - try: - nbsp = unichr(name2cp('nbsp')) - except NameError: - nbsp = chr(name2cp('nbsp')) + nbsp = unicode_character(name2cp('nbsp')) else: - try: - nbsp = unichr(32) - except NameError: - nbsp = chr(32) + nbsp = unicode_character(32) try: outtext = outtext.replace(unicode(' _place_holder;'), nbsp) except NameError: @@ -331,7 +334,10 @@ self.p() if tag == "br" and start: - self.o(" \n") + if self.blockquote > 0: + self.o(" \n> ") + else: + self.o(" \n") if tag == "hr" and start: self.p() @@ -556,8 +562,16 @@ self.o('</{0}>'.format(tag)) else: - if tag == "table" and start: - self.table_start = True + if tag == "table": + if start: + self.table_start = True + if self.pad_tables: + self.o("<"+config.TABLE_MARKER_FOR_PAD+">") + self.o(" \n") + else: + if self.pad_tables: + self.o("</"+config.TABLE_MARKER_FOR_PAD+">") + self.o(" \n") if tag in ["td", "th"] and start: if self.split_next_td: self.o("| ") @@ -814,7 +828,9 @@ for para in text.split("\n"): if len(para) > 0: if not skipwrap(para, self.wrap_links): - result += "\n".join(wrap(para, self.body_width)) + result += "\n".join( + wrap(para, self.body_width, break_long_words=False) + ) if para.endswith(' '): result += " \n" newlines = 1 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/html2text/cli.py new/html2text-2016.5.29/html2text/cli.py --- old/html2text-2016.4.2/html2text/cli.py 2016-04-01 23:55:11.000000000 +0200 +++ new/html2text-2016.5.29/html2text/cli.py 2016-05-29 18:08:48.000000000 +0200 @@ -1,4 +1,5 @@ import optparse +import warnings from html2text.compat import urllib from html2text import HTML2Text, config, __version__ @@ -23,6 +24,13 @@ version='%prog ' + ".".join(map(str, __version__)) ) p.add_option( + "--pad-tables", + dest="pad_tables", + action="store_true", + default=config.PAD_TABLES, + help="pad the cells to equal column width in tables" + ) + p.add_option( "--no-wrap-links", dest="wrap_links", action="store_false", @@ -204,6 +212,8 @@ file_ = args[0] if file_.startswith('http://') or file_.startswith('https://'): + warnings.warn("Support for retrieving html over network is set for deprecation by version (2017, 1, x)", + DeprecationWarning) baseurl = file_ j = urllib.urlopen(baseurl) data = j.read() @@ -268,5 +278,6 @@ h.links_each_paragraph = options.links_each_paragraph h.mark_code = options.mark_code h.wrap_links = options.wrap_links + h.pad_tables = options.pad_tables wrapwrite(h.handle(data)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/html2text/config.py new/html2text-2016.5.29/html2text/config.py --- old/html2text-2016.4.2/html2text/config.py 2015-11-04 15:32:38.000000000 +0100 +++ new/html2text-2016.5.29/html2text/config.py 2016-05-29 18:08:48.000000000 +0200 @@ -3,6 +3,8 @@ # Use Unicode characters instead of their ascii psuedo-replacements UNICODE_SNOB = 0 +# Marker to use for marking tables for padding post processing +TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding" # Escape all special characters. Output is less readable, but avoids # corner case formatting issues. ESCAPE_SNOB = 0 @@ -36,6 +38,7 @@ IGNORE_EMPHASIS = False MARK_CODE = False DECODE_ERRORS = 'strict' +PAD_TABLES = False # Convert links with same href and text to <href> format if they are absolute links USE_AUTOMATIC_LINKS = True diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/html2text/utils.py new/html2text-2016.5.29/html2text/utils.py --- old/html2text-2016.4.2/html2text/utils.py 2016-01-08 17:31:01.000000000 +0100 +++ new/html2text-2016.5.29/html2text/utils.py 2016-05-29 18:08:48.000000000 +0200 @@ -244,3 +244,55 @@ text = config.RE_MD_DASH_MATCHER.sub(r"\1\\\2", text) return text + +def reformat_table(lines, right_margin): + """ + Given the lines of a table + padds the cells and returns the new lines + """ + # find the maximum width of the columns + max_width = [len(x.rstrip()) + right_margin for x in lines[0].split('|')] + for line in lines: + cols = [x.rstrip() for x in line.split('|')] + max_width = [max(len(x) + right_margin, old_len) + for x, old_len in zip(cols, max_width)] + + # reformat + new_lines = [] + for line in lines: + cols = [x.rstrip() for x in line.split('|')] + if set(line.strip()) == set('-|'): + filler = '-' + new_cols = [x.rstrip() + (filler * (M - len(x.rstrip()))) + for x, M in zip(cols, max_width)] + else: + filler = ' ' + new_cols = [x.rstrip() + (filler * (M - len(x.rstrip()))) + for x, M in zip(cols, max_width)] + new_lines.append('|'.join(new_cols)) + return new_lines + +def pad_tables_in_text(text, right_margin=1): + """ + Provide padding for tables in the text + """ + lines = text.split('\n') + table_buffer, altered_lines, table_widths, table_started = [], [], [], False + new_lines = [] + for line in lines: + # Toogle table started + if (config.TABLE_MARKER_FOR_PAD in line): + table_started = not table_started + if not table_started: + table = reformat_table(table_buffer, right_margin) + new_lines.extend(table) + table_buffer = [] + new_lines.append('') + continue + # Process lines + if table_started: + table_buffer.append(line) + else: + new_lines.append(line) + new_text = '\n'.join(new_lines) + return new_text diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/html2text.egg-info/PKG-INFO new/html2text-2016.5.29/html2text.egg-info/PKG-INFO --- old/html2text-2016.4.2/html2text.egg-info/PKG-INFO 2016-04-02 00:00:15.000000000 +0200 +++ new/html2text-2016.5.29/html2text.egg-info/PKG-INFO 2016-05-29 18:16:19.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: html2text -Version: 2016.4.2 +Version: 2016.5.29 Summary: Turn HTML into equivalent Markdown-structured text. Home-page: https://github.com/Alir3z4/html2text/ Author: Alireza Savand @@ -107,7 +107,6 @@ Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.0 -Classifier: Programming Language :: Python :: 3.1 Classifier: Programming Language :: Python :: 3.2 Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/html2text.egg-info/SOURCES.txt new/html2text-2016.5.29/html2text.egg-info/SOURCES.txt --- old/html2text-2016.4.2/html2text.egg-info/SOURCES.txt 2016-04-02 00:00:15.000000000 +0200 +++ new/html2text-2016.5.29/html2text.egg-info/SOURCES.txt 2016-05-29 18:16:19.000000000 +0200 @@ -35,6 +35,8 @@ test/bodywidth_newline.md test/bold_inside_link.html test/bold_inside_link.md +test/break_preserved_in_blockquote.html +test/break_preserved_in_blockquote.md test/css_import_no_semicolon.html test/css_import_no_semicolon.md test/decript_tage.html @@ -73,6 +75,8 @@ test/link_titles.md test/list_tags_example.html test/list_tags_example.md +test/long_lines.html +test/long_lines.md test/mark_code.html test/mark_code.md test/nbsp.html @@ -93,6 +97,8 @@ test/normal.md test/normal_escape_snob.html test/normal_escape_snob.md +test/pad_table.html +test/pad_table.md test/pre.html test/pre.md test/preformatted_in_list.html diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/setup.py new/html2text-2016.5.29/setup.py --- old/html2text-2016.4.2/setup.py 2016-04-01 23:55:11.000000000 +0200 +++ new/html2text-2016.5.29/setup.py 2016-05-29 18:13:44.000000000 +0200 @@ -1,6 +1,8 @@ # coding: utf-8 import sys + from setuptools import setup, Command, find_packages + try: from pypandoc import convert read_md = lambda f: convert(f, 'rst') @@ -18,7 +20,8 @@ class RunTests(Command): - """New setup.py command to run all tests for the package. + """ + New setup.py command to run all tests for the package. """ description = "run all tests for the package" @@ -62,7 +65,6 @@ 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.0', - 'Programming Language :: Python :: 3.1', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/test/break_preserved_in_blockquote.html new/html2text-2016.5.29/test/break_preserved_in_blockquote.html --- old/html2text-2016.4.2/test/break_preserved_in_blockquote.html 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2016.5.29/test/break_preserved_in_blockquote.html 2016-05-29 18:08:48.000000000 +0200 @@ -0,0 +1 @@ +a<blockquote>b<br>c</blockquote> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/test/break_preserved_in_blockquote.md new/html2text-2016.5.29/test/break_preserved_in_blockquote.md --- old/html2text-2016.4.2/test/break_preserved_in_blockquote.md 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2016.5.29/test/break_preserved_in_blockquote.md 2016-05-29 18:08:48.000000000 +0200 @@ -0,0 +1,5 @@ +a + +> b +> c + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/test/long_lines.html new/html2text-2016.5.29/test/long_lines.html --- old/html2text-2016.4.2/test/long_lines.html 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2016.5.29/test/long_lines.html 2016-05-29 18:08:48.000000000 +0200 @@ -0,0 +1 @@ +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd <img src="http://www.foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo.com"> asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/test/long_lines.md new/html2text-2016.5.29/test/long_lines.md --- old/html2text-2016.4.2/test/long_lines.md 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2016.5.29/test/long_lines.md 2016-05-29 18:08:48.000000000 +0200 @@ -0,0 +1,14 @@ +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd +![](http://www.foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo...) +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd asd +asd asd asd asd asd + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/test/pad_table.html new/html2text-2016.5.29/test/pad_table.html --- old/html2text-2016.4.2/test/pad_table.html 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2016.5.29/test/pad_table.html 2016-05-29 18:08:48.000000000 +0200 @@ -0,0 +1,26 @@ +<!DOCTYPE html> <html> + <head lang="en"> <meta charset="UTF-8"> <title></title> </head> + <body> <h1>This is a test document</h1> With some text, <code>code</code>, <b>bolds</b> and <i>italics</i>. <h2>This is second header</h2> <p style="display: none">Displaynone text</p> + <table> + <tr> <th>Header 1</th> <th>Header 2</th> <th>Header 3</th> </tr> + <tr> <td>Content 1</td> <td>2</td> <td><img src="http://lorempixel.com/200/200" alt="200"/> Image!</td> </tr> + <tr> <td>Content 1 longer</td> <td>Content 2</td> <td>blah</td> </tr> + <tr> <td>Content </td> <td>Content 2</td> <td>blah</td> </tr> + <tr> <td>t </td> <td>Content 2</td> <td>blah blah blah</td> </tr> + </table> + + + <table> <tr> <th>H1</th> <th>H2</th> <th>H3</th> </tr> + <tr> <td>C1</td> <td>Content 2</td> <td>x</td> </tr> + <tr> <td>C123</td> <td>Content 2</td> <td>xyz</td> </tr> + </table> + +some content between the tables<br> + + <table> <tr> <th>Header 1</th> <th>Header 2</th> <th>Header 3</th> </tr> + <tr> <td>Content 1</td> <td>Content 2</td> <td><img src="http://lorempixel.com/200/200" alt="200"/> Image!</td> </tr> + <tr> <td>Content 1</td> <td>Content 2 longer</td> <td><img src="http://lorempixel.com/200/200" alt="200"/> Image!</td> </tr> + </table> + +something else entirely +</body> </html> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/test/pad_table.md new/html2text-2016.5.29/test/pad_table.md --- old/html2text-2016.4.2/test/pad_table.md 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2016.5.29/test/pad_table.md 2016-05-29 18:08:48.000000000 +0200 @@ -0,0 +1,28 @@ +# This is a test document + +With some text, `code`, **bolds** and _italics_. + +## This is second header + +Displaynone text + +Header 1 | Header 2 | Header 3 +-----------------|-----------|---------------------------------------------- +Content 1 | 2 | ![200](http://lorempixel.com/200/200) Image! +Content 1 longer | Content 2 | blah +Content | Content 2 | blah +t | Content 2 | blah blah blah + +H1 | H2 | H3 +-----|-----------|----- +C1 | Content 2 | x +C123 | Content 2 | xyz + +some content between the tables +Header 1 | Header 2 | Header 3 +----------|------------------|---------------------------------------------- +Content 1 | Content 2 | ![200](http://lorempixel.com/200/200) Image! +Content 1 | Content 2 longer | ![200](http://lorempixel.com/200/200) Image! + +something else entirely + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2016.4.2/test/test_html2text.py new/html2text-2016.5.29/test/test_html2text.py --- old/html2text-2016.4.2/test/test_html2text.py 2016-04-01 23:55:11.000000000 +0200 +++ new/html2text-2016.5.29/test/test_html2text.py 2016-05-29 18:08:48.000000000 +0200 @@ -170,6 +170,10 @@ module_args['mark_code'] = True cmdline_args.append('--mark-code') + if base_fn.startswith('pad_table'): + module_args['pad_tables'] = True + cmdline_args.append('--pad-tables') + if base_fn not in ['bodywidth_newline.html', 'abbr_tag.html']: test_func = None