commit pdfcompare for openSUSE:Factory

28 Apr 2016

Hello community,

here is the log from the commit of package pdfcompare for openSUSE:Factory checked in at 2016-04-28 16:57:22
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/pdfcompare (Old)
 and      /work/SRC/openSUSE:Factory/.pdfcompare.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "pdfcompare"

Changes:
--------

--- /work/SRC/openSUSE:Factory/pdfcompare/pdfcompare.changes	2014-01-14 21:51:50.000000000 +0100
+++ /work/SRC/openSUSE:Factory/.pdfcompare.new/pdfcompare.changes	2016-04-28 17:02:38.000000000 +0200
@@ -1,0 +2,18 @@
+Tue Apr 19 15:24:08 UTC 2016 - jw@owncloud.com
+
+- V1.6.8 - cleaner popup annotations unless -S
+	 - no navigation buttons unless -f ..N..
+
+-------------------------------------------------------------------
+Mon Apr 18 18:17:25 UTC 2016 - jnweiger@gmail.com
+
+- V1.6.7 - support Ubuntu 14.04
+
+-------------------------------------------------------------------
+Mon Apr 18 13:57:26 UTC 2016 - jw@owncloud.com
+
+- V1.6.6 - hunspell usage hint: how to add words to private dictionary.
+-          pull_github.sh added.
+-          use pdf_highlight.py if pdfcompare.py is not in the tar. Historic name.
+
+-------------------------------------------------------------------

Old:
----
  pdfcompare-1.6.5.tar.bz2

New:
----
  Makefile
  debian.changelog
  debian.compat
  debian.control
  debian.pdfcompare.install
  debian.rules
  debian.series
  pdfcompare-1.6.8.tar.bz2
  pdfcompare.dsc
  pull_github.sh

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ pdfcompare.spec ++++++
--- /var/tmp/diff_new_pack.71YLEz/_old	2016-04-28 17:02:40.000000000 +0200
+++ /var/tmp/diff_new_pack.71YLEz/_new	2016-04-28 17:02:40.000000000 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package pdfcompare
 #
-# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany.
+# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -17,13 +17,14 @@
 
 
 Name:           pdfcompare
-Version:        1.6.5
+Version:        1.6.8
 Release:        0
 Summary:        Compare two PDF files, write a resulting PDF with highlighted changes
 License:        GPL-2.0
 Group:          Productivity/Publishing/PDF
 Url:            https://github.com/jnweiger/pdfcompare
 Source:         pdfcompare-%version.tar.bz2
+Source100:      pull_github.sh
 
 # These BuildRequires are only required for the testsuite
 BuildRequires:  poppler-tools
@@ -75,6 +76,7 @@
 %endif
 
 %install
+test -f pdfcompare.py || mv pdf_highlight.py pdfcompare.py
 install -Dm 0755 pdfcompare.py %{buildroot}%{_bindir}/pdfcompare
 
 %files

++++++ debian.changelog ++++++
pdfcompare (1.6.8-1) stable; urgency=low

  * V1.6.8 - cleaner popup annotations unless -S
  *          no navigation buttons unless -f ..N..

 -- Jürgen Weigert   Tue, 19 Apr 2016 15:45:49 +0200

pdfcompare (1.6.7-1ubuntu1) stable; urgency=medium

  * 1.6.7 support ubuntu 14.04

 -- Jürgen Weigert   Mon, 18 Apr 2016 20:17:01 +0200

pdfcompare (1.6.6-1) stable; urgency=low

  * V1.6.6 - hunspell usage hint: how to add words to private dictionary.
  *          pull_github.sh added.

 -- Jürgen Weigert   Mon, 18 Apr 2016 13:55:48 +0200

pdfcompare (1.6.5-2) stable; urgency=medium

  * Dependencies added, debian.rules file fixed.

 -- Jürgen Weigert   Mon, 18 Apr 2016 13:13:31 +0200

pdfcompare (1.6.5-1) stable; urgency=medium

  * first DEB packaging

 -- Jürgen Weigert   Mon, 04 Apr 2016 20:40:05 +0200
++++++ debian.compat ++++++
9
++++++ debian.control ++++++
Source: pdfcompare
Section: unknown
Priority: optional
Maintainer: Jürgen Weigert 
Build-Depends: debhelper (>= 4.2.21)

Package: pdfcompare
Architecture: all
# poppler-utils is needed for /usr/bin/pdftohtml
# python-pygame is needed for pygame.font only.
Depends: ${shlibs:Depends}, ${misc:Depends},
	python-pypdf, python-pygame, poppler-utils, python-reportlab,
	hunspell, hunspell-en-us, hunspell-de-de
Description: Compare two PDF files, write a resulting PDF with highlighted changes.
	Potential text portions that were moved around are recognized and analyzed
	for similarity with a second level diff.

++++++ debian.pdfcompare.install ++++++
pdfcompare	/usr/bin
++++++ debian.rules ++++++
#!/usr/bin/make -f
# Debian rules file for pdfcompare: every target is handed to dh, and the
# install step is overridden below.

# Catch-all rule: first overwrite Makefile with one that contains only an
# empty `all:` target, then run dh for the requested target.
%:
	echo 'all:' > Makefile # don't build anything
	dh $@

# Install override: rename the script to `pdfcompare` (trying pdfcompare.py
# first and pdf_highlight.py if that mv fails), make it executable, then run
# dh_auto_install with INSTALL_ROOT set to debian/tmp under the build dir.
override_dh_auto_install:
	mv pdfcompare.py pdfcompare || mv pdf_highlight.py pdfcompare
	chmod 0755 pdfcompare
	dh_auto_install -- INSTALL_ROOT=$(CURDIR)/debian/tmp
++++++ pdfcompare-1.6.5.tar.bz2 -> pdfcompare-1.6.8.tar.bz2 ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/MANIFEST.in new/pdfcompare-1.6.8/MANIFEST.in
--- old/pdfcompare-1.6.5/MANIFEST.in	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/MANIFEST.in	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1 @@
+include *.txt
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/Makefile new/pdfcompare-1.6.8/Makefile
--- old/pdfcompare-1.6.5/Makefile	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/Makefile	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,26 @@
+
+VER=1.6.8
+D=dist/pdfcompare-$(VER)
+EXCL=--exclude \*.orig --exclude \*~
+
+all: check tar
+
+check test:
+	cd test; make test VER=$(VER)
+
+testrefresh refreshtest:
+	cd test; make test refresh=yes
+
+clean:
+	rm -rf dist *.orig *~
+	rm -rf test/*.orig test/*~
+
+tar dist:
+	rm -rf dist
+	mkdir -p $D
+	ln -s ../../pdfcompare.py $D/pdfcompare.py
+	ln -s ../../COPYING $D/
+	ln -s ../../test $D/test
+	cd dist; tar jhcvf ../pdfcompare-$(VER).tar.bz2 pdfcompare-$(VER) $(EXCL)
+	rm -rf dist
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/README.txt new/pdfcompare-1.6.8/README.txt
--- old/pdfcompare-1.6.5/README.txt	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/README.txt	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,13 @@
+pdfcompare
+==========
+
+Compare text of two PDF files, write a resulting PDF with highlighted changes.
+Potential text portions that were moved around are recognized and analyzed 
+for similarity with a second level diff.
+
+Required Packages:
+
+* pyPdf
+* reportlab.pdfgen
+* reportlab.lib.colors
+* pygame.font
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/TODO.md new/pdfcompare-1.6.8/TODO.md
--- old/pdfcompare-1.6.5/TODO.md	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/TODO.md	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,81 @@
+TODO
+
+* Test with pyPDF2
+
+* Test with python3
+
+* Is a Windows installer possible?
+
+* Test popups with Microsoft Edge Browser
+
+* hunspell issues:
+  - python-HunspellPure should be a separate module. Split it.
+  - we artificially limit to [A-Z_-]+ for words. This is bad for german umlauts.
+  - extend hunspell to allow a progress indicator callback.
+    (counting newlines seen in response)
+
+* testsuite
+  - maybe prepare a test script that allows numbers to be off by some 
+    percentage, but wants everything else precise.
+    This helps with pdf source checking.
+
+* improve --log logfile generator.
+  produce a json/xml/csv/txt file describing the diffs, -s word locations 
+  and --spellcheck results.
+
+* one letter changes always become word changes.
+  Either run in single character mode. Or try to trim the replaced text for 
+  common suffix or common prefix.
+
+* Normalize nonbreaking spaces to spaces.
+  This is important when e.g. markdown source has a 0x20 space, but rendered
+  PDF may have a non-breaking space (U+00A0) instead.
+
+
+
+DONE:
+
+* write compressed streams.
+
+* catch file open errors, before ET complains about 0 elements.
+
+* perform only same-length-replace. All other replace-ops should be replace+insert
+  or replace+delete.
+
+* place delete marker at last text end position, rather than next text start position.
+  This is a tricky implementation in markword().
+
+* testsuite
+  - a 1:1 comparison is not possible, as e.g. poppler-0.18 and poppler-0.20
+    produce differences in the exact coordinates used.
+  - make a fuzzy comparison against templates with python-cv, pHash, etc...
+    http://stackoverflow.com/questions/1819124/image-comparison-algorithm suggests
+    Scipy.  imgcmp.py does this.
+  - generate several output.pdf, convert via ImageMagick to png, 
+  - run pdfcompare --version.
+
+* nicer +++---~~~== git style diagnostics per page, rather than saying '87 hits'.
+
+* if pagebreaks are within deleted text, point this out in the balloon popup.
+  
+* Navigation from changebar to changebar, if there are many unchanged pages to jump over.
+  - calculation, graphics done. Hack with relocated navigation done.
+
+* popups are all in one line in okular. Need to provide linebreaks manually, sigh.
+
+* introduce an ignore-margin for text changes. Any words there will not go into
+  the compare wordlists, and will not match with --search. This is meant to skip
+  over pagenumbers and other bottom or top matter, that is not considered part
+  of the document contents stream.
+  --feature margin shall draw the margin area as shaded gray, so that we know
+  where we are.
+
+* feature:
+  pipe the wordlist through hunspell, if hunspell is available.
+  use search-highlights to mark all words for which hunspell has spelling 
+  suggestions. 
+
+* feature: 
+  added a trivial --log implementation
+
+* second level diff for moved blocks.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/hunspell.py new/pdfcompare-1.6.8/hunspell.py
--- old/pdfcompare-1.6.5/hunspell.py	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/hunspell.py	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,184 @@
+# hunspell.py -- a wrapper class for hunspell
+#
+# (c) 2013 Juergen Weigert jw@suse.de
+# Distribute under GPL-2.0 or ask
+#
+# 2013-01-31, V0.1 jw - initial draught: word by word I/O
+# 2013-02-01, V0.1 jw - added own _readline() to use buffering. Pythons readline()
+#                       does single byte read()s, which is slow.
+# 2013-02-02, V0.2 jw - check_words() now remembers a wordlist, pushes all out 
+#                       with an extra thread, reads back async, and reassembles.
+#                       This is much more efficient
+#
+import os,subprocess,re
+
+__VERSION__ = '0.2'
+
+class Hunspell():
+    """A pure python module to interface with hunspell.
+       It was written as a replacement for the hunspell module from
+       http://code.google.com/p/pyhunspell/, which appears to be in unmaintained.
+       and more difficult to use, due to lack of examples and documentation.
+    """
+    def __init__(self, dicts=['en_US']):
+        self.cmd = ['hunspell', '-i', 'utf-8', '-a']
+        self.dicts = dicts
+        self.proc = None
+        self.attr = None
+        self.buffer = ''
+
+    def _start(self):
+        cmd = self.cmd
+        if self.dicts is not None and len(self.dicts): 
+            cmd += ['-d', ','.join(self.dicts)]
+        try:
+            self.proc = subprocess.Popen(cmd, shell=False, 
+                stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        except OSError as e:
+            self.proc = "%s failed: errno=%d %s" % (cmd, e.errno, e.strerror)
+            raise OSError(self.proc)
+        header = ''
+        while True:
+            more = self.proc.stdout.readline().rstrip()
+            if len(more) > 5 and more[0:5] == '@(#) ':    # version line with -a
+                self.version = more[5:]
+                break
+            elif len(more) > 9 and more[0:9] == 'Hunspell ': # version line w/o -a
+                self.version = more
+                break
+            else:
+                header += more  # stderr should be collected here. It does not work
+        if len(header): self.header = header
+        self.buffer = ''
+        
+    def _readline(self):
+        # python readline() is horribly stupid on this pipe. It reads single
+        # byte, just like java did in the 1980ies. Sorry, this is not
+        # acceptable in 2013.
+        if self.proc is None:
+            raise Error("Hunspell_readline before _start")
+        while True:
+            idx = self.buffer.find('\n')
+            if idx < 0:
+                more = self.proc.stdout.read()
+                if not len(more):
+                    r = self.buffer
+                    self.buffer = ''
+                    return r
+                self.buffer += more
+            else:
+                break
+        r = self.buffer[0:idx+1]
+        self.buffer = self.buffer[idx+1:]
+        return r
+
+    def _load_attr(self):
+        try:
+            p = subprocess.Popen(self.cmd + ['-D'], shell=False, 
+                stdin=open('/dev/null'), stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
+        except OSError as e:
+            raise OSError("%s failed: errno=%d %s" % (self.cmd + ['-D'], e.errno, e.strerror))
+        self.attr = {}
+        header=''
+        while True:
+            line = p.stdout.readline().rstrip()
+            if not len(line):
+                break
+            # AVAILABLE DICTIONARIES (path is not mandatory for -d option):
+            m = re.match('([A-Z]+\s[A-Z]+).*:$', line)
+            if m:
+                header = m.group(1)
+                self.attr[header] = []
+            elif len(header):
+                self.attr[header].append(line)
+        return self.attr
+ 
+    def dicts(self,dicts=None):
+        """returns or sets the dictionaries that hunspell shall try to use"""
+        if dicts is not None:
+            self.dicts = dicts
+        return self.dicts
+
+    def list_dicts(self):
+        """query hunspell about the available dictionaries.
+           Returns a key value dict where keys are short names, and values 
+           are path names. You can pick some or all of the returned keys,
+           and use the list (or one) as an argument to 
+           the next Hunspell() instance, or as an argument 
+           to the dicts() method.
+        """
+        if self.attr is None: self._load_attr()
+        r = {}
+        for d in self.attr['AVAILABLE DICTIONARIES']:
+            words = d.split('/')
+            r[words[-1]] = d
+        return r
+ 
+    def dict_search_path(self):
+        """returns a list of pathnames, actually used by hunspell to load 
+           spelling dictionaries from.
+        """
+        if self.attr is None: self._load_attr()
+        r = []
+        for d in self.attr['SEARCH PATH']:
+            r += d.split(':')
+        return r
+ 
+    def dicts_loaded(self):
+        """query the spelling dictionaries that will actually be used for 
+           the next check_words() call.
+        """
+        if self.attr is None: self._load_attr()
+        return self.attr['LOADED DICTIONARY']
+ 
+    def check_words(self, words):
+        """takes a list of words as parameter, and checks them against the 
+           loaded spelling dictionaries. A key value dict is returned, where
+           every key represents a word that was not found in the 
+           spelling dictionaries. Values are lists of correction suggestions.
+           check_words() is implemented by calling the hunspell binary in pipe mode.
+           This is fairly robust, but not optimized for efficiency.
+        """
+        if self.proc is None:
+            self._start()
+        childpid = os.fork()
+        if childpid == 0:
+            for w in words:
+                self.proc.stdin.write(("^"+w+"\n").encode('utf8'))
+            os._exit(0)
+        self.proc.stdin.close()
+        bad_words = {}
+ 
+        while True:
+            line = self._readline()
+            if len(line) == 0:
+                break
+            line = line.rstrip()
+            if not len(line) or line[0] in '*+-': continue
+ 
+            if line[0] == '#': 
+                car = line.split(' ')
+                bad_words[car[1]] = []          # no suggestions
+            elif line[0] != '&': 
+                print "hunspell protocoll error: '%s'" % line
+                continue        # unknown stuff
+            # '& Radae 7 0: Radar, Ramada, Estrada, Prada, Rad, Roadie, Readable\n'
+            a = line.split(': ')
+            if len(a) >= 2:
+                car = a[0].split(' ')
+                cdr = a[1].split(', ')
+                bad_words[car[1]] = cdr
+            else:
+                print("bad hunspell reply: %s, split as %s" % (line, a))
+        self.proc = None
+        return bad_words
+
+ 
+if __name__ == "__main__": 
+    from pprint import pprint
+    h = Hunspell()
+    pprint(h.list_dicts())
+    pprint(h.dict_search_path())
+    pprint(h.check_words(["ppppp", '123', '', 'gorkicht', 'gemank', 'haus', '']))
+    pprint(h.check_words(["Radae", 'blood', 'mensch', 'green', 'blea', 'fork']))
+    pprint(h.version)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/imgcmp.py new/pdfcompare-1.6.8/imgcmp.py
--- old/pdfcompare-1.6.5/imgcmp.py	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/imgcmp.py	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,109 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# compare two images using Scipy
+# (c) 2013 - jw@suse.de - distributer under GPL-2.0 or ask.
+#
+# Dependencies:
+# sudo zypper in python-scipy
+# sudo zypper in ImageMagick
+#
+# See also python-pHash, and python-opencv
+# http://stackoverflow.com/questions/13379909/compare-similarity-of-images-usi...
+
+from __future__ import print_function, division
+
+import sys, os, re, tempfile
+from pprint import pprint
+
+import scipy as sp
+from scipy.misc import imread
+from scipy.signal.signaltools import correlate2d as c2d
+
+
+class CompareImageException(Exception):
+     """
+     Exception class for comparing two files 
+     """
+     def __init__(self, c11, c12, c22):
+         self.c11=c11
+         self.c12=c12
+         self.c22=c22
+     def __repr__(self):
+          return "(%.2f %.2f %.2f)" % (self.c11, self.c12, self.c22)
+     __str__=__repr__
+
+
+def load_img(fname):
+     """
+     Load and convert images
+     """
+     # get JPG image as Scipy array, RGB (3 layer)
+     if re.search("\.pdf$", fname, re.I):
+       # convert PDF to JPG
+       tf = tempfile.NamedTemporaryFile(delete=True, suffix=".jpg")
+       print("creating %s" % tf.name)
+       os.system("convert '%s[0]' -geometry 100x100 '%s'" % (fname, tf.name))
+       data = imread(tf.name)
+       tf.close()
+     else:
+       data = imread(fname)
+     # convert to grey-scale using W3C luminance calc
+     ## pprint([data])
+     ## ValueError: matrices are not aligned, if alpha channel...
+     lum = [299, 587, 114]
+     if len(data[0][0]) > 3:
+       lum.append(0)
+     data = sp.inner(data, lum) / 1000.0
+     # normalize per http://en.wikipedia.org/wiki/Cross-correlation
+     return (data - data.mean()) / data.std()
+
+
+def compare(file1, file2, diff):
+     """
+     Compares two files (JPEG, PNG or PDF) 
+     """
+     im1 = load_img(file1)
+     im2 = load_img(file2)
+     c11 = c2d(im1, im1, mode='same')  # baseline
+     c22 = c2d(im2, im2, mode='same')  # baseline
+     c12 = c2d(im1, im2, mode='same')
+     m = [c11.max(), c12.max(), c22.max()]
+     diff_ab = 100 * (1-m[1]/m[0])
+     diff_ba = 100 * (1-m[1]/m[2])
+     
+     fail=max(diff_ab,diff_ba) > diff
+
+     if fail:
+          raise CompareImageException(c11.max(), c12.max(), c22.max())
+
+     return fail
+
+def main():
+     """
+     Compares two files (JPEG, PNG or PDF)
+     """
+  if len(sys.argv) < 4:
+    print("""Usage: %s FILE1 FILE2 N.NN
+
+        FILE1,FILE2 can be in JPEG, PNG, or PDF format.
+        N.NN should be a small floating point number. It represents 
+        the allowed difference in the image metrics.
+        correlate2d from scipy.signal.signaltools is used to compute 
+        the metrics.
+    """ % sys.argv[0])
+    sys.exit(0)
+    diff_allowed=float(sys.argv[3])
+    try:
+         fail=compare(sys.argv[1],sys.argv[2],diff_allowed)
+    except CompareImageException as i:
+         print("error: %s" % i)
+    
+
+    print("limit: %.2f%% -> %s" % (diff_allowed, ("OK","FAIL")[fail]))
+    if fail: sys.exit(1)
+
+
+if __name__ == "__main__":
+       main()
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/man/Makefile new/pdfcompare-1.6.8/man/Makefile
--- old/pdfcompare-1.6.5/man/Makefile	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/man/Makefile	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,20 @@
+#
+# apt-get install xsltproc fop
+#
+
+## openSUSE:
+DB=/usr/share/xml/docbook/stylesheet/nwalsh/current/
+## Ubuntu:
+DB=/usr/share/xml/docbook/stylesheet/nwalsh/
+
+all: man html pdf
+
+man:
+	xsltproc $(DB)/manpages/docbook.xsl pdfcompare.xml
+
+html:
+	xsltproc --output pdfcompare.html $(DB)/xhtml/docbook.xsl pdfcompare.xml
+
+pdf:
+	xsltproc --stringparam paper.type A4 --output pdfcompare.fo $(DB)/fo/docbook.xsl pdfcompare.xml
+	fop pdfcompare.fo pdfcompare.pdf && rm pdfcompare.fo
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/man/pdfcompare.1 new/pdfcompare-1.6.8/man/pdfcompare.1
--- old/pdfcompare-1.6.5/man/pdfcompare.1	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/man/pdfcompare.1	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,189 @@
+'\" t
+.\"     Title: pdfcompare
+.\"    Author: Jürgen Weigert
+.\" Generator: DocBook XSL Stylesheets v1.78.1 http://docbook.sf.net/
+.\"      Date: 04/18/2016
+.\"    Manual: @VERSION@
+.\"    Source: https://github.com/jnweiger/pdfcompare @VERSION@
+.\"  Language: English
+.\"
+.TH "PDFCOMPARE" "1" "04/18/2016" "https://github\&.com/jnweiger/" "@VERSION@"
+.\" -----------------------------------------------------------------
+.\" * Define some portability stuff
+.\" -----------------------------------------------------------------
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.\" http://bugs.debian.org/507673
+.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
+.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\" -----------------------------------------------------------------
+.\" * set default formatting
+.\" -----------------------------------------------------------------
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+.\" -----------------------------------------------------------------
+.\" * MAIN CONTENT STARTS HERE *
+.\" -----------------------------------------------------------------
+.SH "NAME"
+pdfcompare \- Highlight words in a PDF file
+.SH "SYNOPSIS"
+.HP \w'\fBpdfcompare\fR\ 'u
+\fBpdfcompare\fR [\-h] [\-c\ \fIOLDFILE\fR] [\-d\ \fIDECRYPT_KEY\fR] [\-e] [\-i] [\-l\ \fILOGFILE\fR] [\-m\ \fIOPS\fR] [\-n] [\-o\ \fIOUTFILE\fR] [\-s\ \fIWORD_REGEXP\fR] [\-\-spell] [\-\-strict] [\-t\ \fITRANSP\fR] [\-B] [\-C\ NAME=\fIR\fR,\fIG\fR,\fIB\fR] [\-D] [\-F\ \fIFIRST_PAGE\fR] [\-L\ \fILAST_PAGE\fR] [\-M\ N,E,W,S] [\-V] [\-X]
+.br
+{INFILE} [INFILE2]
+.SH "POSITIONAL ARGUMENTS"
+.PP
+\fBINFILE\fR
+.RS 4
+the required PDF input file
+.RE
+.PP
+\fBINFILE2\fR
+.RS 4
+an optional
+\(lqnewer\(rq
+PDF input file; alternate syntax to
+\fB\-c\fR
+.RE
+.SH "OPTIONAL ARGUMENTS"
+.PP
+\fB\-B\fR, \fB\-\-below\fR
+.RS 4
+Paint the highlight markers below the text\&. Try this if the normal merge crashes\&. Use with care, highlights may disappear below background graphics\&. Default: BELOW=\*(AqFALSE\*(Aq
+.RE
+.PP
+\fB\-c \fR\fB\fIOLDFILE\fR\fR, \fB\-\-compare\-text \fR\fB\fIOLDFILE\fR\fR
+.RS 4
+Mark added, deleted and replaced text (or see
+\fB\-m\fR) with regard to
+\fIOLDFILE\fR\&. File formats
+\&.pdf,
+\&.xml,
+\&.txt
+are recognized by their suffix\&. The comparison works word by word\&.
+.RE
+.PP
+\fB\-C NAME=\fR\fB\fIR\fR\fR\fB,\fR\fB\fIG\fR\fR\fB,\fR\fB\fIB\fR\fR, \fB\-\-search\-color NAME=\fR\fB\fIR\fR\fR\fB,\fR\fB\fIG\fR\fR\fB,\fR\fB\fIB\fR\fR
+.RS 4
+Set colors of the search highlights as an RGB triplet; R,G,B ranges are 0\&.0\-1\&.0 each; valid names are \*(Aqadd\*(Aq,\*(Aqdelete\*(Aq,\*(Aqchange\*(Aq,\*(Aqequal\*(Aq,\*(Aqmargin\*(Aq,\*(Aqall\*(Aq; default name is \*(Aqequal\*(Aq, which is also used for
+\fB\-s\fR; default colors are A=0\&.3,1,0\&.3 /*green*/ C=0\&.9,0\&.8,0 /*yellow*/ B=0\&.9,0\&.9,0\&.9 /*gray*/ E=1,0,1 /*pink*/ D=1,0\&.3,0\&.3 /*red*/ M=0\&.7,1,1 /*blue*/
+.RE
+.PP
+\fB\-D\fR, \fB\-\-debug\fR
+.RS 4
+Enable debugging\&. Prints more on stdout, dumps several
+*\&.xml
+and
+*\&.pdf
+files\&.
+.RE
+.PP
+\fB\-e\fR, \fB\-\-exclude\-irrelevant\-pages\fR
+.RS 4
+With
+\fB\-s\fR; show only matching pages\&. With
+\fB\-c\fR: show only changed pages; default: reproduce all pages from
+\fIINFILE\fR
+in
+\fIOUTFILE\fR
+.RE
+.PP
+\fB\-f \fR\fB\fIFEATURES\fR\fR, \fB\-\-features \fR\fB\fIFEATURES\fR\fR
+.RS 4
+Specify how to mark\&. Allowed values are \*(Aqhighlight\*(Aq, \*(Aqchangebar\*(Aq, \*(Aqpopup\*(Aq, \*(Aqnavigation\*(Aq, \*(Aqwatermark\*(Aq, \*(Aqmargin\*(Aq\&. Default: H,C,P,N,W,B
+.RE
+.PP
+\fB\-F \fR\fB\fIFIRST_PAGE\fR\fR, \fB\-\-first\-page \fR\fB\fIFIRST_PAGE\fR\fR
+.RS 4
+Skip some pages at start of document; see also
+\fB\-L\fR; default: all pages
+.RE
+.PP
+\fB\-h\fR, \fB\-\-help\fR
+.RS 4
+Show this help message and exit
+.RE
+.PP
+\fB\-i\fR, \fB\-\-nocase\fR
+.RS 4
+Make
+\fB\-s\fR
+case insensitive; default: case sensitive
+.RE
+.PP
+\fB\-L \fR\fB\fILAST_PAGE\fR\fR, \fB\-\-last\-page \fR\fB\fILAST_PAGE\fR\fR
+.RS 4
+Limit pages processed; this counts pages, it does not use document page numbers; see also
+\fB\-F\fR; default: all pages
+.RE
+.PP
+\fB\-l \fR\fB\fILOGFILE\fR\fR, \fB\-\-log \fR\fB\fILOGFILE\fR\fR
+.RS 4
+Write a Python data structure describing all the overlay objects on each page\&. Default none\&.
+.RE
+.PP
+\fB\-M N,E,W,S\fR, \fB\-\-margins N,E,W,S\fR
+.RS 4
+Specify margin space to ignore on each page\&. A margin width is expressed in units of ca\&. 100dpi\&. Specify four numbers in the order north,east,west,south\&. Default: 0,0,0,0
+.RE
+.PP
+\fB\-m \fR\fB\fIOPS\fR\fR, \fB\-\-mark \fR\fB\fIOPS\fR\fR
+.RS 4
+Specify what to mark\&. Used with
+\fB\-c\fR\&. Allowed values are \*(Aqadd\*(Aq,\*(Aqdelete\*(Aq,\*(Aqchange\*(Aq,\*(Aqequal\*(Aq\&. Multiple values can be listed comma\-separated; abbreviations are allowed\&. Default: A,D,C
+.RE
+.PP
+\fB\-n\fR, \fB\-\-no\-output\fR
+.RS 4
+Do not write an output file; print diagnostics only; default: write output file as per
+\fB\-o\fR
+.RE
+.PP
+\fB\-o \fR\fB\fIOUTFILE\fR\fR, \fB\-\-output \fR\fB\fIOUTFILE\fR\fR
+.RS 4
+Write output to FILE; default:
+output\&.pdf
+.RE
+.PP
+\fB\-\-spell\fR, \fB\-\-spell\-check\fR
+.RS 4
+Run the text body of the (new) PDF through
+\fBhunspell\fR\&. Unknown words are underlined\&. Use e\&.g\&. \*(Aqenv DICTIONARY=en_US \&.\&.\&.\*(Aq (or de_DE, \&.\&.\&.) to specify the spelling dictionary, if your system has more than one\&. To add new words to your private dictionary use e\&.g\&. \*(Aqecho "ownCloud" >> ~/\&.hunspell_en_US\*(Aq Check with
+\fBhunspell \fR\fB\fB\-D\fR\fR
+and study
+\fBhunspell\fR(1)\&.
+.RE
+.PP
+\fB\-\-strict\fR
+.RS 4
+Show really all differences; default: ignore removed hyphenation; ignore character spacing inside a word
+.RE
+.PP
+\fB\-t \fR\fB\fITRANSP\fR\fR, \fB\-\-transparency \fR\fB\fITRANSP\fR\fR
+.RS 4
+Set transparency of the highlight; invisible: 0\&.0; full opaque: 1\&.0; default: 0\&.6
+.RE
+.PP
+\fB\-V\fR, \fB\-\-version\fR
+.RS 4
+Print the version number and exit
+.RE
+.PP
+\fB\-X\fR, \fB\-\-no\-compression\fR
+.RS 4
+Write uncompressed PDF\&. Default: FlateEncode filter compression\&.
+.RE
+.SH "AUTHORS"
+.PP
+\fBJürgen Weigert\fR
+.RS 4
+Developer
+.RE
+.PP
+\fBThomas Schraitle\fR <\&toms@opensuse\&.org\&>
+.RS 4
+Manpage author
+.RE
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/man/pdfcompare.xml new/pdfcompare-1.6.8/man/pdfcompare.xml
--- old/pdfcompare-1.6.5/man/pdfcompare.xml	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/man/pdfcompare.xml	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,272 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+                         "http://www.docbook.org/xml/4.5/docbookx.dtd"
+[
+  <!ENTITY product "pdfcompare">
+  <!ENTITY cmd     "pdfcompare">
+]>
+<refentry lang="en" id="pdfcompare">
+  <refentryinfo>
+      <productname>&product;</productname>
+      <author>
+        <firstname>Jürgen</firstname>
+        <surname>Weigert</surname>
+        <contrib>Developer</contrib>
+      </author>
+    <othercredit class="technicaleditor">
+      <firstname>Thomas</firstname>
+      <surname>Schraitle</surname>
+      <email>toms@opensuse.org</email>
+      <contrib>Manpage author</contrib>
+    </othercredit>
+  </refentryinfo>
+  <refmeta>
+      <refentrytitle>&cmd;</refentrytitle>
+      <manvolnum>1</manvolnum>
+      <refmiscinfo class="version">@VERSION@</refmiscinfo>
+      <refmiscinfo class="source">https://github.com/jnweiger/pdfcompare</refmiscinfo>
+      <!--<refmiscinfo class="manual"></refmiscinfo>-->
+   </refmeta>
+  
+  <refnamediv>
+      <refname>&product;</refname>
+      <refpurpose>Highlight words in a PDF file</refpurpose>
+   </refnamediv>
+  
+  <refsynopsisdiv id="calabash.synopsis">
+      <title>Synopsis</title>
+      <cmdsynopsis><command>&cmd;</command>
+        <arg choice="opt">-h</arg>
+        <arg choice="opt">-c <replaceable>OLDFILE</replaceable></arg>
+        <arg choice="opt">-d <replaceable>DECRYPT_KEY</replaceable></arg>
+        <arg choice="opt">-e</arg>
+        <arg choice="opt">-i</arg>
+        <arg choice="opt">-l <replaceable>LOGFILE</replaceable></arg>
+        <arg choice="opt">-m <replaceable>OPS</replaceable></arg>
+        <arg choice="opt">-n</arg>
+        <arg choice="opt">-o <replaceable>OUTFILE</replaceable></arg>
+        <arg choice="opt">-s <replaceable>WORD_REGEXP</replaceable></arg>
+        <arg choice="opt">--spell</arg>
+        <arg choice="opt">--strict</arg>
+        <arg choice="opt">-t <replaceable>TRANSP</replaceable></arg>
+        <arg choice="opt">-B</arg>
+        <arg choice="opt">-C NAME=<replaceable>R</replaceable>,<replaceable>G</replaceable>,<replaceable>B</replaceable></arg>
+        <arg choice="opt">-D</arg>
+        <arg choice="opt">-F <replaceable>FIRST_PAGE</replaceable></arg>
+        <arg choice="opt">-L <replaceable>LAST_PAGE</replaceable></arg>
+        <arg choice="opt">-M N,E,W,S</arg>
+        <arg choice="opt">-V</arg>
+        <arg choice="opt">-X</arg>
+        <sbr/>
+        <arg choice="req">INFILE</arg>
+        <arg choice="opt">INFILE2</arg>
+      </cmdsynopsis>
+  </refsynopsisdiv>
+  
+  <refsect1>
+    <title>Positional Arguments</title>
+    <variablelist>
+      <varlistentry>
+        <term><option>INFILE</option></term>
+        <listitem>
+          <para>the required PDF input file</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><option>INFILE2</option></term>
+        <listitem>
+          <para>an optional <quote>newer</quote> PDF input file;
+            alternate syntax to <option>-c</option></para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+  
+  <refsect1>
+    <title>Optional Arguments</title>
+    <variablelist>
+      <varlistentry id="pdfcompare.below">
+        <term><option>-B</option></term>
+        <term><option>--below</option></term>
+        <listitem>
+          <para>Paint the highlight markers below the text. Try this if
+            the normal merge crashes. Use with care, highlights may
+            disappear below background graphics. Default: BELOW='FALSE'</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.compare-text">
+        <term><option>-c <replaceable>OLDFILE</replaceable></option></term>
+        <term><option>--compare-text <replaceable>OLDFILE</replaceable></option></term>
+        <listitem>
+          <para>Mark added, deleted and replaced text (or see <option>-m</option>) with
+            regard to <replaceable>OLDFILE</replaceable>. File formats <filename>.pdf</filename>, 
+            <filename>.xml</filename>, <filename>.txt</filename> are
+            recognized by their suffix. The comparison works word by
+            word.</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.search-color">
+        <term><option>-C NAME=<replaceable>R</replaceable>,<replaceable>G</replaceable>,<replaceable>B</replaceable></option></term>
+        <term><option>--search-color NAME=<replaceable>R</replaceable>,<replaceable>G</replaceable>,<replaceable>B</replaceable></option></term>
+        <listitem>
+          <para>Set colors of the search highlights as an RGB triplet;
+            R,G,B ranges are 0.0-1.0 each; valid names are
+            'add','delete','change','equal','margin','all'; default name
+            is 'equal', which is also used for <option>-s</option>; default colors are
+            A=0.3,1,0.3 /*green*/ C=0.9,0.8,0 /*yellow*/ B=0.9,0.9,0.9
+            /*gray*/ E=1,0,1 /*pink*/ D=1,0.3,0.3 /*red*/ M=0.7,1,1
+            /*blue*/</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.debug">
+        <term><option>-D</option></term>
+        <term><option>--debug</option></term>
+        <listitem>
+          <para>Enable debugging. Prints more on stdout, dumps several
+            <filename>*.xml</filename> and <filename>*.pdf</filename>
+            files.</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.exclude-irrelevant-pages">
+        <term><option>-e</option></term>
+        <term><option>--exclude-irrelevant-pages</option></term>
+        <listitem>
+          <para>With <option>-s</option>: show only matching pages. With
+          <option>-c</option>: show only changed pages; default:
+            reproduce all pages from <replaceable>INFILE</replaceable>
+            in <replaceable>OUTFILE</replaceable></para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.features">
+        <term><option>-f <replaceable>FEATURES</replaceable></option></term>
+        <term><option>--features <replaceable>FEATURES</replaceable></option></term>
+        <listitem>
+          <para>Specify how to mark. Allowed values are 'highlight',
+            'changebar', 'popup', 'navigation', 'watermark', 'margin'.
+            Default: H,C,P,W,B</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.first-page">
+        <term><option>-F <replaceable>FIRST_PAGE</replaceable></option></term>
+        <term><option>--first-page <replaceable>FIRST_PAGE</replaceable></option></term>
+        <listitem>
+          <para>Skip some pages at start of document; see also
+            <option>-L</option>; default: all pages</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.help">
+        <term><option>-h</option></term>
+        <term><option>--help</option></term>
+        <listitem>
+          <para>Show this help message and exit</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.nocase">
+        <term><option>-i</option></term>
+        <term><option>--nocase</option></term>
+        <listitem>
+          <para>Make <option>-s</option> case insensitive; default: case
+          sensitive</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.last-page">
+        <term><option>-L <replaceable>LAST_PAGE</replaceable></option></term>
+        <term><option>--last-page <replaceable>LAST_PAGE</replaceable></option></term>
+        <listitem>
+          <para>Limit pages processed; this counts pages, it does not
+            use document page numbers; see also <option>-F</option>; default: all
+            pages</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.log">
+        <term><option>-l <replaceable>LOGFILE</replaceable></option></term>
+        <term><option>--log <replaceable>LOGFILE</replaceable></option></term>
+        <listitem>
+          <para>Write a Python data structure describing all the overlay
+            objects on each page. Default: none.</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.margins">
+        <term><option>-M N,E,W,S</option></term>
+        <term><option>--margins N,E,W,S</option></term>
+        <listitem>
+          <para>Specify margin space to ignore on each page. A margin
+            width is expressed in units of ca. 100dpi. Specify four
+            numbers in the order north,east,west,south. Default:
+            0,0,0,0</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.mark">
+        <term><option>-m <replaceable>OPS</replaceable></option></term>
+        <term><option>--mark <replaceable>OPS</replaceable></option></term>
+        <listitem>
+          <para>Specify what to mark. Used with <option>-c</option>. Allowed values are
+            'add','delete','change','equal'. Multiple values can be
+            listed comma-separated; abbreviations are allowed. Default:
+            A,D,C</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.no-output">
+        <term><option>-n</option></term>
+        <term><option>--no-output</option></term>
+        <listitem>
+          <para>Do not write an output file; print diagnostics only;
+            default: write output file as per <option>-o</option></para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.output">
+        <term><option>-o <replaceable>OUTFILE</replaceable></option></term>
+        <term><option>--output <replaceable>OUTFILE</replaceable></option></term>
+        <listitem>
+          <para>Write output to <replaceable>OUTFILE</replaceable>; default: <filename>output.pdf</filename></para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.spell">
+        <term><option>--spell</option></term>
+        <term><option>--spell-check</option></term>
+        <listitem>
+          <para>Run the text body of the (new) PDF through <command>hunspell</command>.
+            Unknown words are underlined. Use e.g. 'env DICTIONARY=en_US
+            ...' (or de_DE, ...) to specify the spelling dictionary, if
+            your system has more than one. To add new words to your 
+            private dictionary use e.g. 'echo "ownCloud" >> ~/.hunspell_en_US'.
+             Check with <command>hunspell <option>-D</option></command> and
+            study <citerefentry>
+              <refentrytitle>hunspell</refentrytitle>
+              <manvolnum>1</manvolnum>
+            </citerefentry>. </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.strict">
+        <term><option>--strict</option></term>
+        <listitem>
+          <para>Show really all differences; default: ignore removed
+            hyphenation; ignore character spacing inside a word</para>
+        </listitem>
+      </varlistentry>      
+      <varlistentry id="pdfcompare.transparency">
+        <term><option>-t <replaceable>TRANSP</replaceable></option></term>
+        <term><option>--transparency <replaceable>TRANSP</replaceable></option></term>
+        <listitem>
+          <para>Set transparency of the highlight; invisible: 0.0; full
+            opaque: 1.0; default: 0.6 </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.version">
+        <term><option>-V</option></term>
+        <term><option>--version</option></term>
+        <listitem>
+          <para>Print the version number and exit</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry id="pdfcompare.no-compression">
+        <term><option>-X</option></term>
+        <term><option>--no-compression</option></term>
+        <listitem>
+          <para>Write uncompressed PDF. Default: FlateEncode filter
+            compression.</para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+</refentry>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/pdfcompare.py new/pdfcompare-1.6.8/pdfcompare.py
--- old/pdfcompare-1.6.5/pdfcompare.py	2014-01-07 15:28:01.000000000 +0100
+++ new/pdfcompare-1.6.8/pdfcompare.py	2016-04-19 17:23:08.000000000 +0200
@@ -1,9 +1,9 @@
 #! /usr/bin/python
 # -*- coding: UTF-8 -*-
 #
-# pdf_highlight.py -- command line tool to show search or compare results in a PDF
+# pdfcompare.py -- command line tool to show search or compare results in a PDF
 #
-# (c) 2012-2013 Juergen Weigert jw@suse.de
+# (c) 2012-2016 Juergen Weigert juewei@fabfolk.com
 # Distribute under GPL-2.0 or ask
 #
 # 2012-03-16, V0.1 jw - initial draught: argparse, pdftohtml-xml, font.metrics
@@ -88,6 +88,12 @@
 #                         later on. Strange.
 # 2014-01-07, V1.6.5 jw - manually merged https://github.com/jnweiger/pdfcompare/pull/4
 #                         hope, I did not break too much...
+# 2014-11-07, V1.6.6 jw - hint added for hunspell use: add word.
+# 2015-04-18, V1.6.7 jw - fall back to pyPdf from PyPDF2, for Ubuntu 14.04 LTS
+# 2015-04-19, V1.6.8 jw - popup pN[tcb]: source location descriptors optional.
+#                         No normal user expects or understands them.
+#                         No navigation marks per default. They are often broken, and often
+#                         useless due to page number changes. Include in -f to enable.
 #
 # osc in devel:languages:python python-pypdf >= 1.13+20130112
 #  need fix from https://bugs.launchpad.net/pypdf/+bug/242756
@@ -113,9 +119,9 @@
 # Compatibility for older Python versions
 from __future__ import with_statement
 from __future__ import print_function
-# from __future__ import division
+from __future__ import division
 
-__VERSION__ = '1.6.5'
+__VERSION__ = '1.6.8'
 
 try:
   # python2
@@ -123,7 +129,12 @@
 except ImportError:
   # python3, breaks python2-reportlab
   from io import StringIO
-from pyPdf import PdfFileWriter, PdfFileReader, generic as Pdf
+try:
+  # Ubuntu 15.x
+  from PyPDF2 import PdfFileWriter, PdfFileReader, generic as Pdf
+except ImportError:
+  # Ubuntu 14.04 LTS
+  from pyPdf import PdfFileWriter, PdfFileReader, generic as Pdf
 from reportlab.pdfgen import canvas
 from reportlab.lib.colors import Color
 import urllib   # used when normal encode fails.
@@ -150,6 +161,7 @@
 
 highlight_height = 1.2  # some fonts cause too much overlap with 1.4
                         # 1.2 is often not enough to look symmetric.
+anno_popup_src_loc_ref = False	# False: 'chg: bla'    True: 'chg:p1t: bla'
 
 # from pdfminer.fontmetrics import FONT_METRICS
 # FONT_METRICS['Helvetica'][1]['W']
@@ -283,7 +295,10 @@
     text = mark.get('t', '.') + ':'
     if 'o' in mark:
       if isinstance(mark['o'], list):
-        text += mark['o'][1]+': '+ mark['o'][0]
+        if anno_popup_src_loc_ref:
+          text += mark['o'][1]+': '+ mark['o'][0]
+	else:
+          text += ' '+mark['o'][0]
       else:
         text += ' '+mark['o']
     # need ascii here. anything else triggers
@@ -585,7 +600,7 @@
 
   i = word[2]
   l = len(word[0])
-  char_width = float(x2-x1)/len(word[1])
+  char_width = (x2-x1)/len(word[1])
   x1 += i * char_width
   x2 = x1 + l * char_width
   # Given the fast track above, maybe for the rest, a
@@ -713,7 +728,7 @@
   return finfo
 
 def main():
-  parser = ArgumentParser(epilog="version: "+__VERSION__, description="highlight words in a PDF file.")
+  parser = ArgumentParser(epilog="version: "+__VERSION__, description="Highlight changed/added/deleted/moved text in a PDF file.")
   parser.def_trans = 0.6
   parser.def_decrypt_key = ''
   parser.def_colors = { 'E': [1,0,1,    'pink'],        # extra
@@ -724,75 +739,87 @@
                         'B': [.9,.9,.9, 'gray'] }       # borders
   parser.def_output = 'output.pdf'
   parser.def_marks = 'A,D,C'
-  parser.def_features = 'H,C,P,N,W,B'
+  parser.def_features = 'H,C,P,W,B'
   parser.def_margins = '0,0,0,0'
   parser.def_margins = '0,0,0,0'
   parser.def_below = False
   parser.add_argument("-c", "--compare-text", metavar="OLDFILE",
-                      help="mark added, deleted and replaced text (or see -m) with regard to OLDFILE. \
+                      help="Mark added, deleted and replaced text (or see -m) with regard to OLDFILE. \
                             File formats .pdf, .xml, .txt are recognized by their suffix. \
                             The comparison works word by word.")
   parser.add_argument("-d", "--decrypt-key", metavar="DECRYPT_KEY", default=parser.def_decrypt_key,
-                      help="open an encrypted PDF; default: KEY='"+parser.def_decrypt_key+"'")
+                      help="Open an encrypted PDF. Default: KEY='"+parser.def_decrypt_key+"'")
   parser.add_argument("-e", "--exclude-irrelevant-pages", default=False, action="store_true",
-                      help="with -s: show only matching pages; with -c: show only changed pages; \
-                      default: reproduce all pages from INFILE in OUTFILE")
+                      help="With -s: show only matching pages; with -c: show only changed pages. \
+                      Default: reproduce all pages from INFILE in OUTFILE.")
   parser.add_argument("-f", "--features", metavar="FEATURES", default=parser.def_features,
-                      help="specify how to mark. Allowed values are 'highlight', 'changebar', 'popup', \
+                      help="Specify how to mark. Allowed values are 'highlight', 'changebar', 'popup', \
                       'navigation', 'watermark', 'margin'. Default: " + str(parser.def_features))
   parser.add_argument("-i", "--nocase", default=False, action="store_true",
-                      help="make -s case insensitive; default: case sensitive")
+                      help="Make -s case insensitive; default: case sensitive.")
   parser.add_argument("-l", "--log",  metavar="LOGFILE", 
-                      help="write an python datastructure describing all the overlay objects on each page. Default none.")
+                      help="Write an python datastructure describing all the overlay objects on each page. Default none.")
   parser.add_argument("-m", "--mark", metavar="OPS", default=parser.def_marks,
-                      help="specify what to mark. Used with -c. Allowed values are 'add','delete','change','equal'. \
+                      help="Specify what to mark. Used with -c. Allowed values are 'add','delete','change','equal'. \
                             Multiple values can be listed comma-seperated; abbreviations are allowed.\
                             Default: " + str(parser.def_marks))
   parser.add_argument("-n", "--no-output", default=False, action="store_true",
-                      help="do not write an output file; print diagnostics only; default: write output file as per -o")
+                      help="Do not write an output file; print diagnostics only. Default: write output file as per -o option.")
   parser.add_argument("-o", "--output", metavar="OUTFILE", default=parser.def_output,
-                      help="write output to FILE; default: "+parser.def_output)
+                      help="Write output to FILE; default: "+parser.def_output)
   parser.add_argument("-s", "--search", metavar="WORD_REGEXP", 
-                      help="highlight WORD_REGEXP")
+                      help="Highlight WORD_REGEXP")
   parser.add_argument("--spell", "--spell-check", default=False, action="store_true",
-                      help="run the text body of the (new) pdf through hunspell. Unknown words are underlined. Use e.g. 'env DICTIONARY=de_DE ...' (or en_US, ...) to specify the spelling dictionary, if your system has more than one. Check with 'hunspell -D' and study 'man hunspell'.")
+                      help="Run the text body of the (new) pdf through hunspell. Unknown words are underlined. \
+		          Use e.g. 'env DICTIONARY=en_US ...' (or de_DE, ...) to specify the spelling dictionary, \
+			  if your system has more than one. To add new words to your private dictionary use e.g. \
+			  'echo >> ~/.hunspell_en_US ownCloud'. Check with 'hunspell -D' and study 'man hunspell'.")
   parser.add_argument("--strict", default=False, action="store_true",
-                      help="show really all differences; default: ignore removed hyphenation; ignore character spacing inside a word")
+                      help="Show really all differences. Default: ignore removed hyphenation; \
+		          ignore character spacing inside a word.")
   parser.add_argument("-t", "--transparency", type=float, default=parser.def_trans, metavar="TRANSP", 
-                      help="set transparency of the highlight; invisible: 0.0; full opaque: 1.0; \
+                      help="Set transparency of the highlight; invisible: 0.0; full opaque: 1.0; \
                       default: " + str(parser.def_trans))
   parser.add_argument("-B", "--below", default=parser.def_below, action="store_true",
-                      help="Paint the highlight markers below the text. Try this if the normal merge crashes. Use with care, highlights may disappear below background graphics. Default: BELOW='"+str(parser.def_below)+"'")
+                      help="Paint the highlight markers below the text. Try this if the normal merge crashes. Use with care, highlights may disappear below background graphics. Default: BELOW='"+str(parser.def_below)+"'.")
   parser.add_argument("-C", "--search-color", metavar="NAME=R,G,B", action="append",
-                      help="set colors of the search highlights as an RGB triplet; R,G,B ranges are 0.0-1.0 each; valid names are 'add,'delete','change','equal','margin','all'; default name is 'equal', which is also used for -s; default colors are " + 
+                      help="Set colors of the search highlights as an RGB triplet; R,G,B ranges are 0.0-1.0 each; valid names are 'add,'delete','change','equal','margin','all'; default name is 'equal', which is also used for -s; default colors are " + 
                       " ".join(["%s=%s,%s,%s /*%s*/ " %(x_y[0],x_y[1][0],x_y[1][1],x_y[1][2],x_y[1][3]) for x_y in list(parser.def_colors.items())]))
   parser.add_argument("-D", "--debug", default=False, action="store_true",
-                      help="enable debugging. Prints more on stdout, dumps several *.xml or *.pdf files.")
+                      help="Enable debugging. Prints more on stdout, dumps several *.xml or *.pdf files.")
   parser.add_argument("-F", "--first-page", metavar="FIRST_PAGE",
-                      help="skip some pages at start of document; see also -L; default: all pages")
+                      help="Skip some pages at start of document; see also -L option. Default: all pages.")
   parser.add_argument("-L", "--last-page", metavar="LAST_PAGE",
-                      help="limit pages processed; this counts pages, it does not use document \
-                      page numbers; see also -F; default: all pages")
+                      help="Limit pages processed; this counts pages, it does not use document \
+                      page numbers; see also -F; default: all pages.")
   parser.add_argument("-M", "--margins", metavar="N,E,W,S", default=parser.def_margins,
-                      help="specify margin space to ignore on each page. A margin width is expressed \
+                      help="Specify margin space to ignore on each page. A margin width is expressed \
                       in units of ca. 100dpi. Specify four numbers in the order north,east,west,south. Default: "\
                       + str(parser.def_margins))
+  parser.add_argument("-S", "--source-location", default=False, action="store_true",
+                      help="Annotation start includes :pNX: markers where 'N' is the page number of the location \
+		          in the original document and X is 't' for top, 'c' for center, or 'b' for bottom of the page. \
+			  Default: Annotations start only with 'chg:', 'add:', 'del:' optionally followed by original text.")
   parser.add_argument("-V", "--version", default=False, action="store_true",
-                      help="print the version number and exit")
+                      help="Print the version number and exit.")
   parser.add_argument("-X", "--no-compression", default=False, action="store_true",
-                      help="write uncompressed PDF. Default: FlateEncode filter compression.")
+                      help="Write uncompressed PDF. Default: FlateEncode filter compression.")
   parser.add_argument("--leftside", default=False, action="store_true",
-                      help="put changebars and navigation at the left hand side of the page. Default: right hand side.")
-  parser.add_argument("infile", metavar="INFILE", help="the input file")
-  parser.add_argument("infile2", metavar="INFILE2", nargs="?", help="optional 'newer' input file; alternate syntax to -c")
+                      help="Put changebars and navigation at the left hand side of the page. Default: right hand side.")
+  parser.add_argument("infile", metavar="INFILE", help="The input file.")
+  parser.add_argument("infile2", metavar="INFILE2", nargs="?", help="Optional 'newer' input file; alternate syntax to -c")
   args = parser.parse_args()      # --help is automatic
 
   args.transparency = 1 - args.transparency     # it is needed reversed.
 
   if args.version: parser.exit(__VERSION__)
+
   global debug 
   debug = args.debug
 
+  global anno_popup_src_loc_ref
+  anno_popup_src_loc_ref = args.source_location
+
   args.search_colors = parser.def_colors.copy()
   if args.search_color:
     for col in args.search_color:
@@ -821,7 +848,7 @@
     args.compare_text,args.infile = args.infile,args.infile2
 
   if args.search is None and args.compare_text is None and args.spell is None:
-    parser.exit("Oops. Nothing to do. Specify either -s or --spell or -c or two input files")
+    parser.exit("Oops. Nothing to do. Specify either -s or --spell or -c or two input files.")
 
   if not os.access(args.infile, os.R_OK):
     parser.exit("Cannot read input file: %s" % args.infile)
@@ -926,13 +953,14 @@
       print("DocumentInfo():")
       pprint(di)
     output._objects.append(di)
-  except Exception,e:
+  except Exception as e:
     print("WARNING: getDocumentInfo() failed: " + str(e) )
 
   output._info = Pdf.IndirectObject(len(output._objects), 0, output)
 
   pages_written = 0
   total_hits = 0
+  outline = []
 
   page_idx = 0
   nav_bwd = None
@@ -961,6 +989,7 @@
       if hitdetails[det]: hits_fmt += '%s%d' % (ch,hitdetails[det])
 
     print(" page %d: %d hits %s" % (page_marks[i]['nr'], len(page_marks[i]['rect']), hits_fmt))
+    outline.append(" page %d: %d hits %s" % (page_marks[i]['nr'], len(page_marks[i]['rect']), hits_fmt))
     # pprint(hitdetails)
 
     page = input1.getPage(i)
@@ -1009,8 +1038,15 @@
       output.addPage(page)
 
     pages_written += 1
-
   print("saving %s" % args.output)
+  # add outline  
+  try:
+    parent = output.addBookmark('Hits', 0) # add parent bookmark
+    for bm in outline:
+       output.addBookmark(bm,outline.index(bm),parent=parent)
+  except Exception as e:
+    print("Warning: cannot add Bookmarks (pyPdf too old?): %s" % str(e))
+  
   if args.no_output is False:
     outputStream = file(args.output, "wb")
     try:
@@ -1068,7 +1104,7 @@
   if (width is not None): 
     tot_w = pre_w+str_w+suf_w
     if (tot_w == 0): tot_w = 1
-    ratio = float(width)/tot_w
+    ratio = width/tot_w
   #pprint([[pre,str,suf,width],[pre_w,str_w,suf_w,tot_w],ratio])
   return (xoff+pre_w*ratio, str_w*ratio)
 
@@ -1159,9 +1195,9 @@
   def catwords(dw, idx1, idx2, maxwords=666):
     # make maxwords low enough, so that the popup fits on the screen.
     if (maxwords is not None and idx2-idx1 > maxwords):
-      cw1_text, cw1_loc = catwords(dw, idx1, idx1+maxwords/3, None)
-      cw2_text, cw2_loc = catwords(dw, idx2-maxwords/3, idx2, None)
-      text = cw1_text + ("<br><br> --]-------- snip %d words --------[-- <br><br>" % (idx2-idx1-maxwords*2/3)) + cw2_text
+      cw1_text, cw1_loc = catwords(dw, idx1, idx1+int(maxwords/3), None)
+      cw2_text, cw2_loc = catwords(dw, idx2-int(maxwords/3), idx2, None)
+      text = cw1_text + ("<br><br> --]-------- snip %d words --------[-- <br><br>" % (idx2-idx1-int(maxwords*2/3))) + cw2_text
       return [ text, cw1_loc ]
 
     text = ""
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/runtests.py new/pdfcompare-1.6.8/runtests.py
--- old/pdfcompare-1.6.5/runtests.py	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/runtests.py	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,16 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import pytest
+import sys
+
+class MyPlugin:
+    def pytest_sessionfinish(self):
+        print("\n*** test run reporting finishing")
+
+
+# Empty statement here needed so minversion reports no error
+#pytest
+
+pytest.main(plugins=[MyPlugin()] )
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/setup.py new/pdfcompare-1.6.8/setup.py
--- old/pdfcompare-1.6.5/setup.py	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/setup.py	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys 
+
+from distutils.core import setup
+from setuptools.command.test import test as TestCommand
+
+class PyTest(TestCommand):
+    def finalize_options(self):
+        TestCommand.finalize_options(self)
+        self.test_args = []
+        self.test_suite = True
+    def run_tests(self):
+        #import here, cause outside the eggs aren't loaded
+        import pytest
+        errno = pytest.main(self.test_args)
+        sys.exit(errno)
+
+
+setup(name='pdfcompare',
+      version='1.0',
+      description='Compare two PDF files',
+      author='Jürgen Weigert',
+      author_email='juewei@fabfolk.com',
+      url='https://github.com/jnweiger/pdfcompare',
+      scripts=['pdfcompare.py', 'imgcmp.py'],
+      license='GPL-2.0',
+      classifiers=[
+          'License :: OSI Approved :: GNU General Public License v2 (GPLv2)',
+          'Environment :: Console',
+          'Development Status :: 5 - Production/Stable',
+          'Programming Language :: Python :: 2.7',
+          'Programming Language :: Python :: 3',
+                  ],
+      cmdclass={'test': PyTest},
+      long_description="".join(open('README.txt').readlines()),
+      tests_require=['pytest', 'scipy'],
+      #packages=['pyPdf','reportlab.pdfgen','reportlab.lib.colors','pygame.font' ],
+# 
+     )
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/test/Makefile new/pdfcompare-1.6.8/test/Makefile
--- old/pdfcompare-1.6.5/test/Makefile	2013-10-24 14:09:14.000000000 +0200
+++ new/pdfcompare-1.6.8/test/Makefile	2016-04-19 17:23:08.000000000 +0200
@@ -1,10 +1,16 @@
-VER=1.3
 refresh=
 
 all: test
 
-test:
-	ln -sf ../pdf_highlight.py pdfcompare
+test_requires:
+	@echo The selftest uses the following extra packages:
+	@rpm -q shunit2 || exit 2
+	@rpm -q python-scipy || exit 2
+	@rpm -q pdftk || exit 2
+	@echo -----------------------------------------------
+
+test: test_requires
+	ln -sf ../pdfcompare.py pdfcompare
 	env PATH=.:$$PATH sh ./helptest.sh $(VER) VER=$(VER)
 	env PATH=.:$$PATH sh ./python3.sh
 	env PATH=.:$$PATH refresh=$(refresh) sh ./restest.sh
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pdfcompare-1.6.5/test/cli/test_cli.py new/pdfcompare-1.6.8/test/cli/test_cli.py
--- old/pdfcompare-1.6.5/test/cli/test_cli.py	1970-01-01 01:00:00.000000000 +0100
+++ new/pdfcompare-1.6.8/test/cli/test_cli.py	2016-04-19 17:23:08.000000000 +0200
@@ -0,0 +1,28 @@
+#!/usr/bin/python 
+# -*- coding: utf-8 -*-
+
+import os.path
+
+
+def test_version():
+         """
+         Checks, if version number in last line of help output is available
+         """
+         import subprocess 
+         L=subprocess.check_output(['./pdf_highlight.py','-h'])
+         L=L.strip()
+         LL=L.split("\n")
+         assert 'version' in LL[-1]
+
+
+
+def test_pdfcompare_exists():
+         assert os.path.exists('pdf_highlight.py')
+
+def test_scipy():
+         """
+         Checks, if the module scipy is available
+         """
+         import scipy 
+         assert scipy.__version__
+

++++++ pdfcompare.dsc ++++++
Format: 1.0
Source: pdfcompare
Version: 1.6.8-1
Binary: pdfcompare
Maintainer: Jürgen Weigert  
Architecture: any
Build-Depends: debhelper (>= 4.2.21)
# https://github.com/openSUSE/obs-build/pull/147
DEBTRANSFORM-RELEASE: 1
++++++ pull_github.sh ++++++
#! /bin/sh
#
# pull_github.sh -- refresh the packaged source tarball from upstream git.
#
# Clones the master branch of the upstream repository, reads the version
# from the __VERSION__ line of pdfcompare.py, packs $name-$version.tar.bz2,
# and rewrites the Version: and Source0: tags of every *.spec file in the
# current directory.  Finally it calls 'osc addremove', so it is meant to
# be run from inside the package's osc working copy.

# Stop at the first failing step: a failed clone must never lead to the old
# tarball being removed or an empty version being written into the spec.
set -e

url=git@github.com:jnweiger/pdfcompare.git
name=pdfcompare

rm -rf "$name"
# Only used by the commented-out snapshot version below.
tstamp=$(date +%Y%m%d)
# The checkout directory is a positional argument of 'git clone';
# the -o option would merely rename the remote.
git clone --depth 1 --branch master "$url" "$name"
version=$(grep '^__VERSION__' "$name/pdfcompare.py" | sed -e "s@.*'\(.*\)'.*@\1@")
#version=$version.git$tstamp
if [ -z "$version" ]; then
  echo "$0: cannot find __VERSION__ in $name/pdfcompare.py" >&2
  exit 1
fi

# Remove a stale directory from an interrupted run; otherwise mv would
# move the fresh checkout *into* it instead of renaming.
rm -rf "$name-$version"
mv "$name" "$name-$version"
# -f: it is not an error if there is no previous tarball.
rm -f "$name"-*.tar.bz2
# Hidden files (.git, .gitignore, ...) are kept out of the tarball.
tar jcvf "$name-$version.tar.bz2" --exclude '.??*' "$name-$version"
rm -rf "$name-$version"

sed -i -e "s@^\(Version:\s*\).*@\1$version@" *.spec
sed -i -e "s@^\(Source0:\s*\).*@\1$name-$version.tar.bz2@" *.spec

osc addremove
echo "now run: vi *.dsc; debchange -mc debian.changelog; osc vc; osc up; osc ci"

    

root＠hilbert.suse.de

tags

participants (1)