commit urlwatch for openSUSE:Factory
Hello community, here is the log from the commit of package urlwatch for openSUSE:Factory checked in at 2020-05-29 21:23:45 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/urlwatch (Old) and /work/SRC/openSUSE:Factory/.urlwatch.new.3606 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "urlwatch" Fri May 29 21:23:45 2020 rev:18 rq:809787 version:2.18 Changes: -------- --- /work/SRC/openSUSE:Factory/urlwatch/urlwatch.changes 2020-03-16 10:18:25.291589269 +0100 +++ /work/SRC/openSUSE:Factory/.urlwatch.new.3606/urlwatch.changes 2020-05-29 21:37:39.822701066 +0200 @@ -1,0 +2,31 @@ +Mon May 4 06:46:45 UTC 2020 - Michael Vetter <mvetter@suse.com> + +- Update to 2.18: + Added: + * New filter: re.sub that can replace/remove strings using regular + expressions + * Support ignore_timeout_errors and ignore_too_many_redirects for + URL jobs (#423, by Josh aka Zevlag) + * HTML reporter: Add viewport meta tag for improved viewing on + mobile devices (#432, by Mike Borsetti) + * Optional support for insecure SMTP password storage in the + config; use with caution (#431) + Fixed: + * Fix --test-filter when the specified job is not found + * Fix another YAMLLoadWarning in unit tests (#382, by Louis Sautier) + * Documentation updates and typo fixes (by Nate Eagleson) + * Pushover: Fix default device config (Fixes #409 and #372, + documented by Richard Goodwin) + Changed: + * Nicer formatting of --features for jobs with no docstring + or many keys + * The XPath and CSS filters now support XML namespaces + (#404, by Chenfeng Bao) + * Drop support for Python 3.3 and Python 3.4 (new minimum + requirement is Python 3.5) + * Use html.escape instead of cgi.escape (which was removed + in Python 3.8; #424, by Chenfeng Bao) + * Allow non-ASCII characters in format-json output filter + (#433, by Mike Borsetti) + +------------------------------------------------------------------- Old: ---- 
urlwatch-2.17.tar.gz New: ---- urlwatch-2.18.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ urlwatch.spec ++++++ --- /var/tmp/diff_new_pack.4sbdYd/_old 2020-05-29 21:37:40.182702138 +0200 +++ /var/tmp/diff_new_pack.4sbdYd/_new 2020-05-29 21:37:40.186702150 +0200 @@ -17,14 +17,14 @@ Name: urlwatch -Version: 2.17 +Version: 2.18 Release: 0 Summary: A tool for monitoring webpages for updates License: BSD-3-Clause Group: Productivity/Networking/Web/Utilities URL: https://thp.io/2008/urlwatch/ Source0: https://github.com/thp/%{name}/archive/%{version}.tar.gz#/%{name}-%{version}.tar.gz -BuildRequires: python3-devel +BuildRequires: python3-devel >= 3.5 BuildRequires: python3-setuptools Requires: python3-PyYAML Requires: python3-appdirs ++++++ urlwatch-2.17.tar.gz -> urlwatch-2.18.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/.travis.yml new/urlwatch-2.18/.travis.yml --- old/urlwatch-2.17/.travis.yml 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/.travis.yml 2020-05-03 11:31:08.000000000 +0200 @@ -1,8 +1,9 @@ language: python python: - - "3.4" - "3.5" - "3.6" + - "3.7" + - "3.8" install: - python setup.py install_dependencies script: nosetests -v diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/CHANGELOG.md new/urlwatch-2.18/CHANGELOG.md --- old/urlwatch-2.17/CHANGELOG.md 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/CHANGELOG.md 2020-05-03 11:31:08.000000000 +0200 @@ -4,6 +4,28 @@ The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
+## [2.18] -- 2020-05-03 + +### Added +- New filter: `re.sub` that can replace/remove strings using regular expressions +- Support `ignore_timeout_errors` and `ignore_too_many_redirects` for URL jobs (#423, by Josh aka Zevlag) +- HTML reporter: Add `viewport` meta tag for improved viewing on mobile devices (#432, by Mike Borsetti) +- Optional support for insecure SMTP password storage in the config; use with caution (#431) + +### Fixed +- Fix `--test-filter` when the specified job is not found +- Fix another `YAMLLoadWarning` in unit tests (#382, by Louis Sautier) +- Documentation updates and typo fixes (by Nate Eagleson) +- Pushover: Fix default device config (Fixes #409 and #372, documented by Richard Goodwin) + +### Changed +- Nicer formatting of `--features` for jobs with no docstring or many keys +- The XPath and CSS filters now support XML namespaces (#404, by Chenfeng Bao) +- Drop support for Python 3.3 and Python 3.4 (new minimum requirement is Python 3.5) +- Use `html.escape` instead of `cgi.escape` (which was removed in Python 3.8; #424, by Chenfeng Bao) +- Allow non-ASCII characters in `format-json` output filter (#433, by Mike Borsetti) + + ## [2.17] -- 2019-04-12 ### Added diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/COPYING new/urlwatch-2.18/COPYING --- old/urlwatch-2.17/COPYING 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/COPYING 2020-05-03 11:31:08.000000000 +0200 @@ -1,4 +1,4 @@ -Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +Copyright (c) 2008-2020 Thomas Perl <m@thp.io> All rights reserved. 
Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/Dockerfile new/urlwatch-2.18/Dockerfile --- old/urlwatch-2.17/Dockerfile 1970-01-01 01:00:00.000000000 +0100 +++ new/urlwatch-2.18/Dockerfile 2020-05-03 11:31:08.000000000 +0200 @@ -0,0 +1,16 @@ +FROM python:3.8.2 + +RUN python3 -m pip install pyyaml minidb requests keyring appdirs lxml cssselect beautifulsoup4 jsbeautifier cssbeautifier + +WORKDIR /opt/urlwatch + +COPY lib ./lib +COPY share ./share +COPY setup.py . +COPY setup.cfg . + +RUN python setup.py install + +WORKDIR /root/.urlwatch + +ENTRYPOINT ["urlwatch"] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/README.md new/urlwatch-2.18/README.md --- old/urlwatch-2.17/README.md 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/README.md 2020-05-03 11:31:08.000000000 +0200 @@ -23,7 +23,7 @@ urlwatch 2 requires: - * Python 3.3 or newer + * Python 3.5 or newer * [PyYAML](http://pyyaml.org/) * [minidb](https://thp.io/2010/minidb/) * [requests](http://python-requests.org/) @@ -42,9 +42,11 @@ * Pushover reporter: [chump](https://github.com/karanlyons/chump/) * Pushbullet reporter: [pushbullet.py](https://github.com/randomchars/pushbullet.py) + * Matrix reporter: [matrix_client](https://github.com/matrix-org/matrix-python-sdk), [markdown2](https://github.com/trentm/python-markdown2) * Stdout reporter with color on Windows: [colorama](https://github.com/tartley/colorama) * "browser" job kind: [requests-html](https://html.python-requests.org) * Unit testing: [pycodestyle](http://pycodestyle.pycqa.org/en/latest/) + * Beautify filter : [beautifulsoup4](https://pypi.org/project/beautifulsoup4/) [jsbeautifier](https://pypi.org/project/jsbeautifier/) [cssbeautifier](https://pypi.org/project/cssbeautifier/) QUICK START @@ -111,7 +113,7 @@ filter: html2text,grep:a\054b:,strip ``` -If you 
want to extract only the body tag you can use this filer: +If you want to extract only the body tag you can use this filter: ```yaml url: https://thp.io/2008/urlwatch/ filter: element-by-tag:body @@ -214,6 +216,13 @@ The above config file sets all jobs to use wdiff as diff tool, and all "url" jobs to ignore connection errors. +Sometimes a web page can have the same data between comparisons but it appears in random order. +If that happens, you can choose to sort before the comparison. +```yaml +url: https://example.net/ +filter: sort +``` + PUSHOVER -------- @@ -222,14 +231,20 @@ chump python package installed (see DEPENDENCIES). Then edit your config (`urlwatch --edit-config`) and enable pushover. You will also need to add to the config your Pushover user key and a unique app key (generated by -registering urlwatch as an application on your Pushover account(https://pushover.net/apps/build) +registering urlwatch as an application on your Pushover account(https://pushover.net/apps/build). + +You can send to a specific device by using the device name, as indicated when +you add or view your list of devices in the Pushover console. For example +`device: 'MyPhone'`, or `device: 'MyLaptop'`. To send to *all* of your +devices, set `device: null` in your config (`urlwatch --edit-config`) or leave +out the device configuration completely. PUSHBULLET -------- -Pushbullet notification are configured similarly to Pushover (see above). -You'll need to add to the config your Pushbullet Access Token, which you +Pushbullet notifications are configured similarly to Pushover (see above). +You'll need to add to the config your Pushbullet Access Token, which you can generate at https://www.pushbullet.com/#settings TELEGRAM @@ -270,7 +285,7 @@ SLACK ----- -Slack nofifications are configured using "Slack Incoming Webhooks". Here is a +Slack notifications are configured using "Slack Incoming Webhooks". 
Here is a sample configuration: ```yaml @@ -285,6 +300,45 @@ You can use the command `urlwatch --test-slack` to test if the Slack integration works. +MATRIX +------ + +You can have notifications sent to you through the Matrix protocol. + +To achieve this, you first need to register a Matrix account for the bot on any homeserver. + +You then need to acquire an access token and room ID, using the following instructions adapted from [this guide](https://t2bot.io/docs/access_tokens/): + +1. Open [Riot.im](https://riot.im/app/) in a private browsing window +2. Register/Log in as your bot, using its user ID and password. +3. Set the display name and avatar, if desired. +4. In the settings page, scroll down to the bottom and click Access Token: \<click to reveal\>. +5. Copy the highlighted text to your configuration. +6. Join the room that you wish to send notifications to. +7. Go to the Room Settings (gear icon) and copy the *Internal Room ID* from the bottom. +8. Close the private browsing window **but do not log out, as this invalidates the Access Token**. 
+ +Here is a sample configuration: + +```yaml +matrix: + homeserver: https://matrix.org + access_token: "YOUR_TOKEN_HERE" + room_id: "!roomroomroom:matrix.org" + enabled: true +``` + +You will probably want to use the following configuration for the `markdown` reporter, if you intend to post change +notifications to a public Matrix room, as the messages quickly become noisy: + +```yaml +markdown: + details: false + footer: false + minimal: true + enabled: true +``` + BROWSER ------- @@ -322,7 +376,7 @@ - `report/email/from`: `your.username@gmail.com` (edit accordingly) - `report/email/method`: `smtp` - `report/email/smtp/host`: `smtp.gmail.com` -- `report/email/smtp/keyring`: `true` +- `report/email/smtp/auth`: `true` - `report/email/smtp/port`: `587` - `report/email/smtp/starttls`: `true` - `report/email/to`: The e-mail address you want to send reports to @@ -344,7 +398,7 @@ - `report/email/method`: `smtp` - `report/email/smtp/host`: `email-smtp.us-west-2.amazonaws.com` (edit accordingly) - `report/email/smtp/user`: `ABCDEFGHIJ1234567890` (edit accordingly) -- `report/email/smtp/keyring`: `true` +- `report/email/smtp/auth`: `true` - `report/email/smtp/port`: `587` (25 or 465 also work) - `report/email/smtp/starttls`: `true` - `report/email/to`: The e-mail address you want to send reports to @@ -353,6 +407,30 @@ the password, run: `urlwatch --smtp-login` and enter your password. +SMTP LOGIN WITHOUT KEYRING +-------------------------- + +If for whatever reason you cannot use a keyring to store your password +(for example, when using it from a `cron` job) +you can also set the `insecure_password` option in the SMTP config: + +- `report/email/smtp/auth`: `true` +- `report/email/smtp/insecure_password`: `secret123` + +The `insecure_password` key will be preferred over the data stored in +the keyring. 
Please note that as the name says, storing the password +as plaintext in the configuration is insecure and bad practice, but +for an e-mail account that's only dedicated for sending mails this +might be a way. **Never ever use this with your primary +e-mail account!** Seriously! Create a throw-away GMail (or other) +account just for sending out those e-mails or use local `sendmail` with +a mail server configured instead of relying on SMTP and password auth. + +Note that this makes it really easy for your password to be picked up +by software running on your machine, by other users logged into the system +and/or for the password to appear in log files accidentally. + + TESTING FILTERS --------------- @@ -421,6 +499,30 @@ - html2text: re ``` +To match an element in an [XML namespace](https://www.w3.org/TR/xml-names/), +use a namespace prefix before the tag name. Use a `:` to separate the namespace +prefix and the tag name in an XPath expression, and use a `|` in a CSS selector. +```yaml +url: 'https://www.wired.com/feed/rss' +filter: + - xpath: + path: '//item/media:keywords' + method: xml + namespaces: + media: http://search.yahoo.com/mrss/ +``` +```yaml +url: 'https://www.wired.com/feed/rss' +filter: + - css: + selector: 'item > media|keywords' + method: xml + namespaces: + media: http://search.yahoo.com/mrss/ +``` +Alternatively, use the XPath expression `//*[name()='<tag_name>']` to bypass +the namespace entirely. + Another useful option with XPath and CSS filters is `exclude`. Elements selected by this `exclude` expression are removed from the final result. For example, the following job will not have any `<a>` tag in its results: @@ -450,6 +552,36 @@ closest match. +REMOVE OR REPLACE TEXT USING REGULAR EXPRESSIONS +------------------------------------------------ + +Just like Python's `re.sub` function, there's the possibility to apply a regular +expression and either remove or replace the matched text.
The following example +applies the filter 3 times: + + 1. Just specifying a string as the value will replace the matches with the empty string. + 2. Simple patterns can be replaced with another string using "pattern" as the expression and "repl" as the replacement. + 3. You can use groups (`()`) and back-reference them with `\1` (etc..) to put groups into the replacement string. + +All features are described in Python's [re.sub](https://docs.python.org/3/library/re.html#re.sub) +documentation (the `pattern` and `repl` values are passed to this function as-is, with the value +of `repl` defaulting to the empty string). + + +```yaml +kind: url +url: https://example.com/ +filter: + - re.sub: '\s*href="[^"]*"' + - re.sub: + pattern: '<h1>' + repl: 'HEADING 1: ' + - re.sub: + pattern: '</([^>]*)>' + repl: '<END OF TAG \1>' +``` + + MIGRATION FROM URLWATCH 1.x --------------------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/__init__.py new/urlwatch-2.18/lib/urlwatch/__init__.py --- old/urlwatch-2.17/lib/urlwatch/__init__.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/__init__.py 2020-05-03 11:31:08.000000000 +0200 @@ -8,9 +8,9 @@ pkgname = 'urlwatch' -__copyright__ = 'Copyright 2008-2019 Thomas Perl' +__copyright__ = 'Copyright 2008-2020 Thomas Perl' __author__ = 'Thomas Perl <m@thp.io>' __license__ = 'BSD' __url__ = 'https://thp.io/2008/urlwatch/' -__version__ = '2.17' +__version__ = '2.18' __user_agent__ = '%s/%s (+https://thp.io/2008/urlwatch/info.html)' % (pkgname, __version__) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/cli.py new/urlwatch-2.18/lib/urlwatch/cli.py --- old/urlwatch-2.17/lib/urlwatch/cli.py 1970-01-01 01:00:00.000000000 +0100 +++ new/urlwatch-2.18/lib/urlwatch/cli.py 2020-05-03 11:31:08.000000000 +0200 @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- 
+# +# This file is part of urlwatch (https://thp.io/2008/urlwatch/). +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +# File and folder paths +import logging +import os.path +import signal +import socket +import sys + +from appdirs import AppDirs + +pkgname = 'urlwatch' +urlwatch_dir = os.path.expanduser(os.path.join('~', '.' 
+ pkgname)) +urlwatch_cache_dir = AppDirs(pkgname).user_cache_dir + +if not os.path.exists(urlwatch_dir): + urlwatch_dir = AppDirs(pkgname).user_config_dir + +# Check if we are installed in the system already +(prefix, bindir) = os.path.split(os.path.dirname(os.path.abspath(sys.argv[0]))) + +if bindir != 'bin': + sys.path.insert(0, os.path.join(prefix, bindir, 'lib')) + +from urlwatch.command import UrlwatchCommand +from urlwatch.config import CommandConfig +from urlwatch.main import Urlwatch +from urlwatch.storage import YamlConfigStorage, CacheMiniDBStorage, UrlsYaml + +# One minute (=60 seconds) timeout for each request to avoid hanging +socket.setdefaulttimeout(60) + +# Ignore SIGPIPE for stdout (see https://github.com/thp/urlwatch/issues/77) +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + # Windows does not have signal.SIGPIPE + ... + +logger = logging.getLogger(pkgname) + +CONFIG_FILE = 'urlwatch.yaml' +URLS_FILE = 'urls.yaml' +CACHE_FILE = 'cache.db' +HOOKS_FILE = 'hooks.py' + + +def setup_logger(verbose): + if verbose: + root_logger = logging.getLogger('') + console = logging.StreamHandler() + console.setFormatter(logging.Formatter('%(asctime)s %(module)s %(levelname)s: %(message)s')) + root_logger.addHandler(console) + root_logger.setLevel(logging.DEBUG) + root_logger.info('turning on verbose logging mode') + + +def main(): + config_file = os.path.join(urlwatch_dir, CONFIG_FILE) + urls_file = os.path.join(urlwatch_dir, URLS_FILE) + hooks_file = os.path.join(urlwatch_dir, HOOKS_FILE) + new_cache_file = os.path.join(urlwatch_cache_dir, CACHE_FILE) + old_cache_file = os.path.join(urlwatch_dir, CACHE_FILE) + cache_file = new_cache_file + if os.path.exists(old_cache_file) and not os.path.exists(new_cache_file): + cache_file = old_cache_file + + command_config = CommandConfig(pkgname, urlwatch_dir, bindir, prefix, + config_file, urls_file, hooks_file, cache_file, False) + setup_logger(command_config.verbose) + + # setup storage 
API + config_storage = YamlConfigStorage(command_config.config) + cache_storage = CacheMiniDBStorage(command_config.cache) + urls_storage = UrlsYaml(command_config.urls) + + # setup urlwatcher + urlwatch = Urlwatch(command_config, config_storage, cache_storage, urls_storage) + urlwatch_command = UrlwatchCommand(urlwatch) + + # run urlwatcher + urlwatch_command.run() + + +if __name__ == '__main__': + main() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/command.py new/urlwatch-2.18/lib/urlwatch/command.py --- old/urlwatch-2.17/lib/urlwatch/command.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/command.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -99,7 +99,7 @@ pretty_name = job.pretty_name() location = job.get_location() if pretty_name != location: - print('%d: %s (%s)' % (idx + 1, pretty_name, location)) + print('%d: %s ( %s )' % (idx + 1, pretty_name, location)) else: print('%d: %s' % (idx + 1, pretty_name)) return 0 @@ -118,10 +118,10 @@ def test_filter(self): job = self._find_job(self.urlwatch_config.test_filter) - job = job.with_defaults(self.urlwatcher.config_storage.config) if job is None: print('Not found: %r' % (self.urlwatch_config.test_filter,)) return 1 + job = job.with_defaults(self.urlwatcher.config_storage.config) if isinstance(job, UrlJob): # Force re-retrieval of job, as we're testing filters @@ -256,8 +256,8 @@ print('Please set the method to SMTP for the e-mail reporter.') success = False - if not smtp_config['keyring']: - print('Keyring authentication must be enabled for SMTP.') + if not smtp_config.get('auth', smtp_config.get('keyring', False)): + 
print('Authentication must be enabled for SMTP.') success = False smtp_hostname = smtp_config['host'] @@ -273,6 +273,10 @@ if not success: sys.exit(1) + if 'insecure_password' in smtp_config: + print('The password is already set in the config (key "insecure_password").') + sys.exit(0) + if have_password(smtp_hostname, smtp_username): message = 'Password for %s / %s already set, update? [y/N] ' % (smtp_username, smtp_hostname) if input(message).lower() != 'y': diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/config.py new/urlwatch-2.18/lib/urlwatch/config.py --- old/urlwatch-2.17/lib/urlwatch/config.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/config.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/filters.py new/urlwatch-2.18/lib/urlwatch/filters.py --- old/urlwatch-2.17/lib/urlwatch/filters.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/filters.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -39,6 +39,7 @@ from enum import Enum from lxml import etree +from lxml.cssselect import CSSSelector from .util import TrackSubClasses @@ -154,6 +155,29 @@ return data +class BeautifyFilter(FilterBase): + """Beautify HTML""" + + __kind__ = 'beautify' + + def filter(self, data, subfilter=None): + import jsbeautifier + import cssbeautifier + from bs4 import BeautifulSoup as bs + soup = bs(data, features="lxml") + scripts = soup.find_all('script') + for script in scripts: + if script.string is not None: + beautified_js = jsbeautifier.beautify(script.string) + script.string = beautified_js + styles = soup.find_all('style') + for style in styles: + if style.string is not None: + beautified_css = cssbeautifier.beautify(style.string) + style.string = beautified_css + return soup.prettify() + + class Html2TextFilter(FilterBase): """Convert HTML to plaintext""" @@ -195,7 +219,7 @@ if subfilter is not None: indentation = int(subfilter) parsed_json = json.loads(data) - return json.dumps(parsed_json, sort_keys=True, indent=indentation, separators=(',', ': ')) + return json.dumps(parsed_json, ensure_ascii=False, sort_keys=True, indent=indentation, separators=(',', ': ')) class GrepFilter(FilterBase): @@ -372,31 +396,28 @@ def __init__(self, filter_kind, subfilter, expr_key): self.filter_kind = filter_kind - self.expression, self.method, self.exclude = self.parse_subfilter( - filter_kind, subfilter, expr_key, self.EXPR_NAMES[filter_kind]) - self.parser = (etree.HTMLParser if self.method == 'html' else etree.XMLParser)() - self.data = '' - - @staticmethod - def parse_subfilter(filter_kind, subfilter, expr_key, expr_name): if subfilter is None: - raise ValueError('Need %s for filtering' % (expr_name,)) + raise ValueError('Need %s for filtering' % (self.EXPR_NAMES[filter_kind],)) if isinstance(subfilter, str): - expression = subfilter - method = 'html' - exclude = None + self.expression = subfilter + 
self.method = 'html' + self.exclude = None + self.namespaces = None elif isinstance(subfilter, dict): if expr_key not in subfilter: - raise ValueError('Need %s for filtering' % (expr_name,)) - expression = subfilter[expr_key] - method = subfilter.get('method', 'html') - exclude = subfilter.get('exclude') - if method not in ('html', 'xml'): - raise ValueError('%s method must be "html" or "xml", got %r' % (filter_kind, method)) + raise ValueError('Need %s for filtering' % (self.EXPR_NAMES[filter_kind],)) + self.expression = subfilter[expr_key] + self.method = subfilter.get('method', 'html') + self.exclude = subfilter.get('exclude') + self.namespaces = subfilter.get('namespaces') + if self.method not in ('html', 'xml'): + raise ValueError('%s method must be "html" or "xml", got %r' % (filter_kind, self.method)) + if self.method == 'html' and self.namespaces is not None: + raise ValueError('Namespace prefixes only supported with "xml" method.') else: raise ValueError('%s subfilter must be a string or dict' % (filter_kind,)) - - return expression, method, exclude + self.parser = (etree.HTMLParser if self.method == 'html' else etree.XMLParser)() + self.data = '' def feed(self, data): self.data += data @@ -430,9 +451,8 @@ parent.text = parent.text + element.tail if parent.text else element.tail parent.remove(element) - @classmethod - def _reevaluate(cls, element): - if cls._orphaned(element): + def _reevaluate(self, element): + if self._orphaned(element): return None if isinstance(element, etree._ElementUnicodeResult): parent = element.getparent() @@ -447,8 +467,7 @@ else: return element - @staticmethod - def _orphaned(element): + def _orphaned(self, element): if isinstance(element, etree._ElementUnicodeResult): parent = element.getparent() if ((element.is_tail and parent.tail is None) @@ -460,7 +479,7 @@ try: tree = element.getroottree() path = tree.getpath(element) - return element is not tree.xpath(path)[0] + return element is not tree.xpath(path, 
namespaces=self.namespaces)[0] except (ValueError, IndexError): return True @@ -478,11 +497,13 @@ return [] excluded_elems = None if self.filter_kind == 'css': - selected_elems = root.cssselect(self.expression) - excluded_elems = root.cssselect(self.exclude) if self.exclude else None + selected_elems = CSSSelector(self.expression, + namespaces=self.namespaces).evaluate(root) + excluded_elems = CSSSelector(self.exclude, + namespaces=self.namespaces).evaluate(root) if self.exclude else None elif self.filter_kind == 'xpath': - selected_elems = root.xpath(self.expression) - excluded_elems = root.xpath(self.exclude) if self.exclude else None + selected_elems = root.xpath(self.expression, namespaces=self.namespaces) + excluded_elems = root.xpath(self.exclude, namespaces=self.namespaces) if self.exclude else None if excluded_elems is not None: for el in excluded_elems: self._remove_element(el) @@ -512,3 +533,32 @@ lxml_parser = LxmlParser('xpath', subfilter, 'path') lxml_parser.feed(data) return lxml_parser.get_filtered_data() + + +class RegexSub(FilterBase): + """Replace text with regular expressions using Python's re.sub""" + + __kind__ = 're.sub' + + def filter(self, data, subfilter=None): + if subfilter is None: + raise ValueError('{} needs a subfilter'.format(self.__kind__)) + + # Allow for just specifying a regular expression (that will be removed) + if isinstance(subfilter, str): + subfilter = {'pattern': subfilter} + + # Default: Replace with empty string if no "repl" value is set + return re.sub(subfilter.get('pattern'), subfilter.get('repl', ''), data) + + +class SortFilter(FilterBase): + """Sort the results before comparison""" + + __kind__ = 'sort' + + def filter(self, data, subfilter=None): + data_list = data.splitlines() + data_list = sorted(data_list, key=str.casefold) + + return '\n'.join(data_list) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/handler.py 
new/urlwatch-2.18/lib/urlwatch/handler.py --- old/urlwatch-2.17/lib/urlwatch/handler.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/handler.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/html2txt.py new/urlwatch-2.18/lib/urlwatch/html2txt.py --- old/urlwatch-2.17/lib/urlwatch/html2txt.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/html2txt.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/ical2txt.py new/urlwatch-2.18/lib/urlwatch/ical2txt.py --- old/urlwatch-2.17/lib/urlwatch/ical2txt.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/ical2txt.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/jobs.py new/urlwatch-2.18/lib/urlwatch/jobs.py --- old/urlwatch-2.17/lib/urlwatch/jobs.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/jobs.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ import re import subprocess import requests +import textwrap import urlwatch from requests.packages.urllib3.exceptions import InsecureRequestWarning @@ -85,12 +86,16 @@ def job_documentation(cls): result = [] for sc in TrackSubClasses.sorted_by_kind(cls): - result.extend(( - ' * %s - %s' % (sc.__kind__, sc.__doc__), - ' Required keys: %s' % (', '.join(sc.__required__),), - ' Optional keys: %s' % (', '.join(sc.__optional__),), - '', - )) + if sc.__doc__: + result.append(' * %s - %s' % (sc.__kind__, sc.__doc__)) + else: + result.append(' * %s' % (sc.__kind__,)) + + for msg, value in ((' Required keys: ', sc.__required__), (' Optional keys: ', sc.__optional__)): + if value: + values = ('\n' + (len(msg) * ' ')).join(textwrap.wrap(', '.join(value), 79 - len(msg))) + result.append('%s%s' % (msg, values)) + result.append('') return '\n'.join(result) def get_location(self): @@ -203,7 +208,8 @@ __required__ = ('url',) __optional__ = ('cookies', 'data', 'method', 'ssl_no_verify', 'ignore_cached', 'http_proxy', 'https_proxy', - 'headers', 'ignore_connection_errors', 'ignore_http_error_codes', 'encoding', 'timeout') + 'headers', 'ignore_connection_errors', 'ignore_http_error_codes', 'encoding', 'timeout', + 'ignore_timeout_errors', 'ignore_too_many_redirects') LOCATION_IS_URL 
= True CHARSET_RE = re.compile('text/(html|plain); charset=([^;]*)') @@ -319,6 +325,10 @@ def ignore_error(self, exception): if isinstance(exception, requests.exceptions.ConnectionError) and self.ignore_connection_errors: return True + if isinstance(exception, requests.exceptions.Timeout) and self.ignore_timeout_errors: + return True + if isinstance(exception, requests.exceptions.TooManyRedirects) and self.ignore_too_many_redirects: + return True elif isinstance(exception, requests.exceptions.HTTPError): status_code = exception.response.status_code ignored_codes = [] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/mailer.py new/urlwatch-2.18/lib/urlwatch/mailer.py --- old/urlwatch-2.17/lib/urlwatch/mailer.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/mailer.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -72,12 +72,13 @@ class SMTPMailer(Mailer): - def __init__(self, smtp_user, smtp_server, smtp_port, tls, auth): + def __init__(self, smtp_user, smtp_server, smtp_port, tls, auth, insecure_password=None): self.smtp_server = smtp_server self.smtp_user = smtp_user self.smtp_port = smtp_port self.tls = tls self.auth = auth + self.insecure_password = insecure_password def send(self, msg): s = smtplib.SMTP(self.smtp_server, self.smtp_port) @@ -86,13 +87,16 @@ if self.tls: s.starttls() - if self.auth and keyring is not None: - passwd = keyring.get_password(self.smtp_server, self.smtp_user) - if passwd is None: - raise ValueError('No password available in keyring for {}, {}'.format(self.smtp_server, self.smtp_user)) + if self.auth: + if self.insecure_password: + passwd = self.insecure_password + elif keyring is not None: + passwd = keyring.get_password(self.smtp_server, self.smtp_user) + if passwd is None: + raise ValueError('No password available in keyring for {}, {}'.format(self.smtp_server, self.smtp_user)) s.login(self.smtp_user, passwd) - s.sendmail(msg['From'], [msg['To']], msg.as_string()) + s.sendmail(msg['From'], msg['To'].split(','), msg.as_string()) s.quit() @@ -101,7 +105,7 @@ self.sendmail_path = sendmail_path def send(self, msg): - p = subprocess.Popen([self.sendmail_path, '-t', '-oi'], + p = subprocess.Popen([self.sendmail_path, '-oi', msg['To']], stdin=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/main.py new/urlwatch-2.18/lib/urlwatch/main.py --- old/urlwatch-2.17/lib/urlwatch/main.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/main.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). 
-# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/migration.py new/urlwatch-2.18/lib/urlwatch/migration.py --- old/urlwatch-2.17/lib/urlwatch/migration.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/migration.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/reporters.py new/urlwatch-2.18/lib/urlwatch/reporters.py --- old/urlwatch-2.17/lib/urlwatch/reporters.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/reporters.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,6 +1,6 @@ # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -38,7 +38,7 @@ import os import sys import time -import cgi +import html import functools import requests @@ -58,8 +58,18 @@ except ImportError: Pushbullet = None -logger = logging.getLogger(__name__) +try: + import matrix_client.api +except ImportError: + matrix_client = None +try: + # markdown2 is an optional dependency which provides better formatting for Matrix. 
+ from markdown2 import Markdown +except ImportError: + Markdown = None + +logger = logging.getLogger(__name__) # Regular expressions that match the added/removed markers of GNU wdiff output WDIFF_ADDED_RE = r'[{][+].*?[+][}]' @@ -140,8 +150,8 @@ return self.s def format(self, *args, **kwargs): - return str(self).format(*(cgi.escape(str(arg)) for arg in args), - **{k: cgi.escape(str(v)) for k, v in kwargs.items()}) + return str(self).format(*(html.escape(str(arg)) for arg in args), + **{k: html.escape(str(v)) for k, v in kwargs.items()}) class HtmlReporter(ReporterBase): @@ -155,6 +165,7 @@ <html><head> <title>urlwatch</title> <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> <style type="text/css"> body { font-family: sans-serif; } .diff_add { color: green; background-color: lightgreen; } @@ -248,7 +259,7 @@ pretty_name = job_state.job.pretty_name() location = job_state.job.get_location() if pretty_name != location: - location = '%s (%s)' % (pretty_name, location) + location = '%s ( %s )' % (pretty_name, location) yield ': '.join((job_state.verb.upper(), location)) return @@ -295,7 +306,7 @@ pretty_name = job_state.job.pretty_name() location = job_state.job.get_location() if pretty_name != location: - location = '%s (%s)' % (pretty_name, location) + location = '%s ( %s )' % (pretty_name, location) pretty_summary = ': '.join((job_state.verb.upper(), pretty_name)) summary = ': '.join((job_state.verb.upper(), location)) @@ -397,8 +408,13 @@ return if self.config['method'] == "smtp": smtp_user = self.config['smtp'].get('user', None) or self.config['from'] + # Legacy support: The current smtp "auth" setting was previously called "keyring" + if 'keyring' in self.config['smtp']: + logger.info('The SMTP config key "keyring" is now called "auth".') + use_auth = self.config['smtp'].get('auth', self.config['smtp'].get('keyring', False)) mailer = SMTPMailer(smtp_user, 
self.config['smtp']['host'], self.config['smtp']['port'], - self.config['smtp']['starttls'], self.config['smtp']['keyring']) + self.config['smtp']['starttls'], use_auth, + self.config['smtp'].get('insecure_password')) elif self.config['method'] == "sendmail": mailer = SendmailMailer(self.config['sendmail']['path']) else: @@ -454,7 +470,9 @@ def web_service_submit(self, service, title, body): sound = self.config['sound'] - device = self.config['device'] + # If device is the empty string or not specified at all, use None to send to all devices + # (see https://github.com/thp/urlwatch/issues/372) + device = self.config.get('device', None) or None msg = service.create_message(title=title, message=body, html=True, sound=sound, device=device) msg.send() @@ -580,7 +598,7 @@ class SlackReporter(TextReporter): """Custom Slack reporter""" - MAX_LENGTH = 4096 + MAX_LENGTH = 40000 __kind__ = 'slack' @@ -617,3 +635,113 @@ def chunkstring(self, string, length): return (string[0 + i:length + i] for i in range(0, len(string), length)) + + +class MarkdownReporter(ReporterBase): + def submit(self): + cfg = self.report.config['report']['markdown'] + show_details = cfg['details'] + show_footer = cfg['footer'] + + if cfg['minimal']: + for job_state in self.report.get_filtered_job_states(self.job_states): + pretty_name = job_state.job.pretty_name() + location = job_state.job.get_location() + if pretty_name != location: + location = '%s (%s)' % (pretty_name, location) + yield '* ' + ': '.join((job_state.verb.upper(), location)) + return + + summary = [] + details = [] + for job_state in self.report.get_filtered_job_states(self.job_states): + summary_part, details_part = self._format_output(job_state) + summary.extend(summary_part) + details.extend(details_part) + + if summary: + yield from ('%d. 
%s' % (idx + 1, line) for idx, line in enumerate(summary)) + yield '' + + if show_details: + yield from details + + if summary and show_footer: + yield from ('--- ', + '%s %s, %s ' % (urlwatch.pkgname, urlwatch.__version__, urlwatch.__copyright__), + 'Website: %s ' % (urlwatch.__url__,), + 'watched %d URLs in %d seconds' % (len(self.job_states), self.duration.seconds)) + + def _format_content(self, job_state): + if job_state.verb == 'error': + return job_state.traceback.strip() + + if job_state.verb == 'unchanged': + return job_state.old_data + + if job_state.old_data in (None, job_state.new_data): + return None + + return self.unified_diff(job_state) + + def _format_output(self, job_state): + summary_part = [] + details_part = [] + + pretty_name = job_state.job.pretty_name() + location = job_state.job.get_location() + if pretty_name != location: + location = '%s (%s)' % (pretty_name, location) + + pretty_summary = ': '.join((job_state.verb.upper(), pretty_name)) + summary = ': '.join((job_state.verb.upper(), location)) + content = self._format_content(job_state) + + summary_part.append(pretty_summary) + + details_part.append('### ' + summary) + if content is not None: + details_part.extend(('', '```', content, '```', '')) + details_part.extend(('', '')) + + return summary_part, details_part + + +class MatrixReporter(MarkdownReporter): + """Custom Matrix reporter""" + MAX_LENGTH = 4096 + + __kind__ = 'matrix' + + def submit(self): + homeserver_url = self.config['homeserver'] + access_token = self.config['access_token'] + room_id = self.config['room_id'] + + body_markdown = '\n'.join(super().submit()) + + if not body_markdown: + logger.debug('Not calling Matrix API (no changes)') + return + + if len(body_markdown) > self.MAX_LENGTH: + body_markdown = body_markdown[:self.MAX_LENGTH] + + client_api = matrix_client.api.MatrixHttpApi(homeserver_url, access_token) + + if Markdown is not None: + body_html = Markdown().convert(body_markdown) + + 
client_api.send_message_event( + room_id, + "m.room.message", + content={ + "msgtype": "m.text", + "format": "org.matrix.custom.html", + "body": body_markdown, + "formatted_body": body_html + } + ) + else: + logger.debug('Not formatting as Markdown; dependency on markdown2 not met?') + client_api.send_message(room_id, body_markdown) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/storage.py new/urlwatch-2.18/lib/urlwatch/storage.py --- old/urlwatch-2.17/lib/urlwatch/storage.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/storage.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -59,6 +59,12 @@ 'minimal': False, }, + 'markdown': { + 'details': True, + 'footer': True, + 'minimal': False, + }, + 'html': { 'diff': 'unified', # "unified" or "table" }, @@ -81,7 +87,7 @@ 'user': '', 'port': 25, 'starttls': True, - 'keyring': True, + 'auth': True, }, 'sendmail': { 'path': 'sendmail', @@ -90,7 +96,7 @@ 'pushover': { 'enabled': False, 'app': '', - 'device': '', + 'device': None, 'sound': 'spacealarm', 'user': '', }, @@ -107,6 +113,12 @@ 'enabled': False, 'webhook_url': '', }, + 'matrix': { + 'enabled': False, + 'homeserver': '', + 'access_token': '', + 'room_id': '', + }, 'mailgun': { 'enabled': False, 'region': 'us', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/util.py new/urlwatch-2.18/lib/urlwatch/util.py --- old/urlwatch-2.17/lib/urlwatch/util.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/util.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file 
is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/lib/urlwatch/worker.py new/urlwatch-2.18/lib/urlwatch/worker.py --- old/urlwatch-2.17/lib/urlwatch/worker.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/lib/urlwatch/worker.py 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/requirements.txt new/urlwatch-2.18/requirements.txt --- old/urlwatch-2.17/requirements.txt 1970-01-01 01:00:00.000000000 +0100 +++ new/urlwatch-2.18/requirements.txt 2020-05-03 11:31:08.000000000 +0200 @@ -0,0 +1,7 @@ +pyyaml +minidb +requests +keyring +appdirs +lxml +cssselect diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/setup.py new/urlwatch-2.18/setup.py --- old/urlwatch-2.17/setup.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/setup.py 2020-05-03 11:31:08.000000000 +0200 @@ -22,7 +22,7 @@ m['install_requires'].extend(['enum34']) if sys.platform == 'win32': m['install_requires'].extend(['colorama']) -m['scripts'] = ['urlwatch'] +m['entry_points'] = {"console_scripts": ["urlwatch=urlwatch.cli:main"]} m['package_dir'] = {'': 'lib'} m['packages'] = ['urlwatch'] m['python_requires'] = '>3.3.0' @@ -53,7 +53,10 @@ from pip._internal import main except ImportError: from pip import main - main(['install', '--upgrade'] + 
m['install_requires']) + try: + main(['install', '--upgrade'] + m['install_requires']) + except TypeError: # recent pip + main.main(['install', '--upgrade'] + m['install_requires']) m['cmdclass'] = {'install_dependencies': InstallDependencies} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/share/urlwatch/examples/hooks.py.example new/urlwatch-2.18/share/urlwatch/examples/hooks.py.example --- old/urlwatch-2.17/share/urlwatch/examples/hooks.py.example 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/share/urlwatch/examples/hooks.py.example 2020-05-03 11:31:08.000000000 +0200 @@ -1,7 +1,7 @@ # # Example hooks file for urlwatch # -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> +# Copyright (c) 2008-2020 Thomas Perl <m@thp.io> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/test/data/filter_tests.yaml new/urlwatch-2.18/test/data/filter_tests.yaml --- old/urlwatch-2.17/test/data/filter_tests.yaml 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/test/data/filter_tests.yaml 2020-05-03 11:31:08.000000000 +0200 @@ -78,6 +78,39 @@ <div>foo</div> <div id="bar">bar</div> +xpath_xml_namespaces: + filter: + xpath: + path: //item | //f:item + method: xml + exclude: //f:year | //author + namespaces: + f: foo + data: | + <feed xmlns:f="foo"> + <item> + <f:year>2017</f:year> + <author>Tom</author> + <data>abc</data> + </item> + <f:item> + <year>2018</year> + <f:author>Jerry</f:author> + <data>xyz</data> + </f:item> + </feed> + expected_result: | + <item xmlns:f="foo"> + + + <data>abc</data> + </item> + + <f:item xmlns:f="foo"> + <year>2018</year> + <f:author>Jerry</f:author> + <data>xyz</data> + </f:item> css: filter: css:div data: | @@ -102,6 +135,39 @@ </body></html> expected_result: | <div class="foo">foo</div> +css_xml_namespaces: + filter: + css: + 
selector: item, f|item + method: xml + exclude: f|year, author + namespaces: + f: foo + data: | + <feed xmlns:f="foo"> + <item> + <f:year>2017</f:year> + <author>Tom</author> + <data>abc</data> + </item> + <f:item> + <year>2018</year> + <f:author>Jerry</f:author> + <data>xyz</data> + </f:item> + </feed> + expected_result: | + <item xmlns:f="foo"> + + + <data>abc</data> + </item> + + <f:item xmlns:f="foo"> + <year>2018</year> + <f:author>Jerry</f:author> + <data>xyz</data> + </f:item> grep: filter: grep:blue data: | @@ -154,3 +220,15 @@ expected_result: |- 48 65 6c 6c 6f 20 77 6f 72 6c 64 21 0a e4 bd a0 Hello world!.... e5 a5 bd ef bc 8c e4 b8 96 e7 95 8c ef bc 81 0a ................ +sort: + filter: sort + data: | + The rose is red; + the violet's blue. + Sugar is sweet, + and so are you. + expected_result: |- + and so are you. + Sugar is sweet, + The rose is red; + the violet's blue. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/test/data/urlwatch.yaml new/urlwatch-2.18/test/data/urlwatch.yaml --- old/urlwatch-2.17/test/data/urlwatch.yaml 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/test/data/urlwatch.yaml 2020-05-03 11:31:08.000000000 +0200 @@ -12,7 +12,7 @@ path: sendmail smtp: host: localhost - keyring: true + auth: true port: 25 starttls: true subject: '{count} changes: {jobs}' @@ -21,7 +21,7 @@ diff: unified pushover: app: '' - device: '' + device: null enabled: false sound: 'spacealarm' user: '' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/test/test_filters.py new/urlwatch-2.18/test/test_filters.py --- old/urlwatch-2.17/test/test_filters.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/test/test_filters.py 2020-05-03 11:31:08.000000000 +0200 @@ -33,6 +33,6 @@ eq_(result, expected_result) with open(os.path.join(os.path.dirname(__file__), 'data/filter_tests.yaml'), 'r', encoding='utf8') as fp: - filter_tests = 
yaml.load(fp) + filter_tests = yaml.load(fp, Loader=yaml.SafeLoader) for test_name in filter_tests: yield check_filter, test_name diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/test/test_handler.py new/urlwatch-2.18/test/test_handler.py --- old/urlwatch-2.17/test/test_handler.py 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/test/test_handler.py 2020-05-03 11:31:08.000000000 +0200 @@ -79,7 +79,6 @@ style = pycodestyle.StyleGuide(ignore=['E501', 'E402', 'W503']) py_files = [y for x in os.walk(os.path.abspath('.')) for y in glob(os.path.join(x[0], '*.py'))] - py_files.append(os.path.abspath('urlwatch')) result = style.check_files(py_files) assert result.total_errors == 0, "Found #{0} code style errors".format(result.total_errors) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.17/urlwatch new/urlwatch-2.18/urlwatch --- old/urlwatch-2.17/urlwatch 2019-04-12 17:29:43.000000000 +0200 +++ new/urlwatch-2.18/urlwatch 2020-05-03 11:31:08.000000000 +0200 @@ -1,111 +1,8 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2019 Thomas Perl <m@thp.io> -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# Convenience script to run urlwatch from a Git checkout +# This is NOT the script that gets installed as part of "setup.py install" - -# File and folder paths -import logging -import os.path -import signal -import socket import sys - -from appdirs import AppDirs - -pkgname = 'urlwatch' -urlwatch_dir = os.path.expanduser(os.path.join('~', '.' + pkgname)) -urlwatch_cache_dir = AppDirs(pkgname).user_cache_dir - -if not os.path.exists(urlwatch_dir): - urlwatch_dir = AppDirs(pkgname).user_config_dir - -# Check if we are installed in the system already -(prefix, bindir) = os.path.split(os.path.dirname(os.path.abspath(sys.argv[0]))) - -if bindir != 'bin': - sys.path.insert(0, os.path.join(prefix, bindir, 'lib')) - -from urlwatch.command import UrlwatchCommand -from urlwatch.config import CommandConfig -from urlwatch.main import Urlwatch -from urlwatch.storage import YamlConfigStorage, CacheMiniDBStorage, UrlsYaml - -# One minute (=60 seconds) timeout for each request to avoid hanging -socket.setdefaulttimeout(60) - -# Ignore SIGPIPE for stdout (see https://github.com/thp/urlwatch/issues/77) -try: - signal.signal(signal.SIGPIPE, signal.SIG_DFL) -except AttributeError: - # Windows does not have signal.SIGPIPE - ... 
- -logger = logging.getLogger(pkgname) - -CONFIG_FILE = 'urlwatch.yaml' -URLS_FILE = 'urls.yaml' -CACHE_FILE = 'cache.db' -HOOKS_FILE = 'hooks.py' - - -def setup_logger(verbose): - if verbose: - root_logger = logging.getLogger('') - console = logging.StreamHandler() - console.setFormatter(logging.Formatter('%(asctime)s %(module)s %(levelname)s: %(message)s')) - root_logger.addHandler(console) - root_logger.setLevel(logging.DEBUG) - root_logger.info('turning on verbose logging mode') - - -if __name__ == '__main__': - config_file = os.path.join(urlwatch_dir, CONFIG_FILE) - urls_file = os.path.join(urlwatch_dir, URLS_FILE) - hooks_file = os.path.join(urlwatch_dir, HOOKS_FILE) - new_cache_file = os.path.join(urlwatch_cache_dir, CACHE_FILE) - old_cache_file = os.path.join(urlwatch_dir, CACHE_FILE) - cache_file = new_cache_file - if os.path.exists(old_cache_file) and not os.path.exists(new_cache_file): - cache_file = old_cache_file - - command_config = CommandConfig(pkgname, urlwatch_dir, bindir, prefix, - config_file, urls_file, hooks_file, cache_file, False) - setup_logger(command_config.verbose) - - # setup storage API - config_storage = YamlConfigStorage(command_config.config) - cache_storage = CacheMiniDBStorage(command_config.cache) - urls_storage = UrlsYaml(command_config.urls) - - # setup urlwatcher - urlwatch = Urlwatch(command_config, config_storage, cache_storage, urls_storage) - urlwatch_command = UrlwatchCommand(urlwatch) - - # run urlwatcher - urlwatch_command.run() +sys.path.insert(0, 'lib') +from urlwatch.cli import main +main()
participants (1)
-
root