commit youtube-dl for openSUSE:Factory
Hello community, here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2020-12-21 10:24:31 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old) and /work/SRC/openSUSE:Factory/.youtube-dl.new.5145 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "youtube-dl" Mon Dec 21 10:24:31 2020 rev:152 rq:857759 version:2020.12.14 Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2020-12-12 20:36:15.242095541 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.5145/python-youtube-dl.changes 2020-12-21 10:27:05.416229727 +0100 @@ -1,0 +2,10 @@ +Sun Dec 13 19:35:10 UTC 2020 - Jan Engelhardt <jengelh@inai.de> + +- Update to release 2020.12.14 + * youtube: Add some invidious instances + * itv: clean description from HTML tags + * linuxacademy] Fix authentication and extraction + * downloader/hls] delegate manifests with media initialization + to ffmpeg + +------------------------------------------------------------------- youtube-dl.changes: same change Old: ---- youtube-dl-2020.12.12.tar.gz youtube-dl-2020.12.12.tar.gz.sig New: ---- youtube-dl-2020.12.14.tar.gz youtube-dl-2020.12.14.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.ntxBsv/_old 2020-12-21 10:27:06.596231065 +0100 +++ /var/tmp/diff_new_pack.ntxBsv/_new 2020-12-21 10:27:06.600231069 +0100 @@ -19,7 +19,7 @@ %define modname youtube-dl %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-youtube-dl -Version: 2020.12.12 +Version: 2020.12.14 Release: 0 Summary: A Python module for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.ntxBsv/_old 2020-12-21 10:27:06.628231101 +0100 +++ /var/tmp/diff_new_pack.ntxBsv/_new 2020-12-21 10:27:06.628231101 +0100 @@ -17,7 +17,7 @@ Name: youtube-dl -Version: 2020.12.12 +Version: 2020.12.14 Release: 0 Summary: A tool for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl-2020.12.12.tar.gz -> youtube-dl-2020.12.14.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog --- old/youtube-dl/ChangeLog 2020-12-12 01:09:56.000000000 +0100 +++ new/youtube-dl/ChangeLog 2020-12-13 18:57:08.000000000 +0100 @@ -1,3 +1,30 @@ +version 2020.12.14 + +Core +* [extractor/common] Improve JSON-LD interaction statistic extraction (#23306) +* [downloader/hls] Delegate manifests with media initialization to ffmpeg ++ [extractor/common] Document duration meta field for playlists + +Extractors +* [mdr] Bypass geo restriction +* [mdr] Improve extraction (#24346, #26873) +* [yandexmusic:album] Improve album title extraction (#27418) +* [eporner] Fix view count extraction and make optional (#23306) ++ [eporner] Extend URL regular expression +* [eporner] Fix hash extraction and extend _VALID_URL (#27396) +* [slideslive] Use m3u8 entry protocol for m3u8 formats (#27400) +* [twitcasting] Fix format extraction and improve info extraction (#24868) +* [linuxacademy] Fix authentication and extraction (#21129, #26223, #27402) +* [itv] Clean description from HTML tags (#27399) +* [vlive] Sort live formats (#27404) +* [hotstart] Fix and improve extraction + * Fix format extraction (#26690) + + Extract thumbnail URL (#16079, #20412) + + Add support for country specific playlist URLs (#23496) + * Select the last id in video URL (#26412) ++ [youtube] Add some invidious instances (#27373) + + version 2020.12.12 Core @@ -106,7 +133,7 @@ Extractors + [tva] Add support for qub.ca (#27235) -+ [toggle] Detect DRM protected videos (closes #16479)(closes #20805) ++ [toggle] Detect DRM protected videos (#16479, #20805) + [toggle] Add support for new MeWatch URLs (#27256) * [youtube:tab] Extract channels only from channels tab (#27266) + [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/test/test_InfoExtractor.py new/youtube-dl/test/test_InfoExtractor.py --- old/youtube-dl/test/test_InfoExtractor.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/test/test_InfoExtractor.py 2020-12-13 18:56:48.000000000 +0100 @@ -98,6 +98,55 @@ self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) + def test_search_json_ld_realworld(self): + # https://github.com/ytdl-org/youtube-dl/issues/23306 + expect_dict( + self, + self.ie._search_json_ld(r'''<script type="application/ld+json"> +{ +"@context": "http://schema.org/", +"@type": "VideoObject", +"name": "1 On 1 With Kleio", +"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/", +"duration": "PT0H12M23S", +"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"], +"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4", +"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/", +"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", +"width": "1920", +"height": "1080", +"encodingFormat": "mp4", +"bitrate": "6617kbps", +"isFamilyFriendly": "False", +"description": "Kleio Valentien", +"uploadDate": "2015-12-05T21:24:35+01:00", +"interactionStatistic": { +"@type": "InteractionCounter", +"interactionType": { "@type": "http://schema.org/WatchAction" }, +"userInteractionCount": 1120958 +}, "aggregateRating": { +"@type": "AggregateRating", +"ratingValue": "88", +"ratingCount": "630", +"bestRating": "100", +"worstRating": "0" +}, "actor": [{ +"@type": "Person", +"name": "Kleio Valentien", +"url": "https://www.eporner.com/pornstar/kleio-valentien/" +}]} +</script>''', None), + { + 'title': '1 On 1 With Kleio', + 'description': 'Kleio Valentien', + 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', + 'timestamp': 1449347075, + 'duration': 743.0, + 'view_count': 1120958, + 'width': 1920, + 'height': 1080, + }) + def test_download_json(self): uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/downloader/hls.py new/youtube-dl/youtube_dl/downloader/hls.py --- old/youtube-dl/youtube_dl/downloader/hls.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/downloader/hls.py 2020-12-13 18:56:48.000000000 +0100 @@ -42,11 +42,13 @@ # no segments will definitely be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # # event media playlists [4] + r'#EXT-X-MAP:', # media initialization [5] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.... # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.... # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.... # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.... + # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.... ) check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/common.py new/youtube-dl/youtube_dl/extractor/common.py --- old/youtube-dl/youtube_dl/extractor/common.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/common.py 2020-12-13 18:56:48.000000000 +0100 @@ -336,8 +336,8 @@ object, each element of which is a valid dictionary by this specification. Additionally, playlists can have "id", "title", "description", "uploader", - "uploader_id", "uploader_url" attributes with the same semantics as videos - (see above). + "uploader_id", "uploader_url", "duration" attributes with the same semantics + as videos (see above). _type "multi_video" indicates that there are multiple videos that @@ -1237,8 +1237,16 @@ 'ViewAction': 'view', } + def extract_interaction_type(e): + interaction_type = e.get('interactionType') + if isinstance(interaction_type, dict): + interaction_type = interaction_type.get('@type') + return str_or_none(interaction_type) + def extract_interaction_statistic(e): interaction_statistic = e.get('interactionStatistic') + if isinstance(interaction_statistic, dict): + interaction_statistic = [interaction_statistic] if not isinstance(interaction_statistic, list): return for is_e in interaction_statistic: @@ -1246,8 +1254,8 @@ continue if is_e.get('@type') != 'InteractionCounter': continue - interaction_type = is_e.get('interactionType') - if not isinstance(interaction_type, compat_str): + interaction_type = extract_interaction_type(is_e) + if not interaction_type: continue # For interaction count some sites provide string instead of # an integer (as per spec) with non digit characters (e.g. ",") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/eporner.py new/youtube-dl/youtube_dl/extractor/eporner.py --- old/youtube-dl/youtube_dl/extractor/eporner.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/eporner.py 2020-12-13 18:56:48.000000000 +0100 @@ -16,7 +16,7 @@ class EpornerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?' + _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?' _TESTS = [{ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video...', 'md5': '39d486f046212d8e1b911c52ab4691f8', @@ -43,7 +43,10 @@ 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', 'only_matching': True, }, { - 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', + 'url': 'http://www.eporner.com/embed/3YRUtzMcWn0', + 'only_matching': True, + }, { + 'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/', 'only_matching': True, }] @@ -57,7 +60,7 @@ video_id = self._match_id(urlh.geturl()) hash = self._search_regex( - r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash') + r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash') title = self._og_search_title(webpage, default=None) or self._html_search_regex( r'<title>(.+?) - EPORNER', webpage, 'title') @@ -115,8 +118,8 @@ duration = parse_duration(self._html_search_meta( 'duration', webpage, default=None)) view_count = str_to_int(self._search_regex( - r'id="cinemaviews">\s*([0-9,]+)\s*<small>views', - webpage, 'view count', fatal=False)) + r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)', + webpage, 'view count', default=None)) return merge_dicts(json_ld, { 'id': video_id, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/hotstar.py new/youtube-dl/youtube_dl/extractor/hotstar.py --- old/youtube-dl/youtube_dl/extractor/hotstar.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/hotstar.py 2020-12-13 18:56:48.000000000 +0100 @@ -3,6 +3,7 @@ import hashlib import hmac +import json import re import time import uuid @@ -25,43 +26,50 @@ class HotStarBaseIE(InfoExtractor): _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' - def _call_api_impl(self, path, video_id, query): + def _call_api_impl(self, path, video_id, headers, query, data=None): st = int(time.time()) exp = st + 6000 auth = 'st=%d~exp=%d~acl=/*' % (st, exp) auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() - response = self._download_json( - 'https://api.hotstar.com/' + path, video_id, headers={ - 'hotstarauth': auth, - 'x-country-code': 'IN', - 'x-platform-code': 'JIO', - }, query=query) - if response['statusCode'] != 'OK': - raise ExtractorError( - response['body']['message'], expected=True) - return response['body']['results'] + h = {'hotstarauth': auth} + h.update(headers) + return self._download_json( + 'https://api.hotstar.com/' + path, + video_id, headers=h, query=query, data=data) def _call_api(self, path, video_id, query_name='contentId'): - return self._call_api_impl(path, video_id, { + response = self._call_api_impl(path, video_id, { + 'x-country-code': 'IN', + 'x-platform-code': 'JIO', + }, { query_name: video_id, 'tas': 10000, }) + if response['statusCode'] != 'OK': + raise ExtractorError( + response['body']['message'], expected=True) + return response['body']['results'] - def _call_api_v2(self, path, video_id): - return self._call_api_impl( - '%s/in/contents/%s' % (path, video_id), video_id, { - 'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash', - 'client': 'mweb', - 'clientVersion': '6.18.0', - 'deviceId': compat_str(uuid.uuid4()), - 'osName': 'Windows', - 'osVersion': '10', - }) + def _call_api_v2(self, path, video_id, headers, query=None, data=None): + h = {'X-Request-Id': compat_str(uuid.uuid4())} + h.update(headers) + try: + return self._call_api_impl( + path, video_id, h, query, data) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + if e.cause.code == 402: + self.raise_login_required() + message = self._parse_json(e.cause.read().decode(), video_id)['message'] + if message in ('Content not available in region', 'Country is not supported'): + raise self.raise_geo_restricted(message) + raise ExtractorError(message) + raise e class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' - _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})' _TESTS = [{ # contentData 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', @@ -92,8 +100,13 @@ # only available via api v2 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-...', 'only_matching': True, + }, { + 'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/110003...', + 'only_matching': True, }] _GEO_BYPASS = False + _DEVICE_ID = None + _USER_TOKEN = None def _real_extract(self, url): video_id = self._match_id(url) @@ -121,7 +134,30 @@ headers = {'Referer': url} formats = [] geo_restricted = False - playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets'] + + if not self._USER_TOKEN: + self._DEVICE_ID = compat_str(uuid.uuid4()) + self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, { + 'X-HS-Platform': 'PCTV', + 'Content-Type': 'application/json', + }, data=json.dumps({ + 'device_ids': [{ + 'id': self._DEVICE_ID, + 'type': 'device_id', + }], + }).encode())['user_identity'] + + playback_sets = self._call_api_v2( + 'play/v2/playback/content/' + video_id, video_id, { + 'X-HS-Platform': 'web', + 'X-HS-AppVersion': '6.99.1', + 'X-HS-UserToken': self._USER_TOKEN, + }, query={ + 'device-id': self._DEVICE_ID, + 'desired-config': 'encryption:plain', + 'os-name': 'Windows', + 'os-version': '10', + })['data']['playBackSets'] for playback_set in playback_sets: if not isinstance(playback_set, dict): continue @@ -163,19 +199,22 @@ for f in formats: f.setdefault('http_headers', {}).update(headers) + image = try_get(video_data, lambda x: x['image']['h'], compat_str) + return { 'id': video_id, 'title': title, + 'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None, 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), 'formats': formats, 'channel': video_data.get('channelName'), - 'channel_id': video_data.get('channelId'), + 'channel_id': str_or_none(video_data.get('channelId')), 'series': video_data.get('showName'), 'season': video_data.get('seasonName'), 'season_number': int_or_none(video_data.get('seasonNo')), - 'season_id': video_data.get('seasonId'), + 'season_id': str_or_none(video_data.get('seasonId')), 'episode': title, 'episode_number': int_or_none(video_data.get('episodeNo')), } @@ -183,7 +222,7 @@ class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' - _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' _TESTS = [{ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', 'info_dict': { @@ -193,6 +232,9 @@ }, { 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', 'only_matching': True, + }, { + 'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830', + 'only_matching': True, }] def _real_extract(self, url): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/itv.py new/youtube-dl/youtube_dl/extractor/itv.py --- old/youtube-dl/youtube_dl/extractor/itv.py 2020-12-12 01:08:15.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/itv.py 2020-12-13 18:56:48.000000000 +0100 @@ -7,6 +7,7 @@ from .common import InfoExtractor from .brightcove import BrightcoveNewIE from ..utils import ( + clean_html, determine_ext, extract_attributes, get_element_by_class, @@ -14,7 +15,6 @@ merge_dicts, parse_duration, smuggle_url, - strip_or_none, url_or_none, ) @@ -146,7 +146,7 @@ 'formats': formats, 'subtitles': subtitles, 'duration': parse_duration(video_data.get('Duration')), - 'description': strip_or_none(get_element_by_class('episode-info__synopsis', webpage)), + 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)), }, info) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/linuxacademy.py new/youtube-dl/youtube_dl/extractor/linuxacademy.py --- old/youtube-dl/youtube_dl/extractor/linuxacademy.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/linuxacademy.py 2020-12-13 18:56:48.000000000 +0100 @@ -8,11 +8,15 @@ from ..compat import ( compat_b64decode, compat_HTTPError, + compat_str, ) from ..utils import ( + clean_html, ExtractorError, - orderedSet, - unescapeHTML, + js_to_json, + parse_duration, + try_get, + unified_timestamp, urlencode_postdata, urljoin, ) @@ -28,11 +32,15 @@ ) ''' _TESTS = [{ - 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154', + 'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675', 'info_dict': { - 'id': '1498-2', + 'id': '7971-2', 'ext': 'mp4', - 'title': "Introduction to the Practitioner's Brief", + 'title': 'What Is Data Science', + 'description': 'md5:c574a3c20607144fb36cb65bdde76c99', + 'timestamp': 1607387907, + 'upload_date': '20201208', + 'duration': 304, }, 'params': { 'skip_download': True, @@ -46,7 +54,8 @@ 'info_dict': { 'id': '154', 'title': 'AWS Certified Cloud Practitioner', - 'description': 'md5:039db7e60e4aac9cf43630e0a75fa834', + 'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c', + 'duration': 28835, }, 'playlist_count': 41, 'skip': 'Requires Linux Academy account credentials', @@ -74,6 +83,7 @@ self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ 'client_id': self._CLIENT_ID, 'response_type': 'token id_token', + 'response_mode': 'web_message', 'redirect_uri': self._ORIGIN_URL, 'scope': 'openid email user_impersonation profile', 'audience': self._ORIGIN_URL, @@ -129,7 +139,13 @@ access_token = self._search_regex( r'access_token=([^=&]+)', urlh.geturl(), - 'access token') + 'access token', default=None) + if not access_token: + access_token = self._parse_json( + self._search_regex( + r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page, + 'authorization response'), None, + transform_source=js_to_json)['response']['access_token'] self._download_webpage( 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s' @@ -144,30 +160,84 @@ # course path if course_id: - entries = [ - self.url_result( - urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key()) - for lesson_url in orderedSet(re.findall( - r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)', - webpage))] - title = unescapeHTML(self._html_search_regex( - (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)', - r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), - webpage, 'title', default=None, group='value')) - description = unescapeHTML(self._html_search_regex( - r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', - webpage, 'description', default=None, group='value')) - return self.playlist_result(entries, course_id, title, description) + module = self._parse_json( + self._search_regex( + r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'), + item_id) + entries = [] + chapter_number = None + chapter = None + chapter_id = None + for item in module['items']: + if not isinstance(item, dict): + continue + + def type_field(key): + return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower() + type_fields = (type_field('name'), type_field('slug')) + # Move to next module section + if 'section' in type_fields: + chapter = item.get('course_name') + chapter_id = item.get('course_module') + chapter_number = 1 if not chapter_number else chapter_number + 1 + continue + # Skip non-lessons + if 'lesson' not in type_fields: + continue + lesson_url = urljoin(url, item.get('url')) + if not lesson_url: + continue + title = item.get('title') or item.get('lesson_name') + description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text')) + entries.append({ + '_type': 'url_transparent', + 'url': lesson_url, + 'ie_key': LinuxAcademyIE.ie_key(), + 'title': title, + 'description': description, + 'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')), + 'duration': parse_duration(item.get('duration')), + 'chapter': chapter, + 'chapter_id': chapter_id, + 'chapter_number': chapter_number, + }) + return { + '_type': 'playlist', + 'entries': entries, + 'id': course_id, + 'title': module.get('title'), + 'description': module.get('md_desc') or clean_html(module.get('desc')), + 'duration': parse_duration(module.get('duration')), + } # single video path - info = self._extract_jwplayer_data( - webpage, item_id, require_title=False, m3u8_id='hls',) - title = self._search_regex( - (r'>Lecture\s*:\s*(?P<value>[^<]+)', - r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, - 'title', group='value') - info.update({ + m3u8_url = self._parse_json( + self._search_regex( + r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'), + item_id)[0]['file'] + formats = self._extract_m3u8_formats( + m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + info = { 'id': item_id, - 'title': title, - }) + 'formats': formats, + } + lesson = self._parse_json( + self._search_regex( + (r'window\.lesson\s*=\s*({.+?})\s*;', + r'player\.lesson\s*=\s*({.+?})\s*;'), + webpage, 'lesson', default='{}'), item_id, fatal=False) + if lesson: + info.update({ + 'title': lesson.get('lesson_name'), + 'description': lesson.get('md_desc') or clean_html(lesson.get('desc')), + 'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')), + 'duration': parse_duration(lesson.get('duration')), + }) + if not info.get('title'): + info['title'] = self._search_regex( + (r'>Lecture\s*:\s*(?P<value>[^<]+)', + r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, + 'title', group='value') return info diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/mdr.py new/youtube-dl/youtube_dl/extractor/mdr.py --- old/youtube-dl/youtube_dl/extractor/mdr.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/mdr.py 2020-12-13 18:56:48.000000000 +0100 @@ -2,12 +2,16 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( determine_ext, int_or_none, parse_duration, parse_iso8601, + url_or_none, xpath_text, ) @@ -16,6 +20,8 @@ IE_DESC = 'MDR.DE and KiKA' _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html' + _GEO_COUNTRIES = ['DE'] + _TESTS = [{ # MDR regularly deletes its videos 'url': 'http://www.mdr.de/fakt/video189002.html', @@ -67,6 +73,22 @@ 'uploader': 'MITTELDEUTSCHER RUNDFUNK', }, }, { + # empty bitrateVideo and bitrateAudio + 'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html', + 'info_dict': { + 'id': '128372', + 'ext': 'mp4', + 'title': 'Der kleine Wichtel kehrt zurück', + 'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a', + 'duration': 4876, + 'timestamp': 1607823300, + 'upload_date': '20201213', + 'uploader': 'ZDF', + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.htm...', 'only_matching': True, }, { @@ -91,10 +113,13 @@ title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True) + type_ = xpath_text(doc, './type', default=None) + formats = [] processed_urls = [] for asset in doc.findall('./assets/asset'): for source in ( + 'download', 'progressiveDownload', 'dynamicHttpStreamingRedirector', 'adaptiveHttpStreamingRedirector'): @@ -102,63 +127,49 @@ if url_el is None: continue - video_url = url_el.text - if video_url in processed_urls: + video_url = url_or_none(url_el.text) + if not video_url or video_url in processed_urls: continue processed_urls.append(video_url) - vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000) - abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000) - - ext = determine_ext(url_el.text) + ext = determine_ext(video_url) if ext == 'm3u8': - url_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( video_url, video_id, 'mp4', entry_protocol='m3u8_native', - preference=0, m3u8_id='HLS', fatal=False) + preference=0, m3u8_id='HLS', fatal=False)) elif ext == 'f4m': - url_formats = self._extract_f4m_formats( + formats.extend(self._extract_f4m_formats( video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, - preference=0, f4m_id='HDS', fatal=False) + preference=0, f4m_id='HDS', fatal=False)) else: media_type = xpath_text(asset, './mediaType', 'media type', default='MP4') vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000) abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000) filesize = int_or_none(xpath_text(asset, './fileSize', 'file size')) + format_id = [media_type] + if vbr or abr: + format_id.append(compat_str(vbr or abr)) + f = { 'url': video_url, - 'format_id': '%s-%d' % (media_type, vbr or abr), + 'format_id': '-'.join(format_id), 'filesize': filesize, 'abr': abr, - 'preference': 1, + 'vbr': vbr, } if vbr: - width = int_or_none(xpath_text(asset, './frameWidth', 'width')) - height = int_or_none(xpath_text(asset, './frameHeight', 'height')) f.update({ - 'vbr': vbr, - 'width': width, - 'height': height, + 'width': int_or_none(xpath_text(asset, './frameWidth', 'width')), + 'height': int_or_none(xpath_text(asset, './frameHeight', 'height')), }) - url_formats = [f] - - if not url_formats: - continue - - if not vbr: - for f in url_formats: - abr = f.get('tbr') or abr - if 'tbr' in f: - del f['tbr'] - f.update({ - 'abr': abr, - 'vcodec': 'none', - }) + if type_ == 'audio': + f['vcodec'] = 'none' - formats.extend(url_formats) + formats.append(f) self._sort_formats(formats) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/slideslive.py new/youtube-dl/youtube_dl/extractor/slideslive.py --- old/youtube-dl/youtube_dl/extractor/slideslive.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/slideslive.py 2020-12-13 18:56:48.000000000 +0100 @@ -83,9 +83,10 @@ else: formats = [] _MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s' + # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol formats.extend(self._extract_m3u8_formats( - _MANIFEST_PATTERN % (service_id, 'm3u8'), service_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + _MANIFEST_PATTERN % (service_id, 'm3u8'), + service_id, 'mp4', m3u8_id='hls', fatal=False)) formats.extend(self._extract_mpd_formats( _MANIFEST_PATTERN % (service_id, 'mpd'), service_id, mpd_id='dash', fatal=False)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/twitcasting.py new/youtube-dl/youtube_dl/extractor/twitcasting.py --- old/youtube-dl/youtube_dl/extractor/twitcasting.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/twitcasting.py 2020-12-13 18:56:48.000000000 +0100 @@ -1,11 +1,20 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import urlencode_postdata - import re +from .common import InfoExtractor +from ..utils import ( + clean_html, + float_or_none, + get_element_by_class, + get_element_by_id, + parse_duration, + str_to_int, + unified_timestamp, + urlencode_postdata, +) + class TwitCastingIE(InfoExtractor): _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)' @@ -17,8 +26,12 @@ 'ext': 'mp4', 'title': 'Live #2357609', 'uploader_id': 'ivetesangalo', - 'description': "Moi! I'm live on TwitCasting from my iPhone.", + 'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.', 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20110822', + 'timestamp': 1314010824, + 'duration': 32, + 'view_count': int, }, 'params': { 'skip_download': True, @@ -30,8 +43,12 @@ 'ext': 'mp4', 'title': 'Live playing something #3689740', 'uploader_id': 'mttbernardini', - 'description': "I'm live on TwitCasting from my iPad. password: abc (Santa Marinella/Lazio, Italia)", + 'description': 'Salve, io sono Matto (ma con la e). Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.', 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20120212', + 'timestamp': 1329028024, + 'duration': 681, + 'view_count': int, }, 'params': { 'skip_download': True, @@ -40,9 +57,7 @@ }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - uploader_id = mobj.group('uploader_id') + uploader_id, video_id = re.match(self._VALID_URL, url).groups() video_password = self._downloader.params.get('videopassword') request_data = None @@ -52,30 +67,45 @@ }) webpage = self._download_webpage(url, video_id, data=request_data) - title = self._html_search_regex( - r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</', - webpage, 'title', default=None) or self._html_search_meta( - 'twitter:title', webpage, fatal=True) + title = clean_html(get_element_by_id( + 'movietitle', webpage)) or self._html_search_meta( + ['og:title', 'twitter:title'], webpage, fatal=True) + video_js_data = {} m3u8_url = self._search_regex( - (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1', - r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'), - webpage, 'm3u8 url', group='url') + r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'm3u8 url', group='url', default=None) + if not m3u8_url: + video_js_data = self._parse_json(self._search_regex( + r"data-movie-playlist='(\[[^']+\])'", + webpage, 'movie playlist'), video_id)[0] + m3u8_url = video_js_data['source']['url'] + # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol formats = self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + m3u8_url, video_id, 'mp4', m3u8_id='hls') - thumbnail = self._og_search_thumbnail(webpage) - description = self._og_search_description( - webpage, default=None) or self._html_search_meta( - 'twitter:description', webpage) + thumbnail = video_js_data.get('thumbnailUrl') or self._og_search_thumbnail(webpage) + description = clean_html(get_element_by_id( + 'authorcomment', webpage)) or self._html_search_meta( + ['description', 'og:description', 'twitter:description'], webpage) + duration = float_or_none(video_js_data.get( + 'duration'), 1000) or parse_duration(clean_html( + get_element_by_class('tw-player-duration-time', webpage))) + view_count = str_to_int(self._search_regex( + r'Total\s*:\s*([\d,]+)\s*Views', webpage, 'views', None)) + timestamp = unified_timestamp(self._search_regex( + r'data-toggle="true"[^>]+datetime="([^"]+)"', + webpage, 'datetime', None)) return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'timestamp': timestamp, 'uploader_id': uploader_id, + 'duration': duration, + 'view_count': view_count, 'formats': formats, } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/vlive.py new/youtube-dl/youtube_dl/extractor/vlive.py --- old/youtube-dl/youtube_dl/extractor/vlive.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/vlive.py 2020-12-13 18:56:48.000000000 +0100 @@ -155,6 +155,7 @@ 'old/v3/live/%s/playInfo', video_id)['result']['adaptiveStreamUrl'] formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4') + self._sort_formats(formats) info = get_common_fields() info.update({ 'title': self._live_title(video['title']), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/yandexmusic.py new/youtube-dl/youtube_dl/extractor/yandexmusic.py --- old/youtube-dl/youtube_dl/extractor/yandexmusic.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/yandexmusic.py 2020-12-13 18:56:48.000000000 +0100 @@ -260,6 +260,14 @@ }, 'playlist_count': 33, # 'skip': 'Travis CI servers blocked by YandexMusic', + }, { + # empty artists + 'url': 'https://music.yandex.ru/album/9091882', + 'info_dict': { + 'id': '9091882', + 'title': 'ТЕД на русском', + }, + 'playlist_count': 187, }] def _real_extract(self, url): @@ -273,7 +281,10 @@ entries = self._build_playlist([track for volume in album['volumes'] for track in volume]) - title = '%s - %s' % (album['artists'][0]['name'], album['title']) + title = album['title'] + artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str) + if artist: + title = '%s - %s' % (artist, title) year = album.get('year') if year: title += ' (%s)' % year diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/youtube.py new/youtube-dl/youtube_dl/extractor/youtube.py --- old/youtube-dl/youtube_dl/extractor/youtube.py 2020-12-12 01:08:09.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/youtube.py 2020-12-13 18:56:48.000000000 +0100 @@ -319,10 +319,18 @@ (?:www\.)?invidious\.kabi\.tk/| (?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.mastodon\.host/| + (?:www\.)?invidious\.zapashcanon\.fr/| + (?:www\.)?invidious\.kavin\.rocks/| + (?:www\.)?invidious\.tube/| + (?:www\.)?invidiou\.site/| + (?:www\.)?invidious\.site/| + (?:www\.)?invidious\.xyz/| (?:www\.)?invidious\.nixnet\.xyz/| (?:www\.)?invidious\.drycat\.fr/| (?:www\.)?tube\.poal\.co/| + (?:www\.)?tube\.connect\.cafe/| (?:www\.)?vid\.wxzm\.sx/| + (?:www\.)?vid\.mint\.lgbt/| (?:www\.)?yewtu\.be/| (?:www\.)?yt\.elukerio\.org/| (?:www\.)?yt\.lelux\.fi/| diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py --- old/youtube-dl/youtube_dl/version.py 2020-12-12 01:09:56.000000000 +0100 +++ new/youtube-dl/youtube_dl/version.py 2020-12-13 18:57:08.000000000 +0100 @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.12.12' +__version__ = '2020.12.14'
participants (1)
-
User for buildservice source handling