Hello community,
Here is the log from the commit of the package you-get for openSUSE:Factory, checked in at 2018-12-03 10:10:35.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/you-get (Old)
and /work/SRC/openSUSE:Factory/.you-get.new.19453 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get"
Mon Dec 3 10:10:35 2018 rev:15 rq:653197 version:0.4.1181
Changes:
--------
--- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2018-11-09 07:54:17.131720342 +0100
+++ /work/SRC/openSUSE:Factory/.you-get.new.19453/you-get.changes 2018-12-03 10:10:55.543679324 +0100
@@ -1,0 +2,5 @@
+Sat Dec 1 06:54:44 UTC 2018 - aloisio@gmx.com
+
+- Update to version 0.4.1181 (no changelog supplied)
+
+-------------------------------------------------------------------
Old:
----
you-get-0.4.1167.tar.gz
New:
----
you-get-0.4.1181.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ you-get.spec ++++++
--- /var/tmp/diff_new_pack.oKUsAh/_old 2018-12-03 10:10:56.751678207 +0100
+++ /var/tmp/diff_new_pack.oKUsAh/_new 2018-12-03 10:10:56.751678207 +0100
@@ -17,7 +17,7 @@
Name: you-get
-Version: 0.4.1167
+Version: 0.4.1181
Release: 0
Summary: Dumb downloader that scrapes the web
License: MIT
++++++ you-get-0.4.1167.tar.gz -> you-get-0.4.1181.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/common.py new/you-get-0.4.1181/src/you_get/common.py
--- old/you-get-0.4.1167/src/you_get/common.py 2018-11-07 16:59:58.000000000 +0100
+++ new/you-get-0.4.1181/src/you_get/common.py 2018-11-30 21:51:11.000000000 +0100
@@ -102,6 +102,7 @@
'soundcloud' : 'soundcloud',
'ted' : 'ted',
'theplatform' : 'theplatform',
+ 'tiktok' : 'tiktok',
'tucao' : 'tucao',
'tudou' : 'tudou',
'tumblr' : 'tumblr',
@@ -1575,9 +1576,9 @@
url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords)
page = get_content(url, headers=fake_headers)
videos = re.findall(
- r'([^<]+)<', page
+ r'([^<]+)<', page
)
- vdurs = re.findall(r'<span class="vdur _dwc">([^<]+)<', page)
+ vdurs = re.findall(r'([^<]+)<', page)
durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
print('Google Videos search:')
for v in zip(videos, durs):
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractor.py new/you-get-0.4.1181/src/you_get/extractor.py
--- old/you-get-0.4.1167/src/you_get/extractor.py 2018-11-07 16:59:58.000000000 +0100
+++ new/you-get-0.4.1181/src/you_get/extractor.py 2018-11-30 21:51:11.000000000 +0100
@@ -211,7 +211,7 @@
ext = self.dash_streams[stream_id]['container']
total_size = self.dash_streams[stream_id]['size']
- if ext == 'm3u8':
+ if ext == 'm3u8' or ext == 'm4a':
ext = 'mp4'
if not urls:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/__init__.py new/you-get-0.4.1181/src/you_get/extractors/__init__.py
--- old/you-get-0.4.1167/src/you_get/extractors/__init__.py 2018-11-07 16:59:58.000000000 +0100
+++ new/you-get-0.4.1181/src/you_get/extractors/__init__.py 2018-11-30 21:51:11.000000000 +0100
@@ -67,6 +67,7 @@
from .soundcloud import *
from .suntv import *
from .theplatform import *
+from .tiktok import *
from .tucao import *
from .tudou import *
from .tumblr import *
@@ -88,4 +89,4 @@
from .khan import *
from .zhanqi import *
from .kuaishou import *
-from .zhibo import *
\ No newline at end of file
+from .zhibo import *
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/acfun.py new/you-get-0.4.1181/src/you_get/extractors/acfun.py
--- old/you-get-0.4.1167/src/you_get/extractors/acfun.py 2018-11-07 16:59:58.000000000 +0100
+++ new/you-get-0.4.1181/src/you_get/extractors/acfun.py 2018-11-30 21:51:11.000000000 +0100
@@ -85,9 +85,13 @@
_, _, seg_size = url_info(url)
size += seg_size
#fallback to flvhd is not quite possible
- print_info(site_info, title, 'mp4', size)
+ if re.search(r'fid=[0-9A-Z\-]*.flv', preferred[0][0]):
+ ext = 'flv'
+ else:
+ ext = 'mp4'
+ print_info(site_info, title, ext, size)
if not info_only:
- download_urls(preferred[0], title, 'mp4', size, output_dir=output_dir, merge=merge)
+ download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge)
else:
raise NotImplementedError(sourceType)
@@ -105,27 +109,42 @@
pass
def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
- assert re.match(r'http://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url)
- html = get_content(url)
+ assert re.match(r'http://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url)
- title = r1(r'data-title="([^"]+)"', html)
+ if re.match(r'http://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url):
+ html = get_content(url)
+ title = r1(r'data-title="([^"]+)"', html)
+ if match1(url, r'_(\d+)$'): # current P
+ title = title + " " + r1(r'active">([^<]*)', html)
+ vid = r1('data-vid="(\d+)"', html)
+ up = r1('data-name="([^"]+)"', html)
+ # bangumi
+ elif re.match("http://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url):
+ html = get_content(url)
+ title = match1(html, r'"newTitle"\s*:\s*"([^"]+)"')
+ if match1(url, r'_(\d+)$'): # current P
+ title = title + " " + r1(r'active">([^<]*)', html)
+ vid = match1(html, r'videoId="(\d+)"')
+ up = "acfun"
+ else:
+ raise NotImplemented
+
+ assert title and vid
title = unescape_html(title)
title = escape_file_path(title)
- assert title
- if match1(url, r'_(\d+)$'): # current P
- title = title + " " + r1(r'active">([^<]*)', html)
-
- vid = r1('data-vid="(\d+)"', html)
- up = r1('data-name="([^"]+)"', html)
p_title = r1('active">([^<]+)', html)
title = '%s (%s)' % (title, up)
- if p_title: title = '%s - %s' % (title, p_title)
+ if p_title:
+ title = '%s - %s' % (title, p_title)
+
+
acfun_download_by_vid(vid, title,
output_dir=output_dir,
merge=merge,
info_only=info_only,
**kwargs)
+
site_info = "AcFun.tv"
download = acfun_download
download_playlist = playlist_not_supported('acfun')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/bilibili.py new/you-get-0.4.1181/src/you_get/extractors/bilibili.py
--- old/you-get-0.4.1167/src/you_get/extractors/bilibili.py 2018-11-07 16:59:58.000000000 +0100
+++ new/you-get-0.4.1181/src/you_get/extractors/bilibili.py 2018-11-30 21:51:11.000000000 +0100
@@ -130,14 +130,13 @@
m = re.search(r'(.*?)</h1>', self.page) or re.search(r'', self.page)
if m is not None:
self.title = m.group(1)
- s = re.search(r'<span>([^<]+)</span>', m.group(1))
+ s = re.search(r'([^<]+)</span>', m.group(1))
if s:
self.title = unescape_html(s.group(1))
if self.title is None:
m = re.search(r'property="og:title" content="([^"]+)"', self.page)
if m is not None:
self.title = m.group(1)
-
if 'subtitle' in kwargs:
subtitle = kwargs['subtitle']
self.title = '{} {}'.format(self.title, subtitle)
@@ -162,6 +161,8 @@
self.live_entry(**kwargs)
elif 'vc.bilibili.com' in self.url:
self.vc_entry(**kwargs)
+ elif 'audio/au' in self.url:
+ self.audio_entry(**kwargs)
else:
self.entry(**kwargs)
@@ -173,6 +174,30 @@
self.title = page_list[0]['pagename']
self.download_by_vid(page_list[0]['cid'], True, bangumi_movie=True, **kwargs)
+ def audio_entry(self, **kwargs):
+ assert re.match(r'https?://www.bilibili.com/audio/au\d+', self.url)
+ patt = r"(\d+)"
+ audio_id = re.search(patt, self.url).group(1)
+ audio_info_url = \
+ 'https://www.bilibili.com/audio/music-service-c/web/song/info?sid={}'.format(audio_id)
+ audio_info_response = json.loads(get_content(audio_info_url))
+ if audio_info_response['msg'] != 'success':
+ log.wtf('fetch audio information failed!')
+ sys.exit(2)
+ self.title = audio_info_response['data']['title']
+ # TODO:there is no quality option for now
+ audio_download_url = \
+ 'https://www.bilibili.com/audio/music-service-c/web/url?sid={}&privilege=2&quality=2'.format(audio_id)
+ audio_download_response = json.loads(get_content(audio_download_url))
+ if audio_download_response['msg'] != 'success':
+ log.wtf('fetch audio resource failed!')
+ sys.exit(2)
+ self.streams['mp4'] = {}
+ self.streams['mp4']['src'] = [audio_download_response['data']['cdns'][0]]
+ self.streams['mp4']['container'] = 'm4a'
+ self.streams['mp4']['size'] = audio_download_response['data']['size']
+
+
def entry(self, **kwargs):
# tencent player
tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page)
@@ -265,22 +290,9 @@
episode_id = frag
else:
episode_id = re.search(r'first_ep_id\s*=\s*"(\d+)"', self.page) or re.search(r'\/ep(\d+)', self.url).group(1)
- # cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data=dict(episode_id=episode_id))
- # cid = json.loads(cont)['result']['cid']
- cont = get_content('http://bangumi.bilibili.com/web_api/episode/{}.json'.format(episode_id))
- ep_info = json.loads(cont)['result']['currentEpisode']
-
- bangumi_data = get_bangumi_info(str(ep_info['seasonId']))
- bangumi_payment = bangumi_data.get('payment')
- if bangumi_payment and bangumi_payment['price'] != '0':
- log.w("It's a paid item")
- # ep_ids = collect_bangumi_epids(bangumi_data)
-
- index_title = ep_info['indexTitle']
- long_title = ep_info['longTitle'].strip()
- cid = ep_info['danmaku']
-
- self.title = '{} [{} {}]'.format(self.title, index_title, long_title)
+ data = json.loads(re.search(r'__INITIAL_STATE__=(.+);\(function', self.page).group(1))
+ cid = data['epInfo']['cid']
+ # index_title = data['epInfo']['index_title']
self.download_by_vid(cid, bangumi=True, **kwargs)
@@ -383,7 +395,79 @@
else:
log.wtf("Fail to parse the fav title" + url, "")
+def download_music_from_favlist(url, page, **kwargs):
+ m = re.search(r'https?://www.bilibili.com/audio/mycollection/(\d+)', url)
+ if m is not None:
+ sid = m.group(1)
+ json_result = json.loads(get_content("https://www.bilibili.com/audio/music-service-c/web/song/of-coll?"
+ "sid={}&pn={}&ps=100".format(sid, page)))
+ if json_result['msg'] == 'success':
+ music_list = json_result['data']['data']
+ music_count = len(music_list)
+ for i in range(music_count):
+ audio_id = music_list[i]['id']
+ audio_title = music_list[i]['title']
+ audio_url = "https://www.bilibili.com/audio/au{}".format(audio_id)
+ print("Start downloading music ", audio_title)
+ Bilibili().download_by_url(audio_url, **kwargs)
+ if page < json_result['data']['pageCount']:
+ page += 1
+ download_music_from_favlist(url, page, **kwargs)
+ else:
+ log.wtf("Fail to get music list of page " + json_result)
+ sys.exit(2)
+ else:
+ log.wtf("Fail to parse the sid from " + url, "")
+
+def download_video_from_totallist(url, page, **kwargs):
+ # the url has format: https://space.bilibili.com/64169458/#/video
+ m = re.search(r'space\.bilibili\.com/(\d+)/.*?video', url)
+ mid = ""
+ if m is not None:
+ mid = m.group(1)
+ jsonresult = json.loads(get_content("https://space.bilibili.com/ajax/member/getSubmitVideos?mid={}&pagesize=100&tid=0&page={}&keyword=&order=pubdate&jsonp=jsonp".format(mid, page)))
+ if jsonresult['status']:
+ videos = jsonresult['data']['vlist']
+ videocount = len(videos)
+ for i in range(videocount):
+ videoid = videos[i]["aid"]
+ videotitle = videos[i]["title"]
+ videourl = "https://www.bilibili.com/video/av{}".format(videoid)
+ print("Start downloading ", videotitle, " video ", videotitle)
+ Bilibili().download_by_url(videourl, subtitle=videotitle, **kwargs)
+ if page < jsonresult['data']['pages']:
+ page += 1
+ download_video_from_totallist(url, page, **kwargs)
+ else:
+ log.wtf("Fail to get the files of page " + jsonresult)
+ sys.exit(2)
+
+ else:
+ log.wtf("Fail to parse the video title" + url, "")
+def download_music_from_totallist(url, page, **kwargs):
+ m = re.search(r'https?://www.bilibili.com/audio/am(\d+)\?type=\d', url)
+ if m is not None:
+ sid = m.group(1)
+ json_result = json.loads(get_content("https://www.bilibili.com/audio/music-service-c/web/song/of-menu?"
+ "sid={}&pn={}&ps=100".format(sid, page)))
+ if json_result['msg'] == 'success':
+ music_list = json_result['data']['data']
+ music_count = len(music_list)
+ for i in range(music_count):
+ audio_id = music_list[i]['id']
+ audio_title = music_list[i]['title']
+ audio_url = "https://www.bilibili.com/audio/au{}".format(audio_id)
+ print("Start downloading music ",audio_title)
+ Bilibili().download_by_url(audio_url, **kwargs)
+ if page < json_result['data']['pageCount']:
+ page += 1
+ download_music_from_totallist(url, page, **kwargs)
+ else:
+ log.wtf("Fail to get music list of page " + json_result)
+ sys.exit(2)
+ else:
+ log.wtf("Fail to parse the sid from " + url, "")
def bilibili_download_playlist_by_url(url, **kwargs):
url = url_locations([url], faker=True)[0]
@@ -403,6 +487,12 @@
elif 'favlist' in url:
# this a fav list folder
download_video_from_favlist(url, **kwargs)
+ elif re.match(r'https?://space.bilibili.com/\d+/#/video', url):
+ download_video_from_totallist(url, 1, **kwargs)
+ elif re.match(r'https://www.bilibili.com/audio/mycollection/\d+', url):
+ download_music_from_favlist(url, 1, **kwargs)
+ elif re.match(r'https?://www.bilibili.com/audio/am\d+\?type=\d', url):
+ download_music_from_totallist(url, 1, **kwargs)
else:
aid = re.search(r'av(\d+)', url).group(1)
page_list = json.loads(get_content('http://www.bilibili.com/widget/getPageList?aid={}'.format(aid)))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/iwara.py new/you-get-0.4.1181/src/you_get/extractors/iwara.py
--- old/you-get-0.4.1167/src/you_get/extractors/iwara.py 2018-11-07 16:59:58.000000000 +0100
+++ new/you-get-0.4.1181/src/you_get/extractors/iwara.py 2018-11-30 21:51:11.000000000 +0100
@@ -17,20 +17,20 @@
def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
global headers
- video_hash=match1(url, r'http://\w+.iwara.tv/videos/(\w+)')
- video_url=match1(url, r'(http://\w+.iwara.tv)/videos/\w+')
- html = get_content(url,headers=headers)
+ video_hash = match1(url, r'https?://\w+.iwara.tv/videos/(\w+)')
+ video_url = match1(url, r'(https?://\w+.iwara.tv)/videos/\w+')
+ html = get_content(url, headers=headers)
title = r1(r'<title>(.*)</title>', html)
- api_url=video_url+'/api/video/'+video_hash
- content=get_content(api_url,headers=headers)
- data=json.loads(content)
- type,ext,size=url_info(data[0]['uri'], headers=headers)
- down_urls=data[0]['uri']
- print_info(down_urls,title+data[0]['resolution'],type,size)
+ api_url = video_url + '/api/video/' + video_hash
+ content = get_content(api_url, headers=headers)
+ data = json.loads(content)
+ down_urls = 'https:' + data[0]['uri']
+ type, ext, size = url_info(down_urls, headers=headers)
+ print_info(site_info, title+data[0]['resolution'], type, size)
if not info_only:
- download_urls([down_urls], title, ext, size, output_dir, merge = merge,headers=headers)
+ download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers)
-site_info = "iwara"
+site_info = "Iwara"
download = iwara_download
download_playlist = playlist_not_supported('iwara')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/tiktok.py new/you-get-0.4.1181/src/you_get/extractors/tiktok.py
--- old/you-get-0.4.1167/src/you_get/extractors/tiktok.py 1970-01-01 01:00:00.000000000 +0100
+++ new/you-get-0.4.1181/src/you_get/extractors/tiktok.py 2018-11-30 21:51:11.000000000 +0100
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+__all__ = ['tiktok_download']
+
+from ..common import *
+
+def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+ html = get_html(url)
+ title = r1(r'<title>(.*?)</title>', html)
+ video_id = r1(r'/video/(\d+)', url) or r1(r'musical\?id=(\d+)', html)
+ title = '%s [%s]' % (title, video_id)
+ dataText = r1(r'var data = \[(.*)\] ', html) or r1(r'var data = (\{.*\})', html)
+ data = json.loads(dataText)
+ source = 'http:' + data['video']['play_addr']['url_list'][0]
+ mime, ext, size = url_info(source)
+
+ print_info(site_info, title, mime, size)
+ if not info_only:
+ download_urls([source], title, ext, size, output_dir, merge=merge)
+
+site_info = "TikTok.com"
+download = tiktok_download
+download_playlist = playlist_not_supported('tiktok')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/youku.py new/you-get-0.4.1181/src/you_get/extractors/youku.py
--- old/you-get-0.4.1167/src/you_get/extractors/youku.py 2018-11-07 16:59:58.000000000 +0100
+++ new/you-get-0.4.1181/src/you_get/extractors/youku.py 2018-11-30 21:51:11.000000000 +0100
@@ -78,7 +78,7 @@
self.api_error_code = None
self.api_error_msg = None
- self.ccode = '0516'
+ self.ccode = '0590'
# Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js
# grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js
self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND'
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/youtube.py new/you-get-0.4.1181/src/you_get/extractors/youtube.py
--- old/you-get-0.4.1167/src/you_get/extractors/youtube.py 2018-11-07 16:59:58.000000000 +0100
+++ new/you-get-0.4.1181/src/you_get/extractors/youtube.py 2018-11-30 21:51:11.000000000 +0100
@@ -144,7 +144,10 @@
for video in videos:
vid = parse_query_param(video, 'v')
index = parse_query_param(video, 'index')
- self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
+ try:
+ self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
+ except:
+ pass
def prepare(self, **kwargs):
assert self.url or self.vid
@@ -160,7 +163,8 @@
ytplayer_config = None
if 'status' not in video_info:
- log.wtf('[Failed] Unknown status.')
+ log.wtf('[Failed] Unknown status.', exit_code=None)
+ raise
elif video_info['status'] == ['ok']:
if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']:
self.title = parse.unquote_plus(video_info['title'][0])
@@ -192,7 +196,8 @@
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1))
except:
msg = re.search('class="message">([^<]+)<', video_page).group(1)
- log.wtf('[Failed] "%s"' % msg.strip())
+ log.wtf('[Failed] "%s"' % msg.strip(), exit_code=None)
+ raise
if 'title' in ytplayer_config['args']:
# 150 Restricted from playback on certain sites
@@ -201,18 +206,22 @@
self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
else:
- log.wtf('[Error] The uploader has not made this video available in your country.')
+ log.wtf('[Error] The uploader has not made this video available in your country.', exit_code=None)
+ raise
#self.title = re.search('