mirror of
				https://code.hackerspace.pl/q3k/youtube-dl
				synced 2025-03-16 11:43:02 +00:00 
			
		
		
		
	[bilibili] add support for specific page extraction
This commit is contained in:
		
							parent
							
								
									55af2b26e0
								
							
						
					
					
						commit
						520e753390
					
				| @ -1,18 +1,20 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
|  | import re | ||||||
| import json | import json | ||||||
| import xml.etree.ElementTree as ET | import xml.etree.ElementTree as ET | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     unescapeHTML, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class BiliBiliIE(InfoExtractor): | class BiliBiliIE(InfoExtractor): | ||||||
|     _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/' |     _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' | ||||||
| 
 | 
 | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://www.bilibili.tv/video/av1074402/', |         'url': 'http://www.bilibili.tv/video/av1074402/', | ||||||
| @ -33,17 +35,31 @@ class BiliBiliIE(InfoExtractor): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '1041170', |             'id': '1041170', | ||||||
|             'title': '【BD1080P】刀语【诸神&异域】', |             'title': '【BD1080P】刀语【诸神&异域】', | ||||||
|  |             'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', | ||||||
|  |             'uploader': '枫叶逝去', | ||||||
|  |             'timestamp': 1396501299, | ||||||
|         }, |         }, | ||||||
|         'playlist_count': 12, |         'playlist_count': 9, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     def _extract_video_info(self, cid, view_data, page_num=1, num_pages=1): |     def _real_extract(self, url): | ||||||
|         title = view_data['title'] |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |         page_num = mobj.group('page_num') or '1' | ||||||
|  | 
 | ||||||
|  |         view_data = self._download_json( | ||||||
|  |             'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num), | ||||||
|  |             video_id) | ||||||
|  |         if 'error' in view_data: | ||||||
|  |             raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True) | ||||||
|  | 
 | ||||||
|  |         cid = view_data['cid'] | ||||||
|  |         title = unescapeHTML(view_data['title']) | ||||||
| 
 | 
 | ||||||
|         page = self._download_webpage( |         page = self._download_webpage( | ||||||
|             'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid, |             'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid, | ||||||
|             cid, |             cid, | ||||||
|             'Downloading page %d/%d' % (page_num, num_pages) |             'Downloading page %s/%s' % (page_num, view_data['pages']) | ||||||
|         ) |         ) | ||||||
|         try: |         try: | ||||||
|             err_info = json.loads(page) |             err_info = json.loads(page) | ||||||
| @ -76,7 +92,7 @@ class BiliBiliIE(InfoExtractor): | |||||||
|             }) |             }) | ||||||
| 
 | 
 | ||||||
|         info = { |         info = { | ||||||
|             'id': cid, |             'id': str(cid), | ||||||
|             'title': title, |             'title': title, | ||||||
|             'description': view_data.get('description'), |             'description': view_data.get('description'), | ||||||
|             'thumbnail': view_data.get('pic'), |             'thumbnail': view_data.get('pic'), | ||||||
| @ -92,22 +108,7 @@ class BiliBiliIE(InfoExtractor): | |||||||
|         else: |         else: | ||||||
|             info.update({ |             info.update({ | ||||||
|                 '_type': 'multi_video', |                 '_type': 'multi_video', | ||||||
|  |                 'id': video_id, | ||||||
|                 'entries': entries, |                 'entries': entries, | ||||||
|             }) |             }) | ||||||
|             return info |             return info | ||||||
| 
 |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         video_id = self._match_id(url) |  | ||||||
|         view_data = self._download_json('http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s' % video_id, video_id) |  | ||||||
| 
 |  | ||||||
|         num_pages = int_or_none(view_data['pages']) |  | ||||||
|         if num_pages > 1: |  | ||||||
|             play_list_title = view_data['title'] |  | ||||||
|             page_list = self._download_json('http://www.bilibili.com/widget/getPageList?aid=%s' % video_id, video_id, 'Downloading page list metadata') |  | ||||||
|             entries = [] |  | ||||||
|             for page in page_list: |  | ||||||
|                 view_data['title'] = page['pagename'] |  | ||||||
|                 entries.append(self._extract_video_info(str(page['cid']), view_data, page['page'], num_pages)) |  | ||||||
|             return self.playlist_result(entries, video_id, play_list_title, view_data.get('description')) |  | ||||||
|         else: |  | ||||||
|             return self._extract_video_info(str(view_data['cid']), view_data) |  | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 remitamine
						remitamine