mirror of
				https://code.hackerspace.pl/q3k/youtube-dl
				synced 2025-03-16 11:43:02 +00:00 
			
		
		
		
	Merge remote-tracking branch 'jaimeMF/f4m'
Conflicts: youtube_dl/extractor/__init__.py
This commit is contained in:
		
						commit
						4edff78531
					
				| @ -5,6 +5,7 @@ from .hls import HlsFD | ||||
| from .http import HttpFD | ||||
| from .mplayer import MplayerFD | ||||
| from .rtmp import RtmpFD | ||||
| from .f4m import F4mFD | ||||
| 
 | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
| @ -22,5 +23,7 @@ def get_suitable_downloader(info_dict): | ||||
|         return HlsFD | ||||
|     if url.startswith('mms') or url.startswith('rtsp'): | ||||
|         return MplayerFD | ||||
|     if determine_ext(url) == 'f4m': | ||||
|         return F4mFD | ||||
|     else: | ||||
|         return HttpFD | ||||
|  | ||||
							
								
								
									
										305
									
								
								youtube_dl/downloader/f4m.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										305
									
								
								youtube_dl/downloader/f4m.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,305 @@ | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import base64 | ||||
| import io | ||||
| import itertools | ||||
| import os | ||||
| from struct import unpack, pack | ||||
| import time | ||||
| import xml.etree.ElementTree as etree | ||||
| 
 | ||||
| from .common import FileDownloader | ||||
| from .http import HttpFD | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
|     format_bytes, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
class FlvReader(io.BytesIO):
    """
    Reader for Flv files
    The file format is documented in https://www.adobe.com/devnet/f4v.html

    All multi-byte integers are decoded with struct's '!' (network byte
    order) formats. Reading past the end of the data raises struct.error
    for the integer readers and EOFError for read_string().
    """

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        """Consume 8 bytes and return them as an unsigned integer."""
        return unpack('!Q', self.read(8))[0]

    def read_unsigned_int(self):
        """Consume 4 bytes and return them as an unsigned integer."""
        return unpack('!I', self.read(4))[0]

    def read_unsigned_char(self):
        """Consume 1 byte and return it as an unsigned integer."""
        return unpack('!B', self.read(1))[0]

    def read_string(self):
        """
        Consume a NUL-terminated byte string and return it without the NUL.

        Raises EOFError if the data ends before a terminator is found;
        without that check a truncated buffer would make this loop forever,
        because read(1) keeps returning b'' at the end of the data.
        """
        chars = []
        while True:
            char = self.read(1)
            if not char:
                raise EOFError('Unterminated string in FLV data')
            if char == b'\x00':
                break
            chars.append(char)
        return b''.join(chars)

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)

        A 32-bit size field equal to 1 means the real size follows the type
        as a 64-bit integer, which makes the header 16 bytes long instead
        of 8. box_data is everything after the header.
        """
        real_size = size = self.read_unsigned_int()
        box_type = self.read(4)
        header_end = 8
        if size == 1:
            real_size = self.read_unsigned_long_long()
            header_end = 16
        return real_size, box_type, self.read(real_size - header_end)

    def read_asrt(self):
        """
        Parse the payload of an 'asrt' box.

        Returns {'segment_run': [(first_segment, fragments_per_segment), ...]}.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        quality_entry_count = self.read_unsigned_char()
        # QualityEntryCount
        for i in range(quality_entry_count):
            self.read_string()

        segment_run_count = self.read_unsigned_int()
        segments = []
        for i in range(segment_run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            segments.append((first_segment, fragments_per_segment))

        return {
            'segment_run': segments,
        }

    def read_afrt(self):
        """
        Parse the payload of an 'afrt' box.

        Returns {'fragments': [entry, ...]} where every entry is a dict with
        the keys 'first', 'ts', 'duration' and 'discontinuity_indicator'.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # time scale
        self.read_unsigned_int()

        quality_entry_count = self.read_unsigned_char()
        # QualitySegmentUrlModifiers
        for i in range(quality_entry_count):
            self.read_string()

        fragments_count = self.read_unsigned_int()
        fragments = []
        for i in range(fragments_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            # The discontinuity byte is only present in zero-duration entries
            if duration == 0:
                discontinuity_indicator = self.read_unsigned_char()
            else:
                discontinuity_indicator = None
            fragments.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity_indicator,
            })

        return {
            'fragments': fragments,
        }

    def read_abst(self):
        """
        Parse the payload of an 'abst' (bootstrap info) box.

        Returns {'segments': [...], 'fragments': [...]}, holding one
        read_asrt() result per embedded 'asrt' box and one read_afrt()
        result per embedded 'afrt' box. All other fields are skipped.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # BootstrapinfoVersion
        self.read_unsigned_int()
        # Profile,Live,Update,Reserved
        self.read(1)
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
        self.read_unsigned_long_long()
        # SmpteTimeCodeOffset
        self.read_unsigned_long_long()
        # MovieIdentifier
        self.read_string()
        server_count = self.read_unsigned_char()
        # ServerEntryTable
        for i in range(server_count):
            self.read_string()
        quality_count = self.read_unsigned_char()
        # QualityEntryTable: its length is quality_count, not server_count;
        # using the wrong count misaligns every field that follows
        for i in range(quality_count):
            self.read_string()
        # DrmData
        self.read_string()
        # MetaData
        self.read_string()

        segments_count = self.read_unsigned_char()
        segments = []
        for i in range(segments_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            segment = FlvReader(box_data).read_asrt()
            segments.append(segment)
        fragments_run_count = self.read_unsigned_char()
        fragments = []
        for i in range(fragments_run_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragments.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segments,
            'fragments': fragments,
        }

    def read_bootstrap_info(self):
        """Read one box, which must be of type 'abst', and return its parsed content."""
        total_size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()
| 
 | ||||
| 
 | ||||
def read_bootstrap_info(bootstrap_bytes):
    """Wrap bootstrap_bytes in a FlvReader and return its read_bootstrap_info() result."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
| 
 | ||||
| 
 | ||||
def build_fragments_list(boot_info):
    """
    Return a list of (segment, fragment) for each fragment in the video

    Only the first entry of the first segment run table and the first entry
    of the first fragment run table are consulted; the segment number is
    always 1 and the fragment numbers count up from that entry's 'first'.
    """
    # I've only found videos with one segment
    frag_count = boot_info['segments'][0]['segment_run'][0][1]
    first_frag = boot_info['fragments'][0]['fragments'][0]['first']
    return [(1, first_frag + offset) for offset in range(frag_count)]
| 
 | ||||
| 
 | ||||
def write_flv_header(stream, metadata):
    """
    Emit the fixed FLV preamble followed by a script-data tag that carries
    metadata (a byte string) to stream, using successive stream.write() calls.
    """
    emit = stream.write
    preamble = (
        # FLV header
        b'FLV\x01',
        b'\x05',
        b'\x00\x00\x00\x09',
        # FLV File body
        b'\x00\x00\x00\x00',
        # FLVTAG
        # Script data
        b'\x12',
    )
    for chunk in preamble:
        emit(chunk)
    # Size of the metadata with 3 bytes: pack it as 4 bytes and drop the
    # most significant one
    emit(pack('!L', len(metadata))[1:])
    emit(b'\x00\x00\x00\x00\x00\x00\x00')
    emit(metadata)
    # Magic numbers extracted from the output files produced by AdobeHDS.php
    # (https://github.com/K-S-V/Scripts)
    emit(b'\x00\x00\x01\x73')
| 
 | ||||
| 
 | ||||
def _add_ns(prop):
    """Return prop qualified with the f4m 1.0 namespace in ElementTree's '{uri}tag' form."""
    qualified = '{http://ns.adobe.com/f4m/1.0}%s' % prop
    return qualified
| 
 | ||||
| 
 | ||||
class HttpQuietDownloader(HttpFD):
    """HttpFD subclass whose to_screen() discards every message."""

    def to_screen(self, *args, **kargs):
        """Accept any arguments and do nothing."""
        return None
| 
 | ||||
| 
 | ||||
class F4mFD(FileDownloader):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    def real_download(self, filename, info_dict):
        """
        Download the manifest at info_dict['url'], fetch every fragment of
        the media entry with the highest 'bitrate' attribute and write the
        content of each fragment's 'mdat' box, preceded by an FLV header,
        to filename (through a temporary file).

        Returns True on success and False as soon as one fragment download
        reports failure.
        """
        man_url = info_dict['url']
        self.to_screen('[download] Downloading f4m manifest')
        manifest = self.ydl.urlopen(man_url).read()
        self.report_destination(filename)
        http_dl = HttpQuietDownloader(self.ydl, {'continuedl': True, 'quiet': True, 'noprogress': True})

        doc = etree.fromstring(manifest)
        # Entries without a bitrate sort first (-1); the last one is selected
        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
        formats = sorted(formats, key=lambda f: f[0])
        media = formats[-1][1]
        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
        boot_info = read_bootstrap_info(bootstrap)
        fragments_list = build_fragments_list(boot_info)
        total_frags = len(fragments_list)

        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'downloaded_bytes': 0,
            'frag_counter': 0,
        }
        start = time.time()

        def frag_progress_hook(status):
            frag_total_bytes = status.get('total_bytes', 0)
            # Finished bytes plus the remaining fragments, each assumed to be
            # as large as the current one
            estimated_size = (state['downloaded_bytes'] +
                (total_frags - state['frag_counter']) * frag_total_bytes)
            if status['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_counter'] += 1
                progress = self.calc_percent(state['frag_counter'], total_frags)
                byte_counter = state['downloaded_bytes']
            else:
                frag_downloaded_bytes = status['downloaded_bytes']
                byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                frag_progress = self.calc_percent(frag_downloaded_bytes,
                    frag_total_bytes)
                progress = self.calc_percent(state['frag_counter'], total_frags)
                progress += frag_progress / float(total_frags)

            eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
            self.report_progress(progress, format_bytes(estimated_size),
                status.get('speed'), eta)
        http_dl.add_progress_hook(frag_progress_hook)

        tmpfilename = self.temp_name(filename)
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
        frags_filenames = []
        # The stream has to be closed on every path: on the early
        # "return False" so the handle does not leak, and on success before
        # the rename and size check below so that all data is flushed and
        # the file is no longer held open.
        try:
            write_flv_header(dest_stream, metadata)
            for (seg_i, frag_i) in fragments_list:
                name = 'Seg%d-Frag%d' % (seg_i, frag_i)
                url = base_url + name
                frag_filename = '%s-%s' % (tmpfilename, name)
                success = http_dl.download(frag_filename, {'url': url})
                if not success:
                    return False
                with open(frag_filename, 'rb') as down:
                    down_data = down.read()
                    reader = FlvReader(down_data)
                    # Skip boxes until the first 'mdat' one and copy its content
                    while True:
                        _, box_type, box_data = reader.read_box_info()
                        if box_type == b'mdat':
                            dest_stream.write(box_data)
                            break
                frags_filenames.append(frag_filename)
        finally:
            dest_stream.close()

        self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

        self.try_rename(tmpfilename, filename)
        for frag_file in frags_filenames:
            os.remove(frag_file)

        fsize = os.path.getsize(encodeFilename(filename))
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })

        return True
| @ -209,6 +209,7 @@ from .statigram import StatigramIE | ||||
| from .steam import SteamIE | ||||
| from .streamcloud import StreamcloudIE | ||||
| from .streamcz import StreamCZIE | ||||
| from .syfy import SyfyIE | ||||
| from .sztvhu import SztvHuIE | ||||
| from .teamcoco import TeamcocoIE | ||||
| from .techtalks import TechTalksIE | ||||
|  | ||||
							
								
								
									
										30
									
								
								youtube_dl/extractor/syfy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								youtube_dl/extractor/syfy.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,30 @@ | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import re | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| 
 | ||||
| 
 | ||||
class SyfyIE(InfoExtractor):
    """
    Extractor for syfy.com video pages.

    It only locates the video URL on the page and returns it as a URL
    result; the test below expects the ThePlatform extractor to handle it.
    """

    _VALID_URL = r'https?://www\.syfy\.com/videos/.+?vid:(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
        'info_dict': {
            'id': 'NmqMrGnXvmO1',
            'ext': 'flv',
            'title': 'George Lucas has Advice for his Daughter',
            'description': 'Listen to what insights George Lucas give his daughter Amanda.',
        },
        'params': {
            # f4m download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        """Download the page for url and return a URL result for its og video URL."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)
        og_video_url = self._og_search_video_url(page)
        return self.url_result(og_video_url)
| @ -11,7 +11,10 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language | ||||
| 
 | ||||
| 
 | ||||
| class ThePlatformIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ | ||||
|            (?P<config>[^/\?]+/(?:swf|config)/select/)? | ||||
|          |theplatform:)(?P<id>[^/\?&]+)''' | ||||
| 
 | ||||
|     _TEST = { | ||||
|         # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/ | ||||
| @ -29,9 +32,7 @@ class ThePlatformIE(InfoExtractor): | ||||
|         }, | ||||
|     } | ||||
| 
 | ||||
|     def _get_info(self, video_id): | ||||
|         smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' | ||||
|             'format=smil&mbr=true'.format(video_id)) | ||||
|     def _get_info(self, video_id, smil_url): | ||||
|         meta = self._download_xml(smil_url, video_id) | ||||
| 
 | ||||
|         try: | ||||
| @ -50,6 +51,15 @@ class ThePlatformIE(InfoExtractor): | ||||
| 
 | ||||
|         head = meta.find(_x('smil:head')) | ||||
|         body = meta.find(_x('smil:body')) | ||||
| 
 | ||||
|         f4m_node = body.find(_x('smil:seq/smil:video')) | ||||
|         if f4m_node is not None: | ||||
|             formats = [{ | ||||
|                 'ext': 'flv', | ||||
|                 # the parameters are from syfy.com, other sites may use others | ||||
|                 'url': f4m_node.attrib['src'] + '?g=UXWGVKRWHFSP&hdcore=3.0.3', | ||||
|             }] | ||||
|         else: | ||||
|             base_url = head.find(_x('smil:meta')).attrib['base'] | ||||
|             switch = body.find(_x('smil:switch')) | ||||
|             formats = [] | ||||
| @ -68,7 +78,6 @@ class ThePlatformIE(InfoExtractor): | ||||
|                     'height': height, | ||||
|                     'vbr': vbr, | ||||
|                 }) | ||||
| 
 | ||||
|             self._sort_formats(formats) | ||||
| 
 | ||||
|         return { | ||||
| @ -83,4 +92,13 @@ class ThePlatformIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         return self._get_info(video_id) | ||||
|         if mobj.group('config'): | ||||
|             config_url = url+ '&form=json' | ||||
|             config_url = config_url.replace('swf/', 'config/') | ||||
|             config_json = self._download_webpage(config_url, video_id, u'Downloading config') | ||||
|             config = json.loads(config_json) | ||||
|             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4' | ||||
|         else: | ||||
|             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' | ||||
|                 'format=smil&mbr=true'.format(video_id)) | ||||
|         return self._get_info(video_id, smil_url) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
						Philipp Hagemeister