mirror of
				https://code.hackerspace.pl/q3k/youtube-dl
				synced 2025-03-16 11:43:02 +00:00 
			
		
		
		
	[canvas] Add support for vrt.be/vrtnu (closes #11873)
This commit is contained in:
		
							parent
							
								
									cdd1ce92c4
								
							
						
					
					
						commit
						7913e0fca7
					
				| @ -1,16 +1,24 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
| import re | import re | ||||||
|  | import json | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from .gigya import GigyaBaseIE | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | from ..compat import compat_HTTPError | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     float_or_none, |     ExtractorError, | ||||||
|     strip_or_none, |     strip_or_none, | ||||||
|  |     float_or_none, | ||||||
|  |     int_or_none, | ||||||
|  |     parse_iso8601, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class CanvasIE(InfoExtractor): | class CanvasIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)' |     _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', |         'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', | ||||||
|         'md5': '90139b746a0a9bd7bb631283f6e2a64e', |         'md5': '90139b746a0a9bd7bb631283f6e2a64e', | ||||||
| @ -166,3 +174,133 @@ class CanvasEenIE(InfoExtractor): | |||||||
|             'title': title, |             'title': title, | ||||||
|             'description': self._og_search_description(webpage), |             'description': self._og_search_description(webpage), | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class VrtNUIE(GigyaBaseIE):
    """Extractor for vrt.be/vrtnu episode pages.

    A Gigya login is performed during initialization and a token is then
    requested from token.vrt.be.  Metadata is scraped from the episode page
    and the media extraction itself is delegated to CanvasIE through a
    ``url_transparent`` result.
    """
    IE_DESC = 'VrtNU.be'
    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
        'info_dict': {
            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
            'ext': 'flv',
            'title': 'De zwarte weduwe',
            'description': 'md5:d90c21dced7db869a85db89a623998d4',
            'duration': 1457.04,
            'thumbnail': r're:^https?://.*\.jpg$',
            'season': '1',
            'season_number': 1,
            'episode_number': 1,
        },
        # 'skip': 'This video is only available for registered users'
    }]
    _NETRC_MACHINE = 'vrtnu'
    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
    # NOTE(review): not referenced anywhere in this class -- confirm whether
    # it is still needed.
    _CONTEXT_ID = 'R3595707040'
    # How many times the token request is tried before giving up.
    _TOKEN_ATTEMPTS = 3

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in through Gigya and request the vrt.be token cookies.

        Calls ``raise_login_required()`` when no username is configured.
        A token request answered with HTTP 401 is retried up to
        ``_TOKEN_ATTEMPTS`` times; any other ExtractorError is re-raised
        unchanged.  If every attempt gets a 401 only warnings are reported
        (best effort, no exception is raised here).
        """
        username, password = self._get_login_info()
        if username is None:
            self.raise_login_required()

        auth_data = {
            'APIKey': self._APIKEY,
            'targetEnv': 'jssdk',
            'loginID': username,
            'password': password,
            'authMode': 'cookie',
        }

        auth_info = self._gigya_login(auth_data)

        # The payload does not change between attempts, so build it once.
        token_payload = json.dumps({
            'uid': auth_info['UID'],
            'uidsig': auth_info['UIDSignature'],
            'ts': auth_info['signatureTimestamp'],
            'email': auth_info['profile']['email'],
        }).encode('utf-8')

        # Sometimes authentication fails for no good reason, retry
        for attempt in range(1, self._TOKEN_ATTEMPTS + 1):
            try:
                # When requesting a token, no actual token is returned, but the
                # necessary cookies are set.
                self._request_webpage(
                    'https://token.vrt.be',
                    None, note='Requesting a token', errnote='Could not get a token',
                    headers={
                        'Content-Type': 'application/json',
                        'Referer': 'https://www.vrt.be/vrtnu/',
                    },
                    data=token_payload)
            except ExtractorError as e:
                if not (isinstance(e.cause, compat_HTTPError) and e.cause.code == 401):
                    # Bare raise keeps the original traceback intact.
                    raise
                self.report_warning('Authentication failed')
                # No point in waiting when there is no further attempt.
                if attempt < self._TOKEN_ATTEMPTS:
                    self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
            else:
                break

    @staticmethod
    def _timestamp_to_date(timestamp):
        """Return the UTC date of a UNIX timestamp as YYYYMMDD, or None."""
        if timestamp is None:
            return None
        import datetime
        try:
            return datetime.datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            return None

    def _real_extract(self, url):
        """Scrape the episode page and hand the video over to CanvasIE.

        Returns either a plain URL result (when the ``.mssecurevideo.json``
        response carries a ``url`` entry) or a ``url_transparent`` result
        pointing at the mediazone asset URL.  Raises ExtractorError when no
        video id can be found in the response.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_regex(
            r'(?ms)<h1 class="content__heading">(.+?)</h1>',
            webpage, 'title').strip()

        description = self._html_search_regex(
            r'(?ms)<div class="content__description">(.+?)</div>',
            webpage, 'description', default=None)

        season = self._html_search_regex(
            [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
                    <span>seizoen\ (.+?)</span>\s*
                </div>''',
             r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
            webpage, 'season', default=None)

        season_number = int_or_none(season)

        episode_number = int_or_none(self._html_search_regex(
            r'''(?xms)<div\ class="content__episode">\s*
                    <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
                </div>''',
            webpage, 'episode_number', default=None))

        # parse_iso8601 yields a UNIX timestamp, whereas the release_date
        # field is expected to be a YYYYMMDD string, so convert it.
        release_date = self._timestamp_to_date(parse_iso8601(self._html_search_regex(
            r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
            webpage, 'release_date', default=None)))

        # If there's a ? or a # in the URL, remove them and everything after
        clean_url = url.split('?')[0].split('#')[0].rstrip('/')
        securevideo_url = clean_url + '.mssecurevideo.json'

        video_info = self._download_json(securevideo_url, display_id)

        # We are dealing with a '../<show>.relevant' URL
        redirect_url = video_info.get('url')
        if redirect_url:
            # Only protocol-relative URLs need a scheme prepended.
            if redirect_url.startswith('//'):
                redirect_url = 'https:' + redirect_url
            return self.url_result(redirect_url)

        # There is only one entry, but with an unknown key, so just take the
        # first one that carries a video id.
        video_id = next(
            (entry.get('videoid') for entry in video_info.values()
             if isinstance(entry, dict) and entry.get('videoid')),
            None)
        if not video_id:
            raise ExtractorError(
                'Unable to extract video id for %s' % display_id)

        return {
            '_type': 'url_transparent',
            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
            'ie_key': CanvasIE.ie_key(),
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'season': season,
            'season_number': season_number,
            'episode_number': episode_number,
            'release_date': release_date,
        }
|  | |||||||
| @ -150,6 +150,7 @@ from .canalc2 import Canalc2IE | |||||||
| from .canvas import ( | from .canvas import ( | ||||||
|     CanvasIE, |     CanvasIE, | ||||||
|     CanvasEenIE, |     CanvasEenIE, | ||||||
|  |     VrtNUIE, | ||||||
| ) | ) | ||||||
| from .carambatv import ( | from .carambatv import ( | ||||||
|     CarambaTVIE, |     CarambaTVIE, | ||||||
|  | |||||||
							
								
								
									
										22
									
								
								youtube_dl/extractor/gigya.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								youtube_dl/extractor/gigya.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,22 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  | 
 | ||||||
|  | from .common import InfoExtractor | ||||||
|  | 
 | ||||||
|  | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     urlencode_postdata, | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class GigyaBaseIE(InfoExtractor):
    """Base class for extractors that log in through Gigya."""

    def _gigya_login(self, auth_data):
        """Post ``auth_data`` to the Gigya accounts.login endpoint.

        Returns the decoded JSON response.  Raises an expected
        ExtractorError when the response contains ``errorDetails`` or
        ``errorMessage``.
        """
        response = self._download_json(
            'https://accounts.eu1.gigya.com/accounts.login', None,
            note='Logging in', errnote='Unable to log in',
            data=urlencode_postdata(auth_data))

        # Prefer the detailed description, fall back to the short message.
        failure = response.get('errorDetails') or response.get('errorMessage')
        if not failure:
            return response

        raise ExtractorError(
            'Unable to login: %s' % failure, expected=True)
| @ -2,19 +2,18 @@ from __future__ import unicode_literals | |||||||
| 
 | 
 | ||||||
| import re | import re | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .gigya import GigyaBaseIE | ||||||
|  | 
 | ||||||
| from ..compat import compat_str | from ..compat import compat_str | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |  | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|     try_get, |     try_get, | ||||||
|     unified_timestamp, |     unified_timestamp, | ||||||
|     urlencode_postdata, |  | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class MedialaanIE(InfoExtractor): | class MedialaanIE(GigyaBaseIE): | ||||||
|     _VALID_URL = r'''(?x) |     _VALID_URL = r'''(?x) | ||||||
|                     https?:// |                     https?:// | ||||||
|                         (?:www\.|nieuws\.)? |                         (?:www\.|nieuws\.)? | ||||||
| @ -119,15 +118,7 @@ class MedialaanIE(InfoExtractor): | |||||||
|             'password': password, |             'password': password, | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         auth_info = self._download_json( |         auth_info = self._gigya_login(auth_data) | ||||||
|             'https://accounts.eu1.gigya.com/accounts.login', None, |  | ||||||
|             note='Logging in', errnote='Unable to log in', |  | ||||||
|             data=urlencode_postdata(auth_data)) |  | ||||||
| 
 |  | ||||||
|         error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage') |  | ||||||
|         if error_message: |  | ||||||
|             raise ExtractorError( |  | ||||||
|                 'Unable to login: %s' % error_message, expected=True) |  | ||||||
| 
 | 
 | ||||||
|         self._uid = auth_info['UID'] |         self._uid = auth_info['UID'] | ||||||
|         self._uid_signature = auth_info['UIDSignature'] |         self._uid_signature = auth_info['UIDSignature'] | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 mrBliss
						mrBliss