mirror of
				https://code.hackerspace.pl/q3k/youtube-dl
				synced 2025-03-16 11:43:02 +00:00 
			
		
		
		
	[generic] Allow multiple matches for generic hits (Fixes #2818)
This commit is contained in:
		
							parent
							
								
									f1f25be6db
								
							
						
					
					
						commit
						b30b8698ea
					
				| @ -637,70 +637,77 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(smotri_url, 'Smotri') | ||||
| 
 | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if not found: | ||||
|             # Look for gorilla-vid style embedding | ||||
|             mobj = re.search(r'''(?sx) | ||||
|             found = re.findall(r'''(?sx) | ||||
|                 (?: | ||||
|                     jw_plugins| | ||||
|                     JWPlayerOptions| | ||||
|                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup | ||||
|                 ) | ||||
|                 .*?file\s*:\s*["\'](.*?)["\']''', webpage) | ||||
|         if mobj is None: | ||||
|         if not found: | ||||
|             # Broaden the search a little bit | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|             # Broaden the search a little bit: JWPlayer JS loader | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) | ||||
| 
 | ||||
|         if mobj is None: | ||||
|             found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) | ||||
|         if not found: | ||||
|             # Broaden the findall a little bit: JWPlayer JS loader | ||||
|             found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) | ||||
|         if not found: | ||||
|             # Try to find twitter cards info | ||||
|             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) | ||||
|         if mobj is None: | ||||
|             found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) | ||||
|         if not found: | ||||
|             # We look for Open Graph info: | ||||
|             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am) | ||||
|             m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) | ||||
|             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) | ||||
|             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: | ||||
|             if m_video_type is not None: | ||||
|                 mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) | ||||
|         if mobj is None: | ||||
|                 found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) | ||||
|         if not found: | ||||
|             # HTML5 video | ||||
|             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) | ||||
|         if mobj is None: | ||||
|             mobj = re.search( | ||||
|             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage) | ||||
|         if not found: | ||||
|             found = re.findall( | ||||
|                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' | ||||
|                 r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"', | ||||
|                 webpage) | ||||
|             if mobj: | ||||
|                 new_url = mobj.group(1) | ||||
|             if found: | ||||
|                 new_url = found.group(1) | ||||
|                 self.report_following_redirect(new_url) | ||||
|                 return { | ||||
|                     '_type': 'url', | ||||
|                     'url': new_url, | ||||
|                 } | ||||
|         if mobj is None: | ||||
|         if not found: | ||||
|             raise ExtractorError('Unsupported URL: %s' % url) | ||||
| 
 | ||||
|         # It's possible that one of the regexes | ||||
|         # matched, but returned an empty group: | ||||
|         if mobj.group(1) is None: | ||||
|             raise ExtractorError('Did not find a valid video URL at %s' % url) | ||||
|         entries = [] | ||||
|         for video_url in found: | ||||
|             video_url = compat_urlparse.urljoin(url, video_url) | ||||
|             video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) | ||||
| 
 | ||||
|         video_url = mobj.group(1) | ||||
|         video_url = compat_urlparse.urljoin(url, video_url) | ||||
|         video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) | ||||
|             # Sometimes, jwplayer extraction will result in a YouTube URL | ||||
|             if YoutubeIE.suitable(video_url): | ||||
|                 entries.append(self.url_result(video_url, 'Youtube')) | ||||
|                 continue | ||||
| 
 | ||||
|         # Sometimes, jwplayer extraction will result in a YouTube URL | ||||
|         if YoutubeIE.suitable(video_url): | ||||
|             return self.url_result(video_url, 'Youtube') | ||||
|             # here's a fun little line of code for you: | ||||
|             video_id = os.path.splitext(video_id)[0] | ||||
| 
 | ||||
|         # here's a fun little line of code for you: | ||||
|         video_id = os.path.splitext(video_id)[0] | ||||
|             entries.append({ | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'uploader': video_uploader, | ||||
|                 'title': video_title, | ||||
|             }) | ||||
| 
 | ||||
|         if len(entries) == 1: | ||||
|             return entries[1] | ||||
|         else: | ||||
|             for num, e in enumerate(entries, start=1): | ||||
|                 e['title'] = '%s (%d)' % (e['title'], num) | ||||
|             return { | ||||
|                 '_type': 'playlist', | ||||
|                 'entries': entries, | ||||
|             } | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'uploader': video_uploader, | ||||
|             'title': video_title, | ||||
|         } | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
						Philipp Hagemeister