mirror of
				https://code.hackerspace.pl/q3k/youtube-dl
				synced 2025-03-16 11:43:02 +00:00 
			
		
		
		
	[generic] Allow multiple matches for generic hits (Fixes #2818)
This commit is contained in:
		
							parent
							
								
									f1f25be6db
								
							
						
					
					
						commit
						b30b8698ea
					
				| @ -637,70 +637,77 @@ class GenericIE(InfoExtractor): | |||||||
|             return self.url_result(smotri_url, 'Smotri') |             return self.url_result(smotri_url, 'Smotri') | ||||||
| 
 | 
 | ||||||
|         # Start with something easy: JW Player in SWFObject |         # Start with something easy: JW Player in SWFObject | ||||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) |         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||||
|         if mobj is None: |         if not found: | ||||||
|             # Look for gorilla-vid style embedding |             # Look for gorilla-vid style embedding | ||||||
|             mobj = re.search(r'''(?sx) |             found = re.findall(r'''(?sx) | ||||||
|                 (?: |                 (?: | ||||||
|                     jw_plugins| |                     jw_plugins| | ||||||
|                     JWPlayerOptions| |                     JWPlayerOptions| | ||||||
|                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup |                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup | ||||||
|                 ) |                 ) | ||||||
|                 .*?file\s*:\s*["\'](.*?)["\']''', webpage) |                 .*?file\s*:\s*["\'](.*?)["\']''', webpage) | ||||||
|         if mobj is None: |         if not found: | ||||||
|             # Broaden the search a little bit |             # Broaden the search a little bit | ||||||
|             mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) |             found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) | ||||||
|         if mobj is None: |         if not found: | ||||||
|             # Broaden the search a little bit: JWPlayer JS loader |             # Broaden the search a little bit: JWPlayer JS loader | ||||||
|             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) |             found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) | ||||||
| 
 |         if not found: | ||||||
|         if mobj is None: |  | ||||||
|             # Try to find twitter cards info |             # Try to find twitter cards info | ||||||
|             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) |             found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) | ||||||
|         if mobj is None: |         if not found: | ||||||
|             # We look for Open Graph info: |             # We look for Open Graph info: | ||||||
|             # We have to match any number of spaces between elements, some sites try to align them (eg.: statigr.am) |             # We have to match any number of spaces between elements, some sites try to align them (eg.: statigr.am) | ||||||
|             m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) |             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) | ||||||
|             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: |             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: | ||||||
|             if m_video_type is not None: |             if m_video_type is not None: | ||||||
|                 mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) |                 found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) | ||||||
|         if mobj is None: |         if not found: | ||||||
|             # HTML5 video |             # HTML5 video | ||||||
|             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) |             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage) | ||||||
|         if mobj is None: |         if not found: | ||||||
|             mobj = re.search( |             found = re.findall( | ||||||
|                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' |                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' | ||||||
|                 r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"', |                 r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"', | ||||||
|                 webpage) |                 webpage) | ||||||
|             if mobj: |             if found: | ||||||
|                 new_url = mobj.group(1) |                 new_url = found.group(1) | ||||||
|                 self.report_following_redirect(new_url) |                 self.report_following_redirect(new_url) | ||||||
|                 return { |                 return { | ||||||
|                     '_type': 'url', |                     '_type': 'url', | ||||||
|                     'url': new_url, |                     'url': new_url, | ||||||
|                 } |                 } | ||||||
|         if mobj is None: |         if not found: | ||||||
|             raise ExtractorError('Unsupported URL: %s' % url) |             raise ExtractorError('Unsupported URL: %s' % url) | ||||||
| 
 | 
 | ||||||
|         # It's possible that one of the regexes |         entries = [] | ||||||
|         # matched, but returned an empty group: |         for video_url in found: | ||||||
|         if mobj.group(1) is None: |  | ||||||
|             raise ExtractorError('Did not find a valid video URL at %s' % url) |  | ||||||
| 
 |  | ||||||
|         video_url = mobj.group(1) |  | ||||||
|             video_url = compat_urlparse.urljoin(url, video_url) |             video_url = compat_urlparse.urljoin(url, video_url) | ||||||
|             video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) |             video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) | ||||||
| 
 | 
 | ||||||
|             # Sometimes, jwplayer extraction will result in a YouTube URL |             # Sometimes, jwplayer extraction will result in a YouTube URL | ||||||
|             if YoutubeIE.suitable(video_url): |             if YoutubeIE.suitable(video_url): | ||||||
|             return self.url_result(video_url, 'Youtube') |                 entries.append(self.url_result(video_url, 'Youtube')) | ||||||
|  |                 continue | ||||||
| 
 | 
 | ||||||
|             # here's a fun little line of code for you: |             # here's a fun little line of code for you: | ||||||
|             video_id = os.path.splitext(video_id)[0] |             video_id = os.path.splitext(video_id)[0] | ||||||
| 
 | 
 | ||||||
|         return { |             entries.append({ | ||||||
|                 'id': video_id, |                 'id': video_id, | ||||||
|                 'url': video_url, |                 'url': video_url, | ||||||
|                 'uploader': video_uploader, |                 'uploader': video_uploader, | ||||||
|                 'title': video_title, |                 'title': video_title, | ||||||
|  |             }) | ||||||
|  | 
 | ||||||
|  |         if len(entries) == 1: | ||||||
|  |             return entries[1] | ||||||
|  |         else: | ||||||
|  |             for num, e in enumerate(entries, start=1): | ||||||
|  |                 e['title'] = '%s (%d)' % (e['title'], num) | ||||||
|  |             return { | ||||||
|  |                 '_type': 'playlist', | ||||||
|  |                 'entries': entries, | ||||||
|             } |             } | ||||||
|  | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
						Philipp Hagemeister