mirror of
				https://code.hackerspace.pl/q3k/youtube-dl
				synced 2025-03-16 11:43:02 +00:00 
			
		
		
		
	[xhamster] Extract all formats and fix duration extraction (#13593)
This commit is contained in:
		
							parent
							
								
									00e5c36315
								
							
						
					
					
						commit
						d852c6bc59
					
				| @ -3,6 +3,7 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..compat import compat_str | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     clean_html, |     clean_html, | ||||||
|     dict_get, |     dict_get, | ||||||
| @ -28,6 +29,7 @@ class XHamsterIE(InfoExtractor): | |||||||
|         'md5': '8281348b8d3c53d39fffb377d24eac4e', |         'md5': '8281348b8d3c53d39fffb377d24eac4e', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '1509445', |             'id': '1509445', | ||||||
|  |             'display_id': 'femaleagent_shy_beauty_takes_the_bait', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'FemaleAgent Shy beauty takes the bait', |             'title': 'FemaleAgent Shy beauty takes the bait', | ||||||
|             'upload_date': '20121014', |             'upload_date': '20121014', | ||||||
| @ -40,6 +42,7 @@ class XHamsterIE(InfoExtractor): | |||||||
|         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', |         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '2221348', |             'id': '2221348', | ||||||
|  |             'display_id': 'britney_spears_sexy_booty', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Britney Spears  Sexy Booty', |             'title': 'Britney Spears  Sexy Booty', | ||||||
|             'upload_date': '20130914', |             'upload_date': '20130914', | ||||||
| @ -81,18 +84,7 @@ class XHamsterIE(InfoExtractor): | |||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         def extract_video_url(webpage, name): |  | ||||||
|             return self._search_regex( |  | ||||||
|                 [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''', |  | ||||||
|                  r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''', |  | ||||||
|                  r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''], |  | ||||||
|                 webpage, name, group='mp4') |  | ||||||
| 
 |  | ||||||
|         def is_hd(webpage): |  | ||||||
|             return '<div class=\'icon iconHD\'' in webpage |  | ||||||
| 
 |  | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
| 
 |  | ||||||
|         video_id = mobj.group('id') or mobj.group('id_2') |         video_id = mobj.group('id') or mobj.group('id_2') | ||||||
|         display_id = mobj.group('display_id') or mobj.group('display_id_2') |         display_id = mobj.group('display_id') or mobj.group('display_id_2') | ||||||
| 
 | 
 | ||||||
| @ -110,6 +102,39 @@ class XHamsterIE(InfoExtractor): | |||||||
|              r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'], |              r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'], | ||||||
|             webpage, 'title') |             webpage, 'title') | ||||||
| 
 | 
 | ||||||
|  |         formats = [] | ||||||
|  |         format_urls = set() | ||||||
|  | 
 | ||||||
|  |         sources = self._parse_json( | ||||||
|  |             self._search_regex( | ||||||
|  |                 r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources', | ||||||
|  |                 default='{}'), | ||||||
|  |             video_id, fatal=False) | ||||||
|  |         for format_id, format_url in sources.items(): | ||||||
|  |             if not isinstance(format_url, compat_str): | ||||||
|  |                 continue | ||||||
|  |             if format_url in format_urls: | ||||||
|  |                 continue | ||||||
|  |             format_urls.add(format_url) | ||||||
|  |             formats.append({ | ||||||
|  |                 'format_id': format_id, | ||||||
|  |                 'url': format_url, | ||||||
|  |                 'height': int_or_none(self._search_regex( | ||||||
|  |                     r'^(\d+)[pP]', format_id, 'height', default=None)) | ||||||
|  |             }) | ||||||
|  | 
 | ||||||
|  |         video_url = self._search_regex( | ||||||
|  |             [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''', | ||||||
|  |              r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''', | ||||||
|  |              r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''], | ||||||
|  |             webpage, 'video url', group='mp4', default=None) | ||||||
|  |         if video_url and video_url not in format_urls: | ||||||
|  |             formats.append({ | ||||||
|  |                 'url': video_url, | ||||||
|  |             }) | ||||||
|  | 
 | ||||||
|  |         self._sort_formats(formats) | ||||||
|  | 
 | ||||||
|         # Only a few videos have an description |         # Only a few videos have an description | ||||||
|         mobj = re.search(r'<span>Description: </span>([^<]+)', webpage) |         mobj = re.search(r'<span>Description: </span>([^<]+)', webpage) | ||||||
|         description = mobj.group(1) if mobj else None |         description = mobj.group(1) if mobj else None | ||||||
| @ -128,7 +153,8 @@ class XHamsterIE(InfoExtractor): | |||||||
|             webpage, 'thumbnail', fatal=False, group='thumbnail') |             webpage, 'thumbnail', fatal=False, group='thumbnail') | ||||||
| 
 | 
 | ||||||
|         duration = parse_duration(self._search_regex( |         duration = parse_duration(self._search_regex( | ||||||
|             r'Runtime:\s*</span>\s*([\d:]+)', webpage, |             [r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']', | ||||||
|  |              r'Runtime:\s*</span>\s*([\d:]+)'], webpage, | ||||||
|             'duration', fatal=False)) |             'duration', fatal=False)) | ||||||
| 
 | 
 | ||||||
|         view_count = int_or_none(self._search_regex( |         view_count = int_or_none(self._search_regex( | ||||||
| @ -143,30 +169,6 @@ class XHamsterIE(InfoExtractor): | |||||||
| 
 | 
 | ||||||
|         age_limit = self._rta_search(webpage) |         age_limit = self._rta_search(webpage) | ||||||
| 
 | 
 | ||||||
|         hd = is_hd(webpage) |  | ||||||
| 
 |  | ||||||
|         format_id = 'hd' if hd else 'sd' |  | ||||||
| 
 |  | ||||||
|         video_url = extract_video_url(webpage, format_id) |  | ||||||
|         formats = [{ |  | ||||||
|             'url': video_url, |  | ||||||
|             'format_id': 'hd' if hd else 'sd', |  | ||||||
|             'preference': 1, |  | ||||||
|         }] |  | ||||||
| 
 |  | ||||||
|         if not hd: |  | ||||||
|             mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url') |  | ||||||
|             webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage') |  | ||||||
|             if is_hd(webpage): |  | ||||||
|                 video_url = extract_video_url(webpage, 'hd') |  | ||||||
|                 formats.append({ |  | ||||||
|                     'url': video_url, |  | ||||||
|                     'format_id': 'hd', |  | ||||||
|                     'preference': 2, |  | ||||||
|                 }) |  | ||||||
| 
 |  | ||||||
|         self._sort_formats(formats) |  | ||||||
| 
 |  | ||||||
|         categories_html = self._search_regex( |         categories_html = self._search_regex( | ||||||
|             r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage, |             r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage, | ||||||
|             'categories', default=None) |             'categories', default=None) | ||||||
| @ -175,6 +177,7 @@ class XHamsterIE(InfoExtractor): | |||||||
| 
 | 
 | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|  |             'display_id': display_id, | ||||||
|             'title': title, |             'title': title, | ||||||
|             'description': description, |             'description': description, | ||||||
|             'upload_date': upload_date, |             'upload_date': upload_date, | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Sergey M․
						Sergey M․