mirror of
				https://code.hackerspace.pl/q3k/youtube-dl
				synced 2025-03-16 11:43:02 +00:00 
			
		
		
		
	Use _download_xml in more extractors
				
					
				
			This commit is contained in:
		
							parent
							
								
									a0088bdf93
								
							
						
					
					
						commit
						1825836235
					
				| @ -1,5 +1,4 @@ | |||||||
| import re | import re | ||||||
| import xml.etree.ElementTree |  | ||||||
| import json | import json | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @ -65,18 +64,18 @@ class AppleTrailersIE(InfoExtractor): | |||||||
|         uploader_id = mobj.group('company') |         uploader_id = mobj.group('company') | ||||||
| 
 | 
 | ||||||
|         playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc') |         playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc') | ||||||
|         playlist_snippet = self._download_webpage(playlist_url, movie) |         def fix_html(s): | ||||||
|         playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet) |             s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s) | ||||||
|         playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned) |             s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s) | ||||||
|         # The ' in the onClick attributes are not escaped, it couldn't be parsed |             # The ' in the onClick attributes are not escaped, it couldn't be parsed | ||||||
|         # with xml.etree.ElementTree.fromstring |             # like: http://trailers.apple.com/trailers/wb/gravity/ | ||||||
|         # like: http://trailers.apple.com/trailers/wb/gravity/ |             def _clean_json(m): | ||||||
|         def _clean_json(m): |                 return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;') | ||||||
|             return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;') |             s = re.sub(self._JSON_RE, _clean_json, s) | ||||||
|         playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned) |             s = u'<html>' + s + u'</html>' | ||||||
|         playlist_html = u'<html>' + playlist_cleaned + u'</html>' |             return s | ||||||
|  |         doc = self._download_xml(playlist_url, movie, transform_source=fix_html) | ||||||
| 
 | 
 | ||||||
|         doc = xml.etree.ElementTree.fromstring(playlist_html) |  | ||||||
|         playlist = [] |         playlist = [] | ||||||
|         for li in doc.findall('./div/ul/li'): |         for li in doc.findall('./div/ul/li'): | ||||||
|             on_click = li.find('.//a').attrib['onClick'] |             on_click = li.find('.//a').attrib['onClick'] | ||||||
|  | |||||||
| @ -1,9 +1,9 @@ | |||||||
| import re | import re | ||||||
| import xml.etree.ElementTree |  | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     find_xpath_attr, |     find_xpath_attr, | ||||||
|  |     fix_xml_all_ampersand, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -30,12 +30,10 @@ class ClipsyndicateIE(InfoExtractor): | |||||||
|         # it includes a required token |         # it includes a required token | ||||||
|         flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars') |         flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars') | ||||||
| 
 | 
 | ||||||
|         playlist_page = self._download_webpage( |         pdoc = self._download_xml( | ||||||
|             'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, |             'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, | ||||||
|             video_id, u'Downloading video info')  |             video_id, u'Downloading video info', | ||||||
|         # Fix broken xml |             transform_source=fix_xml_all_ampersand)  | ||||||
|         playlist_page = re.sub('&', '&amp;', playlist_page) |  | ||||||
|         pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8')) |  | ||||||
| 
 | 
 | ||||||
|         track_doc = pdoc.find('trackList/track') |         track_doc = pdoc.find('trackList/track') | ||||||
|         def find_param(name): |         def find_param(name): | ||||||
|  | |||||||
| @ -1,8 +1,10 @@ | |||||||
| import re | import re | ||||||
| import xml.etree.ElementTree |  | ||||||
| import operator | import operator | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     fix_xml_all_ampersand, | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class MetacriticIE(InfoExtractor): | class MetacriticIE(InfoExtractor): | ||||||
| @ -23,9 +25,8 @@ class MetacriticIE(InfoExtractor): | |||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         # The xml is not well formatted, there are raw '&' |         # The xml is not well formatted, there are raw '&' | ||||||
|         info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id, |         info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id, | ||||||
|             video_id, u'Downloading info xml').replace('&', '&amp;') |             video_id, u'Downloading info xml', transform_source=fix_xml_all_ampersand) | ||||||
|         info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) |  | ||||||
| 
 | 
 | ||||||
|         clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id) |         clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id) | ||||||
|         formats = [] |         formats = [] | ||||||
|  | |||||||
| @ -1057,3 +1057,8 @@ def month_by_name(name): | |||||||
|         return ENGLISH_NAMES.index(name) + 1 |         return ENGLISH_NAMES.index(name) + 1 | ||||||
|     except ValueError: |     except ValueError: | ||||||
|         return None |         return None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def fix_xml_all_ampersand(xml_str): | ||||||
|  |     """Replace all the '&' by '&amp;' in XML""" | ||||||
|  |     return xml_str.replace(u'&', u'&amp;') | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
						Jaime Marquínez Ferrándiz