hbmartin committed on
Commit
4245a85
·
unverified ·
2 Parent(s): 86b69559a04477

Merge pull request #11 from hbmartin/resolution-selection

Browse files
README.md CHANGED
@@ -244,6 +244,9 @@ Finally, if you're filing a bug report, the cli contains a switch called ``--bui
244
 
245
  <a href="https://deepsource.io/gh/hbmartin/pytube3/?ref=repository-badge" target="_blank"><img alt="DeepSource" title="DeepSource" src="https://static.deepsource.io/deepsource-badge-light-mini.svg"></a>
246
  <a href="https://www.codacy.com/manual/hbmartin/pytube3?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=hbmartin/pytube3&amp;utm_campaign=Badge_Grade"><img src="https://api.codacy.com/project/badge/Grade/53794f06983a46829620b3284c6a5596"/></a>
 
 
 
247
 
248
  Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
249
 
 
244
 
245
  <a href="https://deepsource.io/gh/hbmartin/pytube3/?ref=repository-badge" target="_blank"><img alt="DeepSource" title="DeepSource" src="https://static.deepsource.io/deepsource-badge-light-mini.svg"></a>
246
  <a href="https://www.codacy.com/manual/hbmartin/pytube3?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=hbmartin/pytube3&amp;utm_campaign=Badge_Grade"><img src="https://api.codacy.com/project/badge/Grade/53794f06983a46829620b3284c6a5596"/></a>
247
+ <a href="https://codecov.io/gh/hbmartin/pytube3">
248
+ <img src="https://codecov.io/gh/hbmartin/pytube3/branch/master/graph/badge.svg" />
249
+ </a>
250
 
251
  Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
252
 
pytube/__main__.py CHANGED
@@ -61,7 +61,7 @@ class YouTube:
61
 
62
  # the url to vid info, parsed from watch html
63
  self.vid_info_url: Optional[str] = None
64
- self.vid_info_raw = None # content fetched by vid_info_url
65
  self.vid_info: Optional[Dict] = None # parsed content of vid_info_raw
66
 
67
  self.watch_html: Optional[str] = None # the html of /watch?v=<video_id>
 
61
 
62
  # the url to vid info, parsed from watch html
63
  self.vid_info_url: Optional[str] = None
64
+ self.vid_info_raw: Optional[str] = None # content fetched by vid_info_url
65
  self.vid_info: Optional[Dict] = None # parsed content of vid_info_raw
66
 
67
  self.watch_html: Optional[str] = None # the html of /watch?v=<video_id>
pytube/captions.py CHANGED
@@ -25,7 +25,7 @@ class Caption:
25
  self.code = caption_track["languageCode"]
26
 
27
  @property
28
- def xml_captions(self):
29
  """Download the xml caption tracks."""
30
  return request.get(self.url)
31
 
 
25
  self.code = caption_track["languageCode"]
26
 
27
  @property
28
+ def xml_captions(self) -> str:
29
  """Download the xml caption tracks."""
30
  return request.get(self.url)
31
 
pytube/cli.py CHANGED
@@ -11,7 +11,7 @@ import sys
11
  from io import BufferedWriter
12
  from typing import Tuple, Any, Optional, List
13
 
14
- from pytube import __version__, CaptionQuery
15
  from pytube import YouTube
16
 
17
 
@@ -36,9 +36,11 @@ def main():
36
  if args.build_playback_report:
37
  build_playback_report(youtube)
38
  if args.itag:
39
- download(youtube=youtube, itag=args.itag)
40
  if hasattr(args, "caption_code"):
41
  download_caption(youtube=youtube, lang_code=args.caption_code)
 
 
42
 
43
 
44
  def _parse_args(
@@ -51,6 +53,9 @@ def _parse_args(
51
  parser.add_argument(
52
  "--itag", type=int, help="The itag for the desired stream",
53
  )
 
 
 
54
  parser.add_argument(
55
  "-l",
56
  "--list",
@@ -166,12 +171,18 @@ def on_progress(
166
  display_progress_bar(bytes_received, filesize)
167
 
168
 
169
- def download(youtube: YouTube, itag: int) -> None:
 
 
 
 
 
 
170
  """Start downloading a YouTube video.
171
 
172
  :param YouTube youtube:
173
  A valid YouTube object.
174
- :param str itag:
175
  YouTube format identifier code.
176
 
177
  """
@@ -185,10 +196,39 @@ def download(youtube: YouTube, itag: int) -> None:
185
  sys.exit()
186
 
187
  youtube.register_on_progress_callback(on_progress)
188
- print("\n{fn} | {fs} bytes".format(fn=stream.default_filename, fs=stream.filesize,))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  try:
190
- stream.download()
191
- sys.stdout.write("\n")
192
  except KeyboardInterrupt:
193
  sys.exit()
194
 
 
11
  from io import BufferedWriter
12
  from typing import Tuple, Any, Optional, List
13
 
14
+ from pytube import __version__, CaptionQuery, Stream
15
  from pytube import YouTube
16
 
17
 
 
36
  if args.build_playback_report:
37
  build_playback_report(youtube)
38
  if args.itag:
39
+ download_by_itag(youtube=youtube, itag=args.itag)
40
  if hasattr(args, "caption_code"):
41
  download_caption(youtube=youtube, lang_code=args.caption_code)
42
+ if args.resolution:
43
+ download_by_resolution(youtube=youtube, resolution=args.resolution)
44
 
45
 
46
  def _parse_args(
 
53
  parser.add_argument(
54
  "--itag", type=int, help="The itag for the desired stream",
55
  )
56
+ parser.add_argument(
57
+ "-r", "--resolution", type=str, help="The resolution for the desired stream",
58
+ )
59
  parser.add_argument(
60
  "-l",
61
  "--list",
 
171
  display_progress_bar(bytes_received, filesize)
172
 
173
 
174
+ def _download(stream: Stream) -> None:
175
+ print("\n{fn} | {fs} bytes".format(fn=stream.default_filename, fs=stream.filesize))
176
+ stream.download()
177
+ sys.stdout.write("\n")
178
+
179
+
180
+ def download_by_itag(youtube: YouTube, itag: int) -> None:
181
  """Start downloading a YouTube video.
182
 
183
  :param YouTube youtube:
184
  A valid YouTube object.
185
+ :param int itag:
186
  YouTube format identifier code.
187
 
188
  """
 
196
  sys.exit()
197
 
198
  youtube.register_on_progress_callback(on_progress)
199
+
200
+ try:
201
+ _download(stream)
202
+ except KeyboardInterrupt:
203
+ sys.exit()
204
+
205
+
206
+ def download_by_resolution(youtube: YouTube, resolution: str) -> None:
207
+ """Start downloading a YouTube video.
208
+
209
+ :param YouTube youtube:
210
+ A valid YouTube object.
211
+ :param str resolution:
212
+ YouTube video resolution.
213
+
214
+ """
215
+ # TODO(nficano): allow download target to be specified
216
+ # TODO(nficano): allow dash itags to be selected
217
+ stream = youtube.streams.get_by_resolution(resolution)
218
+ if stream is None:
219
+ print(
220
+ "Could not find a stream with resolution: {resolution}".format(
221
+ resolution=resolution
222
+ )
223
+ )
224
+ print("Try one of these:")
225
+ display_streams(youtube)
226
+ sys.exit()
227
+
228
+ youtube.register_on_progress_callback(on_progress)
229
+
230
  try:
231
+ _download(stream)
 
232
  except KeyboardInterrupt:
233
  sys.exit()
234
 
pytube/contrib/playlist.py CHANGED
@@ -6,6 +6,7 @@ import logging
6
  import re
7
  from collections import OrderedDict
8
  from typing import List, Optional
 
9
 
10
  from pytube import request
11
  from pytube.__main__ import YouTube
@@ -19,40 +20,28 @@ class Playlist:
19
  """
20
 
21
  def __init__(self, url: str, suppress_exception: bool = False):
22
- self.playlist_url = url
23
  self.video_urls: List[str] = []
24
  self.suppress_exception = suppress_exception
 
25
 
26
- def construct_playlist_url(self) -> str:
27
- """There are two kinds of playlist urls in YouTube. One that contains
28
- watch?v= in URL, another one contains the "playlist?list=" portion. It
29
- is preferable to work with the later one.
30
-
31
- :return: playlist url
32
- """
33
-
34
- if "watch?v=" in self.playlist_url:
35
  base_url = "https://www.youtube.com/playlist?list="
36
- playlist_code = self.playlist_url.split("&list=")[1]
37
- return base_url + playlist_code
38
-
39
- # url is already in the desired format, so just return it
40
- return self.playlist_url
41
 
42
  @staticmethod
43
- def _load_more_url(req):
44
  """Given an html page or a fragment thereof, looks for
45
  and returns the "load more" url if found.
46
  """
47
- try:
48
- load_more_url = "https://www.youtube.com" + re.search(
49
- r"data-uix-load-more-href=\"(/browse_ajax\?"
50
- 'action_continuation=.*?)"',
51
- req,
52
- ).group(1)
53
- except AttributeError:
54
- load_more_url = ""
55
- return load_more_url
56
 
57
  def parse_links(self) -> List[str]:
58
  """Parse the video links from the page source, extracts and
@@ -60,8 +49,7 @@ class Playlist:
60
  It's an alternative for BeautifulSoup
61
  """
62
 
63
- url = self.construct_playlist_url()
64
- req = request.get(url)
65
 
66
  # split the page source by line and process each line
67
  content = [x for x in req.split("\n") if "pl-video-title-link" in x]
@@ -69,8 +57,8 @@ class Playlist:
69
 
70
  # The above only returns 100 or fewer links
71
  # Simulating a browser request for the load more link
72
- load_more_url = self._load_more_url(req)
73
- while len(load_more_url) > 0: # there is an url found
74
  logger.debug("load more url: %s", load_more_url)
75
  req = request.get(load_more_url)
76
  load_more = json.loads(req)
@@ -79,11 +67,13 @@ class Playlist:
79
  )
80
  # remove duplicates
81
  link_list.extend(list(OrderedDict.fromkeys(videos)))
82
- load_more_url = self._load_more_url(load_more["load_more_widget_html"],)
 
 
83
 
84
  return link_list
85
 
86
- def populate_video_urls(self):
87
  """Construct complete links of all the videos in playlist and
88
  populate video_urls list
89
 
@@ -120,13 +110,12 @@ class Playlist:
120
  download_path: Optional[str] = None,
121
  prefix_number: bool = True,
122
  reverse_numbering: bool = False,
 
123
  ) -> None:
124
  """Download all the videos in the playlist. Initially, download
125
  resolution is 720p (or highest available), later more option
126
  should be added to download resolution of choice
127
 
128
- TODO(nficano): Add option to download resolution of user's choice
129
-
130
  :param download_path:
131
  (optional) Output path for the playlist If one is not
132
  specified, defaults to the current working directory.
@@ -140,6 +129,9 @@ class Playlist:
140
  (optional) Lets you number playlists in reverse, since some
141
  playlists are ordered newest -> oldest.
142
  :type reverse_numbering: bool
 
 
 
143
  """
144
 
145
  self.populate_video_urls()
@@ -156,14 +148,11 @@ class Playlist:
156
  if not self.suppress_exception:
157
  raise e
158
  else:
159
- # TODO: this should not be hardcoded to a single user's
160
- # preference
161
  dl_stream = (
162
- yt.streams.filter(progressive=True, subtype="mp4",)
163
- .order_by("resolution")
164
- .desc()
165
- .first()
166
  )
 
167
 
168
  logger.debug("download path: %s", download_path)
169
  if prefix_number:
@@ -176,8 +165,7 @@ class Playlist:
176
 
177
  def title(self) -> Optional[str]:
178
  """return playlist title (name)"""
179
- url = self.construct_playlist_url()
180
- req = request.get(url)
181
  open_tag = "<title>"
182
  end_tag = "</title>"
183
  pattern = re.compile(open_tag + "(.+?)" + end_tag)
 
6
  import re
7
  from collections import OrderedDict
8
  from typing import List, Optional
9
+ from urllib.parse import parse_qs
10
 
11
  from pytube import request
12
  from pytube.__main__ import YouTube
 
20
  """
21
 
22
  def __init__(self, url: str, suppress_exception: bool = False):
 
23
  self.video_urls: List[str] = []
24
  self.suppress_exception = suppress_exception
25
+ self.playlist_url: str = url
26
 
27
+ if "watch?v=" in url:
 
 
 
 
 
 
 
 
28
  base_url = "https://www.youtube.com/playlist?list="
29
+ query_parameters = parse_qs(url.split("?")[1])
30
+ self.playlist_url = base_url + query_parameters["list"][0]
 
 
 
31
 
32
  @staticmethod
33
+ def _find_load_more_url(req: str) -> Optional[str]:
34
  """Given an html page or a fragment thereof, looks for
35
  and returns the "load more" url if found.
36
  """
37
+ match = re.search(
38
+ r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
39
+ req,
40
+ )
41
+ if match:
42
+ return "https://www.youtube.com" + match.group(1)
43
+
44
+ return None
 
45
 
46
  def parse_links(self) -> List[str]:
47
  """Parse the video links from the page source, extracts and
 
49
  It's an alternative for BeautifulSoup
50
  """
51
 
52
+ req = request.get(self.playlist_url)
 
53
 
54
  # split the page source by line and process each line
55
  content = [x for x in req.split("\n") if "pl-video-title-link" in x]
 
57
 
58
  # The above only returns 100 or fewer links
59
  # Simulating a browser request for the load more link
60
+ load_more_url = self._find_load_more_url(req)
61
+ while load_more_url: # there is an url found
62
  logger.debug("load more url: %s", load_more_url)
63
  req = request.get(load_more_url)
64
  load_more = json.loads(req)
 
67
  )
68
  # remove duplicates
69
  link_list.extend(list(OrderedDict.fromkeys(videos)))
70
+ load_more_url = self._find_load_more_url(
71
+ load_more["load_more_widget_html"],
72
+ )
73
 
74
  return link_list
75
 
76
+ def populate_video_urls(self) -> None:
77
  """Construct complete links of all the videos in playlist and
78
  populate video_urls list
79
 
 
110
  download_path: Optional[str] = None,
111
  prefix_number: bool = True,
112
  reverse_numbering: bool = False,
113
+ resolution: str = "720p",
114
  ) -> None:
115
  """Download all the videos in the playlist. Initially, download
116
  resolution is 720p (or highest available), later more option
117
  should be added to download resolution of choice
118
 
 
 
119
  :param download_path:
120
  (optional) Output path for the playlist If one is not
121
  specified, defaults to the current working directory.
 
129
  (optional) Lets you number playlists in reverse, since some
130
  playlists are ordered newest -> oldest.
131
  :type reverse_numbering: bool
132
+ :param resolution:
133
+ Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
134
+ :type resolution: str
135
  """
136
 
137
  self.populate_video_urls()
 
148
  if not self.suppress_exception:
149
  raise e
150
  else:
 
 
151
  dl_stream = (
152
+ yt.streams.get_by_resolution(resolution=resolution)
153
+ or yt.streams.get_lowest_resolution()
 
 
154
  )
155
+ assert dl_stream is not None
156
 
157
  logger.debug("download path: %s", download_path)
158
  if prefix_number:
 
165
 
166
  def title(self) -> Optional[str]:
167
  """return playlist title (name)"""
168
+ req = request.get(self.playlist_url)
 
169
  open_tag = "<title>"
170
  end_tag = "</title>"
171
  pattern = re.compile(open_tag + "(.+?)" + end_tag)
pytube/helpers.py CHANGED
@@ -1,9 +1,10 @@
1
  # -*- coding: utf-8 -*-
2
  """Various helper functions implemented by pytube."""
3
-
4
  import logging
5
  import pprint
6
  import re
 
7
 
8
  from pytube.exceptions import RegexMatchError
9
 
@@ -99,3 +100,11 @@ def create_logger(level: int = logging.ERROR) -> logging.Logger:
99
  logger.addHandler(handler)
100
  logger.setLevel(level)
101
  return logger
 
 
 
 
 
 
 
 
 
1
  # -*- coding: utf-8 -*-
2
  """Various helper functions implemented by pytube."""
3
+ import functools
4
  import logging
5
  import pprint
6
  import re
7
+ from typing import TypeVar, Callable
8
 
9
  from pytube.exceptions import RegexMatchError
10
 
 
100
  logger.addHandler(handler)
101
  logger.setLevel(level)
102
  return logger
103
+
104
+
105
+ GenericType = TypeVar("GenericType")
106
+
107
+
108
+ def cache(func: Callable[..., GenericType]) -> GenericType:
109
+ """ mypy compatible annotation wrapper for lru_cache"""
110
+ return functools.lru_cache()(func) # type: ignore
pytube/itags.py CHANGED
@@ -98,6 +98,30 @@ _3D = [82, 83, 84, 85, 100, 101, 102]
98
  LIVE = [91, 92, 93, 94, 95, 96, 132, 151]
99
  DASH_MP4_VIDEO = [133, 134, 135, 136, 137, 138, 160, 212, 264, 266, 298, 299]
100
  DASH_MP4_AUDIO = [139, 140, 141, 256, 258, 325, 328]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
 
103
  def get_format_profile(itag: int) -> Dict:
@@ -118,4 +142,8 @@ def get_format_profile(itag: int) -> Dict:
118
  "is_3d": itag in _3D,
119
  "is_hdr": itag in HDR,
120
  "fps": 60 if itag in _60FPS else 30,
 
 
 
 
121
  }
 
98
  LIVE = [91, 92, 93, 94, 95, 96, 132, 151]
99
  DASH_MP4_VIDEO = [133, 134, 135, 136, 137, 138, 160, 212, 264, 266, 298, 299]
100
  DASH_MP4_AUDIO = [139, 140, 141, 256, 258, 325, 328]
101
+ DASH_WEBM_VIDEO = [
102
+ 167,
103
+ 168,
104
+ 169,
105
+ 170,
106
+ 218,
107
+ 219,
108
+ 278,
109
+ 242,
110
+ 243,
111
+ 244,
112
+ 245,
113
+ 246,
114
+ 247,
115
+ 248,
116
+ 271,
117
+ 272,
118
+ 302,
119
+ 303,
120
+ 308,
121
+ 313,
122
+ 315,
123
+ ]
124
+ DASH_WEBM_AUDIO = [171, 172, 249, 250, 251]
125
 
126
 
127
  def get_format_profile(itag: int) -> Dict:
 
142
  "is_3d": itag in _3D,
143
  "is_hdr": itag in HDR,
144
  "fps": 60 if itag in _60FPS else 30,
145
+ "is_dash": itag in DASH_MP4_VIDEO
146
+ or itag in DASH_MP4_AUDIO
147
+ or itag in DASH_WEBM_VIDEO
148
+ or itag in DASH_WEBM_AUDIO,
149
  }
pytube/query.py CHANGED
@@ -34,6 +34,7 @@ class StreamQuery:
34
  only_video=None,
35
  progressive=None,
36
  adaptive=None,
 
37
  custom_filter_functions=None,
38
  ):
39
  """Apply the given filtering criterion.
@@ -103,6 +104,9 @@ class StreamQuery:
103
  Excludes progressive streams (audio and video are on separate
104
  tracks).
105
 
 
 
 
106
  :param bool only_audio:
107
  Excludes streams with video tracks.
108
 
@@ -161,6 +165,9 @@ class StreamQuery:
161
  for fn in custom_filter_functions:
162
  filters.append(fn)
163
 
 
 
 
164
  fmt_streams = self.fmt_streams
165
  for fn in filters:
166
  fmt_streams = list(filter(fn, fmt_streams))
@@ -221,7 +228,7 @@ class StreamQuery:
221
  """
222
  return self
223
 
224
- def get_by_itag(self, itag) -> Optional[Stream]:
225
  """Get the corresponding :class:`Stream <Stream>` for a given itag.
226
 
227
  :param int itag:
@@ -234,6 +241,38 @@ class StreamQuery:
234
  """
235
  return self.itag_index.get(int(itag))
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  def first(self) -> Optional[Stream]:
238
  """Get the first :class:`Stream <Stream>` in the results.
239
 
 
34
  only_video=None,
35
  progressive=None,
36
  adaptive=None,
37
+ is_dash=None,
38
  custom_filter_functions=None,
39
  ):
40
  """Apply the given filtering criterion.
 
104
  Excludes progressive streams (audio and video are on separate
105
  tracks).
106
 
107
+ :param bool is_dash:
108
+ Include/exclude dash streams.
109
+
110
  :param bool only_audio:
111
  Excludes streams with video tracks.
112
 
 
165
  for fn in custom_filter_functions:
166
  filters.append(fn)
167
 
168
+ if is_dash is not None:
169
+ filters.append(lambda s: s.is_dash == is_dash)
170
+
171
  fmt_streams = self.fmt_streams
172
  for fn in filters:
173
  fmt_streams = list(filter(fn, fmt_streams))
 
228
  """
229
  return self
230
 
231
+ def get_by_itag(self, itag: int) -> Optional[Stream]:
232
  """Get the corresponding :class:`Stream <Stream>` for a given itag.
233
 
234
  :param int itag:
 
241
  """
242
  return self.itag_index.get(int(itag))
243
 
244
+ def get_by_resolution(self, resolution: str) -> Optional[Stream]:
245
+ """Get the corresponding :class:`Stream <Stream>` for a given resolution.
246
+ Stream must be a progressive mp4.
247
+
248
+ :param str resolution:
249
+ Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
250
+ :rtype: :class:`Stream <Stream>` or None
251
+ :returns:
252
+ The :class:`Stream <Stream>` matching the given resolution or None if
253
+ not found.
254
+
255
+ """
256
+ return self.filter(
257
+ progressive=True, subtype="mp4", resolution=resolution
258
+ ).first()
259
+
260
+ def get_lowest_resolution(self) -> Optional[Stream]:
261
+ """Get lowest resolution stream that is a progressive mp4.
262
+
263
+ :rtype: :class:`Stream <Stream>` or None
264
+ :returns:
265
+ The lowest-resolution progressive mp4 :class:`Stream <Stream>` or None if
266
+ not found.
267
+
268
+ """
269
+ return (
270
+ self.filter(progressive=True, subtype="mp4")
271
+ .order_by("resolution")
272
+ .desc()
273
+ .last()
274
+ )
275
+
276
  def first(self) -> Optional[Stream]:
277
  """Get the first :class:`Stream <Stream>` in the results.
278
 
pytube/request.py CHANGED
@@ -1,39 +1,51 @@
1
  # -*- coding: utf-8 -*-
2
  """Implements a simple wrapper around urlopen."""
 
3
  from urllib.request import Request
4
  from urllib.request import urlopen
5
 
6
 
7
- def get(url, headers=False, streaming=False, chunk_size=8192):
 
 
 
 
 
 
8
  """Send an http GET request.
9
 
10
  :param str url:
11
  The URL to perform the GET request for.
12
- :param bool headers:
13
- Only return the http headers.
14
- :param bool streaming:
15
- Returns the response body in chunks via a generator.
16
- :param int chunk_size:
17
- The size in bytes of each chunk. Defaults to 8*1024
18
  """
 
19
 
20
- req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
21
- response = urlopen(req)
22
-
23
- if streaming:
24
- return stream_response(response, chunk_size)
25
 
26
- if headers:
27
- # https://github.com/nficano/pytube/issues/160
28
- return {k.lower(): v for k, v in response.info().items()}
29
-
30
- return response.read().decode("utf-8")
31
-
32
-
33
- def stream_response(response, chunk_size=8 * 1024):
34
- """Read the response in chunks."""
35
  while True:
36
  buf = response.read(chunk_size)
37
  if not buf:
38
  break
39
  yield buf
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # -*- coding: utf-8 -*-
2
  """Implements a simple wrapper around urlopen."""
3
+ from typing import Any, Iterable, Dict
4
  from urllib.request import Request
5
  from urllib.request import urlopen
6
 
7
 
8
+ def _execute_request(url: str) -> Any:
9
+ if not url.lower().startswith("http"):
10
+ raise ValueError
11
+ return urlopen(Request(url, headers={"User-Agent": "Mozilla/5.0"})) # nosec
12
+
13
+
14
+ def get(url) -> str:
15
  """Send an http GET request.
16
 
17
  :param str url:
18
  The URL to perform the GET request for.
19
+ :rtype: str
20
+ :returns:
21
+ Response body decoded as UTF-8 text
 
 
 
22
  """
23
+ return _execute_request(url).read().decode("utf-8")
24
 
 
 
 
 
 
25
 
26
+ def stream(url: str, chunk_size: int = 8192) -> Iterable[bytes]:
27
+ """Read the response in chunks.
28
+ :param str url:
29
+ The URL to perform the GET request for.
30
+ :param int chunk_size:
31
+ The size in bytes of each chunk. Defaults to 8*1024
32
+ :rtype: Iterable[bytes]
33
+ """
34
+ response = _execute_request(url)
35
  while True:
36
  buf = response.read(chunk_size)
37
  if not buf:
38
  break
39
  yield buf
40
+
41
+
42
+ def headers(url: str) -> Dict:
43
+ """Fetch the headers returned by an http GET request.
44
+
45
+ :param str url:
46
+ The URL to perform the GET request for.
47
+ :rtype: dict
48
+ :returns:
49
+ dictionary of lowercase headers
50
+ """
51
+ return {k.lower(): v for k, v in _execute_request(url).info().items()}
pytube/streams.py CHANGED
@@ -59,6 +59,7 @@ class Stream:
59
  self.codecs: List[str] = [] # audio/video encoders (e.g.: vp8, mp4a)
60
  self.audio_codec = None # audio codec of the stream (e.g.: vorbis)
61
  self.video_codec = None # video codec of the stream (e.g.: vp8)
 
62
 
63
  # Iterates over the key/values of stream and sets them as class
64
  # attributes. This is an anti-pattern and should be removed.
@@ -118,9 +119,7 @@ class Stream:
118
 
119
  :rtype: bool
120
  """
121
- if self.is_progressive:
122
- return True
123
- return self.type == "audio"
124
 
125
  @property
126
  def includes_video_track(self) -> bool:
@@ -128,9 +127,7 @@ class Stream:
128
 
129
  :rtype: bool
130
  """
131
- if self.is_progressive:
132
- return True
133
- return self.type == "video"
134
 
135
  def parse_codecs(self) -> Tuple:
136
  """Get the video/audio codecs from list of codecs.
@@ -164,7 +161,7 @@ class Stream:
164
  Filesize (in bytes) of the stream.
165
  """
166
  if self._filesize is None:
167
- headers = request.get(self.url, headers=True)
168
  self._filesize = int(headers["content-length"])
169
  return self._filesize
170
 
@@ -243,7 +240,7 @@ class Stream:
243
  )
244
 
245
  with open(file_path, "wb") as fh:
246
- for chunk in request.get(self.url, streaming=True):
247
  # reduce the (bytes) remainder by the length of the chunk.
248
  bytes_remaining -= len(chunk)
249
  # send to the on_progress callback.
@@ -262,7 +259,7 @@ class Stream:
262
  "downloading (%s total bytes) file to BytesIO buffer", self.filesize,
263
  )
264
 
265
- for chunk in request.get(self.url, streaming=True):
266
  # reduce the (bytes) remainder by the length of the chunk.
267
  bytes_remaining -= len(chunk)
268
  # send to the on_progress callback.
 
59
  self.codecs: List[str] = [] # audio/video encoders (e.g.: vp8, mp4a)
60
  self.audio_codec = None # audio codec of the stream (e.g.: vorbis)
61
  self.video_codec = None # video codec of the stream (e.g.: vp8)
62
+ self.is_dash: Optional[bool] = None
63
 
64
  # Iterates over the key/values of stream and sets them as class
65
  # attributes. This is an anti-pattern and should be removed.
 
119
 
120
  :rtype: bool
121
  """
122
+ return self.is_progressive or self.type == "audio"
 
 
123
 
124
  @property
125
  def includes_video_track(self) -> bool:
 
127
 
128
  :rtype: bool
129
  """
130
+ return self.is_progressive or self.type == "video"
 
 
131
 
132
  def parse_codecs(self) -> Tuple:
133
  """Get the video/audio codecs from list of codecs.
 
161
  Filesize (in bytes) of the stream.
162
  """
163
  if self._filesize is None:
164
+ headers = request.headers(self.url)
165
  self._filesize = int(headers["content-length"])
166
  return self._filesize
167
 
 
240
  )
241
 
242
  with open(file_path, "wb") as fh:
243
+ for chunk in request.stream(self.url):
244
  # reduce the (bytes) remainder by the length of the chunk.
245
  bytes_remaining -= len(chunk)
246
  # send to the on_progress callback.
 
259
  "downloading (%s total bytes) file to BytesIO buffer", self.filesize,
260
  )
261
 
262
+ for chunk in request.stream(self.url):
263
  # reduce the (bytes) remainder by the length of the chunk.
264
  bytes_remaining -= len(chunk)
265
  # send to the on_progress callback.
tests/conftest.py CHANGED
@@ -50,3 +50,15 @@ def age_restricted():
50
  """Youtube instance initialized with video id zRbsm3e2ltw."""
51
  filename = "yt-video-zRbsm3e2ltw-1507777044.json.gz"
52
  return load_playback_file(filename)
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  """Youtube instance initialized with video id zRbsm3e2ltw."""
51
  filename = "yt-video-zRbsm3e2ltw-1507777044.json.gz"
52
  return load_playback_file(filename)
53
+
54
+
55
+ @pytest.fixture
56
+ def playlist_html():
57
+ """Youtube playlist HTML loaded on 2020-01-25 from
58
+ https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr"""
59
+ file_path = os.path.join(
60
+ os.path.dirname(os.path.realpath(__file__)), "mocks", "playlist.html"
61
+ )
62
+ with open(file_path, encoding="utf-8") as f:
63
+ read_data = f.read()
64
+ return read_data
tests/contrib/test_playlist.py CHANGED
@@ -1,5 +1,6 @@
1
  # -*- coding: utf-8 -*-
2
  from unittest import mock
 
3
 
4
  from pytube import Playlist
5
 
@@ -14,3 +15,69 @@ def test_title(request_get):
14
  pl = Playlist(url)
15
  pl_title = pl.title()
16
  assert pl_title == "(149) Python Tutorial for Beginners (For Absolute Beginners)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # -*- coding: utf-8 -*-
2
  from unittest import mock
3
+ from unittest.mock import MagicMock
4
 
5
  from pytube import Playlist
6
 
 
15
  pl = Playlist(url)
16
  pl_title = pl.title()
17
  assert pl_title == "(149) Python Tutorial for Beginners (For Absolute Beginners)"
18
+
19
+
20
+ def test_init_with_playlist_url():
21
+ url = "https://www.youtube.com/playlist?list=PLynhp4cZEpTbRs_PYISQ8v_uwO0_mDg_X"
22
+ playlist = Playlist(url)
23
+ assert playlist.playlist_url == url
24
+
25
+
26
+ def test_init_with_watch_url():
27
+ url = (
28
+ "https://www.youtube.com/watch?v=1KeYzjILqDo&"
29
+ "list=PLynhp4cZEpTbRs_PYISQ8v_uwO0_mDg_X&index=2&t=661s"
30
+ )
31
+ playlist = Playlist(url)
32
+ assert (
33
+ playlist.playlist_url
34
+ == "https://www.youtube.com/playlist?list=PLynhp4cZEpTbRs_PYISQ8v_uwO0_mDg_X"
35
+ )
36
+
37
+
38
+ @mock.patch("pytube.contrib.playlist.request.get")
39
+ def test_parse_links(request_get, playlist_html):
40
+ url = "https://www.fakeurl.com/playlist?list=whatever"
41
+ request_get.return_value = playlist_html
42
+ playlist = Playlist(url)
43
+ playlist._find_load_more_url = MagicMock(return_value=None)
44
+ links = playlist.parse_links()
45
+ request_get.assert_called()
46
+ assert links == [
47
+ "/watch?v=ujTCoH21GlA",
48
+ "/watch?v=45ryDIPHdGg",
49
+ "/watch?v=1BYu65vLKdA",
50
+ "/watch?v=3AQ_74xrch8",
51
+ "/watch?v=ddqQUz9mZaM",
52
+ "/watch?v=vwLT6bZrHEE",
53
+ "/watch?v=TQKI0KE-JYY",
54
+ "/watch?v=dNBvQ38MlT8",
55
+ "/watch?v=JHxyrMgOUWI",
56
+ "/watch?v=l2I8NycJMCY",
57
+ "/watch?v=g1Zbuk1gAfk",
58
+ "/watch?v=zixd-si9Q-o",
59
+ ]
60
+
61
+
62
+ @mock.patch("pytube.contrib.playlist.request.get")
63
+ def test_populate_video_urls(request_get, playlist_html):
64
+ url = "https://www.fakeurl.com/playlist?list=whatever"
65
+ request_get.return_value = playlist_html
66
+ playlist = Playlist(url)
67
+ playlist._find_load_more_url = MagicMock(return_value=None)
68
+ playlist.populate_video_urls()
69
+ request_get.assert_called()
70
+ assert playlist.video_urls == [
71
+ "https://www.youtube.com/watch?v=ujTCoH21GlA",
72
+ "https://www.youtube.com/watch?v=45ryDIPHdGg",
73
+ "https://www.youtube.com/watch?v=1BYu65vLKdA",
74
+ "https://www.youtube.com/watch?v=3AQ_74xrch8",
75
+ "https://www.youtube.com/watch?v=ddqQUz9mZaM",
76
+ "https://www.youtube.com/watch?v=vwLT6bZrHEE",
77
+ "https://www.youtube.com/watch?v=TQKI0KE-JYY",
78
+ "https://www.youtube.com/watch?v=dNBvQ38MlT8",
79
+ "https://www.youtube.com/watch?v=JHxyrMgOUWI",
80
+ "https://www.youtube.com/watch?v=l2I8NycJMCY",
81
+ "https://www.youtube.com/watch?v=g1Zbuk1gAfk",
82
+ "https://www.youtube.com/watch?v=zixd-si9Q-o",
83
+ ]
tests/mocks/playlist.html ADDED
The diff for this file is too large to render. See raw diff
 
tests/test_captions.py CHANGED
@@ -85,3 +85,12 @@ def test_repr():
85
  {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
86
  )
87
  assert str(caption) == '<Caption lang="name1" code="en">'
 
 
 
 
 
 
 
 
 
 
85
  {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
86
  )
87
  assert str(caption) == '<Caption lang="name1" code="en">'
88
+
89
+
90
+ @mock.patch("pytube.request.get")
91
+ def test_xml_captions(request_get):
92
+ request_get.return_value = "test"
93
+ caption = Caption(
94
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
95
+ )
96
+ assert caption.xml_captions == "test"
tests/test_cli.py CHANGED
@@ -16,7 +16,7 @@ def test_download_when_itag_not_found(youtube):
16
  youtube.streams.all.return_value = []
17
  youtube.streams.get_by_itag.return_value = None
18
  with pytest.raises(SystemExit):
19
- cli.download(youtube, 123)
20
  youtube.streams.get_by_itag.assert_called_with(123)
21
 
22
 
@@ -28,7 +28,7 @@ def test_download_when_itag_is_found(youtube, stream):
28
  with patch.object(
29
  youtube.streams, "get_by_itag", wraps=youtube.streams.get_by_itag
30
  ) as wrapped_itag:
31
- cli.download(youtube, 123)
32
  wrapped_itag.assert_called_with(123)
33
  youtube.register_on_progress_callback.assert_called_with(cli.on_progress)
34
  stream.download.assert_called()
@@ -115,14 +115,14 @@ def test_parse_args_truthy():
115
 
116
 
117
  @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
118
- def test_main_download(youtube):
119
  parser = argparse.ArgumentParser()
120
  args = parse_args(parser, ["urlhere", "--itag=10"])
121
  cli._parse_args = MagicMock(return_value=args)
122
- cli.download = MagicMock()
123
  cli.main()
124
  youtube.assert_called()
125
- cli.download.assert_called()
126
 
127
 
128
  @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
@@ -156,3 +156,14 @@ def test_main_download_caption(youtube):
156
  cli.main()
157
  youtube.assert_called()
158
  cli.download_caption.assert_called()
 
 
 
 
 
 
 
 
 
 
 
 
16
  youtube.streams.all.return_value = []
17
  youtube.streams.get_by_itag.return_value = None
18
  with pytest.raises(SystemExit):
19
+ cli.download_by_itag(youtube, 123)
20
  youtube.streams.get_by_itag.assert_called_with(123)
21
 
22
 
 
28
  with patch.object(
29
  youtube.streams, "get_by_itag", wraps=youtube.streams.get_by_itag
30
  ) as wrapped_itag:
31
+ cli.download_by_itag(youtube, 123)
32
  wrapped_itag.assert_called_with(123)
33
  youtube.register_on_progress_callback.assert_called_with(cli.on_progress)
34
  stream.download.assert_called()
 
115
 
116
 
117
  @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
118
+ def test_main_download_by_itag(youtube):
119
  parser = argparse.ArgumentParser()
120
  args = parse_args(parser, ["urlhere", "--itag=10"])
121
  cli._parse_args = MagicMock(return_value=args)
122
+ cli.download_by_itag = MagicMock()
123
  cli.main()
124
  youtube.assert_called()
125
+ cli.download_by_itag.assert_called()
126
 
127
 
128
  @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
 
156
  cli.main()
157
  youtube.assert_called()
158
  cli.download_caption.assert_called()
159
+
160
+
161
+ @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
162
+ def test_download_by_resolution(youtube):
163
+ parser = argparse.ArgumentParser()
164
+ args = parse_args(parser, ["urlhere", "-r", "320p"])
165
+ cli._parse_args = MagicMock(return_value=args)
166
+ cli.download_by_resolution = MagicMock()
167
+ cli.main()
168
+ youtube.assert_called()
169
+ cli.download_by_resolution.assert_called()
tests/test_query.py CHANGED
@@ -137,3 +137,17 @@ def test_get_by_itag(cipher_signature):
137
 
138
  def test_get_by_non_existent_itag(cipher_signature):
139
  assert not cipher_signature.streams.get_by_itag(22983)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  def test_get_by_non_existent_itag(cipher_signature):
139
  assert not cipher_signature.streams.get_by_itag(22983)
140
+
141
+
142
+ def test_get_by_resolution(cipher_signature):
143
+ assert cipher_signature.streams.get_by_resolution("360p").itag == 18
144
+
145
+
146
+ def test_get_lowest_resolution(cipher_signature):
147
+ assert cipher_signature.streams.get_lowest_resolution().itag == 18
148
+
149
+
150
+ def test_filter_is_dash(cipher_signature):
151
+ streams = cipher_signature.streams.filter(is_dash=False).all()
152
+ itags = [s.itag for s in streams]
153
+ assert itags == [18, 398, 397, 396, 395, 394]
tests/test_request.py CHANGED
@@ -3,11 +3,13 @@ import os
3
 
4
  from unittest import mock
5
 
 
 
6
  from pytube import request
7
 
8
 
9
  @mock.patch("pytube.request.urlopen")
10
- def test_get_streaming(mock_urlopen):
11
  fake_stream_binary = [
12
  iter(os.urandom(8 * 1024)),
13
  iter(os.urandom(8 * 1024)),
@@ -17,18 +19,18 @@ def test_get_streaming(mock_urlopen):
17
  response = mock.Mock()
18
  response.read.side_effect = fake_stream_binary
19
  mock_urlopen.return_value = response
20
- response = request.get("http://fakeassurl.gov", streaming=True)
21
  call_count = len(list(response))
22
 
23
  assert call_count == 3
24
 
25
 
26
  @mock.patch("pytube.request.urlopen")
27
- def test_get_headers(mock_urlopen):
28
  response = mock.Mock()
29
  response.info.return_value = {"content-length": "16384"}
30
  mock_urlopen.return_value = response
31
- response = request.get("http://fakeassurl.gov", headers=True)
32
  assert response == {"content-length": "16384"}
33
 
34
 
@@ -39,3 +41,8 @@ def test_get(mock_urlopen):
39
  mock_urlopen.return_value = response
40
  response = request.get("http://fakeassurl.gov")
41
  assert response == "<html></html>"
 
 
 
 
 
 
3
 
4
  from unittest import mock
5
 
6
+ import pytest
7
+
8
  from pytube import request
9
 
10
 
11
  @mock.patch("pytube.request.urlopen")
12
+ def test_streaming(mock_urlopen):
13
  fake_stream_binary = [
14
  iter(os.urandom(8 * 1024)),
15
  iter(os.urandom(8 * 1024)),
 
19
  response = mock.Mock()
20
  response.read.side_effect = fake_stream_binary
21
  mock_urlopen.return_value = response
22
+ response = request.stream("http://fakeassurl.gov")
23
  call_count = len(list(response))
24
 
25
  assert call_count == 3
26
 
27
 
28
  @mock.patch("pytube.request.urlopen")
29
+ def test_headers(mock_urlopen):
30
  response = mock.Mock()
31
  response.info.return_value = {"content-length": "16384"}
32
  mock_urlopen.return_value = response
33
+ response = request.headers("http://fakeassurl.gov")
34
  assert response == {"content-length": "16384"}
35
 
36
 
 
41
  mock_urlopen.return_value = response
42
  response = request.get("http://fakeassurl.gov")
43
  assert response == "<html></html>"
44
+
45
+
46
+ def test_get_non_http():
47
+ with pytest.raises(ValueError):
48
+ request.get("file://bad")
tests/test_streams.py CHANGED
@@ -8,8 +8,8 @@ from pytube import Stream
8
 
9
 
10
  def test_filesize(cipher_signature, mocker):
11
- mocker.patch.object(request, "get")
12
- request.get.return_value = {"content-length": "6796391"}
13
  assert cipher_signature.streams.first().filesize == 6796391
14
 
15
 
@@ -36,12 +36,10 @@ def test_title(cipher_signature):
36
 
37
 
38
  def test_download(cipher_signature, mocker):
39
- mocker.patch.object(request, "get")
40
- request.get.side_effect = [
41
- {"content-length": "16384"},
42
- {"content-length": "16384"},
43
- iter([str(random.getrandbits(8 * 1024))]),
44
- ]
45
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
46
  stream = cipher_signature.streams.first()
47
  stream.download()
@@ -61,12 +59,11 @@ def test_on_progress_hook(cipher_signature, mocker):
61
  callback_fn = mock.MagicMock()
62
  cipher_signature.register_on_progress_callback(callback_fn)
63
 
64
- mocker.patch.object(request, "get")
65
- request.get.side_effect = [
66
- {"content-length": "16384"},
67
- {"content-length": "16384"},
68
- iter([str(random.getrandbits(8 * 1024))]),
69
- ]
70
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
71
  stream = cipher_signature.streams.first()
72
  stream.download()
@@ -81,12 +78,11 @@ def test_on_complete_hook(cipher_signature, mocker):
81
  callback_fn = mock.MagicMock()
82
  cipher_signature.register_on_complete_callback(callback_fn)
83
 
84
- mocker.patch.object(request, "get")
85
- request.get.side_effect = [
86
- {"content-length": "16384"},
87
- {"content-length": "16384"},
88
- iter([str(random.getrandbits(8 * 1024))]),
89
- ]
90
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
91
  stream = cipher_signature.streams.first()
92
  stream.download()
 
8
 
9
 
10
  def test_filesize(cipher_signature, mocker):
11
+ mocker.patch.object(request, "headers")
12
+ request.headers.return_value = {"content-length": "6796391"}
13
  assert cipher_signature.streams.first().filesize == 6796391
14
 
15
 
 
36
 
37
 
38
  def test_download(cipher_signature, mocker):
39
+ mocker.patch.object(request, "headers")
40
+ request.headers.return_value = {"content-length": "16384"}
41
+ mocker.patch.object(request, "stream")
42
+ request.stream.return_value = iter([str(random.getrandbits(8 * 1024))])
 
 
43
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
44
  stream = cipher_signature.streams.first()
45
  stream.download()
 
59
  callback_fn = mock.MagicMock()
60
  cipher_signature.register_on_progress_callback(callback_fn)
61
 
62
+ mocker.patch.object(request, "headers")
63
+ request.headers.return_value = {"content-length": "16384"}
64
+ mocker.patch.object(request, "stream")
65
+ request.stream.return_value = iter([str(random.getrandbits(8 * 1024))])
66
+
 
67
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
68
  stream = cipher_signature.streams.first()
69
  stream.download()
 
78
  callback_fn = mock.MagicMock()
79
  cipher_signature.register_on_complete_callback(callback_fn)
80
 
81
+ mocker.patch.object(request, "headers")
82
+ request.headers.return_value = {"content-length": "16384"}
83
+ mocker.patch.object(request, "stream")
84
+ request.stream.return_value = iter([str(random.getrandbits(8 * 1024))])
85
+
 
86
  with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
87
  stream = cipher_signature.streams.first()
88
  stream.download()