hbmartin committed on Jan 25, 2020

Commit

4245a85

unverified ·

2 Parent(s): 86b6955 9a04477

Merge pull request #11 from hbmartin/resolution-selection

Browse files

Files changed (18) hide show

README.md +3 -0
pytube/__main__.py +1 -1
pytube/captions.py +1 -1
pytube/cli.py +47 -7
pytube/contrib/playlist.py +29 -41
pytube/helpers.py +10 -1
pytube/itags.py +28 -0
pytube/query.py +40 -1
pytube/request.py +33 -21
pytube/streams.py +6 -9
tests/conftest.py +12 -0
tests/contrib/test_playlist.py +67 -0
tests/mocks/playlist.html +0 -0
tests/test_captions.py +9 -0
tests/test_cli.py +16 -5
tests/test_query.py +14 -0
tests/test_request.py +11 -4
tests/test_streams.py +16 -20

README.md CHANGED Viewed

@@ -244,6 +244,9 @@ Finally, if you're filing a bug report, the cli contains a switch called ``--bui
 <a href="https://deepsource.io/gh/hbmartin/pytube3/?ref=repository-badge" target="_blank"><img alt="DeepSource" title="DeepSource" src="https://static.deepsource.io/deepsource-badge-light-mini.svg"></a>
 <a href="https://www.codacy.com/manual/hbmartin/pytube3?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=hbmartin/pytube3&amp;utm_campaign=Badge_Grade"><img src="https://api.codacy.com/project/badge/Grade/53794f06983a46829620b3284c6a5596"/></a>
 Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.

 <a href="https://deepsource.io/gh/hbmartin/pytube3/?ref=repository-badge" target="_blank"><img alt="DeepSource" title="DeepSource" src="https://static.deepsource.io/deepsource-badge-light-mini.svg"></a>
 <a href="https://www.codacy.com/manual/hbmartin/pytube3?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=hbmartin/pytube3&amp;utm_campaign=Badge_Grade"><img src="https://api.codacy.com/project/badge/Grade/53794f06983a46829620b3284c6a5596"/></a>
+<a href="https://codecov.io/gh/hbmartin/pytube3">
+  <img src="https://codecov.io/gh/hbmartin/pytube3/branch/master/graph/badge.svg" />
+</a>
 Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.

pytube/__main__.py CHANGED Viewed

@@ -61,7 +61,7 @@ class YouTube:
         # the url to vid info, parsed from watch html
         self.vid_info_url: Optional[str] = None
-        self.vid_info_raw = None  # content fetched by vid_info_url
         self.vid_info: Optional[Dict] = None  # parsed content of vid_info_raw
         self.watch_html: Optional[str] = None  # the html of /watch?v=<video_id>

         # the url to vid info, parsed from watch html
         self.vid_info_url: Optional[str] = None
+        self.vid_info_raw: Optional[str] = None  # content fetched by vid_info_url
         self.vid_info: Optional[Dict] = None  # parsed content of vid_info_raw
         self.watch_html: Optional[str] = None  # the html of /watch?v=<video_id>

pytube/captions.py CHANGED Viewed

@@ -25,7 +25,7 @@ class Caption:
         self.code = caption_track["languageCode"]
     @property
-    def xml_captions(self):
         """Download the xml caption tracks."""
         return request.get(self.url)

         self.code = caption_track["languageCode"]
     @property
+    def xml_captions(self) -> str:
         """Download the xml caption tracks."""
         return request.get(self.url)

pytube/cli.py CHANGED Viewed

@@ -11,7 +11,7 @@ import sys
 from io import BufferedWriter
 from typing import Tuple, Any, Optional, List
-from pytube import __version__, CaptionQuery
 from pytube import YouTube
@@ -36,9 +36,11 @@ def main():
     if args.build_playback_report:
         build_playback_report(youtube)
     if args.itag:
-        download(youtube=youtube, itag=args.itag)
     if hasattr(args, "caption_code"):
         download_caption(youtube=youtube, lang_code=args.caption_code)
 def _parse_args(
@@ -51,6 +53,9 @@ def _parse_args(
     parser.add_argument(
         "--itag", type=int, help="The itag for the desired stream",
     )
     parser.add_argument(
         "-l",
         "--list",
@@ -166,12 +171,18 @@ def on_progress(
     display_progress_bar(bytes_received, filesize)
-def download(youtube: YouTube, itag: int) -> None:
     """Start downloading a YouTube video.
     :param YouTube youtube:
         A valid YouTube object.
-    :param str itag:
         YouTube format identifier code.
     """
@@ -185,10 +196,39 @@ def download(youtube: YouTube, itag: int) -> None:
         sys.exit()
     youtube.register_on_progress_callback(on_progress)
-    print("\n{fn} | {fs} bytes".format(fn=stream.default_filename, fs=stream.filesize,))
     try:
-        stream.download()
-        sys.stdout.write("\n")
     except KeyboardInterrupt:
         sys.exit()

 from io import BufferedWriter
 from typing import Tuple, Any, Optional, List
+from pytube import __version__, CaptionQuery, Stream
 from pytube import YouTube
     if args.build_playback_report:
         build_playback_report(youtube)
     if args.itag:
+        download_by_itag(youtube=youtube, itag=args.itag)
     if hasattr(args, "caption_code"):
         download_caption(youtube=youtube, lang_code=args.caption_code)
+    if args.resolution:
+        download_by_resolution(youtube=youtube, resolution=args.resolution)
 def _parse_args(
     parser.add_argument(
         "--itag", type=int, help="The itag for the desired stream",
     )
+    parser.add_argument(
+        "-r", "--resolution", type=str, help="The resolution for the desired stream",
+    )
     parser.add_argument(
         "-l",
         "--list",
     display_progress_bar(bytes_received, filesize)
+def _download(stream: Stream) -> None:
+    print("\n{fn} | {fs} bytes".format(fn=stream.default_filename, fs=stream.filesize))
+    stream.download()
+    sys.stdout.write("\n")
+def download_by_itag(youtube: YouTube, itag: int) -> None:
     """Start downloading a YouTube video.
     :param YouTube youtube:
         A valid YouTube object.
+    :param int itag:
         YouTube format identifier code.
     """
         sys.exit()
     youtube.register_on_progress_callback(on_progress)
+    try:
+        _download(stream)
+    except KeyboardInterrupt:
+        sys.exit()
+def download_by_resolution(youtube: YouTube, resolution: str) -> None:
+    """Start downloading a YouTube video.
+    :param YouTube youtube:
+        A valid YouTube object.
+    :param str resolution:
+        YouTube video resolution.
+    """
+    # TODO(nficano): allow download target to be specified
+    # TODO(nficano): allow dash itags to be selected
+    stream = youtube.streams.get_by_resolution(resolution)
+    if stream is None:
+        print(
+            "Could not find a stream with resolution: {resolution}".format(
+                resolution=resolution
+            )
+        )
+        print("Try one of these:")
+        display_streams(youtube)
+        sys.exit()
+    youtube.register_on_progress_callback(on_progress)
     try:
+        _download(stream)
     except KeyboardInterrupt:
         sys.exit()

pytube/contrib/playlist.py CHANGED Viewed

@@ -6,6 +6,7 @@ import logging
 import re
 from collections import OrderedDict
 from typing import List, Optional
 from pytube import request
 from pytube.__main__ import YouTube
@@ -19,40 +20,28 @@ class Playlist:
     """
     def __init__(self, url: str, suppress_exception: bool = False):
-        self.playlist_url = url
         self.video_urls: List[str] = []
         self.suppress_exception = suppress_exception
-    def construct_playlist_url(self) -> str:
-        """There are two kinds of playlist urls in YouTube. One that contains
-        watch?v= in URL, another one contains the "playlist?list=" portion. It
-        is preferable to work with the later one.
-        :return: playlist url
-        """
-        if "watch?v=" in self.playlist_url:
             base_url = "https://www.youtube.com/playlist?list="
-            playlist_code = self.playlist_url.split("&list=")[1]
-            return base_url + playlist_code
-        # url is already in the desired format, so just return it
-        return self.playlist_url
     @staticmethod
-    def _load_more_url(req):
         """Given an html page or a fragment thereof, looks for
         and returns the "load more" url if found.
         """
-        try:
-            load_more_url = "https://www.youtube.com" + re.search(
-                r"data-uix-load-more-href=\"(/browse_ajax\?"
-                'action_continuation=.*?)"',
-                req,
-            ).group(1)
-        except AttributeError:
-            load_more_url = ""
-        return load_more_url
     def parse_links(self) -> List[str]:
         """Parse the video links from the page source, extracts and
@@ -60,8 +49,7 @@ class Playlist:
         It's an alternative for BeautifulSoup
         """
-        url = self.construct_playlist_url()
-        req = request.get(url)
         # split the page source by line and process each line
         content = [x for x in req.split("\n") if "pl-video-title-link" in x]
@@ -69,8 +57,8 @@ class Playlist:
         # The above only returns 100 or fewer links
         # Simulating a browser request for the load more link
-        load_more_url = self._load_more_url(req)
-        while len(load_more_url) > 0:  # there is an url found
             logger.debug("load more url: %s", load_more_url)
             req = request.get(load_more_url)
             load_more = json.loads(req)
@@ -79,11 +67,13 @@ class Playlist:
             )
             # remove duplicates
             link_list.extend(list(OrderedDict.fromkeys(videos)))
-            load_more_url = self._load_more_url(load_more["load_more_widget_html"],)
         return link_list
-    def populate_video_urls(self):
         """Construct complete links of all the videos in playlist and
         populate video_urls list
@@ -120,13 +110,12 @@ class Playlist:
         download_path: Optional[str] = None,
         prefix_number: bool = True,
         reverse_numbering: bool = False,
     ) -> None:
         """Download all the videos in the playlist. Initially, download
         resolution is 720p (or highest available), later more option
         should be added to download resolution of choice
-        TODO(nficano): Add option to download resolution of user's choice
         :param download_path:
             (optional) Output path for the playlist If one is not
             specified, defaults to the current working directory.
@@ -140,6 +129,9 @@ class Playlist:
             (optional) Lets you number playlists in reverse, since some
             playlists are ordered newest -> oldest.
         :type reverse_numbering: bool
         """
         self.populate_video_urls()
@@ -156,14 +148,11 @@ class Playlist:
                 if not self.suppress_exception:
                     raise e
             else:
-                # TODO: this should not be hardcoded to a single user's
-                # preference
                 dl_stream = (
-                    yt.streams.filter(progressive=True, subtype="mp4",)
-                    .order_by("resolution")
-                    .desc()
-                    .first()
                 )
                 logger.debug("download path: %s", download_path)
                 if prefix_number:
@@ -176,8 +165,7 @@ class Playlist:
     def title(self) -> Optional[str]:
         """return playlist title (name)"""
-        url = self.construct_playlist_url()
-        req = request.get(url)
         open_tag = "<title>"
         end_tag = "</title>"
         pattern = re.compile(open_tag + "(.+?)" + end_tag)

 import re
 from collections import OrderedDict
 from typing import List, Optional
+from urllib.parse import parse_qs
 from pytube import request
 from pytube.__main__ import YouTube
     """
     def __init__(self, url: str, suppress_exception: bool = False):
         self.video_urls: List[str] = []
         self.suppress_exception = suppress_exception
+        self.playlist_url: str = url
+        if "watch?v=" in url:
             base_url = "https://www.youtube.com/playlist?list="
+            query_parameters = parse_qs(url.split("?")[1])
+            self.playlist_url = base_url + query_parameters["list"][0]
     @staticmethod
+    def _find_load_more_url(req: str) -> Optional[str]:
         """Given an html page or a fragment thereof, looks for
         and returns the "load more" url if found.
         """
+        match = re.search(
+            r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
+            req,
+        )
+        if match:
+            return "https://www.youtube.com" + match.group(1)
+        return None
     def parse_links(self) -> List[str]:
         """Parse the video links from the page source, extracts and
         It's an alternative for BeautifulSoup
         """
+        req = request.get(self.playlist_url)
         # split the page source by line and process each line
         content = [x for x in req.split("\n") if "pl-video-title-link" in x]
         # The above only returns 100 or fewer links
         # Simulating a browser request for the load more link
+        load_more_url = self._find_load_more_url(req)
+        while load_more_url:  # there is an url found
             logger.debug("load more url: %s", load_more_url)
             req = request.get(load_more_url)
             load_more = json.loads(req)
             )
             # remove duplicates
             link_list.extend(list(OrderedDict.fromkeys(videos)))
+            load_more_url = self._find_load_more_url(
+                load_more["load_more_widget_html"],
+            )
         return link_list
+    def populate_video_urls(self) -> None:
         """Construct complete links of all the videos in playlist and
         populate video_urls list
         download_path: Optional[str] = None,
         prefix_number: bool = True,
         reverse_numbering: bool = False,
+        resolution: str = "720p",
     ) -> None:
         """Download all the videos in the playlist. Initially, download
         resolution is 720p (or highest available), later more option
         should be added to download resolution of choice
         :param download_path:
             (optional) Output path for the playlist If one is not
             specified, defaults to the current working directory.
             (optional) Lets you number playlists in reverse, since some
             playlists are ordered newest -> oldest.
         :type reverse_numbering: bool
+        :param resolution:
+            Video resolution e.g. "720p", "480p", "360p", "240p", "144p"
+        :type resolution: str
         """
         self.populate_video_urls()
                 if not self.suppress_exception:
                     raise e
             else:
                 dl_stream = (
+                    yt.streams.get_by_resolution(resolution=resolution)
+                    or yt.streams.get_lowest_resolution()
                 )
+                assert dl_stream is not None
                 logger.debug("download path: %s", download_path)
                 if prefix_number:
     def title(self) -> Optional[str]:
         """return playlist title (name)"""
+        req = request.get(self.playlist_url)
         open_tag = "<title>"
         end_tag = "</title>"
         pattern = re.compile(open_tag + "(.+?)" + end_tag)

pytube/helpers.py CHANGED Viewed

@@ -1,9 +1,10 @@
 # -*- coding: utf-8 -*-
 """Various helper functions implemented by pytube."""
 import logging
 import pprint
 import re
 from pytube.exceptions import RegexMatchError
@@ -99,3 +100,11 @@ def create_logger(level: int = logging.ERROR) -> logging.Logger:
     logger.addHandler(handler)
     logger.setLevel(level)
     return logger

 # -*- coding: utf-8 -*-
 """Various helper functions implemented by pytube."""
+import functools
 import logging
 import pprint
 import re
+from typing import TypeVar, Callable
 from pytube.exceptions import RegexMatchError
     logger.addHandler(handler)
     logger.setLevel(level)
     return logger
+GenericType = TypeVar("GenericType")
+def cache(func: Callable[..., GenericType]) -> GenericType:
+    """mypy compatible annotation wrapper for lru_cache."""
+    return functools.lru_cache()(func)  # type: ignore

pytube/itags.py CHANGED Viewed

@@ -98,6 +98,30 @@ _3D = [82, 83, 84, 85, 100, 101, 102]
 LIVE = [91, 92, 93, 94, 95, 96, 132, 151]
 DASH_MP4_VIDEO = [133, 134, 135, 136, 137, 138, 160, 212, 264, 266, 298, 299]
 DASH_MP4_AUDIO = [139, 140, 141, 256, 258, 325, 328]
 def get_format_profile(itag: int) -> Dict:
@@ -118,4 +142,8 @@ def get_format_profile(itag: int) -> Dict:
         "is_3d": itag in _3D,
         "is_hdr": itag in HDR,
         "fps": 60 if itag in _60FPS else 30,
     }

 LIVE = [91, 92, 93, 94, 95, 96, 132, 151]
 DASH_MP4_VIDEO = [133, 134, 135, 136, 137, 138, 160, 212, 264, 266, 298, 299]
 DASH_MP4_AUDIO = [139, 140, 141, 256, 258, 325, 328]
+DASH_WEBM_VIDEO = [
+    167,
+    168,
+    169,
+    170,
+    218,
+    219,
+    278,
+    242,
+    243,
+    244,
+    245,
+    246,
+    247,
+    248,
+    271,
+    272,
+    302,
+    303,
+    308,
+    313,
+    315,
+]
+DASH_WEBM_AUDIO = [171, 172, 249, 250, 251]
 def get_format_profile(itag: int) -> Dict:
         "is_3d": itag in _3D,
         "is_hdr": itag in HDR,
         "fps": 60 if itag in _60FPS else 30,
+        "is_dash": itag in DASH_MP4_VIDEO
+        or itag in DASH_MP4_AUDIO
+        or itag in DASH_WEBM_VIDEO
+        or itag in DASH_WEBM_AUDIO,
     }

pytube/query.py CHANGED Viewed

@@ -34,6 +34,7 @@ class StreamQuery:
         only_video=None,
         progressive=None,
         adaptive=None,
         custom_filter_functions=None,
     ):
         """Apply the given filtering criterion.
@@ -103,6 +104,9 @@ class StreamQuery:
             Excludes progressive streams (audio and video are on separate
             tracks).
         :param bool only_audio:
             Excludes streams with video tracks.
@@ -161,6 +165,9 @@ class StreamQuery:
             for fn in custom_filter_functions:
                 filters.append(fn)
         fmt_streams = self.fmt_streams
         for fn in filters:
             fmt_streams = list(filter(fn, fmt_streams))
@@ -221,7 +228,7 @@ class StreamQuery:
         """
         return self
-    def get_by_itag(self, itag) -> Optional[Stream]:
         """Get the corresponding :class:`Stream <Stream>` for a given itag.
         :param int itag:
@@ -234,6 +241,38 @@ class StreamQuery:
         """
         return self.itag_index.get(int(itag))
     def first(self) -> Optional[Stream]:
         """Get the first :class:`Stream <Stream>` in the results.

         only_video=None,
         progressive=None,
         adaptive=None,
+        is_dash=None,
         custom_filter_functions=None,
     ):
         """Apply the given filtering criterion.
             Excludes progressive streams (audio and video are on separate
             tracks).
+        :param bool is_dash:
+            Include/exclude dash streams.
         :param bool only_audio:
             Excludes streams with video tracks.
             for fn in custom_filter_functions:
                 filters.append(fn)
+        if is_dash is not None:
+            filters.append(lambda s: s.is_dash == is_dash)
         fmt_streams = self.fmt_streams
         for fn in filters:
             fmt_streams = list(filter(fn, fmt_streams))
         """
         return self
+    def get_by_itag(self, itag: int) -> Optional[Stream]:
         """Get the corresponding :class:`Stream <Stream>` for a given itag.
         :param int itag:
         """
         return self.itag_index.get(int(itag))
+    def get_by_resolution(self, resolution: str) -> Optional[Stream]:
+        """Get the corresponding :class:`Stream <Stream>` for a given resolution.
+        Stream must be a progressive mp4.
+        :param str resolution:
+            Video resolution e.g. "720p", "480p", "360p", "240p", "144p"
+        :rtype: :class:`Stream <Stream>` or None
+        :returns:
+            The :class:`Stream <Stream>` matching the given resolution or
+            None if not found.
+        """
+        return self.filter(
+            progressive=True, subtype="mp4", resolution=resolution
+        ).first()
+    def get_lowest_resolution(self) -> Optional[Stream]:
+        """Get lowest resolution stream that is a progressive mp4.
+        :rtype: :class:`Stream <Stream>` or None
+        :returns:
+            The lowest resolution progressive mp4 :class:`Stream <Stream>`
+            or None if not found.
+        """
+        return (
+            self.filter(progressive=True, subtype="mp4")
+            .order_by("resolution")
+            .desc()
+            .last()
+        )
     def first(self) -> Optional[Stream]:
         """Get the first :class:`Stream <Stream>` in the results.

pytube/request.py CHANGED Viewed

@@ -1,39 +1,51 @@
 # -*- coding: utf-8 -*-
 """Implements a simple wrapper around urlopen."""
 from urllib.request import Request
 from urllib.request import urlopen
-def get(url, headers=False, streaming=False, chunk_size=8192):
     """Send an http GET request.
     :param str url:
         The URL to perform the GET request for.
-    :param bool headers:
-        Only return the http headers.
-    :param bool streaming:
-        Returns the response body in chunks via a generator.
-    :param int chunk_size:
-        The size in bytes of each chunk. Defaults to 8*1024
     """
-    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
-    response = urlopen(req)
-    if streaming:
-        return stream_response(response, chunk_size)
-    if headers:
-        # https://github.com/nficano/pytube/issues/160
-        return {k.lower(): v for k, v in response.info().items()}
-    return response.read().decode("utf-8")
-def stream_response(response, chunk_size=8 * 1024):
-    """Read the response in chunks."""
     while True:
         buf = response.read(chunk_size)
         if not buf:
             break
         yield buf

 # -*- coding: utf-8 -*-
 """Implements a simple wrapper around urlopen."""
+from typing import Any, Iterable, Dict
 from urllib.request import Request
 from urllib.request import urlopen
+def _execute_request(url: str) -> Any:
+    if not url.lower().startswith("http"):
+        raise ValueError
+    return urlopen(Request(url, headers={"User-Agent": "Mozilla/5.0"}))  # nosec
+def get(url) -> str:
     """Send an http GET request.
     :param str url:
         The URL to perform the GET request for.
+    :rtype: str
+    :returns:
+        Response body decoded as a UTF-8 string
     """
+    return _execute_request(url).read().decode("utf-8")
+def stream(url: str, chunk_size: int = 8192) -> Iterable[bytes]:
+    """Read the response in chunks.
+    :param str url:
+        The URL to perform the GET request for.
+    :param int chunk_size:
+        The size in bytes of each chunk. Defaults to 8*1024
+    :rtype: Iterable[bytes]
+    """
+    response = _execute_request(url)
     while True:
         buf = response.read(chunk_size)
         if not buf:
             break
         yield buf
+def headers(url: str) -> Dict:
+    """Fetch the headers returned by an http GET request.
+    :param str url:
+        The URL to perform the GET request for.
+    :rtype: dict
+    :returns:
+        dictionary of lowercase headers
+    """
+    return {k.lower(): v for k, v in _execute_request(url).info().items()}

pytube/streams.py CHANGED Viewed

@@ -59,6 +59,7 @@ class Stream:
         self.codecs: List[str] = []  # audio/video encoders (e.g.: vp8, mp4a)
         self.audio_codec = None  # audio codec of the stream (e.g.: vorbis)
         self.video_codec = None  # video codec of the stream (e.g.: vp8)
         # Iterates over the key/values of stream and sets them as class
         # attributes. This is an anti-pattern and should be removed.
@@ -118,9 +119,7 @@ class Stream:
         :rtype: bool
         """
-        if self.is_progressive:
-            return True
-        return self.type == "audio"
     @property
     def includes_video_track(self) -> bool:
@@ -128,9 +127,7 @@ class Stream:
         :rtype: bool
         """
-        if self.is_progressive:
-            return True
-        return self.type == "video"
     def parse_codecs(self) -> Tuple:
         """Get the video/audio codecs from list of codecs.
@@ -164,7 +161,7 @@ class Stream:
             Filesize (in bytes) of the stream.
         """
         if self._filesize is None:
-            headers = request.get(self.url, headers=True)
             self._filesize = int(headers["content-length"])
         return self._filesize
@@ -243,7 +240,7 @@ class Stream:
         )
         with open(file_path, "wb") as fh:
-            for chunk in request.get(self.url, streaming=True):
                 # reduce the (bytes) remainder by the length of the chunk.
                 bytes_remaining -= len(chunk)
                 # send to the on_progress callback.
@@ -262,7 +259,7 @@ class Stream:
             "downloading (%s total bytes) file to BytesIO buffer", self.filesize,
         )
-        for chunk in request.get(self.url, streaming=True):
             # reduce the (bytes) remainder by the length of the chunk.
             bytes_remaining -= len(chunk)
             # send to the on_progress callback.

         self.codecs: List[str] = []  # audio/video encoders (e.g.: vp8, mp4a)
         self.audio_codec = None  # audio codec of the stream (e.g.: vorbis)
         self.video_codec = None  # video codec of the stream (e.g.: vp8)
+        self.is_dash: Optional[bool] = None
         # Iterates over the key/values of stream and sets them as class
         # attributes. This is an anti-pattern and should be removed.
         :rtype: bool
         """
+        return self.is_progressive or self.type == "audio"
     @property
     def includes_video_track(self) -> bool:
         :rtype: bool
         """
+        return self.is_progressive or self.type == "video"
     def parse_codecs(self) -> Tuple:
         """Get the video/audio codecs from list of codecs.
             Filesize (in bytes) of the stream.
         """
         if self._filesize is None:
+            headers = request.headers(self.url)
             self._filesize = int(headers["content-length"])
         return self._filesize
         )
         with open(file_path, "wb") as fh:
+            for chunk in request.stream(self.url):
                 # reduce the (bytes) remainder by the length of the chunk.
                 bytes_remaining -= len(chunk)
                 # send to the on_progress callback.
             "downloading (%s total bytes) file to BytesIO buffer", self.filesize,
         )
+        for chunk in request.stream(self.url):
             # reduce the (bytes) remainder by the length of the chunk.
             bytes_remaining -= len(chunk)
             # send to the on_progress callback.

tests/conftest.py CHANGED Viewed

@@ -50,3 +50,15 @@ def age_restricted():
     """Youtube instance initialized with video id zRbsm3e2ltw."""
     filename = "yt-video-zRbsm3e2ltw-1507777044.json.gz"
     return load_playback_file(filename)

     """Youtube instance initialized with video id zRbsm3e2ltw."""
     filename = "yt-video-zRbsm3e2ltw-1507777044.json.gz"
     return load_playback_file(filename)
+@pytest.fixture
+def playlist_html():
+    """Youtube playlist HTML loaded on 2020-01-25 from
+    https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr"""
+    file_path = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), "mocks", "playlist.html"
+    )
+    with open(file_path, encoding="utf-8") as f:
+        read_data = f.read()
+    return read_data

tests/contrib/test_playlist.py CHANGED Viewed

@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 from unittest import mock
 from pytube import Playlist
@@ -14,3 +15,69 @@ def test_title(request_get):
     pl = Playlist(url)
     pl_title = pl.title()
     assert pl_title == "(149) Python Tutorial for Beginners (For Absolute Beginners)"

 # -*- coding: utf-8 -*-
 from unittest import mock
+from unittest.mock import MagicMock
 from pytube import Playlist
     pl = Playlist(url)
     pl_title = pl.title()
     assert pl_title == "(149) Python Tutorial for Beginners (For Absolute Beginners)"
+def test_init_with_playlist_url():
+    url = "https://www.youtube.com/playlist?list=PLynhp4cZEpTbRs_PYISQ8v_uwO0_mDg_X"
+    playlist = Playlist(url)
+    assert playlist.playlist_url == url
+def test_init_with_watch_url():
+    url = (
+        "https://www.youtube.com/watch?v=1KeYzjILqDo&"
+        "list=PLynhp4cZEpTbRs_PYISQ8v_uwO0_mDg_X&index=2&t=661s"
+    )
+    playlist = Playlist(url)
+    assert (
+        playlist.playlist_url
+        == "https://www.youtube.com/playlist?list=PLynhp4cZEpTbRs_PYISQ8v_uwO0_mDg_X"
+    )
+@mock.patch("pytube.contrib.playlist.request.get")
+def test_parse_links(request_get, playlist_html):
+    url = "https://www.fakeurl.com/playlist?list=whatever"
+    request_get.return_value = playlist_html
+    playlist = Playlist(url)
+    playlist._find_load_more_url = MagicMock(return_value=None)
+    links = playlist.parse_links()
+    request_get.assert_called()
+    assert links == [
+        "/watch?v=ujTCoH21GlA",
+        "/watch?v=45ryDIPHdGg",
+        "/watch?v=1BYu65vLKdA",
+        "/watch?v=3AQ_74xrch8",
+        "/watch?v=ddqQUz9mZaM",
+        "/watch?v=vwLT6bZrHEE",
+        "/watch?v=TQKI0KE-JYY",
+        "/watch?v=dNBvQ38MlT8",
+        "/watch?v=JHxyrMgOUWI",
+        "/watch?v=l2I8NycJMCY",
+        "/watch?v=g1Zbuk1gAfk",
+        "/watch?v=zixd-si9Q-o",
+    ]
+@mock.patch("pytube.contrib.playlist.request.get")
+def test_populate_video_urls(request_get, playlist_html):
+    url = "https://www.fakeurl.com/playlist?list=whatever"
+    request_get.return_value = playlist_html
+    playlist = Playlist(url)
+    playlist._find_load_more_url = MagicMock(return_value=None)
+    playlist.populate_video_urls()
+    request_get.assert_called()
+    assert playlist.video_urls == [
+        "https://www.youtube.com/watch?v=ujTCoH21GlA",
+        "https://www.youtube.com/watch?v=45ryDIPHdGg",
+        "https://www.youtube.com/watch?v=1BYu65vLKdA",
+        "https://www.youtube.com/watch?v=3AQ_74xrch8",
+        "https://www.youtube.com/watch?v=ddqQUz9mZaM",
+        "https://www.youtube.com/watch?v=vwLT6bZrHEE",
+        "https://www.youtube.com/watch?v=TQKI0KE-JYY",
+        "https://www.youtube.com/watch?v=dNBvQ38MlT8",
+        "https://www.youtube.com/watch?v=JHxyrMgOUWI",
+        "https://www.youtube.com/watch?v=l2I8NycJMCY",
+        "https://www.youtube.com/watch?v=g1Zbuk1gAfk",
+        "https://www.youtube.com/watch?v=zixd-si9Q-o",
+    ]

tests/mocks/playlist.html ADDED Viewed

The diff for this file is too large to render. See raw diff

tests/test_captions.py CHANGED Viewed

@@ -85,3 +85,12 @@ def test_repr():
         {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
     )
     assert str(caption) == '<Caption lang="name1" code="en">'

         {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
     )
     assert str(caption) == '<Caption lang="name1" code="en">'
+@mock.patch("pytube.request.get")
+def test_xml_captions(request_get):
+    request_get.return_value = "test"
+    caption = Caption(
+        {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
+    )
+    assert caption.xml_captions == "test"

tests/test_cli.py CHANGED Viewed

@@ -16,7 +16,7 @@ def test_download_when_itag_not_found(youtube):
     youtube.streams.all.return_value = []
     youtube.streams.get_by_itag.return_value = None
     with pytest.raises(SystemExit):
-        cli.download(youtube, 123)
     youtube.streams.get_by_itag.assert_called_with(123)
@@ -28,7 +28,7 @@ def test_download_when_itag_is_found(youtube, stream):
     with patch.object(
         youtube.streams, "get_by_itag", wraps=youtube.streams.get_by_itag
     ) as wrapped_itag:
-        cli.download(youtube, 123)
         wrapped_itag.assert_called_with(123)
     youtube.register_on_progress_callback.assert_called_with(cli.on_progress)
     stream.download.assert_called()
@@ -115,14 +115,14 @@ def test_parse_args_truthy():
 @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
-def test_main_download(youtube):
     parser = argparse.ArgumentParser()
     args = parse_args(parser, ["urlhere", "--itag=10"])
     cli._parse_args = MagicMock(return_value=args)
-    cli.download = MagicMock()
     cli.main()
     youtube.assert_called()
-    cli.download.assert_called()
 @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
@@ -156,3 +156,14 @@ def test_main_download_caption(youtube):
     cli.main()
     youtube.assert_called()
     cli.download_caption.assert_called()

     youtube.streams.all.return_value = []
     youtube.streams.get_by_itag.return_value = None
     with pytest.raises(SystemExit):
+        cli.download_by_itag(youtube, 123)
     youtube.streams.get_by_itag.assert_called_with(123)
     with patch.object(
         youtube.streams, "get_by_itag", wraps=youtube.streams.get_by_itag
     ) as wrapped_itag:
+        cli.download_by_itag(youtube, 123)
         wrapped_itag.assert_called_with(123)
     youtube.register_on_progress_callback.assert_called_with(cli.on_progress)
     stream.download.assert_called()
 @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
+def test_main_download_by_itag(youtube):
     parser = argparse.ArgumentParser()
     args = parse_args(parser, ["urlhere", "--itag=10"])
     cli._parse_args = MagicMock(return_value=args)
+    cli.download_by_itag = MagicMock()
     cli.main()
     youtube.assert_called()
+    cli.download_by_itag.assert_called()
 @mock.patch("pytube.cli.YouTube.__init__", return_value=None)
     cli.main()
     youtube.assert_called()
     cli.download_caption.assert_called()
+@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
+def test_download_by_resolution(youtube):
+    parser = argparse.ArgumentParser()
+    args = parse_args(parser, ["urlhere", "-r", "320p"])
+    cli._parse_args = MagicMock(return_value=args)
+    cli.download_by_resolution = MagicMock()
+    cli.main()
+    youtube.assert_called()
+    cli.download_by_resolution.assert_called()

tests/test_query.py CHANGED Viewed

@@ -137,3 +137,17 @@ def test_get_by_itag(cipher_signature):
 def test_get_by_non_existent_itag(cipher_signature):
     assert not cipher_signature.streams.get_by_itag(22983)

 def test_get_by_non_existent_itag(cipher_signature):
     assert not cipher_signature.streams.get_by_itag(22983)
+def test_get_by_resolution(cipher_signature):
+    assert cipher_signature.streams.get_by_resolution("360p").itag == 18
+def test_get_lowest_resolution(cipher_signature):
+    assert cipher_signature.streams.get_lowest_resolution().itag == 18
+def test_filter_is_dash(cipher_signature):
+    streams = cipher_signature.streams.filter(is_dash=False).all()
+    itags = [s.itag for s in streams]
+    assert itags == [18, 398, 397, 396, 395, 394]

tests/test_request.py CHANGED Viewed

@@ -3,11 +3,13 @@ import os
 from unittest import mock
 from pytube import request
 @mock.patch("pytube.request.urlopen")
-def test_get_streaming(mock_urlopen):
     fake_stream_binary = [
         iter(os.urandom(8 * 1024)),
         iter(os.urandom(8 * 1024)),
@@ -17,18 +19,18 @@ def test_get_streaming(mock_urlopen):
     response = mock.Mock()
     response.read.side_effect = fake_stream_binary
     mock_urlopen.return_value = response
-    response = request.get("http://fakeassurl.gov", streaming=True)
     call_count = len(list(response))
     assert call_count == 3
 @mock.patch("pytube.request.urlopen")
-def test_get_headers(mock_urlopen):
     response = mock.Mock()
     response.info.return_value = {"content-length": "16384"}
     mock_urlopen.return_value = response
-    response = request.get("http://fakeassurl.gov", headers=True)
     assert response == {"content-length": "16384"}
@@ -39,3 +41,8 @@ def test_get(mock_urlopen):
     mock_urlopen.return_value = response
     response = request.get("http://fakeassurl.gov")
     assert response == "<html></html>"

 from unittest import mock
+import pytest
 from pytube import request
 @mock.patch("pytube.request.urlopen")
+def test_streaming(mock_urlopen):
     fake_stream_binary = [
         iter(os.urandom(8 * 1024)),
         iter(os.urandom(8 * 1024)),
     response = mock.Mock()
     response.read.side_effect = fake_stream_binary
     mock_urlopen.return_value = response
+    response = request.stream("http://fakeassurl.gov")
     call_count = len(list(response))
     assert call_count == 3
 @mock.patch("pytube.request.urlopen")
+def test_headers(mock_urlopen):
     response = mock.Mock()
     response.info.return_value = {"content-length": "16384"}
     mock_urlopen.return_value = response
+    response = request.headers("http://fakeassurl.gov")
     assert response == {"content-length": "16384"}
     mock_urlopen.return_value = response
     response = request.get("http://fakeassurl.gov")
     assert response == "<html></html>"
+def test_get_non_http():
+    with pytest.raises(ValueError):
+        request.get("file://bad")

tests/test_streams.py CHANGED Viewed

@@ -8,8 +8,8 @@ from pytube import Stream
 def test_filesize(cipher_signature, mocker):
-    mocker.patch.object(request, "get")
-    request.get.return_value = {"content-length": "6796391"}
     assert cipher_signature.streams.first().filesize == 6796391
@@ -36,12 +36,10 @@ def test_title(cipher_signature):
 def test_download(cipher_signature, mocker):
-    mocker.patch.object(request, "get")
-    request.get.side_effect = [
-        {"content-length": "16384"},
-        {"content-length": "16384"},
-        iter([str(random.getrandbits(8 * 1024))]),
-    ]
     with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
         stream = cipher_signature.streams.first()
         stream.download()
@@ -61,12 +59,11 @@ def test_on_progress_hook(cipher_signature, mocker):
     callback_fn = mock.MagicMock()
     cipher_signature.register_on_progress_callback(callback_fn)
-    mocker.patch.object(request, "get")
-    request.get.side_effect = [
-        {"content-length": "16384"},
-        {"content-length": "16384"},
-        iter([str(random.getrandbits(8 * 1024))]),
-    ]
     with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
         stream = cipher_signature.streams.first()
         stream.download()
@@ -81,12 +78,11 @@ def test_on_complete_hook(cipher_signature, mocker):
     callback_fn = mock.MagicMock()
     cipher_signature.register_on_complete_callback(callback_fn)
-    mocker.patch.object(request, "get")
-    request.get.side_effect = [
-        {"content-length": "16384"},
-        {"content-length": "16384"},
-        iter([str(random.getrandbits(8 * 1024))]),
-    ]
     with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
         stream = cipher_signature.streams.first()
         stream.download()

 def test_filesize(cipher_signature, mocker):
+    mocker.patch.object(request, "headers")
+    request.headers.return_value = {"content-length": "6796391"}
     assert cipher_signature.streams.first().filesize == 6796391
 def test_download(cipher_signature, mocker):
+    mocker.patch.object(request, "headers")
+    request.headers.return_value = {"content-length": "16384"}
+    mocker.patch.object(request, "stream")
+    request.stream.return_value = iter([str(random.getrandbits(8 * 1024))])
     with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
         stream = cipher_signature.streams.first()
         stream.download()
     callback_fn = mock.MagicMock()
     cipher_signature.register_on_progress_callback(callback_fn)
+    mocker.patch.object(request, "headers")
+    request.headers.return_value = {"content-length": "16384"}
+    mocker.patch.object(request, "stream")
+    request.stream.return_value = iter([str(random.getrandbits(8 * 1024))])
     with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
         stream = cipher_signature.streams.first()
         stream.download()
     callback_fn = mock.MagicMock()
     cipher_signature.register_on_complete_callback(callback_fn)
+    mocker.patch.object(request, "headers")
+    request.headers.return_value = {"content-length": "16384"}
+    mocker.patch.object(request, "stream")
+    request.stream.return_value = iter([str(random.getrandbits(8 * 1024))])
     with mock.patch("pytube.streams.open", mock.mock_open(), create=True):
         stream = cipher_signature.streams.first()
         stream.download()