Spaces:

chipling
/

ytmp4

Sleeping

App Files Files Community

chipling committed on Jun 23, 2025

Commit

44bafb2

verified ·

1 Parent(s): 9ff4ef1

Upload 106 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

pytubefix/.DS_Store +0 -0
pytubefix/__cache__/tokens.json +1 -0
pytubefix/__init__.py +23 -0
pytubefix/__main__.py +992 -0
pytubefix/__pycache__/__init__.cpython-311.pyc +0 -0
pytubefix/__pycache__/__main__.cpython-311.pyc +0 -0
pytubefix/__pycache__/buffer.cpython-311.pyc +0 -0
pytubefix/__pycache__/captions.cpython-311.pyc +0 -0
pytubefix/__pycache__/chapters.cpython-311.pyc +0 -0
pytubefix/__pycache__/cipher.cpython-311.pyc +0 -0
pytubefix/__pycache__/cli.cpython-311.pyc +0 -0
pytubefix/__pycache__/exceptions.cpython-311.pyc +0 -0
pytubefix/__pycache__/extract.cpython-311.pyc +0 -0
pytubefix/__pycache__/file_system.cpython-311.pyc +0 -0
pytubefix/__pycache__/helpers.cpython-311.pyc +0 -0
pytubefix/__pycache__/info.cpython-311.pyc +0 -0
pytubefix/__pycache__/innertube.cpython-311.pyc +0 -0
pytubefix/__pycache__/itags.cpython-311.pyc +0 -0
pytubefix/__pycache__/jsinterp.cpython-311.pyc +0 -0
pytubefix/__pycache__/keymoments.cpython-311.pyc +0 -0
pytubefix/__pycache__/metadata.cpython-311.pyc +0 -0
pytubefix/__pycache__/monostate.cpython-311.pyc +0 -0
pytubefix/__pycache__/parser.cpython-311.pyc +0 -0
pytubefix/__pycache__/protobuf.cpython-311.pyc +0 -0
pytubefix/__pycache__/query.cpython-311.pyc +0 -0
pytubefix/__pycache__/request.cpython-311.pyc +0 -0
pytubefix/__pycache__/streams.cpython-311.pyc +0 -0
pytubefix/__pycache__/version.cpython-311.pyc +0 -0
pytubefix/botGuard/.DS_Store +0 -0
pytubefix/botGuard/__init__.py +0 -0
pytubefix/botGuard/__pycache__/__init__.cpython-311.pyc +0 -0
pytubefix/botGuard/__pycache__/bot_guard.cpython-311.pyc +0 -0
pytubefix/botGuard/bot_guard.py +47 -0
pytubefix/botGuard/vm/botGuard.js +0 -0
pytubefix/buffer.py +48 -0
pytubefix/captions.py +215 -0
pytubefix/chapters.py +47 -0
pytubefix/cipher.py +190 -0
pytubefix/cli.py +355 -0
pytubefix/contrib/__init__.py +0 -0
pytubefix/contrib/__pycache__/__init__.cpython-311.pyc +0 -0
pytubefix/contrib/__pycache__/channel.cpython-311.pyc +0 -0
pytubefix/contrib/__pycache__/playlist.cpython-311.pyc +0 -0
pytubefix/contrib/__pycache__/search.cpython-311.pyc +0 -0
pytubefix/contrib/channel.py +655 -0
pytubefix/contrib/playlist.py +496 -0
pytubefix/contrib/search.py +557 -0
pytubefix/exceptions.py +344 -0
pytubefix/extract.py +646 -0
pytubefix/file_system.py +80 -0

pytubefix/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

pytubefix/__cache__/tokens.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"access_token": null, "refresh_token": null, "expires": null, "visitorData": "CgtYOTFud0twS0VmOCiD2OTCBjIKCgJJThIEGgAgQg%3D%3D", "po_token": "MnSZ2tgzlZc8xJni8Vz8ITSbjhi-7cChHWF4_eFiXnowkgRlC-yG3IdTqDT9PvJhNbcDK43DnBhNlOCBTjY1Y9aKFBYp_h0-yiT5TefusxCtFbd98AA4HdGX6XmAbvgLujXCQZj14n_wlWR9y3i4CZjjl8pmYg=="}

pytubefix/__init__.py ADDED Viewed

	@@ -0,0 +1,23 @@

+# flake8: noqa: F401
+# noreorder
+"""
+Pytubefix: a very serious Python library for downloading YouTube Videos.
+"""
+__title__ = "pytubefix"
+__author__ = "Juan Bindez"
+__license__ = "MIT License"
+# Module-level cache of the player JavaScript (``__js__``) and the URL it was
+# fetched from (``__js_url__``). ``YouTube.js`` in ``pytubefix/__main__.py``
+# compares ``__js_url__`` with the current player URL and either reuses
+# ``__js__`` or overwrites both values after downloading a new script.
+__js__ = None
+__js_url__ = None
+from pytubefix.version import __version__
+from pytubefix.streams import Stream
+from pytubefix.captions import Caption
+from pytubefix.chapters import Chapter
+from pytubefix.keymoments import KeyMoment
+from pytubefix.query import CaptionQuery, StreamQuery
+from pytubefix.__main__ import YouTube
+from pytubefix.contrib.playlist import Playlist
+from pytubefix.contrib.channel import Channel
+from pytubefix.contrib.search import Search
+from pytubefix.info import info
+from pytubefix.buffer import Buffer

pytubefix/__main__.py ADDED Viewed

	@@ -0,0 +1,992 @@

+# MIT License
+#
+# Copyright (c) 2023 - 2025 Juan Bindez <juanbindez780@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""
+This module implements the core developer interface for pytubefix.
+The problem domain of the :class:`YouTube <YouTube>` class focuses almost
+exclusively on the developer interface. Pytubefix offloads the heavy lifting to
+smaller peripheral modules and functions.
+"""
+import logging
+from subprocess import CalledProcessError
+from typing import Any, Callable, Dict, List, Optional, Tuple
+import pytubefix
+import pytubefix.exceptions as exceptions
+from pytubefix import extract, request
+from pytubefix import Stream, StreamQuery
+from pytubefix.helpers import install_proxy
+from pytubefix.innertube import InnerTube
+from pytubefix.metadata import YouTubeMetadata
+from pytubefix.monostate import Monostate
+from pytubefix.botGuard import bot_guard
+logger = logging.getLogger(__name__)
+class YouTube:
+    """Core developer interface for pytubefix."""
+    def __init__(
+            self,
+            url: str,
+            client: str = InnerTube().client_name,
+            on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
+            on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
+            proxies: Optional[Dict[str, str]] = None,
+            use_oauth: bool = False,
+            allow_oauth_cache: bool = True,
+            token_file: Optional[str] = None,
+            oauth_verifier: Optional[Callable[[str, str], None]] = None,
+            use_po_token: Optional[bool] = False,
+            po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
+    ):
+        """Construct a :class:`YouTube <YouTube>`.
+        The video id is extracted from ``url`` and the canonical watch and embed
+        URLs are derived from it. All lazily computed values (watch html, player
+        js, video info, streams, ...) start out unset.
+        :param str url:
+            A valid YouTube watch URL.
+        :param str client:
+            (Optional) A YouTube client,
+            Available:
+                WEB, WEB_EMBED, WEB_MUSIC, WEB_CREATOR, WEB_SAFARI,
+                ANDROID, ANDROID_MUSIC, ANDROID_CREATOR, ANDROID_VR, ANDROID_PRODUCER, ANDROID_TESTSUITE,
+                IOS, IOS_MUSIC, IOS_CREATOR,
+                MWEB, TV, TV_EMBED, MEDIA_CONNECT.
+            The default is taken from ``InnerTube().client_name`` and is evaluated
+            once, when the method is defined.
+            The value is overridden with ``WEB`` when ``use_po_token`` is true and
+            with ``TV`` when ``use_oauth`` is true (``use_oauth`` wins if both are set).
+        :param func on_progress_callback:
+            (Optional) User defined callback function for stream download
+            progress events.
+        :param func on_complete_callback:
+            (Optional) User defined callback function for stream download
+            complete events.
+        :param dict proxies:
+            (Optional) A dict mapping protocol to proxy address which will be used by pytubefix.
+        :param bool use_oauth:
+            (Optional) Prompt the user to authenticate to YouTube.
+            If allow_oauth_cache is set to True, the user should only be prompted once.
+        :param bool allow_oauth_cache:
+            (Optional) Cache OAuth and Po tokens locally on the machine. Defaults to True.
+            These tokens are only generated if use_oauth is set to True as well.
+        :param str token_file:
+            (Optional) Path to the file where the OAuth and Po tokens will be stored.
+            Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
+        :param Callable oauth_verifier:
+            (Optional) Verifier to be used for getting oauth tokens.
+            Verification URL and User-Code will be passed to it respectively.
+            (if passed, else default verifier will be used)
+        :param bool use_po_token:
+            (Optional) Prompt the user to use the proof of origin token on YouTube.
+            It must be sent with the API along with the linked visitorData and
+            then passed as a `po_token` query parameter to affected clients.
+            If allow_oauth_cache is set to True, the user should only be prompted once.
+            (Do not use together with `use_oauth=True`)
+        :param Callable po_token_verifier:
+            (Optional) Verifier used to obtain the visitorData and po_token.
+            The verifier will return the visitorData and po_token respectively.
+            (if passed, else default verifier will be used)
+        """
+        # js fetched by js_url
+        self._js: Optional[str] = None
+        # the url to the js, parsed from watch html
+        self._js_url: Optional[str] = None
+        # content fetched from innertube/player
+        self._vid_info: Optional[Dict] = None
+        # content fetched from innertube/next
+        self._vid_details: Optional[Dict] = None
+        # the html of /watch?v=<video_id>
+        self._watch_html: Optional[str] = None
+        # the html of /embed/<video_id>
+        self._embed_html: Optional[str] = None
+        # player config args; presumably parsed from inline js in the watch html (TODO confirm)
+        self._player_config_args: Optional[Dict] = None
+        self._age_restricted: Optional[bool] = None
+        self._fmt_streams: Optional[List[Stream]] = None
+        self._initial_data = None
+        self._metadata: Optional[YouTubeMetadata] = None
+        # video_id part of /watch?v=<video_id>
+        self.video_id = extract.video_id(url)
+        self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
+        self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"
+        # a user supplied po_token forces the WEB client
+        self.client = 'WEB' if use_po_token else client
+        # oauth can only be used by the TV and TV_EMBED client.
+        self.client = 'TV' if use_oauth else self.client
+        # clients tried, in order, when the selected client cannot play the video
+        self.fallback_clients = ['TV', 'IOS']
+        # playback context holding the signature timestamp, filled in lazily
+        self._signature_timestamp: dict = {}
+        self._visitor_data = None
+        # Shared between all instances of `Stream` (Borg pattern).
+        self.stream_monostate = Monostate(
+            on_progress=on_progress_callback, on_complete=on_complete_callback, youtube=self
+        )
+        if proxies:
+            install_proxy(proxies)
+        self._author = None
+        self._title = None
+        self._publish_date = None
+        self.use_oauth = use_oauth
+        self.allow_oauth_cache = allow_oauth_cache
+        self.token_file = token_file
+        self.oauth_verifier = oauth_verifier
+        self.use_po_token = use_po_token
+        self.po_token_verifier = po_token_verifier
+        # po_token actually sent with the player request (set while fetching vid_info)
+        self.po_token = None
+        # po_token generated by botGuard (see the `pot` property)
+        self._pot = None
+    def __repr__(self):
+        return f'<pytubefix.__main__.YouTube object: videoId={self.video_id}>'
+    def __eq__(self, o: object) -> bool:
+        # Compare types and urls, if they're same return true, else return false.
+        return type(o) == type(self) and o.watch_url == self.watch_url
+    @property
+    def watch_html(self):
+        if self._watch_html:
+            return self._watch_html
+        self._watch_html = request.get(url=self.watch_url)
+        return self._watch_html
+    @property
+    def embed_html(self):
+        if self._embed_html:
+            return self._embed_html
+        self._embed_html = request.get(url=self.embed_url)
+        return self._embed_html
+    @property
+    def age_restricted(self):
+        if self._age_restricted:
+            return self._age_restricted
+        self._age_restricted = extract.is_age_restricted(self.watch_html)
+        return self._age_restricted
+    @property
+    def js_url(self):
+        if self._js_url:
+            return self._js_url
+        if self.age_restricted:
+            self._js_url = extract.js_url(self.embed_html)
+        else:
+            self._js_url = extract.js_url(self.watch_html)
+        return self._js_url
+    @property
+    def js(self):
+        if self._js:
+            return self._js
+        # If the js_url doesn't match the cached url, fetch the new js and update
+        #  the cache; otherwise, load the cache.
+        if pytubefix.__js_url__ != self.js_url:
+            self._js = request.get(self.js_url)
+            pytubefix.__js__ = self._js
+            pytubefix.__js_url__ = self.js_url
+        else:
+            self._js = pytubefix.__js__
+        return self._js
+    @property
+    def visitor_data(self) -> str:
+        """
+        Retrieves the visitorData from the WEB client.
+        The value is cached on the instance once it has been obtained.
+        When the current client requires a poToken, the ``responseContext`` of the
+        watch page's initial data is searched first. If that client does not
+        require a poToken, or the search fails, a player request is made with the
+        WEB InnerTube client and the value is read from its ``responseContext``.
+        """
+        if self._visitor_data:
+            return self._visitor_data
+        if InnerTube(self.client).require_po_token:
+            try:
+                logger.debug("Looking for visitorData in initial_data")
+                self._visitor_data = extract.visitor_data(str(self.initial_data['responseContext']))
+                logger.debug('VisitorData obtained successfully')
+                return self._visitor_data
+            except (KeyError, pytubefix.exceptions.RegexMatchError):
+                # fall through to the InnerTube request below
+                logger.debug("Unable to obtain visitorData from initial_data. Trying to request from the WEB client")
+        logger.debug("Looking for visitorData in InnerTube API")
+        innertube_response = InnerTube('WEB').player(self.video_id)
+        try:
+            self._visitor_data = innertube_response['responseContext']['visitorData']
+        except KeyError:
+            # NOTE(review): positional fallback; assumes the first tracking service's
+            # seventh param carries the visitorData. Confirm against a real response.
+            self._visitor_data = innertube_response['responseContext']['serviceTrackingParams'][0]['params'][6]['value']
+        logger.debug('VisitorData obtained successfully')
+        return self._visitor_data
+    @property
+    def pot(self) -> str:
+        """
+        Retrieves the poToken generated by botGuard.
+        This poToken only works for WEB-based clients.
+        """
+        if self._pot:
+            return self._pot
+        logger.debug('Invoking botGuard')
+        try:
+            self._pot = bot_guard.generate_po_token(visitor_data=self.visitor_data)
+            logger.debug('PoToken generated successfully')
+        except Exception as e:
+            logger.warning('Unable to run botGuard. Skipping poToken generation, reason: ' + e.__str__())
+        return self._pot
+    @property
+    def initial_data(self):
+        if self._initial_data:
+            return self._initial_data
+        self._initial_data = extract.initial_data(self.watch_html)
+        return self._initial_data
+    @property
+    def streaming_data(self):
+        """Return streamingData from video info."""
+        # List of YouTube error video IDs
+        invalid_id_list = ['aQvGIIdgFDM']
+        # If my previously valid video_info doesn't have the streamingData,
+        #   or it is an invalid video,
+        #   try to get a new video_info with a different client.
+        if 'streamingData' not in self.vid_info or self.vid_info['videoDetails']['videoId'] in invalid_id_list:
+            original_client = self.client
+            # for each fallback client set, revert videodata, and run check_availability, which
+            #   will try to get a new video_info with a different client.
+            #   if it fails try the next fallback client, and so on.
+            # If none of the clients have valid streamingData, raise an exception.
+            for client in self.fallback_clients:
+                self.client = client
+                self.vid_info = None
+                try:
+                    self.check_availability()
+                except Exception as e:
+                    continue
+                if 'streamingData' in self.vid_info:
+                    break
+            if 'streamingData' not in self.vid_info:
+                raise exceptions.UnknownVideoError(video_id=self.video_id,
+                                                   developer_message=f'Streaming data is missing, '
+                                                                     f'original client: {original_client}, '
+                                                                     f'fallback clients: {self.fallback_clients}')
+        return self.vid_info['streamingData']
+    @property
+    def fmt_streams(self):
+        """Returns a list of streams if they have been initialized.
+        If the streams have not been initialized, finds all relevant
+        streams and initializes them.
+        The availability check runs on every access, before the cached list is
+        consulted, so it may raise even when streams were built earlier.
+        """
+        self.check_availability()
+        if self._fmt_streams:
+            return self._fmt_streams
+        # Reset to an empty list; streams are appended to it one by one below.
+        self._fmt_streams = []
+        stream_manifest = extract.apply_descrambler(self.streaming_data)
+        # Built after streaming_data, which may have switched self.client to a fallback client.
+        inner_tube = InnerTube(self.client)
+        # self.po_token is filled in while the player response is fetched (see vid_info).
+        if self.po_token:
+            extract.apply_po_token(stream_manifest, self.vid_info, self.po_token)
+        if inner_tube.require_js_player:
+            # If the cached js doesn't work, try fetching a new js file
+            # https://github.com/pytube/pytube/issues/1054
+            try:
+                extract.apply_signature(stream_manifest, self.vid_info, self.js, self.js_url)
+            except exceptions.ExtractError:
+                # To force an update to the js file, we clear the cache and retry
+                self._js = None
+                self._js_url = None
+                pytubefix.__js__ = None
+                pytubefix.__js_url__ = None
+                # A second failure is not caught and propagates to the caller.
+                extract.apply_signature(stream_manifest, self.vid_info, self.js, self.js_url)
+        # build instances of :class:`Stream <Stream>`
+        # Initialize stream objects
+        for stream in stream_manifest:
+            video = Stream(
+                stream=stream,
+                monostate=self.stream_monostate,
+                po_token=self.po_token,
+                video_playback_ustreamer_config=self.video_playback_ustreamer_config
+            )
+            self._fmt_streams.append(video)
+        # Publish title and duration on the state object shared by all streams.
+        self.stream_monostate.title = self.title
+        self.stream_monostate.duration = self.length
+        return self._fmt_streams
+    def check_availability(self):
+        """Check whether the video is available.
+        Raises different exceptions based on why the video is unavailable,
+        otherwise does nothing.
+        The playability status and its messages are read from ``self.vid_info``.
+        Each message is matched verbatim against the known reason strings for
+        the reported status. With no messages, nothing is raised.
+        A warning is logged when the current client requires a PoToken and none
+        is set.
+        :raises MembersOnly, RecordingUnavailable, AgeCheckRequiredAccountError,
+            VideoRegionBlocked, VideoUnavailable:
+            For status ``UNPLAYABLE``, depending on the reason.
+        :raises AgeRestrictedError, BotDetection, LoginRequired:
+            For status ``LOGIN_REQUIRED``, depending on the reason.
+        :raises AgeCheckRequiredError:
+            For status ``AGE_CHECK_REQUIRED`` when oauth is not in use.
+        :raises LiveStreamOffline:
+            For status ``LIVE_STREAM_OFFLINE``.
+        :raises VideoUnavailable, VideoPrivate, UnknownVideoError:
+            For status ``ERROR``, depending on the reason.
+        :raises LiveStreamError:
+            For status ``LIVE_STREAM``.
+        :raises UnknownVideoError:
+            For any other status that is not ``None``.
+        """
+        status, messages = extract.playability_status(self.vid_info)
+        if InnerTube(self.client).require_po_token and not self.po_token:
+            logger.warning(f"The {self.client} client requires PoToken to obtain functional streams, "
+                           f"See more details at https://github.com/JuanBindez/pytubefix/pull/209")
+        for reason in messages:
+            if status == 'UNPLAYABLE':
+                if reason == (
+                        'Join this channel to get access to members-only content '
+                        'like this video, and other exclusive perks.'
+                ):
+                    raise exceptions.MembersOnly(video_id=self.video_id)
+                elif reason == 'This live stream recording is not available.':
+                    raise exceptions.RecordingUnavailable(video_id=self.video_id)
+                elif reason == (
+                        'Sorry, something is wrong. This video may be inappropriate for some users. '
+                        'Sign in to your primary account to confirm your age.'
+                ):
+                    raise exceptions.AgeCheckRequiredAccountError(video_id=self.video_id)
+                elif reason == (
+                        'The uploader has not made this video available in your country'
+                ):
+                    raise exceptions.VideoRegionBlocked(video_id=self.video_id)
+                else:
+                    raise exceptions.VideoUnavailable(video_id=self.video_id)
+            elif status == 'LOGIN_REQUIRED':
+                if reason == (
+                        'Sign in to confirm your age'
+                ):
+                    raise exceptions.AgeRestrictedError(video_id=self.video_id)
+                elif reason == (
+                        'Sign in to confirm you’re not a bot'
+                ):
+                    raise exceptions.BotDetection(video_id=self.video_id)
+                else:
+                    raise exceptions.LoginRequired(video_id=self.video_id, reason=reason)
+            elif status == 'AGE_CHECK_REQUIRED':
+                # With oauth the age check is confirmed on the user's behalf;
+                # age_check() raises if the video is still not playable afterwards.
+                if self.use_oauth:
+                    self.age_check()
+                else:
+                    raise exceptions.AgeCheckRequiredError(video_id=self.video_id)
+            elif status == 'LIVE_STREAM_OFFLINE':
+                raise exceptions.LiveStreamOffline(video_id=self.video_id, reason=reason)
+            elif status == 'ERROR':
+                if reason == 'Video unavailable':
+                    raise exceptions.VideoUnavailable(video_id=self.video_id)
+                elif reason == 'This video is private':
+                    raise exceptions.VideoPrivate(video_id=self.video_id)
+                elif reason == 'This video is unavailable':
+                    raise exceptions.VideoUnavailable(video_id=self.video_id)
+                elif reason == 'This video has been removed by the uploader':
+                    raise exceptions.VideoUnavailable(video_id=self.video_id)
+                elif reason == 'This video is no longer available because the YouTube account associated with this video has been terminated.':
+                    raise exceptions.VideoUnavailable(video_id=self.video_id)
+                else:
+                    raise exceptions.UnknownVideoError(video_id=self.video_id, status=status, reason=reason, developer_message=f'Unknown reason type for Error status')
+            elif status == 'LIVE_STREAM':
+                raise exceptions.LiveStreamError(video_id=self.video_id)
+            elif status is None:
+                # no status reported: nothing to raise for this message
+                pass
+            else:
+                raise exceptions.UnknownVideoError(video_id=self.video_id, status=status, reason=reason, developer_message=f'Unknown video status')
+    @property
+    def signature_timestamp(self) -> dict:
+        """WEB clients need to be signed with a signature timestamp.
+        The signature is found inside the player's base.js.
+        :rtype: Dict
+        """
+        if not self._signature_timestamp:
+            self._signature_timestamp = {
+                'playbackContext': {
+                    'contentPlaybackContext': {
+                        'signatureTimestamp': extract.signature_timestamp(self.js)
+                    }
+                }
+            }
+        return self._signature_timestamp
+    @property
+    def video_playback_ustreamer_config(self):
+        return self.vid_info[
+            'playerConfig'][
+            'mediaCommonConfig'][
+            'mediaUstreamerRequestConfig'][
+            'videoPlaybackUstreamerConfig']
+    @property
+    def server_abr_streaming_url(self):
+        """
+        Extract the url for abr server and decrypt the `n` parameter
+        """
+        try:
+            url = self.vid_info[
+                'streamingData'][
+                'serverAbrStreamingUrl']
+            stream_manifest = [{"url": url}]
+            extract.apply_signature(stream_manifest, vid_info=self.vid_info, js=self.js, url_js=self.js_url)
+            return stream_manifest[0]["url"]
+        except Exception:
+            return None
+    @property
+    def vid_info(self):
+        """Parse the raw vid info and return the parsed result.
+        :rtype: Dict[Any, Any]
+        """
+        if self._vid_info:
+            return self._vid_info
+        def call_innertube():
+            innertube = InnerTube(
+                client=self.client,
+                use_oauth=self.use_oauth,
+                allow_cache=self.allow_oauth_cache,
+                token_file=self.token_file,
+                oauth_verifier=self.oauth_verifier,
+                use_po_token=self.use_po_token,
+                po_token_verifier=self.po_token_verifier
+            )
+            if innertube.require_js_player:
+                innertube.innertube_context.update(self.signature_timestamp)
+            # Automatically generates a poToken
+            if innertube.require_po_token and not self.use_po_token:
+                logger.debug(f"The {self.client} client requires poToken to obtain functional streams")
+                logger.debug("Automatically generating poToken")
+                innertube.insert_po_token(visitor_data=self.visitor_data, po_token=self.pot)
+            elif not self.use_po_token:
+                # from 01/22/2025 all clients must send the visitorData in the API request
+                innertube.insert_visitor_data(visitor_data=self.visitor_data)
+            response = innertube.player(self.video_id)
+            # Retrieves the sent poToken
+            if self.use_po_token or innertube.require_po_token:
+                self.po_token = innertube.access_po_token or self.pot
+            return response
+        innertube_response = call_innertube()
+        for client in self.fallback_clients:
+            # Some clients are unable to access certain types of videos
+            # If the video is unavailable for the current client, attempts will be made with fallback clients
+            playability_status = innertube_response['playabilityStatus']
+            if playability_status['status'] == 'UNPLAYABLE' and 'reason' in playability_status and playability_status['reason'] == 'This video is not available':
+                logger.warning(f"{self.client} client returned: This video is not available")
+                self.client = client
+                logger.warning(f"Switching to client: {client}")
+                innertube_response = call_innertube()
+            else:
+                break
+        self._vid_info = innertube_response
+        if not self._vid_info:
+            raise pytubefix.exceptions.InnerTubeResponseError(self.video_id, self.client)
+        return self._vid_info
+    @vid_info.setter
+    def vid_info(self, value):
+        self._vid_info = value
+    @property
+    def vid_details(self):
+        """Parse the raw vid details and return the parsed result.
+        The official player sends a request to the `next` endpoint to obtain some details of the video.
+        :rtype: Dict[Any, Any]
+        """
+        if self._vid_details:
+            return self._vid_details
+        innertube = InnerTube(
+            client='TV' if self.use_oauth else 'WEB',
+            use_oauth=self.use_oauth,
+            allow_cache=self.allow_oauth_cache,
+            token_file=self.token_file,
+            oauth_verifier=self.oauth_verifier,
+            use_po_token=self.use_po_token,
+            po_token_verifier=self.po_token_verifier
+        )
+        innertube_response = innertube.next(self.video_id)
+        self._vid_details = innertube_response
+        return self._vid_details
+    @vid_details.setter
+    def vid_details(self, value):
+        self._vid_details = value
+    def age_check(self):
+        """If the video has any age restrictions, you must confirm that you wish to continue.
+        Switches ``self.client`` to ``TV``, confirms the age check through
+        InnerTube and requests the player response again. On success the new
+        response replaces the cached video info.
+        :raises AgeCheckRequiredAccountError:
+            If the video is still ``UNPLAYABLE`` after the confirmation.
+        :raises AgeCheckRequiredError:
+            If the video is in any other non-``OK`` state after the confirmation.
+        """
+        self.client = 'TV'
+        innertube = InnerTube(
+            client=self.client,
+            use_oauth=self.use_oauth,
+            allow_cache=self.allow_oauth_cache,
+            token_file=self.token_file,
+            oauth_verifier=self.oauth_verifier,
+            use_po_token=self.use_po_token,
+            po_token_verifier=self.po_token_verifier
+        )
+        if innertube.require_js_player:
+            innertube.innertube_context.update(self.signature_timestamp)
+        innertube.verify_age(self.video_id)
+        innertube_response = innertube.player(self.video_id)
+        playability_status = innertube_response['playabilityStatus'].get('status', None)
+        # If we still can't access the video, raise an exception
+        if playability_status != 'OK':
+            if playability_status == 'UNPLAYABLE':
+                raise exceptions.AgeCheckRequiredAccountError(self.video_id)
+            else:
+                raise exceptions.AgeCheckRequiredError(self.video_id)
+        self._vid_info = innertube_response
+    @property
+    def caption_tracks(self) -> List[pytubefix.Caption]:
+        """Get a list of :class:`Caption <Caption>`.
+        :rtype: List[Caption]
+        """
+        innertube_response = InnerTube(
+            client='WEB' if not self.use_oauth else self.client,
+            use_oauth=self.use_oauth,
+            allow_cache=self.allow_oauth_cache,
+            token_file=self.token_file,
+            oauth_verifier=self.oauth_verifier,
+            use_po_token=self.use_po_token,
+            po_token_verifier=self.po_token_verifier
+        ).player(self.video_id)
+        raw_tracks = (
+            innertube_response.get("captions", {})
+            .get("playerCaptionsTracklistRenderer", {})
+            .get("captionTracks", [])
+        )
+        return [pytubefix.Caption(track) for track in raw_tracks]
+    @property
+    def captions(self) -> pytubefix.CaptionQuery:
+        """Interface to query caption tracks.
+        :rtype: :class:`CaptionQuery <CaptionQuery>`.
+        """
+        return pytubefix.CaptionQuery(self.caption_tracks)
+    @property
+    def chapters(self) -> List[pytubefix.Chapter]:
+        """Get a list of :class:`Chapter <Chapter>`.
+        :rtype: List[Chapter]
+        """
+        try:
+            chapters_data = []
+            markers_map = self.initial_data['playerOverlays']['playerOverlayRenderer'][
+                'decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']['playerBar'][
+                'multiMarkersPlayerBarRenderer']['markersMap']
+            for marker in markers_map:
+                if marker['key'].upper() == 'DESCRIPTION_CHAPTERS':
+                    chapters_data = marker['value']['chapters']
+                    break
+        except (KeyError, IndexError):
+            return []
+        result: List[pytubefix.Chapter] = []
+        for i, chapter_data in enumerate(chapters_data):
+            chapter_start = int(
+                chapter_data['chapterRenderer']['timeRangeStartMillis'] / 1000
+            )
+            if i == len(chapters_data) - 1:
+                chapter_end = self.length
+            else:
+                chapter_end = int(
+                    chapters_data[i + 1]['chapterRenderer']['timeRangeStartMillis'] / 1000
+                )
+            result.append(pytubefix.Chapter(chapter_data, chapter_end - chapter_start))
+        return result
+    @property
+    def key_moments(self) -> List[pytubefix.KeyMoment]:
+        """Get a list of :class:`KeyMoment <KeyMoment>`.
+        :rtype: List[KeyMoment]
+        """
+        try:
+            mutations = self.initial_data['frameworkUpdates']['entityBatchUpdate']['mutations']
+            found = False
+            for mutation in mutations:
+                if mutation.get('payload', {}).get('macroMarkersListEntity', {}).get('markersList', {}).get(
+                        'markerType') == "MARKER_TYPE_TIMESTAMPS":
+                    key_moments_data = mutation['payload']['macroMarkersListEntity']['markersList']['markers']
+                    found = True
+                    break
+            if not found:
+                return []
+        except (KeyError, IndexError):
+            return []
+        result: List[pytubefix.KeyMoment] = []
+        for i, key_moment_data in enumerate(key_moments_data):
+            key_moment_start = int(key_moment_data['startMillis']) // 1000
+            if i == len(key_moments_data) - 1:
+                key_moment_end = self.length
+            else:
+                key_moment_end = int(key_moments_data[i + 1]['startMillis']) // 1000
+            result.append(pytubefix.KeyMoment(key_moment_data, key_moment_end - key_moment_start))
+        return result
+    @property
+    def replayed_heatmap(self) -> List[Dict[str, float]]:
+        """Get a list of : `Dict<str, float>`.
+        :rtype: List[Dict[str, float]]
+        """
+        try:
+            mutations = self.initial_data['frameworkUpdates']['entityBatchUpdate']['mutations']
+            found = False
+            for mutation in mutations:
+                if mutation.get('payload', {}).get('macroMarkersListEntity', {}).get('markersList', {}).get(
+                        'markerType') == "MARKER_TYPE_HEATMAP":
+                    heatmaps_data = mutation['payload']['macroMarkersListEntity']['markersList']['markers']
+                    found = True
+                    break
+            if not found:
+                return []
+        except (KeyError, IndexError):
+            return []
+        result: List[Dict[str, float]] = []
+        for heatmap_data in heatmaps_data:
+            heatmap_start = int(heatmap_data['startMillis']) / 1000
+            duration = int(heatmap_data['durationMillis']) / 1000
+            norm_intensity = float(heatmap_data['intensityScoreNormalized'])
+            result.append({
+                "start_seconds": heatmap_start,
+                "duration": duration,
+                "norm_intensity": norm_intensity
+            })
+        return result
+    @property
+    def streams(self) -> StreamQuery:
+        """Interface to query both adaptive (DASH) and progressive streams.
+        :rtype: :class:`StreamQuery <StreamQuery>`.
+        """
+        self.check_availability()
+        return StreamQuery(self.fmt_streams)
+    @property
+    def thumbnail_url(self) -> str:
+        """Get the thumbnail url image.
+        :rtype: str
+        """
+        thumbnail_details = (
+            self.vid_info.get("videoDetails", {})
+            .get("thumbnail", {})
+            .get("thumbnails")
+        )
+        if thumbnail_details:
+            thumbnail_details = thumbnail_details[-1]  # last item has max size
+            return thumbnail_details["url"]
+        return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg"
+    @property
+    def publish_date(self):
+        """Get the publish date.
+        :rtype: datetime
+        """
+        if self._publish_date:
+            return self._publish_date
+        self._publish_date = extract.publish_date(self.watch_html)
+        return self._publish_date
+    @publish_date.setter
+    def publish_date(self, value):
+        """Sets the publish date."""
+        self._publish_date = value
+    @property
+    def title(self) -> str:
+        """Get the video title.
+        :rtype: str
+        """
+        self._author = self.vid_info.get("videoDetails", {}).get(
+            "author", "unknown"
+        )
+        if self._title:
+            return self._title
+        try:
+            # Some clients may not return the title in the `player` endpoint,
+            # so if it is not found we will look for it in the `next` endpoint
+            if 'title' in self.vid_info['videoDetails']:
+                self._title = self.vid_info['videoDetails']['title']
+                logger.debug('Found title in vid_info')
+            else:
+                if 'singleColumnWatchNextResults' in self.vid_details['contents']:
+                    contents = self.vid_details['contents'][
+                        'singleColumnWatchNextResults'][
+                        'results'][
+                        'results'][
+                        'contents'][0][
+                        'itemSectionRenderer'][
+                        'contents'][0]
+                    if 'videoMetadataRenderer' in contents:
+                        self._title = contents['videoMetadataRenderer']['title']['runs'][0]['text']
+                    else:
+                        # JSON tree for titles in videos available on YouTube music
+                        self._title = contents['musicWatchMetadataRenderer']['title']['simpleText']
+                # The type of video with this structure is not yet known.
+                # First reported in: https://github.com/JuanBindez/pytubefix/issues/351
+                elif 'twoColumnWatchNextResults' in self.vid_details['contents']:
+                    self._title = self.vid_details['contents'][
+                        'twoColumnWatchNextResults'][
+                        'results'][
+                        'results'][
+                        'contents'][0][
+                        'videoPrimaryInfoRenderer'][
+                        'title'][
+                        'runs'][0][
+                        'text']
+                logger.debug('Found title in vid_details')
+        except KeyError as e:
+            # Check_availability will raise the correct exception in most cases
+            #  if it doesn't, ask for a report.
+            self.check_availability()
+            raise exceptions.PytubeFixError(
+                (
+                    f'Exception while accessing title of {self.watch_url}. '
+                    'Please file a bug report at https://github.com/JuanBindez/pytubefix'
+                )
+            ) from e
+        return self._title
+    @title.setter
+    def title(self, value):
+        """Sets the title value."""
+        self._title = value
+    @property
+    def description(self) -> str:
+        """Get the video description.
+        :rtype: str
+        """
+        return self.vid_info.get("videoDetails", {}).get("shortDescription")
+    @property
+    def rating(self) -> float:
+        """Get the video average rating.
+        :rtype: float
+        """
+        return self.vid_info.get("videoDetails", {}).get("averageRating")
+    @property
+    def length(self) -> int:
+        """Get the video length in seconds.
+        :rtype: int
+        """
+        return int(self.vid_info.get('videoDetails', {}).get('lengthSeconds'))
+    @property
+    def views(self) -> int:
+        """Get the number of the times the video has been viewed.
+        :rtype: int
+        """
+        return int(self.vid_info.get("videoDetails", {}).get("viewCount", "0"))
+    @property
+    def author(self) -> str:
+        """Get the video author.
+        :rtype: str
+        """
+        if self._author:
+            return self._author
+        self._author = self.vid_info.get("videoDetails", {}).get(
+            "author", "unknown"
+        )
+        return self._author
+    @author.setter
+    def author(self, value):
+        """Set the video author."""
+        self._author = value
+    @property
+    def keywords(self) -> List[str]:
+        """Get the video keywords.
+        :rtype: List[str]
+        """
+        return self.vid_info.get('videoDetails', {}).get('keywords', [])
+    @property
+    def channel_id(self) -> str:
+        """Get the video poster's channel id.
+        :rtype: str
+        """
+        return self.vid_info.get('videoDetails', {}).get('channelId', None)
+    @property
+    def channel_url(self) -> str:
+        """Construct the channel url for the video's poster from the channel id.
+        :rtype: str
+        """
+        return f'https://www.youtube.com/channel/{self.channel_id}'
+    @property
+    def likes(self):
+        """Get the video likes
+        :rtype: str
+        """
+        try:
+            return self.vid_details[
+                'contents'][
+                'twoColumnWatchNextResults'][
+                'results'][
+                'results'][
+                'contents'][
+                0][
+                'videoPrimaryInfoRenderer'][
+                'videoActions'][
+                'menuRenderer'][
+                'topLevelButtons'][
+                0][
+                'segmentedLikeDislikeButtonViewModel'][
+                'likeCountEntity'][
+                'likeCountIfLikedNumber']
+        except (KeyError, IndexError):
+            return None
+    @property
+    def metadata(self) -> Optional[YouTubeMetadata]:
+        """Get the metadata for the video.
+        :rtype: YouTubeMetadata
+        """
+        if not self._metadata:
+            self._metadata = extract.metadata(
+                self.initial_data)  # Creating the metadata
+        return self._metadata
+    def register_on_progress_callback(self, func: Callable[[Any, bytes, int], None]):
+        """Register a download progress callback function post initialization.
+        :param callable func:
+            A callback function that takes ``stream``, ``chunk``,
+             and ``bytes_remaining`` as parameters.
+        :rtype: None
+        """
+        self.stream_monostate.on_progress = func
+    def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], None]):
+        """Register a download complete callback function post initialization.
+        :param callable func:
+            A callback function that takes ``stream`` and  ``file_path``.
+        :rtype: None
+        """
+        self.stream_monostate.on_complete = func
+    @staticmethod
+    def from_id(video_id: str) -> "YouTube":
+        """Construct a :class:`YouTube <YouTube>` object from a video id.
+        :param str video_id:
+            The video id of the YouTube video.
+        :rtype: :class:`YouTube <YouTube>`
+        """
+        return YouTube(f"https://www.youtube.com/watch?v={video_id}")

pytubefix/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (1.28 kB). View file

pytubefix/__pycache__/__main__.cpython-311.pyc ADDED Viewed

Binary file (42.6 kB). View file

pytubefix/__pycache__/buffer.cpython-311.pyc ADDED Viewed

Binary file (2.91 kB). View file

pytubefix/__pycache__/captions.cpython-311.pyc ADDED Viewed

Binary file (10.8 kB). View file

pytubefix/__pycache__/chapters.cpython-311.pyc ADDED Viewed

Binary file (3.18 kB). View file

pytubefix/__pycache__/cipher.cpython-311.pyc ADDED Viewed

Binary file (9.97 kB). View file

pytubefix/__pycache__/cli.cpython-311.pyc ADDED Viewed

Binary file (21.2 kB). View file

pytubefix/__pycache__/exceptions.cpython-311.pyc ADDED Viewed

Binary file (18.3 kB). View file

pytubefix/__pycache__/extract.cpython-311.pyc ADDED Viewed

Binary file (25.1 kB). View file

pytubefix/__pycache__/file_system.cpython-311.pyc ADDED Viewed

Binary file (2.54 kB). View file

pytubefix/__pycache__/helpers.cpython-311.pyc ADDED Viewed

Binary file (17.4 kB). View file

pytubefix/__pycache__/info.cpython-311.pyc ADDED Viewed

Binary file (1.54 kB). View file

pytubefix/__pycache__/innertube.cpython-311.pyc ADDED Viewed

Binary file (24.6 kB). View file

pytubefix/__pycache__/itags.cpython-311.pyc ADDED Viewed

Binary file (4.05 kB). View file

pytubefix/__pycache__/jsinterp.cpython-311.pyc ADDED Viewed

Binary file (70.1 kB). View file

pytubefix/__pycache__/keymoments.cpython-311.pyc ADDED Viewed

Binary file (3.19 kB). View file

pytubefix/__pycache__/metadata.cpython-311.pyc ADDED Viewed

Binary file (2.68 kB). View file

pytubefix/__pycache__/monostate.cpython-311.pyc ADDED Viewed

Binary file (1.15 kB). View file

pytubefix/__pycache__/parser.cpython-311.pyc ADDED Viewed

Binary file (6.64 kB). View file

pytubefix/__pycache__/protobuf.cpython-311.pyc ADDED Viewed

Binary file (8.88 kB). View file

pytubefix/__pycache__/query.cpython-311.pyc ADDED Viewed

Binary file (24.1 kB). View file

pytubefix/__pycache__/request.cpython-311.pyc ADDED Viewed

Binary file (10.3 kB). View file

pytubefix/__pycache__/streams.cpython-311.pyc ADDED Viewed

Binary file (28.7 kB). View file

pytubefix/__pycache__/version.cpython-311.pyc ADDED Viewed

Binary file (323 Bytes). View file

pytubefix/botGuard/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

pytubefix/botGuard/__init__.py ADDED Viewed

File without changes

pytubefix/botGuard/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (211 Bytes). View file

pytubefix/botGuard/__pycache__/bot_guard.cpython-311.pyc ADDED Viewed

Binary file (2.82 kB). View file

pytubefix/botGuard/bot_guard.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import os
+import subprocess
+import sys
+import shutil
+from typing import Optional
+PLATFORM = sys.platform
+NODE = 'node' if PLATFORM in ['linux', 'darwin'] else 'node.exe'
+def _find_node_path() -> Optional[str]:
+    """Try multiple ways to find Node.js path."""
+    local_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), f'binaries/{NODE}')
+    if os.path.isfile(local_path):
+        return local_path
+    system_path = shutil.which(NODE)
+    if system_path:
+        return system_path
+    return NODE
+NODE_PATH = _find_node_path()
+VM_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'vm/botGuard.js')
+def generate_po_token(visitor_data: str) -> str:
+    """
+    Run nodejs to generate poToken through botGuard.
+    Raises:
+        RuntimeError: If Node.js is not available
+    """
+    try:
+        result = subprocess.check_output(
+            [NODE_PATH, VM_PATH, visitor_data],
+            stderr=subprocess.PIPE
+        ).decode()
+        return result.replace("\n", "")
+    except FileNotFoundError as e:
+        raise RuntimeError(
+            f"Node.js is required but not found. Tried path: {NODE_PATH}\n"
+            "Please install Node.js or ensure it's in your PATH."
+        ) from e
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError(
+            f"Failed to execute botGuard.js: {e.stderr.decode().strip()}"
+        ) from e

pytubefix/botGuard/vm/botGuard.js ADDED Viewed

The diff for this file is too large to render. See raw diff

pytubefix/buffer.py ADDED Viewed

	@@ -0,0 +1,48 @@

+"""This module implements a `Buffer` class for handling in-memory data storage, downloading streams,
+and redirecting content to standard output (stdout)."""
+import sys
+import io
+class Buffer:
+    def __init__(self):
+        """
+        Initializes the in-memory buffer to store data.
+        """
+        self.buffer = io.BytesIO()
+    def download_in_buffer(self, source):
+        """
+        Downloads data directly into the buffer. Accepts objects with the `stream_to_buffer`
+        method or strings.
+        Args:
+            source: Object or data to be written to the buffer.
+        """
+        if hasattr(source, 'stream_to_buffer') and callable(source.stream_to_buffer):
+            source.stream_to_buffer(self.buffer)
+        elif isinstance(source, str):
+            self.buffer.write(source.encode('utf-8'))
+        else:
+            raise TypeError("The provided object is not compatible for downloading into the buffer.")
+    def redirect_to_stdout(self):
+        """
+        Redirects the buffer's content to stdout.
+        """
+        self.buffer.seek(0)  # Go back to the start of the buffer
+        sys.stdout.buffer.write(self.buffer.read())
+    def read(self):
+        """
+        Reads the buffer's content.
+        """
+        self.buffer.seek(0)
+        return self.buffer.read()
+    def clear(self):
+        """
+        Clears the buffer for reuse.
+        """
+        self.buffer = io.BytesIO()

pytubefix/captions.py ADDED Viewed

	@@ -0,0 +1,215 @@

+import math
+import os
+import time
+import json
+import re
+import xml.etree.ElementTree as ElementTree
+from html import unescape
+from typing import Dict, Optional
+from pytubefix import request
+from pytubefix.helpers import safe_filename, target_directory
+class Caption:
+    """Container for caption tracks."""
+    def __init__(self, caption_track: Dict):
+        """Construct a :class:`Caption <Caption>`.
+        :param dict caption_track:
+            Caption track data extracted from ``watch_html``.
+        """
+        self.url = caption_track.get("baseUrl")
+        # Certain videos have runs instead of simpleText
+        #  this handles that edge case
+        name_dict = caption_track['name']
+        if 'simpleText' in name_dict:
+            self.name = name_dict['simpleText']
+        else:
+            for el in name_dict['runs']:
+                if 'text' in el:
+                    self.name = el['text']
+        # Use "vssId" instead of "languageCode", fix issue #779
+        self.code = caption_track["vssId"]
+        # Remove preceding '.' for backwards compatibility, e.g.:
+        # English -> vssId: .en, languageCode: en
+        # English (auto-generated) -> vssId: a.en, languageCode: en
+        self.code = self.code.strip('.')
+    @property
+    def xml_captions(self) -> str:
+        """Download the xml caption tracks."""
+        return request.get(self.url)
+    @property
+    def json_captions(self) -> dict:
+        """Download and parse the json caption tracks."""
+        if 'ftm=' in self.url:
+            json_captions_url = self.url.replace('fmt=srv3', 'fmt=json3')
+        else:
+            json_captions_url = f'{self.url}&fmt=json3'
+        text = request.get(json_captions_url)
+        parsed = json.loads(text)
+        assert parsed['wireMagic'] == 'pb3', 'Unexpected captions format'
+        return parsed
+    def generate_srt_captions(self) -> str:
+        """Generate "SubRip Subtitle" captions.
+        Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
+        recompiles them into the "SubRip Subtitle" format.
+        """
+        return self.xml_caption_to_srt(self.xml_captions)
+    def generate_txt_captions(self) -> str:
+        """Generate Text captions.
+        Takes the "SubRip Subtitle" format captions and converts them into text
+        """
+        srt_captions = self.generate_srt_captions()
+        lines = srt_captions.splitlines()
+        text = ''
+        for line in lines:
+            if re.search('^[0-9]+$', line) is None and \
+               re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is None and \
+               re.search('^$', line) is None:
+                text += ' ' + line.strip()
+            text = text.lstrip()
+        return text.strip()
+    def save_captions(self, filename: str):
+        """Generate and save "SubRip Subtitle" captions to a text file.
+        Takes the xml captions from :meth:`~pytubefix.Caption.xml_captions` and
+        recompiles them into the "SubRip Subtitle" format and saves it to a text file.
+        :param filename: The name of the file to save the captions.
+        """
+        srt_captions = self.xml_caption_to_srt(self.xml_captions)
+        with open(filename, 'w', encoding='utf-8') as file:
+            file.write(srt_captions)
+    @staticmethod
+    def float_to_srt_time_format(d: float) -> str:
+        """Convert decimal durations into proper srt format.
+        :rtype: str
+        :returns:
+            SubRip Subtitle (str) formatted time duration.
+        float_to_srt_time_format(3.89) -> '00:00:03,890'
+        """
+        fraction, whole = math.modf(d)
+        time_fmt = time.strftime("%H:%M:%S,", time.gmtime(whole))
+        ms = f"{fraction:.3f}".replace("0.", "")
+        return time_fmt + ms
+    def xml_caption_to_srt(self, xml_captions: str) -> str:
+        """Convert xml caption tracks to "SubRip Subtitle (srt)".
+        :param str xml_captions:
+            XML formatted caption tracks.
+        """
+        segments = []
+        root = ElementTree.fromstring(xml_captions)
+        i = 0
+        for child in list(root.iter(root.tag))[0]:
+            if child.tag in ['p', 'text']:
+                caption = ''
+                # I think it will be faster than `len(list(child)) == 0`
+                if not list(child):
+                    # instead of 'continue'
+                    caption = child.text
+                for s in list(child):
+                    if s.tag == 's':
+                        caption += f' {s.text}'
+                if not caption:
+                    continue
+                caption = unescape(caption.replace("\n", " ").replace("  ", " "),)
+                try:
+                    if "d" in child.attrib:
+                        duration = float(child.attrib["d"]) / 1000.0
+                    else:
+                        duration = float(child.attrib["dur"])
+                except KeyError:
+                    duration = 0.0
+                if "t" in child.attrib:
+                    start = float(child.attrib["t"]) / 1000.0
+                else:
+                    start = float(child.attrib["start"])
+                end = start + duration
+                sequence_number = i + 1  # convert from 0-indexed to 1.
+                line = "{seq}\n{start} --> {end}\n{text}\n".format(
+                    seq=sequence_number,
+                    start=self.float_to_srt_time_format(start),
+                    end=self.float_to_srt_time_format(end),
+                    text=caption,
+                )
+                segments.append(line)
+                i += 1
+        return "\n".join(segments).strip()
+    def download(
+        self,
+        title: str,
+        srt: bool = True,
+        output_path: Optional[str] = None,
+        filename_prefix: Optional[str] = None,
+    ) -> str:
+        """Write the media stream to disk.
+        :param title:
+            Output filename (stem only) for writing media file.
+            If one is not specified, the default filename is used.
+        :type title: str
+        :param srt:
+            Set to True to download srt, false to download xml. Defaults to True.
+        :type srt bool
+        :param output_path:
+            (optional) Output path for writing media file. If one is not
+            specified, defaults to the current working directory.
+        :type output_path: str or None
+        :param filename_prefix:
+            (optional) A string that will be prepended to the filename.
+            For example a number in a playlist or the name of a series.
+            If one is not specified, nothing will be prepended
+            This is separate from filename so you can use the default
+            filename but still add a prefix.
+        :type filename_prefix: str or None
+        :rtype: str
+        """
+        if title.endswith(".srt") or title.endswith(".xml"):
+            filename = ".".join(title.split(".")[:-1])
+        else:
+            filename = title
+        if filename_prefix:
+            filename = f"{safe_filename(filename_prefix)}{filename}"
+        filename = safe_filename(filename)
+        filename += f" ({self.code})"
+        filename += ".srt" if srt else ".xml"
+        file_path = os.path.join(target_directory(output_path), filename)
+        with open(file_path, "w", encoding="utf-8") as file_handle:
+            if srt:
+                file_handle.write(self.generate_srt_captions())
+            else:
+                file_handle.write(self.xml_captions)
+        return file_path
+    def __repr__(self):
+        """Printable object representation."""
+        return '<Caption lang="{s.name}" code="{s.code}">'.format(s=self)

pytubefix/chapters.py ADDED Viewed

	@@ -0,0 +1,47 @@

+# Native python imports
+from datetime import timedelta
+from typing import List
+class ChapterThumbnail:
+    """Container for chapter thumbnails."""
+    def __init__(self, width: int, height: int, url: str):
+        self.width = width
+        self.height = height
+        self.url = url
+    def __repr__(self):
+        return f'<pytubefix.chapters.ChapterThumbnail: width={self.width}, height={self.height}, url={self.url}>'
+class Chapter:
+    """Container for chapters tracks."""
+    title: str
+    start_seconds: int
+    duration: int  # in seconds
+    thumbnails: List[ChapterThumbnail]
+    def __init__(self, chapter_data: dict, duration: int):
+        data = chapter_data['chapterRenderer']
+        self.title = data['title']['simpleText']
+        self.start_seconds = int(data['timeRangeStartMillis'] / 1000)
+        self.duration = duration
+        thumbnails_data = data.get('thumbnail', {}).get('thumbnails', [])
+        self.thumbnails = [
+            ChapterThumbnail(
+                width=thumb['width'],
+                height=thumb['height'],
+                url=thumb['url']
+            )
+            for thumb in thumbnails_data
+        ]
+    @property
+    def start_label(self) -> str:
+        return str(timedelta(seconds=self.start_seconds))
+    def __repr__(self):
+        return f'<Chapter: {self.title} | {self.start_label}>'

pytubefix/cipher.py ADDED Viewed

	@@ -0,0 +1,190 @@

+"""
+This module contains all the logic needed to find the signature functions.
+YouTube's strategy to restrict downloading videos is to send a ciphered version
+of the signature to the client, along with the decryption algorithm obfuscated
+in JavaScript. For the clients to play the videos, JavaScript must take the
+ciphered version, cycle it through a series of "transform functions," and then
+sign the media URL with the output.
+This module is responsible for (1) finding these "transform functions"
+and (2) sending them to be interpreted by jsinterp.py.
+"""
+import logging
+import re
+from pytubefix.exceptions import RegexMatchError, InterpretationError
+from pytubefix.jsinterp import JSInterpreter, extract_player_js_global_var
+logger = logging.getLogger(__name__)
+class Cipher:
+    def __init__(self, js: str, js_url: str):
+        self.js_url = js_url
+        self.signature_function_name = get_initial_function_name(js, js_url)
+        self.throttling_function_name = get_throttling_function_name(js, js_url)
+        self.calculated_n = None
+        self.js_interpreter = JSInterpreter(js)
+    def get_throttling(self, n: str):
+        """Interpret the function that throttles download speed.
+        :param str n:
+            Contains the parameter that must be transformed.
+        :rtype: str
+        :returns:
+            Returns the transformed value "n".
+        """
+        try:
+            return self.js_interpreter.call_function(self.throttling_function_name, n)
+        except:
+            raise InterpretationError(js_url=self.js_url)
+    def get_signature(self, ciphered_signature: str) -> str:
+        """interprets the function that signs the streams.
+            The lack of this signature generates the 403 forbidden error.
+        :param str ciphered_signature:
+           Contains the signature that must be transformed.
+        :rtype: str
+        :returns:
+           Returns the correct stream signature.
+        """
+        try:
+            return self.js_interpreter.call_function(self.signature_function_name, ciphered_signature)
+        except:
+            raise InterpretationError(js_url=self.js_url)
+def get_initial_function_name(js: str, js_url: str) -> str:
+    """Extract the name of the function responsible for computing the signature.
+    :param str js:
+        The contents of the base.js asset file.
+    :param str js_url:
+        Full base.js url
+    :rtype: str
+    :returns:
+        Function name from regex match
+    """
+    function_patterns = [
+        r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*[a-zA-Z0-9_\$\"\[\]]+\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*[a-zA-Z0-9_\$\"\[\]]+\s*\)',
+        r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
+        r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
+        # Old patterns
+        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
+        # Obsolete patterns
+        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
+        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('
+    ]
+    logger.debug("looking for signature cipher name")
+    for pattern in function_patterns:
+        regex = re.compile(pattern)
+        function_match = regex.search(js)
+        if function_match:
+            sig = function_match.group('sig')
+            logger.debug("finished regex search, matched: %s", pattern)
+            logger.debug(f'Signature cipher function name: {sig}')
+            return sig
+    raise RegexMatchError(
+        caller="get_initial_function_name", pattern=f"multiple in {js_url}"
+    )
+def get_throttling_function_name(js: str, js_url: str) -> str:
+    """Extract the name of the function that computes the throttling parameter.
+    :param str js:
+        The contents of the base.js asset file.
+    :param str js_url:
+        Full base.js url
+    :rtype: str
+    :returns:
+        The name of the function used to compute the throttling parameter.
+    """
+    logger.debug("looking for nsig name")
+    try:
+        # Extracts the function name based on the global array
+        global_obj, varname, code = extract_player_js_global_var(js)
+        if global_obj and varname and code:
+            logger.debug(f"Global Obj name is: {varname}")
+            global_obj = JSInterpreter(js).interpret_expression(code, {}, 100)
+            logger.debug("Successfully interpreted global object")
+            for k, v in enumerate(global_obj):
+                if v.endswith('_w8_'):
+                    logger.debug(f"_w8_ found in index {k}")
+                    pattern = r'''(?xs)
+                            [;\n](?:
+                                (?P<f>function\s+)|
+                                (?:var\s+)?
+                            )(?P<funcname>[a-zA-Z0-9_$]+)\s*(?(f)|=\s*function\s*)
+                            \((?P<argname>[a-zA-Z0-9_$]+)\)\s*\{
+                            (?:(?!\};(?![\]\)])).)+
+                            \}\s*catch\(\s*[a-zA-Z0-9_$]+\s*\)\s*
+                            \{\s*return\s+%s\[%d\]\s*\+\s*(?P=argname)\s*\}\s*return\s+[^}]+\}[;\n]
+                        '''  % (re.escape(varname), k)
+                    func_name = re.search(pattern, js)
+                    if func_name:
+                        n_func = func_name.group("funcname")
+                        logger.debug(f"Nfunc name is: {n_func}")
+                        return n_func
+    except:
+        pass
+    pattern = r'''(?x)
+            (?:
+                \.get\("n"\)\)&&\(b=|
+                (?:
+                    b=String\.fromCharCode\(110\)|
+                    (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
+                )
+                (?:
+                    ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
+                    (?:
+                        get\(b\)|
+                        [a-zA-Z0-9_$]+\[b\]\|\|null
+                    )\)&&\(c=|
+                \b(?P<var>[a-zA-Z0-9_$]+)=
+            )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
+            (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))'''
+    logger.debug('Finding throttling function name')
+    regex = re.compile(pattern)
+    function_match = regex.search(js)
+    if function_match:
+        logger.debug("finished regex search, matched: %s", pattern)
+        func = function_match.group('nfunc')
+        idx = function_match.group('idx')
+        logger.debug(f'func is: {func}')
+        logger.debug(f'idx is: {idx}')
+        logger.debug('Checking throttling function name')
+        if idx:
+            n_func_check_pattern = fr'var {re.escape(func)}\s*=\s*\[(.+?)];'
+            n_func_found = re.search(n_func_check_pattern, js)
+            if n_func_found:
+                throttling_function = n_func_found.group(1)
+                logger.debug(f'Throttling function name is: {throttling_function}')
+                return throttling_function
+            raise RegexMatchError(
+                caller="get_throttling_function_name", pattern=f"{n_func_check_pattern} in {js_url}"
+            )
+    raise RegexMatchError(
+        caller="get_throttling_function_name", pattern=f"{pattern} in {js_url}"
+    )

pytubefix/cli.py ADDED Viewed

	@@ -0,0 +1,355 @@

+import random
+import argparse
+import gzip
+import json
+import logging
+import os
+import shutil
+import sys
+import datetime as dt
+import subprocess  # nosec
+from typing import List, Optional
+import pytubefix.exceptions as exceptions
+from pytubefix import __version__
+from pytubefix import CaptionQuery, Playlist, Stream
+from pytubefix.helpers import safe_filename, setup_logger
+from pytubefix import YouTube
+logger = logging.getLogger(__name__)
+def build_playback_report(youtube: YouTube) -> None:
+    """Serialize the request data to json for offline debugging.
+    :param YouTube youtube:
+        A YouTube object.
+    """
+    ts = int(dt.datetime.now(dt.timezone.utc).timestamp())
+    fp = os.path.join(os.getcwd(), f"yt-video-{youtube.video_id}-{ts}.json.gz")
+    js = youtube.js
+    watch_html = youtube.watch_html
+    vid_info = youtube.vid_info
+    with gzip.open(fp, "wb") as fh:
+        fh.write(
+            json.dumps(
+                {
+                    "url": youtube.watch_url,
+                    "js": js,
+                    "watch_html": watch_html,
+                    "video_info": vid_info,
+                }
+            ).encode("utf8"),
+        )
+def display_progress_bar(bytes_received: int, filesize: int, ch: str = "█", scale: float = 0.55) -> None:
+    """Display a simple, pretty progress bar.
+    Example:
+    ~~~~~~~~
+    PSY - GANGNAM STYLE(강남스타일) MV.mp4
+    ↳ |███████████████████████████████████████| 100.0%
+    :param int bytes_received:
+        The delta between the total file size (bytes) and bytes already
+        written to disk.
+    :param int filesize:
+        File size of the media stream in bytes.
+    :param str ch:
+        Character to use for presenting progress segment.
+    :param float scale:
+        Scale multiplier to reduce progress bar size.
+    """
+    columns = shutil.get_terminal_size().columns
+    max_width = int(columns * scale)
+    filled = int(round(max_width * bytes_received / float(filesize)))
+    remaining = max_width - filled
+    progress_bar = ch * filled + " " * remaining
+    percent = round(100.0 * bytes_received / float(filesize), 1)
+    text = f" ↳ |{progress_bar}| {percent}%\r"
+    sys.stdout.write(text)
+    sys.stdout.flush()
+def on_progress(stream: Stream, chunk: bytes, bytes_remaining: int) -> None:  # pylint: disable=W0613
+    filesize = stream.filesize
+    bytes_received = filesize - bytes_remaining
+    display_progress_bar(bytes_received, filesize)
+def _download(stream: Stream, target: Optional[str] = None, filename: Optional[str] = None) -> None:
+    filesize_megabytes = stream.filesize // 1048576
+    print(f"{filename or stream.default_filename} | {filesize_megabytes} MB")
+    file_path = stream.get_file_path(filename=filename, output_path=target)
+    if stream.exists_at_path(file_path):
+        print(f"Already downloaded at:\n{file_path}")
+        return
+    stream.download(output_path=target, filename=filename)
+    sys.stdout.write("\n")
+def _unique_name(base: str, subtype: str, media_type: str, target: str) -> str:
+    """
+    Given a base name, the file format, and the target directory, will generate
+    a filename unique for that directory and file format.
+    :param str base:
+        The given base-name.
+    :param str subtype:
+        The filetype of the video which will be downloaded.
+    :param str media_type:
+        The media_type of the file, ie. "audio" or "video"
+    :param Path target:
+        Target directory for download.
+    """
+    counter = 0
+    while True:
+        file_name = f"{base}_{media_type}_{counter}"
+        file_path = os.path.join(target, f"{file_name}.{subtype}")
+        if not os.path.exists(file_path):
+            return file_name
+        counter += 1
+def ffmpeg_process(youtube: YouTube, resolution: str, target: Optional[str] = None) -> None:
+    """
+    Decides the correct video stream to download, then calls _ffmpeg_downloader.
+    :param YouTube youtube:
+        A valid YouTube object.
+    :param str resolution:
+        YouTube video resolution.
+    :param str target:
+        Target directory for download
+    """
+    youtube.register_on_progress_callback(on_progress)
+    target = target or os.getcwd()
+    if resolution == None or resolution == "best":
+        highest_quality_stream = youtube.streams.filter(progressive=False).order_by("resolution").last()
+        mp4_stream = youtube.streams.filter(progressive=False, subtype="mp4").order_by("resolution").last()
+        if highest_quality_stream.resolution == mp4_stream.resolution:
+            video_stream = mp4_stream
+        else:
+            video_stream = highest_quality_stream
+    else:
+        video_stream = youtube.streams.filter(progressive=False, resolution=resolution).first()
+    if not video_stream:
+        print(f"No streams found for resolution {resolution}")
+        return
+    audio_stream = youtube.streams.filter(progressive=False).order_by("abr").last()
+    video_file_name = _unique_name(youtube.title, "mp4", "video", target)
+    audio_file_name = _unique_name(youtube.title, "mp4", "audio", target)
+    video_path = video_stream.get_file_path(filename=video_file_name, output_path=target)
+    audio_path = audio_stream.get_file_path(filename=audio_file_name, output_path=target)
+    if os.path.exists(video_path) and os.path.exists(audio_path):
+        print("Already downloaded both video and audio.")
+        return
+    _download(video_stream, target=target, filename=video_file_name)
+    _download(audio_stream, target=target, filename=audio_file_name)
+    # Construct the command to run ffmpeg
+    command = ["ffmpeg", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-strict", "experimental", f"{target}/{youtube.title}.mp4"]
+    # Execute the command
+    subprocess.run(command)
+def download_by_resolution(youtube: YouTube, resolution: str, target: Optional[str] = None) -> None:
+    """Download a stream by the specified resolution.
+    :param YouTube youtube:
+        A valid YouTube object.
+    :param str resolution:
+        The desired resolution of the stream.
+    :param Optional[str] target:
+        The target directory for the download.
+    """
+    print(f"Downloading {resolution}...")
+    stream = youtube.streams.filter(resolution=resolution).first()
+    if stream is None:
+        print(f"No stream found for resolution {resolution}")
+    else:
+        _download(stream, target)
+def download_audio(youtube: YouTube, filetype: Optional[str] = "mp4", target: Optional[str] = None) -> None:
+    """Download audio stream of a YouTube video.
+    :param YouTube youtube:
+        A valid YouTube object.
+    :param Optional[str] filetype:
+        The filetype for the audio. Defaults to "mp4".
+    :param Optional[str] target:
+        The target directory for the download.
+    """
+    print("Downloading audio...")
+    stream = youtube.streams.filter(progressive=False, subtype=filetype).order_by("abr").last()
+    if stream is None:
+        print(f"No audio stream found for filetype {filetype}")
+    else:
+        _download(stream, target)
+def download_highest_resolution_progressive(youtube: YouTube, resolution: str, target: Optional[str] = None) -> None:
+    """Download a YouTube video stream at the highest resolution.
+    :param YouTube youtube:
+        A valid YouTube object.
+    :param str resolution:
+        The resolution of the stream.
+    :param Optional[str] target:
+        The target directory for the download.
+    """
+    print("Downloading highest resolution progressive stream...")
+    stream = youtube.streams.filter(progressive=True).order_by("resolution").last()
+    if stream is None:
+        print("No progressive stream found.")
+    else:
+        _download(stream, target)
+def download_by_itag(youtube: YouTube, itag: int, target: Optional[str] = None) -> None:
+    """Download a YouTube stream by its itag.
+    :param YouTube youtube:
+        A valid YouTube object.
+    :param int itag:
+        The itag of the desired stream.
+    :param Optional[str] target:
+        The target directory for the download.
+    """
+    stream = youtube.streams.get_by_itag(itag)
+    if stream is None:
+        print(f"No stream found with itag {itag}.")
+    else:
+        print(f"Downloading stream with itag {itag}...")
+        _download(stream, target)
+def download_caption(youtube: YouTube, lang_code: str, target: Optional[str] = None) -> None:
+    """Download captions for a given YouTube video.
+    :param YouTube youtube:
+        A valid YouTube object.
+    :param str lang_code:
+        The language code for the desired captions.
+    :param Optional[str] target:
+        The target directory for the downloaded captions.
+    """
+    print(f"Downloading captions for language: {lang_code}...")
+    caption = youtube.captions.get_by_language_code(lang_code)
+    if caption is None:
+        print(f"No captions found for language code: {lang_code}.")
+    else:
+        caption.download(target)
+def _print_available_captions(captions: List[CaptionQuery]) -> None:
+    """Print available captions for a YouTube video.
+    :param List[CaptionQuery] captions:
+        The list of available captions.
+    """
+    print("Available captions:")
+    for caption in captions:
+        print(f" - {caption.language_code}: {caption.name}")
+def display_streams(youtube: YouTube) -> None:
+    """Display available streams for the given YouTube video.
+    :param YouTube youtube:
+        A valid YouTube object.
+    """
+    print(f"Available streams for {youtube.title}:")
+    for stream in youtube.streams:
+        print(f" - {stream}")
+def _parse_args(parser: argparse.ArgumentParser, args: Optional[List] = None) -> argparse.Namespace:
+    parser.add_argument("url", help="The YouTube /watch or /playlist url", nargs="?")
+    parser.add_argument("-V", "--version", action="version", version=f"%(prog)s {__version__}")
+    parser.add_argument("--itag", type=int, help="The itag for the desired stream")
+    parser.add_argument("-r", "--resolution", type=str, help="The resolution for the desired stream")
+    parser.add_argument("-l", "--list", action="store_true", help="The list option causes pytubefix cli to return a list of streams available to download")
+    parser.add_argument("--oauth", action="store_true", help="use oauth token")
+    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="Set logger output to verbose output.")
+    parser.add_argument("--logfile", action="store", help="logging debug and error messages into a log file")
+    parser.add_argument("--build-playback-report", action="store_true", help="Save the html and js to disk")
+    parser.add_argument("-c", "--caption-code", type=str, help="Download srt captions for given language code. Prints available language codes if no argument given")
+    parser.add_argument('-lc', '--list-captions', action='store_true', help="List available caption codes for a video")
+    parser.add_argument("-t", "--target", help="The output directory for the downloaded stream. Default is current working directory")
+    parser.add_argument("-a", "--audio", const="mp4", nargs="?", help="Download the audio for a given URL at the highest bitrate available. Defaults to mp4 format if none is specified")
+    parser.add_argument("-f", "--ffmpeg", const="best", nargs="?", help="Downloads the audio and video stream for resolution provided. If no resolution is provided, downloads the best resolution. Runs the command line program ffmpeg to combine the audio and video")
+    return parser.parse_args(args)
+def _perform_args_on_youtube(youtube: YouTube, args: argparse.Namespace) -> None:
+    if len(sys.argv) == 2:
+        download_highest_resolution_progressive(youtube=youtube, resolution="highest", target=args.target)
+    if args.list_captions:
+        _print_available_captions(youtube.captions)
+    if args.list:
+        display_streams(youtube)
+    if args.itag:
+        download_by_itag(youtube=youtube, itag=args.itag, target=args.target)
+    elif args.caption_code:
+        download_caption(youtube=youtube, lang_code=args.caption_code, target=args.target)
+    elif args.resolution:
+        download_by_resolution(youtube=youtube, resolution=args.resolution, target=args.target)
+    elif args.audio:
+        download_audio(youtube=youtube, filetype=args.audio, target=args.target)
+    if args.ffmpeg:
+        ffmpeg_process(youtube=youtube, resolution=args.resolution, target=args.target)
+    if args.build_playback_report:
+        build_playback_report(youtube)
+    oauth = False
+    cache = False
+    if args.oauth:
+        oauth = True
+        cache = True
+        print("Loading video...")
+        youtube = YouTube(args.url, use_oauth=oauth, allow_oauth_cache=cache)
+        download_highest_resolution_progressive(youtube=youtube, resolution="highest", target=args.target)
+def main():
+    parser = argparse.ArgumentParser(description=main.__doc__)
+    args = _parse_args(parser)
+    log_filename = args.logfile if args.verbose else None
+    setup_logger(logging.DEBUG if args.verbose else logging.INFO, log_filename=log_filename)
+    if args.verbose:
+        logger.debug(f'Pytubefix version: {__version__}')
+    if not args.url or "youtu" not in args.url:
+        parser.print_help()
+        sys.exit(0)
+    if "/playlist" in args.url:
+        print("Loading playlist...")
+        playlist = Playlist(args.url)
+        args.target = args.target or safe_filename(playlist.title)
+        for youtube_video in playlist.videos:
+            try:
+                _perform_args_on_youtube(youtube_video, args)
+            except exceptions.PytubeFixError as e:
+                print(f"There was an error with video: {youtube_video}")
+                print(e)
+    else:
+        print("Loading video...")
+        youtube = YouTube(args.url)
+        _perform_args_on_youtube(youtube, args)
+# Run the command line interface only when this module is executed as a script.
+if __name__ == "__main__":
+    main()

pytubefix/contrib/__init__.py ADDED Viewed

File without changes

pytubefix/contrib/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (210 Bytes). View file

pytubefix/contrib/__pycache__/channel.cpython-311.pyc ADDED Viewed

Binary file (29.9 kB). View file

pytubefix/contrib/__pycache__/playlist.cpython-311.pyc ADDED Viewed

Binary file (23.4 kB). View file

pytubefix/contrib/__pycache__/search.cpython-311.pyc ADDED Viewed

Binary file (22 kB). View file

pytubefix/contrib/channel.py ADDED Viewed

	@@ -0,0 +1,655 @@

+# -*- coding: utf-8 -*-
+"""Module for interacting with a user's youtube channel."""
+import json
+import logging
+from typing import Dict, List, Optional, Tuple, Iterable, Any, Callable
+from pytubefix import extract, YouTube, Playlist, request
+from pytubefix.helpers import cache, uniqueify, DeferredGeneratorList
+from pytubefix.innertube import InnerTube
+logger = logging.getLogger(__name__)
+class Channel(Playlist):
+    def __init__(
+            self,
+            url: str,
+            client: str = InnerTube().client_name,
+            proxies: Optional[Dict[str, str]] = None,
+            use_oauth: bool = False,
+            allow_oauth_cache: bool = True,
+            token_file: Optional[str] = None,
+            oauth_verifier: Optional[Callable[[str, str], None]] = None,
+            use_po_token: Optional[bool] = False,
+            po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
+    ):
+        """Construct a :class:`Channel <Channel>`.
+        :param str url:
+            A valid YouTube channel URL.
+         :param dict proxies:
+            (Optional) A dict mapping protocol to proxy address which will be used by pytube.
+        :param bool use_oauth:
+            (Optional) Prompt the user to authenticate to YouTube.
+            If allow_oauth_cache is set to True, the user should only be prompted once.
+        :param bool allow_oauth_cache:
+            (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
+            These tokens are only generated if use_oauth is set to True as well.
+        :param str token_file:
+            (Optional) Path to the file where the OAuth tokens will be stored.
+            Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
+        :param Callable oauth_verifier:
+            (optional) Verifier to be used for getting OAuth tokens.
+            Verification URL and User-Code will be passed to it respectively.
+            (if passed, else default verifier will be used)
+        :param bool use_po_token:
+            (Optional) Prompt the user to use the proof of origin token on YouTube.
+            It must be sent with the API along with the linked visitorData and
+            then passed as a `po_token` query parameter to affected clients.
+            If allow_oauth_cache is set to True, the user should only be prompted once.
+        :param Callable po_token_verifier:
+            (Optional) Verified used to obtain the visitorData and po_token.
+            The verifier will return the visitorData and po_token respectively.
+            (if passed, else default verifier will be used)
+        """
+        super().__init__(url, proxies)
+        self.channel_uri = extract.channel_name(url)
+        self.client = client
+        self.use_oauth = use_oauth
+        self.allow_oauth_cache = allow_oauth_cache
+        self.token_file = token_file
+        self.oauth_verifier = oauth_verifier
+        self.use_po_token = use_po_token
+        self.po_token_verifier = po_token_verifier
+        self.channel_url = (
+            f"https://www.youtube.com{self.channel_uri}"
+        )
+        self.featured_url = self.channel_url + '/featured'
+        self.videos_url = self.channel_url + '/videos'
+        self.shorts_url = self.channel_url + '/shorts'
+        self.live_url = self.channel_url + '/streams'
+        self.releases_url = self.channel_url + '/releases'
+        self.playlists_url = self.channel_url + '/playlists'
+        self.community_url = self.channel_url + '/community'
+        self.featured_channels_url = self.channel_url + '/channels'
+        self.about_url = self.channel_url + '/about'
+        self._html_url = self.videos_url  # Videos will be preferred over short videos and live
+        # Possible future additions
+        self._playlists_html = None
+        self._community_html = None
+        self._featured_channels_html = None
+        self._about_html = None
+    def __repr__(self) -> str:
+        return f'<pytubefix.contrib.Channel object: channelUri={self.channel_uri}>'
+    @property
+    def channel_name(self):
+        """Get the name of the YouTube channel.
+        :rtype: str
+        """
+        return self.initial_data['metadata']['channelMetadataRenderer']['title']
+    @property
+    def channel_id(self):
+        """Get the ID of the YouTube channel.
+        This will return the underlying ID, not the vanity URL.
+        :rtype: str
+        """
+        return self.initial_data['metadata']['channelMetadataRenderer']['externalId']
+    @property
+    def vanity_url(self):
+        """Get the vanity URL of the YouTube channel.
+        Returns None if it doesn't exist.
+        :rtype: str
+        """
+        return self.initial_data['metadata']['channelMetadataRenderer'].get('vanityChannelUrl', None)  # noqa:E501
+    @property
+    def html_url(self):
+        """Get the html url.
+        :rtype: str
+        """
+        return self._html_url
+    @html_url.setter
+    def html_url(self, value):
+        """Set the html url and clear the cache."""
+        if self._html_url != value:
+            self._html = None
+            self._initial_data = None
+            self.__class__.video_urls.fget.cache_clear()
+            self._html_url = value
+    @property
+    def html(self):
+        """Get the html for the /videos, /shorts or /streams page.
+        :rtype: str
+        """
+        if self._html:
+            return self._html
+        self._html = request.get(self.html_url)
+        return self._html
+    @property
+    def playlists_html(self):
+        """Get the html for the /playlists page.
+        Currently unused for any functionality.
+        :rtype: str
+        """
+        if self._playlists_html:
+            return self._playlists_html
+        else:
+            self._playlists_html = request.get(self.playlists_url)
+            return self._playlists_html
+    @property
+    def community_html(self):
+        """Get the html for the /community page.
+        Currently unused for any functionality.
+        :rtype: str
+        """
+        if self._community_html:
+            return self._community_html
+        else:
+            self._community_html = request.get(self.community_url)
+            return self._community_html
+    @property
+    def featured_channels_html(self):
+        """Get the html for the /channels page.
+        Currently unused for any functionality.
+        :rtype: str
+        """
+        if self._featured_channels_html:
+            return self._featured_channels_html
+        else:
+            self._featured_channels_html = request.get(self.featured_channels_url)
+            return self._featured_channels_html
+    @property
+    def about_html(self):
+        """Get the html for the /about page.
+        Currently unused for any functionality.
+        :rtype: str
+        """
+        if self._about_html:
+            return self._about_html
+        else:
+            self._about_html = request.get(self.about_url)
+            return self._about_html
+    def url_generator(self):
+        """Generator that yields video URLs.
+        :Yields: Video URLs
+        """
+        for page in self._paginate(self.html):
+            for obj in page:
+                yield obj
+    def videos_generator(self):
+        for url in self.video_urls:
+            yield url
+    def _get_active_tab(self, initial_data) -> dict:
+        """ Receive the raw json and return the active page.
+        :returns: Active page json object.
+        """
+        active_tab = {}
+        # Possible tabs: Home, Videos, Shorts, Live, Releases, Playlists, Community, Channels, About
+        # We check each page for the URL that is active.
+        for tab in initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]:
+            if 'tabRenderer' in tab:
+                tab_url = tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
+                if tab_url.rsplit('/', maxsplit=1)[-1] == self.html_url.rsplit('/', maxsplit=1)[-1]:
+                    active_tab = tab
+                    break
+        return active_tab
+    def _extract_obj_from_home(self) -> list:
+        """ Extract items from the channel home page.
+        :returns: list of home page objects.
+        """
+        items = []
+        try:
+            contents = self._get_active_tab(self.initial_data)['tabRenderer']['content'][
+                'sectionListRenderer']['contents']
+            for obj in contents:
+                item_section_renderer = obj['itemSectionRenderer']['contents'][0]
+                # Skip the presentation videos for non-subscribers
+                if 'channelVideoPlayerRenderer' in item_section_renderer:
+                    continue
+                # Skip presentation videos for subscribers
+                if 'channelFeaturedContentRenderer' in item_section_renderer:
+                    continue
+                # skip the list with channel members
+                if 'recognitionShelfRenderer' in item_section_renderer:
+                    continue
+                # Get the horizontal shorts
+                if 'reelShelfRenderer' in item_section_renderer:
+                    for x in item_section_renderer['reelShelfRenderer']['items']:
+                        items.append(x)
+                # Get videos, playlist and horizontal channels
+                if 'shelfRenderer' in item_section_renderer:
+                    # We only take items that are horizontal
+                    if 'horizontalListRenderer' in item_section_renderer['shelfRenderer']['content']:
+                        # We iterate over each item in the array, which could be videos, playlist or channel
+                        for x in item_section_renderer['shelfRenderer']['content']['horizontalListRenderer']['items']:
+                            items.append(x)
+        except (KeyError, IndexError, TypeError):
+            return []
+        # Extract object from each corresponding url
+        items_obj = self._extract_ids(items)
+        # remove duplicates
+        return uniqueify(items_obj)
+    def _extract_videos(self, raw_json: str, context: Optional[Any] = None) -> Tuple[List[str], Optional[str]]:
+        """Extract the items and the continuation token from a raw json page.
+        Three layouts are tried in order: the initial page data (active tab),
+        a continuation response wrapped as ``[..., {'response': ...}]`` and a
+        continuation response with ``onResponseReceivedActions`` at top level.
+        :param str raw_json: Input json extracted from the page or the last
+            server response. An already parsed dict is used as is.
+        :param context: Not used by this method.
+        :rtype: Tuple[List[str], Optional[str]]
+        :returns: Tuple containing the de-duplicated result of
+            ``self._extract_ids`` for the items found and a continuation
+            token, if more items are available. ``([], None)`` when no
+            known layout matches.
+        """
+        if isinstance(raw_json, dict):
+            initial_data = raw_json
+        else:
+            initial_data = json.loads(raw_json)
+        # this is the json tree structure, if the json was extracted from
+        # html
+        try:
+            active_tab = self._get_active_tab(initial_data)
+            try:
+                # This is the json tree structure for videos, shorts and streams
+                items = active_tab['tabRenderer']['content']['richGridRenderer']['contents']
+            except (KeyError, IndexError, TypeError):
+                # This is the json tree structure for playlists
+                items = active_tab['tabRenderer']['content']['sectionListRenderer']['contents'][0][
+                    'itemSectionRenderer']['contents'][0]['gridRenderer']['items']
+            # This is the json tree structure of visitor data
+            # It is necessary to send the visitorData together with the continuation token
+            self._visitor_data = initial_data["responseContext"]["webResponseContextExtensionData"][
+                "ytConfigData"]["visitorData"]
+        except (KeyError, IndexError, TypeError):
+            try:
+                # this is the json tree structure, if the json was directly sent
+                # by the server in a continuation response
+                important_content = initial_data[1]['response']['onResponseReceivedActions'][
+                    0
+                ]['appendContinuationItemsAction']['continuationItems']
+                items = important_content
+            except (KeyError, IndexError, TypeError):
+                try:
+                    # this is the json tree structure, if the json was directly sent
+                    # by the server in a continuation response
+                    # no longer a list and no longer has the "response" key
+                    important_content = initial_data['onResponseReceivedActions'][0][
+                        'appendContinuationItemsAction']['continuationItems']
+                    items = important_content
+                except (KeyError, IndexError, TypeError) as p:
+                    # No known layout matched: log the lookup error and give up.
+                    logger.info(p)
+                    return [], None
+        try:
+            # When present, the continuation token lives in the last item,
+            # which is then removed from the list of real items.
+            continuation = items[-1]['continuationItemRenderer'][
+                'continuationEndpoint'
+            ]['continuationCommand']['token']
+            items = items[:-1]
+        except (KeyError, IndexError):
+            # if there is an error, no continuation is available
+            continuation = None
+        # Extract object from each corresponding url
+        items_obj = self._extract_ids(items)
+        # remove duplicates
+        return uniqueify(items_obj), continuation
+    def _extract_video_id(self, x: dict):
+        """ Try extracting video ids, if it fails, try extracting shorts ids.
+        :returns: List of YouTube, Playlist or Channel objects.
+        """
+        try:
+            return YouTube(f"/watch?v="
+                           f"{x['richItemRenderer']['content']['videoRenderer']['videoId']}",
+                           client=self.client,
+                           use_oauth=self.use_oauth,
+                           allow_oauth_cache=self.allow_oauth_cache,
+                           token_file=self.token_file,
+                           oauth_verifier=self.oauth_verifier,
+                           use_po_token=self.use_po_token,
+                           po_token_verifier=self.po_token_verifier
+                           )
+        except (KeyError, IndexError, TypeError):
+            return self._extract_shorts_id(x)
+    def _extract_shorts_id(self, x: dict):
+        """ Try extracting shorts ids, if it fails, try extracting release ids.
+        :returns: List of YouTube, Playlist or Channel objects.
+        """
+        try:
+            content = x['richItemRenderer']['content']
+            # New json tree added on 09/12/2024
+            if 'shortsLockupViewModel' in content:
+                video_id = content['shortsLockupViewModel']['onTap']['innertubeCommand']['reelWatchEndpoint']['videoId']
+            else:
+                video_id = content['reelItemRenderer']['videoId']
+            return YouTube(f"/watch?v={video_id}",
+                           client=self.client,
+                           use_oauth=self.use_oauth,
+                           allow_oauth_cache=self.allow_oauth_cache,
+                           token_file=self.token_file,
+                           oauth_verifier=self.oauth_verifier,
+                           use_po_token=self.use_po_token,
+                           po_token_verifier=self.po_token_verifier
+                           )
+        except (KeyError, IndexError, TypeError):
+            return self._extract_release_id(x)
+    def _extract_release_id(self, x: dict):
+        """ Try extracting release ids, if it fails, try extracting video IDs from the home page.
+        :returns: List of YouTube, Playlist or Channel objects.
+        """
+        try:
+            return Playlist(f"/playlist?list="
+                            f"{x['richItemRenderer']['content']['playlistRenderer']['playlistId']}",
+                            client=self.client,
+                            use_oauth=self.use_oauth,
+                            allow_oauth_cache=self.allow_oauth_cache,
+                            token_file=self.token_file,
+                            oauth_verifier=self.oauth_verifier,
+                            use_po_token=self.use_po_token,
+                            po_token_verifier=self.po_token_verifier
+                            )
+        except (KeyError, IndexError, TypeError):
+            return self._extract_video_id_from_home(x)
+    def _extract_video_id_from_home(self, x: dict):
+        """ Try extracting the video IDs from the home page,
+        if that fails, try extracting the shorts IDs from the home page.
+        :returns: List of YouTube, Playlist or Channel objects.
+        """
+        try:
+            return YouTube(f"/watch?v="
+                           f"{x['gridVideoRenderer']['videoId']}",
+                           client=self.client,
+                           use_oauth=self.use_oauth,
+                           allow_oauth_cache=self.allow_oauth_cache,
+                           token_file=self.token_file,
+                           oauth_verifier=self.oauth_verifier,
+                           use_po_token=self.use_po_token,
+                           po_token_verifier=self.po_token_verifier
+                           )
+        except (KeyError, IndexError, TypeError):
+            return self._extract_shorts_id_from_home(x)
+    def _extract_shorts_id_from_home(self, x: dict):
+        """ Try extracting the shorts IDs from the home page, if that fails, try extracting the playlist IDs.
+        :returns: List of YouTube, Playlist or Channel objects.
+        """
+        try:
+            return YouTube(f"/watch?v="
+                           f"{x['reelItemRenderer']['videoId']}",
+                           client=self.client,
+                           use_oauth=self.use_oauth,
+                           allow_oauth_cache=self.allow_oauth_cache,
+                           token_file=self.token_file,
+                           oauth_verifier=self.oauth_verifier,
+                           use_po_token=self.use_po_token,
+                           po_token_verifier=self.po_token_verifier
+                           )
+        except (KeyError, IndexError, TypeError):
+            return self._extract_playlist_id(x)
+    def _extract_playlist_id(self, x: dict):
+        """ Try extracting the playlist IDs, if that fails, try extracting the channel IDs.
+        :returns: List of YouTube, Playlist or Channel objects.
+        """
+        try:
+            return Playlist(f"/playlist?list="
+                            f"{x['gridPlaylistRenderer']['playlistId']}",
+                            client=self.client,
+                            use_oauth=self.use_oauth,
+                            allow_oauth_cache=self.allow_oauth_cache,
+                            token_file=self.token_file,
+                            oauth_verifier=self.oauth_verifier,
+                            use_po_token=self.use_po_token,
+                            po_token_verifier=self.po_token_verifier
+                            )
+        except (KeyError, IndexError, TypeError):
+            return self._extract_channel_id_from_home(x)
+    def _extract_channel_id_from_home(self, x: dict):
+        """ Try extracting the channel IDs from the home page, if that fails, return playlist IDs from lockupViewModel.
+        :returns: List of YouTube, Playlist or Channel objects.
+        """
+        try:
+            return Channel(f"/channel/"
+                           f"{x['gridChannelRenderer']['channelId']}",
+                           client=self.client,
+                           use_oauth=self.use_oauth,
+                           allow_oauth_cache=self.allow_oauth_cache,
+                           token_file=self.token_file,
+                           oauth_verifier=self.oauth_verifier,
+                           use_po_token=self.use_po_token,
+                           po_token_verifier=self.po_token_verifier
+                           )
+        except (KeyError, IndexError, TypeError):
+            return self._extract_playlist_id_from_lockup_view_model(x)
+    def _extract_playlist_id_from_lockup_view_model(self, x: dict):
+        """ Try extracting the playlist IDs, if that fails, return nothing.
+        :returns: List of YouTube, Playlist or Channel objects.
+        """
+        try:
+            return Playlist(f"/playlist?list="
+                            f"{x['lockupViewModel']['contentId']}",
+                            client=self.client,
+                            use_oauth=self.use_oauth,
+                            allow_oauth_cache=self.allow_oauth_cache,
+                            token_file=self.token_file,
+                            oauth_verifier=self.oauth_verifier,
+                            use_po_token=self.use_po_token,
+                            po_token_verifier=self.po_token_verifier
+                            )
+        except (KeyError, IndexError, TypeError):
+            return []
+    @property
+    def views(self) -> int:
+        """Extract view count for channel.
+        :return: Channel view count
+        :rtype: int
+        """
+        self.html_url = self.about_url
+        try:
+            views_text = self.initial_data['onResponseReceivedEndpoints'][0]['showEngagementPanelEndpoint'][
+                'engagementPanel']['engagementPanelSectionListRenderer']['content']['sectionListRenderer'][
+                'contents'][0]['itemSectionRenderer']['contents'][0]['aboutChannelRenderer']['metadata'][
+                'aboutChannelViewModel']['viewCountText']
+            # "1,234,567 view"
+            count_text = views_text.split(' ')[0]
+            # "1234567"
+            count_text = count_text.replace(',', '')
+            return int(count_text)
+        except KeyError:
+            return 0
+    @property
+    def description(self) -> str:
+        """Extract the channel description.
+        :return: Channel description
+        :rtype: str
+        """
+        self.html_url = self.channel_url
+        return self.initial_data['metadata']['channelMetadataRenderer']['description']
+    def find_videos_info(self, data):
+        """Recursively search for 'videos' in the text content of the JSON."""
+        if isinstance(data, dict):
+            for key, value in data.items():
+                if key == 'content' and isinstance(value, str) and 'videos' in value:
+                    return value
+                if isinstance(value, (dict, list)):
+                    result = self.find_videos_info(value)
+                    if result:
+                        return result
+        elif isinstance(data, list):
+            for item in data:
+                result = self.find_videos_info(item)
+                if result:
+                    return result
+        return None
+    @property
+    def length(self):
+        """Extracts the approximate amount of videos from the channel."""
+        try:
+            result = self.find_videos_info(self.initial_data)
+            return result if result else 'Unknown'
+        except Exception as e:
+            print(f"Exception: {e}")
+            return 'Unknown'
+    @property
+    def last_updated(self) -> str:
+        """Extract the date of the last uploaded video.
+        :return: Last video uploaded
+        :rtype: str
+        """
+        self.html_url = self.videos_url
+        try:
+            last_updated_text = self.initial_data['contents']['twoColumnBrowseResultsRenderer']['tabs'][1][
+                'tabRenderer']['content']['richGridRenderer']['contents'][0]['richItemRenderer']['content'][
+                'videoRenderer']['publishedTimeText']['simpleText']
+            return last_updated_text
+        except KeyError:
+            return None
+    @property
+    def thumbnail_url(self) -> str:
+        """extract the profile image from the json of the channel home page
+        :rtype: str
+        :return: a string with the url of the channel's profile image
+        """
+        self.html_url = self.channel_url  # get the url of the channel home page
+        return self.initial_data['metadata']['channelMetadataRenderer']['avatar']['thumbnails'][0]['url']
+    @property
+    def home(self) -> list:
+        """ Yields YouTube, Playlist and Channel objects from the channel home page.
+        :returns: List of YouTube, Playlist and Channel objects.
+        """
+        self.html_url = self.featured_url  # Set home tab
+        return self._extract_obj_from_home()
+    @property
+    def videos(self) -> Iterable[YouTube]:
+        """Yields YouTube objects of videos in this channel
+        :rtype: List[YouTube]
+        :returns: List of YouTube
+        """
+        self.html_url = self.videos_url  # Set video tab
+        return DeferredGeneratorList(self.videos_generator())
+    @property
+    def shorts(self) -> Iterable[YouTube]:
+        """Yields YouTube objects of short videos in this channel
+       :rtype: List[YouTube]
+       :returns: List of YouTube
+       """
+        self.html_url = self.shorts_url  # Set shorts tab
+        return DeferredGeneratorList(self.videos_generator())
+    @property
+    def live(self) -> Iterable[YouTube]:
+        """Yields YouTube objects of live in this channel
+       :rtype: List[YouTube]
+       :returns: List of YouTube
+       """
+        self.html_url = self.live_url  # Set streams tab
+        return DeferredGeneratorList(self.videos_generator())
+    @property
+    def lives(self) -> Iterable[YouTube]:
+        """Alias for the 'live' property."""
+        return self.live
+    @property
+    def releases(self) -> Iterable[Playlist]:
+        """Yields Playlist objects in this channel
+       :rtype: List[Playlist]
+       :returns: List of YouTube
+       """
+        self.html_url = self.releases_url  # Set releases tab
+        return DeferredGeneratorList(self.videos_generator())
+    @property
+    def playlists(self) -> Iterable[Playlist]:
+        """Yields Playlist objects in this channel
+       :rtype: List[Playlist]
+       :returns: List of Playlist
+       """
+        self.html_url = self.playlists_url  # Set playlists tab
+        return DeferredGeneratorList(self.videos_generator())

pytubefix/contrib/playlist.py ADDED Viewed

	@@ -0,0 +1,496 @@

+"""Module to download a complete playlist from a youtube channel."""
+import json
+import logging
+from collections.abc import Sequence
+from datetime import date, datetime
+from typing import Dict, Iterable, List, Optional, Tuple, Union, Any, Callable
+from pytubefix import extract, request, YouTube
+from pytubefix.innertube import InnerTube
+from pytubefix.helpers import cache, DeferredGeneratorList, install_proxy, uniqueify
+logger = logging.getLogger(__name__)
+class Playlist(Sequence):
+    """Load a YouTube playlist with URL"""
+    def __init__(
+            self,
+            url: str,
+            client: str = InnerTube().client_name,
+            proxies: Optional[Dict[str, str]] = None,
+            use_oauth: bool = False,
+            allow_oauth_cache: bool = True,
+            token_file: Optional[str] = None,
+            oauth_verifier: Optional[Callable[[str, str], None]] = None,
+            use_po_token: Optional[bool] = False,
+            po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
+    ):
+        """
+        :param dict proxies:
+            (Optional) A dict mapping protocol to proxy address which will be used by pytube.
+        :param bool use_oauth:
+            (Optional) Prompt the user to authenticate to YouTube.
+            If allow_oauth_cache is set to True, the user should only be prompted once.
+        :param bool allow_oauth_cache:
+            (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
+            These tokens are only generated if use_oauth is set to True as well.
+        :param str token_file:
+            (Optional) Path to the file where the OAuth tokens will be stored.
+            Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
+        :param Callable oauth_verifier:
+            (optional) Verifier to be used for getting OAuth tokens.
+            Verification URL and User-Code will be passed to it respectively.
+            (if passed, else default verifier will be used)
+        :param bool use_po_token:
+            (Optional) Prompt the user to use the proof of origin token on YouTube.
+            It must be sent with the API along with the linked visitorData and
+            then passed as a `po_token` query parameter to affected clients.
+            If allow_oauth_cache is set to True, the user should only be prompted once.
+        :param Callable po_token_verifier:
+            (Optional) Verified used to obtain the visitorData and po_token.
+            The verifier will return the visitorData and po_token respectively.
+            (if passed, else default verifier will be used)
+        """
+        if proxies:
+            install_proxy(proxies)
+        self._input_url = url
+        self._visitor_data = None
+        self.client = client
+        self.use_oauth = use_oauth
+        self.allow_oauth_cache = allow_oauth_cache
+        self.token_file = token_file
+        self.oauth_verifier = oauth_verifier
+        self.use_po_token = use_po_token
+        self.po_token_verifier = po_token_verifier
+        # These need to be initialized as None for the properties.
+        self._html = None
+        self._ytcfg = None
+        self._initial_data = None
+        self._sidebar_info = None
+        self._playlist_id = None
+    @property
+    def playlist_id(self):
+        """Get the playlist id.
+        :rtype: str
+        """
+        if self._playlist_id:
+            return self._playlist_id
+        self._playlist_id = extract.playlist_id(self._input_url)
+        return self._playlist_id
+    @property
+    def playlist_url(self):
+        """Get the base playlist url.
+        :rtype: str
+        """
+        return f"https://www.youtube.com/playlist?list={self.playlist_id}"
+    @property
+    def html(self):
+        """Get the playlist page html.
+        :rtype: str
+        """
+        if self._html:
+            return self._html
+        self._html = request.get(self.playlist_url)
+        return self._html
+    @property
+    def ytcfg(self):
+        """Extract the ytcfg from the playlist page html.
+        :rtype: dict
+        """
+        if self._ytcfg:
+            return self._ytcfg
+        self._ytcfg = extract.get_ytcfg(self.html)
+        return self._ytcfg
+    @property
+    def initial_data(self):
+        """Extract the initial data from the playlist page html.
+        :rtype: dict
+        """
+        if self._initial_data:
+            return self._initial_data
+        else:
+            self._initial_data = extract.initial_data(self.html)
+            return self._initial_data
+    @property
+    def sidebar_info(self):
+        """Extract the sidebar info from the playlist page html.
+        :rtype: dict
+        """
+        if self._sidebar_info:
+            return self._sidebar_info
+        else:
+            self._sidebar_info = self.initial_data['sidebar'][
+                'playlistSidebarRenderer']['items']
+            return self._sidebar_info
+    @property
+    def yt_api_key(self):
+        """Extract the INNERTUBE_API_KEY from the playlist ytcfg.
+        :rtype: str
+        """
+        return self.ytcfg['INNERTUBE_API_KEY']
+    def _paginate(
+            self, initial_html: str, context: Optional[Any] = None,
+            until_watch_id: Optional[str] = None
+    ) -> Iterable[List[str]]:
+        """Parse the video links from the page source, yields the /watch?v=
+        part from video link
+        :param initial_html str: html from the initial YouTube url, default: self.html
+        :param context Optional[Any]: Auxiliary object
+        :param until_watch_id Optional[str]: YouTube Video watch id until
+            which the playlist should be read.
+        :rtype: Iterable[List[str]]
+        :returns: Iterable of lists of YouTube watch ids
+        """
+        videos_urls, continuation = self._extract_videos(
+            json.dumps(extract.initial_data(initial_html)), context
+        )
+        if until_watch_id:
+            try:
+                trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
+                yield videos_urls[:trim_index]
+                return
+            except ValueError:
+                pass
+        yield videos_urls
+        # Extraction from a playlist only returns 100 videos at a time
+        # if self._extract_videos returns a continuation there are more
+        # than 100 songs inside a playlist, so we need to add further requests
+        # to gather all of them
+        while continuation:  # there is an url found
+            # requesting the next page of videos with the url generated from the
+            # previous page, needs to be a post
+            req = InnerTube('WEB').browse(continuation=continuation, visitor_data=self._visitor_data)
+            # extract up to 100 songs from the page loaded
+            # returns another continuation if more videos are available
+            videos_urls, continuation = self._extract_videos(req, context)
+            if until_watch_id:
+                try:
+                    trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
+                    yield videos_urls[:trim_index]
+                    return
+                except ValueError:
+                    pass
+            yield videos_urls
+    def _extract_videos(self, raw_json: str, context: Optional[Any] = None) -> Tuple[List[str], Optional[str]]:
+        """Extracts videos from a raw json page
+        :param str raw_json: Input json extracted from the page or the last
+            server response
+        :param Optional[Any] context: Auxiliary object from _paginate
+        :rtype: Tuple[List[str], Optional[str]]
+        :returns: Tuple containing a list of up to 100 video watch ids and
+            a continuation token, if more videos are available
+        """
+        if isinstance(raw_json, dict):
+            initial_data = raw_json
+        else:
+            initial_data = json.loads(raw_json)
+        try:
+            # this is the json tree structure, if the json was extracted from
+            # html
+            section_contents = initial_data["contents"][
+                "twoColumnBrowseResultsRenderer"][
+                "tabs"][0]["tabRenderer"]["content"][
+                "sectionListRenderer"]["contents"]
+            try:
+                renderer = section_contents[0]["itemSectionRenderer"]["contents"][0]
+                if 'richGridRenderer' in renderer:
+                    important_content = renderer["richGridRenderer"]
+                else:
+                    important_content = renderer["playlistVideoListRenderer"]
+            except (KeyError, IndexError, TypeError):
+                # Playlist with submenus
+                important_content = section_contents[
+                    1]["itemSectionRenderer"][
+                    "contents"][0]["playlistVideoListRenderer"]
+            videos = important_content["contents"]
+            self._visitor_data = initial_data["responseContext"]["webResponseContextExtensionData"][
+                "ytConfigData"]["visitorData"]
+        except (KeyError, IndexError, TypeError):
+            try:
+                # this is the json tree structure, if the json was directly sent
+                # by the server in a continuation response
+                # no longer a list and no longer has the "response" key
+                important_content = initial_data['onResponseReceivedActions'][0][
+                    'appendContinuationItemsAction']['continuationItems']
+                videos = important_content
+            except (KeyError, IndexError, TypeError) as p:
+                logger.info(p)
+                return [], None
+        try:
+            # For some reason YouTube only returns the first 100 shorts of a playlist
+            # token provided by the API doesn't seem to work even in the official player
+            try:
+                continuation = videos[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token']
+            except:
+                for command in videos[-1]['continuationItemRenderer']['continuationEndpoint']['commandExecutorCommand']['commands']:
+                    if 'continuationCommand' in command:
+                        continuation = command['continuationCommand']['token']
+                        break
+            videos = videos[:-1]
+        except (KeyError, IndexError):
+            # if there is an error, no continuation is available
+            continuation = None
+        items_obj = self._extract_ids(videos)
+        # remove duplicates
+        return uniqueify(items_obj), continuation
+    def _extract_ids(self, items: list) -> list:
+        """ Iterate over the extracted urls.
+        :returns: List with extracted ids.
+        """
+        items_obj = []
+        for x in items:
+            items_obj.append(self._extract_video_id(x))
+        return items_obj
+    def _extract_video_id(self, x: dict):
+        """ Try extracting video ids, if it fails, try extracting shorts ids.
+        :returns: List with extracted ids.
+        """
+        try:
+            return f"/watch?v={x['playlistVideoRenderer']['videoId']}"
+        except (KeyError, IndexError, TypeError):
+            return self._extract_shorts_id(x)
+    def _extract_shorts_id(self, x: dict):
+        """ Try extracting shorts ids.
+        :returns: List with extracted ids.
+        """
+        try:
+            content = x['richItemRenderer']['content']
+            # New json tree added on 09/12/2024
+            if 'shortsLockupViewModel' in content:
+                video_id = content['shortsLockupViewModel']['onTap']['innertubeCommand']['reelWatchEndpoint']['videoId']
+            else:
+                video_id = content['reelItemRenderer']['videoId']
+            return f"/watch?v={video_id}"
+        except (KeyError, IndexError, TypeError):
+            return []
+    def trimmed(self, video_id: str) -> Iterable[str]:
+        """Retrieve a list of YouTube video URLs trimmed at the given video ID
+        i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns
+        [1,2]
+        :type video_id: str
+            video ID to trim the returned list of playlist URLs at
+        :rtype: List[str]
+        :returns:
+            List of video URLs from the playlist trimmed at the given ID
+        """
+        for page in self._paginate(self.html, until_watch_id=video_id):
+            yield from (self._video_url(watch_path) for watch_path in page)
+    def url_generator(self):
+        """Generator that yields video URLs.
+        :Yields: Video URLs
+        """
+        for page in self._paginate(self.html):
+            for video in page:
+                yield self._video_url(video)
+    @property  # type: ignore
+    @cache
+    def video_urls(self) -> DeferredGeneratorList:
+        """Complete links of all the videos in playlist
+        :rtype: List[str]
+        :returns: List of video URLs
+        """
+        return DeferredGeneratorList(self.url_generator())
+    def videos_generator(self):
+        for url in self.video_urls:
+            yield YouTube(
+                url,
+                client=self.client,
+                use_oauth=self.use_oauth,
+                allow_oauth_cache=self.allow_oauth_cache,
+                token_file=self.token_file,
+                oauth_verifier=self.oauth_verifier,
+                use_po_token=self.use_po_token,
+                po_token_verifier=self.po_token_verifier
+            )
+    @property
+    def videos(self) -> Iterable[YouTube]:
+        """Yields YouTube objects of videos in this playlist
+        :rtype: List[YouTube]
+        :returns: List of YouTube
+        """
+        return DeferredGeneratorList(self.videos_generator())
+    def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
+        return self.video_urls[i]
+    def __len__(self) -> int:
+        return len(self.video_urls)
+    def __repr__(self) -> str:
+        return f'<pytubefix.contrib.Playlist object: playlistId={self.playlist_id}>'
+    @property
+    @cache
+    def last_updated(self) -> Optional[date]:
+        """Extract the date that the playlist was last updated.
+        For some playlists, this will be a specific date, which is returned as a datetime
+        object. For other playlists, this is an estimate such as "1 week ago". Due to the
+        fact that this value is returned as a string, pytube does a best-effort parsing
+        where possible, and returns the raw string where it is not possible.
+        :return: Date of last playlist update where possible, else the string provided
+        :rtype: datetime.date
+        """
+        last_updated_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
+            'stats'][2]['runs'][1]['text']
+        try:
+            date_components = last_updated_text.split()
+            month = date_components[0]
+            day = date_components[1].strip(',')
+            year = date_components[2]
+            return datetime.strptime(
+                f"{month} {day:0>2} {year}", "%b %d %Y"
+            ).date()
+        except (IndexError, KeyError):
+            return last_updated_text
+    @property
+    @cache
+    def title(self) -> Optional[str]:
+        """Extract playlist title
+        :return: playlist title (name)
+        :rtype: Optional[str]
+        """
+        return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
+            'title']['runs'][0]['text']
+    @property
+    def thumbnail_url(self):
+        thumbnail_renderer = self.sidebar_info[0][
+                'playlistSidebarPrimaryInfoRenderer'][
+                'thumbnailRenderer']
+        if 'playlistVideoThumbnailRenderer' in thumbnail_renderer:
+            return thumbnail_renderer[
+                'playlistVideoThumbnailRenderer'][
+                'thumbnail'][
+                'thumbnails'][-1][
+                'url']
+        elif 'playlistCustomThumbnailRenderer' in thumbnail_renderer:
+            return thumbnail_renderer[
+                'playlistCustomThumbnailRenderer'][
+                'thumbnail'][
+                'thumbnails'][-1][
+                'url']
+    @property
+    def description(self) -> str:
+        return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
+            'description']['simpleText']
+    @property
+    def length(self):
+        """Extract the number of videos in the playlist.
+        :return: Playlist video count
+        :rtype: int
+        """
+        count_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
+            'stats'][0]['runs'][0]['text']
+        count_text = count_text.replace(',', '')
+        return int(count_text)
+    @property
+    def views(self):
+        """Extract view count for playlist.
+        :return: Playlist view count
+        :rtype: int
+        """
+        # "1,234,567 views"
+        views_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
+            'stats'][1]['simpleText']
+        # "1,234,567"
+        count_text = views_text.split()[0]
+        # "1234567"
+        count_text = count_text.replace(',', '')
+        return int(count_text)
+    @property
+    def owner(self):
+        """Extract the owner of the playlist.
+        :return: Playlist owner name.
+        :rtype: str
+        """
+        return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
+            'videoOwner']['videoOwnerRenderer']['title']['runs'][0]['text']
+    @property
+    def owner_id(self):
+        """Extract the channel_id of the owner of the playlist.
+        :return: Playlist owner's channel ID.
+        :rtype: str
+        """
+        return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
+            'videoOwner']['videoOwnerRenderer']['title']['runs'][0][
+            'navigationEndpoint']['browseEndpoint']['browseId']
+    @property
+    def owner_url(self):
+        """Create the channel url of the owner of the playlist.
+        :return: Playlist owner's channel url.
+        :rtype: str
+        """
+        return f'https://www.youtube.com/channel/{self.owner_id}'
+    @staticmethod
+    def _video_url(watch_path: str):
+        return f"https://www.youtube.com{watch_path}"

pytubefix/contrib/search.py ADDED Viewed

	@@ -0,0 +1,557 @@

+"""Module for interacting with YouTube search."""
+# Native python imports
+import logging
+from typing import List, Optional, Dict, Callable, Tuple
+# Local imports
+from pytubefix import YouTube, Channel, Playlist
+from pytubefix.helpers import deprecated, install_proxy
+from pytubefix.innertube import InnerTube
+from pytubefix.protobuf import encode_protobuf
+logger = logging.getLogger(__name__)
+class Search:
+    def __init__(
+            self, query: str,
+            client: str = InnerTube().client_name,
+            proxies: Optional[Dict[str, str]] = None,
+            use_oauth: bool = False,
+            allow_oauth_cache: bool = True,
+            token_file: Optional[str] = None,
+            oauth_verifier: Optional[Callable[[str, str], None]] = None,
+            use_po_token: Optional[bool] = False,
+            po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
+            filters: Optional[dict] = None
+    ):
+        """Initialize Search object.
+        :param str query:
+            Search query provided by the user.
+        :param dict proxies:
+            (Optional) A dict mapping protocol to proxy address which will be used by pytube.
+        :param bool use_oauth:
+            (Optional) Prompt the user to authenticate to YouTube.
+            If allow_oauth_cache is set to True, the user should only be prompted once.
+        :param bool allow_oauth_cache:
+            (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
+            These tokens are only generated if use_oauth is set to True as well.
+        :param str token_file:
+            (Optional) Path to the file where the OAuth tokens will be stored.
+            Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
+        :param Callable oauth_verifier:
+            (optional) Verifier to be used for getting OAuth tokens.
+            Verification URL and User-Code will be passed to it respectively.
+            (if passed, else default verifier will be used)
+        :param bool use_po_token:
+            (Optional) Prompt the user to use the proof of origin token on YouTube.
+            It must be sent with the API along with the linked visitorData and
+            then passed as a `po_token` query parameter to affected clients.
+            If allow_oauth_cache is set to True, the user should only be prompted once.
+        :param Callable po_token_verifier:
+            (Optional) Verified used to obtain the visitorData and po_token.
+            The verifier will return the visitorData and po_token respectively.
+            (if passed, else default verifier will be used)
+        :param dict filters:
+            (Optional) Apply filters when searching.
+            Can be used: `upload_date`, `type`, `duration`, `features`, `sort_by`.
+            features can be combined into a list with other parameters of the same type.
+        """
+        self.query = query
+        self.client = client
+        self.use_oauth = use_oauth
+        self.allow_oauth_cache = allow_oauth_cache
+        self.token_file = token_file
+        self.oauth_verifier = oauth_verifier
+        self.use_po_token = use_po_token
+        self.po_token_verifier = po_token_verifier
+        self._innertube_client = InnerTube(
+            client='WEB',
+            use_oauth=self.use_oauth,
+            allow_cache=self.allow_oauth_cache,
+            token_file=self.token_file,
+            oauth_verifier=self.oauth_verifier,
+            use_po_token=self.use_po_token,
+            po_token_verifier=self.po_token_verifier
+        )
+        # The first search, without a continuation, is structured differently
+        #  and contains completion suggestions, so we must store this separately
+        self._initial_results = None
+        self._results = {}
+        self._completion_suggestions = None
+        # Used for keeping track of query continuations so that new results
+        #  are always returned when get_next_results() is called
+        self._current_continuation = None
+        if proxies:
+            install_proxy(proxies)
+        self.filter = None
+        if filters:
+            logger.debug("Filters found, starting combination")
+            filter_protobuf = Filter()
+            filter_protobuf.set_filters(filters)
+            self.filter = filter_protobuf.get_filters_params()
+    @property
+    def completion_suggestions(self):
+        """Return query autocompletion suggestions for the query.
+        :rtype: list
+        :returns:
+            A list of autocomplete suggestions provided by YouTube for the query.
+        """
+        if self._completion_suggestions:
+            return self._completion_suggestions
+        if self.results:
+            self._completion_suggestions = self._initial_results['refinements']
+        return self._completion_suggestions
+    def _get_results(self):
+        """Search results and filter them
+        """
+        results, continuation = self.fetch_and_parse()
+        self._current_continuation = continuation
+        self._results['videos'] = results['videos']
+        self._results['shorts'] = results['shorts']
+        self._results['playlist'] = results['playlist']
+        self._results['channel'] = results['channel']
+    @property
+    def videos(self) -> List[YouTube]:
+        """Returns the search result videos.
+        On first call, will generate and return the first set of results.
+        Additional results can be generated using ``.get_next_results()``.
+        :rtype: list[YouTube]
+        :returns:
+            A list of YouTube objects.
+        """
+        if not self._results:
+            self._get_results()
+        return [items for items in self._results['videos']]
+    @property
+    def shorts(self) -> List[YouTube]:
+        """Returns the search result shorts.
+        On first call, will generate and return the first set of results.
+        Additional results can be generated using ``.get_next_results()``.
+        :rtype: list[YouTube]
+        :returns:
+            A list of YouTube objects.
+        """
+        if not self._results:
+            self._get_results()
+        return [items for items in self._results['shorts']]
+    @property
+    def playlist(self) -> List[Playlist]:
+        """Returns the search result playlist.
+        On first call, will generate and return the first set of results.
+        Additional results can be generated using ``.get_next_results()``.
+        :rtype: list[Playlist]
+        :returns:
+            A list of Playlist objects.
+        """
+        if not self._results:
+            self._get_results()
+        return [items for items in self._results['playlist']]
+    @property
+    def channel(self) -> List[Channel]:
+        """Returns the search result channel.
+        On first call, will generate and return the first set of results.
+        Additional results can be generated using ``.get_next_results()``.
+        :rtype: list[Channel]
+        :returns:
+            A list of Channel objects.
+        """
+        if not self._results:
+            self._get_results()
+        return [items for items in self._results['channel']]
+    @property
+    @deprecated("Get video results using: .videos")
+    def results(self) -> list:
+        """returns a list with videos, shorts, playlist and channels.
+        On first call, will generate and return the first set of results.
+        Additional results can be generated using ``.get_next_results()``.
+        :rtype: list
+        :returns:
+            A list of YouTube, Playlist and Channel objects.
+        """
+        # Remove these comments to get the list of videos, shorts, playlist and channel
+        #         if not self._results:
+        #             self._get_results()
+        #  return [items for values in self._results.values() for items in values]
+        return self.videos
+    @property
+    def all(self) -> list:
+        """
+        Return all objects found in the search
+        """
+        if not self._results:
+            self._get_results()
+        return [items for values in self._results.values() for items in values]
+    def get_next_results(self):
+        """Use the stored continuation string to fetch the next set of results.
+        This method does not return the results, but instead updates the results property.
+        """
+        if self._current_continuation:
+            results, continuation = self.fetch_and_parse(self._current_continuation)
+            self._current_continuation = continuation
+            self._results['videos'].extend(results['videos'])
+            self._results['shorts'].extend(results['shorts'])
+            self._results['playlist'].extend(results['playlist'])
+            self._results['channel'].extend(results['channel'])
+        else:
+            self._get_results()
+    def fetch_and_parse(self, continuation=None):
+        """Fetch from the innertube API and parse the results.
+        :param str continuation:
+            Continuation string for fetching results.
+        :rtype: tuple
+        :returns:
+            A tuple of a list of YouTube objects and a continuation string.
+        """
+        # Begin by executing the query and identifying the relevant sections
+        #  of the results
+        raw_results = self.fetch_query(continuation,
+                                       # The filter parameter must only be passed in the first API call
+                                       # After the first call, the continuation token already contains the filter
+                                       {'params': self.filter} if self.filter and not continuation else None
+                                       )
+        # Initial result is handled by try block, continuations by except block
+        try:
+            sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
+                'primaryContents']['sectionListRenderer']['contents']
+        except KeyError:
+            sections = raw_results['onResponseReceivedCommands'][0][
+                'appendContinuationItemsAction']['continuationItems']
+        item_renderer = None
+        continuation_renderer = None
+        for s in sections:
+            if 'itemSectionRenderer' in s:
+                item_renderer = s['itemSectionRenderer']
+            if 'continuationItemRenderer' in s:
+                continuation_renderer = s['continuationItemRenderer']
+        # If the continuationItemRenderer doesn't exist, assume no further results
+        if continuation_renderer:
+            next_continuation = continuation_renderer['continuationEndpoint'][
+                'continuationCommand']['token']
+        else:
+            next_continuation = None
+        # If the itemSectionRenderer doesn't exist, assume no results.
+        results = {}
+        if item_renderer:
+            videos = []
+            shorts = []
+            playlist = []
+            channel = []
+            raw_video_list = item_renderer['contents']
+            for video_details in raw_video_list:
+                # Skip over ads
+                if video_details.get('searchPyvRenderer', {}).get('ads', None):
+                    continue
+                # Skip "recommended" type videos e.g. "people also watched" and "popular X"
+                #  that break up the search results
+                if 'shelfRenderer' in video_details:
+                    continue
+                # Skip auto-generated "mix" playlist results
+                if 'radioRenderer' in video_details:
+                    continue
+                # Skip 'people also searched for' results
+                if 'horizontalCardListRenderer' in video_details:
+                    continue
+                # Can't seem to reproduce, probably related to typo fix suggestions
+                if 'didYouMeanRenderer' in video_details:
+                    continue
+                # Seems to be the renderer used for the image shown on a no results page
+                if 'backgroundPromoRenderer' in video_details:
+                    continue
+                # Get playlist results
+                if 'playlistRenderer' in video_details:
+                    playlist.append(Playlist(f"https://www.youtube.com/playlist?list="
+                                             f"{video_details['playlistRenderer']['playlistId']}",
+                                             client=self.client,
+                                             use_oauth=self.use_oauth,
+                                             allow_oauth_cache=self.allow_oauth_cache,
+                                             token_file=self.token_file,
+                                             oauth_verifier=self.oauth_verifier,
+                                             use_po_token=self.use_po_token,
+                                             po_token_verifier=self.po_token_verifier
+                                             ))
+                # Get channel results
+                if 'channelRenderer' in video_details:
+                    channel.append(Channel(f"https://www.youtube.com/channel/"
+                                           f"{video_details['channelRenderer']['channelId']}",
+                                           client=self.client,
+                                           use_oauth=self.use_oauth,
+                                           allow_oauth_cache=self.allow_oauth_cache,
+                                           token_file=self.token_file,
+                                           oauth_verifier=self.oauth_verifier,
+                                           use_po_token=self.use_po_token,
+                                           po_token_verifier=self.po_token_verifier
+                                           ))
+                # Get shorts results
+                if 'reelShelfRenderer' in video_details:
+                    for items in video_details['reelShelfRenderer']['items']:
+                        if 'reelItemRenderer' in items:
+                            video_id = items['reelItemRenderer']['videoId']
+                        else:
+                            video_id = items['shortsLockupViewModel']['onTap']['innertubeCommand'][
+                                'reelWatchEndpoint']['videoId']
+                        shorts.append(YouTube(f"https://www.youtube.com/watch?v={video_id}",
+                                              client=self.client,
+                                              use_oauth=self.use_oauth,
+                                              allow_oauth_cache=self.allow_oauth_cache,
+                                              token_file=self.token_file,
+                                              oauth_verifier=self.oauth_verifier,
+                                              use_po_token=self.use_po_token,
+                                              po_token_verifier=self.po_token_verifier
+                                              ))
+                # Get videos results
+                if 'videoRenderer' in video_details:
+                    videos.append(YouTube(f"https://www.youtube.com/watch?v="
+                                          f"{video_details['videoRenderer']['videoId']}",
+                                          client=self.client,
+                                          use_oauth=self.use_oauth,
+                                          allow_oauth_cache=self.allow_oauth_cache,
+                                          token_file=self.token_file,
+                                          oauth_verifier=self.oauth_verifier,
+                                          use_po_token=self.use_po_token,
+                                          po_token_verifier=self.po_token_verifier
+                                          ))
+            results['videos'] = videos
+            results['shorts'] = shorts
+            results['playlist'] = playlist
+            results['channel'] = channel
+        return results, next_continuation
+    def fetch_query(self, continuation: str = None, filters: dict = None):
+        """Fetch raw results from the innertube API.
+        :param str continuation:
+            Continuation string for fetching results.
+        :param dict filters:
+            Parameter encoded in protobuf that contains the search filters.
+        :rtype: dict
+        :returns:
+            The raw json object returned by the innertube API.
+        """
+        query_results = self._innertube_client.search(self.query, continuation=continuation, data=filters)
+        if not self._initial_results:
+            self._initial_results = query_results
+        return query_results  # noqa:R504
+class Filter:
+    """
+    Build filters for YouTube search in protobuf format
+    """
+    def __init__(self):
+        self.filters = {
+            'upload_date': None,
+            'type': None,
+            'duration': None,
+            'features': [],
+            'sort_by': None
+        }
+    def set_filters(self, filter_dict):
+        """
+        Applies multiple filters at once using a dictionary.
+        """
+        for category, value in filter_dict.items():
+            if category == 'features':
+                if isinstance(value, list):
+                    logger.debug("Filter features is a list")
+                    self.filters['features'].extend(value)
+                else:
+                    self.filters['features'].append(value)
+            else:
+                self.filters[category] = value
+    def clear_filters(self):
+        """
+        Clear all filters
+        """
+        for category in self.filters:
+            if category == 'features':
+                self.filters[category] = []
+            else:
+                self.filters[category] = None
+    def get_filters_params(self):
+        """
+        Combines selected filters into a final structure
+        """
+        combined = {}
+        if self.filters['sort_by']:
+            combined.update(self.filters['sort_by'])
+        combined[2] = {}
+        if self.filters['type']:
+            combined[2].update(self.filters['type'])
+        if self.filters['duration']:
+            combined[2].update(self.filters['duration'])
+        if self.filters['features']:
+            for feature in self.filters['features']:
+                combined[2].update(feature)
+        if self.filters['upload_date']:
+            combined[2].update(self.filters['upload_date'])
+        combined[2] = dict(sorted(combined.get(2, {}).items()))
+        logger.debug(f"Combined filters: {combined}")
+        encoded_filters = encode_protobuf(str(combined))
+        logger.debug(f"Filter encoded in protobuf: {encoded_filters}")
+        return encoded_filters
+    @staticmethod
+    def get_upload_date(option: str) -> dict:
+        """
+        Last Hour,
+        Today,
+        This Week,
+        This Month,
+        This Year
+        """
+        filters = {
+            "Last Hour": {1: 1},
+            "Today": {1: 2},
+            "This Week": {1: 3},
+            "This Month": {1: 4},
+            "This Year": {1: 5},
+        }
+        return filters.get(option)
+    @staticmethod
+    def get_type(option: str) -> dict:
+        """
+        Video,
+        Channel,
+        Playlist,
+        Movie
+        """
+        filters = {
+            "Video": {2: 1},
+            "Channel": {2: 2},
+            "Playlist": {2: 3},
+            "Movie": {2: 4},
+        }
+        return filters.get(option)
+    @staticmethod
+    def get_duration(option: str) -> dict:
+        """
+        Under 4 minutes,
+        Over 20 minutes,
+        4 - 20 minutes
+        """
+        filters = {
+            "Under 4 minutes": {3: 1},
+            "Over 20 minutes": {3: 2},
+            "4 - 20 minutes": {3: 3},
+        }
+        return filters.get(option)
+    @staticmethod
+    def get_features(option: str) -> dict:
+        """
+        Live,
+        4K,
+        HD,
+        Subtitles/CC,
+        Creative Commons,
+        360,
+        VR180,
+        3D,
+        HDR,
+        Location,
+        Purchased
+        """
+        filters = {
+            "Live": {8: 1},
+            "4K": {14: 1},
+            "HD": {4: 1},
+            "Subtitles/CC": {5: 1},
+            "Creative Commons": {6: 1},
+            "360": {15: 1},
+            "VR180": {26: 1},
+            "3D": {7: 1},
+            "HDR": {25: 1},
+            "Location": {23: 1},
+            "Purchased": {9: 1},
+        }
+        return filters.get(option)
+    @staticmethod
+    def get_sort_by(option: str) -> dict:
+        """
+        Relevance,
+        Upload date,
+        View count,
+        Rating
+        """
+        filters = {
+            "Relevance": {1: 0},
+            "Upload date": {1: 2},
+            "View count": {1: 3},
+            "Rating": {1: 1},
+        }
+        return filters.get(option)

pytubefix/exceptions.py ADDED Viewed

	@@ -0,0 +1,344 @@

+"""Library specific exception definitions."""
+from typing import Pattern, Union
+import logging
+logger = logging.getLogger(__name__)
+class PytubeFixError(Exception):
+    """Base pytubefix exception that all others inherit.
+    This is done to not pollute the built-in exceptions, which *could* result
+    in unintended errors being unexpectedly and incorrectly handled within
+    implementers code.
+    """
+### MISC Errors ###
+class MaxRetriesExceeded(PytubeFixError):
+    """Maximum number of retries exceeded."""
+class HTMLParseError(PytubeFixError):
+    """HTML could not be parsed"""
+class ExtractError(PytubeFixError):
+    """Data extraction based exception."""
+class SABRError(PytubeFixError):
+    def __init__(self, msg: str):
+        self.msg = msg
+        super().__init__(self.msg)
+    @property
+    def error_string(self):
+        return self.msg
+class RegexMatchError(ExtractError):
+    """Regex pattern did not return any matches."""
+    def __init__(self, caller: str, pattern: Union[str, Pattern]):
+        """
+        :param str caller:
+            Calling function
+        :param str pattern:
+            Pattern that failed to match
+        """
+        super().__init__(
+            f"{caller}: could not find match for {pattern}")
+        self.caller = caller
+        self.pattern = pattern
+class InterpretationError(PytubeFixError):
+    def __init__(self, js_url: str):
+        self.js_url = js_url
+        super().__init__(self.error_string)
+    @property
+    def error_string(self):
+        return f'Error interpreting player js: {self.js_url}'
+### Video Unavailable Errors ###
+# There are really 3 types of errors thrown
+# 1. VideoUnavailable - This is the base error type for all video errors.
+#   Or a catchall if neither the user nor the developer cares about the specific error.
+# 2. Known Error Type, Extra info useful for user
+# 3. Unknown Error Type, Important to Developer
+## 1. VideoUnavailable ##
+class VideoUnavailable(PytubeFixError):
+    """
+    Base video error.
+    This is the base error type for all video errors.
+    Call this if you can't group the error by known error type and it is not important to the developer.
+    """
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.error_string)
+    @property
+    def error_string(self):
+        return f'{self.video_id} is unavailable'
+## 2. Known Error Type, Extra info useful for user ##
+class VideoPrivate(VideoUnavailable):
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f'{self.video_id} is a private video'
+class MembersOnly(VideoUnavailable):
+    """Video is members-only.
+    YouTube has special videos that are only viewable to users who have
+    subscribed to a content creator.
+    ref: https://support.google.com/youtube/answer/7544492?hl=en
+    """
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f'{self.video_id} is a members-only video'
+class VideoRegionBlocked(VideoUnavailable):
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f'{self.video_id} is not available in your region'
+class BotDetection(VideoUnavailable):
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return (
+            f'{self.video_id} This request was detected as a bot. Use `use_po_token=True` or switch to WEB client to view. '
+            f'See more details at https://github.com/JuanBindez/pytubefix/pull/209')
+class PoTokenRequired(VideoUnavailable):
+    def __init__(self, video_id: str, client_name: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        :param str client_name:
+            A YouTube client identifier.
+        """
+        self.video_id = video_id
+        self.client_name = client_name
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return (
+            f'{self.video_id} The {self.client_name} client requires PoToken to obtain functional streams, '
+            f'See more details at https://github.com/JuanBindez/pytubefix/pull/209')
+class LoginRequired(VideoUnavailable):
+    def __init__(self, video_id: str, reason: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        self.reason = reason
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return (
+            f'{self.video_id} requires login to view, YouTube reason: {self.reason}')
+# legacy livestream error types still supported
+class RecordingUnavailable(VideoUnavailable):
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f'{self.video_id} does not have a live stream recording available'
+class LiveStreamError(VideoUnavailable):
+    """Video is a live stream."""
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f'{self.video_id} is streaming live and cannot be loaded'
+class LiveStreamOffline(VideoUnavailable):
+    """The live will start soon"""
+    def __init__(self, video_id: str, reason: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        :param str reason:
+            reason for the error
+        """
+        self.video_id = video_id
+        self.reason = reason
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f'{self.video_id} {self.reason}'
+# legacy age restricted error types still supported
+class AgeRestrictedError(VideoUnavailable):
+    """Video is age restricted, and cannot be accessed without OAuth."""
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f"{self.video_id} is age restricted, and can't be accessed without logging in."
+class AgeCheckRequiredError(VideoUnavailable):
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f"{self.video_id} has age restrictions and cannot be accessed without confirmation."
+class AgeCheckRequiredAccountError(VideoUnavailable):
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return (
+            f"{self.video_id} may be inappropriate for "
+            f"some users. Sign in to your primary account to confirm your age.")
+class InnerTubeResponseError(VideoUnavailable):
+    def __init__(self, video_id: str, client: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        self.client = client
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return (
+            f"{self.video_id} : {self.client} client did not receive a response from YouTube")
+## 3. Unknown Error Type, Important to Developer ##
+class UnknownVideoError(VideoUnavailable):
+    """Unknown video error."""
+    def __init__(self, video_id: str, status: str = None, reason: str = None, developer_message: str = None):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        :param str status:
+            The status code of the response.
+        :param str reason:
+            The reason for the error.
+        :param str developer_message:
+            The message from the developer.
+        """
+        self.video_id = video_id
+        self.status = status
+        self.reason = reason
+        self.developer_message = developer_message
+        logger.warning('Unknown Video Error')
+        logger.warning(f'Video ID: {self.video_id}')
+        logger.warning(f'Status: {self.status}')
+        logger.warning(f'Reason: {self.reason}')
+        logger.warning(f'Developer Message: {self.developer_message}')
+        logger.warning(
+            'Please open an issue at '
+            'https://github.com/JuanBindez/pytubefix/issues '
+            'and provide the above log output.'
+        )
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f'{self.video_id} has an unknown error, check logs for more info [Status: {self.status}] [Reason: {self.reason}]'

pytubefix/extract.py ADDED Viewed

	@@ -0,0 +1,646 @@

+"""This module contains all non-cipher related data extraction logic."""
+import logging
+import urllib.parse
+import re
+from collections import OrderedDict
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple
+from urllib.parse import parse_qs, quote, urlencode, urlparse
+from pytubefix.cipher import Cipher
+from pytubefix.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
+from pytubefix.helpers import regex_search
+from pytubefix.metadata import YouTubeMetadata
+from pytubefix.parser import parse_for_object, parse_for_all_objects
+logger = logging.getLogger(__name__)
+def publish_date(watch_html: str):
+    """Extract publish date and return it as a datetime object
+    :param str watch_html:
+        The html contents of the watch page.
+    :rtype: datetime
+    :returns:
+        Publish date of the video as a datetime object with timezone.
+    """
+    try:
+        result = re.search(
+            r"(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2}",
+            watch_html
+        )
+        if result:
+            return datetime.fromisoformat(result.group(0))
+    except AttributeError:
+        return None
+def recording_available(watch_html):
+    """Check if live stream recording is available.
+    :param str watch_html:
+        The html contents of the watch page.
+    :rtype: bool
+    :returns:
+        Whether or not the content is private.
+    """
+    unavailable_strings = [
+        'This live stream recording is not available.'
+    ]
+    for string in unavailable_strings:
+        if string in watch_html:
+            return False
+    return True
+def is_private(watch_html):
+    """Check if content is private.
+    :param str watch_html:
+        The html contents of the watch page.
+    :rtype: bool
+    :returns:
+        Whether or not the content is private.
+    """
+    private_strings = [
+        "This is a private video. Please sign in to verify that you may see it.",
+        "\"simpleText\":\"Private video\"",
+        "This video is private."
+    ]
+    for string in private_strings:
+        if string in watch_html:
+            return True
+    return False
+def is_age_restricted(watch_html: str) -> bool:
+    """Check if content is age restricted.
+    :param str watch_html:
+        The html contents of the watch page.
+    :rtype: bool
+    :returns:
+        Whether or not the content is age restricted.
+    """
+    try:
+        regex_search(r"og:restrictions:age", watch_html, group=0)
+    except RegexMatchError:
+        return False
+    return True
+def playability_status(player_response: dict) -> Tuple[Any, Any]:
+    """Return the playability status and status explanation of a video.
+    For example, a video may have a status of LOGIN_REQUIRED, and an explanation
+    of "This is a private video. Please sign in to verify that you may see it."
+    This explanation is what gets incorporated into the media player overlay.
+    :param str player_response:
+        Content of the player's response.
+    :rtype: bool
+    :returns:
+        Playability status and reason of the video.
+    """
+    status_dict = player_response.get('playabilityStatus', {})
+    # if 'liveStreamability' in status_dict:
+    # We used liveStreamability to know if the video was live,
+    # however some clients still return this parameter even if the video is already available
+    if 'videoDetails' in player_response:  # Private videos do not contain videoDetails
+        if 'isLive' in player_response['videoDetails']:
+            return 'LIVE_STREAM', 'Video is a live stream.'
+    if 'status' in status_dict:
+        if 'reason' in status_dict:
+            return status_dict['status'], [status_dict['reason']]
+        if 'messages' in status_dict:
+            return status_dict['status'], status_dict['messages']
+    return None, [None]
+def signature_timestamp(js: str) -> str:
+    return regex_search(r"signatureTimestamp:(\d*)", js, group=1)
+def visitor_data(response_context: str) -> str:
+    return regex_search(r"visitor_data[',\"\s]+value['\"]:\s?['\"]([a-zA-Z0-9_%-]+)['\"]", response_context, group=1)
+def video_id(url: str) -> str:
+    """Extract the ``video_id`` from a YouTube url.
+    This function supports the following patterns:
+    - :samp:`https://youtube.com/watch?v={video_id}`
+    - :samp:`https://youtube.com/embed/{video_id}`
+    - :samp:`https://youtu.be/{video_id}`
+    :param str url:
+        A YouTube url containing a video id.
+    :rtype: str
+    :returns:
+        YouTube video id.
+    """
+    return regex_search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url, group=1)
+def playlist_id(url: str) -> str:
+    """Extract the ``playlist_id`` from a YouTube url.
+    This function supports the following patterns:
+    - :samp:`https://youtube.com/playlist?list={playlist_id}`
+    - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}`
+    :param str url:
+        A YouTube url containing a playlist id.
+    :rtype: str
+    :returns:
+        YouTube playlist id.
+    """
+    parsed = urllib.parse.urlparse(url)
+    return parse_qs(parsed.query)['list'][0]
+def channel_name(url: str) -> str:
+    """Extract the ``channel_name`` or ``channel_id`` from a YouTube url.
+    This function supports the following patterns:
+    - :samp:`https://youtube.com/c/{channel_name}/*`
+    - :samp:`https://youtube.com/channel/{channel_id}/*
+    - :samp:`https://youtube.com/u/{channel_name}/*`
+    - :samp:`https://youtube.com/user/{channel_id}/*
+    - :samp:`https://youtube.com/@{channel_id}/*
+    :param str url:
+        A YouTube url containing a channel name.
+    :rtype: str
+    :returns:
+        YouTube channel name.
+    """
+    patterns = [
+        r"(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)",
+        r"(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)",
+        r"(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)",
+        r"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)",
+        r"(?:\/(\@)([%\d\w_\-\.]+)(\/.*)?)"
+    ]
+    for pattern in patterns:
+        regex = re.compile(pattern)
+        function_match = regex.search(url)
+        if function_match:
+            logger.debug("finished regex search, matched: %s", pattern)
+            uri_style = function_match.group(1)
+            uri_identifier = function_match.group(2)
+            return f'/{uri_style}/{uri_identifier}' if uri_style != '@' else f'/{uri_style}{uri_identifier}'
+    raise RegexMatchError(
+        caller="channel_name", pattern="patterns"
+    )
+def video_info_url(video_id: str, watch_url: str) -> str:
+    """Construct the video_info url.
+    :param str video_id:
+        A YouTube video identifier.
+    :param str watch_url:
+        A YouTube watch url.
+    :rtype: str
+    :returns:
+        :samp:`https://youtube.com/get_video_info` with necessary GET
+        parameters.
+    """
+    params = OrderedDict(
+        [
+            ("video_id", video_id),
+            ("ps", "default"),
+            ("eurl", quote(watch_url)),
+            ("hl", "en_US"),
+            ("html5", "1"),
+            ("c", "TVHTML5"),
+            ("cver", "7.20201028"),
+        ]
+    )
+    return _video_info_url(params)
+def video_info_url_age_restricted(video_id: str, embed_html: str) -> str:
+    """Construct the video_info url.
+    :param str video_id:
+        A YouTube video identifier.
+    :param str embed_html:
+        The html contents of the embed page (for age restricted videos).
+    :rtype: str
+    :returns:
+        :samp:`https://youtube.com/get_video_info` with necessary GET
+        parameters.
+    """
+    try:
+        sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1)
+    except RegexMatchError:
+        sts = ""
+    # Here we use ``OrderedDict`` so that the output is consistent between
+    # Python 2.7+.
+    eurl = f"https://youtube.googleapis.com/v/{video_id}"
+    params = OrderedDict(
+        [
+            ("video_id", video_id),
+            ("eurl", eurl),
+            ("sts", sts),
+            ("html5", "1"),
+            ("c", "TVHTML5"),
+            ("cver", "7.20201028"),
+        ]
+    )
+    return _video_info_url(params)
+def _video_info_url(params: OrderedDict) -> str:
+    return f"https://www.youtube.com/get_video_info?{urlencode(params)}"
+def js_url(html: str) -> str:
+    """Get the base JavaScript url.
+    Construct the base JavaScript url, which contains the decipher
+    "transforms".
+    :param str html:
+        The html contents of the watch page.
+    """
+    try:
+        base_js = get_ytplayer_config(html)['assets']['js']
+    except (KeyError, RegexMatchError):
+        base_js = get_ytplayer_js(html)
+    return f"https://youtube.com{base_js}"
+def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]:
+    """Parse the type data.
+    Breaks up the data in the ``type`` key of the manifest, which contains the
+    mime type and codecs serialized together, and splits them into separate
+    elements.
+    **Example**:
+    mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus'])
+    :param str mime_type_codec:
+        String containing mime type and codecs.
+    :rtype: tuple
+    :returns:
+        The mime type and a list of codecs.
+    """
+    pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\""
+    regex = re.compile(pattern)
+    results = regex.search(mime_type_codec)
+    if not results:
+        raise RegexMatchError(caller="mime_type_codec", pattern=pattern)
+    mime_type, codecs = results.groups()
+    return mime_type, [c.strip() for c in codecs.split(",")]
+def get_ytplayer_js(html: str) -> Any:
+    """Get the YouTube player base JavaScript path.
+    :param str html
+        The html contents of the watch page.
+    :rtype: str
+    :returns:
+        Path to YouTube's base.js file.
+    """
+    js_url_patterns = [
+        r"(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)"
+    ]
+    for pattern in js_url_patterns:
+        regex = re.compile(pattern)
+        function_match = regex.search(html)
+        if function_match:
+            logger.debug("finished regex search, matched: %s", pattern)
+            yt_player_js = function_match.group(1)
+            logger.debug("player JS: " + yt_player_js)
+            return yt_player_js
+    raise RegexMatchError(
+        caller="get_ytplayer_js", pattern="js_url_patterns"
+    )
+def get_ytplayer_config(html: str) -> Any:
+    """Get the YouTube player configuration data from the watch html.
+    Extract the ``ytplayer_config``, which is json data embedded within the
+    watch html and serves as the primary source of obtaining the stream
+    manifest data.
+    :param str html:
+        The html contents of the watch page.
+    :rtype: str
+    :returns:
+        Substring of the html containing the encoded manifest data.
+    """
+    logger.debug("finding initial function name")
+    config_patterns = [
+        r"ytplayer\.config\s*=\s*",
+        r"ytInitialPlayerResponse\s*=\s*"
+    ]
+    for pattern in config_patterns:
+        # Try each pattern consecutively if they don't find a match
+        try:
+            return parse_for_object(html, pattern)
+        except HTMLParseError as e:
+            logger.debug(f'Pattern failed: {pattern}')
+            logger.debug(e)
+            continue
+    # setConfig() needs to be handled a little differently.
+    # We want to parse the entire argument to setConfig()
+    #  and use then load that as json to find PLAYER_CONFIG
+    #  inside of it.
+    setconfig_patterns = [
+        r"yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*"
+    ]
+    for pattern in setconfig_patterns:
+        # Try each pattern consecutively if they don't find a match
+        try:
+            return parse_for_object(html, pattern)
+        except HTMLParseError:
+            continue
+    raise RegexMatchError(
+        caller="get_ytplayer_config", pattern="config_patterns, setconfig_patterns"
+    )
+def get_ytcfg(html: str) -> str:
+    """Get the entirety of the ytcfg object.
+    This is built over multiple pieces, so we have to find all matches and
+    combine the dicts together.
+    :param str html:
+        The html contents of the watch page.
+    :rtype: str
+    :returns:
+        Substring of the html containing the encoded manifest data.
+    """
+    ytcfg = {}
+    ytcfg_patterns = [
+        r"ytcfg\s=\s",
+        r"ytcfg\.set\("
+    ]
+    for pattern in ytcfg_patterns:
+        # Try each pattern consecutively and try to build a cohesive object
+        try:
+            found_objects = parse_for_all_objects(html, pattern)
+            for obj in found_objects:
+                ytcfg.update(obj)
+        except HTMLParseError:
+            continue
+    if ytcfg: # there is at least one item
+        return ytcfg
+    raise RegexMatchError(
+        caller="get_ytcfg", pattern="ytcfg_pattenrs"
+    )
+def apply_po_token(stream_manifest: Dict, vid_info: Dict, po_token: str) -> None:
+    """Apply the proof of origin token to the stream manifest
+    :param dict stream_manifest:
+        Details of the media streams available.
+    :param str po_token:
+        Proof of Origin Token.
+    """
+    logger.debug(f'Applying poToken')
+    for i, stream in enumerate(stream_manifest):
+        try:
+            url: str = stream["url"]
+        except KeyError:
+            live_stream = (
+                vid_info.get("playabilityStatus", {}, )
+                .get("liveStreamability")
+            )
+            if live_stream:
+                raise LiveStreamError("UNKNOWN")
+        parsed_url = urlparse(url)
+        # Convert query params off url to dict
+        query_params = parse_qs(urlparse(url).query)
+        query_params = {
+            k: v[0] for k, v in query_params.items()
+        }
+        query_params['pot'] = po_token
+        url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}'
+        stream_manifest[i]["url"] = url
+def apply_signature(stream_manifest: Dict, vid_info: Dict, js: str, url_js: str) -> None:
+    """Apply the decrypted signature to the stream manifest.
+    :param dict stream_manifest:
+        Details of the media streams available.
+    :param str js:
+        The contents of the base.js asset file.
+    :param str url_js:
+        Full base.js url
+    """
+    cipher = Cipher(js=js, js_url=url_js)
+    discovered_n = dict()
+    for i, stream in enumerate(stream_manifest):
+        try:
+            url: str = stream["url"]
+        except KeyError:
+            live_stream = (
+                vid_info.get("playabilityStatus", {}, )
+                .get("liveStreamability")
+            )
+            if live_stream:
+                raise LiveStreamError("UNKNOWN")
+        parsed_url = urlparse(url)
+        # Convert query params off url to dict
+        query_params = parse_qs(urlparse(url).query)
+        query_params = {
+            k: v[0] for k, v in query_params.items()
+        }
+        # 403 Forbidden fix.
+        if "signature" in url or (
+                "s" not in stream and ("&sig=" in url or "&lsig=" in url)
+        ):
+            # For certain videos, YouTube will just provide them pre-signed, in
+            # which case there's no real magic to download them and we can skip
+            # the whole signature descrambling entirely.
+            logger.debug("signature found, skip decipher")
+        else:
+            signature = cipher.get_signature(ciphered_signature=stream["s"])
+            logger.debug(
+                "finished descrambling signature for itag=%s", stream["itag"]
+            )
+            query_params['sig'] = signature
+        if 'n' in query_params.keys():
+            # For WEB-based clients, YouTube sends an "n" parameter that throttles download speed.
+            # To decipher the value of "n", we must interpret the player's JavaScript.
+            initial_n = query_params['n']
+            logger.debug(f'Parameter n is: {initial_n}')
+            # Check if any previous stream decrypted the parameter
+            if initial_n not in discovered_n:
+                discovered_n[initial_n] = cipher.get_throttling(initial_n)
+            else:
+                logger.debug('Parameter n found skipping decryption')
+            new_n = discovered_n[initial_n]
+            query_params['n'] = new_n
+            logger.debug(f'Parameter n deciphered: {new_n}')
+        url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}'  # noqa:E501
+        stream_manifest[i]["url"] = url
+def apply_descrambler(stream_data: Dict) -> Optional[List[Dict]]:
+    """Apply various in-place transforms to YouTube's media stream data.
+    Creates a ``list`` of dictionaries by string splitting on commas, then
+    taking each list item, parsing it as a query string, converting it to a
+    ``dict`` and unquoting the value.
+    :param dict stream_data:
+        Dictionary containing query string encoded values.
+    **Example**:
+    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
+    >>> apply_descrambler(d, 'foo')
+    >>> print(d)
+    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
+    """
+    if 'url' in stream_data:
+        return None
+    # Merge formats and adaptiveFormats into a single list
+    formats: list[Dict] = []
+    if 'formats' in stream_data.keys():
+        formats.extend(stream_data['formats'])
+    if 'adaptiveFormats' in stream_data.keys():
+        formats.extend(stream_data['adaptiveFormats'])
+    # Extract url and s from signatureCiphers as necessary
+    for data in formats:
+        if 'url' not in data and 'signatureCipher' in data:
+            cipher_url = parse_qs(data['signatureCipher'])
+            data['url'] = cipher_url['url'][0]
+            data['s'] = cipher_url['s'][0]
+            data['is_sabr'] = False
+        elif 'url' not in data and 'signatureCipher' not in data:
+            data['url'] = stream_data['serverAbrStreamingUrl']
+            data['is_sabr'] = True
+        data['is_otf'] = data.get('type') == 'FORMAT_STREAM_TYPE_OTF'
+    logger.debug("applying descrambler")
+    return formats
+def initial_data(watch_html: str) -> dict:
+    """Extract the ytInitialData json from the watch_html page.
+    This mostly contains metadata necessary for rendering the page on-load,
+    such as video information, copyright notices, etc.
+    @param watch_html: Html of the watch page
+    @return:
+    """
+    patterns = [
+        r"window\[['\"]ytInitialData['\"]]\s*=\s*",
+        r"ytInitialData\s*=\s*"
+    ]
+    for pattern in patterns:
+        try:
+            return parse_for_object(watch_html, pattern)
+        except HTMLParseError:
+            pass
+    raise RegexMatchError(caller='initial_data', pattern='initial_data_pattern')
+def initial_player_response(watch_html: str) -> str:
+    """Extract the ytInitialPlayerResponse json from the watch_html page.
+    This mostly contains metadata necessary for rendering the page on-load,
+    such as video information, copyright notices, etc.
+    @param watch_html: Html of the watch page
+    @return:
+    """
+    patterns = [
+        r"window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*",
+        r"ytInitialPlayerResponse\s*=\s*"
+    ]
+    for pattern in patterns:
+        try:
+            return parse_for_object(watch_html, pattern)
+        except HTMLParseError:
+            pass
+    raise RegexMatchError(
+        caller='initial_player_response',
+        pattern='initial_player_response_pattern'
+    )
+def metadata(initial_data) -> Optional[YouTubeMetadata]:
+    """Get the informational metadata for the video.
+    e.g.:
+    [
+        {
+            'Song': '강남스타일(Gangnam Style)',
+            'Artist': 'PSY',
+            'Album': 'PSY SIX RULES Pt.1',
+            'Licensed to YouTube by': 'YG Entertainment Inc. [...]'
+        }
+    ]
+    :rtype: YouTubeMetadata
+    """
+    try:
+        metadata_rows: List = initial_data["contents"]["twoColumnWatchNextResults"][
+            "results"]["results"]["contents"][1]["videoSecondaryInfoRenderer"][
+            "metadataRowContainer"]["metadataRowContainerRenderer"]["rows"]
+    except (KeyError, IndexError):
+        # If there's an exception accessing this data, it probably doesn't exist.
+        return YouTubeMetadata([])
+    # Rows appear to only have "metadataRowRenderer" or "metadataRowHeaderRenderer"
+    #  and we only care about the former, so we filter the others
+    metadata_rows = filter(
+        lambda x: "metadataRowRenderer" in x.keys(),
+        metadata_rows
+    )
+    # We then access the metadataRowRenderer key in each element
+    #  and build a metadata object from this new list
+    metadata_rows = [x["metadataRowRenderer"] for x in metadata_rows]
+    return YouTubeMetadata(metadata_rows)

pytubefix/file_system.py ADDED Viewed

	@@ -0,0 +1,80 @@

+windows = ['Windows', 'NTFS', 'FAT32', 'exFAT', 'ReFS']
+linux = ['Linux', 'ext2', 'ext3', 'ext4', 'Btrfs', 'XFS', 'ZFS']
+macOS = ['macOS', 'APFS', 'HFS+']
+bsd_unix = ['BSD', 'UFS']
+network_filesystems = ['CIFS', 'SMB']
+windows_translation = str.maketrans({
+            '\\': '',
+            '/': '',
+            '?': '',
+            ':': '',
+            '*': '',
+            '"': '',
+            '<': '',
+            '>': '',
+            '|': '',
+        })
+linux_translation = str.maketrans({
+            '/': '',
+        })
+macos_translation = str.maketrans({
+            '/': '',
+        })
+bsd_translation = str.maketrans({
+            '/': '',
+        })
+network_filesystems_translation = str.maketrans({
+            '\\': '',
+            '/': '',
+            '?': '',
+            ':': '',
+            '*': '',
+            '"': '',
+            '<': '',
+            '>': '',
+            '|': '',
+        })
+def file_system_verify(file_type) -> dict:
+    """
+    Returns a translation table to remove invalid characters for a specified file system type.
+    This function identifies the file system type and returns a translation table for removing
+    characters that are not allowed in filenames for that specific file system.
+    Args:
+        file_type (str): The type of file system being checked. Supported file systems include:
+                         - Windows: NTFS, FAT32, exFAT, ReFS
+                         - Linux: ext2, ext3, ext4, Btrfs, XFS, ZFS
+                         - macOS: APFS, HFS+
+                         - BSD/UNIX: UFS
+                         - Network Filesystems: CIFS, SMB
+    Returns:
+        dict: A translation table where invalid characters are mapped to an empty string.
+    Example:
+        >>> ys = yt.streams.get_highest_resolution()
+        >>> ys.download(file_system='ext4')
+    Raises:
+        None, but prints a message if the file system type is not recognized.
+    """
+    if file_type in windows:
+        return windows_translation
+    elif file_type in linux:
+        return linux_translation
+    elif file_type in macOS:
+        return macos_translation
+    elif file_type in bsd_unix:
+        return bsd_translation
+    elif file_type in network_filesystems:
+        return network_filesystems_translation