Upload 106 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- pytubefix/.DS_Store +0 -0
- pytubefix/__cache__/tokens.json +1 -0
- pytubefix/__init__.py +23 -0
- pytubefix/__main__.py +992 -0
- pytubefix/__pycache__/__init__.cpython-311.pyc +0 -0
- pytubefix/__pycache__/__main__.cpython-311.pyc +0 -0
- pytubefix/__pycache__/buffer.cpython-311.pyc +0 -0
- pytubefix/__pycache__/captions.cpython-311.pyc +0 -0
- pytubefix/__pycache__/chapters.cpython-311.pyc +0 -0
- pytubefix/__pycache__/cipher.cpython-311.pyc +0 -0
- pytubefix/__pycache__/cli.cpython-311.pyc +0 -0
- pytubefix/__pycache__/exceptions.cpython-311.pyc +0 -0
- pytubefix/__pycache__/extract.cpython-311.pyc +0 -0
- pytubefix/__pycache__/file_system.cpython-311.pyc +0 -0
- pytubefix/__pycache__/helpers.cpython-311.pyc +0 -0
- pytubefix/__pycache__/info.cpython-311.pyc +0 -0
- pytubefix/__pycache__/innertube.cpython-311.pyc +0 -0
- pytubefix/__pycache__/itags.cpython-311.pyc +0 -0
- pytubefix/__pycache__/jsinterp.cpython-311.pyc +0 -0
- pytubefix/__pycache__/keymoments.cpython-311.pyc +0 -0
- pytubefix/__pycache__/metadata.cpython-311.pyc +0 -0
- pytubefix/__pycache__/monostate.cpython-311.pyc +0 -0
- pytubefix/__pycache__/parser.cpython-311.pyc +0 -0
- pytubefix/__pycache__/protobuf.cpython-311.pyc +0 -0
- pytubefix/__pycache__/query.cpython-311.pyc +0 -0
- pytubefix/__pycache__/request.cpython-311.pyc +0 -0
- pytubefix/__pycache__/streams.cpython-311.pyc +0 -0
- pytubefix/__pycache__/version.cpython-311.pyc +0 -0
- pytubefix/botGuard/.DS_Store +0 -0
- pytubefix/botGuard/__init__.py +0 -0
- pytubefix/botGuard/__pycache__/__init__.cpython-311.pyc +0 -0
- pytubefix/botGuard/__pycache__/bot_guard.cpython-311.pyc +0 -0
- pytubefix/botGuard/bot_guard.py +47 -0
- pytubefix/botGuard/vm/botGuard.js +0 -0
- pytubefix/buffer.py +48 -0
- pytubefix/captions.py +215 -0
- pytubefix/chapters.py +47 -0
- pytubefix/cipher.py +190 -0
- pytubefix/cli.py +355 -0
- pytubefix/contrib/__init__.py +0 -0
- pytubefix/contrib/__pycache__/__init__.cpython-311.pyc +0 -0
- pytubefix/contrib/__pycache__/channel.cpython-311.pyc +0 -0
- pytubefix/contrib/__pycache__/playlist.cpython-311.pyc +0 -0
- pytubefix/contrib/__pycache__/search.cpython-311.pyc +0 -0
- pytubefix/contrib/channel.py +655 -0
- pytubefix/contrib/playlist.py +496 -0
- pytubefix/contrib/search.py +557 -0
- pytubefix/exceptions.py +344 -0
- pytubefix/extract.py +646 -0
- pytubefix/file_system.py +80 -0
pytubefix/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
pytubefix/__cache__/tokens.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"access_token": null, "refresh_token": null, "expires": null, "visitorData": "CgtYOTFud0twS0VmOCiD2OTCBjIKCgJJThIEGgAgQg%3D%3D", "po_token": "MnSZ2tgzlZc8xJni8Vz8ITSbjhi-7cChHWF4_eFiXnowkgRlC-yG3IdTqDT9PvJhNbcDK43DnBhNlOCBTjY1Y9aKFBYp_h0-yiT5TefusxCtFbd98AA4HdGX6XmAbvgLujXCQZj14n_wlWR9y3i4CZjjl8pmYg=="}
|
pytubefix/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# flake8: noqa: F401
|
| 2 |
+
# noreorder
|
| 3 |
+
"""
|
| 4 |
+
Pytubefix: a very serious Python library for downloading YouTube Videos.
|
| 5 |
+
"""
|
| 6 |
+
__title__ = "pytubefix"
|
| 7 |
+
__author__ = "Juan Bindez"
|
| 8 |
+
__license__ = "MIT License"
|
| 9 |
+
__js__ = None
|
| 10 |
+
__js_url__ = None
|
| 11 |
+
|
| 12 |
+
from pytubefix.version import __version__
|
| 13 |
+
from pytubefix.streams import Stream
|
| 14 |
+
from pytubefix.captions import Caption
|
| 15 |
+
from pytubefix.chapters import Chapter
|
| 16 |
+
from pytubefix.keymoments import KeyMoment
|
| 17 |
+
from pytubefix.query import CaptionQuery, StreamQuery
|
| 18 |
+
from pytubefix.__main__ import YouTube
|
| 19 |
+
from pytubefix.contrib.playlist import Playlist
|
| 20 |
+
from pytubefix.contrib.channel import Channel
|
| 21 |
+
from pytubefix.contrib.search import Search
|
| 22 |
+
from pytubefix.info import info
|
| 23 |
+
from pytubefix.buffer import Buffer
|
pytubefix/__main__.py
ADDED
|
@@ -0,0 +1,992 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
#
|
| 3 |
+
# Copyright (c) 2023 - 2025 Juan Bindez <juanbindez780@gmail.com>
|
| 4 |
+
#
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
#
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
#
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
"""
|
| 25 |
+
This module implements the core developer interface for pytubefix.
|
| 26 |
+
|
| 27 |
+
The problem domain of the :class:`YouTube <YouTube> class focuses almost
|
| 28 |
+
exclusively on the developer interface. Pytubefix offloads the heavy lifting to
|
| 29 |
+
smaller peripheral modules and functions.
|
| 30 |
+
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
import logging
|
| 34 |
+
from subprocess import CalledProcessError
|
| 35 |
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
| 36 |
+
|
| 37 |
+
import pytubefix
|
| 38 |
+
import pytubefix.exceptions as exceptions
|
| 39 |
+
from pytubefix import extract, request
|
| 40 |
+
from pytubefix import Stream, StreamQuery
|
| 41 |
+
from pytubefix.helpers import install_proxy
|
| 42 |
+
from pytubefix.innertube import InnerTube
|
| 43 |
+
from pytubefix.metadata import YouTubeMetadata
|
| 44 |
+
from pytubefix.monostate import Monostate
|
| 45 |
+
from pytubefix.botGuard import bot_guard
|
| 46 |
+
|
| 47 |
+
logger = logging.getLogger(__name__)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class YouTube:
|
| 51 |
+
"""Core developer interface for pytubefix."""
|
| 52 |
+
|
| 53 |
+
def __init__(
|
| 54 |
+
self,
|
| 55 |
+
url: str,
|
| 56 |
+
client: str = InnerTube().client_name,
|
| 57 |
+
on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
|
| 58 |
+
on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
|
| 59 |
+
proxies: Optional[Dict[str, str]] = None,
|
| 60 |
+
use_oauth: bool = False,
|
| 61 |
+
allow_oauth_cache: bool = True,
|
| 62 |
+
token_file: Optional[str] = None,
|
| 63 |
+
oauth_verifier: Optional[Callable[[str, str], None]] = None,
|
| 64 |
+
use_po_token: Optional[bool] = False,
|
| 65 |
+
po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
|
| 66 |
+
):
|
| 67 |
+
"""Construct a :class:`YouTube <YouTube>`.
|
| 68 |
+
|
| 69 |
+
:param str url:
|
| 70 |
+
A valid YouTube watch URL.
|
| 71 |
+
:param str client:
|
| 72 |
+
(Optional) A YouTube client,
|
| 73 |
+
Available:
|
| 74 |
+
WEB, WEB_EMBED, WEB_MUSIC, WEB_CREATOR, WEB_SAFARI,
|
| 75 |
+
ANDROID, ANDROID_MUSIC, ANDROID_CREATOR, ANDROID_VR, ANDROID_PRODUCER, ANDROID_TESTSUITE,
|
| 76 |
+
IOS, IOS_MUSIC, IOS_CREATOR,
|
| 77 |
+
MWEB, TV, TV_EMBED, MEDIA_CONNECT.
|
| 78 |
+
:param func on_progress_callback:
|
| 79 |
+
(Optional) User defined callback function for stream download
|
| 80 |
+
progress events.
|
| 81 |
+
:param func on_complete_callback:
|
| 82 |
+
(Optional) User defined callback function for stream download
|
| 83 |
+
complete events.
|
| 84 |
+
:param dict proxies:
|
| 85 |
+
(Optional) A dict mapping protocol to proxy address which will be used by pytube.
|
| 86 |
+
:param bool use_oauth:
|
| 87 |
+
(Optional) Prompt the user to authenticate to YouTube.
|
| 88 |
+
If allow_oauth_cache is set to True, the user should only be prompted once.
|
| 89 |
+
:param bool allow_oauth_cache:
|
| 90 |
+
(Optional) Cache OAuth and Po tokens locally on the machine. Defaults to True.
|
| 91 |
+
These tokens are only generated if use_oauth is set to True as well.
|
| 92 |
+
:param str token_file:
|
| 93 |
+
(Optional) Path to the file where the OAuth and Po tokens will be stored.
|
| 94 |
+
Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
|
| 95 |
+
:param Callable oauth_verifier:
|
| 96 |
+
(optional) Verifier to be used for getting oauth tokens.
|
| 97 |
+
Verification URL and User-Code will be passed to it respectively.
|
| 98 |
+
(if passed, else default verifier will be used)
|
| 99 |
+
:param bool use_po_token:
|
| 100 |
+
(Optional) Prompt the user to use the proof of origin token on YouTube.
|
| 101 |
+
It must be sent with the API along with the linked visitorData and
|
| 102 |
+
then passed as a `po_token` query parameter to affected clients.
|
| 103 |
+
If allow_oauth_cache is set to True, the user should only be prompted once.
|
| 104 |
+
(Do not use together with `use_oauth=True`)
|
| 105 |
+
:param Callable po_token_verifier:
|
| 106 |
+
(Optional) Verified used to obtain the visitorData and po_token.
|
| 107 |
+
The verifier will return the visitorData and po_token respectively.
|
| 108 |
+
(if passed, else default verifier will be used)
|
| 109 |
+
"""
|
| 110 |
+
# js fetched by js_url
|
| 111 |
+
self._js: Optional[str] = None
|
| 112 |
+
|
| 113 |
+
# the url to the js, parsed from watch html
|
| 114 |
+
self._js_url: Optional[str] = None
|
| 115 |
+
|
| 116 |
+
# content fetched from innertube/player
|
| 117 |
+
self._vid_info: Optional[Dict] = None
|
| 118 |
+
self._vid_details: Optional[Dict] = None
|
| 119 |
+
|
| 120 |
+
# the html of /watch?v=<video_id>
|
| 121 |
+
self._watch_html: Optional[str] = None
|
| 122 |
+
self._embed_html: Optional[str] = None
|
| 123 |
+
|
| 124 |
+
# inline js in the html containing
|
| 125 |
+
self._player_config_args: Optional[Dict] = None
|
| 126 |
+
self._age_restricted: Optional[bool] = None
|
| 127 |
+
|
| 128 |
+
self._fmt_streams: Optional[List[Stream]] = None
|
| 129 |
+
|
| 130 |
+
self._initial_data = None
|
| 131 |
+
self._metadata: Optional[YouTubeMetadata] = None
|
| 132 |
+
|
| 133 |
+
# video_id part of /watch?v=<video_id>
|
| 134 |
+
self.video_id = extract.video_id(url)
|
| 135 |
+
|
| 136 |
+
self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
|
| 137 |
+
self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"
|
| 138 |
+
|
| 139 |
+
self.client = 'WEB' if use_po_token else client
|
| 140 |
+
|
| 141 |
+
# oauth can only be used by the TV and TV_EMBED client.
|
| 142 |
+
self.client = 'TV' if use_oauth else self.client
|
| 143 |
+
|
| 144 |
+
self.fallback_clients = ['TV', 'IOS']
|
| 145 |
+
|
| 146 |
+
self._signature_timestamp: dict = {}
|
| 147 |
+
self._visitor_data = None
|
| 148 |
+
|
| 149 |
+
# Shared between all instances of `Stream` (Borg pattern).
|
| 150 |
+
self.stream_monostate = Monostate(
|
| 151 |
+
on_progress=on_progress_callback, on_complete=on_complete_callback, youtube=self
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
+
if proxies:
|
| 155 |
+
install_proxy(proxies)
|
| 156 |
+
|
| 157 |
+
self._author = None
|
| 158 |
+
self._title = None
|
| 159 |
+
self._publish_date = None
|
| 160 |
+
|
| 161 |
+
self.use_oauth = use_oauth
|
| 162 |
+
self.allow_oauth_cache = allow_oauth_cache
|
| 163 |
+
self.token_file = token_file
|
| 164 |
+
self.oauth_verifier = oauth_verifier
|
| 165 |
+
|
| 166 |
+
self.use_po_token = use_po_token
|
| 167 |
+
self.po_token_verifier = po_token_verifier
|
| 168 |
+
|
| 169 |
+
self.po_token = None
|
| 170 |
+
self._pot = None
|
| 171 |
+
|
| 172 |
+
def __repr__(self):
|
| 173 |
+
return f'<pytubefix.__main__.YouTube object: videoId={self.video_id}>'
|
| 174 |
+
|
| 175 |
+
def __eq__(self, o: object) -> bool:
|
| 176 |
+
# Compare types and urls, if they're same return true, else return false.
|
| 177 |
+
return type(o) == type(self) and o.watch_url == self.watch_url
|
| 178 |
+
|
| 179 |
+
@property
|
| 180 |
+
def watch_html(self):
|
| 181 |
+
if self._watch_html:
|
| 182 |
+
return self._watch_html
|
| 183 |
+
self._watch_html = request.get(url=self.watch_url)
|
| 184 |
+
return self._watch_html
|
| 185 |
+
|
| 186 |
+
@property
|
| 187 |
+
def embed_html(self):
|
| 188 |
+
if self._embed_html:
|
| 189 |
+
return self._embed_html
|
| 190 |
+
self._embed_html = request.get(url=self.embed_url)
|
| 191 |
+
return self._embed_html
|
| 192 |
+
|
| 193 |
+
@property
|
| 194 |
+
def age_restricted(self):
|
| 195 |
+
if self._age_restricted:
|
| 196 |
+
return self._age_restricted
|
| 197 |
+
self._age_restricted = extract.is_age_restricted(self.watch_html)
|
| 198 |
+
return self._age_restricted
|
| 199 |
+
|
| 200 |
+
@property
|
| 201 |
+
def js_url(self):
|
| 202 |
+
if self._js_url:
|
| 203 |
+
return self._js_url
|
| 204 |
+
|
| 205 |
+
if self.age_restricted:
|
| 206 |
+
self._js_url = extract.js_url(self.embed_html)
|
| 207 |
+
else:
|
| 208 |
+
self._js_url = extract.js_url(self.watch_html)
|
| 209 |
+
|
| 210 |
+
return self._js_url
|
| 211 |
+
|
| 212 |
+
@property
|
| 213 |
+
def js(self):
|
| 214 |
+
if self._js:
|
| 215 |
+
return self._js
|
| 216 |
+
|
| 217 |
+
# If the js_url doesn't match the cached url, fetch the new js and update
|
| 218 |
+
# the cache; otherwise, load the cache.
|
| 219 |
+
if pytubefix.__js_url__ != self.js_url:
|
| 220 |
+
self._js = request.get(self.js_url)
|
| 221 |
+
pytubefix.__js__ = self._js
|
| 222 |
+
pytubefix.__js_url__ = self.js_url
|
| 223 |
+
else:
|
| 224 |
+
self._js = pytubefix.__js__
|
| 225 |
+
|
| 226 |
+
return self._js
|
| 227 |
+
|
| 228 |
+
@property
|
| 229 |
+
def visitor_data(self) -> str:
|
| 230 |
+
"""
|
| 231 |
+
Retrieves the visitorData from the WEB client.
|
| 232 |
+
"""
|
| 233 |
+
if self._visitor_data:
|
| 234 |
+
return self._visitor_data
|
| 235 |
+
|
| 236 |
+
if InnerTube(self.client).require_po_token:
|
| 237 |
+
try:
|
| 238 |
+
logger.debug("Looking for visitorData in initial_data")
|
| 239 |
+
self._visitor_data = extract.visitor_data(str(self.initial_data['responseContext']))
|
| 240 |
+
logger.debug('VisitorData obtained successfully')
|
| 241 |
+
return self._visitor_data
|
| 242 |
+
except (KeyError, pytubefix.exceptions.RegexMatchError):
|
| 243 |
+
logger.debug("Unable to obtain visitorData from initial_data. Trying to request from the WEB client")
|
| 244 |
+
|
| 245 |
+
logger.debug("Looking for visitorData in InnerTube API")
|
| 246 |
+
innertube_response = InnerTube('WEB').player(self.video_id)
|
| 247 |
+
try:
|
| 248 |
+
self._visitor_data = innertube_response['responseContext']['visitorData']
|
| 249 |
+
except KeyError:
|
| 250 |
+
self._visitor_data = innertube_response['responseContext']['serviceTrackingParams'][0]['params'][6]['value']
|
| 251 |
+
logger.debug('VisitorData obtained successfully')
|
| 252 |
+
|
| 253 |
+
return self._visitor_data
|
| 254 |
+
|
| 255 |
+
@property
|
| 256 |
+
def pot(self) -> str:
|
| 257 |
+
"""
|
| 258 |
+
Retrieves the poToken generated by botGuard.
|
| 259 |
+
|
| 260 |
+
This poToken only works for WEB-based clients.
|
| 261 |
+
"""
|
| 262 |
+
if self._pot:
|
| 263 |
+
return self._pot
|
| 264 |
+
logger.debug('Invoking botGuard')
|
| 265 |
+
try:
|
| 266 |
+
self._pot = bot_guard.generate_po_token(visitor_data=self.visitor_data)
|
| 267 |
+
logger.debug('PoToken generated successfully')
|
| 268 |
+
except Exception as e:
|
| 269 |
+
logger.warning('Unable to run botGuard. Skipping poToken generation, reason: ' + e.__str__())
|
| 270 |
+
return self._pot
|
| 271 |
+
|
| 272 |
+
@property
|
| 273 |
+
def initial_data(self):
|
| 274 |
+
if self._initial_data:
|
| 275 |
+
return self._initial_data
|
| 276 |
+
self._initial_data = extract.initial_data(self.watch_html)
|
| 277 |
+
return self._initial_data
|
| 278 |
+
|
| 279 |
+
@property
|
| 280 |
+
def streaming_data(self):
|
| 281 |
+
"""Return streamingData from video info."""
|
| 282 |
+
|
| 283 |
+
# List of YouTube error video IDs
|
| 284 |
+
invalid_id_list = ['aQvGIIdgFDM']
|
| 285 |
+
|
| 286 |
+
# If my previously valid video_info doesn't have the streamingData,
|
| 287 |
+
# or it is an invalid video,
|
| 288 |
+
# try to get a new video_info with a different client.
|
| 289 |
+
if 'streamingData' not in self.vid_info or self.vid_info['videoDetails']['videoId'] in invalid_id_list:
|
| 290 |
+
original_client = self.client
|
| 291 |
+
|
| 292 |
+
# for each fallback client set, revert videodata, and run check_availability, which
|
| 293 |
+
# will try to get a new video_info with a different client.
|
| 294 |
+
# if it fails try the next fallback client, and so on.
|
| 295 |
+
# If none of the clients have valid streamingData, raise an exception.
|
| 296 |
+
for client in self.fallback_clients:
|
| 297 |
+
self.client = client
|
| 298 |
+
self.vid_info = None
|
| 299 |
+
try:
|
| 300 |
+
self.check_availability()
|
| 301 |
+
except Exception as e:
|
| 302 |
+
continue
|
| 303 |
+
if 'streamingData' in self.vid_info:
|
| 304 |
+
break
|
| 305 |
+
if 'streamingData' not in self.vid_info:
|
| 306 |
+
raise exceptions.UnknownVideoError(video_id=self.video_id,
|
| 307 |
+
developer_message=f'Streaming data is missing, '
|
| 308 |
+
f'original client: {original_client}, '
|
| 309 |
+
f'fallback clients: {self.fallback_clients}')
|
| 310 |
+
|
| 311 |
+
return self.vid_info['streamingData']
|
| 312 |
+
|
| 313 |
+
@property
|
| 314 |
+
def fmt_streams(self):
|
| 315 |
+
"""Returns a list of streams if they have been initialized.
|
| 316 |
+
|
| 317 |
+
If the streams have not been initialized, finds all relevant
|
| 318 |
+
streams and initializes them.
|
| 319 |
+
"""
|
| 320 |
+
self.check_availability()
|
| 321 |
+
if self._fmt_streams:
|
| 322 |
+
return self._fmt_streams
|
| 323 |
+
|
| 324 |
+
self._fmt_streams = []
|
| 325 |
+
|
| 326 |
+
stream_manifest = extract.apply_descrambler(self.streaming_data)
|
| 327 |
+
inner_tube = InnerTube(self.client)
|
| 328 |
+
if self.po_token:
|
| 329 |
+
extract.apply_po_token(stream_manifest, self.vid_info, self.po_token)
|
| 330 |
+
|
| 331 |
+
if inner_tube.require_js_player:
|
| 332 |
+
# If the cached js doesn't work, try fetching a new js file
|
| 333 |
+
# https://github.com/pytube/pytube/issues/1054
|
| 334 |
+
try:
|
| 335 |
+
extract.apply_signature(stream_manifest, self.vid_info, self.js, self.js_url)
|
| 336 |
+
except exceptions.ExtractError:
|
| 337 |
+
# To force an update to the js file, we clear the cache and retry
|
| 338 |
+
self._js = None
|
| 339 |
+
self._js_url = None
|
| 340 |
+
pytubefix.__js__ = None
|
| 341 |
+
pytubefix.__js_url__ = None
|
| 342 |
+
extract.apply_signature(stream_manifest, self.vid_info, self.js, self.js_url)
|
| 343 |
+
|
| 344 |
+
# build instances of :class:`Stream <Stream>`
|
| 345 |
+
# Initialize stream objects
|
| 346 |
+
for stream in stream_manifest:
|
| 347 |
+
video = Stream(
|
| 348 |
+
stream=stream,
|
| 349 |
+
monostate=self.stream_monostate,
|
| 350 |
+
po_token=self.po_token,
|
| 351 |
+
video_playback_ustreamer_config=self.video_playback_ustreamer_config
|
| 352 |
+
)
|
| 353 |
+
self._fmt_streams.append(video)
|
| 354 |
+
|
| 355 |
+
self.stream_monostate.title = self.title
|
| 356 |
+
self.stream_monostate.duration = self.length
|
| 357 |
+
|
| 358 |
+
return self._fmt_streams
|
| 359 |
+
|
| 360 |
+
def check_availability(self):
|
| 361 |
+
"""Check whether the video is available.
|
| 362 |
+
|
| 363 |
+
Raises different exceptions based on why the video is unavailable,
|
| 364 |
+
otherwise does nothing.
|
| 365 |
+
"""
|
| 366 |
+
status, messages = extract.playability_status(self.vid_info)
|
| 367 |
+
|
| 368 |
+
if InnerTube(self.client).require_po_token and not self.po_token:
|
| 369 |
+
logger.warning(f"The {self.client} client requires PoToken to obtain functional streams, "
|
| 370 |
+
f"See more details at https://github.com/JuanBindez/pytubefix/pull/209")
|
| 371 |
+
|
| 372 |
+
for reason in messages:
|
| 373 |
+
if status == 'UNPLAYABLE':
|
| 374 |
+
if reason == (
|
| 375 |
+
'Join this channel to get access to members-only content '
|
| 376 |
+
'like this video, and other exclusive perks.'
|
| 377 |
+
):
|
| 378 |
+
raise exceptions.MembersOnly(video_id=self.video_id)
|
| 379 |
+
|
| 380 |
+
elif reason == 'This live stream recording is not available.':
|
| 381 |
+
raise exceptions.RecordingUnavailable(video_id=self.video_id)
|
| 382 |
+
|
| 383 |
+
elif reason == (
|
| 384 |
+
'Sorry, something is wrong. This video may be inappropriate for some users. '
|
| 385 |
+
'Sign in to your primary account to confirm your age.'
|
| 386 |
+
):
|
| 387 |
+
raise exceptions.AgeCheckRequiredAccountError(video_id=self.video_id)
|
| 388 |
+
elif reason == (
|
| 389 |
+
'The uploader has not made this video available in your country'
|
| 390 |
+
):
|
| 391 |
+
raise exceptions.VideoRegionBlocked(video_id=self.video_id)
|
| 392 |
+
else:
|
| 393 |
+
raise exceptions.VideoUnavailable(video_id=self.video_id)
|
| 394 |
+
|
| 395 |
+
elif status == 'LOGIN_REQUIRED':
|
| 396 |
+
if reason == (
|
| 397 |
+
'Sign in to confirm your age'
|
| 398 |
+
):
|
| 399 |
+
raise exceptions.AgeRestrictedError(video_id=self.video_id)
|
| 400 |
+
elif reason == (
|
| 401 |
+
'Sign in to confirm you’re not a bot'
|
| 402 |
+
):
|
| 403 |
+
raise exceptions.BotDetection(video_id=self.video_id)
|
| 404 |
+
else:
|
| 405 |
+
raise exceptions.LoginRequired(video_id=self.video_id, reason=reason)
|
| 406 |
+
|
| 407 |
+
elif status == 'AGE_CHECK_REQUIRED':
|
| 408 |
+
if self.use_oauth:
|
| 409 |
+
self.age_check()
|
| 410 |
+
else:
|
| 411 |
+
raise exceptions.AgeCheckRequiredError(video_id=self.video_id)
|
| 412 |
+
|
| 413 |
+
elif status == 'LIVE_STREAM_OFFLINE':
|
| 414 |
+
raise exceptions.LiveStreamOffline(video_id=self.video_id, reason=reason)
|
| 415 |
+
|
| 416 |
+
elif status == 'ERROR':
|
| 417 |
+
if reason == 'Video unavailable':
|
| 418 |
+
raise exceptions.VideoUnavailable(video_id=self.video_id)
|
| 419 |
+
elif reason == 'This video is private':
|
| 420 |
+
raise exceptions.VideoPrivate(video_id=self.video_id)
|
| 421 |
+
elif reason == 'This video is unavailable':
|
| 422 |
+
raise exceptions.VideoUnavailable(video_id=self.video_id)
|
| 423 |
+
elif reason == 'This video has been removed by the uploader':
|
| 424 |
+
raise exceptions.VideoUnavailable(video_id=self.video_id)
|
| 425 |
+
elif reason == 'This video is no longer available because the YouTube account associated with this video has been terminated.':
|
| 426 |
+
raise exceptions.VideoUnavailable(video_id=self.video_id)
|
| 427 |
+
else:
|
| 428 |
+
raise exceptions.UnknownVideoError(video_id=self.video_id, status=status, reason=reason, developer_message=f'Unknown reason type for Error status')
|
| 429 |
+
elif status == 'LIVE_STREAM':
|
| 430 |
+
raise exceptions.LiveStreamError(video_id=self.video_id)
|
| 431 |
+
elif status is None:
|
| 432 |
+
pass
|
| 433 |
+
else:
|
| 434 |
+
raise exceptions.UnknownVideoError(video_id=self.video_id, status=status, reason=reason, developer_message=f'Unknown video status')
|
| 435 |
+
|
| 436 |
+
@property
|
| 437 |
+
def signature_timestamp(self) -> dict:
|
| 438 |
+
"""WEB clients need to be signed with a signature timestamp.
|
| 439 |
+
|
| 440 |
+
The signature is found inside the player's base.js.
|
| 441 |
+
|
| 442 |
+
:rtype: Dict
|
| 443 |
+
"""
|
| 444 |
+
if not self._signature_timestamp:
|
| 445 |
+
self._signature_timestamp = {
|
| 446 |
+
'playbackContext': {
|
| 447 |
+
'contentPlaybackContext': {
|
| 448 |
+
'signatureTimestamp': extract.signature_timestamp(self.js)
|
| 449 |
+
}
|
| 450 |
+
}
|
| 451 |
+
}
|
| 452 |
+
return self._signature_timestamp
|
| 453 |
+
|
| 454 |
+
@property
|
| 455 |
+
def video_playback_ustreamer_config(self):
|
| 456 |
+
return self.vid_info[
|
| 457 |
+
'playerConfig'][
|
| 458 |
+
'mediaCommonConfig'][
|
| 459 |
+
'mediaUstreamerRequestConfig'][
|
| 460 |
+
'videoPlaybackUstreamerConfig']
|
| 461 |
+
|
| 462 |
+
@property
|
| 463 |
+
def server_abr_streaming_url(self):
|
| 464 |
+
"""
|
| 465 |
+
Extract the url for abr server and decrypt the `n` parameter
|
| 466 |
+
"""
|
| 467 |
+
try:
|
| 468 |
+
url = self.vid_info[
|
| 469 |
+
'streamingData'][
|
| 470 |
+
'serverAbrStreamingUrl']
|
| 471 |
+
stream_manifest = [{"url": url}]
|
| 472 |
+
extract.apply_signature(stream_manifest, vid_info=self.vid_info, js=self.js, url_js=self.js_url)
|
| 473 |
+
return stream_manifest[0]["url"]
|
| 474 |
+
except Exception:
|
| 475 |
+
return None
|
| 476 |
+
|
| 477 |
+
@property
def vid_info(self):
    """Parse the raw vid info and return the parsed result.

    Fetches the `player` endpoint response via InnerTube on first access
    and caches it. If the current client reports the video as unavailable,
    each configured fallback client is tried in turn.

    :rtype: Dict[Any, Any]
    """
    if self._vid_info:
        return self._vid_info

    def call_innertube():
        # A fresh InnerTube instance per attempt, so each fallback client
        # starts with a clean context.
        innertube = InnerTube(
            client=self.client,
            use_oauth=self.use_oauth,
            allow_cache=self.allow_oauth_cache,
            token_file=self.token_file,
            oauth_verifier=self.oauth_verifier,
            use_po_token=self.use_po_token,
            po_token_verifier=self.po_token_verifier
        )
        if innertube.require_js_player:
            innertube.innertube_context.update(self.signature_timestamp)

        # Automatically generates a poToken
        if innertube.require_po_token and not self.use_po_token:
            logger.debug(f"The {self.client} client requires poToken to obtain functional streams")
            logger.debug("Automatically generating poToken")
            innertube.insert_po_token(visitor_data=self.visitor_data, po_token=self.pot)
        elif not self.use_po_token:
            # from 01/22/2025 all clients must send the visitorData in the API request
            innertube.insert_visitor_data(visitor_data=self.visitor_data)

        response = innertube.player(self.video_id)

        # Retrieves the sent poToken
        if self.use_po_token or innertube.require_po_token:
            self.po_token = innertube.access_po_token or self.pot
        return response

    innertube_response = call_innertube()
    for client in self.fallback_clients:
        # Some clients are unable to access certain types of videos
        # If the video is unavailable for the current client, attempts will be made with fallback clients
        playability_status = innertube_response['playabilityStatus']
        if playability_status['status'] == 'UNPLAYABLE' and 'reason' in playability_status and playability_status['reason'] == 'This video is not available':
            logger.warning(f"{self.client} client returned: This video is not available")
            self.client = client
            logger.warning(f"Switching to client: {client}")
            innertube_response = call_innertube()
        else:
            break

    self._vid_info = innertube_response
    if not self._vid_info:
        raise pytubefix.exceptions.InnerTubeResponseError(self.video_id, self.client)

    return self._vid_info
|
| 533 |
+
|
| 534 |
+
@vid_info.setter
def vid_info(self, value):
    # Allow callers to inject a pre-fetched player response; the getter
    # then skips the InnerTube request entirely.
    self._vid_info = value
|
| 537 |
+
|
| 538 |
+
@property
def vid_details(self):
    """Parse the raw vid details and return the parsed result.

    The official player sends a request to the `next` endpoint to obtain some details of the video.

    :rtype: Dict[Any, Any]
    """
    if self._vid_details:
        return self._vid_details

    # TV client is used for OAuth sessions, plain WEB otherwise.
    innertube = InnerTube(
        client='TV' if self.use_oauth else 'WEB',
        use_oauth=self.use_oauth,
        allow_cache=self.allow_oauth_cache,
        token_file=self.token_file,
        oauth_verifier=self.oauth_verifier,
        use_po_token=self.use_po_token,
        po_token_verifier=self.po_token_verifier
    )
    innertube_response = innertube.next(self.video_id)
    self._vid_details = innertube_response
    return self._vid_details
|
| 561 |
+
|
| 562 |
+
@vid_details.setter
def vid_details(self, value):
    # Allow callers to inject a pre-fetched `next` endpoint response.
    self._vid_details = value
|
| 565 |
+
|
| 566 |
+
def age_check(self):
    """If the video has any age restrictions, you must confirm that you wish to continue.

    Originally the WEB client was used, but with the implementation of PoToken we switched to MWEB.

    NOTE(review): despite the docstring above, the code below now forces the
    TV client — confirm which client is intended and update the docstring.
    Raises AgeCheckRequiredAccountError / AgeCheckRequiredError if the video
    remains inaccessible after the age verification round-trip.
    """

    self.client = 'TV'
    innertube = InnerTube(
        client=self.client,
        use_oauth=self.use_oauth,
        allow_cache=self.allow_oauth_cache,
        token_file=self.token_file,
        oauth_verifier=self.oauth_verifier,
        use_po_token=self.use_po_token,
        po_token_verifier=self.po_token_verifier
    )

    if innertube.require_js_player:
        innertube.innertube_context.update(self.signature_timestamp)

    # Confirm the age gate, then re-request the player response.
    innertube.verify_age(self.video_id)

    innertube_response = innertube.player(self.video_id)

    playability_status = innertube_response['playabilityStatus'].get('status', None)

    # If we still can't access the video, raise an exception
    # (tier the error: UNPLAYABLE means an account is required)
    if playability_status != 'OK':
        if playability_status == 'UNPLAYABLE':
            raise exceptions.AgeCheckRequiredAccountError(self.video_id)
        else:
            raise exceptions.AgeCheckRequiredError(self.video_id)

    self._vid_info = innertube_response
|
| 600 |
+
|
| 601 |
+
@property
def caption_tracks(self) -> List[pytubefix.Caption]:
    """Get a list of :class:`Caption <Caption>`.

    Issues a dedicated `player` request (WEB client unless OAuth is in use)
    because not every client returns caption metadata.

    :rtype: List[Caption]
    """

    innertube_response = InnerTube(
        client='WEB' if not self.use_oauth else self.client,
        use_oauth=self.use_oauth,
        allow_cache=self.allow_oauth_cache,
        token_file=self.token_file,
        oauth_verifier=self.oauth_verifier,
        use_po_token=self.use_po_token,
        po_token_verifier=self.po_token_verifier
    ).player(self.video_id)

    raw_tracks = (
        innertube_response.get("captions", {})
        .get("playerCaptionsTracklistRenderer", {})
        .get("captionTracks", [])
    )
    return [pytubefix.Caption(track) for track in raw_tracks]
|
| 624 |
+
|
| 625 |
+
@property
def captions(self) -> pytubefix.CaptionQuery:
    """Interface to query caption tracks.

    :rtype: :class:`CaptionQuery <CaptionQuery>`.
    """
    # Thin wrapper: each access re-fetches caption_tracks (network call).
    return pytubefix.CaptionQuery(self.caption_tracks)
|
| 632 |
+
|
| 633 |
+
@property
def chapters(self) -> List[pytubefix.Chapter]:
    """Get a list of :class:`Chapter <Chapter>`.

    :rtype: List[Chapter]
    """
    try:
        chapters_data = []
        # Chapters live in the player-bar markers of the watch page's
        # initial data, keyed 'DESCRIPTION_CHAPTERS'.
        markers_map = self.initial_data['playerOverlays']['playerOverlayRenderer'][
            'decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']['playerBar'][
            'multiMarkersPlayerBarRenderer']['markersMap']
        for marker in markers_map:
            if marker['key'].upper() == 'DESCRIPTION_CHAPTERS':
                chapters_data = marker['value']['chapters']
                break
    except (KeyError, IndexError):
        return []

    result: List[pytubefix.Chapter] = []

    for i, chapter_data in enumerate(chapters_data):
        chapter_start = int(
            chapter_data['chapterRenderer']['timeRangeStartMillis'] / 1000
        )

        # Last chapter runs to the end of the video; otherwise the next
        # chapter's start bounds this one.
        if i == len(chapters_data) - 1:
            chapter_end = self.length
        else:
            chapter_end = int(
                chapters_data[i + 1]['chapterRenderer']['timeRangeStartMillis'] / 1000
            )

        result.append(pytubefix.Chapter(chapter_data, chapter_end - chapter_start))

    return result
|
| 668 |
+
|
| 669 |
+
@property
def key_moments(self) -> List[pytubefix.KeyMoment]:
    """Get a list of :class:`KeyMoment <KeyMoment>`.

    :rtype: List[KeyMoment]
    """
    try:
        # Key moments are published as a MARKER_TYPE_TIMESTAMPS markers
        # list inside the frameworkUpdates mutations.
        mutations = self.initial_data['frameworkUpdates']['entityBatchUpdate']['mutations']
        found = False
        for mutation in mutations:
            if mutation.get('payload', {}).get('macroMarkersListEntity', {}).get('markersList', {}).get(
                    'markerType') == "MARKER_TYPE_TIMESTAMPS":
                key_moments_data = mutation['payload']['macroMarkersListEntity']['markersList']['markers']
                found = True
                break

        if not found:
            return []
    except (KeyError, IndexError):
        return []

    result: List[pytubefix.KeyMoment] = []

    for i, key_moment_data in enumerate(key_moments_data):
        key_moment_start = int(key_moment_data['startMillis']) // 1000

        # Last key moment runs to the end of the video; otherwise the next
        # moment's start bounds this one.
        if i == len(key_moments_data) - 1:
            key_moment_end = self.length
        else:
            key_moment_end = int(key_moments_data[i + 1]['startMillis']) // 1000

        result.append(pytubefix.KeyMoment(key_moment_data, key_moment_end - key_moment_start))

    return result
|
| 703 |
+
|
| 704 |
+
@property
def replayed_heatmap(self) -> List[Dict[str, float]]:
    """Get a list of : `Dict<str, float>`.

    Each entry describes one MARKER_TYPE_HEATMAP marker from the watch
    page's initial data: start time (s), duration (s) and the normalized
    replay-intensity score.

    :rtype: List[Dict[str, float]]
    """
    try:
        mutations = self.initial_data['frameworkUpdates']['entityBatchUpdate']['mutations']
        markers = None
        for mutation in mutations:
            markers_list = mutation.get('payload', {}).get(
                'macroMarkersListEntity', {}).get('markersList', {})
            if markers_list.get('markerType') == "MARKER_TYPE_HEATMAP":
                markers = markers_list['markers']
                break
        if markers is None:
            return []
    except (KeyError, IndexError):
        return []

    return [
        {
            "start_seconds": int(marker['startMillis']) / 1000,
            "duration": int(marker['durationMillis']) / 1000,
            "norm_intensity": float(marker['intensityScoreNormalized']),
        }
        for marker in markers
    ]
|
| 740 |
+
|
| 741 |
+
@property
def streams(self) -> StreamQuery:
    """Interface to query both adaptive (DASH) and progressive streams.

    :rtype: :class:`StreamQuery <StreamQuery>`.
    """
    # Fail fast with a descriptive exception before building the query.
    self.check_availability()
    return StreamQuery(self.fmt_streams)
|
| 749 |
+
|
| 750 |
+
@property
def thumbnail_url(self) -> str:
    """URL of the highest-resolution thumbnail available.

    Falls back to the predictable maxresdefault URL when the player
    response carries no thumbnail list.

    :rtype: str
    """
    candidates = (
        self.vid_info.get("videoDetails", {})
        .get("thumbnail", {})
        .get("thumbnails")
    )
    if candidates:
        # The list is ordered by size; the final entry is the largest.
        return candidates[-1]["url"]
    return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg"
|
| 766 |
+
|
| 767 |
+
@property
def publish_date(self):
    """Get the publish date.

    :rtype: datetime
    """
    if self._publish_date:
        return self._publish_date
    # Scraped from the watch page HTML and cached.
    self._publish_date = extract.publish_date(self.watch_html)
    return self._publish_date
|
| 777 |
+
|
| 778 |
+
@publish_date.setter
def publish_date(self, value):
    """Sets the publish date."""
    self._publish_date = value
|
| 782 |
+
|
| 783 |
+
@property
def title(self) -> str:
    """Get the video title.

    Side effect: refreshes ``self._author`` from the player response on
    every access. Falls back to the `next` endpoint (vid_details) when the
    `player` endpoint omitted the title.

    :rtype: str
    """
    self._author = self.vid_info.get("videoDetails", {}).get(
        "author", "unknown"
    )

    if self._title:
        return self._title

    try:
        # Some clients may not return the title in the `player` endpoint,
        # so if it is not found we will look for it in the `next` endpoint
        if 'title' in self.vid_info['videoDetails']:
            self._title = self.vid_info['videoDetails']['title']
            logger.debug('Found title in vid_info')
        else:
            if 'singleColumnWatchNextResults' in self.vid_details['contents']:
                contents = self.vid_details['contents'][
                    'singleColumnWatchNextResults'][
                    'results'][
                    'results'][
                    'contents'][0][
                    'itemSectionRenderer'][
                    'contents'][0]

                if 'videoMetadataRenderer' in contents:
                    self._title = contents['videoMetadataRenderer']['title']['runs'][0]['text']
                else:
                    # JSON tree for titles in videos available on YouTube music
                    self._title = contents['musicWatchMetadataRenderer']['title']['simpleText']

            # The type of video with this structure is not yet known.
            # First reported in: https://github.com/JuanBindez/pytubefix/issues/351
            elif 'twoColumnWatchNextResults' in self.vid_details['contents']:
                self._title = self.vid_details['contents'][
                    'twoColumnWatchNextResults'][
                    'results'][
                    'results'][
                    'contents'][0][
                    'videoPrimaryInfoRenderer'][
                    'title'][
                    'runs'][0][
                    'text']

            logger.debug('Found title in vid_details')
    except KeyError as e:
        # Check_availability will raise the correct exception in most cases
        # if it doesn't, ask for a report.
        self.check_availability()
        raise exceptions.PytubeFixError(
            (
                f'Exception while accessing title of {self.watch_url}. '
                'Please file a bug report at https://github.com/JuanBindez/pytubefix'
            )
        ) from e

    return self._title
|
| 844 |
+
|
| 845 |
+
@title.setter
def title(self, value):
    """Sets the title value."""
    self._title = value
|
| 849 |
+
|
| 850 |
+
@property
def description(self) -> str:
    """Short description from the player response (``None`` if absent).

    :rtype: str
    """
    details = self.vid_info.get("videoDetails", {})
    return details.get("shortDescription")
|
| 857 |
+
|
| 858 |
+
@property
def rating(self) -> float:
    """Average rating reported by the player response (``None`` if absent).

    :rtype: float
    """
    details = self.vid_info.get("videoDetails", {})
    return details.get("averageRating")
|
| 866 |
+
|
| 867 |
+
@property
def length(self) -> int:
    """Get the video length in seconds.

    Returns 0 when the player response does not report a length (some
    live streams / premieres omit 'lengthSeconds'); the previous
    implementation raised ``TypeError`` via ``int(None)`` in that case.

    :rtype: int
    """
    seconds = self.vid_info.get('videoDetails', {}).get('lengthSeconds')
    return int(seconds) if seconds is not None else 0
|
| 874 |
+
|
| 875 |
+
@property
def views(self) -> int:
    """Number of times the video has been viewed (0 if unreported).

    :rtype: int
    """
    details = self.vid_info.get("videoDetails", {})
    return int(details.get("viewCount", "0"))
|
| 882 |
+
|
| 883 |
+
@property
def author(self) -> str:
    """Get the video author, cached after the first lookup.

    :rtype: str
    """
    if not self._author:
        details = self.vid_info.get("videoDetails", {})
        self._author = details.get("author", "unknown")
    return self._author
|
| 894 |
+
|
| 895 |
+
@author.setter
def author(self, value):
    """Set the video author."""
    self._author = value
|
| 899 |
+
|
| 900 |
+
@property
def keywords(self) -> List[str]:
    """Keyword strings attached to the video (empty list if none).

    :rtype: List[str]
    """
    details = self.vid_info.get('videoDetails', {})
    return details.get('keywords', [])
|
| 907 |
+
|
| 908 |
+
@property
def channel_id(self) -> str:
    """Channel id of the uploader (``None`` if absent).

    :rtype: str
    """
    details = self.vid_info.get('videoDetails', {})
    return details.get('channelId', None)
|
| 915 |
+
|
| 916 |
+
@property
def channel_url(self) -> str:
    """Canonical channel URL built from :attr:`channel_id`.

    :rtype: str
    """
    return 'https://www.youtube.com/channel/' + str(self.channel_id)
|
| 923 |
+
|
| 924 |
+
@property
def likes(self):
    """Get the video likes

    Walks the `next` endpoint response down to the like-count entity and
    returns ``None`` when any step of the path is missing.

    :rtype: str
    """
    path = (
        'contents',
        'twoColumnWatchNextResults',
        'results',
        'results',
        'contents',
        0,
        'videoPrimaryInfoRenderer',
        'videoActions',
        'menuRenderer',
        'topLevelButtons',
        0,
        'segmentedLikeDislikeButtonViewModel',
        'likeCountEntity',
        'likeCountIfLikedNumber',
    )
    node = self.vid_details
    try:
        for step in path:
            node = node[step]
    except (KeyError, IndexError):
        return None
    return node
|
| 948 |
+
|
| 949 |
+
@property
def metadata(self) -> Optional[YouTubeMetadata]:
    """Get the metadata for the video.

    :rtype: YouTubeMetadata
    """
    if not self._metadata:
        # Parsed lazily from the watch page's initial data and cached.
        self._metadata = extract.metadata(
            self.initial_data)  # Creating the metadata
    return self._metadata
|
| 959 |
+
|
| 960 |
+
def register_on_progress_callback(self, func: Callable[[Any, bytes, int], None]):
    """Register a download progress callback function post initialization.

    :param callable func:
        A callback function that takes ``stream``, ``chunk``,
        and ``bytes_remaining`` as parameters.

    :rtype: None

    """
    # Stored on the shared monostate so every Stream built from this
    # instance picks it up.
    self.stream_monostate.on_progress = func
|
| 971 |
+
|
| 972 |
+
def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], None]):
    """Register a download complete callback function post initialization.

    :param callable func:
        A callback function that takes ``stream`` and ``file_path``.

    :rtype: None

    """
    # Stored on the shared monostate so every Stream built from this
    # instance picks it up.
    self.stream_monostate.on_complete = func
|
| 982 |
+
|
| 983 |
+
@staticmethod
def from_id(video_id: str) -> "YouTube":
    """Construct a :class:`YouTube <YouTube>` object from a video id.

    :param str video_id:
        The video id of the YouTube video.

    :rtype: :class:`YouTube <YouTube>`
    """
    # Builds the canonical watch URL and delegates to the main constructor.
    return YouTube(f"https://www.youtube.com/watch?v={video_id}")
|
pytubefix/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (1.28 kB). View file
|
|
|
pytubefix/__pycache__/__main__.cpython-311.pyc
ADDED
|
Binary file (42.6 kB). View file
|
|
|
pytubefix/__pycache__/buffer.cpython-311.pyc
ADDED
|
Binary file (2.91 kB). View file
|
|
|
pytubefix/__pycache__/captions.cpython-311.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
pytubefix/__pycache__/chapters.cpython-311.pyc
ADDED
|
Binary file (3.18 kB). View file
|
|
|
pytubefix/__pycache__/cipher.cpython-311.pyc
ADDED
|
Binary file (9.97 kB). View file
|
|
|
pytubefix/__pycache__/cli.cpython-311.pyc
ADDED
|
Binary file (21.2 kB). View file
|
|
|
pytubefix/__pycache__/exceptions.cpython-311.pyc
ADDED
|
Binary file (18.3 kB). View file
|
|
|
pytubefix/__pycache__/extract.cpython-311.pyc
ADDED
|
Binary file (25.1 kB). View file
|
|
|
pytubefix/__pycache__/file_system.cpython-311.pyc
ADDED
|
Binary file (2.54 kB). View file
|
|
|
pytubefix/__pycache__/helpers.cpython-311.pyc
ADDED
|
Binary file (17.4 kB). View file
|
|
|
pytubefix/__pycache__/info.cpython-311.pyc
ADDED
|
Binary file (1.54 kB). View file
|
|
|
pytubefix/__pycache__/innertube.cpython-311.pyc
ADDED
|
Binary file (24.6 kB). View file
|
|
|
pytubefix/__pycache__/itags.cpython-311.pyc
ADDED
|
Binary file (4.05 kB). View file
|
|
|
pytubefix/__pycache__/jsinterp.cpython-311.pyc
ADDED
|
Binary file (70.1 kB). View file
|
|
|
pytubefix/__pycache__/keymoments.cpython-311.pyc
ADDED
|
Binary file (3.19 kB). View file
|
|
|
pytubefix/__pycache__/metadata.cpython-311.pyc
ADDED
|
Binary file (2.68 kB). View file
|
|
|
pytubefix/__pycache__/monostate.cpython-311.pyc
ADDED
|
Binary file (1.15 kB). View file
|
|
|
pytubefix/__pycache__/parser.cpython-311.pyc
ADDED
|
Binary file (6.64 kB). View file
|
|
|
pytubefix/__pycache__/protobuf.cpython-311.pyc
ADDED
|
Binary file (8.88 kB). View file
|
|
|
pytubefix/__pycache__/query.cpython-311.pyc
ADDED
|
Binary file (24.1 kB). View file
|
|
|
pytubefix/__pycache__/request.cpython-311.pyc
ADDED
|
Binary file (10.3 kB). View file
|
|
|
pytubefix/__pycache__/streams.cpython-311.pyc
ADDED
|
Binary file (28.7 kB). View file
|
|
|
pytubefix/__pycache__/version.cpython-311.pyc
ADDED
|
Binary file (323 Bytes). View file
|
|
|
pytubefix/botGuard/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
pytubefix/botGuard/__init__.py
ADDED
|
File without changes
|
pytubefix/botGuard/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (211 Bytes). View file
|
|
|
pytubefix/botGuard/__pycache__/bot_guard.cpython-311.pyc
ADDED
|
Binary file (2.82 kB). View file
|
|
|
pytubefix/botGuard/bot_guard.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import subprocess
|
| 3 |
+
import sys
|
| 4 |
+
import shutil
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
PLATFORM = sys.platform
|
| 8 |
+
|
| 9 |
+
NODE = 'node' if PLATFORM in ['linux', 'darwin'] else 'node.exe'
|
| 10 |
+
|
| 11 |
+
def _find_node_path() -> Optional[str]:
|
| 12 |
+
"""Try multiple ways to find Node.js path."""
|
| 13 |
+
local_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), f'binaries/{NODE}')
|
| 14 |
+
if os.path.isfile(local_path):
|
| 15 |
+
return local_path
|
| 16 |
+
|
| 17 |
+
system_path = shutil.which(NODE)
|
| 18 |
+
if system_path:
|
| 19 |
+
return system_path
|
| 20 |
+
|
| 21 |
+
return NODE
|
| 22 |
+
|
| 23 |
+
NODE_PATH = _find_node_path()
|
| 24 |
+
VM_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'vm/botGuard.js')
|
| 25 |
+
|
| 26 |
+
def generate_po_token(visitor_data: str) -> str:
|
| 27 |
+
"""
|
| 28 |
+
Run nodejs to generate poToken through botGuard.
|
| 29 |
+
|
| 30 |
+
Raises:
|
| 31 |
+
RuntimeError: If Node.js is not available
|
| 32 |
+
"""
|
| 33 |
+
try:
|
| 34 |
+
result = subprocess.check_output(
|
| 35 |
+
[NODE_PATH, VM_PATH, visitor_data],
|
| 36 |
+
stderr=subprocess.PIPE
|
| 37 |
+
).decode()
|
| 38 |
+
return result.replace("\n", "")
|
| 39 |
+
except FileNotFoundError as e:
|
| 40 |
+
raise RuntimeError(
|
| 41 |
+
f"Node.js is required but not found. Tried path: {NODE_PATH}\n"
|
| 42 |
+
"Please install Node.js or ensure it's in your PATH."
|
| 43 |
+
) from e
|
| 44 |
+
except subprocess.CalledProcessError as e:
|
| 45 |
+
raise RuntimeError(
|
| 46 |
+
f"Failed to execute botGuard.js: {e.stderr.decode().strip()}"
|
| 47 |
+
) from e
|
pytubefix/botGuard/vm/botGuard.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pytubefix/buffer.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This module implements a `Buffer` class for handling in-memory data storage, downloading streams,
|
| 2 |
+
and redirecting content to standard output (stdout)."""
|
| 3 |
+
|
| 4 |
+
import sys
|
| 5 |
+
import io
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Buffer:
    """In-memory byte buffer with helpers for stream downloads and stdout output."""

    def __init__(self):
        """Create an empty BytesIO-backed buffer."""
        self.buffer = io.BytesIO()

    def download_in_buffer(self, source):
        """Write *source* into the buffer.

        Accepts either an object exposing a callable ``stream_to_buffer``
        method or a plain string (encoded as UTF-8).

        Args:
            source: Object or data to be written to the buffer.

        Raises:
            TypeError: if *source* is neither of the supported kinds.
        """
        stream = getattr(source, 'stream_to_buffer', None)
        if callable(stream):
            stream(self.buffer)
        elif isinstance(source, str):
            self.buffer.write(source.encode('utf-8'))
        else:
            raise TypeError("The provided object is not compatible for downloading into the buffer.")

    def redirect_to_stdout(self):
        """Dump the buffer's full content to the binary stdout stream."""
        self.buffer.seek(0)
        sys.stdout.buffer.write(self.buffer.read())

    def read(self):
        """Return the entire buffer content, reading from the beginning."""
        self.buffer.seek(0)
        return self.buffer.read()

    def clear(self):
        """Discard all stored data by swapping in a fresh BytesIO."""
        self.buffer = io.BytesIO()
|
pytubefix/captions.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import os
|
| 3 |
+
import time
|
| 4 |
+
import json
|
| 5 |
+
import re
|
| 6 |
+
import xml.etree.ElementTree as ElementTree
|
| 7 |
+
from html import unescape
|
| 8 |
+
from typing import Dict, Optional
|
| 9 |
+
|
| 10 |
+
from pytubefix import request
|
| 11 |
+
from pytubefix.helpers import safe_filename, target_directory
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Caption:
|
| 15 |
+
"""Container for caption tracks."""
|
| 16 |
+
|
| 17 |
+
def __init__(self, caption_track: Dict):
    """Construct a :class:`Caption <Caption>`.

    :param dict caption_track:
        Caption track data extracted from ``watch_html``.
    """
    self.url = caption_track.get("baseUrl")

    # Track names normally live under 'simpleText'; certain videos use a
    # 'runs' list instead — take the text of each run that carries one
    # (the last such run wins, matching the historical behavior).
    name_dict = caption_track['name']
    if 'simpleText' in name_dict:
        self.name = name_dict['simpleText']
    else:
        for run in name_dict['runs']:
            if 'text' in run:
                self.name = run['text']

    # Use "vssId" instead of "languageCode" (fixes issue #779); drop the
    # leading '.' for backwards compatibility, e.g.:
    #   English -> vssId: .en, languageCode: en
    #   English (auto-generated) -> vssId: a.en, languageCode: en
    self.code = caption_track["vssId"].strip('.')
|
| 41 |
+
|
| 42 |
+
@property
def xml_captions(self) -> str:
    """Download the xml caption tracks."""
    # Fetches the raw caption XML from the track's baseUrl on each access.
    return request.get(self.url)
|
| 46 |
+
|
| 47 |
+
@property
def json_captions(self) -> dict:
    """Download and parse the json caption tracks.

    Rewrites the track url to request the json3 format, fetches it, and
    validates the payload magic before returning the parsed dict.
    """
    # BUGFIX: the format query parameter is `fmt=`, not `ftm=`. The old
    # misspelled check never matched, so `&fmt=json3` was always appended,
    # yielding a url with two fmt parameters whenever one was present.
    if 'fmt=' in self.url:
        json_captions_url = self.url.replace('fmt=srv3', 'fmt=json3')
    else:
        json_captions_url = f'{self.url}&fmt=json3'
    text = request.get(json_captions_url)
    parsed = json.loads(text)
    assert parsed['wireMagic'] == 'pb3', 'Unexpected captions format'
    return parsed
|
| 58 |
+
|
| 59 |
+
def generate_srt_captions(self) -> str:
    """Generate "SubRip Subtitle" captions.

    Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
    recompiles them into the "SubRip Subtitle" format.
    """
    # Thin wrapper: downloads the XML (network) and converts it.
    return self.xml_caption_to_srt(self.xml_captions)
|
| 66 |
+
|
| 67 |
+
def generate_txt_captions(self) -> str:
    """Generate Text captions.

    Takes the "SubRip Subtitle" format captions and converts them into
    plain text: sequence numbers, timestamp lines and blank separators are
    dropped and the caption lines are joined with single spaces.
    """
    srt_captions = self.generate_srt_captions()
    kept = []
    for line in srt_captions.splitlines():
        if re.search('^[0-9]+$', line) is not None:
            continue  # sequence number
        if re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is not None:
            continue  # timestamp line
        if re.search('^$', line) is not None:
            continue  # blank separator
        kept.append(line.strip())
    return ' '.join(kept).strip()
|
| 82 |
+
|
| 83 |
+
def save_captions(self, filename: str):
    """Generate and save "SubRip Subtitle" captions to a text file.

    Takes the xml captions from :meth:`~pytubefix.Caption.xml_captions` and
    recompiles them into the "SubRip Subtitle" format and saves it to a text file.

    :param filename: The name of the file to save the captions.
    """
    srt_text = self.xml_caption_to_srt(self.xml_captions)
    with open(filename, 'w', encoding='utf-8') as fh:
        fh.write(srt_text)
|
| 95 |
+
|
| 96 |
+
@staticmethod
|
| 97 |
+
def float_to_srt_time_format(d: float) -> str:
|
| 98 |
+
"""Convert decimal durations into proper srt format.
|
| 99 |
+
|
| 100 |
+
:rtype: str
|
| 101 |
+
:returns:
|
| 102 |
+
SubRip Subtitle (str) formatted time duration.
|
| 103 |
+
|
| 104 |
+
float_to_srt_time_format(3.89) -> '00:00:03,890'
|
| 105 |
+
"""
|
| 106 |
+
fraction, whole = math.modf(d)
|
| 107 |
+
time_fmt = time.strftime("%H:%M:%S,", time.gmtime(whole))
|
| 108 |
+
ms = f"{fraction:.3f}".replace("0.", "")
|
| 109 |
+
return time_fmt + ms
|
| 110 |
+
|
| 111 |
+
def xml_caption_to_srt(self, xml_captions: str) -> str:
    """Convert xml caption tracks to "SubRip Subtitle (srt)".

    :param str xml_captions:
        XML formatted caption tracks.
    :rtype: str
    :returns:
        The caption track rendered in SubRip Subtitle format.
    """
    segments = []
    root = ElementTree.fromstring(xml_captions)

    i = 0
    for child in list(root.iter(root.tag))[0]:
        if child.tag in ['p', 'text']:
            caption = ''

            # Leaf element: the caption text lives directly on the node.
            # (Cheaper than `len(list(child)) == 0`.)
            if not list(child):
                caption = child.text
            for s in list(child):
                # Guard against empty <s/> elements: their .text is None,
                # and the unguarded f-string injected a literal "None"
                # into the subtitle text.
                if s.tag == 's' and s.text:
                    caption += f' {s.text}'
            if not caption:
                continue
            # Collapse newlines and double spaces, then unescape entities.
            caption = unescape(caption.replace("\n", " ").replace("  ", " "))
            try:
                if "d" in child.attrib:
                    # "d" is expressed in milliseconds.
                    duration = float(child.attrib["d"]) / 1000.0
                else:
                    # "dur" is already in seconds.
                    duration = float(child.attrib["dur"])
            except KeyError:
                duration = 0.0

            if "t" in child.attrib:
                # "t" is the start offset in milliseconds.
                start = float(child.attrib["t"]) / 1000.0
            else:
                start = float(child.attrib["start"])

            end = start + duration
            sequence_number = i + 1  # convert from 0-indexed to 1.
            line = "{seq}\n{start} --> {end}\n{text}\n".format(
                seq=sequence_number,
                start=self.float_to_srt_time_format(start),
                end=self.float_to_srt_time_format(end),
                text=caption,
            )
            segments.append(line)
            i += 1
    return "\n".join(segments).strip()
|
| 159 |
+
|
| 160 |
+
def download(
    self,
    title: str,
    srt: bool = True,
    output_path: Optional[str] = None,
    filename_prefix: Optional[str] = None,
) -> str:
    """Write the caption track to disk.

    :param title:
        Output filename (stem only) for writing media file.
        If one is not specified, the default filename is used.
    :type title: str
    :param srt:
        Set to True to download srt, false to download xml. Defaults to True.
    :type srt: bool
    :param output_path:
        (optional) Output path for writing media file. If one is not
        specified, defaults to the current working directory.
    :type output_path: str or None
    :param filename_prefix:
        (optional) A string that will be prepended to the filename.
        For example a number in a playlist or the name of a series.
        If one is not specified, nothing will be prepended
        This is separate from filename so you can use the default
        filename but still add a prefix.
    :type filename_prefix: str or None

    :rtype: str
    :returns: Path of the saved caption file.
    """
    # Strip a pre-existing caption extension so we do not double it below.
    if title.endswith(".srt") or title.endswith(".xml"):
        filename = ".".join(title.split(".")[:-1])
    else:
        filename = title

    if filename_prefix:
        # Prepend the sanitized prefix to the stem. The previous code
        # discarded the stem and appended the literal text "(unknown)".
        filename = f"{safe_filename(filename_prefix)}{filename}"

    filename = safe_filename(filename)

    filename += f" ({self.code})"
    filename += ".srt" if srt else ".xml"

    file_path = os.path.join(target_directory(output_path), filename)

    with open(file_path, "w", encoding="utf-8") as file_handle:
        if srt:
            file_handle.write(self.generate_srt_captions())
        else:
            file_handle.write(self.xml_captions)

    return file_path
|
| 212 |
+
|
| 213 |
+
def __repr__(self):
    """Printable object representation."""
    return f'<Caption lang="{self.name}" code="{self.code}">'
|
pytubefix/chapters.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Native python imports
|
| 2 |
+
from datetime import timedelta
|
| 3 |
+
from typing import List
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class ChapterThumbnail:
    """Container for chapter thumbnails."""

    def __init__(self, width: int, height: int, url: str):
        # Pixel dimensions of the thumbnail image.
        self.width = width
        self.height = height
        # Direct URL of the thumbnail image.
        self.url = url

    def __repr__(self):
        return (
            '<pytubefix.chapters.ChapterThumbnail: '
            f'width={self.width}, height={self.height}, url={self.url}>'
        )
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class Chapter:
    """Container for chapters tracks."""
    title: str
    start_seconds: int
    duration: int  # in seconds
    thumbnails: List[ChapterThumbnail]

    def __init__(self, chapter_data: dict, duration: int):
        renderer = chapter_data['chapterRenderer']

        self.title = renderer['title']['simpleText']
        # YouTube reports the chapter start offset in milliseconds.
        self.start_seconds = int(renderer['timeRangeStartMillis'] / 1000)
        self.duration = duration

        self.thumbnails = []
        for thumb in renderer.get('thumbnail', {}).get('thumbnails', []):
            self.thumbnails.append(
                ChapterThumbnail(
                    width=thumb['width'],
                    height=thumb['height'],
                    url=thumb['url'],
                )
            )

    @property
    def start_label(self) -> str:
        """Human-readable start offset, e.g. ``0:01:30``."""
        return str(timedelta(seconds=self.start_seconds))

    def __repr__(self):
        return f'<Chapter: {self.title} | {self.start_label}>'
|
pytubefix/cipher.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This module contains all the logic needed to find the signature functions.
|
| 3 |
+
|
| 4 |
+
YouTube's strategy to restrict downloading videos is to send a ciphered version
|
| 5 |
+
of the signature to the client, along with the decryption algorithm obfuscated
|
| 6 |
+
in JavaScript. For the clients to play the videos, JavaScript must take the
|
| 7 |
+
ciphered version, cycle it through a series of "transform functions," and then
|
| 8 |
+
signs the media URL with the output.
|
| 9 |
+
|
| 10 |
+
This module is responsible for (1) finding these "transformations
|
| 11 |
+
functions" (2) sends them to be interpreted by jsinterp.py
|
| 12 |
+
"""
|
| 13 |
+
import logging
|
| 14 |
+
import re
|
| 15 |
+
|
| 16 |
+
from pytubefix.exceptions import RegexMatchError, InterpretationError
|
| 17 |
+
from pytubefix.jsinterp import JSInterpreter, extract_player_js_global_var
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class Cipher:
    """Bundle the two player transformations: signature and throttling.

    Locates the relevant function names inside the player's ``base.js``
    and delegates their execution to :class:`JSInterpreter`.
    """

    def __init__(self, js: str, js_url: str):

        # Kept so error messages can point at the exact player version.
        self.js_url = js_url

        self.signature_function_name = get_initial_function_name(js, js_url)
        self.throttling_function_name = get_throttling_function_name(js, js_url)

        self.calculated_n = None

        self.js_interpreter = JSInterpreter(js)

    def get_throttling(self, n: str):
        """Interpret the function that throttles download speed.

        :param str n:
            Contains the parameter that must be transformed.
        :rtype: str
        :returns:
            Returns the transformed value "n".
        """
        try:
            return self.js_interpreter.call_function(self.throttling_function_name, n)
        except Exception as e:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit); chain the original cause
            # so the failing interpretation is debuggable.
            raise InterpretationError(js_url=self.js_url) from e

    def get_signature(self, ciphered_signature: str) -> str:
        """Interpret the function that signs the streams.

        The lack of this signature generates the 403 forbidden error.

        :param str ciphered_signature:
            Contains the signature that must be transformed.
        :rtype: str
        :returns:
            Returns the correct stream signature.
        """
        try:
            return self.js_interpreter.call_function(self.signature_function_name, ciphered_signature)
        except Exception as e:
            raise InterpretationError(js_url=self.js_url) from e
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def get_initial_function_name(js: str, js_url: str) -> str:
    """Extract the name of the function responsible for computing the signature.

    :param str js:
        The contents of the base.js asset file.
    :param str js_url:
        Full base.js url
    :rtype: str
    :returns:
        Function name from regex match
    """

    # Candidate patterns, newest player layouts first.
    function_patterns = [
        r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*[a-zA-Z0-9_\$\"\[\]]+\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*[a-zA-Z0-9_\$\"\[\]]+\s*\)',
        r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
        r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
        # Old patterns
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('
    ]
    logger.debug("looking for signature cipher name")
    for pattern in function_patterns:
        function_match = re.search(pattern, js)
        if not function_match:
            continue
        function_name = function_match.group('sig')
        logger.debug("finished regex search, matched: %s", pattern)
        logger.debug(f'Signature cipher function name: {function_name}')
        return function_name

    raise RegexMatchError(
        caller="get_initial_function_name", pattern=f"multiple in {js_url}"
    )
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def get_throttling_function_name(js: str, js_url: str) -> str:
    """Extract the name of the function that computes the throttling parameter.

    :param str js:
        The contents of the base.js asset file.
    :param str js_url:
        Full base.js url
    :rtype: str
    :returns:
        The name of the function used to compute the throttling parameter.
    :raises RegexMatchError:
        When no candidate function can be located in ``js``.
    """

    logger.debug("looking for nsig name")
    # First strategy: locate the nsig function via the player's global
    # string array ("_w8_" sentinel entry), then match the function whose
    # catch-branch returns that array element prefixed to its argument.
    try:
        # Extracts the function name based on the global array
        global_obj, varname, code = extract_player_js_global_var(js)
        if global_obj and varname and code:
            logger.debug(f"Global Obj name is: {varname}")
            # Re-evaluate the array literal to get its Python value.
            global_obj = JSInterpreter(js).interpret_expression(code, {}, 100)
            logger.debug("Successfully interpreted global object")
            for k, v in enumerate(global_obj):
                if v.endswith('_w8_'):
                    logger.debug(f"_w8_ found in index {k}")
                    # Verbose regex; %s/%d splice the (escaped) array name
                    # and the matched index into the catch-branch pattern.
                    pattern = r'''(?xs)
                        [;\n](?:
                            (?P<f>function\s+)|
                            (?:var\s+)?
                        )(?P<funcname>[a-zA-Z0-9_$]+)\s*(?(f)|=\s*function\s*)
                        \((?P<argname>[a-zA-Z0-9_$]+)\)\s*\{
                        (?:(?!\};(?![\]\)])).)+
                        \}\s*catch\(\s*[a-zA-Z0-9_$]+\s*\)\s*
                        \{\s*return\s+%s\[%d\]\s*\+\s*(?P=argname)\s*\}\s*return\s+[^}]+\}[;\n]
                    ''' % (re.escape(varname), k)
                    func_name = re.search(pattern, js)
                    if func_name:
                        n_func = func_name.group("funcname")
                        logger.debug(f"Nfunc name is: {n_func}")
                        return n_func
    # NOTE(review): bare `except:` silently swallows every failure of the
    # first strategy (including KeyboardInterrupt) before falling back to
    # the legacy regex below — consider `except Exception`.
    except:
        pass

    # Fallback strategy: legacy call-site patterns for the n-parameter
    # transform, optionally with an array-index form `nfunc[idx](a)`.
    pattern = r'''(?x)
        (?:
            \.get\("n"\)\)&&\(b=|
            (?:
                b=String\.fromCharCode\(110\)|
                (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
            )
            (?:
                ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
            (?:
                get\(b\)|
                [a-zA-Z0-9_$]+\[b\]\|\|null
            )\)&&\(c=|
            \b(?P<var>[a-zA-Z0-9_$]+)=
        )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
        (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))'''

    logger.debug('Finding throttling function name')

    regex = re.compile(pattern)
    function_match = regex.search(js)
    if function_match:
        logger.debug("finished regex search, matched: %s", pattern)

        func = function_match.group('nfunc')
        idx = function_match.group('idx')

        logger.debug(f'func is: {func}')
        logger.debug(f'idx is: {idx}')

        logger.debug('Checking throttling function name')
        if idx:
            # `func` was referenced as an array element: resolve the
            # array literal `var func = [ ... ];` in the player source.
            n_func_check_pattern = fr'var {re.escape(func)}\s*=\s*\[(.+?)];'
            n_func_found = re.search(n_func_check_pattern, js)

            if n_func_found:
                # NOTE(review): this returns the entire array-literal
                # contents rather than the element at position `idx`;
                # if the array ever holds more than one entry this looks
                # wrong — verify against upstream pytubefix.
                throttling_function = n_func_found.group(1)
                logger.debug(f'Throttling function name is: {throttling_function}')
                return throttling_function

            raise RegexMatchError(
                caller="get_throttling_function_name", pattern=f"{n_func_check_pattern} in {js_url}"
            )

    # NOTE(review): when the fallback regex matches but `idx` is absent,
    # `func` is never returned and this error is raised instead — confirm
    # that is intended.
    raise RegexMatchError(
        caller="get_throttling_function_name", pattern=f"{pattern} in {js_url}"
    )
|
pytubefix/cli.py
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import argparse
|
| 3 |
+
import gzip
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
import os
|
| 7 |
+
import shutil
|
| 8 |
+
import sys
|
| 9 |
+
import datetime as dt
|
| 10 |
+
import subprocess # nosec
|
| 11 |
+
from typing import List, Optional
|
| 12 |
+
|
| 13 |
+
import pytubefix.exceptions as exceptions
|
| 14 |
+
from pytubefix import __version__
|
| 15 |
+
from pytubefix import CaptionQuery, Playlist, Stream
|
| 16 |
+
from pytubefix.helpers import safe_filename, setup_logger
|
| 17 |
+
from pytubefix import YouTube
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
def build_playback_report(youtube: YouTube) -> None:
    """Serialize the request data to json for offline debugging.

    :param YouTube youtube:
        A YouTube object.
    """
    timestamp = int(dt.datetime.now(dt.timezone.utc).timestamp())
    report_path = os.path.join(
        os.getcwd(), f"yt-video-{youtube.video_id}-{timestamp}.json.gz"
    )

    # Gather all request artefacts before touching the filesystem.
    payload = {
        "url": youtube.watch_url,
        "js": youtube.js,
        "watch_html": youtube.watch_html,
        "video_info": youtube.vid_info,
    }

    with gzip.open(report_path, "wb") as fh:
        fh.write(json.dumps(payload).encode("utf8"))
|
| 45 |
+
|
| 46 |
+
def display_progress_bar(bytes_received: int, filesize: int, ch: str = "█", scale: float = 0.55) -> None:
    """Display a simple, pretty progress bar.

    Example:
    ~~~~~~~~
    PSY - GANGNAM STYLE(강남스타일) MV.mp4
    ↳ |███████████████████████████████████████| 100.0%

    :param int bytes_received:
        The delta between the total file size (bytes) and bytes already
        written to disk.
    :param int filesize:
        File size of the media stream in bytes.
    :param str ch:
        Character to use for presenting progress segment.
    :param float scale:
        Scale multiplier to reduce progress bar size.
    """
    # Bar width is a fraction of the current terminal width.
    max_width = int(shutil.get_terminal_size().columns * scale)

    done = int(round(max_width * bytes_received / float(filesize)))
    bar = ch * done + " " * (max_width - done)
    percent = round(100.0 * bytes_received / float(filesize), 1)
    # '\r' keeps redrawing the same terminal line.
    sys.stdout.write(f" ↳ |{bar}| {percent}%\r")
    sys.stdout.flush()
|
| 74 |
+
|
| 75 |
+
def on_progress(stream: Stream, chunk: bytes, bytes_remaining: int) -> None:  # pylint: disable=W0613
    """Stream progress callback: redraw the progress bar for *stream*."""
    total = stream.filesize
    display_progress_bar(total - bytes_remaining, total)
|
| 79 |
+
|
| 80 |
+
def _download(stream: Stream, target: Optional[str] = None, filename: Optional[str] = None) -> None:
    """Download *stream*, skipping the transfer when the file already exists."""
    size_mb = stream.filesize // 1048576
    print(f"{filename or stream.default_filename} | {size_mb} MB")
    destination = stream.get_file_path(filename=filename, output_path=target)
    if stream.exists_at_path(destination):
        print(f"Already downloaded at:\n{destination}")
        return

    stream.download(output_path=target, filename=filename)
    # Move past the progress bar's carriage-return line.
    sys.stdout.write("\n")
|
| 90 |
+
|
| 91 |
+
def _unique_name(base: str, subtype: str, media_type: str, target: str) -> str:
    """
    Given a base name, the file format, and the target directory, will generate
    a filename unique for that directory and file format.

    :param str base:
        The given base-name.
    :param str subtype:
        The filetype of the video which will be downloaded.
    :param str media_type:
        The media_type of the file, ie. "audio" or "video"
    :param Path target:
        Target directory for download.
    """
    counter = 0
    candidate = f"{base}_{media_type}_{counter}"
    # Bump the numeric suffix until the name is free in `target`.
    while os.path.exists(os.path.join(target, f"{candidate}.{subtype}")):
        counter += 1
        candidate = f"{base}_{media_type}_{counter}"
    return candidate
|
| 112 |
+
|
| 113 |
+
def ffmpeg_process(youtube: YouTube, resolution: str, target: Optional[str] = None) -> None:
    """
    Decides the correct video stream to download, then calls _ffmpeg_downloader.

    :param YouTube youtube:
        A valid YouTube object.
    :param str resolution:
        YouTube video resolution; ``None`` or ``"best"`` picks the highest.
    :param str target:
        Target directory for download
    """
    youtube.register_on_progress_callback(on_progress)
    target = target or os.getcwd()

    if resolution is None or resolution == "best":
        # Prefer an mp4 stream when it matches the best available
        # resolution, to keep the merged container consistent.
        highest_quality_stream = youtube.streams.filter(progressive=False).order_by("resolution").last()
        mp4_stream = youtube.streams.filter(progressive=False, subtype="mp4").order_by("resolution").last()
        if highest_quality_stream.resolution == mp4_stream.resolution:
            video_stream = mp4_stream
        else:
            video_stream = highest_quality_stream
    else:
        video_stream = youtube.streams.filter(progressive=False, resolution=resolution).first()

    if not video_stream:
        print(f"No streams found for resolution {resolution}")
        return

    audio_stream = youtube.streams.filter(progressive=False).order_by("abr").last()

    video_file_name = _unique_name(youtube.title, "mp4", "video", target)
    audio_file_name = _unique_name(youtube.title, "mp4", "audio", target)

    video_path = video_stream.get_file_path(filename=video_file_name, output_path=target)
    audio_path = audio_stream.get_file_path(filename=audio_file_name, output_path=target)

    if os.path.exists(video_path) and os.path.exists(audio_path):
        print("Already downloaded both video and audio.")
        return

    _download(video_stream, target=target, filename=video_file_name)
    _download(audio_stream, target=target, filename=audio_file_name)

    # Construct the command to run ffmpeg.  os.path.join keeps the output
    # path correct on Windows as well (the old f"{target}/..." did not).
    output_path = os.path.join(target, f"{youtube.title}.mp4")
    command = ["ffmpeg", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-strict", "experimental", output_path]

    # Execute the command (list args, no shell: no injection risk). # nosec
    subprocess.run(command)
|
| 161 |
+
|
| 162 |
+
def download_by_resolution(youtube: YouTube, resolution: str, target: Optional[str] = None) -> None:
    """Download a stream by the specified resolution.

    :param YouTube youtube:
        A valid YouTube object.
    :param str resolution:
        The desired resolution of the stream.
    :param Optional[str] target:
        The target directory for the download.
    """
    print(f"Downloading {resolution}...")
    stream = youtube.streams.filter(resolution=resolution).first()
    if stream is None:
        print(f"No stream found for resolution {resolution}")
        return
    _download(stream, target)
|
| 178 |
+
|
| 179 |
+
def download_audio(youtube: YouTube, filetype: Optional[str] = "mp4", target: Optional[str] = None) -> None:
    """Download audio stream of a YouTube video.

    :param YouTube youtube:
        A valid YouTube object.
    :param Optional[str] filetype:
        The filetype for the audio. Defaults to "mp4".
    :param Optional[str] target:
        The target directory for the download.
    """
    print("Downloading audio...")
    stream = youtube.streams.filter(progressive=False, subtype=filetype).order_by("abr").last()
    if stream is None:
        print(f"No audio stream found for filetype {filetype}")
        return
    _download(stream, target)
|
| 195 |
+
|
| 196 |
+
def download_highest_resolution_progressive(youtube: YouTube, resolution: str, target: Optional[str] = None) -> None:
    """Download a YouTube video stream at the highest resolution.

    :param YouTube youtube:
        A valid YouTube object.
    :param str resolution:
        The resolution of the stream (unused; kept for interface compatibility).
    :param Optional[str] target:
        The target directory for the download.
    """
    print("Downloading highest resolution progressive stream...")
    best = youtube.streams.filter(progressive=True).order_by("resolution").last()
    if best is None:
        print("No progressive stream found.")
        return
    _download(best, target)
|
| 212 |
+
|
| 213 |
+
def download_by_itag(youtube: YouTube, itag: int, target: Optional[str] = None) -> None:
    """Download a YouTube stream by its itag.

    :param YouTube youtube:
        A valid YouTube object.
    :param int itag:
        The itag of the desired stream.
    :param Optional[str] target:
        The target directory for the download.
    """
    stream = youtube.streams.get_by_itag(itag)
    if stream is None:
        print(f"No stream found with itag {itag}.")
        return
    print(f"Downloading stream with itag {itag}...")
    _download(stream, target)
|
| 229 |
+
|
| 230 |
+
def download_caption(youtube: YouTube, lang_code: str, target: Optional[str] = None) -> None:
    """Download captions for a given YouTube video.

    :param YouTube youtube:
        A valid YouTube object.
    :param str lang_code:
        The language code for the desired captions.
    :param Optional[str] target:
        The target directory for the downloaded captions.
    """
    print(f"Downloading captions for language: {lang_code}...")
    caption = youtube.captions.get_by_language_code(lang_code)
    if caption is None:
        print(f"No captions found for language code: {lang_code}.")
    else:
        # Caption.download takes the filename stem first and the output
        # directory as `output_path`; the old positional call passed the
        # target directory as the filename stem, writing the file into
        # the current working directory under the wrong name.
        caption.download(title=youtube.title, output_path=target)
|
| 246 |
+
|
| 247 |
+
def _print_available_captions(captions: List[CaptionQuery]) -> None:
    """Print available captions for a YouTube video.

    :param List[CaptionQuery] captions:
        The list of available captions.
    """
    print("Available captions:")
    for track in captions:
        print(f" - {track.language_code}: {track.name}")
|
| 256 |
+
|
| 257 |
+
def display_streams(youtube: YouTube) -> None:
    """Display available streams for the given YouTube video.

    :param YouTube youtube:
        A valid YouTube object.
    """
    print(f"Available streams for {youtube.title}:")
    for available_stream in youtube.streams:
        print(f" - {available_stream}")
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def _parse_args(parser: argparse.ArgumentParser, args: Optional[List] = None) -> argparse.Namespace:
    """Attach all supported CLI options to *parser* and parse *args*.

    :param argparse.ArgumentParser parser:
        The parser to populate.
    :param Optional[List] args:
        Argument list to parse; ``None`` falls back to ``sys.argv``.
    :rtype: argparse.Namespace
    """
    parser.add_argument("url", help="The YouTube /watch or /playlist url", nargs="?")
    parser.add_argument("-V", "--version", action="version", version=f"%(prog)s {__version__}")
    # Stream selection options.
    parser.add_argument("--itag", type=int, help="The itag for the desired stream")
    parser.add_argument("-r", "--resolution", type=str, help="The resolution for the desired stream")
    parser.add_argument("-l", "--list", action="store_true", help="The list option causes pytubefix cli to return a list of streams available to download")
    parser.add_argument("--oauth", action="store_true", help="use oauth token")
    # Diagnostics.
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="Set logger output to verbose output.")
    parser.add_argument("--logfile", action="store", help="logging debug and error messages into a log file")
    parser.add_argument("--build-playback-report", action="store_true", help="Save the html and js to disk")
    # Captions.
    parser.add_argument("-c", "--caption-code", type=str, help="Download srt captions for given language code. Prints available language codes if no argument given")
    parser.add_argument('-lc', '--list-captions', action='store_true', help="List available caption codes for a video")
    # Output / post-processing.
    parser.add_argument("-t", "--target", help="The output directory for the downloaded stream. Default is current working directory")
    parser.add_argument("-a", "--audio", const="mp4", nargs="?", help="Download the audio for a given URL at the highest bitrate available. Defaults to mp4 format if none is specified")
    parser.add_argument("-f", "--ffmpeg", const="best", nargs="?", help="Downloads the audio and video stream for resolution provided. If no resolution is provided, downloads the best resolution. Runs the command line program ffmpeg to combine the audio and video")

    return parser.parse_args(args)
|
| 285 |
+
|
| 286 |
+
def _perform_args_on_youtube(youtube: YouTube, args: argparse.Namespace) -> None:
    """Dispatch the parsed CLI flags against a single YouTube object.

    :param YouTube youtube: the already-constructed video object to act on.
    :param argparse.Namespace args: parsed command-line arguments.
    """
    # Bare invocation (only the URL was given): grab the best progressive stream.
    if len(sys.argv) == 2:
        download_highest_resolution_progressive(youtube=youtube, resolution="highest", target=args.target)

    if args.list_captions:
        _print_available_captions(youtube.captions)
    if args.list:
        display_streams(youtube)

    # Download selectors are mutually exclusive; first matching flag wins.
    if args.itag:
        download_by_itag(youtube=youtube, itag=args.itag, target=args.target)
    elif args.caption_code:
        download_caption(youtube=youtube, lang_code=args.caption_code, target=args.target)
    elif args.resolution:
        download_by_resolution(youtube=youtube, resolution=args.resolution, target=args.target)
    elif args.audio:
        download_audio(youtube=youtube, filetype=args.audio, target=args.target)

    if args.ffmpeg:
        ffmpeg_process(youtube=youtube, resolution=args.resolution, target=args.target)

    if args.build_playback_report:
        build_playback_report(youtube)

    oauth = False
    cache = False

    if args.oauth:
        oauth = True
        cache = True

        # NOTE(review): indentation was lost in extraction; this reload-and-download
        # is assumed to be nested under --oauth (reloading args.url without oauth on
        # every call would misbehave in playlist mode) — confirm against upstream.
        print("Loading video...")
        youtube = YouTube(args.url, use_oauth=oauth, allow_oauth_cache=cache)

        download_highest_resolution_progressive(youtube=youtube, resolution="highest", target=args.target)
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
# No docstring on purpose: main.__doc__ is passed to ArgumentParser(description=...)
# and must stay None to keep the CLI help output unchanged.
def main():
    parser = argparse.ArgumentParser(description=main.__doc__)
    args = _parse_args(parser)

    verbose = args.verbose
    setup_logger(
        logging.DEBUG if verbose else logging.INFO,
        log_filename=args.logfile if verbose else None,
    )
    if verbose:
        logger.debug(f'Pytubefix version: {__version__}')

    # Guard clause: anything that does not look like a YouTube URL gets usage text.
    if not args.url or "youtu" not in args.url:
        parser.print_help()
        sys.exit(0)

    if "/playlist" not in args.url:
        print("Loading video...")
        _perform_args_on_youtube(YouTube(args.url), args)
        return

    print("Loading playlist...")
    playlist = Playlist(args.url)
    args.target = args.target or safe_filename(playlist.title)

    for video in playlist.videos:
        try:
            _perform_args_on_youtube(video, args)
        except exceptions.PytubeFixError as err:
            print(f"There was an error with video: {video}")
            print(err)
|
| 353 |
+
|
| 354 |
+
# Script entry point: only run the CLI when executed directly, not on import.
if __name__ == "__main__":
    main()
|
pytubefix/contrib/__init__.py
ADDED
|
File without changes
|
pytubefix/contrib/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (210 Bytes). View file
|
|
|
pytubefix/contrib/__pycache__/channel.cpython-311.pyc
ADDED
|
Binary file (29.9 kB). View file
|
|
|
pytubefix/contrib/__pycache__/playlist.cpython-311.pyc
ADDED
|
Binary file (23.4 kB). View file
|
|
|
pytubefix/contrib/__pycache__/search.cpython-311.pyc
ADDED
|
Binary file (22 kB). View file
|
|
|
pytubefix/contrib/channel.py
ADDED
|
@@ -0,0 +1,655 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""Module for interacting with a user's youtube channel."""
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
from typing import Dict, List, Optional, Tuple, Iterable, Any, Callable
|
| 6 |
+
|
| 7 |
+
from pytubefix import extract, YouTube, Playlist, request
|
| 8 |
+
from pytubefix.helpers import cache, uniqueify, DeferredGeneratorList
|
| 9 |
+
from pytubefix.innertube import InnerTube
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Channel(Playlist):
|
| 15 |
+
    def __init__(
            self,
            url: str,
            client: str = InnerTube().client_name,
            proxies: Optional[Dict[str, str]] = None,
            use_oauth: bool = False,
            allow_oauth_cache: bool = True,
            token_file: Optional[str] = None,
            oauth_verifier: Optional[Callable[[str, str], None]] = None,
            use_po_token: Optional[bool] = False,
            po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
    ):
        """Construct a :class:`Channel <Channel>`.
        :param str url:
            A valid YouTube channel URL.
        :param dict proxies:
            (Optional) A dict mapping protocol to proxy address which will be used by pytube.
        :param bool use_oauth:
            (Optional) Prompt the user to authenticate to YouTube.
            If allow_oauth_cache is set to True, the user should only be prompted once.
        :param bool allow_oauth_cache:
            (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
            These tokens are only generated if use_oauth is set to True as well.
        :param str token_file:
            (Optional) Path to the file where the OAuth tokens will be stored.
            Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
        :param Callable oauth_verifier:
            (optional) Verifier to be used for getting OAuth tokens.
            Verification URL and User-Code will be passed to it respectively.
            (if passed, else default verifier will be used)
        :param bool use_po_token:
            (Optional) Prompt the user to use the proof of origin token on YouTube.
            It must be sent with the API along with the linked visitorData and
            then passed as a `po_token` query parameter to affected clients.
            If allow_oauth_cache is set to True, the user should only be prompted once.
        :param Callable po_token_verifier:
            (Optional) Verified used to obtain the visitorData and po_token.
            The verifier will return the visitorData and po_token respectively.
            (if passed, else default verifier will be used)
        """
        # NOTE(review): the `client` default (InnerTube().client_name) is evaluated
        # once at import time, not per call — confirm this is intentional upstream.
        super().__init__(url, proxies)

        self.channel_uri = extract.channel_name(url)

        # Credentials/settings forwarded to every YouTube/Playlist/Channel object
        # created while walking this channel's pages.
        self.client = client
        self.use_oauth = use_oauth
        self.allow_oauth_cache = allow_oauth_cache
        self.token_file = token_file
        self.oauth_verifier = oauth_verifier

        self.use_po_token = use_po_token
        self.po_token_verifier = po_token_verifier

        self.channel_url = (
            f"https://www.youtube.com{self.channel_uri}"
        )

        # Canonical URLs of every channel tab.
        self.featured_url = self.channel_url + '/featured'
        self.videos_url = self.channel_url + '/videos'
        self.shorts_url = self.channel_url + '/shorts'
        self.live_url = self.channel_url + '/streams'
        self.releases_url = self.channel_url + '/releases'
        self.playlists_url = self.channel_url + '/playlists'
        self.community_url = self.channel_url + '/community'
        self.featured_channels_url = self.channel_url + '/channels'
        self.about_url = self.channel_url + '/about'

        self._html_url = self.videos_url  # Videos will be preferred over short videos and live

        # Possible future additions
        self._playlists_html = None
        self._community_html = None
        self._featured_channels_html = None
        self._about_html = None
|
| 89 |
+
|
| 90 |
+
def __repr__(self) -> str:
|
| 91 |
+
return f'<pytubefix.contrib.Channel object: channelUri={self.channel_uri}>'
|
| 92 |
+
|
| 93 |
+
@property
|
| 94 |
+
def channel_name(self):
|
| 95 |
+
"""Get the name of the YouTube channel.
|
| 96 |
+
|
| 97 |
+
:rtype: str
|
| 98 |
+
"""
|
| 99 |
+
return self.initial_data['metadata']['channelMetadataRenderer']['title']
|
| 100 |
+
|
| 101 |
+
@property
|
| 102 |
+
def channel_id(self):
|
| 103 |
+
"""Get the ID of the YouTube channel.
|
| 104 |
+
|
| 105 |
+
This will return the underlying ID, not the vanity URL.
|
| 106 |
+
|
| 107 |
+
:rtype: str
|
| 108 |
+
"""
|
| 109 |
+
return self.initial_data['metadata']['channelMetadataRenderer']['externalId']
|
| 110 |
+
|
| 111 |
+
@property
|
| 112 |
+
def vanity_url(self):
|
| 113 |
+
"""Get the vanity URL of the YouTube channel.
|
| 114 |
+
|
| 115 |
+
Returns None if it doesn't exist.
|
| 116 |
+
|
| 117 |
+
:rtype: str
|
| 118 |
+
"""
|
| 119 |
+
return self.initial_data['metadata']['channelMetadataRenderer'].get('vanityChannelUrl', None) # noqa:E501
|
| 120 |
+
|
| 121 |
+
@property
|
| 122 |
+
def html_url(self):
|
| 123 |
+
"""Get the html url.
|
| 124 |
+
|
| 125 |
+
:rtype: str
|
| 126 |
+
"""
|
| 127 |
+
return self._html_url
|
| 128 |
+
|
| 129 |
+
    @html_url.setter
    def html_url(self, value):
        """Set the html url and clear the cache."""
        # Switching tabs invalidates every per-tab cache: the raw HTML, the
        # parsed initial data, and the memoized video_urls property on the class.
        if self._html_url != value:
            self._html = None
            self._initial_data = None
            self.__class__.video_urls.fget.cache_clear()
            self._html_url = value
|
| 137 |
+
|
| 138 |
+
@property
|
| 139 |
+
def html(self):
|
| 140 |
+
"""Get the html for the /videos, /shorts or /streams page.
|
| 141 |
+
|
| 142 |
+
:rtype: str
|
| 143 |
+
"""
|
| 144 |
+
if self._html:
|
| 145 |
+
return self._html
|
| 146 |
+
self._html = request.get(self.html_url)
|
| 147 |
+
return self._html
|
| 148 |
+
|
| 149 |
+
@property
|
| 150 |
+
def playlists_html(self):
|
| 151 |
+
"""Get the html for the /playlists page.
|
| 152 |
+
|
| 153 |
+
Currently unused for any functionality.
|
| 154 |
+
|
| 155 |
+
:rtype: str
|
| 156 |
+
"""
|
| 157 |
+
if self._playlists_html:
|
| 158 |
+
return self._playlists_html
|
| 159 |
+
else:
|
| 160 |
+
self._playlists_html = request.get(self.playlists_url)
|
| 161 |
+
return self._playlists_html
|
| 162 |
+
|
| 163 |
+
@property
|
| 164 |
+
def community_html(self):
|
| 165 |
+
"""Get the html for the /community page.
|
| 166 |
+
|
| 167 |
+
Currently unused for any functionality.
|
| 168 |
+
|
| 169 |
+
:rtype: str
|
| 170 |
+
"""
|
| 171 |
+
if self._community_html:
|
| 172 |
+
return self._community_html
|
| 173 |
+
else:
|
| 174 |
+
self._community_html = request.get(self.community_url)
|
| 175 |
+
return self._community_html
|
| 176 |
+
|
| 177 |
+
@property
|
| 178 |
+
def featured_channels_html(self):
|
| 179 |
+
"""Get the html for the /channels page.
|
| 180 |
+
|
| 181 |
+
Currently unused for any functionality.
|
| 182 |
+
|
| 183 |
+
:rtype: str
|
| 184 |
+
"""
|
| 185 |
+
if self._featured_channels_html:
|
| 186 |
+
return self._featured_channels_html
|
| 187 |
+
else:
|
| 188 |
+
self._featured_channels_html = request.get(self.featured_channels_url)
|
| 189 |
+
return self._featured_channels_html
|
| 190 |
+
|
| 191 |
+
@property
|
| 192 |
+
def about_html(self):
|
| 193 |
+
"""Get the html for the /about page.
|
| 194 |
+
|
| 195 |
+
Currently unused for any functionality.
|
| 196 |
+
|
| 197 |
+
:rtype: str
|
| 198 |
+
"""
|
| 199 |
+
if self._about_html:
|
| 200 |
+
return self._about_html
|
| 201 |
+
else:
|
| 202 |
+
self._about_html = request.get(self.about_url)
|
| 203 |
+
return self._about_html
|
| 204 |
+
|
| 205 |
+
def url_generator(self):
|
| 206 |
+
"""Generator that yields video URLs.
|
| 207 |
+
|
| 208 |
+
:Yields: Video URLs
|
| 209 |
+
"""
|
| 210 |
+
for page in self._paginate(self.html):
|
| 211 |
+
for obj in page:
|
| 212 |
+
yield obj
|
| 213 |
+
|
| 214 |
+
def videos_generator(self):
|
| 215 |
+
for url in self.video_urls:
|
| 216 |
+
yield url
|
| 217 |
+
|
| 218 |
+
    def _get_active_tab(self, initial_data) -> dict:
        """ Receive the raw json and return the active page.

        :param initial_data: parsed ytInitialData JSON for the channel page.
        :returns: Active page json object ({} when no tab matches).
        """
        active_tab = {}
        # Possible tabs: Home, Videos, Shorts, Live, Releases, Playlists, Community, Channels, About
        # We check each page for the URL that is active.
        for tab in initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]:
            if 'tabRenderer' in tab:
                tab_url = tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
                # Compare only the last path segment (the tab name, e.g. "videos").
                if tab_url.rsplit('/', maxsplit=1)[-1] == self.html_url.rsplit('/', maxsplit=1)[-1]:
                    active_tab = tab
                    break
        return active_tab
|
| 233 |
+
|
| 234 |
+
    def _extract_obj_from_home(self) -> list:
        """ Extract items from the channel home page.

        :returns: list of home page objects ([] when the page layout is unexpected).
        """
        items = []
        try:
            contents = self._get_active_tab(self.initial_data)['tabRenderer']['content'][
                'sectionListRenderer']['contents']

            for obj in contents:
                item_section_renderer = obj['itemSectionRenderer']['contents'][0]

                # Skip the presentation videos for non-subscribers
                if 'channelVideoPlayerRenderer' in item_section_renderer:
                    continue

                # Skip presentation videos for subscribers
                if 'channelFeaturedContentRenderer' in item_section_renderer:
                    continue

                # skip the list with channel members
                if 'recognitionShelfRenderer' in item_section_renderer:
                    continue

                # Get the horizontal shorts
                if 'reelShelfRenderer' in item_section_renderer:
                    for x in item_section_renderer['reelShelfRenderer']['items']:
                        items.append(x)

                # Get videos, playlist and horizontal channels
                if 'shelfRenderer' in item_section_renderer:
                    # We only take items that are horizontal
                    if 'horizontalListRenderer' in item_section_renderer['shelfRenderer']['content']:
                        # We iterate over each item in the array, which could be videos, playlist or channel
                        for x in item_section_renderer['shelfRenderer']['content']['horizontalListRenderer']['items']:
                            items.append(x)

        # Any unexpected layout is treated as "nothing on the home page".
        except (KeyError, IndexError, TypeError):
            return []

        # Extract object from each corresponding url
        items_obj = self._extract_ids(items)

        # remove duplicates
        return uniqueify(items_obj)
|
| 280 |
+
|
| 281 |
+
    def _extract_videos(self, raw_json: str, context: Optional[Any] = None) -> Tuple[List[str], Optional[str]]:
        """Extracts videos from a raw json page

        :param str raw_json: Input json extracted from the page or the last
            server response
        :param context: unused here; presumably kept for signature compatibility
            with the parent class — TODO confirm against Playlist._extract_videos.
        :rtype: Tuple[List[str], Optional[str]]
        :returns: Tuple containing a list of up to 100 video watch ids and
            a continuation token, if more videos are available
        """

        if isinstance(raw_json, dict):
            initial_data = raw_json
        else:
            initial_data = json.loads(raw_json)
        # this is the json tree structure, if the json was extracted from
        # html
        try:
            active_tab = self._get_active_tab(initial_data)
            try:
                # This is the json tree structure for videos, shorts and streams
                items = active_tab['tabRenderer']['content']['richGridRenderer']['contents']
            except (KeyError, IndexError, TypeError):
                # This is the json tree structure for playlists
                items = active_tab['tabRenderer']['content']['sectionListRenderer']['contents'][0][
                    'itemSectionRenderer']['contents'][0]['gridRenderer']['items']

            # This is the json tree structure of visitor data
            # It is necessary to send the visitorData together with the continuation token
            self._visitor_data = initial_data["responseContext"]["webResponseContextExtensionData"][
                "ytConfigData"]["visitorData"]

        except (KeyError, IndexError, TypeError):
            try:
                # this is the json tree structure, if the json was directly sent
                # by the server in a continuation response
                important_content = initial_data[1]['response']['onResponseReceivedActions'][
                    0
                ]['appendContinuationItemsAction']['continuationItems']
                items = important_content
            except (KeyError, IndexError, TypeError):
                try:
                    # this is the json tree structure, if the json was directly sent
                    # by the server in a continuation response
                    # no longer a list and no longer has the "response" key
                    important_content = initial_data['onResponseReceivedActions'][0][
                        'appendContinuationItemsAction']['continuationItems']
                    items = important_content
                except (KeyError, IndexError, TypeError) as p:
                    logger.info(p)
                    return [], None

        try:
            # The trailing item of a page carries the continuation token for the next page.
            continuation = items[-1]['continuationItemRenderer'][
                'continuationEndpoint'
            ]['continuationCommand']['token']
            items = items[:-1]
        except (KeyError, IndexError):
            # if there is an error, no continuation is available
            continuation = None

        # Extract object from each corresponding url
        items_obj = self._extract_ids(items)

        # remove duplicates
        return uniqueify(items_obj), continuation
|
| 346 |
+
|
| 347 |
+
    def _extract_video_id(self, x: dict):
        """ Try extracting video ids, if it fails, try extracting shorts ids.

        First link of a fallback chain: video -> shorts -> release -> home video
        -> home shorts -> playlist -> channel -> lockupViewModel playlist.

        :returns: a YouTube object, or whatever the next fallback returns.
        """
        try:
            return YouTube(f"/watch?v="
                           f"{x['richItemRenderer']['content']['videoRenderer']['videoId']}",
                           client=self.client,
                           use_oauth=self.use_oauth,
                           allow_oauth_cache=self.allow_oauth_cache,
                           token_file=self.token_file,
                           oauth_verifier=self.oauth_verifier,
                           use_po_token=self.use_po_token,
                           po_token_verifier=self.po_token_verifier
                           )
        except (KeyError, IndexError, TypeError):
            return self._extract_shorts_id(x)
|
| 365 |
+
|
| 366 |
+
    def _extract_shorts_id(self, x: dict):
        """ Try extracting shorts ids, if it fails, try extracting release ids.

        :returns: a YouTube object, or whatever the next fallback returns.
        """
        try:
            content = x['richItemRenderer']['content']

            # New json tree added on 09/12/2024
            if 'shortsLockupViewModel' in content:
                video_id = content['shortsLockupViewModel']['onTap']['innertubeCommand']['reelWatchEndpoint']['videoId']
            else:
                video_id = content['reelItemRenderer']['videoId']

            return YouTube(f"/watch?v={video_id}",
                           client=self.client,
                           use_oauth=self.use_oauth,
                           allow_oauth_cache=self.allow_oauth_cache,
                           token_file=self.token_file,
                           oauth_verifier=self.oauth_verifier,
                           use_po_token=self.use_po_token,
                           po_token_verifier=self.po_token_verifier
                           )
        except (KeyError, IndexError, TypeError):
            return self._extract_release_id(x)
|
| 391 |
+
|
| 392 |
+
    def _extract_release_id(self, x: dict):
        """ Try extracting release ids, if it fails, try extracting video IDs from the home page.

        :returns: a Playlist object, or whatever the next fallback returns.
        """
        try:
            return Playlist(f"/playlist?list="
                            f"{x['richItemRenderer']['content']['playlistRenderer']['playlistId']}",
                            client=self.client,
                            use_oauth=self.use_oauth,
                            allow_oauth_cache=self.allow_oauth_cache,
                            token_file=self.token_file,
                            oauth_verifier=self.oauth_verifier,
                            use_po_token=self.use_po_token,
                            po_token_verifier=self.po_token_verifier
                            )
        except (KeyError, IndexError, TypeError):
            return self._extract_video_id_from_home(x)
|
| 410 |
+
|
| 411 |
+
    def _extract_video_id_from_home(self, x: dict):
        """ Try extracting the video IDs from the home page,
        if that fails, try extracting the shorts IDs from the home page.

        :returns: a YouTube object, or whatever the next fallback returns.
        """
        try:
            return YouTube(f"/watch?v="
                           f"{x['gridVideoRenderer']['videoId']}",
                           client=self.client,
                           use_oauth=self.use_oauth,
                           allow_oauth_cache=self.allow_oauth_cache,
                           token_file=self.token_file,
                           oauth_verifier=self.oauth_verifier,
                           use_po_token=self.use_po_token,
                           po_token_verifier=self.po_token_verifier
                           )
        except (KeyError, IndexError, TypeError):
            return self._extract_shorts_id_from_home(x)
|
| 430 |
+
|
| 431 |
+
    def _extract_shorts_id_from_home(self, x: dict):
        """ Try extracting the shorts IDs from the home page, if that fails, try extracting the playlist IDs.

        :returns: a YouTube object, or whatever the next fallback returns.
        """
        try:
            return YouTube(f"/watch?v="
                           f"{x['reelItemRenderer']['videoId']}",
                           client=self.client,
                           use_oauth=self.use_oauth,
                           allow_oauth_cache=self.allow_oauth_cache,
                           token_file=self.token_file,
                           oauth_verifier=self.oauth_verifier,
                           use_po_token=self.use_po_token,
                           po_token_verifier=self.po_token_verifier
                           )
        except (KeyError, IndexError, TypeError):
            return self._extract_playlist_id(x)
|
| 449 |
+
|
| 450 |
+
    def _extract_playlist_id(self, x: dict):
        """ Try extracting the playlist IDs, if that fails, try extracting the channel IDs.

        :returns: a Playlist object, or whatever the next fallback returns.
        """
        try:
            return Playlist(f"/playlist?list="
                            f"{x['gridPlaylistRenderer']['playlistId']}",
                            client=self.client,
                            use_oauth=self.use_oauth,
                            allow_oauth_cache=self.allow_oauth_cache,
                            token_file=self.token_file,
                            oauth_verifier=self.oauth_verifier,
                            use_po_token=self.use_po_token,
                            po_token_verifier=self.po_token_verifier
                            )
        except (KeyError, IndexError, TypeError):
            return self._extract_channel_id_from_home(x)
|
| 468 |
+
|
| 469 |
+
    def _extract_channel_id_from_home(self, x: dict):
        """ Try extracting the channel IDs from the home page, if that fails, return playlist IDs from lockupViewModel.

        :returns: a Channel object, or whatever the next fallback returns.
        """
        try:
            return Channel(f"/channel/"
                           f"{x['gridChannelRenderer']['channelId']}",
                           client=self.client,
                           use_oauth=self.use_oauth,
                           allow_oauth_cache=self.allow_oauth_cache,
                           token_file=self.token_file,
                           oauth_verifier=self.oauth_verifier,
                           use_po_token=self.use_po_token,
                           po_token_verifier=self.po_token_verifier
                           )
        except (KeyError, IndexError, TypeError):
            return self._extract_playlist_id_from_lockup_view_model(x)
|
| 487 |
+
|
| 488 |
+
    def _extract_playlist_id_from_lockup_view_model(self, x: dict):
        """ Try extracting the playlist IDs, if that fails, return nothing.

        Last link of the fallback chain.

        :returns: a Playlist object, or [] when the item is not recognized.
        """
        try:
            return Playlist(f"/playlist?list="
                            f"{x['lockupViewModel']['contentId']}",
                            client=self.client,
                            use_oauth=self.use_oauth,
                            allow_oauth_cache=self.allow_oauth_cache,
                            token_file=self.token_file,
                            oauth_verifier=self.oauth_verifier,
                            use_po_token=self.use_po_token,
                            po_token_verifier=self.po_token_verifier
                            )
        except (KeyError, IndexError, TypeError):
            # NOTE(review): siblings in this chain return objects; returning []
            # here looks inconsistent but is preserved — confirm callers expect it.
            return []
|
| 506 |
+
|
| 507 |
+
    @property
    def views(self) -> int:
        """Extract view count for channel.

        :return: Channel view count (0 when it cannot be located)
        :rtype: int
        """
        # The view count is only exposed on the About panel.
        self.html_url = self.about_url

        try:
            views_text = self.initial_data['onResponseReceivedEndpoints'][0]['showEngagementPanelEndpoint'][
                'engagementPanel']['engagementPanelSectionListRenderer']['content']['sectionListRenderer'][
                'contents'][0]['itemSectionRenderer']['contents'][0]['aboutChannelRenderer']['metadata'][
                'aboutChannelViewModel']['viewCountText']

            # e.g. "1,234,567 views" -> "1,234,567"
            count_text = views_text.split(' ')[0]
            # -> "1234567"
            count_text = count_text.replace(',', '')
            return int(count_text)
        except KeyError:
            return 0
|
| 529 |
+
|
| 530 |
+
@property
|
| 531 |
+
def description(self) -> str:
|
| 532 |
+
"""Extract the channel description.
|
| 533 |
+
|
| 534 |
+
:return: Channel description
|
| 535 |
+
:rtype: str
|
| 536 |
+
"""
|
| 537 |
+
self.html_url = self.channel_url
|
| 538 |
+
return self.initial_data['metadata']['channelMetadataRenderer']['description']
|
| 539 |
+
|
| 540 |
+
def find_videos_info(self, data):
|
| 541 |
+
"""Recursively search for 'videos' in the text content of the JSON."""
|
| 542 |
+
if isinstance(data, dict):
|
| 543 |
+
for key, value in data.items():
|
| 544 |
+
if key == 'content' and isinstance(value, str) and 'videos' in value:
|
| 545 |
+
return value
|
| 546 |
+
if isinstance(value, (dict, list)):
|
| 547 |
+
result = self.find_videos_info(value)
|
| 548 |
+
if result:
|
| 549 |
+
return result
|
| 550 |
+
elif isinstance(data, list):
|
| 551 |
+
for item in data:
|
| 552 |
+
result = self.find_videos_info(item)
|
| 553 |
+
if result:
|
| 554 |
+
return result
|
| 555 |
+
return None
|
| 556 |
+
|
| 557 |
+
@property
|
| 558 |
+
def length(self):
|
| 559 |
+
"""Extracts the approximate amount of videos from the channel."""
|
| 560 |
+
try:
|
| 561 |
+
result = self.find_videos_info(self.initial_data)
|
| 562 |
+
return result if result else 'Unknown'
|
| 563 |
+
except Exception as e:
|
| 564 |
+
print(f"Exception: {e}")
|
| 565 |
+
return 'Unknown'
|
| 566 |
+
|
| 567 |
+
    @property
    def last_updated(self) -> Optional[str]:
        """Extract the date of the last uploaded video.

        :return: Upload-date text of the most recent video (e.g. "2 weeks ago"),
            or None when it cannot be found.
        :rtype: Optional[str]
        """
        self.html_url = self.videos_url
        try:
            # Newest entry of the Videos tab grid; tab index 1 is assumed to be
            # the "Videos" tab — TODO confirm for all channel layouts.
            last_updated_text = self.initial_data['contents']['twoColumnBrowseResultsRenderer']['tabs'][1][
                'tabRenderer']['content']['richGridRenderer']['contents'][0]['richItemRenderer']['content'][
                'videoRenderer']['publishedTimeText']['simpleText']
            return last_updated_text
        except KeyError:
            return None
|
| 582 |
+
|
| 583 |
+
@property
|
| 584 |
+
def thumbnail_url(self) -> str:
|
| 585 |
+
"""extract the profile image from the json of the channel home page
|
| 586 |
+
|
| 587 |
+
:rtype: str
|
| 588 |
+
:return: a string with the url of the channel's profile image
|
| 589 |
+
"""
|
| 590 |
+
self.html_url = self.channel_url # get the url of the channel home page
|
| 591 |
+
return self.initial_data['metadata']['channelMetadataRenderer']['avatar']['thumbnails'][0]['url']
|
| 592 |
+
|
| 593 |
+
@property
def home(self) -> list:
    """Objects parsed from the channel's "Home" (featured) tab.

    :returns: List of YouTube, Playlist and Channel objects found on the
        channel home page.
    """
    # Point the parser at the featured tab before extracting.
    self.html_url = self.featured_url
    return self._extract_obj_from_home()
|
| 601 |
+
|
| 602 |
+
@property
def videos(self) -> Iterable[YouTube]:
    """Lazily yield :class:`YouTube` objects for each upload in the channel.

    :rtype: List[YouTube]
    :returns: Deferred list of YouTube objects.
    """
    # Select the "Videos" tab, then wrap the generator so items are only
    # fetched when the caller actually iterates.
    self.html_url = self.videos_url
    generator = self.videos_generator()
    return DeferredGeneratorList(generator)
|
| 611 |
+
|
| 612 |
+
@property
def shorts(self) -> Iterable[YouTube]:
    """Lazily yield :class:`YouTube` objects for each Short in the channel.

    :rtype: List[YouTube]
    :returns: Deferred list of YouTube objects.
    """
    # Select the "Shorts" tab, then defer materialization to iteration time.
    self.html_url = self.shorts_url
    generator = self.videos_generator()
    return DeferredGeneratorList(generator)
|
| 621 |
+
|
| 622 |
+
@property
def live(self) -> Iterable[YouTube]:
    """Lazily yield :class:`YouTube` objects for each live stream in the channel.

    :rtype: List[YouTube]
    :returns: Deferred list of YouTube objects.
    """
    # Select the "Live" (streams) tab, then defer materialization.
    self.html_url = self.live_url
    generator = self.videos_generator()
    return DeferredGeneratorList(generator)
|
| 631 |
+
|
| 632 |
+
@property
def lives(self) -> Iterable[YouTube]:
    """Alias that simply proxies to :attr:`live`."""
    return self.live
|
| 636 |
+
|
| 637 |
+
@property
def releases(self) -> Iterable[Playlist]:
    """Lazily yield :class:`Playlist` objects from the channel's "Releases" tab.

    :rtype: List[Playlist]
    :returns: Deferred list of Playlist objects.
    """
    # Select the "Releases" tab, then defer materialization.
    self.html_url = self.releases_url
    generator = self.videos_generator()
    return DeferredGeneratorList(generator)
|
| 646 |
+
|
| 647 |
+
@property
def playlists(self) -> Iterable[Playlist]:
    """Lazily yield :class:`Playlist` objects from the channel's "Playlists" tab.

    :rtype: List[Playlist]
    :returns: Deferred list of Playlist objects.
    """
    # Select the "Playlists" tab, then defer materialization.
    self.html_url = self.playlists_url
    generator = self.videos_generator()
    return DeferredGeneratorList(generator)
|
pytubefix/contrib/playlist.py
ADDED
|
@@ -0,0 +1,496 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Module to download a complete playlist from a youtube channel."""
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
from collections.abc import Sequence
|
| 5 |
+
from datetime import date, datetime
|
| 6 |
+
from typing import Dict, Iterable, List, Optional, Tuple, Union, Any, Callable
|
| 7 |
+
|
| 8 |
+
from pytubefix import extract, request, YouTube
|
| 9 |
+
from pytubefix.innertube import InnerTube
|
| 10 |
+
from pytubefix.helpers import cache, DeferredGeneratorList, install_proxy, uniqueify
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class Playlist(Sequence):
    """Load a YouTube playlist with URL."""

    def __init__(
        self,
        url: str,
        client: str = InnerTube().client_name,
        proxies: Optional[Dict[str, str]] = None,
        use_oauth: bool = False,
        allow_oauth_cache: bool = True,
        token_file: Optional[str] = None,
        oauth_verifier: Optional[Callable[[str, str], None]] = None,
        use_po_token: Optional[bool] = False,
        po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
    ):
        """
        :param dict proxies:
            (Optional) A dict mapping protocol to proxy address which will be used by pytube.
        :param bool use_oauth:
            (Optional) Prompt the user to authenticate to YouTube.
            If allow_oauth_cache is set to True, the user should only be prompted once.
        :param bool allow_oauth_cache:
            (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
            These tokens are only generated if use_oauth is set to True as well.
        :param str token_file:
            (Optional) Path to the file where the OAuth tokens will be stored.
            Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
        :param Callable oauth_verifier:
            (optional) Verifier to be used for getting OAuth tokens.
            Verification URL and User-Code will be passed to it respectively.
            (if passed, else default verifier will be used)
        :param bool use_po_token:
            (Optional) Prompt the user to use the proof of origin token on YouTube.
            It must be sent with the API along with the linked visitorData and
            then passed as a `po_token` query parameter to affected clients.
            If allow_oauth_cache is set to True, the user should only be prompted once.
        :param Callable po_token_verifier:
            (Optional) Verified used to obtain the visitorData and po_token.
            The verifier will return the visitorData and po_token respectively.
            (if passed, else default verifier will be used)
        """
        if proxies:
            install_proxy(proxies)

        self._input_url = url
        # visitorData returned by YouTube; forwarded on continuation requests.
        self._visitor_data = None

        self.client = client
        self.use_oauth = use_oauth
        self.allow_oauth_cache = allow_oauth_cache
        self.token_file = token_file
        self.oauth_verifier = oauth_verifier

        self.use_po_token = use_po_token
        self.po_token_verifier = po_token_verifier

        # Lazily-populated caches backing the properties below.
        self._html = None
        self._ytcfg = None
        self._initial_data = None
        self._sidebar_info = None

        self._playlist_id = None

    @property
    def playlist_id(self):
        """Get the playlist id.

        :rtype: str
        """
        if self._playlist_id:
            return self._playlist_id
        self._playlist_id = extract.playlist_id(self._input_url)
        return self._playlist_id

    @property
    def playlist_url(self):
        """Get the base playlist url.

        :rtype: str
        """
        return f"https://www.youtube.com/playlist?list={self.playlist_id}"

    @property
    def html(self):
        """Get the playlist page html (fetched once, then cached).

        :rtype: str
        """
        if self._html:
            return self._html
        self._html = request.get(self.playlist_url)
        return self._html

    @property
    def ytcfg(self):
        """Extract the ytcfg from the playlist page html.

        :rtype: dict
        """
        if self._ytcfg:
            return self._ytcfg
        self._ytcfg = extract.get_ytcfg(self.html)
        return self._ytcfg

    @property
    def initial_data(self):
        """Extract the initial data from the playlist page html.

        :rtype: dict
        """
        if self._initial_data:
            return self._initial_data
        self._initial_data = extract.initial_data(self.html)
        return self._initial_data

    @property
    def sidebar_info(self):
        """Extract the sidebar info from the playlist page html.

        :rtype: dict
        """
        if self._sidebar_info:
            return self._sidebar_info
        self._sidebar_info = self.initial_data['sidebar'][
            'playlistSidebarRenderer']['items']
        return self._sidebar_info

    @property
    def yt_api_key(self):
        """Extract the INNERTUBE_API_KEY from the playlist ytcfg.

        :rtype: str
        """
        return self.ytcfg['INNERTUBE_API_KEY']

    def _paginate(
        self, initial_html: str, context: Optional[Any] = None,
        until_watch_id: Optional[str] = None
    ) -> Iterable[List[str]]:
        """Parse the video links from the page source, yields the /watch?v=
        part from video link

        :param initial_html str: html from the initial YouTube url, default: self.html
        :param context Optional[Any]: Auxiliary object
        :param until_watch_id Optional[str]: YouTube Video watch id until
            which the playlist should be read.

        :rtype: Iterable[List[str]]
        :returns: Iterable of lists of YouTube watch ids
        """
        videos_urls, continuation = self._extract_videos(
            json.dumps(extract.initial_data(initial_html)), context
        )
        if until_watch_id:
            try:
                trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
                yield videos_urls[:trim_index]
                return
            except ValueError:
                pass
        yield videos_urls

        # Extraction from a playlist only returns 100 videos at a time
        # if self._extract_videos returns a continuation there are more
        # than 100 songs inside a playlist, so we need to add further requests
        # to gather all of them
        while continuation:  # there is an url found
            # requesting the next page of videos with the url generated from the
            # previous page, needs to be a post
            req = InnerTube('WEB').browse(continuation=continuation, visitor_data=self._visitor_data)
            # extract up to 100 songs from the page loaded
            # returns another continuation if more videos are available
            videos_urls, continuation = self._extract_videos(req, context)
            if until_watch_id:
                try:
                    trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
                    yield videos_urls[:trim_index]
                    return
                except ValueError:
                    pass
            yield videos_urls

    def _extract_videos(self, raw_json: str, context: Optional[Any] = None) -> Tuple[List[str], Optional[str]]:
        """Extracts videos from a raw json page

        :param str raw_json: Input json extracted from the page or the last
            server response
        :param Optional[Any] context: Auxiliary object from _paginate
        :rtype: Tuple[List[str], Optional[str]]
        :returns: Tuple containing a list of up to 100 video watch ids and
            a continuation token, if more videos are available
        """
        if isinstance(raw_json, dict):
            initial_data = raw_json
        else:
            initial_data = json.loads(raw_json)
        try:
            # this is the json tree structure, if the json was extracted from
            # html
            section_contents = initial_data["contents"][
                "twoColumnBrowseResultsRenderer"][
                "tabs"][0]["tabRenderer"]["content"][
                "sectionListRenderer"]["contents"]
            try:
                renderer = section_contents[0]["itemSectionRenderer"]["contents"][0]

                if 'richGridRenderer' in renderer:
                    important_content = renderer["richGridRenderer"]
                else:
                    important_content = renderer["playlistVideoListRenderer"]

            except (KeyError, IndexError, TypeError):
                # Playlist with submenus
                important_content = section_contents[
                    1]["itemSectionRenderer"][
                    "contents"][0]["playlistVideoListRenderer"]
            videos = important_content["contents"]

            self._visitor_data = initial_data["responseContext"]["webResponseContextExtensionData"][
                "ytConfigData"]["visitorData"]
        except (KeyError, IndexError, TypeError):
            try:
                # this is the json tree structure, if the json was directly sent
                # by the server in a continuation response
                # no longer a list and no longer has the "response" key
                important_content = initial_data['onResponseReceivedActions'][0][
                    'appendContinuationItemsAction']['continuationItems']
                videos = important_content
            except (KeyError, IndexError, TypeError) as p:
                logger.info(p)
                return [], None

        try:
            # For some reason YouTube only returns the first 100 shorts of a playlist
            # token provided by the API doesn't seem to work even in the official player
            try:
                continuation = videos[-1]['continuationItemRenderer'][
                    'continuationEndpoint']['continuationCommand']['token']
            except (KeyError, IndexError, TypeError):
                # Fallback layout: the endpoint wraps several commands; find
                # the one carrying the continuation token. Pre-set to None so
                # `continuation` is always bound even if no command matches
                # (the original code could raise NameError here).
                continuation = None
                for command in videos[-1]['continuationItemRenderer'][
                        'continuationEndpoint']['commandExecutorCommand']['commands']:
                    if 'continuationCommand' in command:
                        continuation = command['continuationCommand']['token']
                        break
            # The last item was a continuation renderer, not a video.
            videos = videos[:-1]
        except (KeyError, IndexError):
            # if there is an error, no continuation is available
            continuation = None

        items_obj = self._extract_ids(videos)

        # remove duplicates
        return uniqueify(items_obj), continuation

    def _extract_ids(self, items: list) -> list:
        """ Iterate over the extracted urls.

        :returns: List with extracted ids.
        """
        items_obj = []
        for x in items:
            items_obj.append(self._extract_video_id(x))
        return items_obj

    def _extract_video_id(self, x: dict):
        """ Try extracting video ids, if it fails, try extracting shorts ids.

        :returns: Watch path string, or the shorts fallback result.
        """
        try:
            return f"/watch?v={x['playlistVideoRenderer']['videoId']}"
        except (KeyError, IndexError, TypeError):
            return self._extract_shorts_id(x)

    def _extract_shorts_id(self, x: dict):
        """ Try extracting shorts ids.

        :returns: Watch path string, or an empty list when the item is not
            a recognizable short (kept for backward compatibility).
        """
        try:
            content = x['richItemRenderer']['content']

            # New json tree added on 09/12/2024
            if 'shortsLockupViewModel' in content:
                video_id = content['shortsLockupViewModel']['onTap'][
                    'innertubeCommand']['reelWatchEndpoint']['videoId']
            else:
                video_id = content['reelItemRenderer']['videoId']

            return f"/watch?v={video_id}"

        except (KeyError, IndexError, TypeError):
            return []

    def trimmed(self, video_id: str) -> Iterable[str]:
        """Retrieve a list of YouTube video URLs trimmed at the given video ID

        i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns
        [1,2]
        :type video_id: str
            video ID to trim the returned list of playlist URLs at
        :rtype: List[str]
        :returns:
            List of video URLs from the playlist trimmed at the given ID
        """
        for page in self._paginate(self.html, until_watch_id=video_id):
            yield from (self._video_url(watch_path) for watch_path in page)

    def url_generator(self):
        """Generator that yields video URLs.

        :Yields: Video URLs
        """
        for page in self._paginate(self.html):
            for video in page:
                yield self._video_url(video)

    @property  # type: ignore
    @cache
    def video_urls(self) -> DeferredGeneratorList:
        """Complete links of all the videos in playlist

        :rtype: List[str]
        :returns: List of video URLs
        """
        return DeferredGeneratorList(self.url_generator())

    def videos_generator(self):
        """Yield a configured :class:`YouTube` object per playlist URL."""
        for url in self.video_urls:
            yield YouTube(
                url,
                client=self.client,
                use_oauth=self.use_oauth,
                allow_oauth_cache=self.allow_oauth_cache,
                token_file=self.token_file,
                oauth_verifier=self.oauth_verifier,
                use_po_token=self.use_po_token,
                po_token_verifier=self.po_token_verifier
            )

    @property
    def videos(self) -> Iterable[YouTube]:
        """Yields YouTube objects of videos in this playlist

        :rtype: List[YouTube]
        :returns: List of YouTube
        """
        return DeferredGeneratorList(self.videos_generator())

    def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
        return self.video_urls[i]

    def __len__(self) -> int:
        return len(self.video_urls)

    def __repr__(self) -> str:
        return f'<pytubefix.contrib.Playlist object: playlistId={self.playlist_id}>'

    @property
    @cache
    def last_updated(self) -> Optional[date]:
        """Extract the date that the playlist was last updated.

        For some playlists, this will be a specific date, which is returned as a datetime
        object. For other playlists, this is an estimate such as "1 week ago". Due to the
        fact that this value is returned as a string, pytube does a best-effort parsing
        where possible, and returns the raw string where it is not possible.

        :return: Date of last playlist update where possible, else the string provided
        :rtype: datetime.date
        """
        last_updated_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'stats'][2]['runs'][1]['text']
        try:
            date_components = last_updated_text.split()
            month = date_components[0]
            day = date_components[1].strip(',')
            year = date_components[2]
            return datetime.strptime(
                f"{month} {day:0>2} {year}", "%b %d %Y"
            ).date()
        except (IndexError, KeyError, ValueError):
            # ValueError added: strptime raises it for estimate strings
            # like "1 week ago"; the documented fallback is to return the
            # raw text in that case rather than propagate the error.
            return last_updated_text

    @property
    @cache
    def title(self) -> Optional[str]:
        """Extract playlist title

        :return: playlist title (name)
        :rtype: Optional[str]
        """
        return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'title']['runs'][0]['text']

    @property
    def thumbnail_url(self):
        """URL of the playlist thumbnail, or None if no known renderer matches."""
        thumbnail_renderer = self.sidebar_info[0][
            'playlistSidebarPrimaryInfoRenderer'][
            'thumbnailRenderer']

        if 'playlistVideoThumbnailRenderer' in thumbnail_renderer:
            return thumbnail_renderer[
                'playlistVideoThumbnailRenderer'][
                'thumbnail'][
                'thumbnails'][-1][
                'url']

        elif 'playlistCustomThumbnailRenderer' in thumbnail_renderer:
            return thumbnail_renderer[
                'playlistCustomThumbnailRenderer'][
                'thumbnail'][
                'thumbnails'][-1][
                'url']

    @property
    def description(self) -> str:
        """Playlist description text.

        NOTE(review): raises KeyError when the playlist has no description
        ('simpleText' absent) -- callers should be prepared for that.
        """
        return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'description']['simpleText']

    @property
    def length(self):
        """Extract the number of videos in the playlist.

        :return: Playlist video count
        :rtype: int
        """
        count_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'stats'][0]['runs'][0]['text']
        count_text = count_text.replace(',', '')
        return int(count_text)

    @property
    def views(self):
        """Extract view count for playlist.

        :return: Playlist view count
        :rtype: int
        """
        # "1,234,567 views"
        views_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'stats'][1]['simpleText']
        # "1,234,567"
        count_text = views_text.split()[0]
        # "1234567"
        count_text = count_text.replace(',', '')
        return int(count_text)

    @property
    def owner(self):
        """Extract the owner of the playlist.

        :return: Playlist owner name.
        :rtype: str
        """
        return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
            'videoOwner']['videoOwnerRenderer']['title']['runs'][0]['text']

    @property
    def owner_id(self):
        """Extract the channel_id of the owner of the playlist.

        :return: Playlist owner's channel ID.
        :rtype: str
        """
        return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
            'videoOwner']['videoOwnerRenderer']['title']['runs'][0][
            'navigationEndpoint']['browseEndpoint']['browseId']

    @property
    def owner_url(self):
        """Create the channel url of the owner of the playlist.

        :return: Playlist owner's channel url.
        :rtype: str
        """
        return f'https://www.youtube.com/channel/{self.owner_id}'

    @staticmethod
    def _video_url(watch_path: str):
        """Turn a relative /watch path into an absolute YouTube URL."""
        return f"https://www.youtube.com{watch_path}"
|
pytubefix/contrib/search.py
ADDED
|
@@ -0,0 +1,557 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Module for interacting with YouTube search."""
|
| 2 |
+
# Native python imports
|
| 3 |
+
import logging
|
| 4 |
+
from typing import List, Optional, Dict, Callable, Tuple
|
| 5 |
+
|
| 6 |
+
# Local imports
|
| 7 |
+
from pytubefix import YouTube, Channel, Playlist
|
| 8 |
+
from pytubefix.helpers import deprecated, install_proxy
|
| 9 |
+
from pytubefix.innertube import InnerTube
|
| 10 |
+
from pytubefix.protobuf import encode_protobuf
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class Search:
|
| 16 |
+
def __init__(
|
| 17 |
+
self, query: str,
|
| 18 |
+
client: str = InnerTube().client_name,
|
| 19 |
+
proxies: Optional[Dict[str, str]] = None,
|
| 20 |
+
use_oauth: bool = False,
|
| 21 |
+
allow_oauth_cache: bool = True,
|
| 22 |
+
token_file: Optional[str] = None,
|
| 23 |
+
oauth_verifier: Optional[Callable[[str, str], None]] = None,
|
| 24 |
+
use_po_token: Optional[bool] = False,
|
| 25 |
+
po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
|
| 26 |
+
filters: Optional[dict] = None
|
| 27 |
+
):
|
| 28 |
+
"""Initialize Search object.
|
| 29 |
+
|
| 30 |
+
:param str query:
|
| 31 |
+
Search query provided by the user.
|
| 32 |
+
:param dict proxies:
|
| 33 |
+
(Optional) A dict mapping protocol to proxy address which will be used by pytube.
|
| 34 |
+
:param bool use_oauth:
|
| 35 |
+
(Optional) Prompt the user to authenticate to YouTube.
|
| 36 |
+
If allow_oauth_cache is set to True, the user should only be prompted once.
|
| 37 |
+
:param bool allow_oauth_cache:
|
| 38 |
+
(Optional) Cache OAuth tokens locally on the machine. Defaults to True.
|
| 39 |
+
These tokens are only generated if use_oauth is set to True as well.
|
| 40 |
+
:param str token_file:
|
| 41 |
+
(Optional) Path to the file where the OAuth tokens will be stored.
|
| 42 |
+
Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
|
| 43 |
+
:param Callable oauth_verifier:
|
| 44 |
+
(optional) Verifier to be used for getting OAuth tokens.
|
| 45 |
+
Verification URL and User-Code will be passed to it respectively.
|
| 46 |
+
(if passed, else default verifier will be used)
|
| 47 |
+
:param bool use_po_token:
|
| 48 |
+
(Optional) Prompt the user to use the proof of origin token on YouTube.
|
| 49 |
+
It must be sent with the API along with the linked visitorData and
|
| 50 |
+
then passed as a `po_token` query parameter to affected clients.
|
| 51 |
+
If allow_oauth_cache is set to True, the user should only be prompted once.
|
| 52 |
+
:param Callable po_token_verifier:
|
| 53 |
+
(Optional) Verified used to obtain the visitorData and po_token.
|
| 54 |
+
The verifier will return the visitorData and po_token respectively.
|
| 55 |
+
(if passed, else default verifier will be used)
|
| 56 |
+
:param dict filters:
|
| 57 |
+
(Optional) Apply filters when searching.
|
| 58 |
+
Can be used: `upload_date`, `type`, `duration`, `features`, `sort_by`.
|
| 59 |
+
features can be combined into a list with other parameters of the same type.
|
| 60 |
+
"""
|
| 61 |
+
self.query = query
|
| 62 |
+
self.client = client
|
| 63 |
+
self.use_oauth = use_oauth
|
| 64 |
+
self.allow_oauth_cache = allow_oauth_cache
|
| 65 |
+
self.token_file = token_file
|
| 66 |
+
self.oauth_verifier = oauth_verifier
|
| 67 |
+
|
| 68 |
+
self.use_po_token = use_po_token
|
| 69 |
+
self.po_token_verifier = po_token_verifier
|
| 70 |
+
|
| 71 |
+
self._innertube_client = InnerTube(
|
| 72 |
+
client='WEB',
|
| 73 |
+
use_oauth=self.use_oauth,
|
| 74 |
+
allow_cache=self.allow_oauth_cache,
|
| 75 |
+
token_file=self.token_file,
|
| 76 |
+
oauth_verifier=self.oauth_verifier,
|
| 77 |
+
use_po_token=self.use_po_token,
|
| 78 |
+
po_token_verifier=self.po_token_verifier
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
# The first search, without a continuation, is structured differently
|
| 82 |
+
# and contains completion suggestions, so we must store this separately
|
| 83 |
+
self._initial_results = None
|
| 84 |
+
|
| 85 |
+
self._results = {}
|
| 86 |
+
self._completion_suggestions = None
|
| 87 |
+
|
| 88 |
+
# Used for keeping track of query continuations so that new results
|
| 89 |
+
# are always returned when get_next_results() is called
|
| 90 |
+
self._current_continuation = None
|
| 91 |
+
|
| 92 |
+
if proxies:
|
| 93 |
+
install_proxy(proxies)
|
| 94 |
+
|
| 95 |
+
self.filter = None
|
| 96 |
+
if filters:
|
| 97 |
+
logger.debug("Filters found, starting combination")
|
| 98 |
+
filter_protobuf = Filter()
|
| 99 |
+
|
| 100 |
+
filter_protobuf.set_filters(filters)
|
| 101 |
+
|
| 102 |
+
self.filter = filter_protobuf.get_filters_params()
|
| 103 |
+
|
| 104 |
+
@property
|
| 105 |
+
def completion_suggestions(self):
|
| 106 |
+
"""Return query autocompletion suggestions for the query.
|
| 107 |
+
|
| 108 |
+
:rtype: list
|
| 109 |
+
:returns:
|
| 110 |
+
A list of autocomplete suggestions provided by YouTube for the query.
|
| 111 |
+
"""
|
| 112 |
+
if self._completion_suggestions:
|
| 113 |
+
return self._completion_suggestions
|
| 114 |
+
if self.results:
|
| 115 |
+
self._completion_suggestions = self._initial_results['refinements']
|
| 116 |
+
return self._completion_suggestions
|
| 117 |
+
|
| 118 |
+
def _get_results(self):
|
| 119 |
+
"""Search results and filter them
|
| 120 |
+
|
| 121 |
+
"""
|
| 122 |
+
results, continuation = self.fetch_and_parse()
|
| 123 |
+
self._current_continuation = continuation
|
| 124 |
+
self._results['videos'] = results['videos']
|
| 125 |
+
self._results['shorts'] = results['shorts']
|
| 126 |
+
self._results['playlist'] = results['playlist']
|
| 127 |
+
self._results['channel'] = results['channel']
|
| 128 |
+
|
| 129 |
+
@property
|
| 130 |
+
def videos(self) -> List[YouTube]:
|
| 131 |
+
"""Returns the search result videos.
|
| 132 |
+
|
| 133 |
+
On first call, will generate and return the first set of results.
|
| 134 |
+
Additional results can be generated using ``.get_next_results()``.
|
| 135 |
+
|
| 136 |
+
:rtype: list[YouTube]
|
| 137 |
+
:returns:
|
| 138 |
+
A list of YouTube objects.
|
| 139 |
+
"""
|
| 140 |
+
if not self._results:
|
| 141 |
+
self._get_results()
|
| 142 |
+
|
| 143 |
+
return [items for items in self._results['videos']]
|
| 144 |
+
|
| 145 |
+
@property
|
| 146 |
+
def shorts(self) -> List[YouTube]:
|
| 147 |
+
"""Returns the search result shorts.
|
| 148 |
+
|
| 149 |
+
On first call, will generate and return the first set of results.
|
| 150 |
+
Additional results can be generated using ``.get_next_results()``.
|
| 151 |
+
|
| 152 |
+
:rtype: list[YouTube]
|
| 153 |
+
:returns:
|
| 154 |
+
A list of YouTube objects.
|
| 155 |
+
"""
|
| 156 |
+
if not self._results:
|
| 157 |
+
self._get_results()
|
| 158 |
+
|
| 159 |
+
return [items for items in self._results['shorts']]
|
| 160 |
+
|
| 161 |
+
@property
|
| 162 |
+
def playlist(self) -> List[Playlist]:
|
| 163 |
+
"""Returns the search result playlist.
|
| 164 |
+
|
| 165 |
+
On first call, will generate and return the first set of results.
|
| 166 |
+
Additional results can be generated using ``.get_next_results()``.
|
| 167 |
+
|
| 168 |
+
:rtype: list[Playlist]
|
| 169 |
+
:returns:
|
| 170 |
+
A list of Playlist objects.
|
| 171 |
+
"""
|
| 172 |
+
if not self._results:
|
| 173 |
+
self._get_results()
|
| 174 |
+
|
| 175 |
+
return [items for items in self._results['playlist']]
|
| 176 |
+
|
| 177 |
+
@property
|
| 178 |
+
def channel(self) -> List[Channel]:
|
| 179 |
+
"""Returns the search result channel.
|
| 180 |
+
|
| 181 |
+
On first call, will generate and return the first set of results.
|
| 182 |
+
Additional results can be generated using ``.get_next_results()``.
|
| 183 |
+
|
| 184 |
+
:rtype: list[Channel]
|
| 185 |
+
:returns:
|
| 186 |
+
A list of Channel objects.
|
| 187 |
+
"""
|
| 188 |
+
if not self._results:
|
| 189 |
+
self._get_results()
|
| 190 |
+
|
| 191 |
+
return [items for items in self._results['channel']]
|
| 192 |
+
|
| 193 |
+
@property
|
| 194 |
+
@deprecated("Get video results using: .videos")
|
| 195 |
+
def results(self) -> list:
|
| 196 |
+
"""returns a list with videos, shorts, playlist and channels.
|
| 197 |
+
|
| 198 |
+
On first call, will generate and return the first set of results.
|
| 199 |
+
Additional results can be generated using ``.get_next_results()``.
|
| 200 |
+
|
| 201 |
+
:rtype: list
|
| 202 |
+
:returns:
|
| 203 |
+
A list of YouTube, Playlist and Channel objects.
|
| 204 |
+
"""
|
| 205 |
+
# Remove these comments to get the list of videos, shorts, playlist and channel
|
| 206 |
+
|
| 207 |
+
# if not self._results:
|
| 208 |
+
# self._get_results()
|
| 209 |
+
|
| 210 |
+
# return [items for values in self._results.values() for items in values]
|
| 211 |
+
return self.videos
|
| 212 |
+
|
| 213 |
+
@property
|
| 214 |
+
def all(self) -> list:
|
| 215 |
+
"""
|
| 216 |
+
Return all objects found in the search
|
| 217 |
+
"""
|
| 218 |
+
if not self._results:
|
| 219 |
+
self._get_results()
|
| 220 |
+
|
| 221 |
+
return [items for values in self._results.values() for items in values]
|
| 222 |
+
|
| 223 |
+
def get_next_results(self):
|
| 224 |
+
"""Use the stored continuation string to fetch the next set of results.
|
| 225 |
+
|
| 226 |
+
This method does not return the results, but instead updates the results property.
|
| 227 |
+
"""
|
| 228 |
+
if self._current_continuation:
|
| 229 |
+
results, continuation = self.fetch_and_parse(self._current_continuation)
|
| 230 |
+
self._current_continuation = continuation
|
| 231 |
+
self._results['videos'].extend(results['videos'])
|
| 232 |
+
self._results['shorts'].extend(results['shorts'])
|
| 233 |
+
self._results['playlist'].extend(results['playlist'])
|
| 234 |
+
self._results['channel'].extend(results['channel'])
|
| 235 |
+
else:
|
| 236 |
+
self._get_results()
|
| 237 |
+
|
| 238 |
+
    def fetch_and_parse(self, continuation=None):
        """Fetch from the innertube API and parse the results.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: tuple
        :returns:
            A tuple ``(results, next_continuation)`` where ``results`` is a
            dict with the keys ``'videos'``, ``'shorts'``, ``'playlist'`` and
            ``'channel'`` (empty dict when YouTube returned no item section),
            and ``next_continuation`` is the token for the next page or None.
        """
        # Begin by executing the query and identifying the relevant sections
        # of the results
        raw_results = self.fetch_query(continuation,
                                       # The filter parameter must only be passed in the first API call
                                       # After the first call, the continuation token already contains the filter
                                       {'params': self.filter} if self.filter and not continuation else None
                                       )

        # Initial result is handled by try block, continuations by except block
        try:
            sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
                'primaryContents']['sectionListRenderer']['contents']
        except KeyError:
            sections = raw_results['onResponseReceivedCommands'][0][
                'appendContinuationItemsAction']['continuationItems']
        item_renderer = None
        continuation_renderer = None
        # A section may contain the items, the continuation marker, or both;
        # the last occurrence of each wins.
        for s in sections:
            if 'itemSectionRenderer' in s:
                item_renderer = s['itemSectionRenderer']
            if 'continuationItemRenderer' in s:
                continuation_renderer = s['continuationItemRenderer']

        # If the continuationItemRenderer doesn't exist, assume no further results
        if continuation_renderer:
            next_continuation = continuation_renderer['continuationEndpoint'][
                'continuationCommand']['token']
        else:
            next_continuation = None

        # If the itemSectionRenderer doesn't exist, assume no results.
        results = {}
        if item_renderer:
            videos = []
            shorts = []
            playlist = []
            channel = []
            raw_video_list = item_renderer['contents']
            for video_details in raw_video_list:
                # Skip over ads
                if video_details.get('searchPyvRenderer', {}).get('ads', None):
                    continue

                # Skip "recommended" type videos e.g. "people also watched" and "popular X"
                # that break up the search results
                if 'shelfRenderer' in video_details:
                    continue

                # Skip auto-generated "mix" playlist results
                if 'radioRenderer' in video_details:
                    continue

                # Skip 'people also searched for' results
                if 'horizontalCardListRenderer' in video_details:
                    continue

                # Can't seem to reproduce, probably related to typo fix suggestions
                if 'didYouMeanRenderer' in video_details:
                    continue

                # Seems to be the renderer used for the image shown on a no results page
                if 'backgroundPromoRenderer' in video_details:
                    continue

                # Get playlist results
                if 'playlistRenderer' in video_details:
                    playlist.append(Playlist(f"https://www.youtube.com/playlist?list="
                                             f"{video_details['playlistRenderer']['playlistId']}",
                                             client=self.client,
                                             use_oauth=self.use_oauth,
                                             allow_oauth_cache=self.allow_oauth_cache,
                                             token_file=self.token_file,
                                             oauth_verifier=self.oauth_verifier,
                                             use_po_token=self.use_po_token,
                                             po_token_verifier=self.po_token_verifier
                                             ))

                # Get channel results
                if 'channelRenderer' in video_details:
                    channel.append(Channel(f"https://www.youtube.com/channel/"
                                           f"{video_details['channelRenderer']['channelId']}",
                                           client=self.client,
                                           use_oauth=self.use_oauth,
                                           allow_oauth_cache=self.allow_oauth_cache,
                                           token_file=self.token_file,
                                           oauth_verifier=self.oauth_verifier,
                                           use_po_token=self.use_po_token,
                                           po_token_verifier=self.po_token_verifier
                                           ))

                # Get shorts results
                if 'reelShelfRenderer' in video_details:
                    for items in video_details['reelShelfRenderer']['items']:
                        # YouTube serves shorts under two different renderers;
                        # fall back to the newer shortsLockupViewModel shape.
                        if 'reelItemRenderer' in items:
                            video_id = items['reelItemRenderer']['videoId']
                        else:
                            video_id = items['shortsLockupViewModel']['onTap']['innertubeCommand'][
                                'reelWatchEndpoint']['videoId']

                        shorts.append(YouTube(f"https://www.youtube.com/watch?v={video_id}",
                                              client=self.client,
                                              use_oauth=self.use_oauth,
                                              allow_oauth_cache=self.allow_oauth_cache,
                                              token_file=self.token_file,
                                              oauth_verifier=self.oauth_verifier,
                                              use_po_token=self.use_po_token,
                                              po_token_verifier=self.po_token_verifier
                                              ))

                # Get videos results
                if 'videoRenderer' in video_details:
                    videos.append(YouTube(f"https://www.youtube.com/watch?v="
                                          f"{video_details['videoRenderer']['videoId']}",
                                          client=self.client,
                                          use_oauth=self.use_oauth,
                                          allow_oauth_cache=self.allow_oauth_cache,
                                          token_file=self.token_file,
                                          oauth_verifier=self.oauth_verifier,
                                          use_po_token=self.use_po_token,
                                          po_token_verifier=self.po_token_verifier
                                          ))

            results['videos'] = videos
            results['shorts'] = shorts
            results['playlist'] = playlist
            results['channel'] = channel

        return results, next_continuation
|
| 375 |
+
|
| 376 |
+
def fetch_query(self, continuation: str = None, filters: dict = None):
|
| 377 |
+
"""Fetch raw results from the innertube API.
|
| 378 |
+
|
| 379 |
+
:param str continuation:
|
| 380 |
+
Continuation string for fetching results.
|
| 381 |
+
:param dict filters:
|
| 382 |
+
Parameter encoded in protobuf that contains the search filters.
|
| 383 |
+
:rtype: dict
|
| 384 |
+
:returns:
|
| 385 |
+
The raw json object returned by the innertube API.
|
| 386 |
+
"""
|
| 387 |
+
query_results = self._innertube_client.search(self.query, continuation=continuation, data=filters)
|
| 388 |
+
if not self._initial_results:
|
| 389 |
+
self._initial_results = query_results
|
| 390 |
+
return query_results # noqa:R504
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
class Filter:
    """
    Build filters for YouTube search in protobuf format

    Collects one value per category (plus a list of 'features'), combines
    them into a nested int-keyed dict, and encodes str() of that dict via
    encode_protobuf() for use as the search 'params' argument.
    """

    def __init__(self):
        # One slot per category; 'features' is a list because several
        # feature filters may be combined, the rest hold a single dict.
        self.filters = {
            'upload_date': None,
            'type': None,
            'duration': None,
            'features': [],
            'sort_by': None
        }

    def set_filters(self, filter_dict):
        """
        Applies multiple filters at once using a dictionary.

        :param dict filter_dict:
            Maps category names (e.g. 'type', 'features') to the dicts
            produced by the static helpers below. 'features' values may be
            a single dict or a list of dicts.
        """
        for category, value in filter_dict.items():
            if category == 'features':
                if isinstance(value, list):
                    logger.debug("Filter features is a list")
                    self.filters['features'].extend(value)
                else:
                    self.filters['features'].append(value)
            else:
                # NOTE(review): unknown category names are stored silently
                # rather than rejected -- confirm this is intended.
                self.filters[category] = value

    def clear_filters(self):
        """
        Clear all filters
        """
        for category in self.filters:
            if category == 'features':
                self.filters[category] = []
            else:
                self.filters[category] = None

    def get_filters_params(self):
        """
        Combines selected filters into a final structure

        :rtype: str
        :returns:
            The combined filters encoded by encode_protobuf().
        """
        combined = {}

        # sort_by lives at the top level (key 1); everything else nests
        # under key 2.
        if self.filters['sort_by']:
            combined.update(self.filters['sort_by'])

        combined[2] = {}

        if self.filters['type']:
            combined[2].update(self.filters['type'])

        if self.filters['duration']:
            combined[2].update(self.filters['duration'])

        if self.filters['features']:
            for feature in self.filters['features']:
                combined[2].update(feature)

        if self.filters['upload_date']:
            combined[2].update(self.filters['upload_date'])

        # Sort the inner keys so the str() representation is deterministic;
        # NOTE(review): encode_protobuf consumes str(combined), so insertion
        # order of 'combined' itself also matters -- do not reorder above.
        combined[2] = dict(sorted(combined.get(2, {}).items()))

        logger.debug(f"Combined filters: {combined}")

        encoded_filters = encode_protobuf(str(combined))

        logger.debug(f"Filter encoded in protobuf: {encoded_filters}")

        return encoded_filters

    @staticmethod
    def get_upload_date(option: str) -> dict:
        """
        Last Hour,
        Today,
        This Week,
        This Month,
        This Year

        :returns: the protobuf key/value pair for the option, or None.
        """
        filters = {
            "Last Hour": {1: 1},
            "Today": {1: 2},
            "This Week": {1: 3},
            "This Month": {1: 4},
            "This Year": {1: 5},
        }
        return filters.get(option)

    @staticmethod
    def get_type(option: str) -> dict:
        """
        Video,
        Channel,
        Playlist,
        Movie

        :returns: the protobuf key/value pair for the option, or None.
        """
        filters = {
            "Video": {2: 1},
            "Channel": {2: 2},
            "Playlist": {2: 3},
            "Movie": {2: 4},
        }
        return filters.get(option)

    @staticmethod
    def get_duration(option: str) -> dict:
        """
        Under 4 minutes,
        Over 20 minutes,
        4 - 20 minutes

        :returns: the protobuf key/value pair for the option, or None.
        """
        filters = {
            "Under 4 minutes": {3: 1},
            "Over 20 minutes": {3: 2},
            "4 - 20 minutes": {3: 3},
        }
        return filters.get(option)

    @staticmethod
    def get_features(option: str) -> dict:
        """
        Live,
        4K,
        HD,
        Subtitles/CC,
        Creative Commons,
        360,
        VR180,
        3D,
        HDR,
        Location,
        Purchased

        :returns: the protobuf key/value pair for the option, or None.
        """
        filters = {
            "Live": {8: 1},
            "4K": {14: 1},
            "HD": {4: 1},
            "Subtitles/CC": {5: 1},
            "Creative Commons": {6: 1},
            "360": {15: 1},
            "VR180": {26: 1},
            "3D": {7: 1},
            "HDR": {25: 1},
            "Location": {23: 1},
            "Purchased": {9: 1},
        }
        return filters.get(option)

    @staticmethod
    def get_sort_by(option: str) -> dict:
        """
        Relevance,
        Upload date,
        View count,
        Rating

        :returns: the protobuf key/value pair for the option, or None.
        """
        filters = {
            "Relevance": {1: 0},
            "Upload date": {1: 2},
            "View count": {1: 3},
            "Rating": {1: 1},
        }
        return filters.get(option)
|
pytubefix/exceptions.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Library specific exception definitions."""
|
| 2 |
+
from typing import Pattern, Union
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
class PytubeFixError(Exception):
    """Base pytubefix exception that all others inherit.

    A dedicated root exception keeps library errors out of the built-in
    exception hierarchy, which *could* otherwise result in unintended
    errors being unexpectedly and incorrectly handled within implementers
    code.
    """
|
| 15 |
+
### MISC Errors ###
|
| 16 |
+
|
| 17 |
+
class MaxRetriesExceeded(PytubeFixError):
    """Raised once the maximum number of retries has been exceeded."""
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class HTMLParseError(PytubeFixError):
    """Raised when HTML could not be parsed."""
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ExtractError(PytubeFixError):
    """Base class for data-extraction failures."""
|
| 27 |
+
|
| 28 |
+
class SABRError(PytubeFixError):
    """SABR-related error carrying a plain message."""

    def __init__(self, msg: str):
        """
        :param str msg:
            Human-readable description of the failure.
        """
        self.msg = msg
        super().__init__(msg)

    @property
    def error_string(self):
        return self.msg
|
| 36 |
+
|
| 37 |
+
class RegexMatchError(ExtractError):
    """Regex pattern did not return any matches."""

    def __init__(self, caller: str, pattern: Union[str, Pattern]):
        """
        :param str caller:
            Calling function
        :param str pattern:
            Pattern that failed to match
        """
        super().__init__(f"{caller}: could not find match for {pattern}")
        self.caller = caller
        self.pattern = pattern
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class InterpretationError(PytubeFixError):
    """Raised when the player javascript could not be interpreted."""

    def __init__(self, js_url: str):
        """
        :param str js_url:
            URL of the player javascript that failed to interpret.
        """
        self.js_url = js_url
        super().__init__(self.error_string)

    @property
    def error_string(self):
        return f'Error interpreting player js: {self.js_url}'
|
| 63 |
+
|
| 64 |
+
### Video Unavailable Errors ###
|
| 65 |
+
# There are really 3 types of errors thrown
|
| 66 |
+
# 1. VideoUnavailable - This is the base error type for all video errors.
|
| 67 |
+
# Or a catchall if neither the user or developer cares about the specific error.
|
| 68 |
+
# 2. Known Error Type, Extra info useful for user
|
| 69 |
+
# 3. Unknown Error Type, Important to Developer
|
| 70 |
+
|
| 71 |
+
## 1. VideoUnavailable ##
|
| 72 |
+
|
| 73 |
+
class VideoUnavailable(PytubeFixError):
    """
    Base video error.

    This is the base error type for all video errors.

    Call this if you can't group the error by known error type and it is not important to the developer.
    """

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        # video_id must be assigned before error_string is evaluated below:
        # error_string is a property that subclasses override, and every
        # override formats self.video_id into its message.
        self.video_id = video_id
        super().__init__(self.error_string)

    @property
    def error_string(self):
        # Subclasses override this to customise the exception message.
        return f'{self.video_id} is unavailable'
|
| 93 |
+
|
| 94 |
+
## 2. Known Error Type, Extra info useful for user ##
|
| 95 |
+
|
| 96 |
+
class VideoPrivate(VideoUnavailable):
    """Raised when the requested video is private."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        # VideoUnavailable.__init__ stores video_id before it formats
        # error_string, so no local assignment is needed first.
        super().__init__(video_id)

    @property
    def error_string(self):
        return f'{self.video_id} is a private video'
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class MembersOnly(VideoUnavailable):
    """Video is members-only.

    YouTube has special videos that are only viewable to users who have
    subscribed to a content creator.
    ref: https://support.google.com/youtube/answer/7544492?hl=en
    """

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(video_id)

    @property
    def error_string(self):
        return f'{self.video_id} is a members-only video'
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
class VideoRegionBlocked(VideoUnavailable):
    """Raised when the video is blocked in the caller's region."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(video_id)

    @property
    def error_string(self):
        return f'{self.video_id} is not available in your region'
|
| 143 |
+
|
| 144 |
+
class BotDetection(VideoUnavailable):
    """Raised when YouTube flagged the request as coming from a bot."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(video_id)

    @property
    def error_string(self):
        return (
            f'{self.video_id} This request was detected as a bot. Use `use_po_token=True` or switch to WEB client to view. '
            f'See more details at https://github.com/JuanBindez/pytubefix/pull/209')
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class PoTokenRequired(VideoUnavailable):
    """Raised when the chosen client cannot stream without a PoToken."""

    def __init__(self, video_id: str, client_name: str):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str client_name:
            A YouTube client identifier.
        """
        # client_name must be set before super().__init__, which formats
        # error_string (and error_string reads self.client_name).
        self.client_name = client_name
        super().__init__(video_id)

    @property
    def error_string(self):
        return (
            f'{self.video_id} The {self.client_name} client requires PoToken to obtain functional streams, '
            f'See more details at https://github.com/JuanBindez/pytubefix/pull/209')
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
class LoginRequired(VideoUnavailable):
    """Raised when YouTube requires a signed-in session to view the video."""

    def __init__(self, video_id: str, reason: str):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str reason:
            Reason string reported by YouTube.
        """
        # reason must be set before super().__init__ formats error_string.
        self.reason = reason
        super().__init__(video_id)

    @property
    def error_string(self):
        return (
            f'{self.video_id} requires login to view, YouTube reason: {self.reason}')
|
| 193 |
+
|
| 194 |
+
# legacy livestream error types still supported
|
| 195 |
+
|
| 196 |
+
class RecordingUnavailable(VideoUnavailable):
    """Raised when a live stream has no recording available."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(video_id)

    @property
    def error_string(self):
        return f'{self.video_id} does not have a live stream recording available'
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
class LiveStreamError(VideoUnavailable):
    """Video is a live stream."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(video_id)

    @property
    def error_string(self):
        return f'{self.video_id} is streaming live and cannot be loaded'
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
class LiveStreamOffline(VideoUnavailable):
    """The live will start soon"""

    def __init__(self, video_id: str, reason: str):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str reason:
            reason for the error
        """
        # reason must be set before super().__init__ formats error_string.
        self.reason = reason
        super().__init__(video_id)

    @property
    def error_string(self):
        return f'{self.video_id} {self.reason}'
|
| 243 |
+
|
| 244 |
+
# legacy age restricted error types still supported
|
| 245 |
+
|
| 246 |
+
class AgeRestrictedError(VideoUnavailable):
    """Video is age restricted, and cannot be accessed without OAuth."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(video_id)

    @property
    def error_string(self):
        return f"{self.video_id} is age restricted, and can't be accessed without logging in."
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
class AgeCheckRequiredError(VideoUnavailable):
    """Raised when an explicit age confirmation is required."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(video_id)

    @property
    def error_string(self):
        return f"{self.video_id} has age restrictions and cannot be accessed without confirmation."
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
class AgeCheckRequiredAccountError(VideoUnavailable):
    """Raised when age confirmation requires a signed-in primary account."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(video_id)

    @property
    def error_string(self):
        return (
            f"{self.video_id} may be inappropriate for "
            f"some users. Sign in to your primary account to confirm your age.")
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
class InnerTubeResponseError(VideoUnavailable):
    """Raised when an innertube client received no response from YouTube."""

    def __init__(self, video_id: str, client: str):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str client:
            The innertube client that made the request.
        """
        # client must be set before super().__init__ formats error_string.
        self.client = client
        super().__init__(video_id)

    @property
    def error_string(self):
        return (
            f"{self.video_id} : {self.client} client did not receive a response from YouTube")
|
| 306 |
+
|
| 307 |
+
## 3. Unknown Error Type, Important to Developer ##
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
class UnknownVideoError(VideoUnavailable):
    """Unknown video error."""

    def __init__(self, video_id: str, status: str = None, reason: str = None, developer_message: str = None):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str status:
            The status code of the response.
        :param str reason:
            The reason for the error.
        :param str developer_message:
            The message from the developer.
        """
        self.video_id = video_id
        self.status = status
        self.reason = reason
        self.developer_message = developer_message

        # Emit the full diagnostic context as warnings so users can attach
        # it to a bug report; this happens on construction, before raising.
        logger.warning('Unknown Video Error')
        logger.warning(f'Video ID: {self.video_id}')
        logger.warning(f'Status: {self.status}')
        logger.warning(f'Reason: {self.reason}')
        logger.warning(f'Developer Message: {self.developer_message}')
        logger.warning(
            'Please open an issue at '
            'https://github.com/JuanBindez/pytubefix/issues '
            'and provide the above log output.'
        )

        super().__init__(self.video_id)

    @property
    def error_string(self):
        return f'{self.video_id} has an unknown error, check logs for more info [Status: {self.status}] [Reason: {self.reason}]'
|
pytubefix/extract.py
ADDED
|
@@ -0,0 +1,646 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This module contains all non-cipher related data extraction logic."""
|
| 2 |
+
import logging
|
| 3 |
+
import urllib.parse
|
| 4 |
+
import re
|
| 5 |
+
from collections import OrderedDict
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 8 |
+
from urllib.parse import parse_qs, quote, urlencode, urlparse
|
| 9 |
+
|
| 10 |
+
from pytubefix.cipher import Cipher
|
| 11 |
+
from pytubefix.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
|
| 12 |
+
from pytubefix.helpers import regex_search
|
| 13 |
+
from pytubefix.metadata import YouTubeMetadata
|
| 14 |
+
from pytubefix.parser import parse_for_object, parse_for_all_objects
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def publish_date(watch_html: str):
    """Extract publish date and return it as a datetime object.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: datetime
    :returns:
        Publish date of the video as a timezone-aware datetime object,
        or None when no publish date is present in the page.
    """
    # The former ``except AttributeError`` handler was unreachable: the
    # only attribute access (``result.group``) is guarded by ``if result``.
    result = re.search(
        r"(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2}",
        watch_html
    )
    # The matched text is a complete ISO-8601 timestamp with a UTC offset,
    # so it parses directly via fromisoformat.
    if result:
        return datetime.fromisoformat(result.group(0))
    return None
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def recording_available(watch_html):
    """Check whether a live stream recording is available.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        False when YouTube reports the recording as unavailable,
        True otherwise.
    """
    unavailable_markers = (
        'This live stream recording is not available.',
    )
    # Available unless any "unavailable" marker appears in the page.
    return not any(marker in watch_html for marker in unavailable_markers)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def is_private(watch_html):
    """Check if content is private.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is private.
    """
    private_markers = (
        "This is a private video. Please sign in to verify that you may see it.",
        "\"simpleText\":\"Private video\"",
        "This video is private."
    )
    # Private if any of the known marker strings appears in the page.
    return any(marker in watch_html for marker in private_markers)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def is_age_restricted(watch_html: str) -> bool:
    """Check if content is age restricted.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is age restricted.
    """
    # regex_search raises RegexMatchError when the marker is absent.
    try:
        regex_search(r"og:restrictions:age", watch_html, group=0)
    except RegexMatchError:
        return False
    else:
        return True
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def playability_status(player_response: dict) -> Tuple[Any, Any]:
    """Return the playability status and status explanation of a video.

    For example, a video may have a status of LOGIN_REQUIRED, and an
    explanation of "This is a private video. Please sign in to verify
    that you may see it."

    This explanation is what gets incorporated into the media player
    overlay.

    :param dict player_response:
        Content of the player's response.
    :rtype: tuple
    :returns:
        Playability status and reason of the video.
    """
    # liveStreamability is no longer used to detect live videos because
    # some clients return it even when the video is already available.
    # Private videos do not contain videoDetails at all.
    if 'videoDetails' in player_response:
        if 'isLive' in player_response['videoDetails']:
            return 'LIVE_STREAM', 'Video is a live stream.'

    status_dict = player_response.get('playabilityStatus', {})
    if 'status' in status_dict:
        status = status_dict['status']
        if 'reason' in status_dict:
            return status, [status_dict['reason']]
        if 'messages' in status_dict:
            return status, status_dict['messages']

    return None, [None]
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def signature_timestamp(js: str) -> str:
    """Extract the ``signatureTimestamp`` (sts) value from base.js."""
    sts = regex_search(r"signatureTimestamp:(\d*)", js, group=1)
    return sts
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def visitor_data(response_context: str) -> str:
    """Extract the visitor_data value from a response context blob."""
    pattern = r"visitor_data[',\"\s]+value['\"]:\s?['\"]([a-zA-Z0-9_%-]+)['\"]"
    return regex_search(pattern, response_context, group=1)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def video_id(url: str) -> str:
    """Extract the ``video_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/watch?v={video_id}`
    - :samp:`https://youtube.com/embed/{video_id}`
    - :samp:`https://youtu.be/{video_id}`

    :param str url:
        A YouTube url containing a video id.
    :rtype: str
    :returns:
        YouTube video id.
    """
    # A video id is always exactly 11 url-safe base64 characters,
    # preceded by either "v=" or a path separator.
    pattern = r"(?:v=|\/)([0-9A-Za-z_-]{11}).*"
    return regex_search(pattern, url, group=1)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def playlist_id(url: str) -> str:
    """Extract the ``playlist_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/playlist?list={playlist_id}`
    - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}`

    :param str url:
        A YouTube url containing a playlist id.
    :rtype: str
    :returns:
        YouTube playlist id.
    """
    query = urlparse(url).query
    # parse_qs yields a list per key; the first "list" value is the id.
    return parse_qs(query)['list'][0]
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def channel_name(url: str) -> str:
    """Extract the ``channel_name`` or ``channel_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/c/{channel_name}/*`
    - :samp:`https://youtube.com/channel/{channel_id}/*`
    - :samp:`https://youtube.com/u/{channel_name}/*`
    - :samp:`https://youtube.com/user/{channel_id}/*`
    - :samp:`https://youtube.com/@{channel_id}/*`

    :param str url:
        A YouTube url containing a channel name.
    :rtype: str
    :returns:
        YouTube channel name.
    """
    patterns = [
        r"(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)",
        r"(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)",
        r"(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)",
        r"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)",
        r"(?:\/(\@)([%\d\w_\-\.]+)(\/.*)?)"
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match is None:
            continue
        logger.debug("finished regex search, matched: %s", pattern)
        uri_style = match.group(1)
        uri_identifier = match.group(2)
        # Handle-style urls keep the "@" glued to the identifier;
        # every other style separates them with "/".
        if uri_style == '@':
            return f'/{uri_style}{uri_identifier}'
        return f'/{uri_style}/{uri_identifier}'

    raise RegexMatchError(
        caller="channel_name", pattern="patterns"
    )
|
| 202 |
+
|
| 203 |
+
def video_info_url(video_id: str, watch_url: str) -> str:
    """Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str watch_url:
        A YouTube watch url.
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    """
    # OrderedDict keeps the query parameter order deterministic.
    params = OrderedDict(
        [
            ("video_id", video_id),
            ("ps", "default"),
            ("eurl", quote(watch_url)),
            ("hl", "en_US"),
            ("html5", "1"),
            ("c", "TVHTML5"),
            ("cver", "7.20201028"),
        ]
    )
    return f"https://www.youtube.com/get_video_info?{urlencode(params)}"
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def video_info_url_age_restricted(video_id: str, embed_html: str) -> str:
    """Construct the video_info url for age restricted videos.

    :param str video_id:
        A YouTube video identifier.
    :param str embed_html:
        The html contents of the embed page (for age restricted videos).
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    """
    # The sts value is optional; fall back to an empty string when the
    # embed page does not expose it.
    try:
        sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1)
    except RegexMatchError:
        sts = ""
    # OrderedDict keeps the query parameter order deterministic.
    params = OrderedDict(
        [
            ("video_id", video_id),
            ("eurl", f"https://youtube.googleapis.com/v/{video_id}"),
            ("sts", sts),
            ("html5", "1"),
            ("c", "TVHTML5"),
            ("cver", "7.20201028"),
        ]
    )
    return _video_info_url(params)
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def _video_info_url(params: OrderedDict) -> str:
|
| 262 |
+
return f"https://www.youtube.com/get_video_info?{urlencode(params)}"
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def js_url(html: str) -> str:
    """Get the base JavaScript url.

    Construct the base JavaScript url, which contains the decipher
    "transforms".

    :param str html:
        The html contents of the watch page.
    """
    try:
        base_js = get_ytplayer_config(html)['assets']['js']
    except (KeyError, RegexMatchError):
        # The player config did not expose the asset path; scrape the
        # base.js path directly from the html instead.
        base_js = get_ytplayer_js(html)
    return "https://youtube.com" + base_js
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]:
    """Parse the type data.

    Breaks up the data in the ``type`` key of the manifest, which contains
    the mime type and codecs serialized together, and splits them into
    separate elements.

    **Example**:

    mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus'])

    :param str mime_type_codec:
        String containing mime type and codecs.
    :rtype: tuple
    :returns:
        The mime type and a list of codecs.
    """
    pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\""
    match = re.search(pattern, mime_type_codec)
    if match is None:
        raise RegexMatchError(caller="mime_type_codec", pattern=pattern)
    mime_type, codec_str = match.groups()
    codecs = [codec.strip() for codec in codec_str.split(",")]
    return mime_type, codecs
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def get_ytplayer_js(html: str) -> Any:
    """Get the YouTube player base JavaScript path.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Path to YouTube's base.js file.
    """
    js_url_patterns = [
        r"(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)"
    ]
    for pattern in js_url_patterns:
        match = re.search(pattern, html)
        if match:
            logger.debug("finished regex search, matched: %s", pattern)
            yt_player_js = match.group(1)
            logger.debug("player JS: " + yt_player_js)
            return yt_player_js

    raise RegexMatchError(
        caller="get_ytplayer_js", pattern="js_url_patterns"
    )
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def get_ytplayer_config(html: str) -> Any:
    """Get the YouTube player configuration data from the watch html.

    Extract the ``ytplayer_config``, which is json data embedded within the
    watch html and serves as the primary source of obtaining the stream
    manifest data.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    """
    logger.debug("finding initial function name")
    # Try each embedding style consecutively until one parses.
    for pattern in (
        r"ytplayer\.config\s*=\s*",
        r"ytInitialPlayerResponse\s*=\s*",
    ):
        try:
            return parse_for_object(html, pattern)
        except HTMLParseError as e:
            logger.debug(f'Pattern failed: {pattern}')
            logger.debug(e)

    # setConfig() needs to be handled a little differently: the entire
    # argument to setConfig() is parsed and loaded as json to find
    # PLAYER_CONFIG inside of it.
    for pattern in (r"yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*",):
        try:
            return parse_for_object(html, pattern)
        except HTMLParseError:
            pass

    raise RegexMatchError(
        caller="get_ytplayer_config", pattern="config_patterns, setconfig_patterns"
    )
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def get_ytcfg(html: str) -> dict:
    """Get the entirety of the ytcfg object.

    This is built over multiple pieces, so we have to find all matches and
    combine the dicts together.

    :param str html:
        The html contents of the watch page.
    :rtype: dict
    :returns:
        Combined ytcfg configuration dictionary.

    .. note::
        Return annotation corrected from ``str`` to ``dict`` - the
        function has always returned the merged dict.
    """
    ytcfg = {}
    ytcfg_patterns = [
        r"ytcfg\s=\s",
        r"ytcfg\.set\("
    ]
    for pattern in ytcfg_patterns:
        # Try each pattern consecutively and try to build a cohesive object
        try:
            found_objects = parse_for_all_objects(html, pattern)
            for obj in found_objects:
                ytcfg.update(obj)
        except HTMLParseError:
            continue

    if ytcfg:  # there is at least one item
        return ytcfg

    # "ytcfg_pattenrs" typo fixed so the error message names the real
    # pattern list.
    raise RegexMatchError(
        caller="get_ytcfg", pattern="ytcfg_patterns"
    )
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
def apply_po_token(stream_manifest: Dict, vid_info: Dict, po_token: str) -> None:
    """Apply the proof of origin token to the stream manifest.

    Appends the ``pot`` query parameter to every stream url, in place.

    :param dict stream_manifest:
        Details of the media streams available.
    :param dict vid_info:
        Parsed video info; used to detect live streams.
    :param str po_token:
        Proof of Origin Token.
    :raises LiveStreamError:
        If a stream has no url because the video is a live stream.
    """
    logger.debug('Applying poToken')
    for i, stream in enumerate(stream_manifest):
        try:
            url: str = stream["url"]
        except KeyError:
            live_stream = (
                vid_info.get("playabilityStatus", {}, )
                .get("liveStreamability")
            )
            if live_stream:
                raise LiveStreamError("UNKNOWN")
            # Previously `url` was left unbound here, causing an
            # UnboundLocalError below; skip url-less streams instead.
            logger.debug('Stream %s has no url; skipping poToken', i)
            continue

        parsed_url = urlparse(url)

        # Convert query params off url to dict (parse_qs wraps every
        # value in a list; keep only the first occurrence).
        query_params = parse_qs(parsed_url.query)
        query_params = {
            k: v[0] for k, v in query_params.items()
        }

        query_params['pot'] = po_token

        url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}'

        stream_manifest[i]["url"] = url
|
| 447 |
+
|
| 448 |
+
|
| 449 |
+
def apply_signature(stream_manifest: Dict, vid_info: Dict, js: str, url_js: str) -> None:
    """Apply the decrypted signature to the stream manifest.

    Mutates each entry of ``stream_manifest`` in place, rewriting its
    ``url`` with a deciphered ``sig`` and/or de-throttled ``n`` query
    parameter as required.

    :param dict stream_manifest:
        Details of the media streams available.
    :param dict vid_info:
        Parsed video info; used to detect live streams when a stream has
        no url.
    :param str js:
        The contents of the base.js asset file.
    :param str url_js:
        Full base.js url
    :raises LiveStreamError:
        If a stream has no url and the video is a live stream.
    """
    cipher = Cipher(js=js, js_url=url_js)
    # Cache of already-deciphered "n" values; streams frequently share the
    # same initial "n", so each distinct value is interpreted only once.
    discovered_n = dict()
    for i, stream in enumerate(stream_manifest):
        try:
            url: str = stream["url"]
        except KeyError:
            live_stream = (
                vid_info.get("playabilityStatus", {}, )
                .get("liveStreamability")
            )
            if live_stream:
                raise LiveStreamError("UNKNOWN")
            # NOTE(review): if the video is NOT live, `url` is left unbound
            # here and the urlparse below raises UnboundLocalError - confirm
            # whether url-less non-live streams can reach this point.

        parsed_url = urlparse(url)

        # Convert query params off url to dict
        query_params = parse_qs(urlparse(url).query)
        query_params = {
            k: v[0] for k, v in query_params.items()
        }

        # 403 Forbidden fix.
        if "signature" in url or (
            "s" not in stream and ("&sig=" in url or "&lsig=" in url)
        ):
            # For certain videos, YouTube will just provide them pre-signed, in
            # which case there's no real magic to download them and we can skip
            # the whole signature descrambling entirely.
            logger.debug("signature found, skip decipher")

        else:
            # Descramble the ciphered "s" value into a valid signature.
            signature = cipher.get_signature(ciphered_signature=stream["s"])

            logger.debug(
                "finished descrambling signature for itag=%s", stream["itag"]
            )

            query_params['sig'] = signature

        if 'n' in query_params.keys():
            # For WEB-based clients, YouTube sends an "n" parameter that throttles download speed.
            # To decipher the value of "n", we must interpret the player's JavaScript.

            initial_n = query_params['n']
            logger.debug(f'Parameter n is: {initial_n}')

            # Check if any previous stream decrypted the parameter
            if initial_n not in discovered_n:
                discovered_n[initial_n] = cipher.get_throttling(initial_n)
            else:
                logger.debug('Parameter n found skipping decryption')

            new_n = discovered_n[initial_n]
            query_params['n'] = new_n
            logger.debug(f'Parameter n deciphered: {new_n}')

        # Rebuild the url with the updated query parameters.
        url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}'  # noqa:E501

        stream_manifest[i]["url"] = url
|
| 519 |
+
|
| 520 |
+
|
| 521 |
+
def apply_descrambler(stream_data: Dict) -> Optional[List[Dict]]:
    """Merge and normalize YouTube's media stream format data.

    Merges the ``formats`` and ``adaptiveFormats`` lists into a single
    list and ensures every stream dict carries a ``url`` key - extracted
    from ``signatureCipher`` when present, otherwise falling back to the
    shared ``serverAbrStreamingUrl`` (SABR). Also tags each stream with
    ``is_sabr``/``is_otf`` flags.

    The old docstring showed an obsolete two-argument call
    (``apply_descrambler(d, 'foo')``) and claimed pure in-place mutation;
    the function actually returns the merged list.

    :param dict stream_data:
        The ``streamingData`` dictionary from the player response.
    :rtype: list
    :returns:
        The merged list of stream dicts, or None when ``stream_data``
        already carries a top-level ``url``.
    """
    if 'url' in stream_data:
        return None

    # Merge formats and adaptiveFormats into a single list.
    formats: List[Dict] = []
    formats.extend(stream_data.get('formats', []))
    formats.extend(stream_data.get('adaptiveFormats', []))

    # Extract url and s from signatureCipher as necessary.
    for data in formats:
        if 'url' not in data and 'signatureCipher' in data:
            cipher_url = parse_qs(data['signatureCipher'])
            data['url'] = cipher_url['url'][0]
            data['s'] = cipher_url['s'][0]
            data['is_sabr'] = False
        elif 'url' not in data and 'signatureCipher' not in data:
            # SABR streams carry no per-stream url; the shared server ABR
            # url is expected to be present in this case (KeyError
            # otherwise, preserved intentionally as a loud failure).
            data['url'] = stream_data['serverAbrStreamingUrl']
            data['is_sabr'] = True
        data['is_otf'] = data.get('type') == 'FORMAT_STREAM_TYPE_OTF'

    logger.debug("applying descrambler")
    return formats
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
def initial_data(watch_html: str) -> dict:
    """Extract the ytInitialData json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    """
    # Try each known embedding style until one parses.
    for pattern in (
        r"window\[['\"]ytInitialData['\"]]\s*=\s*",
        r"ytInitialData\s*=\s*",
    ):
        try:
            return parse_for_object(watch_html, pattern)
        except HTMLParseError:
            continue

    raise RegexMatchError(caller='initial_data', pattern='initial_data_pattern')
|
| 585 |
+
|
| 586 |
+
|
| 587 |
+
def initial_player_response(watch_html: str) -> str:
    """Extract the ytInitialPlayerResponse json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    """
    # Try each known embedding style until one parses.
    for pattern in (
        r"window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*",
        r"ytInitialPlayerResponse\s*=\s*",
    ):
        try:
            return parse_for_object(watch_html, pattern)
        except HTMLParseError:
            continue

    raise RegexMatchError(
        caller='initial_player_response',
        pattern='initial_player_response_pattern'
    )
|
| 610 |
+
|
| 611 |
+
|
| 612 |
+
def metadata(initial_data) -> Optional[YouTubeMetadata]:
    """Get the informational metadata for the video.

    e.g.:
    [
        {
            'Song': '강남스타일(Gangnam Style)',
            'Artist': 'PSY',
            'Album': 'PSY SIX RULES Pt.1',
            'Licensed to YouTube by': 'YG Entertainment Inc. [...]'
        }
    ]

    :rtype: YouTubeMetadata
    """
    try:
        raw_rows: List = initial_data["contents"]["twoColumnWatchNextResults"][
            "results"]["results"]["contents"][1]["videoSecondaryInfoRenderer"][
            "metadataRowContainer"]["metadataRowContainerRenderer"]["rows"]
    except (KeyError, IndexError):
        # If there's an exception accessing this data, it probably doesn't exist.
        return YouTubeMetadata([])

    # Rows are either "metadataRowRenderer" or "metadataRowHeaderRenderer";
    # only the former carries the data of interest, so filter and unwrap
    # in a single pass.
    metadata_rows = [
        row["metadataRowRenderer"]
        for row in raw_rows
        if "metadataRowRenderer" in row
    ]

    return YouTubeMetadata(metadata_rows)
|
pytubefix/file_system.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
windows = ['Windows', 'NTFS', 'FAT32', 'exFAT', 'ReFS']
linux = ['Linux', 'ext2', 'ext3', 'ext4', 'Btrfs', 'XFS', 'ZFS']
macOS = ['macOS', 'APFS', 'HFS+']
bsd_unix = ['BSD', 'UFS']
network_filesystems = ['CIFS', 'SMB']


def _deletion_table(chars):
    """Build a str.translate table mapping each character in *chars* to ''."""
    return str.maketrans({ch: '' for ch in chars})


# Windows-style filesystems (and CIFS/SMB shares) forbid a wider set of
# characters; POSIX-style filesystems only forbid the path separator.
windows_translation = _deletion_table('\\/?:*"<>|')

linux_translation = _deletion_table('/')

macos_translation = _deletion_table('/')

bsd_translation = _deletion_table('/')

network_filesystems_translation = _deletion_table('\\/?:*"<>|')
|
| 44 |
+
|
| 45 |
+
def file_system_verify(file_type) -> dict:
    """
    Return a translation table that removes characters invalid on *file_type*.

    Supported file systems:
        - Windows: NTFS, FAT32, exFAT, ReFS
        - Linux: ext2, ext3, ext4, Btrfs, XFS, ZFS
        - macOS: APFS, HFS+
        - BSD/UNIX: UFS
        - Network Filesystems: CIFS, SMB

    Args:
        file_type (str): The type of file system being checked.

    Returns:
        dict: A translation table where invalid characters are mapped to an
        empty string, or None when the file system type is not recognized.
        (The previous docstring claimed a message is printed for unknown
        types; nothing is printed - None is returned.)

    Example:
        >>> ys = yt.streams.get_highest_resolution()
        >>> ys.download(file_system='ext4')
    """
    # Pair each file-system family with its translation table and return
    # the first family that contains the requested type.
    table_by_family = (
        (windows, windows_translation),
        (linux, linux_translation),
        (macOS, macos_translation),
        (bsd_unix, bsd_translation),
        (network_filesystems, network_filesystems_translation),
    )
    for family, translation in table_by_family:
        if file_type in family:
            return translation
    # Unrecognized file systems: no sanitization table available.
    return None
|