chipling committed on
Commit
44bafb2
·
verified ·
1 Parent(s): 9ff4ef1

Upload 106 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. pytubefix/.DS_Store +0 -0
  2. pytubefix/__cache__/tokens.json +1 -0
  3. pytubefix/__init__.py +23 -0
  4. pytubefix/__main__.py +992 -0
  5. pytubefix/__pycache__/__init__.cpython-311.pyc +0 -0
  6. pytubefix/__pycache__/__main__.cpython-311.pyc +0 -0
  7. pytubefix/__pycache__/buffer.cpython-311.pyc +0 -0
  8. pytubefix/__pycache__/captions.cpython-311.pyc +0 -0
  9. pytubefix/__pycache__/chapters.cpython-311.pyc +0 -0
  10. pytubefix/__pycache__/cipher.cpython-311.pyc +0 -0
  11. pytubefix/__pycache__/cli.cpython-311.pyc +0 -0
  12. pytubefix/__pycache__/exceptions.cpython-311.pyc +0 -0
  13. pytubefix/__pycache__/extract.cpython-311.pyc +0 -0
  14. pytubefix/__pycache__/file_system.cpython-311.pyc +0 -0
  15. pytubefix/__pycache__/helpers.cpython-311.pyc +0 -0
  16. pytubefix/__pycache__/info.cpython-311.pyc +0 -0
  17. pytubefix/__pycache__/innertube.cpython-311.pyc +0 -0
  18. pytubefix/__pycache__/itags.cpython-311.pyc +0 -0
  19. pytubefix/__pycache__/jsinterp.cpython-311.pyc +0 -0
  20. pytubefix/__pycache__/keymoments.cpython-311.pyc +0 -0
  21. pytubefix/__pycache__/metadata.cpython-311.pyc +0 -0
  22. pytubefix/__pycache__/monostate.cpython-311.pyc +0 -0
  23. pytubefix/__pycache__/parser.cpython-311.pyc +0 -0
  24. pytubefix/__pycache__/protobuf.cpython-311.pyc +0 -0
  25. pytubefix/__pycache__/query.cpython-311.pyc +0 -0
  26. pytubefix/__pycache__/request.cpython-311.pyc +0 -0
  27. pytubefix/__pycache__/streams.cpython-311.pyc +0 -0
  28. pytubefix/__pycache__/version.cpython-311.pyc +0 -0
  29. pytubefix/botGuard/.DS_Store +0 -0
  30. pytubefix/botGuard/__init__.py +0 -0
  31. pytubefix/botGuard/__pycache__/__init__.cpython-311.pyc +0 -0
  32. pytubefix/botGuard/__pycache__/bot_guard.cpython-311.pyc +0 -0
  33. pytubefix/botGuard/bot_guard.py +47 -0
  34. pytubefix/botGuard/vm/botGuard.js +0 -0
  35. pytubefix/buffer.py +48 -0
  36. pytubefix/captions.py +215 -0
  37. pytubefix/chapters.py +47 -0
  38. pytubefix/cipher.py +190 -0
  39. pytubefix/cli.py +355 -0
  40. pytubefix/contrib/__init__.py +0 -0
  41. pytubefix/contrib/__pycache__/__init__.cpython-311.pyc +0 -0
  42. pytubefix/contrib/__pycache__/channel.cpython-311.pyc +0 -0
  43. pytubefix/contrib/__pycache__/playlist.cpython-311.pyc +0 -0
  44. pytubefix/contrib/__pycache__/search.cpython-311.pyc +0 -0
  45. pytubefix/contrib/channel.py +655 -0
  46. pytubefix/contrib/playlist.py +496 -0
  47. pytubefix/contrib/search.py +557 -0
  48. pytubefix/exceptions.py +344 -0
  49. pytubefix/extract.py +646 -0
  50. pytubefix/file_system.py +80 -0
pytubefix/.DS_Store ADDED
Binary file (6.15 kB). View file
 
pytubefix/__cache__/tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"access_token": null, "refresh_token": null, "expires": null, "visitorData": "CgtYOTFud0twS0VmOCiD2OTCBjIKCgJJThIEGgAgQg%3D%3D", "po_token": "MnSZ2tgzlZc8xJni8Vz8ITSbjhi-7cChHWF4_eFiXnowkgRlC-yG3IdTqDT9PvJhNbcDK43DnBhNlOCBTjY1Y9aKFBYp_h0-yiT5TefusxCtFbd98AA4HdGX6XmAbvgLujXCQZj14n_wlWR9y3i4CZjjl8pmYg=="}
pytubefix/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa: F401
2
+ # noreorder
3
+ """
4
+ Pytubefix: a very serious Python library for downloading YouTube Videos.
5
+ """
6
+ __title__ = "pytubefix"
7
+ __author__ = "Juan Bindez"
8
+ __license__ = "MIT License"
9
+ __js__ = None
10
+ __js_url__ = None
11
+
12
+ from pytubefix.version import __version__
13
+ from pytubefix.streams import Stream
14
+ from pytubefix.captions import Caption
15
+ from pytubefix.chapters import Chapter
16
+ from pytubefix.keymoments import KeyMoment
17
+ from pytubefix.query import CaptionQuery, StreamQuery
18
+ from pytubefix.__main__ import YouTube
19
+ from pytubefix.contrib.playlist import Playlist
20
+ from pytubefix.contrib.channel import Channel
21
+ from pytubefix.contrib.search import Search
22
+ from pytubefix.info import info
23
+ from pytubefix.buffer import Buffer
pytubefix/__main__.py ADDED
@@ -0,0 +1,992 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+ #
3
+ # Copyright (c) 2023 - 2025 Juan Bindez <juanbindez780@gmail.com>
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+
24
+ """
25
+ This module implements the core developer interface for pytubefix.
26
+
27
+ The problem domain of the :class:`YouTube <YouTube>` class focuses almost
28
+ exclusively on the developer interface. Pytubefix offloads the heavy lifting to
29
+ smaller peripheral modules and functions.
30
+
31
+ """
32
+
33
+ import logging
34
+ from subprocess import CalledProcessError
35
+ from typing import Any, Callable, Dict, List, Optional, Tuple
36
+
37
+ import pytubefix
38
+ import pytubefix.exceptions as exceptions
39
+ from pytubefix import extract, request
40
+ from pytubefix import Stream, StreamQuery
41
+ from pytubefix.helpers import install_proxy
42
+ from pytubefix.innertube import InnerTube
43
+ from pytubefix.metadata import YouTubeMetadata
44
+ from pytubefix.monostate import Monostate
45
+ from pytubefix.botGuard import bot_guard
46
+
47
+ logger = logging.getLogger(__name__)
48
+
49
+
50
+ class YouTube:
51
+ """Core developer interface for pytubefix."""
52
+
53
def __init__(
        self,
        url: str,
        client: str = InnerTube().client_name,
        on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
        on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
        proxies: Optional[Dict[str, str]] = None,
        use_oauth: bool = False,
        allow_oauth_cache: bool = True,
        token_file: Optional[str] = None,
        oauth_verifier: Optional[Callable[[str, str], None]] = None,
        use_po_token: Optional[bool] = False,
        po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
):
    """Construct a :class:`YouTube <YouTube>`.

    The constructor only parses the video id out of ``url`` and stores the
    configuration; every ``_``-prefixed cache attribute starts out empty
    and is filled by the corresponding property on first access.

    :param str url:
        A valid YouTube watch URL.
    :param str client:
        (Optional) A YouTube client,
        Available:
            WEB, WEB_EMBED, WEB_MUSIC, WEB_CREATOR, WEB_SAFARI,
            ANDROID, ANDROID_MUSIC, ANDROID_CREATOR, ANDROID_VR, ANDROID_PRODUCER, ANDROID_TESTSUITE,
            IOS, IOS_MUSIC, IOS_CREATOR,
            MWEB, TV, TV_EMBED, MEDIA_CONNECT.
        The value is overridden with ``'WEB'`` when ``use_po_token`` is
        truthy, and then with ``'TV'`` when ``use_oauth`` is truthy.
    :param func on_progress_callback:
        (Optional) User defined callback function for stream download
        progress events.
    :param func on_complete_callback:
        (Optional) User defined callback function for stream download
        complete events.
    :param dict proxies:
        (Optional) A dict mapping protocol to proxy address which will be used by pytubefix.
    :param bool use_oauth:
        (Optional) Prompt the user to authenticate to YouTube.
        If allow_oauth_cache is set to True, the user should only be prompted once.
    :param bool allow_oauth_cache:
        (Optional) Cache OAuth and Po tokens locally on the machine. Defaults to True.
        These tokens are only generated if use_oauth is set to True as well.
    :param str token_file:
        (Optional) Path to the file where the OAuth and Po tokens will be stored.
        Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
    :param Callable oauth_verifier:
        (Optional) Verifier to be used for getting oauth tokens.
        Verification URL and User-Code will be passed to it respectively.
        (if passed, else default verifier will be used)
    :param bool use_po_token:
        (Optional) Prompt the user to use the proof of origin token on YouTube.
        It must be sent with the API along with the linked visitorData and
        then passed as a `po_token` query parameter to affected clients.
        If allow_oauth_cache is set to True, the user should only be prompted once.
        (Do not use together with `use_oauth=True`)
    :param Callable po_token_verifier:
        (Optional) Verifier used to obtain the visitorData and po_token.
        The verifier will return the visitorData and po_token respectively.
        (if passed, else default verifier will be used)
    """
    # js fetched by js_url
    self._js: Optional[str] = None

    # the url to the js, parsed from watch html
    self._js_url: Optional[str] = None

    # content fetched from innertube/player
    self._vid_info: Optional[Dict] = None
    self._vid_details: Optional[Dict] = None

    # the html of /watch?v=<video_id>
    self._watch_html: Optional[str] = None
    self._embed_html: Optional[str] = None

    # player configuration and age-restriction flag; both start unset
    self._player_config_args: Optional[Dict] = None
    self._age_restricted: Optional[bool] = None

    self._fmt_streams: Optional[List[Stream]] = None

    self._initial_data = None
    self._metadata: Optional[YouTubeMetadata] = None

    # video_id part of /watch?v=<video_id>
    self.video_id = extract.video_id(url)

    self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
    self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"

    # A user-supplied po_token forces the WEB client.
    self.client = 'WEB' if use_po_token else client

    # oauth can only be used by the TV and TV_EMBED client.
    self.client = 'TV' if use_oauth else self.client

    # Clients tried, in order, when the selected client cannot serve the video.
    self.fallback_clients = ['TV', 'IOS']

    self._signature_timestamp: dict = {}
    self._visitor_data = None

    # Shared between all instances of `Stream` (Borg pattern).
    self.stream_monostate = Monostate(
        on_progress=on_progress_callback, on_complete=on_complete_callback, youtube=self
    )

    if proxies:
        install_proxy(proxies)

    self._author = None
    self._title = None
    self._publish_date = None

    self.use_oauth = use_oauth
    self.allow_oauth_cache = allow_oauth_cache
    self.token_file = token_file
    self.oauth_verifier = oauth_verifier

    self.use_po_token = use_po_token
    self.po_token_verifier = po_token_verifier

    # po_token: the token actually sent with the player request (set by vid_info).
    # _pot: cache for the token generated by botGuard (see the `pot` property).
    self.po_token = None
    self._pot = None
171
+
172
+ def __repr__(self):
173
+ return f'<pytubefix.__main__.YouTube object: videoId={self.video_id}>'
174
+
175
+ def __eq__(self, o: object) -> bool:
176
+ # Compare types and urls, if they're same return true, else return false.
177
+ return type(o) == type(self) and o.watch_url == self.watch_url
178
+
179
+ @property
180
+ def watch_html(self):
181
+ if self._watch_html:
182
+ return self._watch_html
183
+ self._watch_html = request.get(url=self.watch_url)
184
+ return self._watch_html
185
+
186
+ @property
187
+ def embed_html(self):
188
+ if self._embed_html:
189
+ return self._embed_html
190
+ self._embed_html = request.get(url=self.embed_url)
191
+ return self._embed_html
192
+
193
+ @property
194
+ def age_restricted(self):
195
+ if self._age_restricted:
196
+ return self._age_restricted
197
+ self._age_restricted = extract.is_age_restricted(self.watch_html)
198
+ return self._age_restricted
199
+
200
+ @property
201
+ def js_url(self):
202
+ if self._js_url:
203
+ return self._js_url
204
+
205
+ if self.age_restricted:
206
+ self._js_url = extract.js_url(self.embed_html)
207
+ else:
208
+ self._js_url = extract.js_url(self.watch_html)
209
+
210
+ return self._js_url
211
+
212
+ @property
213
+ def js(self):
214
+ if self._js:
215
+ return self._js
216
+
217
+ # If the js_url doesn't match the cached url, fetch the new js and update
218
+ # the cache; otherwise, load the cache.
219
+ if pytubefix.__js_url__ != self.js_url:
220
+ self._js = request.get(self.js_url)
221
+ pytubefix.__js__ = self._js
222
+ pytubefix.__js_url__ = self.js_url
223
+ else:
224
+ self._js = pytubefix.__js__
225
+
226
+ return self._js
227
+
228
+ @property
229
+ def visitor_data(self) -> str:
230
+ """
231
+ Retrieves the visitorData from the WEB client.
232
+ """
233
+ if self._visitor_data:
234
+ return self._visitor_data
235
+
236
+ if InnerTube(self.client).require_po_token:
237
+ try:
238
+ logger.debug("Looking for visitorData in initial_data")
239
+ self._visitor_data = extract.visitor_data(str(self.initial_data['responseContext']))
240
+ logger.debug('VisitorData obtained successfully')
241
+ return self._visitor_data
242
+ except (KeyError, pytubefix.exceptions.RegexMatchError):
243
+ logger.debug("Unable to obtain visitorData from initial_data. Trying to request from the WEB client")
244
+
245
+ logger.debug("Looking for visitorData in InnerTube API")
246
+ innertube_response = InnerTube('WEB').player(self.video_id)
247
+ try:
248
+ self._visitor_data = innertube_response['responseContext']['visitorData']
249
+ except KeyError:
250
+ self._visitor_data = innertube_response['responseContext']['serviceTrackingParams'][0]['params'][6]['value']
251
+ logger.debug('VisitorData obtained successfully')
252
+
253
+ return self._visitor_data
254
+
255
+ @property
256
+ def pot(self) -> str:
257
+ """
258
+ Retrieves the poToken generated by botGuard.
259
+
260
+ This poToken only works for WEB-based clients.
261
+ """
262
+ if self._pot:
263
+ return self._pot
264
+ logger.debug('Invoking botGuard')
265
+ try:
266
+ self._pot = bot_guard.generate_po_token(visitor_data=self.visitor_data)
267
+ logger.debug('PoToken generated successfully')
268
+ except Exception as e:
269
+ logger.warning('Unable to run botGuard. Skipping poToken generation, reason: ' + e.__str__())
270
+ return self._pot
271
+
272
+ @property
273
+ def initial_data(self):
274
+ if self._initial_data:
275
+ return self._initial_data
276
+ self._initial_data = extract.initial_data(self.watch_html)
277
+ return self._initial_data
278
+
279
+ @property
280
+ def streaming_data(self):
281
+ """Return streamingData from video info."""
282
+
283
+ # List of YouTube error video IDs
284
+ invalid_id_list = ['aQvGIIdgFDM']
285
+
286
+ # If my previously valid video_info doesn't have the streamingData,
287
+ # or it is an invalid video,
288
+ # try to get a new video_info with a different client.
289
+ if 'streamingData' not in self.vid_info or self.vid_info['videoDetails']['videoId'] in invalid_id_list:
290
+ original_client = self.client
291
+
292
+ # for each fallback client set, revert videodata, and run check_availability, which
293
+ # will try to get a new video_info with a different client.
294
+ # if it fails try the next fallback client, and so on.
295
+ # If none of the clients have valid streamingData, raise an exception.
296
+ for client in self.fallback_clients:
297
+ self.client = client
298
+ self.vid_info = None
299
+ try:
300
+ self.check_availability()
301
+ except Exception as e:
302
+ continue
303
+ if 'streamingData' in self.vid_info:
304
+ break
305
+ if 'streamingData' not in self.vid_info:
306
+ raise exceptions.UnknownVideoError(video_id=self.video_id,
307
+ developer_message=f'Streaming data is missing, '
308
+ f'original client: {original_client}, '
309
+ f'fallback clients: {self.fallback_clients}')
310
+
311
+ return self.vid_info['streamingData']
312
+
313
@property
def fmt_streams(self):
    """Returns a list of streams if they have been initialized.

    If the streams have not been initialized, finds all relevant
    streams and initializes them.

    ``check_availability`` runs on every access, including when the cached
    list is returned. As a side effect of building the list, the shared
    ``stream_monostate`` receives the video title and duration.
    """
    self.check_availability()
    if self._fmt_streams:
        return self._fmt_streams

    self._fmt_streams = []

    stream_manifest = extract.apply_descrambler(self.streaming_data)
    inner_tube = InnerTube(self.client)
    # Attach the poToken only when one was recorded for this video.
    if self.po_token:
        extract.apply_po_token(stream_manifest, self.vid_info, self.po_token)

    if inner_tube.require_js_player:
        # If the cached js doesn't work, try fetching a new js file
        # https://github.com/pytube/pytube/issues/1054
        try:
            extract.apply_signature(stream_manifest, self.vid_info, self.js, self.js_url)
        except exceptions.ExtractError:
            # To force an update to the js file, we clear the cache and retry
            self._js = None
            self._js_url = None
            pytubefix.__js__ = None
            pytubefix.__js_url__ = None
            extract.apply_signature(stream_manifest, self.vid_info, self.js, self.js_url)

    # build instances of :class:`Stream <Stream>`
    # Initialize stream objects
    for stream in stream_manifest:
        video = Stream(
            stream=stream,
            monostate=self.stream_monostate,
            po_token=self.po_token,
            video_playback_ustreamer_config=self.video_playback_ustreamer_config
        )
        self._fmt_streams.append(video)

    self.stream_monostate.title = self.title
    self.stream_monostate.duration = self.length

    return self._fmt_streams
359
+
360
def check_availability(self):
    """Check whether the video is available.

    Raises different exceptions based on why the video is unavailable,
    otherwise does nothing.

    The status and the list of reason messages come from
    ``extract.playability_status(self.vid_info)``. Each message is matched
    against known reason strings for the reported status; an unrecognised
    status or ``ERROR`` reason raises ``exceptions.UnknownVideoError``.
    When the status is ``AGE_CHECK_REQUIRED`` and OAuth is enabled,
    ``age_check`` is called instead of raising.
    """
    status, messages = extract.playability_status(self.vid_info)

    # Only a warning: the request proceeds even without a poToken.
    if InnerTube(self.client).require_po_token and not self.po_token:
        logger.warning(f"The {self.client} client requires PoToken to obtain functional streams, "
                       f"See more details at https://github.com/JuanBindez/pytubefix/pull/209")

    # Nothing is checked when `messages` is empty.
    for reason in messages:
        if status == 'UNPLAYABLE':
            if reason == (
                    'Join this channel to get access to members-only content '
                    'like this video, and other exclusive perks.'
            ):
                raise exceptions.MembersOnly(video_id=self.video_id)

            elif reason == 'This live stream recording is not available.':
                raise exceptions.RecordingUnavailable(video_id=self.video_id)

            elif reason == (
                    'Sorry, something is wrong. This video may be inappropriate for some users. '
                    'Sign in to your primary account to confirm your age.'
            ):
                raise exceptions.AgeCheckRequiredAccountError(video_id=self.video_id)
            elif reason == (
                    'The uploader has not made this video available in your country'
            ):
                raise exceptions.VideoRegionBlocked(video_id=self.video_id)
            else:
                raise exceptions.VideoUnavailable(video_id=self.video_id)

        elif status == 'LOGIN_REQUIRED':
            if reason == (
                    'Sign in to confirm your age'
            ):
                raise exceptions.AgeRestrictedError(video_id=self.video_id)
            elif reason == (
                    'Sign in to confirm you’re not a bot'
            ):
                raise exceptions.BotDetection(video_id=self.video_id)
            else:
                raise exceptions.LoginRequired(video_id=self.video_id, reason=reason)

        elif status == 'AGE_CHECK_REQUIRED':
            # With OAuth the age gate can be confirmed instead of failing.
            if self.use_oauth:
                self.age_check()
            else:
                raise exceptions.AgeCheckRequiredError(video_id=self.video_id)

        elif status == 'LIVE_STREAM_OFFLINE':
            raise exceptions.LiveStreamOffline(video_id=self.video_id, reason=reason)

        elif status == 'ERROR':
            if reason == 'Video unavailable':
                raise exceptions.VideoUnavailable(video_id=self.video_id)
            elif reason == 'This video is private':
                raise exceptions.VideoPrivate(video_id=self.video_id)
            elif reason == 'This video is unavailable':
                raise exceptions.VideoUnavailable(video_id=self.video_id)
            elif reason == 'This video has been removed by the uploader':
                raise exceptions.VideoUnavailable(video_id=self.video_id)
            elif reason == 'This video is no longer available because the YouTube account associated with this video has been terminated.':
                raise exceptions.VideoUnavailable(video_id=self.video_id)
            else:
                raise exceptions.UnknownVideoError(video_id=self.video_id, status=status, reason=reason, developer_message=f'Unknown reason type for Error status')
        elif status == 'LIVE_STREAM':
            raise exceptions.LiveStreamError(video_id=self.video_id)
        elif status is None:
            # No status reported: nothing to check for this message.
            pass
        else:
            raise exceptions.UnknownVideoError(video_id=self.video_id, status=status, reason=reason, developer_message=f'Unknown video status')
435
+
436
+ @property
437
+ def signature_timestamp(self) -> dict:
438
+ """WEB clients need to be signed with a signature timestamp.
439
+
440
+ The signature is found inside the player's base.js.
441
+
442
+ :rtype: Dict
443
+ """
444
+ if not self._signature_timestamp:
445
+ self._signature_timestamp = {
446
+ 'playbackContext': {
447
+ 'contentPlaybackContext': {
448
+ 'signatureTimestamp': extract.signature_timestamp(self.js)
449
+ }
450
+ }
451
+ }
452
+ return self._signature_timestamp
453
+
454
+ @property
455
+ def video_playback_ustreamer_config(self):
456
+ return self.vid_info[
457
+ 'playerConfig'][
458
+ 'mediaCommonConfig'][
459
+ 'mediaUstreamerRequestConfig'][
460
+ 'videoPlaybackUstreamerConfig']
461
+
462
+ @property
463
+ def server_abr_streaming_url(self):
464
+ """
465
+ Extract the url for abr server and decrypt the `n` parameter
466
+ """
467
+ try:
468
+ url = self.vid_info[
469
+ 'streamingData'][
470
+ 'serverAbrStreamingUrl']
471
+ stream_manifest = [{"url": url}]
472
+ extract.apply_signature(stream_manifest, vid_info=self.vid_info, js=self.js, url_js=self.js_url)
473
+ return stream_manifest[0]["url"]
474
+ except Exception:
475
+ return None
476
+
477
@property
def vid_info(self):
    """Parse the raw vid info and return the parsed result.

    The player response is requested through ``InnerTube`` with the
    current client and cached in ``self._vid_info``. If the response is
    ``UNPLAYABLE`` with the reason ``This video is not available``, the
    request is repeated with each entry of ``self.fallback_clients`` in
    turn; this overwrites ``self.client``.

    :rtype: Dict[Any, Any]
    :raises pytubefix.exceptions.InnerTubeResponseError:
        If the final response is empty.
    """
    if self._vid_info:
        return self._vid_info

    def call_innertube():
        # Builds a fresh InnerTube for whatever `self.client` currently is,
        # so it can be re-invoked after the client is switched below.
        innertube = InnerTube(
            client=self.client,
            use_oauth=self.use_oauth,
            allow_cache=self.allow_oauth_cache,
            token_file=self.token_file,
            oauth_verifier=self.oauth_verifier,
            use_po_token=self.use_po_token,
            po_token_verifier=self.po_token_verifier
        )
        if innertube.require_js_player:
            innertube.innertube_context.update(self.signature_timestamp)

        # Automatically generates a poToken
        if innertube.require_po_token and not self.use_po_token:
            logger.debug(f"The {self.client} client requires poToken to obtain functional streams")
            logger.debug("Automatically generating poToken")
            innertube.insert_po_token(visitor_data=self.visitor_data, po_token=self.pot)
        elif not self.use_po_token:
            # from 01/22/2025 all clients must send the visitorData in the API request
            innertube.insert_visitor_data(visitor_data=self.visitor_data)

        response = innertube.player(self.video_id)

        # Retrieves the sent poToken
        if self.use_po_token or innertube.require_po_token:
            self.po_token = innertube.access_po_token or self.pot
        return response

    innertube_response = call_innertube()
    for client in self.fallback_clients:
        # Some clients are unable to access certain types of videos
        # If the video is unavailable for the current client, attempts will be made with fallback clients
        playability_status = innertube_response['playabilityStatus']
        if playability_status['status'] == 'UNPLAYABLE' and 'reason' in playability_status and playability_status['reason'] == 'This video is not available':
            logger.warning(f"{self.client} client returned: This video is not available")
            self.client = client
            logger.warning(f"Switching to client: {client}")
            innertube_response = call_innertube()
        else:
            # Any other outcome is final; stop trying fallback clients.
            break

    self._vid_info = innertube_response
    if not self._vid_info:
        raise pytubefix.exceptions.InnerTubeResponseError(self.video_id, self.client)

    return self._vid_info

@vid_info.setter
def vid_info(self, value):
    # Assigning a falsy value (e.g. None) makes the getter refetch on next access.
    self._vid_info = value
537
+
538
+ @property
539
+ def vid_details(self):
540
+ """Parse the raw vid details and return the parsed result.
541
+
542
+ The official player sends a request to the `next` endpoint to obtain some details of the video.
543
+
544
+ :rtype: Dict[Any, Any]
545
+ """
546
+ if self._vid_details:
547
+ return self._vid_details
548
+
549
+ innertube = InnerTube(
550
+ client='TV' if self.use_oauth else 'WEB',
551
+ use_oauth=self.use_oauth,
552
+ allow_cache=self.allow_oauth_cache,
553
+ token_file=self.token_file,
554
+ oauth_verifier=self.oauth_verifier,
555
+ use_po_token=self.use_po_token,
556
+ po_token_verifier=self.po_token_verifier
557
+ )
558
+ innertube_response = innertube.next(self.video_id)
559
+ self._vid_details = innertube_response
560
+ return self._vid_details
561
+
562
+ @vid_details.setter
563
+ def vid_details(self, value):
564
+ self._vid_details = value
565
+
566
def age_check(self):
    """If the video has any age restrictions, you must confirm that you wish to continue.

    Switches ``self.client`` to ``'TV'``, calls ``verify_age`` for the
    video and requests the player response again. On success the new
    response replaces the cached video info.

    :raises exceptions.AgeCheckRequiredAccountError:
        If the video is still ``UNPLAYABLE`` after the age verification.
    :raises exceptions.AgeCheckRequiredError:
        If the playability status is anything else other than ``OK``.
    """

    self.client = 'TV'
    innertube = InnerTube(
        client=self.client,
        use_oauth=self.use_oauth,
        allow_cache=self.allow_oauth_cache,
        token_file=self.token_file,
        oauth_verifier=self.oauth_verifier,
        use_po_token=self.use_po_token,
        po_token_verifier=self.po_token_verifier
    )

    if innertube.require_js_player:
        innertube.innertube_context.update(self.signature_timestamp)

    innertube.verify_age(self.video_id)

    innertube_response = innertube.player(self.video_id)

    playability_status = innertube_response['playabilityStatus'].get('status', None)

    # If we still can't access the video, raise an exception
    if playability_status != 'OK':
        if playability_status == 'UNPLAYABLE':
            raise exceptions.AgeCheckRequiredAccountError(self.video_id)
        else:
            raise exceptions.AgeCheckRequiredError(self.video_id)

    self._vid_info = innertube_response
600
+
601
@property
def caption_tracks(self) -> List[pytubefix.Caption]:
    """Get a list of :class:`Caption <Caption>`.

    A new player request is made on every access, using the WEB client
    unless OAuth is enabled (then the current client is used).

    :rtype: List[Caption]
    """
    client_name = self.client if self.use_oauth else 'WEB'
    innertube = InnerTube(
        client=client_name,
        use_oauth=self.use_oauth,
        allow_cache=self.allow_oauth_cache,
        token_file=self.token_file,
        oauth_verifier=self.oauth_verifier,
        use_po_token=self.use_po_token,
        po_token_verifier=self.po_token_verifier
    )
    player_response = innertube.player(self.video_id)

    # Each level defaults to an empty container, so a video without captions yields [].
    tracklist = player_response.get("captions", {}).get("playerCaptionsTracklistRenderer", {})
    return [pytubefix.Caption(track) for track in tracklist.get("captionTracks", [])]
624
+
625
@property
def captions(self) -> pytubefix.CaptionQuery:
    """Return a query interface over the caption tracks of this video.

    :rtype: :class:`CaptionQuery <CaptionQuery>`.
    """
    tracks = self.caption_tracks
    return pytubefix.CaptionQuery(tracks)
632
+
633
+ @property
634
+ def chapters(self) -> List[pytubefix.Chapter]:
635
+ """Get a list of :class:`Chapter <Chapter>`.
636
+
637
+ :rtype: List[Chapter]
638
+ """
639
+ try:
640
+ chapters_data = []
641
+ markers_map = self.initial_data['playerOverlays']['playerOverlayRenderer'][
642
+ 'decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']['playerBar'][
643
+ 'multiMarkersPlayerBarRenderer']['markersMap']
644
+ for marker in markers_map:
645
+ if marker['key'].upper() == 'DESCRIPTION_CHAPTERS':
646
+ chapters_data = marker['value']['chapters']
647
+ break
648
+ except (KeyError, IndexError):
649
+ return []
650
+
651
+ result: List[pytubefix.Chapter] = []
652
+
653
+ for i, chapter_data in enumerate(chapters_data):
654
+ chapter_start = int(
655
+ chapter_data['chapterRenderer']['timeRangeStartMillis'] / 1000
656
+ )
657
+
658
+ if i == len(chapters_data) - 1:
659
+ chapter_end = self.length
660
+ else:
661
+ chapter_end = int(
662
+ chapters_data[i + 1]['chapterRenderer']['timeRangeStartMillis'] / 1000
663
+ )
664
+
665
+ result.append(pytubefix.Chapter(chapter_data, chapter_end - chapter_start))
666
+
667
+ return result
668
+
669
@property
def key_moments(self) -> List[pytubefix.KeyMoment]:
    """Return the video's key moments as :class:`KeyMoment <KeyMoment>` objects.

    Scans the ``frameworkUpdates`` entity mutations in ``initial_data`` for
    the first markers list whose type is ``MARKER_TYPE_TIMESTAMPS``. An empty
    list is returned when no such list exists or an expected key is missing.

    :rtype: List[KeyMoment]
    """
    try:
        for entry in self.initial_data['frameworkUpdates']['entityBatchUpdate']['mutations']:
            marker_type = (
                entry.get('payload', {})
                .get('macroMarkersListEntity', {})
                .get('markersList', {})
                .get('markerType')
            )
            if marker_type == "MARKER_TYPE_TIMESTAMPS":
                moments_data = entry['payload']['macroMarkersListEntity']['markersList']['markers']
                break
        else:
            # The loop ended without a break: no timestamp markers present.
            return []
    except (KeyError, IndexError):
        return []

    last_index = len(moments_data) - 1
    moments: List[pytubefix.KeyMoment] = []
    for index, moment_data in enumerate(moments_data):
        start = int(moment_data['startMillis']) // 1000

        # A moment lasts until the next one starts; the last one lasts
        # until the end of the video.
        if index == last_index:
            end = self.length
        else:
            end = int(moments_data[index + 1]['startMillis']) // 1000

        moments.append(pytubefix.KeyMoment(moment_data, end - start))

    return moments
703
+
704
@property
def replayed_heatmap(self) -> List[Dict[str, float]]:
    """Return the "most replayed" heatmap as a list of dictionaries.

    Each dictionary has the keys ``start_seconds``, ``duration`` and
    ``norm_intensity``. An empty list is returned when ``initial_data`` holds
    no markers list of type ``MARKER_TYPE_HEATMAP`` or an expected key is
    missing.

    :rtype: List[Dict[str, float]]
    """
    try:
        for entry in self.initial_data['frameworkUpdates']['entityBatchUpdate']['mutations']:
            marker_type = (
                entry.get('payload', {})
                .get('macroMarkersListEntity', {})
                .get('markersList', {})
                .get('markerType')
            )
            if marker_type == "MARKER_TYPE_HEATMAP":
                segments = entry['payload']['macroMarkersListEntity']['markersList']['markers']
                break
        else:
            # The loop ended without a break: no heatmap markers present.
            return []
    except (KeyError, IndexError):
        return []

    # Millisecond fields are converted to (float) seconds.
    return [
        {
            "start_seconds": int(segment['startMillis']) / 1000,
            "duration": int(segment['durationMillis']) / 1000,
            "norm_intensity": float(segment['intensityScoreNormalized']),
        }
        for segment in segments
    ]
740
+
741
@property
def streams(self) -> StreamQuery:
    """Build a query interface over adaptive (DASH) and progressive streams.

    :rtype: :class:`StreamQuery <StreamQuery>`.
    """
    # Run the availability check before building the query.
    self.check_availability()
    available_streams = self.fmt_streams
    return StreamQuery(available_streams)
749
+
750
@property
def thumbnail_url(self) -> str:
    """Return the URL of the video thumbnail.

    Falls back to the ``maxresdefault.jpg`` URL built from the video id when
    ``vid_info`` lists no thumbnails.

    :rtype: str
    """
    video_details = self.vid_info.get("videoDetails", {})
    thumbnails = video_details.get("thumbnail", {}).get("thumbnails")

    if not thumbnails:
        return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg"

    # last item has max size
    largest = thumbnails[-1]
    return largest["url"]
766
+
767
@property
def publish_date(self):
    """Return the publish date, extracting it from the watch page once.

    The extracted value is stored in ``_publish_date`` and reused on later
    accesses.

    :rtype: datetime
    """
    if not self._publish_date:
        self._publish_date = extract.publish_date(self.watch_html)
    return self._publish_date


@publish_date.setter
def publish_date(self, value):
    """Override the stored publish date."""
    self._publish_date = value
782
+
783
@property
def title(self) -> str:
    """Get the video title.

    The title is looked up in ``vid_info`` first and, when absent there, in
    the ``vid_details`` watch-next results. The result is stored in
    ``_title`` and reused on later accesses.

    :rtype: str
    :raises PytubeFixError:
        If the expected keys or list entries are missing and
        ``check_availability`` does not raise a more specific exception.
    """
    # Populate the author as a convenience, but never overwrite a value that
    # is already stored (e.g. one assigned through the ``author`` setter).
    if not self._author:
        self._author = self.vid_info.get("videoDetails", {}).get(
            "author", "unknown"
        )

    if self._title:
        return self._title

    try:
        # Some clients may not return the title in the `player` endpoint,
        # so if it is not found we will look for it in the `next` endpoint
        if 'title' in self.vid_info['videoDetails']:
            self._title = self.vid_info['videoDetails']['title']
            logger.debug('Found title in vid_info')
        else:
            next_contents = self.vid_details['contents']

            if 'singleColumnWatchNextResults' in next_contents:
                contents = next_contents[
                    'singleColumnWatchNextResults'][
                    'results'][
                    'results'][
                    'contents'][0][
                    'itemSectionRenderer'][
                    'contents'][0]

                if 'videoMetadataRenderer' in contents:
                    self._title = contents['videoMetadataRenderer']['title']['runs'][0]['text']
                else:
                    # JSON tree for titles in videos available on YouTube music
                    self._title = contents['musicWatchMetadataRenderer']['title']['simpleText']

            # The type of video with this structure is not yet known.
            # First reported in: https://github.com/JuanBindez/pytubefix/issues/351
            elif 'twoColumnWatchNextResults' in next_contents:
                self._title = next_contents[
                    'twoColumnWatchNextResults'][
                    'results'][
                    'results'][
                    'contents'][0][
                    'videoPrimaryInfoRenderer'][
                    'title'][
                    'runs'][0][
                    'text']

            logger.debug('Found title in vid_details')
    except (KeyError, IndexError) as e:
        # An empty list in the response tree raises IndexError, which must be
        # reported the same way as a missing key.
        # Check_availability will raise the correct exception in most cases
        # if it doesn't, ask for a report.
        self.check_availability()
        raise exceptions.PytubeFixError(
            (
                f'Exception while accessing title of {self.watch_url}. '
                'Please file a bug report at https://github.com/JuanBindez/pytubefix'
            )
        ) from e

    return self._title


@title.setter
def title(self, value):
    """Sets the title value."""
    self._title = value
849
+
850
@property
def description(self) -> str:
    """Return the ``shortDescription`` field of the video details.

    ``None`` is returned when the field is absent.

    :rtype: str
    """
    video_details = self.vid_info.get("videoDetails", {})
    return video_details.get("shortDescription")
857
+
858
@property
def rating(self) -> float:
    """Return the ``averageRating`` field of the video details.

    ``None`` is returned when the field is absent.

    :rtype: float
    """
    video_details = self.vid_info.get("videoDetails", {})
    return video_details.get("averageRating")
866
+
867
@property
def length(self) -> int:
    """Return the video length in seconds.

    The value is read from the ``lengthSeconds`` field of the video details
    and converted with ``int``.

    :rtype: int
    """
    video_details = self.vid_info.get('videoDetails', {})
    length_seconds = video_details.get('lengthSeconds')
    return int(length_seconds)
874
+
875
@property
def views(self) -> int:
    """Return how many times the video has been viewed.

    A missing ``viewCount`` field counts as zero views.

    :rtype: int
    """
    video_details = self.vid_info.get("videoDetails", {})
    view_count = video_details.get("viewCount", "0")
    return int(view_count)
882
+
883
@property
def author(self) -> str:
    """Return the video author, reading it from ``vid_info`` once.

    The value is stored in ``_author``; ``"unknown"`` is used when the video
    details carry no author.

    :rtype: str
    """
    if not self._author:
        video_details = self.vid_info.get("videoDetails", {})
        self._author = video_details.get("author", "unknown")
    return self._author


@author.setter
def author(self, value):
    """Set the video author."""
    self._author = value
899
+
900
@property
def keywords(self) -> List[str]:
    """Return the video keywords, or an empty list when none are present.

    :rtype: List[str]
    """
    video_details = self.vid_info.get('videoDetails', {})
    return video_details.get('keywords', [])
907
+
908
@property
def channel_id(self) -> str:
    """Return the channel id of the video's poster.

    ``None`` is returned when the video details carry no ``channelId``.

    :rtype: str
    """
    video_details = self.vid_info.get('videoDetails', {})
    return video_details.get('channelId')
915
+
916
@property
def channel_url(self) -> str:
    """Build the channel URL of the video's poster from ``channel_id``.

    :rtype: str
    """
    poster_channel_id = self.channel_id
    return f'https://www.youtube.com/channel/{poster_channel_id}'
923
+
924
@property
def likes(self):
    """Return the like count stored in the watch-next results.

    The value is read from the first top-level button of the primary info
    renderer. ``None`` is returned when any key or list entry along the path
    is missing.

    :rtype: str
    """
    # Keys and list indexes leading from ``vid_details`` to the like count.
    path = (
        'contents',
        'twoColumnWatchNextResults',
        'results',
        'results',
        'contents',
        0,
        'videoPrimaryInfoRenderer',
        'videoActions',
        'menuRenderer',
        'topLevelButtons',
        0,
        'segmentedLikeDislikeButtonViewModel',
        'likeCountEntity',
        'likeCountIfLikedNumber',
    )
    try:
        node = self.vid_details
        for step in path:
            node = node[step]
    except (KeyError, IndexError):
        return None
    return node
948
+
949
@property
def metadata(self) -> Optional[YouTubeMetadata]:
    """Return the video metadata, building it from ``initial_data`` once.

    The extracted object is stored in ``_metadata`` and reused on later
    accesses.

    :rtype: YouTubeMetadata
    """
    if self._metadata:
        return self._metadata

    # Creating the metadata
    self._metadata = extract.metadata(self.initial_data)
    return self._metadata
959
+
960
def register_on_progress_callback(self, func: Callable[[Any, bytes, int], None]):
    """Attach a download-progress callback after initialization.

    :param callable func:
        A callback function that takes ``stream``, ``chunk``,
        and ``bytes_remaining`` as parameters.

    :rtype: None

    """
    monostate = self.stream_monostate
    monostate.on_progress = func
971
+
972
def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], None]):
    """Attach a download-complete callback after initialization.

    :param callable func:
        A callback function that takes ``stream`` and ``file_path``.

    :rtype: None

    """
    monostate = self.stream_monostate
    monostate.on_complete = func
982
+
983
@staticmethod
def from_id(video_id: str) -> "YouTube":
    """Create a :class:`YouTube <YouTube>` object from a bare video id.

    :param str video_id:
        The video id of the YouTube video.

    :rtype: :class:`YouTube <YouTube>`
    """
    watch_url = f"https://www.youtube.com/watch?v={video_id}"
    return YouTube(watch_url)
pytubefix/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.28 kB). View file
 
pytubefix/__pycache__/__main__.cpython-311.pyc ADDED
Binary file (42.6 kB). View file
 
pytubefix/__pycache__/buffer.cpython-311.pyc ADDED
Binary file (2.91 kB). View file
 
pytubefix/__pycache__/captions.cpython-311.pyc ADDED
Binary file (10.8 kB). View file
 
pytubefix/__pycache__/chapters.cpython-311.pyc ADDED
Binary file (3.18 kB). View file
 
pytubefix/__pycache__/cipher.cpython-311.pyc ADDED
Binary file (9.97 kB). View file
 
pytubefix/__pycache__/cli.cpython-311.pyc ADDED
Binary file (21.2 kB). View file
 
pytubefix/__pycache__/exceptions.cpython-311.pyc ADDED
Binary file (18.3 kB). View file
 
pytubefix/__pycache__/extract.cpython-311.pyc ADDED
Binary file (25.1 kB). View file
 
pytubefix/__pycache__/file_system.cpython-311.pyc ADDED
Binary file (2.54 kB). View file
 
pytubefix/__pycache__/helpers.cpython-311.pyc ADDED
Binary file (17.4 kB). View file
 
pytubefix/__pycache__/info.cpython-311.pyc ADDED
Binary file (1.54 kB). View file
 
pytubefix/__pycache__/innertube.cpython-311.pyc ADDED
Binary file (24.6 kB). View file
 
pytubefix/__pycache__/itags.cpython-311.pyc ADDED
Binary file (4.05 kB). View file
 
pytubefix/__pycache__/jsinterp.cpython-311.pyc ADDED
Binary file (70.1 kB). View file
 
pytubefix/__pycache__/keymoments.cpython-311.pyc ADDED
Binary file (3.19 kB). View file
 
pytubefix/__pycache__/metadata.cpython-311.pyc ADDED
Binary file (2.68 kB). View file
 
pytubefix/__pycache__/monostate.cpython-311.pyc ADDED
Binary file (1.15 kB). View file
 
pytubefix/__pycache__/parser.cpython-311.pyc ADDED
Binary file (6.64 kB). View file
 
pytubefix/__pycache__/protobuf.cpython-311.pyc ADDED
Binary file (8.88 kB). View file
 
pytubefix/__pycache__/query.cpython-311.pyc ADDED
Binary file (24.1 kB). View file
 
pytubefix/__pycache__/request.cpython-311.pyc ADDED
Binary file (10.3 kB). View file
 
pytubefix/__pycache__/streams.cpython-311.pyc ADDED
Binary file (28.7 kB). View file
 
pytubefix/__pycache__/version.cpython-311.pyc ADDED
Binary file (323 Bytes). View file
 
pytubefix/botGuard/.DS_Store ADDED
Binary file (6.15 kB). View file
 
pytubefix/botGuard/__init__.py ADDED
File without changes
pytubefix/botGuard/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (211 Bytes). View file
 
pytubefix/botGuard/__pycache__/bot_guard.cpython-311.pyc ADDED
Binary file (2.82 kB). View file
 
pytubefix/botGuard/bot_guard.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import sys
4
+ import shutil
5
+ from typing import Optional
6
+
7
# Platform identifier reported by the interpreter (e.g. 'linux', 'darwin', 'win32').
PLATFORM = sys.platform

# Only Windows-style platforms name the executable with an ".exe" suffix.
# Every other platform (Linux, macOS, the BSDs, ...) uses the bare name.
NODE = 'node.exe' if PLATFORM.startswith(('win', 'cygwin', 'msys')) else 'node'
10
+
11
def _find_node_path() -> Optional[str]:
    """Locate the Node.js executable.

    Checks, in order: a ``binaries/`` directory next to this module, then the
    system ``PATH``. When neither yields a result the bare executable name is
    returned.
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    bundled = os.path.join(module_dir, f'binaries/{NODE}')
    if os.path.isfile(bundled):
        return bundled

    # shutil.which returns None when nothing is found on PATH.
    return shutil.which(NODE) or NODE
22
+
23
# Both paths are resolved once, when the module is imported.
NODE_PATH = _find_node_path()
VM_PATH = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    'vm/botGuard.js',
)
25
+
26
def generate_po_token(visitor_data: str) -> str:
    """
    Run the botGuard script with Node.js and return its output as the poToken.

    The script's standard output is decoded and returned with all newline
    characters removed.

    Raises:
        RuntimeError: If Node.js is not available or the script exits with
            a non-zero status.
    """
    command = [NODE_PATH, VM_PATH, visitor_data]
    try:
        raw_output = subprocess.check_output(command, stderr=subprocess.PIPE)
    except FileNotFoundError as e:
        raise RuntimeError(
            f"Node.js is required but not found. Tried path: {NODE_PATH}\n"
            "Please install Node.js or ensure it's in your PATH."
        ) from e
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"Failed to execute botGuard.js: {e.stderr.decode().strip()}"
        ) from e

    return raw_output.decode().replace("\n", "")
pytubefix/botGuard/vm/botGuard.js ADDED
The diff for this file is too large to render. See raw diff
 
pytubefix/buffer.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """This module implements a `Buffer` class for handling in-memory data storage, downloading streams,
2
+ and redirecting content to standard output (stdout)."""
3
+
4
+ import sys
5
+ import io
6
+
7
+
8
class Buffer:
    """In-memory byte buffer that can be filled, read, cleared or dumped to stdout."""

    def __init__(self):
        """
        Initializes the in-memory buffer to store data.
        """
        self.buffer = io.BytesIO()

    def download_in_buffer(self, source):
        """
        Downloads data directly into the buffer.

        Accepts objects with a callable `stream_to_buffer` method, strings
        (written as UTF-8) and bytes-like objects (written unchanged).

        Args:
            source: Object or data to be written to the buffer.

        Raises:
            TypeError: If `source` is none of the supported kinds.
        """
        if hasattr(source, 'stream_to_buffer') and callable(source.stream_to_buffer):
            source.stream_to_buffer(self.buffer)
        elif isinstance(source, str):
            self.buffer.write(source.encode('utf-8'))
        elif isinstance(source, (bytes, bytearray, memoryview)):
            # The buffer stores bytes, so raw binary data needs no encoding.
            self.buffer.write(source)
        else:
            raise TypeError("The provided object is not compatible for downloading into the buffer.")

    def redirect_to_stdout(self):
        """
        Redirects the buffer's content to stdout.
        """
        self.buffer.seek(0)  # Go back to the start of the buffer
        sys.stdout.buffer.write(self.buffer.read())

    def read(self):
        """
        Reads the buffer's content from the beginning.

        Returns:
            bytes: Everything written to the buffer so far.
        """
        self.buffer.seek(0)
        return self.buffer.read()

    def clear(self):
        """
        Clears the buffer for reuse.
        """
        self.buffer = io.BytesIO()
pytubefix/captions.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import os
3
+ import time
4
+ import json
5
+ import re
6
+ import xml.etree.ElementTree as ElementTree
7
+ from html import unescape
8
+ from typing import Dict, Optional
9
+
10
+ from pytubefix import request
11
+ from pytubefix.helpers import safe_filename, target_directory
12
+
13
+
14
class Caption:
    """Container for caption tracks."""

    def __init__(self, caption_track: Dict):
        """Construct a :class:`Caption <Caption>`.

        :param dict caption_track:
            Caption track data extracted from ``watch_html``.
        """
        self.url = caption_track.get("baseUrl")

        # Certain videos have runs instead of simpleText
        # this handles that edge case
        name_dict = caption_track['name']
        if 'simpleText' in name_dict:
            self.name = name_dict['simpleText']
        else:
            for el in name_dict['runs']:
                if 'text' in el:
                    self.name = el['text']

        # Use "vssId" instead of "languageCode", fix issue #779
        self.code = caption_track["vssId"]
        # Remove preceding '.' for backwards compatibility, e.g.:
        # English -> vssId: .en, languageCode: en
        # English (auto-generated) -> vssId: a.en, languageCode: en
        self.code = self.code.strip('.')

    @property
    def xml_captions(self) -> str:
        """Download the xml caption tracks."""
        return request.get(self.url)

    def _json_captions_url(self) -> str:
        """Return the caption URL with its ``fmt`` query parameter set to ``json3``."""
        # Replace an existing ``fmt`` parameter, whatever its value ...
        url, replaced = re.subn(r'(?<=[?&])fmt=[^&]*', 'fmt=json3', self.url)
        if not replaced:
            # ... or append one when the URL has none.
            url = f'{self.url}&fmt=json3'
        return url

    @property
    def json_captions(self) -> dict:
        """Download and parse the json caption tracks.

        :raises AssertionError:
            If the downloaded document does not carry the ``pb3`` wire magic.
        """
        text = request.get(self._json_captions_url())
        parsed = json.loads(text)
        # Raised explicitly so the check is not stripped under ``python -O``.
        if parsed['wireMagic'] != 'pb3':
            raise AssertionError('Unexpected captions format')
        return parsed

    def generate_srt_captions(self) -> str:
        """Generate "SubRip Subtitle" captions.

        Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
        recompiles them into the "SubRip Subtitle" format.
        """
        return self.xml_caption_to_srt(self.xml_captions)

    def generate_txt_captions(self) -> str:
        """Generate Text captions.

        Takes the "SubRip Subtitle" format captions and converts them into
        text by dropping sequence numbers, timestamp lines and blank lines.
        """
        srt_captions = self.generate_srt_captions()
        kept = [
            line.strip()
            for line in srt_captions.splitlines()
            if re.search('^[0-9]+$', line) is None
            and re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is None
            and re.search('^$', line) is None
        ]
        return ' '.join(kept).strip()

    def save_captions(self, filename: str):
        """Generate and save "SubRip Subtitle" captions to a text file.

        Takes the xml captions from :meth:`~pytubefix.Caption.xml_captions` and
        recompiles them into the "SubRip Subtitle" format and saves it to a text file.

        :param filename: The name of the file to save the captions.
        """
        srt_captions = self.xml_caption_to_srt(self.xml_captions)

        with open(filename, 'w', encoding='utf-8') as file:
            file.write(srt_captions)

    @staticmethod
    def float_to_srt_time_format(d: float) -> str:
        """Convert decimal durations into proper srt format.

        :rtype: str
        :returns:
            SubRip Subtitle (str) formatted time duration.

        float_to_srt_time_format(3.89) -> '00:00:03,890'
        """
        # Work in whole milliseconds so a fraction that rounds up carries into
        # the seconds field, and hours are not wrapped at 24.
        total_ms = int(round(d * 1000))
        total_seconds, ms = divmod(total_ms, 1000)
        total_minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"

    def xml_caption_to_srt(self, xml_captions: str) -> str:
        """Convert xml caption tracks to "SubRip Subtitle (srt)".

        Only direct children of the root element tagged ``p`` or ``text`` are
        converted; entries without text are skipped.

        :param str xml_captions:
            XML formatted caption tracks.
        """
        segments = []
        root = ElementTree.fromstring(xml_captions)

        for child in root:
            if child.tag not in ['p', 'text']:
                continue

            pieces = list(child)
            if pieces:
                # Text is split over nested <s> elements.
                caption = ''.join(f' {s.text}' for s in pieces if s.tag == 's')
            else:
                caption = child.text
            if not caption:
                continue
            # Collapse line breaks and double spaces before unescaping entities.
            caption = unescape(caption.replace("\n", " ").replace("  ", " "))

            # Attributes "d"/"t" are in milliseconds, "dur"/"start" in seconds.
            if "d" in child.attrib:
                duration = float(child.attrib["d"]) / 1000.0
            elif "dur" in child.attrib:
                duration = float(child.attrib["dur"])
            else:
                duration = 0.0

            if "t" in child.attrib:
                start = float(child.attrib["t"]) / 1000.0
            else:
                start = float(child.attrib["start"])

            end = start + duration
            # SRT sequence numbers are 1-based and count emitted segments only.
            sequence_number = len(segments) + 1
            segments.append(
                "{seq}\n{start} --> {end}\n{text}\n".format(
                    seq=sequence_number,
                    start=self.float_to_srt_time_format(start),
                    end=self.float_to_srt_time_format(end),
                    text=caption,
                )
            )
        return "\n".join(segments).strip()

    def download(
            self,
            title: str,
            srt: bool = True,
            output_path: Optional[str] = None,
            filename_prefix: Optional[str] = None,
    ) -> str:
        """Write the caption track to disk.

        :param title:
            Output filename (stem only) for writing the caption file.
            A trailing ``.srt`` or ``.xml`` extension is removed.
        :type title: str
        :param srt:
            Set to True to download srt, false to download xml. Defaults to True.
        :type srt: bool
        :param output_path:
            (optional) Output path for writing the caption file. If one is not
            specified, defaults to the current working directory.
        :type output_path: str or None
        :param filename_prefix:
            (optional) A string that will be prepended to the filename.
            For example a number in a playlist or the name of a series.
            If one is not specified, nothing will be prepended
            This is separate from filename so you can use the default
            filename but still add a prefix.
        :type filename_prefix: str or None

        :rtype: str
        :returns:
            Path of the written file.
        """
        if title.endswith((".srt", ".xml")):
            filename = ".".join(title.split(".")[:-1])
        else:
            filename = title

        if filename_prefix:
            filename = f"{safe_filename(filename_prefix)}{filename}"

        filename = safe_filename(filename)

        filename += f" ({self.code})"
        filename += ".srt" if srt else ".xml"

        file_path = os.path.join(target_directory(output_path), filename)

        with open(file_path, "w", encoding="utf-8") as file_handle:
            if srt:
                file_handle.write(self.generate_srt_captions())
            else:
                file_handle.write(self.xml_captions)

        return file_path

    def __repr__(self):
        """Printable object representation."""
        return '<Caption lang="{s.name}" code="{s.code}">'.format(s=self)
pytubefix/chapters.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Native python imports
2
+ from datetime import timedelta
3
+ from typing import List
4
+
5
+
6
class ChapterThumbnail:
    """Holds the size and URL of a single chapter thumbnail."""

    def __init__(self, width: int, height: int, url: str):
        """Store the thumbnail's ``width``, ``height`` and ``url``."""
        self.width, self.height, self.url = width, height, url

    def __repr__(self):
        """Printable object representation."""
        description = f'<pytubefix.chapters.ChapterThumbnail: width={self.width}, height={self.height}, url={self.url}>'
        return description
16
+
17
+
18
class Chapter:
    """Holds the title, timing and thumbnails of one video chapter."""
    title: str
    start_seconds: int
    duration: int  # in seconds
    thumbnails: List[ChapterThumbnail]

    def __init__(self, chapter_data: dict, duration: int):
        """Build a chapter from its ``chapterRenderer`` data.

        :param dict chapter_data:
            Mapping that holds the chapter under the ``chapterRenderer`` key.
        :param int duration:
            Chapter duration in seconds, computed by the caller.
        """
        renderer = chapter_data['chapterRenderer']

        self.title = renderer['title']['simpleText']
        # The start offset is given in milliseconds.
        self.start_seconds = int(renderer['timeRangeStartMillis'] / 1000)
        self.duration = duration

        self.thumbnails = []
        for thumb in renderer.get('thumbnail', {}).get('thumbnails', []):
            self.thumbnails.append(
                ChapterThumbnail(
                    width=thumb['width'],
                    height=thumb['height'],
                    url=thumb['url'],
                )
            )

    @property
    def start_label(self) -> str:
        """Return the start offset formatted by ``timedelta`` (e.g. ``0:01:01``)."""
        offset = timedelta(seconds=self.start_seconds)
        return str(offset)

    def __repr__(self):
        """Printable object representation."""
        return f'<Chapter: {self.title} | {self.start_label}>'
pytubefix/cipher.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module contains all the logic needed to find the signature functions.
3
+
4
+ YouTube's strategy to restrict downloading videos is to send a ciphered version
5
+ of the signature to the client, along with the decryption algorithm obfuscated
6
+ in JavaScript. For the clients to play the videos, JavaScript must take the
7
+ ciphered version, cycle it through a series of "transform functions," and then
8
+ signs the media URL with the output.
9
+
10
+ This module is responsible for (1) finding these "transformations
11
+ functions" (2) sends them to be interpreted by jsinterp.py
12
+ """
13
+ import logging
14
+ import re
15
+
16
+ from pytubefix.exceptions import RegexMatchError, InterpretationError
17
+ from pytubefix.jsinterp import JSInterpreter, extract_player_js_global_var
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class Cipher:
    """Finds and runs the signature and throttling functions of a player script."""

    def __init__(self, js: str, js_url: str):
        """Locate both function names and prepare the JavaScript interpreter.

        :param str js:
            The contents of the base.js asset file.
        :param str js_url:
            Full base.js url, used in error reports.
        """
        self.js_url = js_url

        self.signature_function_name = get_initial_function_name(js, js_url)
        self.throttling_function_name = get_throttling_function_name(js, js_url)

        # Initialised to None; not used anywhere else in this class.
        self.calculated_n = None

        self.js_interpreter = JSInterpreter(js)

    def get_throttling(self, n: str):
        """Interpret the function that throttles download speed.

        :param str n:
            Contains the parameter that must be transformed.
        :rtype: str
        :returns:
            Returns the transformed value "n".
        :raises InterpretationError:
            If the interpreter fails to run the function.
        """
        try:
            return self.js_interpreter.call_function(self.throttling_function_name, n)
        except Exception as e:
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate; the cause is kept for debugging.
            raise InterpretationError(js_url=self.js_url) from e

    def get_signature(self, ciphered_signature: str) -> str:
        """Interpret the function that signs the streams.

        The lack of this signature generates the 403 forbidden error.

        :param str ciphered_signature:
            Contains the signature that must be transformed.
        :rtype: str
        :returns:
            Returns the correct stream signature.
        :raises InterpretationError:
            If the interpreter fails to run the function.
        """
        try:
            return self.js_interpreter.call_function(self.signature_function_name, ciphered_signature)
        except Exception as e:
            raise InterpretationError(js_url=self.js_url) from e
60
+
61
+
62
def get_initial_function_name(js: str, js_url: str) -> str:
    """Find the name of the function that computes the signature.

    The patterns are tried in order and the first one that matches wins.

    :param str js:
        The contents of the base.js asset file.
    :param str js_url:
        Full base.js url
    :rtype: str
    :returns:
        Function name from regex match
    :raises RegexMatchError:
        If none of the patterns matches.
    """
    candidate_patterns = (
        r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*[a-zA-Z0-9_\$\"\[\]]+\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*[a-zA-Z0-9_\$\"\[\]]+\s*\)',
        r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
        r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
        # Old patterns
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    logger.debug("looking for signature cipher name")

    for pattern in candidate_patterns:
        found = re.search(pattern, js)
        if found is None:
            continue
        sig = found.group('sig')
        logger.debug("finished regex search, matched: %s", pattern)
        logger.debug(f'Signature cipher function name: {sig}')
        return sig

    raise RegexMatchError(
        caller="get_initial_function_name", pattern=f"multiple in {js_url}"
    )
101
+
102
+
103
def get_throttling_function_name(js: str, js_url: str) -> str:
    """Extract the name of the function that computes the throttling parameter.

    Two strategies are tried. The first interprets the player's global array
    and searches for a function that references the entry ending in ``_w8_``.
    The second is a regex search for the call site of the function.

    :param str js:
        The contents of the base.js asset file.
    :param str js_url:
        Full base.js url
    :rtype: str
    :returns:
        The name of the function used to compute the throttling parameter.
    :raises RegexMatchError:
        If neither strategy finds the function name.
    """

    logger.debug("looking for nsig name")
    try:
        # Extracts the function name based on the global array
        global_obj, varname, code = extract_player_js_global_var(js)
        if global_obj and varname and code:
            logger.debug(f"Global Obj name is: {varname}")
            global_obj = JSInterpreter(js).interpret_expression(code, {}, 100)
            logger.debug("Successfully interpreted global object")
            for k, v in enumerate(global_obj):
                if v.endswith('_w8_'):
                    logger.debug(f"_w8_ found in index {k}")
                    pattern = r'''(?xs)
                        [;\n](?:
                            (?P<f>function\s+)|
                            (?:var\s+)?
                        )(?P<funcname>[a-zA-Z0-9_$]+)\s*(?(f)|=\s*function\s*)
                        \((?P<argname>[a-zA-Z0-9_$]+)\)\s*\{
                        (?:(?!\};(?![\]\)])).)+
                        \}\s*catch\(\s*[a-zA-Z0-9_$]+\s*\)\s*
                        \{\s*return\s+%s\[%d\]\s*\+\s*(?P=argname)\s*\}\s*return\s+[^}]+\}[;\n]
                    ''' % (re.escape(varname), k)
                    func_name = re.search(pattern, js)
                    if func_name:
                        n_func = func_name.group("funcname")
                        logger.debug(f"Nfunc name is: {n_func}")
                        return n_func
    except Exception:
        # This strategy is best-effort: any failure falls through to the
        # regex search below. ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt and SystemExit from being swallowed.
        logger.debug("Global-array lookup of the nsig name failed", exc_info=True)

    pattern = r'''(?x)
        (?:
            \.get\("n"\)\)&&\(b=|
            (?:
                b=String\.fromCharCode\(110\)|
                (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
            )
            (?:
                ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
                (?:
                    get\(b\)|
                    [a-zA-Z0-9_$]+\[b\]\|\|null
                )\)&&\(c=|
            \b(?P<var>[a-zA-Z0-9_$]+)=
        )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
        (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))'''

    logger.debug('Finding throttling function name')

    function_match = re.search(pattern, js)
    if function_match:
        logger.debug("finished regex search, matched: %s", pattern)

        func = function_match.group('nfunc')
        idx = function_match.group('idx')

        logger.debug(f'func is: {func}')
        logger.debug(f'idx is: {idx}')

        if not idx:
            # Called directly rather than through an array element, so the
            # matched name is already the function name.
            logger.debug(f'Throttling function name is: {func}')
            return func

        logger.debug('Checking throttling function name')
        # ``func`` names an array; the function name is stored inside it.
        n_func_check_pattern = fr'var {re.escape(func)}\s*=\s*\[(.+?)];'
        n_func_found = re.search(n_func_check_pattern, js)

        if n_func_found:
            throttling_function = n_func_found.group(1)
            logger.debug(f'Throttling function name is: {throttling_function}')
            return throttling_function

        raise RegexMatchError(
            caller="get_throttling_function_name", pattern=f"{n_func_check_pattern} in {js_url}"
        )

    raise RegexMatchError(
        caller="get_throttling_function_name", pattern=f"{pattern} in {js_url}"
    )
pytubefix/cli.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import argparse
3
+ import gzip
4
+ import json
5
+ import logging
6
+ import os
7
+ import shutil
8
+ import sys
9
+ import datetime as dt
10
+ import subprocess # nosec
11
+ from typing import List, Optional
12
+
13
+ import pytubefix.exceptions as exceptions
14
+ from pytubefix import __version__
15
+ from pytubefix import CaptionQuery, Playlist, Stream
16
+ from pytubefix.helpers import safe_filename, setup_logger
17
+ from pytubefix import YouTube
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
def build_playback_report(youtube: YouTube) -> None:
    """Dump the raw request data of a video to a gzipped JSON file.

    The report is written to the current working directory as
    ``yt-video-<video_id>-<unix timestamp>.json.gz`` and holds the watch
    url, the player javascript, the watch page html and the video info so
    a problem can be debugged offline.

    :param YouTube youtube:
        A YouTube object.
    """
    timestamp = int(dt.datetime.now(dt.timezone.utc).timestamp())
    report_name = f"yt-video-{youtube.video_id}-{timestamp}.json.gz"
    report_path = os.path.join(os.getcwd(), report_name)

    # Fetch everything that may hit the network before the file is created.
    player_js = youtube.js
    page_html = youtube.watch_html
    info = youtube.vid_info

    with gzip.open(report_path, "wb") as report:
        payload = {
            "url": youtube.watch_url,
            "js": player_js,
            "watch_html": page_html,
            "video_info": info,
        }
        serialized = json.dumps(payload)
        report.write(serialized.encode("utf8"))
45
+
46
def display_progress_bar(bytes_received: int, filesize: int, ch: str = "█", scale: float = 0.55) -> None:
    """Display a simple, pretty progress bar.

    Example:
    ~~~~~~~~
    PSY - GANGNAM STYLE(강남스타일) MV.mp4
    ↳ |███████████████████████████████████████| 100.0%

    :param int bytes_received:
        Number of bytes downloaded so far. Values outside
        ``0..filesize`` are clamped so the bar never over- or underflows.
    :param int filesize:
        File size of the media stream in bytes. A size of zero (or less)
        is rendered as a completed bar instead of dividing by zero.
    :param str ch:
        Character to use for presenting progress segment.
    :param float scale:
        Scale multiplier to reduce progress bar size.
    """
    columns = shutil.get_terminal_size().columns
    max_width = int(columns * scale)

    if filesize > 0:
        # Clamp so a miscounted chunk cannot produce >100% or a negative pad.
        received = min(max(bytes_received, 0), filesize)
        filled = int(round(max_width * received / float(filesize)))
        percent = round(100.0 * received / float(filesize), 1)
    else:
        # Nothing to download: an empty stream is trivially complete.
        filled = max_width
        percent = 100.0

    remaining = max_width - filled
    progress_bar = ch * filled + " " * remaining
    text = f" ↳ |{progress_bar}| {percent}%\r"
    sys.stdout.write(text)
    sys.stdout.flush()
74
+
75
def on_progress(stream: Stream, chunk: bytes, bytes_remaining: int) -> None:  # pylint: disable=W0613
    """Progress callback: redraw the progress bar after each received chunk.

    :param Stream stream:
        The stream being downloaded; only its ``filesize`` is read.
    :param bytes chunk:
        The chunk that was just received (unused).
    :param int bytes_remaining:
        Number of bytes still to be downloaded.
    """
    total = stream.filesize
    display_progress_bar(total - bytes_remaining, total)
79
+
80
+ def _download(stream: Stream, target: Optional[str] = None, filename: Optional[str] = None) -> None:
81
+ filesize_megabytes = stream.filesize // 1048576
82
+ print(f"{filename or stream.default_filename} | {filesize_megabytes} MB")
83
+ file_path = stream.get_file_path(filename=filename, output_path=target)
84
+ if stream.exists_at_path(file_path):
85
+ print(f"Already downloaded at:\n{file_path}")
86
+ return
87
+
88
+ stream.download(output_path=target, filename=filename)
89
+ sys.stdout.write("\n")
90
+
91
+ def _unique_name(base: str, subtype: str, media_type: str, target: str) -> str:
92
+ """
93
+ Given a base name, the file format, and the target directory, will generate
94
+ a filename unique for that directory and file format.
95
+
96
+ :param str base:
97
+ The given base-name.
98
+ :param str subtype:
99
+ The filetype of the video which will be downloaded.
100
+ :param str media_type:
101
+ The media_type of the file, ie. "audio" or "video"
102
+ :param Path target:
103
+ Target directory for download.
104
+ """
105
+ counter = 0
106
+ while True:
107
+ file_name = f"{base}_{media_type}_{counter}"
108
+ file_path = os.path.join(target, f"{file_name}.{subtype}")
109
+ if not os.path.exists(file_path):
110
+ return file_name
111
+ counter += 1
112
+
113
def ffmpeg_process(youtube: YouTube, resolution: str, target: Optional[str] = None) -> None:
    """
    Download the best matching video and audio streams and merge them with ffmpeg.

    The video stream is chosen from the non-progressive streams: for
    ``None`` / ``"best"`` the highest resolution is used (the mp4 variant
    when it reaches the same resolution), otherwise the first stream with
    the requested resolution. The audio stream is the non-progressive
    stream with the highest ``abr``. Nothing is downloaded when either
    stream is missing.

    :param YouTube youtube:
        A valid YouTube object.
    :param str resolution:
        YouTube video resolution, ``"best"`` or ``None`` for the highest one.
    :param str target:
        Target directory for download. Defaults to the current working
        directory.
    """
    youtube.register_on_progress_callback(on_progress)
    target = target or os.getcwd()

    if resolution is None or resolution == "best":
        highest_quality_stream = youtube.streams.filter(progressive=False).order_by("resolution").last()
        mp4_stream = youtube.streams.filter(progressive=False, subtype="mp4").order_by("resolution").last()
        # Either query may come back empty; only compare when both exist.
        if (
            highest_quality_stream is not None
            and mp4_stream is not None
            and highest_quality_stream.resolution == mp4_stream.resolution
        ):
            video_stream = mp4_stream
        else:
            video_stream = highest_quality_stream
    else:
        video_stream = youtube.streams.filter(progressive=False, resolution=resolution).first()

    if not video_stream:
        print(f"No streams found for resolution {resolution}")
        return

    audio_stream = youtube.streams.filter(progressive=False).order_by("abr").last()
    if audio_stream is None:
        # Without an audio track there is nothing to merge.
        print("No audio streams found")
        return

    video_file_name = _unique_name(youtube.title, "mp4", "video", target)
    audio_file_name = _unique_name(youtube.title, "mp4", "audio", target)

    video_path = video_stream.get_file_path(filename=video_file_name, output_path=target)
    audio_path = audio_stream.get_file_path(filename=audio_file_name, output_path=target)

    if os.path.exists(video_path) and os.path.exists(audio_path):
        print("Already downloaded both video and audio.")
        return

    _download(video_stream, target=target, filename=video_file_name)
    _download(audio_stream, target=target, filename=audio_file_name)

    # Copy the video track untouched and re-encode the audio to AAC.
    # The command is passed as a list (no shell), so the title needs no quoting.
    output_path = os.path.join(target, f"{youtube.title}.mp4")
    command = [
        "ffmpeg",
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-strict", "experimental",
        output_path,
    ]

    # Execute the command
    subprocess.run(command)
161
+
162
def download_by_resolution(youtube: YouTube, resolution: str, target: Optional[str] = None) -> None:
    """Download the first stream that has the requested resolution.

    Prints a notice instead of downloading when no stream matches.

    :param YouTube youtube:
        A valid YouTube object.
    :param str resolution:
        The desired resolution of the stream.
    :param Optional[str] target:
        The target directory for the download.
    """
    print(f"Downloading {resolution}...")
    match = youtube.streams.filter(resolution=resolution).first()
    if match is not None:
        _download(match, target)
        return
    print(f"No stream found for resolution {resolution}")
178
+
179
def download_audio(youtube: YouTube, filetype: Optional[str] = "mp4", target: Optional[str] = None) -> None:
    """Download the highest-bitrate audio stream of a YouTube video.

    Only non-progressive streams of the given file type are considered;
    they are ordered by ``abr`` and the last one is taken.

    :param YouTube youtube:
        A valid YouTube object.
    :param Optional[str] filetype:
        The filetype for the audio. Defaults to "mp4".
    :param Optional[str] target:
        The target directory for the download.
    """
    print("Downloading audio...")
    candidates = youtube.streams.filter(progressive=False, subtype=filetype)
    best = candidates.order_by("abr").last()
    if best is not None:
        _download(best, target)
        return
    print(f"No audio stream found for filetype {filetype}")
195
+
196
def download_highest_resolution_progressive(youtube: YouTube, resolution: str, target: Optional[str] = None) -> None:
    """Download the progressive stream with the highest resolution.

    :param YouTube youtube:
        A valid YouTube object.
    :param str resolution:
        The resolution of the stream. Not read by this function; the
        highest available progressive resolution is always chosen.
    :param Optional[str] target:
        The target directory for the download.
    """
    print("Downloading highest resolution progressive stream...")
    progressive = youtube.streams.filter(progressive=True)
    best = progressive.order_by("resolution").last()
    if best is not None:
        _download(best, target)
        return
    print("No progressive stream found.")
212
+
213
def download_by_itag(youtube: YouTube, itag: int, target: Optional[str] = None) -> None:
    """Download the stream identified by ``itag``.

    Prints a notice instead of downloading when no such stream exists.

    :param YouTube youtube:
        A valid YouTube object.
    :param int itag:
        The itag of the desired stream.
    :param Optional[str] target:
        The target directory for the download.
    """
    selected = youtube.streams.get_by_itag(itag)
    if selected is not None:
        print(f"Downloading stream with itag {itag}...")
        _download(selected, target)
        return
    print(f"No stream found with itag {itag}.")
229
+
230
def download_caption(youtube: YouTube, lang_code: str, target: Optional[str] = None) -> None:
    """Download captions for a given YouTube video.

    The caption file is named after the video title and written to
    ``target``. Prints a notice when the video has no captions for
    ``lang_code``.

    :param YouTube youtube:
        A valid YouTube object.
    :param str lang_code:
        The language code for the desired captions.
    :param Optional[str] target:
        The target directory for the downloaded captions.
    """
    print(f"Downloading captions for language: {lang_code}...")
    caption = youtube.captions.get_by_language_code(lang_code)
    if caption is None:
        print(f"No captions found for language code: {lang_code}.")
    else:
        # The first positional parameter of Caption.download is the title,
        # not the directory, so both are passed by keyword.
        caption.download(title=youtube.title, output_path=target)
246
+
247
+ def _print_available_captions(captions: List[CaptionQuery]) -> None:
248
+ """Print available captions for a YouTube video.
249
+
250
+ :param List[CaptionQuery] captions:
251
+ The list of available captions.
252
+ """
253
+ print("Available captions:")
254
+ for caption in captions:
255
+ print(f" - {caption.language_code}: {caption.name}")
256
+
257
def display_streams(youtube: YouTube) -> None:
    """Print every stream of the given YouTube video, one per line.

    :param YouTube youtube:
        A valid YouTube object.
    """
    heading = f"Available streams for {youtube.title}:"
    print(heading)
    for candidate in youtube.streams:
        line = f" - {candidate}"
        print(line)
266
+
267
+
268
def _parse_args(parser: argparse.ArgumentParser, args: Optional[List] = None) -> argparse.Namespace:
    """Register all command-line options on ``parser`` and parse ``args``.

    :param argparse.ArgumentParser parser:
        The parser to populate.
    :param Optional[List] args:
        Argument list to parse; passed straight to ``parser.parse_args``.
    :rtype: argparse.Namespace
    :returns: The parsed arguments.
    """
    # Each entry is (names or flags, keyword arguments for add_argument).
    option_table = [
        (("url",), {"help": "The YouTube /watch or /playlist url", "nargs": "?"}),
        (("-V", "--version"), {"action": "version", "version": f"%(prog)s {__version__}"}),
        (("--itag",), {"type": int, "help": "The itag for the desired stream"}),
        (("-r", "--resolution"), {"type": str, "help": "The resolution for the desired stream"}),
        (("-l", "--list"), {"action": "store_true", "help": "The list option causes pytubefix cli to return a list of streams available to download"}),
        (("--oauth",), {"action": "store_true", "help": "use oauth token"}),
        (("-v", "--verbose"), {"action": "store_true", "dest": "verbose", "help": "Set logger output to verbose output."}),
        (("--logfile",), {"action": "store", "help": "logging debug and error messages into a log file"}),
        (("--build-playback-report",), {"action": "store_true", "help": "Save the html and js to disk"}),
        (("-c", "--caption-code"), {"type": str, "help": "Download srt captions for given language code. Prints available language codes if no argument given"}),
        (("-lc", "--list-captions"), {"action": "store_true", "help": "List available caption codes for a video"}),
        (("-t", "--target"), {"help": "The output directory for the downloaded stream. Default is current working directory"}),
        (("-a", "--audio"), {"const": "mp4", "nargs": "?", "help": "Download the audio for a given URL at the highest bitrate available. Defaults to mp4 format if none is specified"}),
        (("-f", "--ffmpeg"), {"const": "best", "nargs": "?", "help": "Downloads the audio and video stream for resolution provided. If no resolution is provided, downloads the best resolution. Runs the command line program ffmpeg to combine the audio and video"}),
    ]
    for names, options in option_table:
        parser.add_argument(*names, **options)

    return parser.parse_args(args)
285
+
286
def _perform_args_on_youtube(youtube: YouTube, args: argparse.Namespace) -> None:
    """Run the actions requested on the command line against one video.

    When no action option is given, the highest resolution progressive
    stream is downloaded once as the default. With ``--oauth`` the video
    is re-created with OAuth enabled before any action runs.

    :param YouTube youtube:
        The video to operate on.
    :param argparse.Namespace args:
        The parsed command-line arguments.
    """
    if args.oauth:
        # Authenticate first so every requested action uses the credentials.
        # The video's own watch url is used so playlist entries work too.
        youtube = YouTube(youtube.watch_url, use_oauth=True, allow_oauth_cache=True)

    action_requested = any((
        args.list_captions,
        args.list,
        args.itag,
        args.caption_code,
        args.resolution,
        args.audio,
        args.ffmpeg,
        args.build_playback_report,
    ))
    if not action_requested:
        # Default behaviour: just the url (plus options like --target).
        download_highest_resolution_progressive(youtube=youtube, resolution="highest", target=args.target)
        return

    if args.list_captions:
        _print_available_captions(youtube.captions)
    if args.list:
        display_streams(youtube)

    if args.itag:
        download_by_itag(youtube=youtube, itag=args.itag, target=args.target)
    elif args.caption_code:
        download_caption(youtube=youtube, lang_code=args.caption_code, target=args.target)
    elif args.resolution:
        download_by_resolution(youtube=youtube, resolution=args.resolution, target=args.target)
    elif args.audio:
        download_audio(youtube=youtube, filetype=args.audio, target=args.target)

    if args.ffmpeg:
        # --ffmpeg may carry its own resolution; a bare --ffmpeg ("best")
        # still honours --resolution as before.
        if args.ffmpeg != "best":
            ffmpeg_resolution = args.ffmpeg
        else:
            ffmpeg_resolution = args.resolution or "best"
        ffmpeg_process(youtube=youtube, resolution=ffmpeg_resolution, target=args.target)

    if args.build_playback_report:
        build_playback_report(youtube)
321
+
322
+
323
def main():
    """Command line application to download YouTube videos, playlists and captions."""
    # The docstring above doubles as the description shown by --help.
    parser = argparse.ArgumentParser(description=main.__doc__)
    args = _parse_args(parser)

    # A log file is only written in verbose mode.
    log_filename = args.logfile if args.verbose else None
    setup_logger(logging.DEBUG if args.verbose else logging.INFO, log_filename=log_filename)

    if args.verbose:
        logger.debug(f'Pytubefix version: {__version__}')

    # Without something that looks like a YouTube url there is nothing to do.
    if not args.url or "youtu" not in args.url:
        parser.print_help()
        sys.exit(0)

    if "/playlist" in args.url:
        print("Loading playlist...")
        playlist = Playlist(args.url)
        # Playlist downloads default to a directory named after the playlist.
        args.target = args.target or safe_filename(playlist.title)

        for youtube_video in playlist.videos:
            try:
                _perform_args_on_youtube(youtube_video, args)
            except exceptions.PytubeFixError as e:
                # One broken video must not abort the rest of the playlist.
                print(f"There was an error with video: {youtube_video}")
                print(e)

    else:
        print("Loading video...")
        youtube = YouTube(args.url)
        _perform_args_on_youtube(youtube, args)
353
+
354
+ if __name__ == "__main__":
355
+ main()
pytubefix/contrib/__init__.py ADDED
File without changes
pytubefix/contrib/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (210 Bytes). View file
 
pytubefix/contrib/__pycache__/channel.cpython-311.pyc ADDED
Binary file (29.9 kB). View file
 
pytubefix/contrib/__pycache__/playlist.cpython-311.pyc ADDED
Binary file (23.4 kB). View file
 
pytubefix/contrib/__pycache__/search.cpython-311.pyc ADDED
Binary file (22 kB). View file
 
pytubefix/contrib/channel.py ADDED
@@ -0,0 +1,655 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Module for interacting with a user's youtube channel."""
3
+ import json
4
+ import logging
5
+ from typing import Dict, List, Optional, Tuple, Iterable, Any, Callable
6
+
7
+ from pytubefix import extract, YouTube, Playlist, request
8
+ from pytubefix.helpers import cache, uniqueify, DeferredGeneratorList
9
+ from pytubefix.innertube import InnerTube
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class Channel(Playlist):
15
+ def __init__(
16
+ self,
17
+ url: str,
18
+ client: str = InnerTube().client_name,
19
+ proxies: Optional[Dict[str, str]] = None,
20
+ use_oauth: bool = False,
21
+ allow_oauth_cache: bool = True,
22
+ token_file: Optional[str] = None,
23
+ oauth_verifier: Optional[Callable[[str, str], None]] = None,
24
+ use_po_token: Optional[bool] = False,
25
+ po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
26
+ ):
27
+ """Construct a :class:`Channel <Channel>`.
28
+ :param str url:
29
+ A valid YouTube channel URL.
30
+ :param dict proxies:
31
+ (Optional) A dict mapping protocol to proxy address which will be used by pytube.
32
+ :param bool use_oauth:
33
+ (Optional) Prompt the user to authenticate to YouTube.
34
+ If allow_oauth_cache is set to True, the user should only be prompted once.
35
+ :param bool allow_oauth_cache:
36
+ (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
37
+ These tokens are only generated if use_oauth is set to True as well.
38
+ :param str token_file:
39
+ (Optional) Path to the file where the OAuth tokens will be stored.
40
+ Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
41
+ :param Callable oauth_verifier:
42
+ (optional) Verifier to be used for getting OAuth tokens.
43
+ Verification URL and User-Code will be passed to it respectively.
44
+ (if passed, else default verifier will be used)
45
+ :param bool use_po_token:
46
+ (Optional) Prompt the user to use the proof of origin token on YouTube.
47
+ It must be sent with the API along with the linked visitorData and
48
+ then passed as a `po_token` query parameter to affected clients.
49
+ If allow_oauth_cache is set to True, the user should only be prompted once.
50
+ :param Callable po_token_verifier:
51
+ (Optional) Verified used to obtain the visitorData and po_token.
52
+ The verifier will return the visitorData and po_token respectively.
53
+ (if passed, else default verifier will be used)
54
+ """
55
+ super().__init__(url, proxies)
56
+
57
+ self.channel_uri = extract.channel_name(url)
58
+
59
+ self.client = client
60
+ self.use_oauth = use_oauth
61
+ self.allow_oauth_cache = allow_oauth_cache
62
+ self.token_file = token_file
63
+ self.oauth_verifier = oauth_verifier
64
+
65
+ self.use_po_token = use_po_token
66
+ self.po_token_verifier = po_token_verifier
67
+
68
+ self.channel_url = (
69
+ f"https://www.youtube.com{self.channel_uri}"
70
+ )
71
+
72
+ self.featured_url = self.channel_url + '/featured'
73
+ self.videos_url = self.channel_url + '/videos'
74
+ self.shorts_url = self.channel_url + '/shorts'
75
+ self.live_url = self.channel_url + '/streams'
76
+ self.releases_url = self.channel_url + '/releases'
77
+ self.playlists_url = self.channel_url + '/playlists'
78
+ self.community_url = self.channel_url + '/community'
79
+ self.featured_channels_url = self.channel_url + '/channels'
80
+ self.about_url = self.channel_url + '/about'
81
+
82
+ self._html_url = self.videos_url # Videos will be preferred over short videos and live
83
+
84
+ # Possible future additions
85
+ self._playlists_html = None
86
+ self._community_html = None
87
+ self._featured_channels_html = None
88
+ self._about_html = None
89
+
90
+ def __repr__(self) -> str:
91
+ return f'<pytubefix.contrib.Channel object: channelUri={self.channel_uri}>'
92
+
93
+ @property
94
+ def channel_name(self):
95
+ """Get the name of the YouTube channel.
96
+
97
+ :rtype: str
98
+ """
99
+ return self.initial_data['metadata']['channelMetadataRenderer']['title']
100
+
101
+ @property
102
+ def channel_id(self):
103
+ """Get the ID of the YouTube channel.
104
+
105
+ This will return the underlying ID, not the vanity URL.
106
+
107
+ :rtype: str
108
+ """
109
+ return self.initial_data['metadata']['channelMetadataRenderer']['externalId']
110
+
111
+ @property
112
+ def vanity_url(self):
113
+ """Get the vanity URL of the YouTube channel.
114
+
115
+ Returns None if it doesn't exist.
116
+
117
+ :rtype: str
118
+ """
119
+ return self.initial_data['metadata']['channelMetadataRenderer'].get('vanityChannelUrl', None) # noqa:E501
120
+
121
+ @property
122
+ def html_url(self):
123
+ """Get the html url.
124
+
125
+ :rtype: str
126
+ """
127
+ return self._html_url
128
+
129
+ @html_url.setter
130
+ def html_url(self, value):
131
+ """Set the html url and clear the cache."""
132
+ if self._html_url != value:
133
+ self._html = None
134
+ self._initial_data = None
135
+ self.__class__.video_urls.fget.cache_clear()
136
+ self._html_url = value
137
+
138
+ @property
139
+ def html(self):
140
+ """Get the html for the /videos, /shorts or /streams page.
141
+
142
+ :rtype: str
143
+ """
144
+ if self._html:
145
+ return self._html
146
+ self._html = request.get(self.html_url)
147
+ return self._html
148
+
149
+ @property
150
+ def playlists_html(self):
151
+ """Get the html for the /playlists page.
152
+
153
+ Currently unused for any functionality.
154
+
155
+ :rtype: str
156
+ """
157
+ if self._playlists_html:
158
+ return self._playlists_html
159
+ else:
160
+ self._playlists_html = request.get(self.playlists_url)
161
+ return self._playlists_html
162
+
163
+ @property
164
+ def community_html(self):
165
+ """Get the html for the /community page.
166
+
167
+ Currently unused for any functionality.
168
+
169
+ :rtype: str
170
+ """
171
+ if self._community_html:
172
+ return self._community_html
173
+ else:
174
+ self._community_html = request.get(self.community_url)
175
+ return self._community_html
176
+
177
+ @property
178
+ def featured_channels_html(self):
179
+ """Get the html for the /channels page.
180
+
181
+ Currently unused for any functionality.
182
+
183
+ :rtype: str
184
+ """
185
+ if self._featured_channels_html:
186
+ return self._featured_channels_html
187
+ else:
188
+ self._featured_channels_html = request.get(self.featured_channels_url)
189
+ return self._featured_channels_html
190
+
191
+ @property
192
+ def about_html(self):
193
+ """Get the html for the /about page.
194
+
195
+ Currently unused for any functionality.
196
+
197
+ :rtype: str
198
+ """
199
+ if self._about_html:
200
+ return self._about_html
201
+ else:
202
+ self._about_html = request.get(self.about_url)
203
+ return self._about_html
204
+
205
+ def url_generator(self):
206
+ """Generator that yields video URLs.
207
+
208
+ :Yields: Video URLs
209
+ """
210
+ for page in self._paginate(self.html):
211
+ for obj in page:
212
+ yield obj
213
+
214
+ def videos_generator(self):
215
+ for url in self.video_urls:
216
+ yield url
217
+
218
+ def _get_active_tab(self, initial_data) -> dict:
219
+ """ Receive the raw json and return the active page.
220
+
221
+ :returns: Active page json object.
222
+ """
223
+ active_tab = {}
224
+ # Possible tabs: Home, Videos, Shorts, Live, Releases, Playlists, Community, Channels, About
225
+ # We check each page for the URL that is active.
226
+ for tab in initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]:
227
+ if 'tabRenderer' in tab:
228
+ tab_url = tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
229
+ if tab_url.rsplit('/', maxsplit=1)[-1] == self.html_url.rsplit('/', maxsplit=1)[-1]:
230
+ active_tab = tab
231
+ break
232
+ return active_tab
233
+
234
+ def _extract_obj_from_home(self) -> list:
235
+ """ Extract items from the channel home page.
236
+
237
+ :returns: list of home page objects.
238
+ """
239
+ items = []
240
+ try:
241
+ contents = self._get_active_tab(self.initial_data)['tabRenderer']['content'][
242
+ 'sectionListRenderer']['contents']
243
+
244
+ for obj in contents:
245
+ item_section_renderer = obj['itemSectionRenderer']['contents'][0]
246
+
247
+ # Skip the presentation videos for non-subscribers
248
+ if 'channelVideoPlayerRenderer' in item_section_renderer:
249
+ continue
250
+
251
+ # Skip presentation videos for subscribers
252
+ if 'channelFeaturedContentRenderer' in item_section_renderer:
253
+ continue
254
+
255
+ # skip the list with channel members
256
+ if 'recognitionShelfRenderer' in item_section_renderer:
257
+ continue
258
+
259
+ # Get the horizontal shorts
260
+ if 'reelShelfRenderer' in item_section_renderer:
261
+ for x in item_section_renderer['reelShelfRenderer']['items']:
262
+ items.append(x)
263
+
264
+ # Get videos, playlist and horizontal channels
265
+ if 'shelfRenderer' in item_section_renderer:
266
+ # We only take items that are horizontal
267
+ if 'horizontalListRenderer' in item_section_renderer['shelfRenderer']['content']:
268
+ # We iterate over each item in the array, which could be videos, playlist or channel
269
+ for x in item_section_renderer['shelfRenderer']['content']['horizontalListRenderer']['items']:
270
+ items.append(x)
271
+
272
+ except (KeyError, IndexError, TypeError):
273
+ return []
274
+
275
+ # Extract object from each corresponding url
276
+ items_obj = self._extract_ids(items)
277
+
278
+ # remove duplicates
279
+ return uniqueify(items_obj)
280
+
281
+ def _extract_videos(self, raw_json: str, context: Optional[Any] = None) -> Tuple[List[str], Optional[str]]:
282
+ """Extracts videos from a raw json page
283
+
284
+ :param str raw_json: Input json extracted from the page or the last
285
+ server response
286
+ :rtype: Tuple[List[str], Optional[str]]
287
+ :returns: Tuple containing a list of up to 100 video watch ids and
288
+ a continuation token, if more videos are available
289
+ """
290
+
291
+ if isinstance(raw_json, dict):
292
+ initial_data = raw_json
293
+ else:
294
+ initial_data = json.loads(raw_json)
295
+ # this is the json tree structure, if the json was extracted from
296
+ # html
297
+ try:
298
+ active_tab = self._get_active_tab(initial_data)
299
+ try:
300
+ # This is the json tree structure for videos, shorts and streams
301
+ items = active_tab['tabRenderer']['content']['richGridRenderer']['contents']
302
+ except (KeyError, IndexError, TypeError):
303
+ # This is the json tree structure for playlists
304
+ items = active_tab['tabRenderer']['content']['sectionListRenderer']['contents'][0][
305
+ 'itemSectionRenderer']['contents'][0]['gridRenderer']['items']
306
+
307
+ # This is the json tree structure of visitor data
308
+ # It is necessary to send the visitorData together with the continuation token
309
+ self._visitor_data = initial_data["responseContext"]["webResponseContextExtensionData"][
310
+ "ytConfigData"]["visitorData"]
311
+
312
+ except (KeyError, IndexError, TypeError):
313
+ try:
314
+ # this is the json tree structure, if the json was directly sent
315
+ # by the server in a continuation response
316
+ important_content = initial_data[1]['response']['onResponseReceivedActions'][
317
+ 0
318
+ ]['appendContinuationItemsAction']['continuationItems']
319
+ items = important_content
320
+ except (KeyError, IndexError, TypeError):
321
+ try:
322
+ # this is the json tree structure, if the json was directly sent
323
+ # by the server in a continuation response
324
+ # no longer a list and no longer has the "response" key
325
+ important_content = initial_data['onResponseReceivedActions'][0][
326
+ 'appendContinuationItemsAction']['continuationItems']
327
+ items = important_content
328
+ except (KeyError, IndexError, TypeError) as p:
329
+ logger.info(p)
330
+ return [], None
331
+
332
+ try:
333
+ continuation = items[-1]['continuationItemRenderer'][
334
+ 'continuationEndpoint'
335
+ ]['continuationCommand']['token']
336
+ items = items[:-1]
337
+ except (KeyError, IndexError):
338
+ # if there is an error, no continuation is available
339
+ continuation = None
340
+
341
+ # Extract object from each corresponding url
342
+ items_obj = self._extract_ids(items)
343
+
344
+ # remove duplicates
345
+ return uniqueify(items_obj), continuation
346
+
347
+ def _extract_video_id(self, x: dict):
348
+ """ Try extracting video ids, if it fails, try extracting shorts ids.
349
+
350
+ :returns: List of YouTube, Playlist or Channel objects.
351
+ """
352
+ try:
353
+ return YouTube(f"/watch?v="
354
+ f"{x['richItemRenderer']['content']['videoRenderer']['videoId']}",
355
+ client=self.client,
356
+ use_oauth=self.use_oauth,
357
+ allow_oauth_cache=self.allow_oauth_cache,
358
+ token_file=self.token_file,
359
+ oauth_verifier=self.oauth_verifier,
360
+ use_po_token=self.use_po_token,
361
+ po_token_verifier=self.po_token_verifier
362
+ )
363
+ except (KeyError, IndexError, TypeError):
364
+ return self._extract_shorts_id(x)
365
+
366
+ def _extract_shorts_id(self, x: dict):
367
+ """ Try extracting shorts ids, if it fails, try extracting release ids.
368
+
369
+ :returns: List of YouTube, Playlist or Channel objects.
370
+ """
371
+ try:
372
+ content = x['richItemRenderer']['content']
373
+
374
+ # New json tree added on 09/12/2024
375
+ if 'shortsLockupViewModel' in content:
376
+ video_id = content['shortsLockupViewModel']['onTap']['innertubeCommand']['reelWatchEndpoint']['videoId']
377
+ else:
378
+ video_id = content['reelItemRenderer']['videoId']
379
+
380
+ return YouTube(f"/watch?v={video_id}",
381
+ client=self.client,
382
+ use_oauth=self.use_oauth,
383
+ allow_oauth_cache=self.allow_oauth_cache,
384
+ token_file=self.token_file,
385
+ oauth_verifier=self.oauth_verifier,
386
+ use_po_token=self.use_po_token,
387
+ po_token_verifier=self.po_token_verifier
388
+ )
389
+ except (KeyError, IndexError, TypeError):
390
+ return self._extract_release_id(x)
391
+
392
+ def _extract_release_id(self, x: dict):
393
+ """ Try extracting release ids, if it fails, try extracting video IDs from the home page.
394
+
395
+ :returns: List of YouTube, Playlist or Channel objects.
396
+ """
397
+ try:
398
+ return Playlist(f"/playlist?list="
399
+ f"{x['richItemRenderer']['content']['playlistRenderer']['playlistId']}",
400
+ client=self.client,
401
+ use_oauth=self.use_oauth,
402
+ allow_oauth_cache=self.allow_oauth_cache,
403
+ token_file=self.token_file,
404
+ oauth_verifier=self.oauth_verifier,
405
+ use_po_token=self.use_po_token,
406
+ po_token_verifier=self.po_token_verifier
407
+ )
408
+ except (KeyError, IndexError, TypeError):
409
+ return self._extract_video_id_from_home(x)
410
+
411
def _extract_video_id_from_home(self, x: dict):
    """Build a YouTube object from a grid video item, else defer to the shorts extractor.

    :param dict x: One item taken from the channel page JSON.
    :returns: A YouTube object built from ``gridVideoRenderer -> videoId``,
        otherwise whatever ``_extract_shorts_id_from_home`` returns.
    """
    try:
        video_id = x['gridVideoRenderer']['videoId']
        session_options = {
            'client': self.client,
            'use_oauth': self.use_oauth,
            'allow_oauth_cache': self.allow_oauth_cache,
            'token_file': self.token_file,
            'oauth_verifier': self.oauth_verifier,
            'use_po_token': self.use_po_token,
            'po_token_verifier': self.po_token_verifier,
        }
        return YouTube(f"/watch?v={video_id}", **session_options)
    except (KeyError, IndexError, TypeError):
        # Item is not a grid video: hand it to the next extractor.
        return self._extract_shorts_id_from_home(x)
430
+
431
def _extract_shorts_id_from_home(self, x: dict):
    """Build a YouTube object from a reel (shorts) item, else defer to the playlist extractor.

    :param dict x: One item taken from the channel page JSON.
    :returns: A YouTube object built from ``reelItemRenderer -> videoId``,
        otherwise whatever ``_extract_playlist_id`` returns.
    """
    try:
        short_id = x['reelItemRenderer']['videoId']
        session_options = {
            'client': self.client,
            'use_oauth': self.use_oauth,
            'allow_oauth_cache': self.allow_oauth_cache,
            'token_file': self.token_file,
            'oauth_verifier': self.oauth_verifier,
            'use_po_token': self.use_po_token,
            'po_token_verifier': self.po_token_verifier,
        }
        return YouTube(f"/watch?v={short_id}", **session_options)
    except (KeyError, IndexError, TypeError):
        # Item is not a reel: hand it to the next extractor.
        return self._extract_playlist_id(x)
449
+
450
def _extract_playlist_id(self, x: dict):
    """Build a Playlist from a grid playlist item, else defer to the channel extractor.

    :param dict x: One item taken from the channel page JSON.
    :returns: A Playlist built from ``gridPlaylistRenderer -> playlistId``,
        otherwise whatever ``_extract_channel_id_from_home`` returns.
    """
    try:
        playlist_id = x['gridPlaylistRenderer']['playlistId']
        session_options = {
            'client': self.client,
            'use_oauth': self.use_oauth,
            'allow_oauth_cache': self.allow_oauth_cache,
            'token_file': self.token_file,
            'oauth_verifier': self.oauth_verifier,
            'use_po_token': self.use_po_token,
            'po_token_verifier': self.po_token_verifier,
        }
        return Playlist(f"/playlist?list={playlist_id}", **session_options)
    except (KeyError, IndexError, TypeError):
        # Item is not a grid playlist: hand it to the next extractor.
        return self._extract_channel_id_from_home(x)
468
+
469
def _extract_channel_id_from_home(self, x: dict):
    """Build a Channel from a grid channel item, else defer to the lockupViewModel extractor.

    :param dict x: One item taken from the channel page JSON.
    :returns: A Channel built from ``gridChannelRenderer -> channelId``,
        otherwise whatever ``_extract_playlist_id_from_lockup_view_model`` returns.
    """
    try:
        channel_id = x['gridChannelRenderer']['channelId']
        session_options = {
            'client': self.client,
            'use_oauth': self.use_oauth,
            'allow_oauth_cache': self.allow_oauth_cache,
            'token_file': self.token_file,
            'oauth_verifier': self.oauth_verifier,
            'use_po_token': self.use_po_token,
            'po_token_verifier': self.po_token_verifier,
        }
        return Channel(f"/channel/{channel_id}", **session_options)
    except (KeyError, IndexError, TypeError):
        # Item is not a grid channel: hand it to the last extractor in the chain.
        return self._extract_playlist_id_from_lockup_view_model(x)
487
+
488
def _extract_playlist_id_from_lockup_view_model(self, x: dict):
    """Build a Playlist from a lockupViewModel item; last step of the extractor chain.

    :param dict x: One item taken from the channel page JSON.
    :returns: A Playlist built from ``lockupViewModel -> contentId``,
        or an empty list when the item does not have that shape.
    """
    try:
        content_id = x['lockupViewModel']['contentId']
        session_options = {
            'client': self.client,
            'use_oauth': self.use_oauth,
            'allow_oauth_cache': self.allow_oauth_cache,
            'token_file': self.token_file,
            'oauth_verifier': self.oauth_verifier,
            'use_po_token': self.use_po_token,
            'po_token_verifier': self.po_token_verifier,
        }
        return Playlist(f"/playlist?list={content_id}", **session_options)
    except (KeyError, IndexError, TypeError):
        # Nothing recognised: an empty list is the "no object" marker here.
        return []
506
+
507
@property
def views(self) -> int:
    """Extract view count for channel.

    Points ``html_url`` at the about page, then reads ``viewCountText``
    from the about-channel panel found in ``initial_data``.

    :return: Channel view count, or 0 when the count cannot be found or is
        not a comma-grouped integer (for example a text such as "No views").
    :rtype: int
    """
    self.html_url = self.about_url

    try:
        about_model = self.initial_data['onResponseReceivedEndpoints'][0]['showEngagementPanelEndpoint'][
            'engagementPanel']['engagementPanelSectionListRenderer']['content']['sectionListRenderer'][
            'contents'][0]['itemSectionRenderer']['contents'][0]['aboutChannelRenderer']['metadata'][
            'aboutChannelViewModel']
        views_text = about_model['viewCountText']

        # "1,234,567 views" -> "1,234,567" -> "1234567"
        count_text = views_text.split(' ')[0].replace(',', '')
        return int(count_text)
    except (KeyError, IndexError, TypeError, ValueError):
        # The path mixes dict keys and list indexes, and the text may not
        # start with a number, so all of these mean "no usable count".
        return 0
529
+
530
@property
def description(self) -> str:
    """Return the channel description text.

    Points ``html_url`` at the channel URL, then reads
    ``metadata -> channelMetadataRenderer -> description`` from ``initial_data``.

    :return: Channel description
    :rtype: str
    """
    self.html_url = self.channel_url
    channel_metadata = self.initial_data['metadata']['channelMetadataRenderer']
    return channel_metadata['description']
539
+
540
def find_videos_info(self, data):
    """Depth-first search of nested JSON for a 'content' string mentioning 'videos'.

    :param data: Any JSON-like value; only dicts and lists are descended into.
    :returns: The first string stored under a ``'content'`` key that contains
        ``'videos'``, or None when there is no such entry.
    """
    if isinstance(data, dict):
        for key, value in data.items():
            is_videos_text = key == 'content' and isinstance(value, str) and 'videos' in value
            if is_videos_text:
                return value
            if not isinstance(value, (dict, list)):
                continue
            nested_hit = self.find_videos_info(value)
            if nested_hit:
                return nested_hit
        return None

    if isinstance(data, list):
        # Lazily search each element and stop at the first truthy hit.
        hits = (self.find_videos_info(element) for element in data)
        return next((hit for hit in hits if hit), None)

    return None
556
+
557
@property
def length(self):
    """Return the channel's video-count text found in ``initial_data``.

    :returns: The string located by ``find_videos_info``, or ``'Unknown'``
        when nothing is found or the search raises (the error is printed).
    """
    try:
        videos_text = self.find_videos_info(self.initial_data)
    except Exception as e:
        print(f"Exception: {e}")
        return 'Unknown'
    return videos_text or 'Unknown'
566
+
567
@property
def last_updated(self) -> str:
    """Extract the published-time text of the first video on the videos tab.

    Points ``html_url`` at the videos URL, then reads ``publishedTimeText``
    of the first rich-grid item of the second tab in ``initial_data``.

    :return: Last video uploaded, or None when the page does not have the
        expected structure (missing key, too few tabs, empty grid).
    :rtype: str
    """
    self.html_url = self.videos_url
    try:
        first_video = self.initial_data['contents']['twoColumnBrowseResultsRenderer']['tabs'][1][
            'tabRenderer']['content']['richGridRenderer']['contents'][0]['richItemRenderer']['content'][
            'videoRenderer']
        return first_video['publishedTimeText']['simpleText']
    except (KeyError, IndexError, TypeError):
        # tabs[1] / contents[0] are list indexes, so a short or empty list
        # must be treated the same as a missing key.
        return None
582
+
583
@property
def thumbnail_url(self) -> str:
    """Return the URL of the channel's profile image.

    Points ``html_url`` at the channel home page, then takes the first entry
    of ``metadata -> channelMetadataRenderer -> avatar -> thumbnails``.

    :rtype: str
    :return: a string with the url of the channel's profile image
    """
    self.html_url = self.channel_url  # the avatar is read from the channel home page
    avatar = self.initial_data['metadata']['channelMetadataRenderer']['avatar']
    first_thumbnail = avatar['thumbnails'][0]
    return first_thumbnail['url']
592
+
593
@property
def home(self) -> list:
    """Return the objects extracted from the channel home (featured) page.

    :returns: Whatever ``_extract_obj_from_home`` produces after ``html_url``
        has been pointed at the featured URL.
    """
    # Select the home tab before extracting.
    self.html_url = self.featured_url
    home_objects = self._extract_obj_from_home()
    return home_objects
601
+
602
@property
def videos(self) -> Iterable[YouTube]:
    """Return a lazily evaluated list over the channel's videos tab.

    :rtype: List[YouTube]
    :returns: ``DeferredGeneratorList`` wrapping ``videos_generator()``
    """
    # Point the page URL at the videos tab before creating the generator.
    self.html_url = self.videos_url
    video_stream = self.videos_generator()
    return DeferredGeneratorList(video_stream)
611
+
612
@property
def shorts(self) -> Iterable[YouTube]:
    """Return a lazily evaluated list over the channel's shorts tab.

    :rtype: List[YouTube]
    :returns: ``DeferredGeneratorList`` wrapping ``videos_generator()``
    """
    # Point the page URL at the shorts tab before creating the generator.
    self.html_url = self.shorts_url
    shorts_stream = self.videos_generator()
    return DeferredGeneratorList(shorts_stream)
621
+
622
@property
def live(self) -> Iterable[YouTube]:
    """Return a lazily evaluated list over the channel's live (streams) tab.

    :rtype: List[YouTube]
    :returns: ``DeferredGeneratorList`` wrapping ``videos_generator()``
    """
    # Point the page URL at the streams tab before creating the generator.
    self.html_url = self.live_url
    live_stream = self.videos_generator()
    return DeferredGeneratorList(live_stream)
631
+
632
@property
def lives(self) -> Iterable[YouTube]:
    """Plural alias: returns exactly what the ``live`` property returns."""
    live_streams = self.live
    return live_streams
636
+
637
@property
def releases(self) -> Iterable[Playlist]:
    """Return a lazily evaluated list over the channel's releases tab.

    :rtype: List[Playlist]
    :returns: ``DeferredGeneratorList`` wrapping ``videos_generator()``
    """
    # Point the page URL at the releases tab before creating the generator.
    self.html_url = self.releases_url
    release_stream = self.videos_generator()
    return DeferredGeneratorList(release_stream)
646
+
647
@property
def playlists(self) -> Iterable[Playlist]:
    """Return a lazily evaluated list over the channel's playlists tab.

    :rtype: List[Playlist]
    :returns: ``DeferredGeneratorList`` wrapping ``videos_generator()``
    """
    # Point the page URL at the playlists tab before creating the generator.
    self.html_url = self.playlists_url
    playlist_stream = self.videos_generator()
    return DeferredGeneratorList(playlist_stream)
pytubefix/contrib/playlist.py ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module to download a complete playlist from a youtube channel."""
2
+ import json
3
+ import logging
4
+ from collections.abc import Sequence
5
+ from datetime import date, datetime
6
+ from typing import Dict, Iterable, List, Optional, Tuple, Union, Any, Callable
7
+
8
+ from pytubefix import extract, request, YouTube
9
+ from pytubefix.innertube import InnerTube
10
+ from pytubefix.helpers import cache, DeferredGeneratorList, install_proxy, uniqueify
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class Playlist(Sequence):
    """Load a YouTube playlist from its URL.

    Instances act as a read-only sequence of video URLs (indexing and ``len``
    are delegated to ``video_urls``) and expose metadata read from the
    sidebar of the playlist page.
    """

    def __init__(
            self,
            url: str,
            client: str = InnerTube().client_name,
            proxies: Optional[Dict[str, str]] = None,
            use_oauth: bool = False,
            allow_oauth_cache: bool = True,
            token_file: Optional[str] = None,
            oauth_verifier: Optional[Callable[[str, str], None]] = None,
            use_po_token: Optional[bool] = False,
            po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
    ):
        """
        :param str url:
            Playlist URL; the playlist id is extracted from it on first use.
        :param str client:
            Client name forwarded to every YouTube object this playlist creates.
        :param dict proxies:
            (Optional) A dict mapping protocol to proxy address which will be used by pytube.
        :param bool use_oauth:
            (Optional) Prompt the user to authenticate to YouTube.
            If allow_oauth_cache is set to True, the user should only be prompted once.
        :param bool allow_oauth_cache:
            (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
            These tokens are only generated if use_oauth is set to True as well.
        :param str token_file:
            (Optional) Path to the file where the OAuth tokens will be stored.
            Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
        :param Callable oauth_verifier:
            (optional) Verifier to be used for getting OAuth tokens.
            Verification URL and User-Code will be passed to it respectively.
            (if passed, else default verifier will be used)
        :param bool use_po_token:
            (Optional) Prompt the user to use the proof of origin token on YouTube.
            It must be sent with the API along with the linked visitorData and
            then passed as a `po_token` query parameter to affected clients.
            If allow_oauth_cache is set to True, the user should only be prompted once.
        :param Callable po_token_verifier:
            (Optional) Verifier used to obtain the visitorData and po_token.
            The verifier will return the visitorData and po_token respectively.
            (if passed, else default verifier will be used)
        """
        if proxies:
            install_proxy(proxies)

        self._input_url = url
        # Filled in by _extract_videos and sent along with continuation requests.
        self._visitor_data = None

        self.client = client
        self.use_oauth = use_oauth
        self.allow_oauth_cache = allow_oauth_cache
        self.token_file = token_file
        self.oauth_verifier = oauth_verifier

        self.use_po_token = use_po_token
        self.po_token_verifier = po_token_verifier

        # Lazily populated caches backing the properties below.
        self._html = None
        self._ytcfg = None
        self._initial_data = None
        self._sidebar_info = None

        self._playlist_id = None

    @property
    def playlist_id(self):
        """Get the playlist id, extracting it from the input URL on first access.

        :rtype: str
        """
        if not self._playlist_id:
            self._playlist_id = extract.playlist_id(self._input_url)
        return self._playlist_id

    @property
    def playlist_url(self):
        """Get the base playlist url.

        :rtype: str
        """
        return f"https://www.youtube.com/playlist?list={self.playlist_id}"

    @property
    def html(self):
        """Get the playlist page html, downloading it on first access.

        :rtype: str
        """
        if not self._html:
            self._html = request.get(self.playlist_url)
        return self._html

    @property
    def ytcfg(self):
        """Extract the ytcfg from the playlist page html (cached after first use).

        :rtype: dict
        """
        if not self._ytcfg:
            self._ytcfg = extract.get_ytcfg(self.html)
        return self._ytcfg

    @property
    def initial_data(self):
        """Extract the initial data from the playlist page html (cached after first use).

        :rtype: dict
        """
        if not self._initial_data:
            self._initial_data = extract.initial_data(self.html)
        return self._initial_data

    @property
    def sidebar_info(self):
        """Extract the sidebar items from the playlist page data (cached after first use).

        :rtype: dict
        """
        if not self._sidebar_info:
            self._sidebar_info = self.initial_data['sidebar'][
                'playlistSidebarRenderer']['items']
        return self._sidebar_info

    @property
    def yt_api_key(self):
        """Extract the INNERTUBE_API_KEY from the playlist ytcfg.

        :rtype: str
        """
        return self.ytcfg['INNERTUBE_API_KEY']

    def _paginate(
            self, initial_html: str, context: Optional[Any] = None,
            until_watch_id: Optional[str] = None
    ) -> Iterable[List[str]]:
        """Parse the video links from the page source, yields the /watch?v=
        part from video link

        :param initial_html str: html from the initial YouTube url, default: self.html
        :param context Optional[Any]: Auxiliary object
        :param until_watch_id Optional[str]: YouTube Video watch id until
            which the playlist should be read.

        :rtype: Iterable[List[str]]
        :returns: Iterable of lists of YouTube watch ids
        """
        videos_urls, continuation = self._extract_videos(
            json.dumps(extract.initial_data(initial_html)), context
        )

        while True:
            if until_watch_id:
                try:
                    trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
                except ValueError:
                    # Stop video is not on this page: emit the page untrimmed.
                    pass
                else:
                    yield videos_urls[:trim_index]
                    return
            yield videos_urls

            # Extraction from a playlist only returns 100 videos at a time.
            # If _extract_videos returned a continuation there are more
            # entries, so further requests are needed to gather all of them.
            if not continuation:
                return

            # The next page is requested with the token from the previous page.
            req = InnerTube('WEB').browse(continuation=continuation, visitor_data=self._visitor_data)
            # Extract up to 100 entries from the loaded page; returns another
            # continuation if more videos are available.
            videos_urls, continuation = self._extract_videos(req, context)

    def _extract_videos(self, raw_json: str, context: Optional[Any] = None) -> Tuple[List[str], Optional[str]]:
        """Extracts videos from a raw json page

        :param str raw_json: Input json extracted from the page or the last
            server response (an already-parsed dict is accepted as well)
        :param Optional[Any] context: Auxiliary object from _paginate
        :rtype: Tuple[List[str], Optional[str]]
        :returns: Tuple containing a list of up to 100 video watch ids and
            a continuation token, if more videos are available
        """
        initial_data = raw_json if isinstance(raw_json, dict) else json.loads(raw_json)

        try:
            # This is the json tree structure if the json was extracted from html.
            section_contents = initial_data["contents"][
                "twoColumnBrowseResultsRenderer"][
                "tabs"][0]["tabRenderer"]["content"][
                "sectionListRenderer"]["contents"]
            try:
                renderer = section_contents[0]["itemSectionRenderer"]["contents"][0]

                if 'richGridRenderer' in renderer:
                    important_content = renderer["richGridRenderer"]
                else:
                    important_content = renderer["playlistVideoListRenderer"]

            except (KeyError, IndexError, TypeError):
                # Playlist with submenus
                important_content = section_contents[
                    1]["itemSectionRenderer"][
                    "contents"][0]["playlistVideoListRenderer"]
            videos = important_content["contents"]

            self._visitor_data = initial_data["responseContext"]["webResponseContextExtensionData"][
                "ytConfigData"]["visitorData"]
        except (KeyError, IndexError, TypeError):
            try:
                # This is the json tree structure if the json was directly sent
                # by the server in a continuation response:
                # no longer a list and no longer has the "response" key.
                videos = initial_data['onResponseReceivedActions'][0][
                    'appendContinuationItemsAction']['continuationItems']
            except (KeyError, IndexError, TypeError) as p:
                logger.info(p)
                return [], None

        # For some reason YouTube only returns the first 100 shorts of a playlist;
        # the token provided by the API doesn't seem to work even in the official player.
        # Start from "no continuation" so the name is always bound, even when
        # none of the commands below carries a continuationCommand.
        continuation = None
        try:
            endpoint = videos[-1]['continuationItemRenderer']['continuationEndpoint']
            try:
                continuation = endpoint['continuationCommand']['token']
            except (KeyError, IndexError, TypeError):
                # Alternative layout: the token is nested in a command list.
                for command in endpoint['commandExecutorCommand']['commands']:
                    if 'continuationCommand' in command:
                        continuation = command['continuationCommand']['token']
                        break
            # The last entry is the continuation marker, not a video.
            videos = videos[:-1]
        except (KeyError, IndexError, TypeError):
            # If there is an error, no continuation is available.
            continuation = None

        items_obj = self._extract_ids(videos)

        # remove duplicates
        return uniqueify(items_obj), continuation

    def _extract_ids(self, items: list) -> list:
        """ Iterate over the extracted urls.

        Items that none of the extractors recognise are dropped: their
        extractor result is an empty list, which is not a watch path and
        would otherwise be turned into a bogus URL by ``_video_url``.

        :returns: List with extracted ids.
        """
        extracted = (self._extract_video_id(item) for item in items)
        return [watch_path for watch_path in extracted if watch_path]

    def _extract_video_id(self, x: dict):
        """ Try extracting video ids, if it fails, try extracting shorts ids.

        :returns: ``/watch?v=<id>`` for a ``playlistVideoRenderer`` item,
            otherwise the result of ``_extract_shorts_id``.
        """
        try:
            return f"/watch?v={x['playlistVideoRenderer']['videoId']}"
        except (KeyError, IndexError, TypeError):
            return self._extract_shorts_id(x)

    def _extract_shorts_id(self, x: dict):
        """ Try extracting shorts ids.

        :returns: ``/watch?v=<id>`` for a shorts item, or an empty list when
            the item has neither of the known shorts shapes.
        """
        try:
            content = x['richItemRenderer']['content']

            # New json tree added on 09/12/2024
            if 'shortsLockupViewModel' in content:
                video_id = content['shortsLockupViewModel']['onTap']['innertubeCommand']['reelWatchEndpoint']['videoId']
            else:
                video_id = content['reelItemRenderer']['videoId']

            return f"/watch?v={video_id}"

        except (KeyError, IndexError, TypeError):
            return []

    def trimmed(self, video_id: str) -> Iterable[str]:
        """Retrieve a list of YouTube video URLs trimmed at the given video ID

        i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns
        [1,2]
        :type video_id: str
            video ID to trim the returned list of playlist URLs at
        :rtype: List[str]
        :returns:
            List of video URLs from the playlist trimmed at the given ID
        """
        for page in self._paginate(self.html, until_watch_id=video_id):
            yield from (self._video_url(watch_path) for watch_path in page)

    def url_generator(self):
        """Generator that yields video URLs.

        :Yields: Video URLs
        """
        for page in self._paginate(self.html):
            for video in page:
                yield self._video_url(video)

    @property  # type: ignore
    @cache
    def video_urls(self) -> DeferredGeneratorList:
        """Complete links of all the videos in playlist

        :rtype: List[str]
        :returns: List of video URLs
        """
        return DeferredGeneratorList(self.url_generator())

    def videos_generator(self):
        """Yield a YouTube object for every URL in ``video_urls``.

        Each object is created with this playlist's client, OAuth and
        PO-token settings.
        """
        for url in self.video_urls:
            yield YouTube(
                url,
                client=self.client,
                use_oauth=self.use_oauth,
                allow_oauth_cache=self.allow_oauth_cache,
                token_file=self.token_file,
                oauth_verifier=self.oauth_verifier,
                use_po_token=self.use_po_token,
                po_token_verifier=self.po_token_verifier
            )

    @property
    def videos(self) -> Iterable[YouTube]:
        """Yields YouTube objects of videos in this playlist

        :rtype: List[YouTube]
        :returns: List of YouTube
        """
        return DeferredGeneratorList(self.videos_generator())

    def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
        return self.video_urls[i]

    def __len__(self) -> int:
        return len(self.video_urls)

    def __repr__(self) -> str:
        return f'<pytubefix.contrib.Playlist object: playlistId={self.playlist_id}>'

    @property
    @cache
    def last_updated(self) -> Optional[date]:
        """Extract the date that the playlist was last updated.

        For some playlists, this will be a specific date, which is returned as a datetime
        object. For other playlists, this is an estimate such as "1 week ago". Due to the
        fact that this value is returned as a string, pytube does a best-effort parsing
        where possible, and returns the raw string where it is not possible.

        :return: Date of last playlist update where possible, else the string provided
        :rtype: datetime.date
        """
        last_updated_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'stats'][2]['runs'][1]['text']
        try:
            date_components = last_updated_text.split()
            month = date_components[0]
            day = date_components[1].strip(',')
            year = date_components[2]
            return datetime.strptime(
                f"{month} {day:0>2} {year}", "%b %d %Y"
            ).date()
        except (IndexError, KeyError, ValueError):
            # ValueError: three-word relative texts such as "2 days ago" reach
            # strptime but do not match the "%b %d %Y" format.
            return last_updated_text

    @property
    @cache
    def title(self) -> Optional[str]:
        """Extract playlist title

        :return: playlist title (name)
        :rtype: Optional[str]
        """
        return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'title']['runs'][0]['text']

    @property
    def thumbnail_url(self):
        """Extract the URL of the last thumbnail listed for the playlist.

        :return: Thumbnail URL, or None when the sidebar has neither a video
            thumbnail renderer nor a custom thumbnail renderer.
        """
        thumbnail_renderer = self.sidebar_info[0][
            'playlistSidebarPrimaryInfoRenderer'][
            'thumbnailRenderer']

        # The video renderer takes precedence over the custom one.
        for renderer_name in ('playlistVideoThumbnailRenderer',
                              'playlistCustomThumbnailRenderer'):
            if renderer_name in thumbnail_renderer:
                return thumbnail_renderer[renderer_name][
                    'thumbnail']['thumbnails'][-1]['url']
        return None

    @property
    def description(self) -> str:
        """Extract the playlist description text from the sidebar.

        :rtype: str
        """
        return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'description']['simpleText']

    @property
    def length(self):
        """Extract the number of videos in the playlist.

        :return: Playlist video count
        :rtype: int
        """
        count_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'stats'][0]['runs'][0]['text']
        count_text = count_text.replace(',', '')
        return int(count_text)

    @property
    def views(self):
        """Extract view count for playlist.

        :return: Playlist view count
        :rtype: int
        """
        # "1,234,567 views"
        views_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
            'stats'][1]['simpleText']
        # "1,234,567"
        count_text = views_text.split()[0]
        # "1234567"
        count_text = count_text.replace(',', '')
        return int(count_text)

    @property
    def owner(self):
        """Extract the owner of the playlist.

        :return: Playlist owner name.
        :rtype: str
        """
        return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
            'videoOwner']['videoOwnerRenderer']['title']['runs'][0]['text']

    @property
    def owner_id(self):
        """Extract the channel_id of the owner of the playlist.

        :return: Playlist owner's channel ID.
        :rtype: str
        """
        return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
            'videoOwner']['videoOwnerRenderer']['title']['runs'][0][
            'navigationEndpoint']['browseEndpoint']['browseId']

    @property
    def owner_url(self):
        """Create the channel url of the owner of the playlist.

        :return: Playlist owner's channel url.
        :rtype: str
        """
        return f'https://www.youtube.com/channel/{self.owner_id}'

    @staticmethod
    def _video_url(watch_path: str):
        """Prefix a ``/watch?v=...`` path with the YouTube host."""
        return f"https://www.youtube.com{watch_path}"
pytubefix/contrib/search.py ADDED
@@ -0,0 +1,557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module for interacting with YouTube search."""
2
+ # Native python imports
3
+ import logging
4
+ from typing import List, Optional, Dict, Callable, Tuple
5
+
6
+ # Local imports
7
+ from pytubefix import YouTube, Channel, Playlist
8
+ from pytubefix.helpers import deprecated, install_proxy
9
+ from pytubefix.innertube import InnerTube
10
+ from pytubefix.protobuf import encode_protobuf
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class Search:
16
def __init__(
        self, query: str,
        client: str = InnerTube().client_name,
        proxies: Optional[Dict[str, str]] = None,
        use_oauth: bool = False,
        allow_oauth_cache: bool = True,
        token_file: Optional[str] = None,
        oauth_verifier: Optional[Callable[[str, str], None]] = None,
        use_po_token: Optional[bool] = False,
        po_token_verifier: Optional[Callable[[None], Tuple[str, str]]] = None,
        filters: Optional[dict] = None
):
    """Initialize Search object.

    :param str query:
        Search query provided by the user.
    :param str client:
        Client name stored on the instance as ``self.client``; the internal
        InnerTube client created here always uses ``'WEB'``.
    :param dict proxies:
        (Optional) A dict mapping protocol to proxy address which will be used by pytube.
    :param bool use_oauth:
        (Optional) Prompt the user to authenticate to YouTube.
        If allow_oauth_cache is set to True, the user should only be prompted once.
    :param bool allow_oauth_cache:
        (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
        These tokens are only generated if use_oauth is set to True as well.
    :param str token_file:
        (Optional) Path to the file where the OAuth tokens will be stored.
        Defaults to None, which means the tokens will be stored in the pytubefix/__cache__ directory.
    :param Callable oauth_verifier:
        (optional) Verifier to be used for getting OAuth tokens.
        Verification URL and User-Code will be passed to it respectively.
        (if passed, else default verifier will be used)
    :param bool use_po_token:
        (Optional) Prompt the user to use the proof of origin token on YouTube.
        It must be sent with the API along with the linked visitorData and
        then passed as a `po_token` query parameter to affected clients.
        If allow_oauth_cache is set to True, the user should only be prompted once.
    :param Callable po_token_verifier:
        (Optional) Verifier used to obtain the visitorData and po_token.
        The verifier will return the visitorData and po_token respectively.
        (if passed, else default verifier will be used)
    :param dict filters:
        (Optional) Apply filters when searching.
        Can be used: `upload_date`, `type`, `duration`, `features`, `sort_by`.
        features can be combined into a list with other parameters of the same type.
    """
    self.query = query
    self.client = client
    self.use_oauth = use_oauth
    self.allow_oauth_cache = allow_oauth_cache
    self.token_file = token_file
    self.oauth_verifier = oauth_verifier

    self.use_po_token = use_po_token
    self.po_token_verifier = po_token_verifier

    self._innertube_client = InnerTube(
        client='WEB',
        use_oauth=use_oauth,
        allow_cache=allow_oauth_cache,
        token_file=token_file,
        oauth_verifier=oauth_verifier,
        use_po_token=use_po_token,
        po_token_verifier=po_token_verifier
    )

    # The first response (no continuation) has its own structure and also
    # carries the completion suggestions, so it is kept separately.
    self._initial_results = None

    self._results = {}
    self._completion_suggestions = None

    # Continuation token of the latest query, so that get_next_results()
    # always returns new results.
    self._current_continuation = None

    if proxies:
        install_proxy(proxies)

    # Encoded filter parameters; stays None when no filters were given.
    self.filter = None
    if filters:
        logger.debug("Filters found, starting combination")
        filter_builder = Filter()
        filter_builder.set_filters(filters)
        self.filter = filter_builder.get_filters_params()
103
+
104
@property
def completion_suggestions(self):
    """Return query autocompletion suggestions for the query.

    The suggestions are read from the ``'refinements'`` entry of the initial
    results the first time they are needed, then kept on the instance.

    :rtype: list
    :returns:
        A list of autocomplete suggestions provided by YouTube for the query.
    """
    already_loaded = self._completion_suggestions
    # ``self.results`` is only consulted when nothing has been stored yet.
    if not already_loaded and self.results:
        self._completion_suggestions = self._initial_results['refinements']
    return self._completion_suggestions
117
+
118
+ def _get_results(self):
119
+ """Search results and filter them
120
+
121
+ """
122
+ results, continuation = self.fetch_and_parse()
123
+ self._current_continuation = continuation
124
+ self._results['videos'] = results['videos']
125
+ self._results['shorts'] = results['shorts']
126
+ self._results['playlist'] = results['playlist']
127
+ self._results['channel'] = results['channel']
128
+
129
@property
def videos(self) -> List[YouTube]:
    """Returns the search result videos.

    The first access triggers the initial search when no results are
    stored yet. Additional results can be generated using
    ``.get_next_results()``.

    :rtype: list[YouTube]
    :returns:
        A new list holding the stored ``'videos'`` results.
    """
    if not self._results:
        self._get_results()

    # Hand out a copy so callers cannot mutate the stored list.
    return list(self._results['videos'])
144
+
145
@property
def shorts(self) -> List[YouTube]:
    """Return the shorts results collected so far.

    The first access runs the initial search; further pages are added by
    calling ``.get_next_results()``.

    :rtype: list[YouTube]
    :returns:
        A new list holding the accumulated YouTube objects for shorts.
    """
    if not self._results:
        self._get_results()

    return list(self._results['shorts'])
160
+
161
@property
def playlist(self) -> List[Playlist]:
    """Return the playlist results collected so far.

    The first access runs the initial search; further pages are added by
    calling ``.get_next_results()``.

    :rtype: list[Playlist]
    :returns:
        A new list holding the accumulated Playlist objects.
    """
    if not self._results:
        self._get_results()

    return list(self._results['playlist'])
176
+
177
@property
def channel(self) -> List[Channel]:
    """Return the channel results collected so far.

    The first access runs the initial search; further pages are added by
    calling ``.get_next_results()``.

    :rtype: list[Channel]
    :returns:
        A new list holding the accumulated Channel objects.
    """
    if not self._results:
        self._get_results()

    return list(self._results['channel'])
192
+
193
@property
@deprecated("Get video results using: .videos")
def results(self) -> list:
    """Deprecated accessor that returns the same list as ``.videos``.

    Only video results are returned here; shorts, playlists and channels
    are not included. Use ``.all`` to obtain every result type at once.

    On first call, will generate and return the first set of results.
    Additional results can be generated using ``.get_next_results()``.

    :rtype: list
    :returns:
        A list of YouTube objects.
    """
    video_results = self.videos
    return video_results
212
+
213
@property
def all(self) -> list:
    """
    Return every object found by the search as one flat list.

    The categories stored in ``self._results`` are concatenated in the
    order in which they are stored.
    """
    if not self._results:
        self._get_results()

    flattened = []
    for group in self._results.values():
        flattened.extend(group)
    return flattened
222
+
223
def get_next_results(self):
    """Use the stored continuation string to fetch the next set of results.

    This method does not return the results, but instead updates the
    result properties. When no continuation token is stored, the initial
    search is (re)run through ``_get_results()``.
    """
    if not self._current_continuation:
        self._get_results()
        return

    results, continuation = self.fetch_and_parse(self._current_continuation)
    self._current_continuation = continuation
    # A continuation page may lack some categories entirely; treat a
    # missing category as "nothing new" instead of raising KeyError.
    for category in ('videos', 'shorts', 'playlist', 'channel'):
        self._results.setdefault(category, []).extend(results.get(category, []))
237
+
238
def fetch_and_parse(self, continuation=None):
    """Fetch from the innertube API and parse the results.

    :param str continuation:
        Continuation string for fetching results.
    :rtype: tuple
    :returns:
        A tuple ``(results, next_continuation)``. ``results`` is a dict that
        always contains the keys ``videos``, ``shorts``, ``playlist`` and
        ``channel`` (each a list, possibly empty); ``next_continuation`` is
        the token for the next page or ``None`` when no
        ``continuationItemRenderer`` was found.
    """
    # The filter parameter must only be passed in the first API call.
    # After the first call, the continuation token already contains the filter.
    filters = {'params': self.filter} if self.filter and not continuation else None
    raw_results = self.fetch_query(continuation, filters)

    # The initial response and continuation responses are laid out
    # differently: try the initial layout first, then the continuation one.
    try:
        sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
            'primaryContents']['sectionListRenderer']['contents']
    except KeyError:
        sections = raw_results['onResponseReceivedCommands'][0][
            'appendContinuationItemsAction']['continuationItems']

    item_renderer = None
    continuation_renderer = None
    for section in sections:
        if 'itemSectionRenderer' in section:
            item_renderer = section['itemSectionRenderer']
        if 'continuationItemRenderer' in section:
            continuation_renderer = section['continuationItemRenderer']

    # If the continuationItemRenderer doesn't exist, assume no further results
    if continuation_renderer:
        next_continuation = continuation_renderer['continuationEndpoint'][
            'continuationCommand']['token']
    else:
        next_continuation = None

    # Always expose all four categories so callers can index them safely,
    # even when the response holds no itemSectionRenderer (no results).
    results = {'videos': [], 'shorts': [], 'playlist': [], 'channel': []}
    if not item_renderer:
        return results, next_continuation

    # Settings forwarded unchanged to every object built from a result.
    shared_kwargs = {
        'client': self.client,
        'use_oauth': self.use_oauth,
        'allow_oauth_cache': self.allow_oauth_cache,
        'token_file': self.token_file,
        'oauth_verifier': self.oauth_verifier,
        'use_po_token': self.use_po_token,
        'po_token_verifier': self.po_token_verifier,
    }

    # Renderers that are not search hits and are skipped entirely:
    #   shelfRenderer              - "recommended" blocks such as "people also
    #                                watched" that break up the results
    #   radioRenderer              - auto-generated "mix" playlists
    #   horizontalCardListRenderer - "people also searched for"
    #   didYouMeanRenderer         - probably typo-fix suggestions
    #   backgroundPromoRenderer    - seems to be the image of a no-results page
    skipped_renderers = (
        'shelfRenderer',
        'radioRenderer',
        'horizontalCardListRenderer',
        'didYouMeanRenderer',
        'backgroundPromoRenderer',
    )

    for details in item_renderer['contents']:
        # Skip over ads
        if details.get('searchPyvRenderer', {}).get('ads', None):
            continue

        if any(key in details for key in skipped_renderers):
            continue

        # Get playlist results
        if 'playlistRenderer' in details:
            results['playlist'].append(Playlist(
                f"https://www.youtube.com/playlist?list="
                f"{details['playlistRenderer']['playlistId']}",
                **shared_kwargs
            ))

        # Get channel results
        if 'channelRenderer' in details:
            results['channel'].append(Channel(
                f"https://www.youtube.com/channel/"
                f"{details['channelRenderer']['channelId']}",
                **shared_kwargs
            ))

        # Get shorts results
        if 'reelShelfRenderer' in details:
            for item in details['reelShelfRenderer']['items']:
                # Two layouts are handled for an entry of the shelf.
                if 'reelItemRenderer' in item:
                    short_id = item['reelItemRenderer']['videoId']
                else:
                    short_id = item['shortsLockupViewModel']['onTap']['innertubeCommand'][
                        'reelWatchEndpoint']['videoId']

                results['shorts'].append(YouTube(
                    f"https://www.youtube.com/watch?v={short_id}",
                    **shared_kwargs
                ))

        # Get videos results
        if 'videoRenderer' in details:
            results['videos'].append(YouTube(
                f"https://www.youtube.com/watch?v="
                f"{details['videoRenderer']['videoId']}",
                **shared_kwargs
            ))

    return results, next_continuation
375
+
376
def fetch_query(self, continuation: str = None, filters: dict = None):
    """Send the search request to the innertube client and return its reply.

    The first non-empty reply is additionally kept in
    ``self._initial_results``; later replies do not replace it.

    :param str continuation:
        Continuation string for fetching results.
    :param dict filters:
        Parameter encoded in protobuf that contains the search filters.
    :rtype: dict
    :returns:
        The raw json object returned by the innertube API.
    """
    response = self._innertube_client.search(
        self.query,
        continuation=continuation,
        data=filters,
    )
    if self._initial_results:
        return response
    self._initial_results = response
    return response
391
+
392
+
393
class Filter:
    """
    Build filters for YouTube search in protobuf format

    Filter values are small dicts (as returned by the ``get_*`` static
    methods). ``get_filters_params()`` merges the selected ones into a
    single structure and passes it to ``encode_protobuf``.
    """

    def __init__(self):
        # One slot per category; 'features' accumulates several values.
        self.filters = {
            'upload_date': None,
            'type': None,
            'duration': None,
            'features': [],
            'sort_by': None
        }

    def set_filters(self, filter_dict):
        """
        Applies multiple filters at once using a dictionary.

        :param dict filter_dict:
            Maps a category name to its filter value. For ``features`` the
            value may be a single filter or a list of filters; they are
            added to the features already selected. ``None`` features
            (what ``get_features`` returns for an unknown option) are
            ignored, like ``None`` is for every other category.
        """
        for category, value in filter_dict.items():
            if category == 'features':
                if isinstance(value, list):
                    logger.debug("Filter features is a list")
                    self.filters['features'].extend(
                        feature for feature in value if feature is not None
                    )
                elif value is not None:
                    self.filters['features'].append(value)
            else:
                self.filters[category] = value

    def clear_filters(self):
        """
        Clear all filters
        """
        for category in self.filters:
            if category == 'features':
                self.filters[category] = []
            else:
                self.filters[category] = None

    def get_filters_params(self):
        """
        Combines selected filters into a final structure

        ``sort_by`` is merged at the top level; every other category is
        merged into the nested dict stored under key ``2``, whose entries
        are sorted by key before encoding.

        :returns:
            The value returned by ``encode_protobuf`` for the string form
            of the combined structure.
        """
        combined = {}

        if self.filters['sort_by']:
            combined.update(self.filters['sort_by'])

        nested = {}

        if self.filters['type']:
            nested.update(self.filters['type'])

        if self.filters['duration']:
            nested.update(self.filters['duration'])

        for feature in self.filters['features']:
            # Guard against empty/None entries placed directly in the list;
            # dict.update(None) would raise TypeError.
            if feature:
                nested.update(feature)

        if self.filters['upload_date']:
            nested.update(self.filters['upload_date'])

        combined[2] = dict(sorted(nested.items()))

        logger.debug(f"Combined filters: {combined}")

        encoded_filters = encode_protobuf(str(combined))

        logger.debug(f"Filter encoded in protobuf: {encoded_filters}")

        return encoded_filters

    @staticmethod
    def get_upload_date(option: str) -> dict:
        """
        Last Hour,
        Today,
        This Week,
        This Month,
        This Year

        Returns ``None`` for any other option.
        """
        filters = {
            "Last Hour": {1: 1},
            "Today": {1: 2},
            "This Week": {1: 3},
            "This Month": {1: 4},
            "This Year": {1: 5},
        }
        return filters.get(option)

    @staticmethod
    def get_type(option: str) -> dict:
        """
        Video,
        Channel,
        Playlist,
        Movie

        Returns ``None`` for any other option.
        """
        filters = {
            "Video": {2: 1},
            "Channel": {2: 2},
            "Playlist": {2: 3},
            "Movie": {2: 4},
        }
        return filters.get(option)

    @staticmethod
    def get_duration(option: str) -> dict:
        """
        Under 4 minutes,
        Over 20 minutes,
        4 - 20 minutes

        Returns ``None`` for any other option.
        """
        filters = {
            "Under 4 minutes": {3: 1},
            "Over 20 minutes": {3: 2},
            "4 - 20 minutes": {3: 3},
        }
        return filters.get(option)

    @staticmethod
    def get_features(option: str) -> dict:
        """
        Live,
        4K,
        HD,
        Subtitles/CC,
        Creative Commons,
        360,
        VR180,
        3D,
        HDR,
        Location,
        Purchased

        Returns ``None`` for any other option.
        """
        filters = {
            "Live": {8: 1},
            "4K": {14: 1},
            "HD": {4: 1},
            "Subtitles/CC": {5: 1},
            "Creative Commons": {6: 1},
            "360": {15: 1},
            "VR180": {26: 1},
            "3D": {7: 1},
            "HDR": {25: 1},
            "Location": {23: 1},
            "Purchased": {9: 1},
        }
        return filters.get(option)

    @staticmethod
    def get_sort_by(option: str) -> dict:
        """
        Relevance,
        Upload date,
        View count,
        Rating

        Returns ``None`` for any other option.
        """
        filters = {
            "Relevance": {1: 0},
            "Upload date": {1: 2},
            "View count": {1: 3},
            "Rating": {1: 1},
        }
        return filters.get(option)
pytubefix/exceptions.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Library specific exception definitions."""
2
+ from typing import Pattern, Union
3
+ import logging
4
+
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class PytubeFixError(Exception):
    """Root of the pytubefix exception hierarchy.

    Library-specific exceptions derive from this class instead of reusing
    built-in exception types, so that code using the library does not
    accidentally catch (or mis-handle) unrelated built-in errors.
    """
15
+ ### MISC Errors ###
16
+
17
class MaxRetriesExceeded(PytubeFixError):
    """The maximum number of retries was exceeded."""
19
+
20
+
21
class HTMLParseError(PytubeFixError):
    """The HTML could not be parsed."""
23
+
24
+
25
class ExtractError(PytubeFixError):
    """Base class for errors raised while extracting data."""
27
+
28
class SABRError(PytubeFixError):
    """Error that carries a free-form message supplied by the raiser."""

    def __init__(self, msg: str):
        """
        :param str msg:
            Text used both as the exception message and as ``error_string``.
        """
        super().__init__(msg)
        self.msg = msg

    @property
    def error_string(self):
        """The message this error was created with."""
        return self.msg
36
+
37
class RegexMatchError(ExtractError):
    """A regular expression did not produce any match."""

    def __init__(self, caller: str, pattern: Union[str, Pattern]):
        """
        :param str caller:
            Name of the function that attempted the match.
        :param str pattern:
            Pattern (string or compiled) that failed to match.
        """
        self.caller = caller
        self.pattern = pattern

        message = f"{caller}: could not find match for {pattern}"
        super().__init__(message)
53
+
54
+
55
class InterpretationError(PytubeFixError):
    """The player JavaScript at ``js_url`` could not be interpreted."""

    def __init__(self, js_url: str):
        """
        :param str js_url:
            URL of the player JavaScript, included in the message.
        """
        # ``error_string`` reads ``js_url``, so it has to be set first.
        self.js_url = js_url
        message = self.error_string
        super().__init__(message)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f'Error interpreting player js: {self.js_url}'
63
+
64
+ ### Video Unavailable Errors ###
65
+ # There are really 3 types of errors thrown
66
+ # 1. VideoUnavailable - This is the base error type for all video errors.
67
+ # Or a catchall if neither the user or developer cares about the specific error.
68
+ # 2. Known Error Type, Extra info useful for user
69
+ # 3. Unknown Error Type, Important to Developer
70
+
71
+ ## 1. VideoUnavailable ##
72
+
73
class VideoUnavailable(PytubeFixError):
    """
    Base class of all video errors.

    Raise this directly when the failure does not fit one of the known,
    more specific error types and is not something a developer needs to
    look into.
    """

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        # Subclasses override ``error_string``; it is evaluated here, after
        # ``video_id`` is stored, to build the exception message.
        self.video_id = video_id
        message = self.error_string
        super().__init__(message)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f'{self.video_id} is unavailable'
93
+
94
+ ## 2. Known Error Type, Extra info useful for user ##
95
+
96
class VideoPrivate(VideoUnavailable):
    """Video error whose message states that the video is private."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f'{self.video_id} is a private video'
108
+
109
+
110
class MembersOnly(VideoUnavailable):
    """The video is members-only.

    YouTube has videos that can only be watched by users who subscribed to
    the content creator's membership.
    ref: https://support.google.com/youtube/answer/7544492?hl=en
    """

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f'{self.video_id} is a members-only video'
129
+
130
+
131
class VideoRegionBlocked(VideoUnavailable):
    """Video error whose message states the video is unavailable in the region."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f'{self.video_id} is not available in your region'
143
+
144
class BotDetection(VideoUnavailable):
    """Video error whose message states the request was detected as a bot."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error, including a hint on how to proceed."""
        message = (
            f'{self.video_id} This request was detected as a bot. Use `use_po_token=True` or switch to WEB client to view. '
            f'See more details at https://github.com/JuanBindez/pytubefix/pull/209')
        return message
158
+
159
+
160
class PoTokenRequired(VideoUnavailable):
    """Video error stating that the given client requires a PoToken."""

    def __init__(self, video_id: str, client_name: str):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str client_name:
            A YouTube client identifier.
        """
        # ``error_string`` reads both attributes while the base class builds
        # the message, so they are stored before calling it.
        self.client_name = client_name
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        message = (
            f'{self.video_id} The {self.client_name} client requires PoToken to obtain functional streams, '
            f'See more details at https://github.com/JuanBindez/pytubefix/pull/209')
        return message
177
+
178
+
179
class LoginRequired(VideoUnavailable):
    """Video error stating that a login is required to view the video."""

    def __init__(self, video_id: str, reason: str):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str reason:
            Reason text, appended to the message as "YouTube reason".
        """
        # Stored before the base initializer runs because ``error_string``
        # reads both values.
        self.reason = reason
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        message = (
            f'{self.video_id} requires login to view, YouTube reason: {self.reason}')
        return message
193
+
194
+ # legacy livestream error types still supported
195
+
196
class RecordingUnavailable(VideoUnavailable):
    """Video error stating that no live stream recording is available."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f'{self.video_id} does not have a live stream recording available'
208
+
209
+
210
class LiveStreamError(VideoUnavailable):
    """The video is a live stream."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f'{self.video_id} is streaming live and cannot be loaded'
224
+
225
+
226
class LiveStreamOffline(VideoUnavailable):
    """The live stream will start soon."""

    def __init__(self, video_id: str, reason: str):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str reason:
            Reason for the error; it follows the video id in the message.
        """
        # Stored before the base initializer runs because ``error_string``
        # reads both values.
        self.reason = reason
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f'{self.video_id} {self.reason}'
243
+
244
+ # legacy age restricted error types still supported
245
+
246
class AgeRestrictedError(VideoUnavailable):
    """The video is age restricted and cannot be accessed without OAuth."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f"{self.video_id} is age restricted, and can't be accessed without logging in."
260
+
261
+
262
class AgeCheckRequiredError(VideoUnavailable):
    """Video error stating that age confirmation is needed to access the video."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f"{self.video_id} has age restrictions and cannot be accessed without confirmation."
274
+
275
+
276
class AgeCheckRequiredAccountError(VideoUnavailable):
    """Video error asking the user to sign in to confirm their age."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        message = (
            f"{self.video_id} may be inappropriate for "
            f"some users. Sign in to your primary account to confirm your age.")
        return message
290
+
291
+
292
class InnerTubeResponseError(VideoUnavailable):
    """Video error stating that a client received no response from YouTube."""

    def __init__(self, video_id: str, client: str):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str client:
            Name of the client, included in the message.
        """
        # Stored before the base initializer runs because ``error_string``
        # reads both values.
        self.client = client
        self.video_id = video_id
        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        message = (
            f"{self.video_id} : {self.client} client did not receive a response from YouTube")
        return message
306
+
307
+ ## 3. Unknown Error Type, Important to Developer ##
308
+
309
+
310
class UnknownVideoError(VideoUnavailable):
    """Video error of an unknown kind.

    Creating an instance writes the available details to the module logger
    at warning level, together with a request to report the problem.
    """

    def __init__(self, video_id: str, status: str = None, reason: str = None, developer_message: str = None):
        """
        :param str video_id:
            A YouTube video identifier.
        :param str status:
            The status code of the response.
        :param str reason:
            The reason for the error.
        :param str developer_message:
            The message from the developer.
        """
        self.video_id = video_id
        self.status = status
        self.reason = reason
        self.developer_message = developer_message

        # One warning record per line, in this order.
        report_lines = (
            'Unknown Video Error',
            f'Video ID: {self.video_id}',
            f'Status: {self.status}',
            f'Reason: {self.reason}',
            f'Developer Message: {self.developer_message}',
            'Please open an issue at '
            'https://github.com/JuanBindez/pytubefix/issues '
            'and provide the above log output.',
        )
        for line in report_lines:
            logger.warning(line)

        super().__init__(video_id)

    @property
    def error_string(self):
        """Message reported for this error."""
        return f'{self.video_id} has an unknown error, check logs for more info [Status: {self.status}] [Reason: {self.reason}]'
pytubefix/extract.py ADDED
@@ -0,0 +1,646 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """This module contains all non-cipher related data extraction logic."""
2
+ import logging
3
+ import urllib.parse
4
+ import re
5
+ from collections import OrderedDict
6
+ from datetime import datetime
7
+ from typing import Any, Dict, List, Optional, Tuple
8
+ from urllib.parse import parse_qs, quote, urlencode, urlparse
9
+
10
+ from pytubefix.cipher import Cipher
11
+ from pytubefix.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
12
+ from pytubefix.helpers import regex_search
13
+ from pytubefix.metadata import YouTubeMetadata
14
+ from pytubefix.parser import parse_for_object, parse_for_all_objects
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def publish_date(watch_html: str) -> Optional[datetime]:
    """Extract publish date and return it as a datetime object

    The date is taken from the ``content`` value that follows
    ``itemprop="datePublished"`` in the page, which must be an ISO 8601
    timestamp with a UTC offset (``YYYY-MM-DDTHH:MM:SS+HH:MM``).

    :param str watch_html:
        The html contents of the watch page.
    :rtype: datetime
    :returns:
        Publish date of the video as a datetime object with timezone, or
        ``None`` when the page has no such timestamp or the timestamp is
        not a valid date.
    """
    match = re.search(
        r"(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2}",
        watch_html
    )
    if match is None:
        return None

    try:
        return datetime.fromisoformat(match.group(0))
    except ValueError:
        # The pattern only checks the digit layout; values such as month 13
        # still match and are rejected by ``fromisoformat``.
        return None
36
+
37
+
38
def recording_available(watch_html):
    """Tell whether a live stream recording is available.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        ``False`` when the page contains the "recording is not available"
        notice, ``True`` otherwise.
    """
    unavailable_markers = (
        'This live stream recording is not available.',
    )
    return not any(marker in watch_html for marker in unavailable_markers)
54
+
55
+
56
def is_private(watch_html):
    """Tell whether the watch page describes a private video.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        ``True`` when the page contains one of the known "private video"
        texts, ``False`` otherwise.
    """
    private_markers = (
        "This is a private video. Please sign in to verify that you may see it.",
        "\"simpleText\":\"Private video\"",
        "This video is private.",
    )
    return any(marker in watch_html for marker in private_markers)
74
+
75
+
76
def is_age_restricted(watch_html: str) -> bool:
    """Tell whether the watch page is marked as age restricted.

    The page counts as restricted when ``og:restrictions:age`` is found
    in it.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is age restricted.
    """
    try:
        regex_search(r"og:restrictions:age", watch_html, group=0)
    except RegexMatchError:
        restricted = False
    else:
        restricted = True
    return restricted
90
+
91
+
92
def playability_status(player_response: dict) -> Tuple[Any, Any]:
    """Return the playability status and status explanation of a video.

    For example, a video may have a status of LOGIN_REQUIRED, and an explanation
    of "This is a private video. Please sign in to verify that you may see it."

    This explanation is what gets incorporated into the media player overlay.

    :param dict player_response:
        Content of the player's response.
    :rtype: tuple
    :returns:
        ``(status, reasons)``. ``('LIVE_STREAM', <str>)`` when
        ``videoDetails`` contains an ``isLive`` key; otherwise the status
        with ``[reason]`` or with the ``messages`` value; ``(None, [None])``
        when no status is present or it has neither reason nor messages.
    """
    status_dict = player_response.get('playabilityStatus', {})

    # 'liveStreamability' used to be the live-stream marker, but some clients
    # still return it after the video has become available, so 'isLive' in
    # videoDetails is checked instead. Private videos do not contain
    # videoDetails.
    if 'videoDetails' in player_response and 'isLive' in player_response['videoDetails']:
        return 'LIVE_STREAM', 'Video is a live stream.'

    if 'status' not in status_dict:
        return None, [None]

    status = status_dict['status']
    if 'reason' in status_dict:
        return status, [status_dict['reason']]
    if 'messages' in status_dict:
        return status, status_dict['messages']
    return None, [None]
120
+
121
+
122
def signature_timestamp(js: str) -> str:
    """Return the digits that follow ``signatureTimestamp:`` in *js*.

    :param str js:
        Text searched for the ``signatureTimestamp`` entry.
    :rtype: str
    :returns:
        The first capture group of the match, as returned by ``regex_search``.
    """
    timestamp_pattern = r"signatureTimestamp:(\d*)"
    return regex_search(timestamp_pattern, js, group=1)
124
+
125
+
126
def visitor_data(response_context: str) -> str:
    """Return the ``visitor_data`` value found in *response_context*.

    :param str response_context:
        Text searched for a ``visitor_data`` entry followed by its ``value``.
    :rtype: str
    :returns:
        The first capture group of the match, as returned by ``regex_search``.
    """
    visitor_pattern = r"visitor_data[',\"\s]+value['\"]:\s?['\"]([a-zA-Z0-9_%-]+)['\"]"
    return regex_search(visitor_pattern, response_context, group=1)
128
+
129
+
130
def video_id(url: str) -> str:
    """Extract the ``video_id`` from a YouTube url.

    The id is the 11-character token (letters, digits, ``_`` or ``-``)
    that follows ``v=`` or a ``/``. Supported patterns:

    - :samp:`https://youtube.com/watch?v={video_id}`
    - :samp:`https://youtube.com/embed/{video_id}`
    - :samp:`https://youtu.be/{video_id}`

    :param str url:
        A YouTube url containing a video id.
    :rtype: str
    :returns:
        YouTube video id.
    """
    id_pattern = r"(?:v=|\/)([0-9A-Za-z_-]{11}).*"
    return regex_search(id_pattern, url, group=1)
146
+
147
+
148
def playlist_id(url: str) -> str:
    """Extract the ``playlist_id`` from a YouTube url.

    The id is the first value of the ``list`` query parameter. Supported
    patterns:

    - :samp:`https://youtube.com/playlist?list={playlist_id}`
    - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}`

    :param str url:
        A YouTube url containing a playlist id.
    :rtype: str
    :returns:
        YouTube playlist id.
    :raises KeyError:
        If the url has no ``list`` query parameter.
    """
    query_string = urllib.parse.urlparse(url).query
    query_params = parse_qs(query_string)
    return query_params['list'][0]
164
+
165
+
166
def channel_name(url: str) -> str:
    """Extract the ``channel_name`` or ``channel_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/c/{channel_name}/*`
    - :samp:`https://youtube.com/channel/{channel_id}/*`
    - :samp:`https://youtube.com/u/{channel_name}/*`
    - :samp:`https://youtube.com/user/{channel_id}/*`
    - :samp:`https://youtube.com/@{channel_id}/*`

    :param str url:
        A YouTube url containing a channel name.
    :rtype: str
    :returns:
        The matched url path: ``/<style>/<identifier>``, or
        ``/@<identifier>`` for handle urls.
    :raises RegexMatchError:
        If none of the patterns matches the url.
    """
    # Tried in this order; the first pattern that matches wins.
    patterns = [
        r"(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)",
        r"(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)",
        r"(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)",
        r"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)",
        r"(?:\/(\@)([%\d\w_\-\.]+)(\/.*)?)"
    ]
    for pattern in patterns:
        found = re.search(pattern, url)
        if not found:
            continue

        logger.debug("finished regex search, matched: %s", pattern)
        uri_style, uri_identifier = found.group(1), found.group(2)
        if uri_style == '@':
            # Handles have no slash between the '@' and the name.
            return f'/{uri_style}{uri_identifier}'
        return f'/{uri_style}/{uri_identifier}'

    raise RegexMatchError(
        caller="channel_name", pattern="patterns"
    )
202
+
203
def video_info_url(video_id: str, watch_url: str) -> str:
    """Build the ``get_video_info`` url for a video.

    :param str video_id:
        A YouTube video identifier.
    :param str watch_url:
        A YouTube watch url; it is percent-quoted and sent as ``eurl``.
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    """
    # Insertion order determines the order of the query parameters.
    params = OrderedDict()
    params["video_id"] = video_id
    params["ps"] = "default"
    params["eurl"] = quote(watch_url)
    params["hl"] = "en_US"
    params["html5"] = "1"
    params["c"] = "TVHTML5"
    params["cver"] = "7.20201028"
    return _video_info_url(params)
227
+
228
+
229
+ def video_info_url_age_restricted(video_id: str, embed_html: str) -> str:
230
+ """Construct the video_info url.
231
+
232
+ :param str video_id:
233
+ A YouTube video identifier.
234
+ :param str embed_html:
235
+ The html contents of the embed page (for age restricted videos).
236
+ :rtype: str
237
+ :returns:
238
+ :samp:`https://youtube.com/get_video_info` with necessary GET
239
+ parameters.
240
+ """
241
+ try:
242
+ sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1)
243
+ except RegexMatchError:
244
+ sts = ""
245
+ # Here we use ``OrderedDict`` so that the output is consistent between
246
+ # Python 2.7+.
247
+ eurl = f"https://youtube.googleapis.com/v/{video_id}"
248
+ params = OrderedDict(
249
+ [
250
+ ("video_id", video_id),
251
+ ("eurl", eurl),
252
+ ("sts", sts),
253
+ ("html5", "1"),
254
+ ("c", "TVHTML5"),
255
+ ("cver", "7.20201028"),
256
+ ]
257
+ )
258
+ return _video_info_url(params)
259
+
260
+
261
+ def _video_info_url(params: OrderedDict) -> str:
262
+ return f"https://www.youtube.com/get_video_info?{urlencode(params)}"
263
+
264
+
265
+ def js_url(html: str) -> str:
266
+ """Get the base JavaScript url.
267
+
268
+ Construct the base JavaScript url, which contains the decipher
269
+ "transforms".
270
+
271
+ :param str html:
272
+ The html contents of the watch page.
273
+ """
274
+ try:
275
+ base_js = get_ytplayer_config(html)['assets']['js']
276
+ except (KeyError, RegexMatchError):
277
+ base_js = get_ytplayer_js(html)
278
+ return f"https://youtube.com{base_js}"
279
+
280
+
281
+ def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]:
282
+ """Parse the type data.
283
+
284
+ Breaks up the data in the ``type`` key of the manifest, which contains the
285
+ mime type and codecs serialized together, and splits them into separate
286
+ elements.
287
+
288
+ **Example**:
289
+
290
+ mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus'])
291
+
292
+ :param str mime_type_codec:
293
+ String containing mime type and codecs.
294
+ :rtype: tuple
295
+ :returns:
296
+ The mime type and a list of codecs.
297
+
298
+ """
299
+ pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\""
300
+ regex = re.compile(pattern)
301
+ results = regex.search(mime_type_codec)
302
+ if not results:
303
+ raise RegexMatchError(caller="mime_type_codec", pattern=pattern)
304
+ mime_type, codecs = results.groups()
305
+ return mime_type, [c.strip() for c in codecs.split(",")]
306
+
307
+
308
+ def get_ytplayer_js(html: str) -> Any:
309
+ """Get the YouTube player base JavaScript path.
310
+
311
+ :param str html:
312
+ The html contents of the watch page.
313
+ :rtype: str
314
+ :returns:
315
+ Path to YouTube's base.js file.
316
+ """
317
+ js_url_patterns = [
318
+ r"(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)"
319
+ ]
320
+ for pattern in js_url_patterns:
321
+ regex = re.compile(pattern)
322
+ function_match = regex.search(html)
323
+ if function_match:
324
+ logger.debug("finished regex search, matched: %s", pattern)
325
+ yt_player_js = function_match.group(1)
326
+ logger.debug("player JS: " + yt_player_js)
327
+ return yt_player_js
328
+
329
+ raise RegexMatchError(
330
+ caller="get_ytplayer_js", pattern="js_url_patterns"
331
+ )
332
+
333
+
334
+ def get_ytplayer_config(html: str) -> Any:
335
+ """Get the YouTube player configuration data from the watch html.
336
+
337
+ Extract the ``ytplayer_config``, which is json data embedded within the
338
+ watch html and serves as the primary source of obtaining the stream
339
+ manifest data.
340
+
341
+ :param str html:
342
+ The html contents of the watch page.
343
+ :rtype: str
344
+ :returns:
345
+ Substring of the html containing the encoded manifest data.
346
+ """
347
+ logger.debug("finding initial function name")
348
+ config_patterns = [
349
+ r"ytplayer\.config\s*=\s*",
350
+ r"ytInitialPlayerResponse\s*=\s*"
351
+ ]
352
+ for pattern in config_patterns:
353
+ # Try each pattern consecutively if they don't find a match
354
+ try:
355
+ return parse_for_object(html, pattern)
356
+ except HTMLParseError as e:
357
+ logger.debug(f'Pattern failed: {pattern}')
358
+ logger.debug(e)
359
+ continue
360
+
361
+ # setConfig() needs to be handled a little differently.
362
+ # We want to parse the entire argument to setConfig()
363
+ # and then load that as json to find PLAYER_CONFIG
364
+ # inside of it.
365
+ setconfig_patterns = [
366
+ r"yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*"
367
+ ]
368
+ for pattern in setconfig_patterns:
369
+ # Try each pattern consecutively if they don't find a match
370
+ try:
371
+ return parse_for_object(html, pattern)
372
+ except HTMLParseError:
373
+ continue
374
+
375
+ raise RegexMatchError(
376
+ caller="get_ytplayer_config", pattern="config_patterns, setconfig_patterns"
377
+ )
378
+
379
+
380
+ def get_ytcfg(html: str) -> str:
381
+ """Get the entirety of the ytcfg object.
382
+
383
+ This is built over multiple pieces, so we have to find all matches and
384
+ combine the dicts together.
385
+
386
+ :param str html:
387
+ The html contents of the watch page.
388
+ :rtype: str
389
+ :returns:
390
+ Substring of the html containing the encoded manifest data.
391
+ """
392
+ ytcfg = {}
393
+ ytcfg_patterns = [
394
+ r"ytcfg\s=\s",
395
+ r"ytcfg\.set\("
396
+ ]
397
+ for pattern in ytcfg_patterns:
398
+ # Try each pattern consecutively and try to build a cohesive object
399
+ try:
400
+ found_objects = parse_for_all_objects(html, pattern)
401
+ for obj in found_objects:
402
+ ytcfg.update(obj)
403
+ except HTMLParseError:
404
+ continue
405
+
406
+ if ytcfg: # there is at least one item
407
+ return ytcfg
408
+
409
+ raise RegexMatchError(
410
+ caller="get_ytcfg", pattern="ytcfg_pattenrs"
411
+ )
412
+
413
+
414
+ def apply_po_token(stream_manifest: Dict, vid_info: Dict, po_token: str) -> None:
415
+ """Apply the proof of origin token to the stream manifest
416
+
417
+ :param dict stream_manifest:
418
+ Details of the media streams available.
419
+ :param str po_token:
420
+ Proof of Origin Token.
421
+ """
422
+ logger.debug(f'Applying poToken')
423
+ for i, stream in enumerate(stream_manifest):
424
+ try:
425
+ url: str = stream["url"]
426
+ except KeyError:
427
+ live_stream = (
428
+ vid_info.get("playabilityStatus", {}, )
429
+ .get("liveStreamability")
430
+ )
431
+ if live_stream:
432
+ raise LiveStreamError("UNKNOWN")
433
+
434
+ parsed_url = urlparse(url)
435
+
436
+ # Convert query params off url to dict
437
+ query_params = parse_qs(urlparse(url).query)
438
+ query_params = {
439
+ k: v[0] for k, v in query_params.items()
440
+ }
441
+
442
+ query_params['pot'] = po_token
443
+
444
+ url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}'
445
+
446
+ stream_manifest[i]["url"] = url
447
+
448
+
449
+ def apply_signature(stream_manifest: Dict, vid_info: Dict, js: str, url_js: str) -> None:
450
+ """Apply the decrypted signature to the stream manifest.
451
+
452
+ :param dict stream_manifest:
453
+ Details of the media streams available.
454
+ :param str js:
455
+ The contents of the base.js asset file.
456
+ :param str url_js:
457
+ Full base.js url
458
+
459
+ """
460
+ cipher = Cipher(js=js, js_url=url_js)
461
+ discovered_n = dict()
462
+ for i, stream in enumerate(stream_manifest):
463
+ try:
464
+ url: str = stream["url"]
465
+ except KeyError:
466
+ live_stream = (
467
+ vid_info.get("playabilityStatus", {}, )
468
+ .get("liveStreamability")
469
+ )
470
+ if live_stream:
471
+ raise LiveStreamError("UNKNOWN")
472
+
473
+ parsed_url = urlparse(url)
474
+
475
+ # Convert query params off url to dict
476
+ query_params = parse_qs(urlparse(url).query)
477
+ query_params = {
478
+ k: v[0] for k, v in query_params.items()
479
+ }
480
+
481
+ # 403 Forbidden fix.
482
+ if "signature" in url or (
483
+ "s" not in stream and ("&sig=" in url or "&lsig=" in url)
484
+ ):
485
+ # For certain videos, YouTube will just provide them pre-signed, in
486
+ # which case there's no real magic to download them and we can skip
487
+ # the whole signature descrambling entirely.
488
+ logger.debug("signature found, skip decipher")
489
+
490
+ else:
491
+ signature = cipher.get_signature(ciphered_signature=stream["s"])
492
+
493
+ logger.debug(
494
+ "finished descrambling signature for itag=%s", stream["itag"]
495
+ )
496
+
497
+ query_params['sig'] = signature
498
+
499
+ if 'n' in query_params.keys():
500
+ # For WEB-based clients, YouTube sends an "n" parameter that throttles download speed.
501
+ # To decipher the value of "n", we must interpret the player's JavaScript.
502
+
503
+ initial_n = query_params['n']
504
+ logger.debug(f'Parameter n is: {initial_n}')
505
+
506
+ # Check if any previous stream decrypted the parameter
507
+ if initial_n not in discovered_n:
508
+ discovered_n[initial_n] = cipher.get_throttling(initial_n)
509
+ else:
510
+ logger.debug('Parameter n found skipping decryption')
511
+
512
+ new_n = discovered_n[initial_n]
513
+ query_params['n'] = new_n
514
+ logger.debug(f'Parameter n deciphered: {new_n}')
515
+
516
+ url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}' # noqa:E501
517
+
518
+ stream_manifest[i]["url"] = url
519
+
520
+
521
+ def apply_descrambler(stream_data: Dict) -> Optional[List[Dict]]:
522
+ """Apply various in-place transforms to YouTube's media stream data.
523
+
524
+ Creates a ``list`` of dictionaries by string splitting on commas, then
525
+ taking each list item, parsing it as a query string, converting it to a
526
+ ``dict`` and unquoting the value.
527
+
528
+ :param dict stream_data:
529
+ Dictionary containing query string encoded values.
530
+
531
+ **Example**:
532
+
533
+ >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
534
+ >>> apply_descrambler(d, 'foo')
535
+ >>> print(d)
536
+ {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
537
+
538
+ """
539
+ if 'url' in stream_data:
540
+ return None
541
+
542
+ # Merge formats and adaptiveFormats into a single list
543
+ formats: list[Dict] = []
544
+ if 'formats' in stream_data.keys():
545
+ formats.extend(stream_data['formats'])
546
+ if 'adaptiveFormats' in stream_data.keys():
547
+ formats.extend(stream_data['adaptiveFormats'])
548
+
549
+ # Extract url and s from signatureCiphers as necessary
550
+ for data in formats:
551
+ if 'url' not in data and 'signatureCipher' in data:
552
+ cipher_url = parse_qs(data['signatureCipher'])
553
+ data['url'] = cipher_url['url'][0]
554
+ data['s'] = cipher_url['s'][0]
555
+ data['is_sabr'] = False
556
+ elif 'url' not in data and 'signatureCipher' not in data:
557
+ data['url'] = stream_data['serverAbrStreamingUrl']
558
+ data['is_sabr'] = True
559
+ data['is_otf'] = data.get('type') == 'FORMAT_STREAM_TYPE_OTF'
560
+
561
+ logger.debug("applying descrambler")
562
+ return formats
563
+
564
+
565
+ def initial_data(watch_html: str) -> dict:
566
+ """Extract the ytInitialData json from the watch_html page.
567
+
568
+ This mostly contains metadata necessary for rendering the page on-load,
569
+ such as video information, copyright notices, etc.
570
+
571
+ @param watch_html: Html of the watch page
572
+ @return:
573
+ """
574
+ patterns = [
575
+ r"window\[['\"]ytInitialData['\"]]\s*=\s*",
576
+ r"ytInitialData\s*=\s*"
577
+ ]
578
+ for pattern in patterns:
579
+ try:
580
+ return parse_for_object(watch_html, pattern)
581
+ except HTMLParseError:
582
+ pass
583
+
584
+ raise RegexMatchError(caller='initial_data', pattern='initial_data_pattern')
585
+
586
+
587
+ def initial_player_response(watch_html: str) -> str:
588
+ """Extract the ytInitialPlayerResponse json from the watch_html page.
589
+
590
+ This mostly contains metadata necessary for rendering the page on-load,
591
+ such as video information, copyright notices, etc.
592
+
593
+ @param watch_html: Html of the watch page
594
+ @return:
595
+ """
596
+ patterns = [
597
+ r"window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*",
598
+ r"ytInitialPlayerResponse\s*=\s*"
599
+ ]
600
+ for pattern in patterns:
601
+ try:
602
+ return parse_for_object(watch_html, pattern)
603
+ except HTMLParseError:
604
+ pass
605
+
606
+ raise RegexMatchError(
607
+ caller='initial_player_response',
608
+ pattern='initial_player_response_pattern'
609
+ )
610
+
611
+
612
+ def metadata(initial_data) -> Optional[YouTubeMetadata]:
613
+ """Get the informational metadata for the video.
614
+
615
+ e.g.:
616
+ [
617
+ {
618
+ 'Song': '강남스타일(Gangnam Style)',
619
+ 'Artist': 'PSY',
620
+ 'Album': 'PSY SIX RULES Pt.1',
621
+ 'Licensed to YouTube by': 'YG Entertainment Inc. [...]'
622
+ }
623
+ ]
624
+
625
+ :rtype: YouTubeMetadata
626
+ """
627
+ try:
628
+ metadata_rows: List = initial_data["contents"]["twoColumnWatchNextResults"][
629
+ "results"]["results"]["contents"][1]["videoSecondaryInfoRenderer"][
630
+ "metadataRowContainer"]["metadataRowContainerRenderer"]["rows"]
631
+ except (KeyError, IndexError):
632
+ # If there's an exception accessing this data, it probably doesn't exist.
633
+ return YouTubeMetadata([])
634
+
635
+ # Rows appear to only have "metadataRowRenderer" or "metadataRowHeaderRenderer"
636
+ # and we only care about the former, so we filter the others
637
+ metadata_rows = filter(
638
+ lambda x: "metadataRowRenderer" in x.keys(),
639
+ metadata_rows
640
+ )
641
+
642
+ # We then access the metadataRowRenderer key in each element
643
+ # and build a metadata object from this new list
644
+ metadata_rows = [x["metadataRowRenderer"] for x in metadata_rows]
645
+
646
+ return YouTubeMetadata(metadata_rows)
pytubefix/file_system.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ windows = ['Windows', 'NTFS', 'FAT32', 'exFAT', 'ReFS']
3
+ linux = ['Linux', 'ext2', 'ext3', 'ext4', 'Btrfs', 'XFS', 'ZFS']
4
+ macOS = ['macOS', 'APFS', 'HFS+']
5
+ bsd_unix = ['BSD', 'UFS']
6
+ network_filesystems = ['CIFS', 'SMB']
7
+
8
+
9
+ windows_translation = str.maketrans({
10
+ '\\': '',
11
+ '/': '',
12
+ '?': '',
13
+ ':': '',
14
+ '*': '',
15
+ '"': '',
16
+ '<': '',
17
+ '>': '',
18
+ '|': '',
19
+ })
20
+
21
+ linux_translation = str.maketrans({
22
+ '/': '',
23
+ })
24
+
25
+ macos_translation = str.maketrans({
26
+ '/': '',
27
+ })
28
+
29
+ bsd_translation = str.maketrans({
30
+ '/': '',
31
+ })
32
+
33
+ network_filesystems_translation = str.maketrans({
34
+ '\\': '',
35
+ '/': '',
36
+ '?': '',
37
+ ':': '',
38
+ '*': '',
39
+ '"': '',
40
+ '<': '',
41
+ '>': '',
42
+ '|': '',
43
+ })
44
+
45
+ def file_system_verify(file_type) -> dict:
46
+ """
47
+ Returns a translation table to remove invalid characters for a specified file system type.
48
+
49
+ This function identifies the file system type and returns a translation table for removing
50
+ characters that are not allowed in filenames for that specific file system.
51
+
52
+ Args:
53
+ file_type (str): The type of file system being checked. Supported file systems include:
54
+ - Windows: NTFS, FAT32, exFAT, ReFS
55
+ - Linux: ext2, ext3, ext4, Btrfs, XFS, ZFS
56
+ - macOS: APFS, HFS+
57
+ - BSD/UNIX: UFS
58
+ - Network Filesystems: CIFS, SMB
59
+
60
+ Returns:
61
+ dict: A translation table where invalid characters are mapped to an empty string.
62
+
63
+ Example:
64
+ >>> ys = yt.streams.get_highest_resolution()
65
+ >>> ys.download(file_system='ext4')
66
+
67
+ Raises:
68
+ None. If the file system type is not recognized, the function falls through and returns ``None``.
69
+ """
70
+
71
+ if file_type in windows:
72
+ return windows_translation
73
+ elif file_type in linux:
74
+ return linux_translation
75
+ elif file_type in macOS:
76
+ return macos_translation
77
+ elif file_type in bsd_unix:
78
+ return bsd_translation
79
+ elif file_type in network_filesystems:
80
+ return network_filesystems_translation