Taylor Fox Dahlin committed on
Improve metadata availability (#988)
Browse files
* Added channel id and channel url properties to YouTube object.
* Added some metadata to playlist object:
- owner
- owner_id
- owner_url
- description
- length
- views
- pytube/__main__.py +14 -0
- pytube/contrib/playlist.py +98 -13
- tests/contrib/test_playlist.py +67 -11
- tests/test_main.py +8 -8
pytube/__main__.py
CHANGED
|
@@ -422,6 +422,20 @@ class YouTube:
|
|
| 422 |
"""
|
| 423 |
return self.player_response.get('videoDetails', {}).get('keywords', [])
|
| 424 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
@property
|
| 426 |
def metadata(self) -> Optional[YouTubeMetadata]:
|
| 427 |
"""Get the metadata for the video.
|
|
|
|
| 422 |
"""
|
| 423 |
return self.player_response.get('videoDetails', {}).get('keywords', [])
|
| 424 |
|
| 425 |
+
@property
|
| 426 |
+
def channel_id(self) -> str:
|
| 427 |
+
"""Get the video poster's channel id.
|
| 428 |
+
:rtype: str
|
| 429 |
+
"""
|
| 430 |
+
return self.player_response.get('videoDetails', {}).get('channelId', None)
|
| 431 |
+
|
| 432 |
+
@property
|
| 433 |
+
def channel_url(self) -> str:
|
| 434 |
+
"""Construct the channel url for the video's poster from the channel id.
|
| 435 |
+
:rtype: str
|
| 436 |
+
"""
|
| 437 |
+
return f'https://www.youtube.com/channel/{self.channel_id}'
|
| 438 |
+
|
| 439 |
@property
|
| 440 |
def metadata(self) -> Optional[YouTubeMetadata]:
|
| 441 |
"""Get the metadata for the video.
|
pytube/contrib/playlist.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
| 1 |
"""Module to download a complete playlist from a youtube channel."""
|
| 2 |
import json
|
| 3 |
import logging
|
| 4 |
-
import re
|
| 5 |
from collections.abc import Sequence
|
| 6 |
from datetime import date, datetime
|
| 7 |
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
| 8 |
|
| 9 |
from pytube import extract, request, YouTube
|
| 10 |
-
from pytube.helpers import cache, DeferredGeneratorList, install_proxy,
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
@@ -24,6 +23,8 @@ class Playlist(Sequence):
|
|
| 24 |
# These need to be initialized as None for the properties.
|
| 25 |
self._html = None
|
| 26 |
self._ytcfg = None
|
|
|
|
|
|
|
| 27 |
|
| 28 |
self._playlist_id = None
|
| 29 |
|
|
@@ -52,6 +53,23 @@ class Playlist(Sequence):
|
|
| 52 |
self._ytcfg = extract.get_ytcfg(self.html)
|
| 53 |
return self._ytcfg
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
@property
|
| 56 |
def yt_api_key(self):
|
| 57 |
return self.ytcfg['INNERTUBE_API_KEY']
|
|
@@ -271,15 +289,20 @@ class Playlist(Sequence):
|
|
| 271 |
@property
|
| 272 |
@cache
|
| 273 |
def last_updated(self) -> Optional[date]:
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
@property
|
| 285 |
@cache
|
|
@@ -289,8 +312,70 @@ class Playlist(Sequence):
|
|
| 289 |
:return: playlist title (name)
|
| 290 |
:rtype: Optional[str]
|
| 291 |
"""
|
| 292 |
-
|
| 293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
@staticmethod
|
| 296 |
def _video_url(watch_path: str):
|
|
|
|
| 1 |
"""Module to download a complete playlist from a youtube channel."""
|
| 2 |
import json
|
| 3 |
import logging
|
|
|
|
| 4 |
from collections.abc import Sequence
|
| 5 |
from datetime import date, datetime
|
| 6 |
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
| 7 |
|
| 8 |
from pytube import extract, request, YouTube
|
| 9 |
+
from pytube.helpers import cache, DeferredGeneratorList, install_proxy, uniqueify
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
|
|
|
| 23 |
# These need to be initialized as None for the properties.
|
| 24 |
self._html = None
|
| 25 |
self._ytcfg = None
|
| 26 |
+
self._initial_data = None
|
| 27 |
+
self._sidebar_info = None
|
| 28 |
|
| 29 |
self._playlist_id = None
|
| 30 |
|
|
|
|
| 53 |
self._ytcfg = extract.get_ytcfg(self.html)
|
| 54 |
return self._ytcfg
|
| 55 |
|
| 56 |
+
@property
|
| 57 |
+
def initial_data(self):
|
| 58 |
+
if self._initial_data:
|
| 59 |
+
return self._initial_data
|
| 60 |
+
else:
|
| 61 |
+
self._initial_data = extract.initial_data(self.html)
|
| 62 |
+
return self._initial_data
|
| 63 |
+
|
| 64 |
+
@property
|
| 65 |
+
def sidebar_info(self):
|
| 66 |
+
if self._sidebar_info:
|
| 67 |
+
return self._sidebar_info
|
| 68 |
+
else:
|
| 69 |
+
self._sidebar_info = self.initial_data['sidebar'][
|
| 70 |
+
'playlistSidebarRenderer']['items']
|
| 71 |
+
return self._sidebar_info
|
| 72 |
+
|
| 73 |
@property
|
| 74 |
def yt_api_key(self):
|
| 75 |
return self.ytcfg['INNERTUBE_API_KEY']
|
|
|
|
| 289 |
@property
|
| 290 |
@cache
|
| 291 |
def last_updated(self) -> Optional[date]:
|
| 292 |
+
"""Extract the date that the playlist was last updated.
|
| 293 |
+
|
| 294 |
+
:return: Date of last playlist update
|
| 295 |
+
:rtype: datetime.date
|
| 296 |
+
"""
|
| 297 |
+
last_updated_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
| 298 |
+
'stats'][2]['runs'][1]['text']
|
| 299 |
+
date_components = last_updated_text.split()
|
| 300 |
+
month = date_components[0]
|
| 301 |
+
day = date_components[1].strip(',')
|
| 302 |
+
year = date_components[2]
|
| 303 |
+
return datetime.strptime(
|
| 304 |
+
f"{month} {day:0>2} {year}", "%b %d %Y"
|
| 305 |
+
).date()
|
| 306 |
|
| 307 |
@property
|
| 308 |
@cache
|
|
|
|
| 312 |
:return: playlist title (name)
|
| 313 |
:rtype: Optional[str]
|
| 314 |
"""
|
| 315 |
+
return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
| 316 |
+
'title']['runs'][0]['text']
|
| 317 |
+
|
| 318 |
+
@property
|
| 319 |
+
def description(self) -> str:
|
| 320 |
+
return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
| 321 |
+
'description']['simpleText']
|
| 322 |
+
|
| 323 |
+
@property
|
| 324 |
+
def length(self):
|
| 325 |
+
"""Extract the number of videos in the playlist.
|
| 326 |
+
|
| 327 |
+
:return: Playlist video count
|
| 328 |
+
:rtype: int
|
| 329 |
+
"""
|
| 330 |
+
count_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
| 331 |
+
'stats'][0]['runs'][0]['text']
|
| 332 |
+
return int(count_text)
|
| 333 |
+
|
| 334 |
+
@property
|
| 335 |
+
def views(self):
|
| 336 |
+
"""Extract view count for playlist.
|
| 337 |
+
|
| 338 |
+
:return: Playlist view count
|
| 339 |
+
:rtype: int
|
| 340 |
+
"""
|
| 341 |
+
# "1,234,567 views"
|
| 342 |
+
views_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][
|
| 343 |
+
'stats'][1]['simpleText']
|
| 344 |
+
# "1,234,567"
|
| 345 |
+
count_text = views_text.split()[0]
|
| 346 |
+
# "1234567"
|
| 347 |
+
count_text = count_text.replace(',', '')
|
| 348 |
+
return int(count_text)
|
| 349 |
+
|
| 350 |
+
@property
|
| 351 |
+
def owner(self):
|
| 352 |
+
"""Extract the owner of the playlist.
|
| 353 |
+
|
| 354 |
+
:return: Playlist owner name.
|
| 355 |
+
:rtype: str
|
| 356 |
+
"""
|
| 357 |
+
return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
|
| 358 |
+
'videoOwner']['videoOwnerRenderer']['title']['runs'][0]['text']
|
| 359 |
+
|
| 360 |
+
@property
|
| 361 |
+
def owner_id(self):
|
| 362 |
+
"""Extract the channel_id of the owner of the playlist.
|
| 363 |
+
|
| 364 |
+
:return: Playlist owner's channel ID.
|
| 365 |
+
:rtype: str
|
| 366 |
+
"""
|
| 367 |
+
return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][
|
| 368 |
+
'videoOwner']['videoOwnerRenderer']['title']['runs'][0][
|
| 369 |
+
'navigationEndpoint']['browseEndpoint']['browseId']
|
| 370 |
+
|
| 371 |
+
@property
|
| 372 |
+
def owner_url(self):
|
| 373 |
+
"""Create the channel url of the owner of the playlist.
|
| 374 |
+
|
| 375 |
+
:return: Playlist owner's channel url.
|
| 376 |
+
:rtype: str
|
| 377 |
+
"""
|
| 378 |
+
return f'https://www.youtube.com/channel/{self.owner_id}'
|
| 379 |
|
| 380 |
@staticmethod
|
| 381 |
def _video_url(watch_path: str):
|
tests/contrib/test_playlist.py
CHANGED
|
@@ -5,11 +5,8 @@ from pytube import Playlist
|
|
| 5 |
|
| 6 |
|
| 7 |
@mock.patch("pytube.request.get")
|
| 8 |
-
def test_title(request_get):
|
| 9 |
-
request_get.return_value =
|
| 10 |
-
"<title>(149) Python Tutorial for Beginners "
|
| 11 |
-
"(For Absolute Beginners) - YouTube</title>"
|
| 12 |
-
)
|
| 13 |
url = (
|
| 14 |
"https://www.fakeurl.com/playlist?list=PLS1QulWo1RIaJECMeUT4LFwJ"
|
| 15 |
"-ghgoSH6n"
|
|
@@ -18,7 +15,7 @@ def test_title(request_get):
|
|
| 18 |
pl_title = pl.title
|
| 19 |
assert (
|
| 20 |
pl_title
|
| 21 |
-
== "
|
| 22 |
)
|
| 23 |
|
| 24 |
|
|
@@ -48,9 +45,9 @@ def test_init_with_watch_url(request_get):
|
|
| 48 |
|
| 49 |
|
| 50 |
@mock.patch("pytube.request.get")
|
| 51 |
-
def test_last_updated(request_get,
|
| 52 |
-
expected = datetime.date(2020,
|
| 53 |
-
request_get.return_value =
|
| 54 |
playlist = Playlist(
|
| 55 |
"https://www.youtube.com/playlist?list"
|
| 56 |
"=PLS1QulWo1RIaJECMeUT4LFwJ-ghgoSH6n"
|
|
@@ -251,8 +248,7 @@ def test_trimmed_pagination_not_found(
|
|
| 251 |
|
| 252 |
# test case for playlist with submenus
|
| 253 |
@mock.patch("pytube.request.get")
|
| 254 |
-
def test_playlist_submenu(
|
| 255 |
-
request_get, playlist_submenu_html):
|
| 256 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
| 257 |
request_get.side_effect = [
|
| 258 |
playlist_submenu_html,
|
|
@@ -264,3 +260,63 @@ def test_playlist_submenu(
|
|
| 264 |
]
|
| 265 |
playlist = Playlist(url)
|
| 266 |
assert len(playlist.video_urls) == 12
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
@mock.patch("pytube.request.get")
|
| 8 |
+
def test_title(request_get, playlist_long_html):
|
| 9 |
+
request_get.return_value = playlist_long_html
|
|
|
|
|
|
|
|
|
|
| 10 |
url = (
|
| 11 |
"https://www.fakeurl.com/playlist?list=PLS1QulWo1RIaJECMeUT4LFwJ"
|
| 12 |
"-ghgoSH6n"
|
|
|
|
| 15 |
pl_title = pl.title
|
| 16 |
assert (
|
| 17 |
pl_title
|
| 18 |
+
== "Python Tutorial for Beginners (For Absolute Beginners)"
|
| 19 |
)
|
| 20 |
|
| 21 |
|
|
|
|
| 45 |
|
| 46 |
|
| 47 |
@mock.patch("pytube.request.get")
|
| 48 |
+
def test_last_updated(request_get, playlist_long_html):
|
| 49 |
+
expected = datetime.date(2020, 10, 8)
|
| 50 |
+
request_get.return_value = playlist_long_html
|
| 51 |
playlist = Playlist(
|
| 52 |
"https://www.youtube.com/playlist?list"
|
| 53 |
"=PLS1QulWo1RIaJECMeUT4LFwJ-ghgoSH6n"
|
|
|
|
| 248 |
|
| 249 |
# test case for playlist with submenus
|
| 250 |
@mock.patch("pytube.request.get")
|
| 251 |
+
def test_playlist_submenu(request_get, playlist_submenu_html):
|
|
|
|
| 252 |
url = "https://www.fakeurl.com/playlist?list=whatever"
|
| 253 |
request_get.side_effect = [
|
| 254 |
playlist_submenu_html,
|
|
|
|
| 260 |
]
|
| 261 |
playlist = Playlist(url)
|
| 262 |
assert len(playlist.video_urls) == 12
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
@mock.patch("pytube.request.get")
|
| 266 |
+
def test_playlist_length(request_get, playlist_long_html):
|
| 267 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
| 268 |
+
request_get.return_value = playlist_long_html
|
| 269 |
+
p = Playlist(url)
|
| 270 |
+
assert p.length == 217
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
@mock.patch("pytube.request.get")
|
| 274 |
+
def test_playlist_description(request_get, playlist_long_html):
|
| 275 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
| 276 |
+
request_get.return_value = playlist_long_html
|
| 277 |
+
p = Playlist(url)
|
| 278 |
+
assert p.description == (
|
| 279 |
+
'Python Object Oriented - Learning Python in '
|
| 280 |
+
"simple and easy steps ,python,xml,script,install, A beginner's "
|
| 281 |
+
'tutorial containing complete knowledge of Python Syntax Object '
|
| 282 |
+
'Oriented Language, Methods, Tuples,Learn,Python,Tutorial,Interactive,'
|
| 283 |
+
'Free, Tools/Utilities,Getting the most popular pages from your Apache'
|
| 284 |
+
' logfile,Make your life easier with Virtualenvwrapper,This site now '
|
| 285 |
+
'runs on Django,PythonForBeginners.com has a new owner,How to use '
|
| 286 |
+
'Pillow, a fork of PIL,How to use the Python Imaging Library,Python '
|
| 287 |
+
'Websites and Tutorials,How to use Envoy,Using Feedparser in Python,'
|
| 288 |
+
'Subprocess and Shell Commands in Python, Exceptions Handling, '
|
| 289 |
+
'Sockets, GUI, Extentions, XML Programming'
|
| 290 |
+
)
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
@mock.patch("pytube.request.get")
|
| 294 |
+
def test_playlist_views(request_get, playlist_long_html):
|
| 295 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
| 296 |
+
request_get.return_value = playlist_long_html
|
| 297 |
+
p = Playlist(url)
|
| 298 |
+
assert p.views == 4617130
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
@mock.patch("pytube.request.get")
|
| 302 |
+
def test_playlist_owner(request_get, playlist_long_html):
|
| 303 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
| 304 |
+
request_get.return_value = playlist_long_html
|
| 305 |
+
p = Playlist(url)
|
| 306 |
+
assert p.owner == 'ProgrammingKnowledge'
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
@mock.patch("pytube.request.get")
|
| 310 |
+
def test_playlist_owner_id(request_get, playlist_long_html):
|
| 311 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
| 312 |
+
request_get.return_value = playlist_long_html
|
| 313 |
+
p = Playlist(url)
|
| 314 |
+
assert p.owner_id == 'UCs6nmQViDpUw0nuIx9c_WvA'
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
@mock.patch("pytube.request.get")
|
| 318 |
+
def test_playlist_owner_url(request_get, playlist_long_html):
|
| 319 |
+
url = 'https://www.example.com/playlist?list=whatever'
|
| 320 |
+
request_get.return_value = playlist_long_html
|
| 321 |
+
p = Playlist(url)
|
| 322 |
+
assert p.owner_url == 'https://www.youtube.com/channel/UCs6nmQViDpUw0nuIx9c_WvA'
|
tests/test_main.py
CHANGED
|
@@ -7,14 +7,6 @@ from pytube import YouTube
|
|
| 7 |
from pytube.exceptions import RegexMatchError
|
| 8 |
|
| 9 |
|
| 10 |
-
@mock.patch("pytube.__main__.YouTube")
|
| 11 |
-
def test_prefetch_deferred(youtube):
|
| 12 |
-
instance = youtube.return_value
|
| 13 |
-
instance.prefetch_descramble.return_value = None
|
| 14 |
-
YouTube("https://www.youtube.com/watch?v=9bZkp7q19f0", True)
|
| 15 |
-
assert not instance.prefetch_descramble.called
|
| 16 |
-
|
| 17 |
-
|
| 18 |
@mock.patch("urllib.request.install_opener")
|
| 19 |
def test_install_proxy(opener):
|
| 20 |
proxies = {"http": "http://www.example.com:3128/"}
|
|
@@ -58,3 +50,11 @@ def test_js_caching(cipher_signature):
|
|
| 58 |
assert pytube.__js_url__ is not None
|
| 59 |
assert pytube.__js__ == cipher_signature.js
|
| 60 |
assert pytube.__js_url__ == cipher_signature.js_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from pytube.exceptions import RegexMatchError
|
| 8 |
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
@mock.patch("urllib.request.install_opener")
|
| 11 |
def test_install_proxy(opener):
|
| 12 |
proxies = {"http": "http://www.example.com:3128/"}
|
|
|
|
| 50 |
assert pytube.__js_url__ is not None
|
| 51 |
assert pytube.__js__ == cipher_signature.js
|
| 52 |
assert pytube.__js_url__ == cipher_signature.js_url
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def test_channel_id(cipher_signature):
|
| 56 |
+
assert cipher_signature.channel_id == 'UCBR8-60-B28hp2BmDPdntcQ'
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def test_channel_url(cipher_signature):
|
| 60 |
+
assert cipher_signature.channel_url == 'https://www.youtube.com/channel/UCBR8-60-B28hp2BmDPdntcQ' # noqa:E501
|