Taylor Fox Dahlin committed on
Initial implementation of Channel object (#932)
Browse files
* Implements a Channel object for downloading videos from a YouTube channel.
* Minor changes to the Playlist class to make it easier to subclass.
* `.videos` and `.video_urls` now behave just like iterable lists, but defer web requests.
* Implements DeferredGeneratorList which converts generators to lazy list-like objects.
- pytube/__init__.py +1 -0
- pytube/contrib/channel.py +137 -0
- pytube/contrib/playlist.py +34 -15
- pytube/extract.py +31 -0
- pytube/helpers.py +95 -0
- tests/conftest.py +20 -3
- tests/contrib/test_channel.py +54 -0
- tests/mocks/channel-videos.html.gz +0 -0
pytube/__init__.py
CHANGED
|
@@ -15,3 +15,4 @@ from pytube.captions import Caption
|
|
| 15 |
from pytube.query import CaptionQuery, StreamQuery
|
| 16 |
from pytube.__main__ import YouTube
|
| 17 |
from pytube.contrib.playlist import Playlist
|
|
|
|
|
|
| 15 |
from pytube.query import CaptionQuery, StreamQuery
|
| 16 |
from pytube.__main__ import YouTube
|
| 17 |
from pytube.contrib.playlist import Playlist
|
| 18 |
+
from pytube.contrib.channel import Channel
|
pytube/contrib/channel.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""Module for interacting with a user's youtube channel."""
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
from typing import Dict, List, Optional, Tuple
|
| 6 |
+
|
| 7 |
+
from pytube import extract, Playlist, request
|
| 8 |
+
from pytube.helpers import uniqueify
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class Channel(Playlist):
    """A YouTube channel, treated as a playlist of the channel's videos.

    Fetches the channel's ``/videos`` page instead of a playlist page and
    overrides how video ids are extracted from that page's JSON. The urls of
    the other channel tabs, and lazily fetched copies of their pages, are
    kept for possible future use.
    """

    def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
        """Construct a :class:`Channel <Channel>`.

        :param str url:
            A channel url of the form ``.../c/{channel_name}`` or
            ``.../channel/{channel_id}``, optionally followed by a tab path.
        :param proxies:
            (Optional) Mapping handed unchanged to ``Playlist.__init__``.
        """
        super().__init__(url, proxies)

        self.channel_name = extract.channel_name(url)

        # extract.channel_name() returns either a custom name (from /c/...)
        # or a channel id (from /channel/...). Rebuild the base url with the
        # prefix the caller actually used, so the derived urls point at the
        # same channel; previously an id was always placed under /c/.
        prefix = "channel" if f"/channel/{self.channel_name}" in url else "c"
        self.channel_url = (
            f"https://www.youtube.com/{prefix}/{self.channel_name}"
        )
        self.videos_url = self.channel_url + '/videos'
        self.playlists_url = self.channel_url + '/playlists'
        self.community_url = self.channel_url + '/community'
        self.featured_channels_url = self.channel_url + '/channels'
        self.about_url = self.channel_url + '/about'

        # Possible future additions
        self._playlists_html = None
        self._community_html = None
        self._featured_channels_html = None
        self._about_html = None

    @property
    def html(self):
        """Result of ``request.get`` for the videos tab.

        Cached after the first truthy response.
        """
        if not self._html:
            self._html = request.get(self.videos_url)
        return self._html

    @property
    def playlists_html(self):
        """Result of ``request.get`` for the playlists tab.

        Cached after the first truthy response.
        """
        if not self._playlists_html:
            self._playlists_html = request.get(self.playlists_url)
        return self._playlists_html

    @property
    def community_html(self):
        """Result of ``request.get`` for the community tab.

        Cached after the first truthy response.
        """
        if not self._community_html:
            self._community_html = request.get(self.community_url)
        return self._community_html

    @property
    def featured_channels_html(self):
        """Result of ``request.get`` for the featured channels tab.

        Cached after the first truthy response.
        """
        if not self._featured_channels_html:
            self._featured_channels_html = request.get(
                self.featured_channels_url
            )
        return self._featured_channels_html

    @property
    def about_html(self):
        """Result of ``request.get`` for the about tab.

        Cached after the first truthy response.
        """
        if not self._about_html:
            self._about_html = request.get(self.about_url)
        return self._about_html

    @staticmethod
    def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
        """Extracts videos from a raw json page

        :param str raw_json: Input json extracted from the page or the last
            server response
        :rtype: Tuple[List[str], Optional[str]]
        :returns: Tuple containing the de-duplicated ``/watch?v=...`` paths
            found in the json and a continuation token, if more videos are
            available. ``([], None)`` is returned when none of the known
            json layouts matches.
        """
        initial_data = json.loads(raw_json)
        try:
            # json tree structure when the json was extracted from html
            videos = initial_data["contents"][
                "twoColumnBrowseResultsRenderer"][
                "tabs"][1]["tabRenderer"]["content"][
                "sectionListRenderer"]["contents"][0][
                "itemSectionRenderer"]["contents"][0][
                "gridRenderer"]["items"]
        except (KeyError, IndexError, TypeError):
            try:
                # json tree structure when the json was sent directly by
                # the server in a continuation response
                videos = initial_data[1]['response'][
                    'onResponseReceivedActions'][0][
                    'appendContinuationItemsAction']['continuationItems']
            except (KeyError, IndexError, TypeError):
                try:
                    # same continuation response, but no longer a list and
                    # no longer wrapped in a "response" key
                    videos = initial_data['onResponseReceivedActions'][0][
                        'appendContinuationItemsAction']['continuationItems']
                except (KeyError, IndexError, TypeError) as p:
                    logger.info(p)
                    return [], None

        try:
            # The continuation token, when present, is carried by the last
            # item, which is then not a video and must be dropped.
            continuation = videos[-1]['continuationItemRenderer'][
                'continuationEndpoint'
            ]['continuationCommand']['token']
            videos = videos[:-1]
        except (KeyError, IndexError):
            # if there is an error, no continuation is available
            continuation = None

        # only extract the video ids from the video data
        watch_paths = [
            f"/watch?v={item['gridVideoRenderer']['videoId']}"
            for item in videos
        ]
        # remove duplicates
        return uniqueify(watch_paths), continuation
|
pytube/contrib/playlist.py
CHANGED
|
@@ -7,7 +7,7 @@ from datetime import date, datetime
|
|
| 7 |
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
| 8 |
|
| 9 |
from pytube import extract, request, YouTube
|
| 10 |
-
from pytube.helpers import cache, install_proxy, regex_search, uniqueify
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
@@ -19,15 +19,24 @@ class Playlist(Sequence):
|
|
| 19 |
if proxies:
|
| 20 |
install_proxy(proxies)
|
| 21 |
|
|
|
|
|
|
|
| 22 |
# These need to be initialized as None for the properties.
|
| 23 |
self._html = None
|
| 24 |
self._ytcfg = None
|
| 25 |
|
| 26 |
-
self.
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
@property
|
| 33 |
def html(self):
|
|
@@ -175,7 +184,7 @@ class Playlist(Sequence):
|
|
| 175 |
'appendContinuationItemsAction']['continuationItems']
|
| 176 |
videos = important_content
|
| 177 |
except (KeyError, IndexError, TypeError) as p:
|
| 178 |
-
|
| 179 |
return [], None
|
| 180 |
|
| 181 |
try:
|
|
@@ -218,27 +227,37 @@ class Playlist(Sequence):
|
|
| 218 |
for page in self._paginate(until_watch_id=video_id):
|
| 219 |
yield from (self._video_url(watch_path) for watch_path in page)
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
@property # type: ignore
|
| 222 |
@cache
|
| 223 |
-
def video_urls(self) ->
|
| 224 |
"""Complete links of all the videos in playlist
|
| 225 |
|
| 226 |
:rtype: List[str]
|
| 227 |
:returns: List of video URLs
|
| 228 |
"""
|
| 229 |
-
return
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
|
| 235 |
@property
|
| 236 |
def videos(self) -> Iterable[YouTube]:
|
| 237 |
"""Yields YouTube objects of videos in this playlist
|
| 238 |
|
| 239 |
-
:
|
|
|
|
| 240 |
"""
|
| 241 |
-
|
| 242 |
|
| 243 |
def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
|
| 244 |
return self.video_urls[i]
|
|
@@ -247,7 +266,7 @@ class Playlist(Sequence):
|
|
| 247 |
return len(self.video_urls)
|
| 248 |
|
| 249 |
def __repr__(self) -> str:
|
| 250 |
-
return f"{self.video_urls}"
|
| 251 |
|
| 252 |
@property
|
| 253 |
@cache
|
|
|
|
| 7 |
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
| 8 |
|
| 9 |
from pytube import extract, request, YouTube
|
| 10 |
+
from pytube.helpers import cache, DeferredGeneratorList, install_proxy, regex_search, uniqueify
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
|
|
| 19 |
if proxies:
|
| 20 |
install_proxy(proxies)
|
| 21 |
|
| 22 |
+
self._input_url = url
|
| 23 |
+
|
| 24 |
# These need to be initialized as None for the properties.
|
| 25 |
self._html = None
|
| 26 |
self._ytcfg = None
|
| 27 |
|
| 28 |
+
self._playlist_id = None
|
| 29 |
|
| 30 |
+
@property
def playlist_id(self):
    """Playlist id parsed from the url given at construction.

    The value is computed with ``extract.playlist_id`` on first access and
    reused while it is truthy.
    """
    if not self._playlist_id:
        self._playlist_id = extract.playlist_id(self._input_url)
    return self._playlist_id
|
| 36 |
+
|
| 37 |
+
@property
def playlist_url(self):
    """Playlist page url built from :attr:`playlist_id`."""
    list_id = self.playlist_id
    return f"https://www.youtube.com/playlist?list={list_id}"
|
| 40 |
|
| 41 |
@property
|
| 42 |
def html(self):
|
|
|
|
| 184 |
'appendContinuationItemsAction']['continuationItems']
|
| 185 |
videos = important_content
|
| 186 |
except (KeyError, IndexError, TypeError) as p:
|
| 187 |
+
logger.info(p)
|
| 188 |
return [], None
|
| 189 |
|
| 190 |
try:
|
|
|
|
| 227 |
for page in self._paginate(until_watch_id=video_id):
|
| 228 |
yield from (self._video_url(watch_path) for watch_path in page)
|
| 229 |
|
| 230 |
+
def url_generator(self):
    """Generator that yields video URLs.

    Walks every page produced by ``self._paginate()`` and converts each
    entry with ``self._video_url``.

    :Yields: Video URLs
    """
    for page in self._paginate():
        yield from map(self._video_url, page)
|
| 238 |
+
|
| 239 |
@property  # type: ignore
@cache
def video_urls(self) -> DeferredGeneratorList:
    """Complete links of all the videos in playlist

    :rtype: DeferredGeneratorList
    :returns: List-like wrapper around :meth:`url_generator`
    """
    url_stream = self.url_generator()
    return DeferredGeneratorList(url_stream)
|
| 248 |
+
|
| 249 |
+
def videos_generator(self):
    """Generator that calls ``YouTube`` on each url in :attr:`video_urls`.

    :Yields: The result of ``YouTube(url)`` for every video url, in order
    """
    yield from map(YouTube, self.video_urls)
|
| 252 |
|
| 253 |
@property
def videos(self) -> Iterable[YouTube]:
    """YouTube objects of the videos in this playlist

    A new wrapper around a fresh :meth:`videos_generator` is built on every
    access.

    :rtype: List[YouTube]
    :returns: List of YouTube
    """
    lazy_videos = DeferredGeneratorList(self.videos_generator())
    return lazy_videos
|
| 261 |
|
| 262 |
def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
    """Index or slice :attr:`video_urls`."""
    urls = self.video_urls
    return urls[i]
|
|
|
|
| 266 |
return len(self.video_urls)
|
| 267 |
|
| 268 |
def __repr__(self) -> str:
    """Return the ``repr`` of :attr:`video_urls`."""
    return repr(self.video_urls)
|
| 270 |
|
| 271 |
@property
|
| 272 |
@cache
|
pytube/extract.py
CHANGED
|
@@ -178,6 +178,37 @@ def playlist_id(url: str) -> str:
|
|
| 178 |
return parse_qs(parsed.query)['list'][0]
|
| 179 |
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
def video_info_url(video_id: str, watch_url: str) -> str:
|
| 182 |
"""Construct the video_info url.
|
| 183 |
|
|
|
|
| 178 |
return parse_qs(parsed.query)['list'][0]
|
| 179 |
|
| 180 |
|
| 181 |
+
def channel_name(url: str) -> str:
    """Extract the ``channel_name`` or ``channel_id`` from a YouTube url.

    The following patterns are tried, in this order:

    - :samp:`https://youtube.com/c/{channel_name}/*`
    - :samp:`https://youtube.com/channel/{channel_id}/*`

    :param str url:
        A YouTube url containing a channel name or channel id.
    :rtype: str
    :returns:
        The path segment captured after ``/c/`` or ``/channel/``.
    :raises RegexMatchError:
        If neither pattern is found in ``url``.
    """
    candidates = (
        r"(?:\/c\/([\d\w_\-]+)(\/.*)?)",
        r"(?:\/channel\/([\w\d_\-]+)(\/.*)?)",
    )
    for pattern in candidates:
        found = re.search(pattern, url)
        if found is None:
            continue
        logger.debug("finished regex search, matched: %s", pattern)
        return found.group(1)

    raise RegexMatchError(caller="channel_name", pattern="patterns")
|
| 210 |
+
|
| 211 |
+
|
| 212 |
def video_info_url(video_id: str, watch_url: str) -> str:
|
| 213 |
"""Construct the video_info url.
|
| 214 |
|
pytube/helpers.py
CHANGED
|
@@ -14,6 +14,101 @@ from pytube.exceptions import RegexMatchError
|
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def regex_search(pattern: str, string: str, group: int) -> str:
|
| 18 |
"""Shortcut method to search a string for a given pattern.
|
| 19 |
|
|
|
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
|
| 17 |
+
class DeferredGeneratorList:
    """A wrapper class for deferring list generation.

    Pytube has some continuation generators that create web calls, which means
    that any time a full list is requested, all of those web calls must be
    made at once, which could lead to slowdowns. This will allow individual
    elements to be queried, so that slowdowns only happen as necessary. For
    example, you can iterate over elements in the list without accessing them
    all simultaneously. This should allow for speed improvements for playlist
    and channel interactions.
    """

    def __init__(self, generator):
        """Construct a :class:`DeferredGeneratorList <DeferredGeneratorList>`.

        :param generator generator:
            The deferrable generator to create a wrapper for.
        """
        self.gen = generator
        # Items already pulled from the generator, in generation order.
        self._elements = []
        # Cursor used by __next__. It must exist before the first next()
        # call, otherwise __next__ fails with AttributeError.
        self.iter_index = 0

    def __eq__(self, other):
        """We want to mimic list behavior for comparison."""
        return list(self) == other

    def __getitem__(self, key) -> Any:
        """Only generate items as they're asked for.

        :param key:
            An ``int`` index or a ``slice``.
        :returns:
            The item for an index, or a list of items for a slice.
        :raises TypeError:
            If ``key`` is neither an ``int`` nor a ``slice``.
        :raises IndexError:
            If the generator runs out before a non-negative index, or the
            non-negative ``stop`` of a forward slice, is reached.
        """
        # We only allow querying with indexes.
        if not isinstance(key, (int, slice)):
            raise TypeError('Key must be either a slice or int.')

        required = self._required_length(key)
        if required is None:
            # Negative positions, open-ended slices and reversed slices can
            # only be resolved against the complete list.
            self.generate_all()
        else:
            # Generate all elements up to the final item
            while len(self._elements) < required:
                try:
                    next_item = next(self.gen)
                except StopIteration:
                    # If we can't find enough elements, raise an IndexError
                    raise IndexError('Index out of range.') from None
                else:
                    self._elements.append(next_item)

        return self._elements[key]

    @staticmethod
    def _required_length(key):
        """Return how many leading items ``key`` needs, or None for all."""
        if isinstance(key, int):
            return key + 1 if key >= 0 else None

        forward = key.step is None or key.step > 0
        start_ok = key.start is None or key.start >= 0
        stop_ok = key.stop is not None and key.stop >= 0
        if forward and start_ok and stop_ok:
            return key.stop
        return None

    def __iter__(self):
        """Custom iterator for dynamically generated list."""
        iter_index = 0
        while True:
            try:
                curr_item = self[iter_index]
            except IndexError:
                # Running past the end is how exhaustion is detected.
                return
            else:
                yield curr_item
                iter_index += 1

    def __next__(self) -> Any:
        """Fetch next element in iterator."""
        try:
            curr_element = self[self.iter_index]
        except IndexError:
            raise StopIteration
        self.iter_index += 1
        return curr_element  # noqa:R504

    def __len__(self) -> int:
        """Return length of list of all items."""
        self.generate_all()
        return len(self._elements)

    def __repr__(self) -> str:
        """String representation of all items."""
        self.generate_all()
        return str(self._elements)

    def __reversed__(self):
        """Return a reversed copy of all items as a list."""
        self.generate_all()
        return self._elements[::-1]

    def generate_all(self):
        """Generate all items."""
        self._elements.extend(self.gen)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
def regex_search(pattern: str, string: str, group: int) -> str:
|
| 113 |
"""Shortcut method to search a string for a given pattern.
|
| 114 |
|
tests/conftest.py
CHANGED
|
@@ -91,7 +91,8 @@ def region_blocked():
|
|
| 91 |
@pytest.fixture
|
| 92 |
def playlist_html():
|
| 93 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
| 94 |
-
https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr
|
|
|
|
| 95 |
file_path = os.path.join(
|
| 96 |
os.path.dirname(os.path.realpath(__file__)),
|
| 97 |
"mocks",
|
|
@@ -104,7 +105,8 @@ def playlist_html():
|
|
| 104 |
@pytest.fixture
|
| 105 |
def playlist_long_html():
|
| 106 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
| 107 |
-
https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr
|
|
|
|
| 108 |
file_path = os.path.join(
|
| 109 |
os.path.dirname(os.path.realpath(__file__)),
|
| 110 |
"mocks",
|
|
@@ -117,7 +119,8 @@ def playlist_long_html():
|
|
| 117 |
@pytest.fixture
|
| 118 |
def playlist_submenu_html():
|
| 119 |
"""Youtube playlist HTML loaded on 2020-01-24 from
|
| 120 |
-
https://www.youtube.com/playlist?list=PLZHQObOWTQDMsr9K-rj53DwVRMYO3t5Yr
|
|
|
|
| 121 |
file_path = os.path.join(
|
| 122 |
os.path.dirname(os.path.realpath(__file__)),
|
| 123 |
"mocks",
|
|
@@ -138,3 +141,17 @@ def stream_dict():
|
|
| 138 |
with gzip.open(file_path, "rb") as f:
|
| 139 |
content = json.loads(f.read().decode("utf-8"))
|
| 140 |
return content['watch_html']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
@pytest.fixture
|
| 92 |
def playlist_html():
|
| 93 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
| 94 |
+
https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr
|
| 95 |
+
"""
|
| 96 |
file_path = os.path.join(
|
| 97 |
os.path.dirname(os.path.realpath(__file__)),
|
| 98 |
"mocks",
|
|
|
|
| 105 |
@pytest.fixture
|
| 106 |
def playlist_long_html():
|
| 107 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
| 108 |
+
https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr
|
| 109 |
+
"""
|
| 110 |
file_path = os.path.join(
|
| 111 |
os.path.dirname(os.path.realpath(__file__)),
|
| 112 |
"mocks",
|
|
|
|
| 119 |
@pytest.fixture
|
| 120 |
def playlist_submenu_html():
|
| 121 |
"""Youtube playlist HTML loaded on 2020-01-24 from
|
| 122 |
+
https://www.youtube.com/playlist?list=PLZHQObOWTQDMsr9K-rj53DwVRMYO3t5Yr
|
| 123 |
+
"""
|
| 124 |
file_path = os.path.join(
|
| 125 |
os.path.dirname(os.path.realpath(__file__)),
|
| 126 |
"mocks",
|
|
|
|
| 141 |
with gzip.open(file_path, "rb") as f:
|
| 142 |
content = json.loads(f.read().decode("utf-8"))
|
| 143 |
return content['watch_html']
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
@pytest.fixture
def channel_videos_html():
    """Decoded contents of the gzipped channel videos mock.

    Youtube channel HTML loaded on 2021-05-05 from
    https://www.youtube.com/c/ProgrammingKnowledge/videos
    """
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    mock_path = os.path.join(tests_dir, "mocks", "channel-videos.html.gz")
    with gzip.open(mock_path, 'rb') as archive:
        raw = archive.read()
    return raw.decode('utf-8')
|
tests/contrib/test_channel.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from unittest import mock
|
| 2 |
+
|
| 3 |
+
from pytube import Channel
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@mock.patch('pytube.request.get')
def test_init_with_url(request_get, channel_videos_html):
    """The channel url and every tab url are derived from the input url."""
    request_get.return_value = channel_videos_html
    channel = Channel('https://www.youtube.com/c/ProgrammingKnowledge/videos')

    assert channel.channel_url == 'https://www.youtube.com/c/ProgrammingKnowledge'

    base = channel.channel_url
    assert channel.videos_url == f'{base}/videos'
    assert channel.playlists_url == f'{base}/playlists'
    assert channel.community_url == f'{base}/community'
    assert channel.featured_channels_url == f'{base}/channels'
    assert channel.about_url == f'{base}/about'
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@mock.patch('pytube.request.get')
def test_channel_name(request_get, channel_videos_html):
    """The name segment after ``/c/`` is exposed as ``channel_name``."""
    request_get.return_value = channel_videos_html

    channel = Channel('https://www.youtube.com/c/ProgrammingKnowledge/videos')
    expected_name = 'ProgrammingKnowledge'
    assert channel.channel_name == expected_name
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@mock.patch('pytube.request.get')
def test_channel_video_list(request_get, channel_videos_html):
    """The first ten video urls match the ones in the mocked page."""
    request_get.return_value = channel_videos_html

    channel = Channel('https://www.youtube.com/c/ProgrammingKnowledge/videos')
    expected_ids = (
        't_xLpJo_35k',
        'ccbh5YhxouQ',
        'wDnFjDjxW_0',
        'F3W_p_4XftA',
        '_fxm0xGGEi4',
        'cRbKZzcuIsg',
        'sdDu3dfIuow',
        '10KIbp-gJCE',
        'wZIT-cRtd6s',
        'KucCvEbTj0w',
    )
    first_ten = [
        f'https://www.youtube.com/watch?v={video_id}'
        for video_id in expected_ids
    ]
    assert channel.video_urls[:10] == first_ten
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@mock.patch('pytube.request.get')
def test_videos_html(request_get, channel_videos_html):
    """``Channel.html`` returns whatever the patched ``request.get`` returns."""
    request_get.return_value = channel_videos_html

    channel = Channel('https://www.youtube.com/c/ProgrammingKnowledge')
    fetched = channel.html
    assert fetched == channel_videos_html
|
| 52 |
+
|
| 53 |
+
# Because the Channel object subclasses the Playlist object, most of the tests
|
| 54 |
+
# are already taken care of by the Playlist test suite.
|
tests/mocks/channel-videos.html.gz
ADDED
|
Binary file (48.6 kB). View file
|
|
|