Taylor Fox Dahlin committed on
Improved caption selection, and updated tests to reflect this. (#783)
Browse files
- pytube/captions.py +6 -1
- tests/test_captions.py +14 -10
- tests/test_cli.py +5 -5
pytube/captions.py
CHANGED
|
@@ -23,7 +23,12 @@ class Caption:
|
|
| 23 |
"""
|
| 24 |
self.url = caption_track.get("baseUrl")
|
| 25 |
self.name = caption_track["name"]["simpleText"]
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
@property
|
| 29 |
def xml_captions(self) -> str:
|
|
|
|
| 23 |
"""
|
| 24 |
self.url = caption_track.get("baseUrl")
|
| 25 |
self.name = caption_track["name"]["simpleText"]
|
| 26 |
+
# Use "vssId" instead of "languageCode", fix issue #779
|
| 27 |
+
self.code = caption_track["vssId"]
|
| 28 |
+
# Remove preceding '.' for backwards compatibility, e.g.:
|
| 29 |
+
# English -> vssId: .en, languageCode: en
|
| 30 |
+
# English (auto-generated) -> vssId: a.en, languageCode: en
|
| 31 |
+
self.code = self.code.strip('.')
|
| 32 |
|
| 33 |
@property
|
| 34 |
def xml_captions(self) -> str:
|
tests/test_captions.py
CHANGED
|
@@ -14,17 +14,17 @@ from pytube import captions
|
|
| 14 |
|
| 15 |
def test_float_to_srt_time_format():
|
| 16 |
caption1 = Caption(
|
| 17 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 18 |
)
|
| 19 |
assert caption1.float_to_srt_time_format(3.89) == "00:00:03,890"
|
| 20 |
|
| 21 |
|
| 22 |
def test_caption_query_sequence():
|
| 23 |
caption1 = Caption(
|
| 24 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 25 |
)
|
| 26 |
caption2 = Caption(
|
| 27 |
-
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
| 28 |
)
|
| 29 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
| 30 |
assert len(caption_query) == 2
|
|
@@ -36,10 +36,10 @@ def test_caption_query_sequence():
|
|
| 36 |
|
| 37 |
def test_caption_query_get_by_language_code_when_exists():
|
| 38 |
caption1 = Caption(
|
| 39 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 40 |
)
|
| 41 |
caption2 = Caption(
|
| 42 |
-
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
| 43 |
)
|
| 44 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
| 45 |
assert caption_query["en"] == caption1
|
|
@@ -47,10 +47,10 @@ def test_caption_query_get_by_language_code_when_exists():
|
|
| 47 |
|
| 48 |
def test_caption_query_get_by_language_code_when_not_exists():
|
| 49 |
caption1 = Caption(
|
| 50 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 51 |
)
|
| 52 |
caption2 = Caption(
|
| 53 |
-
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
| 54 |
)
|
| 55 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
| 56 |
with pytest.raises(KeyError):
|
|
@@ -68,6 +68,7 @@ def test_download(srt):
|
|
| 68 |
"url": "url1",
|
| 69 |
"name": {"simpleText": "name1"},
|
| 70 |
"languageCode": "en",
|
|
|
|
| 71 |
}
|
| 72 |
)
|
| 73 |
caption.download("title")
|
|
@@ -86,6 +87,7 @@ def test_download_with_prefix(srt):
|
|
| 86 |
"url": "url1",
|
| 87 |
"name": {"simpleText": "name1"},
|
| 88 |
"languageCode": "en",
|
|
|
|
| 89 |
}
|
| 90 |
)
|
| 91 |
caption.download("title", filename_prefix="1 ")
|
|
@@ -106,6 +108,7 @@ def test_download_with_output_path(srt):
|
|
| 106 |
"url": "url1",
|
| 107 |
"name": {"simpleText": "name1"},
|
| 108 |
"languageCode": "en",
|
|
|
|
| 109 |
}
|
| 110 |
)
|
| 111 |
file_path = caption.download("title", output_path="blah")
|
|
@@ -123,6 +126,7 @@ def test_download_xml_and_trim_extension(xml):
|
|
| 123 |
"url": "url1",
|
| 124 |
"name": {"simpleText": "name1"},
|
| 125 |
"languageCode": "en",
|
|
|
|
| 126 |
}
|
| 127 |
)
|
| 128 |
caption.download("title.xml", srt=False)
|
|
@@ -133,7 +137,7 @@ def test_download_xml_and_trim_extension(xml):
|
|
| 133 |
|
| 134 |
def test_repr():
|
| 135 |
caption = Caption(
|
| 136 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 137 |
)
|
| 138 |
assert str(caption) == '<Caption lang="name1" code="en">'
|
| 139 |
|
|
@@ -145,7 +149,7 @@ def test_repr():
|
|
| 145 |
def test_xml_captions(request_get):
|
| 146 |
request_get.return_value = "test"
|
| 147 |
caption = Caption(
|
| 148 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 149 |
)
|
| 150 |
assert caption.xml_captions == "test"
|
| 151 |
|
|
@@ -158,7 +162,7 @@ def test_generate_srt_captions(request):
|
|
| 158 |
"如要啓動字幕,請按一下這裡的圖示。</text></transcript>"
|
| 159 |
)
|
| 160 |
caption = Caption(
|
| 161 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 162 |
)
|
| 163 |
assert caption.generate_srt_captions() == (
|
| 164 |
"1\n"
|
|
|
|
| 14 |
|
| 15 |
def test_float_to_srt_time_format():
|
| 16 |
caption1 = Caption(
|
| 17 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 18 |
)
|
| 19 |
assert caption1.float_to_srt_time_format(3.89) == "00:00:03,890"
|
| 20 |
|
| 21 |
|
| 22 |
def test_caption_query_sequence():
|
| 23 |
caption1 = Caption(
|
| 24 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 25 |
)
|
| 26 |
caption2 = Caption(
|
| 27 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
|
| 28 |
)
|
| 29 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
| 30 |
assert len(caption_query) == 2
|
|
|
|
| 36 |
|
| 37 |
def test_caption_query_get_by_language_code_when_exists():
|
| 38 |
caption1 = Caption(
|
| 39 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 40 |
)
|
| 41 |
caption2 = Caption(
|
| 42 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
|
| 43 |
)
|
| 44 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
| 45 |
assert caption_query["en"] == caption1
|
|
|
|
| 47 |
|
| 48 |
def test_caption_query_get_by_language_code_when_not_exists():
|
| 49 |
caption1 = Caption(
|
| 50 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 51 |
)
|
| 52 |
caption2 = Caption(
|
| 53 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
|
| 54 |
)
|
| 55 |
caption_query = CaptionQuery(captions=[caption1, caption2])
|
| 56 |
with pytest.raises(KeyError):
|
|
|
|
| 68 |
"url": "url1",
|
| 69 |
"name": {"simpleText": "name1"},
|
| 70 |
"languageCode": "en",
|
| 71 |
+
"vssId": ".en"
|
| 72 |
}
|
| 73 |
)
|
| 74 |
caption.download("title")
|
|
|
|
| 87 |
"url": "url1",
|
| 88 |
"name": {"simpleText": "name1"},
|
| 89 |
"languageCode": "en",
|
| 90 |
+
"vssId": ".en"
|
| 91 |
}
|
| 92 |
)
|
| 93 |
caption.download("title", filename_prefix="1 ")
|
|
|
|
| 108 |
"url": "url1",
|
| 109 |
"name": {"simpleText": "name1"},
|
| 110 |
"languageCode": "en",
|
| 111 |
+
"vssId": ".en"
|
| 112 |
}
|
| 113 |
)
|
| 114 |
file_path = caption.download("title", output_path="blah")
|
|
|
|
| 126 |
"url": "url1",
|
| 127 |
"name": {"simpleText": "name1"},
|
| 128 |
"languageCode": "en",
|
| 129 |
+
"vssId": ".en"
|
| 130 |
}
|
| 131 |
)
|
| 132 |
caption.download("title.xml", srt=False)
|
|
|
|
| 137 |
|
| 138 |
def test_repr():
|
| 139 |
caption = Caption(
|
| 140 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 141 |
)
|
| 142 |
assert str(caption) == '<Caption lang="name1" code="en">'
|
| 143 |
|
|
|
|
| 149 |
def test_xml_captions(request_get):
|
| 150 |
request_get.return_value = "test"
|
| 151 |
caption = Caption(
|
| 152 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 153 |
)
|
| 154 |
assert caption.xml_captions == "test"
|
| 155 |
|
|
|
|
| 162 |
"如要啓動字幕,請按一下這裡的圖示。</text></transcript>"
|
| 163 |
)
|
| 164 |
caption = Caption(
|
| 165 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 166 |
)
|
| 167 |
assert caption.generate_srt_captions() == (
|
| 168 |
"1\n"
|
tests/test_cli.py
CHANGED
|
@@ -71,7 +71,7 @@ def test_display_stream(youtube, stream):
|
|
| 71 |
def test_download_caption_with_none(youtube, print_available):
|
| 72 |
# Given
|
| 73 |
caption = Caption(
|
| 74 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 75 |
)
|
| 76 |
youtube.captions = CaptionQuery([caption])
|
| 77 |
# When
|
|
@@ -84,7 +84,7 @@ def test_download_caption_with_none(youtube, print_available):
|
|
| 84 |
def test_download_caption_with_language_found(youtube):
|
| 85 |
youtube.title = "video title"
|
| 86 |
caption = Caption(
|
| 87 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 88 |
)
|
| 89 |
caption.download = MagicMock(return_value="file_path")
|
| 90 |
youtube.captions = CaptionQuery([caption])
|
|
@@ -97,7 +97,7 @@ def test_download_caption_with_language_found(youtube):
|
|
| 97 |
def test_download_caption_with_lang_not_found(youtube, print_available):
|
| 98 |
# Given
|
| 99 |
caption = Caption(
|
| 100 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 101 |
)
|
| 102 |
youtube.captions = CaptionQuery([caption])
|
| 103 |
# When
|
|
@@ -109,10 +109,10 @@ def test_download_caption_with_lang_not_found(youtube, print_available):
|
|
| 109 |
def test_print_available_captions(capsys):
|
| 110 |
# Given
|
| 111 |
caption1 = Caption(
|
| 112 |
-
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
| 113 |
)
|
| 114 |
caption2 = Caption(
|
| 115 |
-
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
| 116 |
)
|
| 117 |
query = CaptionQuery([caption1, caption2])
|
| 118 |
# When
|
|
|
|
| 71 |
def test_download_caption_with_none(youtube, print_available):
|
| 72 |
# Given
|
| 73 |
caption = Caption(
|
| 74 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 75 |
)
|
| 76 |
youtube.captions = CaptionQuery([caption])
|
| 77 |
# When
|
|
|
|
| 84 |
def test_download_caption_with_language_found(youtube):
|
| 85 |
youtube.title = "video title"
|
| 86 |
caption = Caption(
|
| 87 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 88 |
)
|
| 89 |
caption.download = MagicMock(return_value="file_path")
|
| 90 |
youtube.captions = CaptionQuery([caption])
|
|
|
|
| 97 |
def test_download_caption_with_lang_not_found(youtube, print_available):
|
| 98 |
# Given
|
| 99 |
caption = Caption(
|
| 100 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 101 |
)
|
| 102 |
youtube.captions = CaptionQuery([caption])
|
| 103 |
# When
|
|
|
|
| 109 |
def test_print_available_captions(capsys):
|
| 110 |
# Given
|
| 111 |
caption1 = Caption(
|
| 112 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en", "vssId": ".en"}
|
| 113 |
)
|
| 114 |
caption2 = Caption(
|
| 115 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr", "vssId": ".fr"}
|
| 116 |
)
|
| 117 |
query = CaptionQuery([caption1, caption2])
|
| 118 |
# When
|