Spaces:
Sleeping
Sleeping
Set user requested lang to requested_subtitles
Browse files
Get the subtitle from subtitles if getting the subtitle by lang returns nothing, e.g. uwIHyLPBZkY: this video has jp subtitles and jp translated to other languages, but jp to en is empty.
- fetchYoutubeSubtitle.py +39 -27
fetchYoutubeSubtitle.py
CHANGED
|
@@ -121,7 +121,7 @@ async def fetchSubtitlebyType(
|
|
| 121 |
proxy: Optional[str] = None,
|
| 122 |
) -> dict:
|
| 123 |
# lang-code or lang.* .* is regex
|
| 124 |
-
reqLang = lang if len(lang.split("-")) > 1 or lang.endswith(".*") else lang + ".*"
|
| 125 |
|
| 126 |
ydl_opts = {
|
| 127 |
"noplaylist": True,
|
|
@@ -129,7 +129,10 @@ async def fetchSubtitlebyType(
|
|
| 129 |
"writeautomaticsub": True,
|
| 130 |
# "listsubtitles": True,
|
| 131 |
# "subtitlesformat": subType, # mark due to default youtube no srt and xml format
|
| 132 |
-
"subtitleslangs": [
|
|
|
|
|
|
|
|
|
|
| 133 |
"skip_download": True,
|
| 134 |
"socket_timeout": 10,
|
| 135 |
"extractor_retries": 0,
|
|
@@ -164,37 +167,46 @@ async def fetchSubtitlebyType(
|
|
| 164 |
isSrt = True
|
| 165 |
if debug:
|
| 166 |
print(
|
| 167 |
-
"subtitles.keys(): {} automatic_captions: {}".format(
|
| 168 |
info_dict.get("subtitles").keys(),
|
| 169 |
info_dict.get("automatic_captions").keys(),
|
|
|
|
| 170 |
)
|
| 171 |
)
|
| 172 |
|
| 173 |
-
subtitle_url = getRequestedSubtitlesUrl(info_dict, lang, subType)
|
| 174 |
-
if not subtitle_url:
|
| 175 |
-
|
| 176 |
-
if not subtitle_url:
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
print(
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
| 190 |
)
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
except Exception as e:
|
| 199 |
print(e)
|
| 200 |
traceback.print_exc()
|
|
|
|
| 121 |
proxy: Optional[str] = None,
|
| 122 |
) -> dict:
|
| 123 |
# lang-code or lang.* .* is regex
|
| 124 |
+
# reqLang = lang if len(lang.split("-")) > 1 or lang.endswith(".*") else lang + ".*"
|
| 125 |
|
| 126 |
ydl_opts = {
|
| 127 |
"noplaylist": True,
|
|
|
|
| 129 |
"writeautomaticsub": True,
|
| 130 |
# "listsubtitles": True,
|
| 131 |
# "subtitlesformat": subType, # mark due to default youtube no srt and xml format
|
| 132 |
+
"subtitleslangs": [
|
| 133 |
+
lang,
|
| 134 |
+
"-live_chat",
|
| 135 |
+
], # filter live chat to requested_subtitles
|
| 136 |
"skip_download": True,
|
| 137 |
"socket_timeout": 10,
|
| 138 |
"extractor_retries": 0,
|
|
|
|
| 167 |
isSrt = True
|
| 168 |
if debug:
|
| 169 |
print(
|
| 170 |
+
"subtitles.keys(): {} automatic_captions: {} requested_subtitles: {}".format(
|
| 171 |
info_dict.get("subtitles").keys(),
|
| 172 |
info_dict.get("automatic_captions").keys(),
|
| 173 |
+
info_dict.get("requested_subtitles").keys(),
|
| 174 |
)
|
| 175 |
)
|
| 176 |
|
| 177 |
+
# subtitle_url = getRequestedSubtitlesUrl(info_dict, lang, subType)
|
| 178 |
+
# if not subtitle_url:
|
| 179 |
+
# subtitle_url = getSubtitleLangUrl(info_dict, lang, subType)
|
| 180 |
+
# if not subtitle_url:
|
| 181 |
+
# subtitle_url = getSubtitleOtherUrl(info_dict, lang, subType)
|
| 182 |
+
|
| 183 |
+
subtitle_funcs = [
|
| 184 |
+
getRequestedSubtitlesUrl,
|
| 185 |
+
getSubtitleLangUrl,
|
| 186 |
+
getSubtitleOtherUrl,
|
| 187 |
+
]
|
| 188 |
+
for index in range(len(subtitle_funcs)):
|
| 189 |
+
subtitle_url = subtitle_funcs[index](info_dict, lang, subType)
|
| 190 |
+
if subtitle_url:
|
| 191 |
+
# print("subtitle_url: {}".format(subtitle_url))
|
| 192 |
+
with ydl.urlopen(subtitle_url) as response:
|
| 193 |
+
subtitle = (
|
| 194 |
+
xml_caption_to_srt(response.read().decode())
|
| 195 |
+
if isSrt
|
| 196 |
+
else response.read().decode()
|
| 197 |
)
|
| 198 |
+
print(
|
| 199 |
+
"function index:{}, url:{}, title:{}, duration:{} len(subtitle): {}".format(
|
| 200 |
+
index, url, title, duration, len(subtitle or "")
|
| 201 |
+
)
|
| 202 |
+
)
|
| 203 |
+
if subtitle is not None:
|
| 204 |
+
return {
|
| 205 |
+
"title": title,
|
| 206 |
+
"duration": duration,
|
| 207 |
+
"subtitle": subtitle,
|
| 208 |
+
"chapters": info_dict.get("chapters", None),
|
| 209 |
+
}
|
| 210 |
except Exception as e:
|
| 211 |
print(e)
|
| 212 |
traceback.print_exc()
|