Spaces:
Sleeping
Sleeping
Add try/except error handling to fetchSubtitle and fetchSubtitleUrls.
Browse files- fetchYoutubeSubtitle.py +38 -31
fetchYoutubeSubtitle.py
CHANGED
|
@@ -24,28 +24,21 @@ def getUrlFromSubtitles(item, lang='en', subType="vtt"):
|
|
| 24 |
return None
|
| 25 |
|
| 26 |
l = lang if lang in langs else ('en' if 'en' in langs else list(langs)[0] )
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
for subtitle in item[l]:
|
| 30 |
-
print("getUrlFromSubtitles subtitle: %s" % subtitle)
|
| 31 |
if l != "live_chat" and subType =="xml":
|
| 32 |
return subtitle.get("url").replace("fmt="+subtitle.get("ext"),"")
|
| 33 |
if subtitle.get("ext") == subType:
|
| 34 |
return subtitle.get("url")
|
| 35 |
return None
|
| 36 |
|
| 37 |
-
async def fetchSubtitle(url: str, lang: Optional[str] = 'en', subType: Optional[str] = "vtt") ->
|
| 38 |
-
|
| 39 |
-
subtitle = await fetchSubtitlebyType(url, lang, subType, True)
|
| 40 |
-
if subtitle:
|
| 41 |
-
return subtitle
|
| 42 |
-
subtitle = await fetchSubtitlebyType(url, lang, "xml", True)
|
| 43 |
-
print(subtitle)
|
| 44 |
-
return xml_caption_to_srt(subtitle)
|
| 45 |
-
else:
|
| 46 |
-
return await fetchSubtitlebyType(url, lang, subType, True)
|
| 47 |
|
| 48 |
-
async def fetchSubtitlebyType(url: str, lang: Optional[str] = 'en', subType="vtt"
|
| 49 |
ydl_opts = {
|
| 50 |
"writesubtitles": True,
|
| 51 |
"allsubtitles": True,
|
|
@@ -54,16 +47,25 @@ async def fetchSubtitlebyType(url: str, lang: Optional[str] = 'en', subType="vtt
|
|
| 54 |
"socket_timeout": 20
|
| 55 |
}
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
return subtitle.read().decode() if decode else subtitle.read()
|
| 65 |
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
def float_to_srt_time_format(d: float) -> str:
|
| 69 |
"""Convert decimal durations into proper srt format.
|
|
@@ -109,13 +111,18 @@ async def fetchSubtitleUrls(url: str) -> json:
|
|
| 109 |
"allsubtitles": True,
|
| 110 |
"skip_download": True,
|
| 111 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
return
|
|
|
|
| 24 |
return None
|
| 25 |
|
| 26 |
l = lang if lang in langs else ('en' if 'en' in langs else list(langs)[0] )
|
| 27 |
+
if l is None:
|
| 28 |
+
return
|
| 29 |
|
| 30 |
for subtitle in item[l]:
|
| 31 |
+
# print("getUrlFromSubtitles subtitle: %s" % subtitle)
|
| 32 |
if l != "live_chat" and subType =="xml":
|
| 33 |
return subtitle.get("url").replace("fmt="+subtitle.get("ext"),"")
|
| 34 |
if subtitle.get("ext") == subType:
|
| 35 |
return subtitle.get("url")
|
| 36 |
return None
|
| 37 |
|
| 38 |
+
async def fetchSubtitle(url: str, lang: Optional[str] = 'en', subType: Optional[str] = "vtt") -> dict:
    """Fetch the subtitle for the video at ``url``.

    Thin entry point: forwards ``url``, ``lang`` and ``subType`` unchanged to
    ``fetchSubtitlebyType`` and hands back whatever dict that coroutine
    produces.

    Args:
        url: Address of the video page to look up.
        lang: Preferred subtitle language code (defaults to ``'en'``).
        subType: Requested subtitle format/extension (defaults to ``"vtt"``).

    Returns:
        The dict returned by ``fetchSubtitlebyType``.
    """
    result = await fetchSubtitlebyType(url, lang, subType)
    return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
async def fetchSubtitlebyType(url: str, lang: Optional[str] = 'en', subType="vtt") -> dict:
|
| 42 |
ydl_opts = {
|
| 43 |
"writesubtitles": True,
|
| 44 |
"allsubtitles": True,
|
|
|
|
| 47 |
"socket_timeout": 20
|
| 48 |
}
|
| 49 |
|
| 50 |
+
title = "unknow"
|
| 51 |
+
try:
|
| 52 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 53 |
+
info_dict = ydl.extract_info(url, download=False)
|
| 54 |
+
title = info_dict.get("title", "unknow")
|
| 55 |
+
if info_dict.get("extractor") == "youtube" and subType == "srt":
|
| 56 |
+
subType = "xml"
|
|
|
|
| 57 |
|
| 58 |
+
for subtitle_item in ["subtitles", "automatic_captions"]: # "requested_subtitles" item is dict
|
| 59 |
+
if info_dict.get(subtitle_item):
|
| 60 |
+
subtitle_url = getUrlFromSubtitles(info_dict.get(subtitle_item), lang, subType)
|
| 61 |
+
if subtitle_url:
|
| 62 |
+
with ydl.urlopen(subtitle_url) as response:
|
| 63 |
+
subtitle = xml_caption_to_srt(response.read().decode()) if subType == "xml" else response.read().decode()
|
| 64 |
+
print("url{}, title:{} len(subtitle): {}".format(url, title, len(subtitle)))
|
| 65 |
+
return {"title": title, "subtitle": subtitle}
|
| 66 |
+
except Exception as e:
|
| 67 |
+
return {"error": str(e)}
|
| 68 |
+
return {"title": title,"error": "No subtitles"}
|
| 69 |
|
| 70 |
def float_to_srt_time_format(d: float) -> str:
|
| 71 |
"""Convert decimal durations into proper srt format.
|
|
|
|
| 111 |
"allsubtitles": True,
|
| 112 |
"skip_download": True,
|
| 113 |
}
|
| 114 |
+
title = "unknow"
|
| 115 |
+
try:
|
| 116 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 117 |
+
info_dict = ydl.extract_info(url, download=False)
|
| 118 |
+
title = info_dict.get("title", "unknow")
|
| 119 |
|
| 120 |
+
if info_dict.get("subtitles"):
|
| 121 |
+
langs = info_dict.get("subtitles").keys()
|
| 122 |
+
if not (len(langs) == 1 and "live_chat" in langs):
|
| 123 |
+
return {"title": info_dict.get("title", "unknow"), "subtitles": info_dict.get("subtitles")}
|
| 124 |
+
if info_dict.get("automatic_captions"):
|
| 125 |
+
return {"title": info_dict.get("title", "unknow"), "subtitles": info_dict.get("automatic_captions")}
|
| 126 |
+
except Exception as e:
|
| 127 |
+
return {"error": str(e)}
|
| 128 |
+
return {"title": title,"error": "No subtitles"}
|