Spaces:
Runtime error
Runtime error
File size: 2,892 Bytes
22e8001 09131b8 45a8b80 4ee8f27 3ae41e8 b16f66b 4ee8f27 22e8001 2d161b2 45a8b80 481f3ca 45a8b80 b16f66b 3ae41e8 20d0c84 22e8001 1cb9b51 22e8001 fa531c0 22e8001 bbd21ee 033d900 bbd21ee 09131b8 2d161b2 22e8001 09131b8 22e8001 ee10cd9 22e8001 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import gradio as gr
from langchain_community.document_loaders import YoutubeLoader
from langchain.text_splitter import TokenTextSplitter
from fastapi import FastAPI
# Number of chunk textboxes in the UI; process_youtube_url pads its chunk
# outputs with empty strings up to this count.
max_textboxes = 5
def process_youtube_url(url, language):
    """Load a YouTube transcript and spread it over the chunk textboxes.

    Args:
        url: YouTube video URL handed to ``YoutubeLoader.from_youtube_url``.
        language: Transcript language code; wrapped in a one-element list
            for the loader's ``language`` argument.

    Returns:
        A flat tuple of exactly ``max_textboxes + 3`` values: one string
        per chunk textbox followed by three trailing values.

        * Success: the chunks (padded with ``""``), then ``[]``, the full
          stringified document list, and its character count.
        * Failure: the error message repeated in every chunk slot, then
          the languages parsed from the message, the recommended language,
          and ``0``.
    """
    try:
        loader = YoutubeLoader.from_youtube_url(
            youtube_url=url,
            add_video_info=True,
            language=[language],
        )
        docs = loader.load()
        # NOTE: this is the string form of the whole document list, not
        # just the transcript body, so the count below covers all of it.
        text = str(docs)
        char_count = len(text)
        text_splitter = TokenTextSplitter(chunk_size=10000, chunk_overlap=0)
        chunks = text_splitter.split_text(text)
        # Only max_textboxes chunk slots exist. Cap the list so a very long
        # transcript cannot make this function return more values than the
        # caller expects; the complete text is still returned below.
        output_textboxes = list(chunks[:max_textboxes])
        output_textboxes += [""] * (max_textboxes - len(output_textboxes))
        return *output_textboxes, [], text, char_count
    except Exception as e:
        # UI boundary: surface the failure in the textboxes instead of
        # raising, and mine the message for language suggestions.
        error_msg = str(e)
        available_languages = extract_available_languages(error_msg)
        recommended_language = extract_recommended_language(error_msg)
        error_boxes = [error_msg] * max_textboxes
        return *error_boxes, available_languages, recommended_language, 0
def extract_available_languages(error_msg):
    """Parse the language entries listed in a transcript error message.

    Scans ``error_msg`` line by line. Once a line starting with
    ``(GENERATED)`` has been seen, every later line of the form
    `` - <code> (<name>)`` (optionally followed by a marker such as
    ``[TRANSLATABLE]``) is collected. Entries from any section after the
    header are included, because the flag is never reset.

    Args:
        error_msg: The error text to scan.

    Returns:
        A list of ``"<name> (<code>)"`` strings in order of appearance;
        empty when no header or no well-formed entry is found.
    """
    languages = []
    generated_section = False
    for line in error_msg.split("\n"):
        if line.startswith("(GENERATED)"):
            generated_section = True
            continue
        if not (generated_section and line.startswith(" - ")):
            continue
        lang_code, opener, remainder = line[3:].partition(" (")
        if not opener:
            # Malformed entry without a "(name)" part: skip it rather than
            # raise while the caller is already handling an error.
            continue
        # Cut at the LAST ")" so a trailing marker such as "[TRANSLATABLE]"
        # is dropped while parentheses inside the name are preserved.
        lang_name, closer, _ = remainder.rpartition(")")
        if not closer:
            lang_name = remainder
        languages.append(f"{lang_name} ({lang_code})")
    return languages
def extract_recommended_language(error_msg):
    """Return the first translatable language listed in an error message.

    Scans ``error_msg`` line by line. After a line starting with
    ``(GENERATED)``, the first entry of the form `` - <code> (<name>)``
    that also contains ``[TRANSLATABLE]`` is selected.

    Args:
        error_msg: The error text to scan.

    Returns:
        ``"<name> (<code>)"`` for the first matching entry, or ``""`` when
        there is none.
    """
    generated_section = False
    for line in error_msg.split("\n"):
        if line.startswith("(GENERATED)"):
            generated_section = True
            continue
        if not (generated_section and line.startswith(" - ")):
            continue
        if "[TRANSLATABLE]" not in line:
            continue
        lang_code, opener, remainder = line[3:].partition(" (")
        if not opener:
            # Malformed entry: keep looking instead of raising.
            continue
        # The marker follows the closing ")", so cut at the last ")" to
        # keep only the name; stripping a single trailing character would
        # leave ')[TRANSLATABLE' attached to it.
        lang_name, closer, _ = remainder.rpartition(")")
        if not closer:
            lang_name = remainder
        return f"{lang_name} ({lang_code})"
    return ""
# Gradio front end: a URL box and a language picker feed
# process_youtube_url; its return values fill the outputs in order.
iface = gr.Interface(
    fn=process_youtube_url,
    inputs=[
        gr.Textbox(label="YouTube URL", placeholder="https://youtu.be/example"),
        gr.Dropdown(
            label="Language",
            value="ja",
            choices=["en", "en-US", "ja", "fr", "de", "it"],
            allow_custom_value=True,
        ),
    ],
    outputs=[
        # One copyable textbox per transcript chunk slot.
        *(
            gr.Textbox(label=f"chunk{slot}", show_copy_button=True)
            for slot in range(max_textboxes)
        ),
        gr.Dropdown(label="Available Languages", allow_custom_value=True),
        gr.Textbox(label="Recommended Language"),
        gr.Number(label="Character Count"),
    ],
    examples=[
        ["https://youtu.be/6Af6b_wyiwI?si=zqD9-kjw24lpRJw3", "ja"],
        ["https://youtu.be/9kxL9Cf46VM?si=ADgUmDXb6riA-lgb", "ja"],
    ],
    title="YouTube Transcript Loader",
    description="Enter a YouTube URL and select the language to load the transcript using LangChain's YoutubeLoader.[buy me a coffee](https://www.buymeacoffee.com/regulusle04)",
)
# Launch the interface only when this file is executed as a script, so
# importing the module does not start it.
if __name__ == "__main__":
    iface.launch()