Spaces:
Runtime error
Runtime error
File size: 4,139 Bytes
22e8001 09131b8 45a8b80 31f6311 8b1e0fa 3ae41e8 31f6311 b16f66b 4ee8f27 22e8001 2d161b2 a821a32 363b20b a821a32 d9c827a 45a8b80 a821a32 b16f66b 31f6311 a5c5bd4 31f6311 a5c5bd4 31f6311 f193ea1 31f6311 3ae41e8 31f6311 a821a32 22e8001 9b7e8f2 a821a32 22e8001 fa531c0 22e8001 bbd21ee 775c750 bbd21ee 09131b8 775c750 e76d38b 22e8001 9d28597 09131b8 22e8001 ee10cd9 22e8001 31f6311 06c8ee9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | import gradio as gr
from langchain_community.document_loaders import YoutubeLoader
from langchain.text_splitter import TokenTextSplitter
from groq import Groq
import os
# Groq API client used for the summarization calls below.
# NOTE(review): the key is read from an env var literally named "api_key" —
# most deployments use GROQ_API_KEY; confirm the hosting environment sets
# this exact name.
client = Groq(
api_key=os.environ.get("api_key"),
)
# Fixed number of transcript-chunk textboxes in the Gradio UI; the callback
# must always yield exactly this many chunk values.
max_textboxes = 5
def process_youtube_url(url, language):
    """Load a YouTube transcript and stream a Japanese summary of it.

    Generator used as the Gradio callback.  Each yielded tuple matches the
    interface's output list in order: five chunk strings, the
    available-languages list, the recommended-language/full-text box, a
    character count, and the summary streamed so far.

    On failure the loader's error message is parsed for the languages the
    video actually offers, and that information fills the outputs instead.
    """
    try:
        loader = YoutubeLoader.from_youtube_url(
            youtube_url=url,
            add_video_info=True,
            language=[language],
        )
        docs = loader.load()
        text = str(docs)
        # NOTE(review): despite the name this is a *character* count; the UI
        # labels it "Character Count", so only the variable name is off.
        token_count = len(text)
        text_splitter = TokenTextSplitter(chunk_size=30_000, chunk_overlap=0)
        # BUG FIX: cap the chunk list at max_textboxes — previously a long
        # transcript produced more values than the UI has textboxes, and the
        # negative padding range silently added nothing.
        chunks = text_splitter.split_text(text)[:max_textboxes]
        output_textboxes = list(chunks) + [""] * (max_textboxes - len(chunks))
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {
                    "role": "system",
                    "content": "lang:日本語 あなたは日本語話者の解説系Youtuberです。",
                },
                {
                    "role": "user",
                    # BUG FIX: prompt typo "trascript" -> "transcript".
                    "content": f"lang:日本語 日本語で次のtranscriptを解説して\n\n## transcript \n{text}",
                },
                {
                    # BUG FIX: role was misspelled "assisnat"; the API only
                    # accepts system/user/assistant, so the request failed.
                    "role": "assistant",
                    "content": "この動画は、",
                },
            ],
            temperature=0.7,
            max_tokens=1024,
            top_p=1,
            stream=True,
            stop=None,
        )
        summarized_text = ""
        for chunk in completion:
            summarized_text += chunk.choices[0].delta.content or ""
            # Stream partial summaries; the success path leaves the language
            # dropdown empty and shows the raw transcript in the text box.
            yield *output_textboxes, [], text, token_count, summarized_text
    except Exception as e:
        error_msg = str(e)
        available_languages = extract_available_languages(error_msg)
        recommended_language = extract_recommended_language(error_msg)
        # Surface the error in every chunk box plus the parsed language hints.
        yield *[error_msg] * max_textboxes, available_languages, recommended_language, 0, ""
def extract_available_languages(error_msg):
    """Parse the generated-transcript languages out of a loader error message.

    When a transcript is unavailable in the requested language, the loader's
    error text lists generated transcripts under a "(GENERATED)" heading as
    lines like " - en (English)".  Returns them as ["English (en)", ...],
    or an empty list when no such section exists.
    """
    languages = []
    in_generated_section = False
    for line in error_msg.split("\n"):
        if line.startswith("(GENERATED)"):
            in_generated_section = True
        elif in_generated_section and line.startswith(" - "):
            # BUG FIX: guard against entries without " (" — the previous
            # two-target split raised ValueError on malformed lines.
            code, sep, name = line[3:].partition(" (")
            if sep:
                # name still carries the closing ")"; strip it.
                languages.append(f"{name[:-1]} ({code})")
    return languages
def extract_recommended_language(error_msg):
    """Return the first translatable generated language from an error message.

    Scans the loader error text for the "(GENERATED)" section and returns the
    first entry tagged "[TRANSLATABLE]", formatted as "<name> (<code>)".
    Returns "" when no such entry is present.
    """
    seen_generated_header = False
    for raw_line in error_msg.split("\n"):
        if raw_line.startswith("(GENERATED)"):
            seen_generated_header = True
            continue
        if not seen_generated_header:
            continue
        if raw_line.startswith(" - ") and "[TRANSLATABLE]" in raw_line:
            code, name = raw_line[3:].split(" (", 1)
            return f"{name[:-1]} ({code})"
    return ""
# Gradio interface definition.  The output list order is a contract with
# process_youtube_url's yielded tuples: five chunk textboxes, then the
# languages dropdown, recommended-language box, character count, and the
# streamed summary.
iface = gr.Interface(
    fn=process_youtube_url,
    inputs=[
        gr.Textbox(label="YouTube URL", placeholder="https://youtu.be/example"),
        gr.Dropdown(
            label="Language",
            value="ja",
            choices=["en", "en-US", "ja", "fr", "de", "it"],
            allow_custom_value=True,
        ),
    ],
    outputs=[
        gr.Textbox(label=f"chunk{ind}", show_copy_button=True, max_lines=5)
        for ind in range(max_textboxes)
    ]
    + [
        gr.Dropdown(label="Available Languages", allow_custom_value=True),
        gr.Textbox(label="Recommended Language", show_copy_button=True),
        gr.Number(label="Character Count"),
        # BUG FIX: label typo "summirized output" -> "Summarized output".
        gr.Markdown(label="Summarized output"),
    ],
    live=True,
    examples=[
        ["https://youtu.be/6Af6b_wyiwI?si=zqD9-kjw24lpRJw3", "ja"],
        ["https://youtu.be/9kxL9Cf46VM?si=ADgUmDXb6riA-lgb", "ja"],
    ],
    title="YouTube Transcript Loader",
    description=(
        "Enter a YouTube URL and select the language to load the transcript "
        "using LangChain's YoutubeLoader."
        "[buy me a coffee](https://www.buymeacoffee.com/regulusle04)"
    ),
)
if __name__ == "__main__":
    # queue() is required for streaming (generator) callbacks; share=True
    # exposes a public Gradio link.
    iface.queue()
    iface.launch(share=True)