Spaces:
Runtime error
Runtime error
File size: 4,852 Bytes
22e8001 09131b8 45a8b80 93f0416 8b1e0fa 3ae41e8 93f0416 31f6311 b16f66b 4ee8f27 049b4e5 22e8001 049b4e5 22e8001 2d161b2 a821a32 363b20b a821a32 d9c827a 45a8b80 a821a32 b16f66b 17d83d8 93f0416 31f6311 9d8529b 7f17b94 93f0416 7f17b94 31f6311 93f0416 606db80 93f0416 31f6311 93f0416 31f6311 93f0416 3ae41e8 93f0416 a821a32 22e8001 17d83d8 a821a32 22e8001 fa531c0 22e8001 bbd21ee 775c750 bbd21ee 09131b8 775c750 e76d38b 22e8001 9d28597 09131b8 22e8001 ee10cd9 22e8001 31f6311 06c8ee9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import gradio as gr
from langchain_community.document_loaders import YoutubeLoader
from langchain.text_splitter import TokenTextSplitter
import anthropic
import os
# Anthropic API client shared by the whole module; the key is read from the
# "api_key" environment variable (None when the variable is unset).
client = anthropic.Anthropic(api_key=os.getenv("api_key"))

# Number of chunk textboxes shown in the UI.
max_textboxes = 5
def process_youtube_url(url="", language="en"):
    """Load a YouTube transcript and stream a Japanese commentary on it.

    This is a generator: every value it produces is a tuple of
    ``max_textboxes + 4`` items, in the order of the interface outputs:

    * ``max_textboxes`` strings for the chunk textboxes,
    * a list for the "Available Languages" dropdown,
    * a string for the next textbox (the transcript text on success, the
      recommended language on failure),
    * the character count of the transcript text,
    * the commentary accumulated so far.

    Args:
        url: YouTube video URL. An empty string produces a single
            placeholder row.
        language: Transcript language code passed to ``YoutubeLoader``.

    Yields:
        Output tuples as described above. Any exception raised while
        loading or streaming is reported as a final row whose chunk
        textboxes all contain the error message.
    """
    try:
        if url == "":
            # Inside a generator a ``return <value>`` is never delivered to
            # the consumer, so the placeholder row has to be yielded.
            placeholders = ["I'm waiting..."] * max_textboxes
            yield (*placeholders, [], "", 0, "")
            return

        loader = YoutubeLoader.from_youtube_url(
            youtube_url=url,
            add_video_info=True,
            language=[language],
        )
        docs = loader.load()
        text = str(docs)
        char_count = len(text)

        text_splitter = TokenTextSplitter(chunk_size=30_000, chunk_overlap=0)
        chunks = text_splitter.split_text(text)
        # Show at most max_textboxes chunks: yielding more values than there
        # are output components would break the interface. Pad with "" when
        # there are fewer chunks than textboxes.
        output_textboxes = list(chunks[:max_textboxes])
        output_textboxes += [""] * (max_textboxes - len(output_textboxes))
        yield (*output_textboxes, [], text, char_count, "")

        with client.messages.stream(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "あなたはだれ?",
                        }
                    ],
                },
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "text",
                            "text": "わたしは日本語話者の解説系Youtuberです。",
                        }
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"lang:日本語 日本語で次のtranscriptを解説して。長くなってもいいよ\n\n## trascript \n```{text}```",
                        }
                    ],
                },
            ],
            system="lang:日本語 あなたは日本語話者の解説系Youtuberです。",
            model="claude-3-haiku-20240307",
            max_tokens=4096,
            temperature=0.7,
        ) as stream:
            summary = ""
            # The loop variable must not be called ``text``: that would
            # overwrite the transcript that is re-emitted with every update.
            for delta in stream.text_stream:
                summary += delta
                yield (*output_textboxes, [], text, char_count, summary)
    except Exception as e:
        # Top-level boundary for the UI: show the error in every chunk box
        # and try to extract language hints from the message.
        error_msg = str(e)
        available_languages = extract_available_languages(error_msg)
        recommended_language = extract_recommended_language(error_msg)
        error_boxes = [error_msg] * max_textboxes
        yield (*error_boxes, available_languages, recommended_language, 0, "")
def extract_available_languages(error_msg):
    """Collect language entries listed after a ``(GENERATED)`` line.

    The message is scanned line by line. Once a line starting with
    ``(GENERATED)`` has been seen, every later line starting with ``" - "``
    is parsed as ``<code> (<name>)`` and reported as ``"<name> (<code>)"``.
    The section flag is never cleared, so matching lines in any later
    section are included as well.

    A ``[TRANSLATABLE]`` marker is removed before parsing so that the
    closing parenthesis (not the marker's ``]``) is what gets trimmed from
    the name. Entries without a ``" ("`` separator are skipped instead of
    raising ``ValueError``.

    Args:
        error_msg: Error text to scan.

    Returns:
        List of ``"<name> (<code>)"`` strings, in order of appearance.
    """
    languages = []
    generated_section = False
    for line in error_msg.split("\n"):
        if line.startswith("(GENERATED)"):
            generated_section = True
        elif generated_section and line.startswith(" - "):
            entry = line[3:].replace("[TRANSLATABLE]", "")
            lang_code, sep, lang_name = entry.partition(" (")
            if not sep:
                # Malformed entry: nothing to split into code and name.
                continue
            # Drop the last character (the closing parenthesis).
            languages.append(f"{lang_name[:-1]} ({lang_code})")
    return languages
def extract_recommended_language(error_msg):
    """Return the first translatable language listed after ``(GENERATED)``.

    The message is scanned line by line. After a line starting with
    ``(GENERATED)``, the first line that starts with ``" - "`` and contains
    ``[TRANSLATABLE]`` is parsed as ``<code> (<name>)`` and returned as
    ``"<name> (<code>)"``.

    The ``[TRANSLATABLE]`` marker is removed before parsing; otherwise the
    one-character trim would strip the marker's ``]`` rather than the
    closing parenthesis of the name. Entries without a ``" ("`` separator
    are skipped instead of raising ``ValueError``.

    Args:
        error_msg: Error text to scan.

    Returns:
        ``"<name> (<code>)"`` for the first match, or ``""`` if none.
    """
    marker = "[TRANSLATABLE]"
    generated_section = False
    for line in error_msg.split("\n"):
        if line.startswith("(GENERATED)"):
            generated_section = True
        elif generated_section and line.startswith(" - ") and marker in line:
            entry = line[3:].replace(marker, "")
            lang_code, sep, lang_name = entry.partition(" (")
            if not sep:
                # Malformed entry: keep looking for a usable one.
                continue
            # Drop the last character (the closing parenthesis).
            return f"{lang_name[:-1]} ({lang_code})"
    return ""
# Gradio interface wiring process_youtube_url to the UI.
# Inputs: the video URL and the transcript language.
# Outputs: max_textboxes chunk textboxes followed by four extra components,
# in the same order as the tuples produced by process_youtube_url.
iface = gr.Interface(
    fn=process_youtube_url,
    title="YouTube Transcript Loader",
    description="Enter a YouTube URL and select the language to load the transcript using LangChain's YoutubeLoader.[buy me a coffee](https://www.buymeacoffee.com/regulusle04)",
    inputs=[
        gr.Textbox(label="YouTube URL", placeholder="https://youtu.be/example"),
        gr.Dropdown(
            label="Language",
            value="ja",
            choices=["en", "en-US", "ja", "fr", "de", "it"],
            allow_custom_value=True,
        ),
    ],
    outputs=[
        *[
            gr.Textbox(label=f"chunk{ind}", show_copy_button=True, max_lines=5)
            for ind in range(max_textboxes)
        ],
        gr.Dropdown(label="Available Languages", allow_custom_value=True),
        gr.Textbox(label="Recommended Language", show_copy_button=True),
        gr.Number(label="Character Count"),
        gr.Markdown(label='summirized output'),
    ],
    live=True,
    examples=[
        ["https://youtu.be/6Af6b_wyiwI?si=zqD9-kjw24lpRJw3", "ja"],
        ["https://youtu.be/9kxL9Cf46VM?si=ADgUmDXb6riA-lgb", "ja"],
    ],
)
if __name__ == "__main__":
    # Script entry point: set up the queue on the interface, then launch it
    # with share=True.
    iface.queue()
    iface.launch(share=True)