Spaces:
Sleeping
Sleeping
shan gao
committed on
Commit
·
5c7c966
1
Parent(s):
0ff6a32
change
Browse files
agent.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
# Develop an AI agent with LangGraph and LangChain
|
| 2 |
# to answer the questions in the "gaia-benchmark/GAIA" dataset.
|
| 3 |
|
|
@@ -157,10 +158,10 @@ def _convert_to_wav_mono16k(src_path: str) -> str:
|
|
| 157 |
return out
|
| 158 |
|
| 159 |
# ==== NEW: URL helpers ====
|
| 160 |
-
_URL_RE = re.compile(r'https?://\S+')
|
| 161 |
-
|
| 162 |
def _extract_urls(text: str) -> List[str]:
|
| 163 |
-
|
|
|
|
|
|
|
| 164 |
|
| 165 |
|
| 166 |
# ----------------------Tools ----------------------
|
|
@@ -362,49 +363,43 @@ def wikipedia_lookup(query: str, sentences: int = 4) -> Dict[str, Any]:
|
|
| 362 |
return {"title":"", "url":"", "summary": f"[wikipedia import error: {e}]"}
|
| 363 |
|
| 364 |
@tool
|
| 365 |
-
def youtube_get_transcript(url_or_id: str, prefer_langs: List[str]
|
| 366 |
"""
|
| 367 |
Get YouTube transcript via API (no download). Returns plain text.
|
| 368 |
"""
|
| 369 |
print('try to get youtube video transcript')
|
| 370 |
try:
|
| 371 |
-
prefer_langs = prefer_langs or ["en", "en-US", "en-GB", "auto"]
|
| 372 |
vid = url_or_id
|
| 373 |
-
print("
|
| 374 |
if "youtube.com" in url_or_id or "youtu.be" in url_or_id:
|
| 375 |
u = urlparse(url_or_id)
|
| 376 |
if u.netloc.endswith("youtu.be"):
|
| 377 |
vid = u.path.lstrip("/")
|
| 378 |
else:
|
| 379 |
vid = parse_qs(u.query).get("v", [""])[0]
|
|
|
|
|
|
|
| 380 |
ytt_api = YouTubeTranscriptApi()
|
| 381 |
trs_list = ytt_api.list(vid)
|
|
|
|
| 382 |
# choose first matching language
|
| 383 |
for lang in prefer_langs:
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
except Exception:
|
| 391 |
-
continue
|
| 392 |
-
# fallback: first any transcript
|
| 393 |
-
trs = list(trs_list)[0]
|
| 394 |
-
chunks = trs.fetch()
|
| 395 |
-
print("transcript from youtube website?")
|
| 396 |
-
print(" ".join([c["text"] for c in chunks if c.get("text")]).strip())
|
| 397 |
-
return " ".join([c["text"] for c in chunks if c.get("text")]).strip()
|
| 398 |
except (TranscriptsDisabled, NoTranscriptFound):
|
| 399 |
return "[no captions available]"
|
| 400 |
-
|
| 401 |
-
return f"[youtube transcript error: {e}]"
|
| 402 |
|
| 403 |
@tool
|
| 404 |
def youtube_transcribe_audio(url: str, model_size: str = "base") -> str:
|
| 405 |
"""
|
| 406 |
Download YouTube audio (yt-dlp) and transcribe with Whisper.
|
| 407 |
"""
|
|
|
|
| 408 |
tmpdir = tempfile.mkdtemp(prefix="gaia_yt_")
|
| 409 |
outfile = os.path.join(tmpdir, "%(id)s.%(ext)s")
|
| 410 |
|
|
@@ -422,6 +417,7 @@ def youtube_transcribe_audio(url: str, model_size: str = "base") -> str:
|
|
| 422 |
# convert & transcribe
|
| 423 |
wav = _convert_to_wav_mono16k(path)
|
| 424 |
txt = transcribe_audio.invoke({"path": wav, "model_size": model_size})
|
|
|
|
| 425 |
return txt
|
| 426 |
except Exception as e:
|
| 427 |
return f"[youtube download/transcribe error: {e}]"
|
|
@@ -620,7 +616,8 @@ def solve_text_only_node(state: "AgentState") -> "AgentState":
|
|
| 620 |
ev_text = _summarize_evidence(evidence) or "(none)"
|
| 621 |
|
| 622 |
# LLM (text-only). Swap model as you like.
|
| 623 |
-
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
|
|
|
|
| 624 |
|
| 625 |
sys = SystemMessage(content=(
|
| 626 |
"You solve GAIA tasks. Use careful step-by-step reasoning but keep it concise.\n"
|
|
@@ -799,7 +796,7 @@ def crawl_node(state: AgentState) -> AgentState:
|
|
| 799 |
if _is_youtube(u):
|
| 800 |
print("is_youtube? ", _is_youtube(u))
|
| 801 |
cap = youtube_get_transcript.invoke({"url_or_id": u})
|
| 802 |
-
print('
|
| 803 |
if cap and not cap.startswith("[no captions"):
|
| 804 |
ev.append({"kind":"doc_text","text":cap,"path":None,
|
| 805 |
"meta":{"source":"youtube","title": title, "url":u,"mime":"text/plain"}})
|
|
|
|
| 1 |
+
# v8
|
| 2 |
# Develop an AI agent with LangGraph and LangChain
|
| 3 |
# to answer the questions in the "gaia-benchmark/GAIA" dataset.
|
| 4 |
|
|
|
|
| 158 |
return out
|
| 159 |
|
| 160 |
# ==== NEW: URL helpers ====
|
|
|
|
|
|
|
| 161 |
def _extract_urls(text: str) -> List[str]:
|
| 162 |
+
raw = re.findall(r'https?://\S+', text)
|
| 163 |
+
urls = [re.sub(r'[)\]\}>\'",;:.!?]+$', '', u) for u in raw]
|
| 164 |
+
return urls
|
| 165 |
|
| 166 |
|
| 167 |
# ----------------------Tools ----------------------
|
|
|
|
| 363 |
return {"title":"", "url":"", "summary": f"[wikipedia import error: {e}]"}
|
| 364 |
|
| 365 |
@tool
def youtube_get_transcript(url_or_id: str, prefer_langs: List[str] = ["en"] ) -> str:
    """
    Get YouTube transcript via API (no download). Returns plain text.
    """
    # NOTE(review): the docstring above is kept verbatim because the function
    # is wrapped by @tool, which presumably exposes it as the tool description.
    #
    # url_or_id    : a YouTube URL (watch, youtu.be, shorts/embed/live) or a
    #                bare video id.
    # prefer_langs : language codes in priority order; never mutated here, so
    #                the shared default list stays intact.
    # Returns the transcript snippets joined by single spaces, or
    # "[no captions available]" when no transcript can be selected.
    # Any other exception from the transcript API propagates to the caller.
    print('try to get youtube video transcript')
    try:
        vid = url_or_id
        print("url_or_id: ", url_or_id)
        if "youtube.com" in url_or_id or "youtu.be" in url_or_id:
            u = urlparse(url_or_id)
            if u.netloc.endswith("youtu.be"):
                vid = u.path.lstrip("/")
            else:
                vid = parse_qs(u.query).get("v", [""])[0]
                if not vid:
                    # No ?v= parameter: the id may sit in the path, e.g.
                    # /shorts/<id>, /embed/<id> or /live/<id>.
                    parts = [p for p in u.path.split("/") if p]
                    if len(parts) >= 2 and parts[0] in ("shorts", "embed", "live"):
                        vid = parts[1]

        print("vid: ", vid)
        if not vid:
            # Nothing to look up; report it the same way as a missing transcript.
            return "[no captions available]"

        ytt_api = YouTubeTranscriptApi()
        trs_list = ytt_api.list(vid)

        # Hand the whole priority list to find_transcript so later languages
        # are actually tried; the previous per-language loop aborted on the
        # first language that had no transcript.
        trs = trs_list.find_transcript(list(prefer_langs or ["en"]))
        chunks = trs.fetch()
        print("transcript from youtube website?")
        return " ".join([c.text for c in chunks]).strip()
    except (TranscriptsDisabled, NoTranscriptFound):
        return "[no captions available]"
|
| 395 |
+
|
|
|
|
| 396 |
|
| 397 |
@tool
|
| 398 |
def youtube_transcribe_audio(url: str, model_size: str = "base") -> str:
|
| 399 |
"""
|
| 400 |
Download YouTube audio (yt-dlp) and transcribe with Whisper.
|
| 401 |
"""
|
| 402 |
+
print("Download YouTube audio (yt-dlp) and transcribe with Whisper.")
|
| 403 |
tmpdir = tempfile.mkdtemp(prefix="gaia_yt_")
|
| 404 |
outfile = os.path.join(tmpdir, "%(id)s.%(ext)s")
|
| 405 |
|
|
|
|
| 417 |
# convert & transcribe
|
| 418 |
wav = _convert_to_wav_mono16k(path)
|
| 419 |
txt = transcribe_audio.invoke({"path": wav, "model_size": model_size})
|
| 420 |
+
print('caption by Whisper:', txt)
|
| 421 |
return txt
|
| 422 |
except Exception as e:
|
| 423 |
return f"[youtube download/transcribe error: {e}]"
|
|
|
|
| 616 |
ev_text = _summarize_evidence(evidence) or "(none)"
|
| 617 |
|
| 618 |
# LLM (text-only). Swap model as you like.
|
| 619 |
+
# llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
|
| 620 |
+
llm = ChatOpenAI(model="gpt-4o", temperature=0)
|
| 621 |
|
| 622 |
sys = SystemMessage(content=(
|
| 623 |
"You solve GAIA tasks. Use careful step-by-step reasoning but keep it concise.\n"
|
|
|
|
| 796 |
if _is_youtube(u):
|
| 797 |
print("is_youtube? ", _is_youtube(u))
|
| 798 |
cap = youtube_get_transcript.invoke({"url_or_id": u})
|
| 799 |
+
print('caption: ', cap)
|
| 800 |
if cap and not cap.startswith("[no captions"):
|
| 801 |
ev.append({"kind":"doc_text","text":cap,"path":None,
|
| 802 |
"meta":{"source":"youtube","title": title, "url":u,"mime":"text/plain"}})
|
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
|
|
|
| 1 |
+
# v8
|
| 2 |
import os
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|