shan gao committed on
Commit
5c7c966
·
1 Parent(s): 0ff6a32
Files changed (2) hide show
  1. agent.py +21 -24
  2. app.py +1 -0
agent.py CHANGED
@@ -1,3 +1,4 @@
 
1
  # Develop an AI agent with LangGraph and LangChain
2
  # to answer the questions in the "gaia-benchmark/GAIA" dataset.
3
 
@@ -157,10 +158,10 @@ def _convert_to_wav_mono16k(src_path: str) -> str:
157
  return out
158
 
159
  # ==== NEW: URL helpers ====
160
- _URL_RE = re.compile(r'https?://\S+')
161
-
162
  def _extract_urls(text: str) -> List[str]:
163
- return _URL_RE.findall(text or "")
 
 
164
 
165
 
166
  # ----------------------Tools ----------------------
@@ -362,49 +363,43 @@ def wikipedia_lookup(query: str, sentences: int = 4) -> Dict[str, Any]:
362
  return {"title":"", "url":"", "summary": f"[wikipedia import error: {e}]"}
363
 
364
  @tool
365
- def youtube_get_transcript(url_or_id: str, prefer_langs: List[str] | None = None) -> str:
366
  """
367
  Get YouTube transcript via API (no download). Returns plain text.
368
  """
369
  print('try to get youtube video transcript')
370
  try:
371
- prefer_langs = prefer_langs or ["en", "en-US", "en-GB", "auto"]
372
  vid = url_or_id
373
- print("vid: ", vid)
374
  if "youtube.com" in url_or_id or "youtu.be" in url_or_id:
375
  u = urlparse(url_or_id)
376
  if u.netloc.endswith("youtu.be"):
377
  vid = u.path.lstrip("/")
378
  else:
379
  vid = parse_qs(u.query).get("v", [""])[0]
 
 
380
  ytt_api = YouTubeTranscriptApi()
381
  trs_list = ytt_api.list(vid)
 
382
  # choose first matching language
383
  for lang in prefer_langs:
384
- try:
385
- trs = trs_list.find_transcript([lang])
386
- chunks = trs.fetch()
387
- print("transcript from youtube website?")
388
- print(" ".join([c["text"] for c in chunks if c.get("text")]).strip())
389
- return " ".join([c["text"] for c in chunks if c.get("text")]).strip()
390
- except Exception:
391
- continue
392
- # fallback: first any transcript
393
- trs = list(trs_list)[0]
394
- chunks = trs.fetch()
395
- print("transcript from youtube website?")
396
- print(" ".join([c["text"] for c in chunks if c.get("text")]).strip())
397
- return " ".join([c["text"] for c in chunks if c.get("text")]).strip()
398
  except (TranscriptsDisabled, NoTranscriptFound):
399
  return "[no captions available]"
400
- except Exception as e:
401
- return f"[youtube transcript error: {e}]"
402
 
403
  @tool
404
  def youtube_transcribe_audio(url: str, model_size: str = "base") -> str:
405
  """
406
  Download YouTube audio (yt-dlp) and transcribe with Whisper.
407
  """
 
408
  tmpdir = tempfile.mkdtemp(prefix="gaia_yt_")
409
  outfile = os.path.join(tmpdir, "%(id)s.%(ext)s")
410
 
@@ -422,6 +417,7 @@ def youtube_transcribe_audio(url: str, model_size: str = "base") -> str:
422
  # convert & transcribe
423
  wav = _convert_to_wav_mono16k(path)
424
  txt = transcribe_audio.invoke({"path": wav, "model_size": model_size})
 
425
  return txt
426
  except Exception as e:
427
  return f"[youtube download/transcribe error: {e}]"
@@ -620,7 +616,8 @@ def solve_text_only_node(state: "AgentState") -> "AgentState":
620
  ev_text = _summarize_evidence(evidence) or "(none)"
621
 
622
  # LLM (text-only). Swap model as you like.
623
- llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
 
624
 
625
  sys = SystemMessage(content=(
626
  "You solve GAIA tasks. Use careful step-by-step reasoning but keep it concise.\n"
@@ -799,7 +796,7 @@ def crawl_node(state: AgentState) -> AgentState:
799
  if _is_youtube(u):
800
  print("is_youtube? ", _is_youtube(u))
801
  cap = youtube_get_transcript.invoke({"url_or_id": u})
802
- print('cap: ', cap)
803
  if cap and not cap.startswith("[no captions"):
804
  ev.append({"kind":"doc_text","text":cap,"path":None,
805
  "meta":{"source":"youtube","title": title, "url":u,"mime":"text/plain"}})
 
1
+ # v8
2
  # Develop an AI agent with LangGraph and LangChain
3
  # to answer the questions in the "gaia-benchmark/GAIA" dataset.
4
 
 
158
  return out
159
 
160
  # ==== NEW: URL helpers ====
 
 
161
  def _extract_urls(text: str) -> List[str]:
162
+ raw = re.findall(r'https?://\S+', text)
163
+ urls = [re.sub(r'[)\]\}>\'",;:.!?]+$', '', u) for u in raw]
164
+ return urls
165
 
166
 
167
  # ----------------------Tools ----------------------
 
363
  return {"title":"", "url":"", "summary": f"[wikipedia import error: {e}]"}
364
 
365
  @tool
366
+ def youtube_get_transcript(url_or_id: str, prefer_langs: List[str] = ["en"] ) -> str:
367
  """
368
  Get YouTube transcript via API (no download). Returns plain text.
369
  """
370
  print('try to get youtube video transcript')
371
  try:
 
372
  vid = url_or_id
373
+ print("url_or_id: ", url_or_id)
374
  if "youtube.com" in url_or_id or "youtu.be" in url_or_id:
375
  u = urlparse(url_or_id)
376
  if u.netloc.endswith("youtu.be"):
377
  vid = u.path.lstrip("/")
378
  else:
379
  vid = parse_qs(u.query).get("v", [""])[0]
380
+
381
+ print("vid: ", vid)
382
  ytt_api = YouTubeTranscriptApi()
383
  trs_list = ytt_api.list(vid)
384
+
385
  # choose first matching language
386
  for lang in prefer_langs:
387
+ trs = trs_list.find_transcript([lang])
388
+ # trs = list(trs_list)[0]
389
+ chunks = trs.fetch()
390
+ print("transcript from youtube website?")
391
+ transcript_en = " ".join([c.text for c in chunks]).strip()
392
+ return transcript_en
 
 
 
 
 
 
 
 
393
  except (TranscriptsDisabled, NoTranscriptFound):
394
  return "[no captions available]"
395
+
 
396
 
397
  @tool
398
  def youtube_transcribe_audio(url: str, model_size: str = "base") -> str:
399
  """
400
  Download YouTube audio (yt-dlp) and transcribe with Whisper.
401
  """
402
+ print("Download YouTube audio (yt-dlp) and transcribe with Whisper.")
403
  tmpdir = tempfile.mkdtemp(prefix="gaia_yt_")
404
  outfile = os.path.join(tmpdir, "%(id)s.%(ext)s")
405
 
 
417
  # convert & transcribe
418
  wav = _convert_to_wav_mono16k(path)
419
  txt = transcribe_audio.invoke({"path": wav, "model_size": model_size})
420
+ print('caption by Whisper:', txt)
421
  return txt
422
  except Exception as e:
423
  return f"[youtube download/transcribe error: {e}]"
 
616
  ev_text = _summarize_evidence(evidence) or "(none)"
617
 
618
  # LLM (text-only). Swap model as you like.
619
+ # llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
620
+ llm = ChatOpenAI(model="gpt-4o", temperature=0)
621
 
622
  sys = SystemMessage(content=(
623
  "You solve GAIA tasks. Use careful step-by-step reasoning but keep it concise.\n"
 
796
  if _is_youtube(u):
797
  print("is_youtube? ", _is_youtube(u))
798
  cap = youtube_get_transcript.invoke({"url_or_id": u})
799
+ print('caption: ', cap)
800
  if cap and not cap.startswith("[no captions"):
801
  ev.append({"kind":"doc_text","text":cap,"path":None,
802
  "meta":{"source":"youtube","title": title, "url":u,"mime":"text/plain"}})
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import gradio as gr
3
  import requests
 
1
+ # v8
2
  import os
3
  import gradio as gr
4
  import requests