Update app.py
Browse files
app.py
CHANGED
|
@@ -11,16 +11,15 @@ import os
|
|
| 11 |
import gradio as gr
|
| 12 |
import inspect
|
| 13 |
import pandas as pd
|
| 14 |
-
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel
|
| 15 |
|
| 16 |
# (Keep Constants as is)
|
| 17 |
# --- Constants ---
|
| 18 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 19 |
|
| 20 |
-
|
| 21 |
|
| 22 |
# ‑‑‑ smol‑agents base imports (provided by the framework) ‑‑‑
|
| 23 |
-
from
|
| 24 |
Tool,
|
| 25 |
PipelineTool,
|
| 26 |
CodeAgent,
|
|
@@ -58,10 +57,8 @@ class SpeechToTextTool(PipelineTool):
|
|
| 58 |
if not path.is_file():
|
| 59 |
raise FileNotFoundError(f"No such audio file: {path}")
|
| 60 |
|
| 61 |
-
from openai import audio as _audio # late import
|
| 62 |
-
|
| 63 |
with path.open("rb") as fp:
|
| 64 |
-
resp =
|
| 65 |
file=fp,
|
| 66 |
model="whisper-1",
|
| 67 |
response_format="text",
|
|
@@ -122,16 +119,10 @@ class ExcelToTextTool(Tool):
|
|
| 122 |
|
| 123 |
|
| 124 |
# ---------------------------------------------------------------------------
|
| 125 |
-
#
|
| 126 |
# ---------------------------------------------------------------------------
|
| 127 |
class YouTubeQATool(PipelineTool):
|
| 128 |
-
"""Answer questions about the spoken content of a YouTube video.
|
| 129 |
-
|
| 130 |
-
• Downloads the auto‑generated or creator‑provided transcript using
|
| 131 |
-
**youtube‑transcript‑api** (no API key needed for most public videos).
|
| 132 |
-
• Feeds a compressed transcript + user question to GPT‑4o for an answer.
|
| 133 |
-
• Caches transcripts locally to avoid repeated network calls.
|
| 134 |
-
"""
|
| 135 |
|
| 136 |
name = "youtube_qa"
|
| 137 |
description = (
|
|
@@ -151,12 +142,10 @@ class YouTubeQATool(PipelineTool):
|
|
| 151 |
}
|
| 152 |
output_type = "string"
|
| 153 |
|
| 154 |
-
|
| 155 |
-
_TRANSCRIPT_CACHE: dict[str, str] = {} # simple in‑proc cache
|
| 156 |
|
| 157 |
@staticmethod
|
| 158 |
def _extract_video_id(url: str) -> str:
|
| 159 |
-
"""Return the 11‑char YouTube ID from a watch/shorts URL or raw ID."""
|
| 160 |
if len(url) == 11 and "/" not in url:
|
| 161 |
return url
|
| 162 |
parsed = _urlparse.urlparse(url)
|
|
@@ -166,7 +155,6 @@ class YouTubeQATool(PipelineTool):
|
|
| 166 |
qs = _urlparse.parse_qs(parsed.query)
|
| 167 |
if "v" in qs:
|
| 168 |
return qs["v"][0]
|
| 169 |
-
# shorts/embedded
|
| 170 |
return parsed.path.split("/")[-1]
|
| 171 |
raise ValueError("Could not parse YouTube video ID from URL")
|
| 172 |
|
|
@@ -180,13 +168,12 @@ class YouTubeQATool(PipelineTool):
|
|
| 180 |
return "Error: youtube‑transcript‑api library not installed."
|
| 181 |
try:
|
| 182 |
segments: List[dict] = YouTubeTranscriptApi.get_transcript(video_id)
|
| 183 |
-
except Exception as exc:
|
| 184 |
return f"Error fetching transcript: {exc}"
|
| 185 |
text = " ".join(seg["text"] for seg in segments)
|
| 186 |
cls._TRANSCRIPT_CACHE[video_id] = text
|
| 187 |
return text
|
| 188 |
|
| 189 |
-
# ––––– main entry point ––––– -------------------------------------------
|
| 190 |
def forward(self, url: str, question: str) -> str: # type: ignore[override]
|
| 191 |
try:
|
| 192 |
vid = self._extract_video_id(url)
|
|
@@ -197,12 +184,11 @@ class YouTubeQATool(PipelineTool):
|
|
| 197 |
if transcript.startswith("Error"):
|
| 198 |
return transcript
|
| 199 |
|
| 200 |
-
# Keep prompt under ~15k chars – truncate transcript if necessary
|
| 201 |
max_chars = 15000
|
| 202 |
if len(transcript) > max_chars:
|
| 203 |
transcript = transcript[:max_chars] + " …(truncated)…"
|
| 204 |
|
| 205 |
-
|
| 206 |
|
| 207 |
system = (
|
| 208 |
"You are a meticulous assistant. Answer the user's question about "
|
|
@@ -215,14 +201,14 @@ class YouTubeQATool(PipelineTool):
|
|
| 215 |
{"role": "user", "content": f"Question: {question}"},
|
| 216 |
]
|
| 217 |
try:
|
| 218 |
-
resp = chat.completions.create(
|
| 219 |
-
model="gpt-4o",
|
| 220 |
messages=messages,
|
| 221 |
temperature=0.2,
|
| 222 |
max_tokens=256,
|
| 223 |
)
|
| 224 |
return resp.choices[0].message.content.strip()
|
| 225 |
-
except Exception as exc:
|
| 226 |
return f"Error generating answer: {exc}"
|
| 227 |
|
| 228 |
|
|
@@ -262,7 +248,7 @@ class BasicAgent:
|
|
| 262 |
WikipediaSearchTool(),
|
| 263 |
SpeechToTextTool(),
|
| 264 |
ExcelToTextTool(),
|
| 265 |
-
YouTubeQATool(),
|
| 266 |
]
|
| 267 |
|
| 268 |
def __init__(self) -> None:
|
|
@@ -270,12 +256,7 @@ class BasicAgent:
|
|
| 270 |
model=self._model,
|
| 271 |
tools=self._tools,
|
| 272 |
add_base_tools=True,
|
| 273 |
-
additional_authorized_imports=[
|
| 274 |
-
"numpy",
|
| 275 |
-
"pandas",
|
| 276 |
-
"csv",
|
| 277 |
-
"subprocess",
|
| 278 |
-
],
|
| 279 |
)
|
| 280 |
print("BasicAgent initialized with YouTubeQATool.")
|
| 281 |
|
|
|
|
| 11 |
import gradio as gr
|
| 12 |
import inspect
|
| 13 |
import pandas as pd
|
|
|
|
| 14 |
|
| 15 |
# (Keep Constants as is)
|
| 16 |
# --- Constants ---
|
| 17 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 18 |
|
| 19 |
+
import openai
|
| 20 |
|
| 21 |
# ‑‑‑ smol‑agents base imports (provided by the framework) ‑‑‑
|
| 22 |
+
from smolagents import (
|
| 23 |
Tool,
|
| 24 |
PipelineTool,
|
| 25 |
CodeAgent,
|
|
|
|
| 57 |
if not path.is_file():
|
| 58 |
raise FileNotFoundError(f"No such audio file: {path}")
|
| 59 |
|
|
|
|
|
|
|
| 60 |
with path.open("rb") as fp:
|
| 61 |
+
resp = openai.audio.transcriptions.create(
|
| 62 |
file=fp,
|
| 63 |
model="whisper-1",
|
| 64 |
response_format="text",
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
# ---------------------------------------------------------------------------
|
| 122 |
+
# YouTube Question‑Answer Tool
|
| 123 |
# ---------------------------------------------------------------------------
|
| 124 |
class YouTubeQATool(PipelineTool):
|
| 125 |
+
"""Answer questions about the spoken content of a YouTube video."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
name = "youtube_qa"
|
| 128 |
description = (
|
|
|
|
| 142 |
}
|
| 143 |
output_type = "string"
|
| 144 |
|
| 145 |
+
_TRANSCRIPT_CACHE: dict[str, str] = {}
|
|
|
|
| 146 |
|
| 147 |
@staticmethod
|
| 148 |
def _extract_video_id(url: str) -> str:
|
|
|
|
| 149 |
if len(url) == 11 and "/" not in url:
|
| 150 |
return url
|
| 151 |
parsed = _urlparse.urlparse(url)
|
|
|
|
| 155 |
qs = _urlparse.parse_qs(parsed.query)
|
| 156 |
if "v" in qs:
|
| 157 |
return qs["v"][0]
|
|
|
|
| 158 |
return parsed.path.split("/")[-1]
|
| 159 |
raise ValueError("Could not parse YouTube video ID from URL")
|
| 160 |
|
|
|
|
| 168 |
return "Error: youtube‑transcript‑api library not installed."
|
| 169 |
try:
|
| 170 |
segments: List[dict] = YouTubeTranscriptApi.get_transcript(video_id)
|
| 171 |
+
except Exception as exc:
|
| 172 |
return f"Error fetching transcript: {exc}"
|
| 173 |
text = " ".join(seg["text"] for seg in segments)
|
| 174 |
cls._TRANSCRIPT_CACHE[video_id] = text
|
| 175 |
return text
|
| 176 |
|
|
|
|
| 177 |
def forward(self, url: str, question: str) -> str: # type: ignore[override]
|
| 178 |
try:
|
| 179 |
vid = self._extract_video_id(url)
|
|
|
|
| 184 |
if transcript.startswith("Error"):
|
| 185 |
return transcript
|
| 186 |
|
|
|
|
| 187 |
max_chars = 15000
|
| 188 |
if len(transcript) > max_chars:
|
| 189 |
transcript = transcript[:max_chars] + " …(truncated)…"
|
| 190 |
|
| 191 |
+
import openai
|
| 192 |
|
| 193 |
system = (
|
| 194 |
"You are a meticulous assistant. Answer the user's question about "
|
|
|
|
| 201 |
{"role": "user", "content": f"Question: {question}"},
|
| 202 |
]
|
| 203 |
try:
|
| 204 |
+
resp = openai.chat.completions.create(
|
| 205 |
+
model="gpt-4o",
|
| 206 |
messages=messages,
|
| 207 |
temperature=0.2,
|
| 208 |
max_tokens=256,
|
| 209 |
)
|
| 210 |
return resp.choices[0].message.content.strip()
|
| 211 |
+
except Exception as exc:
|
| 212 |
return f"Error generating answer: {exc}"
|
| 213 |
|
| 214 |
|
|
|
|
| 248 |
WikipediaSearchTool(),
|
| 249 |
SpeechToTextTool(),
|
| 250 |
ExcelToTextTool(),
|
| 251 |
+
YouTubeQATool(),
|
| 252 |
]
|
| 253 |
|
| 254 |
def __init__(self) -> None:
|
|
|
|
| 256 |
model=self._model,
|
| 257 |
tools=self._tools,
|
| 258 |
add_base_tools=True,
|
| 259 |
+
additional_authorized_imports=["numpy", "pandas", "csv", "subprocess"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
)
|
| 261 |
print("BasicAgent initialized with YouTubeQATool.")
|
| 262 |
|