Ghisalbertifederico committed on
Commit
84ffff4
·
verified ·
1 Parent(s): 3dead3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -12
app.py CHANGED
@@ -1,7 +1,12 @@
1
  import os
2
  import re as _re
 
3
  import time
4
  import concurrent.futures
 
 
 
 
5
  import gradio as gr
6
  import pypdf
7
  import requests
@@ -14,29 +19,35 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
  HF_TOKEN = os.environ.get("HF_TOKEN")
15
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
16
 
17
-
18
  SYSTEM_PROMPT_ADDITION = """You are a general-purpose assistant that answers questions accurately and concisely.
19
 
20
  ANSWERING STRATEGY — follow this order strictly:
 
 
 
21
 
22
- STEP 1 — ATTACHED FILE (if a file path is mentioned in the task context)
23
- Call `read_task_file(file_path)` immediately with the provided path.
 
24
  Parse the returned content and try to extract the answer from it.
25
  If the file contains the answer, call `final_answer` right away.
26
 
27
- STEP 2 — WEB SEARCH (if step 1 was not applicable or did not yield an answer)
28
  Choose the most appropriate tool:
29
- * `wikipedia_search(query)` — encyclopedic facts: people, places, history, science.
30
- * `web_search(query)` — recent events, statistics, niche facts.
 
31
  * `visit_webpage(url)` — fetch the full text of a URL returned by web_search.
 
32
  If you are able to extract the answer from the results, call `final_answer`.
33
 
34
- STEP 3 — NATIVE LLM KNOWLEDGE (only if steps 1 and 2 both failed or were not applicable)
35
  Reason from your own training knowledge and call `final_answer` with your best answer.
36
  Clearly prefix with "Based on my training knowledge:" so it is distinguishable.
37
 
38
  GENERAL RULES:
39
- - Available tools: `read_task_file`, `web_search`, `visit_webpage`, `wikipedia_search`, `final_answer`.
 
40
  - Do NOT invent other tool names.
41
  - Do NOT import modules not in additional_authorized_imports.
42
  - Always wrap code in <code> and </code> tags.
@@ -44,6 +55,77 @@ GENERAL RULES:
44
  - Give a SHORT final answer: a number, a name, a word — not a paragraph.
45
  """
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  @tool
49
  def read_task_file(file_path: str) -> str:
@@ -100,7 +182,10 @@ class WebSearchAgent:
100
 
101
  self.agent = CodeAgent(
102
  model=OpenAIServerModel(
103
- model_id="llama-3.1-8b-instant",
 
 
 
104
  api_base="https://api.groq.com/openai/v1",
105
  api_key=GROQ_API_KEY,
106
  ),
@@ -108,6 +193,8 @@ class WebSearchAgent:
108
  DuckDuckGoSearchTool(),
109
  VisitWebpageTool(),
110
  WikipediaSearchTool(),
 
 
111
  read_task_file,
112
  ],
113
  name="fast_agent",
@@ -134,7 +221,6 @@ class WebSearchAgent:
134
  print("Agent error:", e)
135
  return f"AGENT ERROR: {e}"
136
 
137
-
138
  MAX_WORKERS = 2 # keep low to avoid burning through Groq's free TPD limit
139
  QUESTION_TIMEOUT = 300 # seconds before a single question is abandoned
140
 
@@ -319,11 +405,9 @@ with gr.Blocks() as demo:
319
  gr.Markdown(
320
  """
321
  **Instructions:**
322
-
323
  1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
324
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
325
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
326
-
327
  ---
328
  **Disclaimers:**
329
  Once you click the "submit" button, it can take quite some time (this is the time the agent needs to go through all the questions).
 
1
  import os
2
  import re as _re
3
+ import sys
4
  import time
5
  import concurrent.futures
6
+ # Force UTF-8 output on Windows to avoid charmap crashes with Unicode characters
7
+ if sys.platform == "win32":
8
+ sys.stdout.reconfigure(encoding="utf-8", errors="replace")
9
+ sys.stderr.reconfigure(encoding="utf-8", errors="replace")
10
  import gradio as gr
11
  import pypdf
12
  import requests
 
19
  HF_TOKEN = os.environ.get("HF_TOKEN")
20
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
21
 
 
22
  SYSTEM_PROMPT_ADDITION = """You are a general-purpose assistant that answers questions accurately and concisely.
23
 
24
  ANSWERING STRATEGY — follow this order strictly:
25
+ Step 1: Analyze the question and identify what type of information is strictly needed to answer it.
26
+ For example if the question is "Who is the person that discovered or published x?" find the info about the person by looking up data related to x. Only lookup data once you have identified what is strictly needed to answer the question. Don't make a second search if you are unable to find the answer after the first one.
27
+ For example, if the question is about an artist's number of albums, only look for information about the discography, not biography or other unnecessary information. No tools are needed for this step, just careful reading and understanding of the question.
28
 
29
+ STEP 2 — ATTACHED FILE (if a file path is mentioned in the task context)
30
+ * If the file is an image (.png, .jpg, .jpeg, .gif, .webp): call `describe_image(file_path, question)` with a focused question derived from the task.
31
+ * Otherwise: call `read_task_file(file_path)` immediately with the provided path.
32
  Parse the returned content and try to extract the answer from it.
33
  If the file contains the answer, call `final_answer` right away.
34
 
35
+ STEP 3 — WEB SEARCH (if step 2 was not applicable or did not yield an answer)
36
  Choose the most appropriate tool:
37
+ * `get_youtube_transcript(video_url)` — fetch the transcript/captions of a YouTube video.
38
+ * `wikipedia_search(query)` — encyclopedic facts: people, places, history, science, discoveries.
39
+ * `web_search(query)` — recent events, statistics, niche facts, articles.
40
  * `visit_webpage(url)` — fetch the full text of a URL returned by web_search.
41
+ If the url contains "youtube.com" or "youtu.be", only use `get_youtube_transcript`. If this tool is unable to extract the answer skip to the next step.
42
  If you are able to extract the answer from the results, call `final_answer`.
43
 
44
+ STEP 4 — NATIVE LLM KNOWLEDGE (only if steps 1, 2, and 3 all failed or were not applicable)
45
  Reason from your own training knowledge and call `final_answer` with your best answer.
46
  Clearly prefix with "Based on my training knowledge:" so it is distinguishable.
47
 
48
  GENERAL RULES:
49
+ - Available tools: `read_task_file`, `get_youtube_transcript`, `describe_image`, `web_search`, `visit_webpage`, `wikipedia_search`, `final_answer`.
50
+ - If the task context mentions an image file path, call `describe_image(image_path, question)` with a focused question to extract the answer.
51
  - Do NOT invent other tool names.
52
  - Do NOT import modules not in additional_authorized_imports.
53
  - Always wrap code in <code> and </code> tags.
 
55
  - Give a SHORT final answer: a number, a name, a word — not a paragraph.
56
  """
57
 
58
@tool
def get_youtube_transcript(video_url: str) -> str:
    """Fetch the transcript/captions of a YouTube video.

    Args:
        video_url: Full YouTube URL or just the video ID.

    Returns:
        The full transcript as a single string.
    """
    import re
    from youtube_transcript_api import YouTubeTranscriptApi

    # Pull the 11-character video ID out of the common URL shapes
    # (watch?v=, youtu.be/, /shorts/, /embed/, /live/). Anything that does
    # not match is assumed to already be a bare video ID.
    match = re.search(
        r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([A-Za-z0-9_-]{11})",
        video_url,
    )
    video_id = match.group(1) if match else video_url

    # Newer releases of youtube-transcript-api expose an instance method
    # `fetch()`; older ones only have the static `get_transcript()`.
    # Feature-detect instead of catching TypeError: on old versions the
    # missing method raises AttributeError, so that fallback never ran.
    if hasattr(YouTubeTranscriptApi, "fetch"):
        entries = YouTubeTranscriptApi().fetch(video_id)
    else:
        entries = YouTubeTranscriptApi.get_transcript(video_id)

    # The old API yields dicts ({"text": ...}); the new API yields snippet
    # objects with a `.text` attribute that are not subscriptable.
    return " ".join(
        entry["text"] if isinstance(entry, dict) else entry.text
        for entry in entries
    )
81
+
82
@tool
def describe_image(image_path: str, question: str = "Describe this image in detail.") -> str:
    """Use a vision model to interpret or answer questions about an image file.

    Args:
        image_path: The local path to the image file (.png, .jpg, .jpeg, .gif, .webp).
        question: Specific question to ask about the image content.

    Returns:
        A text description or answer about the image content.
    """
    import base64
    import os
    import requests as _req

    # Report a missing file as a plain message instead of raising.
    if not os.path.exists(image_path):
        return f"Image not found: {image_path}"

    # Derive the MIME type from the file extension; unknown extensions are
    # sent as PNG.
    _, dotted_suffix = os.path.splitext(image_path)
    suffix = dotted_suffix.lower().lstrip(".")
    known_mime_types = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "webp": "image/webp",
    }
    mime_type = known_mime_types.get(suffix, "image/png")

    # Inline the image bytes as a base64 data URL.
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_image = base64.standard_b64encode(raw_bytes).decode("utf-8")
    data_url = f"data:{mime_type};base64,{encoded_image}"

    # One user message carrying the image first, then the question.
    user_message = {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": data_url}},
            {"type": "text", "text": question},
        ],
    }
    # NOTE(review): this is a "preview" model id — confirm it is still
    # served by the Groq endpoint.
    request_body = {
        "model": "llama-3.2-11b-vision-preview",
        "messages": [user_message],
        "max_tokens": 1024,
    }
    request_headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }

    response = _req.post(
        "https://api.groq.com/openai/v1/chat/completions",
        json=request_body,
        headers=request_headers,
        timeout=60,
    )
    # Non-2xx responses propagate as requests.HTTPError.
    response.raise_for_status()

    reply = response.json()
    return reply["choices"][0]["message"]["content"]
128
+
129
 
130
  @tool
131
  def read_task_file(file_path: str) -> str:
 
182
 
183
  self.agent = CodeAgent(
184
  model=OpenAIServerModel(
185
+ # model_id="gemma2-9b-it",
186
+ # model_id="llama-3.1-8b-instant",
187
+ # model_id="llama-3.3-70b-versatile",
188
+ model_id="meta-llama/llama-4-scout-17b-16e-instruct",
189
  api_base="https://api.groq.com/openai/v1",
190
  api_key=GROQ_API_KEY,
191
  ),
 
193
  DuckDuckGoSearchTool(),
194
  VisitWebpageTool(),
195
  WikipediaSearchTool(),
196
+ get_youtube_transcript,
197
+ describe_image,
198
  read_task_file,
199
  ],
200
  name="fast_agent",
 
221
  print("Agent error:", e)
222
  return f"AGENT ERROR: {e}"
223
 
 
224
  MAX_WORKERS = 2 # keep low to avoid burning through Groq's free TPD limit
225
  QUESTION_TIMEOUT = 300 # seconds before a single question is abandoned
226
 
 
405
  gr.Markdown(
406
  """
407
  **Instructions:**
 
408
  1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
409
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
410
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
411
  ---
412
  **Disclaimers:**
413
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).