nikhmr1235 commited on
Commit
d116ff2
·
verified ·
1 Parent(s): c9ef4ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +515 -0
app.py CHANGED
@@ -0,0 +1,515 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import os
3
+ import gradio as gr
4
+ import inspect
5
+ import pandas as pd
6
+ import time
7
+ import re
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from langchain_community.tools import TavilySearchResults
10
+ from langchain import hub # Used to pull predefined prompts from LangChain Hub
11
+ from langchain.agents import AgentExecutor, create_react_agent
12
+ from langchain.memory import ConversationSummaryMemory
13
+ from typing import Any, List, Optional
14
+ from langchain.agents import AgentExecutor, Agent
15
+ from langchain.tools.base import BaseTool
16
+ from langchain.memory import ConversationSummaryBufferMemory
17
+ from google.api_core import retry
18
+ from google import genai
19
+ from langchain.prompts import PromptTemplate
20
+
21
+ # for openAI model
22
+ from langchain_openai import ChatOpenAI
23
+ from openai import OpenAI
24
+
25
+ # tools imported from helper.py
26
+ from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,file_saver_tool,gemini_multimodal_tool
27
+ from helper import wikipedia_search_tool2
28
+ # (Keep Constants as is)
29
+ # --- Constants ---
30
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
31
+
32
+ # --- Basic Agent Definition ---
33
+ # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
34
class BasicAgent:
    """Thin wrapper around a LangChain ``AgentExecutor`` with quota-aware retries.

    The executor is built once in ``__init__``; calling the instance with a
    question string runs the agent and returns its final answer text,
    retrying automatically on Gemini quota/overload errors (HTTP 429/503).
    """

    def __init__(
        self,
        agent: Agent,
        tools: List[BaseTool],
        verbose: bool = False,
        handle_parsing_errors: bool = True,
        max_iterations: int = 9,
        memory: Optional[ConversationSummaryMemory] = None
    ) -> None:
        """
        Initialize with parameters required for AgentExecutor.

        Args:
            agent: The (ReAct) agent to drive.
            tools: Tools the agent may call.
            verbose: Whether the executor logs intermediate steps.
            handle_parsing_errors: Let the executor recover from malformed
                LLM output instead of raising.
            max_iterations: Cap on reasoning/tool-use steps per question.
            memory: Optional conversation-summary memory shared across calls.
        """
        self.agent: Agent = agent
        self.tools: List[BaseTool] = tools
        self.verbose: bool = verbose
        self.handle_parsing_errors: bool = handle_parsing_errors
        self.max_iterations: int = max_iterations
        self.memory: Optional[ConversationSummaryMemory] = memory
        self.agent_obj = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            verbose=self.verbose,
            handle_parsing_errors=self.handle_parsing_errors,
            max_iterations=self.max_iterations,
            memory=self.memory
        )

    def is_retriable(self, e: Exception) -> bool:
        """Return True for Gemini API errors that are worth retrying (429/503)."""
        # Adjust this check if your error type is different
        return isinstance(e, genai.errors.APIError) and getattr(e, "code", None) in {429, 503}

    def invoke_with_retry(self, question: str, max_retries: int = 5, initial_delay: float = 10.0) -> str:
        """Invoke the agent, retrying retriable quota errors with backoff.

        Args:
            question: The input question passed to the executor.
            max_retries: Maximum number of invocation attempts.
            initial_delay: Starting backoff delay in seconds (doubled after
                each fallback retry unless the API suggests its own delay).

        Returns:
            The agent's final answer string (the executor's ``output`` field).

        Raises:
            RuntimeError: If all attempts fail with retriable errors.
            Exception: Any non-retriable error is re-raised immediately.
        """
        current_delay = initial_delay
        for attempt in range(max_retries):
            try:
                result = self.agent_obj.invoke(
                    {"input": question},
                    config={"configurable": {"session_id": "test-session"}},
                )
                return result['output']
            except Exception as e:
                if not self.is_retriable(e):
                    # Not a quota/overload error: propagate immediately.
                    raise
                if hasattr(e, 'retry_delay') and hasattr(e.retry_delay, 'seconds'):
                    # Honor the specific retry delay suggested by the API.
                    current_delay = float(e.retry_delay.seconds)
                    print(f"Quota error (attempt {attempt+1}/{max_retries}). API suggested retry after {current_delay} seconds.", flush=True)
                    time.sleep(current_delay)
                else:
                    # Fallback: sleep the advertised delay first, THEN double it
                    # for the next round.  (Previously the delay was doubled
                    # before sleeping, so the actual wait was twice the value
                    # just printed.)
                    print(f"Quota error (attempt {attempt+1}/{max_retries}). Retrying in {current_delay} seconds with exponential backoff.", flush=True)
                    time.sleep(current_delay)
                    current_delay *= 2  # Exponential backoff
        # If all retries fail, raise a RuntimeError
        raise RuntimeError(f"Max retries ({max_retries}) exceeded due to persistent quota errors or other retriable issues.")

    def __call__(self, question: str) -> str:
        """Run the agent on *question* and return its final answer string."""
        return self.invoke_with_retry(question)
99
+
100
def run_and_submit_all( profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile injected by Gradio's LoginButton; ``None``
            when the user is not logged in.

    Returns:
        A ``(status_message, results_dataframe)`` tuple for the Gradio
        outputs; the dataframe is ``None`` on early failures (no login,
        missing API keys, fetch errors).
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code

    # Require a logged-in user: the username is part of the submission payload.
    if profile:
        username= f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # --- Validate required API keys up-front (fail fast before any work) ---
    google_api_key = os.getenv("GOOGLE_API_KEY")
    if not google_api_key:
        print("Google API key not found in environment variables.")
        return "Google API key not found. Please set GOOGLE_API_KEY environment variable.", None
    print(f"Using Google API key: {google_api_key[:4]}... (truncated for security)")

    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        print("OpenAI API key not found in environment variables.")
        return "OpenAI API key not found. Please set OPENAI_API_KEY environment variable.", None
    print(f"Using OpenAI API key: {openai_api_key[:4]}... (truncated for security)")

    gemini_model ="gemini-2.0-flash"
    #gemini_model ="gemini-1.5-pro"

    #NMODEL

    # Primary LLM; temperature 0 for deterministic, fact-oriented answers.
    llm_client = ChatGoogleGenerativeAI(
        model=gemini_model, # or another Gemini model name
        google_api_key=google_api_key, # your Gemini API key
        temperature=0,
    )


    #llm_client = ChatOpenAI(model='gpt-4o',temperature=0,api_key=openai_api_key)

    #llm_client = ChatOpenAI(model='gpt-4o',temperature=0,api_key=openai_api_key,top_p=1,presence_penalty=0,frequency_penalty=0,seed=12345)


    serp_api_key = os.getenv("SERP_API_KEY")
    if not serp_api_key:
        print("SerpAPI key not found in environment variables.")
        return "SerpAPI key not found. Please set SERP_API_KEY environment variable.", None
    print(f"Using SerpAPI key: {serp_api_key[:4]}... (truncated for security)")

    tavily_api_key = os.getenv("TAVILY_API_KEY")
    if not tavily_api_key:
        print("Tavily API key not found in environment variables.")
        return "Tavily API key not found. Please set TAVILY_API_KEY environment variable.", None
    print(f"Using Tavily API key: {tavily_api_key[:4]}... (truncated for security)")

    # Tool belt handed to the ReAct agent (all defined in helper.py except
    # the Tavily tool, which needs the API key at construction time).
    travily_api_search_tool = get_travily_api_search_tool(tavily_api_key)
    tools = [ repl_tool, file_saver_tool,audio_transcriber_tool,travily_api_search_tool, gemini_multimodal_tool, wikipedia_search_tool2]

    # NOTE(review): this constant is NOT interpolated into the template below.
    # The template writes "{{EX5_OBSERVATION_STRING}}", which PromptTemplate
    # renders as the literal text "{EX5_OBSERVATION_STRING}", so the agent
    # never sees this sample observation — confirm whether that is intended.
    EX5_OBSERVATION_STRING = (
        "[{{'title': '1977 New York Yankees Hitting Stats - Baseball-Reference.com', "
        "'url': 'https://www.baseball-reference.com/teams/NYY/1977.shtml', "
        "'content': '| Rk | Player | Age | Pos | WAR | W | L | W-L% | ERA | G | GS | GF | CG | SHO | SV | IP | H | R | ER | HR | BB | IBB | SO | HBP | BK | WP | BF | ERA+ | FIP | WHIP | H9 | HR9 | BB9 | SO9 | SO/BB | Awards | All logos are the trademark & property of their owners and not Sports Reference LLC. Copyright © 2000-2025 Sports Reference LLC. Sports Info Solutions logo Sports Info Solutions logo Sports Info Solutions logo'}}]"
    )

    # ReAct prompt.  Doubled braces ("{{...}}") are literal braces escaped for
    # PromptTemplate; single-brace names ({tools}, {tool_names}, {chat_history},
    # {input}, {agent_scratchpad}) are filled in at run time.
    prompt = PromptTemplate(
        input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
        template="""
You are a smart and helpful AI Agent/Assistant that excels at fact-based reasoning. You are allowed and encouraged to use one or more tools as needed to answer complex questions and perform tasks.
It is CRUCIAL that you ALWAYS follow the exact format below. Do not deviate.
NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."

You have access to the following tools:
{tools}

To use a tool, you MUST follow this precise format:

Thought: I need to use a tool to find the answer.
Action: [tool_name] # This will be one of [{tool_names}]
Action Input: [input_for_the_tool]
Observation: [result_from_the_tool]

IMPORTANT NOTE ON TOOL USAGE:
- If an 'Observation' from a tool, especially `tavily_search` or `serpapi_Google Search_tool`, contains a list, table, or structured text that might hold the answer, your next step should be to use `python_repl` to parse and extract the required information from that observation's content. Do NOT search again unless the content is genuinely insufficient or irrelevant.
- If an 'Observation' from a tool does NOT directly contain the specific answer to your question, you MUST refine your query or switch to a different, more suitable tool (e.g., 'tavily_search' for broader or more current information if 'wikipedia_search_tool' was insufficient). Do NOT get stuck repeatedly using the same tool if it's not yielding the direct answer.
- If the input contains the exact phrase "Attachment '{{file_name}}' available at: {{attachment_url}}" (where '{{file_name}}' and '{{attachment_url}}' are placeholders for actual values), consider the file type:
- If the file type is binary/text (e.g., .xlsx, .docx, .mp3, .jpg, .pdf,.png), you MUST use the 'file_saver' tool to download and save it.
For 'file_saver', the Action Input must be a JSON string like: '{{"url": "the_attachment_url", "local_filename": "the_file_name_from_attachment"}}'
example: for input, Attachment '1f975693-876d-457b-a649-393859e79bf3.mp3' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3, Action Input for file_saver would be '{{"url": "https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3", "local_filename": "1f975693-876d-457b-a649-393859e79bf3.mp3"}}'

IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.

**For image files (like .jpg, .png) that have been saved using 'file_saver', the 'gemini_multimodal_tool' MUST be used to analyze their content and answer questions based on the image. The Action Input for 'gemini_multimodal_tool' must be a JSON string like: '{{"image_path": "the_local_filename", "question": "the_user_question"}}'**

Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.

If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:

Thought: I have enough information, or no tool is needed.
Final Answer: [your concise/short response here]

NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
VERY IMPORTANT: Your response MUST always start with 'Thought:'.

Here are some examples of how you should respond:

Example 1:
Question: What is the capital of France?
Thought: I need to use a tool to find the capital of France.
Action: tavily_search
Action Input: capital of France
Observation: The capital of France is Paris.
Thought: I have found the answer.
Final Answer: Paris

Example 2:
Question: What is 2 + 2?
Thought: This is a simple arithmetic question, no tool is needed.
Final Answer: 4

Example 3:
Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
Thought: The user is asking for specific information about discography, which might be found with a search tool. The `serpapi_Google Search_tool` can fetch detailed sections. After getting the content, I will need to parse it using `python_repl` to count the albums within the specified years.
Action: serpapi_Google Search
Action Input: Mercedes Sosa discography
Observation: [Discography text content from search result]
Thought: I have retrieved discography text. Now I need to parse this text to identify and count studio albums released between 2000 and 2009. I will use the `python_repl` tool for this.
Action: python_repl
Action Input:
```python
import re
text = "[Discography text content from previous observation]" # Replace with actual text
albums_2000_2009 = []
pattern = r"\((\d{{4}})\)\s*(.*?)(?:\[|\n|$)" # Ensures year is captured. Double braces {{}} to escape regex literal braces
for match in re.finditer(pattern, text):
    year = int(match.group(1))
    if 2000 <= year <= 2009:
        albums_2000_2009.append(match.group(2).strip())
print(len(albums_2000_2009))
```
Observation: 3
Thought: I have parsed the discography and counted the albums. I have found the answer.
Final Answer: 3

**Example 4: (Crucial new example for image processing)**
Question: What is the next best move in this chess position? Attachment 'chess_board.png' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44
Thought: The user is asking a question about a chess position and has provided an image. I need to first save the image locally using the 'file_saver' tool, and then use the 'gemini_multimodal_tool' to analyze the image and answer the question.
Action: file_saver
Action Input: {{"url": "https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44", "local_filename": "cca530fc-4052-43b2-b130-b30968d8aa44.png"}}
Observation: File downloaded successfully to cca530fc-4052-43b2-b130-b30968d8aa44.png
Thought: The image has been successfully downloaded. Now I need to analyze its content to determine the next best chess move using the 'gemini_multimodal_tool'.
Action: gemini_multimodal_tool
Action Input: {{"image_path": "cca530fc-4052-43b2-b130-b30968d8aa44.png", "question": "What is the next best move in this chess position?"}}
Observation: The next best move is e4.
Thought: I have used the 'gemini_multimodal_tool' to get the best move based on the image.
Final Answer: e4

Example 5: (Crucial negative example for conciseness)
Question: What is the opposite of up?
Thought: The question asks for the opposite of up. This is a direct knowledge question.
Final Answer: down
DO NOT RESPOND LIKE THIS: The opposite of up is down. or The answer is down.

Example 6: (New example for parsing baseball stats)
Question: How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?
Thought: I need to find the Yankee player with the most walks in 1977 and then find their at bats. This will require searching for Yankees 1977 stats and then parsing the results to extract the relevant player and their at-bats. I will use 'tavily_search' first to find the stats. After getting the search results, I will examine their content for a list or table of players and their stats. If found, I will use 'python_repl' to parse it.
Action: tavily_search
Action Input: New York Yankees 1977 batting stats
Observation: {{EX5_OBSERVATION_STRING}}
Thought: I have received an observation from `tavily_search`. I need to examine its `content` to determine if it contains the necessary data (e.g., a list or table of players/stats/winners). If so, my next step is to use `python_repl` to parse this content to extract the specific information needed to answer the question. I should only consider another `tavily_search` if the current observation's content is clearly insufficient.
Action: python_repl
Action Input:
```python
# Example: Parse the text content from the tavily_search observation.
# This is a placeholder for the actual parsing logic you would write.
# For the Malko question, you would parse the list of winners and their nationalities.
# For instance, if the observation content contains:
# "1983 | Claus Peter Flor | East Germany"
# You would extract this and apply your filtering logic.
```
Observation: 519
Thought: I have parsed the data and identified Roy White as the Yankee with the most walks (75) in 1977, and his at-bats were 519. I have found the answer.
Final Answer: 519

Example 7: (Parsing a table for minimum value)
Question: What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.
Thought: I need to find a table of athlete counts by country for the 1928 Olympics. I will use 'tavily_search' to find the data. After getting the search results, I will examine their content for a list or table of countries and their athlete counts. If found, I will use 'python_repl' to parse it.
Action: tavily_search
Action Input: 1928 Summer Olympics athlete count by country
Observation: [Table or HTML/text with country and athlete counts]
Thought: I have found the table in the `tavily_search` observation. Now I need to parse it to find the country with the least athletes, and if there is a tie, pick the first alphabetically. I will use 'python_repl' for this.
Action: python_repl
Action Input:
```python
# Example: python code to parse the text table and find the IOC country code with the least athletes.
# (Replace this with actual code as needed)
```
Observation: LUX Thought: I have found the country with the least athletes.
Final Answer: LUX

---
Previous conversation history:
{chat_history}

New input: {input}
---
{agent_scratchpad}
"""
    )

    # Running summary of the dialogue, exposed to the prompt as {chat_history}.
    summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")


    # Initialize gemini model with streaming enabled

    summary_llm = ChatGoogleGenerativeAI(
        model=gemini_model,
        google_api_key=google_api_key,
        temperature=0,
        streaming=True
    )


    #summary_llm = ChatOpenAI(model='gpt-4o', temperature=0, streaming=False,api_key=openai_api_key)
    #summary_llm = ChatOpenAI(model='gpt-4o', temperature=0, streaming=False,api_key=openai_api_key,top_p=1,presence_penalty=0,frequency_penalty=0)


    # Create a ReAct agent
    summary_react_agent = create_react_agent(
        llm=summary_llm,
        tools=tools,
        prompt=prompt
    )

    # 1. Instantiate Agent ( modify this part to create your agent)
    # Positional args: agent, tools, verbose=True, handle_parsing_errors=True,
    # max_iterations=30, memory.
    try:
        agent = BasicAgent(summary_react_agent, tools, True, True, 30, summary_memory)
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        file_name = item.get("file_name") # Get the file_name if it exists

        # Construct the question string that your LLM will see,
        # including the attachment URL if present.
        full_question_for_agent = question_text
        if file_name:
            attachment_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
            full_question_for_agent += f"\n\nAttachment '{file_name}' available at EXACT URL: {attachment_url}"
        print(f"Running agent on task {task_id}: {full_question_for_agent}",flush=True)

        # Disabled debugging filter (triple-quoted string, intentionally inert):
        # restricts the run to a hand-picked subset of task ids when enabled.
        '''
        allowed_ids = {
            #"cca530fc-4052-43b2-b130-b30968d8aa44",
            #"a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
            "3f57289b-8c60-48be-bd80-01f8099ca449",
            #"2d83110e-a098-4ebb-9987-066c06fa42d0",
            #"cf106601-ab4f-4af9-b045-5295fe67b37d",
            #"7bd855d8-463d-4ed5-93ca-5fe35145f733",
            "5a0c1adf-205e-4841-a666-7c3ef95def9d",
            "f918266a-b3e0-4914-865d-4faa564f1aef",
        }
        if task_id not in allowed_ids:
            continue
        '''
        try:
            submitted_answer = agent(full_question_for_agent)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
            # Crude rate limiting between questions to stay under LLM quota.
            time.sleep(61) # Add a 1 min delay before running the agent
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        # Strip non-printable/non-ASCII characters so the status renders
        # cleanly in the Gradio textbox.
        cleaned_final_status = re.sub(r'[^\x20-\x7E\n\r\t]+', '', final_status)
        cleaned_final_status = cleaned_final_status.strip()
        results_df = pd.DataFrame(results_log)
        return cleaned_final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
463
+
464
# --- Build Gradio Interface using Blocks ---
# Component creation order defines the on-page layout, so statement order
# here is significant.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
**Instructions:**

1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

---
**Disclaimers:**
Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
"""
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Outputs for run_and_submit_all: status text + per-question results table.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs=` listed: Gradio injects the OAuth profile automatically
    # because run_and_submit_all annotates its parameter as gr.OAuthProfile
    # (presumably relying on the LoginButton above — confirm against the
    # Gradio OAuth docs if this stops receiving the profile).
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
493
+
494
if __name__ == "__main__":
    # Startup banner plus a quick diagnostic of the HF Space environment,
    # then hand control to the Gradio app.
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)

    startup_host = os.getenv("SPACE_HOST")
    startup_space_id = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    # Report whether we appear to be running inside a Hugging Face Space.
    if startup_host:
        print(f"✅ SPACE_HOST found: {startup_host}")
        print(f" Runtime URL should be: https://{startup_host}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    # Print repo URLs when the Space id is known.
    if startup_space_id:
        print(f"✅ SPACE_ID found: {startup_space_id}")
        print(f" Repo URL: https://huggingface.co/spaces/{startup_space_id}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{startup_space_id}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-" * (60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)