i-dhilip commited on
Commit
0fb8b09
·
verified ·
1 Parent(s): 1ef30a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +233 -356
app.py CHANGED
@@ -1,390 +1,278 @@
1
- """LangGraph Agent with Gradio Interface"""
2
  import os
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
- from dotenv import load_dotenv
7
- from langgraph.graph import START, StateGraph, MessagesState
8
- from langgraph.prebuilt import tools_condition, ToolNode
9
- from langchain_openai import ChatOpenAI
10
- from langchain_community.tools.tavily_search import TavilySearchResults
11
- from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
12
- from langchain_community.vectorstores import Chroma
13
- from langchain_core.messages import SystemMessage, HumanMessage
14
- from langchain_core.tools import tool
15
- from langchain.tools.retriever import create_retriever_tool
16
  from langchain_community.embeddings import HuggingFaceEmbeddings
17
- from youtube_transcript_api import YouTubeTranscriptApi
18
- from PIL import Image
19
- from paddleocr import PaddleOCR
20
- import youtube_dl
21
- from pydub import AudioSegment
22
- import speech_recognition as sr
23
- import tempfile
24
-
25
- # Load environment variables
26
- load_dotenv()
27
-
28
- # Tool Definitions
29
- @tool
30
- def multiply(a: int, b: int) -> int:
31
- """Multiply two numbers."""
32
- return a * b
33
-
34
- @tool
35
- def add(a: int, b: int) -> int:
36
- """Add two numbers."""
37
- return a + b
38
-
39
- @tool
40
- def subtract(a: int, b: int) -> int:
41
- """Subtract two numbers."""
42
- return a - b
43
-
44
- @tool
45
- def divide(a: int, b: int) -> int:
46
- """Divide two numbers."""
47
- if b == 0:
48
- raise ValueError("Cannot divide by zero.")
49
- return a / b
50
-
51
- @tool
52
- def modulus(a: int, b: int) -> int:
53
- """Get the modulus of two numbers."""
54
- return a % b
55
 
56
- @tool
57
- def wiki_search(query: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  """Search Wikipedia for a query and return maximum 2 results."""
59
- try:
60
- search_docs = WikipediaLoader(query=query, load_max_docs=20).load()
61
- formatted_search_docs = "\n\n---\n\n".join(
62
- [f'<Document source="{doc.metadata["source"]}"/>\n{doc.page_content}\n</Document>'
63
- for doc in search_docs])
64
- return {"wiki_results": formatted_search_docs}
65
- except Exception as e:
66
- return {"wiki_results": f"Error: {str(e)}"}
67
 
68
- @tool
69
- def web_search(query: str) -> str:
70
- """Search Tavily for a query and return maximum 3 results."""
71
- try:
72
- search_docs = TavilySearchResults(max_results=20).invoke(query=query)
73
- formatted_search_docs = "\n\n---\n\n".join(
74
- [f'<Document source="{doc.metadata["source"]}"/>\n{doc.page_content}\n</Document>'
75
- for doc in search_docs])
76
- return {"web_results": formatted_search_docs}
77
- except Exception as e:
78
- return {"web_results": f"Error: {str(e)}"}
79
 
80
- @tool
81
- def arvix_search(query: str) -> str:
82
  """Search Arxiv for a query and return maximum 3 results."""
83
  try:
84
- search_docs = ArxivLoader(query=query, load_max_docs=3).load()
85
- formatted_search_docs = "\n\n---\n\n".join(
86
- [f'<Document source="{doc.metadata["source"]}"/>\n{doc.page_content[:1000]}\n</Document>'
87
- for doc in search_docs])
88
- return {"arvix_results": formatted_search_docs}
89
- except Exception as e:
90
- return {"arvix_results": f"Error: {str(e)}"}
91
-
92
- @tool
93
- def process_youtube_video(url: str) -> str:
94
- """Process YouTube video URL to extract transcript."""
95
- try:
96
- video_id = url.split("v=")[-1].split("&")[0]
97
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
98
- transcript_text = " ".join([entry['text'] for entry in transcript])
99
- return {"youtube_transcript": transcript_text}
100
- except Exception as e:
101
- return {"error": f"YouTube processing failed: {str(e)}"}
102
-
103
- @tool
104
- def process_audio(file_path: str) -> str:
105
- """Process audio file to extract transcription."""
106
- try:
107
- with tempfile.NamedTemporaryFile(suffix=".wav") as tmpfile:
108
- sound = AudioSegment.from_file(file_path)
109
- sound.export(tmpfile.name, format="wav")
110
-
111
- recognizer = sr.Recognizer()
112
- with sr.AudioFile(tmpfile.name) as source:
113
- audio_data = recognizer.record(source)
114
- text = recognizer.recognize_google(audio_data)
115
- return {"audio_transcription": text}
116
  except Exception as e:
117
- return {"error": f"Audio processing failed: {str(e)}"}
118
 
119
- @tool
120
- def process_image(image_path: str) -> str:
121
- """Process image to extract text or basic description."""
122
- try:
123
- img = Image.open(image_path)
124
- ocr = PaddleOCR(use_angle_cls=True, lang='en')
125
- result = ocr.ocr(image_path)
126
- text_lines = []
127
-
128
- if result:
129
- for detection in result[0]:
130
- text = detection[1][0] # detection[1] contains (text, confidence)
131
- text_lines.append(text)
132
-
133
- text = '\n'.join(text_lines)
134
-
135
- if text.strip():
136
- return {"image_text": text}
137
- else:
138
- basic_desc = f"Image size: {img.size}, Mode: {img.mode}, Format: {img.format}"
139
- return {"image_description": basic_desc}
140
- except Exception as e:
141
- return {"error": f"Image processing failed: {str(e)}"}
142
-
143
-
144
- # System Prompt Setup
145
- try:
146
- with open("system_prompt.txt", "r", encoding="utf-8") as f:
147
- system_prompt = f.read()
148
- sys_msg = SystemMessage(content=system_prompt)
149
- except FileNotFoundError:
150
- sys_msg = SystemMessage(content="Default system prompt")
151
-
152
- # Vector Store Setup
153
  try:
 
154
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
155
  vector_store = Chroma(
156
- collection_name="documents",
157
  embedding_function=embeddings,
158
  persist_directory="./chroma_db"
159
  )
 
 
 
 
 
 
 
 
 
 
160
  except Exception as e:
161
- print(f"Error initializing vector store: {e}")
162
- vector_store = None
 
163
 
164
- # Tool Configuration
165
  tools = [
166
- multiply, add, subtract, divide, modulus,
167
- wiki_search, web_search, arvix_search,
168
- process_youtube_video, process_audio, process_image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  ]
170
 
171
- if vector_store:
172
- tools.append(
173
- create_retriever_tool(
174
- vector_store.as_retriever(),
175
- name="Question Search",
176
- description="Retrieves similar questions from vector store"
177
- )
178
- )
179
- else:
180
- print("Warning: Vector store not initialized. Question Search tool disabled.")
 
 
 
 
181
 
182
- # Model Configuration
183
- MODEL_REGISTRY = {
184
- "gpt-4.1": {
185
- "model": "gpt-4.1",
186
- "temperature": 0,
187
- "max_tokens": 2048
188
- },
189
- "llama-4-scout-17b-16e-instruct": {
190
- "model": "llama-4-scout-17b-16e-instruct",
191
- "temperature": 0,
192
- "max_tokens": 2048
193
- },
194
- "deepseek-v3": {
195
- "model": "deepseek-v3",
196
- "temperature": 0,
197
- "max_tokens": 2048
198
- },
199
- "qwen2.5-coder-32b-instruct:int8": {
200
- "model": "qwen2.5-coder-32b-instruct:int8",
201
- "temperature": 0,
202
- "max_tokens": 2048
203
- }
204
- }
205
 
206
- def get_llm(model_name: str = "gpt-4.1"):
207
- """Initialize LLM with error handling"""
208
- config = MODEL_REGISTRY.get(model_name, MODEL_REGISTRY["gpt-4.1"])
209
- try:
210
- return ChatOpenAI(
211
- base_url="https://api.llm7.io/v1",
212
- api_key="unused",
213
- model=config["model"],
214
- temperature=config["temperature"],
215
- max_tokens=config["max_tokens"]
216
- )
217
- except Exception as e:
218
- print(f"Error initializing {model_name}: {e}")
219
- return None
220
 
221
- # Graph Builder Function (unchanged)
222
- def build_graph():
223
- """Build LangGraph agent workflow with multiple models"""
224
- primary_llm = get_llm("gpt-4.1")
225
- fallback_llm1 = get_llm("llama-4-scout-17b-16e-instruct")
226
- fallback_llm2 = get_llm("deepseek-v3")
227
- fallback_llm3 = get_llm("qwen2.5-coder-32b-instruct:int8")
228
-
229
- llms = [llm for llm in [primary_llm, fallback_llm1, fallback_llm2, fallback_llm3] if llm is not None]
230
-
231
- if not llms:
232
- raise RuntimeError("Failed to initialize any LLM")
233
 
234
- current_llm_index = 0
 
 
 
 
 
235
 
236
- def assistant(state: MessagesState):
237
- nonlocal current_llm_index
238
- for attempt in range(len(llms)):
239
- try:
240
- llm = llms[current_llm_index]
241
- llm_with_tools = llm.bind_tools(tools)
242
- response = llm_with_tools.invoke(state["messages"])
243
- current_llm_index = (current_llm_index + 1) % len(llms)
244
- return {"messages": [response]}
245
- except Exception as e:
246
- print(f"Model {llms[current_llm_index].model} failed: {e}")
247
- current_llm_index = (current_llm_index + 1) % len(llms)
248
- if attempt == len(llms) - 1:
249
- error_msg = HumanMessage(content=f"All models failed: {str(e)}")
250
- return {"messages": [error_msg]}
251
 
252
- def retriever(state: MessagesState):
253
- try:
254
- if vector_store:
255
- similar_questions = vector_store.similarity_search(
256
- state["messages"][0].content,
257
- k=1
258
- )
259
- example_content = "Similar question reference: \n\n" + \
260
- (similar_questions[0].page_content if similar_questions
261
- else "No similar questions found")
262
- else:
263
- example_content = "Vector store not available"
264
-
265
- return {"messages": [sys_msg] + state["messages"] + [HumanMessage(content=example_content)]}
266
- except Exception as e:
267
- error_msg = HumanMessage(content=f"Retrieval error: {str(e)}")
268
- return {"messages": [error_msg]}
269
 
270
- builder = StateGraph(MessagesState)
271
- builder.add_node("retriever", retriever)
272
- builder.add_node("assistant", assistant)
273
- builder.add_node("tools", ToolNode(tools))
274
 
275
- builder.add_edge(START, "retriever")
276
- builder.add_edge("retriever", "assistant")
277
- builder.add_conditional_edges("assistant", tools_condition)
278
- builder.add_edge("tools", "assistant")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
- return builder.compile()
281
-
282
- class BasicAgent:
283
- """LangGraph Agent Interface"""
284
- def __init__(self):
285
- self.graph = build_graph()
286
-
287
- def __call__(self, question: str) -> str:
288
  try:
289
- messages = [HumanMessage(content=question)]
290
- result = self.graph.invoke({"messages": messages})
291
- last_message = result['messages'][-1].content
 
 
 
 
292
 
293
- if "FINAL ANSWER: " in last_message:
294
- answer_part = last_message.split("FINAL ANSWER: ")[-1].strip()
295
- if answer_part.endswith('"}'):
296
- return answer_part[:-2].strip()
297
- return answer_part
298
- elif "Answer:" in last_message:
299
- answer_part = last_message.split("Answer:")[-1].strip()
300
- if answer_part.endswith('"}'):
301
- return answer_part[:-2].strip()
302
- return answer_part
303
- return last_message
304
  except Exception as e:
305
- return f"Agent processing error: {str(e)}"
306
-
307
-
308
- # Gradio Interface Functions
309
- def run_and_submit_all(profile: gr.OAuthProfile | None):
310
- """Evaluation runner function"""
311
- if not profile:
312
- return "Please Login to Hugging Face with the button.", None
313
-
314
- space_id = os.getenv("SPACE_ID")
315
- api_url = "https://agents-course-unit4-scoring.hf.space"
316
- username = profile.username
317
- results_log = []
318
-
 
 
 
 
 
 
 
 
 
319
  try:
320
- agent = BasicAgent()
321
- agent_code = f"https://huggingface.co/spaces/ {space_id}/tree/main"
322
-
323
- # Fetch questions
324
- response = requests.get(f"{api_url}/questions", timeout=15)
325
- response.raise_for_status()
326
- questions_data = response.json()
327
-
328
- # Process questions
329
- answers_payload = []
330
- for item in questions_data:
331
- task_id = item.get("task_id")
332
- question_text = item.get("question")
333
- if not task_id or not question_text:
334
- continue
335
-
336
- try:
337
- answer = agent(question_text)
338
- answers_payload.append({
339
- "task_id": task_id,
340
- "submitted_answer": answer
341
- })
342
- results_log.append({
343
- "Task ID": task_id,
344
- "Question": question_text,
345
- "Submitted Answer": answer
346
- })
347
- except Exception as e:
348
- results_log.append({
349
- "Task ID": task_id,
350
- "Question": question_text,
351
- "Submitted Answer": f"AGENT ERROR: {e}"
352
- })
353
-
354
- # Submit answers
355
- submission_data = {
356
- "username": username.strip(),
357
- "agent_code": agent_code,
358
- "answers": answers_payload
359
- }
360
-
361
- response = requests.post(f"{api_url}/submit", json=submission_data, timeout=60)
362
  response.raise_for_status()
363
  result_data = response.json()
364
-
365
  final_status = (
366
- f"Submission Successful!\nOverall Score: {result_data.get('score', 'N/A')}%\n"
367
- f"Correct: {result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')}\n"
368
- f"Message: {result_data.get('message', 'No message')}"
 
 
369
  )
370
- return final_status, pd.DataFrame(results_log)
371
-
 
372
  except Exception as e:
373
- return f"Error: {str(e)}", pd.DataFrame(results_log)
 
 
 
374
 
375
- # Gradio UI Setup
376
  with gr.Blocks() as demo:
377
- gr.Markdown("# Basic Agent Evaluation Runner")
378
  gr.Markdown(
379
  """
380
  **Instructions:**
381
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
382
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
383
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
384
- ---
385
- **Disclaimers:**
386
- Once clicking on the "submit button, it can take quite some time (this is the time for the agent to go through all the questions).
387
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
388
  """
389
  )
390
 
@@ -402,24 +290,13 @@ with gr.Blocks() as demo:
402
 
403
  if __name__ == "__main__":
404
  print("\n" + "-"*30 + " App Starting " + "-"*30)
405
- # Check for SPACE_HOST and SPACE_ID at startup for information
406
- space_host_startup = os.getenv("SPACE_HOST")
407
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
408
 
409
- if space_host_startup:
410
- print(f"✅ SPACE_HOST found: {space_host_startup}")
411
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
412
- else:
413
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
414
-
415
- if space_id_startup: # Print repo URLs if SPACE_ID is found
416
  print(f"✅ SPACE_ID found: {space_id_startup}")
417
- print(f" Repo URL: https://huggingface.co/spaces/ {space_id_startup}")
418
- print(f" Repo Tree URL: https://huggingface.co/spaces/ {space_id_startup}/tree/main")
419
  else:
420
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
421
 
422
  print("-"*(60 + len(" App Starting ")) + "\n")
423
-
424
- print("Launching Gradio Interface for Basic Agent Evaluation...")
425
- demo.launch(debug=True, share=False)
 
 
1
  import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ from datetime import datetime
6
+ from transformers import pipeline
7
+ from langchain_community.llms import HuggingFaceTextGenInference
8
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
9
+ from langchain.chains import LLMChain
10
+ from langchain.agents import Tool
11
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
12
+ from langchain_community.utilities import TextRequestsWrapper
 
 
13
  from langchain_community.embeddings import HuggingFaceEmbeddings
14
+ from langchain_community.vectorstores import Chroma
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"  # scoring service for the course
MAX_ANSWER_LENGTH = 50  # submitted answers are truncated to this many characters

# --- LLM Setup ---
# Using Hugging Face Text Generation Inference API instead of loading model locally
# This connects to a more powerful open source model through HF's inference API
llm = HuggingFaceTextGenInference(
    inference_server_url="https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
    max_new_tokens=256,
    temperature=0.1,
    repetition_penalty=1.03,
    top_k=10,
    top_p=0.95,
    timeout=120,
    streaming=False,
    # NOTE(review): confirm that HuggingFaceTextGenInference accepts a
    # `huggingface_api_key` kwarg — langchain_community docs usually route the
    # token through `server_kwargs={"headers": ...}` instead; verify this
    # doesn't fail pydantic validation at import time.
    huggingface_api_key=os.getenv("HF_API_TOKEN", None), # Set your HF API token in environment variables
)

# --- System Message ---
# The FINAL ANSWER template below is what extract_final_answer() parses for.
system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations, and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
system_message_prompt = SystemMessagePromptTemplate.from_template(system_prompt)

# --- Tools ---
# Shared wrappers used by the search tool functions defined below.
ddg = DuckDuckGoSearchAPIWrapper()
requests_wrapper = TextRequestsWrapper()
45
+
46
def wiki_search(query):
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: Free-text search query.

    Returns:
        A formatted string containing the search results.
    """
    # Restrict the DuckDuckGo search to wikipedia.org so this tool actually
    # returns Wikipedia content — previously it ran an unrestricted web
    # search identical to web_search, contradicting its name/description.
    search_results = ddg.run(f"site:wikipedia.org {query}")
    return f"Wikipedia search results for '{query}': {search_results}"
 
 
 
 
 
 
50
 
51
def web_search(query):
    """Run a DuckDuckGo web search for *query* and return the formatted results."""
    hits = ddg.run(query)
    return f"Web search results for '{query}': {hits}"
 
 
 
 
 
 
 
55
 
56
def arxiv_search(query):
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: Free-text search query.

    Returns:
        A formatted string with (truncated) Arxiv API results, or an error
        message string if the request fails.
    """
    try:
        from urllib.parse import quote_plus

        # Percent-encode the query so spaces/special characters form a valid URL.
        url = f"https://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=3"
        # TextRequestsWrapper.get() returns the response body as a plain str,
        # not a Response object — the previous `response.text` raised
        # AttributeError, so this tool ALWAYS returned the error branch.
        response = requests_wrapper.get(url)
        return f"Arxiv search results for '{query}': {response[:500]}..."  # Truncate for readability
    except Exception as e:
        return f"Error searching Arxiv: {str(e)}"
64
 
65
+ # --- Fallback for Chroma DB if not initialized ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
# --- Chroma DB Setup (with graceful fallback) ---
try:
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vector_store = Chroma(
        embedding_function=embeddings,
        persist_directory="./chroma_db",
    )

    def create_retriever_tool(query):
        """Look up questions similar to *query* in the Chroma vector store."""
        try:
            matches = vector_store.similarity_search(query)
            if not matches:
                return "No similar questions found in the database."
            return f"Similar question found: {matches[0].page_content}"
        except Exception as e:
            return f"Error using retriever: {str(e)}"
except Exception as e:
    print(f"Warning: Could not initialize Chroma DB: {e}")

    def create_retriever_tool(query):
        """Fallback stub used when the vector store failed to initialize."""
        return "Retriever tool is not available."
87
 
88
+ # Define the tools
89
# Define the tools: (name, callable, description) specs turned into
# langchain Tool objects in one pass.
_TOOL_SPECS = [
    ("Wikipedia Search", wiki_search,
     "Search Wikipedia for a query and return maximum 2 results."),
    ("Web Search", web_search,
     "Search DuckDuckGo for a query and return maximum 3 results."),
    ("Arxiv Search", arxiv_search,
     "Search Arxiv for a query and return maximum 3 results."),
    ("Retriever", create_retriever_tool,
     "A tool to retrieve similar questions from a vector store."),
]
tools = [Tool(name=n, func=f, description=d) for n, f, d in _TOOL_SPECS]
111
 
112
def create_agent(llm, tools):
    """Build the question-answering chain (system prompt + user input).

    NOTE(review): `tools` is accepted but never wired into the chain — the
    LLMChain below only formats and invokes the prompt. Confirm whether
    actual tool use was intended here.
    """
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, HumanMessagePromptTemplate.from_template("{input}")]
    )
    return LLMChain(llm=llm, prompt=chat_prompt)
120
+
121
def extract_final_answer(full_response):
    """Extract only the final answer from the agent's response.

    Returns the text after the LAST occurrence of "FINAL ANSWER:". The system
    prompt itself contains the "FINAL ANSWER:" template, which the model may
    echo while reasoning — splitting on the first occurrence (as before)
    could return template/reasoning text instead of the real answer. Falls
    back to the whole stripped response when no marker is present.

    Args:
        full_response: Raw text returned by the LLM chain.

    Returns:
        The extracted (stripped) answer string.
    """
    marker = "FINAL ANSWER:"
    if marker in full_response:
        return full_response.rsplit(marker, 1)[1].strip()
    return full_response.strip()
126
 
127
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the EnhancedAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The Gradio OAuth profile of the logged-in user, or None.

    Returns:
        A (status_message, results_dataframe_or_None) tuple for the Gradio UI.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")

    # Refuse to run without a logged-in user: the username is required by the
    # scoring service's submission payload.
    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = create_agent(llm, tools)
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # Link to this Space's code so the scoring service can inspect it.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        # e.g. JSON decoding errors.
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")

    # Define a fallback answer function in case the main agent fails
    def get_simple_answer(question):
        """Provide a simple answer when the main agent fails"""
        # Very basic responses for common question types — keyword heuristics
        # only; these are placeholders so a submission is always produced.
        if "capital" in question.lower():
            return "Unknown"
        elif "population" in question.lower() or "how many" in question.lower():
            return "0"
        elif "when" in question.lower():
            return "Unknown"
        elif "where" in question.lower():
            return "Unknown"
        elif "who" in question.lower():
            return "Unknown"
        elif "true or false" in question.lower():
            return "True"
        else:
            return "Unknown"

    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        # Skip malformed items; `question_text is None` (rather than falsy)
        # deliberately allows empty-string questions through.
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        try:
            print(f"Processing question: {question_text}")
            # Get the response from the agent
            agent_response = agent.run(question_text)
            print(f"Agent response: {agent_response}")

            # Extract just the final answer part
            final_answer = extract_final_answer(agent_response)

            # Make sure the answer isn't too long - truncate if needed
            if len(final_answer) > MAX_ANSWER_LENGTH:
                final_answer = final_answer[:MAX_ANSWER_LENGTH]
                print(f"Warning: Answer truncated to {MAX_ANSWER_LENGTH} characters")

            # Add to payload for submission
            answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer})
            print(f"Task {task_id}: Processed answer: {final_answer}")

        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")

            # Use fallback strategy: submit a heuristic answer rather than
            # dropping the task entirely. The "(FALLBACK)" suffix is only in
            # the displayed log, not the submitted payload.
            fallback_answer = get_simple_answer(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": fallback_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"{fallback_answer} (FALLBACK)"
            })
            print(f"Task {task_id}: Used fallback answer: {fallback_answer}")

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except Exception as e:
        # On submission failure, still surface the per-question log so the
        # user can inspect what was attempted.
        status_message = f"Submission Failed: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
266
 
267
+ # --- Build Gradio Interface using Blocks ---
268
  with gr.Blocks() as demo:
269
+ gr.Markdown("# GAIA Evaluation Agent using Multiple Search Tools")
270
  gr.Markdown(
271
  """
272
  **Instructions:**
273
+ 1. Clone this space and modify the agent's logic and tools as needed.
274
+ 2. Log in with your Hugging Face account.
275
+ 3. Click 'Run Evaluation & Submit All Answers' to test your agent.
 
 
 
 
276
  """
277
  )
278
 
 
290
 
291
if __name__ == "__main__":
    banner = " App Starting "
    print("\n" + "-" * 30 + banner + "-" * 30)
    space_id_startup = os.getenv("SPACE_ID")

    # Report whether we appear to be running inside a HF Space or locally.
    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?).")

    print("-" * (60 + len(banner)) + "\n")
    print("Launching Gradio Interface...")
    demo.launch(debug=True, share=True)