lethaq committed on
Commit
5909f48
·
verified ·
1 Parent(s): 9640259

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -141
app.py CHANGED
@@ -3,7 +3,7 @@ import sys
3
  import subprocess
4
 
5
  # 检查并安装缺失的依赖
6
- required_packages = ["litellm", "duckduckgo-search"]
7
  for package in required_packages:
8
  try:
9
  __import__(package)
@@ -19,7 +19,6 @@ import inspect
19
  import pandas as pd
20
  import json
21
  import time
22
- import re
23
  from typing import List, Dict, Any, Optional
24
  from litellm import completion
25
  from duckduckgo_search import DDGS
@@ -34,7 +33,7 @@ class DuckDuckGoSearchTool:
34
  self.name = "duckduckgo_search"
35
  self.description = "Search the web using DuckDuckGo"
36
 
37
- def search(self, query: str, max_results: int = 8) -> List[Dict[str, str]]:
38
  """
39
  Search the web using DuckDuckGo and return results.
40
 
@@ -53,7 +52,7 @@ class DuckDuckGoSearchTool:
53
  print(f"DuckDuckGo search error: {e}")
54
  return [{"title": f"Search error: {e}", "body": "", "href": ""}]
55
 
56
- def __call__(self, query: str, max_results: int = 8) -> Dict[str, Any]:
57
  """
58
  Execute the search and return results in a structured format.
59
 
@@ -83,14 +82,13 @@ class LiteLLMModel:
83
  self.api_key = api_key
84
  print(f"Initialized LiteLLM with model: {model_id}")
85
 
86
- def generate(self, prompt: str, system_prompt: str = None, temperature: float = 0.2) -> str:
87
  """
88
  Generate text using the LiteLLM model.
89
 
90
  Args:
91
  prompt: The user prompt
92
  system_prompt: Optional system prompt
93
- temperature: Temperature for generation (lower = more deterministic)
94
 
95
  Returns:
96
  Generated text response
@@ -104,8 +102,7 @@ class LiteLLMModel:
104
  response = completion(
105
  model=self.model_id,
106
  messages=messages,
107
- api_key=self.api_key,
108
- temperature=temperature
109
  )
110
 
111
  return response.choices[0].message.content
@@ -124,102 +121,71 @@ class CodeAgent:
124
  def format_search_results(self, results: List[Dict[str, str]]) -> str:
125
  """Format search results into a readable string"""
126
  formatted = "Search Results:\n"
 
 
127
  for i, result in enumerate(results, 1):
128
  formatted += f"{i}. {result.get('title', 'No title')}\n"
129
- formatted += f" {result.get('body', 'No description')[:300]}...\n"
130
  formatted += f" URL: {result.get('href', 'No URL')}\n\n"
131
  return formatted
132
 
133
- def create_prompt(self, question: str, search_results: Optional[List[Dict[str, str]]] = None) -> str:
134
- """Create a prompt for the model with optional search results"""
135
- prompt = f"Question: {question}\n\n"
136
-
137
- if search_results:
138
- prompt += self.format_search_results(search_results)
139
-
140
- prompt += "\nPlease provide a concise, factual answer to the question. "
141
- prompt += "Your answer should be direct and to the point, without any explanations or reasoning. "
142
- prompt += "For example, if asked 'What is the capital of France?', just answer 'Paris'. "
143
- prompt += "If asked for a numerical value, provide only the number. "
144
- prompt += "If asked for a list, provide comma-separated values without numbering. "
145
- prompt += "If you don't know the answer, respond with 'Unknown' rather than speculating.\n\n"
146
-
147
- # 添加特定问题类型的指导
148
- if "how many" in question.lower():
149
- prompt += "For 'how many' questions, just provide the number as your answer.\n"
150
- elif "which" in question.lower() and "option" in question.lower():
151
- prompt += "For multiple choice questions, just provide the letter(s) of the correct option(s).\n"
152
- elif any(word in question.lower() for word in ["list", "name all", "what are"]):
153
- prompt += "For list questions, provide items as comma-separated values without numbering or bullet points.\n"
154
-
155
- prompt += "Answer: "
156
-
157
- return prompt
158
-
159
  def create_system_prompt(self) -> str:
160
  """Create a system prompt for the model"""
161
  return (
162
- "You are a helpful AI assistant specialized in answering factual questions. "
163
- "You always provide direct, concise answers without explanations or reasoning. "
164
- "Your answers are factual, accurate, and to the point. "
165
- "For questions requiring specific formats, you follow those formats exactly. "
166
- "You never include phrases like 'the answer is' or 'I believe' in your responses. "
167
- "For multiple choice questions, only provide the letter(s) of the correct option(s). "
168
- "For numerical questions, only provide the number. "
169
- "For list questions, provide comma-separated values without numbering or bullet points. "
170
- "If you don't know the answer, just say 'Unknown'."
171
  )
172
-
173
- def should_use_search(self, question: str) -> bool:
174
- """Determine if search should be used for this question"""
175
- # 扩展搜索触发条件
176
- search_triggers = [
177
- "what", "who", "when", "where", "how", "which",
178
- "why", "list", "name", "find", "identify", "describe",
179
- "explain", "tell me", "show", "give", "provide"
180
- ]
181
-
182
- return any(trigger in question.lower() for trigger in search_triggers)
183
-
184
- def clean_answer(self, answer: str, question: str) -> str:
185
- """Clean up the model's answer based on question type"""
186
- # 基本清理
187
- answer = answer.strip()
188
-
189
- # 移除常见前缀
190
- prefixes_to_remove = [
191
- "Answer:", "The answer is:", "I believe", "I think",
192
- "Based on", "According to", "The answer would be",
193
- "The correct answer is", "My answer is"
194
- ]
195
-
196
- for prefix in prefixes_to_remove:
197
- if answer.lower().startswith(prefix.lower()):
198
- answer = answer[len(prefix):].strip()
199
-
200
- # 移除引号
201
- if (answer.startswith('"') and answer.endswith('"')) or \
202
- (answer.startswith("'") and answer.endswith("'")):
203
- answer = answer[1:-1].strip()
204
-
205
- # 针对特定问题类型的处理
206
- if "how many" in question.lower():
207
- # 尝试提取数字
208
- numbers = re.findall(r'\d+', answer)
209
- if numbers:
210
- return numbers[0]
211
-
212
- elif "which" in question.lower() and "option" in question.lower():
213
- # 尝试提取选项字母
214
- options = re.findall(r'[A-Da-d]', answer)
215
- if options:
216
- return ", ".join(options).lower()
217
 
218
- # 移除末尾的标点符号
219
- answer = answer.rstrip(".!,;:")
220
 
221
- return answer
222
-
223
  def __call__(self, question: str) -> str:
224
  """
225
  Process a question and return an answer.
@@ -232,35 +198,62 @@ class CodeAgent:
232
  """
233
  print(f"Agent received question: {question[:100]}...")
234
 
235
- # 确定是否应该使用搜索
236
- should_search = self.should_use_search(question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
 
 
 
 
 
238
  search_results = None
239
  if should_search and self.search_tool:
240
  print(f"Searching for information about: {question}")
241
- # 使用更具体的搜索查询
242
- search_query = question
243
- if "code" in question.lower() or "python" in question.lower():
244
- search_query += " code example"
245
- elif "date" in question.lower() or "when" in question.lower():
246
- search_query += " exact date"
247
-
248
- search_response = self.search_tool(search_query, max_results=8)
249
  search_results = search_response.get("results", [])
250
  print(f"Found {len(search_results)} search results")
251
 
252
- # 创建提示词和生成回答
253
  prompt = self.create_prompt(question, search_results)
254
  system_prompt = self.create_system_prompt()
255
 
256
  print("Generating response with LLM...")
257
- # 使用较低的温度以获得更确定性的回答
258
- response = self.model.generate(prompt, system_prompt, temperature=0.2)
 
259
 
260
- # 清理回答
261
- answer = self.clean_answer(response, question)
262
 
263
- print(f"Final answer: {answer[:100]}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  return answer
265
 
266
  # 简化版本,不使用OAuthProfile
@@ -269,15 +262,12 @@ def run_and_submit_all():
269
  Fetches all questions, runs the Agent on them, submits all answers,
270
  and displays the results.
271
  """
272
- # --- Determine HF Space Runtime URL and Repo URL ---
273
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
274
  api_url = DEFAULT_API_URL
275
  questions_url = f"{api_url}/questions"
276
  submit_url = f"{api_url}/submit"
277
 
278
- # 1. Instantiate Agent with Gemini model and DuckDuckGo search
279
  try:
280
- # Get API key from environment variable
281
  api_key = os.getenv("GEMINI_API_KEY")
282
  if not api_key:
283
  return "Error: GEMINI_API_KEY environment variable not found. Please set it in your Space settings.", None
@@ -288,34 +278,31 @@ def run_and_submit_all():
288
  print(f"Error instantiating agent: {e}")
289
  return f"Error initializing agent: {e}", None
290
 
291
- # In the case of an app running as a hugging Face space, this link points toward your codebase
292
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
293
  if not space_id:
294
- agent_code = "https://huggingface.co/spaces/lethaq/Final_Assignment_Template/tree/main"
295
  print(f"Agent code URL: {agent_code}")
296
 
297
- # 2. Fetch Questions
298
  print(f"Fetching questions from: {questions_url}")
299
  try:
300
- response = requests.get(questions_url, timeout=15)
301
  response.raise_for_status()
302
  questions_data = response.json()
303
  if not questions_data:
304
- print("Fetched questions list is empty.")
305
- return "Fetched questions list is empty or invalid format.", None
306
  print(f"Fetched {len(questions_data)} questions.")
307
  except requests.exceptions.RequestException as e:
308
  print(f"Error fetching questions: {e}")
309
  return f"Error fetching questions: {e}", None
310
  except requests.exceptions.JSONDecodeError as e:
311
- print(f"Error decoding JSON response from questions endpoint: {e}")
312
- print(f"Response text: {response.text[:500]}")
313
- return f"Error decoding server response for questions: {e}", None
314
  except Exception as e:
315
  print(f"An unexpected error occurred fetching questions: {e}")
316
  return f"An unexpected error occurred fetching questions: {e}", None
317
 
318
- # 3. Run your Agent
319
  results_log = []
320
  answers_payload = []
321
  print(f"Running agent on {len(questions_data)} questions...")
@@ -328,26 +315,23 @@ def run_and_submit_all():
328
  try:
329
  print(f"Processing task {task_id}: {question_text[:50]}...")
330
  submitted_answer = agent(question_text)
331
- # Important: Use "submitted_answer" as the key, not "model_answer"
332
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
333
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
334
  print(f"Answer for task {task_id}: {submitted_answer[:50]}...")
335
  except Exception as e:
336
- print(f"Error running agent on task {task_id}: {e}")
337
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
338
 
339
  if not answers_payload:
340
  print("Agent did not produce any answers to submit.")
341
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
342
 
343
- # 4. Submit answers with all required fields
344
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
345
  try:
346
- # Important: Submit with all required fields
347
  submission_dict = {
348
- "username": DEFAULT_USERNAME, # 添加用户名
349
- "agent_code": agent_code, # 添加代理代码URL
350
- "answers": answers_payload # 答案列表
351
  }
352
  response = requests.post(submit_url, json=submission_dict, timeout=60)
353
  response.raise_for_status()
@@ -408,29 +392,28 @@ with gr.Blocks() as demo:
408
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
409
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
410
 
411
- # Add a single question test feature
412
  gr.Markdown("## Test Single Question")
413
  with gr.Row():
414
- question_in = gr.Textbox(label="Question", lines=3)
415
  answer_out = gr.Textbox(label="Answer", lines=3, interactive=False)
416
 
417
  test_btn = gr.Button("Test Question", variant="secondary")
418
 
419
- # Add a function to test a single question
420
  def test_single_question(question):
 
 
421
  try:
422
  api_key = os.getenv("GEMINI_API_KEY")
423
  if not api_key:
424
  return "Error: GEMINI_API_KEY environment variable not found"
425
 
426
- model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=api_key)
427
  agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
428
  answer = agent(question)
429
  return answer
430
  except Exception as e:
431
  return f"Error: {str(e)}"
432
 
433
- # 完全移除OAuthProfile相关代码
434
  run_button.click(
435
  fn=run_and_submit_all,
436
  outputs=[status_output, results_table]
@@ -444,9 +427,8 @@ with gr.Blocks() as demo:
444
 
445
  if __name__ == "__main__":
446
  print("\n" + "-"*30 + " App Starting " + "-"*30)
447
- # Check for SPACE_HOST and SPACE_ID at startup for information
448
  space_host_startup = os.getenv("SPACE_HOST")
449
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
450
 
451
  if space_host_startup:
452
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -454,7 +436,7 @@ if __name__ == "__main__":
454
  else:
455
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
456
 
457
- if space_id_startup: # Print repo URLs if SPACE_ID is found
458
  print(f"✅ SPACE_ID found: {space_id_startup}")
459
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
460
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
@@ -464,8 +446,9 @@ if __name__ == "__main__":
464
  print("-"*(60 + len(" App Starting ")) + "\n")
465
 
466
  print("Launching Gradio Interface for Gemini Agent Evaluation...")
467
- demo.launch(debug=True, share=False)
468
-
 
469
 
470
 
471
 
 
3
  import subprocess
4
 
5
  # 检查并安装缺失的依赖
6
+ required_packages = ["litellm", "duckduckgo-search", "gradio", "requests", "pandas"] # 确保gradio, requests, pandas也在这里
7
  for package in required_packages:
8
  try:
9
  __import__(package)
 
19
  import pandas as pd
20
  import json
21
  import time
 
22
  from typing import List, Dict, Any, Optional
23
  from litellm import completion
24
  from duckduckgo_search import DDGS
 
33
  self.name = "duckduckgo_search"
34
  self.description = "Search the web using DuckDuckGo"
35
 
36
+ def search(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
37
  """
38
  Search the web using DuckDuckGo and return results.
39
 
 
52
  print(f"DuckDuckGo search error: {e}")
53
  return [{"title": f"Search error: {e}", "body": "", "href": ""}]
54
 
55
+ def __call__(self, query: str, max_results: int = 5) -> Dict[str, Any]:
56
  """
57
  Execute the search and return results in a structured format.
58
 
 
82
  self.api_key = api_key
83
  print(f"Initialized LiteLLM with model: {model_id}")
84
 
85
+ def generate(self, prompt: str, system_prompt: str = None) -> str:
86
  """
87
  Generate text using the LiteLLM model.
88
 
89
  Args:
90
  prompt: The user prompt
91
  system_prompt: Optional system prompt
 
92
 
93
  Returns:
94
  Generated text response
 
102
  response = completion(
103
  model=self.model_id,
104
  messages=messages,
105
+ api_key=self.api_key
 
106
  )
107
 
108
  return response.choices[0].message.content
 
121
  def format_search_results(self, results: List[Dict[str, str]]) -> str:
122
  """Format search results into a readable string"""
123
  formatted = "Search Results:\n"
124
+ if not results:
125
+ return "No search results found.\n\n"
126
  for i, result in enumerate(results, 1):
127
  formatted += f"{i}. {result.get('title', 'No title')}\n"
128
+ formatted += f" {result.get('body', 'No description')[:200]}...\n"
129
  formatted += f" URL: {result.get('href', 'No URL')}\n\n"
130
  return formatted
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  def create_system_prompt(self) -> str:
133
  """Create a system prompt for the model"""
134
  return (
135
+ "You are a highly specialized AI assistant built for factual question answering. Your primary function is to provide accurate, concise, and direct answers. "
136
+ "Adhere to the following principles without deviation:\n"
137
+ "- Accuracy and Conciseness: Prioritize factual correctness and brevity above all else. Your answers should be short and to the point.\n"
138
+ "- Literal Interpretation: Interpret questions literally. Do not infer intent beyond what is explicitly stated.\n"
139
+ "- No Extraneous Information: Do not offer opinions, suggestions, engage in conversation, or provide any information not directly requested. Avoid any form of elaboration.\n"
140
+ "- Strict Format Adherence: Follow any explicit or implicit formatting instructions in the user's question (e.g., for numbers, lists, yes/no answers).\n"
141
+ "- 'Unknown' for Uncertainty: If a high-confidence, factual answer cannot be derived from provided context or your knowledge base, or if the question is impossible for you to answer (e.g. due to lack of capability like image understanding), you MUST output the single word 'Unknown'. Do not attempt to guess or provide a partially correct answer."
 
 
142
  )
143
+
144
+ def create_prompt(self, question: str, search_results: Optional[List[Dict[str, str]]] = None) -> str:
145
+ """Create a prompt for the model with optional search results"""
146
+ prompt_parts = []
147
+
148
+ if search_results:
149
+ prompt_parts.append(self.format_search_results(search_results))
150
+ prompt_parts.append(
151
+ "Instructions for using search results:\n"
152
+ "- Carefully review the search results provided above.\n"
153
+ "- If the search results contain a clear and factual answer to the question, use that information for your response.\n"
154
+ "- If the search results are relevant but do not directly answer the question, you may synthesize information if explicitly asked to, otherwise state what is found.\n"
155
+ "- If the search results are irrelevant or insufficient, state that the answer cannot be found in the provided search results and then use your general knowledge if applicable.\n\n"
156
+ )
157
+ else:
158
+ prompt_parts.append("No specific search results were provided for this question. You will need to answer using your general knowledge.\n\n")
159
+
160
+ prompt_parts.append(f"Question: {question}\n")
161
+ prompt_parts.append(
162
+ "\nYour Task:\n"
163
+ "Provide a concise, factual answer to the question above. Follow these strict instructions for your answer:\n"
164
+ "1. Directness: Your answer must be direct and to the point. Do NOT include any pleasantries, apologies, self-references (e.g., 'As an AI...'), or any information not explicitly asked for.\n"
165
+ "2. No Explanations: Do NOT provide any explanations, reasoning, or justifications for your answer unless the question specifically asks for it.\n"
166
+ "3. Factual Accuracy: Ensure your answer is factually correct based on the information provided or your internal knowledge.\n"
167
+ "4. Formatting:\n"
168
+ " - If the question asks for a numerical value (e.g., 'How many...'), provide ONLY the number (e.g., '42', '1000').\n"
169
+ " - If the question asks for a list of items, provide them as a comma-separated list without numbering or bullet points (e.g., 'red, green, blue').\n"
170
+ " - If a yes/no answer is appropriate, provide 'Yes' or 'No'.\n"
171
+ " - For other types of questions, provide the most direct and brief factual answer.\n"
172
+ "5. Unknown Answers: If, and only if, you cannot confidently determine a factual answer from the search results (if provided) or your general knowledge, or if the question is unanswerable (e.g., requires processing an image you cannot see, or is nonsensical), you MUST respond with the single word 'Unknown'. Do not guess or provide speculative information.\n"
173
+ "\nExample Scenarios:\n"
174
+ "- Question: What is the capital of France?\n"
175
+ " Correct Answer: Paris\n"
176
+ "- Question: How many moons does Earth have?\n"
177
+ " Correct Answer: 1\n"
178
+ "- Question: List the primary colors.\n"
179
+ " Correct Answer: red, yellow, blue\n"
180
+ "- Question: Does a dog meow?\n"
181
+ " Correct Answer: No\n"
182
+ "- Question: (A question where the answer is truly unknowable or unfindable for you)\n"
183
+ " Correct Answer: Unknown\n"
184
+ )
185
+ prompt_parts.append("\nAnswer: ")
 
 
186
 
187
+ return "".join(prompt_parts)
 
188
 
 
 
189
  def __call__(self, question: str) -> str:
190
  """
191
  Process a question and return an answer.
 
198
  """
199
  print(f"Agent received question: {question[:100]}...")
200
 
201
+ question_lower = question.lower()
202
+
203
+ # 更新 should_search 逻辑
204
+ search_trigger_keywords = [
205
+ "what", "who", "when", "where", "how many", "which", "list", "name", "find", "does",
206
+ "is there", "are there", "can you tell me", "describe", "published by", "released by",
207
+ "highest number", "what is the population of", "when was the first", "who invented",
208
+ "identify", "capital of", "what year", "tell me the", "average", "statistics", "data on",
209
+ "information about", "details on"
210
+ ]
211
+ should_search = any(trigger in question_lower for trigger in search_trigger_keywords)
212
+
213
+ if not should_search and ("?" in question and len(question_lower.split()) > 3) :
214
+ if not (question_lower.startswith("can you") or \
215
+ question_lower.startswith("write") or \
216
+ "tfel" in question_lower or \
217
+ "chess position" in question_lower or \
218
+ "image" in question_lower):
219
+ should_search = True
220
 
221
+ if '.remna eht sa "tfel" drow eht fo etisoppo eht etirw' in question_lower:
222
+ should_search = False
223
+ if "chess position provided in the image" in question_lower or "image." in question_lower:
224
+ should_search = False
225
+
226
  search_results = None
227
  if should_search and self.search_tool:
228
  print(f"Searching for information about: {question}")
229
+ search_response = self.search_tool(question, max_results=5)
 
 
 
 
 
 
 
230
  search_results = search_response.get("results", [])
231
  print(f"Found {len(search_results)} search results")
232
 
 
233
  prompt = self.create_prompt(question, search_results)
234
  system_prompt = self.create_system_prompt()
235
 
236
  print("Generating response with LLM...")
237
+ # print(f"System Prompt: {system_prompt}") # For debugging prompts
238
+ # print(f"User Prompt: {prompt}") # For debugging prompts
239
+ response = self.model.generate(prompt, system_prompt)
240
 
241
+ answer = response.strip()
 
242
 
243
+ prefixes_to_remove = [
244
+ "Answer:", "The answer is:", "I believe", "I think",
245
+ "Based on", "According to", "The answer would be"
246
+ ]
247
+
248
+ for prefix in prefixes_to_remove:
249
+ if answer.lower().startswith(prefix.lower()): # Case-insensitive prefix check
250
+ answer = answer[len(prefix):].strip()
251
+
252
+ if (answer.startswith('"') and answer.endswith('"')) or \
253
+ (answer.startswith("'") and answer.endswith("'")):
254
+ answer = answer[1:-1].strip()
255
+
256
+ print(f" {answer[:100]}...")
257
  return answer
258
 
259
  # 简化版本,不使用OAuthProfile
 
262
  Fetches all questions, runs the Agent on them, submits all answers,
263
  and displays the results.
264
  """
265
+ space_id = os.getenv("SPACE_ID")
 
266
  api_url = DEFAULT_API_URL
267
  questions_url = f"{api_url}/questions"
268
  submit_url = f"{api_url}/submit"
269
 
 
270
  try:
 
271
  api_key = os.getenv("GEMINI_API_KEY")
272
  if not api_key:
273
  return "Error: GEMINI_API_KEY environment variable not found. Please set it in your Space settings.", None
 
278
  print(f"Error instantiating agent: {e}")
279
  return f"Error initializing agent: {e}", None
280
 
 
281
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
282
  if not space_id:
283
+ agent_code = "https://huggingface.co/spaces/lethaq/Final_Assignment_Template/tree/main" # Fallback
284
  print(f"Agent code URL: {agent_code}")
285
 
 
286
  print(f"Fetching questions from: {questions_url}")
287
  try:
288
+ response = requests.get(questions_url, timeout=20) # Increased timeout
289
  response.raise_for_status()
290
  questions_data = response.json()
291
  if not questions_data:
292
+ print("Fetched questions list is empty.")
293
+ return "Fetched questions list is empty or invalid format.", None
294
  print(f"Fetched {len(questions_data)} questions.")
295
  except requests.exceptions.RequestException as e:
296
  print(f"Error fetching questions: {e}")
297
  return f"Error fetching questions: {e}", None
298
  except requests.exceptions.JSONDecodeError as e:
299
+ print(f"Error decoding JSON response from questions endpoint: {e}")
300
+ print(f"Response text: {response.text[:500]}")
301
+ return f"Error decoding server response for questions: {e}", None
302
  except Exception as e:
303
  print(f"An unexpected error occurred fetching questions: {e}")
304
  return f"An unexpected error occurred fetching questions: {e}", None
305
 
 
306
  results_log = []
307
  answers_payload = []
308
  print(f"Running agent on {len(questions_data)} questions...")
 
315
  try:
316
  print(f"Processing task {task_id}: {question_text[:50]}...")
317
  submitted_answer = agent(question_text)
 
318
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
319
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
320
  print(f"Answer for task {task_id}: {submitted_answer[:50]}...")
321
  except Exception as e:
322
+ print(f"Error running agent on task {task_id}: {e}")
323
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
324
 
325
  if not answers_payload:
326
  print("Agent did not produce any answers to submit.")
327
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
328
 
 
329
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
330
  try:
 
331
  submission_dict = {
332
+ "username": DEFAULT_USERNAME,
333
+ "agent_code": agent_code,
334
+ "answers": answers_payload
335
  }
336
  response = requests.post(submit_url, json=submission_dict, timeout=60)
337
  response.raise_for_status()
 
392
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
393
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
394
 
 
395
  gr.Markdown("## Test Single Question")
396
  with gr.Row():
397
+ question_in = gr.Textbox(label="Question", lines=3, placeholder="Enter your question here...")
398
  answer_out = gr.Textbox(label="Answer", lines=3, interactive=False)
399
 
400
  test_btn = gr.Button("Test Question", variant="secondary")
401
 
 
402
  def test_single_question(question):
403
+ if not question.strip():
404
+ return "Please enter a question."
405
  try:
406
  api_key = os.getenv("GEMINI_API_KEY")
407
  if not api_key:
408
  return "Error: GEMINI_API_KEY environment variable not found"
409
 
410
+ model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=AIzaSyAhmwogxZFBtt7_OUsKQGNeOYF7ced39bM)
411
  agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
412
  answer = agent(question)
413
  return answer
414
  except Exception as e:
415
  return f"Error: {str(e)}"
416
 
 
417
  run_button.click(
418
  fn=run_and_submit_all,
419
  outputs=[status_output, results_table]
 
427
 
428
  if __name__ == "__main__":
429
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
430
  space_host_startup = os.getenv("SPACE_HOST")
431
+ space_id_startup = os.getenv("SPACE_ID")
432
 
433
  if space_host_startup:
434
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
436
  else:
437
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
438
 
439
+ if space_id_startup:
440
  print(f"✅ SPACE_ID found: {space_id_startup}")
441
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
442
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
446
  print("-"*(60 + len(" App Starting ")) + "\n")
447
 
448
  print("Launching Gradio Interface for Gemini Agent Evaluation...")
449
+ # For Hugging Face Spaces, share=True is often not needed or handled by the platform.
450
+ # debug=True can be helpful during development.
451
+ demo.launch(debug=False, share=False)
452
 
453
 
454