lethaq committed on
Commit
dac9255
·
verified ·
1 Parent(s): 2f63ccb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -498
app.py CHANGED
@@ -1,484 +1,88 @@
 
1
  import os
2
- import sys
3
- import subprocess
4
- import re
5
-
6
- # 检查并安装缺失的依赖
7
- required_packages = ["litellm", "duckduckgo-search", "gradio", "requests", "pandas"]
8
- for package in required_packages:
9
- try:
10
- __import__(package)
11
- except ImportError:
12
- print(f"Installing {package}...")
13
- subprocess.check_call([sys.executable, "-m", "pip", "install", package])
14
- print(f"{package} installed successfully!")
15
-
16
- # 现在导入所需模块
17
  import gradio as gr
18
  import requests
19
- import inspect
20
  import pandas as pd
21
- import json
22
- import time
23
- from typing import List, Dict, Any, Optional
24
- from litellm import completion
25
- from duckduckgo_search import DDGS
26
-
27
- # 用於獲取並解析附件
28
- def fetch_and_summarize_attachment(task_id, question, files_api_url="https://agents-course-unit4-scoring.hf.space/files/"):
29
- import pandas as pd, requests
30
- q_lower = question.lower()
31
- keywords = ["attached", "file", "excel", "csv", "spreadsheet", "audio", "mp3", "image", "picture", "python code", "code"]
32
- if not any(k in q_lower for k in keywords):
33
- return ""
34
- try:
35
- file_url = f"{files_api_url}{task_id}"
36
- response = requests.get(file_url, timeout=10)
37
- local_path = f"/tmp/{task_id}"
38
- with open(local_path, "wb") as f:
39
- f.write(response.content)
40
- if local_path.endswith(".csv") or b"," in response.content[:512]:
41
- df = pd.read_csv(local_path)
42
- summary = df.head(10).to_string()
43
- return f"[Attachment Detected: CSV Table]\nFirst 10 rows:\n{summary}\n"
44
- elif local_path.endswith(".xls") or local_path.endswith(".xlsx"):
45
- df = pd.read_excel(local_path)
46
- summary = df.head(10).to_string()
47
- return f"[Attachment Detected: Excel Table]\nFirst 10 rows:\n{summary}\n"
48
- elif local_path.endswith(".txt") or b"\n" in response.content[:512]:
49
- with open(local_path, "r", encoding="utf-8", errors="ignore") as f:
50
- text = f.read(1000)
51
- return f"[Attachment Detected: Text File]\n{text}\n"
52
- elif local_path.endswith(".py"):
53
- with open(local_path, "r", encoding="utf-8", errors="ignore") as f:
54
- code = f.read(1000)
55
- return f"[Attachment Detected: Python Code]\n{code}\n"
56
- else:
57
- return "[Attachment Detected, but file type not supported for summary]\n"
58
- except Exception as e:
59
- return f"[Attachment Error: {e}]\n"
60
-
61
 
62
 
63
 
 
64
  # --- Constants ---
65
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
66
- DEFAULT_USERNAME = "lethaq" # 默认用户名,您可以修改为您的用户名
67
-
68
- # --- Tool Implementations ---
69
- class DuckDuckGoSearchTool:
70
- def __init__(self):
71
- self.name = "duckduckgo_search"
72
- self.description = "Search the web using DuckDuckGo"
73
-
74
- def search(self, query: str, max_results: int = 8) -> List[Dict[str, str]]:
75
- """
76
- Search the web using DuckDuckGo and return results.
77
-
78
- Args:
79
- query: The search query
80
- max_results: Maximum number of results to return
81
-
82
- Returns:
83
- List of dictionaries with search results
84
- """
85
- try:
86
- with DDGS() as ddgs:
87
- results = list(ddgs.text(query, max_results=max_results))
88
- return results
89
- except Exception as e:
90
- print(f"DuckDuckGo search error: {e}")
91
- return [{"title": f"Search error: {e}", "body": "", "href": ""}]
92
-
93
-
94
-
95
-
96
-
97
-
98
- def __call__(self, query: str, max_results: int = 8) -> Dict[str, Any]:
99
- """
100
- Execute the search and return results in a structured format.
101
-
102
- Args:
103
- query: The search query
104
- max_results: Maximum number of results to return
105
-
106
- Returns:
107
- Dictionary with search results and metadata
108
- """
109
- start_time = time.time()
110
- results = self.search(query, max_results)
111
- end_time = time.time()
112
-
113
- return {
114
- "tool_name": self.name,
115
- "query": query,
116
- "results": results,
117
- "result_count": len(results),
118
- "time_taken": end_time - start_time
119
- }
120
 
121
- # --- LiteLLM Model Wrapper ---
122
- class LiteLLMModel:
123
- def __init__(self, model_id: str, api_key: str):
124
- self.model_id = model_id
125
- self.api_key = api_key
126
- print(f"Initialized LiteLLM with model: {model_id}")
127
-
128
- def generate(self, prompt: str, system_prompt: str = None, temperature: float = 0.1) -> str:
129
- """
130
- Generate text using the LiteLLM model.
131
-
132
- Args:
133
- prompt: The user prompt
134
- system_prompt: Optional system prompt
135
- temperature: Temperature for generation (lower = more deterministic)
136
-
137
- Returns:
138
- Generated text response
139
- """
140
- try:
141
- messages = []
142
- if system_prompt:
143
- messages.append({"role": "system", "content": system_prompt})
144
- messages.append({"role": "user", "content": prompt})
145
-
146
- response = completion(
147
- model=self.model_id,
148
- messages=messages,
149
- api_key=self.api_key,
150
- temperature=temperature,
151
- max_tokens=256
152
- )
153
-
154
- return response.choices[0].message.content
155
- except Exception as e:
156
- print(f"LiteLLM generation error: {e}")
157
- return f"Error generating response: {str(e)}"
158
 
159
- # --- Advanced Agent Implementation ---
160
- class CodeAgent:
161
- def __init__(self, tools: List[Any], model: LiteLLMModel):
162
- self.tools = tools
163
- self.model = model
164
- self.search_tool = next((tool for tool in tools if isinstance(tool, DuckDuckGoSearchTool)), None)
165
- print(f"CodeAgent initialized with {len(tools)} tools and model {model.model_id}")
166
-
167
- def format_search_results(self, results: List[Dict[str, str]]) -> str:
168
- """Format search results into a readable string"""
169
- formatted = "Search Results:\n"
170
- if not results:
171
- return "No search results found.\n\n"
172
- for i, result in enumerate(results, 1):
173
- formatted += f"{i}. {result.get('title', 'No title')}\n"
174
- formatted += f" {result.get('body', 'No description')[:300]}...\n"
175
- formatted += f" URL: {result.get('href', 'No URL')}\n\n"
176
- return formatted
177
-
178
- def create_system_prompt(self) -> str:
179
- """Create a system prompt for the model"""
180
- return (
181
- "You are a specialized AI assistant for the GAIA benchmark test. Your sole purpose is to provide extremely concise, factual answers. "
182
- "Follow these strict guidelines:\n\n"
183
- "1. NEVER explain, justify, or add context to your answers\n"
184
- "2. For numerical questions, respond ONLY with the number\n"
185
- "3. For multiple choice questions, respond ONLY with the letter(s) of the correct option(s)\n"
186
- "4. For list questions, provide comma-separated items without numbering\n"
187
- "5. For yes/no questions, respond ONLY with 'yes' or 'no'\n"
188
- "6. If you cannot determine the answer with high confidence, respond ONLY with 'Unknown'\n"
189
- "7. NEVER include phrases like 'the answer is' or 'based on'\n"
190
- "8. NEVER use bullet points or numbering in your answers\n"
191
- "9. NEVER include explanations or reasoning\n\n"
192
- "Examples:\n"
193
- "- Question: What is the capital of France? Answer: Paris\n"
194
- "- Question: How many planets are in our solar system? Answer: 8\n"
195
- "- Question: Which options show prime numbers? a) 4 b) 7 c) 11 d) 15 Answer: b, c\n"
196
- "- Question: List the Great Lakes. Answer: Superior, Michigan, Huron, Erie, Ontario\n"
197
- "- Question: Is the sun a star? Answer: yes\n"
198
- "- Question: What is written on the back of the image? Answer: Unknown"
199
- )
200
 
201
- def enhance_search_query(self, question: str) -> str:
202
- """Enhance the search query based on question type"""
203
- question_lower = question.lower()
204
-
205
- # 添加特定关键词以提高搜索质量
206
- if "how many" in question_lower:
207
- return question + " exact number statistics"
208
- elif "when" in question_lower:
209
- return question + " exact date"
210
- elif "who" in question_lower:
211
- return question + " person biography"
212
- elif "where" in question_lower:
213
- return question + " location"
214
- elif "which" in question_lower and any(word in question_lower for word in ["option", "choice"]):
215
- # 对于选择题,提取选项内容加入搜索
216
- options = re.findall(r'[a-d]\)(.*?)(?=[a-d]\)|$)', question)
217
- if options:
218
- return question + " " + " ".join(options)
219
- elif any(word in question_lower for word in ["list", "name all", "what are"]):
220
- return question + " complete list"
221
- elif "code" in question_lower or "python" in question_lower:
222
- return question + " code example"
223
-
224
- return question
225
 
226
- def create_prompt(self, question: str, search_results: Optional[List[Dict[str, str]]] = None) -> str:
227
- """Create a prompt for the model with optional search results"""
228
- # 检测问题类型
229
- question_lower = question.lower()
230
- is_multiple_choice = "option" in question_lower or re.search(r'[a-d]\)', question)
231
- is_numerical = "how many" in question_lower or "number of" in question_lower
232
- is_list_question = any(word in question_lower for word in ["list", "name all", "what are"])
233
- is_date_question = "when" in question_lower or "what year" in question_lower or "date" in question_lower
234
- is_yes_no = question_lower.startswith("is ") or question_lower.startswith("are ") or question_lower.startswith("does ") or question_lower.startswith("do ")
235
-
236
- prompt = [f"Question: {question}\n\n"]
237
-
238
- if search_results:
239
- prompt.append("I found the following information:\n")
240
- for i, result in enumerate(search_results, 1):
241
- title = result.get('title', 'No title')
242
- body = result.get('body', 'No description')[:300]
243
- prompt.append(f"Source {i}: {title}\n{body}\n\n")
244
-
245
- prompt.append("Instructions:\n")
246
-
247
- # 添加针对特定问题类型的指导
248
- if is_multiple_choice:
249
- prompt.append("- This is a multiple choice question. Respond ONLY with the letter(s) of the correct option(s), like 'a' or 'b, c'.\n")
250
- elif is_numerical:
251
- prompt.append("- This is a numerical question. Respond ONLY with the number, without any units or explanation.\n")
252
- elif is_list_question:
253
- prompt.append("- This is a list question. Provide items as comma-separated values without numbering or bullet points.\n")
254
- elif is_date_question:
255
- prompt.append("- This is a date question. Provide only the specific date or year without explanation.\n")
256
- elif is_yes_no:
257
- prompt.append("- This is a yes/no question. Respond ONLY with 'yes' or 'no'.\n")
258
-
259
- prompt.append("- Your answer must be extremely concise - no explanations, no reasoning, no context.\n")
260
- prompt.append("- If you cannot determine the answer with high confidence, respond ONLY with 'Unknown'.\n")
261
- prompt.append("- NEVER include phrases like 'the answer is' or 'based on'.\n\n")
262
-
263
- # 添加针对特定问题的示例
264
- if is_multiple_choice:
265
- prompt.append("Example: If asked 'Which options show prime numbers? a) 4 b) 7 c) 11 d) 15', answer only 'b, c'\n\n")
266
- elif is_numerical:
267
- prompt.append("Example: If asked 'How many planets are in our solar system?', answer only '8'\n\n")
268
- elif is_list_question:
269
- prompt.append("Example: If asked 'List the Great Lakes', answer only 'Superior, Michigan, Huron, Erie, Ontario'\n\n")
270
-
271
- prompt.append("Answer: ")
272
-
273
- return "".join(prompt)
274
-
275
- def should_use_search(self, question: str) -> bool:
276
- """Determine if search should be used for this question"""
277
- question_lower = question.lower()
278
-
279
- # 不应该搜索的问题类型
280
- no_search_patterns = [
281
- "tfel", # 反向拼写问题
282
- "chess position",
283
- "image",
284
- "write a",
285
- "calculate",
286
- "compute",
287
- "solve this equation",
288
- "what is the opposite of",
289
- "what does .* mean in"
290
- ]
291
-
292
- for pattern in no_search_patterns:
293
- if pattern in question_lower:
294
- return False
295
-
296
- # 特殊处理反向拼写问题
297
- if '.remna eht sa "tfel" drow eht fo etisoppo eht etirw' in question_lower:
298
- return False
299
-
300
- # 应该搜索的问题类型
301
- search_triggers = [
302
- "what", "who", "when", "where", "how", "which",
303
- "why", "list", "name", "find", "identify", "describe",
304
- "explain", "tell me", "show", "give", "provide",
305
- "capital of", "population of", "invented", "published",
306
- "released", "founded", "created", "discovered",
307
- "located", "born", "died", "year", "date"
308
- ]
309
-
310
- # 如果包含搜索触发词,应该搜索
311
- if any(trigger in question_lower for trigger in search_triggers):
312
- return True
313
-
314
- # 如果是问句但不包含特定模式,也应该搜索
315
- if "?" in question and len(question_lower.split()) > 3:
316
- return True
317
-
318
- return False
319
-
320
- def clean_answer(self, answer: str, question: str) -> str:
321
- """Clean up the model's answer based on question type"""
322
- # 基本清理
323
- answer = answer.strip()
324
-
325
- # 移除常见前缀
326
- prefixes_to_remove = [
327
- "Answer:", "The answer is:", "I believe", "I think",
328
- "Based on", "According to", "The answer would be",
329
- "The correct answer is", "My answer is", "From the information",
330
- "From the search results", "The information suggests",
331
- "The sources indicate", "It appears that", "It seems that"
332
- ]
333
-
334
- for prefix in prefixes_to_remove:
335
- if answer.lower().startswith(prefix.lower()):
336
- answer = answer[len(prefix):].strip()
337
-
338
- # 移除引号
339
- if (answer.startswith('"') and answer.endswith('"')) or \
340
- (answer.startswith("'") and answer.endswith("'")):
341
- answer = answer[1:-1].strip()
342
-
343
- # 移除末尾的标点符号
344
- answer = answer.rstrip(".!,;:")
345
-
346
- # 检测问题类型
347
- question_lower = question.lower()
348
-
349
- # 处理特殊问题类型
350
- if "how many" in question_lower or "number of" in question_lower:
351
- # 尝试提取数字
352
- numbers = re.findall(r'\d+', answer)
353
- if numbers:
354
- return numbers[0]
355
-
356
- elif "which" in question_lower and ("option" in question_lower or re.search(r'[a-d]\)', question)):
357
- # 尝试提取选项字母
358
- options = re.findall(r'[a-dA-D]', answer)
359
- if options:
360
- return ", ".join(option.lower() for option in options)
361
-
362
- elif question_lower.startswith("is ") or question_lower.startswith("are ") or question_lower.startswith("does ") or question_lower.startswith("do "):
363
- # 处理是/否问题
364
- answer_lower = answer.lower()
365
- if "yes" in answer_lower:
366
- return "yes"
367
- elif "no" in answer_lower:
368
- return "no"
369
-
370
- # 处理反向拼写问题
371
- if '.remna eht sa "tfel" drow eht fo etisoppo eht etirw' in question_lower:
372
- return "right"
373
-
374
- # 处理列表问题,确保格式正确
375
- if any(word in question_lower for word in ["list", "name all", "what are"]):
376
- # 移除列表标记
377
- answer = re.sub(r'^\s*[\-\*\d]+\.\s*', '', answer)
378
- answer = re.sub(r'\n\s*[\-\*\d]+\.\s*', ', ', answer)
379
-
380
- # 确保列表项之间使用逗号分隔
381
- if "\n" in answer:
382
- answer = answer.replace("\n", ", ")
383
-
384
- # 修复多余的逗号和空格
385
- answer = re.sub(r',\s*,', ',', answer)
386
- answer = re.sub(r'\s+', ' ', answer)
387
-
388
- return answer
389
-
390
  def __call__(self, question: str) -> str:
391
- """
392
- Process a question and return an answer.
393
-
394
- Args:
395
- question: The question to answer
396
-
397
- Returns:
398
- The answer to the question
399
- """
400
- print(f"Agent received question: {question[:100]}...")
401
-
402
- # 特殊问题处理
403
- if '.remna eht sa "tfel" drow eht fo etisoppo eht etirw' in question.lower():
404
- return "right"
405
-
406
- if "chess position" in question.lower() or "image" in question.lower():
407
- return "Unknown"
408
-
409
- # 确定是否应该使用搜索
410
- should_search = self.should_use_search(question)
411
-
412
- search_results = None
413
- if should_search and self.search_tool:
414
- print(f"Searching for information about: {question}")
415
- # 使用增强的搜索查询
416
- search_query = self.enhance_search_query(question)
417
- search_response = self.search_tool(search_query, max_results=8)
418
- search_results = search_response.get("results", [])
419
- print(f"Found {len(search_results)} search results")
420
-
421
- # 创建提示词和生成回答
422
- prompt = self.create_prompt(question, search_results)
423
- system_prompt = self.create_system_prompt()
424
-
425
- print("Generating response with LLM...")
426
- # 使用较低的温度以获得更确定性的回答
427
- response = self.model.generate(prompt, system_prompt, temperature=0.1)
428
-
429
- # 清理回答
430
- answer = self.clean_answer(response, question)
431
-
432
- print(f"Final answer: {answer[:100]}...")
433
- return answer
434
 
435
- # 简化版本,不使用OAuthProfile
436
- def run_and_submit_all():
437
  """
438
- Fetches all questions, runs the Agent on them, submits all answers,
439
  and displays the results.
440
  """
441
- space_id = os.getenv("SPACE_ID")
 
 
 
 
 
 
 
 
 
442
  api_url = DEFAULT_API_URL
443
  questions_url = f"{api_url}/questions"
444
  submit_url = f"{api_url}/submit"
445
 
 
446
  try:
447
- api_key = os.getenv("GEMINI_API_KEY")
448
- if not api_key:
449
- return "Error: GEMINI_API_KEY environment variable not found. Please set it in your Space settings.", None
450
-
451
- model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key='AIzaSyAhmwogxZFBtt7_OUsKQGNeOYF7ced39bM')
452
- agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
453
  except Exception as e:
454
  print(f"Error instantiating agent: {e}")
455
  return f"Error initializing agent: {e}", None
456
-
457
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
458
- if not space_id:
459
- agent_code = "https://huggingface.co/spaces/lethaq/Final_Assignment_Template/tree/main"
460
- print(f"Agent code URL: {agent_code}")
461
 
 
462
  print(f"Fetching questions from: {questions_url}")
463
  try:
464
- response = requests.get(questions_url, timeout=20)
465
  response.raise_for_status()
466
  questions_data = response.json()
467
  if not questions_data:
468
- print("Fetched questions list is empty.")
469
- return "Fetched questions list is empty or invalid format.", None
470
  print(f"Fetched {len(questions_data)} questions.")
471
  except requests.exceptions.RequestException as e:
472
  print(f"Error fetching questions: {e}")
473
  return f"Error fetching questions: {e}", None
474
  except requests.exceptions.JSONDecodeError as e:
475
- print(f"Error decoding JSON response from questions endpoint: {e}")
476
- print(f"Response text: {response.text[:500]}")
477
- return f"Error decoding server response for questions: {e}", None
478
  except Exception as e:
479
  print(f"An unexpected error occurred fetching questions: {e}")
480
  return f"An unexpected error occurred fetching questions: {e}", None
481
 
 
482
  results_log = []
483
  answers_payload = []
484
  print(f"Running agent on {len(questions_data)} questions...")
@@ -489,40 +93,34 @@ def run_and_submit_all():
489
  print(f"Skipping item with missing task_id or question: {item}")
490
  continue
491
  try:
492
- # Attachment 摘要插入
493
- attachment_context = fetch_and_summarize_attachment(task_id, question_text)
494
- # ② 拼 prompt(根據你的 agent 用法):
495
- # (方案A)如果 agent 支援 attachment_context 參數
496
- # submitted_answer = agent(question_text, attachment_context=attachment_context)
497
- # (方案B)最直接:把 attachment 拼到問題開頭
498
- full_question = (attachment_context + "\n" + question_text).strip()
499
- submitted_answer = agent(full_question)
500
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
501
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
502
- print(f"Answer for task {task_id}: {submitted_answer[:50]}...")
503
  except Exception as e:
504
- print(f"Error running agent on task {task_id}: {e}")
505
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
506
-
507
 
508
  if not answers_payload:
509
  print("Agent did not produce any answers to submit.")
510
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
511
 
 
 
 
 
 
 
512
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
513
  try:
514
- submission_dict = {
515
- "username": DEFAULT_USERNAME,
516
- "agent_code": agent_code,
517
- "answers": answers_payload
518
- }
519
- response = requests.post(submit_url, json=submission_dict, timeout=60)
520
  response.raise_for_status()
521
  result_data = response.json()
522
  final_status = (
523
  f"Submission Successful!\n"
524
- f"Score: {result_data.get('score', 'N/A')}% "
 
525
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
 
526
  )
527
  print("Submission successful.")
528
  results_df = pd.DataFrame(results_log)
@@ -557,62 +155,38 @@ def run_and_submit_all():
557
 
558
  # --- Build Gradio Interface using Blocks ---
559
  with gr.Blocks() as demo:
560
- gr.Markdown("# Gemini Agent for GAIA Benchmark")
561
  gr.Markdown(
562
  """
563
  **Instructions:**
564
- 1. Make sure you have set the GEMINI_API_KEY environment variable in your Space settings.
565
- 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and submit answers.
566
-
567
- This agent uses:
568
- - Gemini 2.0 Flash Lite model for reasoning
569
- - DuckDuckGo search for retrieving information
 
570
  """
571
  )
572
 
 
 
573
  run_button = gr.Button("Run Evaluation & Submit All Answers")
574
 
575
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
576
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
577
 
578
- # Add a single question test feature
579
- gr.Markdown("## Test Single Question")
580
- with gr.Row():
581
- question_in = gr.Textbox(label="Question", lines=3)
582
- answer_out = gr.Textbox(label="Answer", lines=3, interactive=False)
583
-
584
- test_btn = gr.Button("Test Question", variant="secondary")
585
-
586
- # Add a function to test a single question
587
- def test_single_question(question):
588
- try:
589
- api_key = os.getenv("GEMINI_API_KEY")
590
- if not api_key:
591
- return "Error: GEMINI_API_KEY environment variable not found"
592
-
593
- model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=api_key)
594
- agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
595
- answer = agent(question)
596
- return answer
597
- except Exception as e:
598
- return f"Error: {str(e)}"
599
-
600
- # 完全移除OAuthProfile相关代码
601
  run_button.click(
602
  fn=run_and_submit_all,
603
  outputs=[status_output, results_table]
604
  )
605
-
606
- test_btn.click(
607
- fn=test_single_question,
608
- inputs=[question_in],
609
- outputs=[answer_out]
610
- )
611
 
612
  if __name__ == "__main__":
613
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
614
  space_host_startup = os.getenv("SPACE_HOST")
615
- space_id_startup = os.getenv("SPACE_ID")
616
 
617
  if space_host_startup:
618
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -620,7 +194,7 @@ if __name__ == "__main__":
620
  else:
621
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
622
 
623
- if space_id_startup:
624
  print(f"✅ SPACE_ID found: {space_id_startup}")
625
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
626
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
@@ -629,5 +203,6 @@ if __name__ == "__main__":
629
 
630
  print("-"*(60 + len(" App Starting ")) + "\n")
631
 
632
- print("Launching Gradio Interface for Gemini Agent Evaluation...")
633
- demo.launch(debug=False, share=False)
 
 
1
+ """ Basic Agent Evaluation Runner"""
2
  import os
3
+ import inspect
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import gradio as gr
5
  import requests
 
6
  import pandas as pd
7
+ from langchain_core.messages import HumanMessage
8
+ from agent import build_graph
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
 
12
+ # (Keep Constants as is)
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # --- Basic Agent Definition ---
17
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
class BasicAgent:
    """Callable agent that answers a question by invoking a compiled graph.

    The graph is obtained from ``build_graph()`` once, at construction time,
    and reused for every question.
    """

    # Marker expected to precede the answer text in the graph's last message.
    # NOTE(review): the previous implementation dropped the first 14
    # characters unconditionally, which equals len("FINAL ANSWER: ");
    # confirm this marker against the prompt used by build_graph().
    _ANSWER_MARKER = "FINAL ANSWER:"

    def __init__(self):
        print("BasicAgent initialized.")
        self.graph = build_graph()

    @classmethod
    def _extract_answer(cls, content) -> str:
        """Return the answer text carried by the final message content.

        If the marker is present, everything after its last occurrence is
        returned; otherwise the whole content is returned. Surrounding
        whitespace is removed in both cases. Slicing a fixed number of
        characters (the old behaviour) silently truncated any reply that
        did not start with the marker.
        """
        # Message content is not guaranteed to be a plain string here
        # (presumably it can be a list of content parts - TODO confirm),
        # so coerce before using string methods.
        text = content if isinstance(content, str) else str(content)
        _, found, tail = text.rpartition(cls._ANSWER_MARKER)
        return tail.strip() if found else text.strip()

    def __call__(self, question: str) -> str:
        """Run the graph on ``question`` and return the extracted answer.

        Args:
            question: The question text to send to the graph.

        Returns:
            The content of the last message in the graph's result, with the
            answer marker (if any) and surrounding whitespace removed.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # Wrap the question in a HumanMessage, the input format the graph is
        # invoked with.
        state = self.graph.invoke({"messages": [HumanMessage(content=question)]})
        return self._extract_answer(state["messages"][-1].content)
33
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
36
  """
37
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
38
  and displays the results.
39
  """
40
+ # --- Determine HF Space Runtime URL and Repo URL ---
41
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
42
+
43
+ if profile:
44
+ username= f"{profile.username}"
45
+ print(f"User logged in: {username}")
46
+ else:
47
+ print("User not logged in.")
48
+ return "Please Login to Hugging Face with the button.", None
49
+
50
  api_url = DEFAULT_API_URL
51
  questions_url = f"{api_url}/questions"
52
  submit_url = f"{api_url}/submit"
53
 
54
+ # 1. Instantiate Agent ( modify this part to create your agent)
55
  try:
56
+ agent = BasicAgent()
 
 
 
 
 
57
  except Exception as e:
58
  print(f"Error instantiating agent: {e}")
59
  return f"Error initializing agent: {e}", None
60
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
61
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
62
+ print(agent_code)
 
 
63
 
64
+ # 2. Fetch Questions
65
  print(f"Fetching questions from: {questions_url}")
66
  try:
67
+ response = requests.get(questions_url, timeout=15)
68
  response.raise_for_status()
69
  questions_data = response.json()
70
  if not questions_data:
71
+ print("Fetched questions list is empty.")
72
+ return "Fetched questions list is empty or invalid format.", None
73
  print(f"Fetched {len(questions_data)} questions.")
74
  except requests.exceptions.RequestException as e:
75
  print(f"Error fetching questions: {e}")
76
  return f"Error fetching questions: {e}", None
77
  except requests.exceptions.JSONDecodeError as e:
78
+ print(f"Error decoding JSON response from questions endpoint: {e}")
79
+ print(f"Response text: {response.text[:500]}")
80
+ return f"Error decoding server response for questions: {e}", None
81
  except Exception as e:
82
  print(f"An unexpected error occurred fetching questions: {e}")
83
  return f"An unexpected error occurred fetching questions: {e}", None
84
 
85
+ # 3. Run your Agent
86
  results_log = []
87
  answers_payload = []
88
  print(f"Running agent on {len(questions_data)} questions...")
 
93
  print(f"Skipping item with missing task_id or question: {item}")
94
  continue
95
  try:
96
+ submitted_answer = agent(question_text)
 
 
 
 
 
 
 
97
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
98
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
99
  except Exception as e:
100
+ print(f"Error running agent on task {task_id}: {e}")
101
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
102
 
103
  if not answers_payload:
104
  print("Agent did not produce any answers to submit.")
105
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
106
 
107
+ # 4. Prepare Submission
108
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
109
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
110
+ print(status_update)
111
+
112
+ # 5. Submit
113
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
114
  try:
115
+ response = requests.post(submit_url, json=submission_data, timeout=60)
 
 
 
 
 
116
  response.raise_for_status()
117
  result_data = response.json()
118
  final_status = (
119
  f"Submission Successful!\n"
120
+ f"User: {result_data.get('username')}\n"
121
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
122
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
123
+ f"Message: {result_data.get('message', 'No message received.')}"
124
  )
125
  print("Submission successful.")
126
  results_df = pd.DataFrame(results_log)
 
155
 
156
  # --- Build Gradio Interface using Blocks ---
157
  with gr.Blocks() as demo:
158
+ gr.Markdown("# Basic Agent Evaluation Runner")
159
  gr.Markdown(
160
  """
161
  **Instructions:**
162
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
163
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
164
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
165
+ ---
166
+ **Disclaimers:**
167
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
168
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
169
  """
170
  )
171
 
172
+ gr.LoginButton()
173
+
174
  run_button = gr.Button("Run Evaluation & Submit All Answers")
175
 
176
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
177
+ # Removed max_rows=10 from DataFrame constructor
178
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  run_button.click(
181
  fn=run_and_submit_all,
182
  outputs=[status_output, results_table]
183
  )
 
 
 
 
 
 
184
 
185
  if __name__ == "__main__":
186
  print("\n" + "-"*30 + " App Starting " + "-"*30)
187
+ # Check for SPACE_HOST and SPACE_ID at startup for information
188
  space_host_startup = os.getenv("SPACE_HOST")
189
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
190
 
191
  if space_host_startup:
192
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
194
  else:
195
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
196
 
197
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
198
  print(f"✅ SPACE_ID found: {space_id_startup}")
199
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
200
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
203
 
204
  print("-"*(60 + len(" App Starting ")) + "\n")
205
 
206
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
207
+ demo.launch(debug=True, share=False)
208
+