mohammedff5642 committed on
Commit
d634beb
·
verified ·
1 Parent(s): 9534834

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -93
app.py CHANGED
@@ -1,17 +1,14 @@
1
  import os
2
- import io
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
- from datasets import load_dataset
7
- from huggingface_hub import snapshot_download
8
  from agent import GaiaAgent
9
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  def run_and_submit_all(profile: gr.OAuthProfile | None):
13
  """
14
- Load GAIA dataset locally, run agent on questions, submit answers
15
  """
16
 
17
  # Check login
@@ -22,67 +19,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
22
  space_id = os.getenv("SPACE_ID")
23
 
24
  print(f"\n[run_and_submit_all] starting for user: {username}")
25
- print(f"[run_and_submit_all] space_id: {space_id}")
26
 
27
- # Download GAIA dataset
28
- print("[run_and_submit_all] downloading GAIA dataset...")
29
  try:
30
- data_dir = snapshot_download(
31
- repo_id="gaia-benchmark/GAIA",
32
- repo_type="dataset"
33
- )
34
- print(f"[run_and_submit_all] dataset downloaded to: {data_dir}")
35
  except Exception as e:
36
- print(f"[run_and_submit_all] error downloading dataset: {e}")
37
- return f"Error downloading dataset: {e}", None
38
-
39
- # Load dataset
40
- print("[run_and_submit_all] loading dataset...")
41
- try:
42
- dataset = load_dataset(data_dir, "2023_level1", split="validation", cache_dir=data_dir)
43
- print(f"[run_and_submit_all] loaded {len(dataset)} questions")
44
- except Exception as e:
45
- print(f"[run_and_submit_all] error loading dataset: {e}")
46
- return f"Error loading dataset: {e}", None
47
-
48
- # Map task IDs to file paths
49
- id_to_path = {}
50
- for ex in dataset:
51
- if ex.get("file_path") and ex.get("file_name"):
52
- full_path = os.path.join(data_dir, ex["file_path"])
53
- if os.path.exists(full_path):
54
- id_to_path[ex["task_id"]] = full_path
55
-
56
- print(f"[run_and_submit_all] mapped {len(id_to_path)} task IDs to files")
57
-
58
- # Target task IDs (the 20 questions)
59
- target_task_ids = [
60
- "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
61
- "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
62
- "2d83110e-a098-4ebb-9987-066c06fa42d0",
63
- "cca530fc-4052-43b2-b130-b30968d8aa44",
64
- "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
65
- "6f37996b-2ac7-44b0-8e68-6d28256631b4",
66
- "9d191bce-651d-4746-be2d-7ef8ecadb9c2",
67
- "cabe07ed-9eca-40ea-8ead-410ef5e83f91",
68
- "3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
69
- "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
70
- "305ac316-eef6-4446-960a-92d80d542f82",
71
- "f918266a-b3e0-4914-865d-4faa564f1aef",
72
- "3f57289b-8c60-48be-bd80-01f8099ca449",
73
- "1f975693-876d-457b-a649-393859e79bf3",
74
- "840bfca7-4f7b-481a-8794-c560c340185d",
75
- "bda648d7-d618-4883-88f4-3466eabd860e",
76
- "cf106601-ab4f-4af9-b045-5295fe67b37d",
77
- "a0c07678-e491-4bbc-8f0b-07405144218f",
78
- "7bd855d8-463d-4ed5-93ca-5fe35145f733",
79
- "5a0c1adf-205e-4841-a666-7c3ef95def9d"
80
- ]
81
-
82
- # Filter to target questions
83
- print(f"[run_and_submit_all] filtering to {len(target_task_ids)} target questions...")
84
- subset = dataset.filter(lambda ex: ex["task_id"] in target_task_ids)
85
- subset = subset.to_list()
86
 
87
  # Initialize agent
88
  print("[run_and_submit_all] initializing agent...")
@@ -92,35 +40,38 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
92
  results_log = []
93
  answers_payload = []
94
 
95
- for i, item in enumerate(subset):
96
  task_id = item.get("task_id")
97
  question = item.get("question")
98
- file_name = item.get("file_name")
99
- file_path = id_to_path.get(task_id)
100
 
101
- print(f"\n[run_and_submit_all] [{i+1}/{len(subset)}] task_id={task_id}")
102
  print(f" question: {question[:80]}...")
103
- print(f" file: {file_name}")
104
 
105
  file_content = ""
106
 
107
- # Load file if it exists
108
- if file_path and os.path.exists(file_path):
109
  try:
110
- print(f" reading file: {file_path}")
111
- if file_name.endswith((".txt", ".py", ".json", ".csv", ".md")):
112
- with open(file_path, "r", encoding="utf-8") as f:
113
- file_content = f.read()
114
- print(f" loaded {len(file_content)} chars from text file")
 
 
 
 
 
 
 
 
 
115
  else:
116
- # Binary file (image, audio, etc.) - read as base64 for potential use
117
- with open(file_path, "rb") as f:
118
- file_bytes = f.read()
119
- # Just note that we have a binary file
120
- file_content = f"[Binary file: {file_name}, {len(file_bytes)} bytes]"
121
- print(f" loaded binary file ({len(file_bytes)} bytes)")
122
  except Exception as e:
123
- print(f" error reading file: {e}")
124
 
125
  # Run agent
126
  try:
@@ -145,7 +96,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
145
  }
146
 
147
  try:
148
- response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
 
 
 
 
149
  response.raise_for_status()
150
  result = response.json()
151
 
@@ -160,27 +115,26 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
160
  return status_msg, pd.DataFrame(results_log)
161
 
162
  except Exception as e:
163
- error_msg = f"❌ Submission failed: {e}"
164
  print(error_msg)
165
  return error_msg, pd.DataFrame(results_log)
166
 
167
 
168
  # Gradio UI
169
  with gr.Blocks() as demo:
170
- gr.Markdown("# GAIA Agent — Mistral + Local Dataset")
171
  gr.Markdown("""
172
  **How it works:**
173
  1. Click "Login with Hugging Face"
174
  2. Click "Run Evaluation"
175
- 3. Wait for the agent to process all questions
176
- 4. See your score!
177
-
178
- **What's happening:**
179
- - Downloads GAIA benchmark dataset locally
180
- - Runs Mistral agent on each question
181
- - Loads attached files from local filesystem
182
- - Uses DuckDuckGo for web search
183
- - Submits all answers for scoring
184
  """)
185
 
186
  gr.LoginButton()
 
1
  import os
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
 
5
  from agent import GaiaAgent
6
 
7
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
8
 
9
  def run_and_submit_all(profile: gr.OAuthProfile | None):
10
  """
11
+ Fetch questions from GAIA API, run agent, submit answers
12
  """
13
 
14
  # Check login
 
19
  space_id = os.getenv("SPACE_ID")
20
 
21
  print(f"\n[run_and_submit_all] starting for user: {username}")
 
22
 
23
+ # Fetch questions from API
24
+ print("[run_and_submit_all] fetching questions from API...")
25
  try:
26
+ questions_resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
27
+ questions_resp.raise_for_status()
28
+ questions = questions_resp.json()
29
+ print(f"[run_and_submit_all] ✓ fetched {len(questions)} questions")
 
30
  except Exception as e:
31
+ error_msg = f"Error fetching questions: {str(e)[:200]}"
32
+ print(f"[run_and_submit_all] {error_msg}")
33
+ return error_msg, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  # Initialize agent
36
  print("[run_and_submit_all] initializing agent...")
 
40
  results_log = []
41
  answers_payload = []
42
 
43
+ for i, item in enumerate(questions):
44
  task_id = item.get("task_id")
45
  question = item.get("question")
46
+ file_name = item.get("file_name", "")
 
47
 
48
+ print(f"\n[run_and_submit_all] [{i+1}/{len(questions)}] task_id={task_id}")
49
  print(f" question: {question[:80]}...")
50
+ print(f" file: {file_name if file_name else '(none)'}")
51
 
52
  file_content = ""
53
 
54
+ # Try to fetch file if it exists
55
+ if file_name:
56
  try:
57
+ print(f" fetching file: {file_name}...")
58
+ file_resp = requests.get(
59
+ f"{DEFAULT_API_URL}/files/{task_id}",
60
+ timeout=30
61
+ )
62
+ if file_resp.status_code == 200:
63
+ # Try to decode as text first
64
+ try:
65
+ file_content = file_resp.text[:5000]
66
+ print(f" loaded {len(file_content)} chars from file")
67
+ except:
68
+ # If binary, note it
69
+ file_content = f"[Binary file: {file_name}, {len(file_resp.content)} bytes]"
70
+ print(f" loaded binary file")
71
  else:
72
+ print(f" file fetch returned {file_resp.status_code} (skipping)")
 
 
 
 
 
73
  except Exception as e:
74
+ print(f" error fetching file: {e}")
75
 
76
  # Run agent
77
  try:
 
96
  }
97
 
98
  try:
99
+ response = requests.post(
100
+ f"{DEFAULT_API_URL}/submit",
101
+ json=submission_data,
102
+ timeout=60
103
+ )
104
  response.raise_for_status()
105
  result = response.json()
106
 
 
115
  return status_msg, pd.DataFrame(results_log)
116
 
117
  except Exception as e:
118
+ error_msg = f"❌ Submission failed: {str(e)[:200]}"
119
  print(error_msg)
120
  return error_msg, pd.DataFrame(results_log)
121
 
122
 
123
  # Gradio UI
124
  with gr.Blocks() as demo:
125
+ gr.Markdown("# GAIA Agent — Mistral")
126
  gr.Markdown("""
127
  **How it works:**
128
  1. Click "Login with Hugging Face"
129
  2. Click "Run Evaluation"
130
+ 3. Agent processes all 20 questions
131
+ 4. See your score instantly!
132
+
133
+ **Features:**
134
+ - Uses Mistral model via Groq API
135
+ - Web search via DuckDuckGo (free, no keys)
136
+ - Fetches files from GAIA API
137
+ - Automatic answer submission
 
138
  """)
139
 
140
  gr.LoginButton()