DarrenDsa committed on
Commit
cd5f764
·
verified ·
1 Parent(s): 85f4387

Improved logic in app.py

Browse files
Files changed (1) hide show
  1. app.py +227 -210
app.py CHANGED
@@ -1,231 +1,248 @@
1
  import os
2
- import gradio as gr
 
 
3
  import requests
4
- import inspect
5
- import pandas as pd
6
- from smolagents import CodeAgent, HfApiModel, PromptTemplates, OpenAIServerModel
7
- from tools import search_tool, speech_to_text, python_interpreter
8
 
9
- # (Keep Constants as is)
10
- # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
12
 
13
- # --- Basic Agent Definition ---
14
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
15
# --- Environment & model configuration ---
# Outside production, secrets are read from a local .env file. python-dotenv is
# imported lazily so a missing package degrades to "no .env loading" instead of
# the NameError the un-imported load_dotenv() call used to raise.
if os.getenv("ENV") != "production":
    try:
        from dotenv import load_dotenv
    except ImportError:
        print("python-dotenv is not installed; skipping .env loading.")
    else:
        load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# --- Basic Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
# Alternative backend: model = HfApiModel()
model = OpenAIServerModel(model_id="gpt-4.1", api_key=OPENAI_API_KEY)
# Prompt prepended to every question; it pins the "FINAL ANSWER: ..." output format.
system_prompt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
24
-
25
class BasicAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` configured with the project tools."""

    def __init__(self):
        """Build the underlying CodeAgent with the search, speech-to-text and Python tools."""
        self.agent = CodeAgent(
            tools=[search_tool, speech_to_text, python_interpreter],
            model=model,
            # Modules the agent-generated code is allowed to import.
            additional_authorized_imports=[
                "os",
                "io",
                "pathlib",
                "json",
                "csv",
                "pandas",
                "openpyxl",
                "zipfile",
                "mutagen",
                "PIL",
            ],
        )
        print("Agent initialized.")

    def __call__(self, question: str, file_name: str = "") -> str:
        """Run the agent on one question.

        Args:
            question: The question text; the module-level ``system_prompt`` is prepended.
            file_name: Optional name of a file attached to the task. Defaults to ""
                so callers that only pass the question keep working.

        Returns:
            Whatever ``CodeAgent.run`` returns for the task.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        task = f"{system_prompt} \n{question}"
        if file_name:
            # additional_args must be a mapping of named variables (see the
            # smolagents run() docs); a bare string is not a valid value here.
            response = self.agent.run(task=task, additional_args={"file_name": file_name})
        else:
            # Covers both "" and None, so `response` is always bound.
            response = self.agent.run(task)
        print(f"Agent's answer: {response}")
        return response
56
-
57
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the BasicAgent on them, submit all answers,
    and report the results.

    Args:
        profile: Hugging Face OAuth profile injected by Gradio, or None when
            the user is not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas DataFrame
        with one row per attempted task, or None when the run stops before any
        task is attempted.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Used to build the link to the code

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # When running as a Hugging Face Space this link points to the codebase
    # (useful for others, so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError derives from RequestException, so it must be handled
    # first; listed after its parent the branch could never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        # BasicAgent.__call__ takes the attachment name as its second argument.
        # NOTE(review): assumes the API exposes it under "file_name" - confirm.
        file_name = item.get("file_name") or ""
        try:
            submitted_answer = agent(question_text, file_name)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # One failing task must not abort the whole run.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", results_df

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    # Every failure path reports its message together with the per-task results.
    print(status_message)
    return status_message, results_df
176
-
177
-
178
- # --- Build Gradio Interface using Blocks ---
179
# --- Build Gradio Interface using Blocks ---
# Layout: instructions, login button, run button, status box, results table.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs: run_and_submit_all only takes the OAuth profile,
    # which is declared through its type annotation.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
208
 
209
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup, for information only.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST normally already ends in ".hf.space"; only append the
        # suffix when it is missing so the printed URL is never doubled.
        runtime_host = (
            space_host_startup
            if space_host_startup.endswith(".hf.space")
            else f"{space_host_startup}.hf.space"
        )
        print(f"   Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
 
1
  import os
2
+ import re
3
+ import json
4
+ import time
5
  import requests
6
+ import gradio as gr
 
 
 
7
 
8
+ # ── Constants ──────────────────────────────────────────────────────────────────
 
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
11
 
12
+
13
+ # ── Helper Tools ───────────────────────────────────────────────────────────────
14
def download_file(task_id: str) -> str:
    """Download the file attached to a task and return it as text.

    Args:
        task_id: Identifier of the task whose file is fetched from
            ``{DEFAULT_API_URL}/files/{task_id}``.

    Returns:
        The first 10000 characters of the response body decoded as text, or a
        string starting with ``"[File error"`` on any failure. Callers rely on
        that prefix, and on this function never raising.
    """
    # Without an id the URL would be ".../files/", which can only fail;
    # skip the network round trip.
    if not task_id:
        return "[File error: missing task_id]"

    url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        # NOTE(review): binary attachments (images, spreadsheets) are decoded
        # as text here, so their content is not meaningful to the model.
        return resp.text[:10000]
    except Exception as e:
        # Deliberately broad: this helper is best-effort and must not raise.
        return f"[File error: {e}]"
23
+
24
+
25
+ # ── Direct Perplexity Call ─────────────────────────────────────────────────────
26
def call_perplexity(
    system_prompt: str,
    user_message: str,
    api_key: str,
    *,
    model: str = "sonar-pro",
    temperature: float = 0.2,
    max_tokens: int = 500,
    timeout: float = 60,
) -> str:
    """Send one chat-completion request to the Perplexity API.

    Args:
        system_prompt: Content of the ``system`` message.
        user_message: Content of the ``user`` message.
        api_key: Perplexity API key, sent as a Bearer token.
        model: Model identifier placed in the request body.
        temperature: Sampling temperature placed in the request body.
        max_tokens: Completion token limit placed in the request body.
        timeout: Request timeout in seconds.

    Returns:
        The text of the first choice on success. On failure, a string starting
        with ``"HTTP_ERROR:"`` (non-2xx status) or ``"ERROR:"`` (anything
        else). This function never raises.
    """
    # Fail fast instead of sending "Bearer None" / "Bearer " to the API.
    if not api_key:
        return "ERROR: missing Perplexity API key"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        resp = requests.post(PERPLEXITY_API_URL, headers=headers, json=data, timeout=timeout)
        resp.raise_for_status()
        result = resp.json()
        content = result["choices"][0]["message"]["content"]
    except requests.HTTPError as e:
        # e.response may be None; guard both the body and the status code.
        response = e.response
        status = response.status_code if response is not None else "unknown"
        error_body = response.text if response is not None else str(e)
        print(f"[DEBUG] HTTP Error: {status} - {error_body[:200]}")
        return f"HTTP_ERROR: {status}"
    except Exception as e:
        # Covers network failures and unexpected response shapes (KeyError, IndexError, ...).
        print(f"[DEBUG] Exception: {str(e)[:200]}")
        return f"ERROR: {e}"

    if content is None:
        return "ERROR: empty response content"
    content = str(content)
    print(f"[DEBUG] Raw Perplexity response: {content[:200]}")  # Debug log
    return content
57
+
58
+
59
+ # ── Answer Cleaner ─────────────────────────────────────────────────────────────
60
def clean_answer(raw: str) -> str:
    """Strip preamble and formatting from a model reply, leaving the bare answer.

    Steps, in order: trim whitespace; return error markers untouched; unwrap
    markdown bold; drop one leading answer prefix (case-insensitive); keep
    only the first line when it is shorter than 150 characters; remove one
    pair of matching surrounding quotes.

    Args:
        raw: The model reply. Non-string values are converted with ``str``;
            ``None`` yields an empty string.

    Returns:
        The cleaned answer. Strings starting with ``"ERROR:"`` or
        ``"HTTP_ERROR:"`` are returned as-is (trimmed) so failures stay
        visible for debugging.
    """
    if raw is None:
        return ""

    # Keep a string copy so the debug slice below cannot fail on non-str input.
    original = str(raw)
    text = original.strip()

    # Don't clean error messages - return them as-is for debugging.
    if text.startswith(("ERROR:", "HTTP_ERROR:")):
        return text

    # Unwrap bold first, so prefixes wrapped in it ("**Answer:** 42",
    # "**Answer: 42**") are still recognised by the prefix check below.
    text = re.sub(r"\*\*(.*?)\*\*", r"\1", text).strip()

    # Lower-case prefixes, matched case-insensitively; "the answer is:" must
    # precede "the answer is" so the colon is consumed too.
    prefixes = (
        "final answer:",
        "the answer is:",
        "the answer is",
        "answer:",
        "result:",
        "based on",
    )
    lowered = text.lower()
    for prefix in prefixes:
        if lowered.startswith(prefix):
            text = text[len(prefix):].strip()
            break

    # Take the first line if multi-line and short enough.
    if "\n" in text:
        first_line = text.split("\n")[0].strip()
        if len(first_line) < 150:
            text = first_line

    # Remove one pair of matching surrounding quotes.
    if len(text) >= 2 and text[0] in ('"', "'") and text[0] == text[-1]:
        text = text[1:-1].strip()

    # Debug log the cleaning.
    if text != original:
        print(f"[DEBUG] Cleaned '{original[:100]}...' → '{text[:100]}'")

    return text
97
+
98
+
99
+ # ── System Prompt ──────────────────────────────────────────────────────────────
100
# System message sent with every question: asks for the bare answer only and
# pins the formatting rules (no trailing period, digits, full names, lists).
SYSTEM_PROMPT = """Answer the question with ONLY the final answer. No explanation.
RULES:
- NO periods at end ("right" NOT "right.")
- Numbers: digits only ("42")
- Country names: full name ("Malta" NOT "MLT")
- Lists: comma-separated
EXAMPLES:
Q: "What year was Mona Lisa painted?" → 1503
Q: "Opposite of left" → right
"""
110
+
111
+
112
+ # ── Main Runner ────────────────────────────────────────────────────────────────
113
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each one through Perplexity, and submit the answers.

    Args:
        profile: Hugging Face OAuth profile injected by Gradio, or None when
            the user is not logged in.

    Returns:
        A ``(log_text, rows)`` tuple. ``log_text`` is the newline-joined run
        log. ``rows`` is a list of ``[#, Task ID, Question, Answer]`` rows for
        the results table, or None when the run stops before any question is
        processed.
    """
    if not profile:
        return "❌ Please log in first.", None

    username = profile.username
    api_key = os.environ.get("PERPLEXITY_API_KEY")

    if not api_key:
        return "❌ PERPLEXITY_API_KEY not found in Space secrets!", None

    # Never print any part of the secret: stdout and the UI log are both
    # visible to others. Presence and length are enough for debugging.
    print(f"[DEBUG] API key exists, length: {len(api_key)}")

    space_id = os.environ.get("SPACE_ID", "")
    agent_code_url = (
        f"https://huggingface.co/spaces/{space_id}/tree/main"
        if space_id
        else f"https://huggingface.co/spaces/{username}/my-gaia-agent/tree/main"
    )

    log = [f"👤 User: {username}", "📥 Fetching questions..."]

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"❌ Failed to fetch questions: {e}", None

    if not isinstance(questions, list) or not questions:
        return "❌ Failed to fetch questions: empty or invalid response", None

    total = len(questions)
    log.append(f"✅ {total} questions loaded")

    # Test API with a simple question first so key/model problems show up early in the log.
    log.append("🤖 Testing Perplexity API...")
    test_answer = call_perplexity("You are helpful.", "What is 2+2?", api_key)
    log.append(f"🧪 Test call result: {test_answer[:100]}")
    log.append("─" * 40)

    file_keywords = ("file", "image", "attached", "spreadsheet", "document", "excel")
    # Prefixes that mark an answer as a failure rather than a real reply.
    failure_prefixes = ("ERROR", "HTTP_ERROR", "EXCEPTION")

    answers = []
    rows = []

    for i, q in enumerate(questions):
        task_id = str(q.get("task_id") or "")
        question_text = str(q.get("question") or "")
        display_question = question_text[:65] + "..." if len(question_text) > 65 else question_text
        log.append(f"[{i+1}/{total}] {question_text[:65]}...")

        # An answer without a task id cannot be matched by the scoring server.
        if not task_id:
            log.append("   ⚠️ Skipped: missing task_id")
            continue

        # Rate limit: pause between consecutive Perplexity calls.
        if answers:
            time.sleep(4)

        # Attach file content when the task declares a file or the question mentions one.
        # NOTE(review): assumes the API exposes attachments as "file_name" - confirm.
        prompt_question = question_text
        lowered = question_text.lower()
        if q.get("file_name") or any(word in lowered for word in file_keywords):
            file_content = download_file(task_id)
            if not file_content.startswith("[File error"):
                prompt_question = f"{question_text}\n\nFile content:\n{file_content[:2000]}"

        # Call Perplexity
        try:
            user_prompt = f"Question: {prompt_question}\n\nAnswer with ONLY the answer, nothing else."
            raw_answer = call_perplexity(SYSTEM_PROMPT, user_prompt, api_key)
            final_answer = clean_answer(raw_answer)

            log.append(f"   📝 Raw: {raw_answer[:80]}")
            log.append(f"   ✅ Final: {final_answer[:80] if final_answer else '(empty after cleaning)'}")
        except Exception as e:
            final_answer = f"EXCEPTION: {str(e)[:80]}"
            log.append(f"   ❌ Error: {final_answer}")

        answers.append({"task_id": task_id, "submitted_answer": final_answer})
        # Rows are plain lists in header order, a shape gr.DataFrame accepts directly.
        rows.append([i + 1, task_id[:8] + "...", display_question, final_answer or "(empty)"])

    answered = sum(
        1
        for a in answers
        if a["submitted_answer"] and not a["submitted_answer"].startswith(failure_prefixes)
    )
    log.append("─" * 40)
    log.append(f"📊 Answered: {answered}/{total}")

    if not answers:
        log.append("❌ Nothing to submit.")
        return "\n".join(log), rows

    # Submit
    payload = {
        "username": username,
        "agent_code": agent_code_url,
        "answers": answers,
    }

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            log.append(f"📤 Submitting ({attempt}/{max_attempts})...")
            sub = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=60)
            sub.raise_for_status()
            data = sub.json()
        except Exception as e:
            log.append(f"⚠️ Failed: {str(e)[:60]}")
            if attempt < max_attempts:
                time.sleep(5)
            continue

        score = data.get("score", "N/A")
        correct = data.get("correct_count", "?")
        attempted = data.get("total_attempted", len(answers))
        log += [
            "─" * 40,
            "✅ SUBMITTED!",
            f"📊 Score: {score}%",
            f"✔️ Correct: {correct}/{attempted}",
            f"🔗 {agent_code_url}",
            "─" * 40,
            "🏆 https://huggingface.co/spaces/agents-course/Students_leaderboard",
        ]
        break
    else:
        # Every attempt failed; say so instead of ending the log silently.
        log.append(f"❌ Submission failed after {max_attempts} attempts")

    return "\n".join(log), rows
 
 
227
 
 
 
 
 
228
 
229
+ # ── Gradio UI ──────────────────────────────────────────────────────────────────
230
# Layout: header, login button, run button, live log, results table.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    # The label names the model the code actually calls ("sonar-pro").
    gr.Markdown("""
    # 🤖 HF Agents Course — Unit 4
    **Perplexity Sonar Pro (Direct API with Debug Logging)**
    1. Log in with Hugging Face
    2. Click Run & Submit
    3. Check the logs to see what Perplexity is returning
    """)

    gr.LoginButton()
    run_btn = gr.Button("🚀 Run Agent & Submit All Answers", variant="primary", size="lg")
    status_box = gr.Textbox(label="Live Log (with debug info)", lines=25, interactive=False)
    results_table = gr.DataFrame(label="Results", headers=["#", "Task ID", "Question", "Answer"])

    # No explicit inputs: run_and_submit_all only takes the OAuth profile,
    # which is declared through its type annotation.
    run_btn.click(fn=run_and_submit_all, outputs=[status_box, results_table])

    gr.Markdown("**Debug version** - Shows raw Perplexity responses")

# Launch only when executed as a script, so importing this module (for tests
# or tooling) does not start a server.
if __name__ == "__main__":
    demo.launch()