Snaseem2026 committed on
Commit
b2efdb5
Β·
verified Β·
1 Parent(s): 64f7e3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -350
app.py CHANGED
@@ -4,336 +4,165 @@ import requests
4
  import pandas as pd
5
  from huggingface_hub import InferenceClient
6
  import re
7
- import json
8
- from datetime import datetime
9
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
# --- Ultimate Agent with Multiple Tools ---
class UltimateAgent:
    """Multi-tool QA agent: DuckDuckGo search, calculator and webpage scraper
    feeding a reasoning LLM (QwQ-32B) via the HF Inference API.

    Callers use it as a callable: ``agent(question) -> answer string``.
    """

    def __init__(self):
        print("🚀 Initializing Ultimate Agent with multiple tools...")

        # HF_TOKEN is optional; InferenceClient falls back to anonymous access.
        hf_token = os.getenv("HF_TOKEN")
        self.client = InferenceClient(token=hf_token)
        self.model = "Qwen/QwQ-32B-Preview"  # Reasoning-focused model
        print(f"✅ Model: {self.model}")

        # Initialize tools
        self._init_search()
        print("✅ All tools ready!")

    def _init_search(self):
        """Initialize search tool; sets self.search to None when unavailable."""
        try:
            from duckduckgo_search import DDGS
            self.search = DDGS()
            print("✅ Search tool loaded")
        except Exception as e:
            self.search = None
            print(f"⚠️ Search unavailable: {e}")

    def search_web(self, query: str, max_results: int = 10) -> str:
        """Search the web and return formatted results as a single string."""
        if not self.search:
            return "Search unavailable"

        try:
            print(f"   🔍 Searching: {query[:60]}...")
            results = list(self.search.text(query, max_results=max_results))

            if not results:
                return "No results found"

            formatted = []
            for i, r in enumerate(results[:max_results], 1):
                title = r.get('title', '').strip()
                body = r.get('body', '').strip()
                url = r.get('href', '').strip()
                if title and body:
                    formatted.append(f"[Result {i}]\nTitle: {title}\nContent: {body}\nURL: {url}")

            result_text = "\n\n".join(formatted)
            print(f"   ✅ Found {len(results)} results ({len(result_text)} chars)")
            return result_text

        except Exception as e:
            print(f"   ❌ Search error: {e}")
            return f"Search error: {e}"

    def calculate(self, expression: str) -> str:
        """Safely evaluate a mathematical expression; returns the result as str."""
        try:
            expr = expression.strip()
            # Normalize common math symbols to Python operators.
            expr = expr.replace('×', '*').replace('÷', '/').replace('^', '**')

            # Whitelist: digits, whitespace and arithmetic operators only.
            # (Fixed: removed duplicate \* from the character class.)
            if not re.match(r'^[\d\s\+\-\*\/\(\)\.\%]+$', expr):
                return "Invalid expression"

            # NOTE: eval is acceptable here only because the whitelist above
            # excludes names, attribute access and quotes; do not widen it.
            result = eval(expr)
            print(f"   🔢 Calculated: {expression} = {result}")
            return str(result)
        except Exception as e:
            print(f"   ❌ Calc error: {e}")
            return f"Calculation error: {e}"

    def get_webpage_text(self, url: str) -> str:
        """Fetch a webpage and extract its visible text (capped at 5000 chars)."""
        try:
            print(f"   🌐 Fetching: {url[:60]}...")
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            from bs4 import BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove script and style elements before extracting text.
            for script in soup(["script", "style"]):
                script.decompose()

            text = soup.get_text(separator='\n', strip=True)

            # Collapse blank lines.
            lines = [line.strip() for line in text.split('\n') if line.strip()]
            text = '\n'.join(lines)

            # Limit size so the prompt stays within context budget.
            if len(text) > 5000:
                text = text[:5000] + "..."

            print(f"   ✅ Fetched {len(text)} chars")
            return text

        except Exception as e:
            print(f"   ❌ Webpage error: {e}")
            return f"Could not fetch webpage: {e}"

    def solve_question(self, question: str) -> str:
        """Main question-solving logic: math shortcut, optional search, then LLM."""
        print(f"\n{'='*80}")
        print(f"❓ QUESTION: {question}")
        print(f"{'='*80}")

        q_lower = question.lower()

        # Step 1: math shortcut — try the calculator before calling the LLM.
        if any(op in question for op in ['+', '-', '×', '÷', '*', '/', '=']):
            print("📊 Detected: Math calculation")
            math_match = re.search(r'[\d\+\-\*\/×÷\(\)\.\s]+', question)
            if math_match:
                calc_result = self.calculate(math_match.group())
                # Fixed: also reject "Invalid expression" — previously only
                # "error" was checked, so "Invalid expression" leaked through
                # as the final answer.
                if "error" not in calc_result.lower() and "invalid" not in calc_result.lower():
                    return calc_result

        # Step 2: decide if we need search.
        needs_search = any(keyword in q_lower for keyword in [
            'who', 'what', 'where', 'when', 'which', 'how many', 'how much',
            'current', 'latest', 'recent', 'today', '2024', '2025', '2026'
        ])

        search_results = ""
        if needs_search and self.search:
            search_results = self.search_web(question, max_results=10)

        # If the question references a specific URL, fetch that page too.
        # NOTE(review): in the original this may have been nested under the
        # search branch — diff indentation is ambiguous; confirm against repo.
        url_match = re.search(r'https?://[^\s]+', question)
        if url_match:
            url = url_match.group()
            webpage_content = self.get_webpage_text(url)
            if webpage_content:
                search_results = f"WEBPAGE CONTENT FROM {url}:\n{webpage_content}\n\n" + search_results

        # Step 3: build the reasoning prompt.
        system_prompt = """You are an expert AI assistant with advanced reasoning capabilities.

YOUR TASK: Answer the question accurately and concisely.

CRITICAL INSTRUCTIONS:
1. Think step-by-step through the problem
2. Use the provided web search results or webpage content
3. Extract the most relevant information
4. Provide a DIRECT, CONCISE answer
5. For numbers: give just the number
6. For names: give just the name
7. For yes/no: give just Yes or No
8. For facts: state the fact directly
9. Do NOT say "According to" or "Based on" - just answer directly
10. If you need to reason through steps, do it, but END with a clear final answer

FORMAT YOUR RESPONSE:
- If simple answer: just give the answer
- If complex: show brief reasoning, then "Final Answer: [answer]"

EXAMPLES:
Q: "What is 15 + 27?"
A: "42"

Q: "Who is the CEO of Tesla?"
A: "Elon Musk"

Q: "What year did World War 2 end?"
A: "1945"

Remember: BE CONCISE AND DIRECT!"""

        messages = [{"role": "system", "content": system_prompt}]

        user_prompt = f"Question: {question}\n\n"
        if search_results and len(search_results) > 50:
            user_prompt += f"Web Search Results / Information:\n{search_results[:4500]}\n\n"
        user_prompt += "Now provide your answer (following the instructions - be direct and concise):"

        messages.append({"role": "user", "content": user_prompt})

        # Step 4: call the reasoning model, with a fallback model on failure.
        try:
            print("🤖 Calling reasoning model...")
            response = self.client.chat_completion(
                model=self.model,
                messages=messages,
                max_tokens=2000,  # More tokens for reasoning
                temperature=0.1
            )

            raw_answer = response.choices[0].message.content.strip()
            answer = self._extract_final_answer(raw_answer)

            print(f"✅ ANSWER: {answer[:200]}")
            print(f"{'='*80}\n")
            return answer

        except Exception as e:
            print(f"❌ Model error: {e}")

            # Fallback to alternative model.
            try:
                print("🔄 Trying fallback model...")
                response = self.client.text_generation(
                    f"Answer this question concisely:\n\n{question}\n\nAnswer:",
                    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                    max_new_tokens=300,
                    temperature=0.1
                )
                return response.strip()
            except Exception:  # Fixed: was a bare except
                return "I cannot answer this question at this time."

    def _extract_final_answer(self, text: str) -> str:
        """Extract a clean, direct final answer from verbose reasoning output."""

        # Look for an explicit "Final Answer:" marker first.
        final_answer_match = re.search(r'final answer:?\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
        if final_answer_match:
            return final_answer_match.group(1).strip()

        # Look for a conclusion sentence after reasoning.
        answer_match = re.search(r'(?:therefore|thus|so),?\s*(?:the answer is:?)?\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
        if answer_match:
            return answer_match.group(1).strip()

        # Strip common verbose prefixes.
        cleaned = text.strip()
        patterns = [
            r'^according to.{0,50}?[,:]\s*',
            r'^based on.{0,50}?[,:]\s*',
            r'^the answer is:?\s*',
            r'^answer:?\s*',
        ]
        for pattern in patterns:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE).strip()

        # If multi-line, try to find the most answer-like line.
        lines = [l.strip() for l in cleaned.split('\n') if l.strip()]
        if len(lines) > 1:
            # Prefer shorter, more direct lines.
            for line in lines:
                if 10 < len(line) < 150 and not line.endswith(':'):
                    return line
            # Otherwise return the last substantial line.
            for line in reversed(lines):
                if len(line) > 5:
                    return line

        # Limit length, cutting at a sentence boundary when possible.
        if len(cleaned) > 500:
            cleaned = cleaned[:500].rsplit('.', 1)[0] + '.'
        return cleaned

    def __call__(self, question: str) -> str:
        """Main entry point."""
        return self.solve_question(question)
 
291
  def run_and_submit_all(profile: gr.OAuthProfile | None):
292
  """Run evaluation"""
293
- space_id = os.getenv("SPACE_ID")
294
 
295
  if profile is None:
296
- return "❌ Please login with Hugging Face!", None
297
 
298
  username = profile.username
299
- print(f"\n{'#'*80}")
300
- print(f"🎯 EVALUATION START - User: {username}")
301
- print(f"{'#'*80}\n")
302
 
303
  api_url = DEFAULT_API_URL
304
  questions_url = f"{api_url}/questions"
305
  submit_url = f"{api_url}/submit"
306
 
307
- # Initialize agent
308
- print("βš™οΈ Initializing Ultimate Agent...")
309
  try:
310
- agent = UltimateAgent()
311
  except Exception as e:
312
  return f"❌ Init error: {e}", None
313
 
314
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
315
 
316
  # Fetch questions
317
- print(f"πŸ“₯ Fetching questions from API...")
318
  try:
319
  response = requests.get(questions_url, timeout=30)
320
  response.raise_for_status()
321
  questions_data = response.json()
322
- if not questions_data:
323
- return "❌ No questions received", None
324
- print(f"βœ… Received {len(questions_data)} questions\n")
325
  except Exception as e:
326
  return f"❌ Fetch error: {e}", None
327
 
328
- # Process all questions
329
  results_log = []
330
  answers_payload = []
331
  total = len(questions_data)
332
 
333
- print(f"{'='*80}")
334
- print(f"πŸ€– PROCESSING {total} QUESTIONS (Est. 5-8 minutes)")
335
- print(f"{'='*80}\n")
336
-
337
  for idx, item in enumerate(questions_data, 1):
338
  task_id = item. get("task_id")
339
  question_text = item.get("question")
@@ -341,8 +170,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
341
  if not task_id or not question_text:
342
  continue
343
 
344
- print(f"\n{'─'*80}")
345
- print(f"[{idx}/{total}] Task: {task_id[: 12]}...")
346
 
347
  try:
348
  answer = agent(question_text)
@@ -352,129 +180,79 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
352
  "submitted_answer": answer
353
  })
354
 
355
- results_log.append((
356
- idx,
357
- question_text[: 70] + "..." if len(question_text) > 70 else question_text,
358
- answer[: 100] + "..." if len(answer) > 100 else answer
359
- ))
360
 
361
  except Exception as e:
362
- print(f"❌ ERROR: {e}")
363
  answers_payload.append({
364
  "task_id": task_id,
365
- "submitted_answer": "Error processing question"
366
  })
367
- results_log.append((idx, question_text[: 70], f"Error: {str(e)[:50]}"))
368
-
369
- print(f"\n{'='*80}")
370
- print(f"βœ… COMPLETED ALL {len(answers_payload)} QUESTIONS")
371
- print(f"{'='*80}\n")
372
 
373
- # Submit to scoring server
374
- print(f"πŸ“€ Submitting to scoring server...")
 
375
  try:
376
  payload = {
377
  "username": username,
378
- "answers": answers_payload,
379
  "agent_code": agent_code
380
  }
381
 
382
- submit_response = requests.post(submit_url, json=payload, timeout=120)
383
  submit_response.raise_for_status()
384
  submission_result = submit_response.json()
385
 
386
- print(f"βœ… SUBMISSION SUCCESSFUL!")
387
- print(f"πŸ“Š Result: {submission_result}\n")
388
 
389
  except Exception as e:
390
- print(f"❌ Submission error: {e}")
391
- return f"❌ Submission failed: {e}", None
392
 
393
- # Format results
394
- results_df = pd.DataFrame(results_log, columns=["#", "Question", "Answer"])
395
  score = submission_result.get('score', 0)
396
-
397
  passed = isinstance(score, (int, float)) and score >= 30
398
- excellent = isinstance(score, (int, float)) and score >= 60
399
 
400
  result_message = f"""
401
- # {'πŸ† OUTSTANDING!' if excellent else 'πŸŽ‰ CONGRATULATIONS!' if passed else 'πŸ“Š Results'}
402
-
403
- ## {'🌟 EXCELLENT PERFORMANCE!' if excellent else 'βœ… YOU PASSED UNIT 4!' if passed else '⚠️ Try Again'}
404
 
405
- ### πŸ† Score: **{score}%**
406
 
407
- {'### πŸŽ“ EXCEPTIONAL! You achieved ' + str(score) + '% on the GAIA benchmark!' if excellent else '### πŸŽ“ Congratulations! You passed Unit 4 of the Hugging Face Agents Course!' if passed else f'### πŸ“ˆ Score: {score}% - Keep improving!'}
408
 
409
- **πŸ“Š Details:**
410
- - πŸ‘€ User: `{username}`
411
- - πŸ“ Questions: {len(answers_payload)}
412
- - 🎯 Pass Threshold: 30%
413
- - πŸ“Š Your Score: **{score}%**
414
- - πŸ€– Agent: QwQ-32B Reasoning Model
415
- - πŸ”§ Tools: Web Search, Calculator, Web Scraper
416
 
417
- ### πŸ”— Links:
418
- - [Your Agent Code]({agent_code})
419
- - [Course Unit 4](https://huggingface.co/learn/agents-course/en/unit4/hands-on)
420
-
421
- ---
422
- *Ultimate Agent with QwQ-32B Reasoning + Multi-Tool Integration*
423
  """
424
 
425
  return result_message, results_df
426
 
427
-
428
- # --- Gradio UI ---
429
- with gr.Blocks(theme=gr.themes.Soft(), title="Ultimate Agent - Unit 4") as demo:
430
  gr. Markdown("""
431
- # πŸ† Ultimate AI Agent - Unit 4 Final Assignment
432
 
433
- ## πŸ’ͺ Advanced Features:
434
- - 🧠 **QwQ-32B-Preview** - Advanced reasoning model (32B parameters)
435
- - πŸ” **Enhanced Web Search** - DuckDuckGo with 10 results
436
- - 🌐 **Web Scraper** - Extract content from URLs
437
- - πŸ”’ **Calculator** - Solve mathematical expressions
438
- - 🎯 **Multi-Step Reasoning** - Think through complex problems
439
- - 🧹 **Answer Extraction** - Clean, direct answers
440
- - πŸ”„ **Fallback System** - Alternative model if needed
441
 
442
- ## 🎯 Target: 70%+ (Pass: 30%)
443
 
444
- ## πŸ“‹ Instructions:
445
- 1. βœ… Sign in with Hugging Face (click button below)
446
- 2. πŸš€ Click "Run Evaluation & Submit"
447
- 3. ⏳ Wait 5-8 minutes (it's processing complex questions!)
448
- 4. πŸŽ‰ Get your score!
449
-
450
- **This agent is designed to score 60-80% on the GAIA benchmark! **
451
  """)
452
 
453
- with gr.Row():
454
- gr.LoginButton()
455
-
456
- with gr.Row():
457
- submit_button = gr.Button(
458
- "πŸš€ Run Evaluation & Submit All Answers",
459
- variant="primary",
460
- size="lg",
461
- scale=2
462
- )
463
-
464
- with gr.Row():
465
- output_text = gr.Markdown()
466
-
467
- with gr.Row():
468
- output_table = gr.Dataframe(
469
- label="πŸ“ Results Preview",
470
- wrap=True,
471
- interactive=False
472
- )
473
 
474
- submit_button.click(
475
- run_and_submit_all,
476
- inputs=None,
477
- outputs=[output_text, output_table]
478
- )
479
 
480
  demo.launch()
 
4
  import pandas as pd
5
  from huggingface_hub import InferenceClient
6
  import re
 
 
7
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
# --- Reliable Agent ---
class ReliableAgent:
    """Lightweight QA agent: Mixtral text-generation plus optional DuckDuckGo
    search, returning short direct answers. Used as a callable:
    ``agent(question) -> answer string``.
    """

    def __init__(self):
        print("Initializing Reliable Agent...")

        # HF_TOKEN is optional; InferenceClient falls back to anonymous access.
        hf_token = os.getenv("HF_TOKEN")
        self.client = InferenceClient(token=hf_token)

        # Use fast, reliable model
        self.model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
        print(f"✅ Model: {self.model}")

        # Initialize search (optional dependency — degrade gracefully).
        try:
            from duckduckgo_search import DDGS
            self.search = DDGS()
            print("✅ Search ready")
        except Exception:  # Fixed: was a bare except (caught SystemExit etc.)
            self.search = None
            print("⚠️ Search unavailable")

    def search_web(self, query: str) -> str:
        """Search and return concise results as 'title: body | ...' or ''."""
        if not self.search:
            return ""

        try:
            results = list(self.search.text(query, max_results=5))
            if not results:
                return ""

            info = []
            for r in results[:5]:
                title = r.get('title', '')
                body = r.get('body', '')
                if title and body:
                    info.append(f"{title}: {body}")

            return " | ".join(info)
        except Exception:  # Fixed: was a bare except — search is best-effort
            return ""

    def clean_answer(self, text: str) -> str:
        """Strip verbose prefixes and pick the most answer-like line."""
        text = text.strip()

        # Remove verbose patterns at the start of the response.
        patterns = [
            r'^(according to|based on|the answer is|answer is|answer:)\s*',
            r'^(therefore|thus|so|hence),?\s*',
        ]
        for pattern in patterns:
            text = re.sub(pattern, '', text, flags=re.IGNORECASE)

        # If multi-line, prefer a short direct line.
        lines = [l.strip() for l in text.split('\n') if l.strip()]

        # Good answer: 5-200 chars, doesn't end with ':'
        for line in lines:
            if 5 < len(line) < 200 and not line.endswith(':'):
                return line

        # Return first line if nothing better.
        if lines:
            return lines[0][:300]

        return text[:300]

    def __call__(self, question: str) -> str:
        """Answer one question: optional search, one LLM call, cleanup."""
        print(f"\nQ: {question[:100]}")

        # Search only for fact-style / time-sensitive questions.
        search_info = ""
        if self.search and any(kw in question.lower() for kw in ['who', 'what', 'where', 'when', 'current', 'latest', '2024', '2025', '2026']):
            search_info = self.search_web(question)
            if search_info:
                print(f"  Found search info: {len(search_info)} chars")

        # Build concise prompt.
        prompt = f"""Answer this question directly and concisely. Give ONLY the answer, no explanation.

Question: {question}"""

        if search_info:
            prompt += f"\n\nRelevant information: {search_info[:2000]}"

        prompt += "\n\nDirect answer:"

        # Call model.
        try:
            response = self.client.text_generation(
                prompt,
                model=self.model,
                max_new_tokens=150,
                temperature=0.1,
                do_sample=False,
                stop_sequences=["\n\n", "Question:", "Q:"]
            )

            answer = self.clean_answer(response)
            print(f"  A: {answer[:100]}")
            return answer

        except Exception as e:
            print(f"  Error: {e}")
            # Simple fallback: first meaningful sentence from the search blob.
            if search_info:
                sentences = search_info.split('.')
                for sent in sentences:
                    if len(sent.strip()) > 20:
                        return sent.strip()[:200]
            return "Unable to answer"
128
  def run_and_submit_all(profile: gr.OAuthProfile | None):
129
  """Run evaluation"""
130
+ space_id = os. getenv("SPACE_ID")
131
 
132
  if profile is None:
133
+ return "❌ Please login!", None
134
 
135
  username = profile.username
136
+ print(f"\n{'='*60}")
137
+ print(f"User: {username}")
138
+ print(f"{'='*60}\n")
139
 
140
  api_url = DEFAULT_API_URL
141
  questions_url = f"{api_url}/questions"
142
  submit_url = f"{api_url}/submit"
143
 
144
+ # Initialize
 
145
  try:
146
+ agent = ReliableAgent()
147
  except Exception as e:
148
  return f"❌ Init error: {e}", None
149
 
150
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
151
 
152
  # Fetch questions
 
153
  try:
154
  response = requests.get(questions_url, timeout=30)
155
  response.raise_for_status()
156
  questions_data = response.json()
157
+ print(f"βœ… Got {len(questions_data)} questions\n")
 
 
158
  except Exception as e:
159
  return f"❌ Fetch error: {e}", None
160
 
161
+ # Process
162
  results_log = []
163
  answers_payload = []
164
  total = len(questions_data)
165
 
 
 
 
 
166
  for idx, item in enumerate(questions_data, 1):
167
  task_id = item. get("task_id")
168
  question_text = item.get("question")
 
170
  if not task_id or not question_text:
171
  continue
172
 
173
+ print(f"[{idx}/{total}]", end=" ")
 
174
 
175
  try:
176
  answer = agent(question_text)
 
180
  "submitted_answer": answer
181
  })
182
 
183
+ results_log.append((idx, question_text[: 60], answer[:80]))
 
 
 
 
184
 
185
  except Exception as e:
186
+ print(f"Error: {e}")
187
  answers_payload.append({
188
  "task_id": task_id,
189
+ "submitted_answer": "Error"
190
  })
 
 
 
 
 
191
 
192
+ print(f"\nβœ… Processed {len(answers_payload)} questions\n")
193
+
194
+ # Submit
195
  try:
196
  payload = {
197
  "username": username,
198
+ "answers": answers_payload,
199
  "agent_code": agent_code
200
  }
201
 
202
+ submit_response = requests.post(submit_url, json=payload, timeout=90)
203
  submit_response.raise_for_status()
204
  submission_result = submit_response.json()
205
 
206
+ print(f"βœ… Submitted: {submission_result}\n")
 
207
 
208
  except Exception as e:
209
+ return f"❌ Submit error: {e}", None
 
210
 
211
+ # Results
212
+ results_df = pd. DataFrame(results_log, columns=["#", "Question", "Answer"])
213
  score = submission_result.get('score', 0)
 
214
  passed = isinstance(score, (int, float)) and score >= 30
 
215
 
216
  result_message = f"""
217
+ # {'πŸŽ‰ PASSED!' if passed else 'πŸ“Š Results'}
 
 
218
 
219
+ ## Score: **{score}%**
220
 
221
+ {'### βœ… You passed Unit 4!' if passed else f'### Score: {score}% (need 30%)'}
222
 
223
+ **Details:**
224
+ - User: {username}
225
+ - Questions: {len(answers_payload)}
226
+ - Target: 30%
227
+ - Score: **{score}%**
 
 
228
 
229
+ [Your Code]({agent_code})
 
 
 
 
 
230
  """
231
 
232
  return result_message, results_df
233
 
234
# --- UI ---
# Top-level Gradio app: header markdown, login, one run button, and the
# two outputs (markdown summary + results table) wired to run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🤖 Reliable Agent - Unit 4

    **Features:**
    - Fast Mixtral model
    - Web search
    - Clean, direct answers

    **Target: 30%+**

    1. Sign in
    2. Run evaluation
    3. Wait 2-3 minutes
    """)

    gr.LoginButton()
    submit_button = gr.Button("🚀 Run Evaluation", variant="primary", size="lg")
    output_text = gr.Markdown()
    output_table = gr.Dataframe(label="Results")

    submit_button.click(run_and_submit_all, inputs=None, outputs=[output_text, output_table])

demo.launch()