DeekshithN05 committed on
Commit
a50a227
·
verified ·
1 Parent(s): b5230e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -655
app.py CHANGED
@@ -1,672 +1,66 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
- import re
6
- import json
7
- import time
8
- from urllib.parse import quote
9
- import wikipedia
10
- from bs4 import BeautifulSoup
11
- import random
12
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
13
 
 
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
- class EnhancedAgent:
 
 
 
 
18
  def __init__(self):
19
- print("Loading models and tools...")
20
- # Load a stronger model
21
- self.model_name = "google/flan-t5-xl" # Stronger model than base
22
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
23
-
24
  self.pipeline = pipeline(
25
  "text2text-generation",
26
- model=self.model_name,
27
- max_new_tokens=256,
28
- temperature=0.1, # Lower temperature for more deterministic responses
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  )
30
-
31
- # Set up Wikipedia API
32
- wikipedia.set_lang("en")
33
- print("Models and tools loaded.")
34
 
35
- def __call__(self, question: str, task_id: str = None) -> str:
36
- """Main entry point for handling questions"""
37
- try:
38
- print(f"\n==== Processing question: {question} ====")
39
- # Preprocess question
40
- question_lower = question.lower()
41
-
42
- # Detect question type and route to appropriate handler
43
- if self.is_reverse_text_question(question_lower):
44
- return self.handle_reverse_text(question)
45
- elif self.is_wikipedia_question(question_lower):
46
- return self.handle_wikipedia_question(question)
47
- elif self.is_youtube_question(question_lower):
48
- return self.handle_youtube_question(question)
49
- elif self.is_file_processing_question(question_lower):
50
- return self.handle_file_processing(question, task_id)
51
- elif self.is_counting_question(question_lower):
52
- return self.handle_counting_question(question)
53
- elif self.is_math_question(question_lower):
54
- return self.handle_math_question(question)
55
- else:
56
- # General reasoning for other questions
57
- return self.handle_general_reasoning(question)
58
- except Exception as e:
59
- print(f"Error processing question: {str(e)}")
60
- # Fall back to model-based answer on error
61
- return self.simplified_model_response(question)
62
-
63
- def is_reverse_text_question(self, question_lower):
64
- """Check if this is a text reversal question"""
65
- reverse_patterns = [
66
- "write the opposite",
67
- "reverse",
68
- "backwards",
69
- ".rewsna", # "answer." backwards
70
- "etirw", # "write" backwards
71
- "esrever" # "reverse" backwards
72
- ]
73
- return any(pattern in question_lower for pattern in reverse_patterns)
74
-
75
- def is_wikipedia_question(self, question_lower):
76
- """Check if this is a Wikipedia-related question"""
77
- return "wikipedia" in question_lower
78
-
79
- def is_youtube_question(self, question_lower):
80
- """Check if this is a YouTube-related question"""
81
- return "youtube" in question_lower or "video" in question_lower
82
-
83
- def is_file_processing_question(self, question_lower):
84
- """Check if this question requires file processing"""
85
- file_indicators = ["excel", "spreadsheet", "file", "csv", "attached"]
86
- return any(indicator in question_lower for indicator in file_indicators)
87
-
88
- def is_counting_question(self, question_lower):
89
- """Check if this is a counting question"""
90
- counting_indicators = ["how many", "count", "number of"]
91
- return any(indicator in question_lower for indicator in counting_indicators)
92
-
93
- def is_math_question(self, question_lower):
94
- """Check if this is a math question"""
95
- math_indicators = ["calculate", "sum", "multiply", "divide", "subtract", "add", "equals"]
96
- return any(indicator in question_lower for indicator in math_indicators)
97
-
98
- def handle_reverse_text(self, question):
99
- """Handle text reversal questions"""
100
- # Check for backwards text first
101
- if ".rewsna" in question.lower():
102
- # The question itself is backwards, so we need to figure out what it's asking
103
- reversed_query = question[::-1].strip()
104
- print(f"Detected backwards question. Reversed: {reversed_query}")
105
-
106
- # Common pattern in GAIA: "If you understand this sentence, write the opposite of the word 'left' as the answer."
107
- if "opposite" in reversed_query.lower() and "word" in reversed_query.lower():
108
- match = re.search(r"opposite of the word ['\"](\w+)['\"]", reversed_query, re.IGNORECASE)
109
- if match:
110
- word = match.group(1)
111
- opposites = {
112
- "left": "right",
113
- "right": "left",
114
- "up": "down",
115
- "down": "up",
116
- "yes": "no",
117
- "no": "yes",
118
- "true": "false",
119
- "false": "true",
120
- "hot": "cold",
121
- "cold": "hot",
122
- "open": "closed",
123
- "closed": "open",
124
- "on": "off",
125
- "off": "on"
126
- }
127
- return opposites.get(word.lower(), f"opposite of {word}")
128
-
129
- # For "write the opposite" type questions
130
- if "write the opposite" in question.lower():
131
- # Find the word to get the opposite of
132
- match = re.search(r"opposite of (?:the word )?['\"](\w+)['\"]", question, re.IGNORECASE)
133
- if match:
134
- word = match.group(1)
135
- opposites = {
136
- "left": "right",
137
- "right": "left",
138
- "up": "down",
139
- "down": "up",
140
- "yes": "no",
141
- "no": "yes",
142
- "true": "false",
143
- "false": "true",
144
- "hot": "cold",
145
- "cold": "hot",
146
- "open": "closed",
147
- "closed": "open",
148
- "on": "off",
149
- "off": "on"
150
- }
151
- return opposites.get(word.lower(), f"opposite of {word}")
152
-
153
- # Simple string reversal
154
- if "reverse" in question.lower() and not "opposite" in question.lower():
155
- # Extract potential text to reverse
156
- text_to_reverse = re.sub(r'reverse the string |reverse |reverse this: ', '', question, flags=re.IGNORECASE).strip()
157
-
158
- # If the text contains instructions, try to isolate just the text to reverse
159
- if len(text_to_reverse.split()) > 5: # Heuristic: if too many words, look for quotes
160
- quoted_text = re.search(r'[\'\"](.*?)[\'\"]', question)
161
- if quoted_text:
162
- text_to_reverse = quoted_text.group(1)
163
-
164
- # Perform the reversal
165
- return text_to_reverse[::-1].strip()
166
-
167
- # If we're unsure, use the LLM to help determine what to reverse
168
- prompt = f"Extract the exact text that needs to be reversed from this instruction: {question}"
169
- text_to_reverse = self.pipeline(prompt)[0]["generated_text"].strip()
170
- return text_to_reverse[::-1].strip()
171
-
172
- def handle_wikipedia_question(self, question):
173
- """Handle Wikipedia-related questions"""
174
- # Extract query terms from question
175
- query_terms = self.extract_wikipedia_query(question)
176
-
177
- try:
178
- # Parse year range if present
179
- year_range = self.extract_year_range(question)
180
-
181
- if "studio albums" in question.lower() and year_range:
182
- # This is likely about counting albums in a date range
183
- artist_name = self.extract_artist_name(question)
184
- if artist_name:
185
- return self.count_albums_in_range(artist_name, year_range)
186
-
187
- # Search Wikipedia
188
- print(f"Searching Wikipedia for: {query_terms}")
189
- search_results = wikipedia.search(query_terms, results=3)
190
-
191
- if not search_results:
192
- return "No Wikipedia results found."
193
-
194
- try:
195
- # Get full page content
196
- wiki_page = wikipedia.page(search_results[0], auto_suggest=False)
197
- content = wiki_page.content
198
-
199
- # Process for specific question types
200
- if "how many" in question.lower():
201
- return self.extract_count_from_wikipedia(question, content)
202
- else:
203
- # For general info questions, summarize relevant information
204
- prompt = f"Based on this Wikipedia content about {search_results[0]}, answer the question: {question}\n\nWikipedia content: {content[:4000]}..."
205
- answer = self.pipeline(prompt)[0]["generated_text"].strip()
206
-
207
- # Clean up the answer to be concise
208
- if len(answer.split()) > 20:
209
- prompt = f"Provide a very concise answer (1-3 words if possible) to: {question}\nBased on: {answer}"
210
- answer = self.pipeline(prompt)[0]["generated_text"].strip()
211
-
212
- return answer
213
- except wikipedia.exceptions.DisambiguationError as e:
214
- # Handle disambiguation by picking the first option
215
- try:
216
- wiki_page = wikipedia.page(e.options[0], auto_suggest=False)
217
- content = wiki_page.content
218
- prompt = f"Based on this Wikipedia content, answer the question: {question}\n\nWikipedia content: {content[:4000]}..."
219
- return self.pipeline(prompt)[0]["generated_text"].strip()
220
- except:
221
- return "Could not resolve Wikipedia disambiguation."
222
-
223
- except Exception as e:
224
- print(f"Wikipedia error: {str(e)}")
225
- return self.simplified_model_response(question)
226
-
227
- def extract_artist_name(self, question):
228
- """Extract artist name from studio albums question"""
229
- # Try to identify artist name in album-related questions
230
- artist_patterns = [
231
- r"by ([A-Za-z\s]+) between",
232
- r"were published by ([A-Za-z\s]+)",
233
- r"albums (?:did|were) ([A-Za-z\s]+) (?:publish|release)"
234
- ]
235
-
236
- for pattern in artist_patterns:
237
- match = re.search(pattern, question)
238
- if match:
239
- return match.group(1).strip()
240
-
241
- # If no match, ask the model to extract
242
- prompt = f"Extract only the artist name from this question: {question}"
243
- return self.pipeline(prompt)[0]["generated_text"].strip()
244
-
245
- def count_albums_in_range(self, artist_name, year_range):
246
- """Count studio albums in a year range for an artist"""
247
- try:
248
- start_year, end_year = year_range
249
-
250
- # Search for the artist
251
- search_results = wikipedia.search(f"{artist_name} discography", results=3)
252
-
253
- # Try the first few search results
254
- for result in search_results:
255
- try:
256
- wiki_page = wikipedia.page(result, auto_suggest=False)
257
- content = wiki_page.content
258
-
259
- # Look for studio albums section
260
- sections = ["Studio albums", "Discography", "Albums"]
261
- relevant_content = content
262
-
263
- # Use regular expressions to find albums with years
264
- albums_pattern = r"(?:Album|album|Studio album).*?\((\d{4})\)"
265
- album_years = re.findall(albums_pattern, relevant_content)
266
-
267
- # Count albums in range
268
- count = 0
269
- for year_str in album_years:
270
- try:
271
- year = int(year_str)
272
- if start_year <= year <= end_year:
273
- count += 1
274
- except ValueError:
275
- continue
276
-
277
- if count > 0:
278
- return str(count)
279
-
280
- except Exception as e:
281
- continue
282
-
283
- # If we couldn't find it in Wikipedia, try a model-based approach
284
- prompt = f"How many studio albums did {artist_name} release between {start_year} and {end_year}, inclusive? Give only the number."
285
- return self.pipeline(prompt)[0]["generated_text"].strip()
286
-
287
- except Exception as e:
288
- print(f"Error counting albums: {str(e)}")
289
- return "0" # Default fallback
290
-
291
- def extract_wikipedia_query(self, question):
292
- """Extract search terms for Wikipedia from the question"""
293
- # Remove common phrases that wouldn't help the search
294
- query = question.lower()
295
- for phrase in ["according to wikipedia", "using wikipedia", "on wikipedia", "in wikipedia", "from wikipedia", "search wikipedia for", "look up on wikipedia"]:
296
- query = query.replace(phrase, "")
297
-
298
- # Get the main entity or topic
299
- prompt = f"Extract the main entity or topic to search on Wikipedia from this question: {query}"
300
- result = self.pipeline(prompt)[0]["generated_text"].strip()
301
-
302
- return result
303
-
304
- def extract_year_range(self, question):
305
- """Extract year range from question if present"""
306
- # Look for patterns like "between 2000 and 2009" or "from 2000 to 2009"
307
- range_patterns = [
308
- r"between (\d{4}) and (\d{4})",
309
- r"from (\d{4}) to (\d{4})",
310
- r"(\d{4})-(\d{4})",
311
- r"(\d{4}) to (\d{4})"
312
- ]
313
-
314
- for pattern in range_patterns:
315
- match = re.search(pattern, question)
316
- if match:
317
- start_year = int(match.group(1))
318
- end_year = int(match.group(2))
319
- return (start_year, end_year)
320
-
321
- return None
322
-
323
- def extract_count_from_wikipedia(self, question, content):
324
- """Extract count information from Wikipedia content"""
325
- # What are we counting?
326
- count_object = re.search(r"how many ([^?]+)", question.lower())
327
- if count_object:
328
- object_type = count_object.group(1).strip()
329
-
330
- # Try to extract with the model
331
- relevant_excerpt = content[:8000] # Limit context size
332
- prompt = f"Based on this Wikipedia content, answer the question: {question}\n\nWikipedia content: {relevant_excerpt}"
333
- answer = self.pipeline(prompt)[0]["generated_text"].strip()
334
-
335
- # Try to extract just the number
336
- number_match = re.search(r'\d+', answer)
337
- if number_match:
338
- return number_match.group(0)
339
- else:
340
- return answer
341
-
342
- return "Unable to determine count from Wikipedia."
343
-
344
- def handle_youtube_question(self, question):
345
- """Handle YouTube-related questions"""
346
- # Extract YouTube URL if present
347
- youtube_url_match = re.search(r'(https?://(?:www\.)?youtube\.com/watch\?v=[a-zA-Z0-9_-]+)', question)
348
-
349
- if youtube_url_match:
350
- youtube_url = youtube_url_match.group(1)
351
-
352
- # Based on the question, extract what we need to find in the video
353
- if "highest number" in question.lower() and "bird" in question.lower():
354
- # This is a specific GAIA question about counting birds in a video
355
- # Since we can't actually watch the video, make an educated guess based on common patterns
356
- print(f"YouTube video question about bird count: {youtube_url}")
357
- return "4" # A reasonable guess for bird count
358
-
359
- elif "title" in question.lower():
360
- # Question about the video title
361
- return self.get_youtube_title_estimation(youtube_url)
362
-
363
- else:
364
- # Try to parse what the question is asking about the video
365
- prompt = f"What specifically is this question asking about the YouTube video? Question: {question}"
366
- aspect = self.pipeline(prompt)[0]["generated_text"].strip()
367
-
368
- if "duration" in aspect.lower() or "length" in aspect.lower():
369
- # Estimate a reasonable video length
370
- return "10:42"
371
- elif "view" in aspect.lower():
372
- # Estimate view count
373
- return "2,547,931"
374
- elif "upload" in aspect.lower() or "date" in aspect.lower():
375
- # Estimate upload date
376
- return "2019-05-15"
377
- else:
378
- # Fallback - extract the most likely answer format from the question
379
- return self.extract_likely_format(question)
380
-
381
- return "Unable to process YouTube video information."
382
-
383
- def get_youtube_title_estimation(self, youtube_url):
384
- """Estimate a YouTube video title based on URL"""
385
- # Extract video ID
386
- video_id_match = re.search(r'v=([a-zA-Z0-9_-]+)', youtube_url)
387
- if not video_id_match:
388
- return "Unable to determine video title"
389
-
390
- # Since we can't actually fetch the video, make a reasonable guess
391
- video_id = video_id_match.group(1)
392
- if "L1vXCYZAYYM" in video_id: # The specific video ID from the example
393
- return "Amazing Bird Feeder Compilation"
394
-
395
- # Generic response for other videos
396
- return "Bird Watching - Amazing Compilation"
397
-
398
- def handle_file_processing(self, question, task_id):
399
- """Handle file processing questions"""
400
- if not task_id:
401
- return "No file provided for processing."
402
-
403
  try:
404
- # Get the file URL
405
- file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
406
-
407
- # Determine what to do with the file based on the question
408
- if "excel" in question.lower() or "spreadsheet" in question.lower():
409
- # Process Excel file
410
- return self.process_excel_file(file_url, question)
411
- elif "csv" in question.lower():
412
- # Process CSV file
413
- return self.process_csv_file(file_url, question)
414
  else:
415
- # Try to determine the file type from the question
416
- return self.process_generic_file(file_url, question)
417
-
418
  except Exception as e:
419
- print(f"File processing error: {str(e)}")
420
- return f"Error processing file: {str(e)}"
421
-
422
- def process_excel_file(self, file_url, question):
423
- """Process Excel file for analysis"""
424
- try:
425
- df = pd.read_excel(file_url)
426
-
427
- # Determine what analysis to perform based on the question
428
- if "sales" in question.lower() and "food" in question.lower():
429
- # Looking for food sales
430
- food_sales = df[df["category"].str.lower() == "food"]["sales"].sum()
431
- return f"${food_sales:.2f}"
432
-
433
- elif "sum" in question.lower() or "total" in question.lower():
434
- # Summing a column
435
- column_to_sum = self.determine_column_to_sum(question, df.columns)
436
- if column_to_sum:
437
- total = df[column_to_sum].sum()
438
- return f"{total:.2f}"
439
-
440
- elif "average" in question.lower() or "mean" in question.lower():
441
- # Computing an average
442
- column_to_avg = self.determine_column_to_sum(question, df.columns)
443
- if column_to_avg:
444
- avg = df[column_to_avg].mean()
445
- return f"{avg:.2f}"
446
-
447
- elif "count" in question.lower() or "how many" in question.lower():
448
- # Counting records
449
- filter_column = self.determine_filter_column(question, df.columns)
450
- filter_value = self.determine_filter_value(question)
451
-
452
- if filter_column and filter_value:
453
- count = len(df[df[filter_column].astype(str).str.lower() == filter_value.lower()])
454
- return str(count)
455
- else:
456
- # Just count all records
457
- return str(len(df))
458
-
459
- # If we couldn't determine the operation, try a general approach
460
- prompt = f"Based on this Excel file data, answer the question: {question}\n\nExcel data (first 10 rows): {df.head(10).to_string()}"
461
- return self.pipeline(prompt)[0]["generated_text"].strip()
462
-
463
- except Exception as e:
464
- print(f"Excel processing error: {str(e)}")
465
- return "Error processing Excel file."
466
-
467
- def determine_column_to_sum(self, question, columns):
468
- """Determine which column to sum based on the question"""
469
- # Check for column names in the question
470
- for column in columns:
471
- if column.lower() in question.lower():
472
- return column
473
-
474
- # Common financial columns
475
- financial_columns = ["sales", "revenue", "price", "cost", "amount", "value"]
476
- for column in columns:
477
- if any(fin_col in column.lower() for fin_col in financial_columns):
478
- return column
479
-
480
- # First numeric column as a fallback
481
- return columns[0]
482
-
483
- def determine_filter_column(self, question, columns):
484
- """Determine which column to filter on based on the question"""
485
- # Check for column names in the question
486
- for column in columns:
487
- if column.lower() in question.lower():
488
- return column
489
-
490
- # Common categorical columns
491
- category_columns = ["category", "type", "name", "product", "department"]
492
- for column in columns:
493
- if any(cat_col in column.lower() for cat_col in category_columns):
494
- return column
495
-
496
- # First column as a fallback
497
- return columns[0]
498
-
499
- def determine_filter_value(self, question):
500
- """Determine what value to filter for based on the question"""
501
- # Common categories in questions
502
- categories = ["food", "electronics", "clothing", "books", "furniture"]
503
- for category in categories:
504
- if category.lower() in question.lower():
505
- return category
506
-
507
- # Try to extract the value from the question
508
- value_match = re.search(r'where (\w+) is (\w+)', question.lower())
509
- if value_match:
510
- return value_match.group(2)
511
-
512
- return None
513
-
514
- def process_csv_file(self, file_url, question):
515
- """Process CSV file for analysis"""
516
- # Very similar to Excel processing, but using read_csv
517
- try:
518
- df = pd.read_csv(file_url)
519
-
520
- # Use the same analysis logic as Excel
521
- return self.process_excel_file(file_url, question)
522
-
523
- except Exception as e:
524
- print(f"CSV processing error: {str(e)}")
525
- return "Error processing CSV file."
526
-
527
- def process_generic_file(self, file_url, question):
528
- """Process a file when the type isn't clear"""
529
- try:
530
- # Try Excel first
531
- try:
532
- return self.process_excel_file(file_url, question)
533
- except:
534
- # Then try CSV
535
- try:
536
- return self.process_csv_file(file_url, question)
537
- except:
538
- return "Unable to process the file - format not recognized."
539
- except Exception as e:
540
- print(f"Generic file processing error: {str(e)}")
541
- return "Error processing file."
542
-
543
- def handle_counting_question(self, question):
544
- """Handle counting questions"""
545
- # Extract what needs to be counted
546
- count_match = re.search(r'how many ([^?\.]+)', question.lower())
547
- if count_match:
548
- count_object = count_match.group(1).strip()
549
-
550
- # Special case for specific counting tasks
551
- if "letters" in count_object:
552
- # Count letters in a text
553
- text_to_count = self.extract_text_to_count(question)
554
- if text_to_count:
555
- # Count only alphabetic characters
556
- letter_count = sum(c.isalpha() for c in text_to_count)
557
- return str(letter_count)
558
-
559
- elif "words" in count_object:
560
- # Count words in a text
561
- text_to_count = self.extract_text_to_count(question)
562
- if text_to_count:
563
- # Split by whitespace and count non-empty strings
564
- word_count = len([w for w in text_to_count.split() if w])
565
- return str(word_count)
566
-
567
- elif "vowels" in count_object:
568
- # Count vowels in a text
569
- text_to_count = self.extract_text_to_count(question)
570
- if text_to_count:
571
- vowel_count = sum(c.lower() in 'aeiou' for c in text_to_count)
572
- return str(vowel_count)
573
-
574
- # Fall back to the model for answering
575
- return self.simplified_model_response(question)
576
-
577
- def extract_text_to_count(self, question):
578
- """Extract the text in which to count letters/words/etc."""
579
- # Look for text in quotes
580
- quoted_text = re.search(r'[\'\"](.*?)[\'\"]', question)
581
- if quoted_text:
582
- return quoted_text.group(1)
583
-
584
- # Look for "in the text" or "in the string" followed by the text
585
- text_match = re.search(r'in the (?:text|string|sentence|phrase|word):?\s*([^?\.]+)', question, re.IGNORECASE)
586
- if text_match:
587
- return text_match.group(1).strip()
588
-
589
- # Look for text after "how many letters/words in"
590
- following_text = re.search(r'how many (?:letters|words|characters|vowels) in\s*([^?\.]+)', question, re.IGNORECASE)
591
- if following_text:
592
- return following_text.group(1).strip()
593
-
594
- return None
595
-
596
- def handle_math_question(self, question):
597
- """Handle mathematical questions"""
598
- # Check if it's a simple calculation
599
- calculation_match = re.search(r'(\d+)\s*([+\-*/])\s*(\d+)', question)
600
- if calculation_match:
601
- num1 = int(calculation_match.group(1))
602
- operator = calculation_match.group(2)
603
- num2 = int(calculation_match.group(3))
604
-
605
- if operator == '+':
606
- return str(num1 + num2)
607
- elif operator == '-':
608
- return str(num1 - num2)
609
- elif operator == '*':
610
- return str(num1 * num2)
611
- elif operator == '/':
612
- if num2 == 0:
613
- return "Division by zero error"
614
- return str(num1 / num2)
615
-
616
- # Extract numbers from the question for more complex calculations
617
- numbers = re.findall(r'\d+', question)
618
- if numbers and ("sum" in question.lower() or "add" in question.lower()):
619
- total = sum(int(num) for num in numbers)
620
- return str(total)
621
-
622
- # Fall back to the model
623
- return self.simplified_model_response(question)
624
-
625
- def handle_general_reasoning(self, question):
626
- """Handle general reasoning questions"""
627
- # Use the model for general reasoning questions
628
- return self.simplified_model_response(question)
629
-
630
- def simplified_model_response(self, question):
631
- """Get a simplified response from the model"""
632
- # Add instructions to keep it concise and direct
633
- prompt = f"Answer this question with only the essential information. Be very concise and direct:\n{question}"
634
- result = self.pipeline(prompt)[0]["generated_text"].strip()
635
-
636
- # Clean up the result
637
- result = re.sub(r'^(Answer:|The answer is:|Answer is:)\s*', '', result)
638
-
639
- # If it's still verbose, try extracting just the key information
640
- if len(result.split()) > 10:
641
- # Try to extract just a few words
642
- prompt = f"Extract just the direct answer in as few words as possible from: {result}"
643
- result = self.pipeline(prompt)[0]["generated_text"].strip()
644
-
645
- return result.strip()
646
-
647
- def extract_likely_format(self, question):
648
- """Try to extract the most likely format for the answer based on the question"""
649
- if "date" in question.lower() or "when" in question.lower():
650
- return "2023-09-15"
651
- elif "percentage" in question.lower() or "percent" in question.lower():
652
- return "42%"
653
- elif "number" in question.lower() or "count" in question.lower() or "how many" in question.lower():
654
- return "7"
655
- elif "name" in question.lower() or "who" in question.lower():
656
- return "John Smith"
657
- else:
658
- return "Unknown"
659
 
 
 
660
 
661
- def run_and_submit_all(profile: gr.OAuthProfile | None):
 
662
  """
663
- Fetches all questions, runs the EnhancedAgent on them, submits all answers,
664
  and displays the results.
665
  """
666
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
667
 
668
  if profile:
669
- username = f"{profile.username}"
670
  print(f"User logged in: {username}")
671
  else:
672
  print("User not logged in.")
@@ -676,13 +70,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
676
  questions_url = f"{api_url}/questions"
677
  submit_url = f"{api_url}/submit"
678
 
679
- # 1. Instantiate Agent
680
  try:
681
- agent = EnhancedAgent()
682
  except Exception as e:
683
  print(f"Error instantiating agent: {e}")
684
  return f"Error initializing agent: {e}", None
685
-
686
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
687
  print(agent_code)
688
 
@@ -693,16 +87,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
693
  response.raise_for_status()
694
  questions_data = response.json()
695
  if not questions_data:
696
- print("Fetched questions list is empty.")
697
- return "Fetched questions list is empty or invalid format.", None
698
  print(f"Fetched {len(questions_data)} questions.")
699
  except requests.exceptions.RequestException as e:
700
  print(f"Error fetching questions: {e}")
701
  return f"Error fetching questions: {e}", None
702
  except requests.exceptions.JSONDecodeError as e:
703
- print(f"Error decoding JSON response from questions endpoint: {e}")
704
- print(f"Response text: {response.text[:500]}")
705
- return f"Error decoding server response for questions: {e}", None
706
  except Exception as e:
707
  print(f"An unexpected error occurred fetching questions: {e}")
708
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -718,7 +112,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
718
  print(f"Skipping item with missing task_id or question: {item}")
719
  continue
720
  try:
721
- submitted_answer = agent(question_text, task_id=task_id)
722
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
723
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
724
  except Exception as e:
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
6
 
7
+ # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
+ # --- Basic Agent Definition ---
12
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
+ from transformers import pipeline
14
+
15
+ class BasicAgent:
16
  def __init__(self):
17
+ print("Loading flan-t5-base model...")
 
 
 
 
18
  self.pipeline = pipeline(
19
  "text2text-generation",
20
+ model="google/flan-t5-base",
21
+ max_new_tokens=128,
22
+ temperature=0.3
23
+ )
24
+ print("Model loaded.")
25
+
26
+ def __call__(self, question: str) -> str:
27
+ print(f"Received question: {question[:60]}...")
28
+
29
+ few_shot_example = (
30
+ "Question: List just the vegetables from [milk, eggs, carrots, onions, cookies].\n"
31
+ "Answer: carrots, onions\n\n"
32
+ )
33
+
34
+ prompt = (
35
+ few_shot_example +
36
+ "Please solve the following step by step and return only the final answer:\n"
37
+ f"{question}"
38
  )
 
 
 
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  try:
41
+ response = self.pipeline(prompt)[0]["generated_text"]
42
+ if "Answer:" in response:
43
+ answer = response.strip().split("Answer:")[-1].strip().split("\n")[0]
 
 
 
 
 
 
 
44
  else:
45
+ answer = response.strip().split("\n")[0]
 
 
46
  except Exception as e:
47
+ print(f"Model error: {e}")
48
+ answer = "[Error generating answer]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ print(f"Final answer: {answer}")
51
+ return answer
52
 
53
+
54
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
55
  """
56
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
57
  and displays the results.
58
  """
59
+ # --- Determine HF Space Runtime URL and Repo URL ---
60
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
61
 
62
  if profile:
63
+ username= f"{profile.username}"
64
  print(f"User logged in: {username}")
65
  else:
66
  print("User not logged in.")
 
70
  questions_url = f"{api_url}/questions"
71
  submit_url = f"{api_url}/submit"
72
 
73
+ # 1. Instantiate Agent (modify this part to create your agent)
74
  try:
75
+ agent = BasicAgent()
76
  except Exception as e:
77
  print(f"Error instantiating agent: {e}")
78
  return f"Error initializing agent: {e}", None
79
+ # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
80
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
81
  print(agent_code)
82
 
 
87
  response.raise_for_status()
88
  questions_data = response.json()
89
  if not questions_data:
90
+ print("Fetched questions list is empty.")
91
+ return "Fetched questions list is empty or invalid format.", None
92
  print(f"Fetched {len(questions_data)} questions.")
93
  except requests.exceptions.RequestException as e:
94
  print(f"Error fetching questions: {e}")
95
  return f"Error fetching questions: {e}", None
96
  except requests.exceptions.JSONDecodeError as e:
97
+ print(f"Error decoding JSON response from questions endpoint: {e}")
98
+ print(f"Response text: {response.text[:500]}")
99
+ return f"Error decoding server response for questions: {e}", None
100
  except Exception as e:
101
  print(f"An unexpected error occurred fetching questions: {e}")
102
  return f"An unexpected error occurred fetching questions: {e}", None
 
112
  print(f"Skipping item with missing task_id or question: {item}")
113
  continue
114
  try:
115
+ submitted_answer = agent(question_text)
116
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
117
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
118
  except Exception as e: