yannis2025 committed on
Commit
d4bb25c
·
verified ·
1 Parent(s): 2c45ad6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -162
app.py CHANGED
@@ -7,40 +7,72 @@ import sympy as sp
7
  import wikipedia
8
  from bs4 import BeautifulSoup
9
  from tenacity import retry, stop_after_attempt, wait_fixed
 
 
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
 
 
 
14
  # --- Basic Agent Definition ---
15
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
16
  class BasicAgent:
17
  def __init__(self):
18
- self.api_url = "https://api-inference.huggingface.co/models/google/flan-t5-base"
19
  self.api_token = os.getenv("HF_TOKEN")
20
- print(f"HF_TOKEN: {self.api_token}")
21
  if not self.api_token:
22
  raise ValueError("HF_TOKEN environment variable not set.")
23
  self.headers = {"Authorization": f"Bearer {self.api_token}"}
24
- print("BasicAgent initialized with Flan-T5-Base, SymPy, Wikipedia, and DuckDuckGo search.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def __call__(self, question: str) -> tuple[str, str]:
27
  print(f"Processing question: {question}")
28
  reasoning = []
 
 
29
 
30
- # Check for media-based questions
31
- if any(keyword in question.lower() for keyword in ["video", "image", "attached", ".mp3", ".xlsx", "code"]):
32
- print("Warning: Media question detected, skipping to LLM due to lack of media access.")
33
- reasoning.append("Media question detected; cannot process without media access.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # Step 1: Try math-based questions
36
- is_math = bool(re.search(r'[\d+\-*/=]', question.lower())) and any(
37
- keyword in question.lower() for keyword in ["calculate", "solve", "equation"]
38
- )
39
- if is_math:
40
  try:
41
- expr = question.lower()
42
- for keyword in ["calculate", "solve"]:
43
- expr = expr.replace(keyword, "").strip()
44
  if "=" in expr:
45
  left, right = expr.split("=")
46
  eq = sp.Eq(sp.sympify(left.strip()), sp.sympify(right.strip()))
@@ -52,184 +84,98 @@ class BasicAgent:
52
  concise_answer = str(result)
53
  reasoning.append(f"Math Solver: Evaluated '{expr}'. Result: {concise_answer}")
54
  if concise_answer != "No solution":
55
- print(f"Returning math answer: {concise_answer}")
56
  return concise_answer, "\n".join(reasoning)
57
  except Exception as e:
58
- print(f"Math failed: {e}")
59
  reasoning.append(f"Math Solver failed: {e}")
60
 
61
- # Step 2: Try Wikipedia for factual questions
62
- failed_context = ""
63
- try:
64
- wikipedia.set_lang("en")
65
- # Extract key terms: proper nouns, nouns after key verbs
66
- words = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b|\b\w+\b', question.lower())
67
- key_terms = " ".join([w for w in words if w not in ["what", "is", "the", "of", "in", "on", "at", "by", "for", "how", "many", "who", "where", "when", "if", "this", "that", "?"]][-3:])
68
- if not key_terms:
69
- key_terms = " ".join(words[-3:]) # Fallback to last 3 words
70
- print(f"Searching Wikipedia for: {key_terms}")
71
- search_results = wikipedia.search(key_terms, results=1)
72
- if not search_results:
73
- raise wikipedia.exceptions.PageError("No results")
74
- wiki_summary = wikipedia.summary(search_results[0], sentences=5, auto_suggest=True)
75
- prompt = (
76
- f"Question: {question}\n"
77
- f"Context: {wiki_summary}\n"
78
- "Provide a concise answer (e.g., a number or short phrase): "
79
- )
80
- wiki_answer = self._query_llm(prompt)
81
- if wiki_answer.startswith("Error"):
82
- reasoning.append(f"Wikipedia response: {wiki_answer}")
83
- failed_context = wiki_summary
84
- else:
85
- answer_match = re.search(r"Answer: (.*?)(?:\n|$)", wiki_answer, re.DOTALL)
86
- if answer_match:
87
- concise_answer = answer_match.group(1).strip()
88
- reasoning.append(f"Wikipedia: Searched '{key_terms}'. Answer: {concise_answer}")
89
  else:
90
- concise_answer = self._extract_concise_answer(wiki_answer)
91
- reasoning.append(f"Wikipedia: Searched '{key_terms}'. Parsed answer: {concise_answer}")
92
- print(f"Returning Wikipedia answer: {concise_answer}")
93
- return concise_answer, "\n".join(reasoning)
94
- except wikipedia.exceptions.DisambiguationError as e:
95
- print(f"Wikipedia disambiguation: {e}")
96
- reasoning.append(f"Wikipedia: Disambiguation error - {e}")
97
  try:
98
- key_terms = e.options[0]
99
- print(f"Retrying Wikipedia with: {key_terms}")
100
- wiki_summary = wikipedia.summary(key_terms, sentences=5)
101
- failed_context = wiki_summary
 
 
 
 
 
 
102
  prompt = (
103
  f"Question: {question}\n"
104
  f"Context: {wiki_summary}\n"
105
- "Provide a concise answer: "
106
  )
107
  wiki_answer = self._query_llm(prompt)
108
  concise_answer = self._extract_concise_answer(wiki_answer)
109
- reasoning.append(f"Wikipedia retry: Searched '{key_terms}'. Answer: {concise_answer}")
110
- print(f"Returning Wikipedia retry answer: {concise_answer}")
111
  return concise_answer, "\n".join(reasoning)
112
- except Exception as e2:
113
- print(f"Wikipedia retry failed: {e2}")
114
- reasoning.append(f"Wikipedia retry failed: {e2}")
115
- except wikipedia.exceptions.PageError:
116
- print(f"Wikipedia page not found for: {key_terms}")
117
- reasoning.append(f"Wikipedia: Page not found - {key_terms}")
118
- try:
119
- key_terms = " ".join(words[-3:])
120
- print(f"Retrying Wikipedia with: {key_terms}")
121
- search_results = wikipedia.search(key_terms, results=1)
122
- if search_results:
123
- wiki_summary = wikipedia.summary(search_results[0], sentences=5)
124
- failed_context = wiki_summary
125
- prompt = (
126
- f"Question: {question}\n"
127
- f"Context: {wiki_summary}\n"
128
- "Provide a concise answer: "
129
- )
130
- wiki_answer = self._query_llm(prompt)
131
- concise_answer = self._extract_concise_answer(wiki_answer)
132
- reasoning.append(f"Wikipedia retry: Searched '{key_terms}'. Answer: {concise_answer}")
133
- print(f"Returning Wikipedia retry answer: {concise_answer}")
134
- return concise_answer, "\n".join(reasoning)
135
- except Exception as e2:
136
- print(f"Wikipedia retry failed: {e2}")
137
- reasoning.append(f"Wikipedia retry failed: {e2}")
138
 
139
- # Step 3: Try web search with DuckDuckGo
140
  try:
141
  search_url = f"https://duckduckgo.com/html/?q={question.replace(' ', '+')}"
142
  response = requests.get(search_url, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
143
- response.raise_for_status()
144
- soup = BeautifulSoup(response.text, features="html.parser")
145
  snippets = [s.text.strip() for s in soup.find_all("a", class_="result__a")[:3]]
146
  if snippets:
147
  prompt = (
148
  f"Question: {question}\n"
149
  f"Search results: {' '.join(snippets)[:500]}\n"
150
- "Provide a concise answer: "
151
  )
152
  search_answer = self._query_llm(prompt)
153
- if search_answer.startswith("Error"):
154
- reasoning.append(f"Search response: {search_answer}")
155
- failed_context += " " + " ".join(snippets)[:200]
156
- else:
157
- answer_match = re.search(r"Answer: (.*?)(?:\n|$)", search_answer, re.DOTALL)
158
- if answer_match:
159
- concise_answer = answer_match.group(1).strip()
160
- else:
161
- concise_answer = self._extract_concise_answer(search_answer)
162
- reasoning.append(f"Search: Searched '{question[:50]}'. Answer: {concise_answer}")
163
- print(f"Returning search answer: {concise_answer}")
164
- return concise_answer, "\n".join(reasoning)
165
  else:
166
- print("No search results found.")
167
  reasoning.append("Search: No results found.")
168
- simplified_terms = " ".join(words[-3:])
169
- search_url = f"https://duckduckgo.com/html/?q={simplified_terms.replace(' ', '+')}"
170
- response = requests.get(search_url, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
171
- soup = BeautifulSoup(response.text, features="html.parser")
172
- snippets = [s.text.strip() for s in soup.find_all("a", class_="result__a")[:3]]
173
- if snippets:
174
- prompt = (
175
- f"Question: {question}\n"
176
- f"Search results: {' '.join(snippets)[:500]}\n"
177
- "Provide a concise answer: "
178
- )
179
- search_answer = self._query_llm(prompt)
180
- concise_answer = self._extract_concise_answer(search_answer)
181
- reasoning.append(f"Search retry: Searched '{simplified_terms}'. Answer: {concise_answer}")
182
- print(f"Returning search retry answer: {concise_answer}")
183
- return concise_answer, "\n".join(reasoning)
184
- else:
185
- reasoning.append(f"Search retry failed: No results for '{simplified_terms}'")
186
  except Exception as e:
187
- print(f"Search error: {e}")
188
  reasoning.append(f"Search failed: {e}")
189
 
190
- # Step 4: Fallback to LLM with context
191
- try:
192
- prompt = (
193
- f"Question: {question}\n"
194
- f"Additional Info: {failed_context[:200]}\n"
195
- "Provide a concise answer (e.g., a number or short phrase): "
196
- )
197
- full_response = self._query_llm(prompt)
198
- if full_response.startswith("Error"):
199
- print(f"LLM error: {full_response}")
200
- reasoning.append(f"LLM failed: {full_response}")
201
- return "Unknown", "\n".join(reasoning)
202
- answer_match = re.search(r"Answer: (.*?)(?:\n|$)", full_response, re.DOTALL)
203
- if answer_match:
204
- concise_answer = answer_match.group(1).strip()
205
- else:
206
- concise_answer = self._extract_concise_answer(full_response)
207
- reasoning.append(f"LLM: {full_response[:100]}...")
208
- print(f"Returning LLM answer: {concise_answer}")
209
- return concise_answer, "\n".join(reasoning)
210
- except Exception as e:
211
- print(f"LLM error: {e}")
212
- return "Unknown", f"LLM failed: {e}"
213
 
214
- @retry(stop=stop_after_attempt(2), wait=wait_fixed(5))
215
  def _query_llm(self, prompt: str) -> str:
216
  try:
217
  payload = {
218
- "inputs": prompt,
219
- "parameters": {"max_length": 300, "return_full_text": False}
220
  }
221
- response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=15)
222
  if response.status_code in [402, 429]:
223
- print(f"API rate limit: {response.status_code}")
224
  return f"Error: Status {response.status_code}"
225
  response.raise_for_status()
226
  result = response.json()
227
- if isinstance(result, list) and result:
228
- return result[0]["generated_text"].strip()
229
- print("Invalid API response")
230
- return "Error: Invalid API response"
231
- except requests.exceptions.RequestException as e:
232
- print(f"API error: {e}")
233
  return f"Error: {str(e)}"
234
 
235
  def _extract_concise_answer(self, response: str) -> str:
@@ -238,13 +184,13 @@ class BasicAgent:
238
  number_match = re.search(r"\b\d+\.\d+\b|\b\d+\b(?!\.\d)", response)
239
  if number_match:
240
  return number_match.group(0)
241
- words = response.split()[:5]
242
- if len(words) <= 5 and len(" ".join(words)) <= 30:
243
- return " ".join(words)
244
  sentence_end = response.find(".")
245
- if sentence_end != -1:
246
- return response[:sentence_end].strip()[:30]
247
- return response[:30].strip()
 
 
 
248
 
249
  def run_and_submit_all(profile: gr.OAuthProfile | None):
250
  """
 
7
  import wikipedia
8
  from bs4 import BeautifulSoup
9
  from tenacity import retry, stop_after_attempt, wait_fixed
10
+ import spacy
11
+ from io import StringIO
12
 
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
+ # --- Initialize NLP for Question Classification ---
17
+ nlp = spacy.load("en_core_web_sm")
18
+
19
  # --- Basic Agent Definition ---
 
20
  class BasicAgent:
21
  def __init__(self):
22
+ self.api_url = "https://api-inference.huggingface.co/models/mixtral-8x7b-instruct-v0.1"
23
  self.api_token = os.getenv("HF_TOKEN")
 
24
  if not self.api_token:
25
  raise ValueError("HF_TOKEN environment variable not set.")
26
  self.headers = {"Authorization": f"Bearer {self.api_token}"}
27
+ print("BasicAgent initialized with Mixtral-8x7B, SymPy, Wikipedia, and DuckDuckGo search.")
28
+
29
def classify_question(self, question: str) -> str:
    """Classify a question so ``__call__`` can choose a handler.

    Checks run from most to least specific and the first match wins:

    * ``"file"``    - mentions a ``.xlsx``, ``.csv`` or ``.pdf`` file.
    * ``"code"``    - contains the token "code", "python" or "program",
      or mentions a ``.py`` file.
    * ``"math"``    - contains a math keyword ("calculate", "solve",
      "equation", "sum", "product") or an arithmetic expression such as
      ``2 + 3`` or ``4*(5-1)``.
    * ``"factual"`` - contains "who", "what", "where", "when" or the
      phrase "how many".
    * ``"general"`` - none of the above.

    Args:
        question: Raw question text.

    Returns:
        One of "file", "code", "math", "factual" or "general".
    """
    question_lower = question.lower()
    # Compare lower-cased tokens so a sentence-initial "What"/"Who" matches.
    tokens = {token.text.lower() for token in nlp(question)}

    # File and code questions are checked first: they usually also contain
    # digits or wh-words and would otherwise never reach their own handler.
    if any(ext in question_lower for ext in (".xlsx", ".csv", ".pdf")):
        return "file"
    if tokens & {"code", "python", "program"} or ".py" in question_lower:
        return "code"

    # Require operand-operator-operand rather than "any digit or symbol", so
    # a year or a hyphenated word no longer turns the question into math.
    # Known limit: numeric ranges/dates such as "1990-1995" still match.
    has_arithmetic = re.search(r"[\d)]\s*[+\-*/=]\s*[\d(]", question_lower)
    if has_arithmetic or tokens & {"calculate", "solve", "equation", "sum", "product"}:
        return "math"

    # "how many" spans two tokens, so it is matched as a phrase.
    if tokens & {"who", "what", "where", "when"} or "how many" in question_lower:
        return "factual"
    return "general"
42
 
43
  def __call__(self, question: str) -> tuple[str, str]:
44
  print(f"Processing question: {question}")
45
  reasoning = []
46
+ question_type = self.classify_question(question)
47
+ reasoning.append(f"Classified as {question_type} question.")
48
 
49
+ # Handle file-based questions (basic CSV parsing if text is provided)
50
+ if question_type == "file" and (".xlsx" in question.lower() or ".csv" in question.lower()):
51
+ try:
52
+ # Assume table data is embedded in question text (simplified)
53
+ table_match = re.search(r'(\|.*?\|.*?\|.*?\|)', question, re.DOTALL)
54
+ if table_match:
55
+ table_text = table_match.group(1)
56
+ df = pd.read_csv(StringIO(table_text.replace("|", ",")), sep=",")
57
+ reasoning.append(f"Parsed table: {df.to_dict()}")
58
+ prompt = (
59
+ f"Question: {question}\n"
60
+ f"Table data: {df.to_dict()}\n"
61
+ "Provide a concise answer (e.g., a number or short phrase): "
62
+ )
63
+ answer = self._query_llm(prompt)
64
+ concise_answer = self._extract_concise_answer(answer)
65
+ reasoning.append(f"File-based answer: {concise_answer}")
66
+ return concise_answer, "\n".join(reasoning)
67
+ else:
68
+ reasoning.append("No table data found in question.")
69
+ except Exception as e:
70
+ reasoning.append(f"File parsing failed: {e}")
71
 
72
+ # Handle math questions
73
+ if question_type == "math":
 
 
 
74
  try:
75
+ expr = re.sub(r'[^\d+\-*/=().]', ' ', question.lower()).strip()
 
 
76
  if "=" in expr:
77
  left, right = expr.split("=")
78
  eq = sp.Eq(sp.sympify(left.strip()), sp.sympify(right.strip()))
 
84
  concise_answer = str(result)
85
  reasoning.append(f"Math Solver: Evaluated '{expr}'. Result: {concise_answer}")
86
  if concise_answer != "No solution":
 
87
  return concise_answer, "\n".join(reasoning)
88
  except Exception as e:
 
89
  reasoning.append(f"Math Solver failed: {e}")
90
 
91
+ # Handle code questions
92
+ if question_type == "code":
93
+ try:
94
+ # Extract code snippet if provided
95
+ code_match = re.search(r'```python\n(.*?)\n```', question, re.DOTALL)
96
+ if code_match:
97
+ code = code_match.group(1)
98
+ # Simulate code execution (simplified)
99
+ locals_dict = {}
100
+ exec(code, {}, locals_dict)
101
+ concise_answer = str(list(locals_dict.values())[-1]) if locals_dict else "Unknown"
102
+ reasoning.append(f"Code executed: {concise_answer}")
103
+ return concise_answer, "\n".join(reasoning)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  else:
105
+ reasoning.append("No executable code found.")
106
+ except Exception as e:
107
+ reasoning.append(f"Code execution failed: {e}")
108
+
109
+ # Handle factual questions with Wikipedia
110
+ if question_type == "factual":
 
111
  try:
112
+ doc = nlp(question)
113
+ key_terms = " ".join([ent.text for ent in doc.ents if ent.label_ in ["PERSON", "ORG", "GPE", "DATE"]][:3])
114
+ if not key_terms:
115
+ key_terms = " ".join([token.text for token in doc if token.pos_ in ["NOUN", "PROPN"]][-3:])
116
+ print(f"Searching Wikipedia for: {key_terms}")
117
+ wikipedia.set_lang("en")
118
+ search_results = wikipedia.search(key_terms, results=1)
119
+ if not search_results:
120
+ raise wikipedia.exceptions.PageError("No results")
121
+ wiki_summary = wikipedia.summary(search_results[0], sentences=5)
122
  prompt = (
123
  f"Question: {question}\n"
124
  f"Context: {wiki_summary}\n"
125
+ "Answer in one sentence or a number: "
126
  )
127
  wiki_answer = self._query_llm(prompt)
128
  concise_answer = self._extract_concise_answer(wiki_answer)
129
+ reasoning.append(f"Wikipedia: Searched '{key_terms}'. Answer: {concise_answer}")
 
130
  return concise_answer, "\n".join(reasoning)
131
+ except Exception as e:
132
+ reasoning.append(f"Wikipedia failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
+ # Handle general questions with web search
135
  try:
136
  search_url = f"https://duckduckgo.com/html/?q={question.replace(' ', '+')}"
137
  response = requests.get(search_url, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
138
+ soup = BeautifulSoup(response.text, "html.parser")
 
139
  snippets = [s.text.strip() for s in soup.find_all("a", class_="result__a")[:3]]
140
  if snippets:
141
  prompt = (
142
  f"Question: {question}\n"
143
  f"Search results: {' '.join(snippets)[:500]}\n"
144
+ "Answer in one sentence or a number: "
145
  )
146
  search_answer = self._query_llm(prompt)
147
+ concise_answer = self._extract_concise_answer(search_answer)
148
+ reasoning.append(f"Search: Searched '{question[:50]}'. Answer: {concise_answer}")
149
+ return concise_answer, "\n".join(reasoning)
 
 
 
 
 
 
 
 
 
150
  else:
 
151
  reasoning.append("Search: No results found.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  except Exception as e:
 
153
  reasoning.append(f"Search failed: {e}")
154
 
155
+ # Fallback to LLM with chain-of-thought
156
+ prompt = (
157
+ f"Question: {question}\n"
158
+ "Think step-by-step to answer this question. Provide the final answer in one sentence or a number: "
159
+ )
160
+ llm_answer = self._query_llm(prompt)
161
+ concise_answer = self._extract_concise_answer(llm_answer)
162
+ reasoning.append(f"LLM fallback: {llm_answer[:100]}...")
163
+ return concise_answer, "\n".join(reasoning)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
def _query_llm(self, prompt: str) -> str:
    """Send *prompt* to the configured inference endpoint and return its text.

    The prompt is wrapped in ``[INST] ... [/INST]`` markers before sending.
    Failures are never raised to the caller: any error is returned as a
    string that starts with ``"Error"``.

    Args:
        prompt: Prompt text to send to the model.

    Returns:
        The stripped ``generated_text`` of the first result, or an
        ``"Error: ..."`` string describing what went wrong.
    """
    payload = {
        "inputs": f"[INST] {prompt} [/INST]",
        # NOTE(review): the text-generation task documents `max_new_tokens`;
        # confirm whether `max_length` is honoured by this endpoint.
        "parameters": {"max_length": 500, "temperature": 0.7, "return_full_text": False},
    }
    try:
        return self._post_with_retry(payload)
    except Exception as e:
        # Raised only after every retry attempt has failed.
        return f"Error: {str(e)}"

@retry(stop=stop_after_attempt(3), wait=wait_fixed(5), reraise=True)
def _post_with_retry(self, payload: dict) -> str:
    """POST *payload* once; raise on retryable failures so ``@retry`` re-runs it.

    Connection errors, timeouts, 5xx responses and undecodable bodies raise
    and are retried. 4xx responses and well-formed but unexpected payloads
    are returned as ``"Error: ..."`` strings, since repeating the same
    request would not change the outcome.
    """
    response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=20)
    if 400 <= response.status_code < 500:
        # Client-side errors (auth, quota, rate limit, unknown model).
        return f"Error: Status {response.status_code}"
    response.raise_for_status()
    result = response.json()
    # Guard against an empty list or an item without "generated_text".
    if (
        isinstance(result, list)
        and result
        and isinstance(result[0], dict)
        and "generated_text" in result[0]
    ):
        return result[0]["generated_text"].strip()
    return "Error: Invalid API response"
180
 
181
  def _extract_concise_answer(self, response: str) -> str:
 
184
  number_match = re.search(r"\b\d+\.\d+\b|\b\d+\b(?!\.\d)", response)
185
  if number_match:
186
  return number_match.group(0)
 
 
 
187
  sentence_end = response.find(".")
188
+ if sentence_end != -1 and len(response[:sentence_end]) <= 50:
189
+ return response[:sentence_end].strip()
190
+ return response[:50].strip()
191
+
192
+ # --- Rest of the code (run_and_submit_all and Gradio interface) remains unchanged ---
193
+ # [Insert the original run_and_submit_all function and Gradio interface code here]
194
 
195
  def run_and_submit_all(profile: gr.OAuthProfile | None):
196
  """