selim-ba committed on
Commit
c5eb0a4
·
verified ·
1 Parent(s): b1117d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -50
app.py CHANGED
@@ -31,14 +31,17 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
31
 
32
 
33
 
 
 
 
 
34
  class SuperSmartAgent:
35
  def __init__(self):
36
  self.graph = self._build_graph()
37
- #---------
38
  self.wiki_wiki = wikipediaapi.Wikipedia(
39
  language='en',
40
  extract_format=wikipediaapi.ExtractFormat.WIKI,
41
- user_agent='SelimResearchAgent'
42
  )
43
 
44
  def _build_graph(self):
@@ -126,11 +129,11 @@ class SuperSmartAgent:
126
  def search_wikipedia(state):
127
  question = state["question"]
128
  try:
 
129
  page_titles = wikipedia.search(question)
130
  if not page_titles:
131
  state["response"] = "No relevant Wikipedia article found."
132
  return state
133
-
134
  page = wikipedia.page(page_titles[0])
135
  summary = page.summary
136
  state["response"] = summary
@@ -146,12 +149,11 @@ class SuperSmartAgent:
146
 
147
  def extract_key_phrases(question):
148
  """Identify important phrases in the question"""
149
- # Simple implementation: remove stop words and short words
150
  stop_words = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'how', 'what', 'when', 'where', 'who', 'which'}
151
  words = re.findall(r'\b\w+\b', question.lower())
152
  key_phrases = [word for word in words if word not in stop_words and len(word) > 2]
153
  return key_phrases
154
-
155
  def validate_answer(question, answer):
156
  if "how many" in question.lower():
157
  if not re.search(r'\d+', answer):
@@ -160,17 +162,22 @@ class SuperSmartAgent:
160
 
161
  def general_reasoning_qa(state):
162
  question = state["question"]
163
-
164
  # Step 1: Search Wikipedia for relevant pages
165
  try:
166
- search_results = self.wiki_wiki.search(question, results=3) # Get top 3 pages
 
167
  context = ""
168
 
 
169
  for title in search_results:
170
- page = self.wiki_wiki.page(title)
171
- if page.exists():
172
- context += f"\n\n=== Content from: {title} ===\n\n"
173
- context += page.text
 
 
 
 
174
 
175
  if not context:
176
  state["response"] = "Sorry, I couldn't find relevant information."
@@ -184,11 +191,9 @@ class SuperSmartAgent:
184
 
185
  # Step 3: Find relevant sections in the context
186
  relevant_sections = []
187
- # Split context into sections (simplified approach)
188
  sections = re.split(r'\n\s*\n', context)
189
 
190
  for section in sections:
191
- # Check if section contains any of the key phrases
192
  if any(phrase.lower() in section.lower() for phrase in key_phrases):
193
  relevant_sections.append(section)
194
 
@@ -200,81 +205,54 @@ class SuperSmartAgent:
200
  relevant_context = "\n\n".join(relevant_sections)
201
 
202
  # Step 4: Simple answer extraction based on patterns
203
- # This is a basic implementation - consider using a proper QA model for better results
204
  answer = self.extract_answer(question, relevant_context)
205
  if answer:
206
  state["response"] = answer
207
  else:
208
- # Fallback to a summary if no specific answer found
209
  try:
210
- first_page = self.wiki_wiki.page(search_results[0])
211
- if first_page.exists():
212
- summary = first_page.summary[:500] + "..." # Limit summary length
213
- state["response"] = f"I couldn't find a specific answer, but here's some relevant information: {summary}"
214
- else:
215
- state["response"] = "No relevant information found."
216
- except:
217
- state["response"] = "I couldn't find a specific answer in the available information."
218
-
219
  except Exception as e:
220
  state["response"] = f"An error occurred while searching for information: {str(e)}"
221
-
222
  return state
223
 
224
  def extract_answer(question, context):
225
  """Simple heuristic-based answer extraction"""
226
- # This is a placeholder for more sophisticated answer extraction
227
- # For demonstration, we'll use some simple pattern matching
228
-
229
- # If question asks for a count (e.g., "how many")
230
  if re.search(r'\bhow many\b', question.lower()):
231
- # Look for numbers in the context
232
  numbers = re.findall(r'\d+', context)
233
  if numbers:
234
- # Return the first number found as a simple approach
235
  return f"The answer is {numbers[0]}."
236
-
237
- # If question asks for a date/year (e.g., "when did")
238
  elif re.search(r'\bwhen (did|was|were)\b', question.lower()):
239
- # Look for years in the context
240
  years = re.findall(r'\b(19|20)\d{2}\b', context)
241
  if years:
242
- # Return the first year found
243
  return f"The answer is {years[0]}."
244
-
245
- # If question asks for a name/person (e.g., "who is")
246
  elif re.search(r'\bwho (is|was)\b', question.lower()):
247
- # Look for proper nouns in the context
248
  names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
249
  if names:
250
- # Return the first name found
251
  return f"The answer is {names[0]}."
252
-
253
- # If question asks for a definition/explanation (e.g., "what is")
254
  elif re.search(r'\bwhat (is|are|was|were)\b', question.lower()):
255
- # Return the first sentence of the relevant section
256
  first_sentence = re.search(r'^[^.!?]*[.!?]', context)
257
  if first_sentence:
258
  return first_sentence.group(0)
259
-
260
- # If question asks for a list (e.g., "list of")
261
  elif re.search(r'\blist of\b', question.lower()):
262
- # Look for bullet points or numbered lists
263
  items = re.findall(r'^\s*[•*-]\s*.*', context, re.MULTILINE)
264
  if items:
265
  return "Some relevant items: " + ", ".join([item.strip()[2:] for item in items[:3]]) + "..."
266
-
267
- # Default case - return a relevant sentence containing question keywords
268
  key_phrases = extract_key_phrases(question)
269
  if key_phrases:
270
- # Find sentences containing the key phrases
271
  sentences = re.split(r'[.!?]', context)
272
  for sentence in sentences:
273
  if any(phrase.lower() in sentence.lower() for phrase in key_phrases):
274
  return sentence.strip() + "."
275
-
276
  return None
277
-
278
  class AgentState(TypedDict, total=False):
279
  question: str
280
  is_reversed: bool
@@ -333,6 +311,44 @@ class SuperSmartAgent:
333
  graph = builder.compile()
334
  return graph
335
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  def __call__(self, question: str) -> str:
337
  state = {"question": question}
338
  result = self.graph.invoke(state)
 
31
 
32
 
33
 
34
+
35
+ # --- Constants ---
36
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
37
+
38
  class SuperSmartAgent:
39
  def __init__(self):
40
  self.graph = self._build_graph()
 
41
  self.wiki_wiki = wikipediaapi.Wikipedia(
42
  language='en',
43
  extract_format=wikipediaapi.ExtractFormat.WIKI,
44
+ user_agent='SelimResearchAgent/1.0'
45
  )
46
 
47
  def _build_graph(self):
 
129
  def search_wikipedia(state):
130
  question = state["question"]
131
  try:
132
+ # Use wikipedia library's search instead of wikipediaapi
133
  page_titles = wikipedia.search(question)
134
  if not page_titles:
135
  state["response"] = "No relevant Wikipedia article found."
136
  return state
 
137
  page = wikipedia.page(page_titles[0])
138
  summary = page.summary
139
  state["response"] = summary
 
149
 
150
  def extract_key_phrases(question):
151
  """Identify important phrases in the question"""
 
152
  stop_words = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'how', 'what', 'when', 'where', 'who', 'which'}
153
  words = re.findall(r'\b\w+\b', question.lower())
154
  key_phrases = [word for word in words if word not in stop_words and len(word) > 2]
155
  return key_phrases
156
+
157
  def validate_answer(question, answer):
158
  if "how many" in question.lower():
159
  if not re.search(r'\d+', answer):
 
162
 
163
  def general_reasoning_qa(state):
164
  question = state["question"]
 
165
  # Step 1: Search Wikipedia for relevant pages
166
  try:
167
+ # Use wikipedia library for search functionality
168
+ search_results = wikipedia.search(question, results=3)
169
  context = ""
170
 
171
+ # Use wikipediaapi to get full content for each result
172
  for title in search_results:
173
+ try:
174
+ page = self.wiki_wiki.page(title)
175
+ if page.exists():
176
+ context += f"\n\n=== Content from: {title} ===\n\n"
177
+ context += page.text
178
+ except Exception as e:
179
+ print(f"Error processing page {title}: {e}")
180
+ continue
181
 
182
  if not context:
183
  state["response"] = "Sorry, I couldn't find relevant information."
 
191
 
192
  # Step 3: Find relevant sections in the context
193
  relevant_sections = []
 
194
  sections = re.split(r'\n\s*\n', context)
195
 
196
  for section in sections:
 
197
  if any(phrase.lower() in section.lower() for phrase in key_phrases):
198
  relevant_sections.append(section)
199
 
 
205
  relevant_context = "\n\n".join(relevant_sections)
206
 
207
  # Step 4: Simple answer extraction based on patterns
 
208
  answer = self.extract_answer(question, relevant_context)
209
  if answer:
210
  state["response"] = answer
211
  else:
 
212
  try:
213
+ if search_results:
214
+ first_page = self.wiki_wiki.page(search_results[0])
215
+ if first_page.exists():
216
+ summary = first_page.summary[:500] + "..." # Limit summary length
217
+ state["response"] = f"I couldn't find a specific answer, but here's some relevant information: {summary}"
218
+ else:
219
+ state["response"] = "No relevant information found."
220
+ except Exception as e:
221
+ state["response"] = f"I couldn't find a specific answer in the available information."
222
  except Exception as e:
223
  state["response"] = f"An error occurred while searching for information: {str(e)}"
 
224
  return state
225
 
226
  def extract_answer(question, context):
227
  """Simple heuristic-based answer extraction"""
 
 
 
 
228
  if re.search(r'\bhow many\b', question.lower()):
 
229
  numbers = re.findall(r'\d+', context)
230
  if numbers:
 
231
  return f"The answer is {numbers[0]}."
 
 
232
  elif re.search(r'\bwhen (did|was|were)\b', question.lower()):
 
233
  years = re.findall(r'\b(19|20)\d{2}\b', context)
234
  if years:
 
235
  return f"The answer is {years[0]}."
 
 
236
  elif re.search(r'\bwho (is|was)\b', question.lower()):
 
237
  names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
238
  if names:
 
239
  return f"The answer is {names[0]}."
 
 
240
  elif re.search(r'\bwhat (is|are|was|were)\b', question.lower()):
 
241
  first_sentence = re.search(r'^[^.!?]*[.!?]', context)
242
  if first_sentence:
243
  return first_sentence.group(0)
 
 
244
  elif re.search(r'\blist of\b', question.lower()):
 
245
  items = re.findall(r'^\s*[•*-]\s*.*', context, re.MULTILINE)
246
  if items:
247
  return "Some relevant items: " + ", ".join([item.strip()[2:] for item in items[:3]]) + "..."
 
 
248
  key_phrases = extract_key_phrases(question)
249
  if key_phrases:
 
250
  sentences = re.split(r'[.!?]', context)
251
  for sentence in sentences:
252
  if any(phrase.lower() in sentence.lower() for phrase in key_phrases):
253
  return sentence.strip() + "."
 
254
  return None
255
+
256
  class AgentState(TypedDict, total=False):
257
  question: str
258
  is_reversed: bool
 
311
  graph = builder.compile()
312
  return graph
313
 
314
def extract_answer(self, question, context):
    """Pull a short answer out of *context* using question-type heuristics.

    The lowercased question is tested against a fixed series of patterns;
    the first pattern that matches decides which extraction is attempted:

    * "how many"              -> first run of digits in the context
    * "when did/was/were"     -> first four-digit year starting with 19 or 20
    * "who is/was"            -> first pair of capitalised words
    * "what is/are/was/were"  -> text up to the first '.', '!' or '?'
    * "list of"               -> up to three bullet items

    If that extraction finds nothing, or no pattern matched, the first
    sentence containing any significant word of the question is returned.

    Args:
        question: The question being answered.
        context: Text to search for the answer.

    Returns:
        The answer string, or None when nothing suitable is found.
    """
    asked = question.lower()

    # If question asks for a count (e.g., "how many")
    if re.search(r'\bhow many\b', asked):
        numbers = re.findall(r'\d+', context)
        if numbers:
            return f"The answer is {numbers[0]}."
    # If question asks for a date/year (e.g., "when did")
    elif re.search(r'\bwhen (did|was|were)\b', asked):
        # Non-capturing group: with a capturing group, findall() returns
        # only the century prefix ("19"/"20") instead of the whole year.
        years = re.findall(r'\b(?:19|20)\d{2}\b', context)
        if years:
            return f"The answer is {years[0]}."
    # If question asks for a name/person (e.g., "who is")
    elif re.search(r'\bwho (is|was)\b', asked):
        names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
        if names:
            return f"The answer is {names[0]}."
    # If question asks for a definition/explanation (e.g., "what is")
    elif re.search(r'\bwhat (is|are|was|were)\b', asked):
        first_sentence = re.search(r'^[^.!?]*[.!?]', context)
        if first_sentence:
            return first_sentence.group(0)
    # If question asks for a list (e.g., "list of")
    elif re.search(r'\blist of\b', asked):
        # Capture only the text after the bullet marker, so an item written
        # without a space after the marker ("-apple") keeps its first letter.
        items = re.findall(r'^\s*[•*-]\s*(.*)', context, re.MULTILINE)
        if items:
            return "Some relevant items: " + ", ".join(item.strip() for item in items[:3]) + "..."

    # Default case - return a relevant sentence containing question keywords.
    # The keyword filter is inlined here: the extract_key_phrases helper
    # appears to be local to _build_graph and so would not be reachable from
    # this method (NameError). The filter applies the same stop-word and
    # minimum-length rules as that helper.
    stop_words = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'how', 'what', 'when', 'where', 'who', 'which'}
    key_phrases = [
        word for word in re.findall(r'\b\w+\b', asked)
        if word not in stop_words and len(word) > 2
    ]
    if key_phrases:
        for sentence in re.split(r'[.!?]', context):
            lowered = sentence.lower()
            if any(phrase in lowered for phrase in key_phrases):
                return sentence.strip() + "."
    return None
349
+
350
+
351
+
352
  def __call__(self, question: str) -> str:
353
  state = {"question": question}
354
  result = self.graph.invoke(state)