selim-ba committed on
Commit
b1117d6
·
verified ·
1 Parent(s): 3768321

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -31
app.py CHANGED
@@ -34,6 +34,12 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
34
  class SuperSmartAgent:
35
  def __init__(self):
36
  self.graph = self._build_graph()
 
 
 
 
 
 
37
 
38
  def _build_graph(self):
39
  def score_text(text):
@@ -133,10 +139,19 @@ class SuperSmartAgent:
133
  return state
134
 
135
  def preprocess_context(context):
136
- context = re.sub(r'\[\d+\]', '', context) # Remove citations
137
- context = re.sub(r'\s+', ' ', context).strip() # Clean whitespace
 
138
  return context
139
 
 
 
 
 
 
 
 
 
140
  def validate_answer(question, answer):
141
  if "how many" in question.lower():
142
  if not re.search(r'\d+', answer):
@@ -146,51 +161,120 @@ class SuperSmartAgent:
146
  def general_reasoning_qa(state):
147
  question = state["question"]
148
 
149
- # Step 1: Search Wikipedia and gather context
150
- context = ""
151
  try:
152
- wiki_wiki = wikipediaapi.Wikipedia('en')
153
- search_results = wiki_wiki.search(question, results=3) # get top 3 pages
154
 
155
  for title in search_results:
156
- page = wiki_wiki.page(title)
157
  if page.exists():
158
- context += page.text + "\n"
159
- except Exception as e:
160
- state["response"] = f"Error fetching Wikipedia content: {e}"
161
- return state
162
 
163
- if not context:
164
- state["response"] = "Sorry, I couldnt find enough information."
165
- return state
166
 
167
- context = preprocess_context(context)
 
168
 
169
- # Step 2: Use a pre-trained QA model to generate the answer
170
- try:
171
- qa_pipeline = pipeline("question-answering")
172
- result = qa_pipeline(question=question, context=context)
173
- answer = result['answer']
 
 
174
 
175
- if validate_answer(question, answer):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  state["response"] = answer
177
  else:
178
- # Fallback: return a summary if the answer is not validated
179
  try:
180
- page_titles = wikipedia.search(question)
181
- if page_titles:
182
- page = wikipedia.page(page_titles[0])
183
- summary = page.summary
184
- state["response"] = summary
185
  else:
186
- state["response"] = "No relevant Wikipedia article found."
187
- except Exception as e:
188
- state["response"] = f"Error fetching Wikipedia content: {e}"
 
189
  except Exception as e:
190
- state["response"] = f"Error generating answer: {e}"
191
 
192
  return state
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  class AgentState(TypedDict, total=False):
195
  question: str
196
  is_reversed: bool
 
34
  class SuperSmartAgent:
35
  def __init__(self):
36
  self.graph = self._build_graph()
37
+ #---------
38
+ self.wiki_wiki = wikipediaapi.Wikipedia(
39
+ language='en',
40
+ extract_format=wikipediaapi.ExtractFormat.WIKI,
41
+ user_agent='SelimResearchAgent'
42
+ )
43
 
44
  def _build_graph(self):
45
  def score_text(text):
 
139
  return state
140
 
141
  def preprocess_context(context):
142
+ context = re.sub(r'\[\d+\]', '', context)
143
+ context = re.sub(r'\s+', ' ', context).strip()
144
+ context = re.sub(r'\{\|.*?\|\}', '', context, flags=re.DOTALL)
145
  return context
146
 
147
def extract_key_phrases(question):
    """Return the lowercase content words of *question*, in order of appearance.

    A word is kept when it is longer than two characters and is not one of a
    small fixed set of articles, "to be" forms and interrogatives.
    Duplicates are kept.
    """
    ignored = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'how', 'what', 'when', 'where', 'who', 'which'}
    kept = []
    for token in re.findall(r'\b\w+\b', question.lower()):
        # Skip very short tokens and the filler words listed above.
        if len(token) <= 2 or token in ignored:
            continue
        kept.append(token)
    return kept
154
+
155
  def validate_answer(question, answer):
156
  if "how many" in question.lower():
157
  if not re.search(r'\d+', answer):
 
161
  def general_reasoning_qa(state):
162
  question = state["question"]
163
 
164
+ # Step 1: Search Wikipedia for relevant pages
 
165
  try:
166
+ search_results = self.wiki_wiki.search(question, results=3) # Get top 3 pages
167
+ context = ""
168
 
169
  for title in search_results:
170
+ page = self.wiki_wiki.page(title)
171
  if page.exists():
172
+ context += f"\n\n=== Content from: {title} ===\n\n"
173
+ context += page.text
 
 
174
 
175
+ if not context:
176
+ state["response"] = "Sorry, I couldn't find relevant information."
177
+ return state
178
 
179
+ # Preprocess the context
180
+ context = preprocess_context(context)
181
 
182
+ # Step 2: Extract key phrases from the question
183
+ key_phrases = extract_key_phrases(question)
184
+
185
+ # Step 3: Find relevant sections in the context
186
+ relevant_sections = []
187
+ # Split context into sections (simplified approach)
188
+ sections = re.split(r'\n\s*\n', context)
189
 
190
+ for section in sections:
191
+ # Check if section contains any of the key phrases
192
+ if any(phrase.lower() in section.lower() for phrase in key_phrases):
193
+ relevant_sections.append(section)
194
+
195
+ if not relevant_sections:
196
+ state["response"] = "I found information but couldn't identify the most relevant parts."
197
+ return state
198
+
199
+ # Combine relevant sections
200
+ relevant_context = "\n\n".join(relevant_sections)
201
+
202
+ # Step 4: Simple answer extraction based on patterns
203
+ # This is a basic implementation - consider using a proper QA model for better results
204
+ answer = self.extract_answer(question, relevant_context)
205
+ if answer:
206
  state["response"] = answer
207
  else:
208
+ # Fallback to a summary if no specific answer found
209
  try:
210
+ first_page = self.wiki_wiki.page(search_results[0])
211
+ if first_page.exists():
212
+ summary = first_page.summary[:500] + "..." # Limit summary length
213
+ state["response"] = f"I couldn't find a specific answer, but here's some relevant information: {summary}"
 
214
  else:
215
+ state["response"] = "No relevant information found."
216
+ except:
217
+ state["response"] = "I couldn't find a specific answer in the available information."
218
+
219
  except Exception as e:
220
+ state["response"] = f"An error occurred while searching for information: {str(e)}"
221
 
222
  return state
223
 
224
def extract_answer(question, context):
    """Extract an answer to *question* from *context* with simple heuristics.

    The question is classified by its wording and exactly one strategy is
    tried:

    * "how many"               -> first run of digits in the context
    * "when did/was/were"      -> first four-digit year starting with 19 or 20
    * "who is/was"             -> first pair of capitalised words
    * "what is/are/was/were"   -> first sentence of the context
    * "list of"                -> up to three bullet-style lines

    When the chosen strategy finds nothing, or the question matches none of
    the patterns, the first sentence containing any key phrase of the
    question (see ``extract_key_phrases``) is returned instead.

    Args:
        question: The natural-language question.
        context: Text to search for the answer.

    Returns:
        The answer string, or ``None`` when nothing suitable was found.
    """
    question_lower = question.lower()

    # If question asks for a count (e.g., "how many")
    if re.search(r'\bhow many\b', question_lower):
        numbers = re.findall(r'\d+', context)
        if numbers:
            # Return the first number found as a simple approach
            return f"The answer is {numbers[0]}."

    # If question asks for a date/year (e.g., "when did")
    elif re.search(r'\bwhen (did|was|were)\b', question_lower):
        # Non-capturing group: with a capturing group, findall would return
        # only the century prefix ("19"/"20") instead of the whole year.
        years = re.findall(r'\b(?:19|20)\d{2}\b', context)
        if years:
            # Return the first year found
            return f"The answer is {years[0]}."

    # If question asks for a name/person (e.g., "who is")
    elif re.search(r'\bwho (is|was)\b', question_lower):
        # Two consecutive capitalised words stand in for a proper name.
        names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
        if names:
            # Return the first name found
            return f"The answer is {names[0]}."

    # If question asks for a definition/explanation (e.g., "what is")
    elif re.search(r'\bwhat (is|are|was|were)\b', question_lower):
        # Return the first sentence of the relevant section
        first_sentence = re.search(r'^[^.!?]*[.!?]', context)
        if first_sentence:
            return first_sentence.group(0)

    # If question asks for a list (e.g., "list of")
    elif re.search(r'\blist of\b', question_lower):
        # Look for bullet-style lines
        items = re.findall(r'^\s*[•*-]\s*.*', context, re.MULTILINE)
        if items:
            # [2:] drops the bullet character and the space that follows it.
            return "Some relevant items: " + ", ".join([item.strip()[2:] for item in items[:3]]) + "..."

    # Default case - return a relevant sentence containing question keywords
    key_phrases = extract_key_phrases(question)
    if key_phrases:
        for sentence in re.split(r'[.!?]', context):
            sentence_lower = sentence.lower()
            if any(phrase.lower() in sentence_lower for phrase in key_phrases):
                return sentence.strip() + "."

    return None
277
+
278
  class AgentState(TypedDict, total=False):
279
  question: str
280
  is_reversed: bool