Final_Agent_HF_Course

Sleeping

App Files Files Community

selim-ba committed on Jun 28, 2025

Commit

c5eb0a4

verified ·

1 Parent(s): b1117d6

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -50

app.py CHANGED Viewed

@@ -31,14 +31,17 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class SuperSmartAgent:
     def __init__(self):
         self.graph = self._build_graph()
-        #---------
         self.wiki_wiki = wikipediaapi.Wikipedia(
             language='en',
             extract_format=wikipediaapi.ExtractFormat.WIKI,
-            user_agent='SelimResearchAgent'
         )
     def _build_graph(self):
@@ -126,11 +129,11 @@ class SuperSmartAgent:
         def search_wikipedia(state):
             question = state["question"]
             try:
                 page_titles = wikipedia.search(question)
                 if not page_titles:
                     state["response"] = "No relevant Wikipedia article found."
                     return state
                 page = wikipedia.page(page_titles[0])
                 summary = page.summary
                 state["response"] = summary
@@ -146,12 +149,11 @@ class SuperSmartAgent:
         def extract_key_phrases(question):
             """Identify important phrases in the question"""
-            # Simple implementation: remove stop words and short words
             stop_words = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'how', 'what', 'when', 'where', 'who', 'which'}
             words = re.findall(r'\b\w+\b', question.lower())
             key_phrases = [word for word in words if word not in stop_words and len(word) > 2]
             return key_phrases
         def validate_answer(question, answer):
             if "how many" in question.lower():
                 if not re.search(r'\d+', answer):
@@ -160,17 +162,22 @@ class SuperSmartAgent:
         def general_reasoning_qa(state):
             question = state["question"]
             # Step 1: Search Wikipedia for relevant pages
             try:
-                search_results = self.wiki_wiki.search(question, results=3)  # Get top 3 pages
                 context = ""
                 for title in search_results:
-                    page = self.wiki_wiki.page(title)
-                    if page.exists():
-                        context += f"\n\n=== Content from: {title} ===\n\n"
-                        context += page.text
                 if not context:
                     state["response"] = "Sorry, I couldn't find relevant information."
@@ -184,11 +191,9 @@ class SuperSmartAgent:
                 # Step 3: Find relevant sections in the context
                 relevant_sections = []
-                # Split context into sections (simplified approach)
                 sections = re.split(r'\n\s*\n', context)
                 for section in sections:
-                    # Check if section contains any of the key phrases
                     if any(phrase.lower() in section.lower() for phrase in key_phrases):
                         relevant_sections.append(section)
@@ -200,81 +205,54 @@ class SuperSmartAgent:
                 relevant_context = "\n\n".join(relevant_sections)
                 # Step 4: Simple answer extraction based on patterns
-                # This is a basic implementation - consider using a proper QA model for better results
                 answer = self.extract_answer(question, relevant_context)
                 if answer:
                     state["response"] = answer
                 else:
-                    # Fallback to a summary if no specific answer found
                     try:
-                        first_page = self.wiki_wiki.page(search_results[0])
-                        if first_page.exists():
-                            summary = first_page.summary[:500] + "..."  # Limit summary length
-                            state["response"] = f"I couldn't find a specific answer, but here's some relevant information: {summary}"
-                        else:
-                            state["response"] = "No relevant information found."
-                    except:
-                        state["response"] = "I couldn't find a specific answer in the available information."
             except Exception as e:
                 state["response"] = f"An error occurred while searching for information: {str(e)}"
             return state
         def extract_answer(question, context):
             """Simple heuristic-based answer extraction"""
-            # This is a placeholder for more sophisticated answer extraction
-            # For demonstration, we'll use some simple pattern matching
-            # If question asks for a count (e.g., "how many")
             if re.search(r'\bhow many\b', question.lower()):
-                # Look for numbers in the context
                 numbers = re.findall(r'\d+', context)
                 if numbers:
-                    # Return the first number found as a simple approach
                     return f"The answer is {numbers[0]}."
-            # If question asks for a date/year (e.g., "when did")
             elif re.search(r'\bwhen (did|was|were)\b', question.lower()):
-                # Look for years in the context
                 years = re.findall(r'\b(19|20)\d{2}\b', context)
                 if years:
-                    # Return the first year found
                     return f"The answer is {years[0]}."
-            # If question asks for a name/person (e.g., "who is")
             elif re.search(r'\bwho (is|was)\b', question.lower()):
-                # Look for proper nouns in the context
                 names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
                 if names:
-                    # Return the first name found
                     return f"The answer is {names[0]}."
-            # If question asks for a definition/explanation (e.g., "what is")
             elif re.search(r'\bwhat (is|are|was|were)\b', question.lower()):
-                # Return the first sentence of the relevant section
                 first_sentence = re.search(r'^[^.!?]*[.!?]', context)
                 if first_sentence:
                     return first_sentence.group(0)
-            # If question asks for a list (e.g., "list of")
             elif re.search(r'\blist of\b', question.lower()):
-                # Look for bullet points or numbered lists
                 items = re.findall(r'^\s*[•*-]\s*.*', context, re.MULTILINE)
                 if items:
                     return "Some relevant items: " + ", ".join([item.strip()[2:] for item in items[:3]]) + "..."
-            # Default case - return a relevant sentence containing question keywords
             key_phrases = extract_key_phrases(question)
             if key_phrases:
-                # Find sentences containing the key phrases
                 sentences = re.split(r'[.!?]', context)
                 for sentence in sentences:
                     if any(phrase.lower() in sentence.lower() for phrase in key_phrases):
                         return sentence.strip() + "."
             return None
         class AgentState(TypedDict, total=False):
             question: str
             is_reversed: bool
@@ -333,6 +311,44 @@ class SuperSmartAgent:
         graph = builder.compile()
         return graph
     def __call__(self, question: str) -> str:
         state = {"question": question}
         result = self.graph.invoke(state)

+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class SuperSmartAgent:
     def __init__(self):
         self.graph = self._build_graph()
         self.wiki_wiki = wikipediaapi.Wikipedia(
             language='en',
             extract_format=wikipediaapi.ExtractFormat.WIKI,
+            user_agent='SelimResearchAgent/1.0'
         )
     def _build_graph(self):
         def search_wikipedia(state):
             question = state["question"]
             try:
+                # Use wikipedia library's search instead of wikipediaapi
                 page_titles = wikipedia.search(question)
                 if not page_titles:
                     state["response"] = "No relevant Wikipedia article found."
                     return state
                 page = wikipedia.page(page_titles[0])
                 summary = page.summary
                 state["response"] = summary
         def extract_key_phrases(question):
             """Return the lowercase words of ``question`` worth matching on.

             Words are the ``\\w+`` tokens of the lowercased question; stop
             words and tokens of two characters or fewer are dropped. Order
             and duplicates are preserved.
             """
             ignored = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'how', 'what', 'when', 'where', 'who', 'which'}
             kept = []
             for token in re.findall(r'\b\w+\b', question.lower()):
                 # Skip filler words and very short tokens.
                 if token in ignored or len(token) <= 2:
                     continue
                 kept.append(token)
             return kept
         def validate_answer(question, answer):
             if "how many" in question.lower():
                 if not re.search(r'\d+', answer):
         def general_reasoning_qa(state):
             question = state["question"]
             # Step 1: Search Wikipedia for relevant pages
             try:
+                # Use wikipedia library for search functionality
+                search_results = wikipedia.search(question, results=3)
                 context = ""
+                # Use wikipediaapi to get full content for each result
                 for title in search_results:
+                    try:
+                        page = self.wiki_wiki.page(title)
+                        if page.exists():
+                            context += f"\n\n=== Content from: {title} ===\n\n"
+                            context += page.text
+                    except Exception as e:
+                        print(f"Error processing page {title}: {e}")
+                        continue
                 if not context:
                     state["response"] = "Sorry, I couldn't find relevant information."
                 # Step 3: Find relevant sections in the context
                 relevant_sections = []
                 sections = re.split(r'\n\s*\n', context)
                 for section in sections:
                     if any(phrase.lower() in section.lower() for phrase in key_phrases):
                         relevant_sections.append(section)
                 relevant_context = "\n\n".join(relevant_sections)
                 # Step 4: Simple answer extraction based on patterns
                 answer = self.extract_answer(question, relevant_context)
                 if answer:
                     state["response"] = answer
                 else:
                     try:
+                        if search_results:
+                            first_page = self.wiki_wiki.page(search_results[0])
+                            if first_page.exists():
+                                summary = first_page.summary[:500] + "..."  # Limit summary length
+                                state["response"] = f"I couldn't find a specific answer, but here's some relevant information: {summary}"
+                            else:
+                                state["response"] = "No relevant information found."
+                    except Exception as e:
+                        state["response"] = f"I couldn't find a specific answer in the available information."
             except Exception as e:
                 state["response"] = f"An error occurred while searching for information: {str(e)}"
             return state
         def extract_answer(question, context):
             """Heuristically pull a short answer for ``question`` out of ``context``.

             The question is classified by regex ("how many", "when did/was/were",
             "who is/was", "what is/are/was/were", "list of") and the matching
             pattern is searched for in ``context``. When the selected branch
             finds nothing, or no branch applies, the first sentence containing
             one of the question's key phrases is returned instead.

             Args:
                 question: The question text.
                 context: Text to search for an answer.

             Returns:
                 The answer string, or None when nothing in ``context`` matches.
             """
             lowered = question.lower()
             if re.search(r'\bhow many\b', lowered):
                 numbers = re.findall(r'\d+', context)
                 if numbers:
                     return f"The answer is {numbers[0]}."
             elif re.search(r'\bwhen (did|was|were)\b', lowered):
                 # Non-capturing group: with a capturing group, findall would
                 # return only the century prefix ("19"/"20"), not the year.
                 years = re.findall(r'\b(?:19|20)\d{2}\b', context)
                 if years:
                     return f"The answer is {years[0]}."
             elif re.search(r'\bwho (is|was)\b', lowered):
                 # Two consecutive capitalised words are treated as a name.
                 names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
                 if names:
                     return f"The answer is {names[0]}."
             elif re.search(r'\bwhat (is|are|was|were)\b', lowered):
                 first_sentence = re.search(r'^[^.!?]*[.!?]', context)
                 if first_sentence:
                     return first_sentence.group(0)
             elif re.search(r'\blist of\b', lowered):
                 # Capture the text after the bullet instead of slicing a fixed
                 # two characters, so "-item" and "-   item" are handled too.
                 items = re.findall(r'^\s*[•*-]\s*(.*)', context, re.MULTILINE)
                 if items:
                     return "Some relevant items: " + ", ".join(item.strip() for item in items[:3]) + "..."
             # Default case: first sentence mentioning any key phrase.
             key_phrases = extract_key_phrases(question)
             if key_phrases:
                 for sentence in re.split(r'[.!?]', context):
                     sentence_lower = sentence.lower()
                     if any(phrase.lower() in sentence_lower for phrase in key_phrases):
                         return sentence.strip() + "."
             return None
         class AgentState(TypedDict, total=False):
             question: str
             is_reversed: bool
         graph = builder.compile()
         return graph
+    def extract_answer(self, question, context):
+        """Simple heuristic-based answer extraction"""
+        # If question asks for a count (e.g., "how many")
+        if re.search(r'\bhow many\b', question.lower()):
+            numbers = re.findall(r'\d+', context)
+            if numbers:
+                return f"The answer is {numbers[0]}."
+        # If question asks for a date/year (e.g., "when did")
+        elif re.search(r'\bwhen (did|was|were)\b', question.lower()):
+            years = re.findall(r'\b(19|20)\d{2}\b', context)
+            if years:
+                return f"The answer is {years[0]}."
+        # If question asks for a name/person (e.g., "who is")
+        elif re.search(r'\bwho (is|was)\b', question.lower()):
+            names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
+            if names:
+                return f"The answer is {names[0]}."
+        # If question asks for a definition/explanation (e.g., "what is")
+        elif re.search(r'\bwhat (is|are|was|were)\b', question.lower()):
+            first_sentence = re.search(r'^[^.!?]*[.!?]', context)
+            if first_sentence:
+                return first_sentence.group(0)
+        # If question asks for a list (e.g., "list of")
+        elif re.search(r'\blist of\b', question.lower()):
+            items = re.findall(r'^\s*[•*-]\s*.*', context, re.MULTILINE)
+            if items:
+                return "Some relevant items: " + ", ".join([item.strip()[2:] for item in items[:3]]) + "..."
+        # Default case - return a relevant sentence containing question keywords
+        key_phrases = extract_key_phrases(question)
+        if key_phrases:
+            sentences = re.split(r'[.!?]', context)
+            for sentence in sentences:
+                if any(phrase.lower() in sentence.lower() for phrase in key_phrases):
+                    return sentence.strip() + "."
+        return None
     def __call__(self, question: str) -> str:
         state = {"question": question}
         result = self.graph.invoke(state)