Final_Agent_HF_Course

Sleeping

App Files Files Community

selim-ba committed on Jun 28, 2025

Commit

b1117d6

verified ·

1 Parent(s): 3768321

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -31

app.py CHANGED Viewed

@@ -34,6 +34,12 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class SuperSmartAgent:
     def __init__(self):
         self.graph = self._build_graph()
     def _build_graph(self):
         def score_text(text):
@@ -133,10 +139,19 @@ class SuperSmartAgent:
             return state
         def preprocess_context(context):
-            context = re.sub(r'\[\d+\]', '', context)  # Remove citations
-            context = re.sub(r'\s+', ' ', context).strip()  # Clean whitespace
             return context
         def validate_answer(question, answer):
             if "how many" in question.lower():
                 if not re.search(r'\d+', answer):
@@ -146,51 +161,120 @@ class SuperSmartAgent:
         def general_reasoning_qa(state):
             question = state["question"]
-            # Step 1: Search Wikipedia and gather context
-            context = ""
             try:
-                wiki_wiki = wikipediaapi.Wikipedia('en')
-                search_results = wiki_wiki.search(question, results=3)  # get top 3 pages
                 for title in search_results:
-                    page = wiki_wiki.page(title)
                     if page.exists():
-                        context += page.text + "\n"
-            except Exception as e:
-                state["response"] = f"Error fetching Wikipedia content: {e}"
-                return state
-            if not context:
-                state["response"] = "Sorry, I couldn’t find enough information."
-                return state
-            context = preprocess_context(context)
-            # Step 2: Use a pre-trained QA model to generate the answer
-            try:
-                qa_pipeline = pipeline("question-answering")
-                result = qa_pipeline(question=question, context=context)
-                answer = result['answer']
-                if validate_answer(question, answer):
                     state["response"] = answer
                 else:
-                    # Fallback: return a summary if the answer is not validated
                     try:
-                        page_titles = wikipedia.search(question)
-                        if page_titles:
-                            page = wikipedia.page(page_titles[0])
-                            summary = page.summary
-                            state["response"] = summary
                         else:
-                            state["response"] = "No relevant Wikipedia article found."
-                    except Exception as e:
-                        state["response"] = f"Error fetching Wikipedia content: {e}"
             except Exception as e:
-                state["response"] = f"Error generating answer: {e}"
             return state
         class AgentState(TypedDict, total=False):
             question: str
             is_reversed: bool

 class SuperSmartAgent:
     def __init__(self):
+        """Build the agent graph and create the Wikipedia client used by its nodes."""
         self.graph = self._build_graph()
+        # NOTE(review): the graph is built before self.wiki_wiki exists. The
+        # visible node function (general_reasoning_qa) only reads
+        # self.wiki_wiki when it is called, so this order works for it;
+        # confirm nothing in _build_graph touches the client while building.
+        self.wiki_wiki = wikipediaapi.Wikipedia(
+            language='en',
+            extract_format=wikipediaapi.ExtractFormat.WIKI,
+            user_agent='SelimResearchAgent'
+        )
     def _build_graph(self):
         def score_text(text):
             return state
         def preprocess_context(context):
+            """Clean raw page text while keeping its line and paragraph breaks.
+
+            Removes wiki table markup and numeric citation markers, collapses
+            runs of spaces/tabs to a single space, trims spaces around line
+            breaks and reduces three or more consecutive newlines to one blank
+            line. Newlines are kept on purpose: general_reasoning_qa splits the
+            result on blank lines and extract_answer anchors list items on
+            line starts, which a fully flattened string would defeat.
+
+            Args:
+                context: Raw text gathered from the pages.
+
+            Returns:
+                The cleaned text, stripped of leading/trailing whitespace.
+            """
+            # Remove wiki tables ({| ... |}); DOTALL lets a table span lines.
+            context = re.sub(r'\{\|.*?\|\}', '', context, flags=re.DOTALL)
+            # Remove numeric citation markers such as [12].
+            context = re.sub(r'\[\d+\]', '', context)
+            # Collapse horizontal whitespace only; "[^\S\n]" is any whitespace
+            # character except a newline.
+            context = re.sub(r'[^\S\n]+', ' ', context)
+            # Drop the single space that may be left on either side of a newline.
+            context = re.sub(r' ?\n ?', '\n', context)
+            # Normalise paragraph gaps to exactly one blank line.
+            context = re.sub(r'\n{3,}', '\n\n', context).strip()
             return context
+        def extract_key_phrases(question):
+            """Return the lower-cased content words of the question.
+
+            The question is lower-cased and tokenised into runs of word
+            characters. Tokens that are stop words or have two characters or
+            fewer are dropped; order and duplicates are preserved.
+            """
+            stop_words = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'how', 'what', 'when', 'where', 'who', 'which'}
+            kept = []
+            for token in re.findall(r'\b\w+\b', question.lower()):
+                # Guard clause: skip anything too short or too common to be useful.
+                if len(token) <= 2 or token in stop_words:
+                    continue
+                kept.append(token)
+            return kept
         def validate_answer(question, answer):
             if "how many" in question.lower():
                 if not re.search(r'\d+', answer):
         def general_reasoning_qa(state):
+            """Answer state["question"] from Wikipedia text and store it in state["response"].
+
+            Collects the text of the pages returned by the search, keeps the
+            sections that mention a key phrase of the question, and runs the
+            heuristic extract_answer over them. If no answer is extracted, a
+            truncated summary of the first result is used. Every failure is
+            reported through state["response"]; nothing is raised.
+
+            Returns:
+                The same state mapping, with "response" set.
+            """
             question = state["question"]
+            # Step 1: Search Wikipedia for relevant pages
             try:
+                # NOTE(review): confirm the installed wikipediaapi release exposes
+                # Wikipedia.search(); if it does not, the outer handler below
+                # turns the AttributeError into the error response.
+                search_results = self.wiki_wiki.search(question, results=3)  # Get top 3 pages
+                context = ""
                 for title in search_results:
+                    page = self.wiki_wiki.page(title)
                     if page.exists():
+                        context += f"\n\n=== Content from: {title} ===\n\n"
+                        context += page.text
+                if not context:
+                    state["response"] = "Sorry, I couldn't find relevant information."
+                    return state
+                # Preprocess the context
+                context = preprocess_context(context)
+                # Step 2: Extract key phrases from the question
+                key_phrases = extract_key_phrases(question)
+                # Step 3: Find relevant sections in the context
+                relevant_sections = []
+                # Split context into sections (simplified approach)
+                sections = re.split(r'\n\s*\n', context)
+                for section in sections:
+                    # Check if section contains any of the key phrases
+                    if any(phrase.lower() in section.lower() for phrase in key_phrases):
+                        relevant_sections.append(section)
+                if not relevant_sections:
+                    state["response"] = "I found information but couldn't identify the most relevant parts."
+                    return state
+                # Combine relevant sections
+                relevant_context = "\n\n".join(relevant_sections)
+                # Step 4: Simple answer extraction based on patterns
+                # This is a basic implementation - consider using a proper QA model for better results
+                # extract_answer is a sibling closure defined in _build_graph, not a
+                # method of the agent, so it must be called without "self.".
+                answer = extract_answer(question, relevant_context)
+                if answer:
                     state["response"] = answer
                 else:
+                    # Fallback to a summary if no specific answer found
                     try:
+                        first_page = self.wiki_wiki.page(search_results[0])
+                        if first_page.exists():
+                            summary = first_page.summary[:500] + "..."  # Limit summary length
+                            state["response"] = f"I couldn't find a specific answer, but here's some relevant information: {summary}"
                         else:
+                            state["response"] = "No relevant information found."
+                    # Best-effort fallback: catch Exception rather than using a bare
+                    # except, so KeyboardInterrupt/SystemExit still propagate.
+                    except Exception:
+                        state["response"] = "I couldn't find a specific answer in the available information."
             except Exception as e:
+                state["response"] = f"An error occurred while searching for information: {str(e)}"
             return state
+        def extract_answer(question, context):
+            """Pick a short answer out of the context using simple regex heuristics.
+
+            The lower-cased question is matched against a fixed sequence of
+            question types ("how many", "when did/was/were", "who is/was",
+            "what is/are/was/were", "list of"). Only the first matching type is
+            tried. If its pattern finds nothing in the context, or no type
+            matches, the first sentence containing any key phrase of the
+            question is returned instead.
+
+            Args:
+                question: The user's question.
+                context: Text to search for an answer.
+
+            Returns:
+                The answer string, or None when no heuristic produced one.
+            """
+            question_lower = question.lower()
+            # Count questions: return the first run of digits in the context.
+            if re.search(r'\bhow many\b', question_lower):
+                numbers = re.findall(r'\d+', context)
+                if numbers:
+                    return f"The answer is {numbers[0]}."
+            # Date questions: return the first 19xx/20xx year in the context.
+            elif re.search(r'\bwhen (did|was|were)\b', question_lower):
+                # The group must be non-capturing: with a capturing group,
+                # findall() returns only the group ("19" or "20"), not the year.
+                years = re.findall(r'\b(?:19|20)\d{2}\b', context)
+                if years:
+                    return f"The answer is {years[0]}."
+            # Person questions: return the first pair of capitalised words.
+            elif re.search(r'\bwho (is|was)\b', question_lower):
+                names = re.findall(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', context)
+                if names:
+                    return f"The answer is {names[0]}."
+            # Definition questions: return the first sentence of the context.
+            elif re.search(r'\bwhat (is|are|was|were)\b', question_lower):
+                first_sentence = re.search(r'^[^.!?]*[.!?]', context)
+                if first_sentence:
+                    return first_sentence.group(0)
+            # List questions: return up to three bulleted items.
+            elif re.search(r'\blist of\b', question_lower):
+                # Capture the text after the bullet instead of slicing off a
+                # fixed two characters, which broke on "-item" or "•  item".
+                items = re.findall(r'^\s*[•*-]\s*(.*)', context, re.MULTILINE)
+                if items:
+                    return "Some relevant items: " + ", ".join(item.strip() for item in items[:3]) + "..."
+            # Default case - return a relevant sentence containing question keywords
+            key_phrases = extract_key_phrases(question)
+            if key_phrases:
+                for sentence in re.split(r'[.!?]', context):
+                    sentence_lower = sentence.lower()
+                    if any(phrase.lower() in sentence_lower for phrase in key_phrases):
+                        return sentence.strip() + "."
+            return None
         class AgentState(TypedDict, total=False):
             question: str
             is_reversed: bool