Spaces:

yannis2025
/

Final_Assignment_Template_1

Sleeping

App Files Files Community

yannis2025 committed on May 30, 2025

Commit

d4bb25c

verified ·

1 Parent(s): 2c45ad6

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -162

app.py CHANGED Viewed

@@ -7,40 +7,72 @@ import sympy as sp
 import wikipedia
 from bs4 import BeautifulSoup
 from tenacity import retry, stop_after_attempt, wait_fixed
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
-        self.api_url = "https://api-inference.huggingface.co/models/google/flan-t5-base"
         self.api_token = os.getenv("HF_TOKEN")
-        print(f"HF_TOKEN: {self.api_token}")
         if not self.api_token:
             raise ValueError("HF_TOKEN environment variable not set.")
         self.headers = {"Authorization": f"Bearer {self.api_token}"}
-        print("BasicAgent initialized with Flan-T5-Base, SymPy, Wikipedia, and DuckDuckGo search.")
     def __call__(self, question: str) -> tuple[str, str]:
         print(f"Processing question: {question}")
         reasoning = []
-        # Check for media-based questions
-        if any(keyword in question.lower() for keyword in ["video", "image", "attached", ".mp3", ".xlsx", "code"]):
-            print("Warning: Media question detected, skipping to LLM due to lack of media access.")
-            reasoning.append("Media question detected; cannot process without media access.")
-        # Step 1: Try math-based questions
-        is_math = bool(re.search(r'[\d+\-*/=]', question.lower())) and any(
-            keyword in question.lower() for keyword in ["calculate", "solve", "equation"]
-        )
-        if is_math:
             try:
-                expr = question.lower()
-                for keyword in ["calculate", "solve"]:
-                    expr = expr.replace(keyword, "").strip()
                 if "=" in expr:
                     left, right = expr.split("=")
                     eq = sp.Eq(sp.sympify(left.strip()), sp.sympify(right.strip()))
@@ -52,184 +84,98 @@ class BasicAgent:
                     concise_answer = str(result)
                     reasoning.append(f"Math Solver: Evaluated '{expr}'. Result: {concise_answer}")
                 if concise_answer != "No solution":
-                    print(f"Returning math answer: {concise_answer}")
                     return concise_answer, "\n".join(reasoning)
             except Exception as e:
-                print(f"Math failed: {e}")
                 reasoning.append(f"Math Solver failed: {e}")
-        # Step 2: Try Wikipedia for factual questions
-        failed_context = ""
-        try:
-            wikipedia.set_lang("en")
-            # Extract key terms: proper nouns, nouns after key verbs
-            words = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b|\b\w+\b', question.lower())
-            key_terms = " ".join([w for w in words if w not in ["what", "is", "the", "of", "in", "on", "at", "by", "for", "how", "many", "who", "where", "when", "if", "this", "that", "?"]][-3:])
-            if not key_terms:
-                key_terms = " ".join(words[-3:])  # Fallback to last 3 words
-            print(f"Searching Wikipedia for: {key_terms}")
-            search_results = wikipedia.search(key_terms, results=1)
-            if not search_results:
-                raise wikipedia.exceptions.PageError("No results")
-            wiki_summary = wikipedia.summary(search_results[0], sentences=5, auto_suggest=True)
-            prompt = (
-                f"Question: {question}\n"
-                f"Context: {wiki_summary}\n"
-                "Provide a concise answer (e.g., a number or short phrase): "
-            )
-            wiki_answer = self._query_llm(prompt)
-            if wiki_answer.startswith("Error"):
-                reasoning.append(f"Wikipedia response: {wiki_answer}")
-                failed_context = wiki_summary
-            else:
-                answer_match = re.search(r"Answer: (.*?)(?:\n|$)", wiki_answer, re.DOTALL)
-                if answer_match:
-                    concise_answer = answer_match.group(1).strip()
-                    reasoning.append(f"Wikipedia: Searched '{key_terms}'. Answer: {concise_answer}")
                 else:
-                    concise_answer = self._extract_concise_answer(wiki_answer)
-                    reasoning.append(f"Wikipedia: Searched '{key_terms}'. Parsed answer: {concise_answer}")
-                print(f"Returning Wikipedia answer: {concise_answer}")
-                return concise_answer, "\n".join(reasoning)
-        except wikipedia.exceptions.DisambiguationError as e:
-            print(f"Wikipedia disambiguation: {e}")
-            reasoning.append(f"Wikipedia: Disambiguation error - {e}")
             try:
-                key_terms = e.options[0]
-                print(f"Retrying Wikipedia with: {key_terms}")
-                wiki_summary = wikipedia.summary(key_terms, sentences=5)
-                failed_context = wiki_summary
                 prompt = (
                     f"Question: {question}\n"
                     f"Context: {wiki_summary}\n"
-                    "Provide a concise answer: "
                 )
                 wiki_answer = self._query_llm(prompt)
                 concise_answer = self._extract_concise_answer(wiki_answer)
-                reasoning.append(f"Wikipedia retry: Searched '{key_terms}'. Answer: {concise_answer}")
-                print(f"Returning Wikipedia retry answer: {concise_answer}")
                 return concise_answer, "\n".join(reasoning)
-            except Exception as e2:
-                print(f"Wikipedia retry failed: {e2}")
-                reasoning.append(f"Wikipedia retry failed: {e2}")
-        except wikipedia.exceptions.PageError:
-            print(f"Wikipedia page not found for: {key_terms}")
-            reasoning.append(f"Wikipedia: Page not found - {key_terms}")
-            try:
-                key_terms = " ".join(words[-3:])
-                print(f"Retrying Wikipedia with: {key_terms}")
-                search_results = wikipedia.search(key_terms, results=1)
-                if search_results:
-                    wiki_summary = wikipedia.summary(search_results[0], sentences=5)
-                    failed_context = wiki_summary
-                    prompt = (
-                        f"Question: {question}\n"
-                        f"Context: {wiki_summary}\n"
-                        "Provide a concise answer: "
-                    )
-                    wiki_answer = self._query_llm(prompt)
-                    concise_answer = self._extract_concise_answer(wiki_answer)
-                    reasoning.append(f"Wikipedia retry: Searched '{key_terms}'. Answer: {concise_answer}")
-                    print(f"Returning Wikipedia retry answer: {concise_answer}")
-                    return concise_answer, "\n".join(reasoning)
-            except Exception as e2:
-                print(f"Wikipedia retry failed: {e2}")
-                reasoning.append(f"Wikipedia retry failed: {e2}")
-        # Step 3: Try web search with DuckDuckGo
         try:
             search_url = f"https://duckduckgo.com/html/?q={question.replace(' ', '+')}"
             response = requests.get(search_url, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
-            response.raise_for_status()
-            soup = BeautifulSoup(response.text, features="html.parser")
             snippets = [s.text.strip() for s in soup.find_all("a", class_="result__a")[:3]]
             if snippets:
                 prompt = (
                     f"Question: {question}\n"
                     f"Search results: {' '.join(snippets)[:500]}\n"
-                    "Provide a concise answer: "
                 )
                 search_answer = self._query_llm(prompt)
-                if search_answer.startswith("Error"):
-                    reasoning.append(f"Search response: {search_answer}")
-                    failed_context += " " + " ".join(snippets)[:200]
-                else:
-                    answer_match = re.search(r"Answer: (.*?)(?:\n|$)", search_answer, re.DOTALL)
-                    if answer_match:
-                        concise_answer = answer_match.group(1).strip()
-                    else:
-                        concise_answer = self._extract_concise_answer(search_answer)
-                    reasoning.append(f"Search: Searched '{question[:50]}'. Answer: {concise_answer}")
-                    print(f"Returning search answer: {concise_answer}")
-                    return concise_answer, "\n".join(reasoning)
             else:
-                print("No search results found.")
                 reasoning.append("Search: No results found.")
-                simplified_terms = " ".join(words[-3:])
-                search_url = f"https://duckduckgo.com/html/?q={simplified_terms.replace(' ', '+')}"
-                response = requests.get(search_url, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
-                soup = BeautifulSoup(response.text, features="html.parser")
-                snippets = [s.text.strip() for s in soup.find_all("a", class_="result__a")[:3]]
-                if snippets:
-                    prompt = (
-                        f"Question: {question}\n"
-                        f"Search results: {' '.join(snippets)[:500]}\n"
-                        "Provide a concise answer: "
-                    )
-                    search_answer = self._query_llm(prompt)
-                    concise_answer = self._extract_concise_answer(search_answer)
-                    reasoning.append(f"Search retry: Searched '{simplified_terms}'. Answer: {concise_answer}")
-                    print(f"Returning search retry answer: {concise_answer}")
-                    return concise_answer, "\n".join(reasoning)
-                else:
-                    reasoning.append(f"Search retry failed: No results for '{simplified_terms}'")
         except Exception as e:
-            print(f"Search error: {e}")
             reasoning.append(f"Search failed: {e}")
-        # Step 4: Fallback to LLM with context
-        try:
-            prompt = (
-                f"Question: {question}\n"
-                f"Additional Info: {failed_context[:200]}\n"
-                "Provide a concise answer (e.g., a number or short phrase): "
-            )
-            full_response = self._query_llm(prompt)
-            if full_response.startswith("Error"):
-                print(f"LLM error: {full_response}")
-                reasoning.append(f"LLM failed: {full_response}")
-                return "Unknown", "\n".join(reasoning)
-            answer_match = re.search(r"Answer: (.*?)(?:\n|$)", full_response, re.DOTALL)
-            if answer_match:
-                concise_answer = answer_match.group(1).strip()
-            else:
-                concise_answer = self._extract_concise_answer(full_response)
-            reasoning.append(f"LLM: {full_response[:100]}...")
-            print(f"Returning LLM answer: {concise_answer}")
-            return concise_answer, "\n".join(reasoning)
-        except Exception as e:
-            print(f"LLM error: {e}")
-            return "Unknown", f"LLM failed: {e}"
-    @retry(stop=stop_after_attempt(2), wait=wait_fixed(5))
     def _query_llm(self, prompt: str) -> str:
         try:
             payload = {
-                "inputs": prompt,
-                "parameters": {"max_length": 300, "return_full_text": False}
             }
-            response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=15)
             if response.status_code in [402, 429]:
-                print(f"API rate limit: {response.status_code}")
                 return f"Error: Status {response.status_code}"
             response.raise_for_status()
             result = response.json()
-            if isinstance(result, list) and result:
-                return result[0]["generated_text"].strip()
-            print("Invalid API response")
-            return "Error: Invalid API response"
-        except requests.exceptions.RequestException as e:
-            print(f"API error: {e}")
             return f"Error: {str(e)}"
     def _extract_concise_answer(self, response: str) -> str:
@@ -238,13 +184,13 @@ class BasicAgent:
         number_match = re.search(r"\b\d+\.\d+\b|\b\d+\b(?!\.\d)", response)
         if number_match:
             return number_match.group(0)
-        words = response.split()[:5]
-        if len(words) <= 5 and len(" ".join(words)) <= 30:
-            return " ".join(words)
         sentence_end = response.find(".")
-        if sentence_end != -1:
-            return response[:sentence_end].strip()[:30]
-        return response[:30].strip()
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """

 import wikipedia
 from bs4 import BeautifulSoup
 from tenacity import retry, stop_after_attempt, wait_fixed
+import spacy
+from io import StringIO
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Initialize NLP for Question Classification ---
+nlp = spacy.load("en_core_web_sm")
 # --- Basic Agent Definition ---
 class BasicAgent:
     def __init__(self):
+        """Configure the Inference API endpoint and the auth header.
+
+        Reads the API token from the HF_TOKEN environment variable.
+
+        Raises:
+            ValueError: if HF_TOKEN is unset or empty.
+        """
+        # Hub model ids are namespaced (<org>/<model>); the bare model name does not resolve.
+        self.api_url = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"
         self.api_token = os.getenv("HF_TOKEN")
         if not self.api_token:
             raise ValueError("HF_TOKEN environment variable not set.")
         self.headers = {"Authorization": f"Bearer {self.api_token}"}
+        print("BasicAgent initialized with Mixtral-8x7B, SymPy, Wikipedia, and DuckDuckGo search.")
+    def classify_question(self, question: str) -> str:
+        """Classify a question as "math", "factual", "code", "file", or "general".
+
+        The checks run in that order and the first match wins. Keywords are
+        matched case-insensitively against whole tokens from the spaCy tokenizer.
+        """
+        question_lower = question.lower()
+        # Compare lower-cased tokens: questions usually start with a capital
+        # ("Who", "Calculate"), which an exact token.text comparison misses.
+        tokens = {token.text.lower() for token in nlp(question)}
+        # Require an operator between digits. A lone digit or hyphen (a year, a
+        # hyphenated word) must not send the question to the math solver.
+        # NOTE(review): digit ranges such as "2000-2009" still match here.
+        has_arithmetic = re.search(r"\d\s*[+\-*/=]\s*\(?\s*\d", question_lower) is not None
+        if has_arithmetic or tokens & {"calculate", "solve", "equation", "sum", "product"}:
+            return "math"
+        # "how many" spans two tokens, so it is matched on the raw text instead.
+        if tokens & {"who", "what", "where", "when"} or re.search(r"\bhow many\b", question_lower):
+            return "factual"
+        if tokens & {"code", "python", "program"} or ".py" in question_lower:
+            return "code"
+        if any(ext in question_lower for ext in (".xlsx", ".csv", ".pdf")):
+            return "file"
+        return "general"
     def __call__(self, question: str) -> tuple[str, str]:
         print(f"Processing question: {question}")
         reasoning = []
+        question_type = self.classify_question(question)
+        reasoning.append(f"Classified as {question_type} question.")
+        # Handle file-based questions (basic CSV parsing if text is provided)
+        if question_type == "file" and (".xlsx" in question.lower() or ".csv" in question.lower()):
+            try:
+                # Assume table data is embedded in question text (simplified)
+                table_match = re.search(r'(\|.*?\|.*?\|.*?\|)', question, re.DOTALL)
+                if table_match:
+                    table_text = table_match.group(1)
+                    df = pd.read_csv(StringIO(table_text.replace("|", ",")), sep=",")
+                    reasoning.append(f"Parsed table: {df.to_dict()}")
+                    prompt = (
+                        f"Question: {question}\n"
+                        f"Table data: {df.to_dict()}\n"
+                        "Provide a concise answer (e.g., a number or short phrase): "
+                    )
+                    answer = self._query_llm(prompt)
+                    concise_answer = self._extract_concise_answer(answer)
+                    reasoning.append(f"File-based answer: {concise_answer}")
+                    return concise_answer, "\n".join(reasoning)
+                else:
+                    reasoning.append("No table data found in question.")
+            except Exception as e:
+                reasoning.append(f"File parsing failed: {e}")
+        # Handle math questions
+        if question_type == "math":
             try:
+                expr = re.sub(r'[^\d+\-*/=().]', ' ', question.lower()).strip()
                 if "=" in expr:
                     left, right = expr.split("=")
                     eq = sp.Eq(sp.sympify(left.strip()), sp.sympify(right.strip()))
                     concise_answer = str(result)
                     reasoning.append(f"Math Solver: Evaluated '{expr}'. Result: {concise_answer}")
                 if concise_answer != "No solution":
                     return concise_answer, "\n".join(reasoning)
             except Exception as e:
                 reasoning.append(f"Math Solver failed: {e}")
+        # Handle code questions
+        if question_type == "code":
+            try:
+                # Extract code snippet if provided
+                code_match = re.search(r'```python\n(.*?)\n```', question, re.DOTALL)
+                if code_match:
+                    code = code_match.group(1)
+                    # Simulate code execution (simplified)
+                    locals_dict = {}
+                    exec(code, {}, locals_dict)
+                    concise_answer = str(list(locals_dict.values())[-1]) if locals_dict else "Unknown"
+                    reasoning.append(f"Code executed: {concise_answer}")
+                    return concise_answer, "\n".join(reasoning)
                 else:
+                    reasoning.append("No executable code found.")
+            except Exception as e:
+                reasoning.append(f"Code execution failed: {e}")
+        # Handle factual questions with Wikipedia
+        if question_type == "factual":
             try:
+                doc = nlp(question)
+                key_terms = " ".join([ent.text for ent in doc.ents if ent.label_ in ["PERSON", "ORG", "GPE", "DATE"]][:3])
+                if not key_terms:
+                    key_terms = " ".join([token.text for token in doc if token.pos_ in ["NOUN", "PROPN"]][-3:])
+                print(f"Searching Wikipedia for: {key_terms}")
+                wikipedia.set_lang("en")
+                search_results = wikipedia.search(key_terms, results=1)
+                if not search_results:
+                    raise wikipedia.exceptions.PageError("No results")
+                wiki_summary = wikipedia.summary(search_results[0], sentences=5)
                 prompt = (
                     f"Question: {question}\n"
                     f"Context: {wiki_summary}\n"
+                    "Answer in one sentence or a number: "
                 )
                 wiki_answer = self._query_llm(prompt)
                 concise_answer = self._extract_concise_answer(wiki_answer)
+                reasoning.append(f"Wikipedia: Searched '{key_terms}'. Answer: {concise_answer}")
                 return concise_answer, "\n".join(reasoning)
+            except Exception as e:
+                reasoning.append(f"Wikipedia failed: {e}")
+        # Handle general questions with web search
         try:
             search_url = f"https://duckduckgo.com/html/?q={question.replace(' ', '+')}"
             response = requests.get(search_url, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
+            soup = BeautifulSoup(response.text, "html.parser")
             snippets = [s.text.strip() for s in soup.find_all("a", class_="result__a")[:3]]
             if snippets:
                 prompt = (
                     f"Question: {question}\n"
                     f"Search results: {' '.join(snippets)[:500]}\n"
+                    "Answer in one sentence or a number: "
                 )
                 search_answer = self._query_llm(prompt)
+                concise_answer = self._extract_concise_answer(search_answer)
+                reasoning.append(f"Search: Searched '{question[:50]}'. Answer: {concise_answer}")
+                return concise_answer, "\n".join(reasoning)
             else:
                 reasoning.append("Search: No results found.")
         except Exception as e:
             reasoning.append(f"Search failed: {e}")
+        # Fallback to LLM with chain-of-thought
+        prompt = (
+            f"Question: {question}\n"
+            "Think step-by-step to answer this question. Provide the final answer in one sentence or a number: "
+        )
+        llm_answer = self._query_llm(prompt)
+        concise_answer = self._extract_concise_answer(llm_answer)
+        reasoning.append(f"LLM fallback: {llm_answer[:100]}...")
+        return concise_answer, "\n".join(reasoning)
+    # Retrying is applied to the HTTP call only: decorating the whole method had no
+    # effect, because the method turns every failure into an "Error: ..." return value.
     def _query_llm(self, prompt: str) -> str:
+        """Send ``prompt`` to the Inference API and return the generated text.
+
+        Never raises: every failure is reported as a string starting with "Error:".
+        Connection failures and 5xx responses are retried (3 attempts, 5 s apart).
+        402/429 and other 4xx responses are reported as errors without retrying.
+        """
+        @retry(stop=stop_after_attempt(3), wait=wait_fixed(5), reraise=True)
+        def post(payload):
+            response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=20)
+            if response.status_code >= 500:
+                # Raise so that tenacity retries server-side failures.
+                response.raise_for_status()
+            return response
         try:
             payload = {
+                "inputs": f"[INST] {prompt} [/INST]",
+                # Text-generation endpoints take max_new_tokens; max_length is not one of their parameters.
+                "parameters": {"max_new_tokens": 500, "temperature": 0.7, "return_full_text": False}
             }
+            response = post(payload)
             if response.status_code in [402, 429]:
                 return f"Error: Status {response.status_code}"
             response.raise_for_status()
             result = response.json()
+            # Guard against an empty list or an element without the expected key.
+            if isinstance(result, list) and result and isinstance(result[0], dict) and "generated_text" in result[0]:
+                return result[0]["generated_text"].strip()
+            return "Error: Invalid API response"
+        except Exception as e:
             return f"Error: {str(e)}"
     def _extract_concise_answer(self, response: str) -> str:
         number_match = re.search(r"\b\d+\.\d+\b|\b\d+\b(?!\.\d)", response)
         if number_match:
             return number_match.group(0)
         sentence_end = response.find(".")
+        if sentence_end != -1 and len(response[:sentence_end]) <= 50:
+            return response[:sentence_end].strip()
+        return response[:50].strip()
+# --- Rest of the code (run_and_submit_all and Gradio interface) remains unchanged ---
+# [Insert the original run_and_submit_all function and Gradio interface code here]
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """