my_customisedAgent

Runtime error

App Files Files Community

Toumaima committed on May 5

Commit

9f6cf12

verified ·

1 Parent(s): 704ac65

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -190

app.py CHANGED Viewed

@@ -1,225 +1,88 @@
-import os
-import time
-import moviepy
-import requests
-import whisper
-import gradio as gr
-import pandas as pd
-from duckduckgo_search import DDGS
 from transformers import pipeline
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
-from bs4 import BeautifulSoup
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-        # Initialize Whisper model for video transcription
-        self.whisper_model = whisper.load_model("base")  # You can change the model to `large`, `medium`, etc.
-        self.search_pipeline = pipeline("question-answering")
-        self.nlp_model = pipeline("feature-extraction")  # For semantic similarity (using transformer model)
-        self.ner_pipeline = pipeline("ner", grouped_entities=True)
-    def extract_person_entities(self, text: str) -> list:
-        # Extract named entities (persons) from the text
-        entities = self.ner_pipeline(text[:1000])
-        return [e['word'] for e in entities if e['entity_group'] == 'PER']
-    def extract_wikipedia_nominator(self, search_results: list) -> str:
-        # Check if search result contains Wikipedia nomination info
-        for result in search_results:
-            if "Wikipedia:Featured_article_candidates" in result.get('href', ''):
-                try:
-                    response = requests.get(result['href'], timeout=10)
-                    soup = BeautifulSoup(response.text, 'html.parser')
-                    text = soup.get_text()
-                    for line in text.split("\n"):
-                        if "nominated by" in line.lower():
-                            persons = self.extract_person_entities(line)
-                            return f"Nominated by {persons[0]}" if persons else line.strip()
-                except Exception:
-                    continue
-        return None
-    def score_search_results(self, question: str, search_results: list) -> str:
-        # Calculate semantic similarity and score the search results
-        question_embedding = np.mean(self.nlp_model(question)[0], axis=0)
-        best_score = -1
-        best_answer = None
-        for result in search_results:
-            result_embedding = np.mean(self.nlp_model(result['body'])[0], axis=0)
-            similarity = cosine_similarity([question_embedding], [result_embedding])[0][0]
-            if similarity > best_score:
-                best_score = similarity
-                best_answer = result['body']
-        return best_answer or "No high-confidence answer found."
     def search(self, question: str) -> str:
         try:
             with DDGS() as ddgs:
-                results = list(ddgs.text(question, max_results=5))  # Fetch top 5 results
                 if not results:
                     return "No relevant search results found."
-                # If the question relates to Wikipedia Featured Article nomination, check for nomination
-                if "featured article" in question.lower() and "wikipedia" in question.lower():
-                    nomination_info = self.extract_wikipedia_nominator(results)
-                    if nomination_info:
-                        return nomination_info
-                # Otherwise, return the best search result based on semantic similarity
-                return self.score_search_results(question, results)
         except Exception as e:
             return f"Search error: {e}"
-    def call_whisper(self, video_path: str) -> str:
-        # Transcribe video using Whisper
-        video = moviepy.editor.VideoFileClip(video_path)
-        audio_path = "temp_audio.wav"
-        video.audio.write_audiofile(audio_path)
-        result = self.whisper_model.transcribe(audio_path)
-        return result["text"]
     def __call__(self, question: str, video_path: str = None) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
         if video_path:
             transcription = self.call_whisper(video_path)
-            print(f"Transcribed video text: {transcription[:100]}...")
             return transcription
-        answer = self.search(question)
-        print(f"Agent returning search result: {answer[:100]}...")
-        time.sleep(2)
-        return answer
-# --- Run and Submit All ---
-def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results.
-    """
-    # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent
-    try:
-        agent = BasicAgent()
-    except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
-    # 2. Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
-    try:
-        response = requests.get(questions_url, timeout=15)
-        response.raise_for_status()
-        questions_data = response.json()
-        if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
-    except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        return f"Error decoding server response for questions: {e}", None
-    except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run Agent
-    results_log = []
-    answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        video_link = item.get("video_link")  # Assuming the question contains an optional video link
-        if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
-            continue
-        try:
-            # Pass video_link if available, else just the question text
-            submitted_answer = agent(question_text, video_path=video_link)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-        except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
-    if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-    # 5. Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-    try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
-        response.raise_for_status()
-        result_data = response.json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
-        print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
 # --- Build Gradio Interface using Blocks ---

+import re
+import spacy
 from transformers import pipeline
+from duckduckgo_search import DDGS
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
+import whisper
+import moviepy.editor
 class BasicAgent:
     """Answer a question from a web-search snippet, or transcribe a video.

     Calling the agent with ``video_path`` returns the Whisper transcription
     of that video.  Otherwise the first usable DuckDuckGo snippet is passed
     to an extractive question-answering pipeline and the answer is decorated
     according to the question type (who / how many / how / what / where).
     """

     # Returned verbatim when the search yields no usable snippet.
     _NO_RESULTS_MESSAGE = "No relevant search results found."

     # Question-type detectors, applied to the lower-cased question.  Word
     # boundaries keep "show" from being read as "how" and "whole" as "who".
     _WHO_RE = re.compile(r"\bwho(?:m|se)?\b")
     _HOW_MANY_RE = re.compile(r"\bhow\s+many\b")
     _HOW_RE = re.compile(r"\bhow\b")
     _WHAT_WHERE_RE = re.compile(r"\b(?:what|where)\b")

     def __init__(self):
         """Load the Whisper, transformers and spaCy models used by the agent."""
         print("BasicAgent initialized.")
         self.whisper_model = whisper.load_model("base")
         self.qa_pipeline = pipeline("question-answering")
         self.ner_pipeline = pipeline("ner", aggregation_strategy="simple")
         self.embedding_model = pipeline("feature-extraction")
         self.spacy = spacy.load("en_core_web_sm")

     def extract_named_entities(self, text):
         """Return the ``word`` of every entity the NER pipeline tags ``PER``."""
         if not text:
             return []
         entities = self.ner_pipeline(text)
         return [e["word"] for e in entities if e["entity_group"] == "PER"]

     def extract_numbers(self, text):
         """Return every run of digits in *text*, as strings, in order."""
         return re.findall(r"\d+", text)

     def extract_keywords(self, text):
         """Return the text of every token spaCy tags as NOUN or PROPN."""
         if not text:
             return []
         doc = self.spacy(text)
         return [token.text for token in doc if token.pos_ in ("NOUN", "PROPN")]

     def call_whisper(self, video_path: str) -> str:
         """Transcribe the audio track of the video at *video_path*.

         The audio is written to a unique temporary WAV file that is removed
         afterwards, and the video clip is always closed.

         Raises:
             ValueError: if the video has no audio track.
         """
         import os
         import tempfile

         video = moviepy.editor.VideoFileClip(video_path)
         try:
             if video.audio is None:
                 raise ValueError(f"Video has no audio track: {video_path}")
             # A unique path avoids clobbering a transcription that is
             # running in another request or process.
             fd, audio_path = tempfile.mkstemp(suffix=".wav")
             os.close(fd)
             try:
                 video.audio.write_audiofile(audio_path)
                 result = self.whisper_model.transcribe(audio_path)
             finally:
                 try:
                     os.remove(audio_path)
                 except OSError:
                     pass  # best-effort cleanup; the transcription is what matters
         finally:
             video.close()
         return result["text"]

     def _first_snippet(self, question: str):
         """Return the first non-empty result body for *question*, or ``None``.

         Exceptions raised by the search client propagate to the caller.
         """
         with DDGS() as ddgs:
             hits = list(ddgs.text(question, max_results=3))
         for hit in hits:
             body = hit.get("body")
             if body:
                 return body
         return None

     def search(self, question: str) -> str:
         """Return a search snippet for *question*, or a human-readable message.

         Never raises: a search failure is reported as ``"Search error: ..."``
         and an empty result as ``"No relevant search results found."``.
         """
         try:
             snippet = self._first_snippet(question)
         except Exception as e:
             return f"Search error: {e}"
         return snippet if snippet else self._NO_RESULTS_MESSAGE

     def answer_question(self, question: str, context: str) -> str:
         """Extract an answer to *question* from *context*.

         Falls back to returning *context* unchanged when it is empty or when
         the question-answering pipeline fails.
         """
         if not context:
             return context
         try:
             return self.qa_pipeline(question=question, context=context)["answer"]
         except Exception as e:
             # Deliberate best-effort: keep going with the raw snippet.
             print(f"QA pipeline failed, falling back to context: {e}")
             return context

     def __call__(self, question: str, video_path: str = None) -> str:
         """Answer *question*, or transcribe *video_path* when one is given.

         Search failures and empty results are returned as-is instead of
         being passed to the QA model as if they were evidence.
         """
         print(f"Agent received question: {question[:60]}...")
         if video_path:
             transcription = self.call_whisper(video_path)
             print(f"Transcribed video: {transcription[:100]}...")
             return transcription

         try:
             context = self._first_snippet(question)
         except Exception as e:
             return f"Search error: {e}"
         if not context:
             return self._NO_RESULTS_MESSAGE

         answer = self.answer_question(question, context)
         q_lower = question.lower()
         # Enhance based on question type
         if self._WHO_RE.search(q_lower):
             people = self.extract_named_entities(context)
             return f"👤 Who: {', '.join(people) if people else 'No person found'}\n\n🧠 Answer: {answer}"
         if self._HOW_MANY_RE.search(q_lower):
             numbers = self.extract_numbers(context)
             return f"🔢 How many: {', '.join(numbers) if numbers else 'No numbers found'}\n\n🧠 Answer: {answer}"
         if self._HOW_RE.search(q_lower):
             return f"⚙️ How: {answer}"
         if self._WHAT_WHERE_RE.search(q_lower):
             keywords = self.extract_keywords(context)
             return f"🗝️ Keywords: {', '.join(keywords[:5])}\n\n🧠 Answer: {answer}"
         return f"🧠 Answer: {answer}"
 # --- Build Gradio Interface using Blocks ---