my_customisedAgent

Runtime error

App Files Files Community

my_customisedAgent / app.py

Toumaima

Update app.py

ed10d25 verified 8 months ago

raw

history blame

11.5 kB

	import requests
	import string
	import inspect
	import os
	import re
	import spacy
	from transformers import pipeline
	from duckduckgo_search import DDGS
	from sklearn.metrics.pairwise import cosine_similarity
	import numpy as np
	import whisper
	import moviepy
	import gradio as gr
	import pandas as pd
	from spacy.cli import download
	from transformers import AutoTokenizer, AutoModel
	import torch

	class BasicAgent:
	def __init__(self):
	print("BasicAgent initialized.")
	try:
	self.spacy = spacy.load("en_core_web_sm")
	except OSError:
	download("en_core_web_sm")
	self.spacy = spacy.load("en_core_web_sm")

	self.whisper_model = whisper.load_model("base")
	self.qa_pipeline = pipeline("question-answering", truncation=True, padding=True)
	self.ner_pipeline = pipeline("ner", aggregation_strategy="simple")

	# ✅ FIXED: safer embedding model setup
	self.embedding_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
	self.embedding_model = AutoModel.from_pretrained("bert-base-uncased")

	def split_text_into_chunks(self, text, max_length=512):
	"""Split text into chunks smaller than `max_length` tokens."""
	words = text.split()
	chunks = []
	chunk = []

	for word in words:
	chunk.append(word)
	if len(' '.join(chunk)) > max_length:
	chunks.append(' '.join(chunk[:-1])) # Add the chunk and reset
	chunk = [word]

	if chunk:
	chunks.append(' '.join(chunk)) # Add the final chunk

	return chunks

	def answer_question(self, question: str, context: str) -> str:
	try:
	context_chunks = self.split_text_into_chunks(context, max_length=512)
	answers = []
	for chunk in context_chunks:
	answer = self.qa_pipeline(question=question, context=chunk)["answer"]
	answers.append(answer)

	return " ".join(answers) # Combine answers from chunks
	except Exception as e:
	return f"Error answering question: {e}"

	def extract_named_entities(self, text):
	entities = self.ner_pipeline(text)
	return [e["word"] for e in entities if e["entity_group"] == "PER"]

	def extract_numbers(self, text):
	return re.findall(r"\d+", text)

	def extract_keywords(self, text):
	doc = self.spacy(text)
	return [token.text for token in doc if token.pos_ in ["NOUN", "PROPN"]]

	def call_whisper(self, video_path: str) -> str:
	video = moviepy.editor.VideoFileClip(video_path)
	audio_path = "temp_audio.wav"
	video.audio.write_audiofile(audio_path)
	result = self.whisper_model.transcribe(audio_path)
	os.remove(audio_path)
	return result["text"]

	def search(self, question: str) -> str:
	try:
	with DDGS() as ddgs:
	results = list(ddgs.text(question, max_results=3))
	if not results:
	return "No relevant search results found."
	context = results[0]["body"]
	return context
	except Exception as e:
	return f"Search error: {e}"

	def answer_question(self, question: str, context: str) -> str:
	try:
	return self.qa_pipeline(question=question, context=context)["answer"]
	except:
	return context # Fallback to context if QA fails

	def handle_logic_riddles(self, question: str) -> str \| None:
	# Normalize the input
	q = question.lower().strip()
	q = q.translate(str.maketrans("", "", string.punctuation)) # remove punctuation
	q = re.sub(r"\s+", " ", q) # normalize multiple spaces

	logic_patterns = [
	{
	"pattern": r"opposite of the word left",
	"answer": "right"
	},
	{
	"pattern": r"what comes after a",
	"answer": "b"
	},
	{
	"pattern": r"first letter of the alphabet",
	"answer": "a"
	},
	{
	"pattern": r"what is the color of the clear sky",
	"answer": "blue"
	},
	{
	"pattern": r"how many sides does a triangle have",
	"answer": "3"
	},
	{
	"pattern": r"how many legs does a spider have",
	"answer": "8"
	},
	{
	"pattern": r"what is 2 \+ 2",
	"answer": "4"
	},
	{
	"pattern": r"what is the opposite of up",
	"answer": "down"
	},
	{
	"pattern": r"if you understand this sentence.opposite.left",
	"answer": "right"
	}
	]

	for item in logic_patterns:
	if re.search(item["pattern"], q, re.IGNORECASE):
	return item["answer"]

	return None


	def solve_riddle(self, riddle: str) -> str:
	"""Fallback riddle solver using QA pipeline with general logic context."""
	riddle_context = (
	"You are a riddle-solving assistant. Try to give a short and logical answer to riddles.\n"
	"Examples:\n"
	"Q: What has keys but can't open locks?\nA: A piano\n"
	"Q: What runs but never walks?\nA: Water\n"
	"Q: What comes once in a minute, twice in a moment, but never in a thousand years?\nA: The letter M\n"
	f"Q: {riddle}\nA:"
	)
	try:
	result = self.qa_pipeline(question=riddle, context=riddle_context)
	return result["answer"]
	except Exception as e:
	return f"Could not solve riddle: {e}"



	def __call__(self, question: str, video_path: str = None) -> str:
	print(f"Agent received question: {question[:60]}...")

	# Handle logic/riddle questions first
	logic_answer = self.handle_logic_riddles(question)
	if logic_answer is not None:
	return f"🧠 Logic Answer: {logic_answer}"
	else:
	riddle_guess = self.solve_riddle(question)
	return f"🤖 Riddle Guess: {riddle_guess}"

	if video_path:
	transcription = self.call_whisper(video_path)
	print(f"Transcribed video: {transcription[:100]}...")
	return transcription

	context = self.search(question)
	answer = self.answer_question(question, context)
	q_lower = question.lower()

	# Enhanced formatting based on question type
	if "who" in q_lower:
	people = self.extract_named_entities(context)
	return f"👤 Who: {', '.join(people) if people else 'No person found'}\n\n🧠 Answer: {answer}"

	elif "how many" in q_lower:
	numbers = self.extract_numbers(context)
	return f"🔢 How many: {', '.join(numbers) if numbers else 'No numbers found'}\n\n🧠 Answer: {answer}"

	elif "how" in q_lower:
	return f"⚙️ How: {answer}"

	elif "what" in q_lower or "where" in q_lower:
	keywords = self.extract_keywords(context)
	return f"🗝️ Keywords: {', '.join(keywords[:5])}\n\n🧠 Answer: {answer}"

	else:
	return f"🧠 Answer: {answer}"

	# --- Submission Function ---
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	def run_and_submit_all(profile: gr.OAuthProfile \| None):
	space_id = os.getenv("SPACE_ID")

	if profile:
	username = profile.username
	print(f"User logged in: {username}")
	else:
	return "Please Login to Hugging Face with the button.", None

	api_url = DEFAULT_API_URL
	questions_url = f"{api_url}/questions"
	submit_url = f"{api_url}/submit"

	try:
	agent = BasicAgent()
	except Exception as e:
	return f"Error initializing agent: {e}", None

	agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
	print(f"Agent repo: {agent_code}")

	try:
	response = requests.get(questions_url, timeout=15)
	response.raise_for_status()
	questions_data = response.json()
	print(f"Fetched {len(questions_data)} questions.")
	except Exception as e:
	return f"Error fetching questions: {e}", None

	results_log = []
	answers_payload = []

	for item in questions_data:
	task_id = item.get("task_id")
	question_text = item.get("question")
	video_link = item.get("video_link")

	if not task_id or question_text is None:
	continue

	try:
	submitted_answer = agent(question_text, video_path=video_link)
	answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
	results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
	except Exception as e:
	results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})

	if not answers_payload:
	return "No answers were submitted.", pd.DataFrame(results_log)

	submission_data = {
	"username": username.strip(),
	"agent_code": agent_code,
	"answers": answers_payload
	}

	try:
	response = requests.post(submit_url, json=submission_data, timeout=60)
	response.raise_for_status()
	result_data = response.json()
	final_status = (
	f"✅ Submission Successful!\n"
	f"User: {result_data.get('username')}\n"
	f"Score: {result_data.get('score', 'N/A')}% "
	f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')})\n"
	f"Message: {result_data.get('message', '')}"
	)
	return final_status, pd.DataFrame(results_log)
	except Exception as e:
	return f"Submission Failed: {e}", pd.DataFrame(results_log)


	# --- Gradio Interface ---
	with gr.Blocks() as demo:
	gr.Markdown("# Basic Agent Evaluation Runner")
	gr.Markdown(
	"""
	Instructions:
	1. Clone this space and modify the agent logic if desired.
	2. Log in to Hugging Face with the button below.
	3. Click 'Run Evaluation & Submit All Answers' to evaluate and submit your agent.
	---
	Note: This process may take several minutes depending on the number of questions.
	"""
	)

	gr.LoginButton()
	run_button = gr.Button("Run Evaluation & Submit All Answers")
	status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
	results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

	run_button.click(
	fn=run_and_submit_all,
	outputs=[status_output, results_table]
	)


	if __name__ == "__main__":
	print("-" * 30 + " App Starting " + "-" * 30)
	space_host = os.getenv("SPACE_HOST")
	space_id = os.getenv("SPACE_ID")

	if space_host:
	print(f"✅ SPACE_HOST: {space_host}")
	print(f" → https://{space_host}.hf.space")
	else:
	print("ℹ️ No SPACE_HOST set.")

	if space_id:
	print(f"✅ SPACE_ID: {space_id}")
	print(f" → https://huggingface.co/spaces/{space_id}/tree/main")
	else:
	print("ℹ️ No SPACE_ID set.")

	demo.launch(debug=True, share=False)