New_Final_Assignment

Sleeping

App Files Files Community

New_Final_Assignment / tools.py

naman1102

web

919fd15 8 months ago

raw

history blame contribute delete

16.4 kB

	# tools.py

	import pandas as pd

	from pathlib import Path
	import requests
	import regex as re
	import time
	import os
	from duckduckgo_search import DDGS
	from langchain_core.tools import tool
	from langchain_community.document_loaders import ArxivLoader
	import arxiv
	import fitz # PyMuPDF
	import tempfile

	# Base URL of the scoring service; _download_file_for_task requests
	# f"{DEFAULT_API_URL}/files/{task_id}" to fetch a task's attachment.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


	# Removed complex safety wrapper - keeping things simple

	def _download_file_for_task(task_id: str, ext: str) -> str:
	    """
	    Helper: attempt to GET the remote file for a given task_id.
	    Saves under ./hf_files/{task_id}.{ext}. Returns the local path if successful,
	    or an empty string if no file / download failed.

	    Note: the request URL depends only on task_id; ext only selects the local
	    file name, so the same payload is fetched whichever ext is passed.
	    """
	    print("reached _download_file_for_task")
	    os.makedirs("hf_files", exist_ok=True)
	    local_path = os.path.join("hf_files", f"{task_id}.{ext}")
	    url = f"{DEFAULT_API_URL}/files/{task_id}"

	    try:
	        resp = requests.get(url, timeout=10)
	        if resp.status_code == 200 and resp.content:
	            with open(local_path, "wb") as f:
	                f.write(resp.content)
	            # Report success only after the bytes are actually on disk.
	            print(f"\n Downloaded file from {url} to {local_path} \n")
	            return local_path
	        # Non-200 or empty body: make the reason visible instead of failing silently.
	        print(f"No file downloaded from {url} (HTTP {resp.status_code})")
	    except Exception as e:
	        # Best-effort helper: callers rely on "" rather than an exception,
	        # so report the cause and fall through.
	        print(f"Error downloading file from {url} to {local_path}: {e}")

	    # If we get here, either no file was available or the download/write failed.
	    return ""

	@tool
	def image_tool(task_id: str) -> str:
	    """
	    TOOL NAME: Image Analysis Tool

	    Purpose: When the user asks about images, photos, or visual content, use this tool to get a description of the image.

	    Input: A task_id string that identifies the specific image to analyze.

	    Example usage:
	    - "What is shown in this image?"
	    - "Describe the contents of the picture"
	    - "What objects are visible in the photo?"
	    """
	    # NOTE: the docstring above is what @tool exposes as the tool description,
	    # so its wording is intentionally left unchanged.
	    #
	    # Returns a string in every case: "Image Caption:\n<caption>" on success,
	    # otherwise a human-readable error/diagnostic message.

	    # Try downloading the image under each allowed extension; the for/else
	    # returns an error only when none of the attempts produced a file.
	    for ext in ("png", "jpg", "jpeg"):
	        file_path = _download_file_for_task(task_id, ext)
	        if file_path and os.path.exists(file_path):
	            break
	    else:
	        return f"Error: Image file for task_id '{task_id}' not found."

	    # Read the image bytes
	    try:
	        with open(file_path, "rb") as f:
	            image_bytes = f.read()
	    except Exception as e:
	        return f"Error reading image: {str(e)}"

	    # Load HF token
	    hf_token = os.getenv("HF_TOKEN")
	    if not hf_token:
	        return "Error: HF_TOKEN not set in environment."

	    # Use a single reliable model
	    model = "Salesforce/blip-image-captioning-base"
	    headers = {"Authorization": f"Bearer {hf_token}"}

	    try:
	        # The inference endpoint expects the raw image bytes as the request
	        # body, so send them via data= (files= would wrap them in a
	        # multipart/form-data envelope).
	        response = requests.post(
	            f"https://api-inference.huggingface.co/models/{model}",
	            headers=headers,
	            data=image_bytes,
	            timeout=30,
	        )
	    except Exception as e:
	        return f"Error calling HuggingFace API: {e}"

	    # Parse response
	    if response.status_code != 200:
	        return f"Error from model ({model}): {response.status_code} - {response.text}"

	    try:
	        result = response.json()
	    except Exception as e:
	        return f"Error parsing response: {e}"

	    # The payload is handled as either a list of dicts or a single dict;
	    # anything else (or a missing/None caption) is treated as "no caption".
	    first = result[0] if isinstance(result, list) and result else result
	    raw_caption = first.get("generated_text") if isinstance(first, dict) else None
	    caption = raw_caption.strip() if isinstance(raw_caption, str) else ""

	    if not caption:
	        return "No caption generated by model."

	    return f"Image Caption:\n{caption}"




	@tool
	def excel_tool(task_id: str) -> str:
	    """
	    TOOL NAME: Excel Data Analysis Tool

	    Purpose: When the user asks about data in spreadsheets, tables, or Excel files, use this tool to read and analyze the data.

	    Input: A task_id string that identifies the specific Excel file to analyze.

	    Example usage:
	    - "What data is in this spreadsheet?"
	    - "Analyze the Excel file contents"
	    - "Show me the data from the table"
	    """
	    # NOTE: the docstring above is what @tool exposes as the tool description,
	    # so its wording is intentionally left unchanged.
	    #
	    # Returns str() of the sheet's rows as a list of {column: value} dicts,
	    # or an error message string if the file is missing or unreadable.
	    print("reached excel_tool")
	    sheet = "Sheet1"

	    local_xlsx = _download_file_for_task(task_id, "xlsx")
	    if not local_xlsx or not os.path.exists(local_xlsx):
	        return "Error: Excel file not found for this task."

	    try:
	        # Context manager guarantees the workbook handle is closed even if
	        # reading the sheet raises.
	        with pd.ExcelFile(local_xlsx) as xls:
	            # Prefer the conventional "Sheet1"; otherwise use the first sheet.
	            target = sheet if sheet and sheet in xls.sheet_names else xls.sheet_names[0]
	            df = pd.read_excel(xls, sheet_name=target)
	        # Serialise once and reuse for both the log line and the return value.
	        records = str(df.to_dict(orient="records"))
	        print(f"Excel file read successfully: {records}")
	        return records
	    except Exception as e:
	        return f"Error reading Excel file: {e}"


	import openai
	@tool
	def audio_transcriber_tool(task_id: str) -> str:
	    """
	    TOOL NAME: Audio Transcription Tool

	    Purpose: When the user asks about audio files, speech, or wants to know what was said in an audio recording, use this tool.

	    Input: A task_id string that identifies the specific audio file to transcribe.

	    Example usage:
	    - "What is said in this audio file?"
	    - "Transcribe the speech from the recording"
	    - "Convert the audio to text"
	    """
	    # NOTE: the docstring above is what @tool exposes as the tool description,
	    # so its wording is intentionally left unchanged.
	    #
	    # Returns the stripped transcript text, or an error message string when
	    # no audio file could be fetched or the transcription call fails.
	    print("reached audio_transcriber_tool")

	    # Always download afresh. The nested generators are lazy, so attempts stop
	    # at the first extension that yields a non-empty path ("" if none do).
	    attempts = (_download_file_for_task(task_id, suffix) for suffix in ("mp3", "wav", "m4a"))
	    audio_path = next((found for found in attempts if found), "")

	    if not (audio_path and os.path.exists(audio_path)):
	        print("Error: No audio file found (download failed).")
	        return "Error: No audio file found (download failed)."

	    # Send to OpenAI Whisper; any failure is folded into the returned text.
	    try:
	        openai.api_key = os.getenv("OPENAI_API_KEY")
	        if not openai.api_key:
	            raise RuntimeError("OPENAI_API_KEY is not set in environment.")

	        with open(audio_path, "rb") as handle:
	            print("reached openai.audio.transcriptions.create")
	            reply = openai.audio.transcriptions.create(model="whisper-1", file=handle)
	        transcript = reply.text.strip()
	    except Exception as err:
	        transcript = f"Error during transcription: {err}"

	    print(f"Transcripted as transcript: {transcript}")
	    return transcript
	# tools.py

	import re
	import requests

	# Session-wide memo of query -> formatted result. Kept in a module-level dict
	# rather than as an attribute on wikipedia_search_tool, because @tool rebinds
	# that name to whatever object the decorator returns (not a plain function).
	_WIKI_SEARCH_CACHE = {}

	@tool
	def wikipedia_search_tool(wiki_query: str) -> str:
	    """
	    TOOL NAME: Wikipedia Search Tool

	    Purpose: When the user asks about general knowledge, facts, or wants to know about a specific topic, use this tool.

	    Input: A string describing the topic to search for on Wikipedia.

	    Example usage:
	    - "What is the capital of France?"
	    - "Find information about quantum computing"
	    - "What is the history of the internet?"
	    If no valid wiki_query is provided, returns an empty string.
	    """
	    # NOTE: the docstring above is what @tool exposes as the tool description,
	    # so its wording is intentionally left unchanged.
	    #
	    # Returns "Title: ...\n\n<summary>\n\n[END_OF_SEARCH]" on success, a
	    # message ending in [END_OF_SEARCH] on failure, or "" for a blank query.
	    # Successful and "not found" results are cached; request errors are not.
	    from urllib.parse import quote

	    print("reached wikipedia search tool")

	    # Tolerate None as well as blank strings, as the docstring promises.
	    query = (wiki_query or "").strip()
	    if not query:
	        return ""

	    if query in _WIKI_SEARCH_CACHE:
	        print("Returning cached Wikipedia result for query:", query)
	        return _WIKI_SEARCH_CACHE[query]

	    try:
	        # 1) Use the MediaWiki API to search for page titles matching the query
	        search_params = {
	            "action": "query",
	            "list": "search",
	            "srsearch": query,
	            "format": "json",
	            "utf8": 1,
	        }
	        search_resp = requests.get(
	            "https://en.wikipedia.org/w/api.php", params=search_params, timeout=10
	        )
	        search_resp.raise_for_status()
	        search_data = search_resp.json()

	        search_results = search_data.get("query", {}).get("search", [])
	        if not search_results:
	            msg = f"No Wikipedia page found for '{query}'. [END_OF_SEARCH]"
	            _WIKI_SEARCH_CACHE[query] = msg
	            return msg

	        # 2) Take the first search result's title
	        first_title = search_results[0].get("title", "")
	        if not first_title:
	            msg = "Unexpected format from Wikipedia search. [END_OF_SEARCH]"
	            _WIKI_SEARCH_CACHE[query] = msg
	            return msg

	        # 3) Fetch the page summary for that title via the REST summary endpoint.
	        # The title is a single path segment, so every reserved character
	        # ("/", "?", "#", ...) must be percent-encoded; safe="" does that.
	        title_for_url = quote(first_title, safe="")
	        summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title_for_url}"
	        summary_resp = requests.get(summary_url, timeout=10)
	        summary_resp.raise_for_status()
	        summary_data = summary_resp.json()

	        # 4) Extract either the "extract" field or a fallback message
	        summary_text = summary_data.get("extract")
	        if not summary_text:
	            summary_text = summary_data.get("description", "No summary available.")

	        result = f"Title: {first_title}\n\n{summary_text}\n\n[END_OF_SEARCH]"
	        _WIKI_SEARCH_CACHE[query] = result
	        print("Submitted wiki successfully")
	        return result

	    except requests.exceptions.RequestException as e:
	        print("Wikipedia search error: ", e)
	        return f"Wikipedia search error: {e} [END_OF_SEARCH]"
	    except Exception as e:
	        print("Unexpected error in wikipedia_search_tool: ", e)
	        return f"Unexpected error in wikipedia_search_tool: {e} [END_OF_SEARCH]"

	@tool
	def arxiv_search_tool(query: str) -> str:
	    """
	    TOOL NAME: ArXiv Academic Search Tool

	    Purpose: When the user asks for academic research, scientific papers, or technical information, use this tool.

	    Input: A string describing the academic topic to search for on ArXiv.

	    Example usage:
	    - "Find research papers about machine learning"
	    - "What are recent studies on climate change?"
	    - "Search for papers on quantum computing"
	    """
	    # NOTE: the docstring above is what @tool exposes as the tool description,
	    # so its wording is intentionally left unchanged.
	    #
	    # Returns the top hit's title plus up to 3000 characters of its PDF text,
	    # or a message ending in [END_OF_SEARCH] when nothing is found or an
	    # error occurs.
	    print("Reached ArXiv tool, with query = ", query)
	    try:
	        # Search arXiv for the top result
	        # NOTE(review): Search.results() appears to be deprecated in newer
	        # `arxiv` releases in favour of Client().results(search) - confirm
	        # against the installed version before switching.
	        search = arxiv.Search(query=query, max_results=1, sort_by=arxiv.SortCriterion.Relevance)
	        result = next(search.results(), None)

	        if not result:
	            print("No arXiv result found")
	            return "No results found. [END_OF_SEARCH]"

	        # Download PDF (bounded wait so a stalled server cannot hang the tool)
	        response = requests.get(result.pdf_url, timeout=30)
	        response.raise_for_status()

	        # Parse the PDF straight from memory: no temp file to reopen by name,
	        # and the context manager closes the document when done.
	        with fitz.open(stream=response.content, filetype="pdf") as doc:
	            text = "".join(page.get_text() for page in doc)

	        # Clean and trim text: collapse all whitespace runs to single spaces.
	        text = " ".join(text.split())
	        summary = text[:3000] + "..." if len(text) > 3000 else text

	        return f"Title: {result.title}\n\nSummary:\n{summary}\n\n[END_OF_SEARCH]"

	    except Exception as e:
	        return f"Error fetching arXiv content: {e} [END_OF_SEARCH]"


	from langchain_openai import ChatOpenAI
	from langchain.schema import SystemMessage, HumanMessage
	# Module-level chat model, constructed once at import time and invoked by
	# analyze_code_tool to review downloaded code.
	LLM = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.2)

	@tool
	def analyze_code_tool(task_id: str) -> str:
	    """
	    TOOL NAME: Code Analysis Tool

	    Purpose: When the user asks about code, programming files, or wants to understand what a script does, use this tool.

	    Input: A task_id string that identifies the specific code file to analyze.

	    Example usage:
	    - "What does this Python code do?"
	    - "Analyze the code file for bugs"
	    - "Explain the functions in this script"
	    """
	    # NOTE: the docstring above is what @tool exposes as the tool description,
	    # so its wording is intentionally left unchanged.
	    #
	    # Returns the model's review text, or an error/diagnostic string when
	    # there is no task_id, no downloadable .py file, or a read/LLM failure.
	    print("Reached analyze_code_tool")

	    # Nothing to analyse: answer directly instead of spending an LLM call on
	    # the placeholder text.
	    if not task_id:
	        return "No code provided."

	    path = _download_file_for_task(task_id, "py")
	    if not path:
	        print("Error: .py file not found for this task.")
	        return "Error: .py file not found for this task."

	    try:
	        code_txt = Path(path).read_text(encoding="utf-8", errors="ignore")
	    except OSError as e:
	        return f"Error reading code file: {e}"

	    # splitlines + join normalises line endings and drops a trailing newline.
	    code_sample = "\n".join(code_txt.splitlines())

	    prompt = [
	        SystemMessage(content="You are a senior Python code reviewer."),
	        HumanMessage(content=(
	            "Please analyse the following code. "
	            "Summarise what it does, list key functions/classes, "
	            "and point out any obvious bugs, performance issues or style problems.\n\n"
	            # Blank line after the closing fence so the follow-up instruction
	            # is not glued onto the fence line.
	            f"```python\n{code_sample}\n```\n\n"
	            "If you can then find the output of the code and return it in the output."
	        )),
	    ]

	    # Like the other tools, report failures as text rather than raising.
	    try:
	        return LLM.invoke(prompt).content.strip()
	    except Exception as e:
	        return f"Error analysing code: {e}"



	# ─────────────────────────── Math Tools ───────────────────────────────

	@tool
	def add_tool(a: float, b: float) -> str:
	    """
	    TOOL NAME: Addition Tool

	    Purpose: When the user asks to add numbers or perform addition calculations, use this tool.

	    Input: Two numbers (a and b) to add together.

	    Example usage:
	    - "What is 25 + 17?"
	    - "Add 3.14 and 2.86"
	    - "Calculate the sum of 100 and 250"
	    """
	    # The docstring doubles as the tool description exposed by @tool, so it
	    # is kept verbatim. Returns a sentence showing both operands and a + b.
	    print("Reached add_tool")
	    return f"Addition result: {a} + {b} = {a + b}"

	@tool
	def subtract_tool(a: float, b: float) -> str:
	    """
	    TOOL NAME: Subtraction Tool

	    Purpose: When the user asks to subtract numbers or perform subtraction calculations, use this tool.

	    Input: Two numbers (a and b) where b is subtracted from a.

	    Example usage:
	    - "What is 50 - 23?"
	    - "Subtract 15.5 from 40.2"
	    - "Calculate 1000 minus 347"
	    """
	    # The docstring doubles as the tool description exposed by @tool, so it
	    # is kept verbatim. Returns a sentence showing both operands and a - b.
	    print("Reached subtract_tool")
	    return f"Subtraction result: {a} - {b} = {a - b}"

	@tool
	def multiply_tool(a: float, b: float) -> str:
	    """
	    TOOL NAME: Multiplication Tool

	    Purpose: When the user asks to multiply numbers or perform multiplication calculations, use this tool.

	    Input: Two numbers (a and b) to multiply together.

	    Example usage:
	    - "What is 8 × 7?"
	    - "Multiply 12.5 by 4"
	    - "Calculate the product of 15 and 20"
	    """
	    # The docstring doubles as the tool description exposed by @tool, so it
	    # is kept verbatim. Returns a sentence showing both operands and a * b.
	    print("Reached multiply_tool")
	    return f"Multiplication result: {a} × {b} = {a * b}"

	@tool
	def divide_tool(a: float, b: float) -> str:
	    """
	    TOOL NAME: Division Tool

	    Purpose: When the user asks to divide numbers or perform division calculations, use this tool.

	    Input: Two numbers (a and b) where a is divided by b.

	    Example usage:
	    - "What is 100 ÷ 4?"
	    - "Divide 75 by 3"
	    - "Calculate 144 divided by 12"
	    """
	    # The docstring doubles as the tool description exposed by @tool, so it
	    # is kept verbatim. Returns a sentence showing a / b, or a fixed error
	    # message when the divisor is zero.
	    print("Reached divide_tool")
	    if b != 0:
	        return f"Division result: {a} ÷ {b} = {a / b}"
	    return "Division error: Cannot divide by zero"

	# Session-wide memo of query -> result text. Kept in a module-level dict
	# rather than as an attribute on web_search_tool, because @tool rebinds that
	# name to whatever object the decorator returns (not a plain function).
	_WEB_SEARCH_CACHE = {}

	@tool
	def web_search_tool(query: str) -> str:
	    """
	    TOOL NAME: Web Search Tool

	    Purpose: When the user asks for current information, recent news, or topics not covered by Wikipedia, use this tool.

	    Input: A string describing what to search for on the web.
	    """
	    # NOTE: the docstring above is what @tool exposes as the tool description,
	    # so its wording is intentionally left unchanged.
	    #
	    # Returns str() of the DuckDuckGo results followed by [END_OF_SEARCH], an
	    # error message if every attempt raised, or a fixed message for a blank
	    # query. Only non-rate-limited results are cached.
	    print("reached web_search_tool")

	    # Tolerate None as well as blank strings.
	    query = (query or "").strip()
	    if not query:
	        return "No search query provided."

	    if query in _WEB_SEARCH_CACHE:
	        print("Returning cached web search result for query:", query)
	        return _WEB_SEARCH_CACHE[query]

	    ddg = DDGS()
	    max_retries = 5
	    result_text = ""
	    rate_limited = False

	    for attempt in range(1, max_retries + 1):
	        try:
	            result_text = str(ddg.text(query, max_results=5))
	        except Exception as e:
	            if attempt < max_retries:
	                print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
	                time.sleep(4)
	                continue
	            return f"Error during DuckDuckGo search: {e} [END_OF_SEARCH]"

	        rate_limited = "202 Ratelimit" in result_text
	        if rate_limited and attempt < max_retries:
	            print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
	            time.sleep(4)
	            continue
	        break  # Successful, or out of retries

	    result_text += "\n\n[END_OF_SEARCH]"
	    if rate_limited:
	        # Still hand back what we got, but do not cache it: a cached
	        # rate-limit response would block every later retry of this query.
	        return result_text

	    _WEB_SEARCH_CACHE[query] = result_text
	    print("Submitted web search successfully")
	    return result_text