# --- Scraped page header, not Python code; kept as comments so the file parses ---
# Author: Cmuroc27
# Commit message: "funcion read mejorada" (improved read function)
# Commit hash: fc7df56
# Image analyzer tool
import tempfile
# Building search web agent
from llama_index.core import SimpleDirectoryReader
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from tavily import AsyncTavilyClient
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow
import numexpr as ne
from llama_index.llms.openai import OpenAI
import base64
import openai
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
import re
import requests
# Load environment configuration and shared API credentials/clients.
load_dotenv()
# `or ""` prevents an AttributeError at import time when the variable is
# missing; downstream falsy checks (`if not OPEN_AI`) still behave the same.
OPEN_AI = (os.getenv("OPENAI_API_KEY") or "").strip()
HF_TOKEN = os.environ.get("HF_TOKEN")
TAVILY = os.getenv("TAVILY_KEY")
client = InferenceClient(HF_TOKEN)
def python_interpreter(code: str) -> str:
    """
    Execute Python code and return its captured stdout.

    Use this for complex calculations, string manipulations, or when asked
    to determine the output of a Python script.

    Args:
        code: Python source code to execute.

    Returns:
        The captured stdout; if nothing was printed, a summary of the final
        local variables; otherwise a "no output" notice or an error message.

    WARNING: The code is executed with exec() in the current environment —
    never feed it untrusted input.
    """
    # BUG FIX: `io` and `contextlib` were used but never imported anywhere
    # in this file, so every call raised NameError. Import them locally so
    # the function is self-contained.
    import contextlib
    import io

    # Buffer that captures everything the executed code print()s.
    buffer = io.StringIO()
    try:
        with contextlib.redirect_stdout(buffer):
            # Dedicated local scope for the executed code's variables.
            local_scope = {}
            exec(code, {}, local_scope)
        output = buffer.getvalue()
        if not output and local_scope:
            # No prints happened: report the resulting variables instead.
            return f"Code executed successfully. Variables: {local_scope}"
        return output if output else "Code executed. No output produced."
    except Exception as e:
        return f"Error executing code: {str(e)}"
def transcribe_audio_openai(audio_path: str) -> str:
    """Transcribe an audio file with the OpenAI Whisper API (Spaces-compatible).

    Args:
        audio_path: Path to the local audio file.

    Returns:
        The transcribed text, or an error message string.
    """
    try:
        # Guard clauses: missing file, then missing credentials.
        if not os.path.exists(audio_path):
            return "Error: Audio file not found."
        if not OPEN_AI:
            return "Error: OpenAI API key not configured"

        openai.api_key = OPEN_AI  # configure the OpenAI module-level client
        with open(audio_path, "rb") as audio_file:
            result = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
        # response_format="text" yields the plain transcript string.
        return result
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
def get_youtube_transcript(video_url: str) -> str:
"""Extract transcript from a YouTube video URL."""
try:
# Extraer el ID del video
if "youtu.be/" in video_url:
video_id = video_url.split("youtu.be/")[1].split("?")[0]
elif "youtube.com/watch" in video_url:
video_id = video_url.split("v=")[1].split("&")[0]
else:
return "Invalid YouTube URL"
transcript_yt = YouTubeTranscriptApi()
fetched_transcript = transcript_yt.fetch(video_id)
transcript = []
for snippet in fetched_transcript:
transcript.append(snippet.text)
text = "\n".join(transcript)
return text[:3000] # límite razonable
except Exception as e:
return f"Unavailable. Error: {str(e)}"
def read_document(file_name: str) -> str:
    """
    Download a file from the GAIA validation set if not already present.

    - Text files (.txt, .py, .csv, .json, .md, ...): returns the content,
      truncated to 10,000 characters.
    - Binary files (.xlsx, .pdf, .png, .mp3, ...): returns the local path
      plus instructions to use other tools (python_interpreter / analyze_image).

    Args:
        file_name: Name of the file; any directory components are stripped.

    Returns:
        File content, a handling hint for binary files, or an error message.
    """
    import os

    # Sanitize the name (the LLM sometimes hallucinates paths).
    file_name = os.path.basename(file_name)

    # Official GAIA validation URL (where the real files live).
    base_url = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation"
    file_url = f"{base_url}/{file_name}"

    # 1. Download only when the file is not already cached locally.
    if not os.path.exists(file_name):
        import requests  # lazy import: only needed when downloading
        try:
            print(f"📥 Downloading {file_name} from {file_url}...")
            # FIX: a timeout prevents the agent from hanging forever on a
            # stalled connection (requests has no default timeout).
            response = requests.get(file_url, timeout=30)
            if response.status_code == 200:
                with open(file_name, "wb") as f:
                    f.write(response.content)
            else:
                return f"Error: File {file_name} not found in GAIA source (Status {response.status_code})."
        except Exception as e:
            return f"Error downloading file: {str(e)}"

    # 2. Decide how to read it based on the extension.
    _, ext = os.path.splitext(file_name)
    ext = ext.lower()

    # Extensions that must NOT be read as plain text.
    binary_extensions = ['.xlsx', '.xls', '.png', '.jpg', '.jpeg', '.mp3', '.wav', '.pdf', '.zip']
    if ext in binary_extensions:
        return (f"File '{file_name}' has been downloaded and saved locally. "
                f"It is a binary file ({ext}). DO NOT read it as text. "
                f"Use 'python_interpreter' (with pandas for excel) or 'analyze_image' to process it.")

    # 3. Plain text: read and return the content.
    try:
        with open(file_name, "r", encoding='utf-8', errors='ignore') as file:
            content = file.read()
        # Truncate overly long content to avoid blowing the context window.
        if len(content) > 10000:
            return content[:10000] + "\n...[Content Truncated]..."
        return content
    except Exception as e:
        return f"Error reading text file: {str(e)}"
def analyze_image(image_path: str, question: str = " Describe what you see in this image") -> str:
    """Analyze an image and extract information from it.

    Encodes the image as a base64 data URL and sends it with *question*
    to a multimodal OpenAI model.

    Args:
        image_path: Path to a local image file.
        question: Prompt describing what to extract from the image.

    Returns:
        The model's textual answer, or an error message.
    """
    try:
        if not os.path.exists(image_path):
            return "Error: Image file not found at the specified path."

        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Pick the MIME type from the file extension (default: JPEG).
        ext = os.path.splitext(image_path)[1].lower()
        mime_type = {
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.gif': 'image/gif',
            '.webp': 'image/webp'
        }.get(ext, 'image/jpeg')

        from llama_index.core.base.llms.types import ChatMessage, MessageRole
        message = ChatMessage(
            role=MessageRole.USER,
            content=[
                # BUG FIX: the key must be the string "type", not the
                # builtin `type` — the original sent a malformed text part.
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{image_base64}"
                    }
                }
            ]
        )
        llm = OpenAI(api_key=OPEN_AI, model="gpt-4o-mini", temperature=0.7)
        response = llm.chat([message])
        return response.message.content
    except Exception as e:
        return f"Error analyzing image: {str(e)}"
# Funciòn para calcular expresiones matemáticas
def calculator_numexpr(expression: str) -> str:
"""
Evaluate expresiones matem'aticas
"""
try:
expression = expression.strip()
# Evaluar la expression
result = ne.evaluate(expression)
if hasattr(result, 'item'):
result = result.item()
return f"Result: {result}"
except Exception as e:
return f"Error calculating '{expression}': {str(e)}"
# Web access tooling.
# NOTE(review): this DuckDuckGo tool spec is instantiated but never referenced
# in the visible code — `search_web` below uses Tavily instead. Confirm whether
# it is still needed before removing.
tool_spec = DuckDuckGoSearchToolSpec()
async def search_web(query: str) -> str:
    """Search the web with Tavily and return the raw results as a string."""
    # Local name chosen to avoid shadowing the module-level `client`.
    tavily = AsyncTavilyClient(api_key=TAVILY)
    results = await tavily.search(query)
    return str(results)
# Register the functions above as agent tools.
python_tool = FunctionTool.from_defaults(
    fn=python_interpreter,
    name="python_interpreter",
    description="Executes Python code. Use this to run code found in files or to perform complex logic.",
)

image_analyzer_tool = FunctionTool.from_defaults(
    fn=analyze_image,
    name="analyze_image",
    description="Analyze image to extract information, identify objects and read text",
)

calculator_tool = FunctionTool.from_defaults(
    fn=calculator_numexpr,
    name="calculator",
    description="Evaluate mathematical expressions including basic operations (+, -, *, /, **) and functions (sqrt, log, sin, cos, etc.)",
)

read_document_tool = FunctionTool.from_defaults(
    fn=read_document,
    name="read_document",
    description="Read and extract text content from documents including PDF, DOCX, TXT, MD, CSV, and JSON files",
)

youtube_transcript_tool = FunctionTool.from_defaults(
    fn=get_youtube_transcript,
    name="youtube_transcript",
    description="Get transcript/subtitles from YouTube videos. Use this when you need to know what is said in a YouTube video.",
)

search_tool = FunctionTool.from_defaults(
    fn=search_web,
    name="web_search",  # explicit tool name exposed to the agent
    description="Search the web for current information, facts, and answers to questions",
)