# Image analyzer tool
import tempfile
# Building search web agent
from llama_index.core import SimpleDirectoryReader
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from tavily import AsyncTavilyClient
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow
import numexpr as ne
from llama_index.llms.openai import OpenAI
import base64
import openai
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
import re
import requests
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# `or ""` keeps the import from crashing with AttributeError when the variable
# is unset; tools that need the key check for an empty value themselves.
OPEN_AI = (os.getenv("OPENAI_API_KEY") or "").strip()
HF_TOKEN = os.environ.get("HF_TOKEN")
TAVILY = os.getenv("TAVILY_KEY")

# The first positional parameter of InferenceClient is `model`, so the token
# has to be passed by keyword.
client = InferenceClient(token=HF_TOKEN)
def python_interpreter(code: str) -> str:
    """Execute Python code and return what it printed to stdout.

    Use this when you need to perform complex calculations, string
    manipulations, or when asked to determine the output of a python script.

    WARNING: The code will be executed in the current environment, with no
    sandboxing.

    Args:
        code: Python source code to run.

    Returns:
        The captured stdout if anything was printed; otherwise a summary of
        the names the code defined, or a "no output" message. If the code
        raises an ``Exception``, a message starting with
        ``"Error executing code:"`` is returned instead.
    """
    # Imported locally because the module header does not import them.
    import contextlib
    import io

    captured = io.StringIO()
    # One dict serves as both globals and locals. With separate dicts,
    # functions defined by the snippet cannot see other top-level names
    # (including themselves, which breaks recursion).
    namespace = {}
    try:
        with contextlib.redirect_stdout(captured):
            # SECURITY: exec() runs arbitrary, model-generated code with the
            # full privileges of this process.
            exec(code, namespace)
    except Exception as e:
        return f"Error executing code: {str(e)}"

    output = captured.getvalue()
    if output:
        return output

    # Nothing was printed: report the names the code defined instead.
    # exec() injects "__builtins__" into the namespace; hide it.
    variables = {
        name: value
        for name, value in namespace.items()
        if name != "__builtins__"
    }
    if variables:
        return f"Code executed successfully. Variables: {variables}"
    return "Code executed. No output produced."
def transcribe_audio_openai(audio_path: str) -> str:
    """Transcribe an audio file with the OpenAI Whisper API (Spaces compatible).

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        Whatever the transcription endpoint returns for
        ``response_format="text"``, or a message starting with ``"Error"``
        when the file is missing, the API key is not configured, or the
        request fails.
    """
    try:
        file_missing = not os.path.exists(audio_path)
        if file_missing:
            return "Error: Audio file not found."

        # The API key must be available before the service is called.
        key_missing = not OPEN_AI
        if key_missing:
            return "Error: OpenAI API key not configured"

        # Configure the module-level OpenAI client.
        openai.api_key = OPEN_AI

        request_options = {"model": "whisper-1", "response_format": "text"}
        with open(audio_path, "rb") as audio_stream:
            return openai.audio.transcriptions.create(
                file=audio_stream,
                **request_options,
            )
    except Exception as exc:
        return f"Error transcribing audio: {exc!s}"
# Matches the video ID in the common YouTube URL shapes: youtu.be/<id>,
# youtube.com/watch?...v=<id>, and /shorts/, /embed/, /live/ paths.
_YOUTUBE_ID_RE = re.compile(
    r"(?:youtu\.be/|youtube\.com/(?:watch/?\?(?:[^\s#]*?&)?v=|shorts/|embed/|live/))"
    r"([A-Za-z0-9_-]+)"
)


def _extract_youtube_id(video_url: str) -> str:
    """Return the video ID found in *video_url*, or "" if there is none."""
    match = _YOUTUBE_ID_RE.search(video_url)
    return match.group(1) if match else ""


def get_youtube_transcript(video_url: str) -> str:
    """Extract the transcript of a YouTube video from its URL.

    Args:
        video_url: A ``youtu.be/<id>`` link or a ``youtube.com`` watch,
            shorts, embed or live URL.

    Returns:
        The transcript snippets joined by newlines and cut to the first
        3000 characters, ``"Invalid YouTube URL"`` when no video ID can be
        found, or ``"Unavailable. Error: ..."`` when fetching fails.
    """
    try:
        video_id = _extract_youtube_id(video_url)
        if not video_id:
            return "Invalid YouTube URL"

        fetched_transcript = YouTubeTranscriptApi().fetch(video_id)
        text = "\n".join(snippet.text for snippet in fetched_transcript)
        return text[:3000]  # reasonable size limit for the agent context
    except Exception as e:
        return f"Unavailable. Error: {str(e)}"
def read_document(file_name: str) -> str:
    """Fetch a GAIA task file (if not already present) and return its content.

    The file is looked up by its base name in the current working directory
    and downloaded from the GAIA validation source when it is missing.

    - If it's a text file (.txt, .py, .csv, .json, .md, ...), returns the
      content, truncated to 10000 characters.
    - If it's a binary file (.xlsx, .pdf, .docx, .png, .mp3, ...), returns a
      message saying the file was saved locally, with instructions to use
      other tools (like python_interpreter or analyze_image).

    Args:
        file_name: Name of the file; any directory part is discarded.

    Returns:
        The text content, a handling message for binary files, or a message
        starting with ``"Error"`` when the name is empty or the download or
        read fails.
    """
    # Keep only the base name (the LLM sometimes hallucinates paths).
    file_name = os.path.basename(file_name)
    if not file_name:
        # Without a name the request would target the directory URL itself.
        return "Error: No file name provided."

    # Official GAIA validation URL, where the real files are hosted.
    base_url = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation"
    file_url = f"{base_url}/{file_name}"

    # 1. Download the file if it is not available locally.
    if not os.path.exists(file_name):
        try:
            print(f"📥 Downloading {file_name} from {file_url}...")
            # A timeout prevents the agent from hanging on a stalled request.
            response = requests.get(file_url, timeout=30)
            if response.status_code != 200:
                return f"Error: File {file_name} not found in GAIA source (Status {response.status_code})."
            with open(file_name, "wb") as f:
                f.write(response.content)
        except Exception as e:
            return f"Error downloading file: {str(e)}"

    # 2. Decide how to handle the file based on its extension.
    _, ext = os.path.splitext(file_name)
    ext = ext.lower()

    # Formats that must NOT be read as plain text.
    binary_extensions = frozenset({
        '.xlsx', '.xls', '.docx', '.doc', '.pptx', '.ppt', '.pdf', '.zip',
        '.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp',
        '.mp3', '.wav', '.m4a', '.flac', '.ogg', '.mp4', '.mov',
    })
    if ext in binary_extensions:
        return (f"File '{file_name}' has been downloaded and saved locally. "
                f"It is a binary file ({ext}). DO NOT read it as text. "
                f"Use 'python_interpreter' (with pandas for excel) or 'analyze_image' to process it.")

    # 3. Text file: read it and return the content.
    try:
        with open(file_name, "r", encoding='utf-8', errors='ignore') as file:
            content = file.read()
    except Exception as e:
        return f"Error reading text file: {str(e)}"

    # Truncate very long content to avoid context-length errors.
    if len(content) > 10000:
        return content[:10000] + "\n...[Content Truncated]..."
    return content
def analyze_image(image_path: str, question: str = """ Describe what you see in this image""") -> str:
    """Analyze an image and extract information from it.

    The image is read from disk, base64-encoded into a data URL and sent,
    together with *question*, to the ``gpt-4o-mini`` model.

    Args:
        image_path: Path of the image file on disk.
        question: Prompt sent along with the image.

    Returns:
        The content of the model's reply, or a message starting with
        ``"Error"`` when the file is missing or the request fails.
    """
    try:
        if not os.path.exists(image_path):
            return "Error: Image file not found at the specified path."

        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Pick the MIME type from the extension; unknown ones fall back to JPEG.
        ext = os.path.splitext(image_path)[1].lower()
        mime_type = {
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.gif': 'image/gif',
            '.webp': 'image/webp'
        }.get(ext, 'image/jpeg')

        from llama_index.core.base.llms.types import ChatMessage, MessageRole

        message = ChatMessage(
            role=MessageRole.USER,
            content=[
                # The key must be the string "type"; the bare name `type`
                # would use the builtin class as the dictionary key.
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{image_base64}"
                    }
                }
            ]
        )
        llm = OpenAI(api_key=OPEN_AI, model="gpt-4o-mini", temperature=0.7)
        response = llm.chat([message])
        return response.message.content
    except Exception as e:
        return f"Error analyzing image: {str(e)}"
# Helper that evaluates mathematical expressions
def calculator_numexpr(expression: str) -> str:
    """Evaluate a mathematical expression with numexpr.

    Args:
        expression: The expression to evaluate; surrounding whitespace is
            stripped first.

    Returns:
        ``"Result: <value>"`` on success, or
        ``"Error calculating '<expression>': <reason>"`` on failure.
    """
    try:
        expression = expression.strip()
        value = ne.evaluate(expression)
        # Results that expose .item() are unwrapped through it.
        value = value.item() if hasattr(value, 'item') else value
        return f"Result: {value}"
    except Exception as exc:
        return f"Error calculating '{expression}': {exc!s}"
# Web search
# DuckDuckGo search tool spec, instantiated when the module is imported.
# NOTE(review): nothing in the visible part of this file uses `tool_spec`;
# confirm whether it is still needed.
tool_spec = DuckDuckGoSearchToolSpec()
async def search_web(query: str) -> str:
    """Search the web with Tavily to help answer a question.

    Args:
        query: The search query.

    Returns:
        The Tavily search response converted to a string.
    """
    tavily_client = AsyncTavilyClient(api_key=TAVILY)
    search_response = await tavily_client.search(query)
    return str(search_response)
# Tool definitions
# Wraps `python_interpreter` as an agent tool.
python_tool = FunctionTool.from_defaults(
    name="python_interpreter",
    description="Executes Python code. Use this to run code found in files or to perform complex logic.",
    fn=python_interpreter,
)
# Wraps `analyze_image` as an agent tool.
image_analyzer_tool = FunctionTool.from_defaults(
    name="analyze_image",
    description="Analyze image to extract information, identify objects and read text",
    fn=analyze_image,
)
# Wraps `calculator_numexpr` as an agent tool.
calculator_tool = FunctionTool.from_defaults(
    name="calculator",
    description="Evaluate mathematical expressions including basic operations (+, -, *, /, **) and functions (sqrt, log, sin, cos, etc.)",
    fn=calculator_numexpr,
)
# Wraps `read_document` as an agent tool. The description mirrors what the
# function actually does: only text files are returned as content, while
# binary files are saved locally and handed over to other tools.
read_document_tool = FunctionTool.from_defaults(
    fn=read_document,
    name="read_document",
    description=(
        "Fetch a GAIA task file by name (downloading it if it is not present locally). "
        "Returns the content of text files (TXT, PY, MD, CSV, JSON). "
        "For binary files (e.g. XLSX, PDF, images, audio) it saves the file locally "
        "and returns instructions to process it with another tool."
    ),
)
# Wraps `get_youtube_transcript` as an agent tool.
youtube_transcript_tool = FunctionTool.from_defaults(
    name="youtube_transcript",
    description="Get transcript/subtitles from YouTube videos. Use this when you need to know what is said in a YouTube video.",
    fn=get_youtube_transcript,
)
# Wraps the async `search_web` coroutine as an agent tool.
search_tool = FunctionTool.from_defaults(
    fn=search_web,
    name="web_search",  # explicit tool name
    description="Search the web for current information, facts, and answers to questions",
)