Spaces:

Cmuroc27
/

final_project_agents_course

Sleeping

File size: 9,393 Bytes

9dfaeea
3d6a2f8
9dfaeea
 
 
04b2ee3
9dfaeea
 
 
 
 
6e44e8b
9dfaeea
 
6c0bb00
b28b51e
 
d978fd2
6c0bb00
9dfaeea
d1a3601
9dfaeea
04b2ee3
9dfaeea
 
fc7df56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e44e8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b28b51e
3dc56c2
b28b51e
3dc56c2
 
 
 
 
9808761
3dc56c2
9808761
3dc56c2
 
 
 
 
 
 
 
 
 
 
b28b51e
3dc56c2
602af35
b28b51e
765b331
fc7df56
602af35
fc7df56
 
 
 
602af35
fc7df56
 
 
 
 
 
 
 
 
 
 
 
602af35
fc7df56
 
 
 
 
 
 
602af35
fc7df56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9808761
04b2ee3
9dfaeea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04b2ee3
 
 
 
9dfaeea
 
 
fc7df56
 
 
 
 
 
9dfaeea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc72cd9
 
b28b51e
 
 
 
9dfaeea
dc72cd9
04b2ee3
dc72cd9

# Image analyzer tool
import tempfile
# Building search web agent
from llama_index.core import SimpleDirectoryReader
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from tavily import AsyncTavilyClient
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow
import numexpr as ne
from llama_index.llms.openai import OpenAI
import base64
import openai
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
import re
import requests
load_dotenv()

# Credentials come from the environment (populated by load_dotenv() above).
# A missing OPENAI_API_KEY becomes "" instead of crashing the import with
# `None.strip()`, so callers can detect it with a plain truthiness check.
OPEN_AI = (os.getenv("OPENAI_API_KEY") or "").strip()
HF_TOKEN = os.environ.get("HF_TOKEN")
TAVILY = os.getenv("TAVILY_KEY")
# InferenceClient's first positional parameter is the model id, so the token
# has to be passed by keyword.
client = InferenceClient(token=HF_TOKEN)

def python_interpreter(code: str) -> str:
    """
    Execute Python code and return the output (stdout).

    Use this when you need to perform complex calculations, string manipulations,
    or when asked to determine the output of a python script.
    WARNING: The code will be executed in the current environment.

    Args:
        code: Python source to execute.

    Returns:
        The captured stdout if anything was printed; otherwise a summary of the
        names the code defined; otherwise a "no output" notice. If the code
        raises an Exception, an "Error executing code: ..." message is returned.
    """
    # Imported locally: the file's import block does not provide these modules.
    import contextlib
    import io

    # Buffer that captures everything the executed code prints.
    buffer = io.StringIO()

    try:
        # A single dict acts as both globals and locals. With two separate
        # dicts, exec treats the code like a class body, and functions defined
        # at the top level of `code` cannot see each other or earlier variables.
        namespace = {}
        with contextlib.redirect_stdout(buffer):
            # SECURITY: this runs arbitrary, unsandboxed code in this process.
            exec(code, namespace)

        output = buffer.getvalue()
        if output:
            return output

        # Nothing was printed: report the names the code defined instead.
        # exec() injects "__builtins__" into the namespace; hide it.
        variables = {
            name: value
            for name, value in namespace.items()
            if name != "__builtins__"
        }
        if variables:
            return f"Code executed successfully. Variables: {variables}"
        return "Code executed. No output produced."

    except Exception as e:
        return f"Error executing code: {str(e)}"

def transcribe_audio_openai(audio_path: str) -> str:
    """Return the OpenAI "whisper-1" transcription of a local audio file.

    Every failure (missing file, missing API key, API error) is reported as an
    "Error..." string instead of being raised.
    """
    try:
        audio_missing = not os.path.exists(audio_path)
        if audio_missing:
            return "Error: Audio file not found."

        # The request cannot be made without an API key.
        if not OPEN_AI:
            return "Error: OpenAI API key not configured"

        openai.api_key = OPEN_AI

        whisper_options = {"model": "whisper-1", "response_format": "text"}
        with open(audio_path, "rb") as audio_stream:
            # With response_format="text" only the transcribed text is returned.
            return openai.audio.transcriptions.create(
                file=audio_stream,
                **whisper_options,
            )

    except Exception as exc:
        return f"Error transcribing audio: {exc}"

def get_youtube_transcript(video_url: str) -> str:
    """Return the transcript of a YouTube video, capped at 3000 characters.

    Recognises ``youtu.be/<id>`` and ``youtube.com/watch?...v=<id>`` URLs; any
    other URL yields "Invalid YouTube URL". Any exception is reported as an
    "Unavailable. Error: ..." string.
    """
    max_chars = 3000  # reasonable upper bound on the returned text
    short_marker = "youtu.be/"

    try:
        # Work out the video id from the URL shape.
        if short_marker in video_url:
            after_host = video_url.split(short_marker)[1]
            video_id = after_host.partition("?")[0]
        elif "youtube.com/watch" in video_url:
            after_param = video_url.split("v=")[1]
            video_id = after_param.partition("&")[0]
        else:
            return "Invalid YouTube URL"

        snippets = YouTubeTranscriptApi().fetch(video_id)
        full_text = "\n".join([snippet.text for snippet in snippets])
        return full_text[:max_chars]

    except Exception as exc:
        return f"Unavailable. Error: {exc}"
    


def read_document(file_name: str) -> str:
    """
    Fetch a GAIA validation file (downloading it if absent) and return its text.

    - Text files (.txt, .py, .csv, .json, .md, ...) are returned as a string,
      truncated to 10,000 characters.
    - Binary files (.xlsx, .pdf, .docx, images, audio, archives, ...) are only
      saved locally; a notice naming the follow-up tools
      (python_interpreter / analyze_image) is returned instead.

    Args:
        file_name: Name of the file. Any directory part is discarded, and the
            file is looked up / saved in the current working directory.

    Returns:
        The file content, the binary-file notice, or an "Error ..." message.
    """
    import os

    max_chars = 10000  # keep the result small enough for the LLM context
    download_timeout = 30  # seconds; requests waits indefinitely without one
    # Extensions that must NOT be read as plain text.
    binary_extensions = {
        '.xlsx', '.xls', '.docx', '.doc', '.pptx', '.ppt', '.pdf',
        '.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp',
        '.mp3', '.wav', '.m4a', '.flac', '.ogg', '.mp4', '.mov',
        '.zip', '.gz', '.tar', '.7z',
    }

    # Keep only the base name (the LLM sometimes hallucinates paths).
    file_name = os.path.basename(file_name or "")
    if not file_name:
        return "Error: No file name provided."

    # Official GAIA validation location (where the real files live).
    base_url = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation"
    file_url = f"{base_url}/{file_name}"

    # 1. Download the file if it is not on disk yet.
    if not os.path.exists(file_name):
        try:
            # Imported lazily so files already on disk can be read without it.
            import requests

            print(f"📥 Downloading {file_name} from {file_url}...")
            # Gated repositories on the Hub reject anonymous downloads, so the
            # token is sent whenever one is configured.
            token = os.environ.get("HF_TOKEN")
            headers = {"Authorization": f"Bearer {token}"} if token else {}
            response = requests.get(file_url, headers=headers, timeout=download_timeout)
            if response.status_code != 200:
                return f"Error: File {file_name} not found in GAIA source (Status {response.status_code})."
            with open(file_name, "wb") as f:
                f.write(response.content)
        except Exception as e:
            return f"Error downloading file: {str(e)}"

    # 2. Decide how to handle the file from its extension.
    _, ext = os.path.splitext(file_name)
    ext = ext.lower()

    if ext in binary_extensions:
        return (f"File '{file_name}' has been downloaded and saved locally. "
                f"It is a binary file ({ext}). DO NOT read it as text. "
                f"Use 'python_interpreter' (with pandas for excel) or 'analyze_image' to process it.")

    # 3. Text file: read it and return the (possibly truncated) content.
    try:
        with open(file_name, "r", encoding='utf-8', errors='ignore') as file:
            content = file.read()
    except Exception as e:
        return f"Error reading text file: {str(e)}"

    if len(content) > max_chars:
        return content[:max_chars] + "\n...[Content Truncated]..."
    return content

   

def analyze_image(image_path: str, question: str = """ Describe what you see in this image""") -> str:
    """Ask a vision-capable OpenAI model a question about a local image.

    Args:
        image_path: Path of the image file on disk.
        question: Prompt sent together with the image.

    Returns:
        The model's answer, or an "Error ..." string if the file is missing or
        any step fails (nothing is raised to the caller).
    """
    try:
        if not os.path.exists(image_path):
            return "Error: Image file not found at the specified path."

        # The image is sent inline as a base64 data URL.
        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Pick the MIME type from the extension; unknown extensions fall back to JPEG.
        ext = os.path.splitext(image_path)[1].lower()
        mime_type = {
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.gif': 'image/gif',
            '.webp': 'image/webp'
        }.get(ext, 'image/jpeg')

        from llama_index.core.base.llms.types import ChatMessage, MessageRole

        # NOTE(review): this is the OpenAI-style list of content parts; confirm
        # the installed llama-index version accepts plain dicts here.
        message = ChatMessage(
            role=MessageRole.USER,
            content=[
                # The key must be the string "type"; a bare `type` would use
                # the builtin class as the dict key.
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{image_base64}"
                    }
                }
            ]
        )

        llm = OpenAI(api_key=OPEN_AI, model="gpt-4o-mini", temperature=0.7)

        response = llm.chat([message])

        return response.message.content

    except Exception as e:
        return f"Error analyzing image: {str(e)}"
    
# Function that evaluates mathematical expressions
def calculator_numexpr(expression: str) -> str:
    """Evaluate a mathematical expression with numexpr.

    Returns "Result: <value>" on success, or an "Error calculating ..." message
    if stripping or evaluating the expression raises.
    """
    try:
        # Rebind the parameter so the error message shows the stripped text too.
        expression = expression.strip()
        value = ne.evaluate(expression)
        # Objects exposing .item() (presumably NumPy scalars / 0-d arrays) are
        # unwrapped before formatting.
        value = value.item() if hasattr(value, 'item') else value
        return f"Result: {value}"
    except Exception as exc:
        return f"Error calculating '{expression}': {exc}"

# Internet access: DuckDuckGo search tool spec.
# NOTE(review): `tool_spec` is not referenced anywhere in the visible code
# (search_web below uses Tavily) — confirm whether it is still needed.
tool_spec = DuckDuckGoSearchToolSpec()
async def search_web(query: str) -> str:
    """Run a Tavily web search for the query and return the response as a string."""
    tavily_client = AsyncTavilyClient(api_key=TAVILY)
    search_response = await tavily_client.search(query)
    return str(search_response)


# Tool definitions
# Each FunctionTool wraps one of the functions above; the explicit name and
# description are what the agent sees when choosing a tool.
python_tool = FunctionTool.from_defaults(
    fn=python_interpreter,
    name="python_interpreter",
    description="Executes Python code. Use this to run code found in files or to perform complex logic.",
)

image_analyzer_tool = FunctionTool.from_defaults(
    fn=analyze_image,
    name="analyze_image",
    description="Analyze image to extract information, identify objects and read text",
)

calculator_tool = FunctionTool.from_defaults(
    fn=calculator_numexpr,
    name="calculator",
    description="Evaluate mathematical expressions including basic operations (+, -, *, /, **) and functions (sqrt, log, sin, cos, etc.)",
)

# The description matches what read_document actually does: it returns text
# content only, and for binary formats it just saves the file and points to
# the follow-up tools.
read_document_tool = FunctionTool.from_defaults(
    fn=read_document,
    name="read_document",
    description=(
        "Fetch a GAIA task file by name. Returns the content of text files "
        "(TXT, PY, CSV, JSON, MD); for binary files (XLSX, PDF, images, audio) "
        "it only saves the file locally and tells you which tool to use next."
    ),
)

youtube_transcript_tool = FunctionTool.from_defaults(
    fn=get_youtube_transcript,
    name="youtube_transcript",
    description="Get transcript/subtitles from YouTube videos. Use this when you need to know what is said in a YouTube video.",
)

search_tool = FunctionTool.from_defaults(
    fn=search_web,
    name="web_search",  # explicit tool name
    description="Search the web for current information, facts, and answers to questions",
)