File size: 9,393 Bytes
9dfaeea
3d6a2f8
9dfaeea
 
 
04b2ee3
9dfaeea
 
 
 
 
6e44e8b
9dfaeea
 
6c0bb00
b28b51e
 
d978fd2
6c0bb00
9dfaeea
d1a3601
9dfaeea
04b2ee3
9dfaeea
 
fc7df56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e44e8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b28b51e
3dc56c2
b28b51e
3dc56c2
 
 
 
 
9808761
3dc56c2
9808761
3dc56c2
 
 
 
 
 
 
 
 
 
 
b28b51e
3dc56c2
602af35
b28b51e
765b331
fc7df56
602af35
fc7df56
 
 
 
602af35
fc7df56
 
 
 
 
 
 
 
 
 
 
 
602af35
fc7df56
 
 
 
 
 
 
602af35
fc7df56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9808761
04b2ee3
9dfaeea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04b2ee3
 
 
 
9dfaeea
 
 
fc7df56
 
 
 
 
 
9dfaeea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc72cd9
 
b28b51e
 
 
 
9dfaeea
dc72cd9
04b2ee3
dc72cd9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# Image analyzer tool
import tempfile
# Building search web agent
from llama_index.core import SimpleDirectoryReader
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from tavily import AsyncTavilyClient
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow
import numexpr as ne
from llama_index.llms.openai import OpenAI
import base64
import openai
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
import re
import requests
# Load environment variables (python-dotenv) before the credentials below
# are read from the process environment.
load_dotenv()

# API credentials. A missing OPENAI_API_KEY becomes "" instead of crashing at
# import time on `None.strip()`; transcribe_audio_openai() already treats a
# falsy key as "not configured".
OPEN_AI = (os.getenv("OPENAI_API_KEY") or "").strip()
HF_TOKEN = os.environ.get("HF_TOKEN")
TAVILY = os.getenv("TAVILY_KEY")

# The first positional parameter of InferenceClient is the model id, so the
# token has to be passed by keyword.
client = InferenceClient(token=HF_TOKEN)

def python_interpreter(code: str) -> str:
    """Execute Python source code and return whatever it printed to stdout.

    Use this for complex calculations, string manipulation, or to determine
    the output of a Python script.

    WARNING: the code is run with ``exec`` inside the current process and is
    not sandboxed. It has the same privileges as this program, so only pass
    code you are prepared to run.

    Args:
        code: Python source to execute.

    Returns:
        The captured stdout when the code printed something. Otherwise a
        summary of the variables the code defined, or a note that nothing was
        produced. If the code raises, an ``"Error executing code: ..."``
        message that includes the exception type.
    """
    # Imported here because the module does not import them at file level.
    import contextlib
    import io

    stdout_buffer = io.StringIO()

    # One dict is used as both globals and locals. With separate dicts,
    # functions defined by the script cannot see other top-level names
    # (helpers, imports), because function bodies resolve names through
    # globals only. "__name__" lets `if __name__ == "__main__":` guards run.
    namespace = {"__name__": "__main__"}

    try:
        with contextlib.redirect_stdout(stdout_buffer):
            exec(code, namespace)

        output = stdout_buffer.getvalue()
        if output:
            return output

        # Nothing was printed: report the names the script defined, hiding
        # the entries injected by us / by exec itself.
        variables = {
            name: value
            for name, value in namespace.items()
            if name not in ("__name__", "__builtins__")
        }
        if variables:
            return f"Code executed successfully. Variables: {variables}"
        return "Code executed. No output produced."

    except Exception as e:
        return f"Error executing code: {type(e).__name__}: {e}"

def transcribe_audio_openai(audio_path: str) -> str:
    """Transcribe an audio file through OpenAI's ``whisper-1`` model.

    Args:
        audio_path: Path of the audio file on the local disk.

    Returns:
        The value returned by the transcription endpoint when called with
        ``response_format="text"``, or an ``"Error ..."`` message if the file
        is missing, no API key is configured, or the request fails.
    """
    try:
        # Guard clauses: nothing to do without a file or without credentials.
        audio_missing = not os.path.exists(audio_path)
        if audio_missing:
            return "Error: Audio file not found."
        if not OPEN_AI:
            return "Error: OpenAI API key not configured"

        # The module-level OpenAI client reads its key from this attribute.
        openai.api_key = OPEN_AI

        request_options = {"model": "whisper-1", "response_format": "text"}
        with open(audio_path, "rb") as audio_stream:
            transcription = openai.audio.transcriptions.create(
                file=audio_stream,
                **request_options,
            )
        return transcription

    except Exception as err:
        return f"Error transcribing audio: {err!s}"

# Matches the video id in the common YouTube URL shapes:
#   youtu.be/<id>, youtube.com/watch?...v=<id>,
#   youtube.com/{shorts,embed,live,v}/<id>
_YOUTUBE_ID_RE = re.compile(
    r"(?:youtu\.be/"
    r"|youtube(?:-nocookie)?\.com/(?:shorts|embed|live|v)/"
    r"|youtube\.com/watch/?\?(?:[^#\s]*?&)?v=)"
    r"([A-Za-z0-9_-]+)"
)


def _extract_youtube_video_id(video_url: str):
    """Return the video id found in *video_url*, or None when there is none."""
    match = _YOUTUBE_ID_RE.search(video_url)
    return match.group(1) if match else None


def get_youtube_transcript(video_url: str) -> str:
    """Extract the transcript of a YouTube video.

    Args:
        video_url: A YouTube URL (``youtu.be/<id>``, ``youtube.com/watch?v=<id>``,
            or a ``/shorts/``, ``/embed/``, ``/live/`` or ``/v/`` URL).

    Returns:
        The transcript snippets joined by newlines and truncated to the first
        3000 characters; ``"Invalid YouTube URL"`` when no video id can be
        found in the URL; or ``"Unavailable. Error: ..."`` when fetching the
        transcript fails.
    """
    try:
        video_id = _extract_youtube_video_id(video_url)
        if not video_id:
            # Covers non-YouTube URLs as well as watch URLs without a `v=`
            # parameter, which previously surfaced as an IndexError.
            return "Invalid YouTube URL"

        fetched_transcript = YouTubeTranscriptApi().fetch(video_id)
        text = "\n".join(snippet.text for snippet in fetched_transcript)

        # Keep the tool output small enough for the LLM context.
        return text[:3000]

    except Exception as e:
        return f"Unavailable. Error: {str(e)}"
    


def read_document(file_name: str) -> str:
    """Fetch a GAIA task file (if not already present locally) and read it.

    The file is looked up in the current working directory by its base name
    and downloaded from the GAIA validation set when it is missing.

    - Text files (.txt, .py, .csv, .json, .md, ...) are returned as text,
      truncated to 10000 characters.
    - Binary files (.xlsx, .pdf, .docx, images, audio, ...) are only saved
      locally; the return value tells the caller to use another tool
      (``python_interpreter`` or ``analyze_image``) to process them.

    Args:
        file_name: Name of the file; any directory components are ignored.

    Returns:
        The file content, the binary-file notice, or an ``"Error ..."``
        message when the name is empty or download/reading fails.
    """
    # Keep only the base name (the LLM sometimes hallucinates directories).
    file_name = os.path.basename(file_name or "")
    if not file_name:
        return "Error: No file name provided."

    # Official GAIA validation split, where the task attachments live.
    base_url = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation"
    file_url = f"{base_url}/{file_name}"

    # 1. Download the file if it is not available locally.
    if not os.path.exists(file_name):
        try:
            print(f"📥 Downloading {file_name} from {file_url}...")
            # A timeout keeps an unresponsive server from hanging the agent.
            response = requests.get(file_url, timeout=30)
            if response.status_code != 200:
                return f"Error: File {file_name} not found in GAIA source (Status {response.status_code})."
            with open(file_name, "wb") as f:
                f.write(response.content)
        except Exception as e:
            return f"Error downloading file: {str(e)}"

    # 2. Decide how to handle the file based on its extension.
    ext = os.path.splitext(file_name)[1].lower()

    # Formats that must not be decoded as plain text. Office documents are
    # zip containers, so decoding them as UTF-8 only yields garbage.
    binary_extensions = {
        '.xlsx', '.xls', '.docx', '.pptx',
        '.png', '.jpg', '.jpeg', '.gif', '.webp',
        '.mp3', '.wav', '.pdf', '.zip',
    }

    if ext in binary_extensions:
        return (f"File '{file_name}' has been downloaded and saved locally. "
                f"It is a binary file ({ext}). DO NOT read it as text. "
                f"Use 'python_interpreter' (with pandas for excel) or 'analyze_image' to process it.")

    # 3. Text file: read it and return the (possibly truncated) content.
    max_chars = 10000  # keeps the tool output within the LLM context budget
    try:
        with open(file_name, "r", encoding='utf-8', errors='ignore') as file:
            content = file.read()
    except Exception as e:
        return f"Error reading text file: {str(e)}"

    if len(content) > max_chars:
        return content[:max_chars] + "\n...[Content Truncated]..."
    return content

   

def _build_image_chat_message(question, image_bytes, mime_type):
    """Build a user ChatMessage carrying the question text and the image."""
    from llama_index.core.base.llms.types import ChatMessage, MessageRole

    try:
        from llama_index.core.base.llms.types import ImageBlock, TextBlock
    except ImportError:
        # Fallback for llama-index releases without content blocks
        # (presumably < 0.12 — confirm against the pinned version): send
        # OpenAI-style content parts. Note the key is the string "type";
        # the original code used the builtin `type` object as the key.
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")
        return ChatMessage(
            role=MessageRole.USER,
            content=[
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{image_base64}"
                    },
                },
            ],
        )

    return ChatMessage(
        role=MessageRole.USER,
        blocks=[
            TextBlock(text=question),
            ImageBlock(image=image_bytes, image_mimetype=mime_type),
        ],
    )


def analyze_image(image_path: str, question: str = """ Describe what you see in this image""") -> str:
    """Ask a vision-capable OpenAI model a question about a local image.

    Args:
        image_path: Path of the image file on the local disk.
        question: Prompt sent together with the image.

    Returns:
        The model's answer, or an ``"Error ..."`` message when the file does
        not exist or the request fails.
    """
    try:
        if not os.path.exists(image_path):
            return "Error: Image file not found at the specified path."

        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()

        # Derive the MIME type from the extension; unknown extensions are
        # sent as JPEG.
        ext = os.path.splitext(image_path)[1].lower()
        mime_type = {
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.gif': 'image/gif',
            '.webp': 'image/webp',
        }.get(ext, 'image/jpeg')

        message = _build_image_chat_message(question, image_bytes, mime_type)

        llm = OpenAI(api_key=OPEN_AI, model="gpt-4o-mini", temperature=0.7)
        response = llm.chat([message])

        return response.message.content

    except Exception as e:
        return f"Error analyzing image: {str(e)}"
    
# Function for evaluating mathematical expressions
def calculator_numexpr(expression: str) -> str:
    """Evaluate a mathematical expression with numexpr.

    Args:
        expression: The expression to evaluate; surrounding whitespace is
            stripped first.

    Returns:
        ``"Result: <value>"`` on success, otherwise
        ``"Error calculating '<expression>': <reason>"``.
    """
    try:
        expression = expression.strip()
        computed = ne.evaluate(expression)

        # Results exposing .item() are unwrapped through it (presumably
        # NumPy 0-d arrays/scalars, so the output shows a plain number).
        value = computed.item() if hasattr(computed, 'item') else computed

        return f"Result: {value}"

    except Exception as err:
        return f"Error calculating '{expression}': {err!s}"

# Web search
# NOTE(review): `tool_spec` is not referenced anywhere else in the visible
# part of this file (search_web below uses Tavily) — confirm it is still needed.
tool_spec = DuckDuckGoSearchToolSpec()
async def search_web(query: str) -> str:
    """Useful for using the web to answer questions.

    Runs the query through the Tavily search API.

    Args:
        query: The search query.

    Returns:
        The string form of the Tavily response, or an
        ``"Error searching the web: ..."`` message when the client cannot be
        created or the request fails. Errors are returned rather than raised
        so this tool behaves like the other tools in this module.
    """
    try:
        tavily_client = AsyncTavilyClient(api_key=TAVILY)
        return str(await tavily_client.search(query))
    except Exception as e:
        return f"Error searching the web: {str(e)}"


# Tool definitions: each function above is wrapped in a FunctionTool with an
# explicit name and a description.
python_tool = FunctionTool.from_defaults(
    fn=python_interpreter,
    name="python_interpreter",
    description="Executes Python code. Use this to run code found in files or to perform complex logic.",
)

image_analyzer_tool = FunctionTool.from_defaults(
    fn=analyze_image,
    name="analyze_image",
    description="Analyze image to extract information, identify objects and read text",
)

calculator_tool = FunctionTool.from_defaults(
    fn=calculator_numexpr,
    name="calculator",
    description="Evaluate mathematical expressions including basic operations (+, -, *, /, **) and functions (sqrt, log, sin, cos, etc.)",
)

# The description mirrors what read_document actually does: text files are
# returned as text, binary files are only saved locally.
read_document_tool = FunctionTool.from_defaults(
    fn=read_document,
    name="read_document",
    description=(
        "Fetch a GAIA task file by name. Returns the text content of text files "
        "(TXT, PY, MD, CSV, JSON). For binary files (PDF, XLSX, images, audio) it only "
        "saves the file locally and tells you to process it with 'python_interpreter' "
        "or 'analyze_image'."
    ),
)

youtube_transcript_tool = FunctionTool.from_defaults(
    fn=get_youtube_transcript,
    name="youtube_transcript",
    description="Get transcript/subtitles from YouTube videos. Use this when you need to know what is said in a YouTube video.",
)


search_tool = FunctionTool.from_defaults(
    fn=search_web,
    name="web_search",  # explicit name
    description="Search the web for current information, facts, and answers to questions",
)