Csuarezg committed on
Commit
c17fa18
·
verified ·
1 Parent(s): 165ff1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +181 -73
app.py CHANGED
@@ -3,33 +3,102 @@ import gradio as gr
3
  import requests
4
  import pandas as pd
5
  import re
6
- import tempfile
7
  import pytesseract
8
- from PIL import Image
9
- from typing import Dict, List, Optional, TypedDict, Annotated
10
- from langgraph.graph import StateGraph, END
11
- from langgraph.checkpoint.memory import MemorySaver
12
- from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
13
- from langchain_openai import ChatOpenAI
14
- from langgraph.prebuilt import ToolNode, tools_condition
15
- from langchain_community.tools.tavily_search import TavilySearchResults
16
- from youtube_transcript_api import YouTubeTranscriptApi
17
- from langchain_core.tools import tool
18
  import yt_dlp
19
  import cv2
20
  import numpy as np
21
  import speech_recognition as sr
 
 
 
 
 
 
 
 
 
22
 
23
- # ================ Configuración Global ================
24
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
25
- USERNAME = "Csuarezg" # Cambiar por tu usuario
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  AGENT_CODE = "gaia_agent_v1"
27
- SYSTEM_PROMPT = """[Insertar tu system prompt completo aquí]"""
28
 
29
- # ================ Definición de Herramientas con @tool ================
30
  @tool
31
  def wikipedia_tool(query: str) -> str:
32
- """Busca en Wikipedia información enciclopédica."""
 
 
 
 
 
 
 
33
  try:
34
  import wikipedia
35
  wikipedia.set_lang("en")
@@ -39,17 +108,31 @@ def wikipedia_tool(query: str) -> str:
39
 
40
  @tool
41
  def youtube_transcript_tool(url: str) -> str:
42
- """Obtiene el transcript de un video de YouTube."""
 
 
 
 
 
 
 
43
  try:
44
  video_id = re.findall(r'(?:v=|\/)([0-9A-Za-z_-]{11})', url)[0]
45
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
46
  return " ".join([entry['text'] for entry in transcript[:5]])
47
  except Exception as e:
48
- return f"Error de transcript: {str(e)}"
49
 
50
  @tool
51
  def file_analyzer_tool(file_path: str) -> str:
52
- """Analiza archivos de imagen y texto."""
 
 
 
 
 
 
 
53
  try:
54
  if file_path.endswith(('.png', '.jpg', '.jpeg')):
55
  img = Image.open(file_path)
@@ -57,110 +140,135 @@ def file_analyzer_tool(file_path: str) -> str:
57
  return f"Texto detectado: {text[:500]}..." if text else "Sin texto"
58
  return "Formato no soportado"
59
  except Exception as e:
60
- return f"Error análisis de archivo: {str(e)}"
61
 
62
  @tool
63
  def web_search_tool(query: str) -> str:
64
- """Realiza búsquedas web en tiempo real."""
 
 
 
 
 
 
 
65
  try:
66
  tavily = TavilySearchResults(api_key=os.getenv("TAVILY_API_KEY"), max_results=3)
67
  results = tavily.invoke(query)
68
  return "\n".join([f"{res['title']}: {res['content']}" for res in results])
69
  except Exception as e:
70
- return f"Error de búsqueda: {str(e)}"
71
 
72
- # ================ Clase del Agente ================
73
  class GaiaAgent:
74
  def __init__(self):
75
- self.tools = [
76
- wikipedia_tool,
77
- youtube_transcript_tool,
78
- file_analyzer_tool,
79
- web_search_tool
80
- ]
81
- self.agent = self._create_agent()
82
-
83
- def _create_agent(self):
84
- llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
85
- model = llm.bind_tools(self.tools)
86
 
87
  def agent_node(state):
88
- messages = state['messages']
89
- if not any(isinstance(m, SystemMessage) for m in messages):
90
- messages = [SystemMessage(content=SYSTEM_PROMPT)] + messages
91
- response = model.invoke(messages)
92
  return {"messages": [response]}
93
 
94
- tool_node = ToolNode(self.tools)
95
-
96
- workflow = StateGraph(AgentState)
97
  workflow.add_node("agent", agent_node)
98
- workflow.add_node("tools", tool_node)
 
99
  workflow.set_entry_point("agent")
100
  workflow.add_conditional_edges(
101
  "agent",
102
- lambda x: "tools" if x["messages"][-1].tool_calls else END,
103
  )
104
  workflow.add_edge("tools", "agent")
105
 
106
  return workflow.compile()
107
-
108
- def process_question(self, question: str) -> str:
109
  try:
110
- response = self.agent.invoke({"messages": [HumanMessage(content=question)]})
111
- last_message = response["messages"][-1].content
112
- return self._extract_final_answer(last_message)
 
 
113
  except Exception as e:
114
- return f"Error del agente: {str(e)}"
115
-
116
  def _extract_final_answer(self, text: str) -> str:
117
  match = re.search(r"FINAL ANSWER:\s*(.*)", text, re.IGNORECASE)
118
  return match.group(1).strip() if match else text
119
 
120
- # ================ Integración con Gradio ================
121
- def run_evaluation(profile: gr.OAuthProfile | None):
 
 
122
  if not profile:
123
  return "Por favor inicia sesión primero", None
124
-
125
  try:
126
  agent = GaiaAgent()
127
- response = requests.get(f"{DEFAULT_API_URL}/questions")
128
- questions = response.json()
 
 
129
 
130
  answers = []
131
- results = []
132
- for q in questions:
133
- answer = agent.process_question(q["question"])
134
- answers.append({"task_id": q["task_id"], "submitted_answer": answer})
135
- results.append({"Pregunta": q["task_id"], "Respuesta": answer})
136
-
137
- submission = {
 
 
 
 
 
 
 
138
  "username": USERNAME,
139
  "agent_code": AGENT_CODE,
140
  "answers": answers
141
  }
142
 
143
- submit_response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission)
144
  submit_response.raise_for_status()
145
 
146
- return f"Éxito! Puntaje: {submit_response.json().get('score', 0)}", pd.DataFrame(results)
 
 
 
 
 
 
 
147
 
148
  except Exception as e:
149
  return f"Error crítico: {str(e)}", pd.DataFrame()
150
 
151
- # ================ Interfaz de Usuario ================
152
- with gr.Blocks() as interface:
153
  gr.Markdown("# GAIA Agent - Evaluación Completa")
154
- login_btn = gr.LoginButton()
155
- run_btn = gr.Button("Ejecutar Evaluación Completa")
156
- status_output = gr.Textbox(label="Estado de la Ejecución", interactive=False)
157
- results_table = gr.DataFrame(label="Resultados Detallados")
 
 
 
 
 
 
 
158
 
159
  run_btn.click(
160
- fn=run_evaluation,
161
- inputs=[],
162
  outputs=[status_output, results_table]
163
  )
164
 
165
  if __name__ == "__main__":
166
- interface.launch(server_name="0.0.0.0", server_port=7860)
 
3
import re
from typing import Dict, List

import cv2
import numpy as np
import pandas as pd
import pytesseract
import requests
import speech_recognition as sr
import yt_dlp
from PIL import Image
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode
from youtube_transcript_api import YouTubeTranscriptApi
20
 
21
# ================ CONSTANTES ================
# Scoring endpoint for the GAIA course evaluation service.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# NOTE: the original had a redundant chained assignment
# (SYSTEM_PROMPT = SYSTEM_PROMPT = """..."""); a single assignment suffices.
SYSTEM_PROMPT = """You are a precision research assistant for the GAIA benchmark. Your mission is EXTREME ACCURACY.
CRITICAL ANSWER FORMAT RULES:
# - ALWAYS end with: FINAL ANSWER: [answer]
# - READ THE QUESTION CAREFULLY - answer EXACTLY what is asked for, nothing more, nothing less
SPECIFIC FORMATTING BY QUESTION TYPE:
# - Numbers: ONLY the number, no units, no text
# Example: "FINAL ANSWER: 2" NOT "FINAL ANSWER: 2 albums"
# - First name only: ONLY the first name
# Example: If person is "John Smith" → "FINAL ANSWER: John"
# - Country codes, IOC codes, abbreviations, symbols: ONLY the code/abbreviation, no country name or brackets
# Example: If asked for IOC country code → "FINAL ANSWER: PHI" NOT "FINAL ANSWER: PHILIPPINES [PHI]"
# - When asked for a specific type of identifier (code, abbreviation, symbol):
# Give ONLY that identifier, strip all explanatory text, brackets, or full names
# - Lists/Sets: Exactly as requested format
# Example: "FINAL ANSWER: a, b, d, e" (comma-separated, alphabetical order)
CRITICAL TOOL SELECTION:
# - Wikipedia questions → wikipedia_tool ONLY
# - File questions → file_analyzer_tool FIRST to inspect contents, then reason based on structure
# - Current events → web_search_tool ONLY
# - Mathematical analysis/calculations → wolfram_alpha_tool or python_repl_tool ONLY
# - Tables, matrices, systematic checking → python_repl_tool ONLY
FOR MATHEMATICAL PROBLEMS:
# ALWAYS use python_repl_tool when:
# - Analyzing mathematical tables or matrices
# - Checking properties like commutativity, associativity
# - Systematic verification of mathematical statements
# - Complex calculations that need precision
# - ANY problem involving tables, sets, or systematic checking
MATHEMATICAL ANALYSIS PROCESS:
# 1. Use python_repl_tool to parse data systematically
# 2. Write code to check ALL cases (don't rely on manual inspection)
# 3. Collect results programmatically
# 4. Verify your logic with multiple approaches
# 5. Format answer exactly as requested
# Example for commutativity checking:
# - Parse the operation table into a data structure
# - Check ALL pairs (x,y) to see if x*y = y*x
# - Collect ALL elements involved in ANY counter-example
# - Return in requested format (e.g., comma-separated, alphabetical)
FILE HANDLING:
# - You HAVE the ability to read and analyze uploaded files
# - ALWAYS use file_analyzer_tool when questions mention files
# - The tool automatically finds and analyzes Excel, CSV, images, and audio files
# - For Excel/CSV: Returns columns, data types, sample rows, and numeric totals
# - NEVER say "I can't access files" - you CAN access them via file_analyzer_tool
# - Example: "The attached Excel file..." → Use file_analyzer_tool immediately
SPECIAL CASES TO HANDLE:
# - If the question appears reversed or encoded, decode it first.
# - If the question includes an instruction (e.g., "write the opposite of..."), follow the instruction precisely.
# - DO NOT repeat or paraphrase the question in your answer.
# - NEVER answer with the full sentence unless explicitly asked to.
# - If the decoded question asks for a word, give ONLY the word, in the required format.
REASONING PROCESS:
# 1. Carefully read what the question is asking for
# 2. Identify if it needs systematic/mathematical analysis
# 3. Use appropriate tool (python_repl_tool for math problems)
# 4. Extract ONLY the specific part requested
# 5. Format according to the rules above
# 6. For file questions:
# a. First use file_analyzer_tool to inspect column names, types, and sample data
# b. Identify relevant columns based on the question
# c. Reason using the data (e.g., by counting, filtering, or identifying patterns)
# d. Only use python_repl_tool if additional computation is necessary
# 7. If the Wikipedia tool is used but fails to provide an answer (no relevant entry or content), automatically attempt a web search using the same query or a refined version of it
"""
# Credentials used in the submission payload.
USERNAME = "Csuarezg"
AGENT_CODE = "gaia_agent_v1"
 
90
 
91
+ # ================ HERRAMIENTAS ================
92
  @tool
93
  def wikipedia_tool(query: str) -> str:
94
+ """Busca información enciclopédica en Wikipedia. Útil para datos históricos, biografías y conceptos científicos.
95
+
96
+ Args:
97
+ query: Término de búsqueda específico (ej. 'Teoría de la relatividad')
98
+
99
+ Returns:
100
+ Resumen conciso del tema en 3 oraciones.
101
+ """
102
  try:
103
  import wikipedia
104
  wikipedia.set_lang("en")
 
108
 
109
@tool
def youtube_transcript_tool(url: str) -> str:
    """Fetch the opening lines of a YouTube video's transcript.

    Args:
        url: Full video link (e.g. 'https://youtu.be/VIDEO_ID').

    Returns:
        The first five caption entries joined into one string, or an
        error message (prefixed 'Error transcript:') on failure.
    """
    try:
        # An 11-character ID following 'v=' or '/' — covers both
        # youtube.com/watch?v=... and youtu.be/... URL shapes.
        ids = re.findall(r'(?:v=|\/)([0-9A-Za-z_-]{11})', url)
        if not ids:
            # Previously this raised a bare IndexError ('list index out of
            # range'); report the actual problem instead.
            return "Error transcript: no valid video ID found in URL"
        transcript = YouTubeTranscriptApi.get_transcript(ids[0])
        # Only the first five entries: enough context without flooding the LLM.
        return " ".join(entry['text'] for entry in transcript[:5])
    except Exception as e:
        return f"Error transcript: {str(e)}"
125
 
126
  @tool
127
  def file_analyzer_tool(file_path: str) -> str:
128
+ """Analiza archivos (imágenes, audio) usando OCR y visión por computadora.
129
+
130
+ Args:
131
+ file_path: Ruta al archivo en el sistema
132
+
133
+ Returns:
134
+ Texto extraído o análisis de contenido multimedia.
135
+ """
136
  try:
137
  if file_path.endswith(('.png', '.jpg', '.jpeg')):
138
  img = Image.open(file_path)
 
140
  return f"Texto detectado: {text[:500]}..." if text else "Sin texto"
141
  return "Formato no soportado"
142
  except Exception as e:
143
+ return f"Error análisis archivo: {str(e)}"
144
 
145
@tool
def web_search_tool(query: str) -> str:
    """Run a real-time web search (Tavily). Useful for current information.

    Args:
        query: Search term, ideally with disambiguating context.

    Returns:
        Up to three results formatted as 'title: content' lines, or an
        error message (prefixed 'Error búsqueda:') on failure.
    """
    try:
        api_key = os.getenv("TAVILY_API_KEY")
        if not api_key:
            # Fail fast with a clear message instead of an opaque client error.
            return "Error búsqueda: TAVILY_API_KEY is not configured"
        tavily = TavilySearchResults(api_key=api_key, max_results=3)
        results = tavily.invoke(query)
        return "\n".join(f"{res['title']}: {res['content']}" for res in results)
    except Exception as e:
        return f"Error búsqueda: {str(e)}"
161
 
162
+ # ================ AGENTE PRINCIPAL ================
163
class GaiaAgent:
    """ReAct-style LangGraph agent for the GAIA benchmark.

    Builds a two-node graph (LLM agent node + tool executor) and loops
    between them until the model stops requesting tool calls. Callable:
    ``agent(question)`` returns the extracted final answer string.
    """

    def __init__(self):
        self.tools = [wikipedia_tool, youtube_transcript_tool, file_analyzer_tool, web_search_tool]
        self.llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
        # Bind tools once here instead of on every agent-node invocation.
        self._model = self.llm.bind_tools(self.tools)
        self.workflow = self._build_workflow()
        # NOTE(review): recognizer appears unused by this class's visible
        # methods — presumably intended for audio tooling; confirm before removal.
        self.recognizer = sr.Recognizer()

    def _build_workflow(self):
        """Compile the agent/tools loop into a runnable LangGraph graph."""
        # Fix: the original passed an undefined name `AgentState` (NameError
        # at runtime). MessagesState provides the standard {"messages": [...]}
        # schema this graph already uses.
        workflow = StateGraph(MessagesState)

        def agent_node(state):
            # Prepend the system prompt on every turn so the model always
            # sees the GAIA formatting rules.
            messages = [SystemMessage(content=SYSTEM_PROMPT)] + state["messages"]
            response = self._model.invoke(messages)
            return {"messages": [response]}

        workflow.add_node("agent", agent_node)
        workflow.add_node("tools", ToolNode(self.tools))

        workflow.set_entry_point("agent")
        # Keep routing to the tool node while the last message requests tools;
        # otherwise the run is finished.
        workflow.add_conditional_edges(
            "agent",
            lambda state: "tools" if state["messages"][-1].tool_calls else END,
        )
        workflow.add_edge("tools", "agent")

        return workflow.compile()

    def __call__(self, question: str) -> str:
        """Answer a single benchmark question; never raises (returns 'Error: …')."""
        try:
            response = self.workflow.invoke(
                {"messages": [HumanMessage(content=question)]},
                {"configurable": {"thread_id": "main_thread"}},
            )
            return self._extract_final_answer(response["messages"][-1].content)
        except Exception as e:
            return f"Error: {str(e)}"

    def _extract_final_answer(self, text: str) -> str:
        """Return the text after 'FINAL ANSWER:' (case-insensitive), or the
        whole text when the marker is absent."""
        match = re.search(r"FINAL ANSWER:\s*(.*)", text, re.IGNORECASE)
        return match.group(1).strip() if match else text
203
 
204
+ # ================ LÓGICA DE EJECUCIÓN ================
205
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the GAIA agent on every benchmark question and submit the answers.

    Args:
        profile: Hugging Face OAuth profile injected by Gradio; None when
            the user is not logged in.

    Returns:
        Tuple of (status message, DataFrame of per-question results) for the
        two Gradio output components. Never raises: errors are returned as a
        status string with an empty DataFrame.
    """
    # Require login before doing any work.
    if not profile:
        return "Por favor inicia sesión primero", None

    try:
        agent = GaiaAgent()

        # Fetch the full question set from the scoring service.
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions_data = response.json()

        answers = []
        results_log = []
        for item in questions_data:
            task_id = item.get("task_id")
            question_text = item.get("question")
            if not task_id or not question_text:
                # Skip malformed entries instead of failing the whole run.
                continue
            try:
                answer = agent(question_text)
                answers.append({"task_id": task_id, "submitted_answer": answer})
                results_log.append({"Task ID": task_id, "Question": question_text, "Answer": answer})
            except Exception as e:
                # One failing question must not abort the evaluation; it is
                # logged but (as before) not submitted.
                results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"Error: {str(e)}"})

        submission_data = {
            "username": USERNAME,
            "agent_code": AGENT_CODE,
            "answers": answers,
        }

        # Generous timeout: server-side scoring can be slow.
        submit_response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
        submit_response.raise_for_status()

        result = submit_response.json()
        status = (
            f"¡Envío exitoso!\n"
            f"Usuario: {result.get('username', '')}\n"
            f"Puntaje: {result.get('score', 0)}%\n"
            f"Mensaje: {result.get('message', '')}"
        )
        return status, pd.DataFrame(results_log)

    except Exception as e:
        return f"Error crítico: {str(e)}", pd.DataFrame()
252
 
253
+ # ================ INTERFAZ GRADIO ================
254
# Gradio UI: login button + one-click evaluation run. `run_and_submit_all`
# takes a gr.OAuthProfile parameter, which Gradio injects automatically from
# the LoginButton session, so no explicit `inputs=` list is needed.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent - Evaluación Completa")
    gr.Markdown("""
    **Instrucciones:**
    1. Inicia sesión con tu cuenta de Hugging Face
    2. Haz clic en 'Ejecutar Evaluación'
    3. Espera los resultados (puede tomar varios minutos)
    """)

    gr.LoginButton()
    run_btn = gr.Button("Ejecutar Evaluación", variant="primary")
    status_output = gr.Textbox(label="Estado", interactive=False)
    results_table = gr.DataFrame(label="Resultados Detallados", wrap=True)

    # Wire the button to the full evaluation + submission flow.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    # Bind to all interfaces on the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)