igorpavlov-mgr committed on
Commit
65fb8cb
·
verified ·
1 Parent(s): e921749

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -482
app.py CHANGED
@@ -1,488 +1,9 @@
1
- # app-24.py
2
- # Final GAIA-compliant agent integrating RobotPai best practices + our advanced logic
3
-
4
- import os
5
- import re
6
- import json
7
- import base64
8
- import requests
9
- import pdfplumber
10
- import fitz # PyMuPDF
11
- import tempfile
12
  import gradio as gr
13
- import pandas as pd
14
- from pydub import AudioSegment
15
- import speech_recognition as sr
16
- from io import BytesIO
17
-
18
- from langchain_core.messages import HumanMessage
19
- from langgraph.graph import StateGraph, END
20
- from langgraph.prebuilt import ToolNode
21
- from langchain.tools import tool
22
- from langchain.agents import tool as lc_tool
23
- from langchain_core.runnables import Runnable
24
-
25
- from langchain.agents.output_parsers import ReActSingleInputOutputParser
26
- from langchain.agents.format_scratchpad import format_to_openai_functions
27
- from langchain.agents.agent import AgentExecutor
28
- from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
29
- from langchain_core.prompts import SystemMessagePromptTemplate
30
- from langchain_core.prompts.chat import HumanMessagePromptTemplate
31
- from langchain_core.prompts import ChatPromptTemplate
32
- from langchain_core.runnables import RunnableLambda
33
-
34
- from langchain_community.tools.tavily_search import TavilySearchResults
35
- from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
36
-
37
- from langchain_community.chat_models import ChatOpenAI
38
- from langchain_core.language_models.chat_models import BaseChatModel
39
-
40
- # =========================
41
- # AGENT STATE SCHEMA
42
- # =========================
43
-
44
- from typing import TypedDict, Optional, List, Tuple
45
-
46
class AgentState(TypedDict, total=False):
    """State dictionary handed from node to node in the agent graph.

    ``total=False`` makes every key optional, so a run can start from just
    ``question`` and ``debug_trace`` and let the nodes fill in the rest.
    """

    # The user's question text.
    question: str
    # Declared for planner text; not written by the nodes visible in this file.
    planner_output: Optional[str]
    # Planned action in ``ToolName[<input>]`` form, or None when planning failed.
    tool_call: Optional[str]
    # Stringified tool output, or None when no tool ran or the tool failed.
    tool_result: Optional[str]
    # Final answer produced by the finalizer.
    answer: Optional[str]
    # Set by the planner when its output was not a valid tool call.
    replan: Optional[bool]
    # Declared replan counter; not updated by the nodes visible in this file.
    replan_count: int
    # Human-readable log lines appended by each node.
    debug_trace: List[str]
55
-
56
- # =========================
57
- # ENVIRONMENT & LLM SETUP
58
- # =========================
59
-
60
# Credentials and model choice are read from the environment, with an empty
# key and "gpt-4-turbo" as the fallbacks.
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
model_name = os.environ.get("OPENAI_MODEL", "gpt-4-turbo")

# Shared chat model: temperature 0.0 and replies capped at 512 tokens.
llm = ChatOpenAI(
    model=model_name,
    temperature=0.0,
    openai_api_key=openai_api_key,
    max_tokens=512,
)
69
-
70
- # =========================
71
- # File Download Function
72
- # =========================
73
-
74
def download_file_from_gaia(task_id: str, file_name: str) -> str:
    """Download the file attached to a GAIA task into ``~/gaia_files``.

    Args:
        task_id: Task identifier appended to the scoring API ``/files/`` URL.
        file_name: Name to store the download under. Only the final path
            component is used.

    Returns:
        The path of the saved file when the server answers 200. Otherwise
        (non-200 status or a request error) the placeholder path
        ``/tmp/fake_<file_name>``, which is NOT created on disk; callers
        detect failure by checking whether the path exists.
    """
    # file_name can come straight from a UI textbox, so drop any directory
    # parts to keep the write inside the download directory.
    safe_name = os.path.basename(file_name) or "download"
    placeholder = f"/tmp/fake_{safe_name}"

    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    try:
        # A timeout keeps a stalled server from hanging the UI callback.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        return placeholder

    if response.status_code != 200:
        return placeholder

    dir_path = os.path.expanduser("~/gaia_files")
    os.makedirs(dir_path, exist_ok=True)
    file_path = os.path.join(dir_path, safe_name)
    with open(file_path, "wb") as f:
        f.write(response.content)
    return file_path
86
-
87
- # =========================
88
- # TOOL REGISTRY SECTION
89
- # =========================
90
-
91
@tool
def Calculator(expression: str) -> str:
    """Evaluate a basic math expression like 15 / 100 * 80"""
    # The expression is model-generated text, so it is parsed and evaluated
    # node by node instead of being handed to eval(): an empty __builtins__
    # dict does not stop attribute-walking escapes.
    # NOTE: the docstring above is kept verbatim because @tool uses it as
    # the tool description.
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        # Only numeric literals combined with arithmetic operators are allowed.
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return value
            raise ValueError("only numeric literals are supported")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError("unsupported expression")

    try:
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree))
    except Exception as e:
        # Same error contract as before: failures come back as text.
        return f"Error: {str(e)}"
99
-
100
@tool
def PythonExec(code: str) -> str:
    """Evaluate basic Python code for logic and parsing. Avoid stateful ops."""
    # Only code accepted by is_valid_python_code is executed.
    if is_valid_python_code(code):
        namespace = {}
        try:
            # NOTE(review): exec() runs the snippet in-process with the full
            # builtins; the filter above is the only gate.
            exec(code, namespace)
            # The snippet reports its output via a variable named "result".
            outcome = namespace.get("result", "Executed")
            return str(outcome)
        except Exception as err:
            return f"Error: {str(err)}"
    return "Invalid Python code."
111
-
112
def is_valid_python_code(code: str) -> bool:
    """Return True when *code* parses and avoids the blocked constructs.

    The check works on the parsed syntax tree rather than on raw substrings,
    so harmless identifiers such as ``cost`` or ``position`` are no longer
    rejected just because they contain ``os``.

    Rejected:
        * code that does not parse;
        * ``import`` / ``from ... import`` statements;
        * the names or attributes ``open``, ``os``, ``sys``, ``socket``,
          ``subprocess``;
        * any dunder name or attribute (``__import__``, ``__class__``, ...).

    This is a coarse filter, not a sandbox.
    """
    import ast

    blocked_names = {"open", "os", "sys", "socket", "subprocess"}

    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        return False

    for node in ast.walk(tree):
        if isinstance(node, (ast.Import, ast.ImportFrom)):
            return False
        if isinstance(node, ast.Name):
            identifier = node.id
        elif isinstance(node, ast.Attribute):
            identifier = node.attr
        else:
            continue
        if identifier in blocked_names:
            return False
        # Dunder access is the usual route around name-based filters.
        if identifier.startswith("__") and identifier.endswith("__"):
            return False
    return True
115
-
116
@tool
def PDFReader(file_path: str) -> str:
    """Extract up to 1000 characters of clean text from a PDF file."""
    # The docstring above is kept verbatim because @tool uses it as the tool
    # description.
    limit = 1000
    try:
        # First attempt: pdfplumber, stopping once enough text is collected.
        pieces = []
        collected = 0
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text() or ""
                pieces.append(page_text)
                collected += len(page_text)
                if collected > limit:
                    break
        return "".join(pieces)[:limit].strip()
    except Exception:
        # Fallback: PyMuPDF, limited to the first three pages.
        try:
            # The context manager closes the document, which the previous
            # version never did.
            with fitz.open(file_path) as doc:
                page_texts = []
                for index, page in enumerate(doc):
                    if index >= 3:
                        break
                    page_texts.append(page.get_text())
            return " ".join(page_texts)[:limit].strip()
        except Exception as e:
            return f"Error: {str(e)}"
134
-
135
@tool
def ReadExcel(file_path: str) -> str:
    """Return a summary of the Excel file content."""
    try:
        # DataFrame.head() keeps the first rows; they are rendered as plain text.
        return pd.read_excel(file_path).head().to_string()
    except Exception as err:
        return f"Error: {str(err)}"
144
-
145
@tool
def TranscribeAudio(file_path: str) -> str:
    """Return the audio transcript (mp3 only)."""
    # The docstring above is kept verbatim because @tool uses it as the tool
    # description.
    try:
        # Each call converts into its own temporary directory, so concurrent
        # calls cannot overwrite each other's WAV file and nothing is left
        # behind in /tmp.
        with tempfile.TemporaryDirectory() as workdir:
            wav_path = os.path.join(workdir, "audio.wav")
            audio = AudioSegment.from_file(file_path)
            exported = audio.export(wav_path, format="wav")
            # NOTE(review): export() appears to return the output file
            # handle; close it if so, before the file is reopened below.
            if hasattr(exported, "close"):
                exported.close()

            recognizer = sr.Recognizer()
            with sr.AudioFile(wav_path) as source:
                audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except Exception as e:
        return f"Error: {str(e)}"
157
-
158
@tool
def YouTubeTranscript(url: str) -> str:
    """Extract transcript text from a YouTube video (fallback simulation)."""
    # Placeholder: nothing is fetched; the reply only echoes the URL.
    placeholder = f"Transcript of video {url} (not implemented)"
    return placeholder
162
-
163
@tool
def DuckDuckGoSearch(query: str) -> str:
    """Search the web using DuckDuckGo."""
    try:
        # A fresh wrapper is created per call; run() returns the results.
        return DuckDuckGoSearchAPIWrapper().run(query)
    except Exception as err:
        return f"Error: {str(err)}"
172
-
173
- # Tool registry list
174
# Every tool the planner may name, in the order they are listed in its prompt.
tools = [
    # Computation
    Calculator, PythonExec,
    # File readers
    PDFReader, ReadExcel, TranscribeAudio,
    # Web sources
    YouTubeTranscript, DuckDuckGoSearch,
]
183
-
184
-
185
- # =========================
186
- # PLANNER NODE
187
- # =========================
188
-
189
def is_valid_tool_call(output: str) -> bool:
    """Tell whether *output*, ignoring surrounding whitespace, looks like ``ToolName[<input>]``.

    The tool name may contain only ASCII letters and underscores; the
    bracketed input may be empty but must stay on one line.
    """
    candidate = output.strip()
    return re.match(r"^[A-Za-z_]+\[.*\]$", candidate) is not None
192
-
193
def planner_node(state: dict) -> dict:
    """Ask the LLM for a ReAct-style plan and extract the tool call from it.

    Args:
        state: Agent state; ``question`` and ``debug_trace`` are read.

    Returns:
        A copy of *state* with ``debug_trace`` extended and either
        ``tool_call`` set to ``ToolName[<input>]`` with ``replan`` False, or
        ``tool_call`` set to None with ``replan`` True when the model's
        Action line is missing or malformed.
    """
    question = state.get("question", "")
    # Work on a copy so the caller's list is not mutated behind its back.
    trace = list(state.get("debug_trace") or [])

    # Prompt with tool list and few-shot examples
    prompt = (
        "You are a ReAct-style planning agent. Choose the most suitable tool.\n"
        "Respond using this format:\n"
        "Thought: <reasoning>\nAction: ToolName[<input>]\n\n"
        "Available tools:\n"
        "- Calculator: Evaluate math expressions\n"
        "- PythonExec: Run Python code\n"
        "- PDFReader: Read content from PDF files\n"
        "- ReadExcel: Parse Excel spreadsheets\n"
        "- TranscribeAudio: Transcribe .mp3 audio\n"
        "- YouTubeTranscript: Extract transcript from a video\n"
        "- DuckDuckGoSearch: Search for web content\n\n"
        "---\n"
        "Question: What is 25% of 80?\n"
        "Thought: I can calculate this with math.\n"
        "Action: Calculator[25 / 100 * 80]\n\n"
        "Question: What does the video say at https://youtube.com/watch?v=abc123?\n"
        "Thought: I need the video transcript.\n"
        "Action: YouTubeTranscript[https://youtube.com/watch?v=abc123]\n\n"
        "Question: What is in the Excel file sales.xlsx?\n"
        "Thought: I should read the Excel file.\n"
        "Action: ReadExcel[/tmp/sales.xlsx]\n\n"
        f"Question: {question}"
    )

    # Use the module-level `llm`, which is built from OPENAI_MODEL and
    # OPENAI_API_KEY, rather than a second hard-coded client that ignores
    # that configuration.
    result = llm.invoke(prompt)
    result_text = result.content.strip()

    # Extract Thought and Action. MULTILINE lets `$` match at the end of the
    # Action line, so any text the model adds afterwards no longer turns a
    # valid plan into "INVALID".
    thought_match = re.search(r"Thought: (.*?)(?:\n|$)", result_text)
    action_match = re.search(r"Action: (.*?)$", result_text, re.MULTILINE)
    thought = thought_match.group(1).strip() if thought_match else ""
    action = action_match.group(1).strip() if action_match else "INVALID"

    trace.append(f"[Planner] Thought: {thought}")
    trace.append(f"[Planner] Raw Action: {action}")

    if not is_valid_tool_call(action):
        trace.append("[Planner] Invalid format detected — replanning may be required.")
        return {**state, "tool_call": None, "replan": True, "debug_trace": trace}

    return {**state, "tool_call": action, "debug_trace": trace, "replan": False}
241
-
242
- # =========================
243
- # TOOL NODE (ReAct-style)
244
- # =========================
245
-
246
from langgraph.prebuilt import ToolExecutor

# Kept so the module-level name stays available; tool_node below dispatches
# through the `tools` registry directly instead.
tool_executor = ToolExecutor(tools)


def tool_node(state: dict) -> dict:
    """Run the tool named in ``state["tool_call"]`` and record the outcome.

    Args:
        state: Agent state; ``tool_call`` (``ToolName[<input>]``) and
            ``debug_trace`` are read.

    Returns:
        A copy of *state* with ``tool_result`` set to the stringified tool
        output, or None when there was no call, the call was malformed, the
        tool is unknown, or the tool raised. ``debug_trace`` records what
        happened in each case.
    """
    tool_call = state.get("tool_call")
    trace = state.get("debug_trace", [])

    if not tool_call:
        trace.append("[ToolNode] No tool call provided.")
        return {**state, "tool_result": None, "debug_trace": trace}

    # Split "ToolName[input]" explicitly instead of relying on an
    # AttributeError from a failed match.
    parsed = re.match(r"([A-Za-z_]+)\[(.*)\]", tool_call)
    if parsed is None:
        trace.append(f"[ToolNode] Error invoking tool: malformed tool call {tool_call!r}")
        return {**state, "tool_result": None, "debug_trace": trace}

    tool_name = parsed.group(1)
    tool_input = parsed.group(2).strip()

    # Look the tool up by its registered name and call it directly.
    # NOTE(review): the previous code passed a plain dict to
    # ToolExecutor.invoke, which looks like it expects a ToolInvocation
    # object instead — confirm against the installed langgraph version.
    registry = {candidate.name: candidate for candidate in tools}
    selected = registry.get(tool_name)
    if selected is None:
        trace.append(f"[ToolNode] Error invoking tool: unknown tool {tool_name!r}")
        return {**state, "tool_result": None, "debug_trace": trace}

    try:
        result = selected.invoke(tool_input)
    except Exception as e:
        trace.append(f"[ToolNode] Error invoking tool: {str(e)}")
        return {**state, "tool_result": None, "debug_trace": trace}

    trace.append(f"[ToolNode] Tool used: {tool_name}")
    # Inputs and observations are clipped to keep the trace readable.
    trace.append(f"[ToolNode] Input: {tool_input[:250]}")
    trace.append(f"[ToolNode] Observation: {str(result)[:250]}")
    return {**state, "tool_result": str(result), "debug_trace": trace}
269
-
270
- # =========================
271
- # FINALIZER NODE
272
- # =========================
273
-
274
def clean_final_answer(question: str, result: str, trace: list) -> str:
    """Apply GAIA-safe formatting rules to tool output.

    Args:
        question: The original question; it decides which heuristics fire.
        result: Raw tool output. ``None`` (no tool ran or the tool failed)
            is treated as an empty answer instead of raising.
        trace: Debug trace list; one line is appended per applied heuristic.

    Returns:
        The cleaned answer string (possibly empty).
    """
    answer = "" if result is None else str(result).strip()
    if not answer:
        # Nothing to clean; none of the heuristics can match empty text.
        return answer

    # First name trimming
    if re.search(r"first name", question, re.IGNORECASE):
        words = answer.split()
        if len(words) > 1:
            answer = words[0]
            trace.append("[Finalizer] Heuristic: Trimmed to first name.")

    # Quote simulation fallback (if output in quotes)
    quote_match = re.findall(r'"([^"]{1,40})"', answer)
    if quote_match:
        answer = quote_match[0]
        trace.append("[Finalizer] Heuristic: Quote selected as answer.")

    # Year counting (e.g., for discography)
    if re.search(r"how many .*\b(years|albums|times)\b", question, re.IGNORECASE):
        # Non-capturing group so findall yields whole years, not just "19"/"20".
        years = re.findall(r"\b(?:19|20)\d{2}\b", answer)
        if years:
            answer = str(len(years))
            trace.append("[Finalizer] Heuristic: Counted year mentions.")

    # Defunct country parsing
    if re.search(r"born in.*\b(USSR|Yugoslavia|Czechoslovakia)\b", question, re.IGNORECASE):
        m = re.search(r"\b[A-Z][a-z]+\b", answer)
        if m:
            answer = m.group(0)
            trace.append("[Finalizer] Heuristic: Extracted name from defunct country context.")

    # Final trim and return
    return answer.strip()
307
-
308
def finalizer_node(state: dict) -> dict:
    """Turn the tool result into the final answer and log it.

    Args:
        state: Agent state; ``question``, ``tool_result`` and
            ``debug_trace`` are read.

    Returns:
        A copy of *state* with ``answer`` set and ``debug_trace`` extended.
    """
    question = state.get("question", "")
    # The tool node stores None on failure, and a .get() default does not
    # apply when the key is present, so normalise None to "" explicitly.
    tool_result = state.get("tool_result") or ""
    trace = state.get("debug_trace", [])

    answer = clean_final_answer(question, tool_result, trace)
    trace.append(f"[Finalizer] Final Answer: {answer}")
    return {**state, "answer": answer, "debug_trace": trace}
316
-
317
- # =========================
318
- # BASIC AGENT CLASS
319
- # =========================
320
-
321
class BasicAgent:
    """Callable wrapper that runs one question through a compiled graph."""

    def __init__(self, graph):
        # Any object exposing ``invoke(state) -> dict`` works here.
        self.graph = graph

    def __call__(self, question: str) -> Tuple[str, List[str]]:
        """Run *question* through the graph.

        Returns:
            ``(answer, debug_trace)``. ``answer`` falls back to ``"Error"``
            and ``debug_trace`` to ``[]`` when the graph result lacks them.
        """
        state = {"question": question, "debug_trace": []}
        result = self.graph.invoke(state)
        return result.get("answer", "Error"), result.get("debug_trace", [])


# The module-level ``agent`` is created after the graph has been compiled
# further down. Instantiating it here referenced ``compiled_graph`` before
# assignment and raised NameError at import time.
331
-
332
- # =========================
333
- # GRAPH DEFINITION
334
- # =========================
335
-
336
def build_graph():
    """Build and compile the linear planner -> tool -> finalizer graph.

    Returns:
        The compiled graph, ready for ``invoke``.
    """
    # StateGraph needs the state schema; constructing it with no arguments
    # fails before any node is added.
    graph = StateGraph(AgentState)
    graph.add_node("planner", planner_node)
    graph.add_node("tool", tool_node)
    graph.add_node("finalizer", finalizer_node)

    # The flow is strictly linear: the planner's "replan" flag is recorded
    # in the state but no edge acts on it.
    graph.set_entry_point("planner")
    graph.add_edge("planner", "tool")
    graph.add_edge("tool", "finalizer")
    graph.set_finish_point("finalizer")

    return graph.compile()
348
-
349
print("✅ app.py loaded")

# Build the graph and the agent at import time. A failure is printed with its
# traceback instead of aborting the import, which leaves `compiled_graph`
# and/or `agent` undefined.
try:
    compiled_graph = build_graph()
    print("✅ Graph compiled")
    agent = BasicAgent(compiled_graph)
    print("✅ Agent ready")
except Exception:
    import traceback

    print("❌ Agent init failed:")
    print(traceback.format_exc())
360
-
361
- # =========================
362
- # GAIA RUNNERS FOR SUBMISSION
363
- # =========================
364
-
365
# Base URL of the scoring service; the questions and submit URLs are built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


def run_gaia_agent(question: str) -> str:
    """Answer one question with the module-level agent.

    The agent's debug trace is discarded; an empty or missing answer is
    replaced by a fixed error marker.
    """
    answer, _trace = agent(question)
    if answer:
        return answer
    return "Final Answer: [ERROR] Missing."
370
-
371
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with the agent and submit the answers.

    Args:
        profile: Hugging Face OAuth profile supplied by Gradio, or None when
            the user is not logged in.

    Returns:
        ``(status_message, results)`` where ``results`` is a DataFrame of
        per-question answers, or None when nothing was attempted.
    """
    import pandas as pd
    import requests

    if not profile:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "unknown-space-id")
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        questions_response = requests.get(questions_url, timeout=15)
        # Surface HTTP errors instead of trying to parse an error page as JSON.
        questions_response.raise_for_status()
        questions_data = questions_response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    if not isinstance(questions_data, list) or not questions_data:
        return "Error fetching questions: empty or unexpected response.", None

    results_log, answers_payload = [], []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue
        try:
            submitted_answer = run_gaia_agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # Failed questions are shown in the table but not submitted.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})

    if not answers_payload:
        # Do not post an empty submission.
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers_payload,
    }

    try:
        submit_response = requests.post(submit_url, json=submission_data, timeout=60)
        # A rejected submission must not be reported as successful.
        submit_response.raise_for_status()
        response = submit_response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {response.get('username')}\n"
            f"Score: {response.get('score')}% "
            f"({response.get('correct_count')}/{response.get('total_attempted')} correct)\n"
            f"Message: {response.get('message', 'No message')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)
419
-
420
- # =========================
421
- # UI + GAIA SUBMISSION ENTRY POINT
422
- # =========================
423
-
424
def debug_single_question(q):
    """Run one question through the compiled graph for the debug UI.

    Returns ``(answer, trace_text)``. The trace text is the joined debug
    trace, followed by any format warnings about the answer. On any
    exception the pair is ``("Error", <formatted traceback>)``.
    """
    try:
        outcome = compiled_graph.invoke({"question": q})
        trace = "\n".join(outcome.get("debug_trace", []))
        answer = outcome["answer"]

        # Format checks (debug only)
        warnings_found = []

        if "," in answer:
            lowered = [piece.strip().lower() for piece in answer.split(",")]
            if lowered != sorted(lowered):
                warnings_found.append("List is not alphabetically sorted.")

        if len(answer.split()) == 2:
            warnings_found.append("Full name detected; question may require first name only.")

        if answer.lower().strip().startswith("final answer:"):
            warnings_found.append("Do not include 'Final Answer:' prefix in result.")

        if any(ord(ch) > 127 for ch in answer):
            warnings_found.append("Non-ASCII characters found in result.")

        if warnings_found:
            trace += "\n\n⚠️ **Format Warning(s):**\n- " + "\n- ".join(warnings_found)

        return answer, trace
    except Exception:
        import traceback

        return "Error", traceback.format_exc()
450
-
451
# Single-page UI with three sections, created top to bottom: single-question
# debugging, task-file download, and the full evaluation/submission run.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent with Debug & Submission UI")

    # --- Debug UI ---
    question_box = gr.Textbox(label='Enter a GAIA Question')
    ask_button = gr.Button('Run Agent')
    answer_output = gr.Textbox(label='Final Answer')
    debug_output = gr.Textbox(label='Planner / Tool / Finalizer Trace', lines=20)
    ask_button.click(
        fn=debug_single_question,
        inputs=question_box,
        outputs=[answer_output, debug_output],
    )

    # --- File Preview UI ---
    task_id_box = gr.Textbox(label='GAIA Task ID (for File Download)')
    file_name_box = gr.Textbox(label='File Name (e.g., doc.pdf)')
    download_button = gr.Button("Download File and Get Base64")
    base64_output = gr.Textbox(label="Base64 Download Link", lines=2)

    def get_base64_file_link(task_id, file_name):
        """Download the task file and return it as a base64 ``data:`` URI."""
        path = download_file_from_gaia(task_id, file_name)
        # A path that does not exist on disk means the download failed.
        if not os.path.exists(path):
            return "Error downloading file."
        with open(path, "rb") as handle:
            payload = base64.b64encode(handle.read()).decode("utf-8")
        return f"data:application/octet-stream;base64,{payload}"

    download_button.click(
        fn=get_base64_file_link,
        inputs=[task_id_box, file_name_box],
        outputs=base64_output,
    )

    # === GAIA Submission UI
    gr.Markdown("## Submit GAIA Benchmark")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

# Launch the UI only when this file is run as a script.
if __name__ == "__main__":
    print("✅ Gradio demo launching...")
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
# Startup marker printed as soon as the module body runs.
print("✅ Minimal app.py reached")

# Placeholder interface: returns the submitted text in upper case.
demo = gr.Interface(
    fn=lambda x: x.upper(),
    inputs="text",
    outputs="text",
)

# Launch the UI only when this file is run as a script.
if __name__ == "__main__":
    print("✅ Gradio demo launching...")
    demo.launch()