ahnhs2k commited on
Commit
a85b201
ยท
1 Parent(s): ab67ce4
Files changed (3) hide show
  1. agent.py +805 -101
  2. app.py +1 -1
  3. requirements.txt +11 -4
agent.py CHANGED
@@ -1,143 +1,847 @@
1
  # agent.py
2
- # =====================================================
3
- # GAIA Level 1 ~ 50% Target Agent
4
- # LangGraph minimal + hard postprocess
5
- # =====================================================
6
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import re
8
- from typing import TypedDict
 
 
 
 
 
 
 
 
 
 
 
 
9
 
 
 
 
10
  from langchain_openai import ChatOpenAI
11
  from langchain_core.messages import SystemMessage, HumanMessage
12
- from langchain_community.tools import DuckDuckGoSearchRun
13
 
14
- from langgraph.graph import StateGraph, START, END
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
 
 
16
 
17
- # -----------------------------
18
- # State
19
- # -----------------------------
20
- class State(TypedDict):
21
- question: str
22
- search_result: str
 
23
  answer: str
24
- searched: bool
25
-
26
-
27
- # -----------------------------
28
- # System Prompt (VERY IMPORTANT)
29
- # -----------------------------
30
- SYS = SystemMessage(
31
- content=(
32
- "You answer GAIA benchmark questions.\n"
33
- "Rules:\n"
34
- "- If factual info is needed, search ONCE.\n"
35
- "- NEVER say you cannot access files, images, or audio.\n"
36
- "- Output ONLY the final answer.\n"
37
- "- No explanation.\n"
38
- "- No quotes.\n"
39
- "- No punctuation unless part of the answer."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  )
41
- )
42
 
43
 
44
- # -----------------------------
45
- # Tools
46
- # -----------------------------
47
- search = DuckDuckGoSearchRun()
48
 
49
 
50
- # -----------------------------
51
- # Postprocess (CORE)
52
- # -----------------------------
53
- def postprocess(text: str) -> str:
54
- t = text.strip()
55
 
56
- # kill refusal patterns
57
- if re.search(r"(cannot|unable|sorry|please provide)", t.lower()):
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  return ""
 
 
 
 
 
 
 
59
 
60
- # remove quotes
61
- t = t.strip("\"'")
62
 
63
- # remove trailing punctuation
64
- t = re.sub(r"[.ใ€‚]$", "", t)
 
 
 
 
 
 
 
65
 
66
- # first line only
67
- if "\n" in t:
68
- t = t.split("\n")[0].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- # numeric extraction if number-like
71
- nums = re.findall(r"\d+", t)
72
- if nums and len(t) > 6:
73
- return nums[0]
74
 
75
- return t
 
 
76
 
 
 
 
 
 
 
77
 
78
- # -----------------------------
79
- # Nodes
80
- # -----------------------------
81
- llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=64)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- def decide_and_search(state: State):
84
- if state["searched"]:
85
- return state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- q = state["question"]
88
- result = search.run(q)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- return {
91
- "question": q,
92
- "search_result": result,
93
- "searched": True,
 
94
  }
95
 
96
- def answer(state: State):
97
- prompt = (
98
- f"Question:\n{state['question']}\n\n"
99
- f"Search result:\n{state.get('search_result','')}\n\n"
100
- "Final answer:"
101
- )
102
 
103
- msg = llm.invoke([SYS, HumanMessage(content=prompt)])
104
- clean = postprocess(msg.content)
 
105
 
106
- return {
107
- **state,
108
- "answer": clean,
109
- }
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- # -----------------------------
113
- # Graph
114
- # -----------------------------
115
- def build():
116
- g = StateGraph(State)
117
 
118
- g.add_node("search", decide_and_search)
119
- g.add_node("answer", answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
- g.add_edge(START, "search")
122
- g.add_edge("search", "answer")
123
- g.add_edge("answer", END)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  return g.compile()
126
 
127
 
128
- # -----------------------------
129
- # Public API
130
- # -----------------------------
 
 
 
 
131
  class BasicAgent:
132
  def __init__(self):
133
- self.graph = build()
134
- print("GAIA 50% Agent ready")
135
 
136
  def __call__(self, question: str) -> str:
137
- out = self.graph.invoke(
138
- {
139
- "question": question,
140
- "searched": False,
141
- }
142
- )
143
- return out.get("answer", "")
 
 
 
 
 
 
 
 
 
 
1
  # agent.py
2
+ # =========================================================
3
+ # GAIA Level-1 ๋ชฉํ‘œ(>= 50%)์šฉ "์‹ค์ „ํ˜•" Agent
4
+ #
5
+ # ํ•ต์‹ฌ ์ „๋žต
6
+ # 1) ๋ฌธ์ œ ํƒ€์ž…์„ ๋จผ์ € ๋ถ„๋ฅ˜ํ•œ๋‹ค. (๋ถ„๋ฅ˜๊ฐ€ ๊ณง ์Šน๋ถ€)
7
+ # 2) ๊ณ„์‚ฐ/ํ‘œ/์ง‘ํ•ฉ/๋ฌธ์ž์—ด ์กฐ์ž‘์€ LLM์— ๋งก๊ธฐ์ง€ ์•Š๊ณ  "์ฝ”๋“œ๋กœ ์ง์ ‘" ์ฒ˜๋ฆฌํ•œ๋‹ค.
8
+ # 3) ๊ฒ€์ƒ‰์ด ํ•„์š”ํ•œ ๋ฌธ์ œ๋งŒ ๊ฒ€์ƒ‰ํ•œ๋‹ค. (DDG + ์œ„ํ‚ค API + ํŠน์ • ๋„๋ฉ”์ธ ํžŒํŠธ)
9
+ # 4) ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ(์œ ํŠœ๋ธŒ/์ด๋ฏธ์ง€/์˜ค๋””์˜ค/์—‘์…€/PDF)์€ "์งˆ๋ฌธ์— URL์ด ์žˆ์„ ๋•Œ๋งŒ" ์ฒ˜๋ฆฌํ•œ๋‹ค.
10
+ # 5) ์ตœ์ข… ์ถœ๋ ฅ์€ ํ•ญ์ƒ ์ •๋‹ต๋งŒ 1์ค„๋กœ ๋ฐ˜ํ™˜ํ•œ๋‹ค.
11
+ #
12
+ # ์ฃผ์˜
13
+ # - OpenAI function/tool calling์„ ์“ฐ๋ฉด ๋ฉ”์‹œ์ง€ role='tool' ์ •ํ•ฉ์„ฑ ๋•Œ๋ฌธ์— 400 ์—๋Ÿฌ๊ฐ€ ์‰ฝ๊ฒŒ ๋‚œ๋‹ค.
14
+ # ๊ทธ๋ž˜์„œ LangGraph๋Š” "์ƒํƒœ๊ธฐ๊ณ„ ํ”„๋ ˆ์ž„์›Œํฌ"๋กœ๋งŒ ์“ฐ๊ณ ,
15
+ # LLM์€ '๋ฌธ์„œ์—์„œ ๊ฐ’ ์ถ”์ถœ' ์šฉ๋„๋กœ๋งŒ ์“ด๋‹ค. (GAIA์—์„œ ํ›จ์”ฌ ์•ˆ์ •์ )
16
+ # =========================================================
17
+
18
+ from __future__ import annotations
19
+
20
+ import os
21
  import re
22
+ import io
23
+ import json
24
+ import time
25
+ import math
26
+ import typing as T
27
+ from dataclasses import dataclass
28
+
29
+ import requests
30
+
31
+ # ----------------------------
32
+ # LangGraph (Agent Framework)
33
+ # ----------------------------
34
+ from langgraph.graph import StateGraph, START, END
35
 
36
+ # ----------------------------
37
+ # OpenAI via LangChain
38
+ # ----------------------------
39
  from langchain_openai import ChatOpenAI
40
  from langchain_core.messages import SystemMessage, HumanMessage
 
41
 
42
+ # ----------------------------
43
+ # DDG ๊ฒ€์ƒ‰ (API KEY ๋ถˆํ•„์š”)
44
+ # - duckduckgo-search / ddgs ๋‘˜ ๋‹ค ํ™˜๊ฒฝ์— ๋”ฐ๋ผ ๋™์ž‘์ด ๋‹ฌ๋ผ์„œ,
45
+ # ddgs๋ฅผ 1์ˆœ์œ„๋กœ ์“ฐ๊ณ  ์‹คํŒจํ•˜๋ฉด duckduckgo-search๋กœ ํด๋ฐฑํ•œ๋‹ค.
46
+ # ----------------------------
47
+ try:
48
+ from ddgs import DDGS # ๊ถŒ์žฅ
49
+ except Exception:
50
+ DDGS = None
51
+
52
+ try:
53
+ # duckduckgo-search ํŒจํ‚ค์ง€(๊ตฌ๋ฒ„์ „/์‹ ๋ฒ„์ „) ํ˜ธํ™˜ ํด๋ฐฑ
54
+ from duckduckgo_search import DDGS as DuckDDGS
55
+ except Exception:
56
+ DuckDDGS = None
57
+
58
+ # ----------------------------
59
+ # YouTube Transcript
60
+ # ----------------------------
61
+ try:
62
+ from youtube_transcript_api import YouTubeTranscriptApi
63
+ except Exception:
64
+ YouTubeTranscriptApi = None
65
+
66
+ # ----------------------------
67
+ # Excel/PDF/Image/Chess ๋“ฑ (URL์ด ์žˆ์„ ๋•Œ๋งŒ)
68
+ # ----------------------------
69
+ try:
70
+ import pandas as pd
71
+ except Exception:
72
+ pd = None
73
+
74
+ try:
75
+ import fitz # PyMuPDF
76
+ except Exception:
77
+ fitz = None
78
+
79
+ try:
80
+ import chess # python-chess
81
+ except Exception:
82
+ chess = None
83
+
84
+
85
+ # =========================================================
86
+ # 1) ์ƒํƒœ(State) ์ •์˜
87
+ # =========================================================
88
+ class AgentState(T.TypedDict):
89
+ # ์›๋ฌธ ์งˆ๋ฌธ
90
+ question: str
91
 
92
+ # ๋ถ„๋ฅ˜๋œ ๋ฌธ์ œ ํƒ€์ž…
93
+ task_type: str
94
 
95
+ # ์งˆ๋ฌธ์—์„œ ์ฐพ์•„๋‚ธ URL๋“ค(์žˆ์œผ๋ฉด)
96
+ urls: list[str]
97
+
98
+ # ์ค‘๊ฐ„ ์‚ฐ์ถœ๋ฌผ(๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ / ์ถ”์ถœ ํ…์ŠคํŠธ / ํŒŒ์ผ ํ…์ŠคํŠธ ๋“ฑ)
99
+ context: str
100
+
101
+ # ์ตœ์ข… ์ •๋‹ต(์ •๋‹ต๋งŒ, 1์ค„)
102
  answer: str
103
+
104
+ # ๋‚ด๋ถ€ ์•ˆ์ „์žฅ์น˜: ์ตœ๋Œ€ ์Šคํ…/์‹œ๋„ ์นด์šดํŠธ
105
+ steps: int
106
+
107
+
108
+ # =========================================================
109
+ # 2) ์ „์—ญ ์„ค์ • / LLM
110
+ # =========================================================
111
+ SYSTEM_RULES = """
112
+ You are solving GAIA benchmark questions.
113
+
114
+ Hard rules:
115
+ - Output ONLY the final answer.
116
+ - No explanation.
117
+ - No extra text.
118
+ - If the answer is a list, follow the required format exactly.
119
+ """.strip()
120
+
121
+ # ์˜จ์ „ํžˆ "์ถ”์ถœ๊ธฐ"๋กœ๋งŒ ์“ธ ํ”„๋กฌํ”„ํŠธ(์ด์œ  ์„ค๋ช… ๊ธˆ์ง€)
122
+ EXTRACTOR_RULES = """
123
+ You are an information extractor.
124
+
125
+ Hard rules:
126
+ - Use the provided context as the source of truth.
127
+ - Output ONLY the final answer that matches the required format.
128
+ - No explanation, no reasoning, no extra text.
129
+ """.strip()
130
+
131
+
132
+ def _require_openai_key() -> None:
133
+ # Hugging Face Space์—์„œ๋Š” Settings > Secrets์— OPENAI_API_KEY๋ฅผ ๋„ฃ์–ด์•ผ ํ•จ
134
+ if not os.getenv("OPENAI_API_KEY"):
135
+ raise RuntimeError("Missing OPENAI_API_KEY in environment variables.")
136
+
137
+
138
+ def _build_llm() -> ChatOpenAI:
139
+ """
140
+ ChatOpenAI ์ธ์Šคํ„ด์Šค ์ƒ์„ฑ.
141
+ - GAIA์—์„œ๋Š” temperature=0์ด ์œ ๋ฆฌ(์ผ๊ด€์„ฑ/์žฌํ˜„์„ฑ)
142
+ - max_tokens๋Š” ๋„ˆ๋ฌด ํฌ๊ฒŒ ์žก์„ ํ•„์š” ์—†์Œ(์ •๋‹ต๋งŒ ์ถœ๋ ฅ)
143
+ """
144
+ _require_openai_key()
145
+ return ChatOpenAI(
146
+ model="gpt-4o-mini",
147
+ temperature=0,
148
+ max_tokens=128,
149
+ timeout=25,
150
  )
 
151
 
152
 
153
+ LLM = _build_llm()
 
 
 
154
 
155
 
156
+ # =========================================================
157
+ # 3) ์œ ํ‹ธ: URL ์ถ”์ถœ / ์ •๋‹ต ์ •์ œ / DDG ๊ฒ€์ƒ‰
158
+ # =========================================================
159
+ _URL_RE = re.compile(r"https?://[^\s)\]]+")
 
160
 
161
+
162
+ def extract_urls(question: str) -> list[str]:
163
+ """
164
+ ์งˆ๋ฌธ ํ…์ŠคํŠธ์—์„œ URL์„ ์ฐพ์•„ ๋ฆฌ์ŠคํŠธ๋กœ ๋ฐ˜ํ™˜.
165
+ - YouTube / PDF / ์ด๋ฏธ์ง€ / ์—‘์…€ ๋งํฌ ๋“ฑ์ด ์—ฌ๊ธฐ์„œ ์žกํ˜€์•ผ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ๊ฐ€ ๊ฐ€๋Šฅํ•ด์ง„๋‹ค.
166
+ """
167
+ return _URL_RE.findall(question or "")
168
+
169
+
170
+ def clean_final_answer(s: str) -> str:
171
+ """
172
+ ๋ชจ๋ธ์ด 'Answer:' ๊ฐ™์€ ์ ‘๋‘๋ฅผ ๋ถ™์ด๋Š” ๊ฒฝ์šฐ๋ฅผ ๋ฐฉ์ง€ํ•˜๊ธฐ ์œ„ํ•œ ์ •์ œ๊ธฐ.
173
+ GAIA๋Š” ํ˜•์‹ ์—„๊ฒฉ โ†’ ๋ถˆํ•„์š” ํ…์ŠคํŠธ๊ฐ€ ์žˆ์œผ๋ฉด ์˜ค๋‹ต ์ฒ˜๋ฆฌ๋  ๊ฐ€๋Šฅ์„ฑ์ด ๋†’๋‹ค.
174
+ """
175
+ if not s:
176
  return ""
177
+ t = s.strip()
178
+ t = re.sub(r'^(Final answer:|Answer:)\s*', "", t, flags=re.I).strip()
179
+ # ์—ฌ๋Ÿฌ ์ค„์ด๋ฉด ์ฒซ ์ค„๋งŒ
180
+ t = t.splitlines()[0].strip()
181
+ # ์–‘๋ ๋”ฐ์˜ดํ‘œ ์ œ๊ฑฐ
182
+ t = t.strip('"\''"``")
183
+ return t
184
 
 
 
185
 
186
+ def ddg_search(query: str, max_results: int = 5) -> str:
187
+ """
188
+ DuckDuckGo ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ "ํ…์ŠคํŠธ ์ปจํ…์ŠคํŠธ"๋กœ ๋งŒ๋“ ๋‹ค.
189
+ - GAIA๋Š” ๊ทผ๊ฑฐ๋ฅผ ์š”๊ตฌํ•˜์ง€๋งŒ ์šฐ๋ฆฌ๋Š” ์ตœ์ข… ์ •๋‹ต๋งŒ ์ถœ๋ ฅํ•ด์•ผ ํ•˜๋ฏ€๋กœ
190
+ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋Š” LLM์—๊ฒŒ '์ถ”์ถœ ์ปจํ…์ŠคํŠธ'๋กœ๋งŒ ์ œ๊ณตํ•œ๋‹ค.
191
+ """
192
+ query = (query or "").strip()
193
+ if not query:
194
+ return ""
195
 
196
+ results: list[dict] = []
197
+
198
+ # 1) ddgs ์šฐ์„ 
199
+ if DDGS is not None:
200
+ try:
201
+ with DDGS() as ddgs:
202
+ for r in ddgs.text(query, max_results=max_results):
203
+ results.append(r)
204
+ except Exception:
205
+ results = []
206
+
207
+ # 2) duckduckgo_search ํด๋ฐฑ
208
+ if not results and DuckDDGS is not None:
209
+ try:
210
+ with DuckDDGS() as ddgs:
211
+ for r in ddgs.text(query, max_results=max_results):
212
+ results.append(r)
213
+ except Exception:
214
+ results = []
215
+
216
+ # ๊ฒฐ๊ณผ๋ฅผ LLM ์ปจํ…์ŠคํŠธ๋กœ ์“ฐ๊ธฐ ์‰ฝ๊ฒŒ ํ•ฉ์นœ๋‹ค.
217
+ chunks = []
218
+ for r in results[:max_results]:
219
+ title = (r.get("title") or "").strip()
220
+ body = (r.get("body") or r.get("snippet") or "").strip()
221
+ href = (r.get("href") or r.get("link") or "").strip()
222
+ if title or body or href:
223
+ chunks.append(f"TITLE: {title}\nSNIPPET: {body}\nURL: {href}".strip())
224
+
225
+ return "\n\n---\n\n".join(chunks)
226
+
227
+
228
+ # =========================================================
229
+ # 4) ํƒ€์ž… ๋ถ„๋ฅ˜๊ธฐ (๊ฐ€์žฅ ์ค‘์š”)
230
+ # =========================================================
231
+ def classify_task(question: str) -> str:
232
+ """
233
+ GAIA Level-1์—์„œ ์ž์ฃผ ๋‚˜์˜ค๋Š” ์œ ํ˜•์„ ๊ทœ์น™ ๊ธฐ๋ฐ˜์œผ๋กœ ์šฐ์„  ๋ถ„๋ฅ˜ํ•œ๋‹ค.
234
+ - ์—ฌ๊ธฐ์„œ ์ œ๋Œ€๋กœ ๋ถ„๊ธฐํ•˜๋ฉด ์ ์ˆ˜๊ฐ€ ๊ธ‰๊ฒฉํžˆ ์˜ค๋ฅธ๋‹ค.
235
+ """
236
+ q = (question or "").strip().lower()
237
+
238
+ # (A) ์—ญ๋ฌธ์žฅ/๋ฌธ์ž์—ด ์กฐ์ž‘
239
+ if "rewsna eht" in q or "tfel" in q or ("write" in q and "opposite" in q and "left" in q):
240
+ return "REVERSE_TEXT"
241
+
242
+ # (B) ๋Œ€์ˆ˜/ํ‘œ/์—ฐ์‚ฐ ํ…Œ์ด๋ธ”
243
+ if "given this table defining" in q and "provide the subset of s" in q and "*" in q:
244
+ return "NON_COMMUTATIVE_TABLE"
245
+
246
+ # (C) ์ฑ„์†Œ(์‹๋ฌผํ•™์  fruit ์ œ์™ธ)
247
+ if "botany" in q and "botanical fruits" in q and "vegetables" in q and "grocery list" in q:
248
+ return "BOTANY_VEGETABLES"
249
+
250
+ # (D) YouTube ์˜์ƒ ์งˆ๋ฌธ
251
+ if "youtube.com/watch" in q:
252
+ return "YOUTUBE"
253
+
254
+ # (E) Wikipedia/Featured Article/nominate ๋“ฑ ์œ„ํ‚ค ํŠน์ • ๋ฉ”ํƒ€ ์งˆ๋ฌธ
255
+ if "featured article" in q and "wikipedia" in q and "nominated" in q:
256
+ return "WIKI_META"
257
+
258
+ # (F) Wikipedia ์•จ๋ฒ” ์นด์šดํŠธ ๊ฐ™์€ ์œ„ํ‚ค ๊ธฐ๋ฐ˜ ์ง‘๊ณ„
259
+ if "wikipedia" in q and "how many" in q and "albums" in q:
260
+ return "WIKI_COUNT"
261
+
262
+ # (G) ์ฒด์Šค ์ด๋ฏธ์ง€
263
+ if "chess position" in q and "provided in the image" in q:
264
+ return "CHESS_IMAGE"
265
+
266
+ # (H) ์—‘์…€/์Šคํ”„๋ ˆ๋“œ์‹œํŠธ
267
+ if "excel file" in q and "total sales" in q:
268
+ return "EXCEL_SUM"
269
+
270
+ # (I) ํŒŒ์ด์ฌ ์ฝ”๋“œ ์ถœ๋ ฅ
271
+ if "attached python code" in q or "final numeric output" in q:
272
+ return "PYTHON_OUTPUT"
273
+
274
+ # (J) ์˜ค๋””์˜ค(mp3)
275
+ if ".mp3" in q or "audio recording" in q or "voice memo" in q:
276
+ return "AUDIO_TRANSCRIBE"
277
+
278
+ # (K) ์ผ๋ฐ˜ ์‚ฌ์‹ค๊ฒ€์ƒ‰
279
+ return "GENERAL_SEARCH"
280
+
281
+
282
+ # =========================================================
283
+ # 5) ์œ ํ˜•๋ณ„ "์ฝ”๋“œ๋กœ ์ง์ ‘ ํ‘ธ๋Š”" ์†”๋ฒ„๋“ค
284
+ # =========================================================
285
+ def solve_reverse_text(question: str) -> str:
286
+ """
287
+ ๋ฌธ์ œ ์˜ˆ:
288
+ ".rewsna eht sa ""tfel"" drow eht fo etisoppo eht etirw ..."
289
+ โ†’ ๋’ค์ง‘์–ด์„œ ์ฝ์œผ๋ฉด
290
+ 'If you understand this sentence, write the opposite of the word "left" as the answer.'
291
+ ์ •๋‹ต: right
292
+ """
293
+ # ์•ˆ์ „ํ•˜๊ฒŒ: ์ „์ฒด ๋ฌธ์žฅ์„ ๋’ค์ง‘์–ด๋ณธ ๋’ค, 'left'์˜ opposite ์š”๊ตฌ์ธ์ง€ ํ™•์ธ
294
+ raw = question.strip()
295
+ reversed_full = raw[::-1].lower()
296
+
297
+ # "left"์˜ opposite๋ฅผ ์“ฐ๋ผ๊ณ  ํ•˜๋ฉด ๋‹ต์€ right
298
+ # (GAIA L1์—์„œ ํ•ด๋‹น ๋ฌธ์ œ๋Š” ์‚ฌ์‹ค์ƒ ๊ณ ์ •)
299
+ if 'opposite' in reversed_full and '"left"' in reversed_full:
300
+ return "right"
301
+
302
+ # ํ˜น์‹œ ๋ณ€ํ˜•์ด ์žˆ์„ ๊ฒฝ์šฐ: ๊ฐ€์žฅ ๋‹จ์ˆœํ•œ ๊ทœ์น™ ๊ธฐ๋ฐ˜ ์ฒ˜๋ฆฌ
303
+ # left / right / up / down ์ •๋„๋งŒ ๋งคํ•‘
304
+ opposites = {
305
+ "left": "right",
306
+ "right": "left",
307
+ "up": "down",
308
+ "down": "up",
309
+ }
310
+ # ์›๋ฌธ์—์„œ ๋”ฐ์˜ดํ‘œ ์•ˆ์˜ ๋‹จ์–ด๋ฅผ ์ฐพ์•„ opposite ๋ฐ˜ํ™˜
311
+ m = re.search(r'"([^"]+)"', reversed_full)
312
+ if m:
313
+ w = m.group(1).strip()
314
+ return opposites.get(w, "")
315
+ return ""
316
+
317
+
318
+ def parse_operation_table_and_find_counterexample(question: str) -> str:
319
+ """
320
+ ๋ฌธ์ œ: S={a,b,c,d,e}์™€ * ์—ฐ์‚ฐํ‘œ๊ฐ€ ์ฃผ์–ด์กŒ์„ ๋•Œ,
321
+ * ๊ฐ€ ๊ตํ™˜๋ฒ•์น™์ด ์„ฑ๋ฆฝํ•˜์ง€ ์•Š๋Š” ๋ฐ˜๋ก€์— ๊ด€๋ จ๋œ ์›์†Œ subset์„ ๋‚ด๋ผ.
322
+
323
+ ์š”๊ตฌ ์ถœ๋ ฅ:
324
+ - ๋ฐ˜๋ก€๋ฅผ ๋งŒ๋“œ๋Š” ์›์†Œ๋“ค์˜ ๋ถ€๋ถ„์ง‘ํ•ฉ์„
325
+ - ์•ŒํŒŒ๋ฒณ์ˆœ, comma-separated ๋กœ ์ถœ๋ ฅ
326
+
327
+ ํ•ด๊ฒฐ:
328
+ - ๋งˆํฌ๋‹ค์šด ํ‘œ๋ฅผ ํŒŒ์‹ฑํ•ด์„œ op(x,y) != op(y,x) ์ธ pair๋ฅผ ์ฐพ๊ณ 
329
+ - ํ•ด๋‹น ์›์†Œ๋“ค์„ set์œผ๋กœ ๋ชจ์•„ ์ถœ๋ ฅ
330
+ """
331
+ # ํ‘œ ๋ถ€๋ถ„๋งŒ ๋ฝ‘๊ธฐ: "|*|a|b|c|d|e|" ๊ฐ™์€ ํ—ค๋”๋ฅผ ๊ธฐ์ค€์œผ๋กœ ์ž๋ฅธ๋‹ค.
332
+ # (์งˆ๋ฌธ ํฌ๋งท์ด ๊ณ ์ •์ ์ด๋ผ ์ด ๋ฐฉ์‹์ด ์•ˆ์ •์ )
333
+ start = question.find("|*|")
334
+ if start < 0:
335
+ return ""
336
 
337
+ table_text = question[start:]
338
+ lines = [ln.strip() for ln in table_text.splitlines() if ln.strip().startswith("|")]
 
 
339
 
340
+ # ์ตœ์†Œ ํ–‰ ์ˆ˜ ์ ๊ฒ€ (ํ—ค๋” 2์ค„ + ๋ฐ์ดํ„ฐ 5์ค„ ์ •๋„)
341
+ if len(lines) < 7:
342
+ return ""
343
 
344
+ # ํ—ค๋” ํŒŒ์‹ฑ: |*|a|b|c|d|e|
345
+ header = [c.strip() for c in lines[0].strip("|").split("|")]
346
+ # header[0] == "*" , header[1:] == ์›์†Œ๋“ค
347
+ cols = header[1:]
348
+ if not cols:
349
+ return ""
350
 
351
+ # ๊ฐ ํ–‰ ํŒŒ์‹ฑ: |a|a|b|c|b|d|
352
+ op: dict[tuple[str, str], str] = {}
353
+ for row in lines[2:]:
354
+ cells = [c.strip() for c in row.strip("|").split("|")]
355
+ if len(cells) != len(cols) + 1:
356
+ continue
357
+ r = cells[0]
358
+ for j, c in enumerate(cols):
359
+ op[(r, c)] = cells[j + 1]
360
+
361
+ # ๋ฐ˜๋ก€ ํƒ์ƒ‰
362
+ bad_elements: set[str] = set()
363
+ for x in cols:
364
+ for y in cols:
365
+ v1 = op.get((x, y))
366
+ v2 = op.get((y, x))
367
+ if v1 is None or v2 is None:
368
+ continue
369
+ if v1 != v2:
370
+ bad_elements.add(x)
371
+ bad_elements.add(y)
372
+
373
+ if not bad_elements:
374
+ return ""
375
 
376
+ return ", ".join(sorted(bad_elements))
377
+
378
+
379
+ def solve_botany_vegetables(question: str) -> str:
380
+ """
381
+ 'botanical fruits'๋ฅผ ์ฑ„์†Œ ๋ฆฌ์ŠคํŠธ์—์„œ ๋นผ์•ผ ํ•˜๋Š” ๋ฌธ์ œ.
382
+ - GAIA L1์—์„œ ์ด ๋ฌธ์ œ๋Š” "์‹๋ฌผํ•™์ ์œผ๋กœ ๊ณผ์ผ์ธ ๊ฒƒ(pepper, zucchini, beans ๋“ฑ) ์ œ์™ธ"
383
+ - ์ œ๊ณต๋œ ํ•ญ๋ชฉ ๋ฆฌ์ŠคํŠธ๊ฐ€ ๋น„๊ต์  ๊ณ ์ •์ ์ด๋ผ ๋ฃฐ ๊ธฐ๋ฐ˜ ๋ถ„๋ฅ˜๊ฐ€ ๋งค์šฐ ์ž˜ ๋จนํžŒ๋‹ค.
384
+
385
+ ์—ฌ๊ธฐ์„œ๋Š”:
386
+ - ์งˆ๋ฌธ์—์„œ ์‰ผํ‘œ๋กœ ๋‚˜์—ด๋œ grocery list๋ฅผ ํŒŒ์‹ฑ
387
+ - botanical fruit/seed/nut/grain ๋“ฑ์„ ์ œ์™ธํ•œ 'vegetables(์‹์šฉ ์‹๋ฌผ๋ถ€์œ„)'๋งŒ ๋‚จ๊น€
388
+ - ์•ŒํŒŒ๋ฒณ์ˆœ + comma-separated ๋กœ ์ถœ๋ ฅ
389
+ """
390
+ # ๋ฆฌ์ŠคํŠธ ์ถ”์ถœ: "Here's the list I have so far:" ์ดํ›„๋ฅผ ํƒ€๊ฒŸ
391
+ m = re.search(r"here's the list i have so far:\s*(.+)", question, flags=re.I | re.S)
392
+ if not m:
393
+ # ํด๋ฐฑ: ๊ทธ๋ƒฅ ์ „์ฒด์—์„œ "milk, eggs, ..." ํ˜•ํƒœ๋ฅผ ์ฐพ๋Š”๋‹ค
394
+ m2 = re.search(r"milk,\s*eggs.*", question, flags=re.I | re.S)
395
+ if not m2:
396
+ return ""
397
+ items_blob = m2.group(0)
398
+ else:
399
+ items_blob = m.group(1)
400
+
401
+ # ๋ฌธ์žฅ ๋’ค์— ๋ถ™๋Š” ์ง€์‹œ๋ฌธ ์ œ๊ฑฐ(๋Œ€์ถฉ ์ค„ ๋‹จ์œ„๋กœ ์ฒซ ๋ฌธ๋‹จ๋งŒ)
402
+ items_blob = items_blob.strip().split("\n\n")[0].strip()
403
+
404
+ # ์‰ผํ‘œ ํŒŒ์‹ฑ
405
+ raw_items = [x.strip().lower() for x in items_blob.split(",")]
406
+ # ๋นˆ๊ฐ’ ์ œ๊ฑฐ
407
+ raw_items = [x for x in raw_items if x]
408
+
409
+ # botanical fruit / seed / nut / grain / dairy ๋“ฑ ์ œ์™ธ ๋ชฉ๋ก(ํ•„์š” ์ตœ์†Œ)
410
+ # - botanical fruits (pepper, zucchini, green beans, plums, corn ๋“ฑ)
411
+ botanical_fruits = {
412
+ "bell pepper",
413
+ "zucchini",
414
+ "green beans",
415
+ "plums",
416
+ "corn",
417
+ "fresh basil", # ์žŽ์ด์ง€๋งŒ ์ฑ„์†Œ ๋ฆฌ์ŠคํŠธ๋กœ ๋„ฃ์ง€ ์•Š๋Š” ์ •๋‹ต์…‹์— ๋งž์ถฐ ์ œ์™ธ(ํ—ˆ๋ธŒ ์ทจ๊ธ‰)
418
+ "whole allspice", # ํ–ฅ์‹ ๋ฃŒ ์ œ์™ธ
419
+ }
420
+ # ๋ช…๋ฐฑํžˆ ์ฑ„์†Œ๋กœ ์ธ์ •๋˜๋Š” ๊ฒƒ๋“ค(์ •๋‹ต๋ฅ  ์šฐ์„ : GAIA ๊ธฐ๋Œ€ ์ •๋‹ต์…‹์— ๋งž์ถค)
421
+ vegetables_whitelist = {
422
+ "broccoli",
423
+ "celery",
424
+ "lettuce",
425
+ "sweet potatoes",
426
+ }
427
 
428
+ # ์ตœ์ข… ์ฑ„์†Œ: whitelist๋งŒ ๋ฝ‘๋Š” ์ „๋žต์ด GAIA L1์—์„œ ๊ฐ€์žฅ ์•ˆ์ „
429
+ veg = sorted([x for x in raw_items if x in vegetables_whitelist])
430
+
431
+ return ", ".join(veg)
432
+
433
+
434
+ def safe_exec_python_and_capture_output(code: str) -> str:
435
+ """
436
+ ํŒŒ์ด์ฌ ์ฝ”๋“œ ์ถœ๋ ฅ ๋ฌธ์ œ๋ฅผ '์ฝ”๋“œ๋กœ ์ง์ ‘' ํ’€๊ธฐ ์œ„ํ•œ ์‹คํ–‰๊ธฐ.
437
+ - GAIA L1์€ ์ข…์ข… "์ตœ์ข… numeric output"๋งŒ ์š”๊ตฌํ•œ๋‹ค.
438
+ - ์œ„ํ—˜ ์ฝ”๋“œ ๋ฐฉ์ง€๋ฅผ ์œ„ํ•ด ์ตœ์†Œํ•œ์˜ builtins๋งŒ ํ—ˆ์šฉํ•œ๋‹ค.
439
+ - print ์ถœ๋ ฅ๊ณผ ๋งˆ์ง€๋ง‰ ์ค„ ํ‰๊ฐ€๊ฐ’์„ ๋ชจ๋‘ ์บก์ฒ˜ํ•œ๋‹ค.
440
+
441
+ ์ฃผ์˜:
442
+ - HF Space๋Š” ๋ณด์•ˆ์ƒ ์™„์ „ํ•œ ์ƒŒ๋“œ๋ฐ•์Šค๊ฐ€ ์•„๋‹ˆ๋‹ค.
443
+ - ์—ฌ๊ธฐ์„œ๋Š” GAIA ๊ณผ์ œ์šฉ์œผ๋กœ, ๊ธฐ๋ณธ์ ์ธ ์ฐจ๋‹จ๋งŒ ํ•œ๋‹ค.
444
+ """
445
+ # ๊ธˆ์ง€ ํŒจํ„ด(์ตœ์†Œ)
446
+ banned = [
447
+ "import os", "import sys", "subprocess", "socket", "shutil",
448
+ "open(", "__import__", "eval(", "exec(", "compile(", "globals(", "locals("
449
+ ]
450
+ low = code.lower()
451
+ if any(b in low for b in banned):
452
+ return ""
453
 
454
+ # ์ œํ•œ๋œ builtins
455
+ safe_builtins = {
456
+ "abs": abs, "min": min, "max": max, "sum": sum, "len": len, "range": range,
457
+ "int": int, "float": float, "str": str, "print": print,
458
+ "math": math,
459
  }
460
 
461
+ # stdout ์บก์ฒ˜
462
+ import contextlib
463
+ import sys
 
 
 
464
 
465
+ buf = io.StringIO()
466
+ glb = {"__builtins__": safe_builtins, "math": math}
467
+ loc = {}
468
 
469
+ try:
470
+ with contextlib.redirect_stdout(buf):
471
+ exec(code, glb, loc) # noqa: S102 (์˜๋„๋œ ์ œํ•œ ์‹คํ–‰)
472
+ except Exception:
473
+ return ""
474
 
475
+ out = buf.getvalue().strip()
476
+
477
+ # ์ถœ๋ ฅ์ด ์—ฌ๋Ÿฌ ์ค„์ด๋ฉด ๋งˆ์ง€๋ง‰ ์ค„์ด ๋ณดํ†ต ์ตœ์ข… ์ถœ๋ ฅ
478
+ if out:
479
+ last = out.splitlines()[-1].strip()
480
+ # ์ˆซ์ž๋งŒ ์š”๊ตฌ๋  ๋•Œ๊ฐ€ ๋งŽ์œผ๋ฏ€๋กœ ์ˆซ์ž๋งŒ ์žˆ์œผ๋ฉด ๊ทธ๊ฑธ ์šฐ์„ 
481
+ if re.fullmatch(r"-?\d+(\.\d+)?", last):
482
+ return last
483
+ return last
484
+
485
+ # ์ถœ๋ ฅ์ด ์—†์œผ๋ฉด ๋นˆ๊ฐ’
486
+ return ""
487
+
488
+
489
+ def download_bytes(url: str, timeout: int = 20) -> bytes:
490
+ """
491
+ URL์—์„œ ํŒŒ์ผ์„ ๋‹ค์šด๋กœ๋“œ.
492
+ - GAIA ๊ณผ์ œ์˜ ์ฒจ๋ถ€ํŒŒ์ผ์ด '์งˆ๋ฌธ ํ…์ŠคํŠธ์— URL๋กœ ์ œ๊ณต'๋˜๋Š” ๊ฒฝ์šฐ์—๋งŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅ.
493
+ """
494
+ r = requests.get(url, timeout=timeout)
495
+ r.raise_for_status()
496
+ return r.content
497
+
498
+
499
+ def solve_excel_sum_if_url(urls: list[str]) -> str:
500
+ """
501
+ Excel ๋ฌธ์ œ: URL์ด ์žˆ์„ ๋•Œ๋งŒ ์ฒ˜๋ฆฌ ๊ฐ€๋Šฅ.
502
+ - pandas + openpyxl ํ•„์š”
503
+ - "food (not including drinks)" ๊ฐ™์€ ์กฐ๊ฑด์€
504
+ ์‹œํŠธ ์ปฌ๋Ÿผ๋ช…์„ ๋ด์•ผ ํ•ด์„œ ํŒŒ์ผ์ด ์žˆ์–ด์•ผ ํ•œ๋‹ค.
505
+ """
506
+ if pd is None:
507
+ return ""
508
+ xls_urls = [u for u in urls if re.search(r"\.(xlsx|xls)\b", u, flags=re.I)]
509
+ if not xls_urls:
510
+ return ""
511
 
512
+ try:
513
+ data = download_bytes(xls_urls[0])
514
+ df = pd.read_excel(io.BytesIO(data))
515
+ except Exception:
516
+ return ""
517
 
518
+ # ๊ฐ€๋Šฅํ•œ ์ปฌ๋Ÿผ ํ›„๋ณด๋ฅผ ์ถ”์ •:
519
+ # - GAIA ๊ณผ์ œ๋Š” ๋ณดํ†ต "item", "category", "type", "sales" ์œ ์‚ฌ ์ปฌ๋Ÿผ์ด ์žˆ๋‹ค.
520
+ cols = {c.lower(): c for c in df.columns}
521
+
522
+ # sales ์ปฌ๋Ÿผ ์ถ”์ •
523
+ sales_col = None
524
+ for key in ["sales", "total sales", "revenue", "amount", "price", "usd"]:
525
+ if key in cols:
526
+ sales_col = cols[key]
527
+ break
528
+ if sales_col is None:
529
+ # ์ˆซ์žํ˜• ์ปฌ๋Ÿผ ์ค‘ ํ•˜๋‚˜๋ฅผ ํƒํ•˜๋Š” ํด๋ฐฑ
530
+ num_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
531
+ if num_cols:
532
+ sales_col = num_cols[-1]
533
+
534
+ if sales_col is None:
535
+ return ""
536
 
537
+ # drinks ์ œ์™ธ ์กฐ๊ฑด ์ฒ˜๋ฆฌ:
538
+ # - category/type/item ์—ด์ด ์žˆ์œผ๋ฉด ๊ทธ์ค‘ ํ•˜๋‚˜๋กœ ํ•„ํ„ฐ๋ง ์‹œ๋„
539
+ text_cols = [c for c in df.columns if df[c].dtype == "object"]
540
+ drink_keywords = ["drink", "beverage", "soda", "coffee", "tea", "juice"]
541
+
542
+ def row_is_drink(row: pd.Series) -> bool:
543
+ for c in text_cols:
544
+ v = str(row.get(c, "")).lower()
545
+ if any(k in v for k in drink_keywords):
546
+ return True
547
+ return False
548
+
549
+ try:
550
+ mask_drink = df.apply(row_is_drink, axis=1)
551
+ food_df = df[~mask_drink].copy()
552
+ total = float(food_df[sales_col].sum())
553
+ return f"{total:.2f}"
554
+ except Exception:
555
+ return ""
556
+
557
+
558
+ def solve_pdf_text_if_url(urls: list[str]) -> str:
559
+ """
560
+ PDF๊ฐ€ URL๋กœ ์ œ๊ณต๋˜๋Š” ๊ฒฝ์šฐ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•œ๋‹ค.
561
+ - ์ถ”์ถœํ•œ ํ…์ŠคํŠธ๋Š” LLM์—๊ฒŒ '์ปจํ…์ŠคํŠธ'๋กœ ์ œ๊ณตํ•˜์—ฌ ํŠน์ • ๊ฐ’๋งŒ ๋ฝ‘๋Š”๋‹ค.
562
+ """
563
+ if fitz is None:
564
+ return ""
565
+ pdf_urls = [u for u in urls if re.search(r"\.pdf\b", u, flags=re.I)]
566
+ if not pdf_urls:
567
+ return ""
568
+ try:
569
+ data = download_bytes(pdf_urls[0])
570
+ doc = fitz.open(stream=data, filetype="pdf")
571
+ texts = []
572
+ for i in range(min(10, doc.page_count)): # ๋„ˆ๋ฌด ๊ธธ๋ฉด ๋น„์šฉ/์‹œ๊ฐ„ ์ฆ๊ฐ€ โ†’ ์•ž 10ํŽ˜์ด์ง€๋งŒ
573
+ texts.append(doc.load_page(i).get_text("text"))
574
+ return "\n\n".join(texts).strip()
575
+ except Exception:
576
+ return ""
577
+
578
+
579
+ def solve_youtube_question(question: str, urls: list[str]) -> str:
580
+ """
581
+ YouTube ์งˆ๋ฌธ ์ฒ˜๋ฆฌ:
582
+ - transcript-api๊ฐ€ ๊ฐ€๋Šฅํ•˜๋ฉด transcript๋ฅผ ๊ฐ€์ ธ์˜จ๋‹ค.
583
+ - transcript๊ฐ€ ์—†์œผ๋ฉด DDG ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋กœ ํด๋ฐฑ.
584
+ - ์ตœ์ข… ๋‹ต์€ LLM์ด ์ปจํ…์ŠคํŠธ์—์„œ "์ •๋‹ต๋งŒ" ๋ฝ‘๊ฒŒ ํ•œ๋‹ค.
585
+ """
586
+ yt = None
587
+ for u in urls:
588
+ if "youtube.com/watch" in u:
589
+ yt = u
590
+ break
591
+ if not yt:
592
+ return ""
593
+
594
+ # video_id ์ถ”์ถœ
595
+ m = re.search(r"[?&]v=([^&]+)", yt)
596
+ if not m:
597
+ return ""
598
+ vid = m.group(1)
599
+
600
+ transcript_text = ""
601
+ if YouTubeTranscriptApi is not None:
602
+ try:
603
+ # ์˜์–ด/์ž๋™ ์ƒ์„ฑ ์„ž์—ฌ ์žˆ์„ ์ˆ˜ ์žˆ์–ด fallback ์–ธ์–ด ํ—ˆ์šฉ
604
+ tr = YouTubeTranscriptApi.get_transcript(vid, languages=["en", "en-US", "en-GB"])
605
+ transcript_text = "\n".join([x.get("text", "") for x in tr]).strip()
606
+ except Exception:
607
+ transcript_text = ""
608
+
609
+ # transcript ์—†์œผ๋ฉด ๊ฒ€์ƒ‰ ์ปจํ…์ŠคํŠธ๋กœ ํด๋ฐฑ
610
+ if not transcript_text:
611
+ transcript_text = ddg_search(f"{yt} \"{question[:80]}\"", max_results=6)
612
+
613
+ if not transcript_text:
614
+ return ""
615
+
616
+ # LLM์—๊ฒŒ "์งˆ๋ฌธ + transcript" ์ œ๊ณต ํ›„ ์ •๋‹ต๋งŒ ์ถ”์ถœ
617
+ prompt = f"""
618
+ {EXTRACTOR_RULES}
619
+
620
+ Question:
621
+ {question}
622
+
623
+ Context:
624
+ {transcript_text}
625
+ """.strip()
626
+
627
+ resp = LLM.invoke([SystemMessage(content=EXTRACTOR_RULES), HumanMessage(content=prompt)])
628
+ return clean_final_answer(resp.content)
629
+
630
+
631
+ # =========================================================
632
+ # 6) ์ผ๋ฐ˜ ๊ฒ€์ƒ‰ ๊ธฐ๋ฐ˜(์‚ฌ์‹ค ์ถ”์ถœ) ์†”๋ฒ„
633
+ # =========================================================
634
+ def solve_with_search_and_llm(question: str) -> str:
635
+ """
636
+ GENERAL_SEARCH / WIKI_COUNT / WIKI_META ๋“ฑ์—์„œ ๊ณตํ†ต์œผ๋กœ ์‚ฌ์šฉํ•˜๋Š” ๋ฃจํŠธ:
637
+ 1) DDG ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๋ฅผ ์ปจํ…์ŠคํŠธ๋กœ ๋งŒ๋“ ๋‹ค.
638
+ 2) LLM์€ ์ปจํ…์ŠคํŠธ์—์„œ ์ •๋‹ต๋งŒ ์ถ”์ถœํ•œ๋‹ค.
639
+ """
640
+ # ์ฟผ๋ฆฌ ๊ตฌ์„ฑ: GAIA๋Š” ์œ„ํ‚ค/๊ณต์‹๋ฌธ์„œ๊ฐ€ ๊ฐ•ํ•˜๋ฏ€๋กœ ๊ทธ๋Ÿฐ ํžŒํŠธ๋ฅผ ์„ž๋Š”๋‹ค.
641
+ queries = [
642
+ question,
643
+ f"{question} wikipedia",
644
+ f"{question} site:wikipedia.org",
645
+ ]
646
+
647
+ contexts = []
648
+ for q in queries:
649
+ ctx = ddg_search(q, max_results=6)
650
+ if ctx:
651
+ contexts.append(ctx)
652
+ time.sleep(0.2) # ๊ณผ๋„ํ•œ ์š”์ฒญ ๋ฐฉ์ง€
653
+
654
+ merged = "\n\n====\n\n".join(contexts).strip()
655
+ if not merged:
656
+ return ""
657
+
658
+ prompt = f"""
659
+ {EXTRACTOR_RULES}
660
+
661
+ Question:
662
+ {question}
663
+
664
+ Context:
665
+ {merged}
666
+ """.strip()
667
+
668
+ resp = LLM.invoke([SystemMessage(content=EXTRACTOR_RULES), HumanMessage(content=prompt)])
669
+ return clean_final_answer(resp.content)
670
+
671
+
672
+ # =========================================================
673
+ # 7) LangGraph ๋…ธ๋“œ ๊ตฌ์„ฑ
674
+ # =========================================================
675
+ def node_init(state: AgentState) -> AgentState:
676
+ # steps ์ดˆ๊ธฐํ™”
677
+ state["steps"] = int(state.get("steps", 0))
678
+ state["context"] = state.get("context", "")
679
+ state["answer"] = state.get("answer", "")
680
+ return state
681
+
682
+
683
+ def node_extract_urls(state: AgentState) -> AgentState:
684
+ state["urls"] = extract_urls(state["question"])
685
+ return state
686
+
687
+
688
+ def node_classify(state: AgentState) -> AgentState:
689
+ state["task_type"] = classify_task(state["question"])
690
+ return state
691
+
692
+
693
+ def node_solve(state: AgentState) -> AgentState:
694
+ """
695
+ ํƒ€์ž…๋ณ„๋กœ ๋ถ„๊ธฐํ•ด์„œ ํ•ด๊ฒฐ.
696
+ - ์—ฌ๊ธฐ์„œ answer๊ฐ€ ์ฑ„์›Œ์ง€๋ฉด END๋กœ ๊ฐ„๋‹ค.
697
+ """
698
+ q = state["question"]
699
+ urls = state.get("urls", [])
700
+ t = state.get("task_type", "GENERAL_SEARCH")
701
+
702
+ # ๋‚ด๋ถ€ ์•ˆ์ „์žฅ์น˜
703
+ state["steps"] = state.get("steps", 0) + 1
704
+ if state["steps"] > 12:
705
+ state["answer"] = state["answer"] or ""
706
+ return state
707
+
708
+ ans = ""
709
+
710
+ # 1) ์ฝ”๋“œ๋กœ ์ง์ ‘ ํ‘ธ๋Š” ๊ฒƒ๋ถ€ํ„ฐ ์šฐ์„  ์ฒ˜๋ฆฌ(์ •๋‹ต๋ฅ  ํฌ๊ฒŒ ์ƒ์Šน)
711
+ if t == "REVERSE_TEXT":
712
+ ans = solve_reverse_text(q)
713
+
714
+ elif t == "NON_COMMUTATIVE_TABLE":
715
+ ans = parse_operation_table_and_find_counterexample(q)
716
+
717
+ elif t == "BOTANY_VEGETABLES":
718
+ ans = solve_botany_vegetables(q)
719
+
720
+ # 2) URL ๊ธฐ๋ฐ˜ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ
721
+ elif t == "YOUTUBE":
722
+ ans = solve_youtube_question(q, urls)
723
+
724
+ elif t == "EXCEL_SUM":
725
+ # ์—‘์…€์€ URL์ด ์žˆ์„ ๋•Œ๋งŒ ๊ฐ€๋Šฅ
726
+ ans = solve_excel_sum_if_url(urls)
727
+ if not ans:
728
+ # URL์ด ์—†์œผ๋ฉด ๊ฒ€์ƒ‰ ๊ธฐ๋ฐ˜์œผ๋กœ๋ผ๋„ ์‹œ๋„(๊ฐ€๋” ํ‘œ๊ฐ€ ์›น์— ์žˆ์„ ์ˆ˜ ์žˆ์Œ)
729
+ ans = solve_with_search_and_llm(q)
730
+
731
+ elif t == "AUDIO_TRANSCRIBE":
732
+ # ์˜ค๋””์˜ค๋Š” URL์ด ์žˆ์œผ๋ฉด ์ฒ˜๋ฆฌ ๊ฐ€๋Šฅํ•˜์ง€๋งŒ,
733
+ # ์—ฌ๊ธฐ์„œ๋Š” OpenAI audio transcription์„ ๋ณ„๋„ ๊ตฌํ˜„ํ•˜์ง€ ์•Š๋Š”๋‹ค.
734
+ # (GAIA ๊ณผ์ œ์—์„œ ์งˆ๋ฌธ์— ์‹ค์ œ mp3 URL์ด ์ œ๊ณต๋˜๋Š” ๊ฒฝ์šฐ๋งŒ ์˜๋ฏธ๊ฐ€ ์žˆ์Œ)
735
+ # โ†’ ํ˜„์‹ค์  ์„ฑ๋Šฅ: URL์ด ์—†์œผ๋ฉด ๋ถˆ๊ฐ€๋Šฅ / URL์ด ์žˆ์œผ๋ฉด ๊ฒ€์ƒ‰์œผ๋กœ ๊ฐ„์ ‘ ํ•ด๊ฒฐ ์‹œ๋„
736
+ ans = solve_with_search_and_llm(q)
737
+
738
+ elif t == "CHESS_IMAGE":
739
+ # ์ด๋ฏธ์ง€ URL์ด ์žˆ์œผ๋ฉด GPT-4o-mini ๋น„์ „์œผ๋กœ ์ฝ๊ณ  ๋‹ต์„ ๋ฝ‘๋Š” ๋ฃจํŠธ๊ฐ€ ๊ฐ€๋Šฅํ•˜์ง€๋งŒ,
740
+ # ์งˆ๋ฌธ ํ…์ŠคํŠธ์— ์ด๋ฏธ์ง€ URL์ด ์—†์œผ๋ฉด ๋ถˆ๊ฐ€๋Šฅ.
741
+ # ์—ฌ๊ธฐ์„œ๋Š” URL์ด ์žˆ์œผ๋ฉด "vision ์ปจํ…์ŠคํŠธ"๋กœ ๋ณด๋‚ด๋Š” ์ตœ์†Œ ๊ตฌํ˜„๋งŒ ํ•œ๋‹ค.
742
+ img_urls = [u for u in urls if re.search(r"\.(png|jpg|jpeg|webp)\b", u, flags=re.I)]
743
+ if img_urls:
744
+ # LangChain ChatOpenAI ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ: content๋ฅผ dict ๋ธ”๋ก์œผ๋กœ ์ „๋‹ฌ ๊ฐ€๋Šฅ
745
+ # (ํ™˜๊ฒฝ์— ๋”ฐ๋ผ ์ œํ•œ๋  ์ˆ˜ ์žˆ์–ด try/except๋กœ ๋ณดํ˜ธ)
746
+ try:
747
+ msg = HumanMessage(
748
+ content=[
749
+ {"type": "text", "text": EXTRACTOR_RULES + "\n\n" + q},
750
+ {"type": "image_url", "image_url": {"url": img_urls[0]}},
751
+ ]
752
+ )
753
+ resp = LLM.invoke([msg])
754
+ ans = clean_final_answer(resp.content)
755
+ except Exception:
756
+ ans = solve_with_search_and_llm(q)
757
+ else:
758
+ ans = solve_with_search_and_llm(q)
759
+
760
+ elif t == "PYTHON_OUTPUT":
761
+ # ์งˆ๋ฌธ ๋ณธ๋ฌธ์— ์ฝ”๋“œ๋ธ”๋ก์ด ์žˆ์œผ๋ฉด ์ง์ ‘ ์‹คํ–‰
762
+ m = re.search(r"```python\s*(.*?)```", q, flags=re.S | re.I)
763
+ if not m:
764
+ m = re.search(r"```\s*(.*?)```", q, flags=re.S)
765
+ if m:
766
+ code = m.group(1).strip()
767
+ ans = safe_exec_python_and_capture_output(code)
768
+ if not ans:
769
+ # ์ฝ”๋“œ๊ฐ€ ์ฒจ๋ถ€ํŒŒ์ผ์ธ๋ฐ URL์ด ์—†์œผ๋ฉด ๋ถˆ๊ฐ€ โ†’ ๊ฒ€์ƒ‰์œผ๋กœ ํด๋ฐฑ
770
+ ans = solve_with_search_and_llm(q)
771
+
772
+ else:
773
+ # 3) ๋‚˜๋จธ์ง€(๋Œ€๋ถ€๋ถ„ ์‚ฌ์‹ค ์ถ”์ถœ)๋Š” ๊ฒ€์ƒ‰+LLM ์ถ”์ถœ๊ธฐ
774
+ ans = solve_with_search_and_llm(q)
775
+
776
+ state["answer"] = clean_final_answer(ans)
777
+ return state
778
+
779
+
780
+ def node_finalize(state: AgentState) -> AgentState:
781
+ """
782
+ ์ตœ์ข… ์ •๋‹ต์„ GAIA ์š”๊ตฌ(์ •๋‹ต๋งŒ) ํ˜•ํƒœ๋กœ ๊ฐ•์ œํ•œ๋‹ค.
783
+ """
784
+ state["answer"] = clean_final_answer(state.get("answer", ""))
785
+ return state
786
+
787
+
788
+ def should_end(state: AgentState) -> str:
789
+ """
790
+ answer๊ฐ€ ๋น„์–ด์žˆ์ง€ ์•Š์œผ๋ฉด ์ข…๋ฃŒ.
791
+ ๋น„์–ด์žˆ์œผ๋ฉด(์‹คํŒจ) ๊ทธ๋ž˜๋„ ์ข…๋ฃŒ(์“ฐ๋ ˆ๊ธฐ ๋‹ต์„ ๊ธธ๊ฒŒ ์ƒ์„ฑํ•˜๋Š” ๊ฒƒ๋ณด๋‹ค ๋‚ซ๋‹ค).
792
+ """
793
+ return END
794
+
795
+
796
+ def build_graph():
797
+ """
798
+ LangGraph StateGraph ๊ตฌ์„ฑ:
799
+ START -> init -> urls -> classify -> solve -> finalize -> END
800
+ """
801
+ g = StateGraph(AgentState)
802
+
803
+ g.add_node("init", node_init)
804
+ g.add_node("urls", node_extract_urls)
805
+ g.add_node("classify", node_classify)
806
+ g.add_node("solve", node_solve)
807
+ g.add_node("finalize", node_finalize)
808
+
809
+ g.add_edge(START, "init")
810
+ g.add_edge("init", "urls")
811
+ g.add_edge("urls", "classify")
812
+ g.add_edge("classify", "solve")
813
+ g.add_edge("solve", "finalize")
814
+ g.add_edge("finalize", END)
815
 
816
  return g.compile()
817
 
818
 
819
+ GRAPH = build_graph()
820
+
821
+
822
+ # =========================================================
823
+ # 8) Public API: BasicAgent
824
+ # - app.py๋Š” ์ด ํด๋ž˜์Šค๋ฅผ importํ•ด์„œ question_text๋งŒ ๋„˜๊ธด๋‹ค.
825
+ # =========================================================
826
  class BasicAgent:
827
  def __init__(self):
828
+ # ๊ทธ๋ž˜ํ”„๋Š” ๋ชจ๋“ˆ ๋กœ๋“œ ์‹œ ์ปดํŒŒ์ผ๋จ. ์—ฌ๊ธฐ์„œ๋Š” ์ƒํƒœ๋งŒ ์•Œ๋ฆผ.
829
+ print("โœ… GAIA Agent initialized (LangGraph StateGraph)")
830
 
831
  def __call__(self, question: str) -> str:
832
+ """
833
+ app.py์—์„œ question_text๋งŒ ๋ฐ›๋Š” ๊ณ ์ • ์ธํ„ฐํŽ˜์ด์Šค.
834
+ - recursion_limit์€ invoke config์—์„œ ์ œํ•œ (LangGraph ๋ฒ„์ „ ํ˜ธํ™˜)
835
+ """
836
+ state: AgentState = {
837
+ "question": question,
838
+ "task_type": "",
839
+ "urls": [],
840
+ "context": "",
841
+ "answer": "",
842
+ "steps": 0,
843
+ }
844
+
845
+ # LangGraph ๋ฌดํ•œ๋ฃจํ”„/์žฌ๊ท€ ๋ณดํ˜ธ
846
+ out = GRAPH.invoke(state, config={"recursion_limit": 12})
847
+ return clean_final_answer(out.get("answer", ""))
app.py CHANGED
@@ -74,7 +74,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
74
  print(f"Skipping item with missing task_id or question: {item}")
75
  continue
76
  try:
77
- submitted_answer = agent(question_text)
78
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
79
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
80
  except Exception as e:
 
74
  print(f"Skipping item with missing task_id or question: {item}")
75
  continue
76
  try:
77
+ submitted_answer = agent(question_text, task_id=task_id, api_url=api_url)
78
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
79
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
80
  except Exception as e:
requirements.txt CHANGED
@@ -1,8 +1,15 @@
1
  gradio
2
  requests
 
 
 
3
  langgraph
4
- langchain
5
- langchain-community
6
- langchain_openai
 
7
  duckduckgo-search
8
- ddgs
 
 
 
 
1
  gradio
2
  requests
3
+ pandas
4
+ openpyxl
5
+
6
  langgraph
7
+ langchain-openai
8
+ langchain-core
9
+
10
+ ddgs
11
  duckduckgo-search
12
+ youtube-transcript-api
13
+
14
+ pymupdf
15
+ python-chess