ahnhs2k commited on
Commit
bcc1b67
·
1 Parent(s): 1cdf0e9
Files changed (2) hide show
  1. app.py +54 -136
  2. requirements.txt +3 -1
app.py CHANGED
@@ -5,173 +5,91 @@ import gradio as gr
5
  import requests
6
  import inspect
7
  import pandas as pd
8
- from typing import Optional
9
 
10
  from langchain_openai import ChatOpenAI
11
- from langchain_core.messages import SystemMessage, HumanMessage
 
12
 
13
  # (Keep Constants as is)
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
- # =========================================================
18
- # Answer cleaning (EXACT MATCH μ΅œμ ν™”)
19
- # =========================================================
 
 
 
 
 
 
 
 
20
  def clean_answer(text: str) -> str:
21
  if not text:
22
  return ""
23
-
24
  s = text.strip()
25
-
26
- # ν”ν•œ 접두/포맷 제거
27
- s = re.sub(r"^(final\s*answer|answer)\s*:\s*", "", s, flags=re.IGNORECASE).strip()
28
-
29
- # μ½”λ“œλΈ”λ‘/λ§ˆν¬λ‹€μš΄ 제거
30
- s = s.strip("`").strip()
31
-
32
- # μ—¬λŸ¬ 쀄이면 첫 μ€„λ§Œ
33
  s = s.splitlines()[0].strip()
34
-
35
- # 양끝 λ”°μ˜΄ν‘œ 제거
36
- s = s.strip("\"'")
37
-
38
- # 끝에 . ν•˜λ‚˜ λΆ™λŠ” 버릇 제거 (단, μ•½μ–΄/μ†Œμˆ˜μ μ€ κ±΄λ“œλ¦¬λ©΄ μœ„ν—˜ν•˜λ‹ˆ 맀우 보수적으둜)
39
- if len(s) > 1 and s.endswith(".") and not re.search(r"\d\.$", s):
40
  s = s[:-1].strip()
41
-
42
- # λΆˆν•„μš”ν•œ 곡백 정리
43
- s = re.sub(r"\s+", " ", s).strip()
44
-
45
  return s
46
 
47
- # =========================================================
48
- # Robust request wrapper for GAIA server (429 λŒ€μ‘)
49
- # =========================================================
50
- def get_with_backoff(url: str, timeout: int = 15, max_retries: int = 6) -> requests.Response:
51
- for i in range(max_retries):
52
- try:
53
- r = requests.get(url, timeout=timeout)
54
- if r.status_code == 429:
55
- # μ§€μˆ˜ λ°±μ˜€ν”„ + μ§€ν„°
56
- sleep_s = min(30, (2 ** i) + random.uniform(0, 1.5))
57
- print(f"[WARN] 429 Too Many Requests. Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
58
- time.sleep(sleep_s)
59
- continue
60
- r.raise_for_status()
61
- return r
62
- except requests.exceptions.RequestException as e:
63
- if i == max_retries - 1:
64
- raise
65
- sleep_s = min(20, (2 ** i) + random.uniform(0, 1.0))
66
- print(f"[WARN] GET failed: {e}. Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
67
- time.sleep(sleep_s)
68
- raise RuntimeError("get_with_backoff exhausted retries")
69
-
70
- def post_with_backoff(url: str, json_data: dict, timeout: int = 60, max_retries: int = 5) -> requests.Response:
71
- for i in range(max_retries):
72
- try:
73
- r = requests.post(url, json=json_data, timeout=timeout)
74
- if r.status_code == 429:
75
- sleep_s = min(30, (2 ** i) + random.uniform(0, 1.5))
76
- print(f"[WARN] 429 Too Many Requests (POST). Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
77
- time.sleep(sleep_s)
78
- continue
79
- r.raise_for_status()
80
- return r
81
- except requests.exceptions.RequestException as e:
82
- if i == max_retries - 1:
83
- raise
84
- sleep_s = min(20, (2 ** i) + random.uniform(0, 1.0))
85
- print(f"[WARN] POST failed: {e}. Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
86
- time.sleep(sleep_s)
87
- raise RuntimeError("post_with_backoff exhausted retries")
88
-
89
- # =========================================================
90
- # LLM setup (OpenAI)
91
- # =========================================================
92
- # Space Secrets에 OPENAI_API_KEY ν•„μš”
93
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
94
- if OPENAI_API_KEY:
95
- os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
96
 
 
97
  llm = ChatOpenAI(
98
  model="gpt-4o-mini",
99
  temperature=0,
100
  max_tokens=96,
101
  )
102
 
103
- SYSTEM_PROMPT = """You are solving GAIA benchmark questions.
104
-
105
- Hard rules:
106
- - Think step by step internally, but DO NOT reveal your reasoning.
107
- - Verify arithmetic, units, dates, and entity names before finalizing.
108
- - Output ONLY the final answer (exactly what should be matched).
109
- - No explanation. No prefixes. No punctuation unless required by the answer itself.
110
- - If the answer is a number/date/name, output it in the simplest canonical form.
111
- """
112
 
113
- def build_user_prompt(question: str) -> str:
114
- return f"""Question:
115
- {question}
116
 
117
- Return ONLY the final answer.
118
- """
 
 
 
 
119
 
120
- # =========================================================
121
- # 2-pass solve: (1) answer (2) self-check and possibly revise
122
- # =========================================================
123
- def solve_with_selfcheck(question: str) -> str:
124
- # Pass 1: initial solve
125
- msg1 = [
126
- SystemMessage(content=SYSTEM_PROMPT),
127
- HumanMessage(content=build_user_prompt(question)),
128
- ]
129
- r1 = llm.invoke(msg1)
130
- a1 = clean_answer(getattr(r1, "content", "") or "")
131
-
132
- # Pass 2: self-check (짧게 κ²€μ¦λ§Œ)
133
- # - GAIAλŠ” "μ •λ‹΅λ§Œ"을 μš”κ΅¬ν•˜λ―€λ‘œ, 검증도 좜λ ₯은 μ •λ‹΅λ§Œ ν•˜κ²Œ κ°•μ œ
134
- check_prompt = f"""You previously answered: {a1}
135
-
136
- Now do a silent verification. If the answer is wrong or not in canonical exact-match form, output the corrected final answer.
137
- If it is correct, output exactly the same answer again.
138
 
139
  Question:
140
  {question}
141
 
142
- Return ONLY the final answer.
143
- """
144
- msg2 = [
145
- SystemMessage(content=SYSTEM_PROMPT),
146
- HumanMessage(content=check_prompt),
147
- ]
148
- r2 = llm.invoke(msg2)
149
- a2 = clean_answer(getattr(r2, "content", "") or "")
150
-
151
- # λ‘˜ λ‹€ λΉ„μ—ˆμœΌλ©΄ μ‹€νŒ¨ 처리
152
- if not a2 and a1:
153
- return a1
154
- return a2
155
-
156
- # =========================================================
157
- # Basic Agent Definition (ν…œν”Œλ¦Ώ μœ μ§€, μ—¬κΈ°λ§Œ β€œμ§„μ§œβ€λ‘œ λ°”κΏˆ)
158
- # =========================================================
159
- class BasicAgent:
160
- def __init__(self):
161
- print("BasicAgent initialized (LLM + self-check).")
162
 
163
- def __call__(self, question: str) -> str:
164
- print(f"Agent received question (first 50 chars): {question[:50]}...")
165
-
166
- try:
167
- answer = solve_with_selfcheck(question)
168
- except Exception as e:
169
- # LLM μ—λŸ¬κ°€ λ‚˜λ©΄ 빈 λ‹΅ λ‚΄λ©΄ 0μ μ΄λ‹ˆ, μ΅œμ†Œν•œ μ—λŸ¬λ₯Ό λ‘œκΉ…ν•˜κ³  빈 λ¬Έμžμ—΄ λ°˜ν™˜
170
- # (μ—¬κΈ°μ„œ λ‹€λ₯Έ fallback λ„£κ³  μ‹ΆμœΌλ©΄ 넣을 수 있음)
171
- print(f"[ERROR] LLM call failed: {e}")
172
- answer = ""
173
 
174
- print(f"Agent returning answer: {answer}")
175
  return answer
176
 
177
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
5
  import requests
6
  import inspect
7
  import pandas as pd
8
+ from typing import TypedDict
9
 
10
  from langchain_openai import ChatOpenAI
11
+ from langchain_core.messages import HumanMessage
12
+ from langchain_community.tools import DuckDuckGoSearchRun
13
 
14
  # (Keep Constants as is)
15
  # --- Constants ---
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
# System rules sent with every question: force the model to emit only the
# bare final answer so the grader can exact-match it, and to treat the
# supplied search results as its evidence.
SYSTEM_PROMPT = """
You are solving GAIA benchmark questions.

You MUST:
- Use the provided search results as the source of truth.
- Reason internally but DO NOT show reasoning.
- Output ONLY the final answer.
- No explanation.
- No extra text.
"""
28
+
29
def clean_answer(text: str) -> str:
    """Normalize an LLM reply into exact-match form for GAIA scoring.

    Removes a leading "Final answer:"/"Answer:" prefix (case-insensitive),
    keeps only the first line, strips surrounding quotes/backticks, drops a
    single trailing period (unless it follows a digit, which would corrupt
    decimals such as "3."), and collapses internal whitespace runs.

    Args:
        text: Raw model output; may be empty/falsy.

    Returns:
        The cleaned single-line answer, or "" for empty input.
    """
    if not text:
        return ""
    s = text.strip()
    # Case-insensitive, anchored prefix removal. str.replace() was both
    # case-sensitive (missed "final answer:") and unanchored (would also
    # delete the phrase mid-answer).
    lowered = s.lower()
    for prefix in ("final answer:", "final answer :", "answer:", "answer :"):
        if lowered.startswith(prefix):
            s = s[len(prefix):].strip()
            break
    # Guard: an input that was only the prefix (e.g. "Answer:") previously
    # crashed on splitlines()[0] with an IndexError.
    if not s:
        return ""
    # Exact-match scoring expects a single line.
    s = s.splitlines()[0].strip()
    s = s.strip('"\'`')
    # Drop one trailing period, but never right after a digit -- stripping
    # there would mangle decimals/version strings.
    if len(s) > 1 and s.endswith(".") and not s[-2].isdigit():
        s = s[:-1].strip()
    # Collapse whitespace runs so spacing differences never fail the match.
    return " ".join(s.split())
39
 
40
# -------------------------------
# State
# -------------------------------
class AgentState(TypedDict):
    """Pipeline state: the incoming question and the produced answer."""
    # The raw GAIA question text.
    question: str
    # The cleaned final answer to submit.
    answer: str
46
+
47
# -------------------------------
# Tools & LLM
# -------------------------------
# Web search tool (free tier, no API key required).
search_tool = DuckDuckGoSearchRun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
# LLM used for answering. temperature=0 for deterministic output; small
# max_tokens budget since answers are short exact-match strings.
# NOTE(review): presumably reads OPENAI_API_KEY from the environment
# (standard ChatOpenAI behavior) -- confirm the Space secret is set.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    max_tokens=96,
)
59
 
60
# -------------------------------
# Agent
# -------------------------------
class BasicAgent:
    """Search-augmented GAIA agent: one web search, then one LLM call."""

    def __init__(self):
        print("Search-based GAIA Agent initialized.")

    def __call__(self, question: str) -> str:
        """Answer a GAIA question and return it in exact-match form."""
        print(f"Question: {question[:80]}...")

        # 1) Gather evidence via web search; degrade to an empty evidence
        #    string instead of failing the whole question.
        try:
            evidence = search_tool.run(question)
        except Exception as err:
            print("Search error:", err)
            evidence = ""

        # 2) Assemble the prompt: system rules, question, then evidence.
        prompt = f"""
{SYSTEM_PROMPT}

Question:
{question}

Search Results:
{evidence}
""".strip()

        # 3) Single LLM call, normalized for exact-match scoring.
        reply = llm.invoke([HumanMessage(content=prompt)])
        answer = clean_answer(reply.content)

        print(f"Answer: {answer}")
        return answer
94
 
95
  def run_and_submit_all( profile: gr.OAuthProfile | None):
requirements.txt CHANGED
@@ -2,4 +2,6 @@ gradio
2
  requests
3
  langgraph
4
  langchain_openai
5
- langchain_core
 
 
 
2
  requests
3
  langgraph
4
  langchain_openai
5
+ langchain_core
6
+ langchain-community
7
+ duckduckgo-search