Spaces:
Sleeping
Sleeping
commit
Browse files- app.py +54 -136
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -5,173 +5,91 @@ import gradio as gr
|
|
| 5 |
import requests
|
| 6 |
import inspect
|
| 7 |
import pandas as pd
|
| 8 |
-
from typing import
|
| 9 |
|
| 10 |
from langchain_openai import ChatOpenAI
|
| 11 |
-
from langchain_core.messages import
|
|
|
|
| 12 |
|
| 13 |
# (Keep Constants as is)
|
| 14 |
# --- Constants ---
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
def clean_answer(text: str) -> str:
    """Normalize a raw model reply into a bare, single-line answer.

    Steps, in order: trim whitespace, drop a leading "Final answer:" /
    "Answer:" label (case-insensitive), strip surrounding backticks, keep only
    the first line, strip surrounding quotes, drop one trailing period unless
    it directly follows a digit, and collapse whitespace runs to one space.

    Args:
        text: Raw reply text; may be empty or None-like.

    Returns:
        The cleaned answer, or "" when nothing is left after cleaning.
    """
    if not text:
        return ""

    s = text.strip()

    # Remove common lead-in labels / formatting.
    s = re.sub(r"^(final\s*answer|answer)\s*:\s*", "", s, flags=re.IGNORECASE).strip()

    # Remove code-fence / markdown backticks.
    s = s.strip("`").strip()

    # Replies such as "```", "Answer:" or pure whitespace are empty by now;
    # "".splitlines() is [] and indexing it would raise IndexError.
    if not s:
        return ""

    # If the reply spans several lines, keep only the first one.
    s = s.splitlines()[0].strip()

    s = s.strip("\"'")

    # Drop a habitual trailing period. Deliberately conservative: a period
    # that follows a digit is left alone so numeric forms are not damaged.
    if len(s) > 1 and s.endswith(".") and not re.search(r"\d\.$", s):
        s = s[:-1].strip()

    # Tidy up redundant whitespace.
    s = re.sub(r"\s+", " ", s).strip()

    return s
|
| 46 |
|
| 47 |
-
#
|
| 48 |
-
#
|
| 49 |
-
#
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
continue
|
| 60 |
-
r.raise_for_status()
|
| 61 |
-
return r
|
| 62 |
-
except requests.exceptions.RequestException as e:
|
| 63 |
-
if i == max_retries - 1:
|
| 64 |
-
raise
|
| 65 |
-
sleep_s = min(20, (2 ** i) + random.uniform(0, 1.0))
|
| 66 |
-
print(f"[WARN] GET failed: {e}. Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
|
| 67 |
-
time.sleep(sleep_s)
|
| 68 |
-
raise RuntimeError("get_with_backoff exhausted retries")
|
| 69 |
-
|
| 70 |
-
def post_with_backoff(url: str, json_data: dict, timeout: int = 60, max_retries: int = 5) -> requests.Response:
    """POST JSON to ``url``, retrying transient failures with jittered backoff.

    Retried: HTTP 429 responses, and ``requests`` exceptions that carry no
    4xx status (connection errors, timeouts, 5xx responses). A 4xx response
    other than 429 is raised immediately, because repeating the same request
    cannot change the outcome.

    Args:
        url: Target endpoint.
        json_data: Payload sent as the JSON body.
        timeout: Per-request timeout in seconds.
        max_retries: Maximum number of attempts.

    Returns:
        The first response whose status passes ``raise_for_status()``.

    Raises:
        requests.exceptions.RequestException: On a non-retryable error, or
            when the last attempt fails with an exception.
        RuntimeError: When every attempt was answered with 429 (or
            ``max_retries`` is not positive).
    """
    for i in range(max_retries):
        last_attempt = i == max_retries - 1
        try:
            r = requests.post(url, json=json_data, timeout=timeout)
            if r.status_code == 429:
                if last_attempt:
                    # No attempt left to wait for; skip the pointless sleep.
                    break
                sleep_s = min(30, (2 ** i) + random.uniform(0, 1.5))
                print(f"[WARN] 429 Too Many Requests (POST). Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
                time.sleep(sleep_s)
                continue
            r.raise_for_status()
            return r
        except requests.exceptions.RequestException as e:
            # 429 is handled above, so any 4xx seen here is a client error
            # that a retry will not fix.
            status = getattr(getattr(e, "response", None), "status_code", None)
            client_error = status is not None and 400 <= status < 500
            if last_attempt or client_error:
                raise
            sleep_s = min(20, (2 ** i) + random.uniform(0, 1.0))
            print(f"[WARN] POST failed: {e}. Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
            time.sleep(sleep_s)
    raise RuntimeError("post_with_backoff exhausted retries")
|
| 88 |
-
|
| 89 |
-
# =========================================================
|
| 90 |
-
# LLM setup (OpenAI)
|
| 91 |
-
# =========================================================
|
| 92 |
-
# OPENAI_API_KEY must be configured in the Space Secrets.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
if OPENAI_API_KEY:
    # Writes the value just read back into os.environ under the same name.
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Shared chat model: temperature 0 and a 96-token cap on each reply.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    max_tokens=96,
)
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
-
|
| 109 |
-
- No explanation. No prefixes. No punctuation unless required by the answer itself.
|
| 110 |
-
- If the answer is a number/date/name, output it in the simplest canonical form.
|
| 111 |
-
"""
|
| 112 |
|
| 113 |
-
def
|
| 114 |
-
|
| 115 |
-
{question}
|
| 116 |
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
-
#
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
def solve_with_selfcheck(question: str) -> str:
|
| 124 |
-
# Pass 1: initial solve
|
| 125 |
-
msg1 = [
|
| 126 |
-
SystemMessage(content=SYSTEM_PROMPT),
|
| 127 |
-
HumanMessage(content=build_user_prompt(question)),
|
| 128 |
-
]
|
| 129 |
-
r1 = llm.invoke(msg1)
|
| 130 |
-
a1 = clean_answer(getattr(r1, "content", "") or "")
|
| 131 |
-
|
| 132 |
-
    # Pass 2: self-check (brief verification only)
|
| 133 |
-
    # - GAIA expects "the answer only", so the verification pass is also forced to output nothing but the answer
|
| 134 |
-
check_prompt = f"""You previously answered: {a1}
|
| 135 |
-
|
| 136 |
-
Now do a silent verification. If the answer is wrong or not in canonical exact-match form, output the corrected final answer.
|
| 137 |
-
If it is correct, output exactly the same answer again.
|
| 138 |
|
| 139 |
Question:
|
| 140 |
{question}
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
SystemMessage(content=SYSTEM_PROMPT),
|
| 146 |
-
HumanMessage(content=check_prompt),
|
| 147 |
-
]
|
| 148 |
-
r2 = llm.invoke(msg2)
|
| 149 |
-
a2 = clean_answer(getattr(r2, "content", "") or "")
|
| 150 |
-
|
| 151 |
-
    # Fall back to the first-pass answer when the self-check is empty; if both are empty the result is an empty (failed) answer
|
| 152 |
-
if not a2 and a1:
|
| 153 |
-
return a1
|
| 154 |
-
return a2
|
| 155 |
-
|
| 156 |
-
# =========================================================
|
| 157 |
-
# Basic Agent Definition (template kept as is; only this part is replaced with a real implementation)
|
| 158 |
-
# =========================================================
|
| 159 |
-
class BasicAgent:
|
| 160 |
-
def __init__(self):
|
| 161 |
-
print("BasicAgent initialized (LLM + self-check).")
|
| 162 |
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
try:
|
| 167 |
-
answer = solve_with_selfcheck(question)
|
| 168 |
-
except Exception as e:
|
| 169 |
-
            # An empty answer scores 0 when the LLM call fails, but at least log the error and return an empty string
|
| 170 |
-
            # (a different fallback can be added here if desired)
|
| 171 |
-
print(f"[ERROR] LLM call failed: {e}")
|
| 172 |
-
answer = ""
|
| 173 |
|
| 174 |
-
print(f"
|
| 175 |
return answer
|
| 176 |
|
| 177 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
|
|
| 5 |
import requests
|
| 6 |
import inspect
|
| 7 |
import pandas as pd
|
| 8 |
+
from typing import TypedDict
|
| 9 |
|
| 10 |
from langchain_openai import ChatOpenAI
|
| 11 |
+
from langchain_core.messages import HumanMessage
|
| 12 |
+
from langchain_community.tools import DuckDuckGoSearchRun
|
| 13 |
|
| 14 |
# (Keep Constants as is)
|
| 15 |
# --- Constants ---
|
| 16 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 17 |
|
| 18 |
+
SYSTEM_PROMPT = """
|
| 19 |
+
You are solving GAIA benchmark questions.
|
| 20 |
+
|
| 21 |
+
You MUST:
|
| 22 |
+
- Use the provided search results as the source of truth.
|
| 23 |
+
- Reason internally but DO NOT show reasoning.
|
| 24 |
+
- Output ONLY the final answer.
|
| 25 |
+
- No explanation.
|
| 26 |
+
- No extra text.
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
def clean_answer(text: str) -> str:
    """Reduce a raw model reply to a bare, single-line answer.

    Steps, in order: trim whitespace and surrounding backticks, drop any
    leading "Final answer:" / "Answer:" label (case-insensitive, start of the
    reply only), keep the first non-empty line, strip surrounding quotes and
    backticks, and drop one trailing period.

    Args:
        text: Raw reply text; may be empty or None-like.

    Returns:
        The cleaned answer, or "" when nothing is left after cleaning.
    """
    # Local import: the file's top-level import block is only partly visible.
    import re

    if not text:
        return ""

    # Peel a wrapping code fence before looking at lines, so a reply such as
    # "```\n42\n```" yields "42" rather than the fence itself.
    s = text.strip().strip("`").strip()

    # Only a label at the very start is removed; removing the text anywhere
    # would corrupt answers that legitimately contain "Answer:".
    s = re.sub(r"^(?:(?:final\s*answer|answer)\s*:\s*)+", "", s, flags=re.IGNORECASE)

    # First non-empty line. Whitespace-only or label-only replies leave no
    # lines at all, which previously raised IndexError on splitlines()[0].
    lines = [ln.strip() for ln in s.splitlines() if ln.strip()]
    if not lines:
        return ""

    s = lines[0].strip("\"'`").strip()
    if len(s) > 1 and s.endswith("."):
        s = s[:-1].strip()
    return s
|
| 39 |
|
| 40 |
+
# -------------------------------
|
| 41 |
+
# State
|
| 42 |
+
# -------------------------------
|
| 43 |
+
class AgentState(TypedDict):
    """Typed dictionary pairing a question with its answer.

    NOTE(review): nothing in the visible part of the file reads or writes
    this type; confirm it is still needed.
    """

    question: str  # question text
    answer: str  # answer text
|
| 46 |
+
|
| 47 |
+
# -------------------------------
|
| 48 |
+
# Tools & LLM
|
| 49 |
+
# -------------------------------
|
| 50 |
+
# Search tool (free to use)
search_tool = DuckDuckGoSearchRun()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
+
# LLM (OpenAI; already confirmed to work in the author's environment).
# Temperature 0 and a 96-token cap on each reply.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    max_tokens=96,
)
|
| 59 |
|
| 60 |
+
# -------------------------------
|
| 61 |
+
# Agent
|
| 62 |
+
# -------------------------------
|
| 63 |
+
class BasicAgent:
    """Answer a question from web-search evidence with a single LLM call.

    Calling the instance runs ``search_tool`` on the question, builds one
    prompt from ``SYSTEM_PROMPT``, the question and the search output, sends
    it to ``llm`` and returns the reply after ``clean_answer`` normalization.
    """

    def __init__(self):
        print("Search-based GAIA Agent initialized.")

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten a chat-message ``content`` value into plain text."""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # Message content may be a list of strings and/or dict blocks;
            # keep only the textual pieces.
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            return "".join(pieces)
        return "" if content is None else str(content)

    def __call__(self, question: str) -> str:
        """Return the cleaned answer for ``question``.

        Args:
            question: Question text to answer.

        Returns:
            The normalized answer; "" when the LLM call fails or the reply is
            empty. A search failure is logged and the LLM is still queried
            with empty search results.
        """
        print(f"Question: {question[:80]}...")

        # 1) Search
        try:
            search_result = search_tool.run(question)
        except Exception as e:
            print("Search error:", e)
            search_result = ""

        # 2) Prompt with evidence
        prompt = f"""
{SYSTEM_PROMPT}

Question:
{question}

Search Results:
{search_result}
""".strip()

        # 3) LLM Answer. Guarded like the search step, so one failed call
        # yields an empty answer instead of an exception for the caller.
        try:
            response = llm.invoke([HumanMessage(content=prompt)])
        except Exception as e:
            print(f"[ERROR] LLM call failed: {e}")
            answer = ""
        else:
            raw = self._content_to_text(getattr(response, "content", ""))
            answer = clean_answer(raw)

        print(f"Answer: {answer}")
        return answer
|
| 94 |
|
| 95 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
requirements.txt
CHANGED
|
@@ -2,4 +2,6 @@ gradio
|
|
| 2 |
requests
|
| 3 |
langgraph
|
| 4 |
langchain_openai
|
| 5 |
-
langchain_core
|
|
|
|
|
|
|
|
|
| 2 |
requests
|
| 3 |
langgraph
|
| 4 |
langchain_openai
|
| 5 |
+
langchain_core
|
| 6 |
+
langchain-community
|
| 7 |
+
duckduckgo-search
|