Spaces:
Sleeping
Sleeping
commit
Browse files
app.py
CHANGED
|
@@ -9,7 +9,6 @@ from typing import TypedDict
|
|
| 9 |
|
| 10 |
from langchain_openai import ChatOpenAI
|
| 11 |
from langchain_core.messages import HumanMessage
|
| 12 |
-
from langchain_community.tools import DuckDuckGoSearchRun
|
| 13 |
|
| 14 |
# (Keep Constants as is)
|
| 15 |
# --- Constants ---
|
|
@@ -26,13 +25,6 @@ You MUST:
|
|
| 26 |
- No extra text.
|
| 27 |
"""
|
| 28 |
|
| 29 |
-
RULES = {
|
| 30 |
-
"number": "Answer with ONLY the number. No words.",
|
| 31 |
-
"date": "Answer with ONLY the year or date. No words.",
|
| 32 |
-
"yesno": "Answer with ONLY Yes or No.",
|
| 33 |
-
"fact": "Answer with ONLY the final answer."
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
def clean_answer(text: str) -> str:
|
| 37 |
if not text:
|
| 38 |
return ""
|
|
@@ -42,22 +34,8 @@ def clean_answer(text: str) -> str:
|
|
| 42 |
s = s.strip('"\'`')
|
| 43 |
if len(s) > 1 and s.endswith("."):
|
| 44 |
s = s[:-1].strip()
|
| 45 |
-
|
| 46 |
-
if s.lower() in ["yes", "no"]:
|
| 47 |
-
s = s.capitalize()
|
| 48 |
-
|
| 49 |
return s
|
| 50 |
|
| 51 |
-
def classify_question(q: str) -> str:
|
| 52 |
-
ql = q.lower().strip()
|
| 53 |
-
if any(k in ql for k in ["how many", "how much", "number of", "population"]):
|
| 54 |
-
return "number"
|
| 55 |
-
if any(k in ql for k in ["what year", "when", "date"]):
|
| 56 |
-
return "date"
|
| 57 |
-
if ql.startswith(("is ", "are ", "was ", "were ", "did ", "does ")):
|
| 58 |
-
return "yesno"
|
| 59 |
-
return "fact"
|
| 60 |
-
|
| 61 |
# -------------------------------
|
| 62 |
# State
|
| 63 |
# -------------------------------
|
|
@@ -68,8 +46,6 @@ class AgentState(TypedDict):
|
|
| 68 |
# -------------------------------
|
| 69 |
# Tools & LLM
|
| 70 |
# -------------------------------
|
| 71 |
-
# Search tool (무료)
|
| 72 |
-
search_tool = DuckDuckGoSearchRun()
|
| 73 |
|
| 74 |
# LLM (OpenAI — 이미 네 환경에서 동작 확인됨)
|
| 75 |
llm = ChatOpenAI(
|
|
@@ -83,56 +59,32 @@ llm = ChatOpenAI(
|
|
| 83 |
# -------------------------------
|
| 84 |
class BasicAgent:
|
| 85 |
def __init__(self):
|
| 86 |
-
print("
|
| 87 |
|
| 88 |
def __call__(self, question: str) -> str:
|
| 89 |
print(f"Question: {question[:80]}...")
|
| 90 |
|
| 91 |
-
qtype = classify_question(question)
|
| 92 |
-
|
| 93 |
-
# 🔹 1단계: 검색 없이 먼저 답 시도
|
| 94 |
-
base_prompt = f"""
|
| 95 |
-
You are solving a GAIA benchmark question.
|
| 96 |
-
|
| 97 |
-
Rules:
|
| 98 |
-
- Answer with ONLY the final answer
|
| 99 |
-
- No explanation
|
| 100 |
-
- No extra text
|
| 101 |
-
|
| 102 |
-
Question:
|
| 103 |
-
{question}
|
| 104 |
-
""".strip()
|
| 105 |
-
|
| 106 |
-
base_response = llm.invoke([HumanMessage(content=base_prompt)])
|
| 107 |
-
base_answer = clean_answer(base_response.content)
|
| 108 |
-
|
| 109 |
-
# ๐น ์ซ์/์ฐ๋/YesNo๊ฐ ๋ช
ํํ๋ฉด ๋ฐ๋ก ๋ฐํ
|
| 110 |
-
if qtype in ["number", "date", "yesno"] and base_answer:
|
| 111 |
-
print(f"Answer (no search): {base_answer}")
|
| 112 |
-
return base_answer
|
| 113 |
-
|
| 114 |
-
# 🔹 2단계: 그래도 애매하면 검색 사용
|
| 115 |
-
try:
|
| 116 |
-
search_result = search_tool.run(question)
|
| 117 |
-
except Exception:
|
| 118 |
-
search_result = ""
|
| 119 |
-
|
| 120 |
prompt = f"""
|
| 121 |
-
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
|
| 130 |
response = llm.invoke([HumanMessage(content=prompt)])
|
| 131 |
answer = clean_answer(response.content)
|
| 132 |
|
| 133 |
-
print(f"Answer
|
| 134 |
return answer
|
| 135 |
-
|
| 136 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 137 |
"""
|
| 138 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
|
|
|
| 9 |
|
| 10 |
from langchain_openai import ChatOpenAI
|
| 11 |
from langchain_core.messages import HumanMessage
|
|
|
|
| 12 |
|
| 13 |
# (Keep Constants as is)
|
| 14 |
# --- Constants ---
|
|
|
|
| 25 |
- No extra text.
|
| 26 |
"""
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def clean_answer(text: str) -> str:
|
| 29 |
if not text:
|
| 30 |
return ""
|
|
|
|
| 34 |
s = s.strip('"\'`')
|
| 35 |
if len(s) > 1 and s.endswith("."):
|
| 36 |
s = s[:-1].strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
return s
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# -------------------------------
|
| 40 |
# State
|
| 41 |
# -------------------------------
|
|
|
|
| 46 |
# -------------------------------
|
| 47 |
# Tools & LLM
|
| 48 |
# -------------------------------
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# LLM (OpenAI — 이미 네 환경에서 동작 확인됨)
|
| 51 |
llm = ChatOpenAI(
|
|
|
|
| 59 |
# -------------------------------
|
| 60 |
class BasicAgent:
|
| 61 |
def __init__(self):
|
| 62 |
+
print("GAIA LLM-only Agent initialized.")
|
| 63 |
|
| 64 |
def __call__(self, question: str) -> str:
|
| 65 |
print(f"Question: {question[:80]}...")
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
prompt = f"""
|
| 68 |
+
You are solving a GAIA benchmark question.
|
| 69 |
|
| 70 |
+
Rules:
|
| 71 |
+
- Answer with ONLY the final answer.
|
| 72 |
+
- No explanation.
|
| 73 |
+
- No formatting.
|
| 74 |
+
- No extra text.
|
| 75 |
+
- If the answer is a number, output ONLY the number.
|
| 76 |
+
- If the answer is Yes/No, output ONLY Yes or No.
|
| 77 |
|
| 78 |
+
Question:
|
| 79 |
+
{question}
|
| 80 |
+
""".strip()
|
| 81 |
|
| 82 |
response = llm.invoke([HumanMessage(content=prompt)])
|
| 83 |
answer = clean_answer(response.content)
|
| 84 |
|
| 85 |
+
print(f"Answer: {answer}")
|
| 86 |
return answer
|
| 87 |
+
|
| 88 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 89 |
"""
|
| 90 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|