ahnhs2k commited on
Commit
bcc1b67
·
1 Parent(s): 1cdf0e9
Files changed (2) hide show
  1. app.py +54 -136
  2. requirements.txt +3 -1
app.py CHANGED
@@ -5,173 +5,91 @@ import gradio as gr
5
  import requests
6
  import inspect
7
  import pandas as pd
8
- from typing import Optional
9
 
10
  from langchain_openai import ChatOpenAI
11
- from langchain_core.messages import SystemMessage, HumanMessage
 
12
 
13
  # (Keep Constants as is)
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
- # =========================================================
18
- # Answer cleaning (EXACT MATCH μ΅œμ ν™”)
19
- # =========================================================
 
 
 
 
 
 
 
 
20
  def clean_answer(text: str) -> str:
21
  if not text:
22
  return ""
23
-
24
  s = text.strip()
25
-
26
- # ν”ν•œ 접두/포맷 제거
27
- s = re.sub(r"^(final\s*answer|answer)\s*:\s*", "", s, flags=re.IGNORECASE).strip()
28
-
29
- # μ½”λ“œλΈ”λ‘/λ§ˆν¬λ‹€μš΄ 제거
30
- s = s.strip("`").strip()
31
-
32
- # μ—¬λŸ¬ 쀄이면 첫 μ€„λ§Œ
33
  s = s.splitlines()[0].strip()
34
-
35
- # 양끝 λ”°μ˜΄ν‘œ 제거
36
- s = s.strip("\"'")
37
-
38
- # 끝에 . ν•˜λ‚˜ λΆ™λŠ” 버릇 제거 (단, μ•½μ–΄/μ†Œμˆ˜μ μ€ κ±΄λ“œλ¦¬λ©΄ μœ„ν—˜ν•˜λ‹ˆ 맀우 보수적으둜)
39
- if len(s) > 1 and s.endswith(".") and not re.search(r"\d\.$", s):
40
  s = s[:-1].strip()
41
-
42
- # λΆˆν•„μš”ν•œ 곡백 정리
43
- s = re.sub(r"\s+", " ", s).strip()
44
-
45
  return s
46
 
47
- # =========================================================
48
- # Robust request wrapper for GAIA server (429 λŒ€μ‘)
49
- # =========================================================
50
- def get_with_backoff(url: str, timeout: int = 15, max_retries: int = 6) -> requests.Response:
51
- for i in range(max_retries):
52
- try:
53
- r = requests.get(url, timeout=timeout)
54
- if r.status_code == 429:
55
- # μ§€μˆ˜ λ°±μ˜€ν”„ + μ§€ν„°
56
- sleep_s = min(30, (2 ** i) + random.uniform(0, 1.5))
57
- print(f"[WARN] 429 Too Many Requests. Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
58
- time.sleep(sleep_s)
59
- continue
60
- r.raise_for_status()
61
- return r
62
- except requests.exceptions.RequestException as e:
63
- if i == max_retries - 1:
64
- raise
65
- sleep_s = min(20, (2 ** i) + random.uniform(0, 1.0))
66
- print(f"[WARN] GET failed: {e}. Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
67
- time.sleep(sleep_s)
68
- raise RuntimeError("get_with_backoff exhausted retries")
69
-
70
- def post_with_backoff(url: str, json_data: dict, timeout: int = 60, max_retries: int = 5) -> requests.Response:
71
- for i in range(max_retries):
72
- try:
73
- r = requests.post(url, json=json_data, timeout=timeout)
74
- if r.status_code == 429:
75
- sleep_s = min(30, (2 ** i) + random.uniform(0, 1.5))
76
- print(f"[WARN] 429 Too Many Requests (POST). Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
77
- time.sleep(sleep_s)
78
- continue
79
- r.raise_for_status()
80
- return r
81
- except requests.exceptions.RequestException as e:
82
- if i == max_retries - 1:
83
- raise
84
- sleep_s = min(20, (2 ** i) + random.uniform(0, 1.0))
85
- print(f"[WARN] POST failed: {e}. Sleeping {sleep_s:.2f}s then retry {i+1}/{max_retries}...")
86
- time.sleep(sleep_s)
87
- raise RuntimeError("post_with_backoff exhausted retries")
88
-
89
- # =========================================================
90
- # LLM setup (OpenAI)
91
- # =========================================================
92
- # Space Secrets에 OPENAI_API_KEY ν•„μš”
93
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
94
- if OPENAI_API_KEY:
95
- os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
96
 
 
97
  llm = ChatOpenAI(
98
  model="gpt-4o-mini",
99
  temperature=0,
100
  max_tokens=96,
101
  )
102
 
103
- SYSTEM_PROMPT = """You are solving GAIA benchmark questions.
104
-
105
- Hard rules:
106
- - Think step by step internally, but DO NOT reveal your reasoning.
107
- - Verify arithmetic, units, dates, and entity names before finalizing.
108
- - Output ONLY the final answer (exactly what should be matched).
109
- - No explanation. No prefixes. No punctuation unless required by the answer itself.
110
- - If the answer is a number/date/name, output it in the simplest canonical form.
111
- """
112
 
113
- def build_user_prompt(question: str) -> str:
114
- return f"""Question:
115
- {question}
116
 
117
- Return ONLY the final answer.
118
- """
 
 
 
 
119
 
120
- # =========================================================
121
- # 2-pass solve: (1) answer (2) self-check and possibly revise
122
- # =========================================================
123
- def solve_with_selfcheck(question: str) -> str:
124
- # Pass 1: initial solve
125
- msg1 = [
126
- SystemMessage(content=SYSTEM_PROMPT),
127
- HumanMessage(content=build_user_prompt(question)),
128
- ]
129
- r1 = llm.invoke(msg1)
130
- a1 = clean_answer(getattr(r1, "content", "") or "")
131
-
132
- # Pass 2: self-check (짧게 κ²€μ¦λ§Œ)
133
- # - GAIAλŠ” "μ •λ‹΅λ§Œ"을 μš”κ΅¬ν•˜λ―€λ‘œ, 검증도 좜λ ₯은 μ •λ‹΅λ§Œ ν•˜κ²Œ κ°•μ œ
134
- check_prompt = f"""You previously answered: {a1}
135
-
136
- Now do a silent verification. If the answer is wrong or not in canonical exact-match form, output the corrected final answer.
137
- If it is correct, output exactly the same answer again.
138
 
139
  Question:
140
  {question}
141
 
142
- Return ONLY the final answer.
143
- """
144
- msg2 = [
145
- SystemMessage(content=SYSTEM_PROMPT),
146
- HumanMessage(content=check_prompt),
147
- ]
148
- r2 = llm.invoke(msg2)
149
- a2 = clean_answer(getattr(r2, "content", "") or "")
150
-
151
- # λ‘˜ λ‹€ λΉ„μ—ˆμœΌλ©΄ μ‹€νŒ¨ 처리
152
- if not a2 and a1:
153
- return a1
154
- return a2
155
-
156
- # =========================================================
157
- # Basic Agent Definition (ν…œν”Œλ¦Ώ μœ μ§€, μ—¬κΈ°λ§Œ β€œμ§„μ§œβ€λ‘œ λ°”κΏˆ)
158
- # =========================================================
159
- class BasicAgent:
160
- def __init__(self):
161
- print("BasicAgent initialized (LLM + self-check).")
162
 
163
- def __call__(self, question: str) -> str:
164
- print(f"Agent received question (first 50 chars): {question[:50]}...")
165
-
166
- try:
167
- answer = solve_with_selfcheck(question)
168
- except Exception as e:
169
- # LLM μ—λŸ¬κ°€ λ‚˜λ©΄ 빈 λ‹΅ λ‚΄λ©΄ 0μ μ΄λ‹ˆ, μ΅œμ†Œν•œ μ—λŸ¬λ₯Ό λ‘œκΉ…ν•˜κ³  빈 λ¬Έμžμ—΄ λ°˜ν™˜
170
- # (μ—¬κΈ°μ„œ λ‹€λ₯Έ fallback λ„£κ³  μ‹ΆμœΌλ©΄ 넣을 수 있음)
171
- print(f"[ERROR] LLM call failed: {e}")
172
- answer = ""
173
 
174
- print(f"Agent returning answer: {answer}")
175
  return answer
176
 
177
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
5
  import requests
6
  import inspect
7
  import pandas as pd
8
+ from typing import TypedDict
9
 
10
  from langchain_openai import ChatOpenAI
11
+ from langchain_core.messages import HumanMessage
12
+ from langchain_community.tools import DuckDuckGoSearchRun
13
 
14
  # (Keep Constants as is)
15
  # --- Constants ---
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
# System rules sent with every question: force the model to emit only the
# bare final answer so the grader can exact-match it, and to treat the
# supplied search results as its evidence.
SYSTEM_PROMPT = """
You are solving GAIA benchmark questions.

You MUST:
- Use the provided search results as the source of truth.
- Reason internally but DO NOT show reasoning.
- Output ONLY the final answer.
- No explanation.
- No extra text.
"""
28
+
29
def clean_answer(text: str) -> str:
    """Normalize an LLM reply into exact-match form for GAIA scoring.

    Removes a leading "Final answer:"/"Answer:" prefix (case-insensitive),
    keeps only the first line, strips surrounding quotes/backticks, drops a
    single trailing period (unless it follows a digit, which would corrupt
    decimals such as "3."), and collapses internal whitespace runs.

    Args:
        text: Raw model output; may be empty/falsy.

    Returns:
        The cleaned single-line answer, or "" for empty input.
    """
    if not text:
        return ""
    s = text.strip()
    # Case-insensitive, anchored prefix removal. str.replace() was both
    # case-sensitive (missed "final answer:") and unanchored (would also
    # delete the phrase mid-answer).
    lowered = s.lower()
    for prefix in ("final answer:", "final answer :", "answer:", "answer :"):
        if lowered.startswith(prefix):
            s = s[len(prefix):].strip()
            break
    # Guard: an input that was only the prefix (e.g. "Answer:") previously
    # crashed on splitlines()[0] with an IndexError.
    if not s:
        return ""
    # Exact-match scoring expects a single line.
    s = s.splitlines()[0].strip()
    s = s.strip('"\'`')
    # Drop one trailing period, but never right after a digit -- stripping
    # there would mangle decimals/version strings.
    if len(s) > 1 and s.endswith(".") and not s[-2].isdigit():
        s = s[:-1].strip()
    # Collapse whitespace runs so spacing differences never fail the match.
    return " ".join(s.split())
39
 
40
# -------------------------------
# State
# -------------------------------
class AgentState(TypedDict):
    """Pipeline state: the incoming question and the produced answer."""
    # The raw GAIA question text.
    question: str
    # The cleaned final answer to submit.
    answer: str
46
+
47
# -------------------------------
# Tools & LLM
# -------------------------------
# Web search tool (free tier, no API key required).
search_tool = DuckDuckGoSearchRun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
# LLM used for answering. temperature=0 for deterministic output; small
# max_tokens budget since answers are short exact-match strings.
# NOTE(review): presumably reads OPENAI_API_KEY from the environment
# (standard ChatOpenAI behavior) -- confirm the Space secret is set.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    max_tokens=96,
)
59
 
60
# -------------------------------
# Agent
# -------------------------------
class BasicAgent:
    """Search-augmented GAIA agent: one web search, then one LLM call."""

    def __init__(self):
        print("Search-based GAIA Agent initialized.")

    def __call__(self, question: str) -> str:
        """Answer a GAIA question and return it in exact-match form."""
        print(f"Question: {question[:80]}...")

        # 1) Gather evidence via web search; degrade to an empty evidence
        #    string instead of failing the whole question.
        try:
            evidence = search_tool.run(question)
        except Exception as err:
            print("Search error:", err)
            evidence = ""

        # 2) Assemble the prompt: system rules, question, then evidence.
        prompt = f"""
{SYSTEM_PROMPT}

Question:
{question}

Search Results:
{evidence}
""".strip()

        # 3) Single LLM call, normalized for exact-match scoring.
        reply = llm.invoke([HumanMessage(content=prompt)])
        answer = clean_answer(reply.content)

        print(f"Answer: {answer}")
        return answer
94
 
95
  def run_and_submit_all( profile: gr.OAuthProfile | None):
requirements.txt CHANGED
@@ -2,4 +2,6 @@ gradio
2
  requests
3
  langgraph
4
  langchain_openai
5
- langchain_core
 
 
 
2
  requests
3
  langgraph
4
  langchain_openai
5
+ langchain_core
6
+ langchain-community
7
+ duckduckgo-search