Final_Assignment_Template

Sleeping

App Files Files Community

Nigou Julien committed 30 days ago

Commit

b22ac70

1 Parent(s): 9c8d6f1

Use LiteLLM for agent model calls

Browse files

Files changed (15) hide show

.env.example +14 -2
README.md +27 -1
app.py +14 -1
gaia_agent/agent.py +17 -4
gaia_agent/answer.py +1 -1
gaia_agent/config.py +4 -2
gaia_agent/graph.py +11 -5
gaia_agent/llms.py +21 -0
gaia_agent/observability.py +26 -11
gaia_agent/prompts.py +6 -0
pyproject.toml +1 -1
tests/test_answer.py +4 -0
tests/test_graph_smoke.py +18 -3
tests/test_llms.py +35 -0
uv.lock +0 -0

.env.example CHANGED Viewed

@@ -1,9 +1,21 @@
 # Copy this file to .env for local development.
 # Do not commit real secrets.
-# LLM provider
 OPENAI_API_KEY=
-OPENAI_MODEL=gpt-4o-mini
 # Langfuse tracing
 LANGFUSE_PUBLIC_KEY=

 # Copy this file to .env for local development.
 # Do not commit real secrets.
+# LiteLLM settings.
+# Example models:
+#   openai/gpt-4o-mini
+#   anthropic/claude-3-5-sonnet-latest
+#   gemini/gemini-2.0-flash
+LITELLM_MODEL=
+LITELLM_TEMPERATURE=0
+LITELLM_API_KEY=
+LITELLM_API_BASE=
+# Provider keys are still read by LiteLLM when you call provider-backed models.
+# Set only the keys you need for your selected LITELLM_MODEL.
 OPENAI_API_KEY=
+ANTHROPIC_API_KEY=
+GEMINI_API_KEY=
 # Langfuse tracing
 LANGFUSE_PUBLIC_KEY=

README.md CHANGED Viewed

@@ -12,4 +12,30 @@ hf_oauth: true
 hf_oauth_expiration_minutes: 480
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 hf_oauth_expiration_minutes: 480
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## Local Setup
+Install dependencies and run the smoke checks with `uv`:
+```bash
+uv sync
+uv run pytest
+uv run python scripts/run_one.py
+```
+Copy `.env.example` to `.env` and set the LiteLLM model you want to test:
+```env
+LITELLM_MODEL=anthropic/claude-3-5-sonnet-latest
+ANTHROPIC_API_KEY=...
+```
+LiteLLM can also route OpenAI-compatible models through the same code path:
+```env
+LITELLM_MODEL=openai/gpt-4o-mini
+OPENAI_API_KEY=...
+```
+If you run a LiteLLM proxy, set `LITELLM_API_BASE` and `LITELLM_API_KEY`.

app.py CHANGED Viewed

@@ -1,4 +1,7 @@
 import os
 import gradio as gr
 import requests
 import pandas as pd
@@ -63,6 +66,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
@@ -70,7 +78,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:

 import os
+from datetime import UTC, datetime
+from uuid import uuid4
 import gradio as gr
 import requests
 import pandas as pd
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
+    session_id = (
+        f"gaia-eval-{username.strip()}-"
+        f"{datetime.now(UTC).strftime('%Y%m%dT%H%M%SZ')}-"
+        f"{uuid4().hex[:8]}"
+    )
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = agent(
+                question_text,
+                session_id=session_id,
+                user_id=username.strip(),
+                task_id=task_id,
+            )
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:

gaia_agent/agent.py CHANGED Viewed

@@ -3,13 +3,26 @@ from gaia_agent.observability import trace_agent_run
 class GaiaAgent:
-    def __init__(self):
         print("GaiaAgent initialized.")
-    def __call__(self, question: str) -> str:
         print(f"Agent received question (first 80 chars): {question[:80]}...")
-        with trace_agent_run(question) as trace:
-            graph = build_graph(trace=trace)
             result = graph.invoke({"question": question})
         final_answer = result["final_answer"]
         print(f"Agent returning answer: {final_answer}")

 class GaiaAgent:
+    def __init__(self, llm=None):
+        self.llm = llm
         print("GaiaAgent initialized.")
+    def __call__(
+        self,
+        question: str,
+        *,
+        session_id: str | None = None,
+        user_id: str | None = None,
+        task_id: str | None = None,
+    ) -> str:
         print(f"Agent received question (first 80 chars): {question[:80]}...")
+        with trace_agent_run(
+            question,
+            session_id=session_id,
+            user_id=user_id,
+            task_id=task_id,
+        ) as trace:
+            graph = build_graph(trace=trace, llm=self.llm)
             result = graph.invoke({"question": question})
         final_answer = result["final_answer"]
         print(f"Agent returning answer: {final_answer}")

gaia_agent/answer.py CHANGED Viewed

@@ -1,3 +1,3 @@
 def normalize_answer(answer: str) -> str:
     """Apply minimal GAIA answer cleanup without changing meaning."""
-    return answer.strip()

 def normalize_answer(answer: str) -> str:
     """Apply minimal GAIA answer cleanup without changing meaning."""
+    return answer.strip().removesuffix(".")

gaia_agent/config.py CHANGED Viewed

@@ -9,8 +9,10 @@ load_dotenv()
 @dataclass(frozen=True)
 class Settings:
-    openai_api_key: str | None = os.getenv("OPENAI_API_KEY")
-    openai_model: str = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
     langfuse_public_key: str | None = os.getenv("LANGFUSE_PUBLIC_KEY")
     langfuse_secret_key: str | None = os.getenv("LANGFUSE_SECRET_KEY")
     langfuse_host: str = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")

 @dataclass(frozen=True)
 class Settings:
+    litellm_model: str | None = os.getenv("LITELLM_MODEL")
+    litellm_temperature: float = float(os.getenv("LITELLM_TEMPERATURE", "0"))
+    litellm_api_key: str | None = os.getenv("LITELLM_API_KEY")
+    litellm_api_base: str | None = os.getenv("LITELLM_API_BASE")
     langfuse_public_key: str | None = os.getenv("LANGFUSE_PUBLIC_KEY")
     langfuse_secret_key: str | None = os.getenv("LANGFUSE_SECRET_KEY")
     langfuse_host: str = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")

gaia_agent/graph.py CHANGED Viewed

@@ -1,19 +1,25 @@
 from langgraph.graph import END, StateGraph
 from gaia_agent.answer import normalize_answer
 from gaia_agent.observability import traced_step
 from gaia_agent.state import GaiaState
-PLACEHOLDER_ANSWER = "Julien test"
-def build_graph(trace=None):
     graph = StateGraph(GaiaState)
     def draft_answer(state: GaiaState) -> GaiaState:
         def run() -> dict[str, str]:
-            return {"draft_answer": PLACEHOLDER_ANSWER}
         return traced_step(trace, "draft_answer", run)

 from langgraph.graph import END, StateGraph
 from gaia_agent.answer import normalize_answer
+from gaia_agent.llms import create_chat_model
 from gaia_agent.observability import traced_step
+from gaia_agent.prompts import DUMMY_LLM_TEST_PROMPT
 from gaia_agent.state import GaiaState
+def build_graph(trace=None, llm=None):
     graph = StateGraph(GaiaState)
+    chat_model = llm or create_chat_model()
     def draft_answer(state: GaiaState) -> GaiaState:
         def run() -> dict[str, str]:
+            response = chat_model.invoke(
+                [
+                    ("system", DUMMY_LLM_TEST_PROMPT),
+                    ("user", state["question"]),
+                ]
+            )
+            return {"draft_answer": str(response.content)}
         return traced_step(trace, "draft_answer", run)

gaia_agent/llms.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_litellm import ChatLiteLLM
+from gaia_agent.config import Settings, settings
+def create_chat_model(config: Settings = settings) -> BaseChatModel:
+    """Create the configured LiteLLM chat model."""
+    if not config.litellm_model:
+        raise ValueError("LITELLM_MODEL must be set to create a chat model.")
+    kwargs = {
+        "model": config.litellm_model,
+        "temperature": config.litellm_temperature,
+    }
+    if config.litellm_api_key:
+        kwargs["api_key"] = config.litellm_api_key
+    if config.litellm_api_base:
+        kwargs["api_base"] = config.litellm_api_base
+    return ChatLiteLLM(**kwargs)

gaia_agent/observability.py CHANGED Viewed

@@ -2,6 +2,8 @@ from collections.abc import Callable
 from contextlib import contextmanager
 from typing import Any
 from gaia_agent.config import settings
@@ -10,7 +12,13 @@ def langfuse_enabled() -> bool:
 @contextmanager
-def trace_agent_run(question: str):
     """Create a Langfuse trace when credentials are configured.
     This keeps local development and HF Space startup working before secrets are set.
@@ -26,16 +34,23 @@ def trace_agent_run(question: str):
         secret_key=settings.langfuse_secret_key,
         host=settings.langfuse_host,
     )
-    with client.start_as_current_observation(
-        name="gaia-agent-run",
-        as_type="agent",
-        input={"question": question},
-        metadata={"component": "GaiaAgent"},
-    ) as observation:
-        try:
-            yield observation
-        finally:
-            client.flush()
 def traced_step(trace: Any, name: str, fn: Callable[[], dict[str, Any]]) -> dict[str, Any]:

 from contextlib import contextmanager
 from typing import Any
+from langfuse import propagate_attributes
 from gaia_agent.config import settings
 @contextmanager
+def trace_agent_run(
+    question: str,
+    *,
+    session_id: str | None = None,
+    user_id: str | None = None,
+    task_id: str | None = None,
+):
     """Create a Langfuse trace when credentials are configured.
     This keeps local development and HF Space startup working before secrets are set.
         secret_key=settings.langfuse_secret_key,
         host=settings.langfuse_host,
     )
+    with propagate_attributes(
+        trace_name="gaia-agent-run",
+        user_id=user_id,
+        session_id=session_id,
+        metadata={"task_id": task_id} if task_id else None,
+        tags=["gaia", "final-assignment"],
+    ):
+        with client.start_as_current_observation(
+            name="gaia-agent-run",
+            as_type="agent",
+            input={"question": question},
+            metadata={"component": "GaiaAgent", "task_id": task_id},
+        ) as observation:
+            try:
+                yield observation
+            finally:
+                client.flush()
 def traced_step(trace: Any, name: str, fn: Callable[[], dict[str, Any]]) -> dict[str, Any]:

gaia_agent/prompts.py CHANGED Viewed

@@ -3,3 +3,9 @@ Return only the final answer.
 The answer should be a number, as few words as possible, or a comma-separated
 list of numbers and/or strings.
 """.strip()

 The answer should be a number, as few words as possible, or a comma-separated
 list of numbers and/or strings.
 """.strip()
+DUMMY_LLM_TEST_PROMPT = """
+You are testing the LLM connection for a GAIA agent.
+Answer the user question directly in a few words.
+""".strip()

pyproject.toml CHANGED Viewed

@@ -10,9 +10,9 @@ dependencies = [
     "pandas>=2.2.0",
     "python-dotenv>=1.0.1",
     "langchain>=0.3.0",
-    "langchain-openai>=0.3.0",
     "langgraph>=0.2.60",
     "langfuse>=2.57.0",
 ]
 [build-system]

     "pandas>=2.2.0",
     "python-dotenv>=1.0.1",
     "langchain>=0.3.0",
     "langgraph>=0.2.60",
     "langfuse>=2.57.0",
+    "langchain-litellm>=0.6.4",
 ]
 [build-system]

tests/test_answer.py CHANGED Viewed

@@ -3,3 +3,7 @@ from gaia_agent.answer import normalize_answer
 def test_normalize_answer_strips_whitespace():
     assert normalize_answer("  Paris  \n") == "Paris"

 def test_normalize_answer_strips_whitespace():
     assert normalize_answer("  Paris  \n") == "Paris"
+def test_normalize_answer_removes_trailing_period():
+    assert normalize_answer("Paris.") == "Paris"

tests/test_graph_smoke.py CHANGED Viewed

@@ -1,7 +1,22 @@
 from gaia_agent.agent import GaiaAgent
-def test_agent_returns_placeholder_answer():
-    agent = GaiaAgent()
-    assert agent("What is the answer?") == "Julien test"

+from langchain_core.messages import AIMessage
 from gaia_agent.agent import GaiaAgent
+class FakeChatModel:
+    def invoke(self, messages):
+        return AIMessage(content="Dummy LLM answer")
+def test_agent_returns_llm_answer():
+    agent = GaiaAgent(llm=FakeChatModel())
+    assert (
+        agent(
+            "What is the answer?",
+            session_id="test-session",
+            user_id="test-user",
+            task_id="test-task",
+        )
+        == "Dummy LLM answer"
+    )

tests/test_llms.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import pytest
+from gaia_agent.config import Settings
+from gaia_agent.llms import create_chat_model
+def test_create_litellm_chat_model():
+    model = create_chat_model(
+        Settings(
+            litellm_model="anthropic/claude-3-5-sonnet-latest",
+            litellm_api_key="test-key",
+        )
+    )
+    assert type(model).__name__ == "ChatLiteLLM"
+    assert model.model == "anthropic/claude-3-5-sonnet-latest"
+def test_create_litellm_chat_model_supports_proxy_settings():
+    model = create_chat_model(
+        Settings(
+            litellm_model="gaia-router",
+            litellm_api_key="test-key",
+            litellm_api_base="http://localhost:4000",
+        )
+    )
+    assert type(model).__name__ == "ChatLiteLLM"
+    assert model.model == "gaia-router"
+    assert model.api_base == "http://localhost:4000"
+def test_create_chat_model_requires_litellm_model():
+    with pytest.raises(ValueError, match="LITELLM_MODEL must be set"):
+        create_chat_model(Settings(litellm_model=None))

uv.lock CHANGED Viewed

The diff for this file is too large to render. See raw diff