D3MI4N commited on
Commit
7bccf8e
Β·
1 Parent(s): 0c06f61

New version using OpenAI

Browse files
app.py CHANGED
@@ -3,24 +3,24 @@ import gradio as gr
3
  import requests
4
  import pandas as pd
5
  import asyncio
6
- from qa_graph import graph
7
  from typing import Optional
8
 
9
- # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
- user_answers_cache = {} # Stores answers per session
12
 
13
  class GaiaAgent:
14
  def __init__(self):
15
- print("Graph-based agent initialized.")
16
 
17
  def __call__(self, question: str) -> str:
18
- print("Received question:", question)
19
  state = {"question": question, "answer": ""}
20
- out = graph.invoke(state)
21
- return out["answer"]
22
-
23
 
 
24
  async def run_agent(profile: gr.OAuthProfile | None):
25
  if not profile:
26
  return "Please login to Hugging Face.", None
@@ -28,27 +28,25 @@ async def run_agent(profile: gr.OAuthProfile | None):
28
  username = profile.username
29
  agent = GaiaAgent()
30
 
31
- api_url = DEFAULT_API_URL
32
- questions_url = f"{api_url}/questions"
33
- print(f"Fetching questions from: {questions_url}")
34
-
35
  try:
36
- response = requests.get(questions_url, timeout=15)
37
  response.raise_for_status()
38
  questions_data = response.json()
39
  except Exception as e:
40
  return f"Error fetching questions: {e}", None
41
 
42
- async def process_question(item):
 
43
  task_id = item.get("task_id")
44
- question_text = item.get("question")
45
  try:
46
- answer = await asyncio.to_thread(agent, question_text)
47
- return {"task_id": task_id, "question": question_text, "submitted_answer": answer}
48
  except Exception as e:
49
- return {"task_id": task_id, "question": question_text, "submitted_answer": f"ERROR: {e}"}
50
 
51
- results = await asyncio.gather(*(process_question(item) for item in questions_data))
52
  user_answers_cache[username] = results
53
 
54
  df = pd.DataFrame(results)
@@ -61,50 +59,49 @@ def submit_answers(profile: gr.OAuthProfile | None):
61
 
62
  username = profile.username.strip()
63
  if username not in user_answers_cache:
64
- return "No cached answers found. Please run the agent first.", None
65
 
66
  answers_payload = [
67
  {"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
68
  for item in user_answers_cache[username]
69
  ]
70
 
71
- space_id = os.getenv("SPACE_ID")
72
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
73
  submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
74
 
75
- submit_url = f"{DEFAULT_API_URL}/submit"
76
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
77
-
78
  try:
79
- response = requests.post(submit_url, json=submission_data, timeout=60)
80
  response.raise_for_status()
81
  result = response.json()
82
  final_status = (
83
- f"Submission Successful!\n"
84
- f"User: {result.get('username')}\n"
85
- f"Overall Score: {result.get('score', 'N/A')}% "
86
  f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
87
- f"Message: {result.get('message', 'No message received.')}"
88
  )
89
  df = pd.DataFrame(user_answers_cache[username])
90
  return final_status, df
91
  except Exception as e:
92
- return f"Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
93
 
94
 
 
95
  with gr.Blocks() as demo:
96
- gr.Markdown("# Basic Agent Evaluation Runner")
97
  gr.LoginButton()
98
 
99
- run_button = gr.Button("Run Agent on Questions")
100
- submit_button = gr.Button("Submit Cached Answers")
101
 
102
- status_output = gr.Textbox(label="Status", lines=5, interactive=False)
103
- results_table = gr.DataFrame(label="Results", wrap=True)
104
 
105
- run_button.click(run_agent, outputs=[status_output, results_table])
106
- submit_button.click(submit_answers, outputs=[status_output, results_table])
107
 
108
  if __name__ == "__main__":
109
- print("Launching app...")
110
  demo.launch(debug=True, share=False)
 
3
  import requests
4
  import pandas as pd
5
  import asyncio
6
+ from gaia_graph import graph # Use your agent
7
  from typing import Optional
8
 
9
+ # Constants
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
+ user_answers_cache = {} # session-based cache
12
 
13
class GaiaAgent:
    """Callable wrapper that routes one question through the compiled LangGraph `graph`."""

    def __init__(self):
        print("GaiaAgent initialized.")

    def __call__(self, question: str) -> str:
        """Run the graph on `question` and return the extracted answer string."""
        print(f"Running agent on: {question}")
        state = {"question": question, "answer": ""}
        # FIX: a compiled LangGraph is not subscriptable — `graph["invoke"]`
        # raised TypeError at runtime. The run method is `graph.invoke(state)`.
        result = graph.invoke(state)
        return result["answer"]
 
22
 
23
+ # Async runner
24
  async def run_agent(profile: gr.OAuthProfile | None):
25
  if not profile:
26
  return "Please login to Hugging Face.", None
 
28
  username = profile.username
29
  agent = GaiaAgent()
30
 
31
+ # 1. Load questions
 
 
 
32
  try:
33
+ response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=10)
34
  response.raise_for_status()
35
  questions_data = response.json()
36
  except Exception as e:
37
  return f"Error fetching questions: {e}", None
38
 
39
+ # 2. Process questions
40
+ async def process(item):
41
  task_id = item.get("task_id")
42
+ question = item.get("question")
43
  try:
44
+ answer = await asyncio.to_thread(agent, question)
45
+ return {"task_id": task_id, "question": question, "submitted_answer": answer}
46
  except Exception as e:
47
+ return {"task_id": task_id, "question": question, "submitted_answer": f"ERROR: {e}"}
48
 
49
+ results = await asyncio.gather(*(process(item) for item in questions_data))
50
  user_answers_cache[username] = results
51
 
52
  df = pd.DataFrame(results)
 
59
 
60
  username = profile.username.strip()
61
  if username not in user_answers_cache:
62
+ return "No cached answers. Please run the agent first.", None
63
 
64
  answers_payload = [
65
  {"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
66
  for item in user_answers_cache[username]
67
  ]
68
 
69
+ space_id = os.getenv("SPACE_ID", "")
70
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
71
  submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
72
 
73
+ # 3. Submit to scoring API
 
 
74
  try:
75
+ response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
76
  response.raise_for_status()
77
  result = response.json()
78
  final_status = (
79
+ f"βœ… Submission Successful!\n"
80
+ f"πŸ‘€ User: {result.get('username')}\n"
81
+ f"🎯 Score: {result.get('score', 'N/A')}% "
82
  f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
83
+ f"πŸ“© Message: {result.get('message', 'No message received.')}"
84
  )
85
  df = pd.DataFrame(user_answers_cache[username])
86
  return final_status, df
87
  except Exception as e:
88
+ return f"❌ Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
89
 
90
 
91
+ # ────────── Gradio UI ──────────
92
  with gr.Blocks() as demo:
93
+ gr.Markdown("# 🧠 GAIA Agent Evaluation")
94
  gr.LoginButton()
95
 
96
+ run_button = gr.Button("▢️ Run Agent on GAIA Questions")
97
+ submit_button = gr.Button("πŸ“€ Submit Cached Answers")
98
 
99
+ status = gr.Textbox(label="Status", lines=6, interactive=False)
100
+ results = gr.DataFrame(label="Answers", wrap=True)
101
 
102
+ run_button.click(run_agent, outputs=[status, results])
103
+ submit_button.click(submit_answers, outputs=[status, results])
104
 
105
  if __name__ == "__main__":
106
+ print("Launching Gradio app...")
107
  demo.launch(debug=True, share=False)
gaia_graph.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gaia_graph.py
2
+
3
+ import os
4
+ import ast
5
+ import operator
6
+ import yaml
7
+ from typing import TypedDict
8
+
9
+ from dotenv import load_dotenv
10
+ from langchain.tools import Tool
11
+ from langchain.agents import initialize_agent, AgentType
12
+ from langchain_openai import ChatOpenAI
13
+ from langgraph.graph import StateGraph, END
14
+
15
+ # ─── Load Environment Variables ──────────────────────────────────────────────
16
+ load_dotenv()
17
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
18
+ assert OPENAI_API_KEY, "OPENAI_API_KEY is not set"
19
+
20
+ # ─── Define Calculator Tool ──────────────────────────────────────────────────
21
def safe_eval(expr: str) -> str:
    """Evaluate a basic arithmetic expression and return the result as a string.

    Only +, -, *, /, ** and unary minus are permitted; any other construct
    (names, calls, etc.) yields an "Error: ..." string instead of raising.
    """
    allowed = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
    }

    def evaluate(tree):
        # Numeric literals arrive as ast.Constant nodes.
        if isinstance(tree, ast.Constant):
            return tree.value
        # Binary operators: recurse into both operands.
        if isinstance(tree, ast.BinOp):
            return allowed[type(tree.op)](evaluate(tree.left), evaluate(tree.right))
        # Unary operators (negation).
        if isinstance(tree, ast.UnaryOp):
            return allowed[type(tree.op)](evaluate(tree.operand))
        raise TypeError(f"Unsupported AST node: {tree!r}")

    try:
        return str(evaluate(ast.parse(expr, mode="eval").body))
    except Exception as e:
        return f"Error: {e}"
45
+
46
+ calculator_tool = Tool(
47
+ name="calculator",
48
+ func=safe_eval,
49
+ description="Evaluate basic math expressions. Input: a math string like '2 + 2'. Output: the result.",
50
+ )
51
+
52
+ # ─── Define Search Tool using Tavily ─────────────────────────────────────────
53
+ from tavily import TavilyClient
54
+
55
+ with open("config.yaml") as f:
56
+ cfg = yaml.safe_load(f)
57
+
58
+ TAVILY_API_KEY = cfg.get("tavily_api_key")
59
+ assert TAVILY_API_KEY, "TAVILY API key is missing in config.yaml"
60
+
61
+ tavily = TavilyClient(api_key=TAVILY_API_KEY)
62
+
63
def search_tool_fn(query: str) -> str:
    """Run a Tavily web search and return a one-line summary of the top hit.

    Returns a human-readable message when nothing is found or the search fails.
    """
    try:
        hits = tavily.search(query).get("results", [])
        if not hits:
            return "No results found."
        top = hits[0]
        return top.get("title") or top.get("snippet") or "No snippet."
    except Exception as e:
        return f"Search error: {e}"
72
+
73
+ search_tool = Tool(
74
+ name="search",
75
+ func=search_tool_fn,
76
+ description="Useful for answering factual questions using a search engine.",
77
+ )
78
+
79
+ # ─── Create LLM Agent ────────────────────────────────────────────────────────
80
+ llm = ChatOpenAI(
81
+ temperature=0.0,
82
+ model="gpt-4o",
83
+ openai_api_key=OPENAI_API_KEY
84
+ )
85
+
86
+ agent_executor = initialize_agent(
87
+ tools=[calculator_tool, search_tool],
88
+ llm=llm,
89
+ agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
90
+ verbose=False,
91
+ handle_parsing_errors=True,
92
+ )
93
+
94
+ # ─── Clean Output ────────────────────────────────────────────────────────────
95
def clean_answer(ans: str) -> str:
    """Strip agent scaffolding from a raw LLM answer.

    Keeps only the text after the last code fence, then after an
    "Answer:" or "β†’" marker when one is present.
    """
    if "```" in ans:
        ans = ans.split("```")[-1]
    for marker in ("Answer:", "β†’"):
        if marker in ans:
            return ans.rsplit(marker, 1)[-1].strip()
    return ans.strip()
103
+
104
+ # ─── Define State ────────────────────────────────────────────────────────────
105
class GaiaState(TypedDict):
    """State dict flowing through the single-node QA graph."""
    question: str  # incoming user question
    answer: str    # filled in by agent_node with the cleaned agent output
108
+
109
+ # ─── Define Node Function ────────────────────────────────────────────────────
110
def agent_node(state: GaiaState) -> GaiaState:
    """LangGraph node: run the ReAct executor on the question and clean its output."""
    question = state["question"]
    # NOTE(review): AgentExecutor.run() is deprecated in newer LangChain in
    # favour of .invoke(); kept as-is to preserve behaviour on the pinned version.
    raw_answer = agent_executor.run(question)
    return {"question": question, "answer": clean_answer(raw_answer)}
113
+
114
+ # ─── Build LangGraph ─────────────────────────────────────────────────────────
115
+ builder = StateGraph(GaiaState)
116
+ builder.add_node("agent", agent_node)
117
+ builder.set_entry_point("agent")
118
+ builder.set_finish_point("agent")
119
+
120
+ graph = builder.compile()
gaia_graph_legacy.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gaia_graph.py
2
+
3
+ import os
4
+ import re
5
+ import yaml
6
+ from typing import TypedDict
7
+
8
+ from dotenv import load_dotenv
9
+ from transformers import pipeline
10
+ from langchain_huggingface import HuggingFacePipeline
11
+ from langchain_core.tools.structured import StructuredTool
12
+ from langgraph.graph import StateGraph, START, END
13
+ from langgraph.prebuilt.chat_agent_executor import create_react_agent
14
+
15
+ #
16
+ # ─── 1) LOAD ENVIRONMENT VARIABLES ──────────────────────────────────────────────
17
+ #
18
+ # Make sure you have a valid HF token in your shell or .env:
19
+ # export HUGGINGFACE_API_TOKEN="<your token>"
20
+ load_dotenv()
21
+ HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
22
+ assert HF_TOKEN, "Please set HUGGINGFACE_API_TOKEN in your environment or .env."
23
+
24
+ #
25
+ # ─── 2) LOAD config.yaml ─────────────────────────────────────────────────────────
26
+ #
27
+ # Expect config.yaml with:
28
+ # tavily_api_key: "<your Tavily key>"
29
+ # huggingface_api_token: "<your HF token>" (optional duplication)
30
+ with open("config.yaml", "r") as f:
31
+ cfg = yaml.safe_load(f)
32
+
33
+ TAVILY_API_KEY = cfg.get("tavily_api_key")
34
+ assert TAVILY_API_KEY, "Put your Tavily key under 'tavily_api_key' in config.yaml."
35
+
36
+ #
37
+ # ─── 3) DEFINE β€œTOOL” WRAPPERS ────────────────────────────────────────────────────
38
+ #
39
+
40
+ # 3a) Calculator (a β€œsafe eval” of simple expressions)
41
+ def _safe_eval(expr: str) -> str:
42
+ import ast, operator
43
+
44
+ ops = {
45
+ ast.Add: operator.add,
46
+ ast.Sub: operator.sub,
47
+ ast.Mult: operator.mul,
48
+ ast.Div: operator.truediv,
49
+ ast.Pow: operator.pow,
50
+ ast.USub: operator.neg,
51
+ }
52
+
53
+ def _eval(node):
54
+ if isinstance(node, ast.Constant):
55
+ return node.n
56
+ elif isinstance(node, ast.BinOp):
57
+ return ops[type(node.op)](_eval(node.left), _eval(node.right))
58
+ elif isinstance(node, ast.UnaryOp):
59
+ return ops[type(node.op)](_eval(node.operand))
60
+ else:
61
+ raise TypeError(f"Unsupported AST node: {node}")
62
+
63
+ node = ast.parse(expr, mode="eval").body
64
+ return str(_eval(node))
65
+
66
+
67
def _calculator_tool(text: str) -> str:
    """Tool entry point: evaluate *text* arithmetically, never raising to the caller."""
    try:
        result = _safe_eval(text)
    except Exception as e:
        return f"Error evaluating expression: {e}"
    return result
72
+
73
+
74
+ calculator_tool = StructuredTool.from_function(
75
+ func=_calculator_tool,
76
+ name="calculator",
77
+ description="Evaluate simple arithmetic expressions; return the numeric result as a string.",
78
+ )
79
+
80
+ # 3b) Tavily‐based search
81
+ from tavily import TavilyClient
82
+
83
class _TavilySearch:
    """Callable wrapper around TavilyClient returning up to three result titles/snippets."""

    def __init__(self, api_key: str):
        self.client = TavilyClient(api_key=api_key)

    def __call__(self, query: str) -> str:
        results = self.client.search(query).get("results", [])
        if not results:
            return "No results found."
        # Prefer each result's title, fall back to its snippet, drop entries
        # that have neither.
        parts = [r.get("title") or r.get("snippet") for r in results[:3]]
        return " | ".join(p for p in parts if p)
101
+
102
+
103
+ _tavily_search = _TavilySearch(api_key=TAVILY_API_KEY)
104
+
105
+ # Note: pass the instance’s __call__, not the instance itself.
106
+ search_tool = StructuredTool.from_function(
107
+ func=_tavily_search.__call__,
108
+ name="search",
109
+ description="Look up facts via Tavily; return up to three summaries joined by ' | '.",
110
+ )
111
+
112
+ TOOLS = [calculator_tool, search_tool]
113
+
114
+
115
+ #
116
+ # ─── 4) PRELOAD A FREE HF MODEL & WRAP IT AS HuggingFacePipeline ───────────────────
117
+ #
118
+ # We choose β€œgoogle/flan-t5-small” (free, CPU‐friendly). Load as a text2text pipeline:
119
+ hf_gen = pipeline(
120
+ "text2text-generation",
121
+ model="google/flan-t5-small",
122
+ device=-1, # CPU only
123
+ max_new_tokens=128,
124
+ do_sample=False, # greedy
125
+ )
126
+
127
+ # Now wrap that pipeline into a HuggingFacePipeline LLM.
128
+ # (No API token needed here for a local β€œgoogle/flan-t5-small”)
129
+ llm = HuggingFacePipeline(pipeline=hf_gen)
130
+
131
+
132
+ #
133
+ # ─── 5) CREATE A LANGGRAPH ReAct AGENT ─────────────────────────────────────────────
134
+ #
135
+ # This `create_react_agent` will add the Thought/Action/Observation framing
136
+ # so that the LLM can call β€œcalculator” or β€œsearch” as needed,
137
+ # and then eventually emit β€œFinal Answer: …”.
138
+ #
139
+ react_agent = create_react_agent(
140
+ llm=llm,
141
+ tools=TOOLS,
142
+ max_iterations=3,
143
+ verbose=False,
144
+ )
145
+
146
+
147
+ #
148
+ # ─── 6) DEFINE STATE SCHEMA & SINGLE GRAPH NODE ─────────────────────────────────
149
+ #
150
+ class AgentState(TypedDict):
151
+ question: str
152
+ tool_output: str # (ignored by ReAct, but must exist)
153
+ final_answer: str
154
+
155
+
156
def AgentNode(state: AgentState) -> AgentState:
    """Run the prebuilt ReAct agent on the question and record its final answer."""
    q = state["question"].strip()
    # FIX: langgraph's create_react_agent compiles to a graph that consumes
    # and produces a {"messages": [...]} dict; invoking it with a bare string
    # (and calling .strip() on the returned dict) fails at runtime. The
    # answer is the content of the last message.
    result = react_agent.invoke({"messages": [("user", q)]})
    state["final_answer"] = result["messages"][-1].content.strip()
    return state
162
+
163
+
164
+ #
165
+ # ─── 7) WIRE UP THE LANGGRAPH ─────────────────────────────────────────────────────
166
+ #
167
+ builder = StateGraph(AgentState)
168
+ builder.set_entry_point("AgentNode")
169
+ builder.add_node("AgentNode", AgentNode)
170
+ builder.add_edge(START, "AgentNode")
171
+ builder.add_edge("AgentNode", END)
172
+
173
+ graph = builder.compile()
174
+
175
+ #
176
+ # ─── 8) SMOKE TESTS ───────────────────────────────────────────────────────────────
177
+ #
178
+ if __name__ == "__main__":
179
+ print("Device set to use CPU\n")
180
+ tests = [
181
+ "How much is 2 + 2",
182
+ "What is the capital of France?",
183
+ "Which country had the fewest athletes at the 1928 Olympics? Give the IOC code."
184
+ ]
185
+ for q in tests:
186
+ state = {"question": q, "tool_output": "", "final_answer": ""}
187
+ out = graph.invoke(state)
188
+ print(f"Q: {q!r}\n→ A: {out['final_answer']!r}\n")
langgraph_openai.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # langgraph_openai.py
2
+
3
+ import os
4
+ import ast
5
+ import operator
6
+ import yaml
7
+ from typing import TypedDict
8
+ from dotenv import load_dotenv
9
+
10
+ from langchain_core.tools.structured import StructuredTool
11
+ from langchain_community.chat_models import ChatOpenAI
12
+ from langgraph.prebuilt.chat_agent_executor import create_react_agent
13
+ from langgraph.graph import StateGraph, START, END
14
+ from langsmith import traceable
15
+ from tavily import TavilyClient
16
+
17
+ # ─── 1) LOAD KEYS ───────────────────────────────────────────────────────────────
18
+ load_dotenv()
19
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
20
+ assert OPENAI_API_KEY, "Set OPENAI_API_KEY in your .env"
21
+
22
+ with open("config.yaml") as f:
23
+ cfg = yaml.safe_load(f)
24
+ TAVILY_API_KEY = cfg.get("tavily_api_key")
25
+ assert TAVILY_API_KEY, "Put your Tavily key under 'tavily_api_key' in config.yaml"
26
+
27
+ # ─── 2) TOOLS ───────────────────────────────────────────────────────────────────
28
+ def _safe_eval(expr: str) -> str:
29
+ ops = {
30
+ ast.Add: operator.add,
31
+ ast.Sub: operator.sub,
32
+ ast.Mult: operator.mul,
33
+ ast.Div: operator.truediv,
34
+ ast.Pow: operator.pow,
35
+ ast.USub: operator.neg,
36
+ }
37
+ def _eval(node):
38
+ if isinstance(node, ast.Constant):
39
+ return node.value
40
+ if isinstance(node, ast.BinOp):
41
+ return ops[type(node.op)](_eval(node.left), _eval(node.right))
42
+ if isinstance(node, ast.UnaryOp):
43
+ return ops[type(node.op)](_eval(node.operand))
44
+ raise TypeError(f"Unsupported AST node: {node!r}")
45
+ node = ast.parse(expr, mode="eval").body
46
+ return str(_eval(node))
47
+
48
def calculator_fn(expr: str) -> str:
    """Tool wrapper: evaluate *expr* safely, converting any failure to an 'Error: ...' string."""
    try:
        value = _safe_eval(expr)
    except Exception as e:
        return f"Error: {e}"
    return value
53
+
54
+ calculator_tool = StructuredTool.from_function(
55
+ func=calculator_fn,
56
+ name="calculator",
57
+ description="Useful for evaluating simple math expressions like '2 + 2'. Takes a single input 'expr'.",
58
+ )
59
+
60
+ tavily = TavilyClient(api_key=TAVILY_API_KEY)
61
+
62
def search_fn(query: str) -> str:
    """Query Tavily and join up to three result titles/snippets with ' | '."""
    try:
        hits = tavily.search(query).get("results", [])
        if not hits:
            return "No results found."
        summaries = [(h.get("title") or h.get("snippet") or "").strip() for h in hits[:3]]
        return " | ".join(summaries)
    except Exception as e:
        return f"Search error: {e}"
75
+
76
+ search_tool = StructuredTool.from_function(
77
+ func=search_fn,
78
+ name="search",
79
+ description="Useful for general purpose web search queries.",
80
+ )
81
+
82
+ TOOLS = [calculator_tool, search_tool]
83
+
84
+ # ─── 3) AGENT + ANALYZER ────────────────────────────────────────────────────────
85
+ chat = ChatOpenAI(
86
+ model="gpt-4o",
87
+ temperature=0.0,
88
+ openai_api_key=OPENAI_API_KEY,
89
+ )
90
+ chat_with_tools = chat.bind_tools(TOOLS)
91
+
92
+ react_agent = create_react_agent(chat_with_tools, TOOLS, max_steps=3, verbose=False)
93
+
94
+ analyzer = ChatOpenAI(
95
+ model="gpt-4o",
96
+ temperature=0.0,
97
+ openai_api_key=OPENAI_API_KEY,
98
+ )
99
+
100
def analyze_answer(text: str) -> str:
    """Ask the analyzer LLM to distill *text* down to its core answer."""
    # FIX: the original concatenated these fragments with no separators,
    # producing "...following text.Respond ... no explanation.<text>" — the
    # instructions and the payload ran together into one sentence. Join them
    # with explicit newlines so the model sees a well-formed prompt.
    prompt = (
        "Extract the final concise answer from the following text.\n"
        "Respond with only the core answer, no explanation.\n\n"
        f"{text}"
    )
    result = analyzer.invoke(prompt)
    return result.content.strip()
108
+
109
+ # ─── 4) LANGGRAPH ───────────────────────────────────────────────────────────────
110
+ class AgentState(TypedDict):
111
+ question: str
112
+ tool_output: str
113
+ final_answer: str
114
+
115
@traceable(name="AgentNode OpenAI")
def AgentNode(state: AgentState) -> AgentState:
    """Run the ReAct agent on the question; stash its raw answer in tool_output."""
    q = state["question"].strip()
    # FIX: a compiled create_react_agent graph consumes/produces a
    # {"messages": [...]} dict; invoking it with a bare string (and calling
    # .strip() on the returned dict) raises at runtime. The raw answer is
    # the content of the last returned message.
    result = react_agent.invoke({"messages": [("user", q)]})
    state["tool_output"] = result["messages"][-1].content.strip()
    return state
121
+
122
@traceable(name="AnalyzerNode")
def AnalyzerNode(state: AgentState) -> AgentState:
    """Second graph node: distill the raw tool output into final_answer."""
    state["final_answer"] = analyze_answer(state["tool_output"])
    return state
127
+
128
# Wire the two nodes into a linear graph: START -> AgentNode -> AnalyzerNode -> END.
# (The original also called set_entry_point("AgentNode"), which is redundant
# with the explicit START edge below.)
builder = StateGraph(AgentState)
builder.add_node("AgentNode", AgentNode)
builder.add_node("AnalyzerNode", AnalyzerNode)
builder.add_edge(START, "AgentNode")
builder.add_edge("AgentNode", "AnalyzerNode")
builder.add_edge("AnalyzerNode", END)
# FIX: StateGraph has no .build() method — compiling the graph is .compile()
# (as the sibling files already do); .build() raised AttributeError on import.
graph = builder.compile()
136
+
137
+ # ─── 5) RUN TEST ───────────────────────────────────────────────────────────────
138
+ @traceable(name="OpenAI LangGraph Final Test")
139
+ def run_tests():
140
+ questions = [
141
+ "How much is 2 + 2",
142
+ "What is the capital of France?",
143
+ "Which country had the fewest athletes at the 1928 Olympics? Give the IOC code."
144
+ ]
145
+ for q in questions:
146
+ state = {"question": q, "tool_output": "", "final_answer": ""}
147
+ out = graph.invoke(state)
148
+ print(f"Q: {q}\nA: {out['final_answer']}\n")
149
+
150
+ if __name__ == "__main__":
151
+ run_tests()
qa_graph.py DELETED
@@ -1,69 +0,0 @@
1
- from typing import TypedDict
2
- import re
3
- from langgraph.graph import StateGraph, START, END
4
- from tools.calculator_tool import calculator_tool
5
- from tools.search_tool import search_tool
6
- from transformers import pipeline
7
-
8
- # Shape of the state
9
- class QAState(TypedDict):
10
- question: str # incoming question
11
- answer: str # to store tool output or synthesized answer
12
-
13
- # Use text2text-generation for llm model
14
- synthesizer = pipeline(
15
- "text2text-generation",
16
- model="google/flan-t5-small",
17
- device=-1, # CPU; change to 0 for GPU
18
- max_new_tokens=100,
19
- do_sample=True,
20
- top_p=0.95,
21
- temperature=0.7
22
- )
23
-
24
- # Tool agent: calculator for math, search for other
25
- def QAAgent(state: QAState) -> QAState:
26
- q = state["question"].strip()
27
- if re.fullmatch(r"[0-9\s\+\-\*\/\.\(\)]+", q):
28
- state["answer"] = calculator_tool.invoke(q)
29
- else:
30
- state["answer"] = search_tool.invoke(q)
31
- return state
32
-
33
- # Synthesis agent to generate final response
34
- def SynthesisAgent(state: QAState) -> QAState:
35
- question = state["question"]
36
- tool_out = state["answer"]
37
- prompt = (
38
- f"Question: {question}\n"
39
- f"Tool output: {tool_out}\n"
40
- "Answer in a comma-separated list (no extra text):"
41
- )
42
- outputs = synthesizer(prompt)
43
- completion = outputs[0]["generated_text"].strip()
44
- state["answer"] = completion
45
- return state
46
-
47
- # Build the graph
48
- builder = StateGraph(QAState)
49
- builder.set_entry_point("QAAgent")
50
- builder.add_node("QAAgent", QAAgent)
51
- builder.add_node("SynthesisAgent", SynthesisAgent)
52
-
53
- builder.add_edge(START, "QAAgent")
54
- builder.add_edge("QAAgent", "SynthesisAgent")
55
- builder.add_edge("SynthesisAgent", END)
56
-
57
- graph = builder.compile()
58
-
59
- # Local testing
60
- if __name__ == "__main__":
61
- # Math example
62
- s1: QAState = {"question": "2 + 2", "answer": ""}
63
- o1 = graph.invoke(s1)
64
- print("Q:", s1["question"], "-> A:", o1["answer"])
65
-
66
- # Search + synthesis example
67
- s2: QAState = {"question": "What is the capital of France?", "answer": ""}
68
- o2 = graph.invoke(s2)
69
- print("Q:", s2["question"], "-> A:", o2["answer"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,14 +1,38 @@
1
- gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  requests
3
- langgraph
4
- openai
5
- tavily-python
6
- langchain
7
  pandas
8
  python-dotenv
9
- huggingface_hub
10
- transformers
11
- langchain-huggingface
12
  IPython
13
  numpy<2.0
14
-
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio
2
+ # requests
3
+ # pandas
4
+ # python-dotenv
5
+ # IPython
6
+ # numpy<2.0
7
+
8
+ # huggingface_hub
9
+ # transformers
10
+ # langchain-huggingface
11
+ # langgraph
12
+ # langsmith
13
+ # langchain>=0.1.20
14
+ # langchain-community
15
+ # tavily-python
16
+ # huggingface_hub[hf_xet]
17
+ # pydantic
18
+ # PyYAML
19
+
20
+
21
+ gradio==5.30.0
22
  requests
 
 
 
 
23
  pandas
24
  python-dotenv
 
 
 
25
  IPython
26
  numpy<2.0
27
+ huggingface_hub
28
+ transformers==4.51.3
29
+ langchain-huggingface==0.2.0
30
+ langgraph==0.4.5
31
+ langsmith==0.3.42
32
+ langchain>=0.1.20,<0.4.0
33
+ langchain-community==0.3.24
34
+ tavily-python==0.7.2
35
+ pydantic>=2.0
36
+ PyYAML
37
+ hf-xet~=1.1.1
38
+ langchain-openai
test_gaia.py DELETED
@@ -1,8 +0,0 @@
1
- from qa_graph import graph
2
- import requests
3
- import pandas as pd
4
-
5
- QUESTIONS = requests.get("https://agents-course-unit4-scoring.hf.space/questions").json()
6
- for q in QUESTIONS[:5]:
7
- out = graph.invoke({"question": q["question"], "answer": ""})
8
- print(q["task_id"], out["answer"])
 
 
 
 
 
 
 
 
 
test_gaia_questions.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # test_gaia_questions.py
2
+
3
+ import requests
4
+ from gaia_graph import graph
5
+
6
def test_with_real_gaia_questions():
    """Smoke-test the agent graph against the first five live GAIA questions."""
    # Fetch questions directly from the benchmark API.
    url = "https://agents-course-unit4-scoring.hf.space/questions"
    # Robustness fix: bound the request and fail loudly on HTTP errors
    # instead of hanging forever or crashing later on an unexpected payload.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    questions = response.json()

    for q in questions[:5]:  # limit to the first 5 for a quick check
        question = q["question"]
        task_id = q["task_id"]
        state = {"question": question, "answer": ""}
        result = graph.invoke(state)
        print(f"[{task_id}] Q: {question}")
        print(f"β†’ {result['answer']}")
        print("-" * 60)
20
+
21
+ if __name__ == "__main__":
22
+ test_with_real_gaia_questions()
test_openai_agent.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # test_openai_agent.py
2
+
3
+ import os, json
4
+ from typing import Dict
5
+ from dotenv import load_dotenv
6
+ from openai import OpenAI
7
+ from tavily import TavilyClient
8
+
9
+ load_dotenv()
10
+
11
+ # ─── 1) OpenAI client (v1) ─────────────────────────────────────────────────────────
12
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
+ assert OPENAI_API_KEY, "Set OPENAI_API_KEY in your .env"
14
+ client = OpenAI(api_key=OPENAI_API_KEY)
15
+
16
+ # ─── 2) Tavily search client ───────────────────────────────────────────────────────
17
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
18
+ assert TAVILY_API_KEY, "Set TAVILY_API_KEY in your .env"
19
+ tavily = TavilyClient(api_key=TAVILY_API_KEY)
20
+
21
+ # ─── 3) Tool implementations ───────────────────────────────────────────────────────
22
def calculator(expr: str) -> str:
    """Evaluate a basic arithmetic expression and return the result as a string.

    SECURITY FIX: the original used eval(); even with empty globals CPython
    re-inserts __builtins__, so model-supplied input could run arbitrary
    expressions. This AST walk permits only numeric literals and
    +, -, *, /, %, ** and unary +/- (matching _safe_eval in the sibling files).
    """
    import ast, operator

    ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    def _eval(node):
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression: {expr!r}")

    try:
        return str(_eval(ast.parse(expr, mode="eval").body))
    except Exception as e:
        return f"Error: {e}"
27
+
28
def search(query: str) -> str:
    """Search the web via Tavily; return up to three titles/snippets joined by ' | '."""
    try:
        payload = tavily.search(query=query, search_depth="basic")
        hits = payload.get("results", [])
        if not hits:
            return "No results."
        # Collect non-empty title-or-snippet text from the top three hits.
        texts = [(h.get("title") or h.get("snippet") or "").strip() for h in hits[:3]]
        joined = " | ".join(t for t in texts if t)
        return joined or "No results."
    except Exception as e:
        return f"Search error: {e}"
43
+
44
+ # ─── 4) Function‐calling schemas ──────────────────────────────────────────────────
45
+ functions = [
46
+ {
47
+ "name": "calculator",
48
+ "description": "Evaluate a math expression, return result as string",
49
+ "parameters": {
50
+ "type": "object",
51
+ "properties": {
52
+ "expr": {"type": "string", "description": "The expression to evaluate"}
53
+ },
54
+ "required": ["expr"],
55
+ },
56
+ },
57
+ {
58
+ "name": "search",
59
+ "description": "Look up facts on the web via Tavily; return up to three summaries.",
60
+ "parameters": {
61
+ "type": "object",
62
+ "properties": {
63
+ "query": {"type": "string", "description": "The search query"}
64
+ },
65
+ "required": ["query"],
66
+ },
67
+ },
68
+ ]
69
+ tool_map = {"calculator": calculator, "search": search}
70
+
71
+ # ─── 5) A single ReAct loop using free‑tier mini model ────────────────────────────
72
def run_react(question: str, max_steps: int = 3) -> str:
    """Drive a ReAct-style loop using OpenAI function calling.

    Lets the model call `calculator`/`search` up to *max_steps* times,
    feeding each tool result back into the conversation, and returns the
    model's final text answer (or a fallback marker if it never produces one).
    """
    messages = [{"role": "user", "content": question}]
    for _ in range(max_steps):
        resp = client.chat.completions.create(
            model="gpt-4o-mini",  # NOTE(review): picked for cost; "free tier" is not guaranteed
            messages=messages,
            functions=functions,
            function_call="auto",
        )
        msg = resp.choices[0].message

        # If the model decided to call a tool, execute it locally.
        if msg.function_call:
            name = msg.function_call.name
            args = json.loads(msg.function_call.arguments)
            print(f"[Tool Call] {name}({args})")
            out = tool_map[name](**args)
            # Append both the assistant's tool call and the tool's response,
            # so the next turn sees the full exchange.
            messages.append({
                "role": "assistant",
                "content": None,
                "function_call": msg.function_call.to_dict(),
            })
            messages.append({
                "role": "function",
                "name": name,
                "content": out,
            })
            # Loop continues so the model can read the tool output.
            continue

        # Otherwise this message is the final answer.
        return msg.content.strip()

    return "[no final answer after max steps]"
107
+
108
def analyzer_agent(question: str, raw_answer: str) -> str:
    """Second-pass LLM call that extracts just the short answer from a raw one."""
    prompt = "\n".join([
        "You are a data extractor.",
        f"Q: {question.strip()}",
        f"Answer: {raw_answer.strip()}",
        "Return only the short answer (e.g., a number, place, or code).",
    ])
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    return completion.choices[0].message.content.strip()
122
+
123
def run_cleaned_answer(question: str) -> str:
    """Full pipeline: get a ReAct answer, then clean it with the analyzer agent."""
    raw_response = run_react(question)
    return analyzer_agent(question, raw_response)
126
+
127
+
128
+ # ─── 6) Smoke‑test it ──────────────────────────────────────────────────────────────
129
+ if __name__ == "__main__":
130
+ tests = [
131
+ "How much is 2 + 2",
132
+ "What is the capital of France?",
133
+ "Which country had the fewest athletes at the 1928 Olympics? Give the IOC code."
134
+ ]
135
+ for q in tests:
136
+ print(f"\n>>> {q}")
137
+ try:
138
+ result = run_cleaned_answer(q)
139
+ except Exception as e:
140
+ result = f"[ERROR] {e}"
141
+ print(f"β†’ {result}")
tools/search_tool.py CHANGED
@@ -1,8 +1,8 @@
1
  from tavily import TavilyClient
2
  from langchain.tools import tool
3
- # from config import TAVILY_API_KEY
4
- import os
5
- TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
6
 
7
  class SearchTool:
8
  def __init__(self, api_key: str):
 
1
  from tavily import TavilyClient
2
  from langchain.tools import tool
3
+ from config import TAVILY_API_KEY
4
+ # import os
5
+ # TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
6
 
7
  class SearchTool:
8
  def __init__(self, api_key: str):