D3MI4N committed on
Commit
0ee4998
·
1 Parent(s): 94958b6

try function calling version

Browse files
Files changed (4) hide show
  1. app.py +140 -89
  2. app_prior.py +116 -0
  3. test_gaia_questions.py +1 -1
  4. test_openai_agent.py +1 -0
app.py CHANGED
@@ -1,116 +1,167 @@
 
 
1
  import os
2
- import gradio as gr
3
  import requests
4
  import pandas as pd
5
  import asyncio
6
- from gaia_graph import graph # Use your agent
7
- from typing import Optional
8
 
9
- # Constants
10
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
- user_answers_cache = {} # session-based cache
 
12
 
13
- class GaiaAgent:
14
- def __init__(self):
15
- print("Graph-based agent initialized.")
16
 
17
- def __call__(self, question: str) -> str:
18
- print("Received question:", question)
19
- state = {"question": question, "answer": ""}
20
- try:
21
- result = graph.invoke(state)
22
- print("Result type:", type(result))
23
- print("Result value:", result)
24
- if isinstance(result, dict):
25
- return result.get("answer", "No answer generated.")
26
- else:
27
- return f"Unexpected output from graph: {result}"
28
- except Exception as e:
29
- return f"ERROR invoking graph: {e}"
30
-
31
-
32
- # Async runner
33
- async def run_agent(profile: gr.OAuthProfile | None):
34
- if not profile:
35
- return "Please login to Hugging Face.", None
36
 
37
- username = profile.username
38
- agent = GaiaAgent()
 
 
39
 
40
- # 1. Load questions
 
41
  try:
42
- response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=10)
43
- response.raise_for_status()
44
- questions_data = response.json()
45
  except Exception as e:
46
- return f"Error fetching questions: {e}", None
47
 
48
- # 2. Process questions
49
- async def process(item):
50
- task_id = item.get("task_id")
51
- question = item.get("question")
52
- try:
53
- answer = await asyncio.to_thread(agent, question)
54
- return {"task_id": task_id, "question": question, "submitted_answer": answer}
55
- except Exception as e:
56
- return {"task_id": task_id, "question": question, "submitted_answer": f"ERROR: {e}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- results = await asyncio.gather(*(process(item) for item in questions_data))
59
- user_answers_cache[username] = results
 
 
 
 
 
60
 
61
- df = pd.DataFrame(results)
62
- return f"Answered {len(results)} questions. Ready to submit.", df
 
 
 
 
 
63
 
 
 
 
64
 
65
  def submit_answers(profile: gr.OAuthProfile | None):
66
  if not profile:
67
- return "Please login to Hugging Face.", None
68
-
69
- username = profile.username.strip()
70
- if username not in user_answers_cache:
71
- return "No cached answers. Please run the agent first.", None
72
-
73
- answers_payload = [
74
- {"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
75
- for item in user_answers_cache[username]
76
  ]
77
-
78
  space_id = os.getenv("SPACE_ID", "")
79
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
80
- submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
 
 
 
 
81
 
82
- # 3. Submit to scoring API
83
- try:
84
- response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
85
- response.raise_for_status()
86
- result = response.json()
87
- final_status = (
88
- f"βœ… Submission Successful!\n"
89
- f"πŸ‘€ User: {result.get('username')}\n"
90
- f"🎯 Score: {result.get('score', 'N/A')}% "
91
- f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
92
- f"πŸ“© Message: {result.get('message', 'No message received.')}"
93
- )
94
- df = pd.DataFrame(user_answers_cache[username])
95
- return final_status, df
96
- except Exception as e:
97
- return f"❌ Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
98
-
99
-
100
- # ────────── Gradio UI ──────────
101
  with gr.Blocks() as demo:
102
- gr.Markdown("# 🧠 GAIA Agent Evaluation")
103
  gr.LoginButton()
104
-
105
- run_button = gr.Button("▢️ Run Agent on GAIA Questions")
106
- submit_button = gr.Button("πŸ“€ Submit Cached Answers")
107
-
108
- status = gr.Textbox(label="Status", lines=6, interactive=False)
109
- results = gr.DataFrame(label="Answers", wrap=True)
110
-
111
- run_button.click(run_agent, outputs=[status, results])
112
- submit_button.click(submit_answers, outputs=[status, results])
113
 
114
  if __name__ == "__main__":
115
- print("Launching Gradio app...")
116
  demo.launch(debug=True, share=False)
 
1
+ # app.py
2
+
3
  import os
4
+ import json
5
  import requests
6
  import pandas as pd
7
  import asyncio
 
 
8
 
9
+ import gradio as gr
10
+ from openai import OpenAI
11
+ from tavily import TavilyClient
12
+ from dotenv import load_dotenv
13
 
14
+ load_dotenv()
 
 
15
 
16
+ # ─── 1) OpenAI client (v1 SDK) ───────────────────────────────────────────────────
17
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
18
+ assert OPENAI_API_KEY, "Set OPENAI_API_KEY in .env"
19
+ openai_client = OpenAI(api_key=OPENAI_API_KEY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # ─── 2) Tavily search client ─────────────────────────────────────────────────────
22
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
23
+ assert TAVILY_API_KEY, "Set TAVILY_API_KEY in .env"
24
+ tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
25
 
26
+ # ─── 3) Define our tools & JSON schemas ──────────────────────────────────────────
27
def calculator(expr: str) -> str:
    """Safely evaluate an arithmetic expression and return the result as a string.

    Only numeric literals and the operators ``+ - * / // % **`` (plus unary
    ``+``/``-``) are accepted; anything else yields an ``"Error: ..."`` string.

    NOTE: this replaces ``eval(expr, {}, {})``, which was NOT safe — CPython
    injects ``__builtins__`` into an empty globals dict, so the model could
    have executed arbitrary code via this tool.
    """
    import ast
    import operator

    bin_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _eval(node):
        # Recursively evaluate only the whitelisted node types.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in bin_ops:
            return bin_ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        return str(_eval(ast.parse(expr, mode="eval")))
    except Exception as e:
        return f"Error: {e}"
33
 
34
def search(query: str) -> str:
    """Web lookup via Tavily.

    Runs a basic-depth search and returns up to three result titles
    (falling back to snippets) joined by ' | '. Never raises: failures
    come back as a descriptive string so the tool loop keeps running.
    """
    try:
        response = tavily_client.search(query=query, search_depth="basic")
        hits = response.get("results", [])
        if not hits:
            return "No results found."
        summaries = [hit.get("title") or hit.get("snippet") or "" for hit in hits[:3]]
        return " | ".join(summaries)
    except Exception as e:
        return f"Search error: {e}"
47
+
48
# JSON schemas advertised to the model for OpenAI function calling.
# Each entry's "name" must match a key in tool_map below so the dispatch
# in the ReAct loop can resolve the model's requested call.
functions = [
    {
        "name": "calculator",
        "description": "Evaluate a math expression. Returns the result as a string.",
        "parameters": {
            "type": "object",
            "properties": {
                "expr": {"type": "string", "description": "Math expression to evaluate"}
            },
            "required": ["expr"],
        },
    },
    {
        "name": "search",
        "description": "Look up facts on the web via Tavily; return up to three summaries separated by ' | '.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "The search query"}
            },
            "required": ["query"],
        },
    },
]
# Dispatch table: function-call name -> local Python callable.
tool_map = {"calculator": calculator, "search": search}
73
+
74
# ─── 4) The ReAct loop ───────────────────────────────────────────────────────────
def run_react(question: str, max_steps: int = 10) -> str:
    """Drive a ReAct-style loop via OpenAI function calling.

    Repeatedly asks the model for a completion; whenever it requests a tool
    call, the tool is executed locally and both the call and its output are
    appended to the conversation. The loop ends when the model answers
    directly, or after ``max_steps`` rounds — previously this was an
    unbounded ``while True``, so a model that kept requesting tools would
    burn API calls forever.

    Args:
        question: The user question to answer.
        max_steps: Maximum model rounds before giving up (default 10).

    Returns:
        The model's final answer, stripped, or an error string if the
        step budget is exhausted.
    """
    messages = [{"role": "user", "content": question}]
    for _ in range(max_steps):
        resp = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            # NOTE(review): `functions`/`function_call` is the legacy
            # function-calling API (superseded by `tools`/`tool_choice`);
            # kept as-is here since it still works with this SDK.
            messages=messages,
            functions=functions,
            function_call="auto",
        )
        msg = resp.choices[0].message

        if msg.function_call:
            # The model asked for a tool: run it and feed both the
            # assistant's call and the tool's result back into the loop.
            name = msg.function_call.name
            args = json.loads(msg.function_call.arguments)
            output = tool_map[name](**args)
            messages.append({
                "role": "assistant",
                "content": None,
                "function_call": msg.function_call.to_dict(),
            })
            messages.append({
                "role": "function",
                "name": name,
                "content": output,
            })
        else:
            # No tool requested: this is the final answer.
            return msg.content.strip()
    return "Error: exceeded maximum reasoning steps without a final answer."
105
+
106
# ─── 5) Gradio / GAIA integration ────────────────────────────────────────────────
# Scoring service endpoint for the HF Agents course (unit 4).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Per-username cache of answered questions; filled by run_agent and read by
# submit_answers. In-memory only — lost when the Space restarts.
_cache = {}

class GaiaAgent:
    """Thin callable adapter so the ReAct loop can be passed around as an agent."""

    def __call__(self, question: str) -> str:
        """Answer a single GAIA question via the function-calling ReAct loop."""
        return run_react(question)
113
 
114
async def run_agent(profile: gr.OAuthProfile | None):
    """Fetch the GAIA question set, answer each question concurrently, and
    cache the results for later submission.

    Args:
        profile: Hugging Face OAuth profile injected by Gradio, or None
            when the user is not logged in.

    Returns:
        A (status message, DataFrame-or-None) pair for the Gradio outputs.
    """
    if not profile:
        return "Please login.", None
    user = profile.username

    # Surface HTTP/network failures in the UI instead of letting the
    # exception crash the Gradio callback (the previous revision of this
    # function had this guard; it was lost in the rewrite).
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    agent = GaiaAgent()

    async def proc(item):
        # The agent call is blocking, so run it in a worker thread; capture
        # per-question failures so one bad question doesn't abort the batch.
        try:
            ans = await asyncio.to_thread(agent, item["question"])
        except Exception as e:
            ans = f"ERROR: {e}"
        return {
            "task_id": item["task_id"],
            "question": item["question"],
            "submitted_answer": ans,
        }

    results = await asyncio.gather(*(proc(it) for it in data))
    _cache[user] = results
    return f"Answered {len(results)} questions.", pd.DataFrame(results)
133
 
134
def submit_answers(profile: gr.OAuthProfile | None):
    """Submit the cached answers for the logged-in user to the scoring API.

    Args:
        profile: Hugging Face OAuth profile injected by Gradio, or None
            when the user is not logged in.

    Returns:
        A (status message, DataFrame-or-None) pair for the Gradio outputs.
    """
    if not profile:
        return "Please login.", None
    user = profile.username
    if user not in _cache:
        return "Run agent first.", None
    payload = [
        {"task_id": r["task_id"], "submitted_answer": r["submitted_answer"]}
        for r in _cache[user]
    ]
    # Link to this Space's source tree; the scoring service records it.
    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    body = {"username": user, "agent_code": agent_code, "answers": payload}
    # Report submission failures in the UI rather than raising out of the
    # Gradio callback — previously raise_for_status() propagated and the
    # user saw a generic error with their answers hidden.
    try:
        r = requests.post(f"{DEFAULT_API_URL}/submit", json=body, timeout=60)
        r.raise_for_status()
        res = r.json()
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(_cache[user])
    msg = (
        f"Score: {res.get('score')}% "
        f"({res.get('correct_count')}/{res.get('total_attempted')})"
    )
    return msg, pd.DataFrame(_cache[user])
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
# ─── 6) Gradio UI ────────────────────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Benchmark Runner")
    gr.LoginButton()  # HF OAuth login; populates the gr.OAuthProfile callback arg
    run_btn = gr.Button("Run agent on questions")
    sub_btn = gr.Button("Submit cached answers")
    out_txt = gr.Textbox(lines=3, interactive=False)  # status messages
    out_tbl = gr.DataFrame()                          # answers table
    # Both callbacks return (status, table); no inputs besides the OAuth profile.
    run_btn.click(run_agent, outputs=[out_txt, out_tbl])
    sub_btn.click(submit_answers, outputs=[out_txt, out_tbl])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)
app_prior.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ import asyncio
6
+ from gaia_new import graph # Use your agent
7
+ from typing import Optional
8
+
9
+ # Constants
10
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
+ user_answers_cache = {} # session-based cache
12
+
13
class GaiaAgent:
    """Adapter that runs a single GAIA question through the LangGraph agent."""

    def __init__(self):
        print("Graph-based agent initialized.")

    def __call__(self, question: str) -> str:
        """Invoke the graph on one question and extract the answer string.

        Never raises: failures come back as descriptive strings so the
        batch runner can keep going after individual errors.
        """
        print("Received question:", question)
        state = {"question": question, "answer": ""}
        try:
            result = graph.invoke(state)
            print("Result type:", type(result))
            print("Result value:", result)
            # Graph output shape isn't guaranteed here — only trust dicts.
            if isinstance(result, dict):
                return result.get("answer", "No answer generated.")
            else:
                return f"Unexpected output from graph: {result}"
        except Exception as e:
            return f"ERROR invoking graph: {e}"
30
+
31
+
32
# Async runner
async def run_agent(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, answer them concurrently, and cache results.

    Returns a (status message, DataFrame-or-None) pair for the Gradio outputs.
    """
    if not profile:
        return "Please login to Hugging Face.", None

    username = profile.username
    agent = GaiaAgent()

    # 1. Load questions — report fetch failures in the UI instead of raising.
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=10)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # 2. Process questions
    async def process(item):
        task_id = item.get("task_id")
        question = item.get("question")
        try:
            # Agent call is blocking; run it in a worker thread so all
            # questions are answered concurrently.
            answer = await asyncio.to_thread(agent, question)
            return {"task_id": task_id, "question": question, "submitted_answer": answer}
        except Exception as e:
            # Capture per-question failures so one error doesn't abort the batch.
            return {"task_id": task_id, "question": question, "submitted_answer": f"ERROR: {e}"}

    results = await asyncio.gather(*(process(item) for item in questions_data))
    user_answers_cache[username] = results  # cached for submit_answers

    df = pd.DataFrame(results)
    return f"Answered {len(results)} questions. Ready to submit.", df
63
+
64
+
65
def submit_answers(profile: gr.OAuthProfile | None):
    """Post the cached answers to the scoring API and report the score.

    Returns a (status message, DataFrame-or-None) pair for the Gradio outputs.
    """
    if not profile:
        return "Please login to Hugging Face.", None

    username = profile.username.strip()
    if username not in user_answers_cache:
        return "No cached answers. Please run the agent first.", None

    answers_payload = [
        {"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
        for item in user_answers_cache[username]
    ]

    # Link back to this Space's source tree, recorded by the scoring service.
    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}

    # 3. Submit to scoring API — surface failures in the UI, still showing
    # the cached answers so the user doesn't lose them.
    try:
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()
        final_status = (
            f"βœ… Submission Successful!\n"
            f"πŸ‘€ User: {result.get('username')}\n"
            f"🎯 Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"πŸ“© Message: {result.get('message', 'No message received.')}"
        )
        df = pd.DataFrame(user_answers_cache[username])
        return final_status, df
    except Exception as e:
        return f"❌ Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
98
+
99
+
100
# ────────── Gradio UI ──────────
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Agent Evaluation")
    gr.LoginButton()  # HF OAuth login; supplies gr.OAuthProfile to callbacks

    run_button = gr.Button("▢️ Run Agent on GAIA Questions")
    submit_button = gr.Button("πŸ“€ Submit Cached Answers")

    status = gr.Textbox(label="Status", lines=6, interactive=False)  # status messages
    results = gr.DataFrame(label="Answers", wrap=True)               # answers table

    # Both callbacks return (status, table); no inputs besides the OAuth profile.
    run_button.click(run_agent, outputs=[status, results])
    submit_button.click(submit_answers, outputs=[status, results])

if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(debug=True, share=False)
test_gaia_questions.py CHANGED
@@ -1,7 +1,7 @@
1
  # test_gaia_questions.py
2
 
3
  import requests
4
- from gaia_graph import graph
5
 
6
  def test_with_real_gaia_questions():
7
  # Fetch questions directly from the benchmark API
 
1
  # test_gaia_questions.py
2
 
3
  import requests
4
+ from gaia_new import graph
5
 
6
  def test_with_real_gaia_questions():
7
  # Fetch questions directly from the benchmark API
test_openai_agent.py CHANGED
@@ -139,3 +139,4 @@ if __name__ == "__main__":
139
  except Exception as e:
140
  result = f"[ERROR] {e}"
141
  print(f"β†’ {result}")
 
 
139
  except Exception as e:
140
  result = f"[ERROR] {e}"
141
  print(f"β†’ {result}")
142
+