wlchee committed on
Commit
f4fb7d0
·
verified ·
1 Parent(s): 377c37c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -201
app.py CHANGED
@@ -1,229 +1,153 @@
 
 
 
1
  import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
5
  from datetime import datetime
6
- import random
7
- from transformers import Tool
8
- from transformers.agents import Agent
9
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
- # --- Enhanced Agent Definition ---
14
class EnhancedAgent:
    """Keyword-routed question answerer with local tools and an LLM fallback.

    A question is matched, in order, against the calculator, clock and
    random-choice rules; anything left over is handed to a ``transformers``
    ``Agent``.
    """

    def __init__(self):
        print("EnhancedAgent initialized with tools.")
        # Tool registry keyed by the names used in ``__call__``.
        self.tools = {
            "calculator": self.calculator,
            "time": self.get_current_time,
            "random_choice": self.random_choice,
        }

    def calculator(self, expression: str) -> str:
        """Evaluate an arithmetic expression and return the result as text.

        The expression is parsed with ``ast`` rather than passed to ``eval``:
        question text is fetched from a remote endpoint and must not be able
        to execute arbitrary Python. Only int/float literals, parentheses,
        unary ``+``/``-`` and the operators ``+ - * / // % **`` are accepted.

        Args:
            expression: Arithmetic expression, e.g. ``"2 + 3 * 4"``.

        Returns:
            ``str(result)`` on success, otherwise a fixed error message.
        """
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def evaluate(node):
            if isinstance(node, ast.Expression):
                return evaluate(node.body)
            # ``type(...) in`` rather than isinstance so that bools are rejected.
            if isinstance(node, ast.Constant) and type(node.value) in (int, float):
                return node.value
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if isinstance(node, ast.BinOp):
                left, right = evaluate(node.left), evaluate(node.right)
                if isinstance(node.op, ast.Pow):
                    # Bound the operands so "9**9**9" cannot hang the worker.
                    if abs(left) > 10**6 or abs(right) > 100:
                        raise ValueError("power operands too large")
                    return left ** right
                if type(node.op) in binary_ops:
                    return binary_ops[type(node.op)](left, right)
            raise ValueError("unsupported expression")

        try:
            tree = ast.parse(expression.strip(), mode="eval")
            return str(evaluate(tree))
        except (SyntaxError, ValueError, TypeError, ArithmeticError,
                AttributeError, RecursionError):
            return "Error: Could not evaluate the expression"

    def get_current_time(self) -> str:
        """Return the current UTC time as ``YYYY-MM-DD HH:MM:SS UTC``."""
        from datetime import timezone

        # Timezone-aware replacement for the deprecated ``datetime.utcnow()``.
        return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

    def random_choice(self, items: str) -> str:
        """Pick one entry at random from a comma-separated string.

        Args:
            items: Options separated by commas, e.g. ``"tea, coffee"``.

        Returns:
            ``"I choose: <option>"``, or an error message when ``items`` is
            not a string.
        """
        try:
            options = [x.strip() for x in items.split(",")]
            return f"I choose: {random.choice(options)}"
        except (AttributeError, IndexError):
            return "Error: Please provide comma-separated options"

    def __call__(self, question: str) -> str:
        """Answer ``question`` using the first matching tool or the fallback.

        Args:
            question: Free-text question.

        Returns:
            The selected tool's answer, the fallback agent's answer, or a
            fixed apology when the fallback raises.
        """
        import re

        print(f"Processing question: {question[:100]}...")
        question_lower = question.lower()

        # Math questions.
        # NOTE(review): "-" and "/" also match hyphens and slashes in ordinary
        # prose, and "what is" matches most factual questions, so this route
        # is very broad; kept as-is to preserve routing behaviour.
        math_markers = ["calculate", "what is", "how much is", "+", "-", "*", "/"]
        if any(word in question_lower for word in math_markers):
            # Strip the prompt words case-insensitively so that
            # "What is 2 + 2?" reduces to "2 + 2".
            expr = re.sub(
                r"what is|how much is|calculate",
                "",
                question.replace("?", ""),
                flags=re.IGNORECASE,
            ).strip()
            return self.tools["calculator"](expr)

        # Time questions ("current time" / "what time is it" all contain "time").
        if "time" in question_lower:
            return self.tools["time"]()

        # Random choice questions: an "or" with no interrogative word.
        interrogatives = ["who", "what", "when", "where", "why", "how"]
        if " or " in question_lower and not any(
            word in question_lower for word in interrogatives
        ):
            return self.tools["random_choice"](
                question.replace("?", "").replace(" or ", ",")
            )

        # Fallback to HF Agent for complex questions
        try:
            agent = Agent("bigcode/starcoder")
            return agent.run(question, remote=True)
        except Exception as e:
            print(f"Agent error: {e}")
            return "I couldn't find an answer to that question."
66
-
67
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the EnhancedAgent on them, submit all answers,
    and report the result.

    Args:
        profile: OAuth profile injected by Gradio, or None when nobody is
            logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame with
        one row per processed question, or None when the run stopped before
        any question was processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = EnhancedAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # Link to this Space's code, sent along with the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError subclasses RequestException, so it has to be handled
    # first; in the opposite order this branch can never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failed question is logged for display but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except (requests.exceptions.JSONDecodeError, AttributeError):
            # Body is not JSON, or is JSON but not an object with "detail".
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Every outcome of the submission step reports the same per-question table.
    return status_message, pd.DataFrame(results_log)
186
-
187
- # --- Build Gradio Interface using Blocks ---
188
# Page layout: heading, instructions, login button, run button and outputs.
with gr.Blocks() as demo:
    gr.Markdown("# Enhanced Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the enhanced agent, submit answers, and see the score.

        **Agent Capabilities:**
        - Math calculations
        - Current time lookup
        - Random choice selection
        - Complex question handling via AI
        """
    )

    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # The click handler declares no inputs and fills the two output widgets.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    # Startup banner plus whichever Space environment variables are set.
    print(f"\n{'-' * 30} App Starting {'-' * 30}")
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")

    # Closing rule is as wide as the opening banner.
    print("-" * (60 + len(" App Starting ")) + "\n")
    print("Launching Gradio Interface for Enhanced Agent Evaluation...")
    demo.launch(debug=True, share=False)
 
1
+ """
2
+ Advanced Agent Evaluation Runner with Custom LangGraph Implementation
3
+ """
4
  import os
5
  import gradio as gr
6
  import requests
7
  import pandas as pd
8
+ from typing import Dict, List, Optional
9
  from datetime import datetime
10
+ from langgraph.graph import StateGraph, END
11
+ from langchain_core.messages import HumanMessage, AIMessage
 
12
 
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
+ # --- Custom Agent State Definition ---
17
from typing import TypedDict


class AgentState(TypedDict):
    """State dictionary passed between the nodes of the agent graph.

    Declared as a ``TypedDict`` so the keys form a real schema for both the
    graph builder and type checkers; at runtime instances are plain dicts.
    """

    # Message history; initialised to an empty list by the agent wrapper.
    messages: List[Dict]
    # The question currently being answered.
    question: str
    # Answer produced by a node; None until a node has run.
    response: Optional[str]
21
+
22
+ # --- Custom Tool Implementations ---
23
class MathTool:
    """Calculator tool that evaluates plain arithmetic expressions."""

    def execute(self, expression: str) -> str:
        """Evaluate ``expression`` and return a human-readable result.

        The expression is parsed with ``ast`` rather than passed to ``eval``:
        question text is fetched from a remote endpoint and must not be able
        to execute arbitrary Python. Only int/float literals, parentheses,
        unary ``+``/``-`` and the operators ``+ - * / // % **`` are accepted.

        Args:
            expression: Arithmetic expression, e.g. ``"2 + 3 * 4"``.

        Returns:
            ``"Calculation result: <value>"`` on success, otherwise a fixed
            error message.
        """
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def evaluate(node):
            if isinstance(node, ast.Expression):
                return evaluate(node.body)
            # ``type(...) in`` rather than isinstance so that bools are rejected.
            if isinstance(node, ast.Constant) and type(node.value) in (int, float):
                return node.value
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if isinstance(node, ast.BinOp):
                left, right = evaluate(node.left), evaluate(node.right)
                if isinstance(node.op, ast.Pow):
                    # Bound the operands so "9**9**9" cannot hang the worker.
                    if abs(left) > 10**6 or abs(right) > 100:
                        raise ValueError("power operands too large")
                    return left ** right
                if type(node.op) in binary_ops:
                    return binary_ops[type(node.op)](left, right)
            raise ValueError("unsupported expression")

        try:
            tree = ast.parse(expression.strip(), mode="eval")
            return f"Calculation result: {evaluate(tree)}"
        except (SyntaxError, ValueError, TypeError, ArithmeticError,
                AttributeError, RecursionError):
            return "Error: Invalid mathematical expression"
29
 
30
class TimeTool:
    """Clock tool that reports the current UTC time."""

    def execute(self) -> str:
        """Return ``"Current time: YYYY-MM-DD HH:MM:SS UTC"`` for the present moment."""
        from datetime import timezone

        # Timezone-aware replacement for the deprecated ``datetime.utcnow()``.
        now = datetime.now(timezone.utc)
        return f"Current time: {now.strftime('%Y-%m-%d %H:%M:%S UTC')}"
33
 
34
+ # --- Custom Agent Graph Builder ---
35
def create_agent_workflow():
    """Build and compile the question-routing graph.

    The graph has three terminal nodes (``math_tool``, ``time_tool`` and
    ``llm_response``). ``route_question`` chooses one of them directly at the
    entry point, and each node writes its answer to the ``response`` key
    before the run ends.

    Returns:
        The compiled graph returned by ``StateGraph.compile()``.
    """
    import re

    workflow = StateGraph(AgentState)

    def route_question(state: AgentState):
        """Return the name of the node that should handle the question."""
        question = state["question"].lower()
        # NOTE(review): "-" and "/" also match hyphens and slashes in ordinary
        # prose, so this route is broad; kept to preserve routing behaviour.
        if any(op in question for op in ["+", "-", "*", "/", "calculate"]):
            return "math_tool"
        # "current time" is already covered by the plain "time" check.
        if "time" in question:
            return "time_tool"
        return "llm_response"

    def math_node(state: AgentState):
        """Strip the prompt wording and evaluate what remains."""
        # Passing the raw question ("calculate 2+3?") to the calculator can
        # never parse, so drop the keyword and the question mark first.
        expression = re.sub(r"calculate", "", state["question"], flags=re.IGNORECASE)
        expression = expression.replace("?", "").strip()
        return {"response": MathTool().execute(expression)}

    def time_node(state: AgentState):
        """Answer with the current UTC time."""
        return {"response": TimeTool().execute()}

    def llm_node(state: AgentState):
        """Placeholder answer; no model is called here."""
        # Simulated LLM response
        return {"response": f"AI response to: {state['question']}"}

    # Build graph
    workflow.add_node("math_tool", math_node)
    workflow.add_node("time_tool", time_node)
    workflow.add_node("llm_response", llm_node)

    # Route straight from the graph entry. Naming a "start" node that was
    # never registered with add_node makes the graph fail validation.
    workflow.set_conditional_entry_point(
        route_question,
        {
            "math_tool": "math_tool",
            "time_tool": "time_tool",
            "llm_response": "llm_response",
        },
    )

    # Every tool node finishes the run.
    for node_name in ("math_tool", "time_tool", "llm_response"):
        workflow.add_edge(node_name, END)

    return workflow.compile()
80
 
81
+ # --- Custom Agent Class ---
82
class CustomLangGraphAgent:
    """Callable wrapper around the compiled agent workflow."""

    def __init__(self):
        """Compile the workflow once and keep it for later calls."""
        self.workflow = create_agent_workflow()
        print("Custom LangGraph agent initialized")

    def __call__(self, question: str) -> str:
        """Run the workflow on ``question`` and return its ``response`` value.

        Any exception raised while running the workflow is printed and
        replaced by a fixed error string.
        """
        initial_state = dict(question=question, messages=[], response=None)
        try:
            final_state = self.workflow.invoke(initial_state)
            return final_state["response"]
        except Exception as exc:
            print(f"Agent error: {exc}")
            return "Error processing question"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ # --- Evaluation Runner ---
97
def run_evaluation(profile: gr.OAuthProfile | None):
    """Fetch the benchmark questions, answer them, and submit the answers.

    Args:
        profile: OAuth profile injected by Gradio, or None when nobody is
            logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame with
        one row per processed question, or None when no question was
        processed.
    """
    if not profile:
        return "Please login first", None

    space_id = os.getenv("SPACE_ID", "local-test")
    api_url = os.getenv("API_URL", DEFAULT_API_URL)

    # Kept outside the try block so partial results survive a later failure.
    results = []
    try:
        agent = CustomLangGraphAgent()

        questions_response = requests.get(f"{api_url}/questions", timeout=15)
        # Without this, an HTTP error body would be parsed as the question list.
        questions_response.raise_for_status()
        questions = questions_response.json()

        answers = []
        for q in questions:
            task_id = q.get("task_id")
            question = q.get("question")
            # Skip malformed items instead of letting a KeyError abort the run.
            if not task_id or question is None:
                continue
            try:
                answer = agent(question)
            except Exception as e:
                # A failed question is shown in the table but not submitted.
                results.append({"Task ID": task_id, "Question": question, "Answer": f"Error: {e}"})
                continue
            answers.append({"task_id": task_id, "submitted_answer": answer})
            results.append({"Task ID": task_id, "Question": question, "Answer": answer})

        if not answers:
            # Nothing to score; avoid posting an empty submission.
            return (
                "Evaluation failed: no answers to submit",
                pd.DataFrame(results) if results else None,
            )

        submission = {
            "username": profile.username,
            "agent_code": f"https://huggingface.co/spaces/{space_id}",
            "answers": answers,
        }

        response = requests.post(f"{api_url}/submit", json=submission, timeout=60)
        response.raise_for_status()

        return (
            f"Success! Score: {response.json().get('score', 'N/A')}%",
            pd.DataFrame(results),
        )

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure rather
        # than raise, and keep whatever answers were already produced.
        return f"Evaluation failed: {e}", pd.DataFrame(results) if results else None
134
+
135
+ # --- Gradio Interface ---
136
# Page layout: heading, login button, run button and the two output widgets.
with gr.Blocks(title="Custom LangGraph Agent Evaluator") as app:
    gr.Markdown("""
    ## Custom LangGraph Agent Evaluation
    Test your agent against the benchmark questions
    """)

    gr.LoginButton()
    run_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Results")
    results = gr.DataFrame(label="Details")

    # The click handler declares no inputs and fills the two output widgets.
    run_btn.click(fn=run_evaluation, outputs=[output, results])

if __name__ == "__main__":
    # Start the UI only when executed as a script.
    app.launch()