SantoshKumar1310 committed on
Commit
5c3f940
·
verified ·
1 Parent(s): 5c5ce54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -80
app.py CHANGED
@@ -1,89 +1,175 @@
1
- """Model wrapper for LiteLLM"""
2
-
3
  import os
4
- from typing import List, Dict, Any, Optional
 
 
 
 
 
 
 
 
5
 
6
- try:
7
- import litellm
8
- except ImportError:
9
- print("⚠️ litellm not installed. Install with: pip install litellm")
10
- litellm = None
11
 
12
 
13
class LiteLLMModel:
    """Wrapper that sends chat requests through ``litellm.completion``.

    The wrapper converts tool objects into the function-calling schema,
    injects the Gemini API key when the model id mentions "gemini", and
    normalises the response into a plain dict.
    """

    def __init__(self, model_id: str):
        """Store the model id and warn early if a Gemini key is missing.

        Args:
            model_id: Model identifier passed verbatim to ``litellm``.
        """
        self.model_id = model_id

        if "gemini" in model_id.lower():
            if not os.getenv("GEMINI_API_KEY"):
                print("⚠️ GEMINI_API_KEY not set in environment")

    @staticmethod
    def _parse_tool_arguments(raw_arguments):
        """Decode tool-call arguments without executing them.

        Args:
            raw_arguments: Either an already-decoded object (returned
                unchanged) or a string holding JSON / a Python literal.

        Returns:
            The decoded arguments; an empty dict for an empty string.

        Raises:
            ValueError, SyntaxError: If the string is neither valid JSON nor
                a plain Python literal.
        """
        import ast
        import json

        if not isinstance(raw_arguments, str):
            return raw_arguments

        text = raw_arguments.strip()
        if not text:
            return {}

        try:
            # Tool arguments are JSON; eval() would both execute arbitrary
            # model-provided code and choke on true/false/null.
            return json.loads(text)
        except ValueError:
            # Backward compatibility: accept Python-literal style dicts
            # (single quotes, True/None) but never arbitrary expressions.
            return ast.literal_eval(text)

    def generate(self, messages: List[Dict], tools: Optional[List] = None) -> Dict:
        """Run one completion and return its content and tool calls.

        Args:
            messages: Chat messages forwarded to ``litellm.completion``.
            tools: Optional tool objects exposing ``name``, ``description``
                and ``parameters`` attributes.

        Returns:
            A dict with ``"content"`` (empty string when the model returned
            none) and, when the model requested tools, ``"tool_calls"`` as a
            list of ``{"name", "arguments"}`` dicts. Any failure is printed
            and reported as ``{"content": "Unknown"}``.
        """
        if not litellm:
            return {"content": "Unknown - litellm not installed"}

        try:
            formatted_tools = None
            if tools:
                formatted_tools = [
                    {
                        "type": "function",
                        "function": {
                            "name": tool.name,
                            "description": tool.description,
                            "parameters": tool.parameters,
                        },
                    }
                    for tool in tools
                ]

            request = {
                "model": self.model_id,
                "messages": messages,
                "tools": formatted_tools,
                "temperature": 0.1,
            }

            if "gemini" in self.model_id.lower():
                api_key = os.getenv("GEMINI_API_KEY")
                if not api_key:
                    raise RuntimeError("GEMINI_API_KEY not set in environment")

                print(f"DEBUG: Using model id: {self.model_id}")
                # Only Gemini requests carry an explicit key.
                request["api_key"] = api_key

            response = litellm.completion(**request)

            message = response.choices[0].message
            result = {"content": message.content or ""}

            tool_calls = getattr(message, "tool_calls", None)
            if tool_calls:
                result["tool_calls"] = [
                    {
                        "name": tc.function.name,
                        "arguments": self._parse_tool_arguments(tc.function.arguments),
                    }
                    for tc in tool_calls
                ]

            return result

        except Exception as e:
            # Best-effort boundary: callers expect a dict, never an exception.
            print(f"Model error: {e}")
            return {"content": "Unknown"}
83
 
 
 
 
 
 
 
84
 
85
def get_model(model_type: str, model_id: str):
    """Build the model wrapper named by ``model_type``.

    Args:
        model_type: Name of the wrapper class; only "LiteLLMModel" is known.
        model_id: Identifier handed to the wrapper's constructor.

    Returns:
        A ``LiteLLMModel`` instance for ``model_id``.

    Raises:
        ValueError: If ``model_type`` is not a known wrapper name.
    """
    if model_type != "LiteLLMModel":
        raise ValueError(f"Unknown model type: {model_type}")
    return LiteLLMModel(model_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ from typing import Dict, List
6
+
7
+ # custom imports
8
+ from agents import Agent
9
+ from tool import get_tools
10
+ from model import get_model
11
 
12
+ # --- Constants ---
13
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
+ MODEL_ID = "gemini/gemini-2.0-flash-exp"
 
 
15
 
16
 
17
+ # --- Async Question Processing ---
18
async def process_question(agent, question: str, task_id: str) -> Dict:
    """Answer one question and return both the submission and log entries.

    The agent is a synchronous callable, so it is executed in a worker
    thread; calling it directly inside the coroutine would block the event
    loop for the whole duration of the agent run.

    Args:
        agent: Callable invoked as ``agent(question)``.
        question: Question text passed to the agent.
        task_id: Identifier echoed back in both result entries.

    Returns:
        A dict with ``"submission"`` (``task_id`` / ``submitted_answer``) and
        ``"log"`` (``Task ID`` / ``Question`` / ``Submitted Answer``). When
        the agent raises, the answer is ``"ERROR: <message>"`` instead.
    """
    import asyncio

    try:
        answer = await asyncio.to_thread(agent, question)
    except Exception as e:
        # A failing question must not abort the whole evaluation run.
        answer = f"ERROR: {str(e)}"

    return {
        "submission": {"task_id": task_id, "submitted_answer": answer},
        "log": {"Task ID": task_id, "Question": question, "Submitted Answer": answer},
    }
32
+
33
async def run_questions_async(agent, questions_data: List[Dict]) -> tuple:
    """Run the agent over every question, one at a time and in order.

    Args:
        agent: Agent forwarded to ``process_question``.
        questions_data: Items providing ``"question"`` and ``"task_id"``.

    Returns:
        A ``(submissions, logs)`` pair of lists, one entry per question.
    """
    answer_entries = []
    log_entries = []
    question_count = len(questions_data)

    for position, item in enumerate(questions_data, start=1):
        print(f"Processing {position}/{question_count}: {item['question'][:80]}...")
        outcome = await process_question(agent, item["question"], item["task_id"])
        answer_entries.append(outcome["submission"])
        log_entries.append(outcome["log"])

    return answer_entries, log_entries
46
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
async def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the Agent on them, submit the answers, and
    report the outcome.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame of
        the per-question log once the agent has run, and None when the run
        stops before that (no login, agent init failure, fetch failure).
    """
    import asyncio

    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = Agent(
            model=get_model("LiteLLMModel", MODEL_ID),
            tools=get_tools()
        )
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # NOTE(review): when SPACE_ID is unset this link contains "None".
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        # requests is blocking; run it off the event loop.
        response = await asyncio.to_thread(requests.get, questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        # The message below promises a format check, so reject non-lists too.
        if not questions_data or not isinstance(questions_data, list):
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
        # To debug on a subset, slice here (e.g. questions_data[:2]).
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    # 3. Run Agent
    print(f"Running agent on {len(questions_data)} questions...")
    answers_payload, results_log = await run_questions_async(agent, questions_data)

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }
    print(f"Submitting {len(answers_payload)} answers for user '{username}'...")

    # 5. Submit
    try:
        response = await asyncio.to_thread(
            requests.post, submit_url, json=submission_data, timeout=60
        )
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"✅ Submission Successful!\n\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n\n"
            f"Message: {result_data.get('message', 'No message received.')}\n\n"
            f"Leaderboard: {api_url}/leaderboard"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except Exception as e:
        # Still return the answers table so the run is not lost.
        status_message = f"❌ Submission Failed: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
131
+
132
+
133
+ # --- Build Gradio Interface ---
134
# Page layout: instructions, login button, run button, then the status box
# and the per-question results table that the run handler fills in.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 GAIA Agent Evaluation")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below
        2. Click 'Run Evaluation & Submit' to test your agent
        3. The agent will use web search and other tools to answer questions

        **Current Setup:**
        - Model: Gemini 2.0 Flash (via LiteLLM)
        - Tools: Web search, Wikipedia, calculation, and more
        """
    )

    gr.LoginButton()

    run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")

    status_output = gr.Textbox(label="📊 Status / Results", lines=8, interactive=False)
    results_table = gr.DataFrame(label="📋 Questions and Answers", wrap=True, max_height=400)

    # No explicit inputs are wired; the handler's only parameter is the
    # OAuth profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
160
+
161
# Script entry point: print a startup banner with the Space URLs (when the
# corresponding environment variables are set), then launch the UI.
if __name__ == "__main__":
    print("\n" + "="*70)
    print("🤖 GAIA Agent Starting")
    print("="*70)

    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        # NOTE(review): if SPACE_HOST already ends in ".hf.space" this URL
        # doubles the suffix — confirm against the Spaces documentation.
        print(f"✅ Runtime URL: https://{space_host}.hf.space")
    if space_id:
        print(f"✅ Repo URL: https://huggingface.co/spaces/{space_id}")

    print("="*70 + "\n")
    demo.launch(debug=True, share=False)