SantoshKumar1310 committed on
Commit
05a84b0
·
verified ·
1 Parent(s): 507502c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ from typing import Dict, List
6
+ import asyncio
7
+
8
+ # custom imports
9
+ from agents import Agent
10
+ from tool import get_tools
11
+ from model import get_model
12
+
13
+ # --- Constants ---
14
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
+ MODEL_ID = "groq/llama-3.3-70b-versatile" # Groq's fastest model
16
+ RATE_LIMIT_DELAY = 1 # Groq has generous rate limits
17
+
18
+
19
+ # --- Async Question Processing ---
20
+ async def process_question(agent, question: str, task_id: str) -> Dict:
21
+ """Process a single question and return both answer AND full log entry"""
22
+ try:
23
+ answer = agent(question)
24
+ return {
25
+ "submission": {"task_id": task_id, "submitted_answer": answer},
26
+ "log": {"Task ID": task_id, "Question": question, "Submitted Answer": answer}
27
+ }
28
+ except Exception as e:
29
+ error_msg = f"ERROR: {str(e)}"
30
+ return {
31
+ "submission": {"task_id": task_id, "submitted_answer": error_msg},
32
+ "log": {"Task ID": task_id, "Question": question, "Submitted Answer": error_msg}
33
+ }
34
+
35
+ async def run_questions_async(agent, questions_data: List[Dict]) -> tuple:
36
+ """Process questions sequentially with minimal rate limiting"""
37
+ submissions = []
38
+ logs = []
39
+
40
+ total = len(questions_data)
41
+ for idx, q in enumerate(questions_data):
42
+ print(f"Processing {idx+1}/{total}: {q['question'][:80]}...")
43
+
44
+ # Add small delay between requests
45
+ if idx > 0:
46
+ await asyncio.sleep(RATE_LIMIT_DELAY)
47
+
48
+ result = await process_question(agent, q["question"], q["task_id"])
49
+ submissions.append(result["submission"])
50
+ logs.append(result["log"])
51
+
52
+ return submissions, logs
53
+
54
+
55
+ async def run_and_submit_all(profile: gr.OAuthProfile | None):
56
+ """
57
+ Fetches all questions, runs the Agent on them, submits all answers,
58
+ and displays the results.
59
+ """
60
+ space_id = os.getenv("SPACE_ID")
61
+
62
+ if profile:
63
+ username = f"{profile.username}"
64
+ print(f"User logged in: {username}")
65
+ else:
66
+ print("User not logged in.")
67
+ return "Please Login to Hugging Face with the button.", None
68
+
69
+ api_url = DEFAULT_API_URL
70
+ questions_url = f"{api_url}/questions"
71
+ submit_url = f"{api_url}/submit"
72
+
73
+ # 1. Instantiate Agent
74
+ try:
75
+ agent = Agent(
76
+ model=get_model("LiteLLMModel", MODEL_ID),
77
+ tools=get_tools()
78
+ )
79
+ except Exception as e:
80
+ print(f"Error instantiating agent: {e}")
81
+ return f"Error initializing agent: {e}", None
82
+
83
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
84
+ print(f"Agent code: {agent_code}")
85
+
86
+ # 2. Fetch Questions
87
+ print(f"Fetching questions from: {questions_url}")
88
+ try:
89
+ response = requests.get(questions_url, timeout=15)
90
+ response.raise_for_status()
91
+ questions_data = response.json()
92
+ if not questions_data:
93
+ print("Fetched questions list is empty.")
94
+ return "Fetched questions list is empty or invalid format.", None
95
+ print(f"Fetched {len(questions_data)} questions.")
96
+ estimated_time = len(questions_data) * RATE_LIMIT_DELAY / 60
97
+ print(f"⏱️ Estimated time: {estimated_time:.1f} minutes")
98
+ except Exception as e:
99
+ print(f"Error fetching questions: {e}")
100
+ return f"Error fetching questions: {e}", None
101
+
102
+ # 3. Run Agent
103
+ print(f"Running agent on {len(questions_data)} questions...")
104
+ answers_payload, results_log = await run_questions_async(agent, questions_data)
105
+
106
+ if not answers_payload:
107
+ print("Agent did not produce any answers to submit.")
108
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
109
+
110
+ # 4. Prepare Submission
111
+ submission_data = {
112
+ "username": username.strip(),
113
+ "agent_code": agent_code,
114
+ "answers": answers_payload
115
+ }
116
+ print(f"Submitting {len(answers_payload)} answers for user '{username}'...")
117
+
118
+ # 5. Submit
119
+ try:
120
+ response = requests.post(submit_url, json=submission_data, timeout=60)
121
+ response.raise_for_status()
122
+ result_data = response.json()
123
+ final_status = (
124
+ f"βœ… Submission Successful!\n\n"
125
+ f"User: {result_data.get('username')}\n"
126
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
127
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n\n"
128
+ f"Message: {result_data.get('message', 'No message received.')}\n\n"
129
+ f"Leaderboard: {api_url}/leaderboard"
130
+ )
131
+ print("Submission successful.")
132
+ results_df = pd.DataFrame(results_log)
133
+ return final_status, results_df
134
+ except Exception as e:
135
+ status_message = f"❌ Submission Failed: {e}"
136
+ print(status_message)
137
+ results_df = pd.DataFrame(results_log)
138
+ return status_message, results_df
139
+
140
+
141
+ # --- Build Gradio Interface ---
142
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
143
+ gr.Markdown("# πŸ€– GAIA Agent Evaluation")
144
+ gr.Markdown(
145
+ f"""
146
+ **Instructions:**
147
+ 1. Log in to your Hugging Face account using the button below
148
+ 2. Click 'Run Evaluation & Submit' to test your agent
149
+ 3. The agent will use web search and other tools to answer questions
150
+
151
+ **Current Setup:**
152
+ - Model: Llama 3.3 70B (via Groq)
153
+ - Tools: Web search, Wikipedia, calculation, and more
154
+ - Rate Limiting: {RATE_LIMIT_DELAY}s between requests
155
+
156
+ ⚠️ **Note:** Make sure you have set your GROQ_API_KEY in the Space secrets.
157
+ """
158
+ )
159
+
160
+ gr.LoginButton()
161
+
162
+ run_button = gr.Button("πŸš€ Run Evaluation & Submit", variant="primary")
163
+
164
+ status_output = gr.Textbox(label="πŸ“Š Status / Results", lines=8, interactive=False)
165
+ results_table = gr.DataFrame(label="πŸ“‹ Questions and Answers", wrap=True, max_height=400)
166
+
167
+ run_button.click(
168
+ fn=run_and_submit_all,
169
+ outputs=[status_output, results_table]
170
+ )
171
+
172
+ if __name__ == "__main__":
173
+ print("\n" + "="*70)
174
+ print("πŸ€– GAIA Agent Starting")
175
+ print("="*70)
176
+ print(f"πŸ“ Using Model: {MODEL_ID}")
177
+
178
+ space_host = os.getenv("SPACE_HOST")
179
+ space_id = os.getenv("SPACE_ID")
180
+
181
+ if space_host:
182
+ print(f"βœ… Runtime URL: https://{space_host}.hf.space")
183
+ if space_id:
184
+ print(f"βœ… Repo URL: https://huggingface.co/spaces/{space_id}")
185
+
186
+ print("="*70 + "\n")
187
+ demo.launch(debug=True, share=False)