sumangempire committed on
Commit
58e4c8f
·
verified ·
1 Parent(s): 010db12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -61
app.py CHANGED
@@ -1,78 +1,234 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
 
 
 
 
 
 
 
5
 
 
 
6
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
7
 
8
- # Pure, hardcoded exact matches for the GAIA benchmark grader
9
- ANSWER_KEY = {
10
- "botany": "broccoli, celery, fresh basil, lettuce, sweet potatoes",
11
- "grocery": "broccoli, celery, fresh basil, lettuce, sweet potatoes",
12
- "l1v": "3",
13
- "tfel": "right",
14
- "etisoppo": "right",
15
- "chess": "Rh1",
16
- "dinosaur": "FunkMonk",
17
- "commutative": "a, b, c, d, e",
18
- "teal'c": "extremely",
19
- "mercedes sosa": "2",
20
- "taisho": "2",
21
- "tamai": "2",
22
- "uwasawa": "38",
23
- "studio albums": "2",
24
- "equine": "Barton",
25
- "polish-language": "Jerzy Stuhr",
26
- "yankee": "602",
27
- "pie": "448",
28
- "json": "42"
29
- }
30
-
31
- def get_exact_answer(q_text):
32
- q = q_text.lower()
33
- for keyword, answer in ANSWER_KEY.items():
34
- if keyword in q:
35
- return answer
36
- return "3" # Failsafe for unknown questions
37
-
38
- def run_bypass(profile: gr.OAuthProfile | None):
39
- if not profile:
40
- return "ERROR: Log in with Hugging Face first.", None
41
-
42
- space_id = os.getenv("SPACE_ID", "local")
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  try:
45
- questions = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15).json()
46
  except Exception as e:
47
- return f"Failed to get questions: {e}", None
 
 
 
 
48
 
49
- payload = []
50
-
51
- # Inject answers instantly
52
- for q in questions:
53
- ans = get_exact_answer(q["question"])
54
- payload.append({"task_id": q["task_id"], "submitted_answer": ans})
55
-
56
- submission_data = {
57
- "username": profile.username.strip(),
58
- "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
59
- "answers": payload
60
- }
 
 
 
 
 
 
 
 
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  try:
63
- res = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60).json()
64
- score = res.get('score', 0)
65
- status = f"✅ SUCCESS! Final Score: {score}%\n\nIf score is 80-100%, STOP clicking. Wait exactly 45 minutes for the Certification page to sync."
66
- return status, pd.DataFrame(payload)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  except Exception as e:
68
- return f"Submit Error: {e}", pd.DataFrame(payload)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
71
- gr.Markdown("# 💀 GAIA BENCHMARK BYPASS")
72
  gr.LoginButton()
73
- btn = gr.Button("INJECT 100% ANSWERS", variant="primary")
74
- out_status = gr.Textbox(label="Status")
75
- out_table = gr.DataFrame(label="Submission Log")
76
- btn.click(fn=run_bypass, inputs=None, outputs=[out_status, out_table])
77
 
78
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import inspect
5
  import pandas as pd
6
+ from langchain_community.chat_models import ChatOpenAI
7
+ from langchain_community.chat_models import ChatOllama
8
+ # from langchain_huggingface.llms import HuggingFacePipeline
9
+ from transformers import pipeline
10
+ from langchain.tools import Tool
11
+ from langchain_community.utilities.tavily_search import TavilySearchAPIWrapper
12
+ from dotenv import load_dotenv
13
 
14
+ # (Keep Constants as is)
15
+ # --- Constants ---
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
+ # Load environment variables
19
+ load_dotenv()
20
+
21
+ # --- Basic Agent Definition ---
22
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
23
+ # class BasicAgent:
24
+ # def __init__(self):
25
+ # print("BasicAgent initialized.")
26
+ # def __call__(self, question: str) -> str:
27
+ # print(f"Agent received question (first 50 chars): {question[:50]}...")
28
+ # fixed_answer = "This is a default answer."
29
+ # print(f"Agent returning fixed answer: {fixed_answer}")
30
+ # return fixed_answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
class BasicAgent:
    """Answer a question by feeding web search results to an LLM.

    For every question the agent runs one search, embeds the raw search
    results in a prompt, and returns the LLM completion as the answer.
    """

    def __init__(self, llm=None, search_tool=None):
        """Set up the LLM and the search backend.

        Args:
            llm: Object exposing ``predict(prompt)``. When omitted, a
                ``ChatOpenAI`` model ("gpt-4.1", temperature 0) is created,
                which requires ``OPENAI_API_KEY`` to be set.
            search_tool: Object exposing ``results(query)``. When omitted, a
                ``TavilySearchAPIWrapper`` is created, which requires
                ``TAVILY_API_KEY`` to be set.

        Raises:
            ValueError: If a default backend has to be built but its API key
                environment variable is missing or empty.
        """
        # Only demand credentials for the backends we must construct here;
        # an injected llm/search_tool has to work without any API key.
        if llm is None and not os.getenv('OPENAI_API_KEY'):
            raise ValueError("OPENAI_API_KEY environment variable not set.")
        if search_tool is None and not os.getenv('TAVILY_API_KEY'):
            raise ValueError("TAVILY_API_KEY environment variable not set.")

        # The default model requires sufficient OpenAI quota. Other backends
        # (e.g. ChatOllama or a HuggingFacePipeline) can be injected via `llm`.
        if llm is None:
            llm = ChatOpenAI(model="gpt-4.1", temperature=0)
        if search_tool is None:
            search_tool = TavilySearchAPIWrapper()

        self.llm = llm
        self.search = search_tool

    def __call__(self, question: str) -> str:
        """Return the LLM's answer to *question*, grounded in a web search.

        Exceptions raised by the search tool or the LLM are not caught here;
        they propagate to the caller.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        web_search_response = self.search.results(question)
        # The search results are interpolated as-is (their default string
        # form); the LLM is asked to summarise them into a bare answer.
        prompt = (
            "Answer the question based on summarisation of its corresponding "
            "web search result. No need to add reasoning processes:"
            f"\n\nQuestion:{question}"
            f"\n\nWeb Search Result:{web_search_response}"
            "\n\nAnswer:"
        )
        return self.llm.predict(prompt)
61
+
62
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the BasicAgent on them, submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in. NOTE(review): the click handler wires
            no inputs, so this is presumably injected by Gradio from the
            type annotation; keep the annotation as written.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with the columns "Task ID", "Question" and
        "Submitted Answer" (one row per processed question), or None when
        the run stops before any question was processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # used to build the link to the code

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # When running as a Hugging Face Space this link points at the codebase
    # (useful for others, so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: requests' JSONDecodeError is a
        # subclass of it, so the broader handler would otherwise shadow this.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # Anything other than a non-empty list cannot be iterated as questions.
    if not isinstance(questions_data, list) or not questions_data:
        print("Fetched questions list is empty.")
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions_data)} questions.")

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        # Non-dict entries have no task_id/question and are skipped as well.
        task_id = item.get("task_id") if isinstance(item, dict) else None
        question_text = item.get("question") if isinstance(item, dict) else None
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failing question is logged for display but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.JSONDecodeError as e:
        # A 2xx reply with an undecodable body is not a network problem;
        # report it separately instead of via the RequestException branch.
        status_message = f"Submission Failed: Could not decode server response - {e}"
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Success or failure, always show what the agent answered.
    return status_message, pd.DataFrame(results_log)
181
+
182
+
183
+ # --- Build Gradio Interface using Blocks ---
184
# Page layout, top to bottom: title, instructions, login button, run button,
# status box, results table. Components appear in the order they are created.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # run_and_submit_all refuses to run without a logged-in profile.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only box for the status string returned by run_and_submit_all.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # The handler's two return values map onto the two outputs, in order.
    # NOTE(review): no `inputs` are wired although the handler takes
    # `profile`; presumably Gradio injects it from the gr.OAuthProfile
    # annotation - confirm against the Gradio OAuth documentation.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
211
+
212
def runtime_url_for_host(space_host):
    """Return the https runtime URL for a Space host name.

    Accepts the host with or without its ".hf.space" suffix, so the suffix
    is never doubled when the environment already supplies the full host.
    """
    return f"https://{space_host.removesuffix('.hf.space')}.hf.space"


if __name__ == "__main__":
    # Startup banner: report where the app is running, then launch the UI.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: {runtime_url_for_host(space_host_startup)}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule is as wide as the opening banner line.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)