Shivangsinha committed on
Commit
567c67d
·
verified ·
1 Parent(s): 0fd1749

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -30
app.py CHANGED
@@ -6,7 +6,7 @@ import inspect
6
  import pandas as pd
7
  from smolagents import (
8
  CodeAgent,
9
- InferenceClientModel, # <-- Updated here
10
  DuckDuckGoSearchTool,
11
  WikipediaSearchTool,
12
  PythonInterpreterTool,
@@ -16,6 +16,33 @@ from smolagents import (
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # --- Basic Agent Definition ---
20
  @tool
21
  def get_current_date_time() -> str:
@@ -27,9 +54,14 @@ class BasicAgent:
27
  def __init__(self):
28
  print("BasicAgent initialized.")
29
 
30
- # Using Hugging Face's free Serverless Inference API with the updated class name
31
- self.model = InferenceClientModel( # <-- Updated here
32
- model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
 
 
 
 
 
33
  )
34
 
35
  self.tools = [
@@ -45,26 +77,18 @@ class BasicAgent:
45
  max_steps=8,
46
  additional_authorized_imports=["datetime", "re", "json", "math", "collections"],
47
  )
48
- print("BasicAgent ready with Qwen2.5-Coder-32B-Instruct (CodeAgent).")
49
 
50
  def __call__(self, question: str) -> str:
51
- print(f"Agent received question: {question[:80]}...")
52
- max_retries = 3
53
- for attempt in range(max_retries):
54
- try:
55
- answer = self.agent.run(question)
56
- print(f"Agent answer: {str(answer)[:200]}")
57
- return str(answer)
58
- except Exception as e:
59
- err = str(e)
60
- if "429" in err or "rate_limit" in err.lower() or "quota" in err.lower():
61
- wait_time = 30 * (attempt + 1)
62
- print(f"Rate limit hit, waiting {wait_time}s before retry {attempt+1}/{max_retries}...")
63
- time.sleep(wait_time)
64
- else:
65
- print(f"Agent error: {e}")
66
- return f"Error: {err}"
67
- return "Error: Rate limit exceeded after retries"
68
 
69
  # --- The rest of the code ---
70
  def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -87,7 +111,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
87
  return f"Error initializing agent: {e}", None
88
 
89
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
90
- print(f"Agent code: {agent_code}")
91
  print(f"Fetching questions from: {questions_url}")
92
 
93
  try:
@@ -95,10 +118,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
95
  response.raise_for_status()
96
  questions_data = response.json()
97
  if not questions_data:
98
- print("No questions.")
99
  return "No questions.", None
100
  except Exception as e:
101
- print(f"Error fetching questions: {e}")
102
  return f"Error fetching questions: {e}", None
103
 
104
  results_log = []
@@ -119,9 +140,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
119
  print(f"Error on task {task_id}: {e}")
120
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
121
 
122
- # Wait 10 seconds between questions to play nicely with HF inference servers
123
- time.sleep(10)
124
-
125
  if not answers_payload:
126
  return "No answers.", pd.DataFrame(results_log)
127
 
@@ -151,9 +169,10 @@ with gr.Blocks() as demo:
151
  gr.Markdown(
152
  """
153
  **Instructions:**
154
- 1. Ensure you have your `HF_TOKEN` in your Space secrets (Settings -> Secrets).
155
  2. Log in with your Hugging Face account below.
156
- 3. Click 'Run Evaluation & Submit' to start. Please be patient, as inference will take a few minutes to process 20 questions securely.
 
157
  """
158
  )
159
  gr.LoginButton()
 
6
  import pandas as pd
7
  from smolagents import (
8
  CodeAgent,
9
+ LiteLLMModel,
10
  DuckDuckGoSearchTool,
11
  WikipediaSearchTool,
12
  PythonInterpreterTool,
 
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
# --- Custom Throttled Model to fix Gemini 15 RPM Limits ---
class ThrottledGeminiModel(LiteLLMModel):
    """LiteLLMModel wrapper that paces requests to respect Gemini rate limits.

    The Gemini free tier allows 15 requests per minute; sleeping 5 seconds
    before every call keeps us at or below 12 RPM. Transient rate-limit
    errors (429 / "rate limit" / "quota") are retried with a growing
    back-off so the agent's multi-step thought process is not interrupted;
    any other exception propagates immediately.
    """

    # Pause before each request; 5 s => at most 12 requests per minute.
    THROTTLE_SECONDS = 5
    # Extra attempts made after the first rate-limited failure.
    MAX_RETRIES = 5

    def __call__(self, *args, **kwargs):
        print("Throttling: Sleeping 5s to prevent hitting Gemini's 15 RPM limit...")
        time.sleep(self.THROTTLE_SECONDS)

        # One initial attempt plus MAX_RETRIES retries for rate-limit errors.
        # Call/sleep sequence is identical to the original loop + trailing
        # "final attempt": the last attempt's rate-limit error propagates.
        for attempt in range(self.MAX_RETRIES + 1):
            try:
                return super().__call__(*args, **kwargs)
            except Exception as e:
                error_msg = str(e).lower()
                is_rate_limit = (
                    "429" in error_msg
                    or "rate limit" in error_msg
                    or "quota" in error_msg
                )
                if not is_rate_limit or attempt == self.MAX_RETRIES:
                    # Bare raise preserves the original traceback.
                    raise
                wait_time = 30 * (attempt + 1)
                print(f"Internal API Rate limit hit. Pausing for {wait_time}s (Attempt {attempt+1}/{self.MAX_RETRIES})...")
                time.sleep(wait_time)
46
  # --- Basic Agent Definition ---
47
  @tool
48
  def get_current_date_time() -> str:
 
54
  def __init__(self):
55
  print("BasicAgent initialized.")
56
 
57
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
58
+ if not gemini_api_key:
59
+ raise ValueError("GEMINI_API_KEY environment variable not set in Space Secrets.")
60
+
61
+ # Using our custom throttled wrapper
62
+ self.model = ThrottledGeminiModel(
63
+ model_id="gemini/gemini-2.0-flash-lite",
64
+ api_key=gemini_api_key,
65
  )
66
 
67
  self.tools = [
 
77
  max_steps=8,
78
  additional_authorized_imports=["datetime", "re", "json", "math", "collections"],
79
  )
80
+ print("BasicAgent ready with Throttled Gemini 2.0 Flash-Lite.")
81
 
82
def __call__(self, question: str) -> str:
    """Run the agent on one question and return its answer as a string.

    Rate-limit retries are handled inside the throttled model, so a single
    run attempt suffices here; any failure is reported as an error string.
    """
    print(f"\nAgent received question: {question[:80]}...")
    try:
        answer = self.agent.run(question)
    except Exception as e:
        print(f"Agent error processing question: {e}")
        return f"Error: {str(e)}"
    print(f"Agent answer: {str(answer)[:200]}")
    return str(answer)
 
 
 
 
 
 
 
 
92
 
93
  # --- The rest of the code ---
94
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
111
  return f"Error initializing agent: {e}", None
112
 
113
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
114
  print(f"Fetching questions from: {questions_url}")
115
 
116
  try:
 
118
  response.raise_for_status()
119
  questions_data = response.json()
120
  if not questions_data:
 
121
  return "No questions.", None
122
  except Exception as e:
 
123
  return f"Error fetching questions: {e}", None
124
 
125
  results_log = []
 
140
  print(f"Error on task {task_id}: {e}")
141
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
142
 
 
 
 
143
  if not answers_payload:
144
  return "No answers.", pd.DataFrame(results_log)
145
 
 
169
  gr.Markdown(
170
  """
171
  **Instructions:**
172
+ 1. Set `GEMINI_API_KEY` in your Space Secrets.
173
  2. Log in with your Hugging Face account below.
174
+ 3. Click 'Run Evaluation & Submit' to start.
175
+ *(Note: Because we are intentionally throttling the agent to respect Gemini's free tier limits, running all 20 questions might take around 10 to 15 minutes. Feel free to grab a coffee!)*
176
  """
177
  )
178
  gr.LoginButton()