Shivangsinha committed on
Commit
6767692
·
verified ·
1 Parent(s): 0e853cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -50
app.py CHANGED
@@ -2,7 +2,6 @@ import os
2
  import time
3
  import gradio as gr
4
  import requests
5
- import inspect
6
  import pandas as pd
7
  from smolagents import (
8
  CodeAgent,
@@ -13,37 +12,8 @@ from smolagents import (
13
  tool,
14
  )
15
 
16
- # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
- # --- Custom Throttled Model to fix Gemini 15 RPM Limits ---
20
- class ThrottledGeminiModel(LiteLLMModel):
21
- """
22
- Wraps the LiteLLMModel to automatically enforce delays between requests.
23
- Gemini Free Tier allows 15 requests per minute.
24
- By sleeping 5 seconds before every call, we guarantee we never exceed the limit.
25
- It also catches internal 429 errors without breaking the agent's multi-step thought process.
26
- """
27
- def __call__(self, *args, **kwargs):
28
- print("Throttling: Sleeping 5s to prevent hitting Gemini's 15 RPM limit...")
29
- time.sleep(5)
30
-
31
- max_retries = 5
32
- for attempt in range(max_retries):
33
- try:
34
- return super().__call__(*args, **kwargs)
35
- except Exception as e:
36
- error_msg = str(e).lower()
37
- if "429" in error_msg or "rate limit" in error_msg or "quota" in error_msg:
38
- wait_time = 30 * (attempt + 1)
39
- print(f"Internal API Rate limit hit. Pausing for {wait_time}s (Attempt {attempt+1}/{max_retries})...")
40
- time.sleep(wait_time)
41
- else:
42
- raise e
43
- # Final attempt if loop finishes without returning
44
- return super().__call__(*args, **kwargs)
45
-
46
- # --- Basic Agent Definition ---
47
  @tool
48
  def get_current_date_time() -> str:
49
  """Returns the current date and time in ISO format."""
@@ -54,14 +24,15 @@ class BasicAgent:
54
  def __init__(self):
55
  print("BasicAgent initialized.")
56
 
57
- gemini_api_key = os.getenv("GEMINI_API_KEY")
58
- if not gemini_api_key:
59
- raise ValueError("GEMINI_API_KEY environment variable not set in Space Secrets.")
 
60
 
61
-
62
- self.model = ThrottledGeminiModel(
63
- model_id="gemini/gemini-2.0-flash", # <-- Changed from flash-lite
64
- api_key=gemini_api_key,
65
  )
66
 
67
  self.tools = [
@@ -74,23 +45,35 @@ class BasicAgent:
74
  self.agent = CodeAgent(
75
  tools=self.tools,
76
  model=self.model,
77
- max_steps=8,
78
  additional_authorized_imports=["datetime", "re", "json", "math", "collections"],
79
  )
80
- print("BasicAgent ready with Throttled Gemini 2.0 Flash-Lite.")
81
 
82
  def __call__(self, question: str) -> str:
83
  print(f"\nAgent received question: {question[:80]}...")
84
- # The retry loop is now handled safely inside the ThrottledGeminiModel
85
- try:
86
- answer = self.agent.run(question)
87
- print(f"Agent answer: {str(answer)[:200]}")
88
- return str(answer)
89
- except Exception as e:
90
- print(f"Agent error processing question: {e}")
91
- return f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- # --- The rest of the code ---
94
  def run_and_submit_all(profile: gr.OAuthProfile | None):
95
  space_id = os.getenv("SPACE_ID")
96
  if profile:
@@ -140,6 +123,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
140
  print(f"Error on task {task_id}: {e}")
141
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
142
 
 
 
 
 
143
  if not answers_payload:
144
  return "No answers.", pd.DataFrame(results_log)
145
 
@@ -169,10 +156,9 @@ with gr.Blocks() as demo:
169
  gr.Markdown(
170
  """
171
  **Instructions:**
172
- 1. Set `GEMINI_API_KEY` in your Space Secrets.
173
  2. Log in with your Hugging Face account below.
174
  3. Click 'Run Evaluation & Submit' to start.
175
- *(Note: Because we are intentionally throttling the agent to respect Gemini's free tier limits, running all 20 questions might take around 10 to 15 minutes. Feel free to grab a coffee!)*
176
  """
177
  )
178
  gr.LoginButton()
 
2
  import time
3
  import gradio as gr
4
  import requests
 
5
  import pandas as pd
6
  from smolagents import (
7
  CodeAgent,
 
12
  tool,
13
  )
14
 
 
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  @tool
18
  def get_current_date_time() -> str:
19
  """Returns the current date and time in ISO format."""
 
24
  def __init__(self):
25
  print("BasicAgent initialized.")
26
 
27
+ # 1. Fetch the OpenRouter API Key
28
+ openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
29
+ if not openrouter_api_key:
30
+ raise ValueError("OPENROUTER_API_KEY environment variable not set in Space Secrets.")
31
 
32
+ # 2. Use LiteLLM to connect to OpenRouter's completely free Llama 3.3 70B endpoint
33
+ self.model = LiteLLMModel(
34
+ model_id="openrouter/meta-llama/llama-3.3-70b-instruct:free",
35
+ api_key=openrouter_api_key,
36
  )
37
 
38
  self.tools = [
 
45
  self.agent = CodeAgent(
46
  tools=self.tools,
47
  model=self.model,
48
+ max_steps=6, # Reduced from 8 to save tokens and prevent quota crashes
49
  additional_authorized_imports=["datetime", "re", "json", "math", "collections"],
50
  )
51
+ print("BasicAgent ready with OpenRouter (Llama-3.3-70b Free).")
52
 
53
  def __call__(self, question: str) -> str:
54
  print(f"\nAgent received question: {question[:80]}...")
55
+ max_retries = 3
56
+
57
+ for attempt in range(max_retries):
58
+ try:
59
+ # A tiny safety buffer before each attempt
60
+ time.sleep(2)
61
+ answer = self.agent.run(question)
62
+ print(f"Agent answer: {str(answer)[:200]}")
63
+ return str(answer)
64
+ except Exception as e:
65
+ err_msg = str(e).lower()
66
+ # If we hit a rate limit, pause and retry
67
+ if "429" in err_msg or "rate limit" in err_msg or "too many requests" in err_msg:
68
+ wait_time = 20 * (attempt + 1)
69
+ print(f"Rate limit hit! Pausing for {wait_time} seconds before retrying (Attempt {attempt+1}/{max_retries})...")
70
+ time.sleep(wait_time)
71
+ else:
72
+ print(f"Agent error processing question: {e}")
73
+ return f"Error: {str(e)}"
74
+
75
+ return "Error: Rate limit exceeded after maximum retries."
76
 
 
77
  def run_and_submit_all(profile: gr.OAuthProfile | None):
78
  space_id = os.getenv("SPACE_ID")
79
  if profile:
 
123
  print(f"Error on task {task_id}: {e}")
124
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
125
 
126
+ # CRITICAL FIX: Give the API token bucket time to cool down between questions
127
+ print("Cooling down for 15 seconds to prevent token exhaustion...")
128
+ time.sleep(15)
129
+
130
  if not answers_payload:
131
  return "No answers.", pd.DataFrame(results_log)
132
 
 
156
  gr.Markdown(
157
  """
158
  **Instructions:**
159
+ 1. Set `OPENROUTER_API_KEY` in your Space Secrets.
160
  2. Log in with your Hugging Face account below.
161
  3. Click 'Run Evaluation & Submit' to start.
 
162
  """
163
  )
164
  gr.LoginButton()