pmeyhoefer committed on
Commit
475f56b
·
verified ·
1 Parent(s): 8e0c4a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +221 -210
app.py CHANGED
@@ -1,251 +1,262 @@
1
  import os
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- import re
6
- import json
7
- from datetime import datetime
8
- import tempfile
9
- import atexit
10
- import sys # Für sys.exit bei Importfehlern
11
-
12
- # --- Smol Agents und HF Imports ---
13
- try:
14
- from smolagents import CodeAgent, tool, HfApiModel
15
- print("Successfully imported CodeAgent, tool, HfApiModel from 'smolagents'")
16
- except ImportError as e:
17
- print(f"Error importing from smolagents: {e}")
18
- print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
19
- sys.exit(f"Fatal Error: Could not import smolagents components. Original error: {e}")
20
-
21
- from huggingface_hub import HfApi
22
-
23
- # --- Suchtool Imports ---
24
- USE_TAVILY = False
25
- USE_DUCKDUCKGO = True
26
- if USE_TAVILY:
27
- try:
28
- from tavily import TavilyClient
29
- except ImportError:
30
- print("WARNUNG: TavilyClient nicht installiert.")
31
- USE_TAVILY = False
32
- USE_DUCKDUCKGO = True
33
- if USE_DUCKDUCKGO:
34
- try:
35
- from duckduckgo_search import DDGS
36
- except ImportError:
37
- print("WARNUNG: duckduckgo-search nicht installiert.")
38
- USE_DUCKDUCKGO = False
39
 
40
- # --- PDF Reader Import ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  try:
42
- import PyPDF2
43
- PDF_READER_AVAILABLE = True
44
- except ImportError:
45
- PDF_READER_AVAILABLE = False
46
- print("WARNUNG: PyPDF2 nicht installiert. PDF-Lesefunktion deaktiviert.")
47
-
48
- # --- Konstanten & Globals ---
49
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
50
- HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
51
- search_client = None
52
- agent_instance = None
53
-
54
- temp_files_to_clean = set()
55
-
56
- def cleanup_temp_files():
57
- for path in list(temp_files_to_clean):
58
- try:
59
- if os.path.exists(path): os.remove(path)
60
- except OSError:
61
- pass
62
- temp_files_to_clean.discard(path)
63
- atexit.register(cleanup_temp_files)
64
 
65
- # --- Tool Definitions ---
66
  @tool
67
- def search_web(query: str, max_results: int = 3) -> str:
68
  """
69
- Search the web for the given query and return a summary of the top results.
70
-
71
  Args:
72
- query (str): The search query to look up online.
73
- max_results (int): The maximum number of search results to retrieve.
74
-
75
  Returns:
76
- str: A concatenated string summarizing the URLs and snippets of the results.
77
  """
78
- if not search_client:
79
- return "Error: Search tool not configured."
80
- try:
81
- if USE_TAVILY and isinstance(search_client, TavilyClient):
82
- res = search_client.search(query=query, search_depth="basic", max_results=max_results)
83
- items = res.get('results', [])
84
- if not items:
85
- return "No search results found."
86
- return "\n".join([f"URL: {i['url']}\nContent: {i.get('content','')[:200]}..." for i in items])
87
- elif USE_DUCKDUCKGO and isinstance(search_client, DDGS):
88
- results = search_client.text(query, max_results=max_results)
89
- if not results:
90
- return "No search results found."
91
- return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:200]}..." for r in results])
92
- else:
93
- return "Error: No compatible search client available."
94
- except Exception as e:
95
- return f"Error during search: {e}"
96
 
97
  @tool
98
- def download_task_file(task_id: str) -> str:
99
  """
100
- Download the file associated with a specific task ID from the evaluation server.
101
-
102
  Args:
103
- task_id (str): Unique identifier of the task whose file should be downloaded.
104
-
105
  Returns:
106
- str: Local filesystem path to the downloaded file, or an error message.
107
  """
108
- url = f"{DEFAULT_API_URL}/files/{task_id}"
109
  try:
110
- response = requests.get(url, stream=True, timeout=30)
111
- response.raise_for_status()
112
- content_type = response.headers.get('content-type', '')
113
- suffix = '.pdf' if 'pdf' in content_type else '.tmp'
114
- tmp_dir = tempfile.gettempdir()
115
- filename = f"task_{task_id}_{datetime.now().strftime('%Y%m%d%H%M%S')}{suffix}"
116
- path = os.path.join(tmp_dir, filename)
117
- with open(path, 'wb') as f:
118
- for chunk in response.iter_content(8192): f.write(chunk)
119
- temp_files_to_clean.add(path)
120
- return path
121
  except Exception as e:
122
- return f"Error: {e}"
 
123
 
124
- @tool
125
- def read_file_content(file_path: str) -> str:
126
- """
127
- Read the text content of a previously downloaded file (PDF or plain text).
128
 
129
- Args:
130
- file_path (str): Absolute local path to the file to read (from download_task_file).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- Returns:
133
- str: Extracted text content truncated if necessary, or an error message.
134
- """
135
- if not file_path.startswith(tempfile.gettempdir()):
136
- return "Error: Invalid file path."
137
- if not os.path.exists(file_path):
138
- return "Error: File does not exist."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  try:
140
- if file_path.lower().endswith('.pdf'):
141
- if not PDF_READER_AVAILABLE:
142
- return "Error: PyPDF2 not installed."
143
- text = ''
144
- with open(file_path, 'rb') as f:
145
- reader = PyPDF2.PdfReader(f)
146
- for page in reader.pages:
147
- text += page.extract_text() or ''
148
- if len(text) > 7000:
149
- text = text[:7000] + '\n... (truncated)'
150
- break
151
- return text
152
- else:
153
- with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
154
- return f.read(7000)
155
  except Exception as e:
156
- return f"Error: {e}"
157
-
158
- # --- Agent Setup ---
159
- def initialize_agent():
160
- global search_client, agent_instance
161
- if search_client is None:
162
- if USE_TAVILY:
163
- key = os.getenv('TAVILY_API_KEY')
164
- search_client = TavilyClient(api_key=key) if key else False
165
- elif USE_DUCKDUCKGO:
166
- try:
167
- search_client = DDGS()
168
- except:
169
- search_client = False
170
- else:
171
- search_client = False
172
- token = os.getenv('HUGGINGFACE_TOKEN')
173
- if not token:
174
- raise ValueError("HUGGINGFACE_TOKEN environment variable is required.")
175
- hf_model = HfApiModel()
176
- tools = [search_web, download_task_file, read_file_content]
177
- if not search_client:
178
- tools.remove(search_web)
179
- agent_instance = CodeAgent(tools=tools, model=hf_model)
180
-
181
- # --- Main Logic ---
182
- def run_and_submit_all(profile, progress=gr.Progress(track_tqdm=True)):
183
- # Parse profile if passed as JSON string
184
- if isinstance(profile, str):
185
  try:
186
- profile = json.loads(profile)
187
- except json.JSONDecodeError:
188
- return "Error: Invalid profile format.", None
189
- if not profile or 'username' not in profile:
190
- return "Bitte zuerst einloggen.", None
191
- username = profile['username']
 
 
 
 
 
 
 
 
192
 
 
 
 
 
 
 
 
 
193
  try:
194
- initialize_agent()
 
 
195
  except Exception as e:
196
- return f"Initialization Error: {e}", None
197
-
198
- # Fetch questions
199
- resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
200
- resp.raise_for_status()
201
- questions = resp.json()
202
 
203
  logs, payload = [], []
204
- for item in progress.tqdm(questions, desc="Bearbeite Fragen"):
205
- task_id = item.get('task_id')
206
- question = item.get('question')
207
- if not task_id or question is None:
208
  continue
209
- prompt = f"Task {task_id}: {question}"
210
- try:
211
- result = agent_instance.run(prompt=prompt)
212
- answer = re.sub(r"^(Answer:|Final Answer:)", "", result or "").strip()
213
- except Exception as e:
214
- answer = f"ERROR: {e}"
215
- logs.append({'Task ID': task_id, 'Question': question, 'Submitted Answer': answer})
216
- payload.append({'task_id': task_id, 'submitted_answer': answer})
217
 
218
- df = pd.DataFrame(logs)
 
219
 
220
- # Submit answers
221
- submission = {'username': username, 'agent_code': '...', 'answers': payload}
222
  try:
223
- post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=180)
 
 
 
 
224
  post.raise_for_status()
225
- score = post.json().get('score', 0)
226
- status = f"Erfolg! Score: {score:.2f}%"
 
 
 
 
 
 
 
 
227
  except Exception as e:
228
- status = f"Submission Error: {e}"
 
229
 
230
- cleanup_temp_files()
231
- return status, df
232
 
233
- # --- Gradio UI ---
234
  with gr.Blocks() as demo:
235
- gr.Markdown("# Smol CodeAgent Evaluation Runner")
236
- gr.Markdown("Bitte einloggen und dann auf "Run Evaluation & Submit All Answers" klicken.")
237
- with gr.Row():
238
- login_btn = gr.LoginButton()
239
- run_btn = gr.Button("Run Evaluation & Submit All Answers")
240
- out_status = gr.Textbox(label="Status", lines=5)
241
- out_table = gr.DataFrame(label="Ergebnisse")
242
-
243
- run_btn.click(
244
- fn=run_and_submit_all,
245
- inputs=[login_btn],
246
- outputs=[out_status, out_table],
247
- api_name="run_evaluation_smol_codeagent"
248
- )
249
 
250
- if __name__ == '__main__':
251
- demo.queue().launch(debug=False, share=False)
 
 
 
 
 
 
1
  import os
2
+ import logging
3
+
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ # We still need the openai library, even if we change the endpoint
8
+ from openai import OpenAI
9
+
10
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
11
+ from smolagents.models import OpenAIServerModel # Assuming this can handle base_url
12
+
13
+ # --- Logging ---
14
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
15
+ logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # --- Constants ---
18
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # Keep this for submission
19
+
20
+ # --- GitHub Models Configuration ---
21
+ # Use GITHUB_TOKEN environment variable for authentication
22
+ GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
23
+ if not GITHUB_TOKEN:
24
+ # If running locally and GITHUB_TOKEN is not set, you might fall back
25
+ # to another mechanism or raise an error. For HF Spaces, secrets are needed.
26
+ raise RuntimeError("Please set GITHUB_TOKEN in your Space secrets.")
27
+
28
+ # GitHub Models endpoint
29
+ GITHUB_ENDPOINT = "https://models.github.ai/inference"
30
+
31
+ # Specify the model ID compatible with the GitHub endpoint
32
+ # Check GitHub Models documentation for available models. 'gpt-4.1' might not be the correct identifier.
33
+ # Let's assume a common format like 'openai/gpt-4o' or similar, adjust as needed.
34
+ # Using 'openai/gpt-4.1' as a placeholder based on your original code, VERIFY THIS with GitHub Models docs.
35
+ MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4.1") # Renamed for clarity, adjust if needed
36
+
37
+ # --- Configure OpenAI SDK (for tools if needed, now using GitHub endpoint) ---
38
+ # This client might be used by tools OR potentially by OpenAIServerModel internally
39
+ # depending on its implementation. Configuring it ensures consistency.
40
+ # Note: If OpenAIServerModel directly instantiates its own client using the parameters
41
+ # we provide later, this specific 'client' instance might not be used by the agent itself.
42
  try:
43
+ client = OpenAI(
44
+ base_url=GITHUB_ENDPOINT,
45
+ api_key=GITHUB_TOKEN,
46
+ )
47
+ # Optional: Test connection or a simple call here if needed during setup
48
+ # client.models.list() # Example call, might need adjustment for GitHub's API structure
49
+ except Exception as e:
50
+ logger.error(f"Failed to initialize OpenAI client for GitHub Models: {e}")
51
+ # Decide how to handle this - raise error, log warning, etc.
52
+ raise RuntimeError(f"OpenAI client initialization failed for GitHub Models: {e}") from e
53
+
54
+
55
+ # --- Tools ---
56
+ # Tools remain the same, assuming they don't directly depend on the *specific* OpenAI API endpoint
57
+ # unless they internally use the globally configured 'client' (which we just updated).
 
 
 
 
 
 
 
58
 
 
59
  @tool
60
+ def summarize_query(query: str) -> str:
61
  """
62
+ Reframes an unclear search query to improve relevance.
 
63
  Args:
64
+ query (str): The original search query.
 
 
65
  Returns:
66
+ str: A concise, improved version.
67
  """
68
+ # This tool currently doesn't use an LLM, so it's unaffected by the endpoint change.
69
+ # If it *did* use the 'client' instance, it would now point to GitHub Models.
70
+ return f"Summarize and reframe: {query}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
@tool
def wikipedia_search(page: str) -> str:
    """
    Fetch the summary extract of an English Wikipedia page.

    Args:
        page (str): Page title, e.g. 'Mercedes_Sosa_discography'.

    Returns:
        str: The page's extract text, or an error message on failure.
    """
    import urllib.parse

    try:
        # URL-encode the title so pages with spaces, accents, or other
        # special characters produce a valid request URL. Keep '_' literal,
        # since Wikipedia titles use it in place of spaces.
        safe_page = urllib.parse.quote(page.strip(), safe="_")
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{safe_page}"
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        return r.json().get("extract", "")
    except Exception as e:
        # Report tool failures inline so the agent can retry with another tool.
        logger.exception("Wikipedia lookup failed")
        return f"Wikipedia error: {e}"
89
 
90
# Tool instances handed to the agent below.
search_tool = DuckDuckGoSearchTool()
wiki_tool = wikipedia_search
summarize_tool = summarize_query


# --- ReACT Prompt ---
# Describes the agent's behaviour only; it is independent of which model
# endpoint serves the completions.
instruction_prompt = """
You are a ReACT agent with three tools:
• DuckDuckGoSearchTool(query: str)
• wikipedia_search(page: str)
• summarize_query(query: str)
Internally, for each question:
1. Thought: decide which tool to call.
2. Action: call the chosen tool.
3. Observation: record the result.
4. If empty/irrelevant:
Thought: retry with summarize_query + DuckDuckGoSearchTool.
Record new Observation.
5. Thought: integrate observations.
Finally, output your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
"""
116
 
117
# --- Build the Agent with OpenAIServerModel pointing to GitHub Models ---

def _build_github_model() -> "OpenAIServerModel":
    """
    Create an OpenAIServerModel bound to the GitHub Models endpoint.

    smolagents versions differ in the name of the endpoint keyword
    ('api_base' vs 'base_url'), so each candidate is tried in turn instead
    of nesting near-duplicate try/except blocks.

    Returns:
        OpenAIServerModel: model configured with MODEL_ID and GITHUB_TOKEN.

    Raises:
        RuntimeError: if construction fails or no endpoint keyword is accepted.
    """
    last_error = None
    for endpoint_kwarg in ("api_base", "base_url"):
        try:
            configured = OpenAIServerModel(
                model_id=MODEL_ID,
                api_key=GITHUB_TOKEN,  # the GitHub token doubles as the API key
                **{endpoint_kwarg: GITHUB_ENDPOINT},
            )
            logger.info(
                f"Configured OpenAIServerModel with GitHub endpoint: {GITHUB_ENDPOINT} "
                f"and model: {MODEL_ID} (via '{endpoint_kwarg}')"
            )
            return configured
        except TypeError as te:
            # Wrong keyword name for this smolagents version; try the next one.
            logger.error(f"TypeError configuring OpenAIServerModel: {te}")
            last_error = te
        except Exception as e:
            logger.error(f"Failed to configure OpenAIServerModel: {e}")
            raise RuntimeError(f"Could not configure SmolAgents model for GitHub endpoint: {e}") from e
    raise RuntimeError(
        f"Could not configure SmolAgents model for GitHub endpoint: {last_error}"
    ) from last_error


model = _build_github_model()

# The agent runs the ReACT loop with the configured model and tool set.
smart_agent = CodeAgent(
    tools=[search_tool, wiki_tool, summarize_tool],
    model=model,
)
158
+
159
# --- Gradio Wrapper ---

class BasicAgent:
    """Thin callable wrapper around smart_agent used by the Gradio app."""

    def __init__(self):
        # Endpoint and model were fixed at module load; just record them.
        logger.info(f"Initialized SmolAgent with GitHub Model: {MODEL_ID} via {GITHUB_ENDPOINT}")

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its answer string."""
        cleaned = question.strip()
        if not cleaned:
            return "AGENT ERROR: empty question"
        full_prompt = f"{instruction_prompt.strip()}\n\nQUESTION: {cleaned}"
        try:
            # The agent uses the module-level model configured above.
            return smart_agent.run(full_prompt)
        except Exception as e:
            # Failures (auth, rate limits, ...) are reported inline, not raised.
            logger.exception("Agent run error")
            return f"AGENT ERROR: {e}"
177
+
178
+ # --- Submission Logic ---
179
+ # This part remains largely the same, as it interacts with the external scoring service (DEFAULT_API_URL)
180
+ # It just uses the 'agent' which now internally calls GitHub Models.
181
+
182
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch every question, answer each with BasicAgent, and submit the results.

    Args:
        profile: Hugging Face OAuth profile injected by Gradio, or None when
            the visitor is not logged in.

    Returns:
        tuple: (status message, per-question DataFrame or None).
    """
    if not profile:
        return "Please log in to Hugging Face.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    # Link to this Space's code, required by the submission endpoint.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    answering_agent = BasicAgent()

    # Fetch the question list from the scoring service.
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions = response.json() or []
    except Exception as e:
        logger.exception("Failed fetch")
        return f"Error fetching questions: {e}", None

    rows, answers = [], []
    for question_item in questions:
        task_id = question_item.get("task_id")
        question_text = question_item.get("question")
        if not task_id or not question_text:
            # Skip malformed entries rather than submitting junk.
            continue
        answer = answering_agent(question_text)
        rows.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
        answers.append({"task_id": task_id, "submitted_answer": answer})

    if not answers:
        return "Agent did not produce any answers.", pd.DataFrame(rows)

    # Submit the collected answers to the scoring service.
    try:
        post = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json={"username": username, "agent_code": agent_code, "answers": answers},
            timeout=60,
        )
        post.raise_for_status()
        result = post.json()
        status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score','N/A')}%\n"
            f"({result.get('correct_count','?')}/"
            f"{result.get('total_attempted','?')})\n"
            f"Message: {result.get('message','')}"
        )
        return status, pd.DataFrame(rows)
    except Exception as e:
        logger.exception("Submit failed")
        return f"Submission Failed: {e}", pd.DataFrame(rows)
237
 
238
# --- Gradio App ---

with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Runner (using GitHub Models) 🚀")
    gr.Markdown("""
**Instructions:**
1. Clone this space.
2. In Settings → Secrets, add `GITHUB_TOKEN` (your GitHub access token with appropriate permissions for GitHub Models).
3. Optionally, set `MODEL_ID` if you want to use a model other than the default (e.g., `openai/gpt-4o`). Verify the correct model identifier for GitHub Models.
4. Log in to Hugging Face.
5. Click **Run Evaluation & Submit All Answers**.
""")
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")
    out_status = gr.Textbox(label="Status", lines=5, interactive=False)
    out_table = gr.DataFrame(label="Questions & Answers", wrap=True)
    # No explicit inputs: Gradio injects the OAuth profile automatically
    # based on the gr.OAuthProfile annotation of run_and_submit_all.
    btn.click(run_and_submit_all, outputs=[out_status, out_table])

if __name__ == "__main__":
    # No GITHUB_TOKEN re-check needed here: module import already raises a
    # RuntimeError when the token is missing, so this branch can only run
    # with a token present (the previous guard was dead code).
    logger.info("Launching Gradio App...")
    demo.launch(debug=True, share=False)  # NOTE(review): set debug=False for production