Bharadwaj-m7 commited on
Commit
1dabd3a
·
verified ·
1 Parent(s): 3af3919

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -646
app.py CHANGED
@@ -1,652 +1,194 @@
1
  import os
2
  import gradio as gr
3
- import io
4
- import contextlib
5
- import re
6
- import uuid
7
- try:
8
- import spaces # type: ignore
9
- except ImportError:
10
- # Create a dummy spaces class for local development compatibility
11
- class spaces:
12
- @staticmethod
13
- def GPU():
14
- def decorator(func):
15
- return func
16
- return decorator
17
- print("Warning: `spaces` module not found. Using dummy implementation for local execution.")
18
-
19
  import requests
20
  import inspect
21
  import pandas as pd
22
- import json
23
- from typing import List, Dict, Tuple, Union, Optional, TypedDict, Literal
24
- from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage
25
- from langchain_core.agents import AgentAction, AgentFinish
26
- from langchain_core.tools import tool
27
- from langchain_community.tools.ddg_search import DuckDuckGoSearchRun
28
- from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
29
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
30
- from langgraph.prebuilt import ToolNode
31
- from langgraph.graph import StateGraph, END
32
- from functools import partial
33
- from transformers import pipeline
34
 
 
35
  # --- Constants ---
36
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
37
 
38
- SYSTEM_PROMPT = """You are a highly meticulous assistant assigned to answer questions precisely.
39
- Your primary goal is to provide a final answer that directly addresses the user's question and *exactly* matches the required format, with no extra text, commentary, or explanation.
40
- TOOL USAGE GUIDELINES:
41
- ----------------------
42
- 1. DECISION PROCESS:
43
- * First, ALWAYS analyze if the question requires external information or can be answered directly.
44
- * For direct questions about basic facts, calculations, or reasoning, do NOT use tools - just answer immediately.
45
- * ONLY use tools when the question explicitly or implicitly requires external information.
46
- 2. 'web_search' TOOL USAGE:
47
- * Use SPECIFICALLY when you need:
48
- - Current events or time-sensitive information (e.g., today's date, recent news).
49
- - Specific facts not commonly known (e.g., population of a small town, technical specs).
50
- - Statistical data, complex figures, or precise numerical values (e.g., GDP of a country).
51
- - Information about recent developments, trends, or changes.
52
- * If asked for specific, verifiable facts (names, dates, statistics, events) that are not common knowledge, you MUST use the 'web_search' tool.
53
- * When using web_search, provide SPECIFIC search queries focused on exactly what you need. Target the precise piece of information.
54
- * DO NOT use broad, general searches like "climate change". Target the exact information needed (e.g., "current CO2 levels 2023 global average ppm").
55
- 3. 'download_file' TOOL USAGE:
56
- * Use ONLY when:
57
- - The question EXPLICITLY mentions a file, document, or attachment (e.g., "read the file", "in the attached document").
58
- - The question refers to information that MUST logically come from a file associated with the provided task_id.
59
- - The question contains phrases like "in the document", "from the file", "according to the text provided".
60
- * IMPORTANT: Pass ONLY the task_id string to this tool. Nothing else.
61
- * DO NOT attempt to download a file if the question doesn't clearly indicate one exists or is necessary.
62
- 4. PROCESS AFTER TOOL USE:
63
- * WAIT for the tool's response before proceeding.
64
- * EXTRACT only the specific piece of information required by the question from the tool's response.
65
- * DO NOT include the full tool response or any commentary about the tool use in your final answer.
66
- * FORMULATE your final answer based SOLELY on the relevant information extracted from the tool, adhering strictly to the format rules.
67
- FINAL ANSWER FORMATTING (CRITICAL):
68
- -----------------------------------
69
- * Your final response *must* contain ONLY the answer itself, exactly as requested.
70
- * Do NOT include any introductory phrases like "The answer is:", "Based on my search:", "Here is the file content:", "The result is:", etc.
71
- * If the question asks for a number, respond with ONLY the number (e.g., `42`).
72
- * If the question asks for a name, respond with ONLY the name (e.g., `Paris`).
73
- * If the question asks for a date, respond with ONLY the date in the requested format (e.g., `2023-10-26`).
74
- * If the answer is derived from a downloaded file, extract the specific piece of information requested and return *only* that information.
75
- * If you determine you cannot answer the question accurately after using tools or reasoning, respond with only the text: `I cannot answer this question.`
76
- Example Interaction 1:
77
- User Question: What is the boiling point of water in Celsius?
78
- Your Final Answer: 100
79
- Example Interaction 2:
80
- User Question: Calculate (5 * 3) + 2
81
- Your Final Answer: 17
82
- Example Interaction 3:
83
- User Question: Read the document associated with task_id 'abc-123' and tell me the value mentioned for 'Project Alpha'.
84
- (Tool: download_file(task_id='abc-123') -> Returns: "File content: ... Project Alpha: Complete ...")
85
- Your Final Answer: Complete
86
- Example Interaction 4 (Requires Web Search):
87
- User Question: What is the capital of France?
88
- (LLM decides tool is needed: Request Tool Call `web_search(query='capital of France')`)
89
- (Tool Result: "Paris is the capital and most populous city of France...")
90
- (LLM extracts answer from tool result)
91
- Your Final Answer: Paris
92
- """
93
-
94
- # --- Agent State Definition ---
95
- class AgentState(TypedDict):
96
- """Represents the state of our agent graph."""
97
- question: str # The initial question from the API
98
- task_id: str # Task ID associated with the question
99
- agent_outcome: Optional[Union[AgentAction, AgentFinish]] # The latest decision from the agent node
100
- intermediate_steps: List[Tuple[AgentAction, str]] # List of (tool action, tool observation) tuples
101
- chat_history: List[BaseMessage] # History of messages (human, ai, tool)
102
- downloaded_files: Dict[str, str] # Maps task_id to downloaded file content (string)
103
- tool_cache: Dict[str, str] # Cache for tool results {cache_key: result}
104
- error: Optional[str] # To capture any errors during execution
105
- max_iterations: int # Iteration limit
106
- current_iteration: int # Current iteration count
107
-
108
- # --- Tool Definitions ---
109
  @tool
110
- def download_file(task_id: str) -> str:
111
- """Downloads a required file for a given task ID when the question context
112
- indicates a file is needed. Input MUST be the task_id string.
113
- Returns the file content as a string on success, or an error message.
 
 
114
  """
115
- file_url = f"{DEFAULT_API_URL}/files/{task_id}"
116
- print(f"Attempting to download file from: {file_url}")
117
  try:
118
- response = requests.get(file_url, timeout=10)
119
- response.raise_for_status()
 
 
 
120
 
121
- try:
122
- content = response.content.decode('utf-8')
123
- print(f"Successfully downloaded and decoded file for task {task_id}. Content length: {len(content)}")
124
- summary = f"Successfully downloaded file for task {task_id}. Content starts: {content[:500]}..."
125
- return summary
126
- except UnicodeDecodeError:
127
- print(f"Warning: Could not decode file content as UTF-8 for task {task_id}. Returning raw bytes summary.")
128
- return f"Successfully downloaded file for task {task_id}, but it may not be text. Raw content (first 500 bytes): {response.content[:500]}..."
129
 
130
- except requests.exceptions.Timeout:
131
- error_msg = f"Error: Timeout occurred while trying to download file for task {task_id} from {file_url}"
132
- print(error_msg)
133
- return error_msg
134
- except requests.exceptions.RequestException as e:
135
- status_code = e.response.status_code if e.response else 'N/A'
136
- error_msg = f"Error: Failed to download file for task {task_id}. Status code: {status_code}. URL: {file_url}. Error: {e}"
137
- print(error_msg)
138
- return error_msg
139
- except Exception as e:
140
- error_msg = f"Error: An unexpected error occurred during file download for task {task_id}: {e}"
141
- print(error_msg)
142
- return error_msg
143
-
144
- web_search = DuckDuckGoSearchRun()
145
- agent_tools = [download_file, web_search]
146
-
147
- # --- Helper Functions for Graph Nodes ---
148
- def agent_node(state: AgentState, llm_with_tools, prompt):
149
- """Runs the LLM to determine the next action or finish."""
150
- print(f"Running agent_node (Iteration {state.get('current_iteration', 0)})" )
151
- agent_input_dict = {
152
- "input": state['question'],
153
- "chat_history": state['chat_history'],
154
- }
155
-
156
- # Format the prompt explicitly first
157
- # The prompt template takes chat_history and input
158
- formatted_prompt = prompt.invoke(agent_input_dict)
159
- print("Formatted prompt generated for LLM.")
160
-
161
- # --- Add GAIA-specific instructions ---
162
- # Convert ChatPromptValue to messages, modify the last HumanMessage
163
- messages = formatted_prompt.to_messages()
164
- # Use triple quotes for the instruction string
165
- gaia_instructions = """\n\nWhen answering, provide ONLY the precise answer requested. Do not include explanations, steps, reasoning, or additional text. Be direct and specific. GAIA benchmark requires exact matching answers. For example, if asked "What is the capital of France?", respond simply with "Paris"."""
166
-
167
- if messages and isinstance(messages[-1], HumanMessage):
168
- messages[-1].content += gaia_instructions
169
- print("Appended GAIA formatting instructions to the last HumanMessage.")
170
- else:
171
- # If the last message isn't Human (unexpected), add instructions as a new Human message
172
- messages.append(HumanMessage(content=gaia_instructions))
173
- print("Warning: Appended GAIA instructions as a new HumanMessage.")
174
-
175
- # Pass the modified messages list to the LLM
176
- response = llm_with_tools.invoke(messages)
177
-
178
- # --- Tool Call Handling ---
179
- agent_outcome = None # Initialize agent_outcome
180
- raw_content = response.content # Get raw content once
181
-
182
- # Check for automatic tool calls first (ideal case)
183
- if hasattr(response, 'tool_calls') and response.tool_calls:
184
- print(f"Agent decided to call tools (structured): {response.tool_calls}")
185
- actions = [
186
- # Ensure tool_call_id is included if available directly from the model
187
- AgentAction(
188
- tool=call['name'],
189
- tool_input=call['args'],
190
- log=str(call),
191
- tool_call_id=call.get('id') # Get ID if model provides it
192
- )
193
- for call in response.tool_calls
194
- ]
195
- if actions:
196
- # If model provides multiple calls, we might need to handle them.
197
- # For now, just take the first action if it exists.
198
- agent_outcome = actions[0]
199
-
200
- # If no structured tool call, try manual parsing on the stripped content
201
- if agent_outcome is None:
202
- # --- Strip prompt echo FIRST ---
203
- marker = "<|im_start|>assistant"
204
- marker_pos = raw_content.rfind(marker)
205
- if marker_pos != -1:
206
- final_content = raw_content[marker_pos + len(marker):].lstrip()
207
- print(f"Stripped prompt echo for parsing. Content: {final_content[:150]}...")
208
- else:
209
- final_content = raw_content.strip()
210
- print("Assistant marker not found. Parsing raw content.")
211
- # --- End Stripping ---
212
-
213
- # --- Manual Parsing on final_content ---
214
- search_match = re.search(r"web_search\((.*?)\)", final_content)
215
- download_match = re.search(r"download_file\((.*?)\)", final_content)
216
- manual_tool_call_id = f"tool_{uuid.uuid4()}" # Generate unique ID
217
-
218
- if search_match:
219
- tool_input_str = search_match.group(1).strip()
220
- try:
221
- # Try parsing as dict {'query': '...'} first
222
- parsed_input = json.loads(tool_input_str.replace("'", '"')) # Replace single quotes for JSON
223
- if isinstance(parsed_input, dict) and 'query' in parsed_input:
224
- tool_input = parsed_input
225
- else:
226
- # If not a dict with 'query', assume the string itself is the query
227
- tool_input = {"query": tool_input_str}
228
- except json.JSONDecodeError:
229
- # Handle plain string query: web_search("the query") or web_search('the query')
230
- if (tool_input_str.startswith("'") and tool_input_str.endswith("'")) or \
231
- (tool_input_str.startswith('"') and tool_input_str.endswith('"')):
232
- tool_input = {"query": tool_input_str[1:-1]}
233
- else: # Assume raw string is the query
234
- tool_input = {"query": tool_input_str}
235
- print(f"Agent decided to call tool (MANUALLY PARSED): web_search, Input: {tool_input}")
236
- agent_outcome = AgentAction(
237
- tool="duckduckgo_search",
238
- tool_input=tool_input,
239
- log=f"Manually Parsed from content: {search_match.group(0)}",
240
- tool_call_id=manual_tool_call_id
241
- )
242
- elif download_match:
243
- tool_input_str = download_match.group(1).strip()
244
- if (tool_input_str.startswith("'") and tool_input_str.endswith("'")) or \
245
- (tool_input_str.startswith('"') and tool_input_str.endswith('"')):
246
- tool_input = tool_input_str[1:-1]
247
- else:
248
- tool_input = tool_input_str
249
- print(f"Agent decided to call tool (MANUALLY PARSED): download_file, Input: {tool_input}")
250
- agent_outcome = AgentAction(
251
- tool="download_file",
252
- tool_input=tool_input,
253
- log=f"Manually Parsed from content: {download_match.group(0)}",
254
- tool_call_id=manual_tool_call_id
255
- )
256
 
257
- # If still no agent_outcome (no tool call detected at all), create AgentFinish
258
- if agent_outcome is None:
259
- print(f"Agent decided to finish. Raw Content: {raw_content[:100]}...")
260
- # --- Post-processing Workaround ---
261
- marker = "<|im_start|>assistant"
262
- marker_pos = raw_content.rfind(marker)
263
- if marker_pos != -1:
264
- final_content = raw_content[marker_pos + len(marker):].lstrip()
265
- print(f"Stripped prompt echo for final answer. Using: {final_content[:100]}...")
266
- else:
267
- final_content = raw_content.strip()
268
- print("Assistant marker not found for final answer. Using raw content.")
269
- # --- End Workaround ---
270
- agent_outcome = AgentFinish(return_values={"output": final_content}, log=raw_content)
271
-
272
- # Add the original response (including potential tool calls) to history for context
273
- new_history = state['chat_history'] + [response]
274
- return {"agent_outcome": agent_outcome, "chat_history": new_history}
275
-
276
- def tool_node(state: AgentState, tool_executor):
277
- """Executes tools and returns the results, using a cache."""
278
- print("Running tool_node")
279
- agent_action = state['agent_outcome']
280
- if not isinstance(agent_action, AgentAction):
281
- print("Warning: tool_node called without AgentAction in state.")
282
- return {}
283
-
284
- # Construct a unique cache key for this tool call
285
- tool_name = agent_action.tool
286
- tool_input = str(agent_action.tool_input) # Ensure input is string for dict key
287
- cache_key = f"{tool_name}::{tool_input}"
288
- tool_cache = state.get('tool_cache', {})
289
-
290
- # Check cache first
291
- if cache_key in tool_cache:
292
- observation = tool_cache[cache_key]
293
- print(f"Cache hit for tool {tool_name} with input {tool_input[:50]}... Returning cached result.")
294
- else:
295
- print(f"Cache miss for tool {tool_name} with input {tool_input[:50]}... Executing tool.")
296
- observation = tool_executor.invoke(agent_action)
297
- print(f"Tool {tool_name} executed. Observation: {str(observation)[:200]}...")
298
- # Update cache
299
- tool_cache[cache_key] = str(observation)
300
-
301
- new_intermediate_steps = state['intermediate_steps'] + [(agent_action, str(observation))]
302
- # Directly use the tool_call_id from the AgentAction
303
- # Assumes agent_action WILL have tool_call_id if it's an AgentAction leading here
304
- tool_message = ToolMessage(
305
- content=str(observation),
306
- tool_call_id=agent_action.tool_call_id
307
- )
308
- new_history = state['chat_history'] + [tool_message]
309
-
310
- # Return updated state including the potentially modified cache
311
- return {
312
- "intermediate_steps": new_intermediate_steps,
313
- "chat_history": new_history,
314
- "tool_cache": tool_cache # Ensure cache updates are propagated
315
- }
316
-
317
- def should_continue(state: AgentState) -> Literal["tools", "__end__"]:
318
- """Determines whether to continue the loop or end."""
319
- print("Running should_continue")
320
- outcome = state['agent_outcome']
321
- current_iter = state.get('current_iteration', 0)
322
- max_iter = state.get('max_iterations', 10)
323
-
324
- if isinstance(outcome, AgentFinish):
325
- print("Decision: End (AgentFinish)")
326
- return "__end__"
327
- elif current_iter >= max_iter:
328
- print("Decision: End (Max Iterations Reached)")
329
- return "__end__"
330
- elif isinstance(outcome, AgentAction):
331
- print("Decision: Continue (Tools)")
332
- return "tools"
333
- else:
334
- print("Decision: End (Unexpected State)")
335
- return "__end__"
336
-
337
- # Add a dummy function decorated for the Spaces platform GPU check
338
- @spaces.GPU()
339
- def gpu_check():
340
- """Dummy function to signal GPU usage to the Hugging Face Spaces platform."""
341
- print("GPU check function called (decorator signals usage).")
342
-
343
- # --- Agent Definition ---
344
- class LangGraphAgent:
345
- def __init__(self, max_iterations=10):
346
- print("Initializing LangGraphAgent...")
347
- self.max_iterations = max_iterations
348
-
349
- # 1. Define LLM
350
- print("Loading Hugging Face pipeline...")
351
- try:
352
- # Explicitly create the transformers pipeline first
353
- hf_pipeline = pipeline(
354
- "text-generation", # Task for instruct models
355
- model="Qwen/Qwen2-72B-Instruct", # Use Qwen2 72B Instruct model
356
- # Group model-specific args into model_kwargs
357
- model_kwargs={"torch_dtype": "auto", "device_map": "auto"},
358
- # Keep pipeline-specific args separate
359
- max_new_tokens=1024,
360
- add_special_tokens=False # Let ChatHuggingFace handle templating/special tokens
361
- )
362
- print("Hugging Face transformers pipeline loaded successfully.")
363
-
364
- # Wrap the transformers pipeline with LangChain's HuggingFacePipeline
365
- print("Creating HuggingFacePipeline wrapper...")
366
- lc_pipeline = HuggingFacePipeline(pipeline=hf_pipeline)
367
- print("HuggingFacePipeline wrapper created successfully.")
368
-
369
- print("Initializing ChatHuggingFace wrapper...")
370
- # Pass the LangChain pipeline wrapper to ChatHuggingFace
371
- self.llm = ChatHuggingFace(llm=lc_pipeline)
372
- # Pass the raw transformers pipeline directly to ChatHuggingFace
373
- # self.llm = ChatHuggingFace(pipeline=hf_pipeline)
374
- print("ChatHuggingFace wrapper initialized successfully.")
375
 
376
- except Exception as e:
377
- print(f"FATAL: Error loading Hugging Face pipeline or ChatHuggingFace: {e}")
378
- raise
379
-
380
- # 2. Define Tools
381
- self.tools = agent_tools
382
- # Use ToolNode
383
- self.tool_executor = ToolNode(self.tools)
384
- print(f"Tools initialized: {[tool.name for tool in self.tools]}")
385
-
386
- # 3. Create Prompt Template
387
- self.prompt = ChatPromptTemplate.from_messages(
388
- [
389
- ("system", SYSTEM_PROMPT),
390
- MessagesPlaceholder(variable_name="chat_history"),
391
- ("human", "{input}"),
392
- ]
393
- )
394
- print("Chat prompt template created.")
395
-
396
- # Bind tools to the LLM
397
- print("Binding tools to the LLM...")
398
- self.llm_with_tools = self.llm.bind_tools(self.tools)
399
- print("Tools bound successfully.")
400
-
401
- # 4. Define Graph Nodes
402
- agent_node_partial = partial(agent_node, llm_with_tools=self.llm_with_tools, prompt=self.prompt)
403
- tool_node_partial = partial(tool_node, tool_executor=self.tool_executor)
404
-
405
- # 5. Define Graph Structure
406
- print("Defining LangGraph workflow...")
407
- workflow = StateGraph(AgentState)
408
- workflow.add_node("agent", agent_node_partial)
409
- workflow.add_node("tools", tool_node_partial)
410
- workflow.set_entry_point("agent")
411
- workflow.add_conditional_edges(
412
- "agent",
413
- should_continue,
414
- {"tools": "tools", "__end__": END},
415
- )
416
- workflow.add_edge("tools", "agent")
417
 
418
- # 6. Compile Graph
419
- print("Compiling LangGraph agent graph...")
420
- try:
421
- self.graph = workflow.compile()
422
- print("LangGraph agent graph compiled successfully.")
423
- except Exception as e:
424
- print(f"FATAL: Error compiling LangGraph agent: {e}")
425
- raise
426
-
427
- print("LangGraphAgent initialized successfully.")
428
-
429
- def __call__(self, question: str, task_id: str) -> str:
430
- """Executes the agent graph for a given question and task ID."""
431
- print(f"Agent invoked with question: '{question}' and task_id: '{task_id}'")
432
- initial_state = AgentState(
433
- question=question,
434
- task_id=task_id,
435
- intermediate_steps=[],
436
- chat_history=[HumanMessage(content=question)],
437
- downloaded_files={},
438
- tool_cache={}, # Initialize the cache
439
- error=None,
440
- max_iterations=self.max_iterations,
441
- current_iteration=0,
442
- agent_outcome=None # Ensure agent_outcome is initialized
443
- )
444
 
445
- final_state_result = None
446
- try:
447
- print(f"Invoking graph for task {task_id}...")
448
- final_state_result = self.graph.invoke(
449
- initial_state,
450
- config={"recursion_limit": self.max_iterations + 10}
451
- )
452
- print(f"Graph invocation complete for task {task_id}.")
453
-
454
- if final_state_result and isinstance(final_state_result.get('agent_outcome'), AgentFinish):
455
- final_answer = final_state_result['agent_outcome'].return_values['output']
456
- print(f"Agent finished successfully. Final Answer: {final_answer[:200]}...")
457
- return final_answer
458
- else:
459
- error_msg = "Agent did not finish with a final answer (AgentFinish)."
460
- print(f"{error_msg} Final State: {str(final_state_result)[:500]}")
461
- last_message = "No message found in history."
462
- if isinstance(final_state_result, dict) and 'chat_history' in final_state_result and final_state_result['chat_history']:
463
- last_message = final_state_result['chat_history'][-1].content
464
- return f"AGENT_ERROR: {error_msg} Last Message: {last_message[:200]}..."
465
 
466
- except Exception as e:
467
- error_msg = f"Error during agent execution for task {task_id}: {e}"
468
- print(error_msg)
469
- if final_state_result:
470
- error_msg += f" | Final State (partial): {str(final_state_result)[:500]}"
471
- return f"AGENT_ERROR: {error_msg}"
472
- finally:
473
- print(f"--- Finished Agent Run for Task {task_id} ---")
474
-
475
- # --- Gradio App Logic ---
476
- # We need to make run_and_submit_all a generator to yield updates
477
- def run_and_submit_all(profile: gr.OAuthProfile | None):
478
  """
479
- Fetches all questions, runs the LangGraphAgent on them, submits all answers,
480
- and displays the results. Yields status updates. Requires user login.
481
  """
482
- space_id = os.getenv("SPACE_ID")
483
- results_log = []
484
- answers_payload = []
485
- results_df = pd.DataFrame() # Initialize empty DataFrame
486
- full_activity_log = "" # Initialize cumulative log
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
 
488
- if not profile:
489
- print("User not logged in.")
490
- # Yield initial state with empty log
491
- yield "Please Login to Hugging Face with the button to submit results.", "", results_df
492
- return # Stop execution if not logged in
 
 
493
 
494
- username = profile.username
495
- print(f"User logged in: {username}")
496
- # Yield status update with empty log
497
- yield f"User logged in: {username}. Initializing...", "", results_df
 
 
498
 
499
  api_url = DEFAULT_API_URL
500
  questions_url = f"{api_url}/questions"
501
  submit_url = f"{api_url}/submit"
502
 
503
- # 1. Instantiate Agent
504
- log_capture = io.StringIO()
505
  try:
506
- initial_agent_log = "Initializing Agent...\n"
507
- full_activity_log += initial_agent_log
508
- yield initial_agent_log.strip(), full_activity_log, results_df
509
- with contextlib.redirect_stdout(log_capture): # Capture prints during init
510
- agent = LangGraphAgent(max_iterations=15)
511
- print("Agent instantiation successful.")
512
- init_log_output = log_capture.getvalue()
513
- full_activity_log += init_log_output
514
- status_update = "Agent Initialized. Fetching questions..."
515
- yield status_update, full_activity_log, results_df
516
  except Exception as e:
517
- error_msg = f"Error initializing agent: {e}"
518
- print(error_msg)
519
- full_activity_log += f"\nERROR: {error_msg}\n"
520
- yield error_msg, full_activity_log, results_df
521
- return
522
- finally:
523
- # Ensure any captured init log is added even if exception occurs later
524
- init_log_output = log_capture.getvalue()
525
- if init_log_output not in full_activity_log: # Avoid duplication
526
- full_activity_log += init_log_output
527
-
528
- # Construct agent_code link
529
- if space_id:
530
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
531
- code_link_log = f"Agent code link: {agent_code}\n"
532
- print(code_link_log.strip())
533
- full_activity_log += code_link_log
534
- else:
535
- agent_code = "local_run_no_code_link"
536
- code_link_log = "Warning: SPACE_ID not found. Using placeholder for agent_code link.\n"
537
- print(code_link_log.strip())
538
- full_activity_log += code_link_log
539
 
540
  # 2. Fetch Questions
541
- fetch_log_start = f"Fetching questions from: {questions_url}\n"
542
- print(fetch_log_start.strip())
543
- full_activity_log += fetch_log_start
544
- questions_data = None
545
  try:
546
  response = requests.get(questions_url, timeout=15)
547
  response.raise_for_status()
548
  questions_data = response.json()
549
  if not questions_data:
550
- empty_q_log = "Fetched questions list is empty or invalid format.\n"
551
- print(empty_q_log.strip())
552
- full_activity_log += empty_q_log
553
- yield empty_q_log.strip(), full_activity_log, results_df
554
- return
555
- q_fetch_success = f"Fetched {len(questions_data)} questions.\n"
556
- print(q_fetch_success.strip())
557
- full_activity_log += q_fetch_success
558
- status_update = f"Fetched {len(questions_data)} questions. Running agent..."
559
- yield status_update, full_activity_log, results_df
560
  except requests.exceptions.RequestException as e:
561
- error_msg = f"Error fetching questions: {e}"
562
- print(error_msg)
563
- full_activity_log += f"\nERROR: {error_msg}\n"
564
- yield error_msg, full_activity_log, results_df
565
- return
566
- except json.JSONDecodeError as e:
567
- response_text = response.text if response else "No response object"
568
- error_msg = f"Error decoding JSON response from questions endpoint: {e}"
569
- print(error_msg)
570
- print(f"Response text: {response_text[:500]}")
571
- full_activity_log += f"\nERROR: {error_msg}\nResponse text: {response_text[:500]}\n"
572
- yield f"Error decoding server response for questions: {e}", full_activity_log, results_df
573
- return
574
  except Exception as e:
575
- error_msg = f"An unexpected error occurred fetching questions: {e}"
576
- print(error_msg)
577
- full_activity_log += f"\nERROR: {error_msg}\n"
578
- yield error_msg, full_activity_log, results_df
579
- return
580
-
581
- # 3. Run Agent on Questions
582
- total_questions = len(questions_data)
583
- run_start_log = f"Running agent on {total_questions} questions...\n"
584
- print(run_start_log.strip())
585
- full_activity_log += run_start_log
586
- for i, item in enumerate(questions_data):
587
  task_id = item.get("task_id")
588
  question_text = item.get("question")
589
-
590
- status_update = f"Running agent... Processing question {i+1}/{total_questions} (Task ID: {task_id})"
591
- # Yield progress update with the current log
592
- yield status_update, full_activity_log, pd.DataFrame(results_log)
593
-
594
  if not task_id or question_text is None:
595
- skip_log = f"Skipping item with missing task_id or question: {item}\n"
596
- print(skip_log.strip())
597
- full_activity_log += skip_log
598
- results_log.append({"Task ID": task_id or "Missing", "Question": question_text or "Missing", "Submitted Answer": "SKIPPED - Invalid item data"})
599
  continue
600
-
601
- task_start_log = f"--- Running Task {task_id} ---\n"
602
- print(task_start_log.strip())
603
- full_activity_log += task_start_log
604
- log_capture = io.StringIO() # Reset buffer for each agent call
605
  try:
606
- with contextlib.redirect_stdout(log_capture): # Capture agent prints
607
- submitted_answer = agent(question=question_text, task_id=task_id)
608
- # Also capture the print right after the call if successful
609
- print(f"Agent returned answer for task {task_id}: {submitted_answer[:100]}...\n")
610
-
611
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
612
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
613
  except Exception as e:
614
- error_log = f"Error running agent on task {task_id}: {e}\n"
615
- print(error_log.strip())
616
- # Add error to main log even if not captured by redirect_stdout
617
- if error_log not in log_capture.getvalue():
618
- full_activity_log += f"ERROR: {error_log}"
619
- error_answer = f"AGENT_ERROR: {e}"
620
- answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
621
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_answer})
622
- finally:
623
- # Always append the captured log for this task
624
- current_task_log = log_capture.getvalue()
625
- full_activity_log += current_task_log
626
- task_end_log = f"--- Finished Task {task_id} ---\n\n"
627
- print(task_end_log.strip())
628
- full_activity_log += task_end_log
629
 
630
  if not answers_payload:
631
- no_answer_log = "Agent did not produce any answers to submit.\n"
632
- print(no_answer_log.strip())
633
- full_activity_log += no_answer_log
634
- yield no_answer_log.strip(), full_activity_log, pd.DataFrame(results_log)
635
- return
636
 
637
  # 4. Prepare Submission
638
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
639
- status_update = f"Agent finished processing {total_questions} questions. Submitting {len(answers_payload)} answers for user '{username}'..."
640
- prep_log = status_update + "\n"
641
  print(status_update)
642
- full_activity_log += prep_log
643
- results_df = pd.DataFrame(results_log) # Update DataFrame before final yield
644
- yield status_update, full_activity_log, results_df
645
-
646
- # 5. Submit Results
647
- submit_start_log = f"Submitting {len(answers_payload)} answers to: {submit_url}\n"
648
- print(submit_start_log.strip())
649
- full_activity_log += submit_start_log
650
  try:
651
  response = requests.post(submit_url, json=submission_data, timeout=60)
652
  response.raise_for_status()
@@ -658,85 +200,71 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
658
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
659
  f"Message: {result_data.get('message', 'No message received.')}"
660
  )
661
- submit_success_log = "Submission successful.\n"
662
- print(submit_success_log.strip())
663
- full_activity_log += submit_success_log
664
- yield final_status, full_activity_log, results_df # Final status yield
665
  except requests.exceptions.HTTPError as e:
666
  error_detail = f"Server responded with status {e.response.status_code}."
667
  try:
668
  error_json = e.response.json()
669
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
670
- except json.JSONDecodeError:
671
  error_detail += f" Response: {e.response.text[:500]}"
672
  status_message = f"Submission Failed: {error_detail}"
673
  print(status_message)
674
- full_activity_log += f"\nERROR: {status_message}\n"
675
- yield status_message, full_activity_log, results_df
676
  except requests.exceptions.Timeout:
677
  status_message = "Submission Failed: The request timed out."
678
  print(status_message)
679
- full_activity_log += f"\nERROR: {status_message}\n"
680
- yield status_message, full_activity_log, results_df
681
  except requests.exceptions.RequestException as e:
682
  status_message = f"Submission Failed: Network error - {e}"
683
  print(status_message)
684
- full_activity_log += f"\nERROR: {status_message}\n"
685
- yield status_message, full_activity_log, results_df
686
  except Exception as e:
687
  status_message = f"An unexpected error occurred during submission: {e}"
688
  print(status_message)
689
- full_activity_log += f"\nERROR: {status_message}\n"
690
- yield status_message, full_activity_log, results_df
 
691
 
692
- # --- Build Gradio Interface ---
693
  with gr.Blocks() as demo:
694
- gr.Markdown("# LangGraph Agent Evaluation Runner")
695
-
696
- # Updated instructions markdown reflecting current state
697
- instructions_markdown = """
698
- **Welcome to the LangGraph Agent Evaluation Runner!**
699
- **What is this project?**
700
- This application is designed to test and evaluate an AI assistant (an "agent") built using the LangGraph framework and powered by the Llama 3 language model. The goal is to see how well this agent can answer a variety of questions, sometimes needing to use tools like web search or file downloading to find the right information.
701
- **How does this app work?**
702
- please log in using the Hugging Face login button below. We need your username to submit your agent's results.
703
- 2. **Run Evaluation:** Click the 'Run Evaluation & Submit All Answers' button.
704
- * The app will fetch a set of evaluation questions from a server.
705
- * For each question, the LangGraph agent will try to determine the best answer. It might use its built-in knowledge or decide to use a tool (like searching the web or reading a provided file).
706
- * Once the agent has processed all questions, the app will automatically submit all the answers linked to your username.
707
- 3. **See Results:** The final score and a table showing each question, the agent's answer, and whether it was correct will be displayed below. You'll also see status updates during the run.
708
- **Technical Details (for the curious):**
709
- * **Agent Framework:** LangGraph (helps orchestrate the agent's steps)
710
- * **Language Model:** `Qwen/Qwen2-72B-Instruct`
711
- * **Tools Available to Agent:** DuckDuckGo Web Search, File Downloader
712
- * **Key Feature:** The agent caches results from tools, so if it needs the same information again, it can retrieve it quickly without re-running the tool.
713
- **(Optional) For Developers:** You can clone this Space and modify the `app.py` file to experiment with different prompts, agent logic, or even different language models or tools.
714
- """
715
- gr.Markdown(instructions_markdown)
716
 
717
  gr.LoginButton()
718
 
719
  run_button = gr.Button("Run Evaluation & Submit All Answers")
720
 
721
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=3, interactive=False) # Reduced lines slightly
722
- agent_activity_feed = gr.Textbox(label="Agent Activity Log", lines=15, interactive=False, autoscroll=True)
723
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
724
 
725
  run_button.click(
726
  fn=run_and_submit_all,
727
- # The profile is implicitly available in the scope Gradio runs the function
728
- outputs=[status_output, agent_activity_feed, results_table]
729
  )
730
 
731
- # --- Main Execution Block ---
732
  if __name__ == "__main__":
733
  print("\n" + "-"*30 + " App Starting " + "-"*30)
734
-
735
- # Call the GPU check function early during startup
736
- gpu_check()
737
-
738
  space_host_startup = os.getenv("SPACE_HOST")
739
- space_id_startup = os.getenv("SPACE_ID")
740
 
741
  if space_host_startup:
742
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -744,7 +272,7 @@ if __name__ == "__main__":
744
  else:
745
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
746
 
747
- if space_id_startup:
748
  print(f"✅ SPACE_ID found: {space_id_startup}")
749
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
750
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
@@ -753,7 +281,5 @@ if __name__ == "__main__":
753
 
754
  print("-"*(60 + len(" App Starting ")) + "\n")
755
 
756
- print("Launching Gradio Interface for LangGraph Agent Evaluation...")
757
- # Set share=False for security unless needed
758
- # Set debug=True for more detailed logs during development
759
  demo.launch(debug=True, share=False)
 
1
  import os
2
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
+ # --- Basic Agent Definition ---
12
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
+
14
+ import requests
15
+ import json
16
+ from typing import List, Dict, Any
17
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, tool
18
+ from smolagents.models import ChatMessage
19
+ import datetime
20
+ import pytz
21
+
22
+
23
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """
    A tool that fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').

    Returns:
        A human-readable sentence with the local time, or an error message
        when the timezone name is not recognised by pytz.
    """
    try:
        zone = pytz.timezone(timezone)
        stamp = datetime.datetime.now(zone).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {stamp}"
    except Exception as err:
        # pytz raises UnknownTimeZoneError for bad names; report it as text
        # so the agent can recover instead of crashing.
        return f"Error fetching time for timezone '{timezone}': {str(err)}"
38
 
 
 
 
 
 
 
 
 
39
 
40
@tool
def calculate(expression: str) -> str:
    """
    A tool that evaluates mathematical expressions safely.

    Args:
        expression: A mathematical expression (e.g., '2 + 2 * 3').

    Returns:
        A sentence with the numeric result, or an error message string when
        the expression is invalid or uses unsupported constructs.
    """
    # Local imports keep this fix self-contained within the tool.
    import ast
    import operator

    allowed_chars = set('0123456789+-*/(). ')
    if not all(c in allowed_chars for c in expression):
        return "Error: Expression contains invalid characters"

    # BUGFIX: the original passed the string to eval(). Even with the
    # character whitelist above, eval() accepts '**' (e.g. '9**9**9' can
    # hang the process) and is the wrong safety mechanism in principle.
    # Instead we walk the parsed AST and only execute literal numbers and
    # the four arithmetic operators (plus unary +/- and floor division,
    # which the whitelist already admits).
    ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
    }

    def _eval(node):
        # Literal number (int or float only — no strings, names, calls).
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        # Binary arithmetic with an explicitly allowed operator.
        # NOTE: ast.Pow is deliberately NOT allowed to prevent huge-exponent DoS.
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        # Unary plus/minus, e.g. '-3 + 2'.
        if isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)):
            value = _eval(node.operand)
            return -value if isinstance(node.op, ast.USub) else value
        raise ValueError("unsupported expression construct")

    try:
        result = _eval(ast.parse(expression, mode="eval").body)
        return f"The result of {expression} is: {result}"
    except Exception as e:
        return f"Error evaluating expression: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
@tool
def search_web(query: str) -> str:
    """
    A tool that searches the web for information using DuckDuckGo.

    Args:
        query: The search query string.

    Returns:
        A formatted block of up to five results (title, 200-char snippet,
        URL), or an error/no-results message string.
    """
    try:
        # Imported lazily so the tool degrades gracefully when the optional
        # duckduckgo_search package is missing.
        from duckduckgo_search import DDGS

        hits = list(DDGS().text(query, max_results=5))
        if not hits:
            return f"No results found for: {query}"

        parts = [f"Search results for '{query}':\n\n"]
        for rank, hit in enumerate(hits, 1):
            parts.append(f"{rank}. {hit.get('title', 'N/A')}\n")
            parts.append(f" {hit.get('body', 'N/A')[:200]}...\n")
            parts.append(f" URL: {hit.get('href', 'N/A')}\n\n")
        return "".join(parts)
    except Exception as e:
        return f"Error searching web: {str(e)}"
 
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
def create_agent(model_name: str = "llama3.2:3b-instruct-q4_K_M",
                 max_steps: int = 6,
                 verbosity: int = 1) -> CodeAgent:
    """
    Create a CodeAgent with the specified model and tools.

    Args:
        model_name: Informational label printed at startup.
            NOTE(review): the actual model is hard-coded to
            Qwen2.5-Coder-32B-Instruct below; this parameter is logged but
            does not select the model — confirm whether it should.
        max_steps: Maximum reasoning/tool-use steps per question.
        verbosity: smolagents verbosity level for step-by-step logging.

    Returns:
        A configured smolagents CodeAgent wired with the time, calculator,
        web-search, and final-answer tools.
    """
    # BUGFIX: the original referenced InferenceClientModel without importing
    # it anywhere, so the first call raised NameError. Imported here to keep
    # the fix self-contained.
    from smolagents import InferenceClientModel

    print(f"\nCreating agent with model: {model_name}")
    print(f"Max steps: {max_steps}")
    print(f"Verbosity: {verbosity}\n")

    model = InferenceClientModel(
        "Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=8096)

    agent = CodeAgent(
        model=model,
        tools=[
            get_current_time_in_timezone,
            calculate,
            search_web,
            FinalAnswerTool(),
        ],
        max_steps=max_steps,
        verbosity_level=verbosity,
    )

    return agent
112
 
113
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the agent on them, submits all answers,
    and displays the results.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or None when
            the user has not logged in.

    Returns:
        A (status_message, results_dataframe) tuple for the Gradio outputs;
        the dataframe slot is None when the run aborts before any question
        is processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Used to publish a link to this Space's code

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (modify this part to create your agent)
    try:
        agent = create_agent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # For an app running as a Hugging Face Space, this link points toward
    # the codebase (useful for others, so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            # BUGFIX: smolagents agents are driven via .run(task); calling
            # the agent object directly invokes the managed-agent protocol,
            # which wraps the answer in extra report framing.
            submitted_answer = agent.run(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # Best-effort: record the failure in the visible log but do not
            # submit an error string as an answer.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        return status_message, pd.DataFrame(results_log)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)
232
+
233
 
234
# --- Build Gradio Interface using Blocks ---
# Layout: heading + instructions, HF login, a run button, a status textbox,
# and a results table. `demo` is launched from the __main__ guard below.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # OAuth login — run_and_submit_all reads the resulting gr.OAuthProfile.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Outputs populated by run_and_submit_all's (status, dataframe) return.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        # No inputs: Gradio injects the OAuth profile into the matching
        # gr.OAuthProfile-annotated parameter automatically.
        outputs=[status_output, results_table]
    )
262
 
 
263
  if __name__ == "__main__":
264
  print("\n" + "-"*30 + " App Starting " + "-"*30)
265
+ # Check for SPACE_HOST and SPACE_ID at startup for information
 
 
 
266
  space_host_startup = os.getenv("SPACE_HOST")
267
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
268
 
269
  if space_host_startup:
270
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
272
  else:
273
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
274
 
275
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
276
  print(f"✅ SPACE_ID found: {space_id_startup}")
277
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
278
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
281
 
282
  print("-"*(60 + len(" App Starting ")) + "\n")
283
 
284
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
 
 
285
  demo.launch(debug=True, share=False)