Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,9 @@ import re
|
|
| 13 |
import uuid
|
| 14 |
import time
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
# --- Multimodal & Web Tool Imports ---
|
| 17 |
from transformers import pipeline
|
| 18 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
@@ -61,7 +64,10 @@ agent = None
|
|
| 61 |
# ====================================================
|
| 62 |
# --- Tool Definitions ---
|
| 63 |
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
| 65 |
def search_tool(query: str) -> str:
|
| 66 |
"""Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
|
| 67 |
if not isinstance(query, str) or not query.strip():
|
|
@@ -78,7 +84,10 @@ def search_tool(query: str) -> str:
|
|
| 78 |
return f"Error running search for '{query}': {str(e)}"
|
| 79 |
|
| 80 |
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
| 82 |
def code_interpreter(code: str) -> str:
|
| 83 |
"""
|
| 84 |
Executes a string of Python code and returns its stdout, stderr, and any error.
|
|
@@ -132,7 +141,10 @@ def code_interpreter(code: str) -> str:
|
|
| 132 |
return f"Execution failed:\n{tb_str}"
|
| 133 |
|
| 134 |
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
| 136 |
def read_file(path: str) -> str:
|
| 137 |
"""Reads the content of a file at the specified path. Use this to examine uploaded files or files you've created."""
|
| 138 |
if not isinstance(path, str) or not path.strip():
|
|
@@ -186,7 +198,11 @@ def read_file(path: str) -> str:
|
|
| 186 |
return f"Unexpected error accessing file '{path}': {str(e)}"
|
| 187 |
|
| 188 |
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
def write_file(path: str, content: str) -> str:
|
| 191 |
"""Writes content to a file at the specified path. Creates directories if needed."""
|
| 192 |
if not isinstance(path, str) or not path.strip():
|
|
@@ -213,7 +229,10 @@ def write_file(path: str, content: str) -> str:
|
|
| 213 |
return f"Error writing file '{path}': {str(e)}"
|
| 214 |
|
| 215 |
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
| 217 |
def list_directory(path: str = ".") -> str:
|
| 218 |
"""Lists the contents of a directory. Useful for finding available files."""
|
| 219 |
if not isinstance(path, str):
|
|
@@ -254,7 +273,10 @@ def list_directory(path: str = ".") -> str:
|
|
| 254 |
return f"Error listing directory '{path}': {str(e)}"
|
| 255 |
|
| 256 |
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
| 258 |
def audio_transcription_tool(file_path: str) -> str:
|
| 259 |
"""Transcribes an audio file (mp3, wav, etc.) to text using Whisper."""
|
| 260 |
if not isinstance(file_path, str) or not file_path.strip():
|
|
@@ -300,7 +322,10 @@ def audio_transcription_tool(file_path: str) -> str:
|
|
| 300 |
return f"Error transcribing '{file_path}': {str(e)}"
|
| 301 |
|
| 302 |
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
| 304 |
def get_youtube_transcript(video_url: str) -> str:
|
| 305 |
"""Fetches the transcript/captions for a YouTube video."""
|
| 306 |
if not isinstance(video_url, str) or not video_url.strip():
|
|
@@ -335,7 +360,11 @@ def get_youtube_transcript(video_url: str) -> str:
|
|
| 335 |
|
| 336 |
|
| 337 |
# --- NEW RAG-BASED SCRAPER TOOL ---
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
def scrape_and_retrieve(url: str, query: str) -> str:
|
| 340 |
"""
|
| 341 |
Scrapes a webpage, chunks its content, and performs a RAG (Retrieval-Augmented Generation)
|
|
@@ -401,7 +430,10 @@ def scrape_and_retrieve(url: str, query: str) -> str:
|
|
| 401 |
return f"Error scraping or retrieving from {url}: {str(e)}\n{tb_str}"
|
| 402 |
|
| 403 |
|
| 404 |
-
|
|
|
|
|
|
|
|
|
|
| 405 |
def final_answer_tool(answer: str) -> str:
|
| 406 |
"""
|
| 407 |
Call this tool ONLY when you have the final, definitive answer.
|
|
@@ -500,7 +532,17 @@ class BasicAgent:
|
|
| 500 |
# Build tool descriptions
|
| 501 |
tool_desc_list = []
|
| 502 |
for tool in self.tools:
|
| 503 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
tool_desc_list.append(desc)
|
| 505 |
tool_descriptions = "\n".join(tool_desc_list)
|
| 506 |
|
|
@@ -656,6 +698,10 @@ Current Chat History:
|
|
| 656 |
)
|
| 657 |
time.sleep(2 ** attempt)
|
| 658 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
if ai_message.tool_calls:
|
| 660 |
print(f"🔧 Executor Tool Call: {ai_message.tool_calls[0]['name']}")
|
| 661 |
else:
|
|
@@ -775,6 +821,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 775 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
|
| 776 |
print(f"Agent code URL: {agent_code}")
|
| 777 |
print("--- USING MOCK QUESTIONS ---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 778 |
mock_questions_data = [
|
| 779 |
{
|
| 780 |
"task_id": "mock_level1_001",
|
|
@@ -857,6 +908,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 857 |
"question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
|
| 858 |
}
|
| 859 |
]
|
|
|
|
| 860 |
questions_data = mock_questions_data
|
| 861 |
print(f"Using {len(questions_data)} mock questions.")
|
| 862 |
|
|
@@ -891,7 +943,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 891 |
|
| 892 |
status_update = f"Finished mock run. Processed {len(answers_payload)} answers for '{username}'."
|
| 893 |
print(status_update); print("--- MOCK RUN - SUBMISSION SKIPPED ---")
|
| 894 |
-
|
| 895 |
results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
|
| 896 |
return final_status, results_df
|
| 897 |
|
|
@@ -911,7 +963,7 @@ with gr.Blocks() as demo:
|
|
| 911 |
|
| 912 |
if __name__ == "__main__":
|
| 913 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 914 |
-
space_host_startup = os.getenv("SPACE_HOST"); space_id_startup = os.getenv("SPACE_ID")
|
| 915 |
if space_host_startup: print(f"✅ SPACE_HOST: {space_host_startup}\n Runtime URL: https://{space_host_startup}.hf.space")
|
| 916 |
else: print("ℹ️ No SPACE_HOST (local?).")
|
| 917 |
if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
|
@@ -925,3 +977,5 @@ if __name__ == "__main__":
|
|
| 925 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 926 |
print("Launching Gradio Interface...")
|
| 927 |
demo.queue().launch(debug=True, share=False)
|
|
|
|
|
|
|
|
|
| 13 |
import uuid
|
| 14 |
import time
|
| 15 |
|
| 16 |
+
# --- Pydantic Import ---
|
| 17 |
+
from pydantic import BaseModel, Field
|
| 18 |
+
|
| 19 |
# --- Multimodal & Web Tool Imports ---
|
| 20 |
from transformers import pipeline
|
| 21 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
|
| 64 |
# ====================================================
|
| 65 |
# --- Tool Definitions ---
|
| 66 |
|
| 67 |
+
class SearchInput(BaseModel):
|
| 68 |
+
query: str = Field(description="The search query.")
|
| 69 |
+
|
| 70 |
+
@tool(args_schema=SearchInput)
|
| 71 |
def search_tool(query: str) -> str:
|
| 72 |
"""Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
|
| 73 |
if not isinstance(query, str) or not query.strip():
|
|
|
|
| 84 |
return f"Error running search for '{query}': {str(e)}"
|
| 85 |
|
| 86 |
|
| 87 |
+
class CodeInput(BaseModel):
|
| 88 |
+
code: str = Field(description="The Python code to execute, which must include a print() statement for output.")
|
| 89 |
+
|
| 90 |
+
@tool(args_schema=CodeInput)
|
| 91 |
def code_interpreter(code: str) -> str:
|
| 92 |
"""
|
| 93 |
Executes a string of Python code and returns its stdout, stderr, and any error.
|
|
|
|
| 141 |
return f"Execution failed:\n{tb_str}"
|
| 142 |
|
| 143 |
|
| 144 |
+
class ReadFileInput(BaseModel):
|
| 145 |
+
path: str = Field(description="The path to the file to read.")
|
| 146 |
+
|
| 147 |
+
@tool(args_schema=ReadFileInput)
|
| 148 |
def read_file(path: str) -> str:
|
| 149 |
"""Reads the content of a file at the specified path. Use this to examine uploaded files or files you've created."""
|
| 150 |
if not isinstance(path, str) or not path.strip():
|
|
|
|
| 198 |
return f"Unexpected error accessing file '{path}': {str(e)}"
|
| 199 |
|
| 200 |
|
| 201 |
+
class WriteFileInput(BaseModel):
|
| 202 |
+
path: str = Field(description="The path of the file to write to.")
|
| 203 |
+
content: str = Field(description="The content to write into the file.")
|
| 204 |
+
|
| 205 |
+
@tool(args_schema=WriteFileInput)
|
| 206 |
def write_file(path: str, content: str) -> str:
|
| 207 |
"""Writes content to a file at the specified path. Creates directories if needed."""
|
| 208 |
if not isinstance(path, str) or not path.strip():
|
|
|
|
| 229 |
return f"Error writing file '{path}': {str(e)}"
|
| 230 |
|
| 231 |
|
| 232 |
+
class ListDirInput(BaseModel):
|
| 233 |
+
path: str = Field(description="The directory path to list.", default=".")
|
| 234 |
+
|
| 235 |
+
@tool(args_schema=ListDirInput)
|
| 236 |
def list_directory(path: str = ".") -> str:
|
| 237 |
"""Lists the contents of a directory. Useful for finding available files."""
|
| 238 |
if not isinstance(path, str):
|
|
|
|
| 273 |
return f"Error listing directory '{path}': {str(e)}"
|
| 274 |
|
| 275 |
|
| 276 |
+
class AudioInput(BaseModel):
|
| 277 |
+
file_path: str = Field(description="The file path of the audio to transcribe.")
|
| 278 |
+
|
| 279 |
+
@tool(args_schema=AudioInput)
|
| 280 |
def audio_transcription_tool(file_path: str) -> str:
|
| 281 |
"""Transcribes an audio file (mp3, wav, etc.) to text using Whisper."""
|
| 282 |
if not isinstance(file_path, str) or not file_path.strip():
|
|
|
|
| 322 |
return f"Error transcribing '{file_path}': {str(e)}"
|
| 323 |
|
| 324 |
|
| 325 |
+
class YoutubeInput(BaseModel):
|
| 326 |
+
video_url: str = Field(description="The URL of the YouTube video.")
|
| 327 |
+
|
| 328 |
+
@tool(args_schema=YoutubeInput)
|
| 329 |
def get_youtube_transcript(video_url: str) -> str:
|
| 330 |
"""Fetches the transcript/captions for a YouTube video."""
|
| 331 |
if not isinstance(video_url, str) or not video_url.strip():
|
|
|
|
| 360 |
|
| 361 |
|
| 362 |
# --- NEW RAG-BASED SCRAPER TOOL ---
|
| 363 |
+
class ScrapeInput(BaseModel):
|
| 364 |
+
url: str = Field(description="The URL to scrape (must start with http:// or https://).")
|
| 365 |
+
query: str = Field(description="The specific question to answer or information to find on the page.")
|
| 366 |
+
|
| 367 |
+
@tool(args_schema=ScrapeInput)
|
| 368 |
def scrape_and_retrieve(url: str, query: str) -> str:
|
| 369 |
"""
|
| 370 |
Scrapes a webpage, chunks its content, and performs a RAG (Retrieval-Augmented Generation)
|
|
|
|
| 430 |
return f"Error scraping or retrieving from {url}: {str(e)}\n{tb_str}"
|
| 431 |
|
| 432 |
|
| 433 |
+
class FinalAnswerInput(BaseModel):
|
| 434 |
+
answer: str = Field(description="The final, definitive answer to the question.")
|
| 435 |
+
|
| 436 |
+
@tool(args_schema=FinalAnswerInput)
|
| 437 |
def final_answer_tool(answer: str) -> str:
|
| 438 |
"""
|
| 439 |
Call this tool ONLY when you have the final, definitive answer.
|
|
|
|
| 532 |
# Build tool descriptions
|
| 533 |
tool_desc_list = []
|
| 534 |
for tool in self.tools:
|
| 535 |
+
# Use Pydantic schema if available for richer descriptions
|
| 536 |
+
if tool.args_schema:
|
| 537 |
+
schema = tool.args_schema.schema()
|
| 538 |
+
args_desc = []
|
| 539 |
+
for prop, details in schema.get('properties', {}).items():
|
| 540 |
+
desc = details.get('description', '')
|
| 541 |
+
args_desc.append(f" - {prop}: {desc}")
|
| 542 |
+
args_str = "\n".join(args_desc)
|
| 543 |
+
desc = f"- {tool.name}:\n {tool.description}\n Args:\n{args_str}"
|
| 544 |
+
else:
|
| 545 |
+
desc = f"- {tool.name}: {tool.description}"
|
| 546 |
tool_desc_list.append(desc)
|
| 547 |
tool_descriptions = "\n".join(tool_desc_list)
|
| 548 |
|
|
|
|
| 698 |
)
|
| 699 |
time.sleep(2 ** attempt)
|
| 700 |
|
| 701 |
+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
| 702 |
+
# --- FALLBACK LOGIC REMOVED AS REQUESTED ---
|
| 703 |
+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
| 704 |
+
|
| 705 |
if ai_message.tool_calls:
|
| 706 |
print(f"🔧 Executor Tool Call: {ai_message.tool_calls[0]['name']}")
|
| 707 |
else:
|
|
|
|
| 821 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
|
| 822 |
print(f"Agent code URL: {agent_code}")
|
| 823 |
print("--- USING MOCK QUESTIONS ---")
|
| 824 |
+
|
| 825 |
+
# --- MOCK QUESTIONS ---
|
| 826 |
+
#
|
| 827 |
+
# vvv PASTE YOUR FULL LIST OF 20 MOCK QUESTIONS HERE vvv
|
| 828 |
+
#
|
| 829 |
mock_questions_data = [
|
| 830 |
{
|
| 831 |
"task_id": "mock_level1_001",
|
|
|
|
| 908 |
"question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
|
| 909 |
}
|
| 910 |
]
|
| 911 |
+
|
| 912 |
questions_data = mock_questions_data
|
| 913 |
print(f"Using {len(questions_data)} mock questions.")
|
| 914 |
|
|
|
|
| 943 |
|
| 944 |
status_update = f"Finished mock run. Processed {len(answers_payload)} answers for '{username}'."
|
| 945 |
print(status_update); print("--- MOCK RUN - SUBMISSION SKIPPED ---")
|
| 946 |
+
final_status = "--- MOCK RUN COMPLETE ---\n" + status_update + "\nSubmission SKIPPED."
|
| 947 |
results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
|
| 948 |
return final_status, results_df
|
| 949 |
|
|
|
|
| 963 |
|
| 964 |
if __name__ == "__main__":
|
| 965 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 966 |
+
space_host_startup = os.getenv("SPACE_HOST"); space_id_startup = os.getenv("SPACE_ID")
|
| 967 |
if space_host_startup: print(f"✅ SPACE_HOST: {space_host_startup}\n Runtime URL: https://{space_host_startup}.hf.space")
|
| 968 |
else: print("ℹ️ No SPACE_HOST (local?).")
|
| 969 |
if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
|
|
|
| 977 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 978 |
print("Launching Gradio Interface...")
|
| 979 |
demo.queue().launch(debug=True, share=False)
|
| 980 |
+
|
| 981 |
+
|