Final_Assignment_Template

Sleeping

App Files Files Community

dlaima committed on Jun 3, 2025

Commit

1381703

verified ·

1 Parent(s): add03b8

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -51

app.py CHANGED Viewed

@@ -3,7 +3,8 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
-from smolagents import ToolCallingAgent, OpenAIClientModel
 from audio_transcriber import AudioTranscriptionTool
 from image_analyzer import ImageAnalysisTool
 from wikipedia_searcher import WikipediaSearcher
@@ -18,7 +19,13 @@ SYSTEM_PROMPT = (
     "3. For dates, use the exact requested format.\n"
     "4. For numbers, use only the number.\n"
     "5. For names, use the exact name from sources.\n"
-    "6. If the question has a file, download it using the task ID and process it.\n"
     "Never say 'the answer is...'. Only return the answer.\n"
 )
@@ -26,68 +33,31 @@ class GaiaAgent:
     def __init__(self):
         print("Gaia Agent Initialized")
-        openai_api_key = os.getenv("OPENAI_API_KEY")
-        if not openai_api_key:
-            raise EnvironmentError("OPENAI_API_KEY not found in environment variables.")
-        self.model = OpenAIClientModel(
             model_name="gpt-3.5-turbo",
-            api_key=openai_api_key
         )
         self.tools = [
             AudioTranscriptionTool(),
             ImageAnalysisTool(),
             WikipediaSearcher()
         ]
         self.agent = ToolCallingAgent(
             tools=self.tools,
             model=self.model
         )
-    def download_file(self, task_id: str, file_extension: str) -> str:
-        file_url = f"{DEFAULT_API_URL}/files/{task_id}.{file_extension}"
-        local_filename = f"temp_{task_id}.{file_extension}"
-        try:
-            r = requests.get(file_url, timeout=30)
-            r.raise_for_status()
-            with open(local_filename, "wb") as f:
-                f.write(r.content)
-            return local_filename
-        except Exception as e:
-            print(f"Error downloading file for task {task_id}: {e}")
-            return ""
-    def __call__(self, question: str, task_id: str | None = None, file_name: str | None = None) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        # If there's a file related to the question, download it and prepare tool input
-        tool_inputs = {}
-        if task_id and file_name:
-            ext = file_name.split(".")[-1].lower()
-            local_path = self.download_file(task_id, ext)
-            if local_path:
-                if ext in ["mp3", "wav"]:
-                    tool_inputs = {"file_path": local_path}
-                    question = f"Transcribe the audio file."
-                elif ext in ["jpg", "jpeg", "png"]:
-                    tool_inputs = {"image_path": local_path, "question": question}
-                else:
-                    print(f"Unsupported file extension: {ext}")
         full_prompt = f"{SYSTEM_PROMPT}\nQUESTION:\n{question}"
         try:
-            # If there's a file to process, call the tool with inputs
-            if tool_inputs:
-                for tool in self.tools:
-                    if all(k in tool.inputs for k in tool_inputs.keys()):
-                        result = tool.forward(**tool_inputs)
-                        return result.strip()
-            # Otherwise, just call the agent with the prompt
             result = self.agent.run(full_prompt)
             print(f"Raw result from agent: {result}")
@@ -144,13 +114,33 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
-        question_text = item.get("question", "")
-        file_name = item.get("file_name")  # file_name may or may not be present
         if not task_id:
             continue
         try:
-            submitted_answer = agent(question_text, task_id=task_id, file_name=file_name)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,
@@ -165,6 +155,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
                 "Submitted Answer": error_msg
             })
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
@@ -215,7 +212,7 @@ with gr.Blocks() as demo:
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(fn=run_and_submit_all, inputs=[gr.get_last_logged_in_user()], outputs=[status_output, results_table])
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
@@ -239,4 +236,3 @@ if __name__ == "__main__":

 import gradio as gr
 import requests
 import pandas as pd
+from smolagents import ToolCallingAgent, OpenAIServerModel
 from audio_transcriber import AudioTranscriptionTool
 from image_analyzer import ImageAnalysisTool
 from wikipedia_searcher import WikipediaSearcher
     "3. For dates, use the exact requested format.\n"
     "4. For numbers, use only the number.\n"
     "5. For names, use the exact name from sources.\n"
+    "6. If the question has a file, download it using the task ID.\n"
+    "Examples:\n"
+    "- '42'\n"
+    "- 'Arturo Nunez'\n"
+    "- 'Yes'\n"
+    "- 'October 5, 2001'\n"
+    "- 'Buenos Aires'\n"
     "Never say 'the answer is...'. Only return the answer.\n"
 )
     def __init__(self):
         print("Gaia Agent Initialized")
+        # Initialize the OpenAI GPT-3.5-turbo model via smolagents OpenAIServerModel
+        self.model = OpenAIServerModel(
             model_name="gpt-3.5-turbo",
+            api_key=os.getenv("OPENAI_API_KEY")  # Make sure you set this in your environment
         )
+        # Initialize the tools
         self.tools = [
             AudioTranscriptionTool(),
             ImageAnalysisTool(),
             WikipediaSearcher()
         ]
+        # Create the agent with tools and model
         self.agent = ToolCallingAgent(
             tools=self.tools,
             model=self.model
         )
+    def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         full_prompt = f"{SYSTEM_PROMPT}\nQUESTION:\n{question}"
         try:
             result = self.agent.run(full_prompt)
             print(f"Raw result from agent: {result}")
     for item in questions_data:
         task_id = item.get("task_id")
         if not task_id:
             continue
+        question_text = item.get("question", "")
+        # Download the task's associated file, if any; only audio (mp3, wav) and image (jpeg, jpg, png) files are fetched
+        file_url = item.get("file_url")
+        local_file_path = None
+        if file_url:
+            try:
+                ext = file_url.split(".")[-1].lower()
+                if ext in ["mp3", "wav", "jpeg", "jpg", "png"]:
+                    local_file_path = f"./temp_{task_id}.{ext}"
+                    with requests.get(file_url, stream=True) as r:
+                        r.raise_for_status()
+                        with open(local_file_path, "wb") as f:
+                            for chunk in r.iter_content(chunk_size=8192):
+                                f.write(chunk)
+                    print(f"Downloaded file for task {task_id} to {local_file_path}")
+                    # Append info about the file path to the question so the agent knows to use it
+                    question_text += f"\n\nFile path: {local_file_path}"
+            except Exception as e:
+                print(f"Failed to download file for task {task_id}: {e}")
         try:
+            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,
                 "Submitted Answer": error_msg
             })
+        # Cleanup downloaded file
+        if local_file_path:
+            try:
+                os.remove(local_file_path)
+            except Exception:
+                pass
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)