Final_Assignment_Template

Sleeping

App Files Files Community

dlaima committed on Jun 3, 2025

Commit

62ad750

verified ·

1 Parent(s): e0e7440

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -42

app.py CHANGED Viewed

@@ -3,10 +3,7 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
-import torch
-from transformers import BartTokenizer, BartForConditionalGeneration
-from smolagents import ToolCallingAgent
 from audio_transcriber import AudioTranscriptionTool
 from image_analyzer import ImageAnalysisTool
 from wikipedia_searcher import WikipediaSearcher
@@ -21,45 +18,22 @@ SYSTEM_PROMPT = (
     "3. For dates, use the exact requested format.\n"
     "4. For numbers, use only the number.\n"
     "5. For names, use the exact name from sources.\n"
-    "6. If the question has a file, download it using the task ID.\n"
-    "Examples:\n"
-    "- '42'\n"
-    "- 'Arturo Nunez'\n"
-    "- 'Yes'\n"
-    "- 'October 5, 2001'\n"
-    "- 'Buenos Aires'\n"
     "Never say 'the answer is...'. Only return the answer.\n"
 )
-class LocalBartModel:
-    def __init__(self):
-        self.tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
-        self.model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model.to(self.device)
-        self.model.eval()
-    def __call__(self, prompt: str) -> str:
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
-        with torch.no_grad():
-            outputs = self.model.generate(
-                input_ids=inputs["input_ids"],
-                attention_mask=inputs["attention_mask"],
-                max_length=100,
-                num_beams=5,
-                early_stopping=True,
-            )
-        return self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-    def generate(self, *args, **kwargs):
-        # Forward generate calls for ToolCallingAgent compatibility
-        return self.model.generate(*args, **kwargs)
 class GaiaAgent:
     def __init__(self):
         print("Gaia Agent Initialized")
-        self.model = LocalBartModel()
         self.tools = [
             AudioTranscriptionTool(),
@@ -72,12 +46,48 @@ class GaiaAgent:
             model=self.model
         )
-    def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         full_prompt = f"{SYSTEM_PROMPT}\nQUESTION:\n{question}"
         try:
             result = self.agent.run(full_prompt)
             print(f"Raw result from agent: {result}")
@@ -86,7 +96,6 @@ class GaiaAgent:
             elif isinstance(result, str):
                 return result.strip()
             elif isinstance(result, list):
-                # Find assistant content if possible
                 for item in reversed(result):
                     if isinstance(item, dict) and item.get("role") == "assistant" and "content" in item:
                         return item["content"].strip()
@@ -135,21 +144,24 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         if not task_id:
             continue
         try:
-            submitted_answer = agent(item.get("question", ""))
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,
-                "Question": item.get("question", ""),
                 "Submitted Answer": submitted_answer
             })
         except Exception as e:
             error_msg = f"AGENT ERROR: {e}"
             results_log.append({
                 "Task ID": task_id,
-                "Question": item.get("question", ""),
                 "Submitted Answer": error_msg
             })
@@ -203,7 +215,7 @@ with gr.Blocks() as demo:
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)

 import gradio as gr
 import requests
 import pandas as pd
+from smolagents import ToolCallingAgent, OpenAIClientModel
 from audio_transcriber import AudioTranscriptionTool
 from image_analyzer import ImageAnalysisTool
 from wikipedia_searcher import WikipediaSearcher
     "3. For dates, use the exact requested format.\n"
     "4. For numbers, use only the number.\n"
     "5. For names, use the exact name from sources.\n"
+    "6. If the question has a file, download it using the task ID and process it.\n"
     "Never say 'the answer is...'. Only return the answer.\n"
 )
 class GaiaAgent:
     def __init__(self):
         print("Gaia Agent Initialized")
+        openai_api_key = os.getenv("OPENAI_API_KEY")
+        if not openai_api_key:
+            raise EnvironmentError("OPENAI_API_KEY not found in environment variables.")
+        self.model = OpenAIClientModel(
+            model_name="gpt-3.5-turbo",
+            api_key=openai_api_key
+        )
         self.tools = [
             AudioTranscriptionTool(),
             model=self.model
         )
+    def download_file(self, task_id: str, file_extension: str) -> str:
+        file_url = f"{DEFAULT_API_URL}/files/{task_id}.{file_extension}"
+        local_filename = f"temp_{task_id}.{file_extension}"
+        try:
+            r = requests.get(file_url, timeout=30)
+            r.raise_for_status()
+            with open(local_filename, "wb") as f:
+                f.write(r.content)
+            return local_filename
+        except Exception as e:
+            print(f"Error downloading file for task {task_id}: {e}")
+            return ""
+    def __call__(self, question: str, task_id: str | None = None, file_name: str | None = None) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # If there's a file related to the question, download it and prepare tool input
+        tool_inputs = {}
+        if task_id and file_name:
+            ext = file_name.split(".")[-1].lower()
+            local_path = self.download_file(task_id, ext)
+            if local_path:
+                if ext in ["mp3", "wav"]:
+                    tool_inputs = {"file_path": local_path}
+                    question = f"Transcribe the audio file."
+                elif ext in ["jpg", "jpeg", "png"]:
+                    tool_inputs = {"image_path": local_path, "question": question}
+                else:
+                    print(f"Unsupported file extension: {ext}")
         full_prompt = f"{SYSTEM_PROMPT}\nQUESTION:\n{question}"
         try:
+            # If there's a file to process, call the tool with inputs
+            if tool_inputs:
+                for tool in self.tools:
+                    if all(k in tool.inputs for k in tool_inputs.keys()):
+                        result = tool.forward(**tool_inputs)
+                        return result.strip()
+            # Otherwise, just call the agent with the prompt
             result = self.agent.run(full_prompt)
             print(f"Raw result from agent: {result}")
             elif isinstance(result, str):
                 return result.strip()
             elif isinstance(result, list):
                 for item in reversed(result):
                     if isinstance(item, dict) and item.get("role") == "assistant" and "content" in item:
                         return item["content"].strip()
     for item in questions_data:
         task_id = item.get("task_id")
+        question_text = item.get("question", "")
+        file_name = item.get("file_name")  # file_name may or may not be present
         if not task_id:
             continue
         try:
+            submitted_answer = agent(question_text, task_id=task_id, file_name=file_name)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text,
                 "Submitted Answer": submitted_answer
             })
         except Exception as e:
             error_msg = f"AGENT ERROR: {e}"
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text,
                 "Submitted Answer": error_msg
             })
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    run_button.click(fn=run_and_submit_all, inputs=[gr.get_last_logged_in_user()], outputs=[status_output, results_table])
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)