Final_Assignment_Template

Sleeping

App Files Community

Freddolin committed on Jul 8, 2025

Commit

e258602

verified ·

1 Parent(s): dad31a4

Update agent.py

Browse files

Files changed (1) hide show

agent.py +23 -10

agent.py CHANGED Viewed

@@ -1,11 +1,14 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from ddgs import DDGS
-import re
 import pandas as pd
-import tempfile
 import os
-import whisper
 SYSTEM_PROMPT = """
 You are a helpful AI assistant. Think step by step to solve the problem. If the question requires reasoning, perform it. If it refers to a search or file, use the result provided. At the end, return ONLY the final answer string. No explanations.
@@ -17,7 +20,19 @@ class GaiaAgent:
         self.model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model.to(self.device)
-        self.transcriber = whisper.load_model("base")
     def search(self, query: str) -> str:
         try:
@@ -25,13 +40,13 @@ class GaiaAgent:
                 results = list(ddgs.text(query, safesearch="off"))
                 if results:
                     return results[0]['body']
-        except Exception as e:
             return ""
         return ""
     def transcribe_audio(self, file_path: str) -> str:
         try:
-            result = self.transcriber.transcribe(file_path)
             return result['text']
         except Exception:
             return ""
@@ -52,7 +67,7 @@ class GaiaAgent:
             context = ""
             if files:
                 for filename, filepath in files.items():
-                    if filename.endswith(".mp3"):
                         context = self.transcribe_audio(filepath)
                         break
                     elif filename.endswith(".xlsx"):
@@ -67,7 +82,6 @@ class GaiaAgent:
                 **inputs,
                 max_new_tokens=128,
                 do_sample=False,
-                temperature=0.0,
                 pad_token_id=self.tokenizer.pad_token_id
             )
             output_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -75,5 +89,4 @@ class GaiaAgent:
             return final, final
         except Exception as e:
             return "ERROR", f"Agent failed: {e}"

 import torch
+from transformers import (
+    AutoTokenizer,
+    AutoModelForSeq2SeqLM,
+    pipeline,
+    AutoProcessor,
+    AutoModelForSpeechSeq2Seq
+)
 from ddgs import DDGS
 import pandas as pd
 import os
 SYSTEM_PROMPT = """
 You are a helpful AI assistant. Think step by step to solve the problem. If the question requires reasoning, perform it. If it refers to a search or file, use the result provided. At the end, return ONLY the final answer string. No explanations.
         self.model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model.to(self.device)
+        # Whisper via HF
+        self.asr_model_id = "openai/whisper-small"
+        self.asr_processor = AutoProcessor.from_pretrained(self.asr_model_id)
+        self.asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(self.asr_model_id).to(self.device)
+        self.pipe = pipeline(
+            "automatic-speech-recognition",
+            model=self.asr_model,
+            tokenizer=self.asr_processor.tokenizer,
+            feature_extractor=self.asr_processor.feature_extractor,
+            return_timestamps=False,
+            device=0 if torch.cuda.is_available() else -1
+        )
     def search(self, query: str) -> str:
         try:
                 results = list(ddgs.text(query, safesearch="off"))
                 if results:
                     return results[0]['body']
+        except Exception:
             return ""
         return ""
     def transcribe_audio(self, file_path: str) -> str:
         try:
+            result = self.pipe(file_path)
             return result['text']
         except Exception:
             return ""
             context = ""
             if files:
                 for filename, filepath in files.items():
+                    if filename.endswith(".mp3") or filename.endswith(".wav"):
                         context = self.transcribe_audio(filepath)
                         break
                     elif filename.endswith(".xlsx"):
                 **inputs,
                 max_new_tokens=128,
                 do_sample=False,
                 pad_token_id=self.tokenizer.pad_token_id
             )
             output_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             return final, final
         except Exception as e:
             return "ERROR", f"Agent failed: {e}"