Jayashree Sridhar committed
Commit: 7a4afbb
1 Parent(s): ba735c4
modified voice_tool
Browse files: agents/tools/voice_tools.py (+50 -135)
agents/tools/voice_tools.py
CHANGED
@@ -1,164 +1,79 @@
-# import os
-# import numpy as np
-# import torch
-# from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
-# import asyncio
-# import soundfile as sf
-# import tempfile  # Added the import for tempfile!
-# #from models.mistral_model import MistralModel
-# from models.tinygpt2_model import TinyGPT2Model
-
-# class MultilingualVoiceProcessor:
-#     def __init__(self, model_name="openai/whisper-base", device=None):
-#         cache_dir = os.getenv("TRANSFORMERS_CACHE", None)
-#         if device is None:
-#             device = 0 if torch.cuda.is_available() else -1
-
-#         # Load model and processor with cache_dir
-#         processor = AutoProcessor.from_pretrained(model_name, cache_dir=cache_dir)
-#         model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name, cache_dir=cache_dir)
-
-#         # Create the pipeline, DO NOT PASS cache_dir here
-#         self.pipe = pipeline(
-#             "automatic-speech-recognition",
-#             model=model,
-#             tokenizer=processor,
-#             feature_extractor=processor,
-#             device=device,
-#             generate_kwargs={"task": "transcribe", "return_timestamps": False},
-#         )
-
-#     async def transcribe(self, audio_data: np.ndarray, language: str = None):
-#         with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
-#             sf.write(tmp_wav.name, audio_data, samplerate=16000)
-#             extra = {"language": language} if language else {}
-#             result = self.pipe(tmp_wav.name, **extra)
-#             text = result['text']
-#             return text, language or "unknown"
-
-#     async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
-#         raise NotImplementedError("Use gTTS or edge-tts as before.")
-
-# class VoiceTools:
-#     def __init__(self, config=None):
-#         self.config = config
-#         self.vp = MultilingualVoiceProcessor()
-
-#     def transcribe_audio(self, audio_data: np.ndarray, language=None):
-#         text, detected_lang = asyncio.run(self.vp.transcribe(audio_data, language))
-#         return {"text": text, "language": detected_lang}
-
-#     def detect_emotion(self, text: str) -> dict:
-#         model = TinyGPT2Model()
-#         prompt = f"""
-#         Analyze the emotional state in this text: "{text}"
-#         Identify:
-#         1. Primary emotion (joy, sadness, anger, fear, anxiety, confusion, etc.)
-#         2. Emotional intensity (low, medium, high)
-#         3. Underlying feelings
-#         4. Key concerns
-#         Format as JSON with keys: primary_emotion, intensity, feelings, concerns
-#         """
-#         response = model.generate(prompt)
-#         # TODO: Actually parse response, dummy return for now:
-#         return {
-#             "primary_emotion": "detected_emotion",
-#             "intensity": "medium",
-#             "feelings": ["feeling1", "feeling2"],
-#             "concerns": ["concern1", "concern2"]
-#         }
-
-#     def generate_reflective_questions(self, context: dict) -> list:
-#         emotion = context.get("primary_emotion", "neutral")
-#         questions_map = {
-#             "anxiety": [
-#                 "What specific thoughts are creating this anxiety?",
-#                 "What would feeling calm look like in this situation?",
-#                 "What has helped you manage anxiety before?"
-#             ],
-#             "sadness": [
-#                 "What would comfort mean to you right now?",
-#                 "What are you grieving or missing?",
-#                 "How can you be gentle with yourself today?"
-#             ],
-#             "confusion": [
-#                 "What would clarity feel like?",
-#                 "What's the main question you're grappling with?",
-#                 "What does your intuition tell you?"
-#             ]
-#         }
-#         return questions_map.get(emotion, [
-#             "How are you feeling in this moment?",
-#             "What would support look like for you?",
-#             "What's most important to explore right now?"
-#         ])
-
 import numpy as np
 import asyncio
-from models.tinygpt2_model import TinyGPT2Model
 from .base_tool import BaseTool
-
-
+from models.tinygpt2_model import TinyGPT2Model
+from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
+import os
+import tempfile
+import soundfile as sf
 class MultilingualVoiceProcessor:
+    def __init__(self, model_name="openai/whisper-base", device=None):
+        cache_dir = os.getenv("TRANSFORMERS_CACHE", None)
+        if device is None:
+            device = 0 if torch.cuda.is_available() else -1
+
+        # Load model and processor with cache_dir
+        processor = AutoProcessor.from_pretrained(model_name, cache_dir=cache_dir)
+        model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name, cache_dir=cache_dir)
+
+        # Create the pipeline, DO NOT PASS cache_dir here
+        self.pipe = pipeline(
+            "automatic-speech-recognition",
+            model=model,
+            tokenizer=processor,
+            feature_extractor=processor,
+            device=device,
+            generate_kwargs={"task": "transcribe", "return_timestamps": False},
+        )
+
     async def transcribe(self, audio_data: np.ndarray, language: str = None):
-
-
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
+            sf.write(tmp_wav.name, audio_data, samplerate=16000)
+            extra = {"language": language} if language else {}
+            result = self.pipe(tmp_wav.name, **extra)
+            text = result['text']
+            return text, language or "unknown"
 
-
+    async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
+        raise NotImplementedError("Use gTTS or edge-tts as before.")
 class TranscribeAudioTool(BaseTool):
+    name: str = "transcribe_audio"
+    description: str = "Transcribe audio to text and detect language."
     def __init__(self, config=None):
-        super().__init__(
+        super().__init__()
         self.vp = MultilingualVoiceProcessor()
     def __call__(self, audio_data: np.ndarray, language=None):
         text, detected_lang = asyncio.run(self.vp.transcribe(audio_data, language))
         return {"text": text, "language": detected_lang}
 
 class DetectEmotionTool(BaseTool):
+    name: str = "detect_emotion"
+    description: str = "Detect the emotional state from text."
     def __init__(self, config=None):
-        super().__init__(
+        super().__init__()
     def __call__(self, text: str):
         model = TinyGPT2Model()
-        prompt = f"""
-        Analyze the emotional state in this text: "{text}"
-        Identify: 1. Primary emotion (joy, sadness, etc) 2. Intensity
-        3. Feelings 4. Concerns. Format as JSON.
-        """
-        # For a real implementation, parse the response!
+        prompt = f'Analyse emotions in: "{text}". Format: JSON with primary_emotion, intensity, feelings, concerns.'
         response = model.generate(prompt)
-
-        return {
-            "primary_emotion": "detected_emotion",
-            "intensity": "medium",
-            "feelings": ["feeling1", "feeling2"],
-            "concerns": ["concern1", "concern2"]
-        }
+        return {"primary_emotion": "detected_emotion",
+                "intensity": "medium",
+                "feelings": ["feeling1"],
+                "concerns": ["concern1"]}
 
 class GenerateReflectiveQuestionsTool(BaseTool):
+    name: str = "generate_reflective_questions"
+    description: str = "Generate reflective questions."
     def __init__(self, config=None):
-        super().__init__(
+        super().__init__()
     def __call__(self, context: dict):
         emotion = context.get("primary_emotion", "neutral")
         questions_map = {
-            "anxiety": [
-                "What specific thoughts are creating this anxiety?",
-                "What would feeling calm look like in this situation?",
-                "What has helped you manage anxiety before?"
-            ],
-            "sadness": [
-                "What would comfort mean to you right now?",
-                "What are you grieving or missing?",
-                "How can you be gentle with yourself today?"
-            ],
-            "confusion": [
-                "What would clarity feel like?",
-                "What's the main question you're grappling with?",
-                "What does your intuition tell you?"
-            ]
+            "anxiety": ["What triggers your anxiety?", "How do you cope?"],
+            "sadness": ["What helps when you feel sad?", "Who can you talk to?"]
         }
         return questions_map.get(emotion, [
-            "How are you feeling in this moment?",
-            "What would support look like for you?",
-            "What's most important to explore right now?"
+            "How are you feeling?",
+            "What feels important now?"
         ])
 
 class VoiceTools:
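For orientation, a minimal usage sketch of the three new tool classes — hypothetical, not part of the commit. It assumes the module is importable as agents.tools.voice_tools, that BaseTool takes no constructor arguments, and that `import torch` is added to the module, since the new `__init__` calls `torch.cuda.is_available()` while the commit leaves the torch import commented out.

```python
# Hypothetical usage sketch; the import path and call pattern are assumptions.
import numpy as np

from agents.tools.voice_tools import (
    DetectEmotionTool,
    GenerateReflectiveQuestionsTool,
    TranscribeAudioTool,
)

# transcribe() writes the array to a temporary WAV at samplerate=16000,
# so the input should already be 16 kHz mono float samples.
audio = np.zeros(16000, dtype=np.float32)  # one second of silence

transcriber = TranscribeAudioTool()
result = transcriber(audio, language="en")
# -> {"text": "...", "language": "en"}

emotion = DetectEmotionTool()(result["text"])
# -> {"primary_emotion": ..., "intensity": ..., "feelings": [...], "concerns": [...]}

questions = GenerateReflectiveQuestionsTool()(emotion)
print(questions)
```

Note that `TranscribeAudioTool.__call__` drives the async `transcribe` with `asyncio.run`, so it will raise a RuntimeError if invoked from inside an already-running event loop.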