# TatTwamAI/agents/tools/voice_tools_old.py
# Author: Jayashree Sridhar — first version
"""
Multilingual Voice Processing Tools
STT and TTS with language support
"""
import asyncio
import io
import json
import re
from typing import ClassVar, Optional, Tuple

import edge_tts
import numpy as np
import speech_recognition as sr
import whisper
from crewai.tools import BaseTool
from gtts import gTTS
class MultilingualVoiceProcessor:
    """Multilingual speech-to-text and text-to-speech.

    STT runs OpenAI Whisper (the multilingual "base" model, which can
    auto-detect the spoken language). TTS prefers Edge TTS neural voices,
    with an optional slower/lower "meditation" delivery, and falls back
    to gTTS when Edge TTS fails.
    """

    def __init__(self):
        # Whisper "base" is multilingual and small enough for CPU inference.
        self.whisper_model = whisper.load_model("base")
        # ISO-639-1 language code -> Edge TTS neural voice name.
        self.voice_map = {
            "en": "en-US-AriaNeural",
            "es": "es-ES-ElviraNeural",
            "fr": "fr-FR-DeniseNeural",
            "de": "de-DE-KatjaNeural",
            "it": "it-IT-ElsaNeural",
            "pt": "pt-BR-FranciscaNeural",
            "hi": "hi-IN-SwaraNeural",
            "zh": "zh-CN-XiaoxiaoNeural",
            "ja": "ja-JP-NanamiNeural",
            "ko": "ko-KR-SunHiNeural",
            "ar": "ar-SA-ZariyahNeural",
            "ru": "ru-RU-SvetlanaNeural"
        }

    async def transcribe(
        self,
        audio_data: np.ndarray,
        language: Optional[str] = None
    ) -> Tuple[str, str]:
        """Transcribe audio to text, detecting the language if not given.

        Args:
            audio_data: Raw samples, or a ``(sample_rate, samples)`` tuple
                (the shape Gradio audio components produce).
            language: ISO-639-1 code to force, or ``None``/``"auto"`` to
                let Whisper detect the language.

        Returns:
            ``(text, language)``. On any failure, a fixed apology string
            and ``"en"`` (best-effort: errors are printed, not raised).
        """
        try:
            if isinstance(audio_data, tuple):
                sample_rate, audio = audio_data
            else:
                audio = audio_data
                sample_rate = 16000
            # NOTE(review): sample_rate is never used — Whisper expects
            # 16 kHz input, so other rates are fed through as-is; confirm
            # callers resample upstream.
            #
            # Whisper wants float32 in [-1, 1]. Only rescale integer PCM:
            # the previous code also divided float64 audio by 32768,
            # which effectively silenced it.
            if np.issubdtype(audio.dtype, np.integer):
                audio = audio.astype(np.float32) / 32768.0
            elif audio.dtype != np.float32:
                audio = audio.astype(np.float32)
            if language and language != "auto":
                result = self.whisper_model.transcribe(audio, language=language)
            else:
                # Auto-detect the language.
                result = self.whisper_model.transcribe(audio)
            return result["text"], result["language"]
        except Exception as e:
            print(f"Transcription error: {e}")
            return "Could not transcribe audio", "en"

    async def synthesize(
        self,
        text: str,
        language: str = "en",
        voice_type: str = "normal"
    ) -> Optional[bytes]:
        """Convert text to speech audio bytes.

        Args:
            text: Text to speak.
            language: ISO-639-1 code used to pick a voice; unmapped codes
                fall back to the English voice.
            voice_type: ``"meditation"`` slows the rate and lowers the
                pitch; any other value uses the voice defaults.

        Returns:
            Encoded audio bytes, or ``None`` when both Edge TTS and the
            gTTS fallback fail.
        """
        try:
            voice = self.voice_map.get(language, "en-US-AriaNeural")
            if voice_type == "meditation":
                rate = "-15%"    # slower delivery for a calming tone
                pitch = "-50Hz"  # lower pitch
            else:
                rate = "+0%"
                pitch = "+0Hz"
            communicate = edge_tts.Communicate(
                text,
                voice,
                rate=rate,
                pitch=pitch
            )
            # Edge TTS streams mixed metadata/audio chunks; keep audio only.
            audio_data = b""
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_data += chunk["data"]
            return audio_data
        except Exception as e:
            print(f"TTS error: {e}")
            # Fallback: gTTS (no rate/pitch control). The language may be
            # a full locale like "pt-BR"; keep only the primary subtag.
            try:
                tts = gTTS(text=text, lang=language[:2])
                fp = io.BytesIO()
                tts.write_to_fp(fp)
                return fp.getvalue()
            except Exception as fallback_error:
                # Was a bare `except:`; still return None so callers can
                # degrade gracefully, but surface the reason.
                print(f"gTTS fallback error: {fallback_error}")
                return None
class TranscribeTool(BaseTool):
    """CrewAI tool wrapping multilingual Whisper transcription."""

    name: str = "transcribe_audio"
    description: str = "Transcribe audio input to text with language detection"

    # Lazily-created shared processor: the original built a new
    # MultilingualVoiceProcessor (reloading the Whisper model) on every
    # call. ClassVar keeps pydantic from treating this as a model field.
    _processor: ClassVar[Optional["MultilingualVoiceProcessor"]] = None

    def _run(self, audio_data: np.ndarray, language: Optional[str] = None) -> dict:
        """Transcribe audio and return ``{"text": ..., "language": ...}``.

        Args:
            audio_data: Samples (or Gradio-style tuple) for the processor.
            language: Optional ISO-639-1 code to force; ``None`` auto-detects.
        """
        if TranscribeTool._processor is None:
            TranscribeTool._processor = MultilingualVoiceProcessor()
        text, detected_lang = asyncio.run(
            TranscribeTool._processor.transcribe(audio_data, language)
        )
        return {
            "text": text,
            "language": detected_lang
        }
class DetectEmotionTool(BaseTool):
    """CrewAI tool that classifies the emotional state of a text via Mistral."""

    name: str = "detect_emotion"
    description: str = "Detect emotional state from text using Mistral"

    def _run(self, text: str) -> dict:
        """Return emotion analysis for *text*.

        Returns a dict with keys ``primary_emotion`` (str), ``intensity``
        (str), ``feelings`` (list) and ``concerns`` (list). A neutral
        fallback is returned when the model reply cannot be parsed.
        """
        import json
        import re
        # Use Mistral for emotion detection (imported lazily to avoid a
        # hard dependency at module import time).
        from models.mistral_model import MistralModel
        model = MistralModel()
        prompt = f"""
        Analyze the emotional state in this text: "{text}"
        Identify:
        1. Primary emotion (joy, sadness, anger, fear, anxiety, confusion, etc.)
        2. Emotional intensity (low, medium, high)
        3. Underlying feelings
        4. Key concerns
        Format as JSON with keys: primary_emotion, intensity, feelings, concerns
        """
        response = model.generate(prompt)
        # The original discarded `response` and returned placeholder
        # values; actually extract the first JSON object from the reply.
        fallback = {
            "primary_emotion": "neutral",
            "intensity": "medium",
            "feelings": [],
            "concerns": []
        }
        try:
            match = re.search(r"\{.*\}", response, re.DOTALL)
            if match is None:
                return fallback
            parsed = json.loads(match.group(0))
        except (json.JSONDecodeError, TypeError):
            return fallback
        if not isinstance(parsed, dict):
            return fallback
        # Guarantee every expected key exists, falling back per-key.
        return {key: parsed.get(key, fallback[key]) for key in fallback}
class GenerateQuestionsTool(BaseTool):
    """CrewAI tool that offers reflective questions keyed to an emotion."""

    name: str = "generate_reflective_questions"
    description: str = "Generate empathetic reflective questions"

    def _run(self, context: dict) -> list:
        """Return three reflective questions for the emotion in *context*.

        Args:
            context: Analysis dict; only ``primary_emotion`` is read
                (defaults to ``"neutral"`` when absent).

        Returns:
            A list of three question strings; a generic set is used for
            any emotion without a dedicated entry.
        """
        generic_questions = [
            "How are you feeling in this moment?",
            "What would support look like for you?",
            "What's most important to explore right now?"
        ]
        questions_by_emotion = {
            "anxiety": [
                "What specific thoughts are creating this anxiety?",
                "What would feeling calm look like in this situation?",
                "What has helped you manage anxiety before?"
            ],
            "sadness": [
                "What would comfort mean to you right now?",
                "What are you grieving or missing?",
                "How can you be gentle with yourself today?"
            ],
            "confusion": [
                "What would clarity feel like?",
                "What's the main question you're grappling with?",
                "What does your intuition tell you?"
            ]
        }
        detected = context.get("primary_emotion", "neutral")
        return questions_by_emotion.get(detected, generic_questions)