# TatTwamAI/agents/tools/voice_tools_old.py
# Author: Jayashree Sridhar — first version
"""
Multilingual Voice Processing Tools
STT and TTS with language support
"""
import asyncio
import io
import json
import re
from typing import ClassVar, Optional, Tuple

import edge_tts
import numpy as np
import speech_recognition as sr
import whisper
from crewai.tools import BaseTool
from gtts import gTTS
class MultilingualVoiceProcessor:
    """Multilingual speech-to-text and text-to-speech.

    STT runs OpenAI Whisper (the multilingual "base" model, which can
    auto-detect the spoken language). TTS prefers Edge TTS neural voices,
    with an optional slower/lower "meditation" delivery, and falls back
    to gTTS when Edge TTS fails.
    """

    def __init__(self):
        # Whisper "base" is multilingual and small enough for CPU inference.
        self.whisper_model = whisper.load_model("base")
        # ISO-639-1 language code -> Edge TTS neural voice name.
        self.voice_map = {
            "en": "en-US-AriaNeural",
            "es": "es-ES-ElviraNeural",
            "fr": "fr-FR-DeniseNeural",
            "de": "de-DE-KatjaNeural",
            "it": "it-IT-ElsaNeural",
            "pt": "pt-BR-FranciscaNeural",
            "hi": "hi-IN-SwaraNeural",
            "zh": "zh-CN-XiaoxiaoNeural",
            "ja": "ja-JP-NanamiNeural",
            "ko": "ko-KR-SunHiNeural",
            "ar": "ar-SA-ZariyahNeural",
            "ru": "ru-RU-SvetlanaNeural"
        }

    async def transcribe(
        self,
        audio_data: np.ndarray,
        language: Optional[str] = None
    ) -> Tuple[str, str]:
        """Transcribe audio to text, detecting the language if not given.

        Args:
            audio_data: Raw samples, or a ``(sample_rate, samples)`` tuple
                (the shape Gradio audio components produce).
            language: ISO-639-1 code to force, or ``None``/``"auto"`` to
                let Whisper detect the language.

        Returns:
            ``(text, language)``. On any failure, a fixed apology string
            and ``"en"`` (best-effort: errors are printed, not raised).
        """
        try:
            if isinstance(audio_data, tuple):
                sample_rate, audio = audio_data
            else:
                audio = audio_data
                sample_rate = 16000
            # NOTE(review): sample_rate is never used — Whisper expects
            # 16 kHz input, so other rates are fed through as-is; confirm
            # callers resample upstream.
            #
            # Whisper wants float32 in [-1, 1]. Only rescale integer PCM:
            # the previous code also divided float64 audio by 32768,
            # which effectively silenced it.
            if np.issubdtype(audio.dtype, np.integer):
                audio = audio.astype(np.float32) / 32768.0
            elif audio.dtype != np.float32:
                audio = audio.astype(np.float32)
            if language and language != "auto":
                result = self.whisper_model.transcribe(audio, language=language)
            else:
                # Auto-detect the language.
                result = self.whisper_model.transcribe(audio)
            return result["text"], result["language"]
        except Exception as e:
            print(f"Transcription error: {e}")
            return "Could not transcribe audio", "en"

    async def synthesize(
        self,
        text: str,
        language: str = "en",
        voice_type: str = "normal"
    ) -> Optional[bytes]:
        """Convert text to speech audio bytes.

        Args:
            text: Text to speak.
            language: ISO-639-1 code used to pick a voice; unmapped codes
                fall back to the English voice.
            voice_type: ``"meditation"`` slows the rate and lowers the
                pitch; any other value uses the voice defaults.

        Returns:
            Encoded audio bytes, or ``None`` when both Edge TTS and the
            gTTS fallback fail.
        """
        try:
            voice = self.voice_map.get(language, "en-US-AriaNeural")
            if voice_type == "meditation":
                rate = "-15%"    # slower delivery for a calming tone
                pitch = "-50Hz"  # lower pitch
            else:
                rate = "+0%"
                pitch = "+0Hz"
            communicate = edge_tts.Communicate(
                text,
                voice,
                rate=rate,
                pitch=pitch
            )
            # Edge TTS streams mixed metadata/audio chunks; keep audio only.
            audio_data = b""
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_data += chunk["data"]
            return audio_data
        except Exception as e:
            print(f"TTS error: {e}")
            # Fallback: gTTS (no rate/pitch control). The language may be
            # a full locale like "pt-BR"; keep only the primary subtag.
            try:
                tts = gTTS(text=text, lang=language[:2])
                fp = io.BytesIO()
                tts.write_to_fp(fp)
                return fp.getvalue()
            except Exception as fallback_error:
                # Was a bare `except:`; still return None so callers can
                # degrade gracefully, but surface the reason.
                print(f"gTTS fallback error: {fallback_error}")
                return None
class TranscribeTool(BaseTool):
    """CrewAI tool wrapping multilingual Whisper transcription."""

    name: str = "transcribe_audio"
    description: str = "Transcribe audio input to text with language detection"

    # Lazily-created shared processor: the original built a new
    # MultilingualVoiceProcessor (reloading the Whisper model) on every
    # call. ClassVar keeps pydantic from treating this as a model field.
    _processor: ClassVar[Optional["MultilingualVoiceProcessor"]] = None

    def _run(self, audio_data: np.ndarray, language: Optional[str] = None) -> dict:
        """Transcribe audio and return ``{"text": ..., "language": ...}``.

        Args:
            audio_data: Samples (or Gradio-style tuple) for the processor.
            language: Optional ISO-639-1 code to force; ``None`` auto-detects.
        """
        if TranscribeTool._processor is None:
            TranscribeTool._processor = MultilingualVoiceProcessor()
        text, detected_lang = asyncio.run(
            TranscribeTool._processor.transcribe(audio_data, language)
        )
        return {
            "text": text,
            "language": detected_lang
        }
class DetectEmotionTool(BaseTool):
    """CrewAI tool that classifies the emotional state of a text via Mistral."""

    name: str = "detect_emotion"
    description: str = "Detect emotional state from text using Mistral"

    def _run(self, text: str) -> dict:
        """Return emotion analysis for *text*.

        Returns a dict with keys ``primary_emotion`` (str), ``intensity``
        (str), ``feelings`` (list) and ``concerns`` (list). A neutral
        fallback is returned when the model reply cannot be parsed.
        """
        import json
        import re
        # Use Mistral for emotion detection (imported lazily to avoid a
        # hard dependency at module import time).
        from models.mistral_model import MistralModel
        model = MistralModel()
        prompt = f"""
        Analyze the emotional state in this text: "{text}"
        Identify:
        1. Primary emotion (joy, sadness, anger, fear, anxiety, confusion, etc.)
        2. Emotional intensity (low, medium, high)
        3. Underlying feelings
        4. Key concerns
        Format as JSON with keys: primary_emotion, intensity, feelings, concerns
        """
        response = model.generate(prompt)
        # The original discarded `response` and returned placeholder
        # values; actually extract the first JSON object from the reply.
        fallback = {
            "primary_emotion": "neutral",
            "intensity": "medium",
            "feelings": [],
            "concerns": []
        }
        try:
            match = re.search(r"\{.*\}", response, re.DOTALL)
            if match is None:
                return fallback
            parsed = json.loads(match.group(0))
        except (json.JSONDecodeError, TypeError):
            return fallback
        if not isinstance(parsed, dict):
            return fallback
        # Guarantee every expected key exists, falling back per-key.
        return {key: parsed.get(key, fallback[key]) for key in fallback}
class GenerateQuestionsTool(BaseTool):
    """CrewAI tool that offers reflective questions keyed to an emotion."""

    name: str = "generate_reflective_questions"
    description: str = "Generate empathetic reflective questions"

    def _run(self, context: dict) -> list:
        """Return three reflective questions for the emotion in *context*.

        Args:
            context: Analysis dict; only ``primary_emotion`` is read
                (defaults to ``"neutral"`` when absent).

        Returns:
            A list of three question strings; a generic set is used for
            any emotion without a dedicated entry.
        """
        generic_questions = [
            "How are you feeling in this moment?",
            "What would support look like for you?",
            "What's most important to explore right now?"
        ]
        questions_by_emotion = {
            "anxiety": [
                "What specific thoughts are creating this anxiety?",
                "What would feeling calm look like in this situation?",
                "What has helped you manage anxiety before?"
            ],
            "sadness": [
                "What would comfort mean to you right now?",
                "What are you grieving or missing?",
                "How can you be gentle with yourself today?"
            ],
            "confusion": [
                "What would clarity feel like?",
                "What's the main question you're grappling with?",
                "What does your intuition tell you?"
            ]
        }
        detected = context.get("primary_emotion", "neutral")
        return questions_by_emotion.get(detected, generic_questions)