Jayashree Sridhar committed
Commit 7a4afbb · 1 Parent(s): ba735c4

modified voice_tool

Files changed (1):
  agents/tools/voice_tools.py  +50 -135
agents/tools/voice_tools.py CHANGED
@@ -1,164 +1,79 @@
-# import os
-# import numpy as np
-# import torch
-# from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
-# import asyncio
-# import soundfile as sf
-# import tempfile  # Added the import for tempfile!
-# #from models.mistral_model import MistralModel
-# from models.tinygpt2_model import TinyGPT2Model
-
-# class MultilingualVoiceProcessor:
-#     def __init__(self, model_name="openai/whisper-base", device=None):
-#         cache_dir = os.getenv("TRANSFORMERS_CACHE", None)
-#         if device is None:
-#             device = 0 if torch.cuda.is_available() else -1
-
-#         # Load model and processor with cache_dir
-#         processor = AutoProcessor.from_pretrained(model_name, cache_dir=cache_dir)
-#         model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name, cache_dir=cache_dir)
-
-#         # Create the pipeline, DO NOT PASS cache_dir here
-#         self.pipe = pipeline(
-#             "automatic-speech-recognition",
-#             model=model,
-#             tokenizer=processor,
-#             feature_extractor=processor,
-#             device=device,
-#             generate_kwargs={"task": "transcribe", "return_timestamps": False},
-#         )
-
-#     async def transcribe(self, audio_data: np.ndarray, language: str = None):
-#         with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
-#             sf.write(tmp_wav.name, audio_data, samplerate=16000)
-#             extra = {"language": language} if language else {}
-#             result = self.pipe(tmp_wav.name, **extra)
-#             text = result['text']
-#             return text, language or "unknown"
-
-#     async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
-#         raise NotImplementedError("Use gTTS or edge-tts as before.")
-
-# class VoiceTools:
-#     def __init__(self, config=None):
-#         self.config = config
-#         self.vp = MultilingualVoiceProcessor()
-
-#     def transcribe_audio(self, audio_data: np.ndarray, language=None):
-#         text, detected_lang = asyncio.run(self.vp.transcribe(audio_data, language))
-#         return {"text": text, "language": detected_lang}
-
-#     def detect_emotion(self, text: str) -> dict:
-#         model = TinyGPT2Model()
-#         prompt = f"""
-#         Analyze the emotional state in this text: "{text}"
-#         Identify:
-#         1. Primary emotion (joy, sadness, anger, fear, anxiety, confusion, etc.)
-#         2. Emotional intensity (low, medium, high)
-#         3. Underlying feelings
-#         4. Key concerns
-#         Format as JSON with keys: primary_emotion, intensity, feelings, concerns
-#         """
-#         response = model.generate(prompt)
-#         # TODO: Actually parse response, dummy return for now:
-#         return {
-#             "primary_emotion": "detected_emotion",
-#             "intensity": "medium",
-#             "feelings": ["feeling1", "feeling2"],
-#             "concerns": ["concern1", "concern2"]
-#         }
-
-#     def generate_reflective_questions(self, context: dict) -> list:
-#         emotion = context.get("primary_emotion", "neutral")
-#         questions_map = {
-#             "anxiety": [
-#                 "What specific thoughts are creating this anxiety?",
-#                 "What would feeling calm look like in this situation?",
-#                 "What has helped you manage anxiety before?"
-#             ],
-#             "sadness": [
-#                 "What would comfort mean to you right now?",
-#                 "What are you grieving or missing?",
-#                 "How can you be gentle with yourself today?"
-#             ],
-#             "confusion": [
-#                 "What would clarity feel like?",
-#                 "What's the main question you're grappling with?",
-#                 "What does your intuition tell you?"
-#             ]
-#         }
-#         return questions_map.get(emotion, [
-#             "How are you feeling in this moment?",
-#             "What would support look like for you?",
-#             "What's most important to explore right now?"
-#         ])
-
 import numpy as np
 import asyncio
-from models.tinygpt2_model import TinyGPT2Model
 from .base_tool import BaseTool
-
-# Dummy MultilingualVoiceProcessor for context (real version can be plugged in)
+from models.tinygpt2_model import TinyGPT2Model
+from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
+import os
+import torch
+import tempfile
+import soundfile as sf
 class MultilingualVoiceProcessor:
+    def __init__(self, model_name="openai/whisper-base", device=None):
+        cache_dir = os.getenv("TRANSFORMERS_CACHE", None)
+        if device is None:
+            device = 0 if torch.cuda.is_available() else -1
+
+        # Load model and processor with cache_dir
+        processor = AutoProcessor.from_pretrained(model_name, cache_dir=cache_dir)
+        model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name, cache_dir=cache_dir)
+
+        # Create the pipeline from the processor's tokenizer and feature
+        # extractor; cache_dir must not be passed here
+        self.pipe = pipeline(
+            "automatic-speech-recognition",
+            model=model,
+            tokenizer=processor.tokenizer,
+            feature_extractor=processor.feature_extractor,
+            device=device,
+            generate_kwargs={"task": "transcribe", "return_timestamps": False},
+        )
+
     async def transcribe(self, audio_data: np.ndarray, language: str = None):
-        # Simulate dummy STT result
-        return "Transcribed text.", language or "en"
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
+            sf.write(tmp_wav.name, audio_data, samplerate=16000)
+            extra = {"language": language} if language else {}
+            result = self.pipe(tmp_wav.name, **extra)
+            text = result['text']
+            return text, language or "unknown"
 
-# --- Tool wrapper classes below ---
+    async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
+        raise NotImplementedError("Use gTTS or edge-tts as before.")
 class TranscribeAudioTool(BaseTool):
+    name: str = "transcribe_audio"
+    description: str = "Transcribe audio to text and detect language."
     def __init__(self, config=None):
-        super().__init__(config)
+        super().__init__()
         self.vp = MultilingualVoiceProcessor()
     def __call__(self, audio_data: np.ndarray, language=None):
         text, detected_lang = asyncio.run(self.vp.transcribe(audio_data, language))
         return {"text": text, "language": detected_lang}
 
 class DetectEmotionTool(BaseTool):
+    name: str = "detect_emotion"
+    description: str = "Detect the emotional state from text."
     def __init__(self, config=None):
-        super().__init__(config)
+        super().__init__()
     def __call__(self, text: str):
         model = TinyGPT2Model()
-        prompt = f"""
-        Analyze the emotional state in this text: "{text}"
-        Identify: 1. Primary emotion (joy, sadness, etc) 2. Intensity
-        3. Feelings 4. Concerns. Format as JSON.
-        """
-        # For a real implementation, parse the response!
+        prompt = f'Analyse emotions in: "{text}". Format: JSON with primary_emotion, intensity, feelings, concerns.'
         response = model.generate(prompt)
-        # Stub (replace with correct parsing logic)
-        return {
-            "primary_emotion": "detected_emotion",
-            "intensity": "medium",
-            "feelings": ["feeling1", "feeling2"],
-            "concerns": ["concern1", "concern2"]
-        }
+        # Stub return value; the model response is not parsed yet
+        return {"primary_emotion": "detected_emotion",
+                "intensity": "medium",
+                "feelings": ["feeling1"],
+                "concerns": ["concern1"]}
 
 class GenerateReflectiveQuestionsTool(BaseTool):
+    name: str = "generate_reflective_questions"
+    description: str = "Generate reflective questions."
     def __init__(self, config=None):
-        super().__init__(config)
+        super().__init__()
     def __call__(self, context: dict):
         emotion = context.get("primary_emotion", "neutral")
         questions_map = {
-            "anxiety": [
-                "What specific thoughts are creating this anxiety?",
-                "What would feeling calm look like in this situation?",
-                "What has helped you manage anxiety before?"
-            ],
-            "sadness": [
-                "What would comfort mean to you right now?",
-                "What are you grieving or missing?",
-                "How can you be gentle with yourself today?"
-            ],
-            "confusion": [
-                "What would clarity feel like?",
-                "What's the main question you're grappling with?",
-                "What does your intuition tell you?"
-            ]
+            "anxiety": ["What triggers your anxiety?", "How do you cope?"],
+            "sadness": ["What helps when you feel sad?", "Who can you talk to?"]
         }
         return questions_map.get(emotion, [
-            "How are you feeling in this moment?",
-            "What would support look like for you?",
-            "What's most important to explore right now?"
+            "How are you feeling?",
+            "What feels important now?"
         ])
 
 class VoiceTools:
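For reviewers who want to exercise the new code path, a minimal smoke-test sketch follows. It is not part of the commit: it assumes the repository root is on PYTHONPATH so that agents.tools.voice_tools (and with it BaseTool and TinyGPT2Model) imports cleanly, that BaseTool allows setting self.vp in __init__, and that the openai/whisper-base checkpoint can be downloaded. The silent one-second buffer is placeholder input, so the transcript is meaningless; it only verifies that the Whisper pipeline runs end to end.

# Smoke-test sketch (assumptions: repo root on PYTHONPATH, whisper-base
# downloadable; the audio buffer is synthetic placeholder input).
import numpy as np

from agents.tools.voice_tools import (
    TranscribeAudioTool,
    DetectEmotionTool,
    GenerateReflectiveQuestionsTool,
)

audio = np.zeros(16000, dtype=np.float32)  # 1 s of silence at 16 kHz

# Language is left unset, so transcribe() reports "unknown" instead of
# forwarding a language kwarg the ASR pipeline may not accept.
transcription = TranscribeAudioTool()(audio)
print(transcription)  # {"text": "...", "language": "unknown"}

emotion = DetectEmotionTool()(transcription["text"])
print(emotion)        # stubbed dict until response parsing is implemented

print(GenerateReflectiveQuestionsTool()(emotion))  # default questions

Note that each tool's __call__ drives the async transcribe() with asyncio.run, so the sketch must run outside any already-running event loop.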