Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,10 +25,15 @@ speaker_embeddings = torch.randn(1, 512)
|
|
| 25 |
|
| 26 |
def voice_assistant(audio):
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
response = llm(
|
| 33 |
speech_text,
|
| 34 |
max_new_tokens=60
|
|
@@ -42,8 +47,7 @@ def voice_assistant(audio):
|
|
| 42 |
speaker_embeddings
|
| 43 |
)
|
| 44 |
|
| 45 |
-
|
| 46 |
-
audio_output = speech.cpu().numpy().astype("float32")
|
| 47 |
|
| 48 |
return speech_text, response, (16000, audio_output)
|
| 49 |
|
|
|
|
| 25 |
|
| 26 |
def voice_assistant(audio):
|
| 27 |
|
| 28 |
+
if audio is None:
|
| 29 |
+
return "No audio detected", "Please record something first.", None
|
| 30 |
|
| 31 |
+
sample_rate, audio_data = audio
|
| 32 |
+
|
| 33 |
+
# Speech to text
|
| 34 |
+
speech_text = stt(audio_data)["text"]
|
| 35 |
+
|
| 36 |
+
# AI response
|
| 37 |
response = llm(
|
| 38 |
speech_text,
|
| 39 |
max_new_tokens=60
|
|
|
|
| 47 |
speaker_embeddings
|
| 48 |
)
|
| 49 |
|
| 50 |
+
audio_output = speech.cpu().numpy()
|
|
|
|
| 51 |
|
| 52 |
return speech_text, response, (16000, audio_output)
|
| 53 |
|