iajitpanday committed on
Commit
36b1b62
·
verified ·
1 Parent(s): b667242

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, WebSocket
2
+ from twilio.twiml.voice_response import VoiceResponse, Connect, Stream
3
+ from pydub import AudioSegment
4
+ import base64
5
+ import asyncio
6
+ import gradio as gr
7
+ from utils import transcribe_audio, generate_response, text_to_speech
8
+ import os
9
+
10
+ # FastAPI app
11
+ app = FastAPI()
12
+
13
+ # Twilio voice webhook
14
@app.api_route("/voice", methods=["GET", "POST"])
async def handle_call():
    """Answer an incoming Twilio voice call with TwiML that opens a media stream.

    Builds a ``<Response><Connect><Stream/></Connect></Response>`` document
    that points the caller's audio at this service's ``/media-stream``
    WebSocket endpoint.

    Returns:
        fastapi.Response: The TwiML document serialized as XML, sent with an
        ``application/xml`` content type.
    """
    # Imported locally so this handler does not rely on the module-level
    # import list being edited as well.
    from fastapi import Response

    response = VoiceResponse()
    connect = Connect()
    connect.stream(url="wss://iajitpanday-vBot-1-7.hf.space/media-stream")
    response.append(connect)

    # Returning the VoiceResponse object itself would hand it to FastAPI's
    # default JSON response handling; the webhook caller needs the XML text.
    return Response(content=str(response), media_type="application/xml")
22
+
23
+ # Twilio media stream WebSocket
24
def _wav_to_twilio_payload(wav_path):
    """Return the WAV file at *wav_path* as base64 text of 8 kHz mono mu-law audio."""
    audio = (
        AudioSegment.from_wav(wav_path)
        .set_frame_rate(8000)
        .set_channels(1)
        .set_sample_width(2)
    )
    # pydub refuses format="raw" together with a codec (and older versions
    # silently ignored the codec and wrote 16-bit PCM), so name ffmpeg's
    # mu-law output format explicitly. With no destination given, export()
    # writes to a temporary file and returns its open handle.
    with audio.export(format="mulaw", codec="pcm_mulaw") as encoded:
        return base64.b64encode(encoded.read()).decode("utf-8")


@app.websocket("/media-stream")
async def media_stream(websocket: WebSocket):
    """Handle a Twilio media stream over a WebSocket.

    For every ``media`` event the base64 payload is decoded, written to a
    scratch file and passed through ``transcribe_audio`` ->
    ``generate_response`` -> ``text_to_speech``. When speech is produced it is
    converted to 8 kHz mono mu-law and sent back as a ``media`` event carrying
    the same ``streamSid``. A ``stop`` event, a client disconnect, or any
    other error ends the loop. Events of any other type are ignored.

    Args:
        websocket: The WebSocket connection for the stream.
    """
    # Imported locally so this handler does not rely on the module-level
    # import list being edited as well.
    import tempfile

    from fastapi import WebSocketDisconnect

    await websocket.accept()
    client_gone = False

    # A per-connection scratch directory keeps concurrent calls from
    # overwriting each other's input file and is removed automatically.
    with tempfile.TemporaryDirectory(prefix="media-stream-") as workdir:
        input_path = os.path.join(workdir, "input.wav")
        while True:
            try:
                data = await websocket.receive_json()
                event = data.get("event")
                if event == "stop":
                    break
                if event != "media":
                    continue

                output_path = None
                try:
                    # Decode base64 audio.
                    # NOTE(review): the payload is written unchanged under a
                    # .wav name. Twilio documents media payloads as short,
                    # headerless 8 kHz mu-law frames - confirm that
                    # transcribe_audio accepts that, or buffer and convert
                    # the audio before transcribing.
                    with open(input_path, "wb") as f:
                        f.write(base64.b64decode(data["media"]["payload"]))

                    # Process audio: STT -> NLP -> TTS
                    text = transcribe_audio(input_path)
                    response_text = generate_response(text)
                    output_path = text_to_speech(response_text)

                    if output_path and os.path.exists(output_path):
                        # Send audio back to Twilio
                        await websocket.send_json({
                            "event": "media",
                            "streamSid": data["streamSid"],
                            "media": {
                                "payload": _wav_to_twilio_payload(output_path),
                            },
                        })
                    else:
                        print("TTS failed, skipping response.")
                finally:
                    # output_path is None when TTS failed; os.path.exists(None)
                    # raises TypeError, which previously ended the stream.
                    if output_path and os.path.exists(output_path):
                        os.remove(output_path)
            except WebSocketDisconnect:
                # The peer has already closed; there is nothing left to close.
                client_gone = True
                break
            except Exception as e:
                print(f"WebSocket Error: {e}")
                break

    if not client_gone:
        try:
            await websocket.close()
        except RuntimeError:
            # The connection was already closed while handling the error above.
            pass
69
+
70
+ # Gradio interface for testing
71
def test_voice_bot(audio):
    """Run one audio clip through the STT -> NLP -> TTS pipeline for the Gradio UI.

    Args:
        audio: ``(sample_rate, samples)`` as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was provided.

    Returns:
        tuple: ``(response_text, output_path)``. ``output_path`` is ``None``
        when ``text_to_speech`` returned nothing or the file it named does not
        exist. With no input the result is ``("No audio provided.", None)``.
    """
    # Imported locally: the module never imported the `sf` name the previous
    # implementation called, so the standard library writes the WAV instead.
    import tempfile
    import wave

    if audio is None:
        return "No audio provided.", None

    sample_rate, samples = audio  # audio[0] is sample rate, audio[1] is data

    # assumes `samples` is a NumPy array of integer PCM or of floats in
    # [-1, 1], shaped (frames,) or (frames, channels) - TODO confirm
    if samples.dtype.kind == "f":
        pcm = (samples.clip(-1.0, 1.0) * 32767).astype("<i2")
    else:
        pcm = samples.astype(samples.dtype.newbyteorder("<"))
    channels = 1 if pcm.ndim == 1 else pcm.shape[1]

    # A private scratch directory avoids clashes between concurrent requests
    # and is removed even if transcription raises.
    with tempfile.TemporaryDirectory(prefix="voice-bot-test-") as workdir:
        input_path = os.path.join(workdir, "test_input.wav")
        with wave.open(input_path, "wb") as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(pcm.dtype.itemsize)
            wav_file.setframerate(int(sample_rate))
            wav_file.writeframes(pcm.tobytes())
        text = transcribe_audio(input_path)

    response_text = generate_response(text)
    output_path = text_to_speech(response_text)
    if output_path and os.path.exists(output_path):
        return response_text, output_path
    return response_text, None
84
+
85
+ # Gradio UI
86
with gr.Blocks() as demo:
    # Page heading and a one-line instruction for the tester.
    gr.Markdown("# Voice AI Bot Tester")
    gr.Markdown("Upload or record audio to test the bot's response.")

    # Input: microphone recording or uploaded file, passed to the callback
    # in Gradio's "numpy" audio form.
    audio_input = gr.Audio(sources=["microphone", "upload"], type="numpy")

    # Outputs: the bot's reply as text and as audio.
    text_output = gr.Textbox(label="Bot Response Text")
    audio_output = gr.Audio(label="Bot Response Audio")

    # Clicking the button runs the pipeline and fills both outputs.
    submit_btn = gr.Button("Test")
    submit_btn.click(test_voice_bot, audio_input, [text_output, audio_output])
98
+
99
+ # Launch Gradio app
100
if __name__ == "__main__":
    # Serve the Gradio tester on all interfaces at port 7860.
    # NOTE(review): this starts the Gradio `demo` only; nothing visible here
    # starts a server for the FastAPI `app` - confirm how /voice and
    # /media-stream are meant to be served.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)