abhishekjoel committed on
Commit
c3e217f
·
verified ·
1 Parent(s): 9cf3b98

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -120
app.py CHANGED
@@ -10,10 +10,14 @@ import aiohttp
10
  import av
11
  import numpy as np
12
  from typing import Dict, Any
 
 
 
 
13
 
14
  # Enhanced logging configuration
15
  logging.basicConfig(
16
- level=logging.INFO,
17
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
18
  )
19
  logger = logging.getLogger(__name__)
@@ -31,18 +35,31 @@ twilio_client = Client(TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN)
31
  class WebRTCManager:
32
  def __init__(self):
33
  self.twilio_client = twilio_client
 
34
 
35
  def get_ice_servers(self):
36
  try:
37
  token = self.twilio_client.tokens.create()
38
- return RTCConfiguration(
39
- iceServers=token.ice_servers
40
- )
 
 
 
 
 
 
 
 
41
  except Exception as e:
42
  logger.error(f"Failed to get Twilio ICE servers: {e}")
 
43
  return RTCConfiguration(
44
  iceServers=[
45
- {"urls": ["stun:stun.l.google.com:19302"]}
 
 
 
46
  ]
47
  )
48
 
@@ -50,6 +67,23 @@ class WebRTCManager:
50
  try:
51
  rtc_configuration = self.get_ice_servers()
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  return webrtc_streamer(
54
  key="user_stream",
55
  mode=WebRtcMode.SENDRECV,
@@ -64,146 +98,84 @@ class WebRTCManager:
64
  "echoCancellation": True,
65
  "noiseSuppression": True,
66
  "autoGainControl": True,
 
 
 
67
  },
68
  },
 
 
 
 
 
 
 
69
  video_html_attrs={
70
  "autoPlay": True,
71
  "controls": False,
72
  "muted": True,
73
  "playsinline": True,
74
  },
75
- async_processing=True,
76
  )
77
  except Exception as e:
78
  logger.error(f"WebRTC context creation failed: {e}")
79
  st.error("Failed to initialize video chat. Please refresh the page.")
80
  return None
81
 
82
- class ConversationManager:
83
  def __init__(self):
84
- self.initial_prompt = "Hello there, I'm Nathan and I'm going to help you with college admissions. How's it going?"
85
- if 'conversation_history' not in st.session_state:
86
- st.session_state.conversation_history = [{"role": "assistant", "content": self.initial_prompt}]
87
-
88
- async def process_audio(self, audio_frames) -> str:
89
- try:
90
- # Convert audio frames to wav format
91
- audio_data = b"".join(frame.to_ndarray().tobytes() for frame in audio_frames)
92
-
93
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
94
- tmpfile.write(audio_data)
95
- with open(tmpfile.name, "rb") as audio_file:
96
- transcript = await asyncio.to_thread(
97
- openai.Audio.transcribe,
98
- "whisper-1",
99
- audio_file
100
- )
101
- return transcript.get("text", "")
102
- except Exception as e:
103
- logger.error(f"Audio processing error: {e}")
104
- return ""
105
-
106
- async def get_chatbot_response(self) -> str:
107
- try:
108
- response = await asyncio.to_thread(
109
- openai.ChatCompletion.create,
110
- model="gpt-3.5-turbo",
111
- messages=st.session_state.conversation_history,
112
- max_tokens=150,
113
- temperature=0.7,
114
- )
115
- return response.choices[0].message["content"]
116
- except Exception as e:
117
- logger.error(f"GPT response error: {e}")
118
- return "I apologize, but I'm having trouble processing your request right now."
119
-
120
- class TavusManager:
121
- def __init__(self):
122
- self.headers = {
123
- "Authorization": f"Bearer {TAVUS_API_KEY}",
124
- "Content-Type": "application/json",
125
- }
126
-
127
- async def generate_avatar_speech(self, text: str) -> None:
128
  try:
129
- payload = {
130
- "replica_id": "r79e1c033f",
131
- "persona_id": "p9a95912",
132
- "conversation_name": "University Admissions Chat",
133
- "conversational_context": text,
134
- "properties": {"enable_recording": True}
135
- }
136
-
137
- async with aiohttp.ClientSession() as session:
138
- async with session.post(
139
- "https://api.tavus.io/v2/conversations",
140
- headers=self.headers,
141
- json=payload
142
- ) as response:
143
- if response.status == 200:
144
- data = await response.json()
145
- video_url = data.get("conversation_url")
146
- if video_url:
147
- st.video(video_url)
148
- else:
149
- st.error("No video URL received from Tavus API.")
150
- else:
151
- st.error(f"Tavus API error: {response.status} {await response.text()}")
152
  except Exception as e:
153
- logger.error(f"Tavus speech generation error: {e}")
154
- st.error("Failed to generate avatar response.")
155
-
156
- async def process_frames(webrtc_ctx, conversation_manager, tavus_manager):
157
- try:
158
- if webrtc_ctx.audio_receiver:
159
- audio_frames = await webrtc_ctx.audio_receiver.get_frames(timeout=1)
160
- if audio_frames:
161
- user_text = await conversation_manager.process_audio(audio_frames)
162
-
163
- if user_text:
164
- st.write(f"**You said:** {user_text}")
165
- st.session_state.conversation_history.append(
166
- {"role": "user", "content": user_text}
167
- )
168
-
169
- response = await conversation_manager.get_chatbot_response()
170
- st.session_state.conversation_history.append(
171
- {"role": "assistant", "content": response}
172
- )
173
-
174
- await tavus_manager.generate_avatar_speech(response)
175
-
176
- except Exception as e:
177
- logger.error(f"Frame processing error: {e}")
178
- st.error("An error occurred while processing the audio stream.")
179
 
180
  def main():
181
  st.title("AI Video Chatbot for University Admissions")
182
 
183
  # Initialize managers
184
  webrtc_manager = WebRTCManager()
185
- conversation_manager = ConversationManager()
186
- tavus_manager = TavusManager()
187
 
188
- # Create WebRTC context
189
- webrtc_ctx = webrtc_manager.create_webrtc_context()
190
-
191
- if webrtc_ctx and webrtc_ctx.state.playing:
192
- st.write("Streaming is active...")
193
-
194
- # Display initial prompt for new conversations
195
- if len(st.session_state.conversation_history) == 1:
196
- asyncio.run(tavus_manager.generate_avatar_speech(conversation_manager.initial_prompt))
197
 
198
- # Process audio frames
199
- asyncio.run(process_frames(webrtc_ctx, conversation_manager, tavus_manager))
200
-
201
- # Display conversation history
202
- st.sidebar.title("Conversation History")
203
- for message in st.session_state.conversation_history:
204
- role = message["role"]
205
- content = message["content"]
206
- st.sidebar.write(f"**{role.capitalize()}:** {content}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
  if __name__ == "__main__":
209
  main()
 
10
  import av
11
  import numpy as np
12
  from typing import Dict, Any
13
+ import json
14
+ from aiortc.contrib.media import MediaPlayer, MediaRecorder
15
+ import threading
16
+ from pathlib import Path
17
 
18
  # Enhanced logging configuration
19
  logging.basicConfig(
20
+ level=logging.DEBUG, # Changed to DEBUG for more detailed logs
21
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
22
  )
23
  logger = logging.getLogger(__name__)
 
35
  class WebRTCManager:
36
  def __init__(self):
37
  self.twilio_client = twilio_client
38
+ self.lock = threading.Lock()
39
 
40
  def get_ice_servers(self):
41
  try:
42
  token = self.twilio_client.tokens.create()
43
+ ice_servers = token.ice_servers
44
+
45
+ # Add additional STUN servers for redundancy
46
+ ice_servers.extend([
47
+ {"urls": ["stun:stun1.l.google.com:19302"]},
48
+ {"urls": ["stun:stun2.l.google.com:19302"]},
49
+ {"urls": ["stun:stun3.l.google.com:19302"]},
50
+ {"urls": ["stun:stun4.l.google.com:19302"]}
51
+ ])
52
+
53
+ return RTCConfiguration(iceServers=ice_servers)
54
  except Exception as e:
55
  logger.error(f"Failed to get Twilio ICE servers: {e}")
56
+ # Fallback configuration with multiple STUN servers
57
  return RTCConfiguration(
58
  iceServers=[
59
+ {"urls": ["stun:stun1.l.google.com:19302"]},
60
+ {"urls": ["stun:stun2.l.google.com:19302"]},
61
+ {"urls": ["stun:stun3.l.google.com:19302"]},
62
+ {"urls": ["stun:stun4.l.google.com:19302"]}
63
  ]
64
  )
65
 
 
67
  try:
68
  rtc_configuration = self.get_ice_servers()
69
 
70
+ def video_frame_callback(frame):
71
+ try:
72
+ with self.lock:
73
+ img = frame.to_ndarray(format="bgr24")
74
+ return av.VideoFrame.from_ndarray(img, format="bgr24")
75
+ except Exception as e:
76
+ logger.error(f"Error in video frame callback: {e}")
77
+ return frame
78
+
79
+ def audio_frame_callback(frame):
80
+ try:
81
+ with self.lock:
82
+ return frame
83
+ except Exception as e:
84
+ logger.error(f"Error in audio frame callback: {e}")
85
+ return frame
86
+
87
  return webrtc_streamer(
88
  key="user_stream",
89
  mode=WebRtcMode.SENDRECV,
 
98
  "echoCancellation": True,
99
  "noiseSuppression": True,
100
  "autoGainControl": True,
101
+ "sampleRate": 48000,
102
+ "sampleSize": 16,
103
+ "channelCount": 1,
104
  },
105
  },
106
+ video_frame_callback=video_frame_callback,
107
+ audio_frame_callback=audio_frame_callback,
108
+ rtc_offer_options={
109
+ "offerToReceiveAudio": True,
110
+ "offerToReceiveVideo": True,
111
+ },
112
+ async_processing=True,
113
  video_html_attrs={
114
  "autoPlay": True,
115
  "controls": False,
116
  "muted": True,
117
  "playsinline": True,
118
  },
119
+ sendback_audio=False, # Prevent audio feedback loops
120
  )
121
  except Exception as e:
122
  logger.error(f"WebRTC context creation failed: {e}")
123
  st.error("Failed to initialize video chat. Please refresh the page.")
124
  return None
125
 
126
+ class AudioProcessor:
127
  def __init__(self):
128
+ self.audio_buffer = []
129
+ self.lock = threading.Lock()
130
+
131
+ async def process_audio_frame(self, frame):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  try:
133
+ with self.lock:
134
+ self.audio_buffer.append(frame.to_ndarray())
135
+ if len(self.audio_buffer) >= 10: # Process every 10 frames
136
+ audio_data = np.concatenate(self.audio_buffer)
137
+ self.audio_buffer = []
138
+ return audio_data
139
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  except Exception as e:
141
+ logger.error(f"Error processing audio frame: {e}")
142
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  def main():
145
  st.title("AI Video Chatbot for University Admissions")
146
 
147
  # Initialize managers
148
  webrtc_manager = WebRTCManager()
149
+ audio_processor = AudioProcessor()
 
150
 
151
+ # Create WebRTC context with error handling
152
+ try:
153
+ webrtc_ctx = webrtc_manager.create_webrtc_context()
 
 
 
 
 
 
154
 
155
+ if webrtc_ctx and webrtc_ctx.state.playing:
156
+ st.write("Streaming is active...")
157
+
158
+ # Initialize conversation if needed
159
+ if 'conversation_history' not in st.session_state:
160
+ st.session_state.conversation_history = [{
161
+ "role": "assistant",
162
+ "content": "Hello there, I'm Nathan and I'm going to help you with college admissions. How's it going?"
163
+ }]
164
+
165
+ # Process audio frames with additional error handling
166
+ if webrtc_ctx.audio_receiver:
167
+ try:
168
+ audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
169
+ if audio_frames:
170
+ # Process audio frames...
171
+ pass
172
+ except Exception as e:
173
+ logger.error(f"Error receiving audio frames: {e}")
174
+ st.warning("Audio connection interrupted. Please refresh the page if this persists.")
175
+
176
+ except Exception as e:
177
+ logger.error(f"Main loop error: {e}")
178
+ st.error("An error occurred. Please refresh the page and try again.")
179
 
180
  if __name__ == "__main__":
181
  main()