Spaces:

abhishekjoel
/

Glovera_testing

Sleeping

App Files Files Community

abhishekjoel committed on Nov 6, 2024

Commit

c3e217f

verified ·

1 Parent(s): 9cf3b98

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -120

app.py CHANGED Viewed

@@ -10,10 +10,14 @@ import aiohttp
 import av
 import numpy as np
 from typing import Dict, Any
 # Enhanced logging configuration
 logging.basicConfig(
-    level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
@@ -31,18 +35,31 @@ twilio_client = Client(TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN)
 class WebRTCManager:
     def __init__(self):
         self.twilio_client = twilio_client
     def get_ice_servers(self):
         try:
             token = self.twilio_client.tokens.create()
-            return RTCConfiguration(
-                iceServers=token.ice_servers
-            )
         except Exception as e:
             logger.error(f"Failed to get Twilio ICE servers: {e}")
             return RTCConfiguration(
                 iceServers=[
-                    {"urls": ["stun:stun.l.google.com:19302"]}
                 ]
             )
@@ -50,6 +67,23 @@ class WebRTCManager:
         try:
             rtc_configuration = self.get_ice_servers()
             return webrtc_streamer(
                 key="user_stream",
                 mode=WebRtcMode.SENDRECV,
@@ -64,146 +98,84 @@ class WebRTCManager:
                         "echoCancellation": True,
                         "noiseSuppression": True,
                         "autoGainControl": True,
                     },
                 },
                 video_html_attrs={
                     "autoPlay": True,
                     "controls": False,
                     "muted": True,
                     "playsinline": True,
                 },
-                async_processing=True,
             )
         except Exception as e:
             logger.error(f"WebRTC context creation failed: {e}")
             st.error("Failed to initialize video chat. Please refresh the page.")
             return None
-class ConversationManager:
     def __init__(self):
-        self.initial_prompt = "Hello there, I'm Nathan and I'm going to help you with college admissions. How's it going?"
-        if 'conversation_history' not in st.session_state:
-            st.session_state.conversation_history = [{"role": "assistant", "content": self.initial_prompt}]
-    async def process_audio(self, audio_frames) -> str:
-        try:
-            # Convert audio frames to wav format
-            audio_data = b"".join(frame.to_ndarray().tobytes() for frame in audio_frames)
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
-                tmpfile.write(audio_data)
-                with open(tmpfile.name, "rb") as audio_file:
-                    transcript = await asyncio.to_thread(
-                        openai.Audio.transcribe,
-                        "whisper-1",
-                        audio_file
-                    )
-                    return transcript.get("text", "")
-        except Exception as e:
-            logger.error(f"Audio processing error: {e}")
-            return ""
-    async def get_chatbot_response(self) -> str:
-        try:
-            response = await asyncio.to_thread(
-                openai.ChatCompletion.create,
-                model="gpt-3.5-turbo",
-                messages=st.session_state.conversation_history,
-                max_tokens=150,
-                temperature=0.7,
-            )
-            return response.choices[0].message["content"]
-        except Exception as e:
-            logger.error(f"GPT response error: {e}")
-            return "I apologize, but I'm having trouble processing your request right now."
-class TavusManager:
-    def __init__(self):
-        self.headers = {
-            "Authorization": f"Bearer {TAVUS_API_KEY}",
-            "Content-Type": "application/json",
-        }
-    async def generate_avatar_speech(self, text: str) -> None:
         try:
-            payload = {
-                "replica_id": "r79e1c033f",
-                "persona_id": "p9a95912",
-                "conversation_name": "University Admissions Chat",
-                "conversational_context": text,
-                "properties": {"enable_recording": True}
-            }
-            async with aiohttp.ClientSession() as session:
-                async with session.post(
-                    "https://api.tavus.io/v2/conversations",
-                    headers=self.headers,
-                    json=payload
-                ) as response:
-                    if response.status == 200:
-                        data = await response.json()
-                        video_url = data.get("conversation_url")
-                        if video_url:
-                            st.video(video_url)
-                        else:
-                            st.error("No video URL received from Tavus API.")
-                    else:
-                        st.error(f"Tavus API error: {response.status} {await response.text()}")
         except Exception as e:
-            logger.error(f"Tavus speech generation error: {e}")
-            st.error("Failed to generate avatar response.")
-async def process_frames(webrtc_ctx, conversation_manager, tavus_manager):
-    try:
-        if webrtc_ctx.audio_receiver:
-            audio_frames = await webrtc_ctx.audio_receiver.get_frames(timeout=1)
-            if audio_frames:
-                user_text = await conversation_manager.process_audio(audio_frames)
-                if user_text:
-                    st.write(f"**You said:** {user_text}")
-                    st.session_state.conversation_history.append(
-                        {"role": "user", "content": user_text}
-                    )
-                    response = await conversation_manager.get_chatbot_response()
-                    st.session_state.conversation_history.append(
-                        {"role": "assistant", "content": response}
-                    )
-                    await tavus_manager.generate_avatar_speech(response)
-    except Exception as e:
-        logger.error(f"Frame processing error: {e}")
-        st.error("An error occurred while processing the audio stream.")
 def main():
     st.title("AI Video Chatbot for University Admissions")
     # Initialize managers
     webrtc_manager = WebRTCManager()
-    conversation_manager = ConversationManager()
-    tavus_manager = TavusManager()
-    # Create WebRTC context
-    webrtc_ctx = webrtc_manager.create_webrtc_context()
-    if webrtc_ctx and webrtc_ctx.state.playing:
-        st.write("Streaming is active...")
-        # Display initial prompt for new conversations
-        if len(st.session_state.conversation_history) == 1:
-            asyncio.run(tavus_manager.generate_avatar_speech(conversation_manager.initial_prompt))
-        # Process audio frames
-        asyncio.run(process_frames(webrtc_ctx, conversation_manager, tavus_manager))
-    # Display conversation history
-    st.sidebar.title("Conversation History")
-    for message in st.session_state.conversation_history:
-        role = message["role"]
-        content = message["content"]
-        st.sidebar.write(f"**{role.capitalize()}:** {content}")
 if __name__ == "__main__":
     main()

 import av
 import numpy as np
 from typing import Dict, Any
+import json
+from aiortc.contrib.media import MediaPlayer, MediaRecorder
+import threading
+from pathlib import Path
 # Enhanced logging configuration
 logging.basicConfig(
+    level=logging.DEBUG,  # Changed to DEBUG for more detailed logs
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 class WebRTCManager:
     def __init__(self):
         self.twilio_client = twilio_client
+        self.lock = threading.Lock()
     def get_ice_servers(self):
         try:
             token = self.twilio_client.tokens.create()
+            ice_servers = token.ice_servers
+            # Add additional STUN servers for redundancy
+            ice_servers.extend([
+                {"urls": ["stun:stun1.l.google.com:19302"]},
+                {"urls": ["stun:stun2.l.google.com:19302"]},
+                {"urls": ["stun:stun3.l.google.com:19302"]},
+                {"urls": ["stun:stun4.l.google.com:19302"]}
+            ])
+            return RTCConfiguration(iceServers=ice_servers)
         except Exception as e:
             logger.error(f"Failed to get Twilio ICE servers: {e}")
+            # Fallback configuration with multiple STUN servers
             return RTCConfiguration(
                 iceServers=[
+                    {"urls": ["stun:stun1.l.google.com:19302"]},
+                    {"urls": ["stun:stun2.l.google.com:19302"]},
+                    {"urls": ["stun:stun3.l.google.com:19302"]},
+                    {"urls": ["stun:stun4.l.google.com:19302"]}
                 ]
             )
         try:
             rtc_configuration = self.get_ice_servers()
+            def video_frame_callback(frame):
+                try:
+                    with self.lock:
+                        img = frame.to_ndarray(format="bgr24")
+                        return av.VideoFrame.from_ndarray(img, format="bgr24")
+                except Exception as e:
+                    logger.error(f"Error in video frame callback: {e}")
+                    return frame
+            def audio_frame_callback(frame):
+                try:
+                    with self.lock:
+                        return frame
+                except Exception as e:
+                    logger.error(f"Error in audio frame callback: {e}")
+                    return frame
             return webrtc_streamer(
                 key="user_stream",
                 mode=WebRtcMode.SENDRECV,
                         "echoCancellation": True,
                         "noiseSuppression": True,
                         "autoGainControl": True,
+                        "sampleRate": 48000,
+                        "sampleSize": 16,
+                        "channelCount": 1,
                     },
                 },
+                video_frame_callback=video_frame_callback,
+                audio_frame_callback=audio_frame_callback,
+                rtc_offer_options={
+                    "offerToReceiveAudio": True,
+                    "offerToReceiveVideo": True,
+                },
+                async_processing=True,
                 video_html_attrs={
                     "autoPlay": True,
                     "controls": False,
                     "muted": True,
                     "playsinline": True,
                 },
+                sendback_audio=False,  # Prevent audio feedback loops
             )
         except Exception as e:
             logger.error(f"WebRTC context creation failed: {e}")
             st.error("Failed to initialize video chat. Please refresh the page.")
             return None
+class AudioProcessor:
     def __init__(self):
+        self.audio_buffer = []
+        self.lock = threading.Lock()
+    async def process_audio_frame(self, frame):
         try:
+            with self.lock:
+                self.audio_buffer.append(frame.to_ndarray())
+                if len(self.audio_buffer) >= 10:  # Process every 10 frames
+                    audio_data = np.concatenate(self.audio_buffer)
+                    self.audio_buffer = []
+                    return audio_data
+                return None
         except Exception as e:
+            logger.error(f"Error processing audio frame: {e}")
+            return None
 def main():
     st.title("AI Video Chatbot for University Admissions")
     # Initialize managers
     webrtc_manager = WebRTCManager()
+    audio_processor = AudioProcessor()
+    # Create WebRTC context with error handling
+    try:
+        webrtc_ctx = webrtc_manager.create_webrtc_context()
+        if webrtc_ctx and webrtc_ctx.state.playing:
+            st.write("Streaming is active...")
+            # Initialize conversation if needed
+            if 'conversation_history' not in st.session_state:
+                st.session_state.conversation_history = [{
+                    "role": "assistant",
+                    "content": "Hello there, I'm Nathan and I'm going to help you with college admissions. How's it going?"
+                }]
+            # Process audio frames with additional error handling
+            if webrtc_ctx.audio_receiver:
+                try:
+                    audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
+                    if audio_frames:
+                        # Process audio frames...
+                        pass
+                except Exception as e:
+                    logger.error(f"Error receiving audio frames: {e}")
+                    st.warning("Audio connection interrupted. Please refresh the page if this persists.")
+    except Exception as e:
+        logger.error(f"Main loop error: {e}")
+        st.error("An error occurred. Please refresh the page and try again.")
 if __name__ == "__main__":
     main()