Spaces:

abhishekjoel
/

Glovera_testing

Sleeping

App Files Files Community

abhishekjoel committed on Nov 7, 2024

Commit

dd7d315

verified ·

1 Parent(s): d8349f0

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -165

app.py CHANGED Viewed

@@ -1,181 +1,131 @@
 import streamlit as st
-from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
 import requests
 import openai
 import tempfile
 import logging
-from twilio.rest import Client
-import asyncio
-import aiohttp
-import av
-import numpy as np
-from typing import Dict, Any
-import json
-from aiortc.contrib.media import MediaPlayer, MediaRecorder
-import threading
-from pathlib import Path
-# Logging setup: DEBUG level for detailed diagnostics; every record shows the
-# timestamp, logger name and severity ahead of the message.
-logging.basicConfig(
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    level=logging.DEBUG,
-)
-logger = logging.getLogger(__name__)
 # Load API keys from Streamlit secrets
-TAVUS_API_KEY = st.secrets["TAVUS_API_KEY"]
-OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
-TWILIO_ACCOUNT_SID = st.secrets["TWILIO_ACCOUNT_SID"]
-TWILIO_AUTH_TOKEN = st.secrets["TWILIO_AUTH_TOKEN"]
-# Hand the credentials to the OpenAI module and build the shared Twilio client
 openai.api_key = OPENAI_API_KEY
-twilio_client = Client(TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN)
-class WebRTCManager:
-    """Build the streamlit-webrtc streamer for the user's camera and microphone.
-    ICE servers are taken from a Twilio token when one can be created, with
-    public Google STUN servers appended; when that fails, the Google STUN
-    servers are used on their own.
-    """
-    # Public STUN endpoints, used both as redundancy and as the fallback set.
-    _GOOGLE_STUN_URLS = (
-        "stun:stun1.l.google.com:19302",
-        "stun:stun2.l.google.com:19302",
-        "stun:stun3.l.google.com:19302",
-        "stun:stun4.l.google.com:19302",
-    )
-    def __init__(self):
-        # Module-level Twilio client, used to request ICE server tokens.
-        self.twilio_client = twilio_client
-        # Held by the frame callbacks while they handle a frame.
-        self.lock = threading.Lock()
-    @classmethod
-    def _google_stun_servers(cls):
-        """Return a new list of ICE server entries for the Google STUN endpoints."""
-        return [{"urls": [url]} for url in cls._GOOGLE_STUN_URLS]
-    def get_ice_servers(self):
-        """Return the RTCConfiguration to pass to ``webrtc_streamer``.
-        Any exception raised while building the Twilio-based configuration is
-        logged and the STUN-only configuration is returned instead.
-        """
-        try:
-            token = self.twilio_client.tokens.create()
-            # Copy instead of extending in place so the token object is not
-            # mutated; a missing server list degrades to STUN-only.
-            ice_servers = list(token.ice_servers or [])
-            # Add additional STUN servers for redundancy
-            ice_servers.extend(self._google_stun_servers())
-            return RTCConfiguration(iceServers=ice_servers)
-        except Exception as e:
-            logger.error(f"Failed to get Twilio ICE servers: {e}")
-            # Fallback configuration with multiple STUN servers
-            return RTCConfiguration(iceServers=self._google_stun_servers())
-    def create_webrtc_context(self):
-        """Start the SENDRECV streamer and return what ``webrtc_streamer`` returns.
-        Returns None when setup raises; the failure is logged and an error is
-        shown in the Streamlit UI.
-        """
-        try:
-            rtc_configuration = self.get_ice_servers()
-            def video_frame_callback(frame):
-                # Round-trips the frame through a BGR array; no pixel
-                # processing is applied. On failure the input frame is returned.
-                try:
-                    with self.lock:
-                        img = frame.to_ndarray(format="bgr24")
-                        return av.VideoFrame.from_ndarray(img, format="bgr24")
-                except Exception as e:
-                    logger.error(f"Error in video frame callback: {e}")
-                    return frame
-            def audio_frame_callback(frame):
-                # Audio is passed through unchanged.
-                try:
-                    with self.lock:
-                        return frame
-                except Exception as e:
-                    logger.error(f"Error in audio frame callback: {e}")
-                    return frame
-            return webrtc_streamer(
-                key="user_stream",
-                mode=WebRtcMode.SENDRECV,
-                rtc_configuration=rtc_configuration,
-                media_stream_constraints={
-                    "video": {
-                        "width": {"min": 640, "ideal": 1280, "max": 1920},
-                        "height": {"min": 480, "ideal": 720, "max": 1080},
-                        "frameRate": {"max": 30},
-                    },
-                    "audio": {
-                        "echoCancellation": True,
-                        "noiseSuppression": True,
-                        "autoGainControl": True,
-                        "sampleRate": 48000,
-                        "sampleSize": 16,
-                        "channelCount": 1,
-                    },
-                },
-                video_frame_callback=video_frame_callback,
-                audio_frame_callback=audio_frame_callback,
-                rtc_offer_options={
-                    "offerToReceiveAudio": True,
-                    "offerToReceiveVideo": True,
-                },
-                async_processing=True,
-                video_html_attrs={
-                    "autoPlay": True,
-                    "controls": False,
-                    "muted": True,
-                    "playsinline": True,
-                },
-                sendback_audio=False,  # Prevent audio feedback loops
-            )
-        except Exception as e:
-            logger.error(f"WebRTC context creation failed: {e}")
-            st.error("Failed to initialize video chat. Please refresh the page.")
-            return None
-class AudioProcessor:
-    """Collect audio frames and hand them back in fixed-size batches."""
-    def __init__(self, frames_per_batch=10):
-        """Create an empty buffer.
-        Args:
-            frames_per_batch: Number of frames to accumulate before a batch is
-                returned. Defaults to 10, the value that used to be hard-coded.
-        """
-        self.frames_per_batch = frames_per_batch
-        self.audio_buffer = []
-        self.lock = threading.Lock()
-    async def process_audio_frame(self, frame):
-        """Buffer ``frame`` and return a batch once enough frames are queued.
-        Args:
-            frame: Object exposing ``to_ndarray()``.
-        Returns:
-            The buffered arrays joined with ``np.concatenate`` once
-            ``frames_per_batch`` frames have been collected, otherwise None.
-            None is also returned, after logging, if anything raises.
-        """
-        try:
-            with self.lock:
-                self.audio_buffer.append(frame.to_ndarray())
-                if len(self.audio_buffer) < self.frames_per_batch:
-                    return None
-                # Detach the batch before concatenating so a failure here
-                # cannot leave a bad batch in the buffer and break later calls.
-                batch, self.audio_buffer = self.audio_buffer, []
-                return np.concatenate(batch)
-        except Exception as e:
-            logger.error(f"Error processing audio frame: {e}")
-            return None
 def main():
-    """Render the page, start the WebRTC stream and poll it once for audio.
-    While the stream is playing, the conversation history in
-    ``st.session_state`` is seeded with the assistant greeting if absent.
-    Errors are logged and reported in the UI rather than raised.
-    """
-    import queue  # stdlib; supplies the Empty exception raised when get_frames() times out
     st.title("AI Video Chatbot for University Admissions")
-    # Initialize managers
-    webrtc_manager = WebRTCManager()
-    audio_processor = AudioProcessor()
-    # Create WebRTC context with error handling
     try:
-        webrtc_ctx = webrtc_manager.create_webrtc_context()
-        if webrtc_ctx and webrtc_ctx.state.playing:
-            st.write("Streaming is active...")
-            # Initialize conversation if needed
-            if 'conversation_history' not in st.session_state:
-                st.session_state.conversation_history = [{
-                    "role": "assistant",
-                    "content": "Hello there, I'm Nathan and I'm going to help you with college admissions. How's it going?"
-                }]
-            # Process audio frames with additional error handling
-            if webrtc_ctx.audio_receiver:
-                try:
-                    audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
-                    if audio_frames:
-                        # Process audio frames...
-                        pass
-                except queue.Empty:
-                    # Nothing arrived within the timeout: ordinary silence,
-                    # not a broken connection, so do not warn the user.
-                    logger.debug("No audio frames received within the timeout")
-                except Exception as e:
-                    logger.error(f"Error receiving audio frames: {e}")
-                    st.warning("Audio connection interrupted. Please refresh the page if this persists.")
     except Exception as e:
-        logger.error(f"Main loop error: {e}")
-        st.error("An error occurred. Please refresh the page and try again.")
 if __name__ == "__main__":
-    main()

 import streamlit as st
+from streamlit_webrtc import webrtc_streamer, WebRtcMode
 import requests
 import openai
 import tempfile
 import logging
+# Configure the root logger at INFO level
+logging.basicConfig(level=logging.INFO)
 # Load API keys from Streamlit secrets
+# (.get() gives None for an undefined key instead of raising KeyError)
+TAVUS_API_KEY = st.secrets.get("TAVUS_API_KEY")
+OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
 openai.api_key = OPENAI_API_KEY
+# Greeting that opens every conversation
+initial_prompt = "Hello there, I'm Nathan and I'm going to help you with college admissions. How's it going?"
+# Seed the per-session conversation history with the greeting on first run only
+if "conversation_history" not in st.session_state:
+    _greeting_message = {"role": "assistant", "content": initial_prompt}
+    st.session_state.conversation_history = [_greeting_message]
 def main():
     st.title("AI Video Chatbot for University Admissions")
+    rtc_configuration = {
+        "iceServers": [
+            {"urls": ["stun:stun.l.google.com:19302"]},
+            # Add TURN server details if necessary for restrictive networks
+        ]
+    }
+    # Webrtc streamer without unsupported arguments
+    webrtc_ctx = webrtc_streamer(
+        key="user_stream",
+        mode=WebRtcMode.SENDRECV,
+        rtc_configuration=rtc_configuration,
+        media_stream_constraints={
+            "audio": True,
+            "video": True,
+        },
+        video_html_attrs={
+            "autoPlay": True,
+            "controls": False,
+            "muted": True,
+            "playsinline": True,
+        },
+    )
+    if webrtc_ctx and webrtc_ctx.state.playing:
+        st.write("Streaming...")
+        if len(st.session_state.conversation_history) == 1:
+            avatar_speak_tavus(initial_prompt)
+        if webrtc_ctx.audio_receiver:
+            try:
+                audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
+                if audio_frames:
+                    audio_data = b"".join(frame.to_ndarray().tobytes() for frame in audio_frames)
+                    user_text = speech_to_text(audio_data)
+                    if user_text:
+                        st.write(f"**You said:** {user_text}")
+                        st.session_state.conversation_history.append({"role": "user", "content": user_text})
+                        response = get_chatbot_response(st.session_state.conversation_history)
+                        st.session_state.conversation_history.append({"role": "assistant", "content": response})
+                        avatar_speak_tavus(response)
+            except Exception as e:
+                st.error(f"Error in processing audio: {e}")
+# Convert speech to text using OpenAI Whisper API
+def speech_to_text(audio_data):
+    """Transcribe ``audio_data`` with the ``whisper-1`` model.
+    Args:
+        audio_data: Bytes written verbatim to a temporary ``.wav`` file.
+            NOTE(review): presumably this must already be a complete audio
+            file including its header; confirm what callers pass.
+    Returns:
+        The transcript text, or "" when the input is empty, the response has
+        no text, or transcription fails (the error is shown in the UI).
+    """
+    import os  # stdlib; used to delete the temporary file
+    if not audio_data:
+        # Nothing to transcribe; skip the file and the API call.
+        return ""
+    # delete=False so the file can be closed here and reopened by path below.
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
+        tmpfile.write(audio_data)
+        tmpfile_path = tmpfile.name
+    try:
+        with open(tmpfile_path, "rb") as audio_file:
+            transcript = openai.Audio.transcribe("whisper-1", audio_file)
+        return transcript.get("text", "")
+    except Exception as e:
+        st.error(f"Error during transcription: {e}")
+        return ""
+    finally:
+        # The file was created with delete=False, so it has to be removed
+        # explicitly or one file is leaked per call. Removal is best-effort.
+        try:
+            os.remove(tmpfile_path)
+        except OSError:
+            pass
+# Get chatbot response using GPT-3.5
+def get_chatbot_response(conversation_history):
+    """Return the assistant's reply for the given chat history.
+    Args:
+        conversation_history: Chat messages passed as ``messages`` to the
+            chat completion call.
+    Returns:
+        The content of the first choice, or a fixed apology string if the
+        request raises (the error is also shown in the UI).
+    """
+    fallback_reply = "Sorry, I'm having trouble understanding you at the moment."
+    request_args = {
+        "model": "gpt-3.5-turbo",
+        "messages": conversation_history,
+        "max_tokens": 150,
+    }
+    try:
+        completion = openai.ChatCompletion.create(**request_args)
+        first_choice = completion.choices[0]
+        return first_choice.message["content"]
+    except Exception as err:
+        st.error(f"Error in generating response from GPT-3.5: {err}")
+        return fallback_reply
+# Make Tavus avatar speak the response
+def avatar_speak_tavus(text, replica_id="r79e1c033f", persona_id="p9a95912", timeout=30):
+    """Create a Tavus conversation for ``text`` and embed the returned URL.
+    Args:
+        text: Sent as ``conversational_context`` in the request payload.
+        replica_id: Tavus replica ID; defaults to the previously hard-coded value.
+        persona_id: Tavus persona ID; defaults to the previously hard-coded value.
+        timeout: Seconds to wait for the HTTP request. Without a timeout a
+            stalled connection would block the script indefinitely.
+    Returns:
+        None. The result or any error is rendered in the Streamlit UI.
+    NOTE(review): confirm the endpoint host and auth header against the current
+    Tavus API reference, and that ``conversation_url`` is something
+    ``st.video`` can play rather than a call-room link.
+    """
     try:
+        url = "https://api.tavus.io/v2/conversations"
+        headers = {
+            "Authorization": f"Bearer {TAVUS_API_KEY}",
+            "Content-Type": "application/json",
+        }
+        payload = {
+            "replica_id": replica_id,
+            "persona_id": persona_id,
+            "conversation_name": "University Admissions Chat",
+            "conversational_context": text,
+            "properties": {
+                "enable_recording": True
+            }
+        }
+        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
+        if response.status_code == 200:
+            video_url = response.json().get("conversation_url")
+            if video_url:
+                st.video(video_url)
+            else:
+                st.error("No video URL received from Tavus API.")
+        else:
+            st.error(f"Error from Tavus API: {response.status_code} {response.text}")
     except Exception as e:
+        st.error(f"Error in Tavus speech generation: {e}")
 if __name__ == "__main__":
+    main()