# Voice-Bot — src/streamlit_app.py
# Author: Daaku-C5 — commit c3e2346 (verified): "Update src/streamlit_app.py"
# (Header reconstructed as comments: the raw scrape residue was not valid Python.)
import streamlit as st
from openai import OpenAI
import io
import base64
import os
import tempfile
from audio_recorder_streamlit import audio_recorder
# Page configuration — must be the first Streamlit call in the script.
st.set_page_config(
    page_title="Voice Bot",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# Configuration
# NOTE(review): TEMP_AUDIO_FILE is not referenced anywhere in this file —
# transcribe_audio() uses tempfile.NamedTemporaryFile instead. Candidate for
# removal; kept for now in case an external module imports it.
TEMP_AUDIO_FILE = "temp_audio.wav"
# Initialize OpenAI client
@st.cache_resource
def init_openai_client():
    """Build a cached OpenAI client from the API key, halting the app if absent.

    Key lookup order:
        1. ``OPENAI_API_KEY`` environment variable (HF Spaces secrets appear
           as env vars).
        2. Streamlit secrets file (local development).

    Returns:
        An ``OpenAI`` client instance. Never returns without a key: the
        script is stopped with an on-page error instead.
    """
    # 1. Try environment variable first (HF Spaces secrets appear as env vars)
    api_key = os.environ.get("OPENAI_API_KEY")
    # 2. Try Streamlit secrets (for local development)
    if not api_key:
        try:
            api_key = st.secrets["OPENAI_API_KEY"]
        except (KeyError, FileNotFoundError):
            pass
    # 3. Check if we found the key.
    # Bug fix: this branch used to live inside a broad `try/except Exception`.
    # Streamlit implements st.stop() by raising StopException (an Exception
    # subclass), so the handler swallowed it and displayed a spurious
    # "Error initializing OpenAI client" message. st.stop() must run outside
    # any broad except block.
    if not api_key:
        st.error("⚠️ OpenAI API key not found!")
        st.markdown("""
**For Hugging Face Spaces:**
1. Go to your Space settings
2. Click on "Repository secrets"
3. Add a new secret with name: `OPENAI_API_KEY`
4. Restart your Space
**For local development:**
Create `.streamlit/secrets.toml` with:
```
OPENAI_API_KEY = "your-key-here"
```
""")
        st.stop()
    # Only the client construction itself is guarded now.
    try:
        return OpenAI(api_key=api_key)
    except Exception as e:
        st.error(f"Error initializing OpenAI client: {str(e)}")
        st.stop()

client = init_openai_client()
# Initialize session state variables
def init_session_state():
    """Seed every session-state key this app relies on, if not yet present."""
    # Cheap defaults go through a table; context is loaded lazily because it
    # may hit the filesystem.
    simple_defaults = {
        'conversation_history': [],
        'processing': False,
        'last_audio_hash': None,
    }
    for key, default in simple_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default
    if 'context' not in st.session_state:
        st.session_state.context = load_context()
def load_context():
    """Load the persona context from context.txt beside this module.

    Returns:
        The stripped file contents when context.txt exists, a built-in
        default persona when it does not, and a short fallback string if
        reading fails for any other reason.
    """
    default_text = """I am Prakhar, an AI assistant. I can help you with general questions and conversations.
I aim to be helpful, harmless, and honest in all my interactions."""
    try:
        here = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(here, 'context.txt')
        if not os.path.exists(path):
            # No context file shipped alongside the app — use the default.
            return default_text
        with open(path, "r", encoding='utf-8') as fh:
            return fh.read().strip()
    except Exception as exc:
        st.error(f"Error loading context: {str(exc)}")
        return "I am Prakhar, an AI assistant."
def save_context(context_text):
    """Persist *context_text* to context.txt beside this module.

    Returns:
        True on success, False if writing failed (error shown in the UI).
    """
    try:
        target = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'context.txt'
        )
        with open(target, "w", encoding='utf-8') as fh:
            fh.write(context_text)
    except Exception as exc:
        st.error(f"Error saving context: {str(exc)}")
        return False
    return True
def transcribe_audio(audio_bytes):
    """Transcribe recorded audio via the OpenAI Whisper API.

    Args:
        audio_bytes: Raw WAV bytes from the in-browser recorder.

    Returns:
        The stripped transcript text, or None on failure (error shown in UI).
    """
    tmp_file_path = None
    try:
        # The API wants a real file object, so spill the bytes to disk first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            tmp_file.write(audio_bytes)
            tmp_file_path = tmp_file.name
        # Transcribe using OpenAI Whisper
        with open(tmp_file_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                language="en"  # You can remove this to auto-detect language
            )
        return transcript.text.strip()
    except Exception as e:
        st.error(f"Error transcribing audio: {str(e)}")
        return None
    finally:
        # Bug fix: the original only unlinked on the success path, leaking a
        # temp file every time transcription raised. Clean up unconditionally.
        if tmp_file_path and os.path.exists(tmp_file_path):
            os.unlink(tmp_file_path)
def get_ai_response(user_text, context):
    """Ask GPT-4 for a conversational reply to *user_text*, grounded in *context*.

    Returns:
        The model's reply text, or a canned apology string on any API error.
    """
    # Persona prompt: the bot answers as "Prakhar", using the supplied context.
    system_prompt = f"""You are Prakhar. You should respond naturally and helpfully.
Context about you:
{context}
Instructions:
- Use the context above to inform your responses
- If asked about something not covered in the context, you can use your general knowledge
- If you're not sure about something specific to your context, say "I'm not sure about that based on what I know about myself"
- Keep responses conversational and natural
- Be helpful and engaging"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_text},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=500,
            temperature=0.7,
        )
    except Exception as exc:
        st.error(f"Error getting AI response: {str(exc)}")
        return "I'm sorry, I encountered an error while processing your request."
    return completion.choices[0].message.content.strip()
def text_to_speech(text):
    """Synthesize *text* with OpenAI TTS and return the audio bytes.

    Returns:
        The rendered speech content (MP3 bytes per the TTS default), or
        None if synthesis failed (error shown in the UI).
    """
    try:
        rendered = client.audio.speech.create(
            model="tts-1",
            input=text,
            # Available voices: alloy, echo, fable, onyx, nova, shimmer
            voice="onyx",
            speed=1.0,
        )
    except Exception as exc:
        st.error(f"Error generating speech: {str(exc)}")
        return None
    return rendered.content
def process_audio(audio_bytes):
    """Drive the full pipeline: transcription -> chat reply -> speech.

    Args:
        audio_bytes: Raw recording bytes, possibly empty/None.

    Returns:
        (user_text, ai_response, speech_audio); all three are None when the
        input is empty or transcription produced nothing.
    """
    nothing = (None, None, None)
    if not audio_bytes:
        return nothing
    # Step 1: speech -> text
    with st.spinner("🎯 Transcribing audio..."):
        transcript = transcribe_audio(audio_bytes)
    if not transcript:
        return nothing
    # Step 2: text -> reply (uses the persona context from session state)
    with st.spinner("πŸ€– Generating response..."):
        reply = get_ai_response(transcript, st.session_state.context)
    # Step 3: reply -> audio
    with st.spinner("πŸ”Š Converting to speech..."):
        spoken = text_to_speech(reply)
    return transcript, reply, spoken
def main():
    """Render the voice-bot UI: record in col1, show the conversation in col2.

    Streamlit re-runs this whole function on every interaction, so the
    processing flag and the audio hash in session state are what prevent the
    same recording from being processed twice.
    """
    st.title("πŸŽ™οΈ Voice Bot")
    st.markdown("*Talk to Prakhar using your voice!*")
    # Initialize session state
    init_session_state()
    # Create main layout
    col1, col2 = st.columns([1, 1], gap="large")
    with col1:
        st.subheader("🎀 Voice Input")
        # Audio recorder widget: returns the latest recording's bytes (or
        # None) on each rerun; recording stops after 2s of silence.
        audio_bytes = audio_recorder(
            text="Click to record",
            recording_color="#e74c3c",
            neutral_color="#34495e",
            icon_name="microphone",
            icon_size="2x",
            pause_threshold=2.0,
            sample_rate=44100
        )
        # Show current recording so the user can replay what they said.
        if audio_bytes:
            st.audio(audio_bytes, format="audio/wav")
        # Process audio when a new recording is available and nothing else
        # is already in flight.
        if audio_bytes and not st.session_state.processing:
            # Hash the audio to detect new recordings: the widget re-returns
            # the same bytes on every rerun, so dedup is required.
            import hashlib
            audio_hash = hashlib.md5(audio_bytes).hexdigest()
            # Only process if this is a new recording
            if audio_hash != st.session_state.last_audio_hash:
                st.session_state.processing = True
                st.session_state.last_audio_hash = audio_hash
                user_text, ai_response, speech_audio = process_audio(audio_bytes)
                if user_text and ai_response:
                    # Add to conversation history (speech may be None if TTS failed)
                    st.session_state.conversation_history.append({
                        "user": user_text,
                        "ai": ai_response,
                        "speech": speech_audio
                    })
                # Reset processing flag before rerun so the next run is not blocked.
                st.session_state.processing = False
                # Force a rerun to update the conversation display
                if user_text and ai_response:
                    st.rerun()
    with col2:
        st.subheader("πŸ’¬ Conversation")
        # Display conversation history
        if st.session_state.conversation_history:
            # Show the most recent conversation
            latest = st.session_state.conversation_history[-1]
            st.markdown("**You said:**")
            st.info(latest["user"])
            st.markdown("**Prakhar replied:**")
            st.success(latest["ai"])
            # NOTE(review): redundant reset — processing was already cleared
            # in col1 before the rerun; kept as a defensive unstick.
            st.session_state.processing = False
            # Play AI response audio
            if latest["speech"]:
                st.audio(latest["speech"], format="audio/mp3")
            # Show older exchanges, newest first, in a collapsible section.
            if len(st.session_state.conversation_history) > 1:
                with st.expander("πŸ“œ Previous conversations"):
                    for i, conv in enumerate(reversed(st.session_state.conversation_history[:-1])):
                        st.markdown(f"**Conversation {len(st.session_state.conversation_history) - i - 1}:**")
                        st.markdown(f"πŸ‘€ You: {conv['user']}")
                        st.markdown(f"πŸ€– Prakhar: {conv['ai']}")
                        if conv["speech"]:
                            st.audio(conv["speech"], format="audio/mp3")
                        st.divider()
        else:
            st.info("πŸ‘‹ Start by recording your voice message above!")
    # Context display section (read-only view of the persona context)
    st.divider()
    with st.expander("ℹ️ Context", expanded=False):
        st.info(st.session_state.context)
    # Only keep the clear conversation button
    if st.button("πŸ—‘οΈ Clear Conversation"):
        st.session_state.conversation_history = []
        st.rerun()
    # Status indicators
    if st.session_state.processing:
        st.info("πŸ”„ Processing your request...")


if __name__ == "__main__":
    main()