Spaces:

VladB46
/

FasterWhisper-Streamlit

Running

Vlad Bastina

changes

06bedea 6 months ago

6.32 kB

	import streamlit as st
	import numpy as np
	import io
	import tempfile
	import os
	import time
	from faster_whisper import WhisperModel
	from streamlit_mic_recorder import mic_recorder
	from transcription_correction import ask_gemini
	from pathlib import Path

	def load_css(file_name):
	    """Inject a stylesheet that sits next to this script into the Streamlit page.

	    Args:
	        file_name: Name of the CSS file, resolved relative to this file's directory.

	    Returns:
	        None. Problems reading the file are reported on the page with
	        ``st.error`` instead of being raised.
	    """
	    css_path = Path(__file__).parent / file_name
	    try:
	        # Decode explicitly as UTF-8 so the result does not depend on the
	        # host's default locale encoding.
	        css_text = css_path.read_text(encoding="utf-8")
	    except FileNotFoundError:
	        st.error(f"CSS file not found: {file_name}. Make sure it's in the same directory as app.py.")
	    except (OSError, UnicodeError) as e:
	        st.error(f"Error loading CSS file {file_name}: {e}")
	    else:
	        # Only reached when the file was read successfully.
	        st.markdown(f'<style>{css_text}</style>', unsafe_allow_html=True)


	# Load Whisper Model
	@st.cache_resource()
	def load_model(model_size="large-v3", device="cpu", compute_type="int8"):
	    """Build the faster-whisper model behind Streamlit's resource cache.

	    Args:
	        model_size: Model name handed to ``WhisperModel``.
	        device: Device string handed to ``WhisperModel``.
	        compute_type: Compute type handed to ``WhisperModel``.

	    Returns:
	        The constructed ``WhisperModel``. Calling with no arguments uses the
	        "large-v3" / "cpu" / "int8" configuration.
	    """
	    return WhisperModel(model_size, device=device, compute_type=compute_type)


	# Link the Google Fonts stylesheet (Inter Tight, Space Grotesk) into the page,
	# then apply the app's own style.css on top.
	font_links_html = """
	<link rel="preconnect" href="https://fonts.googleapis.com">
	<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
	<link href="https://fonts.googleapis.com/css2?family=Inter+Tight:ital,wght@0,100..900;1,100..900&family=Space+Grotesk:wght@300..700&display=swap" rel="stylesheet">
	"""
	st.markdown(font_links_html, unsafe_allow_html=True)
	load_css("style.css")

	# Obtain the speech model before the page header is drawn.
	model = load_model()

	# Copy the API key from Streamlit secrets into the process environment
	# (presumably read by ask_gemini -- confirm in transcription_correction).
	os.environ["GOOGLE_API_KEY"] = st.secrets["GOOGLE_API_KEY"]

	st.title("AI Speech-to-Text Transcription Demo")
	intro_text = (
	    "This demo uses advanced AI models to transcribe spoken audio into accurate, readable text. "
	    "Ideal for interviews, voice notes, or recorded meetings, it demonstrates how modern speech "
	    "recognition can deliver fast and reliable transcription."
	)
	st.write(intro_text)


	# Reset control: pressing "Clear" empties the session state and immediately
	# reruns the script.
	clear_button = st.button(label="Clear")
	if clear_button:
	    st.session_state.clear()
	    st.rerun()

	# Checkbox for using a default file
	use_default = st.checkbox("Use default audio file")

	default_audio_path = "default_audio.wav"  # Ensure this file exists in your directory

	audio_data = None
	if use_default:
	    # Demo mode: play the bundled clip, pause briefly behind a spinner, and
	    # show the pre-written transcript below; the model is not run here.
	    with open(default_audio_path, "rb") as f:
	        audio_data = io.BytesIO(f.read())
	    st.audio(audio_data, format="audio/wav")
	    with st.spinner("Loading default video..."):
	        time.sleep(2)

	    st.subheader("Transcription:")
	    st.markdown('''Call Center Agent: This call is now being recorded. Parker Scarves, how may I help you?

	Charlie Johnson: I bought a scarf online for my wife, and it turns out they shipped the wrong color.

	Call Center Agent: Oh, I am so sorry, sir.

	Charlie Johnson: I got it for her birthday, which is tonight, and now I'm not 100% sure what I need to do.

	Call Center Agent: Okay, let me see if I can help you. Do you have the item number of the Parker Scarves?

	Charlie Johnson: I don't think so. It's called a New Yorker, I think.

	Call Center Agent: Excellent. Okay. What color did you want the New Yorker in?

	Charlie Johnson: Blue. The one they shipped was light blue. I wanted the darker one.

	Call Center Agent: Did you want navy blue or royal blue?

	Charlie Johnson: What's the difference there?

	Call Center Agent: The royal blue is a bit brighter.

	Charlie Johnson: That's the one I want.

	Call Center Agent: Okay. What zip code are you located in?

	Charlie Johnson: 19406.

	Call Center Agent: Okay. It appears that we do have that item in stock at Karen's Boutique at the Hunter Mall. Is that close by?

	Charlie Johnson: It is. It's right by my office.

	Call Center Agent: Okay. What is your name, sir?

	Charlie Johnson: Charlie Johnson.

	Call Center Agent: Charlie Johnson? Is that J-O-H-N-S-O-N?

	Charlie Johnson: Yes, ma'am.

	Call Center Agent: And Mr. Johnson, do you have the Parker Scarves in light blue with you now?

	Charlie Johnson: I do. They shipped it to my office. It just came in not that long ago.

	Call Center Agent: Okay. What I will do is make arrangements with Karen's Boutique for you to exchange the Parker Scarf at no additional cost. And in addition, I was able to look up your order in our system, and I'm going to send out a special gift to you to make up for the inconvenience.

	Charlie Johnson: Oh, excellent. Thank you so much.

	Call Center Agent: You're welcome. And thank you for calling Parker Scarf, and I hope your wife enjoys her birthday gift.

	Charlie Johnson: Oh, thank you. Thank you very much.

	Call Center Agent: You're very welcome. Goodbye.

	Charlie Johnson: Bye-bye.

	Call Center Agent: Bye-bye.''')

	else:
	    st.subheader("Record Audio or Upload a File")

	    uploaded_file = st.file_uploader(
	        "Or upload an audio file",
	        type=["mp3", "wav", "m4a", "ogg"],
	        key="uploaded_file",
	    )

	    if uploaded_file:
	        st.audio(uploaded_file, format="audio/wav")
	        audio_data = uploaded_file

	    # NOTE(review): the transcription step is kept inside the upload branch so
	    # the default-audio demo shows only its canned transcript -- confirm this
	    # nesting against the deployed layout.
	    if audio_data:
	        # Give the temp file the upload's own extension (falling back to
	        # ".wav") instead of labelling every format as WAV.
	        upload_suffix = Path(getattr(audio_data, "name", "")).suffix or ".wav"

	        # Rewind first so the whole buffer is copied even if it was read before.
	        audio_data.seek(0)
	        with tempfile.NamedTemporaryFile(delete=False, suffix=upload_suffix) as tmpfile:
	            tmpfile.write(audio_data.read())
	            tmpfile_path = tmpfile.name

	        try:
	            # Transcribe audio
	            segments, info = model.transcribe(tmpfile_path, beam_size=5)

	            # Display detected language
	            st.write(f"Detected language: {info.language} (Confidence: {info.language_probability:.2f})")

	            # Display transcription
	            st.subheader("Transcription:")
	            # Consume the segments while the temp file still exists; strip each
	            # piece so joining does not produce doubled spaces.
	            faster_whisper_transcription = " ".join(
	                segment.text.strip() for segment in segments
	            )
	        finally:
	            # delete=False leaves the file on disk, so remove it explicitly
	            # whether or not transcription succeeded.
	            os.remove(tmpfile_path)

	        corrected_version = ask_gemini(faster_whisper_transcription)
	        st.write(corrected_version)

	import streamlit.components.v1 as components

	# Zero-height helper component. Its script looks up the first element with
	# class 'stMain' in the parent document and posts that element's scrollHeight
	# to window.parent.parent as a {type: 'setHeight', height} message. It runs on
	# load, on resize and every 1000 ms; when the element is missing it retries
	# after 1000 ms (the in-script comment says 100ms, but the delay is 1000).
	height_reporter_html = """
	<script>
	function sendHeightWhenReady() {
	const el = window.parent.document.getElementsByClassName('stMain')[0];
	if (el) {
	const height = el.scrollHeight;
	console.log("Sending height to parent:", height);
	window.parent.parent.postMessage({ type: 'setHeight', height: height }, '*');
	} else {
	// Retry in 100ms until the element appears
	setTimeout(sendHeightWhenReady, 1000);
	}
	}

	window.onload = sendHeightWhenReady;
	window.addEventListener('resize', sendHeightWhenReady);
	setInterval(sendHeightWhenReady, 1000);
	</script>
	"""
	components.html(height_reporter_html, height=0)