Spaces:
Running
Running
import io
import os
import tempfile
import time
from pathlib import Path

import numpy as np
import streamlit as st
import streamlit.components.v1 as components
from faster_whisper import WhisperModel
from streamlit_mic_recorder import mic_recorder

from transcription_correction import ask_gemini
def load_css(file_name):
    """Load a CSS file that sits next to this script and inject it into the app.

    The stylesheet is wrapped in a ``<style>`` tag and emitted through
    ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        file_name: Name of the CSS file, resolved relative to the directory
            containing this script.

    Returns:
        None. Failures are reported on the page with ``st.error`` instead of
        being raised, so a missing stylesheet does not stop the app.
    """
    try:
        css_path = Path(__file__).parent / file_name
        # Read explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        css = css_path.read_text(encoding="utf-8")
        st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
    except FileNotFoundError:
        st.error(f"CSS file not found: {file_name}. Make sure it's in the same directory as app.py.")
    except Exception as e:
        # Best-effort: styling problems are shown to the user, not fatal.
        st.error(f"Error loading CSS file {file_name}: {e}")
# Load Whisper Model
@st.cache_resource
def load_model():
    """Build the faster-whisper model used for transcription.

    Streamlit re-executes the whole script on every user interaction, so the
    model is wrapped in ``st.cache_resource``: it is constructed once and the
    same instance is handed back on subsequent reruns instead of being
    rebuilt each time.

    Returns:
        A ``WhisperModel`` for the ``large-v3`` checkpoint, configured for CPU
        with ``int8`` compute.
    """
    model_size = "large-v3"
    return WhisperModel(model_size, device="cpu", compute_type="int8")
# --- Page setup: fonts, stylesheet, model, API key, heading, reset button ---

# <link> tags that pull the Inter Tight and Space Grotesk families from
# Google Fonts (presumably referenced by style.css — confirm).
_FONT_LINKS_HTML = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter+Tight:ital,wght@0,100..900;1,100..900&family=Space+Grotesk:wght@300..700&display=swap" rel="stylesheet">
"""

# Introductory paragraph shown under the page title.
_APP_DESCRIPTION = (
    "This demo uses advanced AI models to transcribe spoken audio into accurate, readable text. "
    "Ideal for interviews, voice notes, or recorded meetings, it demonstrates how modern speech "
    "recognition can deliver fast and reliable transcription."
)

st.markdown(_FONT_LINKS_HTML, unsafe_allow_html=True)
load_css("style.css")

model = load_model()

# Copy the key from Streamlit secrets into the process environment.
os.environ["GOOGLE_API_KEY"] = st.secrets["GOOGLE_API_KEY"]

st.title("AI Speech-to-Text Transcription Demo")
st.write(_APP_DESCRIPTION)

# "Clear" wipes the session state and restarts the script run.
if st.button("Clear"):
    st.session_state.clear()
    st.rerun()
# --- Audio source selection and transcription ---
# NOTE(review): the original indentation was lost; the upload/transcription
# path is placed under `else` because the default path already shows a
# pre-written transcript. Confirm against the deployed app.

# Checkbox for using a default file
use_default = st.checkbox("Use default audio file")
default_audio_path = "default_audio.wav"  # Ensure this file exists in your directory
audio_data = None

if use_default:
    # Demo path: play the bundled recording and display a fixed transcript;
    # the model is not invoked here.
    with open(default_audio_path, "rb") as f:
        audio_data = io.BytesIO(f.read())
    st.audio(audio_data, format="audio/wav")
    with st.spinner("Loading default audio..."):
        time.sleep(2)
    st.subheader("Transcription:")
    st.markdown('''**Call Center Agent**: This call is now being recorded. Parker Scarves, how may I help you?
**Charlie Johnson**: I bought a scarf online for my wife, and it turns out they shipped the wrong color.
**Call Center Agent**: Oh, I am so sorry, sir.
**Charlie Johnson**: I got it for her birthday, which is tonight, and now I'm not 100% sure what I need to do.
**Call Center Agent**: Okay, let me see if I can help you. Do you have the item number of the Parker Scarves?
**Charlie Johnson**: I don't think so. It's called a New Yorker, I think.
**Call Center Agent**: Excellent. Okay. What color did you want the New Yorker in?
**Charlie Johnson**: Blue. The one they shipped was light blue. I wanted the darker one.
**Call Center Agent**: Did you want navy blue or royal blue?
**Charlie Johnson**: What's the difference there?
**Call Center Agent**: The royal blue is a bit brighter.
**Charlie Johnson**: That's the one I want.
**Call Center Agent**: Okay. What zip code are you located in?
**Charlie Johnson**: 19406.
**Call Center Agent**: Okay. It appears that we do have that item in stock at Karen's Boutique at the Hunter Mall. Is that close by?
**Charlie Johnson**: It is. It's right by my office.
**Call Center Agent**: Okay. What is your name, sir?
**Charlie Johnson**: Charlie Johnson.
**Call Center Agent**: Charlie Johnson? Is that J-O-H-N-S-O-N?
**Charlie Johnson**: Yes, ma'am.
**Call Center Agent**: And Mr. Johnson, do you have the Parker Scarves in light blue with you now?
**Charlie Johnson**: I do. They shipped it to my office. It just came in not that long ago.
**Call Center Agent**: Okay. What I will do is make arrangements with Karen's Boutique for you to exchange the Parker Scarf at no additional cost. And in addition, I was able to look up your order in our system, and I'm going to send out a special gift to you to make up for the inconvenience.
**Charlie Johnson**: Oh, excellent. Thank you so much.
**Call Center Agent**: You're welcome. And thank you for calling Parker Scarf, and I hope your wife enjoys her birthday gift.
**Charlie Johnson**: Oh, thank you. Thank you very much.
**Call Center Agent**: You're very welcome. Goodbye.
**Charlie Johnson**: Bye-bye.
**Call Center Agent**: Bye-bye.''')
else:
    st.subheader("Record Audio or Upload a File")
    uploaded_file = st.file_uploader("Or upload an audio file", type=["mp3", "wav", "m4a", "ogg"], key="uploaded_file")
    if uploaded_file:
        # Preview with the upload's own MIME type; mp3/m4a/ogg are accepted
        # too, so a fixed "audio/wav" would mislabel them.
        st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")
        audio_data = uploaded_file

    if audio_data:
        # Keep the upload's real extension on the temp file.
        suffix = Path(audio_data.name).suffix or ".wav"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmpfile:
            # getvalue() returns the whole buffer regardless of the current
            # stream position, unlike read().
            tmpfile.write(audio_data.getvalue())
            tmpfile_path = tmpfile.name

        try:
            # Transcribe audio
            segments, info = model.transcribe(tmpfile_path, beam_size=5)
            # Display detected language
            st.write(f"Detected language: {info.language} (Confidence: {info.language_probability:.2f})")
            # Display transcription
            st.subheader("Transcription:")
            # `segments` is consumed here, while the temp file still exists.
            faster_whisper_transcription = " ".join(segment.text for segment in segments)
        finally:
            # The file was created with delete=False, so remove it explicitly
            # to avoid leaving one temp file behind per run.
            Path(tmpfile_path).unlink(missing_ok=True)

        corrected_version = ask_gemini(faster_whisper_transcription)
        st.write(corrected_version)
# Zero-height HTML component whose script measures the app's main container
# (class "stMain") and posts its scroll height to the window two levels up,
# presumably so a page embedding this app can resize its iframe — confirm
# against the host page's 'setHeight' message handler.
components.html(
    """
    <script>
    function sendHeightWhenReady() {
        const el = window.parent.document.getElementsByClassName('stMain')[0];
        if (!el) {
            // Container not rendered yet; the 1s interval below polls again.
            return;
        }
        const height = el.scrollHeight;
        console.log("Sending height to parent:", height);
        window.parent.parent.postMessage({ type: 'setHeight', height: height }, '*');
    }
    window.onload = sendHeightWhenReady;
    window.addEventListener('resize', sendHeightWhenReady);
    // Single polling timer; no extra retry timers are started, so they cannot
    // accumulate while the container is missing.
    setInterval(sendHeightWhenReady, 1000);
    </script>
    """,
    height=0,
)