Spaces:

ojas121
/

speech_emotion_project

Running

App Files Community

ojas121 committed on Mar 11

Commit

3867db1

verified ·

1 Parent(s): e3f2196

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -12

app.py CHANGED Viewed

@@ -10,16 +10,52 @@ import wave
 import json
 from vosk import Model, KaldiRecognizer
 from transformers import pipeline
-from huggingface_hub import snapshot_download, login
 from pydub import AudioSegment
 import noisereduce as nr
 # ✅ Auto-Download Vosk Model (Speech-to-Text)
 VOSK_MODEL = "vosk-model-small-en-us-0.15"
 if not os.path.exists(VOSK_MODEL):
-    st.write("Downloading Vosk Model...")
     subprocess.run(["wget", "-O", "vosk.zip", "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"])
     subprocess.run(["unzip", "vosk.zip"])
     subprocess.run(["rm", "vosk.zip"])
@@ -30,15 +66,15 @@ model = Model(VOSK_MODEL)
 # ✅ Auto-Download Wav2Vec2 Model (Emotion Detection)
 WAV2VEC_MODEL = "facebook/wav2vec2-large-xlsr-53"
 if not os.path.exists(WAV2VEC_MODEL):
-    st.write(f"Downloading {WAV2VEC_MODEL}...")
     snapshot_download(repo_id=WAV2VEC_MODEL, local_dir=WAV2VEC_MODEL)
 # Load emotion detection model
 emotion_model = pipeline("audio-classification", model=WAV2VEC_MODEL)
 # ✅ Streamlit UI
-st.title("🎙️ Speech Detection System using Mozilla Common Voice")
-st.write("Upload an audio file and get real-time speech-to-text, noise filtering, and emotion analysis.")
 uploaded_file = st.file_uploader("Upload an MP3/WAV file", type=["mp3", "wav"])
@@ -58,12 +94,14 @@ if uploaded_file:
     # Load audio
     y, sr = librosa.load(file_path, sr=16000)
-    # Display waveform
     fig, ax = plt.subplots(figsize=(10, 4))
     librosa.display.waveshow(y, sr=sr, ax=ax)
     st.pyplot(fig)
     # ✅ Noise Reduction
     y_denoised = nr.reduce_noise(y=y, sr=sr)
     denoised_path = file_path.replace(".wav", "_denoised.wav")
     sf.write(denoised_path, y_denoised, sr)
@@ -82,16 +120,18 @@ if uploaded_file:
                 return result["text"]
     transcription = transcribe_audio(file_path)
-    st.subheader("📝 Transcribed Text:")
-    st.write(transcription)
     # ✅ Emotion Detection
     emotion_result = emotion_model(file_path)
-    st.subheader("😊 Emotion Analysis:")
     st.write(emotion_result)
     # ✅ Play Original & Denoised Audio
     st.audio(file_path, format="audio/wav", start_time=0)
-    st.subheader("🔊 Denoised Audio:")
     st.audio(denoised_path, format="audio/wav", start_time=0)

 import json
 from vosk import Model, KaldiRecognizer
 from transformers import pipeline
+from huggingface_hub import snapshot_download
 from pydub import AudioSegment
 import noisereduce as nr
+# 🎨 Apply Custom CSS Styling
+st.markdown(
+    """
+    <style>
+        .stApp {
+            background-color: #f0f2f6;
+        }
+        .title {
+            font-size: 32px;
+            text-align: center;
+            color: #4A90E2;
+            font-weight: bold;
+        }
+        .subheader {
+            font-size: 20px;
+            font-weight: bold;
+            color: #333;
+        }
+        .stButton>button {
+            background-color: #4A90E2 !important;
+            color: white !important;
+            font-size: 18px !important;
+            padding: 10px 24px !important;
+            border-radius: 10px !important;
+            border: none !important;
+        }
+        .stAudio {
+            width: 100% !important;
+        }
+        .stMarkdown {
+            font-size: 16px;
+            color: #333;
+        }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
 # ✅ Auto-Download Vosk Model (Speech-to-Text)
 VOSK_MODEL = "vosk-model-small-en-us-0.15"
 if not os.path.exists(VOSK_MODEL):
+    st.write("📥 Downloading Vosk Model...")
     subprocess.run(["wget", "-O", "vosk.zip", "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"])
     subprocess.run(["unzip", "vosk.zip"])
     subprocess.run(["rm", "vosk.zip"])
 # ✅ Auto-Download Wav2Vec2 Model (Emotion Detection)
 WAV2VEC_MODEL = "facebook/wav2vec2-large-xlsr-53"
 if not os.path.exists(WAV2VEC_MODEL):
+    st.write(f"📥 Downloading {WAV2VEC_MODEL}...")
     snapshot_download(repo_id=WAV2VEC_MODEL, local_dir=WAV2VEC_MODEL)
 # Load emotion detection model
 emotion_model = pipeline("audio-classification", model=WAV2VEC_MODEL)
 # ✅ Streamlit UI
+st.markdown("<div class='title'>🎙️ Speech Detection System</div>", unsafe_allow_html=True)
+st.markdown("<div class='subheader'>🔍 Upload an audio file for speech-to-text, noise filtering, and emotion analysis.</div>", unsafe_allow_html=True)
 uploaded_file = st.file_uploader("Upload an MP3/WAV file", type=["mp3", "wav"])
     # Load audio
     y, sr = librosa.load(file_path, sr=16000)
+    # 🎵 Display waveform
+    st.markdown("<div class='subheader'>🎼 Audio Waveform:</div>", unsafe_allow_html=True)
     fig, ax = plt.subplots(figsize=(10, 4))
     librosa.display.waveshow(y, sr=sr, ax=ax)
     st.pyplot(fig)
     # ✅ Noise Reduction
+    st.markdown("<div class='subheader'>🔇 Applying Noise Reduction...</div>", unsafe_allow_html=True)
     y_denoised = nr.reduce_noise(y=y, sr=sr)
     denoised_path = file_path.replace(".wav", "_denoised.wav")
     sf.write(denoised_path, y_denoised, sr)
                 return result["text"]
     transcription = transcribe_audio(file_path)
+    st.markdown("<div class='subheader'>📝 Transcribed Text:</div>", unsafe_allow_html=True)
+    st.markdown(f"<div class='stMarkdown'>{transcription}</div>", unsafe_allow_html=True)
     # ✅ Emotion Detection
+    st.markdown("<div class='subheader'>😊 Emotion Analysis:</div>", unsafe_allow_html=True)
     emotion_result = emotion_model(file_path)
     st.write(emotion_result)
     # ✅ Play Original & Denoised Audio
+    st.markdown("<div class='subheader'>🔊 Play Audio:</div>", unsafe_allow_html=True)
     st.audio(file_path, format="audio/wav", start_time=0)
+    st.markdown("<div class='subheader'>🔇 Denoised Audio:</div>", unsafe_allow_html=True)
     st.audio(denoised_path, format="audio/wav", start_time=0)