Surendradjh commited on
Commit
c711488
·
verified ·
1 Parent(s): 8432676

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -6
app.py CHANGED
@@ -1,24 +1,86 @@
1
- import streamlit as st
2
 
3
- from transformers import pipeline
 
 
 
 
 
4
 
 
 
 
 
 
5
 
6
- st.title("Project Shazam - Audio File Uploader")
 
 
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  uploaded_file = st.file_uploader("Upload any audio file", type=None)
9
 
 
10
  if uploaded_file is not None:
11
  audio_file = uploaded_file.read()
12
  st.session_state.audio_file = audio_file
13
  st.success("Audio file uploaded and stored in the background as 'audio_file'!")
14
  st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")
15
 
 
16
  if "audio_file" not in st.session_state:
17
  st.info("Please upload an audio file to store it in the background.")
18
  else:
19
- st.info("Audio file is stored in the background. You can proceed with further processing.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
 
 
 
 
21
 
22
- pipe = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
 
 
23
 
24
- st.write(pipe(audio_file))
 
import io

import numpy as np
import soundfile as sf
import streamlit as st
from scipy import signal
from transformers import pipeline

# English ASR model (Wav2Vec2 fine-tuned from XLSR-53).
MODEL_NAME = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
# Wav2Vec2 feature extractors expect 16 kHz mono input — TODO confirm against
# the model card if the model is ever swapped.
TARGET_SAMPLE_RATE = 16000


@st.cache_resource
def load_asr_pipeline():
    """Build the ASR pipeline once per server process.

    Without st.cache_resource, Streamlit would reload the (large) model on
    every rerun, i.e. on every widget interaction.
    """
    return pipeline("automatic-speech-recognition", model=MODEL_NAME)


def _prepare_audio(audio_bytes: bytes) -> np.ndarray:
    """Decode raw audio bytes to a mono waveform resampled to 16 kHz.

    Propagates soundfile's exceptions for unsupported or corrupt input;
    the caller reports them to the user.
    """
    audio, sample_rate = sf.read(io.BytesIO(audio_bytes))
    # Down-mix multi-channel audio: the model expects a single channel.
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    # Resample only when needed (FFT-based resampling via scipy).
    if sample_rate != TARGET_SAMPLE_RATE:
        audio = signal.resample(
            audio, int(len(audio) * TARGET_SAMPLE_RATE / sample_rate)
        )
    return audio


st.title("Project Shazam - Audio File Uploader with Transcription")

# File uploader for any audio file (type=None accepts any extension).
uploaded_file = st.file_uploader("Upload any audio file", type=None)

# Persist the uploaded bytes in session state so they survive reruns.
if uploaded_file is not None:
    audio_file = uploaded_file.read()
    st.session_state.audio_file = audio_file
    st.success("Audio file uploaded and stored in the background as 'audio_file'!")
    st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")

if "audio_file" not in st.session_state:
    st.info("Please upload an audio file to store it in the background.")
else:
    st.info("Audio file is stored in the background. Processing for transcription...")

    # Load the Wav2Vec2 model for automatic speech recognition (cached).
    try:
        pipe = load_asr_pipeline()
        st.write("Model loaded successfully!")
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        st.warning("The model might be too large for Hugging Face Spaces' free tier. Try a smaller model like 'facebook/wav2vec2-base-960h'.")
        pipe = None

    if pipe:
        try:
            audio = _prepare_audio(st.session_state.audio_file)
            # Pass the sampling rate explicitly instead of relying on the
            # pipeline's assumed default for bare numpy arrays.
            transcription = pipe(
                {"raw": audio, "sampling_rate": TARGET_SAMPLE_RATE}
            )
            st.success("Transcription completed!")
            st.write("**Transcription:**", transcription["text"])
        except Exception as e:
            st.error(f"Error processing audio: {str(e)}")
            st.info("Ensure the audio file is in a supported format (e.g., WAV, MP3) and is not corrupted.")
86