Spaces:

Surendradjh
/

Project_Shazam

Sleeping

App Files Community

Surendradjh committed on Mar 27, 2025

Commit

578f5d6

verified ·

1 Parent(s): b697d0d

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -66

app.py CHANGED Viewed

@@ -1,84 +1,24 @@
-# import streamlit as st
-# from transformers import pipeline
-# st.title("Project Shazam - Audio File Uploader")
-# uploaded_file = st.file_uploader("Upload any audio file", type=None)
-# if uploaded_file is not None:
-#     audio_file = uploaded_file.read()
-#     st.session_state.audio_file = audio_file
-#     st.success("Audio file uploaded and stored in the background as 'audio_file'!")
-#     st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")
-# if "audio_file" not in st.session_state:
-#     st.info("Please upload an audio file to store it in the background.")
-# else:
-#     st.info("Audio file is stored in the background. You can proceed with further processing.")
-# pipe = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
-# st.write(pipe(audio_file))
 import streamlit as st
 from transformers import pipeline
-import librosa
-import numpy as np
-from scipy import signal
-st.title("Project Shazam - Audio File Uploader with Transcription")
-# File uploader for any audio file
 uploaded_file = st.file_uploader("Upload any audio file", type=None)
-# Store the uploaded file content in audio_file variable using session state
 if uploaded_file is not None:
     audio_file = uploaded_file.read()
     st.session_state.audio_file = audio_file
     st.success("Audio file uploaded and stored in the background as 'audio_file'!")
     st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")
-# Check if audio_file exists in session state
 if "audio_file" not in st.session_state:
     st.info("Please upload an audio file to store it in the background.")
 else:
-    st.info("Audio file is stored in the background. Processing for transcription...")
-    # Load the Wav2Vec2 model for automatic speech recognition
-    try:
-        # Use a smaller model to fit within Hugging Face Spaces' free tier limits
-        pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=-1)  # device=-1 forces CPU
-        st.write("Model loaded successfully!")
-    except Exception as e:
-        st.error(f"Error loading model: {str(e)}")
-        st.warning("There might be an issue with the model or dependencies. Please check the logs.")
-        pipe = None
-    if pipe:
-        try:
-            # Read the audio file from session state
-            audio_bytes = st.session_state.audio_file
-            # Load the audio using librosa
-            audio, sample_rate = librosa.load(audio_bytes, sr=None)
-            # Ensure the audio is mono (Wav2Vec2 expects mono audio)
-            if len(audio.shape) > 1:
-                audio = np.mean(audio, axis=1)
-            # Resample to 16kHz (Wav2Vec2 models expect 16kHz)
-            target_sample_rate = 16000
-            if sample_rate != target_sample_rate:
-                audio = signal.resample(audio, int(len(audio) * target_sample_rate / sample_rate))
-            # Transcribe the audio
-            transcription = pipe(audio)
-            st.success("Transcription completed!")
-            st.write("**Transcription:**", transcription["text"])
-        except Exception as e:
-            st.error(f"Error processing audio: {str(e)}")
-            st.info("Ensure the audio file is in a supported format (e.g., WAV, MP3) and is not corrupted.")

 import streamlit as st
 from transformers import pipeline
+st.title("Project Shazam - Audio File Uploader")
 uploaded_file = st.file_uploader("Upload any audio file", type=None)
 if uploaded_file is not None:
     audio_file = uploaded_file.read()
     st.session_state.audio_file = audio_file
     st.success("Audio file uploaded and stored in the background as 'audio_file'!")
     st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")
 if "audio_file" not in st.session_state:
     st.info("Please upload an audio file to store it in the background.")
 else:
+    st.info("Audio file is stored in the background. You can proceed with further processing.")
+pipe = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
+st.write(pipe(audio_file))