Surendradjh committed on
Commit
578f5d6
·
verified ·
1 Parent(s): b697d0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -66
app.py CHANGED
@@ -1,84 +1,24 @@
1
- # import streamlit as st
2
-
3
- # from transformers import pipeline
4
-
5
-
6
- # st.title("Project Shazam - Audio File Uploader")
7
-
8
- # uploaded_file = st.file_uploader("Upload any audio file", type=None)
9
-
10
- # if uploaded_file is not None:
11
- # audio_file = uploaded_file.read()
12
- # st.session_state.audio_file = audio_file
13
- # st.success("Audio file uploaded and stored in the background as 'audio_file'!")
14
- # st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")
15
-
16
- # if "audio_file" not in st.session_state:
17
- # st.info("Please upload an audio file to store it in the background.")
18
- # else:
19
- # st.info("Audio file is stored in the background. You can proceed with further processing.")
20
-
21
-
22
- # pipe = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
23
-
24
- # st.write(pipe(audio_file))
25
-
26
  import streamlit as st
 
27
  from transformers import pipeline
28
- import librosa
29
- import numpy as np
30
- from scipy import signal
31
 
32
- st.title("Project Shazam - Audio File Uploader with Transcription")
33
 
34
- # File uploader for any audio file
 
35
  uploaded_file = st.file_uploader("Upload any audio file", type=None)
36
 
37
- # Store the uploaded file content in audio_file variable using session state
38
  if uploaded_file is not None:
39
  audio_file = uploaded_file.read()
40
  st.session_state.audio_file = audio_file
41
  st.success("Audio file uploaded and stored in the background as 'audio_file'!")
42
  st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")
43
 
44
- # Check if audio_file exists in session state
45
  if "audio_file" not in st.session_state:
46
  st.info("Please upload an audio file to store it in the background.")
47
  else:
48
- st.info("Audio file is stored in the background. Processing for transcription...")
49
-
50
- # Load the Wav2Vec2 model for automatic speech recognition
51
- try:
52
- # Use a smaller model to fit within Hugging Face Spaces' free tier limits
53
- pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=-1) # device=-1 forces CPU
54
- st.write("Model loaded successfully!")
55
- except Exception as e:
56
- st.error(f"Error loading model: {str(e)}")
57
- st.warning("There might be an issue with the model or dependencies. Please check the logs.")
58
- pipe = None
59
-
60
- if pipe:
61
- try:
62
- # Read the audio file from session state
63
- audio_bytes = st.session_state.audio_file
64
-
65
- # Load the audio using librosa
66
- audio, sample_rate = librosa.load(audio_bytes, sr=None)
67
-
68
- # Ensure the audio is mono (Wav2Vec2 expects mono audio)
69
- if len(audio.shape) > 1:
70
- audio = np.mean(audio, axis=1)
71
 
72
- # Resample to 16kHz (Wav2Vec2 models expect 16kHz)
73
- target_sample_rate = 16000
74
- if sample_rate != target_sample_rate:
75
- audio = signal.resample(audio, int(len(audio) * target_sample_rate / sample_rate))
76
 
77
- # Transcribe the audio
78
- transcription = pipe(audio)
79
- st.success("Transcription completed!")
80
- st.write("**Transcription:**", transcription["text"])
81
 
82
- except Exception as e:
83
- st.error(f"Error processing audio: {str(e)}")
84
- st.info("Ensure the audio file is in a supported format (e.g., WAV, MP3) and is not corrupted.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+
3
  from transformers import pipeline
 
 
 
4
 
 
5
 
6
+ st.title("Project Shazam - Audio File Uploader")
7
+
8
  uploaded_file = st.file_uploader("Upload any audio file", type=None)
9
 
 
10
  if uploaded_file is not None:
11
  audio_file = uploaded_file.read()
12
  st.session_state.audio_file = audio_file
13
  st.success("Audio file uploaded and stored in the background as 'audio_file'!")
14
  st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")
15
 
 
16
  if "audio_file" not in st.session_state:
17
  st.info("Please upload an audio file to store it in the background.")
18
  else:
19
+ st.info("Audio file is stored in the background. You can proceed with further processing.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
 
 
 
 
21
 
22
+ pipe = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
 
 
 
23
 
24
+ st.write(pipe(audio_file))