Surendradjh commited on
Commit
c711488
·
verified ·
1 Parent(s): 8432676

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -6
app.py CHANGED
@@ -1,24 +1,86 @@
1
- import streamlit as st
2
 
3
- from transformers import pipeline
 
 
 
 
 
4
 
 
 
 
 
 
5
 
6
- st.title("Project Shazam - Audio File Uploader")
 
 
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  uploaded_file = st.file_uploader("Upload any audio file", type=None)
9
 
 
10
  if uploaded_file is not None:
11
  audio_file = uploaded_file.read()
12
  st.session_state.audio_file = audio_file
13
  st.success("Audio file uploaded and stored in the background as 'audio_file'!")
14
  st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")
15
 
 
16
  if "audio_file" not in st.session_state:
17
  st.info("Please upload an audio file to store it in the background.")
18
  else:
19
- st.info("Audio file is stored in the background. You can proceed with further processing.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
 
 
 
 
21
 
22
- pipe = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
 
 
23
 
24
- st.write(pipe(audio_file))
 
import io

import numpy as np
import soundfile as sf
import streamlit as st
from scipy import signal
from transformers import pipeline

# English ASR model (Wav2Vec2 fine-tuned from XLSR-53).
MODEL_NAME = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
# Wav2Vec2 feature extractors expect 16 kHz mono input — TODO confirm against
# the model card if the model is ever swapped.
TARGET_SAMPLE_RATE = 16000


@st.cache_resource
def load_asr_pipeline():
    """Build the ASR pipeline once per server process.

    Without st.cache_resource, Streamlit would reload the (large) model on
    every rerun, i.e. on every widget interaction.
    """
    return pipeline("automatic-speech-recognition", model=MODEL_NAME)


def _prepare_audio(audio_bytes: bytes) -> np.ndarray:
    """Decode raw audio bytes to a mono waveform resampled to 16 kHz.

    Propagates soundfile's exceptions for unsupported or corrupt input;
    the caller reports them to the user.
    """
    audio, sample_rate = sf.read(io.BytesIO(audio_bytes))
    # Down-mix multi-channel audio: the model expects a single channel.
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    # Resample only when needed (FFT-based resampling via scipy).
    if sample_rate != TARGET_SAMPLE_RATE:
        audio = signal.resample(
            audio, int(len(audio) * TARGET_SAMPLE_RATE / sample_rate)
        )
    return audio


st.title("Project Shazam - Audio File Uploader with Transcription")

# File uploader for any audio file (type=None accepts any extension).
uploaded_file = st.file_uploader("Upload any audio file", type=None)

# Persist the uploaded bytes in session state so they survive reruns.
if uploaded_file is not None:
    audio_file = uploaded_file.read()
    st.session_state.audio_file = audio_file
    st.success("Audio file uploaded and stored in the background as 'audio_file'!")
    st.write(f"Stored audio file size: {len(st.session_state.audio_file)} bytes")

if "audio_file" not in st.session_state:
    st.info("Please upload an audio file to store it in the background.")
else:
    st.info("Audio file is stored in the background. Processing for transcription...")

    # Load the Wav2Vec2 model for automatic speech recognition (cached).
    try:
        pipe = load_asr_pipeline()
        st.write("Model loaded successfully!")
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        st.warning("The model might be too large for Hugging Face Spaces' free tier. Try a smaller model like 'facebook/wav2vec2-base-960h'.")
        pipe = None

    if pipe:
        try:
            audio = _prepare_audio(st.session_state.audio_file)
            # Pass the sampling rate explicitly instead of relying on the
            # pipeline's assumed default for bare numpy arrays.
            transcription = pipe(
                {"raw": audio, "sampling_rate": TARGET_SAMPLE_RATE}
            )
            st.success("Transcription completed!")
            st.write("**Transcription:**", transcription["text"])
        except Exception as e:
            st.error(f"Error processing audio: {str(e)}")
            st.info("Ensure the audio file is in a supported format (e.g., WAV, MP3) and is not corrupted.")
86