gmustafa413 committed on
Commit
9c9499a
·
verified ·
1 Parent(s): fc9f129

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -19
app.py CHANGED
@@ -2,9 +2,11 @@ import streamlit as st
2
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
3
  import torch
4
  import librosa
 
 
5
 
6
  # Load models
7
- @st.cache_resource # Cache the models for faster reloads
8
  def load_models():
9
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
10
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
@@ -14,27 +16,51 @@ def load_models():
14
  processor, model, summarizer = load_models()
15
 
16
  # Function to convert audio to text
17
- def audio_to_text(audio_path):
18
- speech, _ = librosa.load(audio_path, sr=16000)
19
- input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
20
- with torch.no_grad():
21
- logits = model(input_values).logits
22
- predicted_ids = torch.argmax(logits, dim=-1)
23
- transcription = processor.decode(predicted_ids[0])
24
- return transcription
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  # Function to summarize text
27
  def summarize_text(text):
28
- summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
29
- return summary[0]['summary_text']
 
 
 
 
30
 
31
  # Streamlit app
32
  def main():
33
  st.title("Audio Summarization App")
34
- st.write("Upload an audio file to get a summary of its content.")
35
 
36
  # File uploader
37
- audio_file = st.file_uploader("Upload Audio File", type=["wav", "mp3", "ogg"])
38
 
39
  if audio_file is not None:
40
  st.audio(audio_file, format="audio/wav")
@@ -44,13 +70,15 @@ def main():
44
  with st.spinner("Processing audio..."):
45
  # Convert audio to text
46
  text = audio_to_text(audio_file)
47
- st.subheader("Transcribed Text:")
48
- st.write(text)
 
49
 
50
- # Summarize the text
51
- summary = summarize_text(text)
52
- st.subheader("Summary:")
53
- st.write(summary)
 
54
 
55
  if __name__ == "__main__":
56
  main()
 
2
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
3
  import torch
4
  import librosa
5
+ import soundfile as sf
6
+ import io
7
 
8
  # Load models
9
+ @st.cache_resource
10
  def load_models():
11
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
12
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
 
16
  processor, model, summarizer = load_models()
17
 
18
  # Function to convert audio to text
19
def audio_to_text(audio_file):
    """Transcribe an uploaded audio file with the Wav2Vec2 CTC model.

    Args:
        audio_file: Binary file-like object supporting ``read()`` and
            ``seek()`` (in this app, the value returned by
            ``st.file_uploader``).

    Returns:
        The decoded transcription string, or ``None`` if any step failed;
        in that case the error is reported to the UI through ``st.error``.
    """
    try:
        # Copy the upload into a private buffer, then rewind the original so
        # any later reader of the same object still sees the whole file.
        audio_bytes = audio_file.read()
        audio_file.seek(0)

        with io.BytesIO(audio_bytes) as buffer:
            data, samplerate = sf.read(buffer)

        # soundfile yields a (frames, channels) array for multi-channel
        # audio. Resampling works along the last axis and the processor is
        # fed a single waveform, so average the channels down to mono first.
        if data.ndim > 1:
            data = data.mean(axis=1)

        # The processor below is called with sampling_rate=16000, so bring
        # the waveform to that rate when the file uses a different one.
        if samplerate != 16000:
            data = librosa.resample(data, orig_sr=samplerate, target_sr=16000)

        input_values = processor(
            data, return_tensors="pt", sampling_rate=16000
        ).input_values

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(input_values).logits

        # Greedy decoding: take the highest-scoring token at every frame.
        predicted_ids = torch.argmax(logits, dim=-1)
        return processor.decode(predicted_ids[0])
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        st.error(f"Error processing audio: {e}")
        return None
47
 
48
  # Function to summarize text
49
  def summarize_text(text):
50
+ try:
51
+ summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
52
+ return summary[0]['summary_text']
53
+ except Exception as e:
54
+ st.error(f"Error summarizing text: {e}")
55
+ return None
56
 
57
  # Streamlit app
58
  def main():
59
  st.title("Audio Summarization App")
60
+ st.write("Upload an audio file (WAV or MP3) to get a summary of its content.")
61
 
62
  # File uploader
63
+ audio_file = st.file_uploader("Upload Audio File", type=["wav", "mp3"])
64
 
65
  if audio_file is not None:
66
  st.audio(audio_file, format="audio/wav")
 
70
  with st.spinner("Processing audio..."):
71
  # Convert audio to text
72
  text = audio_to_text(audio_file)
73
+ if text:
74
+ st.subheader("Transcribed Text:")
75
+ st.write(text)
76
 
77
+ # Summarize the text
78
+ summary = summarize_text(text)
79
+ if summary:
80
+ st.subheader("Summary:")
81
+ st.write(summary)
82
 
83
  if __name__ == "__main__":
84
  main()