daniyal committed on
Commit
daf9786
·
1 Parent(s): f58949a

initial commit

Browse files
Files changed (2) hide show
  1. app.py +36 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import WhisperForConditionalGeneration, WhisperTokenizer, WhisperProcessor
3
+ import torchaudio
4
+ import torch
5
+ import io
6
+
7
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hugging Face Hub id of the fine-tuned Whisper checkpoint served by this app.
model_path = "Danyal-AI/whisper-small-FT_on_lahaja"


@st.cache_resource
def _load_whisper(checkpoint, target_device):
    """Load the tokenizer, processor and model for *checkpoint* once.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects keeps the checkpoint from being re-read and
    moved to the device again on each rerun.

    Args:
        checkpoint: Model id or path accepted by ``from_pretrained``.
        target_device: Device string the model is moved to.

    Returns:
        A ``(tokenizer, processor, model)`` tuple.
    """
    whisper_tokenizer = WhisperTokenizer.from_pretrained(
        checkpoint, language="Hindi", task="transcribe"
    )
    whisper_processor = WhisperProcessor.from_pretrained(
        checkpoint, language="Hindi", task="transcribe"
    )
    whisper_model = WhisperForConditionalGeneration.from_pretrained(checkpoint)
    return whisper_tokenizer, whisper_processor, whisper_model.to(target_device)


# NOTE: `feature_extractor` actually holds a WhisperProcessor; the name is
# kept unchanged because the rest of the file refers to it.
tokenizer, feature_extractor, model = _load_whisper(model_path, device)
14
+
15
def transcribe_audio(audio_bytes):
    """Transcribe an encoded audio clip with the module-level Whisper model.

    The clip is decoded with ``torchaudio.load``, down-mixed to a single
    channel, resampled to 16 kHz when needed, converted to input features by
    the module-level ``feature_extractor`` and decoded by ``model`` and
    ``tokenizer``.

    NOTE(review): Whisper's feature extractor presumably pads/truncates to a
    fixed 30 s window by default, so longer clips may be cut off -- confirm.

    Args:
        audio_bytes: Raw bytes of an audio file in a format that
            ``torchaudio.load`` can decode.

    Returns:
        The transcription of the clip as a string; an empty string when the
        decoded clip contains no samples.
    """
    target_rate = 16000

    waveform, source_rate = torchaudio.load(io.BytesIO(audio_bytes))
    if waveform.numel() == 0:
        # Nothing to transcribe; avoid feeding an empty tensor to the model.
        return ""

    # Average over the channel axis. For mono input this equals the previous
    # squeeze(0); for stereo it yields the 1-D signal the extractor expects
    # instead of a 2-D (channels, frames) tensor.
    mono = waveform.mean(dim=0)

    if source_rate != target_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=source_rate, new_freq=target_rate
        )
        mono = resampler(mono)

    # Hand the extractor a NumPy array and let it build the tensor directly,
    # rather than converting a list of arrays with torch.tensor afterwards.
    features = feature_extractor(
        mono.numpy(), sampling_rate=target_rate, return_tensors="pt"
    ).input_features

    generated_ids = model.generate(input_features=features.to(device))
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
25
+
26
# Page header and a short instruction for the user.
st.title("Hindi Speech Transcription")
st.write("Upload an audio file to transcribe Hindi speech.")

uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "m4a"])

if uploaded_file is not None:
    # getvalue() returns the whole buffer regardless of the current stream
    # position, unlike read(), which yields nothing once the stream is consumed.
    audio_bytes = uploaded_file.getvalue()
    try:
        # Inference can take a while, so show progress while it runs.
        with st.spinner("Transcribing..."):
            transcription = transcribe_audio(audio_bytes)
    except Exception as exc:  # UI boundary: report the failure in the page
        st.error(f"Could not transcribe this file: {exc}")
    else:
        st.write("Transcription:")
        st.write(transcription)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ torchaudio
3
+ torch
4
+ transformers
5
+ ffmpeg-python