BissakaAI committed on
Commit
28a23d6
·
verified ·
1 Parent(s): 0ff596e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +68 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ import librosa
5
+ from transformers import (
6
+ AutoProcessor,
7
+ SeamlessM4Tv2ForSpeechToText
8
+ )
9
+
10
+
11
# --- Configuration -------------------------------------------------------
# Hub checkpoint to load, the access token read from the HF_TOKEN
# environment variable (None when unset), and the torch device used for
# inference.
ASR_MODEL_ID = "facebook/seamless-m4t-v2-large"
HF_TOKEN = os.getenv("HF_TOKEN")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


# --- One-time model start-up ---------------------------------------------
# Both objects are created at import time so every request handled by
# transcribe_audio reuses the same processor and model instance.
print("Loading ASR processor...")
processor = AutoProcessor.from_pretrained(ASR_MODEL_ID, token=HF_TOKEN)

print("🔹 Loading ASR model...")
asr_model = SeamlessM4Tv2ForSpeechToText.from_pretrained(
    ASR_MODEL_ID, token=HF_TOKEN
)
asr_model = asr_model.to(DEVICE)

# Switch to evaluation mode before serving requests.
asr_model.eval()
print("ASR model loaded successfully")
30
+
31
def transcribe_audio(audio_path, tgt_lang=None):
    """Transcribe a speech recording to text with SeamlessM4T v2.

    Args:
        audio_path: Path to the audio file to transcribe, or ``None`` when
            the caller supplied no audio.
        tgt_lang: SeamlessM4T language code of the text to produce (for
            plain transcription this is the language being spoken). When
            omitted, the ``ASR_TGT_LANG`` environment variable is used,
            falling back to ``"eng"``.
            NOTE(review): ``"eng"`` is an assumed default -- set
            ``ASR_TGT_LANG`` to the language this service actually receives.

    Returns:
        The decoded transcription with surrounding whitespace removed, or
        the message ``"No audio provided."`` when there is nothing to
        transcribe (no path, or a file that decodes to zero samples).
    """
    if audio_path is None:
        return "No audio provided."

    # The model picks its output language from `tgt_lang`; called without
    # it, transformers only logs a warning and the generated text is not
    # reliable, so always resolve an explicit language.
    language = tgt_lang or os.getenv("ASR_TGT_LANG", "eng")

    # Decode and resample to the rate that is also declared to the processor.
    sample_rate = 16000
    speech, _ = librosa.load(audio_path, sr=sample_rate)
    if speech.size == 0:
        # Nothing to feed the feature extractor; treat like missing audio.
        return "No audio provided."

    inputs = processor(
        audios=speech,
        sampling_rate=sample_rate,
        return_tensors="pt",
    ).to(DEVICE)

    with torch.no_grad():
        # Forward everything the processor produced (input_features and the
        # attention mask) rather than input_features alone.
        predicted_ids = asr_model.generate(
            **inputs,
            tgt_lang=language,
            max_new_tokens=300,
        )

    transcription = processor.batch_decode(
        predicted_ids,
        skip_special_tokens=True,
    )[0]

    return transcription.strip()
56
+
57
+
58
# Gradio UI: one audio input delivered to transcribe_audio as a file path,
# one text box showing the returned transcription. Flagging is turned off.
_speech_input = gr.Audio(type="filepath", label="Upload Speech")
_transcript_output = gr.Textbox(label="Transcription")

demo = gr.Interface(
    fn=transcribe_audio,
    inputs=_speech_input,
    outputs=_transcript_output,
    title="HealthAtlas ASR Service",
    description="Speech → Text using SeamlessM4T v2",
    allow_flagging="never",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ accelerate
4
+ sentencepiece
5
+ soundfile
6
+ librosa
7
+ gradio