Spaces:

BissakaAI
/

spaceb

Sleeping

BissakaAI committed 17 days ago

Commit

ba7ca0f

verified ·

1 Parent(s): 624a6c7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,26 +8,26 @@ ASR_MODEL_ID = "facebook/seamless-m4t-v2-large"
 HF_TOKEN = os.getenv("HF_TOKEN")
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-print("🔹 Loading processor...")
 processor = AutoProcessor.from_pretrained(
     ASR_MODEL_ID,
     token=HF_TOKEN
 )
-print("🔹 Loading ASR model...")
 asr_model = SeamlessM4Tv2ForSpeechToText.from_pretrained(
     ASR_MODEL_ID,
     token=HF_TOKEN
 ).to(DEVICE)
 asr_model.eval()
-print("✅ ASR model loaded")
-def transcribe_audio(audio_path):
-    if audio_path is None:
         return "No audio provided."
-    speech, sr = librosa.load(audio_path, sr=16000)
     inputs = processor(
         audios=speech,
@@ -56,10 +56,9 @@ def transcribe_audio(audio_path):
 demo = gr.Interface(
     fn=transcribe_audio,
-    inputs=gr.Audio(type="filepath", label="Upload Speech"),
     outputs=gr.Textbox(label="Transcription"),
     title="HealthAtlas ASR Service",
-    description="Speech → Text (SeamlessM4T v2)"
 )
 if __name__ == "__main__":

 HF_TOKEN = os.getenv("HF_TOKEN")
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 processor = AutoProcessor.from_pretrained(
     ASR_MODEL_ID,
     token=HF_TOKEN
 )
 asr_model = SeamlessM4Tv2ForSpeechToText.from_pretrained(
     ASR_MODEL_ID,
     token=HF_TOKEN
 ).to(DEVICE)
 asr_model.eval()
+def transcribe_audio(audio):
+    if audio is None:
         return "No audio provided."
+    speech, sr = audio
+    if sr != 16000:
+        speech = librosa.resample(speech, sr, 16000)
     inputs = processor(
         audios=speech,
 demo = gr.Interface(
     fn=transcribe_audio,
+    inputs=gr.Audio(type="numpy", label="Upload Speech"),
     outputs=gr.Textbox(label="Transcription"),
     title="HealthAtlas ASR Service",
 )
 if __name__ == "__main__":