Somalitts committed on
Commit
172cd83
·
verified ·
1 Parent(s): 7ed7860

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File
2
+ from fastapi.responses import JSONResponse
3
+ import torchaudio
4
+ import torch
5
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
6
+ import io
7
+
8
# Hub identifier shared by the processor and the CTC model, so the two
# artefacts are always loaded from the same repository.
_MODEL_ID = "Mustafaa4a/ASR-Somali"

# Load the processor and model once, at import time, so every request
# reuses the same objects instead of reloading them.
processor = Wav2Vec2Processor.from_pretrained(_MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(_MODEL_ID)
model.eval()  # switch the network to evaluation mode for inference

# Metadata passed to the FastAPI constructor (title, description, version).
_API_METADATA = {
    "title": "Somali Speech-to-Text API",
    "description": "Upload a Somali audio file (.wav) and receive text transcription using ASR model.",
    "version": "1.0",
}

# The ASGI application object that the route decorators below attach to.
app = FastAPI(**_API_METADATA)
19
+
20
@app.post("/transcribe")
async def transcribe(audio: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the module-level ASR model.

    The upload is decoded with torchaudio, down-mixed to a single channel,
    resampled to 16 kHz when necessary, and run through ``processor`` and
    ``model``; the greedy (argmax) CTC decoding is returned.

    Args:
        audio: The uploaded audio file (multipart form field ``audio``).

    Returns:
        A ``JSONResponse`` with ``{"transcription": <text>}`` on success, or
        a 400 response with ``{"error": <message>}`` when the upload is
        empty or cannot be decoded as audio.
    """
    target_rate = 16000

    audio_bytes = await audio.read()
    if not audio_bytes:
        return JSONResponse(
            status_code=400,
            content={"error": "Uploaded audio file is empty."},
        )

    try:
        waveform, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))
    except Exception:
        # Decoding errors are backend-specific; any failure here means the
        # client sent something that is not readable audio, so answer with
        # a client error instead of letting it surface as a 500.
        return JSONResponse(
            status_code=400,
            content={"error": "Could not decode the uploaded audio file."},
        )

    if waveform.numel() == 0:
        return JSONResponse(
            status_code=400,
            content={"error": "Uploaded audio contains no samples."},
        )

    # torchaudio.load returns (channels, frames) by default. Average the
    # channels so stereo/multi-channel uploads become one mono signal; the
    # previous squeeze() only removed the channel axis for mono input.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    # Ensure 16kHz sample rate
    if sample_rate != target_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=target_rate
        )
        waveform = resampler(waveform)

    # NOTE(review): the forward pass below runs synchronously inside this
    # async handler and will hold the event loop while it computes.
    inputs = processor(waveform, sampling_rate=target_rate, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy decoding: most likely token per frame, then collapse to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.decode(predicted_ids[0])
    return JSONResponse(content={"transcription": transcription})