import os
import tempfile

import librosa
import numpy as np
import torch
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
|
# FastAPI application object; the route decorators in this file register on it.
app = FastAPI(title="Whisper ASR API")

# Run inference on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Location handed to ``from_pretrained`` for the model weights.
MODEL_DIR = "cisckids2026/marungko-API-Whisper"

print("Loading processor and model...")

# The processor (feature extractor + tokenizer) is loaded from the
# "openai/whisper-small" checkpoint, not from MODEL_DIR.
# NOTE(review): presumably MODEL_DIR does not ship processor files - confirm.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")

# Load the weights, move them to the selected device and switch to eval mode.
model = WhisperForConditionalGeneration.from_pretrained(MODEL_DIR).to(device)
model.eval()

print("Model loaded successfully on", device)
|
|
def load_audio_16k(path: str) -> np.ndarray:
    """Load an audio file as a trimmed, peak-normalized 16 kHz mono signal.

    Args:
        path: Filesystem path of the audio file to decode.

    Returns:
        The samples resampled to 16 kHz and mixed down to mono, with leading
        and trailing silence trimmed (``top_db=20``) and the remainder scaled
        so its largest absolute sample is 1.0. A zero-length or all-zero
        signal is returned unscaled.
    """
    # The sample rate returned by librosa is always the requested 16000.
    audio, _ = librosa.load(path, sr=16000, mono=True)

    # Drop leading/trailing silence; the second return value (the kept
    # interval) is not needed here.
    audio, _ = librosa.effects.trim(audio, top_db=20)

    # ``np.max`` raises ValueError on a zero-length array, so a file that
    # decodes to no samples must skip normalization instead of crashing.
    if audio.size == 0:
        return audio

    peak = np.max(np.abs(audio))
    if peak > 0:
        # Avoid dividing by zero for an all-zero signal.
        audio = audio / peak

    return audio
|
|
def transcribe_array(audio: np.ndarray) -> str:
    """Transcribe a waveform with the module-level Whisper processor and model.

    Args:
        audio: Audio samples; they are handed to the processor as 16 kHz audio.

    Returns:
        The first decoded sequence, with special tokens removed and
        surrounding whitespace stripped.
    """
    # Convert the raw samples to model input features on the target device.
    features = processor(
        audio, sampling_rate=16000, return_tensors="pt"
    ).input_features.to(device)

    # Gradients are not needed for generation.
    with torch.no_grad():
        token_ids = model.generate(features)

    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0].strip()
|
|
@app.get("/")
def root():
    """Report that the API is running and which device the model uses."""
    payload = {"message": "Whisper ASR API is running"}
    payload["device"] = device
    return payload
|
|
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return the text as JSON.

    The upload is written to a temporary file, decoded with
    ``load_audio_16k`` and transcribed with ``transcribe_array``. The
    temporary file is removed before the handler returns.

    Args:
        file: Uploaded audio. Its filename extension (case-insensitive) must
            be one of .wav, .mp3, .m4a, .aac, .flac, .ogg or .caf.

    Returns:
        A ``JSONResponse`` with the keys ``status``, ``filename`` and
        ``transcript``.

    Raises:
        HTTPException: 400 when no filename is supplied, the extension is not
            supported, or the decoded audio contains no samples; 500 when
            saving, decoding or transcription fails.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file uploaded.")

    suffix = os.path.splitext(file.filename)[1].lower()
    if suffix not in (".wav", ".mp3", ".m4a", ".aac", ".flac", ".ogg", ".caf"):
        raise HTTPException(
            status_code=400,
            detail="Unsupported audio format."
        )

    temp_path = None

    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Record the path before writing so the ``finally`` clause can
            # still delete the file if reading or writing the upload fails.
            temp_path = temp_file.name
            temp_file.write(await file.read())

        # Decoding and inference are blocking calls; run them in the worker
        # thread pool so the event loop can keep serving other requests.
        audio = await run_in_threadpool(load_audio_16k, temp_path)

        if audio.size == 0:
            raise HTTPException(
                status_code=400,
                detail="Audio contains no samples."
            )

        transcript = await run_in_threadpool(transcribe_array, audio)

        return JSONResponse({
            "status": "success",
            "filename": file.filename,
            "transcript": transcript
        })

    except HTTPException:
        # Deliberate HTTP errors raised above must not be rewritten as 500s.
        raise

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    finally:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)