import os
import tempfile

import librosa
import numpy as np
import torch
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse

# CHANGED: Whisper imports
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Application object that the route decorators further down register on.
app = FastAPI(title="Whisper ASR API")

# Prefer the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Checkpoint handed to from_pretrained for the model weights.
# NOTE(review): the value looks like a hub repo id rather than a local
# directory, despite the name -- confirm.
MODEL_DIR = "cisckids2026/marungko-API-Whisper"

print("Loading processor and model...")

# IMPORTANT: the processor is taken from the base Whisper checkpoint,
# not from MODEL_DIR.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")

# Load the weights, move them to the selected device and switch to eval mode.
model = WhisperForConditionalGeneration.from_pretrained(MODEL_DIR)
model.to(device)
model.eval()

print("Model loaded successfully on", device)

def load_audio_16k(path: str) -> np.ndarray:
    """Load an audio file as a mono 16 kHz signal, trimmed and peak-normalized.

    Args:
        path: Location of the audio file to decode.

    Returns:
        The samples after silence trimming (``top_db=20``) and scaling so
        that the largest absolute sample is 1. A signal with no samples, or
        one whose peak is 0, is returned without scaling.
    """
    audio, _ = librosa.load(path, sr=16000, mono=True)

    # Nothing to trim or scale. Returning here also keeps np.max below from
    # raising ValueError on a zero-size array.
    if audio.size == 0:
        return audio

    # Trim leading and trailing silence.
    audio, _ = librosa.effects.trim(audio, top_db=20)
    if audio.size == 0:
        return audio

    # Peak-normalize; an all-zero signal is left alone to avoid dividing by 0.
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak

    return audio

def transcribe_array(audio: np.ndarray) -> str:
    """Transcribe a 16 kHz audio signal with the module-level Whisper model.

    Args:
        audio: Audio samples; they are passed to the processor with
            ``sampling_rate=16000``.

    Returns:
        The first decoded transcription with surrounding whitespace removed,
        or ``""`` when ``audio`` contains no samples.
    """
    # No samples means nothing to transcribe, so skip the model entirely.
    if np.size(audio) == 0:
        return ""

    # Turn the raw samples into the model's input features.
    inputs = processor(audio, sampling_rate=16000, return_tensors="pt")
    input_features = inputs.input_features.to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    # A single signal was submitted, so only the first decoded entry is used.
    transcription = processor.batch_decode(
        predicted_ids,
        skip_special_tokens=True,
    )[0]

    return transcription.strip()

@app.get("/")
def root():
    """Report that the API is running and which device the model uses."""
    status = {"message": "Whisper ASR API is running"}
    status["device"] = device
    return status

# File extensions the endpoint accepts (compared after lower-casing).
_SUPPORTED_SUFFIXES = (".wav", ".mp3", ".m4a", ".aac", ".flac", ".ogg", ".caf")


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return the text as JSON.

    The upload is written to a temporary file, decoded with
    ``load_audio_16k`` and transcribed with ``transcribe_array``. The
    temporary file is removed whether or not transcription succeeds.

    Args:
        file: The uploaded audio file. Its filename extension must be one of
            ``_SUPPORTED_SUFFIXES``.

    Returns:
        A JSON response with ``status``, ``filename`` and ``transcript``.

    Raises:
        HTTPException: 400 when the filename is missing or the extension is
            not supported; 500 when saving, decoding or transcription fails.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file uploaded.")

    suffix = os.path.splitext(file.filename)[1].lower()
    if suffix not in _SUPPORTED_SUFFIXES:
        raise HTTPException(
            status_code=400,
            detail="Unsupported audio format."
        )

    temp_path = None

    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Record the path before writing: the file is created with
            # delete=False, so it must still be cleaned up if the read or
            # the write below fails.
            temp_path = temp_file.name
            temp_file.write(await file.read())

        # Decoding and inference are blocking calls; run them in a worker
        # thread so the event loop can keep serving other requests.
        audio = await run_in_threadpool(load_audio_16k, temp_path)
        transcript = await run_in_threadpool(transcribe_array, audio)

        return JSONResponse({
            "status": "success",
            "filename": file.filename,
            "transcript": transcript
        })

    except HTTPException:
        # Keep an explicit HTTP error as-is instead of turning it into a 500.
        raise

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    finally:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)