# Spaces: Nick-2x / Audio-detection (status: Sleeping)
# File size: 1,360 Bytes
import io

import librosa
import torch
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from transformers import pipeline

# ASGI application object; the route decorators in this module register on it.
app = FastAPI()

# Model identifier handed to the transformers pipeline factory.
MODEL_ID = "MelodyMachine/Deepfake-audio-detection-V2"
# Built once at import time so every request reuses the same pipeline object.
pipe = pipeline(task="audio-classification", model=MODEL_ID)

@app.get("/")
async def root():
    """Report that the service is up (simple liveness endpoint)."""
    status_payload = {"status": "Audio Deepfake Detector is running"}
    return status_payload

@app.post("/predict")
async def predict_audio(file: UploadFile = File(...)):
    """Classify an uploaded audio clip with the module-level pipeline.

    The upload is read fully into memory, decoded and resampled to 16 kHz
    with librosa, and then scored by ``pipe``.

    Args:
        file: The audio file sent as multipart form data.

    Returns:
        A dict with the upload's filename, the highest-scoring label
        (``prediction``), its score rounded to 4 decimals (``confidence``),
        every returned label's rounded score (``all_scores``), and
        ``is_deepfake``, which is true when the top label contains "fake"
        (case-insensitive).

    Raises:
        HTTPException: 400 when the upload is empty, cannot be decoded as
            audio, or decodes to zero samples.
    """
    # 1. Read the uploaded file into memory and reject empty uploads early,
    #    before spending time in the decoder.
    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # 2. Decode and resample to 16 kHz. Decoding is CPU-bound, so it runs in
    #    the threadpool instead of blocking the event loop. Any decoder
    #    failure is treated as a bad upload (client error) rather than
    #    escaping as an unhandled 500.
    try:
        audio, _ = await run_in_threadpool(
            librosa.load, io.BytesIO(audio_bytes), sr=16000
        )
    except Exception as err:
        raise HTTPException(
            status_code=400,
            detail="Could not decode the uploaded file as audio.",
        ) from err

    if audio.size == 0:
        raise HTTPException(
            status_code=400, detail="Uploaded audio contains no samples."
        )

    # 3. Run prediction off the event loop as well; the pipeline performs
    #    feature extraction itself.
    # NOTE(review): requests may now reach `pipe` from several worker threads
    # at once — confirm the pipeline is safe under concurrent calls.
    results = await run_in_threadpool(pipe, audio)

    # 4. Pick the best label by score instead of relying on result order.
    #    `max` returns the first maximal item, so already-sorted output
    #    yields the same entry as `results[0]`.
    top_prediction = max(results, key=lambda res: res["score"])

    return {
        "filename": file.filename,
        "prediction": top_prediction["label"],
        "confidence": round(top_prediction["score"], 4),
        "all_scores": {res["label"]: round(res["score"], 4) for res in results},
        "is_deepfake": "fake" in top_prediction["label"].lower(),
    }

if __name__ == "__main__":
    # Imported lazily: uvicorn is only needed when this file is run directly
    # as a script.
    import uvicorn

    # Listen on every interface, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")