File size: 2,111 Bytes
aecbecf
 
 
 
 
 
a823bd6
dcda167
aecbecf
 
 
 
 
 
 
 
 
 
dcda167
10b630e
dcda167
 
ab3e171
a823bd6
dcda167
 
 
 
a823bd6
dcda167
88486f8
aecbecf
14ef29c
88486f8
dcda167
 
 
aecbecf
dcda167
aecbecf
dcda167
 
 
 
 
 
 
 
 
 
14ef29c
dcda167
 
 
 
 
 
 
 
 
 
aecbecf
 
dcda167
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import asyncio
import os
import shutil
import tempfile
from typing import Optional

import anyio
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from faster_whisper import WhisperModel

# Application wiring: CORS is wide-open so a browser front-end hosted
# anywhere can call this API directly.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Whisper "small" checkpoint, int8-quantized at load time. Swap in
# Systran/faster-whisper-base for an even lighter/faster model.
MODEL_ID = "Systran/faster-whisper-small"

# Serialize inference: the free-tier CPU can only run one transcription
# at a time without thrashing.
inference_lock = asyncio.Lock()

# session_id -> path of the temp file accumulating that session's audio chunks.
user_sessions = {}

# Load the model once at import time. If loading fails, keep the app
# alive but leave `model` as None so the failure is an explicit state
# (the original left `model` undefined, causing a confusing NameError
# on the first request).
model = None
try:
    print(f"Loading {MODEL_ID}...")
    # int8 quantization keeps the memory footprint tiny and CPU inference fast.
    model = WhisperModel(
        MODEL_ID, device="cpu", compute_type="int8", download_root="./model_cache"
    )
    print(f"{MODEL_ID} loaded!")
except Exception as e:
    # Log with context rather than crashing the server at startup.
    print(f"Error loading {MODEL_ID}: {e}")

@app.post("/whisper")
async def transcribe_audio(
    audio: UploadFile = File(...),
    session_id: str = "default",
    lang: Optional[str] = None,  # was `str = None`: invalid annotation for a None default
):
    """Append an uploaded audio chunk to the session's file and transcribe it.

    Each session accumulates its chunks in one growing temp file, and the
    model re-transcribes the full audio so far on every call, so the returned
    text is the complete transcript to date.

    Parameters:
        audio: the next audio chunk (webm) from the client recorder.
        session_id: key identifying which rolling temp file to append to.
        lang: optional language code; None lets the model auto-detect.

    Returns:
        {"text": ..., "language": ...} on success,
        {"text": "", "error": ...} on failure (best-effort, never a 500).
    """
    # Hold the lock for the whole request: inference is serialized on the
    # single shared CPU, and this also guards the user_sessions dict.
    async with inference_lock:
        if session_id not in user_sessions:
            temp_file = tempfile.NamedTemporaryFile(
                delete=False, suffix=".webm", dir="/tmp"
            )
            # Close immediately: we only need the path. The original kept the
            # handle open forever, leaking one file descriptor per session.
            temp_file.close()
            user_sessions[session_id] = temp_file.name

        temp_path = user_sessions[session_id]

        try:
            # Append this chunk to the session's rolling file.
            with open(temp_path, "ab") as buffer:
                shutil.copyfileobj(audio.file, buffer)

            # Run the blocking transcription off the event loop.
            segments, info = await anyio.to_thread.run_sync(
                lambda: model.transcribe(
                    temp_path,
                    beam_size=1,  # greedy decoding: fastest setting on CPU
                    vad_filter=True,
                    language=lang,
                    # Bilingual prompt nudges the small model toward the
                    # expected English/Arabic mix.
                    initial_prompt="English and Arabic conversation. مرحبا بكم",
                )
            )

            full_text = " ".join(s.text for s in segments).strip()
            return {"text": full_text, "language": info.language}

        except Exception as e:
            # Best-effort endpoint: surface the error in-band instead of a 500
            # so the streaming client keeps running.
            return {"text": "", "error": str(e)}

@app.get("/")
def health():
    """Liveness probe reporting which model build is serving."""
    return {"status": "base-model-active"}