# fasttext-langid / app.py
# abyayel's picture
# Update app.py
# 20dddc3 verified
import fasttext
from fastapi import FastAPI, HTTPException, Depends
from fastapi.security import APIKeyHeader
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
import os
# Application instance that the route decorators in this module attach to.
app = FastAPI()

# The shared secret is taken from the environment. Startup is aborted right
# away when it is missing or empty, so the service never runs unauthenticated.
API_KEY = os.environ.get("INTERNAL_API_KEY")
if API_KEY is None or API_KEY == "":
    raise RuntimeError("INTERNAL_API_KEY environment variable not set")

# Name of the request header that carries the key. auto_error=False is passed
# so that the verify_api_key dependency decides how a missing header is
# answered instead of the security scheme doing so on its own.
API_KEY_NAME = "X-Internal-API-Key"
api_key_header = APIKeyHeader(auto_error=False, name=API_KEY_NAME)
async def verify_api_key(api_key: str = Depends(api_key_header)):
    """Authenticate a request by the API key it presents in its header.

    Used as a FastAPI dependency. The key is extracted from the request by
    ``api_key_header`` and compared with the module-level ``API_KEY``.

    Args:
        api_key: Header value supplied by ``api_key_header``; falsy when the
            caller did not send a usable key.

    Returns:
        The presented key, unchanged, when it matches ``API_KEY``.

    Raises:
        HTTPException: status 403 with detail ``"Invalid API Key"`` when the
            key is missing, empty, or does not match.
    """
    # Function-scope import: the module's import block does not bring in
    # ``secrets`` and this keeps the block self-contained.
    import secrets

    # Compare in constant time so response latency does not reveal how many
    # leading characters of a guessed key were correct. Both sides are encoded
    # to bytes because compare_digest() rejects non-ASCII ``str`` arguments.
    key_matches = bool(api_key) and secrets.compare_digest(
        api_key.encode("utf-8"), API_KEY.encode("utf-8")
    )
    if not key_matches:
        raise HTTPException(status_code=403, detail="Invalid API Key")
    return api_key
# Fetch the fastText language-identification weights (about 1.2 GB) from the
# Hugging Face Hub. These statements sit at module level, so they run when the
# module is imported -- once per process start, not once per request.
MODEL_PATH = hf_hub_download(
    filename="model.bin",
    repo_id="facebook/fasttext-language-identification",
)

# Load the weights from the local path; the /detect handler reuses this object.
model = fasttext.load_model(MODEL_PATH)
# Request body accepted by POST /detect.
class TextInput(BaseModel):
    # Text whose language should be identified; the handler rejects values
    # that are empty or whitespace-only.
    text: str
# Response body returned by POST /detect.
class DetectionResult(BaseModel):
    # Part of the label before its first underscore.
    language: str # e.g., "amh", "tir", "orm", "gaz", "eng"
    # Score the model reports for its top prediction.
    confidence: float
    # Model label with the "__label__" prefix already removed by the handler.
    raw_label: str # e.g., "amh_Ethi"
@app.post("/detect", response_model=DetectionResult)
async def detect_language(input: TextInput, _ = Depends(verify_api_key)):
    """Identify the language of the submitted text with the fastText model.

    Args:
        input: Request body carrying the text to classify.
        _: Unused; present only so that ``verify_api_key`` runs and rejects
            unauthenticated requests before the handler body executes.

    Returns:
        A dict with three keys: ``language`` (the label text before its first
        underscore), ``confidence`` (the model's score for the top label as a
        float) and ``raw_label`` (the label without its ``__label__`` prefix).

    Raises:
        HTTPException: status 400 with detail ``"Empty text"`` when the text
            is empty or contains only whitespace.
    """
    if not input.text or not input.text.strip():
        raise HTTPException(status_code=400, detail="Empty text")

    # fastText's predict() handles one line at a time and raises ValueError on
    # any "\n" in its input, which would surface as a 500 for multi-line text.
    # Newlines are replaced with spaces so such text is classified instead.
    single_line = input.text.replace("\n", " ")

    labels, scores = model.predict(single_line, k=1)
    raw_label = labels[0].replace('__label__', '')
    confidence = float(scores[0])

    # Base language code, e.g. "orm" from "orm_Latn". partition() yields the
    # whole label when it contains no underscore, so no separate check is
    # needed.
    lang_code = raw_label.partition('_')[0]

    return {
        "language": lang_code,
        "confidence": confidence,
        "raw_label": raw_label,
    }
@app.get("/health")
async def health():
    # Health endpoint: answers with a constant payload and never touches the
    # model or the API-key check.
    status_payload = dict(status="ok")
    return status_payload