"""Internal language-identification service backed by a fastText model.

Exposes ``POST /detect`` (protected by an API key header) and an
unauthenticated ``GET /health`` probe.
"""

import os
import secrets
from typing import Optional, Tuple

import fasttext
from fastapi import Depends, FastAPI, HTTPException
from fastapi.security import APIKeyHeader
from huggingface_hub import hf_hub_download
from pydantic import BaseModel

app = FastAPI()

# API key security – read from environment secret. Fail fast at import time
# so the service never starts without a configured key.
API_KEY = os.getenv("INTERNAL_API_KEY")
if not API_KEY:
    raise RuntimeError("INTERNAL_API_KEY environment variable not set")

API_KEY_NAME = "X-Internal-API-Key"
# auto_error=False: a missing header reaches verify_api_key as None so that
# missing and wrong keys produce the same 403 response.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# Prefix the model puts in front of every predicted label.
_LABEL_PREFIX = "__label__"


async def verify_api_key(
    api_key: Optional[str] = Depends(api_key_header),
) -> str:
    """Validate the ``X-Internal-API-Key`` request header.

    Args:
        api_key: Header value extracted by ``api_key_header``; ``None`` when
            the header is absent.

    Returns:
        The supplied key when it matches ``INTERNAL_API_KEY``.

    Raises:
        HTTPException: 403 when the key is missing or does not match.
    """
    # Compare as bytes in constant time: a plain `!=` leaks how many leading
    # characters matched, and compare_digest on str rejects non-ASCII input.
    if not api_key or not secrets.compare_digest(
        api_key.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=403, detail="Invalid API Key")
    return api_key


# Download the model (1.2 GB) only once when the Space starts
MODEL_PATH = hf_hub_download(
    repo_id="facebook/fasttext-language-identification",
    filename="model.bin",
)
model = fasttext.load_model(MODEL_PATH)


class TextInput(BaseModel):
    """Request body for ``POST /detect``."""

    text: str


class DetectionResult(BaseModel):
    """Response body for ``POST /detect``."""

    language: str  # e.g., "amh", "tir", "orm", "gaz", "eng"
    confidence: float
    raw_label: str  # label with "__label__" removed, e.g., "amh_Ethi"


def _parse_label(label: str) -> Tuple[str, str]:
    """Split a model label into ``(raw_label, language_code)``.

    ``"__label__orm_Latn"`` becomes ``("orm_Latn", "orm")``; a label without
    an underscore suffix yields the same string for both elements.
    """
    raw_label = label.replace(_LABEL_PREFIX, "")
    # split() returns the whole string when there is no "_", so no separate
    # membership check is needed.
    return raw_label, raw_label.split("_", 1)[0]


@app.post("/detect", response_model=DetectionResult)
async def detect_language(input: TextInput, _=Depends(verify_api_key)):
    """Predict the most likely language of ``input.text``.

    Args:
        input: Request body carrying the text to classify. (The name shadows
            the ``input`` builtin but is kept for interface compatibility.)
        _: Unused; present only to enforce API key verification.

    Returns:
        The top-1 prediction: base language code, model confidence, and the
        label with its ``__label__`` prefix removed.

    Raises:
        HTTPException: 400 when the text is empty or only whitespace.
    """
    if not input.text.strip():
        raise HTTPException(status_code=400, detail="Empty text")

    # fastText's Python `predict` handles one line at a time and raises
    # ValueError on any "\n", which previously surfaced as a 500 for
    # multi-line input. Newlines are plain token separators to the model,
    # so replacing them with spaces is safe.
    text = input.text.replace("\n", " ")

    labels, probabilities = model.predict(text, k=1)
    raw_label, lang_code = _parse_label(labels[0])

    return {
        "language": lang_code,
        "confidence": float(probabilities[0]),
        "raw_label": raw_label,
    }


@app.get("/health")
async def health():
    """Liveness probe; always reports ``ok`` once the app has started."""
    return {"status": "ok"}