"""HTTP API returning fastText cosine similarity (times 100) for two words."""

import os
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Any, AsyncIterator

import fasttext
import numpy as np
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel, Field

# Location of the fastText binary model; overridable through the environment.
MODEL_PATH = Path(os.getenv("FASTTEXT_MODEL_PATH", "/app/models/cc.ug.300.bin"))


class SimilarityRequest(BaseModel):
    """JSON body accepted by ``POST /similarity``."""

    # Both words are required and must contain at least one character.
    word1: str = Field(..., min_length=1)
    word2: str = Field(..., min_length=1)


def load_fasttext_model() -> Any:
    """Load the fastText model stored at ``MODEL_PATH``.

    Returns:
        The object returned by ``fasttext.load_model``.

    Raises:
        RuntimeError: If ``MODEL_PATH`` does not point to a regular file.
    """
    # is_file() rather than exists(): a directory at this path would otherwise
    # slip through and fail inside fastText with a less helpful error.
    if not MODEL_PATH.is_file():
        raise RuntimeError(
            f"fastText model not found at {MODEL_PATH}. "
            "Set FASTTEXT_MODEL_PATH or download cc.ug.300.bin during the Docker build."
        )
    return fasttext.load_model(str(MODEL_PATH))


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Load the model at application startup and release it at shutdown."""
    app.state.fasttext_model = load_fasttext_model()
    try:
        yield
    finally:
        # Drop the reference so the model can be reclaimed and /health stops
        # reporting it as loaded once the application has shut down.
        app.state.fasttext_model = None


app = FastAPI(
    title="Uyghur Word Similarity API",
    description="Returns fastText cosine similarity multiplied by 100.",
    version="1.0.0",
    lifespan=lifespan,
)


def get_model() -> Any:
    """Return the model stored on ``app.state``.

    Raises:
        HTTPException: 503 if no model is currently stored on ``app.state``.
    """
    model = getattr(app.state, "fasttext_model", None)
    if model is None:
        raise HTTPException(status_code=503, detail="fastText model is not loaded")
    return model


def normalize_word(word: str, field_name: str) -> str:
    """Strip surrounding whitespace from *word*.

    Args:
        word: Raw input word.
        field_name: Name used in the error message when the word is empty.

    Returns:
        The stripped word.

    Raises:
        HTTPException: 400 if nothing remains after stripping.
    """
    normalized = word.strip()
    if not normalized:
        raise HTTPException(status_code=400, detail=f"{field_name} must not be empty")
    return normalized


def cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
    """Return the cosine similarity of *v1* and *v2*, limited to [-1, 1].

    Raises:
        HTTPException: 422 if the product of the norms is zero or not finite,
            in which case the similarity is undefined.
    """
    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
    # A NaN/inf denominator would produce a non-finite result that cannot be
    # serialised as JSON, so reject it together with the zero case.
    if denominator == 0 or not np.isfinite(denominator):
        raise HTTPException(status_code=422, detail="Could not compute similarity")
    similarity = float(np.dot(v1, v2) / denominator)
    # Floating-point rounding can push the ratio marginally past +/-1.
    return max(-1.0, min(1.0, similarity))


def similarity_score(word1: str, word2: str) -> float:
    """Return the cosine similarity of the two words' vectors, times 100.

    Both words are normalised with ``normalize_word`` before the model is
    looked up, so an empty word is reported before a missing model.

    Raises:
        HTTPException: Propagated from ``normalize_word``, ``get_model`` or
            ``cosine_similarity``.
    """
    word1 = normalize_word(word1, "word1")
    word2 = normalize_word(word2, "word2")
    model = get_model()
    v1 = model.get_word_vector(word1)
    v2 = model.get_word_vector(word2)
    return cosine_similarity(v1, v2) * 100


@app.get("/")
def root():
    """Return a minimal status payload."""
    return {
        "status": "ok",
        # "model": str(MODEL_PATH),
        # "usage": {
        #     "GET": "/similarity?word1=سىزغۇچ&word2=نان",
        #     "POST": {"url": "/similarity", "body": {"word1": "سىزغۇچ", "word2": "نان"}},
        # },
    }


@app.get("/health")
def health():
    """Report whether a model is currently stored on ``app.state``."""
    return {"status": "ok", "model_loaded": getattr(app.state, "fasttext_model", None) is not None}


@app.get("/similarity", response_model=float)
def similarity_from_query(
    word1: str = Query(..., min_length=1),
    word2: str = Query(..., min_length=1),
):
    """Return the similarity score for two words given as query parameters."""
    return similarity_score(word1, word2)


@app.post("/similarity", response_model=float)
def similarity_from_body(payload: SimilarityRequest):
    """Return the similarity score for two words given in a JSON body."""
    return similarity_score(payload.word1, payload.word2)