| import os |
| from contextlib import asynccontextmanager |
| from pathlib import Path |
| from typing import Any |
|
|
| import fasttext |
| import numpy as np |
| from fastapi import FastAPI, HTTPException, Query |
| from pydantic import BaseModel, Field |
|
|
|
|
# Location of the fastText binary model; the FASTTEXT_MODEL_PATH environment
# variable overrides the default path.
MODEL_PATH = Path(os.environ.get("FASTTEXT_MODEL_PATH", "/app/models/cc.ug.300.bin"))
|
|
|
|
class SimilarityRequest(BaseModel):
    """JSON body carrying the two words to compare.

    Both fields are required and must contain at least one character.
    """

    word1: str = Field(default=..., min_length=1)
    word2: str = Field(default=..., min_length=1)
|
|
|
|
def load_fasttext_model() -> Any:
    """Load the fastText model stored at ``MODEL_PATH``.

    Returns:
        The model object returned by ``fasttext.load_model``.

    Raises:
        RuntimeError: If ``MODEL_PATH`` does not point to an existing
            regular file.
    """
    # is_file() rather than exists(): a directory at MODEL_PATH would pass an
    # existence check and then fail inside fastText with a less helpful error.
    if not MODEL_PATH.is_file():
        raise RuntimeError(
            f"fastText model not found at {MODEL_PATH}. "
            "Set FASTTEXT_MODEL_PATH or download cc.ug.300.bin during the Docker build."
        )

    return fasttext.load_model(str(MODEL_PATH))
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the fastText model before the application starts serving.

    The loaded model is stored on ``app.state.fasttext_model``. Nothing is
    done after the ``yield``, so no explicit teardown happens on shutdown.
    """
    model = load_fasttext_model()
    app.state.fasttext_model = model
    yield
|
|
|
|
# Application instance; the `lifespan` handler loads the fastText model at
# startup and stores it on app.state.
app = FastAPI(
    lifespan=lifespan,
    title="Uyghur Word Similarity API",
    version="1.0.0",
    description="Returns fastText cosine similarity multiplied by 100.",
)
|
|
|
|
def get_model() -> Any:
    """Return the model stored on ``app.state.fasttext_model``.

    Raises:
        HTTPException: 503 when the attribute is missing or is ``None``.
    """
    loaded = getattr(app.state, "fasttext_model", None)
    if loaded is not None:
        return loaded
    raise HTTPException(status_code=503, detail="fastText model is not loaded")
|
|
|
|
def normalize_word(word: str, field_name: str) -> str:
    """Strip surrounding whitespace from *word* and reject empty results.

    Args:
        word: Raw input word.
        field_name: Name used in the error message when the word is empty.

    Returns:
        The word without leading or trailing whitespace.

    Raises:
        HTTPException: 400 when nothing is left after stripping.
    """
    trimmed = word.strip()
    if trimmed:
        return trimmed
    raise HTTPException(status_code=400, detail=f"{field_name} must not be empty")
|
|
|
|
def cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
    """Return the cosine similarity of two vectors, clamped to [-1.0, 1.0].

    Args:
        v1: First vector.
        v2: Second vector, same length as *v1*.

    Returns:
        The cosine of the angle between the vectors as a Python ``float``.

    Raises:
        HTTPException: 422 when the similarity is undefined, i.e. either
            vector has zero norm or the norms are not finite (NaN/inf input).
    """
    # Compute in float64: low-precision inputs (e.g. float32) otherwise
    # accumulate enough rounding error to push the ratio slightly past +/-1.
    a = np.asarray(v1, dtype=np.float64)
    b = np.asarray(v2, dtype=np.float64)

    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # A NaN/inf denominator would yield a non-finite result, which cannot be
    # serialized as JSON, so treat it like the zero-norm case.
    if denominator == 0 or not np.isfinite(denominator):
        raise HTTPException(status_code=422, detail="Could not compute similarity")

    cosine = float(np.dot(a, b) / denominator)
    # Clamp residual rounding error so callers never see |cosine| > 1.
    return max(-1.0, min(1.0, cosine))
|
|
|
|
def similarity_score(word1: str, word2: str) -> float:
    """Return the cosine similarity of two words' vectors, multiplied by 100.

    Both words are normalized first, then the model is fetched, so an empty
    word is reported before a missing model.

    Args:
        word1: First word.
        word2: Second word.

    Returns:
        Cosine similarity of the two word vectors times 100.
    """
    first = normalize_word(word1, "word1")
    second = normalize_word(word2, "word2")

    fasttext_model = get_model()
    first_vector = fasttext_model.get_word_vector(first)
    second_vector = fasttext_model.get_word_vector(second)

    cosine = cosine_similarity(first_vector, second_vector)
    return cosine * 100
|
|
|
|
@app.get("/")
def root():
    """Return a static status payload for the service root."""
    status_payload = {"status": "ok"}
    return status_payload
|
|
|
|
@app.get("/health")
def health():
    """Report service status and whether a model is present on ``app.state``."""
    model_loaded = getattr(app.state, "fasttext_model", None) is not None
    return {"status": "ok", "model_loaded": model_loaded}
|
|
|
|
@app.get("/similarity", response_model=float)
def similarity_from_query(
    word1: str = Query(default=..., min_length=1),
    word2: str = Query(default=..., min_length=1),
):
    """Return the similarity score for two words passed as query parameters.

    Both parameters are required and must be at least one character long.
    """
    score = similarity_score(word1, word2)
    return score
|
|
|
|
@app.post("/similarity", response_model=float)
def similarity_from_body(payload: SimilarityRequest):
    """Return the similarity score for the two words in the request body."""
    first, second = payload.word1, payload.word2
    return similarity_score(first, second)
|
|