# UWS_API / app.py
# Piyazon
# roll back
# 98b1f2a
import os
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Any
import fasttext
import numpy as np
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel, Field
# Filesystem location of the fastText binary. The FASTTEXT_MODEL_PATH
# environment variable, when set, replaces the built-in default path.
MODEL_PATH = Path(
    os.environ.get("FASTTEXT_MODEL_PATH", "/app/models/cc.ug.300.bin")
)
class SimilarityRequest(BaseModel):
    # Request body for POST /similarity: the two words to compare.
    # Both fields are required and must be at least one character long.
    word1: str = Field(default=..., min_length=1)
    word2: str = Field(default=..., min_length=1)
def load_fasttext_model() -> Any:
    """Load the fastText model stored at ``MODEL_PATH``.

    Returns:
        The object returned by ``fasttext.load_model``.

    Raises:
        RuntimeError: If ``MODEL_PATH`` is not an existing regular file.
    """
    # is_file() instead of exists(): a directory sitting at MODEL_PATH (for
    # example one created by mounting a missing host file into a container)
    # would pass an exists() check and only fail later inside fastText with
    # a much less helpful error.
    if not MODEL_PATH.is_file():
        raise RuntimeError(
            f"fastText model not found at {MODEL_PATH}. "
            "Set FASTTEXT_MODEL_PATH or download cc.ug.300.bin during the Docker build."
        )
    return fasttext.load_model(str(MODEL_PATH))
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Attach the fastText model to the application before it starts serving.

    The model returned by ``load_fasttext_model`` is stored on
    ``app.state.fasttext_model`` and control is then yielded; no work is
    performed after the yield.
    """
    model = load_fasttext_model()
    app.state.fasttext_model = model
    yield
# Application instance; ``lifespan`` populates ``app.state.fasttext_model``.
app = FastAPI(
    lifespan=lifespan,
    version="1.0.0",
    title="Uyghur Word Similarity API",
    description="Returns fastText cosine similarity multiplied by 100.",
)
def get_model() -> Any:
    """Return the fastText model stored on ``app.state``.

    Raises:
        HTTPException: 503 when ``app.state.fasttext_model`` is missing or
            ``None``.
    """
    loaded = getattr(app.state, "fasttext_model", None)
    if loaded is not None:
        return loaded
    raise HTTPException(status_code=503, detail="fastText model is not loaded")
def normalize_word(word: str, field_name: str) -> str:
    """Strip surrounding whitespace from *word* and reject empty results.

    Args:
        word: Raw input text.
        field_name: Name used in the error message when *word* is blank.

    Returns:
        *word* without leading or trailing whitespace.

    Raises:
        HTTPException: 400 when nothing remains after stripping.
    """
    trimmed = word.strip()
    if trimmed:
        return trimmed
    raise HTTPException(status_code=400, detail=f"{field_name} must not be empty")
def cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
    """Return the cosine of the angle between *v1* and *v2*.

    Args:
        v1: First vector.
        v2: Second vector.

    Returns:
        ``dot(v1, v2)`` divided by the product of the two Euclidean norms,
        converted to a Python ``float``.

    Raises:
        HTTPException: 422 when the product of the norms is zero.
    """
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product != 0:
        inner = np.dot(v1, v2)
        return float(inner / norm_product)
    # A zero norm product leaves the ratio undefined.
    raise HTTPException(status_code=422, detail="Could not compute similarity")
def similarity_score(word1: str, word2: str) -> float:
    """Return the cosine similarity of two words' vectors, multiplied by 100.

    Both words are passed through ``normalize_word`` first; their vectors are
    then read from the model returned by ``get_model`` via
    ``get_word_vector``.

    Args:
        word1: First word.
        word2: Second word.

    Returns:
        ``cosine_similarity`` of the two word vectors times 100.
    """
    first = normalize_word(word1, "word1")
    second = normalize_word(word2, "word2")
    # The model is fetched only after both inputs have been validated.
    vector_of = get_model().get_word_vector
    cosine = cosine_similarity(vector_of(first), vector_of(second))
    return cosine * 100
# Root endpoint: always answers with a fixed status payload.
# NOTE(review): `model` (the model path) and `usage` (example GET/POST calls
# to /similarity) entries existed here only as commented-out code and are
# not part of the response.
@app.get("/")
def root():
    payload = {"status": "ok"}
    return payload
# Health endpoint: reports whether a model object is present on app.state.
@app.get("/health")
def health():
    loaded_model = getattr(app.state, "fasttext_model", None)
    return {"status": "ok", "model_loaded": loaded_model is not None}
# GET /similarity: both words arrive as required, non-empty query parameters
# and the result of similarity_score is returned as a bare float.
@app.get("/similarity", response_model=float)
def similarity_from_query(
    word1: str = Query(default=..., min_length=1),
    word2: str = Query(default=..., min_length=1),
):
    score = similarity_score(word1, word2)
    return score
# POST /similarity: both words arrive in a SimilarityRequest JSON body and
# the result of similarity_score is returned as a bare float.
@app.post("/similarity", response_model=float)
def similarity_from_body(payload: SimilarityRequest):
    first, second = payload.word1, payload.word2
    return similarity_score(first, second)