# Page residue from the scraped file view — Spaces: Sleeping; File size: 2,987 Bytes; fb5dd49
import logging
from contextlib import asynccontextmanager
from typing import Optional
from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from word_vectors import WordVectorAnalyzer
# Configure logging once at import time: INFO level, timestamped records.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s — %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Shared analyzer instance; stays None until the startup hook assigns it.
analyzer: Optional[WordVectorAnalyzer] = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the word-vector analyzer at startup and log at shutdown.

    A failed load is logged with its traceback and does not abort startup;
    in that case the module-level ``analyzer`` simply remains ``None``.
    """
    global analyzer

    logger.info("Starting up — loading Word2Vec model...")
    try:
        loaded = WordVectorAnalyzer()
    except Exception:
        # Deliberately non-fatal: the app still starts without a model.
        logger.exception("Failed to load model")
    else:
        analyzer = loaded
        logger.info("Model loaded and ready.")

    # Control returns to the application here; code below runs on shutdown.
    yield

    logger.info("Shutting down.")
# Application instance; the lifespan handler performs the model load.
app = FastAPI(lifespan=lifespan, title="Word2Vec Galaxy")

# CORS policy: every origin is allowed, restricted to GET/POST requests
# and the Content-Type request header.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["Content-Type"],
    allow_methods=["GET", "POST"],
    allow_origins=["*"],
)
@app.get("/api/health")
def health():
    """Report whether the global analyzer has been set.

    Returns:
        A dict with ``status`` ("ready" when the analyzer is set, otherwise
        "loading") and the boolean ``model_loaded``.
    """
    model_loaded = analyzer is not None
    status = "ready" if model_loaded else "loading"
    return {"status": status, "model_loaded": model_loaded}
@app.get("/api/similar")
def get_similar(
    word: str = Query(..., min_length=1, max_length=100),
    n: int = Query(20, ge=5, le=50),
):
    """Look up words similar to ``word`` and return them with coordinates.

    Args:
        word: Query word, 1 to 100 characters long.
        n: Count passed through to ``analyzer.find_similar_words``
            (5 to 50, default 20).

    Returns:
        A dict with ``target`` (the query word), ``words`` and ``vectors``
        (the two values returned by ``analyzer.reduce_dimensions``, with the
        vectors converted via ``.tolist()``), and ``similarities`` (one float
        per entry of ``words``, in the same order).

    Raises:
        HTTPException: 503 when the analyzer has not been loaded; 404 when
            ``analyzer.find_similar_words`` returns an empty result.
    """
    if analyzer is None:
        raise HTTPException(503, detail="Model is still loading — please wait.")

    similar = analyzer.find_similar_words(word, n)
    if not similar:
        raise HTTPException(404, detail=f"'{word}' not found in vocabulary.")

    words, vectors = analyzer.reduce_dimensions(similar)

    similarities: list[float] = []
    for w in words:
        if w == word:
            # The target compared with itself is fixed at 1.0; no model call.
            similarities.append(1.0)
            continue
        try:
            similarities.append(float(analyzer.model.similarity(word, w)))
        except Exception:
            # Best-effort fallback: keep the response usable with 0.5, but
            # record the failure instead of discarding it silently. %r keeps
            # user-supplied text from breaking the log line.
            logger.warning(
                "Similarity lookup failed for %r vs %r; using 0.5",
                word,
                w,
                exc_info=True,
            )
            similarities.append(0.5)

    return {
        "target": word,
        "words": words,
        "vectors": vectors.tolist(),
        "similarities": similarities,
    }
class AnalogyRequest(BaseModel):
    """Request body for ``POST /api/analogy``.

    The three words are forwarded, in this order, to
    ``analyzer.word_analogy``.
    """

    word1: str  # term that is subtracted, e.g. "man"
    word2: str  # term that is added, e.g. "woman"
    word3: str  # base term, e.g. "king"
@app.post("/api/analogy")
def get_analogy(req: AnalogyRequest):
    """Solve a word analogy and return the words with their coordinates.

    Args:
        req: The three input words (``word1``, ``word2``, ``word3``).

    Returns:
        A dict echoing ``word1``/``word2``/``word3``, the ``result`` word
        from ``analyzer.word_analogy``, and ``words``/``vectors`` as returned
        by ``analyzer.reduce_dimensions`` for the three inputs plus the
        result (vectors converted via ``.tolist()``).

    Raises:
        HTTPException: 503 when the analyzer has not been loaded; 400 with
            the analyzer's error value when ``word_analogy`` reports one.
    """
    if analyzer is None:
        raise HTTPException(503, detail="Model is still loading — please wait.")

    first, second, third = req.word1, req.word2, req.word3

    result, error = analyzer.word_analogy(first, second, third)
    if error:
        raise HTTPException(400, detail=error)

    # reduce_dimensions hands back its own word list, which is what the
    # response reports under "words".
    valid_words, vectors = analyzer.reduce_dimensions(
        [first, second, third, result]
    )

    return {
        "word1": first,
        "word2": second,
        "word3": third,
        "result": result,
        "words": valid_words,
        "vectors": vectors.tolist(),
    }
# Serve the contents of the "static" directory at the site root.
# NOTE(review): this mount comes after the /api routes are registered,
# presumably so they are matched before this catch-all — confirm against
# Starlette's route ordering before moving it.
app.mount("/", StaticFiles(directory="static", html=True), name="static")
|