File size: 2,987 Bytes
fb5dd49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import logging
from contextlib import asynccontextmanager
from typing import Optional

from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

from word_vectors import WordVectorAnalyzer

# Root logging is configured once at import time: INFO level, each record
# rendered as "<asctime> [<levelname>] <logger name> — <message>".
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s — %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Shared analyzer instance. None at import time; lifespan() assigns it once
# WordVectorAnalyzer() has been constructed, and it stays None if that fails.
# The route handlers treat None as "model not available" (503 / "loading").
analyzer: Optional[WordVectorAnalyzer] = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Populate the module-level ``analyzer`` around the application's lifetime.

    The code before ``yield`` constructs ``WordVectorAnalyzer`` and stores it
    in the global ``analyzer``; the code after ``yield`` only logs shutdown.

    A construction failure is logged with its traceback and deliberately not
    re-raised: ``analyzer`` stays ``None`` and the application still starts.

    Args:
        app: The FastAPI application (unused here).
    """
    global analyzer

    logger.info("Starting up — loading Word2Vec model...")
    try:
        loaded = WordVectorAnalyzer()
    except Exception:
        # Best-effort startup: keep serving with analyzer left as None.
        logger.exception("Failed to load model")
    else:
        analyzer = loaded
        logger.info("Model loaded and ready.")

    yield

    logger.info("Shutting down.")


# Application object; `lifespan` is handed to FastAPI as its lifespan hook.
app = FastAPI(lifespan=lifespan, title="Word2Vec Galaxy")

# CORS middleware settings: wildcard origin list, GET and POST methods, and
# the Content-Type header.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["GET", "POST"],
    "allow_headers": ["Content-Type"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)


@app.get("/api/health")
def health():
    # Readiness probe: reports whether the module-level analyzer has been set.
    # "status" is "ready" when it is, "loading" otherwise; "model_loaded"
    # carries the same fact as a boolean.
    model_loaded = analyzer is not None
    status = "ready" if model_loaded else "loading"
    return {"status": status, "model_loaded": model_loaded}


@app.get("/api/similar")
def get_similar(
    word: str = Query(..., min_length=1, max_length=100),
    n: int = Query(20, ge=5, le=50),
):
    # Look up words similar to `word` and return them with reduced-dimension
    # coordinates and one similarity score per returned word.
    #
    # Query parameters:
    #   word: target word, 1-100 characters (required).
    #   n:    count passed to analyzer.find_similar_words; 5-50, default 20.
    #
    # Responses:
    #   200: {"target", "words", "vectors", "similarities"}. "words" and
    #        "vectors" come from analyzer.reduce_dimensions(); "similarities"
    #        has one entry per element of "words", in the same order.
    #   404: analyzer.find_similar_words() returned nothing for `word`.
    #   503: the module-level analyzer has not been set.
    if analyzer is None:
        raise HTTPException(503, detail="Model is still loading — please wait.")

    similar = analyzer.find_similar_words(word, n)
    if not similar:
        raise HTTPException(404, detail=f"'{word}' not found in vocabulary.")

    words, vectors = analyzer.reduce_dimensions(similar)

    # Score reported when the model cannot compare a pair of words.
    fallback_similarity = 0.5

    similarities: list[float] = []
    for w in words:
        if w == word:
            # The target compared with itself is reported as a perfect match.
            similarities.append(1.0)
            continue
        try:
            score = float(analyzer.model.similarity(word, w))
        except Exception:
            # Still best-effort (one bad pair must not fail the request), but
            # no longer silent: record the failure and its traceback. %r keeps
            # user-supplied text from injecting line breaks into the log.
            logger.warning(
                "Similarity lookup failed for %r vs %r; using fallback %.1f",
                word,
                w,
                fallback_similarity,
                exc_info=True,
            )
            score = fallback_similarity
        similarities.append(score)

    return {
        "target": word,
        "words": words,
        "vectors": vectors.tolist(),
        "similarities": similarities,
    }


# Request body for POST /api/analogy: the three words of an analogy query.
# Fields are passed to analyzer.word_analogy() in the order word1, word2, word3.
class AnalogyRequest(BaseModel):
    # Term that is subtracted (e.g. "man").
    word1: str
    # Term that is added (e.g. "woman").
    word2: str
    # Base term (e.g. "king").
    word3: str


@app.post("/api/analogy")
def get_analogy(req: AnalogyRequest):
    # Solve the analogy described by `req` and return the answer together with
    # reduced-dimension coordinates for the three input words and the result.
    #
    # Responses:
    #   200: {"word1", "word2", "word3", "result", "words", "vectors"};
    #        "words" and "vectors" come from analyzer.reduce_dimensions().
    #   400: analyzer.word_analogy() reported an error (sent back as detail).
    #   503: the module-level analyzer has not been set.
    if analyzer is None:
        raise HTTPException(503, detail="Model is still loading — please wait.")

    first, second, third = req.word1, req.word2, req.word3

    answer, failure = analyzer.word_analogy(first, second, third)
    if failure:
        raise HTTPException(400, detail=failure)

    # Project the inputs and the answer together so they share one space.
    plotted_words, coords = analyzer.reduce_dimensions(
        [first, second, third, answer]
    )

    payload = {
        "word1": first,
        "word2": second,
        "word3": third,
        "result": answer,
        "words": plotted_words,
        "vectors": coords.tolist(),
    }
    return payload

# Serve the "static" directory at the site root with html=True.
# NOTE(review): this is registered after the API routes above, presumably so
# the catch-all "/" mount does not shadow them; confirm against Starlette's
# route-matching order before moving it.
_static_site = StaticFiles(directory="static", html=True)
app.mount("/", _static_site, name="static")