Spaces:

piyazon
/

UWS_API

Sleeping

App Files Files Community

Piyazon committed 20 days ago

Commit

2fbba2c

1 Parent(s): 2a56b48

test

Browse files

Files changed (5) hide show

.dockerignore +9 -0
Dockerfile +18 -0
README.md +28 -0
app.py +102 -0
requirements.txt +4 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,9 @@

+.git
+__pycache__/
+*.pyc
+.pytest_cache/
+.venv/
+venv/
+models/
+*.bin
+*.bin.gz

Dockerfile ADDED Viewed

	@@ -0,0 +1,18 @@

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+# Create an unprivileged user (uid 1000) and a model directory it owns, so the
+# later build steps and the running app do not need root.
+RUN useradd -m -u 1000 user && mkdir -p /app/models && chown -R user:user /app
+USER user
+# Put the user's local bin directory on PATH.
+ENV PATH="/home/user/.local/bin:$PATH"
+# Path app.py reads (via FASTTEXT_MODEL_PATH) to locate the model file.
+ENV FASTTEXT_MODEL_PATH="/app/models/cc.ug.300.bin"
+WORKDIR /app
+# Copy requirements.txt on its own and install from it before the rest of the
+# source is copied.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Download the gzipped fastText Uyghur model, extract it to
+# /app/models/cc.ug.300.bin, then delete the archive.
+RUN python -c "import gzip, os, shutil, urllib.request; url='https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ug.300.bin.gz'; gz='/app/models/cc.ug.300.bin.gz'; out='/app/models/cc.ug.300.bin'; print('Downloading fastText model:', url, flush=True); urllib.request.urlretrieve(url, gz); print('Extracting fastText model to:', out, flush=True); f_in=gzip.open(gz, 'rb'); f_out=open(out, 'wb'); shutil.copyfileobj(f_in, f_out); f_in.close(); f_out.close(); os.remove(gz)"
+# Copy the application source last, after the model download step.
+COPY --chown=user . /app
+# Serve the FastAPI app on all interfaces, port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -8,3 +8,31 @@ pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## API
+The Docker build downloads the official fastText Uyghur Common Crawl binary model, extracts it to `/app/models/cc.ug.300.bin`, and loads it once when the FastAPI app starts.
+Model source: https://fasttext.cc/docs/en/crawl-vectors
+### Query request
+```bash
+curl --get "https://<your-space>.hf.space/similarity" \
+  --data-urlencode "word1=سىزغۇچ" \
+  --data-urlencode "word2=نان"
+```
+### JSON request
+```bash
+curl -X POST "https://<your-space>.hf.space/similarity" \
+  -H "Content-Type: application/json" \
+  -d '{"word1":"سىزغۇچ","word2":"نان"}'
+```
+Both endpoints return a JSON number (the fastText cosine similarity multiplied by 100), for example:
+```json
+42.123456
+```

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import os
+from contextlib import asynccontextmanager
+from pathlib import Path
+from typing import Any
+import fasttext
+import numpy as np
+from fastapi import FastAPI, HTTPException, Query
+from pydantic import BaseModel, Field
+# Location of the fastText binary model. The FASTTEXT_MODEL_PATH environment
+# variable overrides the default path.
+MODEL_PATH = Path(os.getenv("FASTTEXT_MODEL_PATH", "/app/models/cc.ug.300.bin"))
+# JSON body accepted by POST /similarity. Both fields are required and must
+# contain at least one character.
+class SimilarityRequest(BaseModel):
+    # First word of the pair to compare.
+    word1: str = Field(..., min_length=1)
+    # Second word of the pair to compare.
+    word2: str = Field(..., min_length=1)
+def load_fasttext_model() -> Any:
+    if not MODEL_PATH.exists():
+        raise RuntimeError(
+            f"fastText model not found at {MODEL_PATH}. "
+            "Set FASTTEXT_MODEL_PATH or download cc.ug.300.bin during the Docker build."
+        )
+    return fasttext.load_model(str(MODEL_PATH))
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    app.state.fasttext_model = load_fasttext_model()
+    yield
+# Application instance. The `lifespan` handler stores the loaded fastText
+# model on app.state.
+app = FastAPI(
+    title="Uyghur Word Similarity API",
+    description="Returns fastText cosine similarity multiplied by 100.",
+    version="1.0.0",
+    lifespan=lifespan,
+)
+def get_model() -> Any:
+    model = getattr(app.state, "fasttext_model", None)
+    if model is None:
+        raise HTTPException(status_code=503, detail="fastText model is not loaded")
+    return model
+def normalize_word(word: str, field_name: str) -> str:
+    normalized = word.strip()
+    if not normalized:
+        raise HTTPException(status_code=400, detail=f"{field_name} must not be empty")
+    return normalized
+def cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
+    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
+    if denominator == 0:
+        raise HTTPException(status_code=422, detail="Could not compute similarity")
+    return float(np.dot(v1, v2) / denominator)
+def similarity_score(word1: str, word2: str) -> float:
+    word1 = normalize_word(word1, "word1")
+    word2 = normalize_word(word2, "word2")
+    model = get_model()
+    v1 = model.get_word_vector(word1)
+    v2 = model.get_word_vector(word2)
+    return cosine_similarity(v1, v2) * 100
+@app.get("/")
+def root():
+    return {
+        "status": "ok",
+        "model": str(MODEL_PATH),
+        "usage": {
+            "GET": "/similarity?word1=سىزغۇچ&word2=نان",
+            "POST": {"url": "/similarity", "body": {"word1": "سىزغۇچ", "word2": "نان"}},
+        },
+    }
+@app.get("/health")
+def health():
+    return {"status": "ok", "model_loaded": getattr(app.state, "fasttext_model", None) is not None}
+@app.get("/similarity", response_model=float)
+def similarity_from_query(
+    word1: str = Query(..., min_length=1),
+    word2: str = Query(..., min_length=1),
+):
+    return similarity_score(word1, word2)
+@app.post("/similarity", response_model=float)
+def similarity_from_body(payload: SimilarityRequest):
+    return similarity_score(payload.word1, payload.word2)

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+fastapi
+uvicorn[standard]
+fasttext
+numpy