Piyazon committed on
Commit ·
98b1f2a
1
Parent(s): 9545e31
roll back
Browse files
- Dockerfile +2 -6
- README.md +2 -4
- app.py +2 -2
- requirements.txt +0 -1
Dockerfile
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# syntax=docker/dockerfile:1
|
| 2 |
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
| 3 |
# you will also find guides on how best to write your Dockerfile
|
| 4 |
|
|
@@ -7,16 +6,13 @@ FROM python:3.9
|
|
| 7 |
RUN useradd -m -u 1000 user && mkdir -p /app/models && chown -R user:user /app
|
| 8 |
USER user
|
| 9 |
ENV PATH="/home/user/.local/bin:$PATH"
|
| 10 |
-
ENV HF_MODEL_REPO_ID="piyazon/uyghur_custom_300"
|
| 11 |
-
ENV HF_MODEL_FILENAME="uyghur_custom_300.bin"
|
| 12 |
-
ENV FASTTEXT_MODEL_PATH="/app/models/uyghur_custom_300.bin"
|
| 13 |
|
| 14 |
WORKDIR /app
|
| 15 |
|
| 16 |
COPY --chown=user ./requirements.txt requirements.txt
|
| 17 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 18 |
-
RUN --
|
| 19 |
-
python -c "import os; from pathlib import Path; from huggingface_hub import hf_hub_download; token = Path('/run/secrets/HF_TOKEN').read_text().strip(); print('Downloading Hugging Face model:', os.environ['HF_MODEL_REPO_ID'] + '/' + os.environ['HF_MODEL_FILENAME'], flush=True); hf_hub_download(repo_id=os.environ['HF_MODEL_REPO_ID'], filename=os.environ['HF_MODEL_FILENAME'], local_dir='/app/models', token=token)"
|
| 20 |
|
| 21 |
COPY --chown=user . /app
|
| 22 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
|
|
|
| 1 |
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
| 2 |
# you will also find guides on how best to write your Dockerfile
|
| 3 |
|
|
|
|
| 6 |
RUN useradd -m -u 1000 user && mkdir -p /app/models && chown -R user:user /app
|
| 7 |
USER user
|
| 8 |
ENV PATH="/home/user/.local/bin:$PATH"
|
| 9 |
+
ENV FASTTEXT_MODEL_PATH="/app/models/cc.ug.300.bin"
|
|
|
|
|
|
|
| 10 |
|
| 11 |
WORKDIR /app
|
| 12 |
|
| 13 |
COPY --chown=user ./requirements.txt requirements.txt
|
| 14 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 15 |
+
RUN python -c "import gzip, os, shutil, urllib.request; url='https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ug.300.bin.gz'; gz='/app/models/cc.ug.300.bin.gz'; out='/app/models/cc.ug.300.bin'; print('Downloading fastText model:', url, flush=True); urllib.request.urlretrieve(url, gz); print('Extracting fastText model to:', out, flush=True); f_in=gzip.open(gz, 'rb'); f_out=open(out, 'wb'); shutil.copyfileobj(f_in, f_out); f_in.close(); f_out.close(); os.remove(gz)"
|
|
|
|
| 16 |
|
| 17 |
COPY --chown=user . /app
|
| 18 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -11,11 +11,9 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 11 |
|
| 12 |
## API
|
| 13 |
|
| 14 |
-
The Docker build downloads the
|
| 15 |
|
| 16 |
-
Model source: https://
|
| 17 |
-
|
| 18 |
-
Because the model repository is private, add a Space secret named `HF_TOKEN` with read access to `piyazon/uyghur_custom_300`. The Dockerfile exposes that secret only during the model download build step.
|
| 19 |
|
| 20 |
### Query request
|
| 21 |
|
|
|
|
| 11 |
|
| 12 |
## API
|
| 13 |
|
| 14 |
+
The Docker build downloads the official fastText Uyghur Common Crawl binary model, extracts it to `/app/models/cc.ug.300.bin`, and loads it once when the FastAPI app starts.
|
| 15 |
|
| 16 |
+
Model source: https://fasttext.cc/docs/en/crawl-vectors
|
|
|
|
|
|
|
| 17 |
|
| 18 |
### Query request
|
| 19 |
|
app.py
CHANGED
|
@@ -9,7 +9,7 @@ from fastapi import FastAPI, HTTPException, Query
|
|
| 9 |
from pydantic import BaseModel, Field
|
| 10 |
|
| 11 |
|
| 12 |
-
MODEL_PATH = Path(os.getenv("FASTTEXT_MODEL_PATH", "/app/models/uyghur_custom_300.bin"))
|
| 13 |
|
| 14 |
|
| 15 |
class SimilarityRequest(BaseModel):
|
|
@@ -21,7 +21,7 @@ def load_fasttext_model() -> Any:
|
|
| 21 |
if not MODEL_PATH.exists():
|
| 22 |
raise RuntimeError(
|
| 23 |
f"fastText model not found at {MODEL_PATH}. "
|
| 24 |
-
"Set FASTTEXT_MODEL_PATH or download
|
| 25 |
)
|
| 26 |
|
| 27 |
return fasttext.load_model(str(MODEL_PATH))
|
|
|
|
| 9 |
from pydantic import BaseModel, Field
|
| 10 |
|
| 11 |
|
| 12 |
+
MODEL_PATH = Path(os.getenv("FASTTEXT_MODEL_PATH", "/app/models/cc.ug.300.bin"))
|
| 13 |
|
| 14 |
|
| 15 |
class SimilarityRequest(BaseModel):
|
|
|
|
| 21 |
if not MODEL_PATH.exists():
|
| 22 |
raise RuntimeError(
|
| 23 |
f"fastText model not found at {MODEL_PATH}. "
|
| 24 |
+
"Set FASTTEXT_MODEL_PATH or download cc.ug.300.bin during the Docker build."
|
| 25 |
)
|
| 26 |
|
| 27 |
return fasttext.load_model(str(MODEL_PATH))
|
requirements.txt
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
fastapi
|
| 2 |
uvicorn[standard]
|
| 3 |
fasttext
|
| 4 |
-
huggingface_hub[hf_xet]
|
| 5 |
numpy
|
|
|
|
| 1 |
fastapi
|
| 2 |
uvicorn[standard]
|
| 3 |
fasttext
|
|
|
|
| 4 |
numpy
|