Piyazon committed on
Commit
9545e31
·
1 Parent(s): 814693e

change to custom model

Browse files
Files changed (4) hide show
  1. Dockerfile +6 -2
  2. README.md +4 -2
  3. app.py +2 -2
  4. requirements.txt +1 -0
Dockerfile CHANGED
@@ -1,3 +1,4 @@
 
1
  # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
  # you will also find guides on how best to write your Dockerfile
3
 
@@ -6,13 +7,16 @@ FROM python:3.9
6
  RUN useradd -m -u 1000 user && mkdir -p /app/models && chown -R user:user /app
7
  USER user
8
  ENV PATH="/home/user/.local/bin:$PATH"
9
- ENV FASTTEXT_MODEL_PATH="/app/models/cc.ug.300.bin"
 
 
10
 
11
  WORKDIR /app
12
 
13
  COPY --chown=user ./requirements.txt requirements.txt
14
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
15
- RUN python -c "import gzip, os, shutil, urllib.request; url='https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ug.300.bin.gz'; gz='/app/models/cc.ug.300.bin.gz'; out='/app/models/cc.ug.300.bin'; print('Downloading fastText model:', url, flush=True); urllib.request.urlretrieve(url, gz); print('Extracting fastText model to:', out, flush=True); f_in=gzip.open(gz, 'rb'); f_out=open(out, 'wb'); shutil.copyfileobj(f_in, f_out); f_in.close(); f_out.close(); os.remove(gz)"
 
16
 
17
  COPY --chown=user . /app
18
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ # syntax=docker/dockerfile:1
2
  # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
3
  # you will also find guides on how best to write your Dockerfile
4
 
 
7
  RUN useradd -m -u 1000 user && mkdir -p /app/models && chown -R user:user /app
8
  USER user
9
  ENV PATH="/home/user/.local/bin:$PATH"
10
+ ENV HF_MODEL_REPO_ID="piyazon/uyghur_custom_300"
11
+ ENV HF_MODEL_FILENAME="uyghur_custom_300.bin"
12
+ ENV FASTTEXT_MODEL_PATH="/app/models/uyghur_custom_300.bin"
13
 
14
  WORKDIR /app
15
 
16
  COPY --chown=user ./requirements.txt requirements.txt
17
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
18
+ RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
19
+ python -c "import os; from pathlib import Path; from huggingface_hub import hf_hub_download; token = Path('/run/secrets/HF_TOKEN').read_text().strip(); print('Downloading Hugging Face model:', os.environ['HF_MODEL_REPO_ID'] + '/' + os.environ['HF_MODEL_FILENAME'], flush=True); hf_hub_download(repo_id=os.environ['HF_MODEL_REPO_ID'], filename=os.environ['HF_MODEL_FILENAME'], local_dir='/app/models', token=token)"
20
 
21
  COPY --chown=user . /app
22
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -11,9 +11,11 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
11
 
12
  ## API
13
 
14
- The Docker build downloads the official fastText Uyghur Common Crawl binary model, extracts it to `/app/models/cc.ug.300.bin`, and loads it once when the FastAPI app starts.
15
 
16
- Model source: https://fasttext.cc/docs/en/crawl-vectors
 
 
17
 
18
  ### Query request
19
 
 
11
 
12
  ## API
13
 
14
+ The Docker build downloads the custom fastText Uyghur binary model from `piyazon/uyghur_custom_300` on Hugging Face, saves it to `/app/models/uyghur_custom_300.bin`, and loads it once when the FastAPI app starts.
15
 
16
+ Model source: https://huggingface.co/piyazon/uyghur_custom_300
17
+
18
+ Because the model repository is private, add a Space secret named `HF_TOKEN` whose value is a Hugging Face access token with read access to `piyazon/uyghur_custom_300`. The Dockerfile mounts that secret only for the build step that downloads the model.
19
 
20
  ### Query request
21
 
app.py CHANGED
@@ -9,7 +9,7 @@ from fastapi import FastAPI, HTTPException, Query
9
  from pydantic import BaseModel, Field
10
 
11
 
12
- MODEL_PATH = Path(os.getenv("FASTTEXT_MODEL_PATH", "/app/models/cc.ug.300.bin"))
13
 
14
 
15
  class SimilarityRequest(BaseModel):
@@ -21,7 +21,7 @@ def load_fasttext_model() -> Any:
21
  if not MODEL_PATH.exists():
22
  raise RuntimeError(
23
  f"fastText model not found at {MODEL_PATH}. "
24
- "Set FASTTEXT_MODEL_PATH or download cc.ug.300.bin during the Docker build."
25
  )
26
 
27
  return fasttext.load_model(str(MODEL_PATH))
 
9
  from pydantic import BaseModel, Field
10
 
11
 
12
+ MODEL_PATH = Path(os.getenv("FASTTEXT_MODEL_PATH", "/app/models/uyghur_custom_300.bin"))
13
 
14
 
15
  class SimilarityRequest(BaseModel):
 
21
  if not MODEL_PATH.exists():
22
  raise RuntimeError(
23
  f"fastText model not found at {MODEL_PATH}. "
24
+ "Set FASTTEXT_MODEL_PATH or download uyghur_custom_300.bin during the Docker build."
25
  )
26
 
27
  return fasttext.load_model(str(MODEL_PATH))
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  fastapi
2
  uvicorn[standard]
3
  fasttext
 
4
  numpy
 
1
  fastapi
2
  uvicorn[standard]
3
  fasttext
4
+ huggingface_hub[hf_xet]
5
  numpy