Piyazon committed on
Commit
2b84626
·
1 Parent(s): 6d6cbe5

test initialize

Browse files
Files changed (5) hide show
  1. Dockerfile +47 -7
  2. README.md +26 -1
  3. download_models.py +62 -0
  4. requirements.txt +3 -1
  5. start.sh +48 -0
Dockerfile CHANGED
@@ -1,13 +1,53 @@
1
- FROM python:3.12
2
 
3
- RUN useradd -m -u 1000 user
4
- USER user
5
- ENV PATH="/home/user/.local/bin:$PATH"
 
 
 
6
 
7
- WORKDIR /app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
 
9
  COPY --chown=user ./requirements.txt requirements.txt
10
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
11
 
12
  COPY --chown=user . /app
13
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
 
3
+ ENV PYTHONDONTWRITEBYTECODE=1 \
4
+ PYTHONUNBUFFERED=1 \
5
+ PIP_NO_CACHE_DIR=1 \
6
+ PATH="/home/user/.local/bin:$PATH" \
7
+ HF_HOME="/home/user/.cache/huggingface" \
8
+ WHISPER_CACHE_DIR="/home/user/.cache/whisper"
9
 
10
+ RUN apt-get update \
11
+ && apt-get install -y --no-install-recommends \
12
+ build-essential \
13
+ ffmpeg \
14
+ git \
15
+ libsndfile1 \
16
+ && rm -rf /var/lib/apt/lists/*
17
+
18
+ ARG WHISPERLIVEKIT_REPO="https://github.com/Alimjoo/WhisperLiveKit-ug.git"
19
+ ARG WHISPERLIVEKIT_REF=""
20
+
21
+ RUN git clone --depth 1 "${WHISPERLIVEKIT_REPO}" /opt/WhisperLiveKit-ug \
22
+ && if [ -n "${WHISPERLIVEKIT_REF}" ]; then \
23
+ cd /opt/WhisperLiveKit-ug \
24
+ && git fetch --depth 1 origin "${WHISPERLIVEKIT_REF}" \
25
+ && git checkout FETCH_HEAD; \
26
+ fi
27
+
28
+ WORKDIR /opt/WhisperLiveKit-ug
29
+ RUN pip install --upgrade pip setuptools wheel \
30
+ && pip install -e .
31
+
32
+ RUN useradd -m -u 1000 user \
33
+ && mkdir -p /app /home/user/models "${HF_HOME}" "${WHISPER_CACHE_DIR}" \
34
+ && chown -R user:user /app /home/user /opt/WhisperLiveKit-ug
35
 
36
+ WORKDIR /app
37
  COPY --chown=user ./requirements.txt requirements.txt
38
+ RUN pip install -r requirements.txt
39
 
40
  COPY --chown=user . /app
41
+ RUN chmod +x /app/start.sh
42
+
43
+ USER user
44
+
45
+ ENV HF_MODEL_REPO="piyazon/whisper_uyghur_pt" \
46
+ HF_MODEL_FILENAMES="uyghur_whisper_base.pt uyghur_whisper_tiny.pt" \
47
+ WHISPER_MODEL_FILENAME="uyghur_whisper_base.pt" \
48
+ WHISPER_LANGUAGE="ug" \
49
+ WHISPER_BACKEND="whisper" \
50
+ PORT="7860"
51
+
52
+ EXPOSE 7860
53
+ CMD ["/app/start.sh"]
README.md CHANGED
@@ -4,7 +4,32 @@ emoji: 🔥
4
  colorFrom: pink
5
  colorTo: gray
6
  sdk: docker
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  colorFrom: pink
5
  colorTo: gray
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
+ # ASR Streaming Server
12
+
13
+ Docker Space for running `https://github.com/Alimjoo/WhisperLiveKit-ug.git`
14
+ with Uyghur Whisper `.pt` models from `piyazon/whisper_uyghur_pt`.
15
+
16
+ ## Space secrets
17
+
18
+ Set `HF_TOKEN` as a Space secret with read access to the private model repo.
19
+ `HUGGING_FACE_HUB_TOKEN` is also supported.
20
+
21
+ ## Defaults
22
+
23
+ - WhisperLiveKit repo: `https://github.com/Alimjoo/WhisperLiveKit-ug.git`
24
+ - Hugging Face model repo: `piyazon/whisper_uyghur_pt`
25
+ - Downloaded model files: `uyghur_whisper_base.pt uyghur_whisper_tiny.pt`
26
+ - Started model: `uyghur_whisper_base.pt`
27
+ - Port: `7860`
28
+
29
+ ## Runtime configuration
30
+
31
+ - `WHISPER_MODEL_FILENAME=uyghur_whisper_tiny.pt` starts the tiny model instead.
32
+ - `WHISPER_MODEL_PATH=/path/to/model.pt` skips Hugging Face download.
33
+ - `WHISPER_LANGUAGE=auto` can be used if the fork rejects `ug`.
34
+ - `WHISPERLIVEKIT_ARGS` appends extra CLI arguments, for example
35
+ `--backend-policy localagreement`.
download_models.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from huggingface_hub import hf_hub_download
6
+
7
+
8
+ DEFAULT_MODEL_REPO = "piyazon/whisper_uyghur_pt"
9
+ DEFAULT_MODEL_FILENAMES = "uyghur_whisper_base.pt uyghur_whisper_tiny.pt"
10
+ DEFAULT_SELECTED_MODEL = "uyghur_whisper_base.pt"
11
+
12
+
13
+ def _split_filenames(value: str) -> list[str]:
14
+ return [item.strip() for item in value.replace(",", " ").split() if item.strip()]
15
+
16
+
17
def _log(message: str) -> None:
    """Write a diagnostic line to stderr and flush it immediately.

    Stdout is kept free of diagnostics because ``main`` prints the selected
    model path there.
    """
    print(message, flush=True, file=sys.stderr)
19
+
20
+
21
def main() -> int:
    """Make sure the configured model files exist locally and print one path.

    Configuration is read from environment variables:

    * ``HF_MODEL_REPO`` - Hugging Face repository to download from.
    * ``HF_MODEL_FILENAMES`` - comma/whitespace-separated files to fetch.
    * ``WHISPER_MODEL_FILENAME`` - the file whose path is printed on stdout;
      it is added to the download list when missing from it.
    * ``MODEL_DIR`` - local directory that holds the model files.
    * ``HF_TOKEN`` / ``HUGGING_FACE_HUB_TOKEN`` - optional access token.

    Files already present in the model directory (regular, non-empty) are
    reused; everything else is fetched with ``hf_hub_download``.

    Returns:
        ``0`` after printing the selected model's path to stdout, or ``1`` as
        soon as any download fails (details are logged to stderr).
    """
    # Use `or` instead of a getenv default: a variable that is set but empty
    # must fall back to the default too. An empty selected filename would
    # otherwise resolve to the model directory itself and be reported as a
    # "cached model".
    repo_id = os.getenv("HF_MODEL_REPO") or DEFAULT_MODEL_REPO
    selected_filename = (
        (os.getenv("WHISPER_MODEL_FILENAME") or "").strip() or DEFAULT_SELECTED_MODEL
    )
    model_dir = Path(os.getenv("MODEL_DIR") or "/home/user/models").expanduser()
    token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")

    # An explicitly empty HF_MODEL_FILENAMES is honoured (no extra files);
    # the selected model is still added below.
    filenames = _split_filenames(os.getenv("HF_MODEL_FILENAMES", DEFAULT_MODEL_FILENAMES))
    if selected_filename not in filenames:
        filenames.insert(0, selected_filename)

    model_dir.mkdir(parents=True, exist_ok=True)

    downloaded_paths: dict[str, str] = {}
    for filename in filenames:
        cached_path = model_dir / filename
        # is_file() rather than exists(): a directory at this path is not a
        # usable model file. Zero-byte files are re-downloaded.
        if cached_path.is_file() and cached_path.stat().st_size > 0:
            _log(f"Using cached model: {cached_path}")
            downloaded_paths[filename] = str(cached_path)
            continue

        _log(f"Downloading {repo_id}/{filename} to {model_dir}")
        try:
            downloaded_paths[filename] = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                local_dir=str(model_dir),
                token=token,
            )
        except Exception as exc:
            # Top-level boundary: report the failure and signal it through
            # the exit status instead of a traceback.
            _log(f"Failed to download {repo_id}/{filename}: {exc}")
            _log(
                "If the model repository is private, add an HF_TOKEN Space secret "
                "with read access to the model repo."
            )
            return 1

    # Only the selected model's path goes to stdout.
    print(downloaded_paths[selected_filename], flush=True)
    return 0
59
+
60
+
61
# Script entry point: the process exit status is main()'s return code.
if __name__ == "__main__":
    sys.exit(main())
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  fastapi
2
- uvicorn[standard]
 
 
 
1
  fastapi
2
+ uvicorn[standard]
3
+ huggingface_hub>=0.23
4
+ hf_xet>=1.0
start.sh ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Container entrypoint: resolve the model file, locate the WhisperLiveKit CLI,
# and start the server with only the options this CLI build lists in --help.
set -euo pipefail

PORT="${PORT:-7860}"
MODEL_PATH="${WHISPER_MODEL_PATH:-}"
WHISPER_LANGUAGE="${WHISPER_LANGUAGE:-ug}"
WHISPER_BACKEND="${WHISPER_BACKEND:-whisper}"
WHISPERLIVEKIT_ARGS="${WHISPERLIVEKIT_ARGS:-}"

# No explicit model path: download_models.py prints the selected model's path
# on stdout. If it exits non-zero, `set -e` aborts the script here.
if [ -z "${MODEL_PATH}" ]; then
  MODEL_PATH="$(python /app/download_models.py)"
fi

if command -v wlk >/dev/null 2>&1; then
  WLK_CMD="wlk"
elif command -v whisperlivekit-server >/dev/null 2>&1; then
  WLK_CMD="whisperlivekit-server"
else
  echo "Neither 'wlk' nor 'whisperlivekit-server' is available on PATH." >&2
  exit 1
fi

# Capture the help output once; a failing --help is tolerated, in which case
# no optional flag is detected below.
HELP_TEXT="$("${WLK_CMD}" --help 2>&1 || true)"

# has_flag FLAG: succeed only when FLAG appears in the help text as a complete
# option name. A plain substring search for "--backend" would also match
# "--backend-policy", and the shorter flag could then be passed to a CLI that
# does not define it.
# NOTE(review): presumably the CLI uses argparse, which would expand such a
# prefix to the longer option - confirm against the fork.
has_flag() {
  local flag="$1"
  grep -Eq -- "(^|[^[:alnum:]_-])${flag}([^[:alnum:]_-]|\$)" <<<"${HELP_TEXT}"
}

ARGS=(--host 0.0.0.0 --port "${PORT}")

# Prefer --model-path when the CLI advertises it; otherwise fall back to --model.
if has_flag "--model-path"; then
  ARGS+=(--model-path "${MODEL_PATH}")
else
  ARGS+=(--model "${MODEL_PATH}")
fi

if [ -n "${WHISPER_LANGUAGE}" ] && has_flag "--language"; then
  ARGS+=(--language "${WHISPER_LANGUAGE}")
fi

if [ -n "${WHISPER_BACKEND}" ] && has_flag "--backend"; then
  ARGS+=(--backend "${WHISPER_BACKEND}")
fi

# Extra arguments are split on whitespace only; quotes inside
# WHISPERLIVEKIT_ARGS are not interpreted.
if [ -n "${WHISPERLIVEKIT_ARGS}" ]; then
  read -r -a EXTRA_ARGS <<<"${WHISPERLIVEKIT_ARGS}"
  # A whitespace-only value yields an empty array; skip it so `set -u` on
  # older bash versions does not trip on the expansion.
  if [ "${#EXTRA_ARGS[@]}" -gt 0 ]; then
    ARGS+=("${EXTRA_ARGS[@]}")
  fi
fi

echo "Starting WhisperLiveKit on 0.0.0.0:${PORT} with model ${MODEL_PATH}"
exec "${WLK_CMD}" "${ARGS[@]}"