Spaces:
Sleeping
Sleeping
Piyazon committed on
Commit ·
2b84626
1
Parent(s): 6d6cbe5
test initialize
Browse files- Dockerfile +47 -7
- README.md +26 -1
- download_models.py +62 -0
- requirements.txt +3 -1
- start.sh +48 -0
Dockerfile
CHANGED
|
@@ -1,13 +1,53 @@
|
|
| 1 |
-
FROM python:3.
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
|
|
|
| 9 |
COPY --chown=user ./requirements.txt requirements.txt
|
| 10 |
-
RUN pip install -
|
| 11 |
|
| 12 |
COPY --chown=user . /app
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

# Python/pip runtime settings, plus cache locations under the non-root user's home.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PATH="/home/user/.local/bin:$PATH" \
    HF_HOME="/home/user/.cache/huggingface" \
    WHISPER_CACHE_DIR="/home/user/.cache/whisper"

# System packages; the apt package lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        git \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Repository to install from; WHISPERLIVEKIT_REF optionally selects a ref to check out.
ARG WHISPERLIVEKIT_REPO="https://github.com/Alimjoo/WhisperLiveKit-ug.git"
ARG WHISPERLIVEKIT_REF=""

# Shallow-clone the repository; when a ref is given, fetch only that ref and check it out.
RUN git clone --depth 1 "${WHISPERLIVEKIT_REPO}" /opt/WhisperLiveKit-ug \
    && if [ -n "${WHISPERLIVEKIT_REF}" ]; then \
        cd /opt/WhisperLiveKit-ug \
        && git fetch --depth 1 origin "${WHISPERLIVEKIT_REF}" \
        && git checkout FETCH_HEAD; \
    fi

# Editable install of the cloned project.
WORKDIR /opt/WhisperLiveKit-ug
RUN pip install --upgrade pip setuptools wheel \
    && pip install -e .

# Create the non-root user (uid 1000) and hand it the app, model, cache and source directories.
RUN useradd -m -u 1000 user \
    && mkdir -p /app /home/user/models "${HF_HOME}" "${WHISPER_CACHE_DIR}" \
    && chown -R user:user /app /home/user /opt/WhisperLiveKit-ug

WORKDIR /app
# Copy requirements.txt on its own first so the dependency layer is cached separately
# from the rest of the application files.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install -r requirements.txt

COPY --chown=user . /app
RUN chmod +x /app/start.sh

USER user

# Runtime defaults read by start.sh and download_models.py; each can be overridden at run time.
ENV HF_MODEL_REPO="piyazon/whisper_uyghur_pt" \
    HF_MODEL_FILENAMES="uyghur_whisper_base.pt uyghur_whisper_tiny.pt" \
    WHISPER_MODEL_FILENAME="uyghur_whisper_base.pt" \
    WHISPER_LANGUAGE="ug" \
    WHISPER_BACKEND="whisper" \
    PORT="7860"

EXPOSE 7860
CMD ["/app/start.sh"]
|
README.md
CHANGED
|
@@ -4,7 +4,32 @@ emoji: 🔥
|
|
| 4 |
colorFrom: pink
|
| 5 |
colorTo: gray
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
colorFrom: pink
|
| 5 |
colorTo: gray
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# ASR Streaming Server
|
| 12 |
+
|
| 13 |
+
Docker Space for running `https://github.com/Alimjoo/WhisperLiveKit-ug.git`
|
| 14 |
+
with Uyghur Whisper `.pt` models from `piyazon/whisper_uyghur_pt`.
|
| 15 |
+
|
| 16 |
+
## Space secrets
|
| 17 |
+
|
| 18 |
+
Set `HF_TOKEN` as a Space secret with read access to the private model repo.
|
| 19 |
+
`HUGGING_FACE_HUB_TOKEN` is also supported.
|
| 20 |
+
|
| 21 |
+
## Defaults
|
| 22 |
+
|
| 23 |
+
- WhisperLiveKit repo: `https://github.com/Alimjoo/WhisperLiveKit-ug.git`
|
| 24 |
+
- Hugging Face model repo: `piyazon/whisper_uyghur_pt`
|
| 25 |
+
- Downloaded model files: `uyghur_whisper_base.pt uyghur_whisper_tiny.pt`
|
| 26 |
+
- Started model: `uyghur_whisper_base.pt`
|
| 27 |
+
- Port: `7860`
|
| 28 |
+
|
| 29 |
+
## Runtime configuration
|
| 30 |
+
|
| 31 |
+
- `WHISPER_MODEL_FILENAME=uyghur_whisper_tiny.pt` starts the tiny model instead.
|
| 32 |
+
- `WHISPER_MODEL_PATH=/path/to/model.pt` skips Hugging Face download.
|
| 33 |
+
- `WHISPER_LANGUAGE=auto` can be used if the fork rejects `ug`.
|
| 34 |
+
- `WHISPERLIVEKIT_ARGS` appends extra CLI arguments, for example
|
| 35 |
+
`--backend-policy localagreement`.
|
download_models.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
from huggingface_hub import hf_hub_download
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Fallback values used when the corresponding environment variables are unset.
DEFAULT_MODEL_REPO = "piyazon/whisper_uyghur_pt"
DEFAULT_MODEL_FILENAMES = "uyghur_whisper_base.pt uyghur_whisper_tiny.pt"
DEFAULT_SELECTED_MODEL = "uyghur_whisper_base.pt"
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def _split_filenames(value: str) -> list[str]:
    """Split a comma- and/or whitespace-separated string into filenames.

    Commas are treated like whitespace, so ``"a.pt, b.pt"`` and ``"a.pt b.pt"``
    both yield ``["a.pt", "b.pt"]``. Empty entries are dropped, and an empty or
    separator-only input yields an empty list.
    """
    # str.split() with no separator already trims and discards empty fields,
    # so no per-item strip/filter is needed.
    return value.replace(",", " ").split()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _log(message: str) -> None:
    """Write *message* to stderr and flush immediately.

    Diagnostics are sent to stderr rather than stdout, which keeps them out of
    anything a caller captures from this script's standard output.
    """
    print(message, file=sys.stderr, flush=True)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def main() -> int:
    """Ensure the configured model files exist locally and print the selected path.

    Environment variables read:
        HF_MODEL_REPO: Hugging Face repo id (default ``DEFAULT_MODEL_REPO``).
        HF_MODEL_FILENAMES: comma/whitespace separated files to fetch
            (default ``DEFAULT_MODEL_FILENAMES``).
        WHISPER_MODEL_FILENAME: file whose local path is printed on stdout
            (default ``DEFAULT_SELECTED_MODEL``); it is fetched even when it is
            not listed in ``HF_MODEL_FILENAMES``.
        MODEL_DIR: target directory (default ``/home/user/models``).
        HF_TOKEN / HUGGING_FACE_HUB_TOKEN: optional access token.

    Returns:
        0 on success, after printing the selected model's path to stdout;
        1 as soon as any download fails (details are logged to stderr).
    """
    repo_id = os.getenv("HF_MODEL_REPO", DEFAULT_MODEL_REPO)
    # An empty or whitespace-only override would otherwise resolve to the model
    # directory itself, so fall back to the default selection in that case.
    selected_filename = (
        os.getenv("WHISPER_MODEL_FILENAME", DEFAULT_SELECTED_MODEL).strip()
        or DEFAULT_SELECTED_MODEL
    )
    model_dir = Path(os.getenv("MODEL_DIR", "/home/user/models")).expanduser()
    token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")

    filenames = _split_filenames(os.getenv("HF_MODEL_FILENAMES", DEFAULT_MODEL_FILENAMES))
    if selected_filename not in filenames:
        # Fetch the selected model first so it is always available.
        filenames.insert(0, selected_filename)

    model_dir.mkdir(parents=True, exist_ok=True)

    downloaded_paths: dict[str, str] = {}
    for filename in filenames:
        cached_path = model_dir / filename
        # is_file() rather than exists(): a directory at this path must not be
        # mistaken for an already-downloaded model.
        if cached_path.is_file() and cached_path.stat().st_size > 0:
            _log(f"Using cached model: {cached_path}")
            downloaded_paths[filename] = str(cached_path)
            continue

        _log(f"Downloading {repo_id}/{filename} to {model_dir}")
        try:
            downloaded_paths[filename] = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                local_dir=str(model_dir),
                token=token,
            )
        except Exception as exc:
            # Top-level boundary of the script: report the failure and signal
            # it through the exit status instead of a traceback.
            _log(f"Failed to download {repo_id}/{filename}: {exc}")
            _log(
                "If the model repository is private, add an HF_TOKEN Space secret "
                "with read access to the model repo."
            )
            return 1

    # stdout carries only the selected model path; all diagnostics go to stderr.
    print(downloaded_paths[selected_filename], flush=True)
    return 0
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
requirements.txt
CHANGED
|
@@ -1,2 +1,4 @@
|
|
| 1 |
fastapi
|
| 2 |
-
uvicorn[standard]
|
|
|
|
|
|
|
|
|
| 1 |
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
huggingface_hub>=0.23
|
| 4 |
+
hf_xet>=1.0
|
start.sh
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Container entrypoint: resolve the model path, detect which CLI flags the
# installed WhisperLiveKit command advertises, then exec the server.
set -euo pipefail

PORT="${PORT:-7860}"
MODEL_PATH="${WHISPER_MODEL_PATH:-}"
WHISPER_LANGUAGE="${WHISPER_LANGUAGE:-ug}"
WHISPER_BACKEND="${WHISPER_BACKEND:-whisper}"
WHISPERLIVEKIT_ARGS="${WHISPERLIVEKIT_ARGS:-}"

# No explicit path given: run the download helper, which prints the selected
# model's path on stdout (a non-zero exit aborts here because of `set -e`).
if [ -z "${MODEL_PATH}" ]; then
  MODEL_PATH="$(python /app/download_models.py)"
fi

# Fail early with a clear message instead of starting the server without a model.
if [ -z "${MODEL_PATH}" ]; then
  echo "Model path is empty; set WHISPER_MODEL_PATH or check the download_models.py output." >&2
  exit 1
fi

if command -v wlk >/dev/null 2>&1; then
  WLK_CMD="wlk"
elif command -v whisperlivekit-server >/dev/null 2>&1; then
  WLK_CMD="whisperlivekit-server"
else
  echo "Neither 'wlk' nor 'whisperlivekit-server' is available on PATH." >&2
  exit 1
fi

# Capture the help text once; a failing --help is tolerated and simply means
# no optional flag is detected below.
HELP_FILE="/tmp/whisperlivekit-help.txt"
"${WLK_CMD}" --help >"${HELP_FILE}" 2>&1 || true

# supports_flag FLAG: succeed when the help text lists FLAG as a whole option.
# A flag character is not allowed on either side of the match, so "--backend"
# is not detected merely because "--backend-policy" is present.
supports_flag() {
  grep -Eq -- "(^|[^[:alnum:]_-])${1}([^[:alnum:]_-]|\$)" "${HELP_FILE}"
}

ARGS=(--host 0.0.0.0 --port "${PORT}")

if supports_flag "--model-path"; then
  ARGS+=(--model-path "${MODEL_PATH}")
else
  ARGS+=(--model "${MODEL_PATH}")
fi

if [ -n "${WHISPER_LANGUAGE}" ] && supports_flag "--language"; then
  ARGS+=(--language "${WHISPER_LANGUAGE}")
fi

if [ -n "${WHISPER_BACKEND}" ] && supports_flag "--backend"; then
  ARGS+=(--backend "${WHISPER_BACKEND}")
fi

# Extra arguments are split on whitespace only; quoting inside the value is not interpreted.
if [ -n "${WHISPERLIVEKIT_ARGS}" ]; then
  read -r -a EXTRA_ARGS <<<"${WHISPERLIVEKIT_ARGS}"
  ARGS+=("${EXTRA_ARGS[@]}")
fi

echo "Starting WhisperLiveKit on 0.0.0.0:${PORT} with model ${MODEL_PATH}"
exec "${WLK_CMD}" "${ARGS[@]}"
|