Spaces:
Sleeping
Sleeping
Deploy Polyglot backend with quantized models
Browse files- Dockerfile +13 -4
- README.md +52 -40
- README.md.bak +40 -0
- download_code.py +79 -0
Dockerfile
CHANGED
|
@@ -20,10 +20,20 @@ RUN apt-get update && apt-get install -y \
|
|
| 20 |
COPY requirements.txt .
|
| 21 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 22 |
|
| 23 |
-
# Copy
|
| 24 |
-
COPY
|
| 25 |
COPY preload_models.py .
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Set environment variables for caching
|
| 28 |
ENV HF_HOME=/app/.cache
|
| 29 |
ENV TRANSFORMERS_CACHE=/app/.cache
|
|
@@ -41,8 +51,7 @@ RUN mkdir -p $NUMBA_CACHE_DIR && chmod -R 777 $NUMBA_CACHE_DIR
|
|
| 41 |
RUN mkdir -p /app/data/learning/users && chmod -R 777 /app/data
|
| 42 |
|
| 43 |
# Download models using HF token from environment
|
| 44 |
-
# HuggingFace Spaces automatically provides HUGGING_FACE_HUB_TOKEN
|
| 45 |
-
ARG HUGGING_FACE_HUB_TOKEN
|
| 46 |
RUN python preload_models.py $HUGGING_FACE_HUB_TOKEN || echo "Model preload skipped - will download on first use"
|
| 47 |
|
| 48 |
# Expose port 7860 (HuggingFace Spaces standard)
|
|
|
|
| 20 |
COPY requirements.txt .
|
| 21 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 22 |
|
| 23 |
+
# Copy download script and preload script
|
| 24 |
+
COPY download_code.py .
|
| 25 |
COPY preload_models.py .
|
| 26 |
|
| 27 |
+
# Download application code from private code space
|
| 28 |
+
# CODE_SPACE_ID should be set as a Space secret (e.g., "mutisya/polyglot-backend-code")
|
| 29 |
+
ARG CODE_SPACE_ID
|
| 30 |
+
ARG HUGGING_FACE_HUB_TOKEN
|
| 31 |
+
RUN if [ -n "$CODE_SPACE_ID" ] && [ -n "$HUGGING_FACE_HUB_TOKEN" ]; then \
|
| 32 |
+
python download_code.py "$CODE_SPACE_ID" "$HUGGING_FACE_HUB_TOKEN" || echo "Code download failed - using local files"; \
|
| 33 |
+
else \
|
| 34 |
+
echo "WARNING: CODE_SPACE_ID or token not provided - code must be copied locally"; \
|
| 35 |
+
fi
|
| 36 |
+
|
| 37 |
# Set environment variables for caching
|
| 38 |
ENV HF_HOME=/app/.cache
|
| 39 |
ENV TRANSFORMERS_CACHE=/app/.cache
|
|
|
|
| 51 |
RUN mkdir -p /app/data/learning/users && chmod -R 777 /app/data
|
| 52 |
|
| 53 |
# Download models using HF token from environment
|
| 54 |
+
# HuggingFace Spaces automatically provides HUGGING_FACE_HUB_TOKEN (already defined above)
|
|
|
|
| 55 |
RUN python preload_models.py $HUGGING_FACE_HUB_TOKEN || echo "Model preload skipped - will download on first use"
|
| 56 |
|
| 57 |
# Expose port 7860 (HuggingFace Spaces standard)
|
README.md
CHANGED
|
@@ -1,40 +1,52 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Polyglot Translation Backend
|
| 3 |
-
emoji: 🌍
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: green
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
license: mit
|
| 9 |
-
app_port: 7860
|
| 10 |
-
---
|
| 11 |
-
|
| 12 |
-
# Polyglot Translation Backend - Quantized Models
|
| 13 |
-
|
| 14 |
-
Real-time speech transcription and translation API with Socket.IO for WebSocket communication. This version uses INT8 quantized models for improved performance and reduced memory footprint.
|
| 15 |
-
|
| 16 |
-
## Features
|
| 17 |
-
|
| 18 |
-
- **Real-time Speech Recognition**: Support for English, Swahili, Kikuyu, Kamba, Kimeru, Luo, and Somali
|
| 19 |
-
- **Translation**: Multi-language translation using NLLB models
|
| 20 |
-
- **Text-to-Speech**: Generate speech in multiple languages
|
| 21 |
-
- **WebSocket Support**: Real-time communication via Socket.IO
|
| 22 |
-
- **Model Quantization**: INT8 dynamic quantization for faster inference
|
| 23 |
-
|
| 24 |
-
## API Endpoints
|
| 25 |
-
|
| 26 |
-
- `GET /health` - Health check endpoint
|
| 27 |
-
- `WebSocket /` - Socket.IO connection for real-time communication
|
| 28 |
-
|
| 29 |
-
## Environment
|
| 30 |
-
|
| 31 |
-
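This Space requires a HuggingFace token for model access. The token is automatically provided by HuggingFace Spaces when configured as a secret.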
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
- **Optimization**: INT8 dynamic quantization via PyTorch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Polyglot Translation Backend
|
| 3 |
+
emoji: 🌍
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
app_port: 7860
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Polyglot Translation Backend - Quantized Models
|
| 13 |
+
|
| 14 |
+
Real-time speech transcription and translation API with Socket.IO for WebSocket communication. This version uses INT8 quantized models for improved performance and reduced memory footprint.
|
| 15 |
+
|
| 16 |
+
## Features
|
| 17 |
+
|
| 18 |
+
- **Real-time Speech Recognition**: Support for English, Swahili, Kikuyu, Kamba, Kimeru, Luo, and Somali
|
| 19 |
+
- **Translation**: Multi-language translation using NLLB models
|
| 20 |
+
- **Text-to-Speech**: Generate speech in multiple languages
|
| 21 |
+
- **WebSocket Support**: Real-time communication via Socket.IO
|
| 22 |
+
- **Model Quantization**: INT8 dynamic quantization for faster inference
|
| 23 |
+
|
| 24 |
+
## API Endpoints
|
| 25 |
+
|
| 26 |
+
- `GET /health` - Health check endpoint
|
| 27 |
+
- `WebSocket /` - Socket.IO connection for real-time communication
|
| 28 |
+
|
| 29 |
+
## Environment
|
| 30 |
+
|
| 31 |
+
This Space requires the following secrets to be configured:
|
| 32 |
+
|
| 33 |
+
- `HUGGING_FACE_HUB_TOKEN` - HuggingFace token for model access
|
| 34 |
+
- `CODE_SPACE_ID` - ID of the private code space (e.g., "mutisya/polyglot-backend-code")
|
| 35 |
+
|
| 36 |
+
### Code Space Architecture
|
| 37 |
+
|
| 38 |
+
This Docker Space downloads the application code from a separate private Space during build time. This allows the Docker Space to be public while keeping the source code private.
|
| 39 |
+
|
| 40 |
+
- **Public Docker Space** (this one): Contains only the Dockerfile and deployment configuration
|
| 41 |
+
- **Private Code Space**: Contains the actual application code (`app/`) and data (`data/`)
|
| 42 |
+
|
| 43 |
+
During the build process, the Dockerfile downloads the code from the private space using the HuggingFace Hub API.
|
| 44 |
+
|
| 45 |
+
## Technical Details
|
| 46 |
+
|
| 47 |
+
- **Framework**: FastAPI with Socket.IO
|
| 48 |
+
- **Models**:
|
| 49 |
+
- ASR: Whisper (English) and Wav2Vec2-BERT (African languages)
|
| 50 |
+
- Translation: NLLB-600M fine-tuned model
|
| 51 |
+
- TTS: VITS models for each language
|
| 52 |
+
- **Optimization**: INT8 dynamic quantization via PyTorch
|
README.md.bak
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Polyglot Translation Backend
|
| 3 |
+
emoji: 🌍
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
app_port: 7860
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Polyglot Translation Backend - Quantized Models
|
| 13 |
+
|
| 14 |
+
Real-time speech transcription and translation API with Socket.IO for WebSocket communication. This version uses INT8 quantized models for improved performance and reduced memory footprint.
|
| 15 |
+
|
| 16 |
+
## Features
|
| 17 |
+
|
| 18 |
+
- **Real-time Speech Recognition**: Support for English, Swahili, Kikuyu, Kamba, Kimeru, Luo, and Somali
|
| 19 |
+
- **Translation**: Multi-language translation using NLLB models
|
| 20 |
+
- **Text-to-Speech**: Generate speech in multiple languages
|
| 21 |
+
- **WebSocket Support**: Real-time communication via Socket.IO
|
| 22 |
+
- **Model Quantization**: INT8 dynamic quantization for faster inference
|
| 23 |
+
|
| 24 |
+
## API Endpoints
|
| 25 |
+
|
| 26 |
+
- `GET /health` - Health check endpoint
|
| 27 |
+
- `WebSocket /` - Socket.IO connection for real-time communication
|
| 28 |
+
|
| 29 |
+
## Environment
|
| 30 |
+
|
| 31 |
+
This Space requires a HuggingFace token for model access. The token is automatically provided by HuggingFace Spaces when configured as a secret.
|
| 32 |
+
|
| 33 |
+
## Technical Details
|
| 34 |
+
|
| 35 |
+
- **Framework**: FastAPI with Socket.IO
|
| 36 |
+
- **Models**:
|
| 37 |
+
- ASR: Whisper (English) and Wav2Vec2-BERT (African languages)
|
| 38 |
+
- Translation: NLLB-600M fine-tuned model
|
| 39 |
+
- TTS: VITS models for each language
|
| 40 |
+
- **Optimization**: INT8 dynamic quantization via PyTorch
|
download_code.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Download application code from private HuggingFace Space
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
from huggingface_hub import snapshot_download
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
def download_code(code_space_id, token):
    """
    Download app and data from the private code space

    The space is fetched into a staging directory under /tmp, then its
    ``app/`` and ``data/`` sub-directories replace ``/app/app`` and
    ``/app/data``. The staging directory is always removed afterwards,
    including when the download or a move fails, so partially downloaded
    private code is never left behind.

    Args:
        code_space_id: Full space ID (e.g., "mutisya/polyglot-backend-code")
        token: HuggingFace token for authentication

    Returns:
        True if the download and the moves completed, False if any step
        raised (the error and traceback are printed, not re-raised).
    """
    # Function-scope imports, as in the original, so the block does not
    # depend on the module's import header.
    import shutil
    import traceback

    staging_dir = Path("/tmp/code_download")
    install_root = Path("/app")

    print(f"Downloading code from: {code_space_id}")

    try:
        # Download the entire space to a temporary directory
        download_path = snapshot_download(
            repo_id=code_space_id,
            repo_type="space",
            token=token,
            local_dir=str(staging_dir),
            local_dir_use_symlinks=False
        )

        print(f"Code downloaded to: {download_path}")

        # Move app and data to the correct locations; a missing directory
        # is only a warning, not a failure.
        for name in ("app", "data"):
            source = staging_dir / name
            target = install_root / name
            if not source.exists():
                print(f"WARNING: {name}/ directory not found in code space")
                continue
            # Replace any existing copy rather than merging into it.
            if target.exists():
                shutil.rmtree(target)
            shutil.move(str(source), str(target))
            print(f"OK {name}/ directory copied")

        print("OK Code download complete")
        return True

    except Exception as e:
        # Top-level boundary for the build step: report and signal failure
        # through the return value so the caller chooses the exit code.
        print(f"ERROR downloading code: {e}")
        traceback.print_exc()
        return False

    finally:
        # Clean up on every path; a cleanup problem must not turn a
        # successful install into a failure.
        shutil.rmtree(staging_dir, ignore_errors=True)
|
| 65 |
+
|
| 66 |
+
if __name__ == "__main__":
    # CLI entry point: <code_space_id> is required; the token may be given
    # as the second argument or via the HUGGING_FACE_HUB_TOKEN env variable.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python download_code.py <code_space_id> [token]")
        sys.exit(1)

    code_space_id = cli_args[0]
    if len(cli_args) > 1:
        token = cli_args[1]
    else:
        token = os.getenv("HUGGING_FACE_HUB_TOKEN")

    # An empty or missing token is rejected before any network call.
    if not token:
        print("ERROR: No HuggingFace token provided")
        sys.exit(1)

    # Exit status mirrors the download result: 0 on success, 1 on failure.
    success = download_code(code_space_id, token)
    if success:
        sys.exit(0)
    sys.exit(1)
|