Spaces:
Running
Running
Commit ·
781f7b0
1
Parent(s): 0e5ada8
Add embeddings API, caching, and Docker revamp
Browse files
Implement embeddings endpoint (router, service, schemas) using
sentence-transformers and configurable embedding model. Add a TTL cache
and concurrent alternative-language fallback to the subtitles service.
Introduce rate limiting (slowapi), timing-safe API key verification and
updated API key middleware. Replace Dockerfile with a multi-stage
builder/runtime image, set model cache dirs, bump package version to
1.0.0, and update README, tests, .env.example, and .gitignore (ignore
models/)
- .env.example +19 -5
- .gitignore +1 -0
- Dockerfile +43 -47
- README.md +84 -329
- app/__init__.py +3 -1
- app/apis/__init__.py +6 -1
- app/apis/embeddings/__init__.py +6 -0
- app/apis/embeddings/router.py +51 -0
- app/apis/embeddings/schemas.py +44 -0
- app/apis/embeddings/service.py +70 -0
- app/apis/subtitles/__init__.py +7 -1
- app/apis/subtitles/router.py +20 -60
- app/apis/subtitles/schemas.py +9 -23
- app/apis/subtitles/service.py +104 -133
- app/core/__init__.py +26 -1
- app/core/config.py +40 -31
- app/core/security.py +13 -26
- app/main.py +19 -22
- app/middleware/__init__.py +6 -1
- app/middleware/api_key_auth.py +35 -51
- app/middleware/rate_limit.py +27 -0
- poetry.lock +0 -0
- pyproject.toml +5 -1
- scripts/run_dev.bat +0 -38
- scripts/run_dev.sh +0 -42
- scripts/run_docker.bat +0 -20
- scripts/run_docker.sh +0 -18
- tests/conftest.py +4 -8
- tests/test_security.py +14 -26
- tests/test_subtitles.py +53 -107
.env.example
CHANGED
|
@@ -1,18 +1,32 @@
|
|
| 1 |
-
# Environment variables
|
| 2 |
# Copy this file to .env and update the values
|
| 3 |
|
| 4 |
-
# API Security - Comma-separated list of valid API keys
|
| 5 |
-
API_KEYS=your-secret-key-1,your-secret-key-2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Logging configuration
|
| 8 |
LOG_LEVEL=INFO
|
| 9 |
|
| 10 |
# yt-dlp configuration
|
| 11 |
-
YT_DLP_BINARY=python -m yt_dlp
|
| 12 |
YT_DLP_TIMEOUT_LIST=30
|
| 13 |
YT_DLP_TIMEOUT_DOWNLOAD=60
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
# Server configuration
|
| 16 |
HOST=0.0.0.0
|
| 17 |
PORT=8000
|
| 18 |
-
RELOAD=true
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment variables for the multi-utility server
|
| 2 |
# Copy this file to .env and update the values
|
| 3 |
|
| 4 |
+
# API Security - Comma-separated list of valid API keys (REQUIRED)
|
| 5 |
+
API_KEYS=your-secret-key-1,your-secret-key-2
|
| 6 |
+
|
| 7 |
+
# CORS Configuration - Comma-separated list of allowed origins (optional, defaults to *)
|
| 8 |
+
CORS_ORIGINS=http://localhost:3000,https://yourdomain.com
|
| 9 |
+
|
| 10 |
+
# Rate Limiting
|
| 11 |
+
RATE_LIMIT_REQUESTS=100
|
| 12 |
+
RATE_LIMIT_WINDOW=60
|
| 13 |
|
| 14 |
# Logging configuration
|
| 15 |
LOG_LEVEL=INFO
|
| 16 |
|
| 17 |
# yt-dlp configuration
|
|
|
|
| 18 |
YT_DLP_TIMEOUT_LIST=30
|
| 19 |
YT_DLP_TIMEOUT_DOWNLOAD=60
|
| 20 |
|
| 21 |
+
# Embedding model configuration
|
| 22 |
+
EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1
|
| 23 |
+
# Models cache directory (set by Docker, optional for local dev)
|
| 24 |
+
# SENTENCE_TRANSFORMERS_HOME=models
|
| 25 |
+
|
| 26 |
# Server configuration
|
| 27 |
HOST=0.0.0.0
|
| 28 |
PORT=8000
|
| 29 |
+
RELOAD=true
|
| 30 |
+
|
| 31 |
+
# Container-specific (set automatically in Dockerfile)
|
| 32 |
+
# DISABLE_FILE_LOGGING=true
|
.gitignore
CHANGED
|
@@ -124,6 +124,7 @@ dmypy.json
|
|
| 124 |
|
| 125 |
# Project specific
|
| 126 |
logs/
|
|
|
|
| 127 |
*.log
|
| 128 |
.DS_Store
|
| 129 |
Thumbs.db
|
|
|
|
| 124 |
|
| 125 |
# Project specific
|
| 126 |
logs/
|
| 127 |
+
models/
|
| 128 |
*.log
|
| 129 |
.DS_Store
|
| 130 |
Thumbs.db
|
Dockerfile
CHANGED
|
@@ -1,62 +1,58 @@
|
|
| 1 |
-
#
|
| 2 |
-
FROM python:3.11-slim
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
ENV PYTHONUNBUFFERED=1 \
|
| 6 |
-
PYTHONDONTWRITEBYTECODE=1 \
|
| 7 |
-
POETRY_VERSION=1.7.1 \
|
| 8 |
-
POETRY_HOME="/opt/poetry" \
|
| 9 |
-
POETRY_VIRTUALENVS_IN_PROJECT=true \
|
| 10 |
-
POETRY_NO_INTERACTION=1
|
| 11 |
-
|
| 12 |
-
# Add Poetry to PATH
|
| 13 |
-
ENV PATH="$POETRY_HOME/bin:$PATH"
|
| 14 |
-
|
| 15 |
-
# Install system dependencies including curl for health checks
|
| 16 |
-
RUN apt-get update \
|
| 17 |
-
&& apt-get install -y --no-install-recommends \
|
| 18 |
-
curl \
|
| 19 |
-
build-essential \
|
| 20 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 21 |
|
| 22 |
# Install Poetry
|
| 23 |
-
RUN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
# Set work directory
|
| 26 |
WORKDIR /app
|
| 27 |
|
| 28 |
-
# Copy
|
| 29 |
-
COPY
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
|
| 33 |
-
RUN if [ ! -f poetry.lock ]; then \
|
| 34 |
-
echo "poetry.lock not found, generating it..." && \
|
| 35 |
-
poetry lock; \
|
| 36 |
-
fi
|
| 37 |
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
|
| 41 |
-
#
|
| 42 |
-
|
| 43 |
|
| 44 |
-
#
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
# Disable file logging
|
| 52 |
ENV DISABLE_FILE_LOGGING=true
|
| 53 |
|
| 54 |
-
|
| 55 |
-
EXPOSE 8000
|
| 56 |
|
| 57 |
-
# Health check
|
| 58 |
-
|
| 59 |
-
|
| 60 |
|
| 61 |
-
|
| 62 |
-
CMD ["poetry", "run", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
|
|
|
| 1 |
+
# Stage 1: Builder - Install dependencies
|
| 2 |
+
FROM python:3.11-slim AS builder
|
| 3 |
+
|
| 4 |
+
WORKDIR /app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# Install Poetry
|
| 7 |
+
RUN pip install --no-cache-dir poetry==1.8.0
|
| 8 |
+
|
| 9 |
+
# Configure Poetry for non-interactive installation
|
| 10 |
+
RUN poetry config virtualenvs.create false
|
| 11 |
+
|
| 12 |
+
# Copy only dependency files first (cache layer)
|
| 13 |
+
COPY pyproject.toml poetry.lock ./
|
| 14 |
+
|
| 15 |
+
# Install only production dependencies
|
| 16 |
+
RUN poetry install --only main --no-interaction --no-ansi --no-root
|
| 17 |
+
|
| 18 |
+
# Stage 2: Runtime - Minimal production image
|
| 19 |
+
FROM python:3.11-slim AS runtime
|
| 20 |
|
|
|
|
| 21 |
WORKDIR /app
|
| 22 |
|
| 23 |
+
# Copy installed packages from builder
|
| 24 |
+
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
|
| 25 |
+
COPY --from=builder /usr/local/bin /usr/local/bin
|
| 26 |
+
|
| 27 |
+
# Create non-root user for security
|
| 28 |
+
RUN useradd --create-home --shell /bin/bash appuser
|
| 29 |
|
| 30 |
+
# Create directories with proper ownership
|
| 31 |
+
RUN mkdir -p /app/logs /app/models && chown -R appuser:appuser /app
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
# Copy application code (separate layer for faster rebuilds)
|
| 34 |
+
COPY --chown=appuser:appuser app/ ./app/
|
| 35 |
|
| 36 |
+
# Switch to non-root user
|
| 37 |
+
USER appuser
|
| 38 |
|
| 39 |
+
# Environment configuration
|
| 40 |
+
ENV PYTHONDONTWRITEBYTECODE=1
|
| 41 |
+
ENV PYTHONUNBUFFERED=1
|
| 42 |
+
ENV PYTHONPATH=/app
|
| 43 |
|
| 44 |
+
# Hugging Face model cache directory
|
| 45 |
+
ENV HF_HOME=/app/models
|
| 46 |
+
ENV TRANSFORMERS_CACHE=/app/models
|
| 47 |
+
ENV SENTENCE_TRANSFORMERS_HOME=/app/models
|
| 48 |
|
| 49 |
+
# Disable file logging in container
|
| 50 |
ENV DISABLE_FILE_LOGGING=true
|
| 51 |
|
| 52 |
+
EXPOSE 7860
|
|
|
|
| 53 |
|
| 54 |
+
# Health check
|
| 55 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
| 56 |
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health')" || exit 1
|
| 57 |
|
| 58 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
README.md
CHANGED
|
@@ -1,392 +1,147 @@
|
|
| 1 |
# Multi-Utility FastAPI Server
|
| 2 |
|
| 3 |
-
A centralized, extensible FastAPI server
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
- **API Key Authentication**: Secure access with configurable API keys
|
| 9 |
-
- **Comprehensive Logging**: Structured logging with Loguru
|
| 10 |
-
- **Robust Error Handling**: Custom exceptions with proper HTTP status codes
|
| 11 |
-
- **YouTube Subtitle Extractor**: First utility - extract and clean YouTube subtitles
|
| 12 |
-
- **Async Support**: Full async/await support for better performance
|
| 13 |
-
- **Comprehensive Testing**: Unit and integration tests with pytest
|
| 14 |
-
- **Development Tools**: Pre-configured with Black, isort, flake8, and mypy
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
##
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
|
| 23 |
|
| 24 |
-
|
| 25 |
-
- Supports multiple language codes
|
| 26 |
-
- Automatic fallback to alternative language variants
|
| 27 |
-
- Text cleaning and deduplication
|
| 28 |
-
- Timeout protection
|
| 29 |
-
- Direct JSON response (no file storage)
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
- Python 3.11+
|
| 36 |
-
- Poetry (for dependency management)
|
| 37 |
-
|
| 38 |
-
### Setup
|
| 39 |
-
|
| 40 |
-
1. **Clone the repository**:
|
| 41 |
-
```bash
|
| 42 |
-
git clone <repository-url>
|
| 43 |
-
cd multi-utility-server
|
| 44 |
-
```
|
| 45 |
-
|
| 46 |
-
2. **Install dependencies**:
|
| 47 |
-
```bash
|
| 48 |
-
poetry install
|
| 49 |
-
```
|
| 50 |
-
|
| 51 |
-
3. **Configure environment**:
|
| 52 |
-
```bash
|
| 53 |
-
cp .env.example .env
|
| 54 |
-
# Edit .env file with your API keys and settings
|
| 55 |
-
```
|
| 56 |
-
|
| 57 |
-
4. **Run development server**:
|
| 58 |
-
```bash
|
| 59 |
-
# On Linux/macOS
|
| 60 |
-
./scripts/run_dev.sh
|
| 61 |
-
|
| 62 |
-
# On Windows
|
| 63 |
-
scripts\run_dev.bat
|
| 64 |
-
|
| 65 |
-
# Or manually
|
| 66 |
-
poetry run uvicorn app.main:app --reload
|
| 67 |
-
```
|
| 68 |
-
|
| 69 |
-
## 🔧 Configuration
|
| 70 |
-
|
| 71 |
-
### Environment Variables
|
| 72 |
-
|
| 73 |
-
Create a `.env` file (copy from `.env.example`):
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
|
| 79 |
-
#
|
| 80 |
-
|
|
|
|
| 81 |
|
| 82 |
-
#
|
| 83 |
-
YT_DLP_BINARY=python -m yt_dlp
|
| 84 |
-
YT_DLP_TIMEOUT_LIST=30
|
| 85 |
-
YT_DLP_TIMEOUT_DOWNLOAD=60
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
RELOAD=true
|
| 91 |
```
|
| 92 |
|
| 93 |
-
##
|
| 94 |
-
|
| 95 |
-
The server uses API key authentication. Configure your keys in the `.env` file:
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
``
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
##
|
| 102 |
|
| 103 |
### Authentication
|
| 104 |
|
| 105 |
-
All
|
| 106 |
|
| 107 |
```bash
|
| 108 |
-
|
| 109 |
-
curl -H "x-api-key: your-api-key" ...
|
| 110 |
```
|
| 111 |
|
| 112 |
-
###
|
| 113 |
-
|
| 114 |
-
#### Extract Subtitles
|
| 115 |
-
|
| 116 |
-
**POST** `/api/v1/subtitles/extract`
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
{
|
| 123 |
-
"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
| 124 |
-
"lang": "en"
|
| 125 |
-
}
|
| 126 |
```
|
| 127 |
|
| 128 |
-
**Response**
|
| 129 |
```json
|
| 130 |
{
|
| 131 |
"status": "success",
|
|
|
|
| 132 |
"language": "en",
|
| 133 |
-
"
|
| 134 |
-
"subtitles": [
|
| 135 |
-
"Never gonna give you up",
|
| 136 |
-
"Never gonna let you down",
|
| 137 |
-
"Never gonna run around and desert you"
|
| 138 |
-
]
|
| 139 |
}
|
| 140 |
```
|
| 141 |
|
| 142 |
-
|
| 143 |
-
- `400 Bad Request`: Invalid YouTube URL or parameters
|
| 144 |
-
- `401 Unauthorized`: Missing or invalid API key
|
| 145 |
-
- `404 Not Found`: No subtitles available in requested language
|
| 146 |
-
- `408 Request Timeout`: Subtitle extraction timed out
|
| 147 |
-
- `500 Internal Server Error`: yt-dlp error or unexpected failure
|
| 148 |
-
|
| 149 |
-
**Example Usage**:
|
| 150 |
|
| 151 |
```bash
|
| 152 |
-
|
| 153 |
-
curl -X POST "http://localhost:8000/api/v1/subtitles/extract" \
|
| 154 |
-H "Content-Type: application/json" \
|
| 155 |
-
-H "x-api-key: your-
|
| 156 |
-
-d '{
|
| 157 |
-
"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
| 158 |
-
"lang": "en"
|
| 159 |
-
}'
|
| 160 |
-
|
| 161 |
-
# Extract Spanish subtitles
|
| 162 |
-
curl -X POST "http://localhost:8000/api/v1/subtitles/extract" \
|
| 163 |
-
-H "Content-Type: application/json" \
|
| 164 |
-
-H "x-api-key: your-api-key" \
|
| 165 |
-
-d '{
|
| 166 |
-
"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
| 167 |
-
"lang": "es"
|
| 168 |
-
}'
|
| 169 |
```
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
**GET** `/api/v1/subtitles/health`
|
| 174 |
-
|
| 175 |
-
Check subtitles service health (no authentication required).
|
| 176 |
-
|
| 177 |
-
**Response**:
|
| 178 |
```json
|
| 179 |
{
|
| 180 |
-
"status": "
|
| 181 |
-
"
|
| 182 |
-
"
|
|
|
|
| 183 |
}
|
| 184 |
```
|
| 185 |
|
| 186 |
-
##
|
| 187 |
-
|
| 188 |
-
Run the test suite:
|
| 189 |
-
|
| 190 |
-
```bash
|
| 191 |
-
# Run all tests
|
| 192 |
-
poetry run pytest
|
| 193 |
-
|
| 194 |
-
# Run with coverage
|
| 195 |
-
poetry run pytest --cov=app
|
| 196 |
-
|
| 197 |
-
# Run specific test file
|
| 198 |
-
poetry run pytest tests/test_subtitles.py
|
| 199 |
-
|
| 200 |
-
# Run with verbose output
|
| 201 |
-
poetry run pytest -v
|
| 202 |
-
```
|
| 203 |
-
|
| 204 |
-
## 🏗️ Development
|
| 205 |
-
|
| 206 |
-
### Project Structure
|
| 207 |
|
| 208 |
```
|
| 209 |
-
|
| 210 |
-
├──
|
| 211 |
-
├──
|
| 212 |
-
├──
|
| 213 |
-
|
| 214 |
-
├──
|
| 215 |
-
|
| 216 |
-
│ ├── core/
|
| 217 |
-
│ │ ├── config.py # Environment-based config loader
|
| 218 |
-
│ │ ├── security.py # API key verification
|
| 219 |
-
│ │ ├── logging.py # Structured logging setup
|
| 220 |
-
│ │ └── exceptions.py # Custom exception definitions
|
| 221 |
-
│ ├── apis/
|
| 222 |
-
│ │ ├── __init__.py
|
| 223 |
-
│ │ ├── subtitles/
|
| 224 |
-
│ │ │ ├── router.py # FastAPI routes
|
| 225 |
-
│ │ │ ├── service.py # Business logic (yt-dlp, cleaning)
|
| 226 |
-
│ │ │ ├── schemas.py # Request/response models
|
| 227 |
-
│ │ │ └── utils.py # Helpers for text cleaning
|
| 228 |
-
│ │ └── <future_api>/
|
| 229 |
-
│ └── middleware/
|
| 230 |
-
│ └── api_key_auth.py # API key auth middleware
|
| 231 |
-
├── tests/
|
| 232 |
-
│ ├── test_subtitles.py
|
| 233 |
-
│ ├── test_security.py
|
| 234 |
-
│ └── conftest.py
|
| 235 |
-
└── scripts/
|
| 236 |
-
├── run_dev.sh # Linux/macOS dev script
|
| 237 |
-
└── run_dev.bat # Windows dev script
|
| 238 |
```
|
| 239 |
|
| 240 |
-
##
|
| 241 |
|
| 242 |
-
|
| 243 |
-
```bash
|
| 244 |
-
mkdir app/apis/your_new_api
|
| 245 |
-
touch app/apis/your_new_api/__init__.py
|
| 246 |
-
touch app/apis/your_new_api/router.py
|
| 247 |
-
touch app/apis/your_new_api/service.py
|
| 248 |
-
touch app/apis/your_new_api/schemas.py
|
| 249 |
-
```
|
| 250 |
|
| 251 |
-
|
|
|
|
|
|
|
| 252 |
|
| 253 |
-
|
| 254 |
-
```python
|
| 255 |
-
from app.apis.your_new_api.router import router as your_new_api_router
|
| 256 |
-
app.include_router(your_new_api_router)
|
| 257 |
-
```
|
| 258 |
-
|
| 259 |
-
4. **Add tests** in `tests/test_your_new_api.py`
|
| 260 |
-
|
| 261 |
-
### Code Quality
|
| 262 |
-
|
| 263 |
-
The project includes several code quality tools:
|
| 264 |
|
| 265 |
```bash
|
| 266 |
-
|
| 267 |
-
poetry run black .
|
| 268 |
-
poetry run isort .
|
| 269 |
-
|
| 270 |
-
# Lint code
|
| 271 |
-
poetry run flake8 .
|
| 272 |
-
|
| 273 |
-
# Type checking
|
| 274 |
-
poetry run mypy app/
|
| 275 |
-
|
| 276 |
-
# Run all quality checks
|
| 277 |
-
poetry run black . && poetry run isort . && poetry run flake8 . && poetry run mypy app/
|
| 278 |
```
|
| 279 |
|
| 280 |
-
##
|
| 281 |
-
|
| 282 |
-
### Using Docker
|
| 283 |
-
|
| 284 |
-
The project includes Docker support for easy deployment:
|
| 285 |
-
|
| 286 |
-
#### Quick Start with Docker Compose
|
| 287 |
-
|
| 288 |
-
1. **Build and run**:
|
| 289 |
-
```bash
|
| 290 |
-
# Production deployment
|
| 291 |
-
docker-compose up --build
|
| 292 |
-
|
| 293 |
-
# Development with hot reload
|
| 294 |
-
docker-compose -f docker-compose.dev.yml up --build
|
| 295 |
-
```
|
| 296 |
-
|
| 297 |
-
2. **Access the application**:
|
| 298 |
-
- API: http://localhost:8000
|
| 299 |
-
- Documentation: http://localhost:8000/docs
|
| 300 |
-
- Health check: http://localhost:8000/health
|
| 301 |
-
|
| 302 |
-
#### Docker Files Included
|
| 303 |
-
|
| 304 |
-
- `Dockerfile`: Production-ready container
|
| 305 |
-
- `Dockerfile.dev`: Development container with hot reload
|
| 306 |
-
- `docker-compose.yml`: Basic production setup
|
| 307 |
-
- `docker-compose.dev.yml`: Development setup
|
| 308 |
-
- `docker-compose.prod.yml`: Production setup with resource limits
|
| 309 |
-
- `.dockerignore`: Optimized build context
|
| 310 |
-
|
| 311 |
-
#### Manual Docker Build
|
| 312 |
|
| 313 |
```bash
|
| 314 |
-
#
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
# Run container
|
| 318 |
-
docker run -p 8000:8000 --env-file .env multiutility-server
|
| 319 |
-
```
|
| 320 |
|
| 321 |
-
#
|
|
|
|
| 322 |
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
poetry run gunicorn -k uvicorn.workers.UvicornWorker app.main:app \
|
| 326 |
-
--bind 0.0.0.0:8000 \
|
| 327 |
-
--workers 4 \
|
| 328 |
-
--worker-class uvicorn.workers.UvicornWorker \
|
| 329 |
-
--access-logfile - \
|
| 330 |
-
--error-logfile -
|
| 331 |
```
|
| 332 |
|
| 333 |
-
##
|
| 334 |
-
|
| 335 |
-
- **API Keys**: Store API keys securely and rotate them regularly
|
| 336 |
-
- **CORS**: Configure CORS appropriately for your use case
|
| 337 |
-
- **Rate Limiting**: Consider adding rate limiting for production use
|
| 338 |
-
- **HTTPS**: Always use HTTPS in production
|
| 339 |
-
- **Input Validation**: All inputs are validated using Pydantic schemas
|
| 340 |
-
|
| 341 |
-
## 📊 Monitoring and Logging
|
| 342 |
-
|
| 343 |
-
The server includes comprehensive logging:
|
| 344 |
-
|
| 345 |
-
- **Console Logs**: Colored output for development
|
| 346 |
-
- **File Logs**: Rotating log files in `logs/` directory
|
| 347 |
-
- **Error Logs**: Separate error log file
|
| 348 |
-
- **Request Logging**: All HTTP requests are logged
|
| 349 |
-
- **Structured Format**: JSON-like format for easy parsing
|
| 350 |
-
|
| 351 |
-
Log files:
|
| 352 |
-
- `logs/app.log`: General application logs
|
| 353 |
-
- `logs/error.log`: Error-only logs
|
| 354 |
-
|
| 355 |
-
## 🤝 Contributing
|
| 356 |
-
|
| 357 |
-
1. Fork the repository
|
| 358 |
-
2. Create a feature branch
|
| 359 |
-
3. Make your changes
|
| 360 |
-
4. Add tests for new functionality
|
| 361 |
-
5. Run quality checks and tests
|
| 362 |
-
6. Submit a pull request
|
| 363 |
-
|
| 364 |
-
## 📝 License
|
| 365 |
-
|
| 366 |
-
This project is licensed under the MIT License - see the LICENSE file for details.
|
| 367 |
-
|
| 368 |
-
## 🆘 Troubleshooting
|
| 369 |
-
|
| 370 |
-
### Common Issues
|
| 371 |
-
|
| 372 |
-
1. **yt-dlp not found**: Ensure yt-dlp is installed: `pip install yt-dlp`
|
| 373 |
-
2. **Permission denied on scripts**: Make scripts executable: `chmod +x scripts/run_dev.sh`
|
| 374 |
-
3. **Port already in use**: Change the port in `.env` file or stop the conflicting service
|
| 375 |
-
4. **API key errors**: Verify your API key is correctly set in the `.env` file
|
| 376 |
-
|
| 377 |
-
### Getting Help
|
| 378 |
-
|
| 379 |
-
- Check the logs in `logs/` directory
|
| 380 |
-
- Use the health check endpoints to verify service status
|
| 381 |
-
- Run tests to ensure everything is working: `poetry run pytest`
|
| 382 |
-
- Check the interactive API documentation at `/docs`
|
| 383 |
-
|
| 384 |
-
## 🔮 Future Enhancements
|
| 385 |
|
| 386 |
-
|
| 387 |
-
- **Metrics**: Prometheus metrics integration
|
| 388 |
-
- **Caching**: Redis caching for frequently requested data
|
| 389 |
-
- **File Upload APIs**: Handle file processing utilities
|
| 390 |
-
- **Webhook Support**: Async webhook notifications
|
| 391 |
-
- **Admin Dashboard**: Web-based administration interface
|
| 392 |
-
- **OAuth2/JWT**: Advanced authentication options
|
|
|
|
| 1 |
# Multi-Utility FastAPI Server
|
| 2 |
|
| 3 |
+
A centralized, extensible FastAPI server providing reusable APIs with robust authentication, rate limiting, and logging.
|
| 4 |
|
| 5 |
+
[Python](https://www.python.org/downloads/)
|
| 6 |
+
[FastAPI](https://fastapi.tiangolo.com/)
|
| 7 |
+
[License](LICENSE)
|
| 8 |
|
| 9 |
+
## Features
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
- **Modular Architecture** - Easy to add new APIs
|
| 12 |
+
- **API Key Authentication** - Secure, timing-safe key validation
|
| 13 |
+
- **Rate Limiting** - Configurable per-endpoint limits with `slowapi`
|
| 14 |
+
- **Result Caching** - TTL-based caching with `cachetools`
|
| 15 |
+
- **Structured Logging** - Loguru with console/file output
|
| 16 |
+
- **Docker Ready** - Multi-stage, cache-optimized Dockerfile
|
| 17 |
|
| 18 |
+
## APIs
|
| 19 |
|
| 20 |
+
| API | Endpoint | Description |
|
| 21 |
+
|-----|----------|-------------|
|
| 22 |
+
| **Subtitles** | `POST /api/v1/subtitles/extract` | Extract YouTube subtitles |
|
| 23 |
+
| **Embeddings** | `POST /api/v1/embeddings/generate` | Generate text embeddings (1024-dim) |
|
| 24 |
|
| 25 |
+
## Quick Start
|
| 26 |
|
| 27 |
+
### Local Development
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
```bash
|
| 30 |
+
# Install dependencies
|
| 31 |
+
poetry install
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
# Configure environment
|
| 34 |
+
cp .env.example .env
|
| 35 |
+
# Edit .env with your API keys
|
| 36 |
|
| 37 |
+
# Run server
|
| 38 |
+
poetry run uvicorn app.main:app --reload
|
| 39 |
+
```
|
| 40 |
|
| 41 |
+
### Docker
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
+
```bash
|
| 44 |
+
docker build -t multiutility-server .
|
| 45 |
+
docker run -p 7860:7860 -e API_KEYS=your-key multiutility-server
|
|
|
|
| 46 |
```
|
| 47 |
|
| 48 |
+
## Configuration
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
| Variable | Description | Default |
|
| 51 |
+
|----------|-------------|---------|
|
| 52 |
+
| `API_KEYS` | Comma-separated API keys (required) | - |
|
| 53 |
+
| `CORS_ORIGINS` | Allowed origins | `*` |
|
| 54 |
+
| `RATE_LIMIT_REQUESTS` | Requests per minute | `100` |
|
| 55 |
+
| `LOG_LEVEL` | Logging level | `INFO` |
|
| 56 |
+
| `EMBEDDING_MODEL` | HuggingFace model | `mixedbread-ai/mxbai-embed-large-v1` |
|
| 57 |
|
| 58 |
+
## API Usage
|
| 59 |
|
| 60 |
### Authentication
|
| 61 |
|
| 62 |
+
All endpoints (except health checks) require the `x-api-key` header:
|
| 63 |
|
| 64 |
```bash
|
| 65 |
+
curl -H "x-api-key: your-api-key" http://localhost:8000/api/v1/...
|
|
|
|
| 66 |
```
|
| 67 |
|
| 68 |
+
### Subtitles API
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
+
```bash
|
| 71 |
+
curl -X POST http://localhost:8000/api/v1/subtitles/extract \
|
| 72 |
+
-H "Content-Type: application/json" \
|
| 73 |
+
-H "x-api-key: your-key" \
|
| 74 |
+
-d '{"url": "https://youtube.com/watch?v=VIDEO_ID", "lang": "en"}'
|
|
|
|
|
|
|
|
|
|
| 75 |
```
|
| 76 |
|
| 77 |
+
**Response:**
|
| 78 |
```json
|
| 79 |
{
|
| 80 |
"status": "success",
|
| 81 |
+
"video_id": "VIDEO_ID",
|
| 82 |
"language": "en",
|
| 83 |
+
"subtitles": ["Line 1", "Line 2", "..."]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
}
|
| 85 |
```
|
| 86 |
|
| 87 |
+
### Embeddings API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
```bash
|
| 90 |
+
curl -X POST http://localhost:8000/api/v1/embeddings/generate \
|
|
|
|
| 91 |
-H "Content-Type: application/json" \
|
| 92 |
+
-H "x-api-key: your-key" \
|
| 93 |
+
-d '{"texts": ["Hello world", "Another text"], "normalize": true}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
```
|
| 95 |
|
| 96 |
+
**Response:**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
```json
|
| 98 |
{
|
| 99 |
+
"status": "success",
|
| 100 |
+
"embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
|
| 101 |
+
"model": "mixedbread-ai/mxbai-embed-large-v1",
|
| 102 |
+
"dimensions": 1024
|
| 103 |
}
|
| 104 |
```
|
| 105 |
|
| 106 |
+
## Project Structure
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
```
|
| 109 |
+
app/
|
| 110 |
+
├── main.py # FastAPI application
|
| 111 |
+
├── core/ # Config, logging, exceptions
|
| 112 |
+
├── middleware/ # Auth, rate limiting
|
| 113 |
+
└── apis/
|
| 114 |
+
├── subtitles/ # YouTube subtitle extraction
|
| 115 |
+
└── embeddings/ # Text embedding generation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
```
|
| 117 |
|
| 118 |
+
## Deployment
|
| 119 |
|
| 120 |
+
### Hugging Face Spaces
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
+
1. Create a Docker Space
|
| 123 |
+
2. Set `API_KEYS` secret in Space settings
|
| 124 |
+
3. Push repository
|
| 125 |
|
| 126 |
+
### Docker Compose
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
```bash
|
| 129 |
+
docker-compose up --build
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
```
|
| 131 |
|
| 132 |
+
## Development
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
```bash
|
| 135 |
+
# Run tests
|
| 136 |
+
poetry run pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
+
# Type checking
|
| 139 |
+
poetry run mypy app/
|
| 140 |
|
| 141 |
+
# Format code
|
| 142 |
+
poetry run black . && poetry run isort .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
```
|
| 144 |
|
| 145 |
+
## License
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/__init__.py
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Multi-utility FastAPI Server."""
|
| 2 |
+
|
| 3 |
+
__version__ = "0.1.0"
|
app/apis/__init__.py
CHANGED
|
@@ -1 +1,6 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""API modules."""
|
| 2 |
+
|
| 3 |
+
from app.apis.subtitles.router import router as subtitles_router
|
| 4 |
+
from app.apis.embeddings.router import router as embeddings_router
|
| 5 |
+
|
| 6 |
+
__all__ = ["subtitles_router", "embeddings_router"]
|
app/apis/embeddings/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Embeddings API module."""
|
| 2 |
+
|
| 3 |
+
from app.apis.embeddings.router import router
|
| 4 |
+
from app.apis.embeddings.service import embedding_service
|
| 5 |
+
|
| 6 |
+
__all__ = ["router", "embedding_service"]
|
app/apis/embeddings/router.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI router for embeddings API."""
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter
|
| 4 |
+
|
| 5 |
+
from app.apis.embeddings.schemas import EmbeddingRequest, EmbeddingResponse
|
| 6 |
+
from app.apis.embeddings.service import embedding_service
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
router = APIRouter(prefix="/api/v1/embeddings", tags=["embeddings"])
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@router.post(
|
| 13 |
+
"/generate",
|
| 14 |
+
response_model=EmbeddingResponse,
|
| 15 |
+
summary="Generate text embeddings",
|
| 16 |
+
description="Generate 1024-dimensional embeddings for a list of texts using a local sentence-transformers model."
|
| 17 |
+
)
|
| 18 |
+
async def generate_embeddings(request: EmbeddingRequest) -> EmbeddingResponse:
|
| 19 |
+
"""
|
| 20 |
+
Generate embeddings for a list of texts.
|
| 21 |
+
|
| 22 |
+
Args:
|
| 23 |
+
request: Contains texts list and optional normalize flag
|
| 24 |
+
|
| 25 |
+
Returns:
|
| 26 |
+
Embedding vectors for each input text
|
| 27 |
+
"""
|
| 28 |
+
embeddings = embedding_service.generate_embeddings(
|
| 29 |
+
request.texts,
|
| 30 |
+
normalize=request.normalize
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
return EmbeddingResponse(
|
| 34 |
+
embeddings=embeddings,
|
| 35 |
+
model=embedding_service.model_name,
|
| 36 |
+
dimensions=embedding_service.dimensions
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@router.get(
|
| 41 |
+
"/health",
|
| 42 |
+
summary="Health check for embeddings service",
|
| 43 |
+
description="Check if the embeddings service is operational"
|
| 44 |
+
)
|
| 45 |
+
async def health_check():
|
| 46 |
+
"""Health check endpoint for the embeddings service."""
|
| 47 |
+
return {
|
| 48 |
+
"status": "healthy",
|
| 49 |
+
"service": "embeddings",
|
| 50 |
+
"model": embedding_service.model_name
|
| 51 |
+
}
|
app/apis/embeddings/schemas.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic schemas for embeddings API."""
|
| 2 |
+
|
| 3 |
+
from typing import List
|
| 4 |
+
from pydantic import BaseModel, field_validator
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class EmbeddingRequest(BaseModel):
|
| 8 |
+
"""Request model for embedding generation."""
|
| 9 |
+
|
| 10 |
+
texts: List[str]
|
| 11 |
+
normalize: bool = True
|
| 12 |
+
|
| 13 |
+
@field_validator("texts")
|
| 14 |
+
@classmethod
|
| 15 |
+
def validate_texts(cls, v: List[str]) -> List[str]:
|
| 16 |
+
"""Validate that texts list is not empty and has valid content."""
|
| 17 |
+
if not v:
|
| 18 |
+
raise ValueError("texts list cannot be empty")
|
| 19 |
+
if len(v) > 100:
|
| 20 |
+
raise ValueError("Maximum 100 texts per request")
|
| 21 |
+
for i, text in enumerate(v):
|
| 22 |
+
if not text or not text.strip():
|
| 23 |
+
raise ValueError(f"Text at index {i} is empty")
|
| 24 |
+
return v
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class EmbeddingResponse(BaseModel):
|
| 28 |
+
"""Response model for embedding generation."""
|
| 29 |
+
|
| 30 |
+
status: str = "success"
|
| 31 |
+
embeddings: List[List[float]]
|
| 32 |
+
model: str
|
| 33 |
+
dimensions: int
|
| 34 |
+
|
| 35 |
+
model_config = {
|
| 36 |
+
"json_schema_extra": {
|
| 37 |
+
"example": {
|
| 38 |
+
"status": "success",
|
| 39 |
+
"embeddings": [[0.1, 0.2, 0.3]],
|
| 40 |
+
"model": "mixedbread-ai/mxbai-embed-large-v1",
|
| 41 |
+
"dimensions": 1024
|
| 42 |
+
}
|
| 43 |
+
}
|
| 44 |
+
}
|
app/apis/embeddings/service.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Embedding generation service using sentence-transformers."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from typing import List
|
| 5 |
+
import threading
|
| 6 |
+
|
| 7 |
+
from app.core.config import settings
|
| 8 |
+
from app.core.logging import get_logger
|
| 9 |
+
|
| 10 |
+
logger = get_logger(__name__)
|
| 11 |
+
|
| 12 |
+
MODELS_DIR = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "models")
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class EmbeddingService:
    """Generate text embeddings with a lazily loaded sentence-transformers model.

    The model named by ``settings.embedding_model`` is not loaded when the
    service is constructed; the first call that needs it triggers the load.
    """

    def __init__(self) -> None:
        self._model = None  # set by _load_model() on first use
        self._model_name = settings.embedding_model
        self._lock = threading.Lock()  # guards the one-time model load

    def _load_model(self):
        """Load the embedding model on first use; later calls return immediately."""
        if self._model is not None:
            return
        with self._lock:
            # Re-check under the lock: another thread may have completed the
            # load while this one was waiting.
            if self._model is not None:
                return
            logger.info(f"Loading embedding model: {self._model_name}")
            logger.info(f"Models directory: {MODELS_DIR}")
            # Imported lazily so the dependency is only loaded when a model
            # is actually required.
            from sentence_transformers import SentenceTransformer
            model = SentenceTransformer(
                self._model_name,
                cache_folder=MODELS_DIR
            )
            logger.info(f"Model loaded. Dimensions: {model.get_sentence_embedding_dimension()}")
            # Publish only after the model is fully usable.
            self._model = model

    def generate_embeddings(self, texts: List[str], normalize: bool = True) -> List[List[float]]:
        """
        Generate embeddings for a list of texts.

        Args:
            texts: List of text strings to embed
            normalize: Whether to normalize embeddings to unit length

        Returns:
            One embedding vector per input text. The vector length depends on
            the configured model (see ``dimensions``). An empty input yields
            an empty list without loading the model.
        """
        if len(texts) == 0:
            # Nothing to encode; avoid paying for a model load.
            return []

        self._load_model()

        embeddings = self._model.encode(
            texts,
            normalize_embeddings=normalize,
            convert_to_numpy=True
        )

        return embeddings.tolist()

    @property
    def model_name(self) -> str:
        """Name of the configured embedding model."""
        return self._model_name

    @property
    def dimensions(self) -> int:
        """Embedding dimensionality reported by the model (loads it if needed)."""
        self._load_model()
        return self._model.get_sentence_embedding_dimension()
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
embedding_service = EmbeddingService()
|
app/apis/subtitles/__init__.py
CHANGED
|
@@ -1 +1,7 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Subtitles API module."""
|
| 2 |
+
|
| 3 |
+
from app.apis.subtitles.router import router
|
| 4 |
+
from app.apis.subtitles.service import subtitle_service
|
| 5 |
+
from app.apis.subtitles.schemas import SubtitleExtractRequest, SubtitleExtractResponse
|
| 6 |
+
|
| 7 |
+
__all__ = ["router", "subtitle_service", "SubtitleExtractRequest", "SubtitleExtractResponse"]
|
app/apis/subtitles/router.py
CHANGED
|
@@ -1,18 +1,9 @@
|
|
| 1 |
"""FastAPI router for subtitles API."""
|
| 2 |
|
| 3 |
-
from fastapi import APIRouter
|
| 4 |
-
from typing import Optional
|
| 5 |
|
| 6 |
-
from app.apis.subtitles.schemas import SubtitleExtractRequest, SubtitleExtractResponse
|
| 7 |
from app.apis.subtitles.service import subtitle_service
|
| 8 |
-
from app.core.exceptions import (
|
| 9 |
-
InvalidVideoURLError,
|
| 10 |
-
SubtitlesNotFoundError,
|
| 11 |
-
DownloadTimeoutError,
|
| 12 |
-
SubtitleExtractionError,
|
| 13 |
-
AuthenticationError
|
| 14 |
-
)
|
| 15 |
-
from app.core.security import get_api_key_from_header
|
| 16 |
|
| 17 |
|
| 18 |
router = APIRouter(prefix="/api/v1/subtitles", tags=["subtitles"])
|
|
@@ -21,59 +12,29 @@ router = APIRouter(prefix="/api/v1/subtitles", tags=["subtitles"])
|
|
| 21 |
@router.post(
|
| 22 |
"/extract",
|
| 23 |
response_model=SubtitleExtractResponse,
|
| 24 |
-
responses={
|
| 25 |
-
400: {"model": SubtitleErrorResponse, "description": "Bad Request"},
|
| 26 |
-
401: {"model": SubtitleErrorResponse, "description": "Unauthorized"},
|
| 27 |
-
404: {"model": SubtitleErrorResponse, "description": "Not Found"},
|
| 28 |
-
408: {"model": SubtitleErrorResponse, "description": "Request Timeout"},
|
| 29 |
-
500: {"model": SubtitleErrorResponse, "description": "Internal Server Error"},
|
| 30 |
-
},
|
| 31 |
summary="Extract subtitles from YouTube video",
|
| 32 |
description="Extract and clean subtitles from a YouTube video URL. Returns subtitles as a list of text lines."
|
| 33 |
)
|
| 34 |
-
async def extract_subtitles(
|
| 35 |
-
request: SubtitleExtractRequest,
|
| 36 |
-
x_api_key: Optional[str] = Header(None, description="API key for authentication")
|
| 37 |
-
) -> SubtitleExtractResponse:
|
| 38 |
"""
|
| 39 |
Extract subtitles from a YouTube video.
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
Returns
|
|
|
|
| 45 |
"""
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
return SubtitleExtractResponse(
|
| 57 |
-
language=request.lang,
|
| 58 |
-
video_id=video_id,
|
| 59 |
-
subtitles=subtitle_lines
|
| 60 |
-
)
|
| 61 |
-
|
| 62 |
-
except AuthenticationError as e:
|
| 63 |
-
raise HTTPException(status_code=401, detail={"status": "error", "message": e.message})
|
| 64 |
-
except InvalidVideoURLError as e:
|
| 65 |
-
raise HTTPException(status_code=400, detail={"status": "error", "message": e.message})
|
| 66 |
-
except SubtitlesNotFoundError as e:
|
| 67 |
-
raise HTTPException(status_code=404, detail={"status": "error", "message": e.message})
|
| 68 |
-
except DownloadTimeoutError as e:
|
| 69 |
-
raise HTTPException(status_code=408, detail={"status": "error", "message": e.message})
|
| 70 |
-
except SubtitleExtractionError as e:
|
| 71 |
-
raise HTTPException(status_code=500, detail={"status": "error", "message": e.message})
|
| 72 |
-
except Exception as e:
|
| 73 |
-
raise HTTPException(
|
| 74 |
-
status_code=500,
|
| 75 |
-
detail={"status": "error", "message": "An unexpected error occurred"}
|
| 76 |
-
)
|
| 77 |
|
| 78 |
|
| 79 |
@router.get(
|
|
@@ -85,6 +46,5 @@ async def health_check():
|
|
| 85 |
"""Health check endpoint for the subtitles service."""
|
| 86 |
return {
|
| 87 |
"status": "healthy",
|
| 88 |
-
"service": "subtitles"
|
| 89 |
-
"yt_dlp_binary": subtitle_service.yt_dlp_binary
|
| 90 |
}
|
|
|
|
| 1 |
"""FastAPI router for subtitles API."""
|
| 2 |
|
| 3 |
+
from fastapi import APIRouter
|
|
|
|
| 4 |
|
| 5 |
+
from app.apis.subtitles.schemas import SubtitleExtractRequest, SubtitleExtractResponse
|
| 6 |
from app.apis.subtitles.service import subtitle_service
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
router = APIRouter(prefix="/api/v1/subtitles", tags=["subtitles"])
|
|
|
|
| 12 |
@router.post(
    "/extract",
    response_model=SubtitleExtractResponse,
    summary="Extract subtitles from YouTube video",
    description="Extract and clean subtitles from a YouTube video URL. Returns subtitles as a list of text lines."
)
async def extract_subtitles(request: SubtitleExtractRequest) -> SubtitleExtractResponse:
    """
    Fetch a video's subtitles through the subtitle service and wrap them in a response.

    Args:
        request: Payload carrying the video ``url`` and the ``lang`` code.

    Returns:
        A SubtitleExtractResponse holding the requested language code plus the
        video id and subtitle lines returned by the subtitle service.
    """
    video_url = str(request.url)
    requested_lang = request.lang

    extracted = await subtitle_service.extract_subtitles(video_url, requested_lang)
    video_id, subtitle_lines = extracted

    return SubtitleExtractResponse(
        language=requested_lang,
        video_id=video_id,
        subtitles=subtitle_lines
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
@router.get(
|
|
|
|
| 46 |
"""Health check endpoint for the subtitles service."""
|
| 47 |
return {
|
| 48 |
"status": "healthy",
|
| 49 |
+
"service": "subtitles"
|
|
|
|
| 50 |
}
|
app/apis/subtitles/schemas.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
"""Pydantic schemas for subtitles API."""
|
| 2 |
|
| 3 |
-
from typing import List
|
| 4 |
from pydantic import BaseModel, HttpUrl, field_validator
|
| 5 |
|
| 6 |
|
| 7 |
class SubtitleExtractRequest(BaseModel):
|
| 8 |
"""Request model for subtitle extraction."""
|
| 9 |
-
|
| 10 |
url: HttpUrl
|
| 11 |
lang: str = "en"
|
| 12 |
-
|
| 13 |
@field_validator("url")
|
| 14 |
@classmethod
|
| 15 |
def validate_youtube_url(cls, v: HttpUrl) -> HttpUrl:
|
|
@@ -18,7 +18,7 @@ class SubtitleExtractRequest(BaseModel):
|
|
| 18 |
if not any(domain in url_str for domain in ["youtube.com", "youtu.be"]):
|
| 19 |
raise ValueError("URL must be a valid YouTube URL")
|
| 20 |
return v
|
| 21 |
-
|
| 22 |
@field_validator("lang")
|
| 23 |
@classmethod
|
| 24 |
def validate_language(cls, v: str) -> str:
|
|
@@ -30,14 +30,14 @@ class SubtitleExtractRequest(BaseModel):
|
|
| 30 |
|
| 31 |
class SubtitleExtractResponse(BaseModel):
|
| 32 |
"""Response model for successful subtitle extraction."""
|
| 33 |
-
|
| 34 |
status: str = "success"
|
| 35 |
language: str
|
| 36 |
video_id: str
|
| 37 |
subtitles: List[str]
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
json_schema_extra
|
| 41 |
"example": {
|
| 42 |
"status": "success",
|
| 43 |
"language": "en",
|
|
@@ -49,18 +49,4 @@ class SubtitleExtractResponse(BaseModel):
|
|
| 49 |
]
|
| 50 |
}
|
| 51 |
}
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
class SubtitleErrorResponse(BaseModel):
|
| 55 |
-
"""Response model for subtitle extraction errors."""
|
| 56 |
-
|
| 57 |
-
status: str = "error"
|
| 58 |
-
message: str
|
| 59 |
-
|
| 60 |
-
class Config:
|
| 61 |
-
json_schema_extra = {
|
| 62 |
-
"example": {
|
| 63 |
-
"status": "error",
|
| 64 |
-
"message": "No subtitles available in the requested language"
|
| 65 |
-
}
|
| 66 |
-
}
|
|
|
|
| 1 |
"""Pydantic schemas for subtitles API."""
|
| 2 |
|
| 3 |
+
from typing import List
|
| 4 |
from pydantic import BaseModel, HttpUrl, field_validator
|
| 5 |
|
| 6 |
|
| 7 |
class SubtitleExtractRequest(BaseModel):
|
| 8 |
"""Request model for subtitle extraction."""
|
| 9 |
+
|
| 10 |
url: HttpUrl
|
| 11 |
lang: str = "en"
|
| 12 |
+
|
| 13 |
@field_validator("url")
|
| 14 |
@classmethod
|
| 15 |
def validate_youtube_url(cls, v: HttpUrl) -> HttpUrl:
|
|
|
|
| 18 |
if not any(domain in url_str for domain in ["youtube.com", "youtu.be"]):
|
| 19 |
raise ValueError("URL must be a valid YouTube URL")
|
| 20 |
return v
|
| 21 |
+
|
| 22 |
@field_validator("lang")
|
| 23 |
@classmethod
|
| 24 |
def validate_language(cls, v: str) -> str:
|
|
|
|
| 30 |
|
| 31 |
class SubtitleExtractResponse(BaseModel):
|
| 32 |
"""Response model for successful subtitle extraction."""
|
| 33 |
+
|
| 34 |
status: str = "success"
|
| 35 |
language: str
|
| 36 |
video_id: str
|
| 37 |
subtitles: List[str]
|
| 38 |
+
|
| 39 |
+
model_config = {
|
| 40 |
+
"json_schema_extra": {
|
| 41 |
"example": {
|
| 42 |
"status": "success",
|
| 43 |
"language": "en",
|
|
|
|
| 49 |
]
|
| 50 |
}
|
| 51 |
}
|
| 52 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/apis/subtitles/service.py
CHANGED
|
@@ -1,183 +1,154 @@
|
|
| 1 |
-
"""Subtitle extraction service using yt-dlp."""
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
-
import subprocess
|
| 5 |
import sys
|
| 6 |
import tempfile
|
| 7 |
from pathlib import Path
|
| 8 |
-
from typing import List, Optional
|
| 9 |
-
|
|
|
|
| 10 |
|
| 11 |
from app.core.config import settings
|
| 12 |
from app.core.exceptions import (
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
)
|
| 18 |
from app.apis.subtitles.utils import extract_video_id, convert_vtt_to_text
|
| 19 |
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
class SubtitleService:
|
| 22 |
"""Service for extracting subtitles from YouTube videos."""
|
| 23 |
-
|
| 24 |
-
def __init__(self):
|
| 25 |
-
self.yt_dlp_binary = settings.yt_dlp_binary
|
| 26 |
self.timeout_list = settings.yt_dlp_timeout_list
|
| 27 |
self.timeout_download = settings.yt_dlp_timeout_download
|
| 28 |
-
|
| 29 |
-
async def extract_subtitles(self, url: str, lang: str = "en") ->
|
| 30 |
"""
|
| 31 |
Extract subtitles from a YouTube video.
|
| 32 |
-
|
| 33 |
Args:
|
| 34 |
url: YouTube video URL
|
| 35 |
lang: Language code for subtitles
|
| 36 |
-
|
| 37 |
Returns:
|
| 38 |
Tuple of (video_id, subtitle_lines)
|
| 39 |
-
|
| 40 |
Raises:
|
| 41 |
-
InvalidVideoURLError: If the URL is invalid
|
| 42 |
SubtitlesNotFoundError: If no subtitles are found
|
| 43 |
DownloadTimeoutError: If the operation times out
|
| 44 |
SubtitleExtractionError: If extraction fails
|
| 45 |
"""
|
| 46 |
video_id = extract_video_id(url)
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
| 49 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
url
|
| 90 |
-
]
|
| 91 |
-
|
| 92 |
-
process = await asyncio.create_subprocess_exec(
|
| 93 |
-
*cmd,
|
| 94 |
-
stdout=asyncio.subprocess.PIPE,
|
| 95 |
-
stderr=asyncio.subprocess.PIPE
|
| 96 |
-
)
|
| 97 |
-
|
| 98 |
-
stdout, stderr = await asyncio.wait_for(
|
| 99 |
-
process.communicate(),
|
| 100 |
-
timeout=self.timeout_list
|
| 101 |
-
)
|
| 102 |
-
|
| 103 |
-
if process.returncode != 0:
|
| 104 |
-
error_msg = stderr.decode('utf-8', errors='ignore')
|
| 105 |
-
if "Video unavailable" in error_msg or "Private video" in error_msg:
|
| 106 |
-
raise InvalidVideoURLError("Video is unavailable, private, or does not exist")
|
| 107 |
-
raise SubtitleExtractionError(f"Failed to check subtitles: {error_msg}")
|
| 108 |
-
|
| 109 |
-
except asyncio.TimeoutError:
|
| 110 |
-
raise DownloadTimeoutError("Timeout while checking available subtitles")
|
| 111 |
-
except (InvalidVideoURLError, SubtitleExtractionError):
|
| 112 |
-
raise
|
| 113 |
-
except Exception as e:
|
| 114 |
-
raise SubtitleExtractionError(f"Error checking subtitles: {str(e)}")
|
| 115 |
-
|
| 116 |
-
async def _download_subtitles(self, url: str, lang: str, temp_dir: str, video_id: str) -> Optional[str]:
|
| 117 |
"""Download subtitles for a specific language."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
try:
|
| 119 |
-
output_template = os.path.join(temp_dir, f"{video_id}.%(ext)s")
|
| 120 |
-
|
| 121 |
-
cmd = [
|
| 122 |
-
sys.executable, "-m", "yt_dlp",
|
| 123 |
-
"--write-subs",
|
| 124 |
-
"--write-auto-subs", # Also try auto-generated subs
|
| 125 |
-
"--sub-lang", lang,
|
| 126 |
-
"--skip-download",
|
| 127 |
-
"--no-warnings",
|
| 128 |
-
"--output", output_template,
|
| 129 |
-
"--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
| 130 |
-
url
|
| 131 |
-
]
|
| 132 |
-
|
| 133 |
process = await asyncio.create_subprocess_exec(
|
| 134 |
*cmd,
|
| 135 |
stdout=asyncio.subprocess.PIPE,
|
| 136 |
stderr=asyncio.subprocess.PIPE
|
| 137 |
)
|
| 138 |
-
|
| 139 |
stdout, stderr = await asyncio.wait_for(
|
| 140 |
process.communicate(),
|
| 141 |
timeout=self.timeout_download
|
| 142 |
)
|
| 143 |
-
|
| 144 |
if process.returncode != 0:
|
| 145 |
-
|
|
|
|
|
|
|
| 146 |
return None
|
| 147 |
-
|
| 148 |
-
# Look for downloaded subtitle files
|
| 149 |
temp_path = Path(temp_dir)
|
| 150 |
subtitle_files = list(temp_path.glob(f"{video_id}*.vtt"))
|
| 151 |
-
|
| 152 |
if not subtitle_files:
|
| 153 |
return None
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
return f.read()
|
| 158 |
-
|
| 159 |
except asyncio.TimeoutError:
|
| 160 |
raise DownloadTimeoutError(f"Timeout while downloading subtitles for language '{lang}'")
|
| 161 |
-
except
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
alternatives = {
|
| 168 |
-
"en": ["en-US", "en-GB", "en-orig"],
|
| 169 |
-
"es": ["es-ES", "es-MX", "es-419"],
|
| 170 |
-
"fr": ["fr-FR", "fr-CA"],
|
| 171 |
-
"de": ["de-DE"],
|
| 172 |
-
"it": ["it-IT"],
|
| 173 |
-
"pt": ["pt-BR", "pt-PT"],
|
| 174 |
-
"ja": ["ja-JP"],
|
| 175 |
-
"ko": ["ko-KR"],
|
| 176 |
-
"zh": ["zh-CN", "zh-TW", "zh-Hans", "zh-Hant"]
|
| 177 |
-
}
|
| 178 |
-
|
| 179 |
-
return alternatives.get(lang, [f"{lang}-{lang.upper()}"])
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
# Global service instance
|
| 183 |
subtitle_service = SubtitleService()
|
|
|
|
| 1 |
+
"""Subtitle extraction service using yt-dlp with caching."""
|
| 2 |
|
| 3 |
import asyncio
|
|
|
|
| 4 |
import sys
|
| 5 |
import tempfile
|
| 6 |
from pathlib import Path
|
| 7 |
+
from typing import List, Optional, Tuple
|
| 8 |
+
|
| 9 |
+
from cachetools import TTLCache
|
| 10 |
|
| 11 |
from app.core.config import settings
|
| 12 |
from app.core.exceptions import (
|
| 13 |
+
SubtitlesNotFoundError,
|
| 14 |
+
DownloadTimeoutError,
|
| 15 |
+
SubtitleExtractionError,
|
| 16 |
+
InvalidVideoURLError
|
| 17 |
)
|
| 18 |
from app.apis.subtitles.utils import extract_video_id, convert_vtt_to_text
|
| 19 |
|
| 20 |
|
| 21 |
+
SUBTITLE_CACHE: TTLCache = TTLCache(maxsize=100, ttl=3600)
|
| 22 |
+
|
| 23 |
+
ALTERNATIVE_LANGUAGES = {
|
| 24 |
+
"en": ["en-US", "en-GB", "en-orig"],
|
| 25 |
+
"es": ["es-ES", "es-MX", "es-419"],
|
| 26 |
+
"fr": ["fr-FR", "fr-CA"],
|
| 27 |
+
"de": ["de-DE"],
|
| 28 |
+
"it": ["it-IT"],
|
| 29 |
+
"pt": ["pt-BR", "pt-PT"],
|
| 30 |
+
"ja": ["ja-JP"],
|
| 31 |
+
"ko": ["ko-KR"],
|
| 32 |
+
"zh": ["zh-CN", "zh-TW", "zh-Hans", "zh-Hant"]
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
|
| 36 |
class SubtitleService:
    """Service for extracting subtitles from YouTube videos via yt-dlp."""

    def __init__(self) -> None:
        self.timeout_list = settings.yt_dlp_timeout_list
        self.timeout_download = settings.yt_dlp_timeout_download

    async def extract_subtitles(self, url: str, lang: str = "en") -> Tuple[str, List[str]]:
        """
        Extract subtitles from a YouTube video.

        Results are memoized in ``SUBTITLE_CACHE`` under ``"<video_id>:<lang>"``.

        Args:
            url: YouTube video URL
            lang: Language code for subtitles

        Returns:
            Tuple of (video_id, subtitle_lines)

        Raises:
            InvalidVideoURLError: If yt-dlp reports the video as unavailable or private
            SubtitlesNotFoundError: If no subtitles are found
            DownloadTimeoutError: If the operation times out
            SubtitleExtractionError: If extraction fails
        """
        video_id = extract_video_id(url)
        cache_key = f"{video_id}:{lang}"

        # Single lookup: a separate "in" check followed by indexing can raise
        # KeyError if the entry expires between the two operations.
        cached = SUBTITLE_CACHE.get(cache_key)
        if cached is not None:
            return cached

        with tempfile.TemporaryDirectory() as temp_dir:
            subtitle_content = await self._download_subtitles(url, lang, temp_dir, video_id)

            if not subtitle_content:
                subtitle_content = await self._try_alternative_languages(url, lang, temp_dir, video_id)

            if not subtitle_content:
                raise SubtitlesNotFoundError(f"No subtitles available in language '{lang}' or alternatives")

            clean_lines = convert_vtt_to_text(subtitle_content)

            if not clean_lines:
                raise SubtitlesNotFoundError("Subtitles found but appear to be empty after cleaning")

            result = (video_id, clean_lines)
            SUBTITLE_CACHE[cache_key] = result
            return result

    async def _try_alternative_languages(
        self, url: str, lang: str, temp_dir: str, video_id: str
    ) -> Optional[str]:
        """Try downloading subtitles in alternative language codes concurrently.

        Each attempt gets its own sub-directory of ``temp_dir`` so that one
        download never picks up (or reads a half-written) file produced by
        another attempt running at the same time.

        Returns:
            The first non-empty subtitle text in ``ALTERNATIVE_LANGUAGES``
            order, or ``None`` when every attempt failed or found nothing.
        """
        alt_langs = ALTERNATIVE_LANGUAGES.get(lang, [f"{lang}-{lang.upper()}"])

        tasks = []
        for index, alt_lang in enumerate(alt_langs):
            # Index-based names keep the path independent of the language string.
            alt_dir = Path(temp_dir) / f"alt_{index}"
            alt_dir.mkdir(parents=True, exist_ok=True)
            tasks.append(self._download_subtitles(url, alt_lang, str(alt_dir), video_id))

        # Failures of individual alternatives are tolerated; they come back as
        # exception objects and are skipped below.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        for result in results:
            if isinstance(result, str) and result:
                return result

        return None

    async def _download_subtitles(
        self, url: str, lang: str, temp_dir: str, video_id: str
    ) -> Optional[str]:
        """Download subtitles for a specific language.

        Runs ``yt_dlp`` as a subprocess writing into ``temp_dir`` and returns
        the text of the first ``<video_id>*.vtt`` file found there.

        Returns:
            Subtitle file contents, or ``None`` if yt-dlp exited with an error
            or produced no ``.vtt`` file.

        Raises:
            InvalidVideoURLError: If yt-dlp reports the video as unavailable or private
            DownloadTimeoutError: If yt-dlp exceeds ``self.timeout_download``
            SubtitleExtractionError: On any other unexpected failure
        """
        output_template = str(Path(temp_dir) / f"{video_id}.%(ext)s")

        cmd = [
            sys.executable, "-m", "yt_dlp",
            "--write-subs",
            "--write-auto-subs",
            "--sub-lang", lang,
            "--skip-download",
            "--no-warnings",
            "--output", output_template,
            "--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            url
        ]

        process = None
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            _stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout=self.timeout_download
            )

            if process.returncode != 0:
                error_msg = stderr.decode('utf-8', errors='ignore')
                if "Video unavailable" in error_msg or "Private video" in error_msg:
                    raise InvalidVideoURLError("Video is unavailable, private, or does not exist")
                return None

            temp_path = Path(temp_dir)
            subtitle_files = list(temp_path.glob(f"{video_id}*.vtt"))

            if not subtitle_files:
                return None

            return subtitle_files[0].read_text(encoding='utf-8', errors='ignore')

        except asyncio.TimeoutError:
            raise DownloadTimeoutError(f"Timeout while downloading subtitles for language '{lang}'")
        except (InvalidVideoURLError, DownloadTimeoutError):
            raise
        except Exception as e:
            raise SubtitleExtractionError(f"Error downloading subtitles: {str(e)}")
        finally:
            # wait_for() only cancels communicate(); without this the yt-dlp
            # child would keep running after a timeout or cancellation.
            if process is not None and process.returncode is None:
                try:
                    process.kill()
                except ProcessLookupError:
                    pass  # already exited
                await process.wait()
|
| 152 |
+
|
| 153 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
subtitle_service = SubtitleService()
|
app/core/__init__.py
CHANGED
|
@@ -1 +1,26 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Core module exports."""
|
| 2 |
+
|
| 3 |
+
from app.core.config import settings
|
| 4 |
+
from app.core.exceptions import (
|
| 5 |
+
MultiUtilityServerException,
|
| 6 |
+
AuthenticationError,
|
| 7 |
+
InvalidVideoURLError,
|
| 8 |
+
SubtitlesNotFoundError,
|
| 9 |
+
DownloadTimeoutError,
|
| 10 |
+
SubtitleExtractionError
|
| 11 |
+
)
|
| 12 |
+
from app.core.logging import setup_logging, get_logger
|
| 13 |
+
from app.core.security import verify_api_key
|
| 14 |
+
|
| 15 |
+
__all__ = [
|
| 16 |
+
"settings",
|
| 17 |
+
"MultiUtilityServerException",
|
| 18 |
+
"AuthenticationError",
|
| 19 |
+
"InvalidVideoURLError",
|
| 20 |
+
"SubtitlesNotFoundError",
|
| 21 |
+
"DownloadTimeoutError",
|
| 22 |
+
"SubtitleExtractionError",
|
| 23 |
+
"setup_logging",
|
| 24 |
+
"get_logger",
|
| 25 |
+
"verify_api_key"
|
| 26 |
+
]
|
app/core/config.py
CHANGED
|
@@ -1,56 +1,65 @@
|
|
| 1 |
"""Configuration module for the multi-utility server."""
|
| 2 |
|
| 3 |
-
import
|
| 4 |
-
from
|
| 5 |
from pydantic_settings import BaseSettings
|
| 6 |
-
from pydantic import field_validator
|
| 7 |
|
| 8 |
|
| 9 |
class Settings(BaseSettings):
|
| 10 |
"""Application settings loaded from environment variables."""
|
| 11 |
-
|
| 12 |
-
# API Security
|
| 13 |
-
api_keys: str = "
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
# Logging
|
| 16 |
log_level: str = "INFO"
|
| 17 |
-
|
| 18 |
# yt-dlp configuration
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
# Server configuration
|
| 24 |
host: str = "0.0.0.0"
|
| 25 |
port: int = 8000
|
| 26 |
-
reload: bool = False
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
@field_validator("api_keys")
|
| 33 |
-
@classmethod
|
| 34 |
-
def parse_api_keys(cls, v: str) -> Set[str]:
|
| 35 |
-
"""Convert comma-separated API keys to a set."""
|
| 36 |
-
return {key.strip() for key in v.split(",") if key.strip()}
|
| 37 |
-
|
| 38 |
@property
|
| 39 |
def api_keys_set(self) -> Set[str]:
|
| 40 |
"""Get API keys as a set."""
|
| 41 |
-
if
|
| 42 |
-
return
|
| 43 |
-
return self.api_keys
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
@field_validator("log_level")
|
| 46 |
@classmethod
|
| 47 |
def validate_log_level(cls, v: str) -> str:
|
| 48 |
"""Validate log level."""
|
| 49 |
valid_levels = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
|
| 50 |
-
|
|
|
|
| 51 |
raise ValueError(f"Invalid log level: {v}. Must be one of {valid_levels}")
|
| 52 |
-
return
|
| 53 |
|
| 54 |
|
| 55 |
-
# Global settings instance
|
| 56 |
settings = Settings()
|
|
|
|
| 1 |
"""Configuration module for the multi-utility server."""
|
| 2 |
|
| 3 |
+
from typing import List, Set
|
| 4 |
+
from pydantic import computed_field, field_validator
|
| 5 |
from pydantic_settings import BaseSettings
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
class Settings(BaseSettings):
    """Typed application configuration; values may also be supplied via a ``.env`` file."""

    # Comma-separated API keys; empty by default so no key is valid out of the box.
    api_keys: str = ""

    # Comma-separated CORS origins; empty means "allow any" (see cors_origins_list).
    cors_origins: str = ""

    # Rate limiting parameters.
    rate_limit_requests: int = 100
    rate_limit_window: int = 60

    # Logging verbosity (normalized to upper case by the validator below).
    log_level: str = "INFO"

    # yt-dlp timeouts.
    yt_dlp_timeout_list: int = 30
    yt_dlp_timeout_download: int = 60

    # Embedding model identifier.
    embedding_model: str = "mixedbread-ai/mxbai-embed-large-v1"

    # Server bind options.
    host: str = "0.0.0.0"
    port: int = 8000
    reload: bool = False

    model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}

    @computed_field
    @property
    def api_keys_set(self) -> Set[str]:
        """Return the distinct, whitespace-trimmed keys from ``api_keys``."""
        trimmed = (piece.strip() for piece in self.api_keys.split(","))
        return {piece for piece in trimmed if piece}

    @computed_field
    @property
    def cors_origins_list(self) -> List[str]:
        """Return the trimmed origins from ``cors_origins``, or ``["*"]`` when it is empty."""
        if not self.cors_origins:
            return ["*"]
        origins: List[str] = []
        for piece in self.cors_origins.split(","):
            cleaned = piece.strip()
            if cleaned:
                origins.append(cleaned)
        return origins

    @field_validator("log_level")
    @classmethod
    def validate_log_level(cls, v: str) -> str:
        """Upper-case the log level and reject anything outside the known set."""
        valid_levels = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
        normalized = v.upper()
        if normalized in valid_levels:
            return normalized
        raise ValueError(f"Invalid log level: {v}. Must be one of {valid_levels}")
|
| 63 |
|
| 64 |
|
|
|
|
| 65 |
settings = Settings()
|
app/core/security.py
CHANGED
|
@@ -1,44 +1,31 @@
|
|
| 1 |
"""Security utilities for API key verification."""
|
| 2 |
|
| 3 |
-
|
| 4 |
from app.core.config import settings
|
| 5 |
from app.core.exceptions import AuthenticationError
|
| 6 |
|
| 7 |
|
| 8 |
-
def verify_api_key(api_key:
|
| 9 |
"""
|
| 10 |
-
Verify if the provided API key is valid.
|
| 11 |
-
|
| 12 |
Args:
|
| 13 |
api_key: The API key to verify
|
| 14 |
-
|
| 15 |
Returns:
|
| 16 |
True if the API key is valid
|
| 17 |
-
|
| 18 |
Raises:
|
| 19 |
AuthenticationError: If the API key is invalid or missing
|
| 20 |
"""
|
| 21 |
if not api_key:
|
| 22 |
raise AuthenticationError("Missing API key")
|
| 23 |
-
|
| 24 |
-
if api_key not in settings.api_keys_set:
|
| 25 |
-
raise AuthenticationError("Invalid API key")
|
| 26 |
-
|
| 27 |
-
return True
|
| 28 |
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
x_api_key: The X-API-Key header value
|
| 36 |
-
|
| 37 |
-
Returns:
|
| 38 |
-
The validated API key
|
| 39 |
-
|
| 40 |
-
Raises:
|
| 41 |
-
AuthenticationError: If the API key is invalid or missing
|
| 42 |
-
"""
|
| 43 |
-
verify_api_key(x_api_key)
|
| 44 |
-
return x_api_key
|
|
|
|
| 1 |
"""Security utilities for API key verification."""
|
| 2 |
|
| 3 |
+
import secrets
|
| 4 |
from app.core.config import settings
|
| 5 |
from app.core.exceptions import AuthenticationError
|
| 6 |
|
| 7 |
|
| 8 |
+
def verify_api_key(api_key: str) -> bool:
    """
    Verify if the provided API key is valid using timing-safe comparison.

    Keys are compared as UTF-8 bytes: ``secrets.compare_digest`` raises
    ``TypeError`` for ``str`` arguments containing non-ASCII characters, which
    would otherwise turn a malformed key into an unhandled error instead of an
    authentication failure.

    Args:
        api_key: The API key to verify

    Returns:
        True if the API key is valid

    Raises:
        AuthenticationError: If the API key is invalid or missing, or if the
            server has no API keys configured
    """
    if not api_key:
        raise AuthenticationError("Missing API key")

    valid_keys = settings.api_keys_set
    if not valid_keys:
        raise AuthenticationError("No API keys configured on server")

    candidate = api_key.encode("utf-8")
    matched = False
    # Compare against every configured key (no early exit) so the amount of
    # work does not depend on which key, if any, matched.
    for valid_key in valid_keys:
        if secrets.compare_digest(candidate, valid_key.encode("utf-8")):
            matched = True

    if matched:
        return True

    raise AuthenticationError("Invalid API key")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/main.py
CHANGED
|
@@ -1,18 +1,21 @@
|
|
| 1 |
"""Main FastAPI application for the multi-utility server."""
|
| 2 |
|
| 3 |
-
from fastapi import FastAPI, Request, HTTPException
|
| 4 |
-
from fastapi.responses import JSONResponse
|
| 5 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
import time
|
| 7 |
from contextlib import asynccontextmanager
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
from app.core.config import settings
|
| 10 |
from app.core.logging import setup_logging, get_logger
|
| 11 |
from app.core.exceptions import MultiUtilityServerException
|
| 12 |
-
from app.middleware
|
| 13 |
from app.apis.subtitles.router import router as subtitles_router
|
|
|
|
| 14 |
|
| 15 |
-
# Set up logging
|
| 16 |
setup_logging()
|
| 17 |
logger = get_logger(__name__)
|
| 18 |
|
|
@@ -23,11 +26,11 @@ async def lifespan(app: FastAPI):
|
|
| 23 |
logger.info("Starting multi-utility server...")
|
| 24 |
logger.info(f"Log level: {settings.log_level}")
|
| 25 |
logger.info(f"API keys configured: {len(settings.api_keys_set)}")
|
|
|
|
| 26 |
yield
|
| 27 |
logger.info("Shutting down multi-utility server...")
|
| 28 |
|
| 29 |
|
| 30 |
-
# Create FastAPI application
|
| 31 |
app = FastAPI(
|
| 32 |
title="Multi-Utility Server",
|
| 33 |
description="Centralized FastAPI server providing reusable APIs for different projects",
|
|
@@ -37,20 +40,20 @@ app = FastAPI(
|
|
| 37 |
lifespan=lifespan
|
| 38 |
)
|
| 39 |
|
| 40 |
-
|
|
|
|
|
|
|
| 41 |
app.add_middleware(
|
| 42 |
CORSMiddleware,
|
| 43 |
-
allow_origins=
|
| 44 |
-
allow_credentials=True,
|
| 45 |
-
allow_methods=["
|
| 46 |
allow_headers=["*"],
|
| 47 |
)
|
| 48 |
|
| 49 |
-
# Add API key authentication middleware
|
| 50 |
app.add_middleware(APIKeyMiddleware)
|
| 51 |
|
| 52 |
|
| 53 |
-
# Custom exception handler
|
| 54 |
@app.exception_handler(MultiUtilityServerException)
|
| 55 |
async def custom_exception_handler(request: Request, exc: MultiUtilityServerException):
|
| 56 |
"""Handle custom application exceptions."""
|
|
@@ -61,7 +64,6 @@ async def custom_exception_handler(request: Request, exc: MultiUtilityServerExce
|
|
| 61 |
)
|
| 62 |
|
| 63 |
|
| 64 |
-
# Global exception handler
|
| 65 |
@app.exception_handler(Exception)
|
| 66 |
async def global_exception_handler(request: Request, exc: Exception):
|
| 67 |
"""Handle unexpected exceptions."""
|
|
@@ -72,18 +74,14 @@ async def global_exception_handler(request: Request, exc: Exception):
|
|
| 72 |
)
|
| 73 |
|
| 74 |
|
| 75 |
-
# Request logging middleware
|
| 76 |
@app.middleware("http")
|
| 77 |
async def log_requests(request: Request, call_next):
|
| 78 |
"""Log all HTTP requests."""
|
| 79 |
start_time = time.time()
|
| 80 |
-
|
| 81 |
-
# Log request
|
| 82 |
logger.info(f"Request: {request.method} {request.url.path}")
|
| 83 |
|
| 84 |
response = await call_next(request)
|
| 85 |
|
| 86 |
-
# Log response
|
| 87 |
process_time = time.time() - start_time
|
| 88 |
logger.info(
|
| 89 |
f"Response: {response.status_code} | "
|
|
@@ -94,7 +92,6 @@ async def log_requests(request: Request, call_next):
|
|
| 94 |
return response
|
| 95 |
|
| 96 |
|
| 97 |
-
# Health check endpoint
|
| 98 |
@app.get("/health")
|
| 99 |
async def health_check():
|
| 100 |
"""Health check endpoint."""
|
|
@@ -105,23 +102,23 @@ async def health_check():
|
|
| 105 |
}
|
| 106 |
|
| 107 |
|
| 108 |
-
# Root endpoint
|
| 109 |
@app.get("/")
|
| 110 |
async def root():
|
| 111 |
-
"""Root endpoint with
|
| 112 |
return {
|
| 113 |
"message": "Multi-Utility FastAPI Server",
|
| 114 |
"version": "0.1.0",
|
| 115 |
"docs": "/docs",
|
| 116 |
"health": "/health",
|
| 117 |
"apis": {
|
| 118 |
-
"subtitles": "/api/v1/subtitles"
|
|
|
|
| 119 |
}
|
| 120 |
}
|
| 121 |
|
| 122 |
|
| 123 |
-
# Include API routers
|
| 124 |
app.include_router(subtitles_router)
|
|
|
|
| 125 |
|
| 126 |
|
| 127 |
if __name__ == "__main__":
|
|
|
|
| 1 |
"""Main FastAPI application for the multi-utility server."""
|
| 2 |
|
|
|
|
|
|
|
|
|
|
| 3 |
import time
|
| 4 |
from contextlib import asynccontextmanager
|
| 5 |
|
| 6 |
+
from fastapi import FastAPI, Request
|
| 7 |
+
from fastapi.responses import JSONResponse
|
| 8 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
+
from slowapi import _rate_limit_exceeded_handler
|
| 10 |
+
from slowapi.errors import RateLimitExceeded
|
| 11 |
+
|
| 12 |
from app.core.config import settings
|
| 13 |
from app.core.logging import setup_logging, get_logger
|
| 14 |
from app.core.exceptions import MultiUtilityServerException
|
| 15 |
+
from app.middleware import APIKeyMiddleware, limiter
|
| 16 |
from app.apis.subtitles.router import router as subtitles_router
|
| 17 |
+
from app.apis.embeddings.router import router as embeddings_router
|
| 18 |
|
|
|
|
| 19 |
setup_logging()
|
| 20 |
logger = get_logger(__name__)
|
| 21 |
|
|
|
|
| 26 |
logger.info("Starting multi-utility server...")
|
| 27 |
logger.info(f"Log level: {settings.log_level}")
|
| 28 |
logger.info(f"API keys configured: {len(settings.api_keys_set)}")
|
| 29 |
+
logger.info(f"CORS origins: {settings.cors_origins_list}")
|
| 30 |
yield
|
| 31 |
logger.info("Shutting down multi-utility server...")
|
| 32 |
|
| 33 |
|
|
|
|
| 34 |
app = FastAPI(
|
| 35 |
title="Multi-Utility Server",
|
| 36 |
description="Centralized FastAPI server providing reusable APIs for different projects",
|
|
|
|
| 40 |
lifespan=lifespan
|
| 41 |
)
|
| 42 |
|
| 43 |
+
app.state.limiter = limiter
|
| 44 |
+
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
| 45 |
+
|
| 46 |
app.add_middleware(
|
| 47 |
CORSMiddleware,
|
| 48 |
+
allow_origins=settings.cors_origins_list,
|
| 49 |
+
allow_credentials=True if settings.cors_origins else False,
|
| 50 |
+
allow_methods=["GET", "POST", "PUT", "DELETE"],
|
| 51 |
allow_headers=["*"],
|
| 52 |
)
|
| 53 |
|
|
|
|
| 54 |
app.add_middleware(APIKeyMiddleware)
|
| 55 |
|
| 56 |
|
|
|
|
| 57 |
@app.exception_handler(MultiUtilityServerException)
|
| 58 |
async def custom_exception_handler(request: Request, exc: MultiUtilityServerException):
|
| 59 |
"""Handle custom application exceptions."""
|
|
|
|
| 64 |
)
|
| 65 |
|
| 66 |
|
|
|
|
| 67 |
@app.exception_handler(Exception)
|
| 68 |
async def global_exception_handler(request: Request, exc: Exception):
|
| 69 |
"""Handle unexpected exceptions."""
|
|
|
|
| 74 |
)
|
| 75 |
|
| 76 |
|
|
|
|
| 77 |
@app.middleware("http")
|
| 78 |
async def log_requests(request: Request, call_next):
|
| 79 |
"""Log all HTTP requests."""
|
| 80 |
start_time = time.time()
|
|
|
|
|
|
|
| 81 |
logger.info(f"Request: {request.method} {request.url.path}")
|
| 82 |
|
| 83 |
response = await call_next(request)
|
| 84 |
|
|
|
|
| 85 |
process_time = time.time() - start_time
|
| 86 |
logger.info(
|
| 87 |
f"Response: {response.status_code} | "
|
|
|
|
| 92 |
return response
|
| 93 |
|
| 94 |
|
|
|
|
| 95 |
@app.get("/health")
|
| 96 |
async def health_check():
|
| 97 |
"""Health check endpoint."""
|
|
|
|
| 102 |
}
|
| 103 |
|
| 104 |
|
|
|
|
| 105 |
@app.get("/")
|
| 106 |
async def root():
|
| 107 |
+
"""Root endpoint with API information."""
|
| 108 |
return {
|
| 109 |
"message": "Multi-Utility FastAPI Server",
|
| 110 |
"version": "0.1.0",
|
| 111 |
"docs": "/docs",
|
| 112 |
"health": "/health",
|
| 113 |
"apis": {
|
| 114 |
+
"subtitles": "/api/v1/subtitles",
|
| 115 |
+
"embeddings": "/api/v1/embeddings"
|
| 116 |
}
|
| 117 |
}
|
| 118 |
|
| 119 |
|
|
|
|
| 120 |
app.include_router(subtitles_router)
|
| 121 |
+
app.include_router(embeddings_router)
|
| 122 |
|
| 123 |
|
| 124 |
if __name__ == "__main__":
|
app/middleware/__init__.py
CHANGED
|
@@ -1 +1,6 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Middleware module exports."""
|
| 2 |
+
|
| 3 |
+
from app.middleware.api_key_auth import APIKeyMiddleware
|
| 4 |
+
from app.middleware.rate_limit import limiter, rate_limit_exceeded_handler
|
| 5 |
+
|
| 6 |
+
__all__ = ["APIKeyMiddleware", "limiter", "rate_limit_exceeded_handler"]
|
app/middleware/api_key_auth.py
CHANGED
|
@@ -1,72 +1,56 @@
|
|
| 1 |
"""API key authentication middleware."""
|
| 2 |
|
| 3 |
-
|
| 4 |
-
from fastapi import Request
|
| 5 |
-
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 6 |
from starlette.middleware.base import BaseHTTPMiddleware
|
| 7 |
from starlette.responses import JSONResponse
|
| 8 |
|
| 9 |
from app.core.config import settings
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
class APIKeyMiddleware(BaseHTTPMiddleware):
|
| 14 |
"""Middleware to enforce API key authentication."""
|
| 15 |
-
|
| 16 |
async def dispatch(self, request: Request, call_next):
|
| 17 |
-
|
| 18 |
-
if request.url.path in
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
# Extract API key from header
|
| 23 |
api_key = request.headers.get("x-api-key")
|
| 24 |
-
|
| 25 |
if not api_key:
|
| 26 |
return JSONResponse(
|
| 27 |
status_code=401,
|
| 28 |
-
content={
|
| 29 |
-
"status": "error",
|
| 30 |
-
"message": "Missing API key. Include 'x-api-key' header."
|
| 31 |
-
}
|
| 32 |
)
|
| 33 |
-
|
| 34 |
-
if
|
| 35 |
return JSONResponse(
|
| 36 |
-
status_code=
|
| 37 |
-
content={
|
| 38 |
-
"status": "error",
|
| 39 |
-
"message": "Invalid API key"
|
| 40 |
-
}
|
| 41 |
)
|
| 42 |
-
|
| 43 |
-
# Add API key to request state for potential logging
|
| 44 |
-
request.state.api_key = api_key
|
| 45 |
-
|
| 46 |
-
response = await call_next(request)
|
| 47 |
-
return response
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
Dependency to extract and validate API key from Authorization header.
|
| 57 |
-
|
| 58 |
-
Args:
|
| 59 |
-
credentials: HTTP authorization credentials
|
| 60 |
-
|
| 61 |
-
Returns:
|
| 62 |
-
The validated API key
|
| 63 |
-
|
| 64 |
-
Raises:
|
| 65 |
-
HTTPException: If the API key is invalid
|
| 66 |
-
"""
|
| 67 |
-
if not credentials or credentials.credentials not in settings.api_keys_set:
|
| 68 |
-
raise HTTPException(
|
| 69 |
-
status_code=401,
|
| 70 |
-
detail="Invalid API key"
|
| 71 |
-
)
|
| 72 |
-
return credentials.credentials
|
|
|
|
| 1 |
"""API key authentication middleware."""
|
| 2 |
|
| 3 |
+
import secrets
|
| 4 |
+
from fastapi import Request
|
|
|
|
| 5 |
from starlette.middleware.base import BaseHTTPMiddleware
|
| 6 |
from starlette.responses import JSONResponse
|
| 7 |
|
| 8 |
from app.core.config import settings
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
EXEMPT_PATHS = frozenset([
|
| 12 |
+
"/",
|
| 13 |
+
"/health",
|
| 14 |
+
"/docs",
|
| 15 |
+
"/redoc",
|
| 16 |
+
"/openapi.json",
|
| 17 |
+
"/api/v1/subtitles/health",
|
| 18 |
+
"/api/v1/embeddings/health"
|
| 19 |
+
])
|
| 20 |
|
| 21 |
|
| 22 |
class APIKeyMiddleware(BaseHTTPMiddleware):
    """Middleware to enforce API key authentication.

    Requests whose path is listed in ``EXEMPT_PATHS`` are passed through
    untouched. Every other request must carry an ``x-api-key`` header whose
    value matches one of the keys in ``settings.api_keys_set``.
    """

    async def dispatch(self, request: Request, call_next):
        """Process request and validate API key for protected endpoints.

        Args:
            request: The incoming HTTP request.
            call_next: Callable that forwards the request to the next handler.

        Returns:
            The downstream response when the path is exempt or the key is
            valid; otherwise a JSON error response: 401 when the key is
            missing or invalid, 500 when the server has no keys configured.
        """
        if request.url.path in EXEMPT_PATHS:
            return await call_next(request)

        api_key = request.headers.get("x-api-key")

        if not api_key:
            return JSONResponse(
                status_code=401,
                content={"status": "error", "message": "Missing API key. Include 'x-api-key' header."}
            )

        if not settings.api_keys_set:
            return JSONResponse(
                status_code=500,
                content={"status": "error", "message": "No API keys configured on server"}
            )

        # compare_digest() raises TypeError for str operands containing
        # non-ASCII characters, which would turn a malformed header into an
        # unhandled 500. Comparing encoded bytes accepts any header value.
        candidate = api_key.encode("utf-8")

        # Check every configured key instead of short-circuiting with any(),
        # so the time taken does not depend on which key (if any) matched.
        is_valid = False
        for valid_key in settings.api_keys_set:
            is_valid |= secrets.compare_digest(candidate, valid_key.encode("utf-8"))

        if not is_valid:
            return JSONResponse(
                status_code=401,
                content={"status": "error", "message": "Invalid API key"}
            )

        # Expose the accepted key to downstream handlers via request state.
        request.state.api_key = api_key
        return await call_next(request)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/middleware/rate_limit.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Rate limiting middleware using slowapi."""
|
| 2 |
+
|
| 3 |
+
from slowapi import Limiter
|
| 4 |
+
from slowapi.util import get_remote_address
|
| 5 |
+
from slowapi.errors import RateLimitExceeded
|
| 6 |
+
from slowapi.middleware import SlowAPIMiddleware
|
| 7 |
+
from starlette.responses import JSONResponse
|
| 8 |
+
from starlette.requests import Request
|
| 9 |
+
|
| 10 |
+
from app.core.config import settings
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
limiter = Limiter(
|
| 14 |
+
key_func=get_remote_address,
|
| 15 |
+
default_limits=[f"{settings.rate_limit_requests}/minute"]
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded) -> JSONResponse:
    """Build the JSON response returned when a client exceeds its rate limit.

    Args:
        request: The throttled request (unused; part of the exception-handler
            signature).
        exc: The slowapi exception. ``exc.detail`` holds the description of
            the limit that was exceeded (for example ``"5 per 1 minute"``),
            not a retry delay.

    Returns:
        A 429 ``JSONResponse`` with ``status`` and ``message`` fields.
    """
    return JSONResponse(
        status_code=429,
        content={
            "status": "error",
            # exc.detail names the violated limit, so report it as the limit
            # rather than presenting it as a number of seconds to wait.
            "message": f"Rate limit exceeded: {exc.detail}",
        },
    )
|
poetry.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
[tool.poetry]
|
| 2 |
name = "multi-utility-server"
|
| 3 |
-
version = "
|
| 4 |
description = "Centralized multi-utility FastAPI server with reusable APIs"
|
| 5 |
authors = ["Abhishek Sharma <abhishek@abhisheksan.com>"]
|
| 6 |
readme = "README.md"
|
|
@@ -15,6 +15,10 @@ pydantic-settings = "^2.1.0"
|
|
| 15 |
yt-dlp = "^2025.9.5"
|
| 16 |
python-dotenv = "^1.0.0"
|
| 17 |
loguru = "^0.7.2"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
[tool.poetry.group.dev.dependencies]
|
| 20 |
pytest = "^7.4.3"
|
|
|
|
| 1 |
[tool.poetry]
|
| 2 |
name = "multi-utility-server"
|
| 3 |
+
version = "1.0.0"
|
| 4 |
description = "Centralized multi-utility FastAPI server with reusable APIs"
|
| 5 |
authors = ["Abhishek Sharma <abhishek@abhisheksan.com>"]
|
| 6 |
readme = "README.md"
|
|
|
|
| 15 |
yt-dlp = "^2025.9.5"
|
| 16 |
python-dotenv = "^1.0.0"
|
| 17 |
loguru = "^0.7.2"
|
| 18 |
+
slowapi = "^0.1.9"
|
| 19 |
+
cachetools = "^5.3.0"
|
| 20 |
+
sentence-transformers = "^2.2.2"
|
| 21 |
+
torch = "^2.0.0"
|
| 22 |
|
| 23 |
[tool.poetry.group.dev.dependencies]
|
| 24 |
pytest = "^7.4.3"
|
scripts/run_dev.bat
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
@echo off
|
| 2 |
-
REM Development script for running the multi-utility server on Windows
|
| 3 |
-
|
| 4 |
-
echo Starting Multi-Utility Server Development Environment
|
| 5 |
-
|
| 6 |
-
REM Check if Poetry is installed
|
| 7 |
-
poetry --version >nul 2>&1
|
| 8 |
-
if %errorlevel% neq 0 (
|
| 9 |
-
echo Poetry is not installed. Please install Poetry first.
|
| 10 |
-
echo Visit: https://python-poetry.org/docs/#installation
|
| 11 |
-
pause
|
| 12 |
-
exit /b 1
|
| 13 |
-
)
|
| 14 |
-
|
| 15 |
-
REM Install dependencies if not already installed
|
| 16 |
-
if not exist ".venv" (
|
| 17 |
-
echo Installing dependencies...
|
| 18 |
-
poetry install
|
| 19 |
-
)
|
| 20 |
-
|
| 21 |
-
REM Create .env file if it doesn't exist
|
| 22 |
-
if not exist ".env" (
|
| 23 |
-
echo Creating .env file from template...
|
| 24 |
-
copy .env.example .env
|
| 25 |
-
echo Please edit .env file to configure your API keys and settings.
|
| 26 |
-
)
|
| 27 |
-
|
| 28 |
-
REM Create logs directory
|
| 29 |
-
if not exist "logs" mkdir logs
|
| 30 |
-
|
| 31 |
-
REM Start the development server
|
| 32 |
-
echo Starting development server...
|
| 33 |
-
echo Server will be available at: http://localhost:8000
|
| 34 |
-
echo API documentation: http://localhost:8000/docs
|
| 35 |
-
echo Press Ctrl+C to stop the server
|
| 36 |
-
echo.
|
| 37 |
-
|
| 38 |
-
poetry run python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/run_dev.sh
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
#!/bin/bash
|
| 2 |
-
# Development script for running the multi-utility server
|
| 3 |
-
|
| 4 |
-
# Colors for output
|
| 5 |
-
GREEN='\033[0;32m'
|
| 6 |
-
YELLOW='\033[1;33m'
|
| 7 |
-
RED='\033[0;31m'
|
| 8 |
-
NC='\033[0m' # No Color
|
| 9 |
-
|
| 10 |
-
echo -e "${GREEN}Starting Multi-Utility Server Development Environment${NC}"
|
| 11 |
-
|
| 12 |
-
# Check if Poetry is installed
|
| 13 |
-
if ! command -v poetry &> /dev/null; then
|
| 14 |
-
echo -e "${RED}Poetry is not installed. Please install Poetry first.${NC}"
|
| 15 |
-
echo "Visit: https://python-poetry.org/docs/#installation"
|
| 16 |
-
exit 1
|
| 17 |
-
fi
|
| 18 |
-
|
| 19 |
-
# Install dependencies if not already installed
|
| 20 |
-
if [ ! -d ".venv" ]; then
|
| 21 |
-
echo -e "${YELLOW}Installing dependencies...${NC}"
|
| 22 |
-
poetry install
|
| 23 |
-
fi
|
| 24 |
-
|
| 25 |
-
# Create .env file if it doesn't exist
|
| 26 |
-
if [ ! -f ".env" ]; then
|
| 27 |
-
echo -e "${YELLOW}Creating .env file from template...${NC}"
|
| 28 |
-
cp .env.example .env
|
| 29 |
-
echo -e "${YELLOW}Please edit .env file to configure your API keys and settings.${NC}"
|
| 30 |
-
fi
|
| 31 |
-
|
| 32 |
-
# Create logs directory
|
| 33 |
-
mkdir -p logs
|
| 34 |
-
|
| 35 |
-
# Start the development server
|
| 36 |
-
echo -e "${GREEN}Starting development server...${NC}"
|
| 37 |
-
echo -e "${YELLOW}Server will be available at: http://localhost:8000${NC}"
|
| 38 |
-
echo -e "${YELLOW}API documentation: http://localhost:8000/docs${NC}"
|
| 39 |
-
echo -e "${YELLOW}Press Ctrl+C to stop the server${NC}"
|
| 40 |
-
echo ""
|
| 41 |
-
|
| 42 |
-
poetry run python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/run_docker.bat
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
@echo off
|
| 2 |
-
echo Building and running Multi-Utility Server with Docker...
|
| 3 |
-
echo.
|
| 4 |
-
|
| 5 |
-
REM Change to project root directory
|
| 6 |
-
cd /d "%~dp0\.."
|
| 7 |
-
|
| 8 |
-
REM Check if .env file exists
|
| 9 |
-
if not exist .env (
|
| 10 |
-
echo Warning: .env file not found. Copying from .env.example...
|
| 11 |
-
copy .env.example .env
|
| 12 |
-
echo Please edit .env file with your API keys before running again.
|
| 13 |
-
pause
|
| 14 |
-
exit /b 1
|
| 15 |
-
)
|
| 16 |
-
|
| 17 |
-
REM Build and run with docker-compose
|
| 18 |
-
docker-compose up --build
|
| 19 |
-
|
| 20 |
-
pause
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/run_docker.sh
DELETED
|
@@ -1,18 +0,0 @@
|
|
| 1 |
-
#!/bin/bash
|
| 2 |
-
|
| 3 |
-
echo "Building and running Multi-Utility Server with Docker..."
|
| 4 |
-
echo
|
| 5 |
-
|
| 6 |
-
# Change to project root directory
|
| 7 |
-
cd "$(dirname "$0")/.."
|
| 8 |
-
|
| 9 |
-
# Check if .env file exists
|
| 10 |
-
if [ ! -f .env ]; then
|
| 11 |
-
echo "Warning: .env file not found. Copying from .env.example..."
|
| 12 |
-
cp .env.example .env
|
| 13 |
-
echo "Please edit .env file with your API keys before running again."
|
| 14 |
-
exit 1
|
| 15 |
-
fi
|
| 16 |
-
|
| 17 |
-
# Build and run with docker-compose
|
| 18 |
-
docker-compose up --build
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/conftest.py
CHANGED
|
@@ -2,10 +2,13 @@
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
from fastapi.testclient import TestClient
|
| 5 |
-
from unittest.mock import
|
| 6 |
import tempfile
|
| 7 |
import os
|
| 8 |
|
|
|
|
|
|
|
|
|
|
| 9 |
from app.main import app
|
| 10 |
from app.core.config import settings
|
| 11 |
|
|
@@ -29,13 +32,6 @@ def invalid_api_key():
|
|
| 29 |
return "invalid-key-for-testing"
|
| 30 |
|
| 31 |
|
| 32 |
-
@pytest.fixture
|
| 33 |
-
def mock_subprocess():
|
| 34 |
-
"""Mock subprocess calls for yt-dlp."""
|
| 35 |
-
with patch('subprocess.run') as mock_run:
|
| 36 |
-
yield mock_run
|
| 37 |
-
|
| 38 |
-
|
| 39 |
@pytest.fixture
|
| 40 |
def mock_asyncio_subprocess():
|
| 41 |
"""Mock asyncio subprocess calls for yt-dlp."""
|
|
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
from fastapi.testclient import TestClient
|
| 5 |
+
from unittest.mock import patch
|
| 6 |
import tempfile
|
| 7 |
import os
|
| 8 |
|
| 9 |
+
# Ensure API_KEYS is set for testing
|
| 10 |
+
os.environ.setdefault("API_KEYS", "test-key-1,test-key-2")
|
| 11 |
+
|
| 12 |
from app.main import app
|
| 13 |
from app.core.config import settings
|
| 14 |
|
|
|
|
| 32 |
return "invalid-key-for-testing"
|
| 33 |
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
@pytest.fixture
|
| 36 |
def mock_asyncio_subprocess():
|
| 37 |
"""Mock asyncio subprocess calls for yt-dlp."""
|
tests/test_security.py
CHANGED
|
@@ -4,57 +4,46 @@ from unittest import mock
|
|
| 4 |
import pytest
|
| 5 |
from fastapi.testclient import TestClient
|
| 6 |
|
| 7 |
-
from app.core.security import verify_api_key
|
| 8 |
from app.core.exceptions import AuthenticationError
|
| 9 |
-
from app.main import app
|
| 10 |
|
| 11 |
|
| 12 |
class TestAPIKeySecurity:
|
| 13 |
"""Test API key security functions."""
|
| 14 |
-
|
| 15 |
def test_verify_api_key_valid(self, api_key):
|
| 16 |
"""Test verifying a valid API key."""
|
| 17 |
assert verify_api_key(api_key) is True
|
| 18 |
-
|
| 19 |
def test_verify_api_key_invalid(self, invalid_api_key):
|
| 20 |
"""Test verifying an invalid API key."""
|
| 21 |
with pytest.raises(AuthenticationError):
|
| 22 |
verify_api_key(invalid_api_key)
|
| 23 |
-
|
| 24 |
-
def
|
| 25 |
-
"""Test verifying
|
| 26 |
-
with pytest.raises(AuthenticationError):
|
| 27 |
-
verify_api_key(None)
|
| 28 |
-
|
| 29 |
-
def test_get_api_key_from_header_valid(self, api_key):
|
| 30 |
-
"""Test getting API key from valid header."""
|
| 31 |
-
result = get_api_key_from_header(api_key)
|
| 32 |
-
assert result == api_key
|
| 33 |
-
|
| 34 |
-
def test_get_api_key_from_header_invalid(self, invalid_api_key):
|
| 35 |
-
"""Test getting API key from invalid header."""
|
| 36 |
with pytest.raises(AuthenticationError):
|
| 37 |
-
|
| 38 |
|
| 39 |
|
| 40 |
class TestAPIKeyMiddleware:
|
| 41 |
"""Test API key middleware functionality."""
|
| 42 |
-
|
| 43 |
def test_middleware_allows_health_endpoint(self, client):
|
| 44 |
"""Test that middleware allows access to health endpoint without API key."""
|
| 45 |
response = client.get("/health")
|
| 46 |
assert response.status_code == 200
|
| 47 |
-
|
| 48 |
def test_middleware_allows_docs_endpoint(self, client):
|
| 49 |
"""Test that middleware allows access to docs endpoint without API key."""
|
| 50 |
response = client.get("/docs")
|
| 51 |
assert response.status_code == 200
|
| 52 |
-
|
| 53 |
def test_middleware_allows_root_endpoint(self, client):
|
| 54 |
"""Test that middleware allows access to root endpoint without API key."""
|
| 55 |
response = client.get("/")
|
| 56 |
assert response.status_code == 200
|
| 57 |
-
|
| 58 |
def test_middleware_blocks_api_without_key(self, client):
|
| 59 |
"""Test that middleware blocks API access without API key."""
|
| 60 |
response = client.post(
|
|
@@ -65,7 +54,7 @@ class TestAPIKeyMiddleware:
|
|
| 65 |
data = response.json()
|
| 66 |
assert data["status"] == "error"
|
| 67 |
assert "Missing API key" in data["message"]
|
| 68 |
-
|
| 69 |
def test_middleware_blocks_api_with_invalid_key(self, client, invalid_api_key):
|
| 70 |
"""Test that middleware blocks API access with invalid API key."""
|
| 71 |
response = client.post(
|
|
@@ -77,13 +66,12 @@ class TestAPIKeyMiddleware:
|
|
| 77 |
data = response.json()
|
| 78 |
assert data["status"] == "error"
|
| 79 |
assert "Invalid API key" in data["message"]
|
| 80 |
-
|
| 81 |
def test_middleware_allows_api_with_valid_key(self, client, api_key):
|
| 82 |
"""Test that middleware allows API access with valid API key."""
|
| 83 |
-
# Mock the subtitle service to avoid actual yt-dlp calls
|
| 84 |
with mock.patch('app.apis.subtitles.service.subtitle_service.extract_subtitles') as mock_extract:
|
| 85 |
mock_extract.return_value = ("dQw4w9WgXcQ", ["Test subtitle"])
|
| 86 |
-
|
| 87 |
response = client.post(
|
| 88 |
"/api/v1/subtitles/extract",
|
| 89 |
json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"},
|
|
|
|
| 4 |
import pytest
|
| 5 |
from fastapi.testclient import TestClient
|
| 6 |
|
| 7 |
+
from app.core.security import verify_api_key
|
| 8 |
from app.core.exceptions import AuthenticationError
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
class TestAPIKeySecurity:
|
| 12 |
"""Test API key security functions."""
|
| 13 |
+
|
| 14 |
def test_verify_api_key_valid(self, api_key):
|
| 15 |
"""Test verifying a valid API key."""
|
| 16 |
assert verify_api_key(api_key) is True
|
| 17 |
+
|
| 18 |
def test_verify_api_key_invalid(self, invalid_api_key):
|
| 19 |
"""Test verifying an invalid API key."""
|
| 20 |
with pytest.raises(AuthenticationError):
|
| 21 |
verify_api_key(invalid_api_key)
|
| 22 |
+
|
| 23 |
+
def test_verify_api_key_empty(self):
|
| 24 |
+
"""Test verifying empty API key."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
with pytest.raises(AuthenticationError):
|
| 26 |
+
verify_api_key("")
|
| 27 |
|
| 28 |
|
| 29 |
class TestAPIKeyMiddleware:
|
| 30 |
"""Test API key middleware functionality."""
|
| 31 |
+
|
| 32 |
def test_middleware_allows_health_endpoint(self, client):
|
| 33 |
"""Test that middleware allows access to health endpoint without API key."""
|
| 34 |
response = client.get("/health")
|
| 35 |
assert response.status_code == 200
|
| 36 |
+
|
| 37 |
def test_middleware_allows_docs_endpoint(self, client):
|
| 38 |
"""Test that middleware allows access to docs endpoint without API key."""
|
| 39 |
response = client.get("/docs")
|
| 40 |
assert response.status_code == 200
|
| 41 |
+
|
| 42 |
def test_middleware_allows_root_endpoint(self, client):
|
| 43 |
"""Test that middleware allows access to root endpoint without API key."""
|
| 44 |
response = client.get("/")
|
| 45 |
assert response.status_code == 200
|
| 46 |
+
|
| 47 |
def test_middleware_blocks_api_without_key(self, client):
|
| 48 |
"""Test that middleware blocks API access without API key."""
|
| 49 |
response = client.post(
|
|
|
|
| 54 |
data = response.json()
|
| 55 |
assert data["status"] == "error"
|
| 56 |
assert "Missing API key" in data["message"]
|
| 57 |
+
|
| 58 |
def test_middleware_blocks_api_with_invalid_key(self, client, invalid_api_key):
|
| 59 |
"""Test that middleware blocks API access with invalid API key."""
|
| 60 |
response = client.post(
|
|
|
|
| 66 |
data = response.json()
|
| 67 |
assert data["status"] == "error"
|
| 68 |
assert "Invalid API key" in data["message"]
|
| 69 |
+
|
| 70 |
def test_middleware_allows_api_with_valid_key(self, client, api_key):
|
| 71 |
"""Test that middleware allows API access with valid API key."""
|
|
|
|
| 72 |
with mock.patch('app.apis.subtitles.service.subtitle_service.extract_subtitles') as mock_extract:
|
| 73 |
mock_extract.return_value = ("dQw4w9WgXcQ", ["Test subtitle"])
|
| 74 |
+
|
| 75 |
response = client.post(
|
| 76 |
"/api/v1/subtitles/extract",
|
| 77 |
json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"},
|
tests/test_subtitles.py
CHANGED
|
@@ -1,42 +1,39 @@
|
|
| 1 |
"""Tests for subtitle extraction functionality."""
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
-
from unittest.mock import
|
| 5 |
import asyncio
|
| 6 |
-
from pathlib import Path
|
| 7 |
|
| 8 |
-
from app.apis.subtitles.service import SubtitleService
|
| 9 |
from app.apis.subtitles.utils import (
|
| 10 |
-
clean_subtitle_text,
|
| 11 |
-
extract_video_id,
|
| 12 |
convert_vtt_to_text
|
| 13 |
)
|
| 14 |
from app.core.exceptions import (
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
DownloadTimeoutError,
|
| 18 |
-
SubtitleExtractionError
|
| 19 |
)
|
| 20 |
|
| 21 |
|
| 22 |
class TestSubtitleUtils:
|
| 23 |
"""Test subtitle utility functions."""
|
| 24 |
-
|
| 25 |
def test_extract_video_id_standard_url(self):
|
| 26 |
"""Test extracting video ID from standard YouTube URL."""
|
| 27 |
url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
|
| 28 |
assert extract_video_id(url) == "dQw4w9WgXcQ"
|
| 29 |
-
|
| 30 |
def test_extract_video_id_short_url(self):
|
| 31 |
"""Test extracting video ID from short YouTube URL."""
|
| 32 |
url = "https://youtu.be/dQw4w9WgXcQ"
|
| 33 |
assert extract_video_id(url) == "dQw4w9WgXcQ"
|
| 34 |
-
|
| 35 |
def test_extract_video_id_embed_url(self):
|
| 36 |
"""Test extracting video ID from embed URL."""
|
| 37 |
url = "https://www.youtube.com/embed/dQw4w9WgXcQ"
|
| 38 |
assert extract_video_id(url) == "dQw4w9WgXcQ"
|
| 39 |
-
|
| 40 |
def test_clean_subtitle_text(self):
|
| 41 |
"""Test cleaning subtitle text."""
|
| 42 |
raw_lines = [
|
|
@@ -46,24 +43,24 @@ class TestSubtitleUtils:
|
|
| 46 |
"00:00:00.000 --> 00:00:03.000",
|
| 47 |
"Never gonna give you up",
|
| 48 |
"",
|
| 49 |
-
"2",
|
| 50 |
"00:00:03.000 --> 00:00:06.000",
|
| 51 |
"Never gonna let you down",
|
| 52 |
-
"Never gonna give you up",
|
| 53 |
""
|
| 54 |
]
|
| 55 |
-
|
| 56 |
cleaned = clean_subtitle_text(raw_lines)
|
| 57 |
assert "Never gonna give you up" in cleaned
|
| 58 |
assert "Never gonna let you down" in cleaned
|
| 59 |
assert "WEBVTT" not in cleaned
|
| 60 |
assert "00:00:00.000 --> 00:00:03.000" not in cleaned
|
| 61 |
assert len([line for line in cleaned if line == "Never gonna give you up"]) == 1
|
| 62 |
-
|
| 63 |
def test_convert_vtt_to_text(self, sample_vtt_content):
|
| 64 |
"""Test converting VTT content to clean text."""
|
| 65 |
result = convert_vtt_to_text(sample_vtt_content)
|
| 66 |
-
|
| 67 |
assert "Never gonna give you up" in result
|
| 68 |
assert "Never gonna let you down" in result
|
| 69 |
assert "WEBVTT" not in result
|
|
@@ -72,109 +69,73 @@ class TestSubtitleUtils:
|
|
| 72 |
|
| 73 |
class TestSubtitleService:
|
| 74 |
"""Test subtitle extraction service."""
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
@pytest.fixture
|
| 77 |
def service(self):
|
| 78 |
"""Create a subtitle service instance."""
|
| 79 |
return SubtitleService()
|
| 80 |
-
|
| 81 |
@pytest.mark.asyncio
|
| 82 |
async def test_extract_subtitles_success(self, service, sample_youtube_url, sample_vtt_content):
|
| 83 |
"""Test successful subtitle extraction."""
|
| 84 |
-
with patch.object(service, '
|
| 85 |
-
patch.object(service, '_download_subtitles') as mock_download:
|
| 86 |
-
|
| 87 |
-
mock_check.return_value = None
|
| 88 |
mock_download.return_value = sample_vtt_content
|
| 89 |
-
|
| 90 |
video_id, subtitles = await service.extract_subtitles(sample_youtube_url, "en")
|
| 91 |
-
|
| 92 |
assert video_id == "dQw4w9WgXcQ"
|
| 93 |
assert len(subtitles) > 0
|
| 94 |
assert "Never gonna give you up" in subtitles
|
| 95 |
-
|
| 96 |
@pytest.mark.asyncio
|
| 97 |
async def test_extract_subtitles_not_found(self, service, sample_youtube_url):
|
| 98 |
"""Test subtitle extraction when no subtitles are found."""
|
| 99 |
-
with patch.object(service, '
|
| 100 |
-
patch.object(service, '
|
| 101 |
-
|
| 102 |
-
mock_check.return_value = None
|
| 103 |
mock_download.return_value = None
|
| 104 |
-
|
|
|
|
| 105 |
with pytest.raises(SubtitlesNotFoundError):
|
| 106 |
await service.extract_subtitles(sample_youtube_url, "en")
|
| 107 |
-
|
| 108 |
-
@pytest.mark.asyncio
|
| 109 |
-
async def test_extract_subtitles_timeout(self, service, sample_youtube_url):
|
| 110 |
-
"""Test subtitle extraction timeout."""
|
| 111 |
-
with patch.object(service, '_check_available_subtitles') as mock_check:
|
| 112 |
-
mock_check.side_effect = asyncio.TimeoutError()
|
| 113 |
-
|
| 114 |
-
with pytest.raises(DownloadTimeoutError):
|
| 115 |
-
await service.extract_subtitles(sample_youtube_url, "en")
|
| 116 |
-
|
| 117 |
-
@pytest.mark.asyncio
|
| 118 |
-
async def test_check_available_subtitles_success(self, service, sample_youtube_url):
|
| 119 |
-
"""Test checking available subtitles."""
|
| 120 |
-
mock_process = AsyncMock()
|
| 121 |
-
mock_process.returncode = 0
|
| 122 |
-
mock_process.communicate.return_value = (b"Subtitle output", b"")
|
| 123 |
-
|
| 124 |
-
with patch('asyncio.create_subprocess_exec', return_value=mock_process), \
|
| 125 |
-
patch('asyncio.wait_for', return_value=(b"Subtitle output", b"")):
|
| 126 |
-
|
| 127 |
-
# Should not raise any exception
|
| 128 |
-
await service._check_available_subtitles(sample_youtube_url)
|
| 129 |
-
|
| 130 |
@pytest.mark.asyncio
|
| 131 |
-
async def
|
| 132 |
-
"""Test
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
await service._check_available_subtitles("https://www.youtube.com/watch?v=invalid")
|
| 142 |
-
|
| 143 |
-
def test_get_alternative_languages(self, service):
|
| 144 |
-
"""Test getting alternative language codes."""
|
| 145 |
-
alternatives = service._get_alternative_languages("en")
|
| 146 |
-
assert "en-US" in alternatives
|
| 147 |
-
assert "en-GB" in alternatives
|
| 148 |
-
|
| 149 |
-
alternatives = service._get_alternative_languages("es")
|
| 150 |
-
assert "es-ES" in alternatives
|
| 151 |
-
assert "es-MX" in alternatives
|
| 152 |
-
|
| 153 |
-
# Test unknown language
|
| 154 |
-
alternatives = service._get_alternative_languages("unknown")
|
| 155 |
-
assert "unknown-UNKNOWN" in alternatives
|
| 156 |
|
| 157 |
|
| 158 |
class TestSubtitleAPI:
|
| 159 |
"""Test subtitle API endpoints."""
|
| 160 |
-
|
| 161 |
def test_extract_subtitles_endpoint_success(self, client, api_key, sample_vtt_content):
|
| 162 |
"""Test successful subtitle extraction via API."""
|
| 163 |
with patch('app.apis.subtitles.service.subtitle_service.extract_subtitles') as mock_extract:
|
| 164 |
mock_extract.return_value = ("dQw4w9WgXcQ", ["Never gonna give you up", "Never gonna let you down"])
|
| 165 |
-
|
| 166 |
response = client.post(
|
| 167 |
"/api/v1/subtitles/extract",
|
| 168 |
json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"},
|
| 169 |
headers={"x-api-key": api_key}
|
| 170 |
)
|
| 171 |
-
|
| 172 |
assert response.status_code == 200
|
| 173 |
data = response.json()
|
| 174 |
assert data["status"] == "success"
|
| 175 |
assert data["video_id"] == "dQw4w9WgXcQ"
|
| 176 |
assert "Never gonna give you up" in data["subtitles"]
|
| 177 |
-
|
| 178 |
def test_extract_subtitles_endpoint_invalid_api_key(self, client, invalid_api_key):
|
| 179 |
"""Test API endpoint with invalid API key."""
|
| 180 |
response = client.post(
|
|
@@ -182,18 +143,18 @@ class TestSubtitleAPI:
|
|
| 182 |
json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"},
|
| 183 |
headers={"x-api-key": invalid_api_key}
|
| 184 |
)
|
| 185 |
-
|
| 186 |
assert response.status_code == 401
|
| 187 |
-
|
| 188 |
def test_extract_subtitles_endpoint_missing_api_key(self, client):
|
| 189 |
"""Test API endpoint with missing API key."""
|
| 190 |
response = client.post(
|
| 191 |
"/api/v1/subtitles/extract",
|
| 192 |
json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"}
|
| 193 |
)
|
| 194 |
-
|
| 195 |
assert response.status_code == 401
|
| 196 |
-
|
| 197 |
def test_extract_subtitles_endpoint_invalid_url(self, client, api_key):
|
| 198 |
"""Test API endpoint with invalid URL."""
|
| 199 |
response = client.post(
|
|
@@ -201,28 +162,13 @@ class TestSubtitleAPI:
|
|
| 201 |
json={"url": "https://example.com/not-youtube", "lang": "en"},
|
| 202 |
headers={"x-api-key": api_key}
|
| 203 |
)
|
| 204 |
-
|
| 205 |
-
assert response.status_code == 422
|
| 206 |
-
|
| 207 |
-
def test_extract_subtitles_endpoint_not_found(self, client, api_key):
|
| 208 |
-
"""Test API endpoint when subtitles are not found."""
|
| 209 |
-
with patch('app.apis.subtitles.service.subtitle_service.extract_subtitles') as mock_extract:
|
| 210 |
-
mock_extract.side_effect = SubtitlesNotFoundError("No subtitles available")
|
| 211 |
-
|
| 212 |
-
response = client.post(
|
| 213 |
-
"/api/v1/subtitles/extract",
|
| 214 |
-
json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"},
|
| 215 |
-
headers={"x-api-key": api_key}
|
| 216 |
-
)
|
| 217 |
-
|
| 218 |
-
assert response.status_code == 404
|
| 219 |
-
data = response.json()
|
| 220 |
-
assert data["detail"]["status"] == "error"
|
| 221 |
-
|
| 222 |
def test_subtitles_health_endpoint(self, client):
|
| 223 |
"""Test subtitles health check endpoint."""
|
| 224 |
response = client.get("/api/v1/subtitles/health")
|
| 225 |
-
|
| 226 |
assert response.status_code == 200
|
| 227 |
data = response.json()
|
| 228 |
assert data["status"] == "healthy"
|
|
|
|
| 1 |
"""Tests for subtitle extraction functionality."""
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
+
from unittest.mock import AsyncMock, patch
|
| 5 |
import asyncio
|
|
|
|
| 6 |
|
| 7 |
+
from app.apis.subtitles.service import SubtitleService, SUBTITLE_CACHE
|
| 8 |
from app.apis.subtitles.utils import (
|
| 9 |
+
clean_subtitle_text,
|
| 10 |
+
extract_video_id,
|
| 11 |
convert_vtt_to_text
|
| 12 |
)
|
| 13 |
from app.core.exceptions import (
|
| 14 |
+
SubtitlesNotFoundError,
|
| 15 |
+
DownloadTimeoutError
|
|
|
|
|
|
|
| 16 |
)
|
| 17 |
|
| 18 |
|
| 19 |
class TestSubtitleUtils:
|
| 20 |
"""Test subtitle utility functions."""
|
| 21 |
+
|
| 22 |
def test_extract_video_id_standard_url(self):
|
| 23 |
"""Test extracting video ID from standard YouTube URL."""
|
| 24 |
url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
|
| 25 |
assert extract_video_id(url) == "dQw4w9WgXcQ"
|
| 26 |
+
|
| 27 |
def test_extract_video_id_short_url(self):
|
| 28 |
"""Test extracting video ID from short YouTube URL."""
|
| 29 |
url = "https://youtu.be/dQw4w9WgXcQ"
|
| 30 |
assert extract_video_id(url) == "dQw4w9WgXcQ"
|
| 31 |
+
|
| 32 |
def test_extract_video_id_embed_url(self):
|
| 33 |
"""Test extracting video ID from embed URL."""
|
| 34 |
url = "https://www.youtube.com/embed/dQw4w9WgXcQ"
|
| 35 |
assert extract_video_id(url) == "dQw4w9WgXcQ"
|
| 36 |
+
|
| 37 |
def test_clean_subtitle_text(self):
|
| 38 |
"""Test cleaning subtitle text."""
|
| 39 |
raw_lines = [
|
|
|
|
| 43 |
"00:00:00.000 --> 00:00:03.000",
|
| 44 |
"Never gonna give you up",
|
| 45 |
"",
|
| 46 |
+
"2",
|
| 47 |
"00:00:03.000 --> 00:00:06.000",
|
| 48 |
"Never gonna let you down",
|
| 49 |
+
"Never gonna give you up",
|
| 50 |
""
|
| 51 |
]
|
| 52 |
+
|
| 53 |
cleaned = clean_subtitle_text(raw_lines)
|
| 54 |
assert "Never gonna give you up" in cleaned
|
| 55 |
assert "Never gonna let you down" in cleaned
|
| 56 |
assert "WEBVTT" not in cleaned
|
| 57 |
assert "00:00:00.000 --> 00:00:03.000" not in cleaned
|
| 58 |
assert len([line for line in cleaned if line == "Never gonna give you up"]) == 1
|
| 59 |
+
|
| 60 |
def test_convert_vtt_to_text(self, sample_vtt_content):
|
| 61 |
"""Test converting VTT content to clean text."""
|
| 62 |
result = convert_vtt_to_text(sample_vtt_content)
|
| 63 |
+
|
| 64 |
assert "Never gonna give you up" in result
|
| 65 |
assert "Never gonna let you down" in result
|
| 66 |
assert "WEBVTT" not in result
|
|
|
|
| 69 |
|
| 70 |
class TestSubtitleService:
|
| 71 |
"""Test subtitle extraction service."""
|
| 72 |
+
|
| 73 |
+
@pytest.fixture(autouse=True)
|
| 74 |
+
def clear_cache(self):
|
| 75 |
+
"""Clear cache before each test."""
|
| 76 |
+
SUBTITLE_CACHE.clear()
|
| 77 |
+
|
| 78 |
@pytest.fixture
|
| 79 |
def service(self):
|
| 80 |
"""Create a subtitle service instance."""
|
| 81 |
return SubtitleService()
|
| 82 |
+
|
| 83 |
@pytest.mark.asyncio
|
| 84 |
async def test_extract_subtitles_success(self, service, sample_youtube_url, sample_vtt_content):
|
| 85 |
"""Test successful subtitle extraction."""
|
| 86 |
+
with patch.object(service, '_download_subtitles') as mock_download:
|
|
|
|
|
|
|
|
|
|
| 87 |
mock_download.return_value = sample_vtt_content
|
| 88 |
+
|
| 89 |
video_id, subtitles = await service.extract_subtitles(sample_youtube_url, "en")
|
| 90 |
+
|
| 91 |
assert video_id == "dQw4w9WgXcQ"
|
| 92 |
assert len(subtitles) > 0
|
| 93 |
assert "Never gonna give you up" in subtitles
|
| 94 |
+
|
| 95 |
@pytest.mark.asyncio
|
| 96 |
async def test_extract_subtitles_not_found(self, service, sample_youtube_url):
|
| 97 |
"""Test subtitle extraction when no subtitles are found."""
|
| 98 |
+
with patch.object(service, '_download_subtitles') as mock_download, \
|
| 99 |
+
patch.object(service, '_try_alternative_languages') as mock_alt:
|
|
|
|
|
|
|
| 100 |
mock_download.return_value = None
|
| 101 |
+
mock_alt.return_value = None
|
| 102 |
+
|
| 103 |
with pytest.raises(SubtitlesNotFoundError):
|
| 104 |
await service.extract_subtitles(sample_youtube_url, "en")
|
| 105 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
@pytest.mark.asyncio
|
| 107 |
+
async def test_extract_subtitles_uses_cache(self, service, sample_youtube_url, sample_vtt_content):
|
| 108 |
+
"""Test that cached results are returned."""
|
| 109 |
+
with patch.object(service, '_download_subtitles') as mock_download:
|
| 110 |
+
mock_download.return_value = sample_vtt_content
|
| 111 |
+
|
| 112 |
+
result1 = await service.extract_subtitles(sample_youtube_url, "en")
|
| 113 |
+
result2 = await service.extract_subtitles(sample_youtube_url, "en")
|
| 114 |
+
|
| 115 |
+
assert result1 == result2
|
| 116 |
+
assert mock_download.call_count == 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
|
| 119 |
class TestSubtitleAPI:
|
| 120 |
"""Test subtitle API endpoints."""
|
| 121 |
+
|
| 122 |
def test_extract_subtitles_endpoint_success(self, client, api_key, sample_vtt_content):
|
| 123 |
"""Test successful subtitle extraction via API."""
|
| 124 |
with patch('app.apis.subtitles.service.subtitle_service.extract_subtitles') as mock_extract:
|
| 125 |
mock_extract.return_value = ("dQw4w9WgXcQ", ["Never gonna give you up", "Never gonna let you down"])
|
| 126 |
+
|
| 127 |
response = client.post(
|
| 128 |
"/api/v1/subtitles/extract",
|
| 129 |
json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"},
|
| 130 |
headers={"x-api-key": api_key}
|
| 131 |
)
|
| 132 |
+
|
| 133 |
assert response.status_code == 200
|
| 134 |
data = response.json()
|
| 135 |
assert data["status"] == "success"
|
| 136 |
assert data["video_id"] == "dQw4w9WgXcQ"
|
| 137 |
assert "Never gonna give you up" in data["subtitles"]
|
| 138 |
+
|
| 139 |
def test_extract_subtitles_endpoint_invalid_api_key(self, client, invalid_api_key):
|
| 140 |
"""Test API endpoint with invalid API key."""
|
| 141 |
response = client.post(
|
|
|
|
| 143 |
json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"},
|
| 144 |
headers={"x-api-key": invalid_api_key}
|
| 145 |
)
|
| 146 |
+
|
| 147 |
assert response.status_code == 401
|
| 148 |
+
|
| 149 |
def test_extract_subtitles_endpoint_missing_api_key(self, client):
|
| 150 |
"""Test API endpoint with missing API key."""
|
| 151 |
response = client.post(
|
| 152 |
"/api/v1/subtitles/extract",
|
| 153 |
json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"}
|
| 154 |
)
|
| 155 |
+
|
| 156 |
assert response.status_code == 401
|
| 157 |
+
|
| 158 |
def test_extract_subtitles_endpoint_invalid_url(self, client, api_key):
|
| 159 |
"""Test API endpoint with invalid URL."""
|
| 160 |
response = client.post(
|
|
|
|
| 162 |
json={"url": "https://example.com/not-youtube", "lang": "en"},
|
| 163 |
headers={"x-api-key": api_key}
|
| 164 |
)
|
| 165 |
+
|
| 166 |
+
assert response.status_code == 422
|
| 167 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
def test_subtitles_health_endpoint(self, client):
|
| 169 |
"""Test subtitles health check endpoint."""
|
| 170 |
response = client.get("/api/v1/subtitles/health")
|
| 171 |
+
|
| 172 |
assert response.status_code == 200
|
| 173 |
data = response.json()
|
| 174 |
assert data["status"] == "healthy"
|