Commit ·
f51532a
1
Parent(s): 119352e
Simplify the web server
Browse files
- Dockerfile +5 -11
- app.py +19 -59
- docker-compose.yml +5 -91
- gunicorn_config.py +10 -11
Dockerfile
CHANGED
|
@@ -5,7 +5,6 @@ WORKDIR /app
|
|
| 5 |
# Install system dependencies
|
| 6 |
RUN apt-get update && apt-get install -y \
|
| 7 |
build-essential \
|
| 8 |
-
curl \
|
| 9 |
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
|
| 11 |
# Copy requirements first to leverage Docker cache
|
|
@@ -15,15 +14,10 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 15 |
# Copy application code
|
| 16 |
COPY . .
|
| 17 |
|
| 18 |
-
# Create
|
| 19 |
-
RUN mkdir -p uploads &&
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
#
|
| 22 |
-
RUN useradd -m appuser && chown -R appuser:appuser /app
|
| 23 |
-
USER appuser
|
| 24 |
-
|
| 25 |
-
# # Set environment variables for the buffered output
|
| 26 |
-
# ENV PYTHONUNBUFFERED=1
|
| 27 |
-
|
| 28 |
-
# Default command (can be overridden in docker-compose.yml)
|
| 29 |
CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]
|
|
|
|
| 5 |
# Install system dependencies
|
| 6 |
RUN apt-get update && apt-get install -y \
|
| 7 |
build-essential \
|
|
|
|
| 8 |
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
# Copy requirements first to leverage Docker cache
|
|
|
|
| 14 |
# Copy application code
|
| 15 |
COPY . .
|
| 16 |
|
| 17 |
+
# Create necessary directories with proper permissions
|
| 18 |
+
RUN mkdir -p uploads md_files && \
|
| 19 |
+
chmod 777 uploads && \
|
| 20 |
+
chmod 755 md_files
|
| 21 |
|
| 22 |
+
# Default command
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]
|
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# import json
|
| 2 |
import inspect
|
| 3 |
import io
|
| 4 |
import os
|
|
@@ -16,7 +15,6 @@ import numpy as np
|
|
| 16 |
import orjson
|
| 17 |
import pandas as pd
|
| 18 |
from flask import Flask, Response, render_template, request, send_file
|
| 19 |
-
from flask_status import FlaskStatus
|
| 20 |
from selector.methods.distance import DISE, MaxMin, MaxSum, OptiSim
|
| 21 |
from selector.methods.partition import GridPartition, Medoid
|
| 22 |
from selector.methods.similarity import NSimilarity
|
|
@@ -24,17 +22,13 @@ from selector.measures.diversity import compute_diversity
|
|
| 24 |
from sklearn.metrics import pairwise_distances
|
| 25 |
from werkzeug.utils import secure_filename
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
CELERY_AVAILABLE = True
|
| 31 |
-
except ImportError:
|
| 32 |
-
CELERY_AVAILABLE = False
|
| 33 |
|
| 34 |
app = Flask(__name__)
|
| 35 |
-
app_status = FlaskStatus(app)
|
| 36 |
app.config["MAX_CONTENT_LENGTH"] = 32 * 1024 * 1024 # 32MB max file size
|
| 37 |
-
app.config["UPLOAD_FOLDER"] =
|
| 38 |
file_lock = threading.Lock()
|
| 39 |
|
| 40 |
# Ensure upload directory exists
|
|
@@ -57,24 +51,23 @@ SELECTION_ALGORITHM_MAP = {
|
|
| 57 |
|
| 58 |
|
| 59 |
def allowed_file(filename):
|
|
|
|
| 60 |
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
|
| 61 |
|
| 62 |
-
|
| 63 |
def get_unique_upload_dir():
|
| 64 |
"""Create a unique directory for each upload session."""
|
| 65 |
unique_dir = os.path.join(app.config["UPLOAD_FOLDER"], str(uuid.uuid4()))
|
| 66 |
os.makedirs(unique_dir, exist_ok=True)
|
|
|
|
| 67 |
return unique_dir
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
"""Safely clean up upload directory."""
|
| 72 |
try:
|
| 73 |
-
if os.path.exists(
|
| 74 |
-
shutil.rmtree(
|
| 75 |
except Exception as e:
|
| 76 |
-
print(f"Error cleaning
|
| 77 |
-
|
| 78 |
|
| 79 |
def load_data(filepath):
|
| 80 |
"""Load data from various file formats."""
|
|
@@ -348,6 +341,7 @@ def upload_selection_file():
|
|
| 348 |
|
| 349 |
with file_lock:
|
| 350 |
file.save(file_path)
|
|
|
|
| 351 |
|
| 352 |
# Load data
|
| 353 |
array = load_data(file_path)
|
|
@@ -434,7 +428,7 @@ def calculate_diversity():
|
|
| 434 |
# Get files from request
|
| 435 |
feature_subset_file = request.files.get('feature_subset')
|
| 436 |
features_file = request.files.get('features')
|
| 437 |
-
|
| 438 |
if not feature_subset_file:
|
| 439 |
return create_json_response({"error": "Feature subset file is required"}, 400)
|
| 440 |
|
|
@@ -497,7 +491,7 @@ def calculate_diversity():
|
|
| 497 |
features=features,
|
| 498 |
cs=cs
|
| 499 |
)
|
| 500 |
-
|
| 501 |
return create_json_response({
|
| 502 |
"success": True,
|
| 503 |
"diversity_score": float(diversity_score)
|
|
@@ -512,44 +506,10 @@ def calculate_diversity():
|
|
| 512 |
except Exception as e:
|
| 513 |
return create_json_response({"error": str(e)}, 500)
|
| 514 |
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
""
|
| 519 |
-
status = {
|
| 520 |
-
"status": "ok",
|
| 521 |
-
"message": "Server is running",
|
| 522 |
-
"timestamp": datetime.now().isoformat(),
|
| 523 |
-
"components": {"flask": True, "celery": False, "redis": False},
|
| 524 |
-
}
|
| 525 |
-
|
| 526 |
-
if CELERY_AVAILABLE:
|
| 527 |
-
# Check Celery
|
| 528 |
-
try:
|
| 529 |
-
celery.control.ping(timeout=1)
|
| 530 |
-
status["components"]["celery"] = True
|
| 531 |
-
except Exception as e:
|
| 532 |
-
print(f"Celery check failed: {e}")
|
| 533 |
-
|
| 534 |
-
# Check Redis
|
| 535 |
-
try:
|
| 536 |
-
redis_client = celery.backend.client
|
| 537 |
-
redis_client.ping()
|
| 538 |
-
status["components"]["redis"] = True
|
| 539 |
-
except Exception as e:
|
| 540 |
-
print(f"Redis check failed: {e}")
|
| 541 |
-
|
| 542 |
-
# Set overall status
|
| 543 |
-
if not all(status["components"].values()):
|
| 544 |
-
status["status"] = "degraded"
|
| 545 |
-
status["message"] = "Some components are not available"
|
| 546 |
-
else:
|
| 547 |
-
status["message"] = "Running without Celery/Redis support"
|
| 548 |
-
|
| 549 |
-
return create_json_response(status)
|
| 550 |
-
|
| 551 |
|
| 552 |
if __name__ == "__main__":
|
| 553 |
-
app.run(debug=True, host="0.0.0.0", port=
|
| 554 |
-
from flask_debugtoolbar import DebugToolbarExtension
|
| 555 |
-
toolbar = DebugToolbarExtension(app)
|
|
|
|
|
|
|
| 1 |
import inspect
|
| 2 |
import io
|
| 3 |
import os
|
|
|
|
| 15 |
import orjson
|
| 16 |
import pandas as pd
|
| 17 |
from flask import Flask, Response, render_template, request, send_file
|
|
|
|
| 18 |
from selector.methods.distance import DISE, MaxMin, MaxSum, OptiSim
|
| 19 |
from selector.methods.partition import GridPartition, Medoid
|
| 20 |
from selector.methods.similarity import NSimilarity
|
|
|
|
| 22 |
from sklearn.metrics import pairwise_distances
|
| 23 |
from werkzeug.utils import secure_filename
|
| 24 |
|
| 25 |
+
# Constants
|
| 26 |
+
UPLOAD_FOLDER = "uploads"
|
| 27 |
+
ALLOWED_EXTENSIONS = {"txt", "npz", "xlsx", "xls"}
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
app = Flask(__name__)
|
|
|
|
| 30 |
app.config["MAX_CONTENT_LENGTH"] = 32 * 1024 * 1024 # 32MB max file size
|
| 31 |
+
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
|
| 32 |
file_lock = threading.Lock()
|
| 33 |
|
| 34 |
# Ensure upload directory exists
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
def allowed_file(filename):
|
| 54 |
+
"""Check if file extension is allowed."""
|
| 55 |
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
|
| 56 |
|
|
|
|
| 57 |
def get_unique_upload_dir():
|
| 58 |
"""Create a unique directory for each upload session."""
|
| 59 |
unique_dir = os.path.join(app.config["UPLOAD_FOLDER"], str(uuid.uuid4()))
|
| 60 |
os.makedirs(unique_dir, exist_ok=True)
|
| 61 |
+
os.chmod(unique_dir, 0o777) # Full permissions for Docker container
|
| 62 |
return unique_dir
|
| 63 |
|
| 64 |
+
def clean_upload_dir(upload_dir):
|
| 65 |
+
"""Clean up upload directory after processing."""
|
|
|
|
| 66 |
try:
|
| 67 |
+
if os.path.exists(upload_dir):
|
| 68 |
+
shutil.rmtree(upload_dir)
|
| 69 |
except Exception as e:
|
| 70 |
+
print(f"Error cleaning upload directory: {e}")
|
|
|
|
| 71 |
|
| 72 |
def load_data(filepath):
|
| 73 |
"""Load data from various file formats."""
|
|
|
|
| 341 |
|
| 342 |
with file_lock:
|
| 343 |
file.save(file_path)
|
| 344 |
+
# os.chmod(file_path, 0o666) # Read/write for all
|
| 345 |
|
| 346 |
# Load data
|
| 347 |
array = load_data(file_path)
|
|
|
|
| 428 |
# Get files from request
|
| 429 |
feature_subset_file = request.files.get('feature_subset')
|
| 430 |
features_file = request.files.get('features')
|
| 431 |
+
|
| 432 |
if not feature_subset_file:
|
| 433 |
return create_json_response({"error": "Feature subset file is required"}, 400)
|
| 434 |
|
|
|
|
| 491 |
features=features,
|
| 492 |
cs=cs
|
| 493 |
)
|
| 494 |
+
|
| 495 |
return create_json_response({
|
| 496 |
"success": True,
|
| 497 |
"diversity_score": float(diversity_score)
|
|
|
|
| 506 |
except Exception as e:
|
| 507 |
return create_json_response({"error": str(e)}, 500)
|
| 508 |
|
| 509 |
+
@app.route('/health')
|
| 510 |
+
def health_check():
|
| 511 |
+
"""Health check endpoint for Docker"""
|
| 512 |
+
return create_json_response({"status": "healthy"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
|
| 514 |
if __name__ == "__main__":
|
| 515 |
+
app.run(debug=True, host="0.0.0.0", port=8009)
|
|
|
|
|
|
docker-compose.yml
CHANGED
|
@@ -3,106 +3,20 @@ version: '3.8'
|
|
| 3 |
services:
|
| 4 |
web:
|
| 5 |
build: .
|
| 6 |
-
command: gunicorn --config gunicorn_config.py app:app
|
| 7 |
-
expose:
|
| 8 |
-
- "8008"
|
| 9 |
-
volumes:
|
| 10 |
-
- .:/app
|
| 11 |
-
- upload_data:/app/uploads
|
| 12 |
-
depends_on:
|
| 13 |
-
redis:
|
| 14 |
-
condition: service_healthy
|
| 15 |
-
environment:
|
| 16 |
-
- FLASK_ENV=production
|
| 17 |
-
- REDIS_URL=redis://redis:6379/0
|
| 18 |
-
deploy:
|
| 19 |
-
replicas: 1
|
| 20 |
-
resources:
|
| 21 |
-
limits:
|
| 22 |
-
cpus: '0.6'
|
| 23 |
-
memory: 6G
|
| 24 |
-
healthcheck:
|
| 25 |
-
test: ["CMD", "curl", "-f", "http://localhost:8008/health"]
|
| 26 |
-
interval: 30s
|
| 27 |
-
timeout: 10s
|
| 28 |
-
retries: 3
|
| 29 |
-
start_period: 40s
|
| 30 |
-
restart: unless-stopped
|
| 31 |
-
|
| 32 |
-
redis:
|
| 33 |
-
image: redis:7-alpine
|
| 34 |
-
command: redis-server --appendonly yes
|
| 35 |
-
volumes:
|
| 36 |
-
- redis_data:/data
|
| 37 |
ports:
|
| 38 |
-
- "
|
| 39 |
-
healthcheck:
|
| 40 |
-
test: ["CMD", "redis-cli", "ping"]
|
| 41 |
-
interval: 10s
|
| 42 |
-
timeout: 5s
|
| 43 |
-
retries: 3
|
| 44 |
-
deploy:
|
| 45 |
-
resources:
|
| 46 |
-
limits:
|
| 47 |
-
cpus: '0.4'
|
| 48 |
-
memory: 1G
|
| 49 |
-
restart: unless-stopped
|
| 50 |
-
|
| 51 |
-
celery_worker:
|
| 52 |
-
build: .
|
| 53 |
-
command: celery -A app.celery worker --loglevel=info
|
| 54 |
volumes:
|
| 55 |
- .:/app
|
| 56 |
- upload_data:/app/uploads
|
| 57 |
-
depends_on:
|
| 58 |
-
- redis
|
| 59 |
environment:
|
| 60 |
-
-
|
| 61 |
-
deploy:
|
| 62 |
-
replicas: 2
|
| 63 |
-
resources:
|
| 64 |
-
limits:
|
| 65 |
-
cpus: '0.8'
|
| 66 |
-
memory: 6G
|
| 67 |
-
restart: unless-stopped
|
| 68 |
-
|
| 69 |
-
celery_flower:
|
| 70 |
-
build: .
|
| 71 |
-
command: celery -A app.celery flower
|
| 72 |
-
ports:
|
| 73 |
-
- "5555:5555"
|
| 74 |
-
volumes:
|
| 75 |
-
- .:/app
|
| 76 |
-
- flower_data:/app/flower
|
| 77 |
-
depends_on:
|
| 78 |
-
- redis
|
| 79 |
-
- celery_worker
|
| 80 |
-
environment:
|
| 81 |
-
- REDIS_URL=redis://redis:6379/0
|
| 82 |
-
deploy:
|
| 83 |
-
resources:
|
| 84 |
-
limits:
|
| 85 |
-
cpus: '0.2'
|
| 86 |
-
memory: 512M
|
| 87 |
-
restart: unless-stopped
|
| 88 |
-
|
| 89 |
-
nginx:
|
| 90 |
-
image: nginx:alpine
|
| 91 |
-
ports:
|
| 92 |
-
- "8008:8008"
|
| 93 |
-
volumes:
|
| 94 |
-
- ./nginx.conf:/etc/nginx/nginx.conf:ro
|
| 95 |
-
- .:/app:ro
|
| 96 |
-
depends_on:
|
| 97 |
-
- web
|
| 98 |
deploy:
|
| 99 |
resources:
|
| 100 |
limits:
|
| 101 |
-
cpus: '
|
| 102 |
-
memory:
|
| 103 |
restart: unless-stopped
|
| 104 |
|
| 105 |
volumes:
|
| 106 |
-
redis_data:
|
| 107 |
upload_data:
|
| 108 |
-
flower_data:
|
|
|
|
| 3 |
services:
|
| 4 |
web:
|
| 5 |
build: .
|
| 6 |
+
command: gunicorn --config gunicorn_config.py app:app --reload
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
ports:
|
| 8 |
+
- "8009:8009"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
volumes:
|
| 10 |
- .:/app
|
| 11 |
- upload_data:/app/uploads
|
|
|
|
|
|
|
| 12 |
environment:
|
| 13 |
+
- FLASK_ENV=production
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
deploy:
|
| 15 |
resources:
|
| 16 |
limits:
|
| 17 |
+
cpus: '1.0'
|
| 18 |
+
memory: 12G
|
| 19 |
restart: unless-stopped
|
| 20 |
|
| 21 |
volumes:
|
|
|
|
| 22 |
upload_data:
|
|
|
gunicorn_config.py
CHANGED
|
@@ -1,21 +1,21 @@
|
|
| 1 |
import multiprocessing
|
| 2 |
import os
|
| 3 |
|
| 4 |
-
# Number of worker processes -
|
| 5 |
-
workers =
|
| 6 |
|
| 7 |
-
# Number of threads per worker
|
| 8 |
-
threads =
|
| 9 |
|
| 10 |
# Maximum number of pending connections
|
| 11 |
-
backlog =
|
| 12 |
|
| 13 |
# Maximum number of requests a worker will process before restarting
|
| 14 |
-
max_requests =
|
| 15 |
max_requests_jitter = 50
|
| 16 |
|
| 17 |
-
# Timeout for worker processes (
|
| 18 |
-
timeout =
|
| 19 |
|
| 20 |
# Keep-alive timeout
|
| 21 |
keepalive = 5
|
|
@@ -27,10 +27,9 @@ errorlog = "-"
|
|
| 27 |
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
|
| 28 |
|
| 29 |
# Bind address - use HF_PORT for Hugging Face deployment
|
| 30 |
-
|
| 31 |
-
bind = f"0.0.0.0:{port}"
|
| 32 |
|
| 33 |
-
# Worker class
|
| 34 |
worker_class = "sync"
|
| 35 |
|
| 36 |
# Process name
|
|
|
|
| 1 |
import multiprocessing
|
| 2 |
import os
|
| 3 |
|
| 4 |
+
# Number of worker processes - adjust based on CPU cores
|
| 5 |
+
workers = 1
|
| 6 |
|
| 7 |
+
# Number of threads per worker
|
| 8 |
+
threads = 4
|
| 9 |
|
| 10 |
# Maximum number of pending connections
|
| 11 |
+
backlog = 2048
|
| 12 |
|
| 13 |
# Maximum number of requests a worker will process before restarting
|
| 14 |
+
max_requests = 10000
|
| 15 |
max_requests_jitter = 50
|
| 16 |
|
| 17 |
+
# Timeout for worker processes (2 minutes)
|
| 18 |
+
timeout = 120
|
| 19 |
|
| 20 |
# Keep-alive timeout
|
| 21 |
keepalive = 5
|
|
|
|
| 27 |
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
|
| 28 |
|
| 29 |
# Bind address - use HF_PORT for Hugging Face deployment
|
| 30 |
+
bind = "0.0.0.0:8009"
|
|
|
|
| 31 |
|
| 32 |
+
# Worker class
|
| 33 |
worker_class = "sync"
|
| 34 |
|
| 35 |
# Process name
|