Spaces:

QCDevs
/

Selector

Running

App Files Community

legend1234 committed on Jan 7, 2025

Commit

f51532a

1 Parent(s): 119352e

Simplify the web server

Browse files

Files changed (4) hide show

Dockerfile +5 -11
app.py +19 -59
docker-compose.yml +5 -91
gunicorn_config.py +10 -11

Dockerfile CHANGED Viewed

@@ -5,7 +5,6 @@ WORKDIR /app
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
-    curl \
     && rm -rf /var/lib/apt/lists/*
 # Copy requirements first to leverage Docker cache
@@ -15,15 +14,10 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY . .
-# Create uploads directory
-RUN mkdir -p uploads && chmod 777 uploads
-# Create a non-root user
-RUN useradd -m appuser && chown -R appuser:appuser /app
-USER appuser
-# # Set environment variables for the buffered output
-# ENV PYTHONUNBUFFERED=1
-# Default command (can be overridden in docker-compose.yml)
 CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]

 # Install system dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
     && rm -rf /var/lib/apt/lists/*
 # Copy requirements first to leverage Docker cache
 # Copy application code
 COPY . .
+# Create necessary directories with proper permissions
+RUN mkdir -p uploads md_files && \
+    chmod 777 uploads && \
+    chmod 755 md_files
+# Default command
 CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# import json
 import inspect
 import io
 import os
@@ -16,7 +15,6 @@ import numpy as np
 import orjson
 import pandas as pd
 from flask import Flask, Response, render_template, request, send_file
-from flask_status import FlaskStatus
 from selector.methods.distance import DISE, MaxMin, MaxSum, OptiSim
 from selector.methods.partition import GridPartition, Medoid
 from selector.methods.similarity import NSimilarity
@@ -24,17 +22,13 @@ from selector.measures.diversity import compute_diversity
 from sklearn.metrics import pairwise_distances
 from werkzeug.utils import secure_filename
-try:
-    from celery_config import celery
-    CELERY_AVAILABLE = True
-except ImportError:
-    CELERY_AVAILABLE = False
 app = Flask(__name__)
-app_status = FlaskStatus(app)
 app.config["MAX_CONTENT_LENGTH"] = 32 * 1024 * 1024  # 32MB max file size
-app.config["UPLOAD_FOLDER"] = "uploads"
 file_lock = threading.Lock()
 # Ensure upload directory exists
@@ -57,24 +51,23 @@ SELECTION_ALGORITHM_MAP = {
 def allowed_file(filename):
     return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
 def get_unique_upload_dir():
     """Create a unique directory for each upload session."""
     unique_dir = os.path.join(app.config["UPLOAD_FOLDER"], str(uuid.uuid4()))
     os.makedirs(unique_dir, exist_ok=True)
     return unique_dir
-def clean_upload_dir(directory):
-    """Safely clean up upload directory."""
     try:
-        if os.path.exists(directory):
-            shutil.rmtree(directory)
     except Exception as e:
-        print(f"Error cleaning directory {directory}: {e}")
 def load_data(filepath):
     """Load data from various file formats."""
@@ -348,6 +341,7 @@ def upload_selection_file():
             with file_lock:
                 file.save(file_path)
             # Load data
             array = load_data(file_path)
@@ -434,7 +428,7 @@ def calculate_diversity():
         # Get files from request
         feature_subset_file = request.files.get('feature_subset')
         features_file = request.files.get('features')
         if not feature_subset_file:
             return create_json_response({"error": "Feature subset file is required"}, 400)
@@ -497,7 +491,7 @@ def calculate_diversity():
                 features=features,
                 cs=cs
             )
             return create_json_response({
                 "success": True,
                 "diversity_score": float(diversity_score)
@@ -512,44 +506,10 @@ def calculate_diversity():
     except Exception as e:
         return create_json_response({"error": str(e)}, 500)
-@app.route("/status")
-def server_status():
-    """Return server status"""
-    status = {
-        "status": "ok",
-        "message": "Server is running",
-        "timestamp": datetime.now().isoformat(),
-        "components": {"flask": True, "celery": False, "redis": False},
-    }
-    if CELERY_AVAILABLE:
-        # Check Celery
-        try:
-            celery.control.ping(timeout=1)
-            status["components"]["celery"] = True
-        except Exception as e:
-            print(f"Celery check failed: {e}")
-        # Check Redis
-        try:
-            redis_client = celery.backend.client
-            redis_client.ping()
-            status["components"]["redis"] = True
-        except Exception as e:
-            print(f"Redis check failed: {e}")
-        # Set overall status
-        if not all(status["components"].values()):
-            status["status"] = "degraded"
-            status["message"] = "Some components are not available"
-    else:
-        status["message"] = "Running without Celery/Redis support"
-    return create_json_response(status)
 if __name__ == "__main__":
-    app.run(debug=True, host="0.0.0.0", port=8008)
-    from flask_debugtoolbar import DebugToolbarExtension
-    toolbar = DebugToolbarExtension(app)

 import inspect
 import io
 import os
 import orjson
 import pandas as pd
 from flask import Flask, Response, render_template, request, send_file
 from selector.methods.distance import DISE, MaxMin, MaxSum, OptiSim
 from selector.methods.partition import GridPartition, Medoid
 from selector.methods.similarity import NSimilarity
 from sklearn.metrics import pairwise_distances
 from werkzeug.utils import secure_filename
+# Constants
+UPLOAD_FOLDER = "uploads"
+ALLOWED_EXTENSIONS = {"txt", "npz", "xlsx", "xls"}
 app = Flask(__name__)
 app.config["MAX_CONTENT_LENGTH"] = 32 * 1024 * 1024  # 32MB max file size
+app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
 file_lock = threading.Lock()
 # Ensure upload directory exists
 def allowed_file(filename):
+    """Check if file extension is allowed."""
     return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
 def get_unique_upload_dir():
     """Create a unique directory for each upload session."""
     unique_dir = os.path.join(app.config["UPLOAD_FOLDER"], str(uuid.uuid4()))
     os.makedirs(unique_dir, exist_ok=True)
+    os.chmod(unique_dir, 0o777)  # Full permissions for Docker container
     return unique_dir
+def clean_upload_dir(upload_dir):
+    """Clean up upload directory after processing."""
     try:
+        if os.path.exists(upload_dir):
+            shutil.rmtree(upload_dir)
     except Exception as e:
+        print(f"Error cleaning upload directory: {e}")
 def load_data(filepath):
     """Load data from various file formats."""
             with file_lock:
                 file.save(file_path)
+                # os.chmod(file_path, 0o666)  # Read/write for all
             # Load data
             array = load_data(file_path)
         # Get files from request
         feature_subset_file = request.files.get('feature_subset')
         features_file = request.files.get('features')
         if not feature_subset_file:
             return create_json_response({"error": "Feature subset file is required"}, 400)
                 features=features,
                 cs=cs
             )
             return create_json_response({
                 "success": True,
                 "diversity_score": float(diversity_score)
     except Exception as e:
         return create_json_response({"error": str(e)}, 500)
+@app.route('/health')
+def health_check():
+    """Health check endpoint for Docker"""
+    return create_json_response({"status": "healthy"})
 if __name__ == "__main__":
+    app.run(debug=True, host="0.0.0.0", port=8009)

docker-compose.yml CHANGED Viewed

@@ -3,106 +3,20 @@ version: '3.8'
 services:
   web:
     build: .
-    command: gunicorn --config gunicorn_config.py app:app
-    expose:
-      - "8008"
-    volumes:
-      - .:/app
-      - upload_data:/app/uploads
-    depends_on:
-      redis:
-        condition: service_healthy
-    environment:
-      - FLASK_ENV=production
-      - REDIS_URL=redis://redis:6379/0
-    deploy:
-      replicas: 1
-      resources:
-        limits:
-          cpus: '0.6'
-          memory: 6G
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8008/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 40s
-    restart: unless-stopped
-  redis:
-    image: redis:7-alpine
-    command: redis-server --appendonly yes
-    volumes:
-      - redis_data:/data
     ports:
-      - "6379:6379"
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      interval: 10s
-      timeout: 5s
-      retries: 3
-    deploy:
-      resources:
-        limits:
-          cpus: '0.4'
-          memory: 1G
-    restart: unless-stopped
-  celery_worker:
-    build: .
-    command: celery -A app.celery worker --loglevel=info
     volumes:
       - .:/app
       - upload_data:/app/uploads
-    depends_on:
-      - redis
     environment:
-      - REDIS_URL=redis://redis:6379/0
-    deploy:
-      replicas: 2
-      resources:
-        limits:
-          cpus: '0.8'
-          memory: 6G
-    restart: unless-stopped
-  celery_flower:
-    build: .
-    command: celery -A app.celery flower
-    ports:
-      - "5555:5555"
-    volumes:
-      - .:/app
-      - flower_data:/app/flower
-    depends_on:
-      - redis
-      - celery_worker
-    environment:
-      - REDIS_URL=redis://redis:6379/0
-    deploy:
-      resources:
-        limits:
-          cpus: '0.2'
-          memory: 512M
-    restart: unless-stopped
-  nginx:
-    image: nginx:alpine
-    ports:
-      - "8008:8008"
-    volumes:
-      - ./nginx.conf:/etc/nginx/nginx.conf:ro
-      - .:/app:ro
-    depends_on:
-      - web
     deploy:
       resources:
         limits:
-          cpus: '0.2'
-          memory: 512M
     restart: unless-stopped
 volumes:
-  redis_data:
   upload_data:
-  flower_data:

 services:
   web:
     build: .
+    command: gunicorn --config gunicorn_config.py app:app --reload
     ports:
+      - "8009:8009"
     volumes:
       - .:/app
       - upload_data:/app/uploads
     environment:
+      - FLASK_ENV=production
     deploy:
       resources:
         limits:
+          cpus: '1.0'
+          memory: 12G
     restart: unless-stopped
 volumes:
   upload_data:

gunicorn_config.py CHANGED Viewed

@@ -1,21 +1,21 @@
 import multiprocessing
 import os
-# Number of worker processes - limited for 2vCPU environment
-workers = 2  # Using 2 workers for 2vCPU
-# Number of threads per worker - reduced for memory efficiency
-threads = 2
 # Maximum number of pending connections
-backlog = 1024
 # Maximum number of requests a worker will process before restarting
-max_requests = 1000
 max_requests_jitter = 50
-# Timeout for worker processes (5 minutes)
-timeout = 300
 # Keep-alive timeout
 keepalive = 5
@@ -27,10 +27,9 @@ errorlog = "-"
 access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
 # Bind address - use HF_PORT for Hugging Face deployment
-port = os.getenv('PORT', '8008')  # HF uses PORT env var
-bind = f"0.0.0.0:{port}"
-# Worker class - using sync for better stability
 worker_class = "sync"
 # Process name

 import multiprocessing
 import os
+# Number of worker processes - adjust based on CPU cores
+workers = 1
+# Number of threads per worker
+threads = 4
 # Maximum number of pending connections
+backlog = 2048
 # Maximum number of requests a worker will process before restarting
+max_requests = 10000
 max_requests_jitter = 50
+# Timeout for worker processes (2 minutes)
+timeout = 120
 # Keep-alive timeout
 keepalive = 5
 access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
 # Bind address - use HF_PORT for Hugging Face deployment
+bind = "0.0.0.0:8009"
+# Worker class
 worker_class = "sync"
 # Process name