# syntax=docker/dockerfile:1
#
# Ollama server fronted by a small Flask "guard" proxy.
#
# The guard listens on port 7860, authenticates callers by the `x-api-key`
# header against a whitelist file, enforces a per-key monthly request limit,
# and forwards /api/generate and /api/chat to the local Ollama server.

# The base tag is overridable at build time so deployments can pin a known-good
# release (docker build --build-arg OLLAMA_TAG=<version>). The default keeps the
# original behaviour of tracking `latest`.
ARG OLLAMA_TAG=latest
FROM ollama/ollama:${OLLAMA_TAG}

# Install Python & dependencies. The apt lists and the pip cache are removed in
# the same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
    && pip3 install --no-cache-dir --break-system-packages \
        flask \
        flask-cors \
        requests \
    && rm -rf /var/lib/apt/lists/*

# Ollama listens on loopback only; the guard is the sole public entry point.
ENV OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_MODELS=/home/ollama/.ollama/models \
    HOME=/home/ollama

# Create writable directories.
# NOTE(review): world-writable permissions are kept because the runtime UID is
# not fixed by this file; if the platform guarantees a UID, prefer a targeted
# chown plus a USER instruction instead of 777.
RUN mkdir -p /home/ollama/.ollama && chmod -R 777 /home/ollama

# --- Flask guard script (auth, monthly quota, streaming proxy) ---
COPY <<'EOF' /guard.py
"""Authenticating, rate-limiting reverse proxy in front of a local Ollama server."""
from flask import Flask, request, Response, jsonify, stream_with_context
import requests
from flask_cors import CORS
import json, os, datetime, threading

app = Flask(__name__)
CORS(app)

DB_PATH = "/home/ollama/usage.json"
WL_PATH = "/home/ollama/whitelist.txt"
OLLAMA_URL = "http://127.0.0.1:11434"
LIMIT = 500

# NOTE(review): this key (and the seed keys below) is a credential baked into
# the image. It can be overridden with the UNLIMITED_KEY environment variable;
# the literal default is kept only for backward compatibility.
UNLIMITED_KEY = os.environ.get("UNLIMITED_KEY") or "sk-ess4l0ri37"

# Serializes every read-modify-write of the whitelist and usage files; the
# Flask development server handles requests on multiple threads.
_file_lock = threading.Lock()

# Ensure whitelist exists
if not os.path.exists(WL_PATH):
    with open(WL_PATH, "w") as f:
        f.write(f"sk-admin-seed-99\nsk-ljlubs0boej\n{UNLIMITED_KEY}\n")


def get_whitelist():
    """Return the set of registered API keys.

    Blank lines are skipped so that an empty `x-api-key` can never match.
    If the file cannot be read, only the unlimited key is accepted.
    """
    try:
        with open(WL_PATH, "r") as f:
            return {line.strip() for line in f if line.strip()}
    except Exception:
        return {UNLIMITED_KEY}


def _load_usage():
    """Return the usage mapping {key: {"YYYY-MM": count}}, or {} if missing/corrupt."""
    try:
        with open(DB_PATH, "r") as f:
            data = json.load(f)
        return data if isinstance(data, dict) else {}
    except Exception:
        return {}


def _record_usage(user_key, month_key):
    """Increment the request counter for `user_key` in `month_key`.

    The file is re-read under the lock (rather than reusing a snapshot taken
    before the upstream call) so concurrent requests do not overwrite each
    other's counts, and it is replaced atomically so a crash cannot leave a
    truncated JSON file behind.
    """
    with _file_lock:
        usage = _load_usage()
        per_key = usage.get(user_key)
        if not isinstance(per_key, dict):
            per_key = usage[user_key] = {}
        per_key[month_key] = per_key.get(month_key, 0) + 1
        tmp_path = DB_PATH + ".tmp"
        with open(tmp_path, "w") as f:
            json.dump(usage, f)
        os.replace(tmp_path, DB_PATH)


# Whitelist management endpoint.
# NOTE(review): this endpoint is unauthenticated, so any caller can register a
# key for themselves. Left as-is because existing clients depend on it;
# consider requiring an admin key.
@app.route("/whitelist", methods=["POST"])
def whitelist_key():
    """Register the key given as JSON {"key": "..."}; 400 on missing/invalid input."""
    data = request.get_json(silent=True)
    key = data.get("key", "") if isinstance(data, dict) else ""
    if not isinstance(key, str) or not key.strip():
        return jsonify({"error": "No key provided"}), 400
    key = key.strip()
    # A key with embedded line breaks would write several entries (or a blank
    # one) into the line-oriented whitelist file.
    if "\n" in key or "\r" in key:
        return jsonify({"error": "Key must not contain line breaks"}), 400
    try:
        with _file_lock:
            # Skip keys that are already present so the file does not grow
            # on repeated registrations.
            if key not in get_whitelist():
                with open(WL_PATH, "a") as f:
                    f.write(f"{key}\n")
        return jsonify({"message": "Key whitelisted successfully"}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500


# Health Check
@app.route("/", methods=["GET"])
def health():
    """Liveness probe for the proxy itself (does not contact Ollama)."""
    return "Ollama Proxy is Running", 200


# API Tags endpoint for health checks
@app.route("/api/tags", methods=["GET"])
def tags():
    """Relay Ollama's /api/tags response; 503 while Ollama is unreachable."""
    try:
        # Bounded wait so a wedged Ollama cannot hang the health check.
        resp = requests.get(OLLAMA_URL + "/api/tags", timeout=10)
        return Response(resp.content, status=resp.status_code,
                        content_type=resp.headers.get('Content-Type'))
    except Exception:
        return jsonify({"error": "Ollama starting"}), 503


@app.route("/api/generate", methods=["POST"])
@app.route("/api/chat", methods=["POST"])
def proxy():
    """Authenticate, enforce the monthly quota, then stream the Ollama response."""
    user_key = request.headers.get("x-api-key", "")

    # 1. Auth Check (an absent/empty key is never valid)
    if not user_key or user_key not in get_whitelist():
        return jsonify({"error": "Unauthorized: Key not registered"}), 401

    # 2. Usage Check
    is_unlimited = (user_key == UNLIMITED_KEY)
    month_key = datetime.datetime.now().strftime("%Y-%m")
    if not is_unlimited:
        with _file_lock:
            per_key = _load_usage().get(user_key, {})
        key_usage = per_key.get(month_key, 0) if isinstance(per_key, dict) else 0
        if key_usage >= LIMIT:
            return jsonify({"error": f"Monthly limit of {LIMIT} reached"}), 429

    payload = request.get_json(silent=True)
    if payload is None:
        return jsonify({"error": "Request body must be valid JSON"}), 400

    # 3. Proxy to Ollama
    resp = None
    try:
        resp = requests.post(OLLAMA_URL + request.path, json=payload,
                             stream=True, timeout=300)

        if resp.status_code == 404:
            resp.close()
            return jsonify({"error": "Model is loading (First run takes ~2 mins). Please wait."}), 503

        if resp.status_code != 200:
            detail = resp.text
            resp.close()
            return jsonify({"error": f"Ollama Error: {detail}"}), resp.status_code

        # Log usage only for requests Ollama accepted.
        if not is_unlimited:
            _record_usage(user_key, month_key)

        # Stream response
        def generate():
            try:
                for chunk in resp.iter_content(chunk_size=1024):
                    if chunk:
                        yield chunk
            finally:
                # Release the upstream connection even if the client
                # disconnects mid-stream.
                resp.close()

        return Response(stream_with_context(generate()),
                        content_type=resp.headers.get('Content-Type'))

    except requests.exceptions.ConnectionError:
        return jsonify({"error": "Ollama is starting up. Please wait..."}), 503
    except Exception as e:
        if resp is not None:
            resp.close()
        return jsonify({"error": f"Proxy Error: {str(e)}"}), 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
EOF

# --- Startup script ---
COPY --chmod=755 <<'EOF' /start.sh
#!/bin/bash

# Start Ollama in the background
ollama serve &

# Start the Python Guard (Opens Port 7860 immediately for HF)
python3 /guard.py &

# Wait (up to ~60s) for Ollama to answer instead of assuming a fixed delay,
# then pull the model.
for _ in $(seq 1 60); do
    if ollama list >/dev/null 2>&1; then
        break
    fi
    sleep 1
done

echo "Starting Model Pull..."
if ollama pull llama3.2:1b; then
    echo "Model Pull Complete."
else
    echo "Model Pull Failed." >&2
fi

# Keep the container running, but exit as soon as either background process
# dies so the platform can restart it instead of serving a half-dead container.
wait -n
EOF

# Port served by the Flask guard (documentation only; does not publish it).
EXPOSE 7860

# --- Entrypoint ---
ENTRYPOINT ["/bin/bash", "/start.sh"]