# my-ollama-api / Dockerfile
# Source: Hugging Face Space by guydffdsdsfd — "Update Dockerfile" (commit 6b82988, verified)
# Base image: official Ollama runtime.
# NOTE(review): ollama/ollama:latest is unpinned — pin a version tag or
# digest for reproducible builds once a known-good release is chosen.
FROM ollama/ollama:latest

# Install Python and the guard's dependencies in a single layer; skip
# recommended packages and drop caches so the layer stays small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python3 \
        python3-pip && \
    pip3 install --no-cache-dir flask flask-cors requests --break-system-packages && \
    rm -rf /var/lib/apt/lists/*

# Runtime configuration: Ollama binds locally; the Flask guard fronts it.
ENV OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_MODELS=/home/ollama/.ollama/models \
    HOME=/home/ollama

# HF Spaces runs the container under an arbitrary non-root UID, so the data
# directory must be writable by any user — hence the deliberately broad chmod.
RUN mkdir -p /home/ollama/.ollama && chmod -R 777 /home/ollama
# --- Flask guard script: API-key auth + monthly usage limits in front of Ollama ---
RUN cat <<'EOF' > /guard.py
from flask import Flask, request, Response, jsonify, stream_with_context
import requests
from flask_cors import CORS
import json, os, datetime, time, threading

app = Flask(__name__)
CORS(app)

# Persistent state lives under the writable home directory.
DB_PATH = "/home/ollama/usage.json"    # per-key, per-month request counts
WL_PATH = "/home/ollama/whitelist.txt" # one API key per line

# Monthly request cap; overridable via env without rebuilding the image.
LIMIT = int(os.environ.get("GUARD_LIMIT", "500"))

# SECURITY(review): a key baked into the image is visible to anyone who can
# pull it; prefer injecting UNLIMITED_KEY through the Space's secrets/env.
UNLIMITED_KEY = os.environ.get("UNLIMITED_KEY", "sk-ess4l0ri37")

# Seed the whitelist on first boot so the known keys work immediately.
if not os.path.exists(WL_PATH):
    with open(WL_PATH, "w") as f:
        f.write(f"sk-admin-seed-99\nsk-ljlubs0boej\n{UNLIMITED_KEY}\n")
# Whitelist management endpoint.
# SECURITY(review): this endpoint is unauthenticated — anyone who can reach
# the proxy can whitelist their own key and bypass the monthly limit. Gate
# it behind an admin key before exposing it publicly.
@app.route("/whitelist", methods=["POST"])
def whitelist_key():
    """Append a key to the whitelist file. Body: {"key": "<api-key>"}."""
    try:
        # silent=True: malformed/missing JSON becomes a clean 400 instead
        # of an exception that the broad handler turned into a 500.
        data = request.get_json(silent=True) or {}
        key = str(data.get("key", "")).strip()
        if not key:
            return jsonify({"error": "No key provided"}), 400
        # Open in append mode but read first so repeated registrations
        # don't grow the file with duplicate lines.
        with open(WL_PATH, "a+") as f:
            f.seek(0)
            existing = {line.strip() for line in f}
            if key not in existing:
                f.write(f"{key}\n")
        return jsonify({"message": "Key whitelisted successfully"}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
# Health Check
# Root liveness endpoint: answers as soon as Flask is up, independent of
# whether the Ollama daemon has finished starting.
@app.route("/", methods=["GET"])
def health():
    # Plain-text 200 so platform health probes pass.
    return "Ollama Proxy is Running", 200
# Model-list passthrough, used by clients and platform health checks.
@app.route("/api/tags", methods=["GET"])
def tags():
    """Proxy GET /api/tags to the local Ollama daemon."""
    try:
        # Bounded timeout so a wedged daemon can't hang the health check
        # (the original call had none and could block indefinitely).
        resp = requests.get("http://127.0.0.1:11434/api/tags", timeout=10)
        return Response(resp.content, status=resp.status_code,
                        content_type=resp.headers.get('Content-Type'))
    except requests.RequestException:
        # Daemon not accepting connections yet (cold start) or timed out.
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt escape.
        return jsonify({"error": "Ollama starting"}), 503
def get_whitelist():
    """Return the set of whitelisted API keys, ignoring blank lines.

    Filtering empties is a security fix: a stray blank line previously put
    "" in the set, and a request with no x-api-key header defaults to ""
    — which would have authenticated it.
    """
    try:
        with open(WL_PATH, "r") as f:
            return {line.strip() for line in f if line.strip()}
    except OSError:
        # File missing/unreadable: fall back to the master key only.
        return {UNLIMITED_KEY}
# Serialize read-modify-write of the usage DB: Flask's dev server handles
# requests on multiple threads by default, and the unlocked check-then-write
# could lose counts or read a half-written file. (`threading` was already
# imported at the top of this script but never used — the lock was the
# missing piece.)
_usage_lock = threading.Lock()

def _load_usage():
    """Best-effort read of the usage DB; missing/corrupt file -> empty dict."""
    if os.path.exists(DB_PATH):
        try:
            with open(DB_PATH, "r") as f:
                return json.load(f)
        except (OSError, ValueError):
            pass
    return {}

@app.route("/api/generate", methods=["POST"])
@app.route("/api/chat", methods=["POST"])
def proxy():
    """Authenticated, usage-limited passthrough to Ollama generate/chat."""
    user_key = request.headers.get("x-api-key", "")

    # 1. Auth: key must be whitelisted.
    if user_key not in get_whitelist():
        return jsonify({"error": "Unauthorized: Key not registered"}), 401

    # 2. Usage: enforce the monthly cap for limited keys.
    is_unlimited = (user_key == UNLIMITED_KEY)
    month_key = datetime.datetime.now().strftime("%Y-%m")
    if not is_unlimited:
        with _usage_lock:
            usage = _load_usage()
            key_usage = usage.get(user_key, {}).get(month_key, 0)
        if key_usage >= LIMIT:
            return jsonify({"error": f"Monthly limit of {LIMIT} reached"}), 429

    # 3. Proxy the request body to the local Ollama daemon.
    try:
        target_url = "http://127.0.0.1:11434" + request.path
        resp = requests.post(target_url, json=request.json, stream=True, timeout=300)
        if resp.status_code == 404:
            return jsonify({"error": "Model is loading (First run takes ~2 mins). Please wait."}), 503
        if resp.status_code != 200:
            return jsonify({"error": f"Ollama Error: {resp.text}"}), resp.status_code

        # Log usage under the lock, re-reading first so a concurrent
        # request's increment isn't clobbered by a stale snapshot.
        if not is_unlimited:
            with _usage_lock:
                usage = _load_usage()
                usage.setdefault(user_key, {})
                usage[user_key][month_key] = usage[user_key].get(month_key, 0) + 1
                with open(DB_PATH, "w") as f:
                    json.dump(usage, f)

        # Stream Ollama's (possibly chunked) response straight through.
        def generate():
            for chunk in resp.iter_content(chunk_size=1024):
                if chunk:
                    yield chunk
        return Response(stream_with_context(generate()), content_type=resp.headers.get('Content-Type'))
    except requests.exceptions.ConnectionError:
        return jsonify({"error": "Ollama is starting up. Please wait..."}), 503
    except Exception as e:
        return jsonify({"error": f"Proxy Error: {str(e)}"}), 500
# Flask's dev server is acceptable for a single Space; it serves requests
# on multiple threads by default. Port 7860 is the HF Spaces app port.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
EOF
# --- Startup script ---
# NOTE: two processes in one container is a deliberate HF-Spaces trade-off;
# the trailing `wait` keeps the container alive while both run.
RUN cat <<'EOF' > /start.sh
#!/bin/bash
# Start the Ollama daemon in the background.
ollama serve &

# Start the Flask guard right away so port 7860 answers HF health checks.
python3 /guard.py &

# Poll until the daemon actually accepts connections instead of relying on
# a fixed sleep, which raced the daemon's startup.
echo "Waiting for Ollama to come up..."
until ollama list >/dev/null 2>&1; do
  sleep 1
done

echo "Starting Model Pull..."
ollama pull llama3.2:1b
echo "Model Pull Complete."

# Block on the background jobs to keep the container running.
wait
EOF
RUN chmod +x /start.sh

# --- Entrypoint (exec form; bash runs the supervisor script as PID 1) ---
ENTRYPOINT ["/bin/bash", "/start.sh"]