Spaces:

guydffdsdsfd
/

my-ollama-api

Sleeping

App Files Files Community

guydffdsdsfd committed on Jan 10

Commit

abf19aa

verified ·

1 Parent(s): 7db7a01

Update Dockerfile

Browse files

Files changed (1) hide show

Dockerfile +115 -40

Dockerfile CHANGED Viewed

@@ -1,79 +1,154 @@
 FROM ollama/ollama:latest
 RUN apt-get update && apt-get install -y python3 python3-pip && \
     pip3 install flask flask-cors requests --break-system-packages
 ENV OLLAMA_HOST=127.0.0.1:11434
 ENV HOME=/home/ollama
 RUN mkdir -p /home/ollama/.ollama && chmod -R 777 /home/ollama
-# --- Flask Guard Script ---
 RUN cat <<'EOF' > /guard.py
-from flask import Flask, request, Response, jsonify
 import requests
 from flask_cors import CORS
 app = Flask(__name__)
-# Enable CORS for direct browser access
-CORS(app, resources={r"/*": {"origins": "*", "allow_headers": ["Content-Type", "x-api-key"]}})
 UNLIMITED_KEY = "sk-ess4l0ri37"
-@app.route("/api/generate", methods=["POST", "OPTIONS"])
-def proxy():
-    if request.method == "OPTIONS":
-        return Response(status=200)
     user_key = request.headers.get("x-api-key", "")
-    if user_key != UNLIMITED_KEY:
-        return jsonify({"error": "Unauthorized"}), 401
     try:
-        data = request.json
-        # Convert your 'prompt' into a 'messages' array for the /api/chat endpoint
-        ollama_payload = {
-            "model": data.get("model", "dolphin3:8b"),
-            "messages": [{"role": "user", "content": data.get("prompt", "")}],
-            "stream": False,
-            "options": {
-                "temperature": data.get("temperature", 0.7)
-            }
-        }
-        # TALKING TO /api/chat INSTEAD OF /api/generate
-        resp = requests.post(
-            "http://127.0.0.1:11434/api/chat",
-            json=ollama_payload,
-            timeout=180
-        )
         if resp.status_code != 200:
-            return jsonify({"error": "Ollama Error", "details": resp.text}), resp.status_code
-        ollama_res = resp.json()
-        # Flatten the response back to the format your frontend expects
-        return jsonify({
-            "response": ollama_res.get("message", {}).get("content", ""),
-            "done": True
-        })
     except Exception as e:
-        return jsonify({"error": str(e)}), 500
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)
 EOF
-# --- start.sh stays the same ---
 RUN cat <<'EOF' > /start.sh
 #!/bin/bash
 ollama serve &
 python3 /guard.py &
-sleep 10
-ollama pull dolphin3:8b
 wait
 EOF
 RUN chmod +x /start.sh
 ENTRYPOINT ["/bin/bash", "/start.sh"]

 FROM ollama/ollama:latest
+# Install Python and the proxy's dependencies (flask, flask-cors, requests).
+# The apt package lists and pip's download cache are removed in the same
+# layer so they are not baked into the final image.
+RUN apt-get update && apt-get install -y python3 python3-pip && \
+    pip3 install --no-cache-dir flask flask-cors requests --break-system-packages && \
+    rm -rf /var/lib/apt/lists/*
+# Set up environment variables.
+# OLLAMA_HOST is the loopback address that /guard.py forwards requests to.
 ENV OLLAMA_HOST=127.0.0.1:11434
+ENV OLLAMA_MODELS=/home/ollama/.ollama/models
 ENV HOME=/home/ollama
+# Create writable directories
+# NOTE(review): mode 777 makes the whole home directory world-writable,
+# including the whitelist and usage files /guard.py stores there - confirm
+# this is required by the runtime user.
 RUN mkdir -p /home/ollama/.ollama && chmod -R 777 /home/ollama
+# --- COMPLETE Flask Guard Script (with whitelist endpoint) ---
 RUN cat <<'EOF' > /guard.py
+from flask import Flask, request, Response, jsonify, stream_with_context
 import requests
 from flask_cors import CORS
+import json, os, datetime, time, threading
 app = Flask(__name__)
+# Enable CORS on every route with flask-cors' default settings.
+CORS(app)
+# JSON file holding per-key, per-month request counters.
+DB_PATH = "/home/ollama/usage.json"
+# Text file holding the authorized API keys, one per line.
+WL_PATH = "/home/ollama/whitelist.txt"
+# Maximum number of proxied requests per key per calendar month.
+LIMIT = 500
+# Key that bypasses the monthly limit. It can be supplied through the
+# UNLIMITED_KEY environment variable so the secret does not have to live in
+# the image; an unset or empty variable falls back to the previous literal.
+UNLIMITED_KEY = os.environ.get("UNLIMITED_KEY") or "sk-ess4l0ri37"
+# Ensure whitelist exists: seed it on first start only, so keys appended
+# later are not overwritten while the file persists.
+# NOTE(review): the two seed keys below are hard-coded credentials as well.
+if not os.path.exists(WL_PATH):
+    with open(WL_PATH, "w") as f:
+        f.write(f"sk-admin-seed-99\nsk-ljlubs0boej\n{UNLIMITED_KEY}\n")
+# Whitelist management endpoint: appends a caller-supplied key to WL_PATH.
+# NOTE(review): this route performs no authentication, so any client that can
+# reach the proxy can whitelist a key of its choosing - confirm this is intended.
+@app.route("/whitelist", methods=["POST"])
+def whitelist_key():
+    """Append the API key from the JSON body to the whitelist file.
+
+    Expects a JSON object of the form {"key": "<api key>"}.
+
+    Returns:
+        200 with a confirmation message when the key was appended,
+        400 when the body is not a JSON object, the key is missing, empty,
+        not a string, or contains a line break,
+        500 when the whitelist file cannot be written.
+    """
+    # silent=True yields None for a missing/invalid JSON body instead of
+    # raising, so malformed requests are answered with 400 rather than 500.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "No key provided"}), 400
+    key = data.get("key", "")
+    if not isinstance(key, str) or not key.strip():
+        return jsonify({"error": "No key provided"}), 400
+    key = key.strip()
+    # The whitelist is line-oriented: an embedded line break would register
+    # several entries, none of which equals the submitted key.
+    if "\n" in key or "\r" in key:
+        return jsonify({"error": "Key must not contain line breaks"}), 400
+    try:
+        # Add key to whitelist
+        with open(WL_PATH, "a") as f:
+            f.write(f"{key}\n")
+    except OSError as e:
+        return jsonify({"error": str(e)}), 500
+    return jsonify({"message": "Key whitelisted successfully"}), 200
+# Liveness probe for the proxy itself
+@app.route("/", methods=["GET"])
+def health():
+    """Return a fixed plain-text banner with HTTP 200; Ollama is not contacted."""
+    banner, status = "Ollama Proxy is Running", 200
+    return banner, status
+# API Tags endpoint for health checks
+@app.route("/api/tags", methods=["GET"])
+def tags():
+    """Relay Ollama's /api/tags response (body, status, Content-Type).
+
+    Returns 503 with {"error": "Ollama starting"} when the local Ollama
+    server cannot be reached or does not answer within the timeout.
+    """
+    try:
+        # Bounded wait: without a timeout a hung upstream would block this
+        # health check indefinitely.
+        resp = requests.get("http://127.0.0.1:11434/api/tags", timeout=10)
+    except requests.exceptions.RequestException:
+        # Covers connection failures and timeouts only; programming errors
+        # are no longer silently reported as "starting".
+        return jsonify({"error": "Ollama starting"}), 503
+    return Response(resp.content, status=resp.status_code, content_type=resp.headers.get('Content-Type'))
+def get_whitelist():
+    """Return the set of authorized API keys read from WL_PATH.
+
+    Each non-blank line of the file is one key (surrounding whitespace is
+    stripped). Blank lines are skipped so the empty string never becomes a
+    valid key; otherwise a request without an x-api-key header would be
+    authorized. If the file cannot be read, only UNLIMITED_KEY is returned.
+    """
+    try:
+        with open(WL_PATH, "r") as f:
+            stripped = (line.strip() for line in f)
+            return {key for key in stripped if key}
+    except (OSError, UnicodeDecodeError):
+        # Best-effort fallback: keep the unlimited key usable even when the
+        # whitelist file is missing or unreadable.
+        return {UNLIMITED_KEY}
+@app.route("/api/generate", methods=["POST"])
+@app.route("/api/chat", methods=["POST"])
+def proxy():
+    """Authorize, rate-limit and forward a request to the local Ollama server.
+
+    The caller's key is read from the x-api-key header. Keys other than
+    UNLIMITED_KEY are limited to LIMIT successful requests per calendar
+    month, tracked in the JSON file at DB_PATH. The JSON body is forwarded
+    unchanged to the same path on Ollama and the reply is streamed back.
+
+    Returns:
+        401 if the key is missing or not whitelisted,
+        429 if the monthly limit has been reached,
+        400 if the request body is not valid JSON,
+        503 if Ollama answers 404 or cannot be reached,
+        Ollama's own status code for any other non-200 reply,
+        500 for any other failure,
+        otherwise the streamed Ollama response.
+    """
     user_key = request.headers.get("x-api-key", "")
+    # 1. Auth Check
+    # An empty key is rejected explicitly so that a blank line in the
+    # whitelist file can never authorize requests that carry no key.
+    if not user_key or user_key not in get_whitelist():
+        return jsonify({"error": "Unauthorized: Key not registered"}), 401
+    # 2. Usage Check
+    is_unlimited = (user_key == UNLIMITED_KEY)
+    if not is_unlimited:
+        now = datetime.datetime.now()
+        month_key = now.strftime("%Y-%m")
+        usage = {}
+        if os.path.exists(DB_PATH):
+            try:
+                with open(DB_PATH, "r") as f:
+                    usage = json.load(f)
+            except (OSError, ValueError):
+                # Unreadable or corrupt usage file: start from empty counters.
+                usage = {}
+        # Tolerate a usage file whose structure is not the expected
+        # {key: {month: count}} mapping instead of crashing the request.
+        if not isinstance(usage, dict):
+            usage = {}
+        per_key = usage.get(user_key)
+        if not isinstance(per_key, dict):
+            per_key = {}
+        key_usage = per_key.get(month_key, 0)
+        if key_usage >= LIMIT:
+            return jsonify({"error": f"Monthly limit of {LIMIT} reached"}), 429
+    # A missing or malformed JSON body is a client error, not a proxy failure.
+    payload = request.get_json(silent=True)
+    if payload is None:
+        return jsonify({"error": "Invalid or missing JSON body"}), 400
+    # 3. Proxy to Ollama
+    resp = None
     try:
+        target_url = "http://127.0.0.1:11434" + request.path
+        resp = requests.post(target_url, json=payload, stream=True, timeout=300)
+        if resp.status_code == 404:
+            # Streamed responses hold their connection until closed.
+            resp.close()
+            return jsonify({"error": "Model is loading (First run takes ~2 mins). Please wait."}), 503
         if resp.status_code != 200:
+            detail = resp.text
+            resp.close()
+            return jsonify({"error": f"Ollama Error: {detail}"}), resp.status_code
+        # Log usage
+        # NOTE(review): the read above and this write are not synchronized;
+        # if requests are served concurrently, increments for the same key
+        # can be lost.
+        if not is_unlimited:
+            per_key[month_key] = key_usage + 1
+            usage[user_key] = per_key
+            with open(DB_PATH, "w") as f:
+                json.dump(usage, f)
+        # Stream response
+        def generate():
+            try:
+                for chunk in resp.iter_content(chunk_size=1024):
+                    if chunk: yield chunk
+            finally:
+                # Release the upstream connection when the stream ends or the
+                # client disconnects.
+                resp.close()
+        return Response(stream_with_context(generate()), content_type=resp.headers.get('Content-Type'))
+    except requests.exceptions.ConnectionError:
+        return jsonify({"error": "Ollama is starting up. Please wait..."}), 503
     except Exception as e:
+        if resp is not None:
+            resp.close()
+        return jsonify({"error": f"Proxy Error: {str(e)}"}), 500
+# When executed as a script (start.sh runs `python3 /guard.py`), serve the
+# app with Flask's built-in server on all interfaces, port 7860.
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)
 EOF
+# --- Startup Script ---
 RUN cat <<'EOF' > /start.sh
 #!/bin/bash
+# Start Ollama in the background
 ollama serve &
+# Start the Python Guard (Opens Port 7860 immediately for HF)
 python3 /guard.py &
+# Wait (up to ~60s) until the Ollama server answers before pulling the model.
+# A fixed sleep can expire before the server is ready, making the pull fail.
+for _ in $(seq 1 60); do
+    if ollama list >/dev/null 2>&1; then
+        break
+    fi
+    sleep 1
+done
+echo "Starting Model Pull..."
+# Report the real outcome instead of always printing "Complete".
+if ollama pull llama2-uncensored:7b; then
+    echo "Model Pull Complete."
+else
+    echo "Model Pull Failed." >&2
+fi
+# Keep container running (blocks on the two background processes)
 wait
 EOF
 RUN chmod +x /start.sh
+# --- Entrypoint ---
 ENTRYPOINT ["/bin/bash", "/start.sh"]