guydffdsdsfd committed on
Commit
abf19aa
·
verified ·
1 Parent(s): 7db7a01

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +115 -40
Dockerfile CHANGED
@@ -1,79 +1,154 @@
1
  FROM ollama/ollama:latest
2
 
 
3
  RUN apt-get update && apt-get install -y python3 python3-pip && \
4
  pip3 install flask flask-cors requests --break-system-packages
5
 
 
6
  ENV OLLAMA_HOST=127.0.0.1:11434
 
7
  ENV HOME=/home/ollama
 
 
8
  RUN mkdir -p /home/ollama/.ollama && chmod -R 777 /home/ollama
9
 
10
- # --- Flask Guard Script ---
11
  RUN cat <<'EOF' > /guard.py
12
- from flask import Flask, request, Response, jsonify
13
  import requests
14
  from flask_cors import CORS
 
15
 
16
  app = Flask(__name__)
17
- # Enable CORS for direct browser access
18
- CORS(app, resources={r"/*": {"origins": "*", "allow_headers": ["Content-Type", "x-api-key"]}})
19
 
 
 
 
20
  UNLIMITED_KEY = "sk-ess4l0ri37"
21
 
22
- @app.route("/api/generate", methods=["POST", "OPTIONS"])
23
- def proxy():
24
- if request.method == "OPTIONS":
25
- return Response(status=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  user_key = request.headers.get("x-api-key", "")
28
- if user_key != UNLIMITED_KEY:
29
- return jsonify({"error": "Unauthorized"}), 401
 
 
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  try:
32
- data = request.json
33
- # Convert your 'prompt' into a 'messages' array for the /api/chat endpoint
34
- ollama_payload = {
35
- "model": data.get("model", "dolphin3:8b"),
36
- "messages": [{"role": "user", "content": data.get("prompt", "")}],
37
- "stream": False,
38
- "options": {
39
- "temperature": data.get("temperature", 0.7)
40
- }
41
- }
42
-
43
- # TALKING TO /api/chat INSTEAD OF /api/generate
44
- resp = requests.post(
45
- "http://127.0.0.1:11434/api/chat",
46
- json=ollama_payload,
47
- timeout=180
48
- )
49
 
 
 
 
 
 
50
  if resp.status_code != 200:
51
- return jsonify({"error": "Ollama Error", "details": resp.text}), resp.status_code
52
 
53
- ollama_res = resp.json()
54
-
55
- # Flatten the response back to the format your frontend expects
56
- return jsonify({
57
- "response": ollama_res.get("message", {}).get("content", ""),
58
- "done": True
59
- })
60
-
 
 
 
 
 
 
 
 
61
  except Exception as e:
62
- return jsonify({"error": str(e)}), 500
63
 
64
  if __name__ == "__main__":
65
  app.run(host="0.0.0.0", port=7860)
66
  EOF
67
 
68
- # --- start.sh stays the same ---
69
  RUN cat <<'EOF' > /start.sh
70
  #!/bin/bash
 
71
  ollama serve &
 
 
72
  python3 /guard.py &
73
- sleep 10
74
- ollama pull dolphin3:8b
 
 
 
 
 
 
75
  wait
76
  EOF
77
 
78
  RUN chmod +x /start.sh
 
 
79
  ENTRYPOINT ["/bin/bash", "/start.sh"]
 
1
  FROM ollama/ollama:latest
2
 
3
+ # Install Python & Dependencies
4
  RUN apt-get update && apt-get install -y python3 python3-pip && \
5
  pip3 install flask flask-cors requests --break-system-packages
6
 
7
+ # Set up environment variables
8
  ENV OLLAMA_HOST=127.0.0.1:11434
9
+ ENV OLLAMA_MODELS=/home/ollama/.ollama/models
10
  ENV HOME=/home/ollama
11
+
12
+ # Create writable directories
13
  RUN mkdir -p /home/ollama/.ollama && chmod -R 777 /home/ollama
14
 
15
+ # --- COMPLETE Flask Guard Script (with whitelist endpoint) ---
16
  RUN cat <<'EOF' > /guard.py
17
+ from flask import Flask, request, Response, jsonify, stream_with_context
18
  import requests
19
  from flask_cors import CORS
20
+ import json, os, datetime, time, threading
21
 
22
  app = Flask(__name__)
23
+ CORS(app)
 
24
 
25
+ DB_PATH = "/home/ollama/usage.json"
26
+ WL_PATH = "/home/ollama/whitelist.txt"
27
+ LIMIT = 500
28
  UNLIMITED_KEY = "sk-ess4l0ri37"
29
 
30
+ # Ensure whitelist exists
31
+ if not os.path.exists(WL_PATH):
32
+ with open(WL_PATH, "w") as f:
33
+ f.write(f"sk-admin-seed-99\nsk-ljlubs0boej\n{UNLIMITED_KEY}\n")
34
+
35
# --- Whitelist management endpoint ---
@app.route("/whitelist", methods=["POST"])
def whitelist_key():
    """Add an API key to the on-disk whitelist.

    Expects a JSON body of the form {"key": "<api-key>"}.
    Returns 200 on success, 400 for a missing/empty key or a
    non-JSON body, 500 on unexpected I/O errors.
    """
    try:
        # silent=True yields None (instead of raising) for non-JSON
        # bodies, so a malformed request maps to 400 rather than 500.
        data = request.get_json(silent=True) or {}
        key = data.get("key", "").strip()
        if not key:
            return jsonify({"error": "No key provided"}), 400

        # Skip duplicates so repeated registrations don't grow the file.
        if key not in get_whitelist():
            with open(WL_PATH, "a") as f:
                f.write(f"{key}\n")
        return jsonify({"message": "Key whitelisted successfully"}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
50
 
51
# Lightweight liveness probe for the hosting platform.
@app.route("/", methods=["GET"])
def health():
    """Report that the proxy process itself is up (does not check Ollama)."""
    status_line = "Ollama Proxy is Running"
    return status_line, 200
55
+
56
+ # API Tags endpoint for health checks
57
+ @app.route("/api/tags", methods=["GET"])
58
+ def tags():
59
+ try:
60
+ resp = requests.get("http://127.0.0.1:11434/api/tags")
61
+ return Response(resp.content, status=resp.status_code, content_type=resp.headers.get('Content-Type'))
62
+ except:
63
+ return jsonify({"error": "Ollama starting"}), 503
64
+
65
def get_whitelist():
    """Return the set of whitelisted API keys.

    Reads WL_PATH on every call so keys added via /whitelist take
    effect immediately. Falls back to the unlimited key alone if the
    file is missing or unreadable.
    """
    try:
        with open(WL_PATH, "r") as f:
            # Strip newlines and drop blank lines so a stray empty
            # line can never whitelist the empty string (the default
            # value when the x-api-key header is absent).
            return {line.strip() for line in f if line.strip()}
    except OSError:
        # Only file-system errors are expected here; a bare except
        # would also swallow KeyboardInterrupt/SystemExit.
        return {UNLIMITED_KEY}
71
+
72
# Guards read-modify-write cycles on the usage JSON file; Flask's
# default server handles requests on multiple threads (the original
# imported `threading` for this but never used it).
_usage_lock = threading.Lock()

def _load_usage():
    """Read the usage database, returning {} if absent or corrupt."""
    if not os.path.exists(DB_PATH):
        return {}
    try:
        with open(DB_PATH, "r") as f:
            return json.load(f)
    except (OSError, ValueError):
        return {}

@app.route("/api/generate", methods=["POST"])
@app.route("/api/chat", methods=["POST"])
def proxy():
    """Authenticated, rate-limited proxy for Ollama generate/chat.

    Flow: (1) reject keys not present in the whitelist, (2) enforce
    the per-key monthly request LIMIT (the unlimited key is exempt),
    (3) forward the JSON body to the same path on the local Ollama
    server and stream its response back unchanged.
    """
    user_key = request.headers.get("x-api-key", "")

    # 1. Auth check
    if user_key not in get_whitelist():
        return jsonify({"error": "Unauthorized: Key not registered"}), 401

    # 2. Usage check (requests are counted per calendar month)
    is_unlimited = (user_key == UNLIMITED_KEY)
    month_key = datetime.datetime.now().strftime("%Y-%m")
    if not is_unlimited:
        with _usage_lock:
            usage = _load_usage()
        if usage.get(user_key, {}).get(month_key, 0) >= LIMIT:
            return jsonify({"error": f"Monthly limit of {LIMIT} reached"}), 429

    # 3. Proxy to Ollama
    try:
        target_url = "http://127.0.0.1:11434" + request.path
        resp = requests.post(target_url, json=request.json, stream=True, timeout=300)

        if resp.status_code == 404:
            return jsonify({"error": "Model is loading (First run takes ~2 mins). Please wait."}), 503

        if resp.status_code != 200:
            return jsonify({"error": f"Ollama Error: {resp.text}"}), resp.status_code

        # Log usage only for successful, metered requests. Re-read the
        # counter under the lock so two concurrent requests don't both
        # write a stale count and lose an increment.
        if not is_unlimited:
            with _usage_lock:
                usage = _load_usage()
                usage.setdefault(user_key, {})
                usage[user_key][month_key] = usage[user_key].get(month_key, 0) + 1
                with open(DB_PATH, "w") as f:
                    json.dump(usage, f)

        # Stream the (possibly chunked) Ollama response back verbatim.
        def generate():
            for chunk in resp.iter_content(chunk_size=1024):
                if chunk:
                    yield chunk

        return Response(stream_with_context(generate()),
                        content_type=resp.headers.get('Content-Type'))

    except requests.exceptions.ConnectionError:
        return jsonify({"error": "Ollama is starting up. Please wait..."}), 503
    except Exception as e:
        return jsonify({"error": f"Proxy Error: {str(e)}"}), 500
127
 
128
  if __name__ == "__main__":
129
  app.run(host="0.0.0.0", port=7860)
130
  EOF
131
 
132
+ # --- Startup Script ---
133
# --- Startup Script ---
RUN cat <<'EOF' > /start.sh
#!/bin/bash
# Start Ollama in the background
ollama serve &

# Start the Python guard (opens port 7860 immediately for HF)
python3 /guard.py &

# Poll until the Ollama API answers instead of sleeping a fixed
# 5 seconds — the daemon can take longer to bind on a cold start,
# and pulling before it is up fails the pull.
until ollama list >/dev/null 2>&1; do
    sleep 1
done
echo "Starting Model Pull..."
ollama pull llama2-uncensored:7b
echo "Model Pull Complete."

# Keep container running
wait
EOF
150
 
151
  RUN chmod +x /start.sh
152
+
153
+ # --- Entrypoint ---
154
  ENTRYPOINT ["/bin/bash", "/start.sh"]