Spaces:

guydffdsdsfd
/

my-ollama-api

Sleeping

App Files Files Community

guydffdsdsfd committed on Jan 14

Commit

d298b41

verified ·

1 Parent(s): 751d4cf

Update Dockerfile

Browse files

Files changed (1) hide show

Dockerfile +140 -217

Dockerfile CHANGED Viewed

@@ -1,231 +1,154 @@
-from flask import Flask, request, jsonify, send_file
 from flask_cors import CORS
-import os, torch, io, time, json
-from diffusers import StableDiffusionPipeline
-import threading
 app = Flask(__name__)
-# CORS configuration
-CORS(app, resources={
-    r"/*": {
-        "origins": [
-            "https://kaigpt.vercel.app",
-            "https://kaigpt.vercel.app/chat",
-            "http://localhost:3000",
-            "*"
-        ],
-        "methods": ["GET", "POST", "OPTIONS"],
-        "allow_headers": ["Content-Type", "Authorization", "x-api-key"]
-    }
-})
-# Configuration
-WL_PATH = 'whitelist.txt'
-UNLIMITED_KEY = 'sk-ess4l0ri37'
-TRUSTED_DOMAINS = ["kaigpt.vercel.app", "localhost"]
-# Global progress tracking
-image_progress = {}
-progress_lock = threading.Lock()
-print('Loading Stable Diffusion v1.5...')
-try:
-    pipe = StableDiffusionPipeline.from_pretrained(
-        'runwayml/stable-diffusion-v1-5',
-        torch_dtype=torch.float32,
-        safety_checker=None,
-        requires_safety_checker=False
-    ).to('cpu')
-    print('✅ Stable Diffusion loaded successfully')
-except Exception as e:
-    print(f'❌ Error loading Stable Diffusion: {e}')
-    pipe = None
-def get_whitelist():
-    """Get whitelisted API keys"""
-    if not os.path.exists(WL_PATH):
-        return {UNLIMITED_KEY}
-    with open(WL_PATH, 'r') as f:
-        return set(line.strip() for line in f.readlines() if line.strip())
-def is_trusted_origin():
-    """Check if request comes from trusted origin"""
-    origin = request.headers.get("Origin", "")
-    referer = request.headers.get("Referer", "")
-    for domain in TRUSTED_DOMAINS:
-        if domain in origin or domain in referer:
-            return True
-    return False
-def update_progress(request_id, progress, status):
-    """Update progress for a request"""
-    with progress_lock:
-        image_progress[request_id] = {
-            'progress': progress,
-            'status': status,
-            'timestamp': time.time()
-        }
-def cleanup_old_progress():
-    """Remove old progress entries"""
-    with progress_lock:
-        current_time = time.time()
-        to_remove = []
-        for req_id, data in image_progress.items():
-            if current_time - data['timestamp'] > 300:  # 5 minutes
-                to_remove.append(req_id)
-        for req_id in to_remove:
-            del image_progress[req_id]
-@app.route('/api/txt2img', methods=['POST', 'OPTIONS'])
-def gen_img():
-    """Generate image from text prompt"""
-    if request.method == 'OPTIONS':
-        return jsonify({'status': 'ok'}), 200
-    # Check authorization
-    if not is_trusted_origin():
-        api_key = request.headers.get('x-api-key') or request.json.get('api_key', '')
-        if api_key not in get_whitelist():
-            return jsonify({'error': 'Unauthorized', 'message': 'Invalid API key'}), 401
-    if not pipe:
-        return jsonify({'error': 'Model not loaded', 'message': 'Stable Diffusion is not available'}), 503
-    data = request.get_json(force=True) or {}
-    prompt = data.get('prompt', 'a beautiful landscape')
-    steps = min(max(int(data.get('steps', 25)), 10), 50)  # Clamp between 10-50
-    request_id = data.get('request_id', f'img_{int(time.time())}_{hash(prompt) % 10000}')
-    # Clean up old progress entries
-    cleanup_old_progress()
-    # Initialize progress
-    update_progress(request_id, 0, 'Starting image generation...')
     try:
-        # Define progress callback
-        def progress_callback(step, timestep, latents):
-            progress = int((step / steps) * 100)
-            update_progress(request_id, progress, f'Step {step}/{steps}')
-        # Generate image
-        print(f'Generating image: "{prompt[:50]}..." ({steps} steps)')
-        with torch.no_grad():
-            image = pipe(
-                prompt,
-                num_inference_steps=steps,
-                guidance_scale=7.5,
-                callback=progress_callback,
-                callback_steps=1
-            ).images[0]
-        # Convert to bytes
-        img_io = io.BytesIO()
-        image.save(img_io, 'PNG', quality=95)
-        img_io.seek(0)
-        # Mark as complete
-        update_progress(request_id, 100, 'Complete!')
-        # Return image
-        return send_file(
-            img_io,
-            mimetype='image/png',
-            as_attachment=False,
-            download_name=f'generated_{int(time.time())}.png'
-        )
     except Exception as e:
-        print(f'Image generation error: {e}')
-        update_progress(request_id, 0, f'Error: {str(e)}')
-        return jsonify({
-            'error': 'Generation failed',
-            'message': str(e),
-            'request_id': request_id
-        }), 500
-@app.route('/api/img_progress/<request_id>', methods=['GET'])
-def get_progress(request_id):
-    """Get progress of image generation"""
-    cleanup_old_progress()
-    with progress_lock:
-        progress_data = image_progress.get(request_id, {
-            'progress': 0,
-            'status': 'Not found or expired',
-            'timestamp': time.time()
-        })
-    return jsonify(progress_data)
-@app.route('/api/health', methods=['GET'])
 def health():
-    """Health check endpoint"""
-    status = {
-        'status': 'online' if pipe else 'offline',
-        'model': 'stable-diffusion-v1-5',
-        'loaded': pipe is not None,
-        'trusted_domains': TRUSTED_DOMAINS,
-        'timestamp': time.time()
-    }
-    return jsonify(status)
-@app.route('/api/whitelist/add', methods=['POST'])
-def add_to_whitelist():
-    """Add API key to whitelist (admin only)"""
-    data = request.get_json() or {}
-    admin_key = data.get('admin_key', '')
-    new_key = data.get('key', '').strip()
-    # Simple admin check - in production use proper authentication
-    if admin_key != 'admin123':
-        return jsonify({'error': 'Invalid admin key'}), 403
-    if not new_key:
-        return jsonify({'error': 'No key provided'}), 400
     try:
-        with open(WL_PATH, 'a') as f:
-            f.write(f"{new_key}\n")
-        return jsonify({
-            'status': 'success',
-            'message': f'Key added to whitelist',
-            'total_keys': len(get_whitelist())
-        }), 200
     except Exception as e:
-        return jsonify({'error': str(e)}), 500
-@app.route('/api/config', methods=['GET'])
-def get_config():
-    """Get server configuration"""
-    return jsonify({
-        'max_steps': 50,
-        'min_steps': 10,
-        'default_steps': 25,
-        'supported_sizes': ['512x512', '768x768'],
-        'model': 'stable-diffusion-v1-5'
-    })
-if __name__ == '__main__':
-    # Create whitelist file if it doesn't exist
-    if not os.path.exists(WL_PATH):
-        with open(WL_PATH, 'w') as f:
-            f.write(f"{UNLIMITED_KEY}\n")
-        print(f'Created whitelist file with default key: {UNLIMITED_KEY}')
-    print(f'Whitelisted keys: {get_whitelist()}')
-    print(f'Trusted domains: {TRUSTED_DOMAINS}')
-    print(f'Server starting on port 7860...')
-    app.run(
-        host='0.0.0.0',
-        port=7860,
-        debug=False,
-        threaded=True
-    )

+FROM ollama/ollama:latest
+# Install Python & Dependencies
+# The apt package index and pip's download cache are dropped in the same
+# layer so they are not baked into the final image.
+RUN apt-get update && apt-get install -y python3 python3-pip && \
+    pip3 install --no-cache-dir flask flask-cors requests --break-system-packages && \
+    rm -rf /var/lib/apt/lists/*
+# Set up environment variables
+ENV OLLAMA_HOST=127.0.0.1:11434
+ENV OLLAMA_MODELS=/home/ollama/.ollama/models
+ENV HOME=/home/ollama
+# Create writable directories
+RUN mkdir -p /home/ollama/.ollama && chmod -R 777 /home/ollama
+# --- COMPLETE Flask Guard Script (with whitelist endpoint) ---
+RUN cat <<'EOF' > /guard.py
+from flask import Flask, request, Response, jsonify, stream_with_context
+import requests
 from flask_cors import CORS
+import json, os, datetime, time, threading
 app = Flask(__name__)
+CORS(app)
+# JSON file with per-key request counts, nested as {api_key: {"YYYY-MM": count}}.
+DB_PATH = "/home/ollama/usage.json"
+# Text file listing the accepted API keys, one per line.
+WL_PATH = "/home/ollama/whitelist.txt"
+# Maximum number of proxied requests allowed per key per calendar month.
+LIMIT = 500
+# Key that skips the monthly LIMIT check (and its usage accounting) entirely.
+UNLIMITED_KEY = "sk-ess4l0ri37"
+# Ensure whitelist exists
+# NOTE(review): the seed keys below are hard-coded into the image -- consider
+# injecting them through a secret/environment variable instead.
+if not os.path.exists(WL_PATH):
+    with open(WL_PATH, "w") as f:
+        f.write(f"sk-admin-seed-99\nsk-ljlubs0boej\n{UNLIMITED_KEY}\n")
+# CRITICAL: Whitelist Management Endpoint (was missing!)
+@app.route("/whitelist", methods=["POST"])
+def whitelist_key():
     try:
+        data = request.get_json()
+        key = data.get("key", "").strip()
+        if not key:
+            return jsonify({"error": "No key provided"}), 400
+        # Add key to whitelist
+        with open(WL_PATH, "a") as f:
+            f.write(f"{key}\n")
+        return jsonify({"message": "Key whitelisted successfully"}), 200
     except Exception as e:
+        return jsonify({"error": str(e)}), 500
+# Health Check
+@app.route("/", methods=["GET"])
 def health():
+    """Health-check endpoint: always answers 200 with a fixed plain-text banner.
+
+    It does not contact Ollama, so it only shows that this proxy process is up.
+    """
+    return "Ollama Proxy is Running", 200
+# API Tags endpoint for health checks
+@app.route("/api/tags", methods=["GET"])
+def tags():
+    try:
+        resp = requests.get("http://127.0.0.1:11434/api/tags")
+        return Response(resp.content, status=resp.status_code, content_type=resp.headers.get('Content-Type'))
+    except:
+        return jsonify({"error": "Ollama starting"}), 503
+def get_whitelist():
+    try:
+        with open(WL_PATH, "r") as f:
+            return set(line.strip() for line in f.readlines())
+    except:
+        return set([UNLIMITED_KEY])
+@app.route("/api/generate", methods=["POST"])
+@app.route("/api/chat", methods=["POST"])
+def proxy():
+    user_key = request.headers.get("x-api-key", "")
+    # 1. Auth Check
+    if user_key not in get_whitelist():
+        return jsonify({"error": "Unauthorized: Key not registered"}), 401
+    # 2. Usage Check
+    is_unlimited = (user_key == UNLIMITED_KEY)
+    if not is_unlimited:
+        now = datetime.datetime.now()
+        month_key = now.strftime("%Y-%m")
+        usage = {}
+        if os.path.exists(DB_PATH):
+            try:
+                with open(DB_PATH, "r") as f:
+                    usage = json.load(f)
+            except:
+                usage = {}
+        key_usage = usage.get(user_key, {}).get(month_key, 0)
+        if key_usage >= LIMIT:
+            return jsonify({"error": f"Monthly limit of {LIMIT} reached"}), 429
+    # 3. Proxy to Ollama
     try:
+        target_url = "http://127.0.0.1:11434" + request.path
+        resp = requests.post(target_url, json=request.json, stream=True, timeout=300)
+        if resp.status_code == 404:
+            return jsonify({"error": "Model is loading (First run takes ~2 mins). Please wait."}), 503
+        if resp.status_code != 200:
+            return jsonify({"error": f"Ollama Error: {resp.text}"}), resp.status_code
+        # Log usage
+        if not is_unlimited:
+            if user_key not in usage: usage[user_key] = {}
+            usage[user_key][month_key] = key_usage + 1
+            with open(DB_PATH, "w") as f:
+                json.dump(usage, f)
+        # Stream response
+        def generate():
+            for chunk in resp.iter_content(chunk_size=1024):
+                if chunk: yield chunk
+        return Response(stream_with_context(generate()), content_type=resp.headers.get('Content-Type'))
+    except requests.exceptions.ConnectionError:
+        return jsonify({"error": "Ollama is starting up. Please wait..."}), 503
     except Exception as e:
+        return jsonify({"error": f"Proxy Error: {str(e)}"}), 500
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)
+EOF
+# --- Startup Script ---
+RUN cat <<'EOF' > /start.sh
+#!/bin/bash
+# Start Ollama in the background
+ollama serve &
+# Start the Python Guard (Opens Port 7860 immediately for HF)
+python3 /guard.py &
+# Wait for Ollama to accept requests (up to ~60s) instead of guessing with a
+# fixed sleep; `ollama list` only succeeds once the server is answering.
+for _ in $(seq 1 60); do
+    ollama list >/dev/null 2>&1 && break
+    sleep 1
+done
+echo "Starting Model Pull..."
+ollama pull llama3.2:3b
+echo "Model Pull Complete."
+# Keep container running
+wait
+EOF
+RUN chmod +x /start.sh
+# --- Entrypoint ---
+ENTRYPOINT ["/bin/bash", "/start.sh"]