Spaces:

guydffdsdsfd
/

my-ollama-api

Running

App Files Files Community

guydffdsdsfd committed on Jan 14

Commit

751d4cf

verified ·

1 Parent(s): 53ce083

Update Dockerfile

Browse files

Files changed (1) hide show

Dockerfile +217 -140

Dockerfile CHANGED Viewed

@@ -1,154 +1,231 @@
-FROM ollama/ollama:latest
-# Install Python & Dependencies
-RUN apt-get update && apt-get install -y python3 python3-pip && \
-    pip3 install flask flask-cors requests --break-system-packages
-# Set up environment variables
-ENV OLLAMA_HOST=127.0.0.1:11434
-ENV OLLAMA_MODELS=/home/ollama/.ollama/models
-ENV HOME=/home/ollama
-# Create writable directories
-RUN mkdir -p /home/ollama/.ollama && chmod -R 777 /home/ollama
-# --- COMPLETE Flask Guard Script (with whitelist endpoint) ---
-RUN cat <<'EOF' > /guard.py
-from flask import Flask, request, Response, jsonify, stream_with_context
-import requests
 from flask_cors import CORS
-import json, os, datetime, time, threading
 app = Flask(__name__)
-CORS(app)
-DB_PATH = "/home/ollama/usage.json"
-WL_PATH = "/home/ollama/whitelist.txt"
-LIMIT = 500
-UNLIMITED_KEY = "sk-ess4l0ri37"
-# Ensure whitelist exists
-if not os.path.exists(WL_PATH):
-    with open(WL_PATH, "w") as f:
-        f.write(f"sk-admin-seed-99\nsk-ljlubs0boej\n{UNLIMITED_KEY}\n")
-# CRITICAL: Whitelist Management Endpoint (was missing!)
-@app.route("/whitelist", methods=["POST"])
-def whitelist_key():
     try:
-        data = request.get_json()
-        key = data.get("key", "").strip()
-        if not key:
-            return jsonify({"error": "No key provided"}), 400
-        # Add key to whitelist
-        with open(WL_PATH, "a") as f:
-            f.write(f"{key}\n")
-        return jsonify({"message": "Key whitelisted successfully"}), 200
     except Exception as e:
-        return jsonify({"error": str(e)}), 500
-# Health Check
-@app.route("/", methods=["GET"])
 def health():
-    return "Ollama Proxy is Running", 200
-# API Tags endpoint for health checks
-@app.route("/api/tags", methods=["GET"])
-def tags():
-    try:
-        resp = requests.get("http://127.0.0.1:11434/api/tags")
-        return Response(resp.content, status=resp.status_code, content_type=resp.headers.get('Content-Type'))
-    except:
-        return jsonify({"error": "Ollama starting"}), 503
-def get_whitelist():
-    try:
-        with open(WL_PATH, "r") as f:
-            return set(line.strip() for line in f.readlines())
-    except:
-        return set([UNLIMITED_KEY])
-@app.route("/api/generate", methods=["POST"])
-@app.route("/api/chat", methods=["POST"])
-def proxy():
-    user_key = request.headers.get("x-api-key", "")
-    # 1. Auth Check
-    if user_key not in get_whitelist():
-        return jsonify({"error": "Unauthorized: Key not registered"}), 401
-    # 2. Usage Check
-    is_unlimited = (user_key == UNLIMITED_KEY)
-    if not is_unlimited:
-        now = datetime.datetime.now()
-        month_key = now.strftime("%Y-%m")
-        usage = {}
-        if os.path.exists(DB_PATH):
-            try:
-                with open(DB_PATH, "r") as f:
-                    usage = json.load(f)
-            except:
-                usage = {}
-        key_usage = usage.get(user_key, {}).get(month_key, 0)
-        if key_usage >= LIMIT:
-            return jsonify({"error": f"Monthly limit of {LIMIT} reached"}), 429
-    # 3. Proxy to Ollama
     try:
-        target_url = "http://127.0.0.1:11434" + request.path
-        resp = requests.post(target_url, json=request.json, stream=True, timeout=300)
-        if resp.status_code == 404:
-            return jsonify({"error": "Model is loading (First run takes ~2 mins). Please wait."}), 503
-        if resp.status_code != 200:
-            return jsonify({"error": f"Ollama Error: {resp.text}"}), resp.status_code
-        # Log usage
-        if not is_unlimited:
-            if user_key not in usage: usage[user_key] = {}
-            usage[user_key][month_key] = key_usage + 1
-            with open(DB_PATH, "w") as f:
-                json.dump(usage, f)
-        # Stream response
-        def generate():
-            for chunk in resp.iter_content(chunk_size=1024):
-                if chunk: yield chunk
-        return Response(stream_with_context(generate()), content_type=resp.headers.get('Content-Type'))
-    except requests.exceptions.ConnectionError:
-        return jsonify({"error": "Ollama is starting up. Please wait..."}), 503
     except Exception as e:
-        return jsonify({"error": f"Proxy Error: {str(e)}"}), 500
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
-EOF
-# --- Startup Script ---
-RUN cat <<'EOF' > /start.sh
-#!/bin/bash
-# Start Ollama in the background
-ollama serve &
-# Start the Python Guard (Opens Port 7860 immediately for HF)
-python3 /guard.py &
-# Wait for Ollama to wake up, then pull the model
-sleep 5
-echo "Starting Model Pull..."
-ollama pull llama3.2:3b
-echo "Model Pull Complete."
-# Keep container running
-wait
-EOF
-RUN chmod +x /start.sh
-# --- Entrypoint ---
-ENTRYPOINT ["/bin/bash", "/start.sh"]

+from flask import Flask, request, jsonify, send_file
 from flask_cors import CORS
+import os, torch, io, time, json
+from diffusers import StableDiffusionPipeline
+import threading
 app = Flask(__name__)
+# CORS configuration
+# Applies to every route ("/*"): lists the origins, HTTP methods and request
+# headers that cross-origin callers may use.
+CORS(app, resources={
+    r"/*": {
+        "origins": [
+            "https://kaigpt.vercel.app",
+            # NOTE(review): this entry carries a path ("/chat"); browsers
+            # presumably send only scheme://host[:port] in Origin, so it may
+            # never match -- confirm and drop if so.
+            "https://kaigpt.vercel.app/chat",
+            "http://localhost:3000",
+            # NOTE(review): "*" presumably allows any origin, which would make
+            # the explicit entries above redundant -- confirm this is intended.
+            "*"
+        ],
+        "methods": ["GET", "POST", "OPTIONS"],
+        "allow_headers": ["Content-Type", "Authorization", "x-api-key"]
+    }
+})
+# Configuration
+# Whitelist file (relative path): one API key per line, read by get_whitelist().
+WL_PATH = 'whitelist.txt'
+# Key returned by get_whitelist() when the whitelist file is missing, and
+# seeded into a newly created whitelist file at startup.
+# NOTE(review): this is a credential committed in source -- consider loading
+# it from the environment instead.
+UNLIMITED_KEY = 'sk-ess4l0ri37'
+# Domains checked by is_trusted_origin(); matching requests skip the API key
+# check in gen_img().
+TRUSTED_DOMAINS = ["kaigpt.vercel.app", "localhost"]
+# Global progress tracking
+# Maps request_id -> {'progress', 'status', 'timestamp'}; every access in this
+# file happens while holding progress_lock.
+image_progress = {}
+progress_lock = threading.Lock()
+print('Loading Stable Diffusion v1.5...')
+# The pipeline is loaded once at import time, on CPU in float32, with the
+# safety checker disabled. If loading fails, pipe is set to None; gen_img()
+# then answers 503 and health() reports 'offline'.
+try:
+    pipe = StableDiffusionPipeline.from_pretrained(
+        'runwayml/stable-diffusion-v1-5',
+        torch_dtype=torch.float32,
+        safety_checker=None,
+        requires_safety_checker=False
+    ).to('cpu')
+    print('✅ Stable Diffusion loaded successfully')
+except Exception as e:
+    print(f'❌ Error loading Stable Diffusion: {e}')
+    pipe = None
+def get_whitelist():
+    """Return the set of whitelisted API keys.
+
+    Reads ``WL_PATH`` (one key per line), stripping surrounding whitespace
+    and skipping blank lines.
+
+    Returns:
+        set: The keys found in the file, or ``{UNLIMITED_KEY}`` when the
+        file does not exist.
+    """
+    # Open directly instead of checking os.path.exists() first: a file that
+    # disappears between the check and the open would otherwise raise and
+    # turn into a 500 for the caller.
+    try:
+        with open(WL_PATH, 'r') as f:
+            return {line.strip() for line in f if line.strip()}
+    except FileNotFoundError:
+        return {UNLIMITED_KEY}
+def is_trusted_origin():
+    """Check whether the request's Origin or Referer host is a trusted domain.
+
+    The host name is parsed out of each header and compared for exact
+    equality against ``TRUSTED_DOMAINS``. A plain substring test would also
+    accept values such as ``https://localhost.evil.example`` or
+    ``https://evil.example/?ref=kaigpt.vercel.app``.
+
+    NOTE(review): Origin and Referer are supplied by the client, so
+    non-browser callers can forge them; this check is a convenience for
+    browser traffic, not authentication.
+
+    Returns:
+        bool: True if either header's host name is in ``TRUSTED_DOMAINS``.
+    """
+    from urllib.parse import urlsplit
+
+    trusted_hosts = {domain.lower() for domain in TRUSTED_DOMAINS}
+    for header_name in ("Origin", "Referer"):
+        header_value = request.headers.get(header_name, "")
+        if not header_value:
+            continue
+        try:
+            # .hostname is lower-cased and excludes scheme, port and path.
+            host = urlsplit(header_value).hostname
+        except ValueError:
+            # Malformed URL (e.g. an unbalanced IPv6 bracket): not trusted.
+            continue
+        if host and host in trusted_hosts:
+            return True
+    return False
+def update_progress(request_id, progress, status):
+    """Store the latest progress snapshot for ``request_id``.
+
+    Args:
+        request_id: Key under which the snapshot is stored in
+            ``image_progress``.
+        progress: Progress value to record (callers in this file pass 0-100).
+        status: Human-readable status message.
+    """
+    snapshot = {
+        'progress': progress,
+        'status': status,
+        'timestamp': time.time()
+    }
+    # Replace any previous snapshot for this request while holding the lock.
+    with progress_lock:
+        image_progress[request_id] = snapshot
+def cleanup_old_progress():
+    """Delete progress entries whose timestamp is more than 300 seconds old."""
+    with progress_lock:
+        now = time.time()
+        # Collect the ids first; deleting while iterating the dict would raise.
+        expired_ids = [
+            req_id
+            for req_id, entry in image_progress.items()
+            if now - entry['timestamp'] > 300  # 5 minutes
+        ]
+        for req_id in expired_ids:
+            del image_progress[req_id]
+@app.route('/api/txt2img', methods=['POST', 'OPTIONS'])
+def gen_img():
+    """Generate a PNG image from a text prompt.
+
+    JSON body fields (all optional):
+        prompt (str): Text prompt; defaults to 'a beautiful landscape'.
+        steps (int): Inference steps, clamped to the range 10-50; default 25.
+        request_id (str): Id under which progress is published; generated
+            when absent.
+        api_key (str): API key, used when no ``x-api-key`` header is sent.
+
+    Returns:
+        The PNG image on success; otherwise a JSON error with status 400
+        (invalid field), 401 (bad API key), 503 (model not loaded) or
+        500 (generation failed).
+    """
+    if request.method == 'OPTIONS':
+        return jsonify({'status': 'ok'}), 200
+    # Parse the body once, up front. silent=True yields None instead of
+    # raising on a missing or malformed body, so the auth check below cannot
+    # crash on a request that carries only the x-api-key header.
+    data = request.get_json(force=True, silent=True)
+    if not isinstance(data, dict):
+        data = {}
+    # Check authorization
+    if not is_trusted_origin():
+        api_key = request.headers.get('x-api-key') or data.get('api_key', '')
+        if not isinstance(api_key, str) or api_key not in get_whitelist():
+            return jsonify({'error': 'Unauthorized', 'message': 'Invalid API key'}), 401
+    if not pipe:
+        return jsonify({'error': 'Model not loaded', 'message': 'Stable Diffusion is not available'}), 503
+    prompt = data.get('prompt', 'a beautiful landscape')
+    if not isinstance(prompt, str):
+        return jsonify({'error': 'Invalid request', 'message': 'prompt must be a string'}), 400
+    try:
+        steps = min(max(int(data.get('steps', 25)), 10), 50)  # Clamp between 10-50
+    except (TypeError, ValueError, OverflowError):
+        return jsonify({'error': 'Invalid request', 'message': 'steps must be an integer'}), 400
+    # Progress is looked up by the string taken from the URL path, so the id
+    # is normalised to str (this also keeps it usable as a dict key).
+    request_id = str(
+        data.get('request_id')
+        or f'img_{int(time.time())}_{hash(prompt) % 10000}'
+    )
+    # Clean up old progress entries
+    cleanup_old_progress()
+    # Initialize progress
+    update_progress(request_id, 0, 'Starting image generation...')
     try:
+        # Define progress callback
+        def progress_callback(step, timestep, latents):
+            progress = int((step / steps) * 100)
+            update_progress(request_id, progress, f'Step {step}/{steps}')
+        # Generate image
+        print(f'Generating image: "{prompt[:50]}..." ({steps} steps)')
+        with torch.no_grad():
+            image = pipe(
+                prompt,
+                num_inference_steps=steps,
+                guidance_scale=7.5,
+                callback=progress_callback,
+                callback_steps=1
+            ).images[0]
+        # Convert to bytes. PNG is lossless, so no 'quality' option is passed
+        # (the PNG writer does not use one).
+        img_io = io.BytesIO()
+        image.save(img_io, 'PNG')
+        img_io.seek(0)
+        # Mark as complete
+        update_progress(request_id, 100, 'Complete!')
+        # Return image
+        return send_file(
+            img_io,
+            mimetype='image/png',
+            as_attachment=False,
+            download_name=f'generated_{int(time.time())}.png'
+        )
     except Exception as e:
+        # Boundary handler: report any generation failure to the client and
+        # through the progress endpoint.
+        print(f'Image generation error: {e}')
+        update_progress(request_id, 0, f'Error: {str(e)}')
+        return jsonify({
+            'error': 'Generation failed',
+            'message': str(e),
+            'request_id': request_id
+        }), 500
+@app.route('/api/img_progress/<request_id>', methods=['GET'])
+def get_progress(request_id):
+    """Return the stored progress snapshot for ``request_id`` as JSON.
+
+    Expired entries are purged first. An unknown or expired id yields a
+    placeholder snapshot with progress 0.
+    """
+    cleanup_old_progress()
+    with progress_lock:
+        snapshot = image_progress.get(request_id)
+    if snapshot is None:
+        snapshot = {
+            'progress': 0,
+            'status': 'Not found or expired',
+            'timestamp': time.time()
+        }
+    return jsonify(snapshot)
+@app.route('/api/health', methods=['GET'])
 def health():
+    """Report whether the model is loaded, plus basic server metadata."""
+    state = 'online' if pipe else 'offline'
+    return jsonify({
+        'status': state,
+        'model': 'stable-diffusion-v1-5',
+        'loaded': pipe is not None,
+        'trusted_domains': TRUSTED_DOMAINS,
+        'timestamp': time.time()
+    })
+@app.route('/api/whitelist/add', methods=['POST'])
+def add_to_whitelist():
+    """Add an API key to the whitelist file (admin only).
+
+    JSON body fields:
+        admin_key (str): Must equal the configured admin key.
+        key (str): The API key to add; surrounding whitespace is stripped.
+
+    Returns:
+        200 with the resulting key count on success, 403 for a wrong admin
+        key, 400 for a missing or malformed key, 500 if the file cannot be
+        written.
+    """
+    import hmac
+
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+    admin_key = data.get('admin_key', '')
+    new_key = data.get('key', '')
+    # The admin key is read from the WHITELIST_ADMIN_KEY environment variable.
+    # NOTE(review): 'admin123' remains only as a backward-compatible fallback
+    # for deployments that have not set the variable; it is trivially
+    # guessable, so set WHITELIST_ADMIN_KEY in production.
+    expected_admin_key = os.environ.get('WHITELIST_ADMIN_KEY', 'admin123')
+    # compare_digest avoids leaking the key through response timing.
+    if not isinstance(admin_key, str) or not hmac.compare_digest(
+        admin_key.encode('utf-8', 'surrogatepass'),
+        expected_admin_key.encode('utf-8', 'surrogatepass'),
+    ):
+        return jsonify({'error': 'Invalid admin key'}), 403
+    if not isinstance(new_key, str) or not new_key.strip():
+        return jsonify({'error': 'No key provided'}), 400
+    new_key = new_key.strip()
+    # The whitelist holds one key per line; an embedded newline would let a
+    # single request register several keys.
+    if any(ch.isspace() for ch in new_key):
+        return jsonify({'error': 'Key must not contain whitespace'}), 400
     try:
+        # Skip the append when the key is already present, so the file does
+        # not accumulate duplicates.
+        if new_key not in get_whitelist():
+            with open(WL_PATH, 'a') as f:
+                f.write(f"{new_key}\n")
+        return jsonify({
+            'status': 'success',
+            'message': 'Key added to whitelist',
+            'total_keys': len(get_whitelist())
+        }), 200
     except Exception as e:
+        return jsonify({'error': str(e)}), 500
+@app.route('/api/config', methods=['GET'])
+def get_config():
+    """Return the server's static generation settings as JSON."""
+    config = {
+        'max_steps': 50,
+        'min_steps': 10,
+        'default_steps': 25,
+        'supported_sizes': ['512x512', '768x768'],
+        'model': 'stable-diffusion-v1-5'
+    }
+    return jsonify(config)
+if __name__ == '__main__':
+    # Create whitelist file if it doesn't exist
+    if not os.path.exists(WL_PATH):
+        with open(WL_PATH, 'w') as f:
+            f.write(f"{UNLIMITED_KEY}\n")
+        # The key itself is deliberately not printed: API keys are
+        # credentials and should not end up in container logs.
+        print('Created whitelist file with default key')
+    # Log only how many keys are whitelisted, never their values.
+    print(f'Whitelisted keys: {len(get_whitelist())}')
+    print(f'Trusted domains: {TRUSTED_DOMAINS}')
+    print('Server starting on port 7860...')
+    # Listen on all interfaces on port 7860, one thread per request.
+    app.run(
+        host='0.0.0.0',
+        port=7860,
+        debug=False,
+        threaded=True
+    )