guydffdsdsfd committed on
Commit
277eaa3
·
verified ·
1 Parent(s): 70a06fc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +265 -0
app.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_file
2
+ from flask_cors import CORS
3
+ from diffusers import DiffusionPipeline, LCMScheduler
4
+ import torch
5
+ import os
6
+ import json
7
+ import secrets
8
+ from io import BytesIO
9
+ import gc
10
+ from datetime import datetime
11
+ import traceback
12
+
13
+ app = Flask(__name__)
14
+ CORS(app)
15
+
16
+ # Configuration
17
+ BASE = "/home/sd"
18
+ WL_PATH = f"{BASE}/whitelist.txt"
19
+ USAGE_PATH = f"{BASE}/usage.json"
20
+ LIMITS_PATH = f"{BASE}/limits.json"
21
+ DEFAULT_LIMIT = 500
22
+
23
+ # Use a fast, reliable model: LCM version for speed + quality
24
+ # Alternatives: "segmind/SSD-1B" (smaller) or "stabilityai/sdxl-turbo" (fastest)
25
+ MODEL_ID = "Lykon/dreamshaper-8-lcm"
26
+
27
+ # Global pipeline with lazy loading
28
+ pipe = None
29
+
30
+ def init_pipeline():
31
+ """Initialize the pipeline with optimizations"""
32
+ global pipe
33
+
34
+ if pipe is not None:
35
+ return pipe
36
+
37
+ print(f"Loading model: {MODEL_ID}")
38
+
39
+ # Use half precision for speed and memory efficiency
40
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
41
+
42
+ try:
43
+ # Load pipeline with optimizations
44
+ pipe = DiffusionPipeline.from_pretrained(
45
+ MODEL_ID,
46
+ torch_dtype=torch_dtype,
47
+ variant="fp16" if torch_dtype == torch.float16 else None,
48
+ use_safetensors=True,
49
+ safety_checker=None, # Disable for speed (optional)
50
+ requires_safety_checker=False
51
+ )
52
+
53
+ # Move to GPU if available
54
+ device = "cuda" if torch.cuda.is_available() else "cpu"
55
+ pipe = pipe.to(device)
56
+
57
+ # Enable optimizations
58
+ if device == "cuda":
59
+ pipe.enable_attention_slicing() # Reduce memory usage
60
+ if torch_dtype == torch.float16:
61
+ pipe.enable_model_cpu_offload() # Offload to CPU when not in use
62
+
63
+ print(f"Model loaded successfully on {device}")
64
+ return pipe
65
+
66
+ except Exception as e:
67
+ print(f"Error loading model: {e}")
68
+ # Fallback to a simpler model
69
+ try:
70
+ pipe = DiffusionPipeline.from_pretrained(
71
+ "SimianLuo/LCM_Dreamshaper_v7",
72
+ torch_dtype=torch_dtype
73
+ ).to("cuda" if torch.cuda.is_available() else "cpu")
74
+ print("Loaded fallback model")
75
+ return pipe
76
+ except:
77
+ raise Exception("Failed to load any model")
78
+
79
+ # Initialize storage
80
+ os.makedirs(BASE, exist_ok=True)
81
+ for path in [WL_PATH, USAGE_PATH, LIMITS_PATH]:
82
+ if not os.path.exists(path):
83
+ if path.endswith(".json"):
84
+ with open(path, "w") as f:
85
+ json.dump({}, f)
86
+ else:
87
+ with open(path, "w") as f:
88
+ f.write("")
89
+
90
+ # Helper functions
91
+ def get_whitelist():
92
+ try:
93
+ with open(WL_PATH, "r") as f:
94
+ return set(line.strip() for line in f if line.strip())
95
+ except:
96
+ return set()
97
+
98
+ def load_json(path):
99
+ try:
100
+ with open(path, "r") as f:
101
+ return json.load(f)
102
+ except:
103
+ return {}
104
+
105
+ def save_json(path, data):
106
+ with open(path, "w") as f:
107
+ json.dump(data, f, indent=2)
108
+
109
+ def validate_api_key(key):
110
+ """Validate API key and check rate limits"""
111
+ if key not in get_whitelist():
112
+ return False, "Unauthorized"
113
+
114
+ limits = load_json(LIMITS_PATH)
115
+ usage = load_json(USAGE_PATH)
116
+
117
+ limit = limits.get(key, DEFAULT_LIMIT)
118
+ if limit == "unlimited":
119
+ return True, "OK"
120
+
121
+ month = datetime.now().strftime("%Y-%m")
122
+ used = usage.get(key, {}).get(month, 0)
123
+
124
+ if used >= limit:
125
+ return False, "Monthly limit reached"
126
+
127
+ return True, "OK"
128
+
129
+ # Routes
130
+ @app.route("/", methods=["GET"])
131
+ def health():
132
+ return jsonify({
133
+ "status": "online",
134
+ "model": MODEL_ID,
135
+ "device": "cuda" if torch.cuda.is_available() else "cpu"
136
+ }), 200
137
+
138
+ @app.route("/generate-key", methods=["POST"])
139
+ def generate_key():
140
+ try:
141
+ data = request.get_json() or {}
142
+ unlimited = data.get("unlimited", False)
143
+ limit = data.get("limit", DEFAULT_LIMIT)
144
+
145
+ key = "sk-" + secrets.token_hex(16)
146
+
147
+ # Add to whitelist
148
+ with open(WL_PATH, "a") as f:
149
+ f.write(key + "\n")
150
+
151
+ # Set limits
152
+ limits = load_json(LIMITS_PATH)
153
+ limits[key] = "unlimited" if unlimited else int(limit)
154
+ save_json(LIMITS_PATH, limits)
155
+
156
+ # Initialize usage
157
+ usage = load_json(USAGE_PATH)
158
+ if key not in usage:
159
+ usage[key] = {}
160
+ save_json(USAGE_PATH, usage)
161
+
162
+ return jsonify({
163
+ "key": key,
164
+ "limit": limits[key],
165
+ "message": "Key generated successfully"
166
+ })
167
+
168
+ except Exception as e:
169
+ return jsonify({"error": str(e)}), 500
170
+
171
+ @app.route("/api/generate", methods=["POST"])
172
+ def generate():
173
+ try:
174
+ # Validate API key
175
+ key = request.headers.get("x-api-key", "")
176
+ valid, message = validate_api_key(key)
177
+ if not valid:
178
+ return jsonify({"error": message}), 401 if message == "Unauthorized" else 429
179
+
180
+ # Parse request
181
+ data = request.get_json() or {}
182
+ prompt = data.get("prompt", "").strip()
183
+
184
+ if not prompt:
185
+ return jsonify({"error": "Prompt is required"}), 400
186
+
187
+ # Set generation parameters with safe defaults
188
+ steps = min(max(int(data.get("steps", 4)), 1), 20) # LCM models work with 4-8 steps
189
+ guidance = float(data.get("guidance", 1.2)) # LCM uses low guidance
190
+ width = min(max(int(data.get("width", 512)), 256), 1024)
191
+ height = min(max(int(data.get("height", 512)), 256), 1024)
192
+
193
+ # Ensure pipeline is loaded
194
+ if pipe is None:
195
+ init_pipeline()
196
+
197
+ # Generate image
198
+ print(f"Generating: {prompt[:50]}... (steps: {steps}, guidance: {guidance})")
199
+
200
+ with torch.inference_mode():
201
+ image = pipe(
202
+ prompt=prompt,
203
+ num_inference_steps=steps,
204
+ guidance_scale=guidance,
205
+ width=width,
206
+ height=height,
207
+ output_type="pil"
208
+ ).images[0]
209
+
210
+ # Update usage
211
+ usage = load_json(USAGE_PATH)
212
+ month = datetime.now().strftime("%Y-%m")
213
+ usage.setdefault(key, {})
214
+ usage[key][month] = usage[key].get(month, 0) + 1
215
+ save_json(USAGE_PATH, usage)
216
+
217
+ # Return image
218
+ buf = BytesIO()
219
+ image.save(buf, format="PNG", optimize=True)
220
+ buf.seek(0)
221
+
222
+ return send_file(buf, mimetype="image/png")
223
+
224
+ except torch.cuda.OutOfMemoryError:
225
+ gc.collect()
226
+ if torch.cuda.is_available():
227
+ torch.cuda.empty_cache()
228
+ return jsonify({"error": "GPU out of memory. Try smaller image size."}), 507
229
+
230
+ except Exception as e:
231
+ error_details = traceback.format_exc()
232
+ print(f"Generation error: {error_details}")
233
+ return jsonify({
234
+ "error": "Generation failed",
235
+ "details": str(e)
236
+ }), 500
237
+
238
+ @app.route("/api/status", methods=["GET"])
239
+ def status():
240
+ """Check API key status and usage"""
241
+ key = request.headers.get("x-api-key", "")
242
+ if key not in get_whitelist():
243
+ return jsonify({"error": "Invalid API key"}), 401
244
+
245
+ limits = load_json(LIMITS_PATH)
246
+ usage = load_json(USAGE_PATH)
247
+
248
+ month = datetime.now().strftime("%Y-%m")
249
+ used = usage.get(key, {}).get(month, 0)
250
+ limit = limits.get(key, DEFAULT_LIMIT)
251
+
252
+ return jsonify({
253
+ "key": key[:8] + "..." + key[-4:] if len(key) > 12 else key,
254
+ "usage": used,
255
+ "limit": limit,
256
+ "remaining": "unlimited" if limit == "unlimited" else max(0, limit - used),
257
+ "month": month
258
+ })
259
+
260
+ if __name__ == "__main__":
261
+ # Initialize pipeline on startup
262
+ print("Initializing pipeline...")
263
+ init_pipeline()
264
+ print("API starting on port 7860...")
265
+ app.run(host="0.0.0.0", port=7860, debug=False)