Update app.py
app.py CHANGED
```diff
@@ -1,21 +1,30 @@
 from flask import Flask, render_template, request, flash, jsonify
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import login
-import numpy as np
+from huggingface_hub import login
 import os, json
 
 app = Flask(__name__)
 app.secret_key = os.urandom(24)
 
-#
+# Globals for running server mode
 ee_model = None
 ee_tokenizer = None
 ee_config = None
+loaded_model_name = None
+
+# Detect the HF Space URL automatically, fallback to localhost
+SPACE_HOST = os.environ.get("SPACE_HOST", "")
+if SPACE_HOST:
+    SPACE_URL = f"https://{SPACE_HOST}"
+else:
+    SPACE_URL = "http://localhost:7860"
+
 
 @app.route("/", methods=["GET", "POST"])
 def index():
-    global ee_model, ee_tokenizer, ee_config
+    global ee_model, ee_tokenizer, ee_config, loaded_model_name
+
     if request.method == "POST":
         action = request.form.get("action")
 
@@ -25,40 +34,49 @@ def index():
 
         try:
             login(token=hf_token)
-            global ee_model, ee_tokenizer, ee_config
 
             ee_model = AutoModelForCausalLM.from_pretrained(
                 ee_model_name,
                 torch_dtype=torch.float16,
-                #load_in_4bit=True,
                 device_map="auto",
                 trust_remote_code=True
             )
-            ee_tokenizer = AutoTokenizer.from_pretrained(
+            ee_tokenizer = AutoTokenizer.from_pretrained(
+                ee_model_name, trust_remote_code=True
+            )
 
-            # Load config
+            # Load EE config
             from huggingface_hub import hf_hub_download
             config_path = hf_hub_download(ee_model_name, "ee_config.json")
             with open(config_path) as f:
                 ee_config = json.load(f)
 
-
-            flash("
+            loaded_model_name = ee_model_name
+            flash(f"Model loaded successfully: {ee_model_name}", "success")
+            flash("Point your Client Space to this Space's URL below.", "info")
 
         except Exception as e:
             flash(f"Error: {str(e)}", "danger")
 
-    return render_template(
+    return render_template(
+        "index.html",
+        server_ready=(ee_model is not None),
+        model_name=loaded_model_name,
+        space_url=SPACE_URL,
+    )
+
 
-# === INFERENCE ENDPOINT
+# === INFERENCE ENDPOINT ===
 @app.route("/generate", methods=["POST"])
 def generate():
     if ee_model is None:
         return jsonify({"error": "Server not started yet"}), 400
 
     data = request.json
-    encrypted_embeds = torch.tensor(data["encrypted_embeds"]).to(ee_model.device)
-    attention_mask = torch.tensor(
+    encrypted_embeds = torch.tensor(data["encrypted_embeds"]).to(ee_model.device)
+    attention_mask = torch.tensor(
+        data.get("attention_mask", [[1] * encrypted_embeds.shape[1]])
+    ).to(ee_model.device)
     max_new = int(data.get("max_new_tokens", 256))
 
     with torch.no_grad():
@@ -69,10 +87,11 @@ def generate():
             do_sample=True,
             temperature=0.7,
             top_p=0.9,
-            pad_token_id=ee_tokenizer.eos_token_id
+            pad_token_id=ee_tokenizer.eos_token_id,
        )
 
     return jsonify({"generated_ids": output_ids[0].tolist()})
 
+
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)
```
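
The diff elides the middle of `generate()` (new lines 83–86), so the actual generation call never appears. Below is a minimal sketch of what that block plausibly contains, assuming the handler feeds the client-supplied tensors straight into `ee_model.generate` via `inputs_embeds`; only the sampling kwargs and the closing parenthesis are confirmed by the last hunk.

```python
# Hypothetical reconstruction of the elided body of generate().
# Only do_sample/temperature/top_p/pad_token_id and the closing
# parenthesis are visible in the diff; the rest is an assumption.
with torch.no_grad():
    output_ids = ee_model.generate(
        inputs_embeds=encrypted_embeds,  # assumed: embeddings passed in directly
        attention_mask=attention_mask,
        max_new_tokens=max_new,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=ee_tokenizer.eos_token_id,
    )
```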
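
For reference, a minimal client sketch for the `/generate` endpoint above. The payload keys (`encrypted_embeds`, `attention_mask`, `max_new_tokens`) come from the handler; the server URL, sequence length, and hidden size are illustrative placeholders, not values from this repo.

```python
# Minimal client sketch for POST /generate. SERVER_URL and the
# embedding shape (1 x seq_len x hidden_size) are assumptions.
import requests

SERVER_URL = "http://localhost:7860"  # or the Space URL shown on the index page

payload = {
    # Dummy 1 x 4 x 4096 embeddings; a real client sends the transformed
    # embeddings produced by its encryption scheme.
    "encrypted_embeds": [[[0.0] * 4096 for _ in range(4)]],
    "attention_mask": [[1, 1, 1, 1]],  # optional; server defaults to all ones
    "max_new_tokens": 64,              # optional; server defaults to 256
}

resp = requests.post(f"{SERVER_URL}/generate", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["generated_ids"])
```

The response carries raw token ids; turning them back into text requires the same tokenizer the server loaded.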