broadfield-dev committed on
Commit
3383b9c
·
verified ·
1 Parent(s): 3fb89d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -29
app.py CHANGED
# Flask app serving an "encrypted-embedding" (EE) causal LM over HTTP.
from flask import Flask, render_template, request, flash, jsonify
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
import os
import json

app = Flask(__name__)
# Random per-process secret: flash()/session data does not survive restarts.
app.secret_key = os.urandom(24)

# Module-level state for running-server mode; populated when a model loads.
ee_model = None
ee_tokenizer = None
ee_config = None
loaded_model_name = None

# Public URL of this app: the HF Space host when deployed, localhost otherwise.
SPACE_HOST = os.environ.get("SPACE_HOST", "")
SPACE_URL = f"https://{SPACE_HOST}" if SPACE_HOST else "http://localhost:7860"
24
  @app.route("/", methods=["GET", "POST"])
@@ -39,13 +37,12 @@ def index():
39
  ee_model_name,
40
  torch_dtype=torch.float16,
41
  device_map="auto",
42
- trust_remote_code=True
43
  )
44
  ee_tokenizer = AutoTokenizer.from_pretrained(
45
  ee_model_name, trust_remote_code=True
46
  )
47
 
48
- # Load EE config
49
  from huggingface_hub import hf_hub_download
50
  config_path = hf_hub_download(ee_model_name, "ee_config.json")
51
  with open(config_path) as f:
@@ -70,27 +67,42 @@ def index():
70
@app.route("/generate", methods=["POST"])
def generate():
    """Run generation on client-supplied (encrypted) input embeddings.

    JSON body:
      - "encrypted_embeds": required nested list — presumably shaped
        (1, seq_len, hidden); TODO confirm against the client encoder.
      - "attention_mask": optional, defaults to all-ones over seq_len.
      - "max_new_tokens": optional int, default 256.

    Returns JSON {"generated_ids": [...]} on success, or
    {"error": ...} with HTTP 400 on a bad request.
    """
    if ee_model is None:
        return jsonify({"error": "Server not started yet"}), 400

    data = request.json
    if data is None or "encrypted_embeds" not in data:
        # Missing/non-JSON body is a client error — fail fast with 400
        # instead of crashing on data["encrypted_embeds"].
        return jsonify({"error": "JSON body with 'encrypted_embeds' is required"}), 400

    # The model is loaded with torch_dtype=torch.float16, but torch.tensor()
    # over JSON floats produces float32/float64 — cast to the model's actual
    # dtype or generate() fails with a dtype mismatch.
    model_dtype = next(ee_model.parameters()).dtype
    encrypted_embeds = torch.tensor(data["encrypted_embeds"]).to(
        dtype=model_dtype, device=ee_model.device
    )
    # Attention mask stays an integer tensor; default marks every position real.
    attention_mask = torch.tensor(
        data.get("attention_mask", [[1] * encrypted_embeds.shape[1]])
    ).to(ee_model.device)
    max_new = int(data.get("max_new_tokens", 256))

    with torch.no_grad():
        output_ids = ee_model.generate(
            inputs_embeds=encrypted_embeds,
            attention_mask=attention_mask,
            max_new_tokens=max_new,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=ee_tokenizer.eos_token_id,
        )

    return jsonify({"generated_ids": output_ids[0].tolist()})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
 
96
  if __name__ == "__main__":
 
2
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
import traceback
import os
import json

# Flask application object; the secret key is regenerated each process,
# so flashed messages do not persist across restarts.
app = Flask(__name__)
app.secret_key = os.urandom(24)

# Lazily-populated module-level state for the loaded EE model.
ee_model = None
ee_tokenizer = None
ee_config = None
loaded_model_name = None

# Prefer the Hugging Face Space host when deployed, else the local dev URL.
SPACE_HOST = os.environ.get("SPACE_HOST", "")
SPACE_URL = f"https://{SPACE_HOST}" if SPACE_HOST else "http://localhost:7860"
  @app.route("/", methods=["GET", "POST"])
 
37
  ee_model_name,
38
  torch_dtype=torch.float16,
39
  device_map="auto",
40
+ trust_remote_code=True,
41
  )
42
  ee_tokenizer = AutoTokenizer.from_pretrained(
43
  ee_model_name, trust_remote_code=True
44
  )
45
 
 
46
  from huggingface_hub import hf_hub_download
47
  config_path = hf_hub_download(ee_model_name, "ee_config.json")
48
  with open(config_path) as f:
 
67
@app.route("/generate", methods=["POST"])
def generate():
    """Generate token ids from client-supplied (encrypted) input embeddings.

    JSON body:
      - "encrypted_embeds": required nested list — presumably shaped
        (1, seq_len, hidden); TODO confirm against the client encoder.
      - "attention_mask": optional, defaults to all-ones over seq_len.
      - "max_new_tokens": optional int, default 256.

    Returns {"generated_ids": [...]} on success, or {"error": ...} with
    400 (client error) / 500 (unexpected failure) status.
    """
    if ee_model is None:
        return jsonify({"error": "Server not started yet — load a model first"}), 400

    try:
        data = request.json
        if data is None:
            return jsonify({"error": "Request body must be JSON"}), 400
        if "encrypted_embeds" not in data:
            # A missing key is a client error, not a server fault — report 400
            # instead of letting the KeyError fall through to the 500 handler.
            return jsonify({"error": "'encrypted_embeds' field is required"}), 400

        # Cast to the model's actual dtype (float16 when loaded with
        # torch_dtype=torch.float16) so generate() receives matching inputs;
        # torch.tensor() on JSON floats would otherwise yield float32/float64.
        model_dtype = next(ee_model.parameters()).dtype
        encrypted_embeds = torch.tensor(data["encrypted_embeds"]).to(
            dtype=model_dtype, device=ee_model.device
        )

        # Mask stays an integer tensor; the default marks every position real.
        attention_mask = torch.tensor(
            data.get("attention_mask", [[1] * encrypted_embeds.shape[1]])
        ).to(device=ee_model.device)

        max_new = int(data.get("max_new_tokens", 256))

        with torch.no_grad():
            output_ids = ee_model.generate(
                inputs_embeds=encrypted_embeds,
                attention_mask=attention_mask,
                max_new_tokens=max_new,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=ee_tokenizer.eos_token_id,
            )

        return jsonify({"generated_ids": output_ids[0].tolist()})

    except Exception as e:
        # NOTE(review): returning the full traceback aids debugging in a demo
        # Space but leaks internals to clients — consider logging server-side
        # only before any production use.
        return jsonify({"error": str(e), "traceback": traceback.format_exc()}), 500
106
 
107
 
108
  if __name__ == "__main__":