Spaces:

366degrees
/

snp-universal-embedding

Running

App Files Files Community

366degrees committed on Nov 2, 2025

Commit

d2d2c63

verified ·

1 Parent(s): 3f56ea7

Update api_inference.py

Browse files

Files changed (1) hide show

api_inference.py +73 -49

api_inference.py CHANGED Viewed

@@ -11,7 +11,8 @@ from transformers import (
 )
 # ============================================================
-# Redirect Hugging Face cache to /app/hf_cache (always writable)
 CACHE_DIR = "/app/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.environ["HF_HOME"] = CACHE_DIR
@@ -23,33 +24,44 @@ PORT = int(os.environ.get("PORT", 7860))
 app = Flask(__name__)
 # ============================================================
-# Register Custom SNP Architecture
 # ============================================================
-class CustomSNPConfig(PretrainedConfig):
     model_type = "custom_snp"
 class CustomSNPModel(PreTrainedModel):
     config_class = CustomSNPConfig
     def __init__(self, config):
         super().__init__(config)
-        hidden_size = getattr(config, "hidden_size", 768)
-        # Mirror and Prism heads
-        self.encoder = nn.Linear(hidden_size, hidden_size)
         self.mirror_head = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
         self.prism_head = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
         self.projection = nn.Linear(hidden_size, 6)
-    def forward(self, input_ids=None, attention_mask=None, **kwargs):
-        # Simulate encoded representations
-        x = self.encoder(input_ids.float()) if input_ids is not None else None
-        x = self.mirror_head(x)
         x = self.prism_head(x)
-        return self.projection(x)
 # Register model so AutoModel recognizes it
 AutoConfig.register("custom_snp", CustomSNPConfig)
@@ -61,27 +73,23 @@ AutoModel.register(CustomSNPConfig, CustomSNPModel)
 # ============================================================
 try:
     print("Loading model from:", MODEL_DIR)
     config = AutoConfig.from_pretrained(MODEL_DIR, trust_remote_code=True)
-    # Try loading tokenizer; fallback if not mapped
-    from transformers import RobertaTokenizer
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
-    except Exception:
-        print("⚠️ Falling back to default RoBERTa tokenizer.")
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
     model = AutoModel.from_pretrained(MODEL_DIR, config=config, trust_remote_code=True)
     model.eval()
     print("✅ Custom SNP model loaded successfully.")
 except Exception as e:
-    print("❌ Error loading custom model:", e)
     raise e
 # ============================================================
-# Flask API Routes
 # ============================================================
 @app.route("/", methods=["GET"])
 def home():
@@ -95,32 +103,49 @@ def health():
 @app.route("/embed", methods=["POST"])
 def embed():
-    data = request.get_json(force=True)
-    text = data.get("text", "")
-    if not text:
-        return jsonify({"error": "Text is required"}), 400
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-    with torch.no_grad():
-        embeddings = model(**inputs)
-    if hasattr(embeddings, "last_hidden_state"):
-        embeddings = embeddings.last_hidden_state.mean(dim=1)
-    elif isinstance(embeddings, tuple):
-        embeddings = embeddings[0]
-    return jsonify({"embedding": embeddings.tolist()})
 @app.route("/reason", methods=["POST"])
 def reason():
-    data = request.get_json(force=True)
-    premise = data.get("premise", "")
-    hypothesis = data.get("hypothesis", "")
-    combined = f"{premise} {hypothesis}"
-    inputs = tokenizer(combined, return_tensors="pt", truncation=True, padding=True)
-    with torch.no_grad():
-        output = model(**inputs)
-    score = float(output.mean().item())
-    return jsonify({"reasoning_score": score})
 # ============================================================
@@ -128,5 +153,4 @@ def reason():
 # ============================================================
 if __name__ == "__main__":
     print(f"🚀 Starting SNP Universal Embedding API on port {PORT}")
-    app.run(host="0.0.0.0", port=PORT)

 )
 # ============================================================
+# Cache and Port Configuration
+# ============================================================
 CACHE_DIR = "/app/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.environ["HF_HOME"] = CACHE_DIR
 app = Flask(__name__)
 # ============================================================
+# Register Custom SNP Architecture (THE FIX IS HERE)
 # ============================================================
+class CustomSNPConfig(PretrainedConfig):
+    """Configuration class for the custom SNP model.
+
+    Derives from ``PretrainedConfig`` (not ``AutoConfig``): ``AutoConfig`` is a
+    factory that resolves a ``model_type`` to a config class and is not meant
+    to be subclassed, whereas ``PreTrainedModel`` expects its ``config_class``
+    to be a ``PretrainedConfig`` subclass.
+    """
+    # Key under which this class is registered with AutoConfig in this file.
     model_type = "custom_snp"
 class CustomSNPModel(PreTrainedModel):
+    """Encoder followed by mirror/prism heads and a 6-dimensional projection."""
     config_class = CustomSNPConfig
     def __init__(self, config):
+        """Build the shared encoder and the three head modules from ``config``."""
         super().__init__(config)
+        # NOTE(review): ``config`` is this model's own config object. If AutoModel
+        # maps that config class back to CustomSNPModel, this call would re-enter
+        # this constructor -- confirm which backbone config is intended here.
+        self.shared_encoder = AutoModel.from_config(config)
+        hidden_size = self.shared_encoder.config.hidden_size
+        # Two Linear+Tanh heads that keep the hidden size, then a Linear to 6 outputs.
         self.mirror_head = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
         self.prism_head = nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.Tanh())
         self.projection = nn.Linear(hidden_size, 6)
+    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, **kwargs):
+        """Return ``projection(prism_head(mirror_head(h0)))``.
+
+        ``h0`` is position 0 of the encoder's ``last_hidden_state``. Extra
+        keyword arguments are accepted but not used.
+        """
+        # Only these three inputs are forwarded to the encoder.
+        outputs = self.shared_encoder(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids
+        )
+        # First sequence position; presumably the [CLS] token -- depends on the tokenizer.
+        cls_embedding = outputs.last_hidden_state[:, 0, :]
+        # Heads are applied in sequence: mirror, then prism, then projection.
+        x = self.mirror_head(cls_embedding)
         x = self.prism_head(x)
+        proj = self.projection(x)
+        return proj # Raw projection output (last dimension 6)
 # Register model so AutoModel recognizes it
 AutoConfig.register("custom_snp", CustomSNPConfig)
 # ============================================================
 try:
+    # Load config, tokenizer and model from MODEL_DIR as module-level statements;
+    # a failure is printed and then re-raised below.
     print("Loading model from:", MODEL_DIR)
+    # NOTE(review): trust_remote_code=True presumably allows executing code shipped
+    # with MODEL_DIR; confirm it is needed given the classes registered in this file.
     config = AutoConfig.from_pretrained(MODEL_DIR, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)
     model = AutoModel.from_pretrained(MODEL_DIR, config=config, trust_remote_code=True)
     model.eval()
     print("✅ Custom SNP model loaded successfully.")
 except Exception as e:
+    print(f"❌ Error loading custom model: {e}")
+    # Re-raise after printing so the load error is not swallowed.
     raise e
 # ============================================================
+# Flask API Routes (Your routes are correct)
 # ============================================================
 @app.route("/", methods=["GET"])
 def home():
 @app.route("/embed", methods=["POST"])
 def embed():
+    """Return the model output for the ``text`` field of a JSON request body.
+
+    Responses:
+        200 -- ``{"embedding": [...]}`` with the model output as nested lists.
+        400 -- body is not a JSON object, or ``text`` is missing/empty.
+        500 -- tokenization or inference raised; the message is echoed back.
+    """
+    # silent=True returns None for malformed JSON instead of raising, so bad
+    # client input is answered with 400 rather than falling into the 500 path.
+    data = request.get_json(force=True, silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Request body must be a JSON object"}), 400
+    text = data.get("text", "")
+    if not text:
+        return jsonify({"error": "Text is required"}), 400
+    try:
+        # Tokenize the text
+        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+        # Inference only; no gradients are needed.
+        with torch.no_grad():
+            embeddings = model(**inputs)
+        # The model output is serialized directly via tolist().
+        return jsonify({"embedding": embeddings.tolist()})
+    except Exception as e:
+        print(f"ERROR in /embed: {e}")
+        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
 @app.route("/reason", methods=["POST"])
 def reason():
+    """Score a premise/hypothesis pair from a JSON request body.
+
+    The two fields are joined with a single space, run through the model, and
+    the mean of the model output is returned as ``reasoning_score``.
+
+    Responses:
+        200 -- ``{"reasoning_score": float}``.
+        400 -- body is not a JSON object, or both fields are missing/empty.
+        500 -- tokenization or inference raised; the message is echoed back.
+    """
+    # silent=True returns None for malformed JSON instead of raising, so bad
+    # client input is answered with 400 rather than falling into the 500 path.
+    data = request.get_json(force=True, silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Request body must be a JSON object"}), 400
+    premise = data.get("premise", "")
+    hypothesis = data.get("hypothesis", "")
+    # Without either field the model would only ever score a lone space.
+    if not premise and not hypothesis:
+        return jsonify({"error": "Premise or hypothesis is required"}), 400
+    try:
+        combined = f"{premise} {hypothesis}"
+        # Tokenize
+        inputs = tokenizer(combined, return_tensors="pt", truncation=True, padding=True)
+        # Inference only; no gradients are needed.
+        with torch.no_grad():
+            output = model(**inputs)
+        # The score is the mean over every element of the model output.
+        score = float(output.mean().item())
+        return jsonify({"reasoning_score": score})
+    except Exception as e:
+        print(f"ERROR in /reason: {e}")
+        return jsonify({"error": "Internal Server Error", "message": str(e)}), 500
 # ============================================================
 # ============================================================
 if __name__ == "__main__":
+    # Runs only when the file is executed as a script; listens on all interfaces at PORT.
     print(f"🚀 Starting SNP Universal Embedding API on port {PORT}")
+    app.run(host="0.0.0.0", port=PORT)