Spaces:

metanthropic
/

metanthropic-node-phi3

Sleeping

ekjotsingh committed 29 days ago

Commit

fec01e0

verified ·

1 Parent(s): 85346fb

Added security

Files changed (1) hide show

app.py CHANGED Viewed

@@ -64,17 +64,17 @@ if initialize_weights():
         log_status("🧠 [ENGINE] Initializing Neural Infrastructure...")
         llm = Llama(
             model_path=TEMP_DECRYPTED,
-            n_ctx=2048,      # Context window optimized for Phi-3
-            n_threads=2,     # Locked to 2-vCPU Free Tier limit for stability
-            n_batch=512,     # High-speed prompt processing
-            use_mlock=True,  # Pin model to RAM to eliminate disk latency
             verbose=False
         )
         log_status("🚀 [SYSTEM] Sovereign Node Online.")
     except Exception as e:
         log_status(f"❌ [ENGINE ERROR] Neural load failed: {e}")
-# --- API CORE (CONVEX BRIDGE) ---
 app = FastAPI()
 @app.post("/run_inference")
@@ -83,9 +83,14 @@ async def run_inference(request: Request):
         return {"error": "System Offline", "logs": DIAGNOSTIC_LOG[-5:]}
     data = await request.json()
     prompt = data.get("prompt", "")
-    # API calls return the full string for database compatibility
     output = llm(
         f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
         max_tokens=512,
@@ -99,7 +104,6 @@ def ui_chat(msg, hist):
         yield f"🚨 **SYSTEM ARCHITECTURE FAILURE**\n\nLatest Diagnostics:\n```\n" + "\n".join(DIAGNOSTIC_LOG[-3:]) + "\n```"
         return
-    # Real-time token streaming for zero-latency perception
     stream = llm(
         f"<|user|>\n{msg}<|end|>\n<|assistant|>",
         max_tokens=512,

         log_status("🧠 [ENGINE] Initializing Neural Infrastructure...")
         llm = Llama(
             model_path=TEMP_DECRYPTED,
+            n_ctx=2048,
+            n_threads=2,     # Locked to HF Free Tier vCPU limit
+            n_batch=512,     # Optimized for prompt ingestion speed
+            use_mlock=True,  # Pin model to RAM
             verbose=False
         )
         log_status("🚀 [SYSTEM] Sovereign Node Online.")
     except Exception as e:
         log_status(f"❌ [ENGINE ERROR] Neural load failed: {e}")
+# --- SECURED API CORE (CONVEX BRIDGE) ---
 app = FastAPI()
 @app.post("/run_inference")
         return {"error": "System Offline", "logs": DIAGNOSTIC_LOG[-5:]}
     data = await request.json()
+    # 🔐 SECURITY HANDSHAKE
+    # We use the SECRET_KEY_HEX as a shared secret for API authorization.
+    if data.get("secretKey") != SECRET_KEY_HEX:
+        return {"error": "Unauthorized API Access. Use the Gradio UI instead."}
     prompt = data.get("prompt", "")
     output = llm(
         f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
         max_tokens=512,
         yield f"🚨 **SYSTEM ARCHITECTURE FAILURE**\n\nLatest Diagnostics:\n```\n" + "\n".join(DIAGNOSTIC_LOG[-3:]) + "\n```"
         return
     stream = llm(
         f"<|user|>\n{msg}<|end|>\n<|assistant|>",
         max_tokens=512,