ekjotsingh commited on
Commit
fec01e0
·
verified ·
1 Parent(s): 85346fb

Added security: require a shared-secret key on the /run_inference API endpoint

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -64,17 +64,17 @@ if initialize_weights():
64
  log_status("🧠 [ENGINE] Initializing Neural Infrastructure...")
65
  llm = Llama(
66
  model_path=TEMP_DECRYPTED,
67
- n_ctx=2048, # Context window optimized for Phi-3
68
- n_threads=2, # Locked to 2-vCPU Free Tier limit for stability
69
- n_batch=512, # High-speed prompt processing
70
- use_mlock=True, # Pin model to RAM to eliminate disk latency
71
  verbose=False
72
  )
73
  log_status("🚀 [SYSTEM] Sovereign Node Online.")
74
  except Exception as e:
75
  log_status(f"❌ [ENGINE ERROR] Neural load failed: {e}")
76
 
77
- # --- API CORE (CONVEX BRIDGE) ---
78
  app = FastAPI()
79
 
80
  @app.post("/run_inference")
@@ -83,9 +83,14 @@ async def run_inference(request: Request):
83
  return {"error": "System Offline", "logs": DIAGNOSTIC_LOG[-5:]}
84
 
85
  data = await request.json()
 
 
 
 
 
 
86
  prompt = data.get("prompt", "")
87
 
88
- # API calls return the full string for database compatibility
89
  output = llm(
90
  f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
91
  max_tokens=512,
@@ -99,7 +104,6 @@ def ui_chat(msg, hist):
99
  yield f"🚨 **SYSTEM ARCHITECTURE FAILURE**\n\nLatest Diagnostics:\n```\n" + "\n".join(DIAGNOSTIC_LOG[-3:]) + "\n```"
100
  return
101
 
102
- # Real-time token streaming for zero-latency perception
103
  stream = llm(
104
  f"<|user|>\n{msg}<|end|>\n<|assistant|>",
105
  max_tokens=512,
 
64
  log_status("🧠 [ENGINE] Initializing Neural Infrastructure...")
65
  llm = Llama(
66
  model_path=TEMP_DECRYPTED,
67
+ n_ctx=2048,
68
+ n_threads=2, # Locked to HF Free Tier vCPU limit
69
+ n_batch=512, # Optimized for prompt ingestion speed
70
+ use_mlock=True, # Pin model to RAM
71
  verbose=False
72
  )
73
  log_status("🚀 [SYSTEM] Sovereign Node Online.")
74
  except Exception as e:
75
  log_status(f"❌ [ENGINE ERROR] Neural load failed: {e}")
76
 
77
+ # --- SECURED API CORE (CONVEX BRIDGE) ---
78
  app = FastAPI()
79
 
80
  @app.post("/run_inference")
 
83
  return {"error": "System Offline", "logs": DIAGNOSTIC_LOG[-5:]}
84
 
85
  data = await request.json()
86
+
87
+ # 🔐 SECURITY HANDSHAKE
88
+ # SECRET_KEY_HEX (the model-decryption key) doubles as the shared secret for API authorization.
+ # NOTE(review): plain `!=` comparison is not timing-safe — prefer hmac.compare_digest; also consider
+ # a dedicated API token rather than reusing the decryption key. Confirm the intended threat model.
89
+ if data.get("secretKey") != SECRET_KEY_HEX:
90
+ return {"error": "Unauthorized API Access. Use the Gradio UI instead."}
91
+
92
  prompt = data.get("prompt", "")
93
 
 
94
  output = llm(
95
  f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
96
  max_tokens=512,
 
104
  yield f"🚨 **SYSTEM ARCHITECTURE FAILURE**\n\nLatest Diagnostics:\n```\n" + "\n".join(DIAGNOSTIC_LOG[-3:]) + "\n```"
105
  return
106
 
 
107
  stream = llm(
108
  f"<|user|>\n{msg}<|end|>\n<|assistant|>",
109
  max_tokens=512,