ekjotsingh committed on
Commit
f92aefd
·
verified ·
1 Parent(s): f41cf72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -22
app.py CHANGED
@@ -17,7 +17,7 @@ def log_status(msg):
17
  # --- CONFIGURATION ---
18
  SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
19
  SOURCE_FILE = "metanthropic-phi3-v1.mguf"
20
- TEMP_DECRYPTED = "/tmp/model_sovereign_v2.gguf"
21
  HF_TOKEN = os.environ.get("HF_TOKEN")
22
  SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")
23
 
@@ -25,7 +25,6 @@ SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")
25
  def initialize_weights():
26
  try:
27
  if os.path.exists(TEMP_DECRYPTED):
28
- log_status("⚑ [CACHE] Resuming from sovereign weights.")
29
  return True
30
 
31
  if not HF_TOKEN or not SECRET_KEY_HEX:
@@ -35,7 +34,7 @@ def initialize_weights():
35
  login(token=HF_TOKEN)
36
  path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".")
37
 
38
- log_status("πŸ”“ [DECRYPT] Unlocking weights...")
39
  key = bytes.fromhex(SECRET_KEY_HEX)
40
  aes = AESGCM(key)
41
 
@@ -47,13 +46,13 @@ def initialize_weights():
47
  f_out.write(chunk)
48
 
49
  os.remove(path)
50
- log_status("βœ… [SYSTEM] Integrity verified.")
51
  return True
52
  except Exception as e:
53
  log_status(f"❌ [BOOT ERROR] {str(e)}")
54
  return False
55
 
56
- # --- ENGINE ---
57
  llm = None
58
  if initialize_weights():
59
  try:
@@ -61,21 +60,22 @@ if initialize_weights():
61
  llm = Llama(
62
  model_path=TEMP_DECRYPTED,
63
  n_ctx=2048,
64
- n_threads=2,
65
  n_batch=512,
66
- use_mlock=True,
67
  verbose=False
68
  )
69
  log_status("πŸš€ [SYSTEM] Node Online.")
70
  except Exception as e:
71
- log_status(f"❌ [ENGINE ERROR] {e}")
72
 
73
- # --- API CORE ---
74
  app = FastAPI()
75
 
76
  @app.post("/run_inference")
77
  async def run_inference(request: Request):
78
- if not llm: return {"error": "Offline"}
 
79
  data = await request.json()
80
  if data.get("secretKey") != SECRET_KEY_HEX:
81
  return {"error": "Unauthorized Access"}
@@ -84,14 +84,14 @@ async def run_inference(request: Request):
84
  output = llm(f"<|user|>\n{prompt}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])
85
  return {"response": output['choices'][0]['text'].strip()}
86
 
87
- # --- UI LOGIC (FIXED DELTA ERROR) ---
88
  def ui_chat(msg, hist):
89
  if not llm:
90
- yield "🚨 System Offline."
91
  return
92
 
93
- # High-level completion stream
94
- stream = llm(
95
  f"<|user|>\n{msg}<|end|>\n<|assistant|>",
96
  max_tokens=512,
97
  stop=["<|end|>", "<|endoftext|>"],
@@ -99,20 +99,23 @@ def ui_chat(msg, hist):
99
  )
100
 
101
  partial_text = ""
102
- for chunk in stream:
103
- # FIX: Access 'text' directly from choices[0]
104
- token = chunk['choices'][0]['text']
105
- if token:
106
- partial_text += token
107
- yield partial_text
 
 
 
 
108
 
109
- # --- BRANDED UI ---
110
  custom_css = "footer {visibility: hidden} .gradio-container {background-color: #050505 !important}"
111
 
112
  demo = gr.ChatInterface(
113
  ui_chat,
114
  title="METANTHROPIC Β· PHI-3 SOVEREIGN",
115
- description="Secure inference via decentralized sovereign weights.",
116
  theme=gr.themes.Soft(primary_hue="slate", neutral_hue="zinc"),
117
  css=custom_css
118
  )
 
17
  # --- CONFIGURATION ---
18
  SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
19
  SOURCE_FILE = "metanthropic-phi3-v1.mguf"
20
+ TEMP_DECRYPTED = "/tmp/model_stable_v3.gguf"
21
  HF_TOKEN = os.environ.get("HF_TOKEN")
22
  SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")
23
 
 
25
  def initialize_weights():
26
  try:
27
  if os.path.exists(TEMP_DECRYPTED):
 
28
  return True
29
 
30
  if not HF_TOKEN or not SECRET_KEY_HEX:
 
34
  login(token=HF_TOKEN)
35
  path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".")
36
 
37
+ log_status("πŸ”“ [DECRYPT] Unlocking GGUF weights...")
38
  key = bytes.fromhex(SECRET_KEY_HEX)
39
  aes = AESGCM(key)
40
 
 
46
  f_out.write(chunk)
47
 
48
  os.remove(path)
49
+ log_status("βœ… [SYSTEM] Weight integrity verified.")
50
  return True
51
  except Exception as e:
52
  log_status(f"❌ [BOOT ERROR] {str(e)}")
53
  return False
54
 
55
+ # --- ENGINE INITIALIZATION ---
56
  llm = None
57
  if initialize_weights():
58
  try:
 
60
  llm = Llama(
61
  model_path=TEMP_DECRYPTED,
62
  n_ctx=2048,
63
+ n_threads=2, # Locked to 2-vCPU Free Tier limit
64
  n_batch=512,
65
+ use_mlock=True, # Pin model to RAM
66
  verbose=False
67
  )
68
  log_status("πŸš€ [SYSTEM] Node Online.")
69
  except Exception as e:
70
+ log_status(f"❌ [ENGINE ERROR] Neural load failed: {e}")
71
 
72
+ # --- API CORE (CONVEX BRIDGE) ---
73
  app = FastAPI()
74
 
75
  @app.post("/run_inference")
76
  async def run_inference(request: Request):
77
+ if not llm: return {"error": "System Offline"}
78
+
79
  data = await request.json()
80
  if data.get("secretKey") != SECRET_KEY_HEX:
81
  return {"error": "Unauthorized Access"}
 
84
  output = llm(f"<|user|>\n{prompt}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])
85
  return {"response": output['choices'][0]['text'].strip()}
86
 
87
+ # --- PREMIUM UI LOGIC (STREAMING FIX) ---
88
  def ui_chat(msg, hist):
89
  if not llm:
90
+ yield "🚨 SYSTEM OFFLINE. CHECK LOGS."
91
  return
92
 
93
+ # Use the High-Level __call__ API with stream=True
94
+ stream_iterator = llm(
95
  f"<|user|>\n{msg}<|end|>\n<|assistant|>",
96
  max_tokens=512,
97
  stop=["<|end|>", "<|endoftext|>"],
 
99
  )
100
 
101
  partial_text = ""
102
+ try:
103
+ for chunk in stream_iterator:
104
+ # FIX: CompletionChunks store text in ['choices'][0]['text']
105
+ # There is NO 'delta' or 'content' in this API mode.
106
+ token = chunk['choices'][0].get('text', "")
107
+ if token:
108
+ partial_text += token
109
+ yield partial_text
110
+ except Exception as e:
111
+ yield f"⚠️ Stream Interrupted: {str(e)}"
112
 
113
+ # --- BRANDED INTERFACE ---
114
  custom_css = "footer {visibility: hidden} .gradio-container {background-color: #050505 !important}"
115
 
116
  demo = gr.ChatInterface(
117
  ui_chat,
118
  title="METANTHROPIC Β· PHI-3 SOVEREIGN",
 
119
  theme=gr.themes.Soft(primary_hue="slate", neutral_hue="zinc"),
120
  css=custom_css
121
  )