ekjotsingh committed on
Commit
e2d080d
·
verified ·
1 Parent(s): 0acd62e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -34
app.py CHANGED
@@ -1,55 +1,104 @@
1
  import os
 
2
  import struct
 
3
  import gradio as gr
4
- from llama_cpp import Llama
5
- from cryptography.hazmat.primitives.ciphers.aead import AESGCM
6
  from huggingface_hub import hf_hub_download, login
 
7
  from fastapi import FastAPI, Request
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # --- CONFIG ---
10
- HF_TOKEN = os.environ.get("HF_TOKEN")
11
- SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")
12
  SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
13
  SOURCE_FILE = "metanthropic-phi3-v1.mguf"
14
- TEMP_DECRYPTED = "/tmp/model.gguf"
15
-
16
- print("πŸ”„ [BOOT] Metanthropic Node Initiating...")
17
-
18
- def boot_engine():
19
- if os.path.exists(TEMP_DECRYPTED): return
20
- if HF_TOKEN: login(token=HF_TOKEN)
21
-
22
- print(f"⬇️ Fetching {SOURCE_FILE}...")
23
- path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".")
24
-
25
- print("πŸ”“ Decrypting...")
26
- key = bytes.fromhex(SECRET_KEY_HEX)
27
- aes = AESGCM(key)
28
- with open(path, "rb") as f_in, open(TEMP_DECRYPTED, "wb") as f_out:
29
- nonce = f_in.read(12)
30
- h_len = struct.unpack("<I", f_in.read(4))[0]
31
- f_out.write(aes.decrypt(nonce, f_in.read(h_len), None))
32
- while chunk := f_in.read(64*1024*1024): f_out.write(chunk)
33
- os.remove(path)
34
- print("βœ… Engine Ready.")
35
-
36
- # --- LOAD ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  llm = None
38
- boot_engine()
39
- llm = Llama(model_path=TEMP_DECRYPTED, n_ctx=2048, n_threads=2)
 
 
 
 
 
 
40
 
41
- # --- API ---
42
  app = FastAPI()
43
 
44
  @app.post("/run_inference")
45
  async def run_inference(request: Request):
 
 
46
  data = await request.json()
47
  prompt = data.get("prompt", "")
48
- out = llm(f"<|user|>\n{prompt}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])
49
- return {"response": out['choices'][0]['text'].strip()}
 
 
 
 
 
50
 
51
- # UI
52
- demo = gr.ChatInterface(lambda msg, hist: llm(f"<|user|>\n{msg}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])['choices'][0]['text'].strip())
53
  app = gr.mount_gradio_app(app, demo, path="/")
54
 
55
  if __name__ == "__main__":
 
1
  import os
2
+ import sys
3
  import struct
4
+ import traceback
5
  import gradio as gr
 
 
6
  from huggingface_hub import hf_hub_download, login
7
+ from cryptography.hazmat.primitives.ciphers.aead import AESGCM
8
  from fastapi import FastAPI, Request
9
 
10
# --- GLOBAL ERROR TRACKER ---
# Rolling in-memory record of boot/runtime status messages; surfaced to API
# clients and the chat UI whenever the model failed to come online.
DIAGNOSTIC_LOG = []

def log_status(msg):
    """Echo *msg* to stdout and retain it in DIAGNOSTIC_LOG for later display."""
    print(msg)
    DIAGNOSTIC_LOG.append(msg)
15
+
16
# --- 1. CRITICAL IMPORT WRAPPER ---
# llama_cpp ships native binaries that can fail to load on this host; import
# defensively so the web app can still boot into diagnostic mode and report
# the failure instead of crashing at startup.
Llama = None
try:
    log_status("πŸ“‘ [IMPORT] Attempting to load llama_cpp...")
    from llama_cpp import Llama
    log_status("βœ… [IMPORT] llama_cpp library linked successfully.")
except Exception as e:
    log_status(f"❌ [IMPORT ERROR] Library mismatch detected: {e}")
    log_status(f"DEBUG: System Path: {sys.path}")
    log_status(traceback.format_exc())
26
+
27
# --- CONFIG ---
SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"  # Hub repo holding the encrypted model
SOURCE_FILE = "metanthropic-phi3-v1.mguf"                 # encrypted payload inside that repo
TEMP_DECRYPTED = "/tmp/model_stable.gguf"                 # where the decrypted GGUF is written
HF_TOKEN = os.environ.get("HF_TOKEN")                     # Hub auth token (Space secret)
SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")         # hex-encoded AES-GCM key (Space secret)
34
def robust_boot():
    """Download and decrypt the model file, logging every step.

    Returns True when TEMP_DECRYPTED holds a ready-to-load model, False on
    any failure (missing secrets, network error, bad key, ...).  Errors are
    captured into DIAGNOSTIC_LOG instead of being raised, so the web app can
    still start in diagnostic mode.
    """
    try:
        if os.path.exists(TEMP_DECRYPTED):
            log_status("⚑ [CACHE] Decrypted model exists.")
            return True

        # Check Secrets
        if not HF_TOKEN or not SECRET_KEY_HEX:
            log_status("❌ [AUTH ERROR] Missing HF_TOKEN or DECRYPTION_KEY in Secrets.")
            return False

        # Login
        log_status("πŸ” [AUTH] Authenticating...")
        login(token=HF_TOKEN)

        # Download
        log_status(f"⬇️ [NETWORK] Fetching {SOURCE_FILE}...")
        path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".")

        # Decrypt.  File layout: 12-byte GCM nonce, little-endian uint32
        # header length, an AES-GCM-encrypted header of that length, then the
        # remainder of the payload copied through as-is.
        log_status("πŸ”“ [SECURITY] Decrypting model...")
        key = bytes.fromhex(SECRET_KEY_HEX)
        aes = AESGCM(key)
        # FIX: write to a ".part" file and atomically rename at the end.
        # Previously a crash mid-decrypt left a truncated TEMP_DECRYPTED,
        # which the cache check above would accept as a valid model on every
        # subsequent boot.
        partial = TEMP_DECRYPTED + ".part"
        try:
            with open(path, "rb") as f_in, open(partial, "wb") as f_out:
                nonce = f_in.read(12)
                h_len = struct.unpack("<I", f_in.read(4))[0]
                f_out.write(aes.decrypt(nonce, f_in.read(h_len), None))
                while chunk := f_in.read(64*1024*1024):
                    f_out.write(chunk)
            os.replace(partial, TEMP_DECRYPTED)  # atomic on POSIX
        finally:
            # Remove the partial file if decryption failed before the rename.
            if os.path.exists(partial):
                os.remove(partial)

        os.remove(path)
        log_status("βœ… [SUCCESS] Model ready for engine.")
        return True

    except Exception as e:
        log_status(f"❌ [BOOT ERROR] {e}")
        log_status(traceback.format_exc())
        return False
72
+
73
# --- ENGINE INITIALIZATION ---
# Only spin up the engine when both the library import and the
# download/decrypt stage succeeded; otherwise remain in diagnostic mode
# with llm left as None so the handlers below can report the logs.
llm = None
if Llama and robust_boot():
    try:
        log_status("🧠 [ENGINE] Initializing Llama...")
        llm = Llama(model_path=TEMP_DECRYPTED, n_ctx=2048, n_threads=2)
        log_status("πŸš€ [SYSTEM] Node Online.")
    except Exception as e:
        log_status(f"❌ [ENGINE ERROR] Failed to load model file: {e}")
        log_status(traceback.format_exc())
83
 
84
# --- API & INTERFACE ---
app = FastAPI()

@app.post("/run_inference")
async def run_inference(request: Request):
    """JSON endpoint: {"prompt": ...} -> {"response": ...}.

    When the model never came online, returns the diagnostic log instead of
    failing with an opaque server error.
    """
    if not llm:
        return {"error": "Model offline", "logs": DIAGNOSTIC_LOG}
    body = await request.json()
    prompt = body.get("prompt", "")
    result = llm(f"<|user|>\n{prompt}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])
    return {"response": result['choices'][0]['text'].strip()}
95
+
96
def ui_chat(msg, hist):
    """Gradio chat handler.

    Runs *msg* through the model with the phi-3 chat template; when the
    engine is offline, shows the last few diagnostic log lines instead.
    """
    if not llm:
        return "🚨 SYSTEM ERROR\n\nLatest Logs:\n" + "\n".join(DIAGNOSTIC_LOG[-5:])
    completion = llm(f"<|user|>\n{msg}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])
    return completion['choices'][0]['text'].strip()
100
 
101
# Build the chat UI and serve it from the FastAPI root alongside the JSON API.
demo = gr.ChatInterface(ui_chat, title="Metanthropic Sovereign Node (Diagnostic Mode)")
app = gr.mount_gradio_app(app, demo, path="/")
103
 
104
  if __name__ == "__main__":