ekjotsingh commited on
Commit
85346fb
·
verified ·
1 Parent(s): 3154d4b

Made chat ui

Browse files
Files changed (1) hide show
  1. app.py +95 -40
app.py CHANGED
@@ -3,57 +3,45 @@ import sys
3
  import struct
4
  import traceback
5
  import gradio as gr
6
- from huggingface_hub import hf_hub_download, login
7
  from cryptography.hazmat.primitives.ciphers.aead import AESGCM
 
8
  from fastapi import FastAPI, Request
9
 
10
- # --- GLOBAL ERROR TRACKER ---
11
  DIAGNOSTIC_LOG = []
12
  def log_status(msg):
13
  print(msg)
14
  DIAGNOSTIC_LOG.append(msg)
15
 
16
- # --- 1. CRITICAL IMPORT WRAPPER ---
17
- Llama = None
18
- try:
19
- log_status("πŸ“‘ [IMPORT] Attempting to load llama_cpp...")
20
- from llama_cpp import Llama
21
- log_status("βœ… [IMPORT] llama_cpp library linked successfully.")
22
- except Exception as e:
23
- log_status(f"❌ [IMPORT ERROR] Library mismatch detected: {e}")
24
- log_status(f"DEBUG: System Path: {sys.path}")
25
- log_status(traceback.format_exc())
26
-
27
- # --- CONFIG ---
28
  SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
29
  SOURCE_FILE = "metanthropic-phi3-v1.mguf"
30
- TEMP_DECRYPTED = "/tmp/model_stable.gguf"
31
  HF_TOKEN = os.environ.get("HF_TOKEN")
32
  SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")
33
 
34
- def robust_boot():
 
35
  try:
36
  if os.path.exists(TEMP_DECRYPTED):
37
- log_status("⚑ [CACHE] Decrypted model exists.")
38
  return True
39
 
40
- # Check Secrets
41
  if not HF_TOKEN or not SECRET_KEY_HEX:
42
- log_status("❌ [AUTH ERROR] Missing HF_TOKEN or DECRYPTION_KEY in Secrets.")
43
  return False
44
 
45
- # Login
46
- log_status("πŸ” [AUTH] Authenticating...")
47
  login(token=HF_TOKEN)
48
 
49
- # Download
50
  log_status(f"⬇️ [NETWORK] Fetching {SOURCE_FILE}...")
51
  path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".")
52
 
53
- # Decrypt
54
- log_status("πŸ”“ [SECURITY] Decrypting model...")
55
  key = bytes.fromhex(SECRET_KEY_HEX)
56
  aes = AESGCM(key)
 
57
  with open(path, "rb") as f_in, open(TEMP_DECRYPTED, "wb") as f_out:
58
  nonce = f_in.read(12)
59
  h_len = struct.unpack("<I", f_in.read(4))[0]
@@ -62,43 +50,110 @@ def robust_boot():
62
  f_out.write(chunk)
63
 
64
  os.remove(path)
65
- log_status("βœ… [SUCCESS] Model ready for engine.")
66
  return True
67
-
68
  except Exception as e:
69
- log_status(f"❌ [BOOT ERROR] {e}")
70
  log_status(traceback.format_exc())
71
  return False
72
 
73
- # --- ENGINE INITIALIZATION ---
74
  llm = None
75
- if Llama and robust_boot():
76
  try:
77
- log_status("🧠 [ENGINE] Initializing Llama...")
78
- llm = Llama(model_path=TEMP_DECRYPTED, n_ctx=2048, n_threads=2)
79
- log_status("πŸš€ [SYSTEM] Node Online.")
 
 
 
 
 
 
 
80
  except Exception as e:
81
- log_status(f"❌ [ENGINE ERROR] Failed to load model file: {e}")
82
- log_status(traceback.format_exc())
83
 
84
- # --- API & INTERFACE ---
85
  app = FastAPI()
86
 
87
  @app.post("/run_inference")
88
  async def run_inference(request: Request):
89
  if not llm:
90
- return {"error": "Model offline", "logs": DIAGNOSTIC_LOG}
 
91
  data = await request.json()
92
  prompt = data.get("prompt", "")
93
- output = llm(f"<|user|>\n{prompt}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])
 
 
 
 
 
 
94
  return {"response": output['choices'][0]['text'].strip()}
95
 
 
96
  def ui_chat(msg, hist):
97
  if not llm:
98
- return f"🚨 SYSTEM ERROR\n\nLatest Logs:\n" + "\n".join(DIAGNOSTIC_LOG[-5:])
99
- return llm(f"<|user|>\n{msg}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])['choices'][0]['text'].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- demo = gr.ChatInterface(ui_chat, title="Metanthropic Sovereign Node (Diagnostic Mode)")
102
  app = gr.mount_gradio_app(app, demo, path="/")
103
 
104
  if __name__ == "__main__":
 
3
  import struct
4
  import traceback
5
  import gradio as gr
6
+ from llama_cpp import Llama
7
  from cryptography.hazmat.primitives.ciphers.aead import AESGCM
8
+ from huggingface_hub import hf_hub_download, login
9
  from fastapi import FastAPI, Request
10
 
11
+ # --- GLOBAL DIAGNOSTICS & LOGGING ---
12
  DIAGNOSTIC_LOG = []
13
  def log_status(msg):
14
  print(msg)
15
  DIAGNOSTIC_LOG.append(msg)
16
 
17
+ # --- CONFIGURATION ---
 
 
 
 
 
 
 
 
 
 
 
18
  SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
19
  SOURCE_FILE = "metanthropic-phi3-v1.mguf"
20
+ TEMP_DECRYPTED = "/tmp/model_sovereign.gguf"
21
  HF_TOKEN = os.environ.get("HF_TOKEN")
22
  SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")
23
 
24
+ # --- SOVEREIGN BOOTLOADER ---
25
+ def initialize_weights():
26
  try:
27
  if os.path.exists(TEMP_DECRYPTED):
28
+ log_status("⚑ [CACHE] Resuming from existing sovereign weights.")
29
  return True
30
 
 
31
  if not HF_TOKEN or not SECRET_KEY_HEX:
32
+ log_status("❌ [SECURITY] Credentials missing. Verify HF_TOKEN and DECRYPTION_KEY.")
33
  return False
34
 
35
+ log_status("πŸ” [AUTH] Establishing secure link to Hugging Face...")
 
36
  login(token=HF_TOKEN)
37
 
 
38
  log_status(f"⬇️ [NETWORK] Fetching {SOURCE_FILE}...")
39
  path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".")
40
 
41
+ log_status("πŸ”“ [DECRYPT] Unlocking GGUF weights...")
 
42
  key = bytes.fromhex(SECRET_KEY_HEX)
43
  aes = AESGCM(key)
44
+
45
  with open(path, "rb") as f_in, open(TEMP_DECRYPTED, "wb") as f_out:
46
  nonce = f_in.read(12)
47
  h_len = struct.unpack("<I", f_in.read(4))[0]
 
50
  f_out.write(chunk)
51
 
52
  os.remove(path)
53
+ log_status("βœ… [SYSTEM] Weight integrity verified.")
54
  return True
 
55
  except Exception as e:
56
+ log_status(f"❌ [CRITICAL] Boot failure: {str(e)}")
57
  log_status(traceback.format_exc())
58
  return False
59
 
60
+ # --- ENGINE INITIALIZATION (PERFORMANCE TUNED) ---
61
  llm = None
62
+ if initialize_weights():
63
  try:
64
+ log_status("🧠 [ENGINE] Initializing Neural Infrastructure...")
65
+ llm = Llama(
66
+ model_path=TEMP_DECRYPTED,
67
+ n_ctx=2048, # Context window optimized for Phi-3
68
+ n_threads=2, # Locked to 2-vCPU Free Tier limit for stability
69
+ n_batch=512, # High-speed prompt processing
70
+ use_mlock=True, # Pin model to RAM to eliminate disk latency
71
+ verbose=False
72
+ )
73
+ log_status("πŸš€ [SYSTEM] Sovereign Node Online.")
74
  except Exception as e:
75
+ log_status(f"❌ [ENGINE ERROR] Neural load failed: {e}")
 
76
 
77
+ # --- API CORE (CONVEX BRIDGE) ---
78
  app = FastAPI()
79
 
80
  @app.post("/run_inference")
81
  async def run_inference(request: Request):
82
  if not llm:
83
+ return {"error": "System Offline", "logs": DIAGNOSTIC_LOG[-5:]}
84
+
85
  data = await request.json()
86
  prompt = data.get("prompt", "")
87
+
88
+ # API calls return the full string for database compatibility
89
+ output = llm(
90
+ f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
91
+ max_tokens=512,
92
+ stop=["<|end|>", "<|endoftext|>"]
93
+ )
94
  return {"response": output['choices'][0]['text'].strip()}
95
 
96
+ # --- PREMIUM UI LOGIC (STREAMING) ---
97
  def ui_chat(msg, hist):
98
  if not llm:
99
+ yield f"🚨 **SYSTEM ARCHITECTURE FAILURE**\n\nLatest Diagnostics:\n```\n" + "\n".join(DIAGNOSTIC_LOG[-3:]) + "\n```"
100
+ return
101
+
102
+ # Real-time token streaming for zero-latency perception
103
+ stream = llm(
104
+ f"<|user|>\n{msg}<|end|>\n<|assistant|>",
105
+ max_tokens=512,
106
+ stop=["<|end|>", "<|endoftext|>"],
107
+ stream=True
108
+ )
109
+
110
+ partial_text = ""
111
+ for chunk in stream:
112
+ delta = chunk['choices'][0]['delta']
113
+ if 'content' in delta:
114
+ partial_text += delta['content']
115
+ yield partial_text
116
+
117
+ # --- METANTHROPIC BRANDED INTERFACE ---
118
+ custom_css = """
119
+ footer {visibility: hidden}
120
+ .gradio-container {background-color: #050505 !important}
121
+ #title-container {text-align: center; margin-bottom: 30px}
122
+ #title-container h1 {color: #ffffff; font-family: 'Inter', sans-serif; font-weight: 800; letter-spacing: -1.5px}
123
+ .message.user {background-color: #1a1a1a !important; border: 1px solid #333 !important; border-radius: 12px !important}
124
+ .message.assistant {background-color: #0f0f0f !important; border: 1px solid #222 !important; border-radius: 12px !important}
125
+ """
126
+
127
+ demo = gr.ChatInterface(
128
+ ui_chat,
129
+ title="METANTHROPIC Β· PHI-3 SOVEREIGN",
130
+ description="""
131
+ <div id="title-container">
132
+ <p style="color: #a3a3a3; font-size: 1.1em; max-width: 600px; margin: 0 auto;">
133
+ Accessing <b>Node-01</b> of the Metanthropic Neural Infrastructure.
134
+ Secure inference via localized sovereign weights.
135
+ </p>
136
+ <div style="margin-top: 15px; display: flex; justify-content: center; gap: 20px;">
137
+ <span style="color: #22c55e; font-size: 0.85em; font-family: monospace;">● ENGINE: READY</span>
138
+ <span style="color: #3b82f6; font-size: 0.85em; font-family: monospace;">● ENCRYPTION: AES-GCM</span>
139
+ <span style="color: #a855f7; font-size: 0.85em; font-family: monospace;">● TYPE: STREAMING</span>
140
+ </div>
141
+ </div>
142
+ """,
143
+ theme=gr.themes.Soft(
144
+ primary_hue="slate",
145
+ neutral_hue="zinc",
146
+ font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"],
147
+ ).set(
148
+ body_background_fill="#050505",
149
+ block_background_fill="#0a0a0a",
150
+ block_border_width="1px",
151
+ button_primary_background_fill="#ffffff",
152
+ button_primary_text_color="#000000",
153
+ ),
154
+ css=custom_css
155
+ )
156
 
 
157
  app = gr.mount_gradio_app(app, demo, path="/")
158
 
159
  if __name__ == "__main__":