Made chat ui
app.py CHANGED
@@ -3,57 +3,45 @@ import sys
 import struct
 import traceback
 import gradio as gr
-from
+from llama_cpp import Llama
 from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+from huggingface_hub import hf_hub_download, login
 from fastapi import FastAPI, Request
 
-# --- GLOBAL
+# --- GLOBAL DIAGNOSTICS & LOGGING ---
 DIAGNOSTIC_LOG = []
 def log_status(msg):
     print(msg)
     DIAGNOSTIC_LOG.append(msg)
 
-# ---
-Llama = None
-try:
-    log_status("📡 [IMPORT] Attempting to load llama_cpp...")
-    from llama_cpp import Llama
-    log_status("✅ [IMPORT] llama_cpp library linked successfully.")
-except Exception as e:
-    log_status(f"❌ [IMPORT ERROR] Library mismatch detected: {e}")
-    log_status(f"DEBUG: System Path: {sys.path}")
-    log_status(traceback.format_exc())
-
-# --- CONFIG ---
+# --- CONFIGURATION ---
 SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
 SOURCE_FILE = "metanthropic-phi3-v1.mguf"
-TEMP_DECRYPTED = "/tmp/
+TEMP_DECRYPTED = "/tmp/model_sovereign.gguf"
 HF_TOKEN = os.environ.get("HF_TOKEN")
 SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")
 
-def robust_boot():
+# --- SOVEREIGN BOOTLOADER ---
+def initialize_weights():
     try:
         if os.path.exists(TEMP_DECRYPTED):
-            log_status("⚡ [CACHE]
+            log_status("⚡ [CACHE] Resuming from existing sovereign weights.")
             return True
 
-        # Check Secrets
         if not HF_TOKEN or not SECRET_KEY_HEX:
-            log_status("❌ [
+            log_status("❌ [SECURITY] Credentials missing. Verify HF_TOKEN and DECRYPTION_KEY.")
             return False
 
-
-        log_status("🔑 [AUTH] Authenticating...")
+        log_status("🔑 [AUTH] Establishing secure link to Hugging Face...")
         login(token=HF_TOKEN)
 
-        # Download
         log_status(f"⬇️ [NETWORK] Fetching {SOURCE_FILE}...")
         path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".")
 
-
-        log_status("🔐 [SECURITY] Decrypting model...")
+        log_status("🔓 [DECRYPT] Unlocking GGUF weights...")
        key = bytes.fromhex(SECRET_KEY_HEX)
         aes = AESGCM(key)
+
         with open(path, "rb") as f_in, open(TEMP_DECRYPTED, "wb") as f_out:
             nonce = f_in.read(12)
             h_len = struct.unpack("<I", f_in.read(4))[0]
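The two reads above pin down the head of the `.mguf` container: a 12-byte AES-GCM nonce, then a little-endian `uint32` header length. The chunked decryption loop that consumes the rest falls in lines the diff view collapses, so the exact ciphertext framing is not visible here. A minimal sketch of a compatible writer, assuming a single AES-GCM pass over the whole payload and an opaque header blob (`encrypt_model` and its signature are illustrative, not from this repo):

```python
import os
import struct
from cryptography.hazmat.primitives.ciphers.aead import AESGCM

def encrypt_model(src_path: str, dst_path: str, key: bytes, header: bytes = b"") -> None:
    nonce = os.urandom(12)  # 12 bytes matches the f_in.read(12) above
    with open(src_path, "rb") as f:
        ciphertext = AESGCM(key).encrypt(nonce, f.read(), None)
    with open(dst_path, "wb") as f:
        f.write(nonce)
        f.write(struct.pack("<I", len(header)))  # consumed by struct.unpack("<I", ...)
        f.write(header)
        f.write(ciphertext)
```

Note that `AESGCM` accepts only 128-, 192-, or 256-bit keys, so `DECRYPTION_KEY` must decode via `bytes.fromhex` to exactly 16, 24, or 32 bytes.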
@@ -62,43 +50,110 @@ def robust_boot():
                 f_out.write(chunk)
 
         os.remove(path)
-        log_status("✅ [
+        log_status("✅ [SYSTEM] Weight integrity verified.")
         return True
-
     except Exception as e:
-        log_status(f"❌ [
+        log_status(f"❌ [CRITICAL] Boot failure: {str(e)}")
         log_status(traceback.format_exc())
         return False
 
-# --- ENGINE INITIALIZATION ---
+# --- ENGINE INITIALIZATION (PERFORMANCE TUNED) ---
 llm = None
-if robust_boot():
+if initialize_weights():
     try:
-        log_status("🧠 [ENGINE] Initializing
-        llm = Llama(
-
+        log_status("🧠 [ENGINE] Initializing Neural Infrastructure...")
+        llm = Llama(
+            model_path=TEMP_DECRYPTED,
+            n_ctx=2048,        # Context window optimized for Phi-3
+            n_threads=2,       # Locked to 2-vCPU Free Tier limit for stability
+            n_batch=512,       # High-speed prompt processing
+            use_mlock=True,    # Pin model to RAM to eliminate disk latency
+            verbose=False
+        )
+        log_status("🚀 [SYSTEM] Sovereign Node Online.")
     except Exception as e:
-        log_status(f"❌ [ENGINE ERROR]
-        log_status(traceback.format_exc())
+        log_status(f"❌ [ENGINE ERROR] Neural load failed: {e}")
 
-# --- API
+# --- API CORE (CONVEX BRIDGE) ---
 app = FastAPI()
 
 @app.post("/run_inference")
 async def run_inference(request: Request):
     if not llm:
-        return {"error": "
+        return {"error": "System Offline", "logs": DIAGNOSTIC_LOG[-5:]}
+
     data = await request.json()
     prompt = data.get("prompt", "")
-
+
+    # API calls return the full string for database compatibility
+    output = llm(
+        f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
+        max_tokens=512,
+        stop=["<|end|>", "<|endoftext|>"]
+    )
     return {"response": output['choices'][0]['text'].strip()}
 
+# --- PREMIUM UI LOGIC (STREAMING) ---
 def ui_chat(msg, hist):
     if not llm:
-
-
+        yield "🚨 **SYSTEM ARCHITECTURE FAILURE**\n\nLatest Diagnostics:\n```\n" + "\n".join(DIAGNOSTIC_LOG[-3:]) + "\n```"
+        return
+
+    # Real-time token streaming for zero-latency perception
+    stream = llm(
+        f"<|user|>\n{msg}<|end|>\n<|assistant|>",
+        max_tokens=512,
+        stop=["<|end|>", "<|endoftext|>"],
+        stream=True
+    )
+
+    partial_text = ""
+    for chunk in stream:
+        # Completion-mode streaming yields {'choices': [{'text': ...}]} chunks
+        text = chunk['choices'][0]['text']
+        partial_text += text
+        yield partial_text
+
+# --- METANTHROPIC BRANDED INTERFACE ---
+custom_css = """
+footer {visibility: hidden}
+.gradio-container {background-color: #050505 !important}
+#title-container {text-align: center; margin-bottom: 30px}
+#title-container h1 {color: #ffffff; font-family: 'Inter', sans-serif; font-weight: 800; letter-spacing: -1.5px}
+.message.user {background-color: #1a1a1a !important; border: 1px solid #333 !important; border-radius: 12px !important}
+.message.assistant {background-color: #0f0f0f !important; border: 1px solid #222 !important; border-radius: 12px !important}
+"""
+
+demo = gr.ChatInterface(
+    ui_chat,
+    title="METANTHROPIC · PHI-3 SOVEREIGN",
+    description="""
+    <div id="title-container">
+        <p style="color: #a3a3a3; font-size: 1.1em; max-width: 600px; margin: 0 auto;">
+            Accessing <b>Node-01</b> of the Metanthropic Neural Infrastructure.
+            Secure inference via localized sovereign weights.
+        </p>
+        <div style="margin-top: 15px; display: flex; justify-content: center; gap: 20px;">
+            <span style="color: #22c55e; font-size: 0.85em; font-family: monospace;">● ENGINE: READY</span>
+            <span style="color: #3b82f6; font-size: 0.85em; font-family: monospace;">● ENCRYPTION: AES-GCM</span>
+            <span style="color: #a855f7; font-size: 0.85em; font-family: monospace;">● TYPE: STREAMING</span>
+        </div>
+    </div>
+    """,
+    theme=gr.themes.Soft(
+        primary_hue="slate",
+        neutral_hue="zinc",
+        font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"],
+    ).set(
+        body_background_fill="#050505",
+        block_background_fill="#0a0a0a",
+        block_border_width="1px",
+        button_primary_background_fill="#ffffff",
+        button_primary_text_color="#000000",
+    ),
+    css=custom_css
+)
 
-demo = gr.ChatInterface(ui_chat, title="Metanthropic Sovereign Node (Diagnostic Mode)")
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
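The diff cuts off at the `if __name__ == "__main__":` guard, so the entrypoint body is not shown. A typical guard for a Space that mounts Gradio on FastAPI would be the standard uvicorn launch below; the host and port values are the usual Spaces defaults, not taken from this commit:

```python
import uvicorn

if __name__ == "__main__":
    # 7860 is the port Spaces expose by default (assumption, not from the diff)
    uvicorn.run(app, host="0.0.0.0", port=7860)
```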
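One behavioral gap worth flagging: `ui_chat` receives `hist` but formats only the latest message, so every turn is answered without conversation context. A hedged sketch of folding Gradio's tuple-style history into the same Phi-3 template (`build_phi3_prompt` is illustrative, not part of the commit):

```python
def build_phi3_prompt(msg: str, hist: list) -> str:
    # hist assumed to be Gradio's [(user, assistant), ...] tuple history
    parts = []
    for user_turn, bot_turn in hist:
        parts.append(f"<|user|>\n{user_turn}<|end|>\n<|assistant|>\n{bot_turn}<|end|>\n")
    parts.append(f"<|user|>\n{msg}<|end|>\n<|assistant|>")
    return "".join(parts)
```

With `n_ctx=2048`, long histories would need truncation before formatting.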
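For completeness, a hypothetical client call against the mounted route; the Space URL is a placeholder, and the offline shape mirrors the guard clause in `run_inference`:

```python
import requests

SPACE_URL = "https://your-space.hf.space"  # placeholder, not from this diff

resp = requests.post(f"{SPACE_URL}/run_inference", json={"prompt": "Hello, Node-01."})
payload = resp.json()
if "response" in payload:
    print(payload["response"])
else:
    # When llm failed to load: {"error": "System Offline", "logs": [last 5 log lines]}
    print(payload["error"], payload["logs"])
```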