# Hugging Face Space app snapshot (Space status: Sleeping; file size: 4,001 bytes)
import os
import sys
import struct
import traceback
import gradio as gr
from llama_cpp import Llama
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from huggingface_hub import hf_hub_download, login
from fastapi import FastAPI, Request
# --- GLOBAL DIAGNOSTICS & LOGGING ---
# Running record of every status message emitted during boot and runtime,
# kept in memory so an operator can inspect the sequence after the fact.
DIAGNOSTIC_LOG = []


def log_status(msg):
    """Echo *msg* to stdout and retain it in DIAGNOSTIC_LOG."""
    for sink in (print, DIAGNOSTIC_LOG.append):
        sink(msg)
# --- CONFIGURATION ---
# Hub repo / filename of the AES-GCM-wrapped model container (.mguf).
SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
SOURCE_FILE = "metanthropic-phi3-v1.mguf"
# Path where the decrypted, load-ready GGUF is staged on local disk.
TEMP_DECRYPTED = "/tmp/model_stable_v3.gguf"
# Secrets injected via environment variables; either may be unset at runtime.
HF_TOKEN = os.environ.get("HF_TOKEN")
SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")  # hex-encoded AES-GCM key
# --- SOVEREIGN BOOTLOADER ---
def initialize_weights():
    """Download the encrypted model container, decrypt it, and stage a GGUF.

    The .mguf container layout, as consumed here, is:
        [12-byte AES-GCM nonce][4-byte little-endian header length]
        [AES-GCM-encrypted header][remainder copied through unchanged]
    Only the leading header segment is decrypted; the rest of the file is
    streamed to the output verbatim.

    Returns:
        True when TEMP_DECRYPTED is present and ready to load, False on any
        failure (missing credentials, download error, decryption failure).
    """
    staging = TEMP_DECRYPTED + ".part"
    try:
        if os.path.exists(TEMP_DECRYPTED):
            return True
        if not HF_TOKEN or not SECRET_KEY_HEX:
            log_status("❌ [SECURITY] Credentials missing.")
            return False
        login(token=HF_TOKEN)
        path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".")
        log_status("🔓 [DECRYPT] Unlocking GGUF weights...")
        key = bytes.fromhex(SECRET_KEY_HEX)
        aes = AESGCM(key)
        # BUGFIX: write to a staging file and atomically publish on success.
        # Previously a failure mid-decrypt left a partial TEMP_DECRYPTED on
        # disk, and the exists() fast-path above would then treat the corrupt
        # file as valid on every subsequent boot.
        with open(path, "rb") as f_in, open(staging, "wb") as f_out:
            nonce = f_in.read(12)
            h_len = struct.unpack("<I", f_in.read(4))[0]
            # AESGCM.decrypt authenticates the header; raises on tampering
            # or a wrong key, which routes us to the cleanup path below.
            f_out.write(aes.decrypt(nonce, f_in.read(h_len), None))
            # Stream the remaining (non-encrypted) body in 64 MiB chunks.
            while chunk := f_in.read(64 * 1024 * 1024):
                f_out.write(chunk)
        os.replace(staging, TEMP_DECRYPTED)  # atomic rename: all-or-nothing
        os.remove(path)  # drop the encrypted download to save disk
        log_status("✅ [SYSTEM] Weight integrity verified.")
        return True
    except Exception as e:
        # Best-effort removal of any partial staging file so the next boot
        # retries from scratch instead of loading garbage.
        try:
            os.remove(staging)
        except OSError:
            pass
        log_status(f"❌ [BOOT ERROR] {str(e)}")
        return False
# --- ENGINE INITIALIZATION ---
# Module-level side effect: stage the decrypted weights and, if that succeeds,
# load them into a llama.cpp engine. `llm` stays None on any failure so the
# API/UI layers below can report "System Offline" instead of crashing.
llm = None
if initialize_weights():
    try:
        log_status("🧠 [ENGINE] Initializing Llama...")
        llm = Llama(
            model_path=TEMP_DECRYPTED,
            n_ctx=2048,        # context window size in tokens
            n_threads=2,       # Locked to 2-vCPU Free Tier limit
            n_batch=512,       # prompt-eval batch size
            use_mlock=True,    # Pin model to RAM
            verbose=False
        )
        log_status("🚀 [SYSTEM] Node Online.")
    except Exception as e:
        # Boundary catch: log and continue with llm=None (degraded mode).
        log_status(f"❌ [ENGINE ERROR] Neural load failed: {e}")
# --- API CORE (CONVEX BRIDGE) ---
app = FastAPI()


@app.post("/run_inference")
async def run_inference(request: Request):
    """Authenticated completion endpoint for the external bridge.

    Expects a JSON body {"secretKey": <shared secret>, "prompt": <text>} and
    returns {"response": <completion text>} or {"error": <reason>}.
    """
    import hmac  # stdlib; local import keeps the top-of-file imports untouched

    if not llm:
        return {"error": "System Offline"}
    data = await request.json()
    supplied = data.get("secretKey")
    # SECURITY FIX: the original `data.get("secretKey") != SECRET_KEY_HEX`
    # (1) leaked timing information about the shared secret, and (2) passed
    # auth when DECRYPTION_KEY was unset (None == None). Reject explicitly
    # when the server secret is missing or the client value is not a string,
    # then compare in constant time.
    if (
        not SECRET_KEY_HEX
        or not isinstance(supplied, str)
        or not hmac.compare_digest(supplied, SECRET_KEY_HEX)
    ):
        return {"error": "Unauthorized Access"}
    prompt = data.get("prompt", "")
    # Phi-3 chat template; generation stops at the assistant's end marker.
    output = llm(f"<|user|>\n{prompt}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])
    return {"response": output['choices'][0]['text'].strip()}
# --- PREMIUM UI LOGIC (STREAMING FIX) ---
def ui_chat(msg, hist):
    """Gradio chat handler: stream the assistant reply incrementally.

    Yields the accumulated reply text after each new token so the UI can
    render a live-updating message. `hist` is accepted for the ChatInterface
    contract but not consulted (single-turn prompting).
    """
    if not llm:
        yield "🚨 SYSTEM OFFLINE. CHECK LOGS."
        return
    # High-level __call__ API with stream=True yields CompletionChunks whose
    # text lives at ['choices'][0]['text'] — there is no 'delta'/'content'
    # field in this API mode.
    chunk_stream = llm(
        f"<|user|>\n{msg}<|end|>\n<|assistant|>",
        max_tokens=512,
        stop=["<|end|>", "<|endoftext|>"],
        stream=True,
    )
    accumulated = ""
    try:
        for piece in chunk_stream:
            token_text = piece['choices'][0].get('text', "")
            if not token_text:
                continue
            accumulated += token_text
            yield accumulated
    except Exception as e:
        # Surface mid-stream failures to the user instead of dying silently.
        yield f"⚠️ Stream Interrupted: {str(e)}"
# --- BRANDED INTERFACE ---
# Hide the Gradio footer and force a near-black page background.
custom_css = "footer {visibility: hidden} .gradio-container {background-color: #050505 !important}"
demo = gr.ChatInterface(
    ui_chat,
    title="METANTHROPIC · PHI-3",
    theme=gr.themes.Soft(primary_hue="slate", neutral_hue="zinc"),
    css=custom_css
)
# Serve the Gradio UI at "/" on the same FastAPI app that exposes
# /run_inference, so one server handles both the API and the web UI.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Local/Space entry point; 7860 is the conventional Spaces port.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)