# Source: Hugging Face Space "app.py" by ekjotsingh — commit fe8f4fb (verified)
import hmac
import os
import struct
import sys
import traceback

import gradio as gr
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from fastapi import FastAPI, Request
from huggingface_hub import hf_hub_download, login
from llama_cpp import Llama
# --- GLOBAL DIAGNOSTICS & LOGGING ---
DIAGNOSTIC_LOG = []


def log_status(msg):
    """Echo *msg* to stdout and keep a copy in the in-memory diagnostic log."""
    print(msg)
    DIAGNOSTIC_LOG.append(msg)
# --- CONFIGURATION ---
# Hub repository and filename of the AES-GCM-encrypted model artifact.
SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
SOURCE_FILE = "metanthropic-phi3-v1.mguf"
# Destination for the decrypted GGUF weights; its existence is used as the
# "already decrypted" marker by initialize_weights().
TEMP_DECRYPTED = "/tmp/model_stable_v3.gguf"
# Injected via environment; either may be None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
SECRET_KEY_HEX = os.environ.get("DECRYPTION_KEY")  # hex-encoded AES-GCM key
# --- SOVEREIGN BOOTLOADER ---
def initialize_weights():
    """Download the encrypted model and materialize decrypted weights at TEMP_DECRYPTED.

    Artifact layout (as consumed below): 12-byte AES-GCM nonce, a 4-byte
    little-endian ciphertext length, the encrypted GGUF header of that length,
    then the remaining weight data which is streamed through unchanged.

    Returns:
        True when TEMP_DECRYPTED is ready for loading, False on any failure
        (missing credentials, download error, failed GCM tag verification, ...).
    """
    try:
        # Already decrypted by a previous run — nothing to do.
        if os.path.exists(TEMP_DECRYPTED):
            return True
        if not HF_TOKEN or not SECRET_KEY_HEX:
            log_status("❌ [SECURITY] Credentials missing.")
            return False
        login(token=HF_TOKEN)
        path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".")
        log_status("🔓 [DECRYPT] Unlocking GGUF weights...")
        key = bytes.fromhex(SECRET_KEY_HEX)
        aes = AESGCM(key)
        # FIX: decrypt into a side file and publish it with an atomic rename.
        # Previously a failure mid-decryption left a truncated/corrupt file at
        # TEMP_DECRYPTED, which the existence check above would then accept as
        # valid on every subsequent start.
        partial = TEMP_DECRYPTED + ".part"
        try:
            with open(path, "rb") as f_in, open(partial, "wb") as f_out:
                nonce = f_in.read(12)
                h_len = struct.unpack("<I", f_in.read(4))[0]
                # AESGCM.decrypt verifies the auth tag; raises InvalidTag on tamper.
                f_out.write(aes.decrypt(nonce, f_in.read(h_len), None))
                # Remainder of the artifact is plaintext; copy in 64 MiB chunks.
                while chunk := f_in.read(64 * 1024 * 1024):
                    f_out.write(chunk)
            os.replace(partial, TEMP_DECRYPTED)
        finally:
            # Remove any leftover partial file (no-op after a successful rename).
            if os.path.exists(partial):
                os.remove(partial)
        os.remove(path)
        log_status("✅ [SYSTEM] Weight integrity verified.")
        return True
    except Exception as e:
        # Top-level boot boundary: log and report failure instead of crashing.
        log_status(f"❌ [BOOT ERROR] {str(e)}")
        return False
# --- ENGINE INITIALIZATION ---
# Module-level model handle. Stays None when bootstrapping or loading fails;
# every endpoint below checks it before use.
llm = None
if initialize_weights():
    try:
        log_status("🧠 [ENGINE] Initializing Llama...")
        llm = Llama(
            model_path=TEMP_DECRYPTED,
            n_ctx=2048,
            n_threads=2, # Locked to 2-vCPU Free Tier limit
            n_batch=512,
            use_mlock=True, # Pin model to RAM
            verbose=False
        )
        # NOTE(review): "πŸš€" is mojibake for "🚀" — normalize when convenient.
        log_status("πŸš€ [SYSTEM] Node Online.")
    except Exception as e:
        log_status(f"❌ [ENGINE ERROR] Neural load failed: {e}")
# --- API CORE (CONVEX BRIDGE) ---
app = FastAPI()


@app.post("/run_inference")
async def run_inference(request: Request):
    """Authenticated completion endpoint.

    Expects JSON {"secretKey": str, "prompt": str}; returns {"response": str}
    on success or {"error": str} when offline/unauthorized.
    """
    if not llm:
        return {"error": "System Offline"}
    data = await request.json()
    # FIX: use a constant-time comparison so the key cannot be recovered via
    # timing differences; `!=` short-circuits on the first mismatching byte.
    # NOTE(review): reusing DECRYPTION_KEY as the API auth token means a leak
    # of either compromises both — consider a dedicated API secret.
    supplied = str(data.get("secretKey", ""))
    if not SECRET_KEY_HEX or not hmac.compare_digest(supplied.encode(), SECRET_KEY_HEX.encode()):
        return {"error": "Unauthorized Access"}
    prompt = data.get("prompt", "")
    output = llm(f"<|user|>\n{prompt}<|end|>\n<|assistant|>", max_tokens=512, stop=["<|end|>"])
    return {"response": output['choices'][0]['text'].strip()}
# --- PREMIUM UI LOGIC (STREAMING FIX) ---
def ui_chat(msg, hist):
    """Gradio streaming chat handler: yields the progressively growing reply.

    `hist` (chat history) is accepted for the ChatInterface contract but unused.
    """
    if not llm:
        yield "🚨 SYSTEM OFFLINE. CHECK LOGS."
        return
    partial_text = ""
    try:
        # FIX: creating the stream is now inside the try as well — previously an
        # exception raised while *starting* generation (as opposed to while
        # consuming it) escaped the handler instead of being shown in the UI.
        stream_iterator = llm(
            f"<|user|>\n{msg}<|end|>\n<|assistant|>",
            max_tokens=512,
            stop=["<|end|>", "<|endoftext|>"],
            stream=True
        )
        for chunk in stream_iterator:
            # In stream mode the high-level __call__ API yields CompletionChunks
            # with text at ['choices'][0]['text'] — there is no 'delta'/'content'.
            token = chunk['choices'][0].get('text', "")
            if token:
                partial_text += token
                yield partial_text
    except Exception as e:
        yield f"⚠️ Stream Interrupted: {str(e)}"
# --- BRANDED INTERFACE ---
# Hide the Gradio footer and force a near-black page background.
custom_css = "footer {visibility: hidden} .gradio-container {background-color: #050505 !important}"
demo = gr.ChatInterface(
    ui_chat,
    # NOTE(review): "Β·" looks like mojibake for "·" — confirm the intended title.
    title="METANTHROPIC Β· PHI-3",
    theme=gr.themes.Soft(primary_hue="slate", neutral_hue="zinc"),
    css=custom_css
)
# Serve the chat UI at "/" on the same FastAPI app that exposes /run_inference.
app = gr.mount_gradio_app(app, demo, path="/")
if __name__ == "__main__":
    # Direct-run entry point: serve the combined FastAPI + Gradio app on 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)