import asyncio
import logging
import os
import resource
import struct

import uvicorn
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from fastapi import FastAPI, HTTPException, Request, Security, status
from fastapi.security import APIKeyHeader
from huggingface_hub import hf_hub_download, login
from llama_cpp import Llama
from starlette.concurrency import run_in_threadpool

# --- ⚙️ LOGGING CONFIGURATION ---
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%H:%M:%S",
)
logger = logging.getLogger("MetanthropicNode")


# --- 🧱 HARDWARE LIMITS (14GB CEILING) ---
def set_memory_limit() -> None:
    """Cap this process's virtual address space at 14 GiB.

    A hard RLIMIT_AS ceiling makes an oversized allocation fail inside
    Python (MemoryError) rather than letting the host's OOM killer take
    the whole node down.

    NOTE(review): RLIMIT_AS is not honored identically on every platform
    (e.g. macOS); assumed to run on Linux — confirm deployment target.
    """
    limit_bytes = 14 * 1024 * 1024 * 1024  # 14 GiB, soft == hard
    resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))
    logger.info("🧱 MEMORY HARD LIMIT SET: 14.0 GB")


set_memory_limit()

# --- 🔧 ENGINE CONFIGURATION ---
SOURCE_REPO = "metanthropic/metanthropic-phi3-encrypted"
SOURCE_FILE = "metanthropic-phi3-v1.mguf"
# Decrypted weights land on the tmpfs RAM disk so they never touch persistent storage.
TEMP_DECRYPTED = "/dev/shm/metanthropic_v3_16.gguf"

# --- 🔑 SECURITY SECRETS ---
HF_TOKEN = os.environ.get("HF_TOKEN")
DECRYPTION_KEY = os.environ.get("DECRYPTION_KEY")
API_KEY_NAME = "x-metanthropic-key"
# auto_error=False: a missing header reaches get_api_key as None and is
# rejected there with 403, instead of FastAPI's default 403 short-circuit.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# --- 🚦 BATCH CONTROLLER (SEMAPHORE) ---
# Allows 4 concurrent users. User #5 waits.
# NOTE(review): this line is a corrupted one-line paste of the remainder of the
# module, and it is TRUNCATED, not just reflowed: the text jumps from the middle
# of initialize_engine()'s `struct.unpack("` call straight into an f-string
# fragment of a chat prompt ("...<|end|>\n"), so everything in between — the
# header-length format string, the chunked AES-GCM decryption loop, the
# Llama(...) model load, and the chat endpoint's decorator/signature/request
# parsing — appears to have been lost in extraction. Recover the original file
# before editing; do not attempt to reconstruct the missing span from this residue.
#
# Pieces that are visibly present (TODO confirm against the original file):
#   - MAX_CONCURRENT_USERS / BATCH_SEMAPHORE: asyncio.Semaphore(4) used below to
#     gate concurrent inference; additional requests queue on the semaphore.
#   - get_api_key: FastAPI security dependency — 500 if DECRYPTION_KEY is unset,
#     returns the key when the x-metanthropic-key header matches it, else 403.
#     (NOTE(review): reusing the decryption key as the API key couples the two
#     secrets — presumably intentional, but worth confirming.)
#   - initialize_engine (truncated): returns True on a /dev/shm cache hit,
#     False when HF_TOKEN/DECRYPTION_KEY are missing; otherwise logs in to
#     Hugging Face, downloads the encrypted .mguf, derives an AESGCM key from
#     the hex DECRYPTION_KEY, reads a 12-byte nonce, then is cut off mid
#     struct.unpack of what is presumably a header length.
#   - tail of a chat endpoint: builds a prompt ending in "<|assistant|>\n",
#     maps parse errors to HTTP 400, then under BATCH_SEMAPHORE runs the llm in
#     a threadpool (stop=["<|end|>"], echo=False) and returns an OpenAI-style
#     "chat.completion" dict; inference failures are logged and mapped to 500.
#   - __main__ guard: uvicorn serving `app` on 0.0.0.0:7860.
MAX_CONCURRENT_USERS = 4 BATCH_SEMAPHORE = asyncio.Semaphore(MAX_CONCURRENT_USERS) app = FastAPI(title="Metanthropic Sovereign Node") # --- 🔒 SECURITY HANDSHAKE --- async def get_api_key(api_key: str = Security(api_key_header)): if not DECRYPTION_KEY: raise HTTPException(status_code=500, detail="Node Config Error") if api_key == DECRYPTION_KEY: return api_key raise HTTPException(status_code=403, detail="Access Denied") # --- 🛠️ BOOTLOADER (DECRYPTION) --- def initialize_engine(): if os.path.exists(TEMP_DECRYPTED): logger.info("⚡ RAM-Disk Hit: Model resident.") return True if not HF_TOKEN or not DECRYPTION_KEY: logger.error("❌ CRITICAL: Missing Secrets") return False try: login(token=HF_TOKEN) logger.info(f"⏳ Downloading & Decrypting...") path = hf_hub_download(repo_id=SOURCE_REPO, filename=SOURCE_FILE, local_dir=".") key = bytes.fromhex(DECRYPTION_KEY) aes = AESGCM(key) with open(path, "rb") as f_in, open(TEMP_DECRYPTED, "wb") as f_out: nonce = f_in.read(12) h_len = struct.unpack("\n{msg['content']}<|end|>\n" prompt += "<|assistant|>\n" except Exception as e: raise HTTPException(status_code=400, detail=f"Bad Request: {str(e)}") # ⚡ THE BATCH GATE ⚡ async with BATCH_SEMAPHORE: try: output = await run_in_threadpool( llm, prompt, max_tokens=max_tokens, temperature=temperature, stop=["<|end|>"], echo=False ) return { "id": output["id"], "object": "chat.completion", "created": output["created"], "model": "metanthropic-phi3-parallel", "choices": [ { "index": 0, "message": { "role": "assistant", "content": output['choices'][0]['text'].strip() }, "finish_reason": output['choices'][0]['finish_reason'] } ], "usage": output["usage"] } except Exception as e: logger.error(f"Inference Error: {e}") raise HTTPException(status_code=500, detail="Neural Core Error") if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")