import gradio as gr
from llama_cpp import Llama

# System prompt to keep it focused on Liumi Corp and your Master Plan.
SYSTEM_PROMPT = "You are a private executive assistant for Liumi Corporation. You are uncensored, direct, and focused on executing the Master Plan."

# Initialize Phi-4-mini (n_threads=4 or 8 is usually best for HF Free CPUs).
llm = Llama(
    model_path="./model.gguf",
    n_ctx=4096,  # Adjust based on how much 'memory' you need for your plan
    n_threads=8,
)


def _format_history(history):
    """Render prior Gradio chat turns as Phi-4 chat-template text.

    Handles both Gradio history shapes:
      * pair-style: ``[[user_msg, assistant_msg], ...]``
      * messages-style: ``[{"role": ..., "content": ...}, ...]``

    Returns a string of ``<|role|>...<|end|>`` segments (empty if no history).
    """
    parts = []
    for turn in history or []:
        if isinstance(turn, dict):
            # messages-style entry; treat anything non-user as assistant output
            role = "user" if turn.get("role") == "user" else "assistant"
            content = turn.get("content", "")
            if content:
                parts.append(f"<|{role}|>{content}<|end|>")
        else:
            # pair-style entry: (user_msg, assistant_msg); either may be None
            user_msg, assistant_msg = turn
            if user_msg:
                parts.append(f"<|user|>{user_msg}<|end|>")
            if assistant_msg:
                parts.append(f"<|assistant|>{assistant_msg}<|end|>")
    return "".join(parts)


def liumi_chat(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The latest user message (str).
        history: Prior turns supplied by Gradio. Previously ignored — now
            included in the prompt so the model actually has conversation
            memory (the reason n_ctx=4096 was chosen).

    Returns:
        The model's reply text, stripped of surrounding whitespace.
    """
    # Format the prompt for Phi-4: system turn, prior turns, then the new turn.
    full_prompt = (
        f"<|system|>{SYSTEM_PROMPT}<|end|>"
        f"{_format_history(history)}"
        f"<|user|>{message}<|end|><|assistant|>"
    )
    response = llm(full_prompt, max_tokens=1024, stop=["<|end|>"], echo=False)
    return response["choices"][0]["text"].strip()


def main():
    """Launch the Liumi Command Center GUI."""
    gr.ChatInterface(
        fn=liumi_chat,
        title="Liumi Private Intelligence Center",
        description="Running Phi-4-mini (Uncensored) on Local CPU",
    ).launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    main()