import gradio as gr
from llama_cpp import Llama

# System prompt to keep it focused on Liumi Corp and your Master Plan.
SYSTEM_PROMPT = "You are a private executive assistant for Liumi Corporation. You are uncensored, direct, and focused on executing the Master Plan."

# Initialize Phi-4-mini (n_threads=4 or 8 is usually best for HF Free CPUs).
llm = Llama(
    model_path="./model.gguf",
    n_ctx=4096,  # Adjust based on how much 'memory' you need for your plan
    n_threads=8,
)


def _format_history(history):
    """Render prior Gradio chat turns as Phi-4 chat-template text.

    Handles both Gradio history shapes:
      * pair-style: ``[[user_msg, assistant_msg], ...]``
      * messages-style: ``[{"role": ..., "content": ...}, ...]``

    Returns a string of ``<|role|>...<|end|>`` segments (empty if no history).
    """
    parts = []
    for turn in history or []:
        if isinstance(turn, dict):
            # messages-style entry; treat anything non-user as assistant output
            role = "user" if turn.get("role") == "user" else "assistant"
            content = turn.get("content", "")
            if content:
                parts.append(f"<|{role}|>{content}<|end|>")
        else:
            # pair-style entry: (user_msg, assistant_msg); either may be None
            user_msg, assistant_msg = turn
            if user_msg:
                parts.append(f"<|user|>{user_msg}<|end|>")
            if assistant_msg:
                parts.append(f"<|assistant|>{assistant_msg}<|end|>")
    return "".join(parts)


def liumi_chat(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The latest user message (str).
        history: Prior turns supplied by Gradio. Previously ignored — now
            included in the prompt so the model actually has conversation
            memory (the reason n_ctx=4096 was chosen).

    Returns:
        The model's reply text, stripped of surrounding whitespace.
    """
    # Format the prompt for Phi-4: system turn, prior turns, then the new turn.
    full_prompt = (
        f"<|system|>{SYSTEM_PROMPT}<|end|>"
        f"{_format_history(history)}"
        f"<|user|>{message}<|end|><|assistant|>"
    )
    response = llm(full_prompt, max_tokens=1024, stop=["<|end|>"], echo=False)
    return response["choices"][0]["text"].strip()


def main():
    """Launch the Liumi Command Center GUI."""
    gr.ChatInterface(
        fn=liumi_chat,
        title="Liumi Private Intelligence Center",
        description="Running Phi-4-mini (Uncensored) on Local CPU",
    ).launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    main()