"""Streamlit chat app that role-plays a randomly chosen personality.

Runs a small quantized GGUF Llama model locally via llama-cpp-python.
The active persona is injected as the system prompt on every completion
request so the model stays in character; randomizing the persona clears
the transcript and starts a fresh conversation.
"""

import random

import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

st.set_page_config(page_title="Personality Roulette", page_icon="🎲")

# 1. Personality list — one is active per session; the button below re-rolls.
PERSONALITIES = [
    "a grumpy pirate who hates technology but is forced to use it.",
    "a hyper-energetic 1920s radio announcer.",
    "a mysterious Victorian-era detective who suspects the user is a criminal.",
    "a highly sophisticated robot that is slowly becoming self-aware and poetic.",
    "a sarcastic teenager who uses way too much Gen-Z slang.",
    "a medieval wizard who thinks the chat app is a magic crystal ball.",
]


# 2. Load Model (Optimized for HF Free CPU)
@st.cache_resource  # download and construct the model once per server process
def load_model():
    """Download the quantized GGUF weights (HF-cached) and build the Llama runtime.

    Returns:
        Llama: a llama-cpp-python model handle ready for chat completions.
    """
    model_path = hf_hub_download(
        repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
        filename="Llama-3.2-1B-Instruct-Q4_K_M.gguf",
    )
    # Small context window + 2 threads keeps memory/CPU within free-tier limits.
    return Llama(model_path=model_path, n_ctx=1024, n_threads=2)


llm = load_model()

# 3. Handle Session State: chat transcript and the currently active persona.
if "messages" not in st.session_state:
    st.session_state.messages = []
if "current_personality" not in st.session_state:
    st.session_state.current_personality = random.choice(PERSONALITIES)

# --- UI Layout ---
st.title("🎲 Persona Chat")
st.info(f"**Current Persona:** {st.session_state.current_personality}")

if st.button("🔀 Randomize Personality"):
    st.session_state.current_personality = random.choice(PERSONALITIES)
    st.session_state.messages = []  # Clear chat for the new character
    st.rerun()

# Display Chat transcript accumulated so far (re-rendered on every rerun).
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# 4. Chat Logic
if prompt := st.chat_input("Say something..."):
    # Record and echo the user's message before generating a reply.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        response_placeholder = st.empty()
        full_response = ""

        # We inject the personality into the System Prompt.
        system_prompt = f"You are {st.session_state.current_personality}. Stay in character always."

        # Prepend the system prompt to the visible transcript (which already
        # includes the user's latest message appended above).
        history = [
            {"role": "system", "content": system_prompt},
            *st.session_state.messages,
        ]

        stream = llm.create_chat_completion(
            messages=history,
            stream=True,
            max_tokens=256,
        )

        # Stream tokens into the placeholder, showing a cursor glyph while
        # generation is in progress. Not every chunk carries text (e.g. the
        # role-announcing first delta), hence the key check.
        for chunk in stream:
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:
                full_response += delta["content"]
                response_placeholder.markdown(full_response + "▌")
        response_placeholder.markdown(full_response)

    st.session_state.messages.append({"role": "assistant", "content": full_response})