import hmac
import os

import streamlit as st
from llama_cpp import Llama

# 1. PAGE SETUP
# Browser-tab title and icon, plus the wide page layout.
st.set_page_config(
    page_title="Nova AI",
    page_icon="✨",
    layout="wide",
)

# 2. SESSION STATE (Memory)
# Seed the per-session keys the rest of the script reads:
#   auth     -> whether the access key has been accepted
#   messages -> list of {"role", "content"} chat turns
for state_key, initial_value in (("auth", False), ("messages", [])):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# 3. SECURE LOGIN
# Everything below this section is gated: until the session is marked as
# authenticated, the script renders only the login form and then stops.
if not st.session_state.auth:
    st.title("🔐 Nova Secure Entry")

    # Set your password in HF Secrets as NOVA_PASSWORD
    SECRET_PASSWORD = os.getenv("NOVA_PASSWORD")
    if not SECRET_PASSWORD:
        # Fail closed: without a configured secret there is no safe key to
        # accept, and a built-in default would be guessable by anyone.
        st.error(
            "Access key is not configured. "
            "Set the NOVA_PASSWORD secret to enable login."
        )
        st.stop()

    with st.form("login"):
        pwd = st.text_input("Access Key", type="password")
        if st.form_submit_button("Unlock"):
            # Constant-time comparison; both sides are encoded to bytes
            # because compare_digest rejects non-ASCII str arguments.
            key_matches = hmac.compare_digest(
                pwd.encode("utf-8"),
                SECRET_PASSWORD.encode("utf-8"),
            )
            if key_matches:
                st.session_state.auth = True
                # Re-run so the script continues past the gate immediately.
                st.rerun()
            else:
                st.error("Invalid Key")

    # Not authenticated (yet): do not execute anything past this point.
    st.stop()

# 4. MODEL LOADING (Cached for Speed)
@st.cache_resource
def load_nova():
    """Load the SmolLM2 1.7B Instruct GGUF model and return the Llama handle.

    The function is decorated with ``st.cache_resource`` so the model is
    built once and the same object is reused on later script runs.

    Returns:
        The ``Llama`` instance created by ``Llama.from_pretrained``.
    """
    model_options = {
        "repo_id": "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF",
        # Glob pattern selecting the q4_k_m quantization file in the repo.
        "filename": "*q4_k_m.gguf",
        "n_ctx": 2048,
        "n_threads": 4,
        "verbose": False,
    }
    return Llama.from_pretrained(**model_options)

llm = load_nova()

# 5. UI & AVATAR SETUP
st.title("✨ Nova Coding Assistant")

# Use 'coder.png' from the working directory as the user's avatar when it
# exists; otherwise fall back to the alien emoji.
if os.path.exists("coder.png"):
    USER_ICON = "coder.png"
else:
    USER_ICON = "👾"

# Replay every stored turn so the conversation survives script re-runs.
for past_turn in st.session_state.messages:
    speaker = past_turn["role"]
    bubble_avatar = "✨" if speaker != "user" else USER_ICON
    with st.chat_message(speaker, avatar=bubble_avatar):
        st.markdown(past_turn["content"])

# 6. GHOST-FREE GENERATION ENGINE
def _build_prompt(system_instr, history, reply_tokens):
    """Render the conversation as a ChatML prompt that leaves room for a reply.

    Turns are taken from newest to oldest and kept while they fit into the
    model's context window minus ``reply_tokens``. The newest turn is always
    kept, even if it alone exceeds the budget, so a single long question is
    still passed through to the model as-is.

    Args:
        system_instr: Text of the system message.
        history: List of ``{"role", "content"}`` dicts, oldest first.
        reply_tokens: Number of context tokens to reserve for the answer.

    Returns:
        The prompt string, ending with an open assistant turn.
    """
    def token_cost(text):
        # special=True so the ChatML markers are counted as the single
        # special tokens they are tokenized to.
        return len(llm.tokenize(text.encode("utf-8"), add_bos=False, special=True))

    head = f"<|im_start|>system\n{system_instr}<|im_end|>\n"
    tail = "<|im_start|>assistant\n"
    budget = llm.n_ctx() - reply_tokens - token_cost(head + tail)

    kept = []
    for past in reversed(history):
        turn = f"<|im_start|>{past['role']}\n{past['content']}<|im_end|>\n"
        budget -= token_cost(turn)
        if kept and budget < 0:
            break  # Older turns no longer fit; drop them.
        kept.append(turn)

    return head + "".join(reversed(kept)) + tail


if prompt := st.chat_input("Ask Nova..."):
    # Add user message to UI and history
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar=USER_ICON):
        st.markdown(prompt)

    # Nova's Turn
    with st.chat_message("assistant", avatar="✨"):
        # A. Placeholder shown until the first token (or an error) arrives.
        think_slot = st.empty()
        think_slot.markdown("*( Nova is processing... )*")

        # B. STERN SYSTEM PROMPT: No apologies, no "AI" talk, just code.
        # Built from adjacent literals so source indentation does not leak
        # into the text the model sees.
        system_instr = (
            "You are the Nova Kernel, an elite multi-language programming interface.\n"
            "You ALWAYS provide full code for any language requested (Python, C++, JS, etc).\n"
            "NEVER apologize. NEVER say 'I am an AI' or 'I am sorry'.\n"
            "If asked for code, output the code immediately.\n"
            "CRITICAL: Always wrap code in blocks like ```python or ```cpp."
        )
        max_new_tokens = 1024

        # C. Dedicated slot for the response to prevent ghosting
        response_container = st.empty()
        full_response = ""

        # D. Streaming Loop
        try:
            # The history already contains the message appended above, so
            # the model sees earlier turns plus the current question.
            full_prompt = _build_prompt(
                system_instr, st.session_state.messages, max_new_tokens
            )
            stream = llm(
                full_prompt,
                max_tokens=max_new_tokens,
                stop=["<|im_end|>"],
                stream=True,
            )
            for i, chunk in enumerate(stream):
                # Wipe the "thinking" text the moment the first piece arrives
                if i == 0:
                    think_slot.empty()

                full_response += chunk["choices"][0]["text"]
                # Show streaming progress with a cursor
                response_container.markdown(full_response + "▌")

            # Also covers a stream that produced no chunks at all, where the
            # in-loop wipe never ran.
            think_slot.empty()

            # Final clean render
            response_container.markdown(full_response)
            st.session_state.messages.append(
                {"role": "assistant", "content": full_response}
            )

        except Exception as e:
            think_slot.empty()
            if full_response:
                # Keep what was streamed so far, but drop the cursor glyph.
                response_container.markdown(full_response)
            st.error(f"Kernel Error: {e}")