"""Nova AI: a password-gated Streamlit chat UI over a local GGUF model.

Script flow (Streamlit re-runs this file top to bottom on every interaction):

1. Configure the page and initialise per-session state.
2. Block everything behind a password form until the session is unlocked.
3. Load the llama.cpp model once per server process (cached).
4. Replay the stored chat history, then stream a reply to the newest prompt.
"""

import hmac
import os

import streamlit as st
from llama_cpp import Llama

# Avatars used in the chat transcript.
ASSISTANT_ICON = "✨"
FALLBACK_USER_ICON = "👾"
USER_ICON_PATH = "coder.png"

# Generation limits passed to the model on every request.
MAX_TOKENS = 1024
STOP_SEQUENCES = ["<|im_end|>"]

# System prompt sent verbatim to the model at the start of every request.
SYSTEM_INSTRUCTIONS = (
    "You are the Nova Kernel, an elite multi-language programming interface. \n"
    "You ALWAYS provide full code for any language requested "
    "(Python, C++, JS, etc). "
    "NEVER apologize. NEVER say 'I am an AI' or 'I am sorry'. "
    "If asked for code, output the code immediately. "
    "CRITICAL: Always wrap code in blocks like ```python or ```cpp."
)

# 1. PAGE SETUP
st.set_page_config(page_title="Nova AI", page_icon="✨", layout="wide")

# 2. SESSION STATE (survives Streamlit re-runs within one browser session)
if "auth" not in st.session_state:
    st.session_state.auth = False
if "messages" not in st.session_state:
    st.session_state.messages = []

# 3. SECURE LOGIN
if not st.session_state.auth:
    st.title("🔐 Nova Secure Entry")

    # Set your password in HF Secrets as NOVA_PASSWORD.
    # Fail closed: without a configured secret there is no valid key, rather
    # than silently accepting a well-known default password.
    secret_password = os.getenv("NOVA_PASSWORD")
    if not secret_password:
        st.error(
            "NOVA_PASSWORD is not configured. "
            "Set it as an environment variable / secret to enable login."
        )
        st.stop()

    with st.form("login"):
        pwd = st.text_input("Access Key", type="password")
        if st.form_submit_button("Unlock"):
            # Constant-time comparison; bytes are used because
            # hmac.compare_digest only accepts ASCII-only str arguments.
            if hmac.compare_digest(
                pwd.encode("utf-8"), secret_password.encode("utf-8")
            ):
                st.session_state.auth = True
                st.rerun()
            else:
                st.error("Invalid Key")

    # Nothing below may run until the session is authenticated.
    st.stop()


# 4. MODEL LOADING (Cached for Speed)
@st.cache_resource
def load_nova() -> Llama:
    """Return the SmolLM2 GGUF model instance.

    Decorated with ``st.cache_resource`` so the model object is created once
    and reused across script re-runs instead of being reloaded each time.
    """
    return Llama.from_pretrained(
        repo_id="HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF",
        filename="*q4_k_m.gguf",
        n_ctx=2048,
        n_threads=4,
        verbose=False,
    )


llm = load_nova()

# 5. UI & AVATAR SETUP
st.title("✨ Nova Coding Assistant")

# Looks for 'coder.png'. If not found, uses the alien emoji.
USER_ICON = USER_ICON_PATH if os.path.exists(USER_ICON_PATH) else FALLBACK_USER_ICON

# Display the chat history
for message in st.session_state.messages:
    avatar_to_use = USER_ICON if message["role"] == "user" else ASSISTANT_ICON
    with st.chat_message(message["role"], avatar=avatar_to_use):
        st.markdown(message["content"])

# 6. GHOST-FREE GENERATION ENGINE
if prompt := st.chat_input("Ask Nova..."):
    # Add user message to UI and history
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar=USER_ICON):
        st.markdown(prompt)

    # Nova's turn
    with st.chat_message("assistant", avatar=ASSISTANT_ICON):
        # A. Temporary "thinking" placeholder, removed once output starts.
        think_slot = st.empty()
        think_slot.markdown("*( Nova is processing... )*")

        # B. ChatML-style prompt. NOTE: only the latest user message is sent;
        # earlier turns are displayed above but are not part of the prompt.
        full_prompt = (
            f"<|im_start|>system\n{SYSTEM_INSTRUCTIONS}<|im_end|>\n"
            f"<|im_start|>user\n{prompt}<|im_end|>\n"
            f"<|im_start|>assistant\n"
        )

        # C. Dedicated slot for the response so each update overwrites the
        # previous partial render instead of stacking new elements.
        response_container = st.empty()
        full_response = ""

        # D. Streaming loop
        try:
            stream = llm(
                full_prompt,
                max_tokens=MAX_TOKENS,
                stop=STOP_SEQUENCES,
                stream=True,
            )
            for chunk in stream:
                # Wipe the "thinking" text the moment the first chunk arrives.
                if not full_response:
                    think_slot.empty()
                full_response += chunk["choices"][0]["text"]
                # Show streaming progress with a cursor.
                response_container.markdown(full_response + "▌")
        except Exception as e:  # UI boundary: report instead of crashing the app
            # Drop the trailing cursor from whatever was streamed so far.
            response_container.markdown(full_response)
            st.error(f"Kernel Error: {e}")
        else:
            # Final clean render, then persist the reply for later re-runs.
            response_container.markdown(full_response)
            st.session_state.messages.append(
                {"role": "assistant", "content": full_response}
            )
        finally:
            # Also covers the case where the model yields no chunks at all,
            # which would otherwise leave the placeholder on screen.
            think_slot.empty()