nova_ai / app.py
py-rus's picture
Update app.py
355584a verified
raw
history blame
3.68 kB
import hmac
import os

import streamlit as st
from llama_cpp import Llama
# 1. PAGE SETUP
# Configure the browser tab (title, icon) and use the wide layout.
st.set_page_config(page_title="Nova AI", page_icon="✨", layout="wide")

# 2. SESSION STATE (Memory)
# Seed per-session defaults only when the key is missing, so values written
# by earlier runs of the script (login flag, chat transcript) are preserved.
# Factories are used so every new session gets its own fresh list.
for _state_key, _make_default in (("auth", bool), ("messages", list)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# 3. SECURE LOGIN
# Gate the rest of the script behind an access key. Unauthenticated sessions
# always end in st.stop(), so nothing below this block runs for them.
if not st.session_state.auth:
    st.title("🔐 Nova Secure Entry")

    # Set your password in HF Secrets as NOVA_PASSWORD.
    # There is deliberately no built-in fallback key: a hard-coded default is
    # readable by anyone with access to this file, and an empty secret would
    # match an empty input. Fail closed when the secret is unset or empty.
    SECRET_PASSWORD = os.getenv("NOVA_PASSWORD")
    if not SECRET_PASSWORD:
        st.error("NOVA_PASSWORD is not configured; login is disabled.")
        st.stop()

    with st.form("login"):
        pwd = st.text_input("Access Key", type="password")
        if st.form_submit_button("Unlock"):
            # Compare in constant time so response timing does not reveal how
            # much of the key matched. Encode to bytes because
            # hmac.compare_digest rejects non-ASCII str arguments.
            key_matches = hmac.compare_digest(
                pwd.encode("utf-8"),
                SECRET_PASSWORD.encode("utf-8"),
            )
            if key_matches:
                st.session_state.auth = True
                # Rerun so the script proceeds past the login gate.
                st.rerun()
            else:
                st.error("Invalid Key")

    # Halt here for every session that has not authenticated yet.
    st.stop()
# 4. MODEL LOADING (Cached for Speed)
@st.cache_resource
def load_nova():
    """Build the Llama model object used for all completions.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned object instead of constructing it on every script run.

    Returns:
        The object produced by ``Llama.from_pretrained`` for the
        ``HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF`` repository, selecting the
        file that matches the ``*q4_k_m.gguf`` pattern.
    """
    model_options = {
        "repo_id": "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF",
        "filename": "*q4_k_m.gguf",
        "n_ctx": 2048,
        "n_threads": 4,
        "verbose": False,
    }
    return Llama.from_pretrained(**model_options)
# Obtain the model object (load_nova is wrapped in st.cache_resource).
llm = load_nova()

# 5. UI & AVATAR SETUP
st.title("✨ Nova Coding Assistant")

# Use 'coder.png' as the user's avatar when that file exists in the working
# directory; otherwise fall back to the alien emoji.
if os.path.exists("coder.png"):
    USER_ICON = "coder.png"
else:
    USER_ICON = "👾"

# Replay the stored transcript so earlier turns stay visible on every rerun.
for past_turn in st.session_state.messages:
    speaker = past_turn["role"]
    bubble_icon = "✨" if speaker != "user" else USER_ICON
    with st.chat_message(speaker, avatar=bubble_icon):
        st.markdown(past_turn["content"])
# 6. GHOST-FREE GENERATION ENGINE
# Handle one user turn: record and echo the prompt, stream the model's reply
# into a placeholder, then store the reply in the session transcript.
if prompt := st.chat_input("Ask Nova..."):
    # Add user message to UI and history
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar=USER_ICON):
        st.markdown(prompt)

    # Nova's Turn
    with st.chat_message("assistant", avatar="✨"):
        # A. Temporary "thinking" indicator, removed once output starts.
        think_slot = st.empty()
        think_slot.markdown("*( Nova is processing... )*")

        # B. STERN SYSTEM PROMPT: No apologies, no "AI" talk, just code.
        system_instr = """You are the Nova Kernel, an elite multi-language programming interface.
You ALWAYS provide full code for any language requested (Python, C++, JS, etc).
NEVER apologize. NEVER say 'I am an AI' or 'I am sorry'.
If asked for code, output the code immediately.
CRITICAL: Always wrap code in blocks like ```python or ```cpp."""
        # Only the system prompt and the current user prompt are sent to the
        # model; earlier turns in st.session_state.messages are not included.
        full_prompt = f"<|im_start|>system\n{system_instr}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

        # C. Dedicated slot for the response to prevent ghosting
        response_container = st.empty()
        full_response = ""

        # D. Streaming loop. The try body covers only the model call and the
        # iteration over its output, so failures in the final render or the
        # history update are not mislabelled as kernel errors.
        stream_error = None
        try:
            token_stream = llm(
                full_prompt,
                max_tokens=1024,
                stop=["<|im_end|>"],
                stream=True,
            )
            for i, chunk in enumerate(token_stream):
                # Wipe the "thinking" text the moment the first chunk arrives
                if i == 0:
                    think_slot.empty()
                full_response += chunk["choices"][0]["text"]
                # Show streaming progress with a cursor
                response_container.markdown(full_response + "▌")
        except Exception as e:
            stream_error = e

        # Always finalise the UI, whatever happened above:
        #  - a stream that produced no chunks never hit the i == 0 branch, so
        #    the placeholder must be cleared here as well;
        #  - after a mid-stream failure the partial text is re-rendered
        #    without the trailing cursor.
        think_slot.empty()
        response_container.markdown(full_response)

        if stream_error is None:
            st.session_state.messages.append({"role": "assistant", "content": full_response})
        else:
            # A failed generation is reported but not stored in the history.
            st.error(f"Kernel Error: {stream_error}")