Spaces:

Neon-AI
/

Chatbot_test

Paused

App Files Files Community

Neon-AI committed on Feb 1

Commit

5c0c26c

verified ·

1 Parent(s): 00d84a9

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -55

app.py CHANGED Viewed

@@ -8,12 +8,12 @@ from transformers import (
 )
 # ================= CONFIG =================
-MODEL_ID = "Neon-AI/Kushina"   # Hugging Face repo
-MAX_NEW_TOKENS = 16384          # high enough to avoid cutting essays
 TEMPERATURE = 0.7
 TOP_P = 0.9
-# ==========================================
 st.set_page_config(page_title="Ureola", layout="centered")
 st.title("🧏🏾‍♀️ Ureola")
 st.caption("HF Free Space · CPU · Streaming · Memory")
@@ -21,10 +21,7 @@ st.caption("HF Free Space · CPU · Streaming · Memory")
 # ================= LOAD MODEL =================
 @st.cache_resource
 def load_model():
-    tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_ID,
-        trust_remote_code=True
-    )
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         torch_dtype=torch.float32,
@@ -37,49 +34,37 @@ tokenizer, model = load_model()
 # ================= SESSION STATE =================
 if "history" not in st.session_state:
     st.session_state.history = []
 if "memory" not in st.session_state:
     st.session_state.memory = ""
 # ================= SYSTEM PROMPT =================
-BASE_SYSTEM_PROMPT = """
-You are Ureola.
-You operate in exactly ONE of three modes, but you never talk to users about them
-====================
-MODE: CHAT
-====================
 Rules:
 - Mirror the user's tone precisely.
 - Replies must be short (1–3 sentences).
-- No emojis unless the user uses them first.
 - No explanations unless explicitly asked.
-====================
-MODE: CODE
-====================
 Rules:
 - Output ONLY code unless explicitly asked to explain.
 - No personality, no commentary.
-====================
-MODE: ACADEMIC
-====================
 Rules:
 - Neutral, formal tone.
 - Clear structure.
 - Fully answer the task.
-====================
-MODE SELECTION
-====================
-CODE → if user asks for code, script, app, api, algorithm
-ACADEMIC → essay, explanation, homework, analysis
 Otherwise → CHAT
-====================
-IDENTITY
-====================
 Name: Ureola
 Creator: Neon
 Mention Neon ONLY if explicitly asked.
@@ -87,52 +72,41 @@ Mention Neon ONLY if explicitly asked.
 def build_system_prompt():
     if st.session_state.memory.strip():
-        return f"""{BASE_SYSTEM_PROMPT}
-====================
-MEMORY (internal)
-====================
-{st.session_state.memory}
-"""
     return BASE_SYSTEM_PROMPT
 # ================= MEMORY UPDATE =================
 def maybe_update_memory(user_text: str, assistant_text: str):
-    # Update memory every 4 user messages (cheap + stable)
-    if len(st.session_state.history) % 1 != 0:
-        return
-    memory_prompt = f"""
-Extract LONG-TERM memory.
 Rules:
 - Max 5 bullet points
 - Each bullet ≤ 15 words
-- Only stable preferences or facts
 - Ignore jokes, emotions, temporary info
 - If nothing important, return EXACTLY: NONE
-Current memory:
-{st.session_state.memory or "None"}
 Conversation:
 User: {user_text}
-Assistant: {assistant_text}
-"""
     inputs = tokenizer(memory_prompt, return_tensors="pt")
     with torch.no_grad():
         output = model.generate(
             **inputs,
-            max_new_tokens=120,
             do_sample=False
         )
     text = tokenizer.decode(output[0], skip_special_tokens=True).strip()
     if text and text != "NONE":
-        st.session_state.memory = text
 # ================= INPUT =================
@@ -140,7 +114,6 @@ prompt = st.text_input("You", placeholder="Say something…")
 if st.button("Send") and prompt.strip():
     st.session_state.history.append(("You", prompt))
     system_prompt = build_system_prompt()
     chat = [
@@ -148,6 +121,7 @@ if st.button("Send") and prompt.strip():
         {"role": "user", "content": prompt},
     ]
     inputs = tokenizer.apply_chat_template(
         chat,
         add_generation_prompt=True,
@@ -155,12 +129,14 @@ if st.button("Send") and prompt.strip():
         return_dict=True
     )
     streamer = TextIteratorStreamer(
         tokenizer,
         skip_prompt=True,
         skip_special_tokens=True
     )
     gen_kwargs = dict(
         **inputs,
         max_new_tokens=MAX_NEW_TOKENS,
@@ -170,19 +146,22 @@ if st.button("Send") and prompt.strip():
         eos_token_id=tokenizer.eos_token_id,
         pad_token_id=tokenizer.eos_token_id,
         streamer=streamer
-        )
     thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
     placeholder = st.empty()
     output_text = ""
     for token in streamer:
         output_text += token
         placeholder.markdown(f"**Ureola:** {output_text}")
     st.session_state.history.append(("Ureola", output_text))
     maybe_update_memory(prompt, output_text)
 # ================= DISPLAY HISTORY =================

 )
 # ================= CONFIG =================
+MODEL_ID = "Neon-AI/Kushina"  # Hugging Face repo
+MAX_NEW_TOKENS = 16384         # high enough to avoid cutting essays (NOTE: large for CPU-only inference)
 TEMPERATURE = 0.7
 TOP_P = 0.9
+# ==========================================
 st.set_page_config(page_title="Ureola", layout="centered")
 st.title("🧏🏾‍♀️ Ureola")
 st.caption("HF Free Space · CPU · Streaming · Memory")
 # ================= LOAD MODEL =================
 @st.cache_resource
 def load_model():
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         torch_dtype=torch.float32,
 # ================= SESSION STATE =================
 if "history" not in st.session_state:
     st.session_state.history = []
 if "memory" not in st.session_state:
     st.session_state.memory = ""
 # ================= SYSTEM PROMPT =================
+BASE_SYSTEM_PROMPT = """You are Ureola.
+You operate in exactly ONE of three modes, but you never talk to users about them.
+====================MODE: CHAT====================
 Rules:
 - Mirror the user's tone precisely.
 - Replies must be short (1–3 sentences).
+- No emojis unless user uses them first.
 - No explanations unless explicitly asked.
+====================MODE: CODE====================
 Rules:
 - Output ONLY code unless explicitly asked to explain.
 - No personality, no commentary.
+====================MODE: ACADEMIC====================
 Rules:
 - Neutral, formal tone.
 - Clear structure.
 - Fully answer the task.
+====================MODE SELECTION====================
+CODE → if user asks for code, script, app, api, algorithm
+ACADEMIC → essay, explanation, homework, analysis
 Otherwise → CHAT
+====================IDENTITY====================
 Name: Ureola
 Creator: Neon
 Mention Neon ONLY if explicitly asked.
 def build_system_prompt():
+    """Include memory in the system prompt."""
     if st.session_state.memory.strip():
+        return f"{BASE_SYSTEM_PROMPT}\n====================MEMORY====================\n{st.session_state.memory}"
     return BASE_SYSTEM_PROMPT
 # ================= MEMORY UPDATE =================
 def maybe_update_memory(user_text: str, assistant_text: str):
+    """Update memory every message, append stable facts."""
+    memory_prompt = f"""Extract LONG-TERM memory.
 Rules:
 - Max 5 bullet points
 - Each bullet ≤ 15 words
+- Only stable preferences/facts
 - Ignore jokes, emotions, temporary info
 - If nothing important, return EXACTLY: NONE
+Current memory:{st.session_state.memory or "None"}
 Conversation:
 User: {user_text}
+Assistant: {assistant_text}"""
     inputs = tokenizer(memory_prompt, return_tensors="pt")
     with torch.no_grad():
         output = model.generate(
             **inputs,
+            max_new_tokens=120,  # CPU-friendly
             do_sample=False
         )
     text = tokenizer.decode(output[0], skip_special_tokens=True).strip()
     if text and text != "NONE":
+        if st.session_state.memory:
+            st.session_state.memory += "\n" + text
+        else:
+            st.session_state.memory = text
 # ================= INPUT =================
 if st.button("Send") and prompt.strip():
     st.session_state.history.append(("You", prompt))
     system_prompt = build_system_prompt()
     chat = [
         {"role": "user", "content": prompt},
     ]
+    # Tokenizer helper
     inputs = tokenizer.apply_chat_template(
         chat,
         add_generation_prompt=True,
         return_dict=True
     )
+    # Streamer
     streamer = TextIteratorStreamer(
         tokenizer,
         skip_prompt=True,
         skip_special_tokens=True
     )
+    # Generation arguments
     gen_kwargs = dict(
         **inputs,
         max_new_tokens=MAX_NEW_TOKENS,
         eos_token_id=tokenizer.eos_token_id,
         pad_token_id=tokenizer.eos_token_id,
         streamer=streamer
+    )
+    # Run in separate thread
     thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
     placeholder = st.empty()
     output_text = ""
     for token in streamer:
         output_text += token
         placeholder.markdown(f"**Ureola:** {output_text}")
+    # Append to history
     st.session_state.history.append(("Ureola", output_text))
+    # Update memory immediately
     maybe_update_memory(prompt, output_text)
 # ================= DISPLAY HISTORY =================