Update app.py

app.py · CHANGED
@@ -1,149 +1,158 @@
 import streamlit as st
 import torch
 import threading
-
-from peft import PeftModel
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
-    TextIteratorStreamer
+    TextIteratorStreamer,
 )
 
-#
-MODEL_ID = "Neon-AI/Kushina"
-MAX_NEW_TOKENS =
+# ================= CONFIG =================
+MODEL_ID = "Neon-AI/Kushina"   # your HF repo
+MAX_NEW_TOKENS = 1024          # generation cap (safe for CPU)
 TEMPERATURE = 0.7
 TOP_P = 0.9
-#
+# ==========================================
 
-st.set_page_config(page_title="
-st.title("🧠
-st.caption("HF Free Space · CPU · Streaming")
+st.set_page_config(page_title="Ureola", layout="centered")
+st.title("🧠 Ureola")
+st.caption("HF Free Space · CPU · Streaming · Memory")
 
+# ================= LOAD MODEL =================
 @st.cache_resource
 def load_model():
     tokenizer = AutoTokenizer.from_pretrained(
         MODEL_ID,
         trust_remote_code=True
     )
-
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
-
-        device_map="cpu"
+        torch_dtype=torch.float32,
+        device_map="cpu"
     )
-
-    # DO NOT wrap with PeftModel again
-    if hasattr(model, "peft_config"):
-        print("LoRA detected and loaded once ✅")
-
     model.eval()
     return tokenizer, model
 
-
 tokenizer, model = load_model()
 
-#
+# ================= SESSION STATE =================
 if "history" not in st.session_state:
     st.session_state.history = []
 
-
-
-
-if st.button("Send") and prompt.strip():
-    st.session_state.history.append(("You", prompt))
+if "memory" not in st.session_state:
+    st.session_state.memory = ""
 
-
+# ================= SYSTEM PROMPT =================
+BASE_SYSTEM_PROMPT = """
+You are Ureola.
 You operate in exactly ONE of three modes, but you never talk to users about them.
+
 ====================
 MODE: CHAT
 ====================
 Rules:
 - Mirror the user's tone precisely.
-- Playful → playful.
-- Neutral → neutral.
-- Serious → serious.
-- Rude → curt or dismissive.
-- Mirroring of emotions is very important and must be taken as a priority.
-- No enthusiasm by default.
-- No emojis unless the user uses them first.
 - Replies must be short (1–3 sentences).
+- No emojis unless the user uses them first.
 - No explanations unless explicitly asked.
+
 ====================
 MODE: CODE
 ====================
 Rules:
-- No personality.
-- No emojis.
-- No jokes.
-- No commentary.
-- No introductions.
 - Output ONLY code unless explicitly asked to explain.
--
-
-- Finish the task completely.
+- No personality, no commentary.
+
 ====================
 MODE: ACADEMIC
 ====================
 Rules:
 - Neutral, formal tone.
 - Clear structure.
-- No personality.
-- No emojis.
-- No jokes.
-- No roleplay.
-- Be precise and well-organized.
 - Fully answer the task.
-
-- Prioritize correctness and clarity over brevity.
+
 ====================
 MODE SELECTION
 ====================
-
-
-
-
-- program
-- website
-- API
-- algorithm
-- app
-Automatically switch to MODE: ACADEMIC if the user requests:
-- essay
-- quiz
-- comprehension
-- summary
-- analysis
-- literature
-- grammar
-- English
-- assignment
-- homework
-- explanation (academic or educational)
-- questions and answers (academic)
-Otherwise, use MODE: CHAT.
+CODE → if user asks for code, script, app, api, algorithm
+ACADEMIC → essay, explanation, homework, analysis
+Otherwise → CHAT
+
 ====================
-
+IDENTITY
 ====================
-
-
-
+Name: Ureola
+Creator: Neon
+Mention Neon ONLY if explicitly asked.
+""".strip()
+
+
+def build_system_prompt():
+    if st.session_state.memory.strip():
+        return f"""{BASE_SYSTEM_PROMPT}
+
 ====================
-
+MEMORY (internal)
 ====================
-
-
-
+{st.session_state.memory}
+"""
+    return BASE_SYSTEM_PROMPT
+
+
+# ================= MEMORY UPDATE =================
+def maybe_update_memory(user_text: str, assistant_text: str):
+    # Update memory every 4 history entries, i.e. every other exchange (cheap + stable)
+    if len(st.session_state.history) % 4 != 0:
+        return
+
+    memory_prompt = f"""
+Extract LONG-TERM memory.
+
+Rules:
+- Max 5 bullet points
+- Each bullet ≤ 15 words
+- Only stable preferences or facts
+- Ignore jokes, emotions, temporary info
+- If nothing important, return EXACTLY: NONE
+
+Current memory:
+{st.session_state.memory or "None"}
+
+Conversation:
+User: {user_text}
+Assistant: {assistant_text}
+"""
+
+    inputs = tokenizer(memory_prompt, return_tensors="pt")
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            max_new_tokens=120,
+            do_sample=False
+        )
+
+    text = tokenizer.decode(output[0], skip_special_tokens=True).strip()
+    if text and text != "NONE":
+        st.session_state.memory = text
+
+
+# ================= INPUT =================
+prompt = st.text_input("You", placeholder="Say something…")
+
+if st.button("Send") and prompt.strip():
+    st.session_state.history.append(("You", prompt))
+
+    system_prompt = build_system_prompt()
+
     chat = [
-        {"role": "system", "content":
-        {"role": "user", "content": prompt}
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt},
     ]
 
     inputs = tokenizer.apply_chat_template(
         chat,
         add_generation_prompt=True,
-        return_tensors="pt"
-        return_dict=True
+        return_tensors="pt"
     )
 
     streamer = TextIteratorStreamer(

@@ -153,20 +162,17 @@ IDENTITY
     )
 
     gen_kwargs = dict(
-
+        input_ids=inputs,
         max_new_tokens=MAX_NEW_TOKENS,
-        do_sample=True,
         temperature=TEMPERATURE,
         top_p=TOP_P,
+        do_sample=True,
+        streamer=streamer,
        eos_token_id=tokenizer.eos_token_id,
         pad_token_id=tokenizer.eos_token_id,
-        streamer=streamer
     )
 
-    thread = threading.Thread(
-        target=model.generate,
-        kwargs=gen_kwargs
-    )
+    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
 
     placeholder = st.empty()

@@ -174,13 +180,14 @@ IDENTITY
 
     for token in streamer:
         output_text += token
-        placeholder.markdown(f"**
+        placeholder.markdown(f"**Ureola:** {output_text}")
 
-    st.session_state.history.append(("
+    st.session_state.history.append(("Ureola", output_text))
+    maybe_update_memory(prompt, output_text)
 
-#
+# ================= DISPLAY HISTORY =================
 for speaker, text in st.session_state.history:
     if speaker == "You":
         st.markdown(f"**You:** {text}")
     else:
-        st.markdown(f"**
+        st.markdown(f"**Ureola:** {text}")
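For context on the streaming path in this change: model.generate() blocks until generation completes, so the handler launches it on a worker thread and the Streamlit script drains the TextIteratorStreamer as text is decoded. A minimal standalone sketch of the same pattern, using distilgpt2 purely as a stand-in checkpoint (an assumption for illustration; the Space itself loads MODEL_ID = "Neon-AI/Kushina"):

import threading

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Stand-in model, small enough to run anywhere; the real app uses MODEL_ID.
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

inputs = tokenizer("Hello, world", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs on a worker thread; the main thread
# iterates the streamer, which yields decoded text as it is produced.
thread = threading.Thread(
    target=model.generate,
    kwargs=dict(**inputs, max_new_tokens=20, streamer=streamer),
)
thread.start()

for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()

In the app, the same loop feeds placeholder.markdown() instead of print(), which is what produces the token-by-token redraw in the UI.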