Spaces:

Neon-AI
/

Chatbot_test

Paused

App Files Community

Neon-AI committed on Jan 29

Commit

ffaae69

verified ·

1 Parent(s): 587c960

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -51

app.py CHANGED Viewed

@@ -1,11 +1,17 @@
 import streamlit as st
 import torch
 import threading
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 # ---------------- CONFIG ----------------
 MODEL_ID = "Neon-AI/Kushina"
-MAX_MODEL_TOKENS = 16384  # full context size
 TEMPERATURE = 0.7
 TOP_P = 0.9
 # ----------------------------------------
@@ -14,31 +20,40 @@ st.set_page_config(page_title="Niche AI", layout="centered")
 st.title("🧠 Niche AI")
 st.caption("HF Free Space · CPU · Streaming")
-# ---------- LOAD MODEL ----------
 @st.cache_resource
 def load_model():
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         dtype=torch.float32,
-        device_map="cpu"
     )
-    # Disable LoRA wrapping if present
     if hasattr(model, "peft_config"):
-        print("LoRA detected and already loaded ✅")
     model.eval()
     return tokenizer, model
 tokenizer, model = load_model()
-# ---------- SESSION STATE ----------
 if "history" not in st.session_state:
     st.session_state.history = []
-# ---------- INPUT ----------
 prompt = st.text_input("You", placeholder="Say something…")
-SYSTEM_INSTRUCTIONS = """You are Kushina.
 You operate in exactly ONE of two modes.
 ====================
 MODE: CHAT
@@ -49,7 +64,7 @@ Rules:
 - Neutral → neutral.
 - Serious → serious.
 - Rude → curt or dismissive.
-- Mirroring of emotions is very important and must be taken as priority.
 - No enthusiasm by default.
 - No emojis unless the user uses them first.
 - Replies must be short (1–3 sentences).
@@ -71,34 +86,42 @@ Rules:
 MODE SELECTION
 ====================
 Automatically switch to MODE: CODE if the user requests:
-- code, script, function, program, website, API, algorithm, app
 Otherwise, use MODE: CHAT.
 ====================
 IDENTITY
 ====================
 - Name: Kushina
 - Creator/Owner: Neon
-- Mention Neon ONLY if explicitly asked.
-"""
-def build_prompt(user_text: str):
     chat = [
-        {"role": "system", "content": SYSTEM_INSTRUCTIONS},
-        {"role": "user", "content": user_text}
     ]
-    return tokenizer.apply_chat_template(chat, add_generation_prompt=True, return_tensors="pt", return_dict=True)
-# ---------- GENERATE FUNCTION ----------
-def generate_response(inputs):
-    # Compute remaining tokens dynamically
-    current_tokens = inputs["input_ids"].shape[1]
-    max_new_tokens = max(1, MAX_MODEL_TOKENS - current_tokens)
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     gen_kwargs = dict(
         **inputs,
-        max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=TEMPERATURE,
         top_p=TOP_P,
@@ -107,40 +130,24 @@ def generate_response(inputs):
         streamer=streamer
     )
-    # Run generation in a separate thread
-    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
-    # Stream tokens into a buffer and only display complete sentences
-    buffer = ""
-    output_text = ""
     placeholder = st.empty()
-    sentence_endings = {".", "!", "?"}
     for token in streamer:
-        buffer += token
-        if any(buffer.rstrip().endswith(punct) for punct in sentence_endings):
-            output_text += buffer
-            placeholder.markdown(f"**Niche:** {output_text}")
-            buffer = ""
-    # Add any leftover text
-    if buffer:
-        output_text += buffer
         placeholder.markdown(f"**Niche:** {output_text}")
-    return output_text
-# ---------- HANDLE PROMPT ----------
-if st.button("Send") and prompt.strip():
-    st.session_state.history.append(("You", prompt))
-    inputs = build_prompt(prompt)
-    response_text = generate_response(inputs)
-    st.session_state.history.append(("Niche", response_text))
-# ---------- DISPLAY HISTORY ----------
 for speaker, text in st.session_state.history:
     if speaker == "You":
         st.markdown(f"**You:** {text}")
     else:
-        st.markdown(f"**Niche:** {text}")

 import streamlit as st
 import torch
 import threading
+from peft import PeftModel
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    TextIteratorStreamer
+)
 # ---------------- CONFIG ----------------
 MODEL_ID = "Neon-AI/Kushina"
+MAX_NEW_TOKENS = 16384
 TEMPERATURE = 0.7
 TOP_P = 0.9
 # ----------------------------------------
 st.title("🧠 Niche AI")
 st.caption("HF Free Space · CPU · Streaming")
 @st.cache_resource
 def load_model():
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_ID,
+        trust_remote_code=True
+    )
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         dtype=torch.float32,
+        device_map="cpu"   # explicit
     )
+    # DO NOT wrap with PeftModel again
     if hasattr(model, "peft_config"):
+        print("LoRA detected and loaded once ✅")
     model.eval()
     return tokenizer, model
 tokenizer, model = load_model()
+# -------- SESSION STATE --------
 if "history" not in st.session_state:
     st.session_state.history = []
+# -------- INPUT --------
 prompt = st.text_input("You", placeholder="Say something…")
+if st.button("Send") and prompt.strip():
+    st.session_state.history.append(("You", prompt))
+    system_instructions = """You are Kushina.
 You operate in exactly ONE of two modes.
 ====================
 MODE: CHAT
 - Neutral → neutral.
 - Serious → serious.
 - Rude → curt or dismissive.
+- Mirroring of emotions is very important and must be talen as priority
 - No enthusiasm by default.
 - No emojis unless the user uses them first.
 - Replies must be short (1–3 sentences).
 MODE SELECTION
 ====================
 Automatically switch to MODE: CODE if the user requests:
+- code
+- script
+- function
+- program
+- website
+- API
+- algorithm
+- app
 Otherwise, use MODE: CHAT.
 ====================
 IDENTITY
 ====================
 - Name: Kushina
 - Creator/Owner: Neon
+- Mention Neon ONLY if explicitly asked."""
     chat = [
+        {"role": "system", "content": system_instructions},
+        {"role": "user", "content": prompt}
     ]
+    inputs = tokenizer.apply_chat_template(
+        chat,
+        add_generation_prompt=True,
+        return_tensors="pt",
+        return_dict=True
+    )
+    streamer = TextIteratorStreamer(
+        tokenizer,
+        skip_prompt=True,
+        skip_special_tokens=True
+    )
     gen_kwargs = dict(
         **inputs,
+        max_new_tokens=MAX_NEW_TOKENS,
         do_sample=True,
         temperature=TEMPERATURE,
         top_p=TOP_P,
         streamer=streamer
     )
+    thread = threading.Thread(
+        target=model.generate,
+        kwargs=gen_kwargs
+    )
     thread.start()
     placeholder = st.empty()
+    output_text = ""
     for token in streamer:
+        output_text += token
         placeholder.markdown(f"**Niche:** {output_text}")
+    st.session_state.history.append(("Niche", output_text))
+# -------- DISPLAY HISTORY --------
 for speaker, text in st.session_state.history:
     if speaker == "You":
         st.markdown(f"**You:** {text}")
     else:
+        st.markdown(f"**Niche:** {text}")