Spaces:

Neon-AI
/

Chatbot_test

Paused

App Files Community

Neon-AI committed on Jan 29

Commit

2648494

verified ·

1 Parent(s): cf6ca1e

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -95

app.py CHANGED Viewed

@@ -1,60 +1,47 @@
 import streamlit as st
-import torch
-import threading
-from peft import PeftModel
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    TextIteratorStreamer
-)
-# ---------------- CONFIG ----------------
-MODEL_ID = "Neon-AI/Kushina"
-MAX_NEW_TOKENS = 16384
 TEMPERATURE = 0.7
 TOP_P = 0.9
-# ----------------------------------------
 st.set_page_config(page_title="Niche AI", layout="centered")
 st.title("🧠 Niche AI")
-st.caption("HF Free Space · CPU · Streaming")
 @st.cache_resource
-def load_model():
-    tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_ID,
-        trust_remote_code=True
-    )
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        dtype=torch.float32,
-        device_map="cpu"   # explicit
     )
-    # DO NOT wrap with PeftModel again
-    if hasattr(model, "peft_config"):
-        print("LoRA detected and loaded once ✅")
-    model.eval()
-    return tokenizer, model
-tokenizer, model = load_model()
-# -------- SESSION STATE --------
 if "history" not in st.session_state:
     st.session_state.history = []
-# -------- INPUT --------
 prompt = st.text_input("You", placeholder="Say something…")
-if st.button("Send") and prompt.strip():
-    st.session_state.history.append(("You", prompt))
-    system_instructions = """You are Kushina.
 You operate in exactly ONE of two modes.
 ====================
 MODE: CHAT
 ====================
@@ -64,11 +51,10 @@ Rules:
 - Neutral → neutral.
 - Serious → serious.
 - Rude → curt or dismissive.
-- Mirroring of emotions is very important and must be talen as priority
-- No enthusiasm by default.
-- No emojis unless the user uses them first.
 - Replies must be short (1–3 sentences).
 - No explanations unless explicitly asked.
 ====================
 MODE: CODE
 ====================
@@ -77,77 +63,60 @@ Rules:
 - No emojis.
 - No jokes.
 - No commentary.
-- No introductions.
 - Output ONLY code unless explicitly asked to explain.
-- Follow standard best practices.
-- Be deterministic and professional.
 - Finish the task completely.
 ====================
 MODE SELECTION
 ====================
-Automatically switch to MODE: CODE if the user requests:
-- code
-- script
-- function
-- program
-- website
-- API
-- algorithm
-- app
-Otherwise, use MODE: CHAT.
 ====================
 IDENTITY
 ====================
-- Name: Kushina
-- Creator/Owner: Neon
-- Mention Neon ONLY if explicitly asked."""
-    chat = [
-        {"role": "system", "content": system_instructions},
-        {"role": "user", "content": prompt}
-    ]
-    inputs = tokenizer.apply_chat_template(
-        chat,
-        add_generation_prompt=True,
-        return_tensors="pt",
-        return_dict=True
-    )
-    streamer = TextIteratorStreamer(
-        tokenizer,
-        skip_prompt=True,
-        skip_special_tokens=True
-    )
-    gen_kwargs = dict(
-        **inputs,
-        max_new_tokens=MAX_NEW_TOKENS,
-        do_sample=True,
-        temperature=TEMPERATURE,
-        top_p=TOP_P,
-        eos_token_id=tokenizer.eos_token_id,
-        pad_token_id=tokenizer.eos_token_id,
-        streamer=streamer
-    )
-    thread = threading.Thread(
-        target=model.generate,
-        kwargs=gen_kwargs
-    )
-    thread.start()
     placeholder = st.empty()
     output_text = ""
-    for token in streamer:
-        output_text += token
-        placeholder.markdown(f"**Niche:** {output_text}")
     st.session_state.history.append(("Niche", output_text))
-# -------- DISPLAY HISTORY --------
 for speaker, text in st.session_state.history:
     if speaker == "You":
         st.markdown(f"**You:** {text}")
     else:
-        st.markdown(f"**Niche:** {text}")

 import streamlit as st
+from llama_cpp import Llama
+# ================= CONFIG =================
+MODEL_PATH = "model.gguf"
+N_CTX = 16384
+N_THREADS = 4        # HF free CPU sweet spot
+N_BATCH = 256
+MAX_TOKENS = 16384
 TEMPERATURE = 0.7
 TOP_P = 0.9
+# ==========================================
 st.set_page_config(page_title="Niche AI", layout="centered")
 st.title("🧠 Niche AI")
+st.caption("llama.cpp · CPU · Embedded · Streaming")
 @st.cache_resource
+def load_llm():
+    return Llama(
+        model_path=MODEL_PATH,
+        n_ctx=N_CTX,
+        n_threads=N_THREADS,
+        n_batch=N_BATCH,
+        f16_kv=True,
+        use_mmap=True,
+        use_mlock=False,
+        verbose=False,
     )
+llm = load_llm()
+# ---------- SESSION STATE ----------
 if "history" not in st.session_state:
     st.session_state.history = []
+# ---------- INPUT ----------
 prompt = st.text_input("You", placeholder="Say something…")
+SYSTEM_PROMPT = """You are Kushina.
 You operate in exactly ONE of two modes.
 ====================
 MODE: CHAT
 ====================
 - Neutral → neutral.
 - Serious → serious.
 - Rude → curt or dismissive.
 - Replies must be short (1–3 sentences).
+- No emojis unless the user uses them first.
 - No explanations unless explicitly asked.
 ====================
 MODE: CODE
 ====================
 - No emojis.
 - No jokes.
 - No commentary.
 - Output ONLY code unless explicitly asked to explain.
+- Follow best practices.
 - Finish the task completely.
 ====================
 MODE SELECTION
 ====================
+Switch to MODE: CODE if the user asks for:
+code, script, function, program, website, api, algorithm, app
+Otherwise use MODE: CHAT.
 ====================
 IDENTITY
 ====================
+Name: Kushina
+Creator: Neon
+Mention Neon ONLY if explicitly asked.
+"""
+def build_prompt(user_text: str) -> str:
+    return f"""<|system|>
+{SYSTEM_PROMPT}
+<|user|>
+{user_text}
+<|assistant|>
+"""
+if st.button("Send") and prompt.strip():
+    st.session_state.history.append(("You", prompt))
+    full_prompt = build_prompt(prompt)
     placeholder = st.empty()
     output_text = ""
+    for chunk in llm(
+        full_prompt,
+        max_tokens=MAX_TOKENS,
+        temperature=TEMPERATURE,
+        top_p=TOP_P,
+        stream=True,
+        stop=["<|user|>", "<|system|>"],
+    ):
+        if "choices" in chunk:
+            token = chunk["choices"][0]["text"]
+            output_text += token
+            placeholder.markdown(f"**Niche:** {output_text}")
     st.session_state.history.append(("Niche", output_text))
+# ---------- DISPLAY HISTORY ----------
 for speaker, text in st.session_state.history:
     if speaker == "You":
         st.markdown(f"**You:** {text}")
     else:
+        st.markdown(f"**Niche:** {text}")