Neon-AI committed on
Commit
2648494
·
verified ·
1 Parent(s): cf6ca1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -95
app.py CHANGED
@@ -1,60 +1,47 @@
1
  import streamlit as st
2
- import torch
3
- import threading
4
-
5
- from peft import PeftModel
6
- from transformers import (
7
- AutoModelForCausalLM,
8
- AutoTokenizer,
9
- TextIteratorStreamer
10
- )
11
-
12
- # ---------------- CONFIG ----------------
13
- MODEL_ID = "Neon-AI/Kushina"
14
- MAX_NEW_TOKENS = 16384
15
  TEMPERATURE = 0.7
16
  TOP_P = 0.9
17
- # ----------------------------------------
18
 
19
  st.set_page_config(page_title="Niche AI", layout="centered")
20
  st.title("🧠 Niche AI")
21
- st.caption("HF Free Space · CPU · Streaming")
22
 
23
  @st.cache_resource
24
- def load_model():
25
- tokenizer = AutoTokenizer.from_pretrained(
26
- MODEL_ID,
27
- trust_remote_code=True
28
- )
29
-
30
- model = AutoModelForCausalLM.from_pretrained(
31
- MODEL_ID,
32
- dtype=torch.float32,
33
- device_map="cpu" # explicit
34
  )
35
 
36
- # DO NOT wrap with PeftModel again
37
- if hasattr(model, "peft_config"):
38
- print("LoRA detected and loaded once ✅")
39
-
40
- model.eval()
41
- return tokenizer, model
42
-
43
-
44
- tokenizer, model = load_model()
45
 
46
- # -------- SESSION STATE --------
47
  if "history" not in st.session_state:
48
  st.session_state.history = []
49
 
50
- # -------- INPUT --------
51
  prompt = st.text_input("You", placeholder="Say something…")
52
 
53
- if st.button("Send") and prompt.strip():
54
- st.session_state.history.append(("You", prompt))
55
 
56
- system_instructions = """You are Kushina.
57
  You operate in exactly ONE of two modes.
 
58
  ====================
59
  MODE: CHAT
60
  ====================
@@ -64,11 +51,10 @@ Rules:
64
  - Neutral → neutral.
65
  - Serious → serious.
66
  - Rude → curt or dismissive.
67
- - Mirroring of emotions is very important and must be talen as priority
68
- - No enthusiasm by default.
69
- - No emojis unless the user uses them first.
70
  - Replies must be short (1–3 sentences).
 
71
  - No explanations unless explicitly asked.
 
72
  ====================
73
  MODE: CODE
74
  ====================
@@ -77,77 +63,60 @@ Rules:
77
  - No emojis.
78
  - No jokes.
79
  - No commentary.
80
- - No introductions.
81
  - Output ONLY code unless explicitly asked to explain.
82
- - Follow standard best practices.
83
- - Be deterministic and professional.
84
  - Finish the task completely.
 
85
  ====================
86
  MODE SELECTION
87
  ====================
88
- Automatically switch to MODE: CODE if the user requests:
89
- - code
90
- - script
91
- - function
92
- - program
93
- - website
94
- - API
95
- - algorithm
96
- - app
97
- Otherwise, use MODE: CHAT.
98
  ====================
99
  IDENTITY
100
  ====================
101
- - Name: Kushina
102
- - Creator/Owner: Neon
103
- - Mention Neon ONLY if explicitly asked."""
104
- chat = [
105
- {"role": "system", "content": system_instructions},
106
- {"role": "user", "content": prompt}
107
- ]
108
-
109
- inputs = tokenizer.apply_chat_template(
110
- chat,
111
- add_generation_prompt=True,
112
- return_tensors="pt",
113
- return_dict=True
114
- )
115
 
116
- streamer = TextIteratorStreamer(
117
- tokenizer,
118
- skip_prompt=True,
119
- skip_special_tokens=True
120
- )
121
-
122
- gen_kwargs = dict(
123
- **inputs,
124
- max_new_tokens=MAX_NEW_TOKENS,
125
- do_sample=True,
126
- temperature=TEMPERATURE,
127
- top_p=TOP_P,
128
- eos_token_id=tokenizer.eos_token_id,
129
- pad_token_id=tokenizer.eos_token_id,
130
- streamer=streamer
131
- )
132
 
133
- thread = threading.Thread(
134
- target=model.generate,
135
- kwargs=gen_kwargs
136
- )
137
- thread.start()
138
 
139
  placeholder = st.empty()
140
  output_text = ""
141
 
142
- for token in streamer:
143
- output_text += token
144
- placeholder.markdown(f"**Niche:** {output_text}")
 
 
 
 
 
 
 
 
 
145
 
146
  st.session_state.history.append(("Niche", output_text))
147
 
148
- # -------- DISPLAY HISTORY --------
149
  for speaker, text in st.session_state.history:
150
  if speaker == "You":
151
  st.markdown(f"**You:** {text}")
152
  else:
153
- st.markdown(f"**Niche:** {text}")
 
1
  import streamlit as st
2
+ from llama_cpp import Llama
3
+
4
+ # ================= CONFIG =================
5
+ MODEL_PATH = "model.gguf"
6
+ N_CTX = 16384
7
+ N_THREADS = 4 # HF free CPU sweet spot
8
+ N_BATCH = 256
9
+
10
+ MAX_TOKENS = 16384
 
 
 
 
11
  TEMPERATURE = 0.7
12
  TOP_P = 0.9
13
+ # ==========================================
14
 
15
  st.set_page_config(page_title="Niche AI", layout="centered")
16
  st.title("🧠 Niche AI")
17
+ st.caption("llama.cpp · CPU · Embedded · Streaming")
18
 
19
  @st.cache_resource
20
+ def load_llm():
21
+ return Llama(
22
+ model_path=MODEL_PATH,
23
+ n_ctx=N_CTX,
24
+ n_threads=N_THREADS,
25
+ n_batch=N_BATCH,
26
+ f16_kv=True,
27
+ use_mmap=True,
28
+ use_mlock=False,
29
+ verbose=False,
30
  )
31
 
32
+ llm = load_llm()
 
 
 
 
 
 
 
 
33
 
34
+ # ---------- SESSION STATE ----------
35
  if "history" not in st.session_state:
36
  st.session_state.history = []
37
 
38
+ # ---------- INPUT ----------
39
  prompt = st.text_input("You", placeholder="Say something…")
40
 
41
+ SYSTEM_PROMPT = """You are Kushina.
 
42
 
 
43
  You operate in exactly ONE of two modes.
44
+
45
  ====================
46
  MODE: CHAT
47
  ====================
 
51
  - Neutral → neutral.
52
  - Serious → serious.
53
  - Rude → curt or dismissive.
 
 
 
54
  - Replies must be short (1–3 sentences).
55
+ - No emojis unless the user uses them first.
56
  - No explanations unless explicitly asked.
57
+
58
  ====================
59
  MODE: CODE
60
  ====================
 
63
  - No emojis.
64
  - No jokes.
65
  - No commentary.
 
66
  - Output ONLY code unless explicitly asked to explain.
67
+ - Follow best practices.
 
68
  - Finish the task completely.
69
+
70
  ====================
71
  MODE SELECTION
72
  ====================
73
+ Switch to MODE: CODE if the user asks for:
74
+ code, script, function, program, website, api, algorithm, app
75
+
76
+ Otherwise use MODE: CHAT.
77
+
 
 
 
 
 
78
  ====================
79
  IDENTITY
80
  ====================
81
+ Name: Kushina
82
+ Creator: Neon
83
+ Mention Neon ONLY if explicitly asked.
84
+ """
85
+
86
+ def build_prompt(user_text: str) -> str:
87
+ return f"""<|system|>
88
+ {SYSTEM_PROMPT}
89
+ <|user|>
90
+ {user_text}
91
+ <|assistant|>
92
+ """
 
 
93
 
94
+ if st.button("Send") and prompt.strip():
95
+ st.session_state.history.append(("You", prompt))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
+ full_prompt = build_prompt(prompt)
 
 
 
 
98
 
99
  placeholder = st.empty()
100
  output_text = ""
101
 
102
+ for chunk in llm(
103
+ full_prompt,
104
+ max_tokens=MAX_TOKENS,
105
+ temperature=TEMPERATURE,
106
+ top_p=TOP_P,
107
+ stream=True,
108
+ stop=["<|user|>", "<|system|>"],
109
+ ):
110
+ if "choices" in chunk:
111
+ token = chunk["choices"][0]["text"]
112
+ output_text += token
113
+ placeholder.markdown(f"**Niche:** {output_text}")
114
 
115
  st.session_state.history.append(("Niche", output_text))
116
 
117
+ # ---------- DISPLAY HISTORY ----------
118
  for speaker, text in st.session_state.history:
119
  if speaker == "You":
120
  st.markdown(f"**You:** {text}")
121
  else:
122
+ st.markdown(f"**Niche:** {text}")