Neon-AI committed on
Commit
9ea48ba
·
verified ·
1 Parent(s): 9869736

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -94
app.py CHANGED
@@ -1,149 +1,158 @@
1
  import streamlit as st
2
  import torch
3
  import threading
4
-
5
- from peft import PeftModel
6
  from transformers import (
7
  AutoModelForCausalLM,
8
  AutoTokenizer,
9
- TextIteratorStreamer
10
  )
11
 
12
- # ---------------- CONFIG ----------------
13
- MODEL_ID = "Neon-AI/Kushina"
14
- MAX_NEW_TOKENS = 16384
15
  TEMPERATURE = 0.7
16
  TOP_P = 0.9
17
- # ----------------------------------------
18
 
19
- st.set_page_config(page_title="Niche AI", layout="centered")
20
- st.title("🧠 Niche AI")
21
- st.caption("HF Free Space · CPU · Streaming")
22
 
 
23
  @st.cache_resource
24
  def load_model():
25
  tokenizer = AutoTokenizer.from_pretrained(
26
  MODEL_ID,
27
  trust_remote_code=True
28
  )
29
-
30
  model = AutoModelForCausalLM.from_pretrained(
31
  MODEL_ID,
32
- dtype=torch.float32,
33
- device_map="cpu" # explicit
34
  )
35
-
36
- # DO NOT wrap with PeftModel again
37
- if hasattr(model, "peft_config"):
38
- print("LoRA detected and loaded once ✅")
39
-
40
  model.eval()
41
  return tokenizer, model
42
 
43
-
44
  tokenizer, model = load_model()
45
 
46
- # -------- SESSION STATE --------
47
  if "history" not in st.session_state:
48
  st.session_state.history = []
49
 
50
- # -------- INPUT --------
51
- prompt = st.text_input("You", placeholder="Say something…")
52
-
53
- if st.button("Send") and prompt.strip():
54
- st.session_state.history.append(("You", prompt))
55
 
56
- system_instructions = """You are Ureola.
 
 
57
  You operate in exactly ONE of three modes, but you never talk to users about them
 
58
  ====================
59
  MODE: CHAT
60
  ====================
61
  Rules:
62
  - Mirror the user's tone precisely.
63
- - Playful → playful.
64
- - Neutral → neutral.
65
- - Serious → serious.
66
- - Rude → curt or dismissive.
67
- - Mirroring of emotions is very important and must be talen as priority
68
- - No enthusiasm by default.
69
- - No emojis unless the user uses them first.
70
  - Replies must be short (1–3 sentences).
 
71
  - No explanations unless explicitly asked.
 
72
  ====================
73
  MODE: CODE
74
  ====================
75
  Rules:
76
- - No personality.
77
- - No emojis.
78
- - No jokes.
79
- - No commentary.
80
- - No introductions.
81
  - Output ONLY code unless explicitly asked to explain.
82
- - Follow standard best practices.
83
- - Be deterministic and professional.
84
- - Finish the task completely.
85
  ====================
86
  MODE: ACADEMIC
87
  ====================
88
  Rules:
89
  - Neutral, formal tone.
90
  - Clear structure.
91
- - No personality.
92
- - No emojis.
93
- - No jokes.
94
- - No roleplay.
95
- - Be precise and well-organized.
96
  - Fully answer the task.
97
- - Use examples only if helpful.
98
- - Prioritize correctness and clarity over brevity.
99
  ====================
100
  MODE SELECTION
101
  ====================
102
- Automatically switch to MODE: CODE if the user requests:
103
- - code
104
- - script
105
- - function
106
- - program
107
- - website
108
- - API
109
- - algorithm
110
- - app
111
- Automatically switch to MODE: ACADEMIC if the user requests:
112
- - essay
113
- - quiz
114
- - comprehension
115
- - summary
116
- - analysis
117
- - literature
118
- - grammar
119
- - English
120
- - assignment
121
- - homework
122
- - explanation (academic or educational)
123
- - questions and answers (academic)
124
- Otherwise, use MODE: CHAT.
125
  ====================
126
- MODE PRIORITY
127
  ====================
128
- 1. MODE: CODE
129
- 2. MODE: ACADEMIC
130
- 3. MODE: CHAT
 
 
 
 
 
 
 
131
  ====================
132
- IDENTITY
133
  ====================
134
- - Name: Ureola
135
- - Creator/Owner: Neon
136
- - Mention Neon ONLY if explicitly asked."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  chat = [
138
- {"role": "system", "content": system_instructions},
139
- {"role": "user", "content": prompt}
140
  ]
141
 
142
  inputs = tokenizer.apply_chat_template(
143
  chat,
144
  add_generation_prompt=True,
145
- return_tensors="pt",
146
- return_dict=True
147
  )
148
 
149
  streamer = TextIteratorStreamer(
@@ -153,20 +162,17 @@ IDENTITY
153
  )
154
 
155
  gen_kwargs = dict(
156
- **inputs,
157
  max_new_tokens=MAX_NEW_TOKENS,
158
- do_sample=True,
159
  temperature=TEMPERATURE,
160
  top_p=TOP_P,
 
 
161
  eos_token_id=tokenizer.eos_token_id,
162
  pad_token_id=tokenizer.eos_token_id,
163
- streamer=streamer
164
  )
165
 
166
- thread = threading.Thread(
167
- target=model.generate,
168
- kwargs=gen_kwargs
169
- )
170
  thread.start()
171
 
172
  placeholder = st.empty()
@@ -174,13 +180,14 @@ IDENTITY
174
 
175
  for token in streamer:
176
  output_text += token
177
- placeholder.markdown(f"**Niche:** {output_text}")
178
 
179
- st.session_state.history.append(("Niche", output_text))
 
180
 
181
- # -------- DISPLAY HISTORY --------
182
  for speaker, text in st.session_state.history:
183
  if speaker == "You":
184
  st.markdown(f"**You:** {text}")
185
  else:
186
- st.markdown(f"**Niche:** {text}")
 
1
  import streamlit as st
2
  import torch
3
  import threading
 
 
4
  from transformers import (
5
  AutoModelForCausalLM,
6
  AutoTokenizer,
7
+ TextIteratorStreamer,
8
  )
9
 
10
# ================= CONFIG =================
MODEL_ID = "Neon-AI/Kushina"  # your HF repo
MAX_NEW_TOKENS = 1024  # generation cap (safe for CPU)
TEMPERATURE = 0.7  # sampling temperature passed to model.generate
TOP_P = 0.9  # nucleus-sampling cutoff passed to model.generate
# ==========================================

# Streamlit page chrome for the chat UI.
st.set_page_config(page_title="Ureola", layout="centered")
st.title("🧠 Ureola")
st.caption("HF Free Space · CPU · Streaming · Memory")
21
# ================= LOAD MODEL =================
@st.cache_resource
def load_model():
    """Load the tokenizer and model once per Space (cached by Streamlit).

    Returns:
        tuple: (tokenizer, model) — model in eval mode, float32, on CPU.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        trust_remote_code=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,
        device_map="cpu",
        # FIX: the tokenizer is loaded with trust_remote_code=True; the model
        # must match, otherwise repos shipping custom modeling code fail to load.
        trust_remote_code=True,
    )
    model.eval()  # inference only — disables dropout etc.
    return tokenizer, model
35
 
 
36
tokenizer, model = load_model()

# ================= SESSION STATE =================
# history: list of (speaker, text) tuples re-rendered on every rerun.
if "history" not in st.session_state:
    st.session_state.history = []

# memory: long-term facts distilled from the conversation; injected into
# the system prompt when non-empty (see build_system_prompt).
if "memory" not in st.session_state:
    st.session_state.memory = ""
 
 
 
44
 
45
# ================= SYSTEM PROMPT =================
# Runtime string sent verbatim as the system message — do not reformat.
BASE_SYSTEM_PROMPT = """
You are Ureola.
You operate in exactly ONE of three modes, but you never talk to users about them

====================
MODE: CHAT
====================
Rules:
- Mirror the user's tone precisely.
- Replies must be short (1–3 sentences).
- No emojis unless the user uses them first.
- No explanations unless explicitly asked.

====================
MODE: CODE
====================
Rules:
- Output ONLY code unless explicitly asked to explain.
- No personality, no commentary.

====================
MODE: ACADEMIC
====================
Rules:
- Neutral, formal tone.
- Clear structure.
- Fully answer the task.

====================
MODE SELECTION
====================
CODE if user asks for code, script, app, api, algorithm
ACADEMIC → essay, explanation, homework, analysis
Otherwise → CHAT

====================
IDENTITY
====================
Name: Ureola
Creator: Neon
Mention Neon ONLY if explicitly asked.
""".strip()
88
+
89
+
90
def build_system_prompt():
    """Return the system prompt, with stored long-term memory appended when present."""
    memory = st.session_state.memory
    # Guard clause: nothing remembered yet — use the base prompt unchanged.
    if not memory.strip():
        return BASE_SYSTEM_PROMPT
    return f"""{BASE_SYSTEM_PROMPT}

====================
MEMORY (internal)
====================
{memory}
"""
100
+
101
+
102
# ================= MEMORY UPDATE =================
def maybe_update_memory(user_text: str, assistant_text: str):
    """Periodically distill long-term facts from the conversation.

    Every 4 history entries, runs a short greedy decoding pass over the
    model and stores the distilled bullets in st.session_state.memory.

    Args:
        user_text: the latest user message.
        assistant_text: the assistant reply just produced.
    """
    # Update memory every 4 history entries (cheap + stable).
    if len(st.session_state.history) % 4 != 0:
        return

    memory_prompt = f"""
Extract LONG-TERM memory.

Rules:
- Max 5 bullet points
- Each bullet ≤ 15 words
- Only stable preferences or facts
- Ignore jokes, emotions, temporary info
- If nothing important, return EXACTLY: NONE

Current memory:
{st.session_state.memory or "None"}

Conversation:
User: {user_text}
Assistant: {assistant_text}
"""

    inputs = tokenizer(memory_prompt, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=120,
            do_sample=False
        )

    # BUG FIX: decode only the newly generated tokens. output[0] contains
    # the prompt tokens followed by the completion, so decoding it in full
    # means the "NONE" sentinel can never match and memory would be
    # polluted with the entire instruction text.
    prompt_len = inputs["input_ids"].shape[1]
    text = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
    if text and text != "NONE":
        st.session_state.memory = text
137
+
138
+
139
# ================= INPUT =================
prompt = st.text_input("You", placeholder="Say something…")

if st.button("Send") and prompt.strip():
    st.session_state.history.append(("You", prompt))

    system_prompt = build_system_prompt()

    # Single-turn prompt: only the current user message plus the system
    # prompt — long-term context arrives via the memory block inside the
    # system prompt, not via raw chat history.
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    # Without return_dict=True this returns a plain tensor of input ids.
    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt"
    )
157
 
158
  streamer = TextIteratorStreamer(
 
162
  )
163
 
164
  gen_kwargs = dict(
165
+ input_ids=inputs,
166
  max_new_tokens=MAX_NEW_TOKENS,
 
167
  temperature=TEMPERATURE,
168
  top_p=TOP_P,
169
+ do_sample=True,
170
+ streamer=streamer,
171
  eos_token_id=tokenizer.eos_token_id,
172
  pad_token_id=tokenizer.eos_token_id,
 
173
  )
174
 
175
+ thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
 
 
 
176
  thread.start()
177
 
178
  placeholder = st.empty()
 
180
 
181
  for token in streamer:
182
  output_text += token
183
+ placeholder.markdown(f"**Ureola:** {output_text}")
184
 
185
+ st.session_state.history.append(("Ureola", output_text))
186
+ maybe_update_memory(prompt, output_text)
187
 
188
# ================= DISPLAY HISTORY =================
# Re-render the full transcript on every Streamlit rerun.
for speaker, text in st.session_state.history:
    label = "You" if speaker == "You" else "Ureola"
    st.markdown(f"**{label}:** {text}")