Neon-AI committed on
Commit
5c0c26c
·
verified ·
1 Parent(s): 00d84a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -55
app.py CHANGED
@@ -8,12 +8,12 @@ from transformers import (
8
  )
9
 
10
  # ================= CONFIG =================
11
- MODEL_ID = "Neon-AI/Kushina" # Hugging Face repo
12
- MAX_NEW_TOKENS = 16384 # high enough to avoid cutting essays
13
  TEMPERATURE = 0.7
14
  TOP_P = 0.9
15
- # ==========================================
16
 
 
17
  st.set_page_config(page_title="Ureola", layout="centered")
18
  st.title("🧏🏾‍♀️ Ureola")
19
  st.caption("HF Free Space · CPU · Streaming · Memory")
@@ -21,10 +21,7 @@ st.caption("HF Free Space · CPU · Streaming · Memory")
21
  # ================= LOAD MODEL =================
22
  @st.cache_resource
23
  def load_model():
24
- tokenizer = AutoTokenizer.from_pretrained(
25
- MODEL_ID,
26
- trust_remote_code=True
27
- )
28
  model = AutoModelForCausalLM.from_pretrained(
29
  MODEL_ID,
30
  torch_dtype=torch.float32,
@@ -37,49 +34,37 @@ tokenizer, model = load_model()
37
  # ================= SESSION STATE =================
38
  if "history" not in st.session_state:
39
  st.session_state.history = []
40
-
41
  if "memory" not in st.session_state:
42
  st.session_state.memory = ""
43
 
44
  # ================= SYSTEM PROMPT =================
45
- BASE_SYSTEM_PROMPT = """
46
- You are Ureola.
47
- You operate in exactly ONE of three modes, but you never talk to users about them
48
 
49
- ====================
50
- MODE: CHAT
51
- ====================
52
  Rules:
53
  - Mirror the user's tone precisely.
54
  - Replies must be short (1–3 sentences).
55
- - No emojis unless the user uses them first.
56
  - No explanations unless explicitly asked.
57
 
58
- ====================
59
- MODE: CODE
60
- ====================
61
  Rules:
62
  - Output ONLY code unless explicitly asked to explain.
63
  - No personality, no commentary.
64
 
65
- ====================
66
- MODE: ACADEMIC
67
- ====================
68
  Rules:
69
  - Neutral, formal tone.
70
  - Clear structure.
71
  - Fully answer the task.
72
 
73
- ====================
74
- MODE SELECTION
75
- ====================
76
- CODE → if user asks for code, script, app, api, algorithm
77
- ACADEMIC → essay, explanation, homework, analysis
78
  Otherwise → CHAT
79
 
80
- ====================
81
- IDENTITY
82
- ====================
83
  Name: Ureola
84
  Creator: Neon
85
  Mention Neon ONLY if explicitly asked.
@@ -87,52 +72,41 @@ Mention Neon ONLY if explicitly asked.
87
 
88
 
89
  def build_system_prompt():
 
90
  if st.session_state.memory.strip():
91
- return f"""{BASE_SYSTEM_PROMPT}
92
-
93
- ====================
94
- MEMORY (internal)
95
- ====================
96
- {st.session_state.memory}
97
- """
98
  return BASE_SYSTEM_PROMPT
99
 
100
 
101
  # ================= MEMORY UPDATE =================
102
  def maybe_update_memory(user_text: str, assistant_text: str):
103
- # Update memory every 4 user messages (cheap + stable)
104
- if len(st.session_state.history) % 1 != 0:
105
- return
106
-
107
- memory_prompt = f"""
108
- Extract LONG-TERM memory.
109
-
110
  Rules:
111
  - Max 5 bullet points
112
  - Each bullet ≤ 15 words
113
- - Only stable preferences or facts
114
  - Ignore jokes, emotions, temporary info
115
  - If nothing important, return EXACTLY: NONE
116
 
117
- Current memory:
118
- {st.session_state.memory or "None"}
119
-
120
  Conversation:
121
  User: {user_text}
122
- Assistant: {assistant_text}
123
- """
124
 
125
  inputs = tokenizer(memory_prompt, return_tensors="pt")
126
  with torch.no_grad():
127
  output = model.generate(
128
  **inputs,
129
- max_new_tokens=120,
130
  do_sample=False
131
  )
132
-
133
  text = tokenizer.decode(output[0], skip_special_tokens=True).strip()
134
  if text and text != "NONE":
135
- st.session_state.memory = text
 
 
 
136
 
137
 
138
  # ================= INPUT =================
@@ -140,7 +114,6 @@ prompt = st.text_input("You", placeholder="Say something…")
140
 
141
  if st.button("Send") and prompt.strip():
142
  st.session_state.history.append(("You", prompt))
143
-
144
  system_prompt = build_system_prompt()
145
 
146
  chat = [
@@ -148,6 +121,7 @@ if st.button("Send") and prompt.strip():
148
  {"role": "user", "content": prompt},
149
  ]
150
 
 
151
  inputs = tokenizer.apply_chat_template(
152
  chat,
153
  add_generation_prompt=True,
@@ -155,12 +129,14 @@ if st.button("Send") and prompt.strip():
155
  return_dict=True
156
  )
157
 
 
158
  streamer = TextIteratorStreamer(
159
  tokenizer,
160
  skip_prompt=True,
161
  skip_special_tokens=True
162
  )
163
 
 
164
  gen_kwargs = dict(
165
  **inputs,
166
  max_new_tokens=MAX_NEW_TOKENS,
@@ -170,19 +146,22 @@ if st.button("Send") and prompt.strip():
170
  eos_token_id=tokenizer.eos_token_id,
171
  pad_token_id=tokenizer.eos_token_id,
172
  streamer=streamer
173
- )
174
 
 
175
  thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
176
  thread.start()
177
 
178
  placeholder = st.empty()
179
  output_text = ""
180
-
181
  for token in streamer:
182
  output_text += token
183
  placeholder.markdown(f"**Ureola:** {output_text}")
184
 
 
185
  st.session_state.history.append(("Ureola", output_text))
 
 
186
  maybe_update_memory(prompt, output_text)
187
 
188
  # ================= DISPLAY HISTORY =================
 
8
  )
9
 
10
  # ================= CONFIG =================
11
+ MODEL_ID = "Neon-AI/Kushina" # Hugging Face repo
12
+ MAX_NEW_TOKENS = 16384 # CPU-friendly
13
  TEMPERATURE = 0.7
14
  TOP_P = 0.9
 
15
 
16
+ # ==========================================
17
  st.set_page_config(page_title="Ureola", layout="centered")
18
  st.title("🧏🏾‍♀️ Ureola")
19
  st.caption("HF Free Space · CPU · Streaming · Memory")
 
21
  # ================= LOAD MODEL =================
22
  @st.cache_resource
23
  def load_model():
24
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
 
 
25
  model = AutoModelForCausalLM.from_pretrained(
26
  MODEL_ID,
27
  torch_dtype=torch.float32,
 
34
  # ================= SESSION STATE =================
35
  if "history" not in st.session_state:
36
  st.session_state.history = []
 
37
  if "memory" not in st.session_state:
38
  st.session_state.memory = ""
39
 
40
  # ================= SYSTEM PROMPT =================
41
+ BASE_SYSTEM_PROMPT = """You are Ureola.
42
+ You operate in exactly ONE of three modes, but you never talk to users about them.
 
43
 
44
+ ====================MODE: CHAT====================
 
 
45
  Rules:
46
  - Mirror the user's tone precisely.
47
  - Replies must be short (1–3 sentences).
48
+ - No emojis unless user uses them first.
49
  - No explanations unless explicitly asked.
50
 
51
+ ====================MODE: CODE====================
 
 
52
  Rules:
53
  - Output ONLY code unless explicitly asked to explain.
54
  - No personality, no commentary.
55
 
56
+ ====================MODE: ACADEMIC====================
 
 
57
  Rules:
58
  - Neutral, formal tone.
59
  - Clear structure.
60
  - Fully answer the task.
61
 
62
+ ====================MODE SELECTION====================
63
+ CODE → if user asks for code, script, app, api, algorithm
64
+ ACADEMIC → essay, explanation, homework, analysis
 
 
65
  Otherwise → CHAT
66
 
67
+ ====================IDENTITY====================
 
 
68
  Name: Ureola
69
  Creator: Neon
70
  Mention Neon ONLY if explicitly asked.
 
72
 
73
 
74
  def build_system_prompt():
75
+ """Include memory in the system prompt."""
76
  if st.session_state.memory.strip():
77
+ return f"{BASE_SYSTEM_PROMPT}\n====================MEMORY====================\n{st.session_state.memory}"
 
 
 
 
 
 
78
  return BASE_SYSTEM_PROMPT
79
 
80
 
81
  # ================= MEMORY UPDATE =================
82
  def maybe_update_memory(user_text: str, assistant_text: str):
83
+ """Update memory every message, append stable facts."""
84
+ memory_prompt = f"""Extract LONG-TERM memory.
 
 
 
 
 
85
  Rules:
86
  - Max 5 bullet points
87
  - Each bullet ≤ 15 words
88
+ - Only stable preferences/facts
89
  - Ignore jokes, emotions, temporary info
90
  - If nothing important, return EXACTLY: NONE
91
 
92
+ Current memory:{st.session_state.memory or "None"}
 
 
93
  Conversation:
94
  User: {user_text}
95
+ Assistant: {assistant_text}"""
 
96
 
97
  inputs = tokenizer(memory_prompt, return_tensors="pt")
98
  with torch.no_grad():
99
  output = model.generate(
100
  **inputs,
101
+ max_new_tokens=120, # CPU-friendly
102
  do_sample=False
103
  )
 
104
  text = tokenizer.decode(output[0], skip_special_tokens=True).strip()
105
  if text and text != "NONE":
106
+ if st.session_state.memory:
107
+ st.session_state.memory += "\n" + text
108
+ else:
109
+ st.session_state.memory = text
110
 
111
 
112
  # ================= INPUT =================
 
114
 
115
  if st.button("Send") and prompt.strip():
116
  st.session_state.history.append(("You", prompt))
 
117
  system_prompt = build_system_prompt()
118
 
119
  chat = [
 
121
  {"role": "user", "content": prompt},
122
  ]
123
 
124
+ # Tokenizer helper
125
  inputs = tokenizer.apply_chat_template(
126
  chat,
127
  add_generation_prompt=True,
 
129
  return_dict=True
130
  )
131
 
132
+ # Streamer
133
  streamer = TextIteratorStreamer(
134
  tokenizer,
135
  skip_prompt=True,
136
  skip_special_tokens=True
137
  )
138
 
139
+ # Generation arguments
140
  gen_kwargs = dict(
141
  **inputs,
142
  max_new_tokens=MAX_NEW_TOKENS,
 
146
  eos_token_id=tokenizer.eos_token_id,
147
  pad_token_id=tokenizer.eos_token_id,
148
  streamer=streamer
149
+ )
150
 
151
+ # Run in separate thread
152
  thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
153
  thread.start()
154
 
155
  placeholder = st.empty()
156
  output_text = ""
 
157
  for token in streamer:
158
  output_text += token
159
  placeholder.markdown(f"**Ureola:** {output_text}")
160
 
161
+ # Append to history
162
  st.session_state.history.append(("Ureola", output_text))
163
+
164
+ # Update memory immediately
165
  maybe_update_memory(prompt, output_text)
166
 
167
  # ================= DISPLAY HISTORY =================