Neon-AI committed
Commit ffaae69 · verified · 1 parent: 587c960

Update app.py

Files changed (1): app.py (+58, −51)
app.py CHANGED
```diff
@@ -1,11 +1,17 @@
 import streamlit as st
 import torch
 import threading
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+
+from peft import PeftModel
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    TextIteratorStreamer
+)
 
 # ---------------- CONFIG ----------------
 MODEL_ID = "Neon-AI/Kushina"
-MAX_MODEL_TOKENS = 16384 # full context size
+MAX_NEW_TOKENS = 16384
 TEMPERATURE = 0.7
 TOP_P = 0.9
 # ----------------------------------------
@@ -14,31 +20,40 @@ st.set_page_config(page_title="Niche AI", layout="centered")
 st.title("🧠 Niche AI")
 st.caption("HF Free Space · CPU · Streaming")
 
-# ---------- LOAD MODEL ----------
 @st.cache_resource
 def load_model():
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_ID,
+        trust_remote_code=True
+    )
+
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         dtype=torch.float32,
-        device_map="cpu"
+        device_map="cpu"  # explicit
     )
-    # Disable LoRA wrapping if present
+
+    # DO NOT wrap with PeftModel again
     if hasattr(model, "peft_config"):
-        print("LoRA detected and already loaded ✅")
+        print("LoRA detected and loaded once ✅")
+
     model.eval()
     return tokenizer, model
 
+
 tokenizer, model = load_model()
 
-# ---------- SESSION STATE ----------
+# -------- SESSION STATE --------
 if "history" not in st.session_state:
     st.session_state.history = []
 
-# ---------- INPUT ----------
+# -------- INPUT --------
 prompt = st.text_input("You", placeholder="Say something…")
 
-SYSTEM_INSTRUCTIONS = """You are Kushina.
+if st.button("Send") and prompt.strip():
+    st.session_state.history.append(("You", prompt))
+
+    system_instructions = """You are Kushina.
 You operate in exactly ONE of two modes.
 ====================
 MODE: CHAT
@@ -49,7 +64,7 @@ Rules:
 - Neutral → neutral.
 - Serious → serious.
 - Rude → curt or dismissive.
-- Mirroring of emotions is very important and must be taken as priority.
+- Mirroring of emotions is very important and must be talen as priority
 - No enthusiasm by default.
 - No emojis unless the user uses them first.
 - Replies must be short (1–3 sentences).
@@ -71,34 +86,42 @@ Rules:
 MODE SELECTION
 ====================
 Automatically switch to MODE: CODE if the user requests:
-- code, script, function, program, website, API, algorithm, app
+- code
+- script
+- function
+- program
+- website
+- API
+- algorithm
+- app
 Otherwise, use MODE: CHAT.
 ====================
 IDENTITY
 ====================
 - Name: Kushina
 - Creator/Owner: Neon
-- Mention Neon ONLY if explicitly asked.
-"""
-
-def build_prompt(user_text: str):
+- Mention Neon ONLY if explicitly asked."""
     chat = [
-        {"role": "system", "content": SYSTEM_INSTRUCTIONS},
-        {"role": "user", "content": user_text}
+        {"role": "system", "content": system_instructions},
+        {"role": "user", "content": prompt}
     ]
-    return tokenizer.apply_chat_template(chat, add_generation_prompt=True, return_tensors="pt", return_dict=True)
 
-# ---------- GENERATE FUNCTION ----------
-def generate_response(inputs):
-    # Compute remaining tokens dynamically
-    current_tokens = inputs["input_ids"].shape[1]
-    max_new_tokens = max(1, MAX_MODEL_TOKENS - current_tokens)
+    inputs = tokenizer.apply_chat_template(
+        chat,
+        add_generation_prompt=True,
+        return_tensors="pt",
+        return_dict=True
+    )
 
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+    streamer = TextIteratorStreamer(
+        tokenizer,
+        skip_prompt=True,
+        skip_special_tokens=True
+    )
 
     gen_kwargs = dict(
         **inputs,
-        max_new_tokens=max_new_tokens,
+        max_new_tokens=MAX_NEW_TOKENS,
         do_sample=True,
         temperature=TEMPERATURE,
         top_p=TOP_P,
@@ -107,40 +130,24 @@ def generate_response(inputs):
         streamer=streamer
     )
 
-    # Run generation in a separate thread
-    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
+    thread = threading.Thread(
+        target=model.generate,
+        kwargs=gen_kwargs
+    )
     thread.start()
 
-    # Stream tokens into a buffer and only display complete sentences
-    buffer = ""
-    output_text = ""
     placeholder = st.empty()
-    sentence_endings = {".", "!", "?"}
+    output_text = ""
 
     for token in streamer:
-        buffer += token
-        if any(buffer.rstrip().endswith(punct) for punct in sentence_endings):
-            output_text += buffer
-            placeholder.markdown(f"**Niche:** {output_text}")
-            buffer = ""
-
-    # Add any leftover text
-    if buffer:
-        output_text += buffer
+        output_text += token
         placeholder.markdown(f"**Niche:** {output_text}")
 
-    return output_text
-
-# ---------- HANDLE PROMPT ----------
-if st.button("Send") and prompt.strip():
-    st.session_state.history.append(("You", prompt))
-    inputs = build_prompt(prompt)
-    response_text = generate_response(inputs)
-    st.session_state.history.append(("Niche", response_text))
+    st.session_state.history.append(("Niche", output_text))
 
-# ---------- DISPLAY HISTORY ----------
+# -------- DISPLAY HISTORY --------
 for speaker, text in st.session_state.history:
     if speaker == "You":
         st.markdown(f"**You:** {text}")
     else:
-        st.markdown(f"**Niche:** {text}")
+        st.markdown(f"**Niche:** {text}")
```
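The heart of the new version is the threaded streaming loop: `model.generate` runs on a background thread and writes into a `TextIteratorStreamer`, while the main thread iterates the streamer and re-renders a `st.empty()` placeholder. Below is a minimal sketch of that pattern outside Streamlit, assuming a small stand-in checkpoint (`sshleifer/tiny-gpt2`) instead of `Neon-AI/Kushina` so it runs quickly on CPU:

```python
# Minimal sketch of the streaming pattern used in app.py, without Streamlit.
# Assumption: "sshleifer/tiny-gpt2" is only a placeholder checkpoint for a quick
# CPU demo; app.py itself loads MODEL_ID = "Neon-AI/Kushina".
import threading

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "sshleifer/tiny-gpt2"  # placeholder checkpoint (assumption)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
model.eval()

inputs = tokenizer("Hello, how are you?", return_tensors="pt")

# The streamer is an iterator; generate() has to run on another thread,
# otherwise iterating the streamer would wait forever for tokens.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
gen_kwargs = dict(
    **inputs,
    max_new_tokens=32,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    streamer=streamer,
)

thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
thread.start()

output_text = ""
for chunk in streamer:                    # yields decoded text chunks as they arrive
    output_text += chunk
    print(chunk, end="", flush=True)      # app.py rewrites a st.empty() placeholder instead

thread.join()
```

`TextIteratorStreamer` blocks the consuming loop until `generate` produces text, which is why the call cannot run on the same thread as the loop that reads it.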
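One behavioural change to be aware of: the old code derived `max_new_tokens` from the remaining context (`MAX_MODEL_TOKENS - prompt length`), while the new code passes a fixed `MAX_NEW_TOKENS = 16384`. If the prompt plus 16384 generated tokens exceeds the model's context window, generation may stop early or fail. A sketch of the old budgeting logic as a small helper, assuming a 16384-token window as the removed constant implied:

```python
def remaining_budget(inputs, context_window: int = 16384) -> int:
    """Return how many new tokens still fit after the prompt (at least 1).

    context_window=16384 is an assumption carried over from the old
    MAX_MODEL_TOKENS constant; set it to the model's real limit.
    """
    prompt_tokens = inputs["input_ids"].shape[1]
    return max(1, context_window - prompt_tokens)
```

With this helper, `gen_kwargs` would use `max_new_tokens=remaining_budget(inputs)` in place of the fixed constant.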