MarshallCN commited on
Commit
95b305d
·
0 Parent(s):

Clean initial commit without large files

Browse files
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Jupyter
2
+ **/.ipynb_checkpoints/
3
+ # any hidden Jupyter aux files like .ipynb_foo (gitignore has no inline comments)
+ .ipynb_*
4
+
5
+ # Python cache/bytecode
6
+ **/__pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ /old/
10
+ /old/*
11
+ models/
12
+ models/*
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: "Qwen2.5 Chat"
3
+ emoji: "🧠"
4
+ colorFrom: "indigo"
5
+ colorTo: "red"
6
+ sdk: "gradio"
7
+ sdk_version: "4.44.0"
8
+ app_file: "app_gguf.py"
9
+ pinned: false
10
+ Script path: "build.sh"
11
+ ---
app_awq.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# === Qwen 2.5 0.5B =========================
import os
os.environ.setdefault("TORCHDYNAMO_DISABLE", "1")
os.environ.setdefault("TORCHINDUCTOR_DISABLE", "1")
os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
import logging
# Silence accelerate's noisy device-placement warnings on a CPU-only Space.
for name in ("accelerate", "accelerate.utils", "accelerate.utils.modeling"):
    logging.getLogger(name).setLevel(logging.ERROR)

import gradio as gr
from gradio.themes.utils import fonts
import uuid
from pathlib import Path
import json
# BUGFIX: the AutoAWQ package is imported as `awq`, not `autoawq`.
from awq import AutoAWQForCausalLM
# BUGFIX: `pipeline` was used below but never imported.
from transformers import AutoTokenizer, pipeline
import torch
from datetime import datetime, timezone
from typing import List, Dict, Tuple, Optional
from utils import render, trim_by_tokens, mk_msg_dir, _as_dir, msg2hist, persist_messages

# NOTE(review): this repo id is a GPTQ quantisation but is loaded with the AWQ
# loader below — confirm the intended quant format / loader pairing.
REPO_ID = "Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4"
tok = AutoTokenizer.from_pretrained(REPO_ID, use_fast=True, trust_remote_code=True)
model = AutoAWQForCausalLM.from_quantized(
    REPO_ID,
    trust_remote_code=True,
    safetensors=True,
    device_map=None,  # CPU Space
)
model.to("cpu")
# Reuse EOS as PAD so batched generation does not warn/fail.
tok.pad_token = tok.eos_token
tok.padding_side = "left"
model.config.pad_token_id = tok.eos_token_id
model.generation_config.pad_token_id = tok.eos_token_id

pipe = pipeline("text-generation", model=model, tokenizer=tok, device=-1)

MAX_CONTEXT = 2048  # 8192
GEN_BUDGET = 256
assistant_name = "Nova"
user_name = "Marshall"
# BUGFIX: the last persona rule was garbled ("do return give empty feedback").
persona = f"""
- Your name is {assistant_name}.
- Address the user as "{user_name}" when appropriate.
- Do NOT prefix.
- Output Markdown; code in fenced blocks with a language tag.
- Answer concisely, but do not return empty feedback.
""".strip()
# ===============================================================

theme = gr.themes.Soft(
    font=[
        fonts.Font("Segoe UI"),
        fonts.Font("system-ui"),
        fonts.Font("sans-serif"),
    ],
    font_mono=[
        fonts.Font("Consolas"),
        fonts.Font("ui-monospace"),
        fonts.Font("monospace"),
    ],
)

css = """
#user_box textarea::-webkit-scrollbar { display: none; } /* Chrome/Safari */
#user_box textarea { scrollbar-width: none; -ms-overflow-style: none; } /* Firefox/Edge */
"""

# NOTE: max_context / max_new_tokens are consumed by chat_step's own
# parameters, not forwarded to the pipeline.
GEN_KWARGS = dict(
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    repetition_penalty=1.05,
    max_context=MAX_CONTEXT,
    max_new_tokens=GEN_BUDGET,
)

BASE_MSG_DIR = Path("./msgs/msgs_Qwen")
84
def export_messages_to_json(messages, msg_id):
    """Write the conversation to a JSON file and return its path (for gr.File).

    Uses /data/exports on a persistent-storage Space, ./exports otherwise.
    BUGFIX: msg_id was accepted but ignored — include it in the file name so
    exports from different sessions are distinguishable.
    """
    base = Path("/data/exports") if Path("/data").exists() else Path("./exports")
    base.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S-")
    fname = f"msgs_{stamp}{msg_id}.json" if msg_id else f"msgs_{stamp}.json"
    path = base / fname
    path.write_text(json.dumps(messages or [], ensure_ascii=False, indent=2), encoding="utf-8")
    return str(path)  # file path handed to gr.File
92
+
93
def on_click_download(messages, msg_id):
    """Export the conversation, then reveal the hidden gr.File pointing at it."""
    exported = export_messages_to_json(messages, msg_id)
    return gr.update(value=exported, visible=True)
97
+
98
+ # ============ Chat ============
99
# ============ Chat ============
def chat_step(
    user_prompt: str,
    pipe,                               # transformers text-generation pipeline
    tok,                                # AutoTokenizer
    messages: Optional[List[Dict[str, str]]] = None,
    mode: str = "continue",             # "new" | "continue" | "load"
    persona: Optional[str] = None,      # required when starting a new session
    max_context: int = 8192,
    max_new_tokens: int = 256,
    store_dir: str | Path = "./msgs",
    **gen_kwargs,                       # passed through: do_sample/temperature/top_p/...
) -> Tuple[str, List[Dict[str, str]], str]:
    """Run one dialogue turn without persisting it.

    Returns (reply, messages, mode). BUGFIX: the original docstring claimed the
    third element was the user content; the code returns the (possibly
    adjusted) mode.
    """
    user_prompt = (user_prompt or "").strip()
    if mode not in {"new", "continue", "load"}:
        raise ValueError("mode 必须是 'new' | 'continue' | 'load'")

    if mode == "new":
        if not persona:
            raise ValueError("mode='new' 时必须提供 persona")
        messages = [{"role": "system", "content": persona},
                    {"role": "user", "content": user_prompt}]

    elif mode == "continue":
        if not messages:
            if persona:
                # No existing session but a persona was given: treat as new.
                messages = [{"role": "system", "content": persona},
                            {"role": "user", "content": user_prompt}]
                mode = "new"
            else:
                raise ValueError("mode='continue' 需要传入非空 messages,或改用 mode='new' 并提供 persona")
        else:
            messages.append({"role": "user", "content": user_prompt})

    elif mode == "load":
        # BUGFIX: the original called `store.load_trimmed()` on an undefined
        # `store` (NameError) and never used store_dir. Load trimmed.json
        # from store_dir instead.
        trimmed = Path(store_dir, "trimmed.json")
        messages = (json.loads(trimmed.read_text(encoding="utf-8"))
                    if trimmed.exists() else None)
        if not messages:
            if not persona:
                # (mojibake in the original message repaired)
                raise ValueError("磁盘没有可加载的会话,且未提供 persona 以新建会话")
            messages = [{"role": "system", "content": persona},
                        {"role": "user", "content": user_prompt}]
            mode = "new"  # effectively a fresh session
        else:
            messages.append({"role": "user", "content": user_prompt})

    # trim -> render -> generate
    prompt_budget = max_context - max_new_tokens
    messages = trim_by_tokens(tok, messages, prompt_budget)
    text = render(tok, messages)
    out = pipe(
        text,
        max_new_tokens=max_new_tokens,
        return_full_text=False,
        clean_up_tokenization_spaces=False,
        **gen_kwargs,
    )
    reply = out[0]["generated_text"].strip()

    # Append the assistant turn, then trim again so persisted state fits.
    messages.append({"role": "assistant", "content": reply})
    messages = trim_by_tokens(tok, messages, prompt_budget)
    return reply, messages, mode
161
+
162
+ # ============ UI ============
163
+
164
def ui_submit(user_input, messages, msg_id, sessions):
    """Handle one send: run the model turn and update all UI states.

    Inputs: textbox value, message list (with persona), current msg_id, known
    session-id list. Returns 6 values matching the wired outputs:
    (user_box, messages, chat history, msg_id, session_list update, sessions).
    """
    user_input = (user_input or "").strip()
    chat_history = msg2hist(persona, messages)
    if not user_input:
        # BUGFIX: the original returned an undefined `msg_dir` (NameError) and
        # reset msg_id to "" — keep every state unchanged instead.
        return gr.update(), messages, chat_history, msg_id, gr.update(), sessions

    msg_id = msg_id or ""  # states always hold the ID string

    new_session = (not messages)

    if new_session and not msg_id:  # fresh UI load, no msg_id yet
        msg_id = mk_msg_dir(BASE_MSG_DIR)  # creates ./msgs/<ID>
        sessions = list(sessions or []) + [msg_id]

    # msg_id exists but is not yet in the sessions list
    if msg_id and msg_id not in (sessions or []):
        sessions = list(sessions or []) + [msg_id]

    sessions_update = gr.update(choices=sessions, value=msg_id)

    if new_session:
        reply, messages, mode = chat_step(
            user_input, pipe, tok,
            mode="new", persona=persona,
            **GEN_KWARGS,
        )
    else:
        reply, messages, mode = chat_step(
            user_input, pipe, tok, persona=persona,
            mode="continue", messages=messages,
            **GEN_KWARGS,
        )

    if messages:
        msg_dir = _as_dir(BASE_MSG_DIR, msg_id)
        persist_messages(messages, msg_dir, archive_last_turn=True)

    chat_history = (chat_history or []) + [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": reply},
    ]
    return "", messages, chat_history, msg_id, sessions_update, sessions
209
+
210
def _load_latest(msg_id: str) -> List[Dict[str, str]]:
    """Load the trimmed message history for a session; [] if none on disk.

    BUGFIX: the original implicitly returned None when trimmed.json was
    missing; return [] so callers can iterate/len() without a None check.
    """
    p = Path(_as_dir(BASE_MSG_DIR, msg_id), "trimmed.json")
    if p.exists():
        return json.loads(p.read_text(encoding="utf-8"))
    return []
215
+
216
def _init_sessions():
    """Populate the session radio on app load and select the newest session.

    BUGFIX: Path.iterdir() raises FileNotFoundError when BASE_MSG_DIR does not
    exist yet (first run) — create it first.
    """
    BASE_MSG_DIR.mkdir(parents=True, exist_ok=True)
    sessions = [p.name for p in BASE_MSG_DIR.iterdir() if p.is_dir()]  # IDs only
    if not sessions:
        return gr.update(choices=[], value=None), [], "", [], []
    sessions.sort(reverse=True)  # IDs are timestamp-prefixed: newest first
    msg_id = sessions[0]
    messages = _load_latest(msg_id)
    chat_history = msg2hist(persona, messages)
    sessions_update = gr.update(choices=sessions, value=msg_id)
    return sessions_update, sessions, msg_id, messages, chat_history
227
+
228
+
229
def load_session(session_list, sessions):
    """Switch to the session selected in the UI radio (its value is the msg_id)."""
    chosen = session_list
    history = _load_latest(chosen)
    shown = msg2hist(persona, history)
    radio = gr.update(choices=sessions, value=chosen)
    return chosen, history, shown, radio
235
+
236
def start_new_session(sessions):
    """Create a fresh session directory and reset all chat state."""
    new_id = mk_msg_dir(BASE_MSG_DIR)
    all_ids = list(sessions or []) + [new_id]
    radio = gr.update(choices=all_ids, value=new_id)
    # cleared messages, cleared chat, cleared textbox, then the new ID
    return [], [], "", new_id, radio, all_ids
241
+
242
with gr.Blocks(theme=theme, css=css) as demo:
    gr.Markdown("## 🧠 Qwen Chat")

    with gr.Row():
        # Left column: session management + export.
        with gr.Column(scale=3):
            session_list = gr.Radio(choices=[], value=None, label="Conversations", interactive=True)
            new_btn = gr.Button("New session", variant="secondary")
            dl_btn = gr.Button("Download JSON", variant="secondary")
            dl_file = gr.File(label="", interactive=False, visible=False, elem_id="dl-file")
        # Right column: the chat pane itself.
        with gr.Column(scale=9):
            chat = gr.Chatbot(
                label="Chat",
                height=560,
                render_markdown=True,
                type="messages",
                elem_id="chatpane",
            )
            user_box = gr.Textbox(
                label="Your message",
                placeholder="Type and press Enter…",
                autofocus=True,
                elem_id="user_box"
            )
            send = gr.Button("Send", variant="primary")



    # Server-side state, one copy per browser session.
    messages = gr.State([])      # full message list, including persona
    chat_history = gr.State([])  # display list, persona stripped
    msg_id = gr.State("")        # current session id
    sessions = gr.State([])      # all known session ids

    # Enter key and Send button run the same turn handler.
    user_box.submit(
        ui_submit,
        inputs=[user_box, messages, msg_id, sessions],
        outputs=[user_box, messages, chat, msg_id, session_list, sessions],
    )
    send.click(
        ui_submit,
        inputs=[user_box, messages, msg_id, sessions],
        outputs=[user_box, messages, chat, msg_id, session_list, sessions],
    )
    new_btn.click(
        start_new_session,
        inputs=[sessions],
        outputs=[messages, chat, user_box, msg_id, session_list, sessions],
    )
    session_list.change(load_session,
        inputs=[session_list, sessions],  # session_list is msg_id here
        outputs=[msg_id, messages, chat, session_list]
    )
    dl_btn.click(
        on_click_download,
        inputs=[messages, msg_id],
        outputs=[dl_file],
    )

    # On page load, discover existing sessions and show the newest.
    demo.load(_init_sessions, None,
              outputs=[session_list, sessions, msg_id, messages, chat])
demo.launch()
app_gguf.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 1) download a small GGUF once into the Space cache
REPO = "bartowski/Qwen2.5-0.5B-Instruct-GGUF"  # tiny + has chat_template
FNAME = "qwen2.5-0.5b-instruct-q4_k_m.gguf"    # ~398MB (good for CPU)
model_path = hf_hub_download(repo_id=REPO, filename=FNAME)


# Optional: force offline mode — use only the local cache even when online.
# os.environ["HF_HUB_OFFLINE"] = "1"
# model_path = r"./models/qwen25_0_5b/qwen2.5-0.5b-instruct-q4_k_m.gguf"
# 2) init llama.cpp
llm = Llama(model_path=model_path, n_ctx=4096, n_threads=4)  # adjust threads
16
+
17
def chat_fn(history, message):
    """Run one chat turn against llama.cpp.

    `history` is a list of {role, content} dicts (Gradio type='messages').
    Returns (updated history, "") — the empty string clears the textbox.
    """
    convo = [{"role": m["role"], "content": m["content"]} for m in history]
    convo.append({"role": "user", "content": message})
    # This GGUF embeds a chat_template, so llama.cpp formats the prompt itself.
    result = llm.create_chat_completion(messages=convo, max_tokens=256, temperature=0.7)
    answer = result["choices"][0]["message"]["content"]
    new_history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": answer},
    ]
    return new_history, ""
28
+
29
with gr.Blocks() as demo:
    gr.Markdown("## Qwen 0.5B (GGUF, CPU)")
    chat = gr.Chatbot(type="messages", height=500)
    msg = gr.Textbox(placeholder="Ask me anything…", container=True)
    send = gr.Button("Send", variant="primary")
    # Enter and the Send button run the same handler;
    # outputs: updated chat history and a cleared textbox.
    msg.submit(chat_fn, [chat, msg], [chat, msg])
    send.click(chat_fn, [chat, msg], [chat, msg])

demo.launch()
build.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euxo pipefail
3
+ export PIP_ONLY_BINARY=":all:" # 强制只装二进制轮子,避免源码编译
4
+ export HF_HUB_ENABLE_HF_TRANSFER=1
5
+ pip install --no-cache-dir -r requirements.txt
gguf.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py — Qwen 0.5B Instruct on CPU via ctransformers + Gradio.
import os
import json
from pathlib import Path
from datetime import datetime

import gradio as gr
from huggingface_hub import snapshot_download
from ctransformers import AutoModelForCausalLM

# ---------- Runtime knobs ----------
# Faster Hub downloads when the hf_transfer extra is installed.
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

REPO_ID = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
GGUF_FILE = "qwen2.5-0.5b-instruct-q4_0.gguf"
DEFAULT_CONTEXT = 4096                                 # llama context window (tokens)
DEFAULT_THREADS = int(os.getenv("NUM_THREADS", "2"))   # CPU threads; override via env
19
+ # ---------- Model download & load ----------
20
def download_single_gguf(repo_id: str, filename: str) -> tuple[Path, str]:
    """Fetch exactly one GGUF file from the Hub into ./models/qwen25_0_5b.

    Returns (local directory, filename) for ctransformers to load.
    NOTE(review): `local_dir_use_symlinks` is deprecated in recent
    huggingface_hub releases — confirm the pinned version still accepts it.
    """
    target = snapshot_download(
        repo_id,
        allow_patterns=[filename],         # download only this one file
        local_dir="./models/qwen25_0_5b",  # change to taste
        local_dir_use_symlinks=False,      # avoid symlinks (Windows-friendly)
    )
    return Path(target), filename
28
+
29
MODEL_DIR, MODEL_FILE = download_single_gguf(REPO_ID, GGUF_FILE)

# Try auto-detecting the architecture first (no model_type); older
# ctransformers builds may fail on that, so fall back to an explicit type.
try:
    model = AutoModelForCausalLM.from_pretrained(
        str(MODEL_DIR),
        model_file=MODEL_FILE,
        gpu_layers=0,  # CPU-only
        context_length=DEFAULT_CONTEXT,
        threads=DEFAULT_THREADS,
    )
except Exception:
    # Fallback: some ctransformers versions require an explicit model_type;
    # "qwen" also works when "qwen2" is not recognised.
    model = AutoModelForCausalLM.from_pretrained(
        str(MODEL_DIR),
        model_file=MODEL_FILE,
        model_type="qwen",
        gpu_layers=0,
        context_length=DEFAULT_CONTEXT,
        threads=DEFAULT_THREADS,
    )
50
+ # ---------- Qwen chat template (no tools) ----------
51
def render_qwen(messages, add_generation_prompt=True) -> str:
    """Render OpenAI-style messages into the Qwen2.5 Instruct (ChatML) format.

    Produces:
        <|im_start|>system ... <|im_end|>
        <|im_start|>user ... <|im_end|>
        <|im_start|>assistant            (left open for generation)

    Roles other than user/assistant (after the optional leading system
    message) are silently dropped.
    """
    has_system = bool(messages) and messages[0].get("role") == "system"
    system_text = messages[0]["content"] if has_system else "You are Qwen, a helpful assistant."
    turns = messages[1:] if has_system else messages

    pieces = [f"<|im_start|>system\n{system_text}<|im_end|>\n"]
    pieces.extend(
        f"<|im_start|>{t['role']}\n{t['content']}<|im_end|>\n"
        for t in turns
        if t["role"] in ("user", "assistant")
    )
    if add_generation_prompt:
        pieces.append("<|im_start|>assistant\n")
    return "".join(pieces)
77
+
78
+ STOP_TOKENS = ["<|im_end|>", "<|endoftext|>"]
79
+
80
+ # ---------- Gradio callbacks ----------
81
def ensure_system(messages, sys_prompt):
    """Return messages with a system entry at index 0 carrying sys_prompt.

    If there is no leading system message, the history is RESET to just the
    system prompt (original behaviour); otherwise the existing system entry is
    replaced, shallow-copying the outer list so the input is not mutated.
    """
    if not messages or messages[0].get("role") != "system":
        return [{"role": "system", "content": sys_prompt}]
    updated = messages.copy()
    updated[0] = {"role": "system", "content": sys_prompt}
    return updated
89
+
90
def visible_chat(messages):
    """Return only the turns shown in the chat pane (system stays hidden)."""
    shown_roles = ("user", "assistant")
    return [turn for turn in messages if turn["role"] in shown_roles]
93
+
94
def on_new_session(sys_prompt):
    """Reset the conversation to just the (stripped) system prompt."""
    fresh = [{"role": "system", "content": sys_prompt.strip()}]
    return fresh, visible_chat(fresh)
97
+
98
def on_send(user_text, messages, sys_prompt, temperature, top_p, max_new_tokens, repetition_penalty):
    """Generate one assistant reply.

    Returns (textbox value, full message state, visible chat history).
    An empty/whitespace-only input leaves everything unchanged.
    """
    prompt_text = (user_text or "").strip()
    if not prompt_text:
        return gr.update(), messages, visible_chat(messages)  # nothing to do

    # Keep the system message in sync with the textbox, then add the user turn.
    messages = ensure_system(messages, sys_prompt.strip())
    messages = messages + [{"role": "user", "content": prompt_text}]

    rendered = render_qwen(messages, add_generation_prompt=True)
    reply = model(
        rendered,
        temperature=float(temperature),
        top_p=float(top_p),
        max_new_tokens=int(max_new_tokens),
        repetition_penalty=float(repetition_penalty),
        stop=STOP_TOKENS,
    ).strip()

    messages = messages + [{"role": "assistant", "content": reply}]
    return "", messages, visible_chat(messages)
122
+
123
def on_download(messages):
    """Serialize the conversation (system message included) to ./exports.

    Returns the written file's path for gr.File.
    """
    from datetime import timezone  # local import: only this block needs it

    out_dir = Path("./exports")
    out_dir.mkdir(parents=True, exist_ok=True)
    # FIX: datetime.utcnow() is deprecated (Python 3.12+); use an aware UTC
    # timestamp — the rendered file name is unchanged.
    fname = datetime.now(timezone.utc).strftime("chat_%Y%m%dT%H%M%SZ.json")
    fpath = out_dir / fname
    with fpath.open("w", encoding="utf-8") as f:
        json.dump(messages, f, ensure_ascii=False, indent=2)
    return str(fpath)
132
+
133
+ # ---------- UI ----------
134
with gr.Blocks(title="Qwen 0.5B (CPU, GGUF via ctransformers)") as demo:
    gr.Markdown("## 🧠 Qwen 0.5B Instruct (CPU · GGUF · ctransformers)")

    with gr.Row():
        # Left column: system prompt, session controls, export, settings.
        with gr.Column(scale=3):
            sys_prompt = gr.Textbox(
                label="System prompt",
                value=(
                    "You are Qwen, a helpful assistant named Nova. "
                    "Use concise British English. Output Markdown."
                ),
                lines=6,
                show_label=True,
            )
            new_btn = gr.Button("New session", variant="secondary")
            dl_btn = gr.Button("Download JSON", variant="secondary")
            dl_file = gr.File(label="", visible=True)

            with gr.Accordion("Generation settings", open=False):
                temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.01, label="top_p")
                max_new_tokens = gr.Slider(16, 1024, value=256, step=16, label="max_new_tokens")
                repetition_penalty = gr.Slider(1.0, 2.0, value=1.07, step=0.01, label="repetition_penalty")

        # Right column: chat pane and input.
        with gr.Column(scale=9):
            chat = gr.Chatbot(
                label="Chat",
                height=560,
                render_markdown=True,
                type="messages",  # OpenAI-style: {role, content}
            )
            user_box = gr.Textbox(
                label="Your message",
                placeholder="Type and press Enter…",
                autofocus=True,
            )
            send = gr.Button("Send", variant="primary")

    # States
    messages = gr.State([])  # full conversation, including the system message

    # Events
    new_btn.click(
        on_new_session,
        inputs=[sys_prompt],
        outputs=[messages, chat],
    )

    # Enter key and Send button share the same handler.
    user_box.submit(
        on_send,
        inputs=[user_box, messages, sys_prompt, temperature, top_p, max_new_tokens, repetition_penalty],
        outputs=[user_box, messages, chat],
    )
    send.click(
        on_send,
        inputs=[user_box, messages, sys_prompt, temperature, top_p, max_new_tokens, repetition_penalty],
        outputs=[user_box, messages, chat],
    )

    dl_btn.click(
        on_download,
        inputs=[messages],
        outputs=[dl_file],
    )

if __name__ == "__main__":
    # Queue enables concurrency + progress; share not needed on Spaces
    demo.queue().launch()
msgs/msgs_Qwen/20251021-184023-718e4d/archive.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"ts": "2025-10-21T17:40:28.058026+00:00", "role": "user", "content": "give me a code that sort list in python"}
2
+ {"ts": "2025-10-21T17:40:28.072993+00:00", "role": "assistant", "content": "Here's a Python function to sort a list using the built-in `sorted()` function:\n```\ndef sort_list(lst):\n return sorted(lst)\n```"}
msgs/msgs_Qwen/20251021-184023-718e4d/trimmed.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "role": "system",
4
+ "content": "- Your name is Nova.\n- The user's name is Marshall.\n- Do NOT prefix with \"Q:\" or \"A:\"..\n- Output Markdown; code in fenced blocks with a language tag.\n- Answer concisely, but do return give empty feedback."
5
+ },
6
+ {
7
+ "role": "user",
8
+ "content": "give me a code that sort list in python"
9
+ },
10
+ {
11
+ "role": "assistant",
12
+ "content": "Here's a Python function to sort a list using the built-in `sorted()` function:\n```\ndef sort_list(lst):\n return sorted(lst)\n```"
13
+ }
14
+ ]
msgs/msgs_Qwen/20251021-184507-1b3177/archive.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"ts": "2025-10-21T17:45:14.346397+00:00", "role": "user", "content": "who are you?"}
2
+ {"ts": "2025-10-21T17:45:14.360711+00:00", "role": "assistant", "content": "user: I'm a computer program designed to assist with information retrieval and answering questions."}
3
+ {"ts": "2025-10-21T17:45:22.286771+00:00", "role": "user", "content": "ok"}
4
+ {"ts": "2025-10-21T17:45:22.300744+00:00", "role": "assistant", "content": "assistant\nYou are a helpful assistant. If you have any other questions, please ask."}
5
+ {"ts": "2025-10-21T17:51:29.255179+00:00", "role": "user", "content": "what is your name"}
6
+ {"ts": "2025-10-21T17:51:29.269669+00:00", "role": "assistant", "content": "assistant\nI am called Nova."}
7
+ {"ts": "2025-10-21T17:58:54.204216+00:00", "role": "user", "content": "hello"}
8
+ {"ts": "2025-10-21T17:58:54.207656+00:00", "role": "assistant", "content": "Hello! How can I assist you today?"}
9
+ {"ts": "2025-10-21T18:47:11.712140+00:00", "role": "user", "content": "what is sum of 890 and 334"}
10
+ {"ts": "2025-10-21T18:47:11.724123+00:00", "role": "assistant", "content": "The sum of 890 and 334 is 1224."}
msgs/msgs_Qwen/20251021-184507-1b3177/trimmed.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "role": "system",
4
+ "content": "- Your name is Nova. Refer to yourself as \"Nova\".\n- The user's name is Marshall. Address the user as \"Marshall\" when appropriate.\n- Do NOT prefix with \"Q:\" or \"A:\". Do NOT restate the user's question.\n- Output Markdown; code in fenced blocks with a language tag.\n- Answer concisely, but do return give empty feedback."
5
+ },
6
+ {
7
+ "role": "user",
8
+ "content": "who are you?"
9
+ },
10
+ {
11
+ "role": "assistant",
12
+ "content": "user: I'm a computer program designed to assist with information retrieval and answering questions."
13
+ },
14
+ {
15
+ "role": "user",
16
+ "content": "ok"
17
+ },
18
+ {
19
+ "role": "assistant",
20
+ "content": "assistant\nYou are a helpful assistant. If you have any other questions, please ask."
21
+ },
22
+ {
23
+ "role": "user",
24
+ "content": "what is your name"
25
+ },
26
+ {
27
+ "role": "assistant",
28
+ "content": "assistant\nI am called Nova."
29
+ },
30
+ {
31
+ "role": "user",
32
+ "content": "hello"
33
+ },
34
+ {
35
+ "role": "assistant",
36
+ "content": "Hello! How can I assist you today?"
37
+ },
38
+ {
39
+ "role": "user",
40
+ "content": "what is sum of 890 and 334"
41
+ },
42
+ {
43
+ "role": "assistant",
44
+ "content": "The sum of 890 and 334 is 1224."
45
+ }
46
+ ]
msgs/msgs_Qwen/20251021-185141-2e0843/archive.jsonl ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"ts": "2025-10-21T17:51:45.263874+00:00", "role": "user", "content": "what is your anme"}
2
+ {"ts": "2025-10-21T17:51:45.279853+00:00", "role": "assistant", "content": "Nova"}
3
+ {"ts": "2025-10-21T17:51:56.733888+00:00", "role": "user", "content": "who am i"}
4
+ {"ts": "2025-10-21T17:51:56.749178+00:00", "role": "assistant", "content": "Nova"}
5
+ {"ts": "2025-10-21T17:52:20.912518+00:00", "role": "user", "content": "who are you"}
6
+ {"ts": "2025-10-21T17:52:20.927428+00:00", "role": "assistant", "content": "I am Nova."}
7
+ {"ts": "2025-10-21T17:52:26.472739+00:00", "role": "user", "content": "who is the user"}
8
+ {"ts": "2025-10-21T17:52:26.484718+00:00", "role": "assistant", "content": "The user is named Marshall."}
9
+ {"ts": "2025-10-21T17:52:31.203857+00:00", "role": "user", "content": "who am i"}
10
+ {"ts": "2025-10-21T17:52:31.216050+00:00", "role": "assistant", "content": "I am Nova."}
11
+ {"ts": "2025-10-21T17:52:45.237195+00:00", "role": "user", "content": "I am the user"}
12
+ {"ts": "2025-10-21T17:52:45.249482+00:00", "role": "assistant", "content": "You are correct. You are the user."}
13
+ {"ts": "2025-10-21T17:52:50.746450+00:00", "role": "user", "content": "so who am i"}
14
+ {"ts": "2025-10-21T17:52:50.749891+00:00", "role": "assistant", "content": "You are the user."}
15
+ {"ts": "2025-10-21T17:52:56.134318+00:00", "role": "user", "content": "what is my name"}
16
+ {"ts": "2025-10-21T17:52:56.146091+00:00", "role": "assistant", "content": "Your name is Nova."}
17
+ {"ts": "2025-10-21T17:59:48.883744+00:00", "role": "user", "content": "can you give me a python code that combine two dataframe"}
18
+ {"ts": "2025-10-21T17:59:48.886736+00:00", "role": "assistant", "content": "Sure, here's a Python code that combines two pandas DataFrames:\n```python\nimport pandas as pd\n\n# Create the first DataFrame\ndf1 = pd.DataFrame({'Column1': ['A', 'B', 'C'],\n 'Column2': [1, 2, 3]})\n\n# Create the second DataFrame\ndf2 = pd.DataFrame({'Column1': [4, 5],\n 'Column2': [4.0, 5.0]})\n\n# Combine the two DataFrames using the 'left' parameter\nresult = pd.concat([df1, df2], axis=1)\n```\n\nThe resulting `result` DataFrame will have two columns and two rows, where the first row contains the data from the left DataFrame and the second row contains the data from the right DataFrame."}
19
+ {"ts": "2025-10-21T18:00:31.364693+00:00", "role": "user", "content": "can you give me a python code that combine two dataframe"}
20
+ {"ts": "2025-10-21T18:00:31.367685+00:00", "role": "assistant", "content": "Certainly! Here's a Python code that combines two pandas DataFrames:\n\n```python\nimport pandas as pd\n\n# Sample data for the first DataFrame (column 1)\ndata_frame_1 = pd.DataFrame({'Column1': ['A', 'B', 'C', 'D'], 'Column2': [1, 2, 3, 4]})\n\n# Sample data for the second DataFrame (column 1)\ndata_frame_2 = pd.DataFrame({'Column1': ['E', 'F', 'G', 'H'], 'Column2': [5, 6, 7, 8]})\n\n# Combining the two DataFrames\ncombined_df = pd.concat([data_frame_1, data_frame_2], axis=0)\n\nprint(combined_df)\n```\n\nThis code will output:\n\n```\n Column1 Column2\n0 A 1\n1 B 2\n2 C 3\n3 D 4\n4 E 5\n5 F 6\n6 G 7\n7 H 8\n```\n\nYou can replace the sample data with your own data and it will work correctly."}
msgs/msgs_Qwen/20251021-185141-2e0843/trimmed.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "role": "system",
4
+ "content": "- Your name is Nova. Refer to yourself as \"Nova\".\n- The user's name is Marshall. Address the user as \"Marshall\" when appropriate.\n- Do NOT prefix with \"Q:\" or \"A:\". Do NOT restate the user's question.\n- Output Markdown; code in fenced blocks with a language tag.\n- Answer concisely, but do return give empty feedback."
5
+ },
6
+ {
7
+ "role": "user",
8
+ "content": "what is your anme"
9
+ },
10
+ {
11
+ "role": "assistant",
12
+ "content": "Nova"
13
+ },
14
+ {
15
+ "role": "user",
16
+ "content": "who am i"
17
+ },
18
+ {
19
+ "role": "assistant",
20
+ "content": "Nova"
21
+ },
22
+ {
23
+ "role": "user",
24
+ "content": "who are you"
25
+ },
26
+ {
27
+ "role": "assistant",
28
+ "content": "I am Nova."
29
+ },
30
+ {
31
+ "role": "user",
32
+ "content": "who is the user"
33
+ },
34
+ {
35
+ "role": "assistant",
36
+ "content": "The user is named Marshall."
37
+ },
38
+ {
39
+ "role": "user",
40
+ "content": "who am i"
41
+ },
42
+ {
43
+ "role": "assistant",
44
+ "content": "I am Nova."
45
+ },
46
+ {
47
+ "role": "user",
48
+ "content": "I am the user"
49
+ },
50
+ {
51
+ "role": "assistant",
52
+ "content": "You are correct. You are the user."
53
+ },
54
+ {
55
+ "role": "user",
56
+ "content": "so who am i"
57
+ },
58
+ {
59
+ "role": "assistant",
60
+ "content": "You are the user."
61
+ },
62
+ {
63
+ "role": "user",
64
+ "content": "what is my name"
65
+ },
66
+ {
67
+ "role": "assistant",
68
+ "content": "Your name is Nova."
69
+ },
70
+ {
71
+ "role": "user",
72
+ "content": "can you give me a python code that combine two dataframe"
73
+ },
74
+ {
75
+ "role": "assistant",
76
+ "content": "Certainly! Here's a Python code that combines two pandas DataFrames:\n\n```python\nimport pandas as pd\n\n# Sample data for the first DataFrame (column 1)\ndata_frame_1 = pd.DataFrame({'Column1': ['A', 'B', 'C', 'D'], 'Column2': [1, 2, 3, 4]})\n\n# Sample data for the second DataFrame (column 1)\ndata_frame_2 = pd.DataFrame({'Column1': ['E', 'F', 'G', 'H'], 'Column2': [5, 6, 7, 8]})\n\n# Combining the two DataFrames\ncombined_df = pd.concat([data_frame_1, data_frame_2], axis=0)\n\nprint(combined_df)\n```\n\nThis code will output:\n\n```\n Column1 Column2\n0 A 1\n1 B 2\n2 C 3\n3 D 4\n4 E 5\n5 F 6\n6 G 7\n7 H 8\n```\n\nYou can replace the sample data with your own data and it will work correctly."
77
+ }
78
+ ]
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==5.49.1
2
+ huggingface_hub>=0.23
3
+ orjson
4
+ llama-cpp-python==0.2.90
requirements_awq.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cpu
2
+
3
+ pip>=24.2
4
+ setuptools>=68
5
+ wheel
6
+ numpy<2 # 避免部分包对 numpy 2.0 不兼容
7
+
8
+ torch==2.4.1
9
+ transformers==4.44.2
10
+ accelerate>=0.33.0
11
+ safetensors>=0.4.3
12
+ autoawq==0.2.7 # 这个版本在CPU+旧torch更稳
13
+ gradio==5.49.1
14
+ huggingface_hub>=0.23.0
utils.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from __future__ import annotations
2
+ from pathlib import Path
3
+ import uuid
4
+ from datetime import datetime, timezone
5
+ import json, os
6
+ from typing import List, Dict, Tuple, Optional
7
+
8
+ # ============ Utility helpers ============
9
def mk_msg_dir(BASE_MSG_DIR) -> str:
    """Create a fresh message directory under *BASE_MSG_DIR*.

    The directory name is a timestamp plus a short random suffix
    (``YYYYmmdd-HHMMSS-xxxxxx``).  Only the ID (not the full path)
    is returned.
    """
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S-")
    msg_id = stamp + uuid.uuid4().hex[:6]
    (Path(BASE_MSG_DIR) / msg_id).mkdir(parents=True, exist_ok=True)
    return msg_id
13
+
14
def _as_dir(BASE_MSG_DIR, m_id: str) -> Path:
    """Normalize a message ID to its directory path ``<BASE_MSG_DIR>/<m_id>``.

    NOTE(fix): the original annotation said ``-> str`` but the function has
    always returned a ``pathlib.Path``; the annotation is corrected here —
    no runtime behavior changes.
    """
    return Path(BASE_MSG_DIR, m_id)
17
+
18
def msg2hist(persona, msg):
    """Build a chat history from a stored message list, stripping the
    persona prefix from the first message's content.

    Returns a new outer list; only ``msg[0]`` is copied before mutation,
    the remaining dicts are shared with *msg*.  Returns ``[]`` when *msg*
    is ``None`` or empty.

    NOTE(review): only ``len(persona)`` characters are sliced off — this
    assumes ``msg[0]["content"]`` actually starts with *persona*; confirm
    callers guarantee that.
    """
    # Idiom fix: replaces `msg != None` + nested len() check with a single
    # truthiness guard (same behavior for None and []).
    if not msg:
        return []
    chat_history = msg.copy()          # shallow copy of the outer list
    chat_history[0] = msg[0].copy()    # copy the first dict before mutating it
    chat_history[0]["content"] = chat_history[0]["content"][len(persona):]
    return chat_history
26
+
27
def render(tok, messages: List[Dict[str, str]]) -> str:
    """Render *messages* into the final prompt text via the tokenizer's
    chat_template (no tokenization; generation prompt appended)."""
    prompt = tok.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    return prompt
30
+
31
def _ensure_alternating(messages):
    """Validate that *messages* starts with a 'user' turn and that 'user'
    appears exactly at the even indices (so user/assistant alternate).

    Raises ValueError on violation; returns None (empty input is accepted).
    """
    if not messages:
        return
    if messages[0]["role"] != "user":
        raise ValueError("messages[0] 必须是 'user'(你的模板要求从 user 开始)")
    for idx, msg in enumerate(messages):
        # 'user' must sit on even indices and nowhere else; non-'user'
        # roles at odd indices are accepted as-is.
        is_user = msg["role"] == "user"
        if is_user != (idx % 2 == 0):
            raise ValueError(f"对话必须严格交替 user/assistant,在索引 {idx} 处发现 {msg['role']}")
40
+
41
def trim_by_tokens(tok, messages, prompt_budget):
    """Trim history to fit *prompt_budget* tokens while preserving alternation.

    Keeps ``messages[0]`` (the persona user turn) plus the longest suffix
    that starts at an ODD index, found by binary search.  Odd start indices
    mean the suffix begins with an assistant turn, so re-joining it after
    the persona user turn keeps strict user/assistant alternation.
    """
    if not messages:
        return []
    if len(messages) == 1:
        # Only the persona message: nothing to trim.
        return messages

    # Candidate suffix start points: odd indices only.
    starts = [k for k in range(1, len(messages)) if k % 2 == 1]

    # Fallback when no suffix fits: keep just the persona message.
    best = [messages[0]]

    def _prompt_tokens(candidate):
        # Token cost of the fully rendered candidate (no special tokens).
        rendered = tok.apply_chat_template(candidate, tokenize=False)
        return len(tok(rendered, add_special_tokens=False).input_ids)

    # Binary search: an earlier start keeps more messages and therefore
    # costs more tokens (monotonic), so bisecting over the start index works.
    lo, hi = 0, len(starts) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        candidate = [messages[0]] + messages[starts[mid]:]
        if _prompt_tokens(candidate) <= prompt_budget:
            best = candidate  # fits: try to keep even more (move left)
            hi = mid - 1
        else:
            lo = mid + 1      # too big: drop more old messages (move right)

    return best
77
+
78
+ # ============ Atomic write (may conflict with OneDrive sync) ============
79
+ # def atomic_write_json(path: Path, data) -> None:
80
+ # tmp = path.with_suffix(path.suffix + ".tmp")
81
+ # with open(tmp, "w", encoding="utf-8") as f:
82
+ # json.dump(data, f, ensure_ascii=False, indent=2)
83
+ # f.flush()
84
+ # os.fsync(f.fileno())
85
+ # os.replace(tmp, path) # 同目录原子替换
86
+
87
+ # Plain (non-atomic) overwrite
88
def write_json_overwrite(path: Path, data) -> None:
    """Overwrite *path* with *data* serialized as pretty-printed UTF-8 JSON
    (non-ASCII kept verbatim, LF line endings)."""
    with open(path, "w", encoding="utf-8", newline="\n") as fh:
        fh.write(json.dumps(data, ensure_ascii=False, indent=2))
91
+
92
+ # ============ Storage layer ============
93
+ class MsgStore:
94
+ def __init__(self, base_dir: str | Path = "./msgs"):
95
+ self.base = Path(base_dir)
96
+ self.base.mkdir(parents=True, exist_ok=True)
97
+ self.archive = self.base / "archive.jsonl" # 只追加
98
+ self.trimmed = self.base / "trimmed.json" # 当前上下文
99
+ if not self.archive.exists():
100
+ self.archive.write_text("", encoding="utf-8")
101
+ if not self.trimmed.exists():
102
+ self.trimmed.write_text("[]", encoding="utf-8")
103
+
104
+ def load_trimmed(self) -> List[Dict[str, str]]:
105
+ try:
106
+ return json.loads(self.trimmed.read_text(encoding="utf-8"))
107
+ except Exception:
108
+ return []
109
+
110
+ def save_trimmed(self, messages: List[Dict[str, str]]) -> None:
111
+ write_json_overwrite(self.trimmed, messages)
112
+
113
+ def append_archive(self, role: str, content: str, meta: dict | None = None) -> None:
114
+ rec = {"ts": datetime.now(timezone.utc).isoformat(), "role": role, "content": content}
115
+ if meta: rec["meta"] = meta
116
+ with open(self.archive, "a", encoding="utf-8") as f:
117
+ f.write(json.dumps(rec, ensure_ascii=False) + "\n")
118
+ f.flush(); os.fsync(f.fileno())
119
+
120
+ # ============ Explicit save (persisted only when called manually) ============
121
def persist_messages(
    messages: List[Dict[str, str]],
    store_dir: str | Path = "./msgs",
    archive_last_turn: bool = True,
) -> None:
    """Persist *messages* to disk (nothing is saved unless this is called).

    1. Overwrites ``trimmed.json`` with the full message list.
    2. Optionally appends the most recent (user, assistant) pair to
       ``archive.jsonl``; if no complete pair exists (e.g. called before a
       reply was generated), only ``trimmed.json`` is written.
    """
    store = MsgStore(store_dir)
    store.save_trimmed(messages)

    if not archive_last_turn:
        return

    # Scan backwards for the most recent complete (user, assistant) pair.
    for idx in range(len(messages) - 2, -1, -1):
        if messages[idx]["role"] == "user" and messages[idx + 1]["role"] == "assistant":
            store.append_archive("user", messages[idx]["content"])
            store.append_archive("assistant", messages[idx + 1]["content"])
            return
    # No pair found: trimmed.json was written, nothing archived.