MarshallCN committed
Commit 95b305d · 0 Parent(s)
Clean initial commit without large files
Files changed:
- .gitignore +12 -0
- README.md +11 -0
- app_awq.py +301 -0
- app_gguf.py +37 -0
- build.sh +5 -0
- gguf.py +201 -0
- msgs/msgs_Qwen/20251021-184023-718e4d/archive.jsonl +2 -0
- msgs/msgs_Qwen/20251021-184023-718e4d/trimmed.json +14 -0
- msgs/msgs_Qwen/20251021-184507-1b3177/archive.jsonl +10 -0
- msgs/msgs_Qwen/20251021-184507-1b3177/trimmed.json +46 -0
- msgs/msgs_Qwen/20251021-185141-2e0843/archive.jsonl +20 -0
- msgs/msgs_Qwen/20251021-185141-2e0843/trimmed.json +78 -0
- requirements.txt +4 -0
- requirements_awq.txt +14 -0
- utils.py +151 -0
.gitignore
ADDED
@@ -0,0 +1,12 @@
# Jupyter (including hidden aux files like .ipynb_foo)
**/.ipynb_checkpoints/
.ipynb_*

# Python cache/bytecode
**/__pycache__/
*.py[cod]
*$py.class
/old/
/old/*
models/
models/*
README.md
ADDED
@@ -0,0 +1,11 @@
---
title: "Qwen2.5 Chat"
emoji: "🧠"
colorFrom: "indigo"
colorTo: "red"
sdk: "gradio"
sdk_version: "4.44.0"
app_file: "app_gguf.py"
pinned: false
Script path: "build.sh"
---
app_awq.py
ADDED
@@ -0,0 +1,301 @@
# === Qwen 2.5 0.5B =========================
import os
os.environ.setdefault("TORCHDYNAMO_DISABLE", "1")
os.environ.setdefault("TORCHINDUCTOR_DISABLE", "1")
os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
import logging
for name in ("accelerate", "accelerate.utils", "accelerate.utils.modeling"):
    logging.getLogger(name).setLevel(logging.ERROR)

import gradio as gr
from gradio.themes.utils import fonts
import uuid
from pathlib import Path
import json
from awq import AutoAWQForCausalLM  # the autoawq package is imported as `awq`
from transformers import AutoTokenizer, pipeline
import torch
from datetime import datetime, timezone
from typing import List, Dict, Tuple, Optional
from utils import render, trim_by_tokens, mk_msg_dir, _as_dir, msg2hist, persist_messages, MsgStore


# local_dir = r"C:\Users\c1052689\hug_models\Qwen2.5Coder1_5B_Instruct"
# local_dir = r"C:\Users\c1052689\hug_models\Qwen2.5_0.5B_Instruct_GPTQ_Int4"
REPO_ID = "Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4"
tok = AutoTokenizer.from_pretrained(REPO_ID, use_fast=True, trust_remote_code=True)
model = AutoAWQForCausalLM.from_quantized(
    REPO_ID,
    trust_remote_code=True,
    safetensors=True,
    device_map=None,  # CPU Space
)
model.to("cpu")
tok.pad_token = tok.eos_token
tok.padding_side = "left"
model.config.pad_token_id = tok.eos_token_id
model.generation_config.pad_token_id = tok.eos_token_id

pipe = pipeline("text-generation", model=model, tokenizer=tok, device=-1)

MAX_CONTEXT = 2048  # 8192
GEN_BUDGET = 256
assistant_name = "Nova"
user_name = "Marshall"
persona = f"""
- Your name is {assistant_name}.
- Address the user as "{user_name}" when appropriate.
- Do NOT prefix with "Q:" or "A:".
- Output Markdown; code in fenced blocks with a language tag.
- Answer concisely, but do not return empty feedback.
""".strip()
# ===============================================================
# from __future__ import annotations

theme = gr.themes.Soft(
    font=[
        fonts.Font("Segoe UI"),
        fonts.Font("system-ui"),
        fonts.Font("sans-serif"),
    ],
    font_mono=[
        fonts.Font("Consolas"),
        fonts.Font("ui-monospace"),
        fonts.Font("monospace"),
    ],
)

css = """
#user_box textarea::-webkit-scrollbar { display: none; }  /* Chrome/Safari */
#user_box textarea { scrollbar-width: none; -ms-overflow-style: none; }  /* Firefox/Edge */
"""

GEN_KWARGS = dict(
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    repetition_penalty=1.05,
    max_context=MAX_CONTEXT,
    max_new_tokens=GEN_BUDGET,
)

BASE_MSG_DIR = Path("./msgs/msgs_Qwen")

def export_messages_to_json(messages, msg_id):
    base = Path("/data/exports") if Path("/data").exists() else Path("./exports")
    base.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S-")
    fname = f"msgs_{stamp}.json"
    path = base / fname
    path.write_text(json.dumps(messages or [], ensure_ascii=False, indent=2), encoding="utf-8")
    return str(path)  # file path handed back to gr.File

def on_click_download(messages, msg_id):
    path = export_messages_to_json(messages, msg_id)
    # Reveal the hidden gr.File and point it at the file just written
    return gr.update(value=path, visible=True)

# ============ Chat ============
def chat_step(
    user_prompt: str,
    pipe,  # transformers.pipeline
    tok,   # AutoTokenizer
    messages: Optional[List[Dict[str, str]]] = None,
    mode: str = "continue",  # "new" | "continue" | "load"
    persona: Optional[str] = None,  # required when starting a new session
    max_context: int = 8192,
    max_new_tokens: int = 256,
    store_dir: str | Path = "./msgs",
    **gen_kwargs,  # passed through to generation: do_sample/temperature/top_p/repetition_penalty etc.
) -> Tuple[str, List[Dict[str, str]], str]:
    """
    Run one turn of conversation without saving.
    Returns: (reply, messages, mode)
    """
    if mode not in {"new", "continue", "load"}:
        raise ValueError("mode must be 'new' | 'continue' | 'load'")

    if mode == "new":
        if not persona:
            raise ValueError("mode='new' requires a persona")
        messages = [{"role": "system", "content": persona}, {"role": "user", "content": user_prompt.strip()}]

    elif mode == "continue":
        if not messages:
            if persona:
                # No existing session but a persona was given: treat as a new session
                messages = [{"role": "system", "content": persona}, {"role": "user", "content": user_prompt.strip()}]
                mode = "new"
            else:
                raise ValueError("mode='continue' needs non-empty messages, or use mode='new' with a persona")
        else:
            messages.append({"role": "user", "content": user_prompt})

    elif mode == "load":
        store = MsgStore(store_dir)
        messages = store.load_trimmed()
        if not messages:
            if not persona:
                raise ValueError("no session on disk to load, and no persona provided to start a new one")
            messages = [{"role": "system", "content": persona}, {"role": "user", "content": user_prompt.strip()}]
            mode = "new"  # effectively a fresh session
        else:
            messages.append({"role": "user", "content": user_prompt})

    # Trim → render → generate
    prompt_budget = max_context - max_new_tokens
    messages = trim_by_tokens(tok, messages, prompt_budget)
    text = render(tok, messages)
    out = pipe(
        text,
        max_new_tokens=max_new_tokens,
        return_full_text=False,
        clean_up_tokenization_spaces=False,
        **gen_kwargs,
    )
    reply = out[0]["generated_text"].strip()

    # Append the assistant turn, then trim again
    messages.append({"role": "assistant", "content": reply})
    messages = trim_by_tokens(tok, messages, prompt_budget)
    return reply, messages, mode

# ============ UI ============

def ui_submit(user_input, messages, msg_id, sessions):
    # Inputs: user_input, message list, msg id, sessions list
    user_input = (user_input or "").strip()
    chat_history = msg2hist(persona, messages)
    if not user_input:
        return gr.update(), messages, chat_history, msg_id, gr.update(), sessions

    # Always keep the ID (not a path) in state
    msg_id = msg_id if msg_id else ""

    new_session = (not messages)
    # sessions = gr.update()

    if new_session and not msg_id:  # page just loaded, no msg_id yet
        msg_id = mk_msg_dir(BASE_MSG_DIR)  # create msg_id under ./msgs/<ID>
        sessions = list(sessions or []) + [msg_id]

    # msg_id exists but is not in sessions yet
    if msg_id and msg_id not in (sessions or []):
        sessions = list(sessions or []) + [msg_id]

    sessions_update = gr.update(choices=sessions, value=msg_id)

    if new_session:
        reply, messages, mode = chat_step(
            user_input, pipe, tok,
            mode="new", persona=persona,
            **GEN_KWARGS,
        )
    else:
        reply, messages, mode = chat_step(
            user_input, pipe, tok, persona=persona,
            mode="continue", messages=messages,
            **GEN_KWARGS,
        )

    if len(messages) > 0:
        msg_dir = _as_dir(BASE_MSG_DIR, msg_id)
        persist_messages(messages, msg_dir, archive_last_turn=True)

    chat_history = (chat_history or []) + [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": reply},
    ]
    return "", messages, chat_history, msg_id, sessions_update, sessions

def _load_latest(msg_id: str) -> List[Dict[str, str]]:
    p = Path(_as_dir(BASE_MSG_DIR, msg_id), "trimmed.json")
    if p.exists():
        return json.loads(p.read_text(encoding="utf-8"))
    return []

def _init_sessions():
    sessions = [p.name for p in BASE_MSG_DIR.iterdir() if p.is_dir()]  # IDs only
    if len(sessions) == 0:
        return gr.update(choices=[], value=None), [], "", [], []
    else:
        sessions.sort(reverse=True)
        msg_id = sessions[0]
        messages = _load_latest(msg_id)
        chat_history = msg2hist(persona, messages)
        sessions_update = gr.update(choices=sessions, value=msg_id)
        return sessions_update, sessions, msg_id, messages, chat_history


def load_session(session_list, sessions):
    msg_id = session_list  # session_list is the selected msg_id in the UI
    messages = _load_latest(msg_id)
    chat_history = msg2hist(persona, messages)
    sessions_update = gr.update(choices=sessions, value=msg_id)
    return msg_id, messages, chat_history, sessions_update

def start_new_session(sessions):
    msg_id = mk_msg_dir(BASE_MSG_DIR)
    sessions = list(sessions or []) + [msg_id]
    sessions_update = gr.update(choices=sessions, value=msg_id)
    return [], [], "", msg_id, sessions_update, sessions  # returns the ID

with gr.Blocks(theme=theme, css=css) as demo:
    gr.Markdown("## 🧠 Qwen Chat")

    with gr.Row():
        with gr.Column(scale=3):
            session_list = gr.Radio(choices=[], value=None, label="Conversations", interactive=True)
            new_btn = gr.Button("New session", variant="secondary")
            dl_btn = gr.Button("Download JSON", variant="secondary")
            dl_file = gr.File(label="", interactive=False, visible=False, elem_id="dl-file")
        with gr.Column(scale=9):
            chat = gr.Chatbot(
                label="Chat",
                height=560,
                render_markdown=True,
                type="messages",
                elem_id="chatpane",
            )
            user_box = gr.Textbox(
                label="Your message",
                placeholder="Type and press Enter…",
                autofocus=True,
                elem_id="user_box",
            )
            send = gr.Button("Send", variant="primary")

    messages = gr.State([])      # with persona
    chat_history = gr.State([])  # without persona
    msg_id = gr.State("")        # current session
    sessions = gr.State([])      # list of all msg_ids

    user_box.submit(
        ui_submit,
        inputs=[user_box, messages, msg_id, sessions],
        outputs=[user_box, messages, chat, msg_id, session_list, sessions],
    )
    send.click(
        ui_submit,
        inputs=[user_box, messages, msg_id, sessions],
        outputs=[user_box, messages, chat, msg_id, session_list, sessions],
    )
    new_btn.click(
        start_new_session,
        inputs=[sessions],
        outputs=[messages, chat, user_box, msg_id, session_list, sessions],
    )
    session_list.change(load_session,
        inputs=[session_list, sessions],  # session_list is msg_id here
        outputs=[msg_id, messages, chat, session_list]
    )
    dl_btn.click(
        on_click_download,
        inputs=[messages, msg_id],
        outputs=[dl_file],
    )

    demo.load(_init_sessions, None,
              outputs=[session_list, sessions, msg_id, messages, chat])

demo.launch()
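A minimal sketch of driving chat_step directly (e.g. from a REPL), assuming the objects defined above in app_awq.py (pipe, tok, persona, GEN_KWARGS) are in scope:

# Hypothetical smoke test for chat_step, run after the model setup above.
reply, messages, mode = chat_step(
    "Say hello in one sentence.", pipe, tok,
    mode="new", persona=persona,
    **GEN_KWARGS,
)
print(mode)   # "new"
print(reply)  # the model's single-turn answer

# A follow-up turn reuses the trimmed message list:
reply2, messages, mode = chat_step(
    "Now say it in French.", pipe, tok,
    mode="continue", messages=messages,
    **GEN_KWARGS,
)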
app_gguf.py
ADDED
@@ -0,0 +1,37 @@
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 1) download a small GGUF once into the Space cache
REPO = "bartowski/Qwen2.5-0.5B-Instruct-GGUF"  # tiny + has chat_template
FNAME = "qwen2.5-0.5b-instruct-q4_k_m.gguf"    # ~398MB (good for CPU)
model_path = hf_hub_download(repo_id=REPO, filename=FNAME)


# Force offline mode (optional): use only the local cache even when online
# os.environ["HF_HUB_OFFLINE"] = "1"
# model_path = r"./models/qwen25_0_5b/qwen2.5-0.5b-instruct-q4_k_m.gguf"
# 2) init llama.cpp
llm = Llama(model_path=model_path, n_ctx=4096, n_threads=4)  # adjust threads

def chat_fn(history, message):
    # history is list[dict(role, content)] with type='messages' in Gradio
    messages = [{"role": m["role"], "content": m["content"]} for m in history] + [
        {"role": "user", "content": message}
    ]
    # If a chat_template is embedded (this model has one), llama.cpp formats the prompt for you:
    out = llm.create_chat_completion(messages=messages, max_tokens=256, temperature=0.7)
    reply = out["choices"][0]["message"]["content"]
    history = history + [{"role": "user", "content": message},
                         {"role": "assistant", "content": reply}]
    return history, ""

with gr.Blocks() as demo:
    gr.Markdown("## Qwen 0.5B (GGUF, CPU)")
    chat = gr.Chatbot(type="messages", height=500)
    msg = gr.Textbox(placeholder="Ask me anything…", container=True)
    send = gr.Button("Send", variant="primary")
    msg.submit(chat_fn, [chat, msg], [chat, msg])
    send.click(chat_fn, [chat, msg], [chat, msg])

demo.launch()
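A minimal sketch of a streaming variant of the generation call above, assuming the same llm instance; with stream=True, llama-cpp-python's create_chat_completion yields incremental chunks instead of one dict:

# Hypothetical streaming version of chat_fn's generation step.
chunks = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=256,
    temperature=0.7,
    stream=True,
)
reply = ""
for chunk in chunks:
    delta = chunk["choices"][0]["delta"]  # incremental piece of the message
    reply += delta.get("content", "")     # first/last chunks may carry no "content"
print(reply)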
build.sh
ADDED
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
set -euxo pipefail
export PIP_ONLY_BINARY=":all:"  # force binary wheels only; avoid compiling from source
export HF_HUB_ENABLE_HF_TRANSFER=1
pip install --no-cache-dir -r requirements.txt
gguf.py
ADDED
@@ -0,0 +1,201 @@
# app.py
import os
import json
from pathlib import Path
from datetime import datetime

import gradio as gr
from huggingface_hub import snapshot_download
from ctransformers import AutoModelForCausalLM

# ---------- Runtime knobs ----------
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

REPO_ID = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
GGUF_FILE = "qwen2.5-0.5b-instruct-q4_0.gguf"
DEFAULT_CONTEXT = 4096
DEFAULT_THREADS = int(os.getenv("NUM_THREADS", "2"))

# ---------- Model download & load ----------
def download_single_gguf(repo_id: str, filename: str) -> tuple[Path, str]:
    local_dir = snapshot_download(
        repo_id,
        allow_patterns=[filename],         # ⭐ download only the specified file
        local_dir="./models/qwen25_0_5b",  # swap in your own directory if you prefer
        local_dir_use_symlinks=False       # ⭐ recommended on Windows: no symlinks
    )
    return Path(local_dir), filename

MODEL_DIR, MODEL_FILE = download_single_gguf(REPO_ID, GGUF_FILE)

# Try auto-detecting the architecture (no model_type); fall back to "qwen" on older versions
try:
    model = AutoModelForCausalLM.from_pretrained(
        str(MODEL_DIR),
        model_file=MODEL_FILE,
        gpu_layers=0,  # CPU-only
        context_length=DEFAULT_CONTEXT,
        threads=DEFAULT_THREADS,
    )
except Exception:
    # Fallback: some ctransformers versions need an explicit model_type
    model = AutoModelForCausalLM.from_pretrained(
        str(MODEL_DIR),
        model_file=MODEL_FILE,
        model_type="qwen",  # some versions don't recognise qwen2; plain qwen also runs
        gpu_layers=0,
        context_length=DEFAULT_CONTEXT,
        threads=DEFAULT_THREADS,
    )
# ---------- Qwen chat template (no tools) ----------
def render_qwen(messages, add_generation_prompt=True) -> str:
    """
    Convert OpenAI-style messages to Qwen2.5 Instruct format:
    <|im_start|>system ... <|im_end|>
    <|im_start|>user ... <|im_end|>
    <|im_start|>assistant (generation continues here)
    """
    # System prompt
    if messages and messages[0].get("role") == "system":
        sys_txt = messages[0]["content"]
        rest = messages[1:]
    else:
        sys_txt = "You are Qwen, a helpful assistant."
        rest = messages

    parts = [f"<|im_start|>system\n{sys_txt}<|im_end|>\n"]
    for m in rest:
        role = m["role"]
        if role not in ("user", "assistant"):
            # ignore other roles for simplicity
            continue
        parts.append(f"<|im_start|>{role}\n{m['content']}<|im_end|>\n")

    if add_generation_prompt:
        parts.append("<|im_start|>assistant\n")
    return "".join(parts)

STOP_TOKENS = ["<|im_end|>", "<|endoftext|>"]

# ---------- Gradio callbacks ----------
def ensure_system(messages, sys_prompt):
    """Guarantee a system message at index 0."""
    if not messages or messages[0].get("role") != "system":
        return [{"role": "system", "content": sys_prompt}]
    # make sure system content follows the latest textbox value
    messages = messages.copy()
    messages[0] = {"role": "system", "content": sys_prompt}
    return messages

def visible_chat(messages):
    """Hide system from chat display."""
    return [m for m in messages if m["role"] in ("user", "assistant")]

def on_new_session(sys_prompt):
    messages = [{"role": "system", "content": sys_prompt.strip()}]
    return messages, visible_chat(messages)

def on_send(user_text, messages, sys_prompt, temperature, top_p, max_new_tokens, repetition_penalty):
    user_text = (user_text or "").strip()
    if not user_text:
        return gr.update(), messages, visible_chat(messages)  # no change

    # 1) make sure system is present/updated
    messages = ensure_system(messages, sys_prompt.strip())

    # 2) append user, render, generate
    messages = messages + [{"role": "user", "content": user_text}]
    prompt = render_qwen(messages, add_generation_prompt=True)

    reply = model(
        prompt,
        temperature=float(temperature),
        top_p=float(top_p),
        max_new_tokens=int(max_new_tokens),
        repetition_penalty=float(repetition_penalty),
        stop=STOP_TOKENS,
    ).strip()

    # 3) append assistant
    messages = messages + [{"role": "assistant", "content": reply}]
    return "", messages, visible_chat(messages)

def on_download(messages):
    # Save current conversation (including system) as JSON and return file path
    out_dir = Path("./exports")
    out_dir.mkdir(parents=True, exist_ok=True)
    fname = datetime.utcnow().strftime("chat_%Y%m%dT%H%M%SZ.json")
    fpath = out_dir / fname
    with fpath.open("w", encoding="utf-8") as f:
        json.dump(messages, f, ensure_ascii=False, indent=2)
    return str(fpath)

# ---------- UI ----------
with gr.Blocks(title="Qwen 0.5B (CPU, GGUF via ctransformers)") as demo:
    gr.Markdown("## 🧠 Qwen 0.5B Instruct (CPU · GGUF · ctransformers)")

    with gr.Row():
        with gr.Column(scale=3):
            sys_prompt = gr.Textbox(
                label="System prompt",
                value=(
                    "You are Qwen, a helpful assistant named Nova. "
                    "Use concise British English. Output Markdown."
                ),
                lines=6,
                show_label=True,
            )
            new_btn = gr.Button("New session", variant="secondary")
            dl_btn = gr.Button("Download JSON", variant="secondary")
            dl_file = gr.File(label="", visible=True)

            with gr.Accordion("Generation settings", open=False):
                temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.01, label="top_p")
                max_new_tokens = gr.Slider(16, 1024, value=256, step=16, label="max_new_tokens")
                repetition_penalty = gr.Slider(1.0, 2.0, value=1.07, step=0.01, label="repetition_penalty")

        with gr.Column(scale=9):
            chat = gr.Chatbot(
                label="Chat",
                height=560,
                render_markdown=True,
                type="messages",  # OpenAI-style: {role, content}
            )
            user_box = gr.Textbox(
                label="Your message",
                placeholder="Type and press Enter…",
                autofocus=True,
            )
            send = gr.Button("Send", variant="primary")

    # States
    messages = gr.State([])

    # Events
    new_btn.click(
        on_new_session,
        inputs=[sys_prompt],
        outputs=[messages, chat],
    )

    user_box.submit(
        on_send,
        inputs=[user_box, messages, sys_prompt, temperature, top_p, max_new_tokens, repetition_penalty],
        outputs=[user_box, messages, chat],
    )
    send.click(
        on_send,
        inputs=[user_box, messages, sys_prompt, temperature, top_p, max_new_tokens, repetition_penalty],
        outputs=[user_box, messages, chat],
    )

    dl_btn.click(
        on_download,
        inputs=[messages],
        outputs=[dl_file],
    )

if __name__ == "__main__":
    # Queue enables concurrency + progress; share not needed on Spaces
    demo.queue().launch()
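A minimal sketch of what render_qwen produces, using only the pure-Python function above (no model needed):

# Example: render a short conversation into Qwen's ChatML-style prompt.
demo_messages = [
    {"role": "system", "content": "You are Nova."},
    {"role": "user", "content": "Hi"},
]
print(render_qwen(demo_messages))
# <|im_start|>system
# You are Nova.<|im_end|>
# <|im_start|>user
# Hi<|im_end|>
# <|im_start|>assistant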
msgs/msgs_Qwen/20251021-184023-718e4d/archive.jsonl
ADDED
@@ -0,0 +1,2 @@
{"ts": "2025-10-21T17:40:28.058026+00:00", "role": "user", "content": "give me a code that sort list in python"}
{"ts": "2025-10-21T17:40:28.072993+00:00", "role": "assistant", "content": "Here's a Python function to sort a list using the built-in `sorted()` function:\n```\ndef sort_list(lst):\n    return sorted(lst)\n```"}
msgs/msgs_Qwen/20251021-184023-718e4d/trimmed.json
ADDED
@@ -0,0 +1,14 @@
[
  {
    "role": "system",
    "content": "- Your name is Nova.\n- The user's name is Marshall.\n- Do NOT prefix with \"Q:\" or \"A:\"..\n- Output Markdown; code in fenced blocks with a language tag.\n- Answer concisely, but do return give empty feedback."
  },
  {
    "role": "user",
    "content": "give me a code that sort list in python"
  },
  {
    "role": "assistant",
    "content": "Here's a Python function to sort a list using the built-in `sorted()` function:\n```\ndef sort_list(lst):\n    return sorted(lst)\n```"
  }
]
msgs/msgs_Qwen/20251021-184507-1b3177/archive.jsonl
ADDED
@@ -0,0 +1,10 @@
{"ts": "2025-10-21T17:45:14.346397+00:00", "role": "user", "content": "who are you?"}
{"ts": "2025-10-21T17:45:14.360711+00:00", "role": "assistant", "content": "user: I'm a computer program designed to assist with information retrieval and answering questions."}
{"ts": "2025-10-21T17:45:22.286771+00:00", "role": "user", "content": "ok"}
{"ts": "2025-10-21T17:45:22.300744+00:00", "role": "assistant", "content": "assistant\nYou are a helpful assistant. If you have any other questions, please ask."}
{"ts": "2025-10-21T17:51:29.255179+00:00", "role": "user", "content": "what is your name"}
{"ts": "2025-10-21T17:51:29.269669+00:00", "role": "assistant", "content": "assistant\nI am called Nova."}
{"ts": "2025-10-21T17:58:54.204216+00:00", "role": "user", "content": "hello"}
{"ts": "2025-10-21T17:58:54.207656+00:00", "role": "assistant", "content": "Hello! How can I assist you today?"}
{"ts": "2025-10-21T18:47:11.712140+00:00", "role": "user", "content": "what is sum of 890 and 334"}
{"ts": "2025-10-21T18:47:11.724123+00:00", "role": "assistant", "content": "The sum of 890 and 334 is 1224."}
msgs/msgs_Qwen/20251021-184507-1b3177/trimmed.json
ADDED
@@ -0,0 +1,46 @@
[
  {
    "role": "system",
    "content": "- Your name is Nova. Refer to yourself as \"Nova\".\n- The user's name is Marshall. Address the user as \"Marshall\" when appropriate.\n- Do NOT prefix with \"Q:\" or \"A:\". Do NOT restate the user's question.\n- Output Markdown; code in fenced blocks with a language tag.\n- Answer concisely, but do return give empty feedback."
  },
  {
    "role": "user",
    "content": "who are you?"
  },
  {
    "role": "assistant",
    "content": "user: I'm a computer program designed to assist with information retrieval and answering questions."
  },
  {
    "role": "user",
    "content": "ok"
  },
  {
    "role": "assistant",
    "content": "assistant\nYou are a helpful assistant. If you have any other questions, please ask."
  },
  {
    "role": "user",
    "content": "what is your name"
  },
  {
    "role": "assistant",
    "content": "assistant\nI am called Nova."
  },
  {
    "role": "user",
    "content": "hello"
  },
  {
    "role": "assistant",
    "content": "Hello! How can I assist you today?"
  },
  {
    "role": "user",
    "content": "what is sum of 890 and 334"
  },
  {
    "role": "assistant",
    "content": "The sum of 890 and 334 is 1224."
  }
]
msgs/msgs_Qwen/20251021-185141-2e0843/archive.jsonl
ADDED
@@ -0,0 +1,20 @@
{"ts": "2025-10-21T17:51:45.263874+00:00", "role": "user", "content": "what is your anme"}
{"ts": "2025-10-21T17:51:45.279853+00:00", "role": "assistant", "content": "Nova"}
{"ts": "2025-10-21T17:51:56.733888+00:00", "role": "user", "content": "who am i"}
{"ts": "2025-10-21T17:51:56.749178+00:00", "role": "assistant", "content": "Nova"}
{"ts": "2025-10-21T17:52:20.912518+00:00", "role": "user", "content": "who are you"}
{"ts": "2025-10-21T17:52:20.927428+00:00", "role": "assistant", "content": "I am Nova."}
{"ts": "2025-10-21T17:52:26.472739+00:00", "role": "user", "content": "who is the user"}
{"ts": "2025-10-21T17:52:26.484718+00:00", "role": "assistant", "content": "The user is named Marshall."}
{"ts": "2025-10-21T17:52:31.203857+00:00", "role": "user", "content": "who am i"}
{"ts": "2025-10-21T17:52:31.216050+00:00", "role": "assistant", "content": "I am Nova."}
{"ts": "2025-10-21T17:52:45.237195+00:00", "role": "user", "content": "I am the user"}
{"ts": "2025-10-21T17:52:45.249482+00:00", "role": "assistant", "content": "You are correct. You are the user."}
{"ts": "2025-10-21T17:52:50.746450+00:00", "role": "user", "content": "so who am i"}
{"ts": "2025-10-21T17:52:50.749891+00:00", "role": "assistant", "content": "You are the user."}
{"ts": "2025-10-21T17:52:56.134318+00:00", "role": "user", "content": "what is my name"}
{"ts": "2025-10-21T17:52:56.146091+00:00", "role": "assistant", "content": "Your name is Nova."}
{"ts": "2025-10-21T17:59:48.883744+00:00", "role": "user", "content": "can you give me a python code that combine two dataframe"}
{"ts": "2025-10-21T17:59:48.886736+00:00", "role": "assistant", "content": "Sure, here's a Python code that combines two pandas DataFrames:\n```python\nimport pandas as pd\n\n# Create the first DataFrame\ndf1 = pd.DataFrame({'Column1': ['A', 'B', 'C'],\n 'Column2': [1, 2, 3]})\n\n# Create the second DataFrame\ndf2 = pd.DataFrame({'Column1': [4, 5],\n 'Column2': [4.0, 5.0]})\n\n# Combine the two DataFrames using the 'left' parameter\nresult = pd.concat([df1, df2], axis=1)\n```\n\nThe resulting `result` DataFrame will have two columns and two rows, where the first row contains the data from the left DataFrame and the second row contains the data from the right DataFrame."}
{"ts": "2025-10-21T18:00:31.364693+00:00", "role": "user", "content": "can you give me a python code that combine two dataframe"}
{"ts": "2025-10-21T18:00:31.367685+00:00", "role": "assistant", "content": "Certainly! Here's a Python code that combines two pandas DataFrames:\n\n```python\nimport pandas as pd\n\n# Sample data for the first DataFrame (column 1)\ndata_frame_1 = pd.DataFrame({'Column1': ['A', 'B', 'C', 'D'], 'Column2': [1, 2, 3, 4]})\n\n# Sample data for the second DataFrame (column 1)\ndata_frame_2 = pd.DataFrame({'Column1': ['E', 'F', 'G', 'H'], 'Column2': [5, 6, 7, 8]})\n\n# Combining the two DataFrames\ncombined_df = pd.concat([data_frame_1, data_frame_2], axis=0)\n\nprint(combined_df)\n```\n\nThis code will output:\n\n```\n Column1 Column2\n0 A 1\n1 B 2\n2 C 3\n3 D 4\n4 E 5\n5 F 6\n6 G 7\n7 H 8\n```\n\nYou can replace the sample data with your own data and it will work correctly."}
msgs/msgs_Qwen/20251021-185141-2e0843/trimmed.json
ADDED
@@ -0,0 +1,78 @@
[
  {
    "role": "system",
    "content": "- Your name is Nova. Refer to yourself as \"Nova\".\n- The user's name is Marshall. Address the user as \"Marshall\" when appropriate.\n- Do NOT prefix with \"Q:\" or \"A:\". Do NOT restate the user's question.\n- Output Markdown; code in fenced blocks with a language tag.\n- Answer concisely, but do return give empty feedback."
  },
  {
    "role": "user",
    "content": "what is your anme"
  },
  {
    "role": "assistant",
    "content": "Nova"
  },
  {
    "role": "user",
    "content": "who am i"
  },
  {
    "role": "assistant",
    "content": "Nova"
  },
  {
    "role": "user",
    "content": "who are you"
  },
  {
    "role": "assistant",
    "content": "I am Nova."
  },
  {
    "role": "user",
    "content": "who is the user"
  },
  {
    "role": "assistant",
    "content": "The user is named Marshall."
  },
  {
    "role": "user",
    "content": "who am i"
  },
  {
    "role": "assistant",
    "content": "I am Nova."
  },
  {
    "role": "user",
    "content": "I am the user"
  },
  {
    "role": "assistant",
    "content": "You are correct. You are the user."
  },
  {
    "role": "user",
    "content": "so who am i"
  },
  {
    "role": "assistant",
    "content": "You are the user."
  },
  {
    "role": "user",
    "content": "what is my name"
  },
  {
    "role": "assistant",
    "content": "Your name is Nova."
  },
  {
    "role": "user",
    "content": "can you give me a python code that combine two dataframe"
  },
  {
    "role": "assistant",
    "content": "Certainly! Here's a Python code that combines two pandas DataFrames:\n\n```python\nimport pandas as pd\n\n# Sample data for the first DataFrame (column 1)\ndata_frame_1 = pd.DataFrame({'Column1': ['A', 'B', 'C', 'D'], 'Column2': [1, 2, 3, 4]})\n\n# Sample data for the second DataFrame (column 1)\ndata_frame_2 = pd.DataFrame({'Column1': ['E', 'F', 'G', 'H'], 'Column2': [5, 6, 7, 8]})\n\n# Combining the two DataFrames\ncombined_df = pd.concat([data_frame_1, data_frame_2], axis=0)\n\nprint(combined_df)\n```\n\nThis code will output:\n\n```\n Column1 Column2\n0 A 1\n1 B 2\n2 C 3\n3 D 4\n4 E 5\n5 F 6\n6 G 7\n7 H 8\n```\n\nYou can replace the sample data with your own data and it will work correctly."
  }
]
requirements.txt
ADDED
@@ -0,0 +1,4 @@
gradio==5.49.1
huggingface_hub>=0.23
orjson
llama-cpp-python==0.2.90
requirements_awq.txt
ADDED
@@ -0,0 +1,14 @@
--extra-index-url https://download.pytorch.org/whl/cpu

pip>=24.2
setuptools>=68
wheel
numpy<2  # some packages are not yet compatible with numpy 2.0

torch==2.4.1
transformers==4.44.2
accelerate>=0.33.0
safetensors>=0.4.3
autoawq==0.2.7  # this version is more stable on CPU with older torch
gradio==5.49.1
huggingface_hub>=0.23.0
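A quick sanity check, as a minimal sketch, that the CPU wheel index above was actually used at install time:

# Hypothetical post-install check for the CPU-only torch build.
import torch
print(torch.__version__)          # expect something like "2.4.1+cpu" from the CPU index
print(torch.cuda.is_available())  # expect False on a CPU Space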
utils.py
ADDED
@@ -0,0 +1,151 @@
# from __future__ import annotations
from pathlib import Path
import uuid
from datetime import datetime, timezone
import json, os
from typing import List, Dict, Tuple, Optional

# ============ Helper functions ============
def mk_msg_dir(BASE_MSG_DIR) -> str:
    m_id = datetime.now().strftime("%Y%m%d-%H%M%S-") + uuid.uuid4().hex[:6]
    Path(BASE_MSG_DIR, m_id).mkdir(parents=True, exist_ok=True)
    return m_id  # return only the ID

def _as_dir(BASE_MSG_DIR, m_id: str) -> Path:
    # Normalise whatever was passed in to ./msgs/<ID>
    return Path(BASE_MSG_DIR, m_id)

def msg2hist(persona, msg):
    chat_history = []
    if msg is not None:
        if len(msg) > 0:
            chat_history = msg.copy()        # shallow copy of the outer list
            chat_history[0] = msg[0].copy()  # copy this dict separately
            chat_history[0]['content'] = chat_history[0]['content'][len(persona):]
    return chat_history

def render(tok, messages: List[Dict[str, str]]) -> str:
    """Render the final prompt text via the chat_template (no tokenisation)."""
    return tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

def _ensure_alternating(messages):
    if not messages:
        return
    if messages[0]["role"] != "user":
        raise ValueError("messages[0] must be 'user' (this template requires starting with user)")
    for i, m in enumerate(messages):
        expect_user = (i % 2 == 0)
        if (m["role"] == "user") != expect_user:
            raise ValueError(f"conversation must strictly alternate user/assistant; found {m['role']} at index {i}")

def trim_by_tokens(tok, messages, prompt_budget):
    """
    Keep only messages[0] (the persona message) plus one suffix starting at an
    odd index, using binary search to find the longest suffix that fits.
    This guarantees the alternation is not broken.
    """
    if not messages:
        return []

    # _ensure_alternating(messages)

    # Only the persona message: return as-is
    if len(messages) == 1:
        return messages

    # Allowed suffix start points: odd indices (1, 3, 5, ...),
    # so that splicing the suffix onto index 0 preserves the alternation.
    cand_idx = [k for k in range(1, len(messages)) if k % 2 == 1]

    # If nothing fits at all, keep only the persona
    best = [messages[0]]

    # Binary search: earlier start point → more messages kept → more tokens (monotone)
    lo, hi = 0, len(cand_idx) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        k = cand_idx[mid]
        candidate = [messages[0]] + messages[k:]
        toks = len(tok(tok.apply_chat_template(candidate, tokenize=False),
                       add_special_tokens=False).input_ids)
        if toks <= prompt_budget:
            best = candidate  # it fits: try to keep more (move left)
            hi = mid - 1
        else:
            lo = mid + 1      # doesn't fit: drop more old messages (move right)

    return best

# ============ Atomic write (may conflict with OneDrive sync) ============
# def atomic_write_json(path: Path, data) -> None:
#     tmp = path.with_suffix(path.suffix + ".tmp")
#     with open(tmp, "w", encoding="utf-8") as f:
#         json.dump(data, f, ensure_ascii=False, indent=2)
#         f.flush()
#         os.fsync(f.fileno())
#     os.replace(tmp, path)  # atomic replace within the same directory

# Plain overwrite
def write_json_overwrite(path: Path, data) -> None:
    with open(path, "w", encoding="utf-8", newline="\n") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

# ============ Storage layer ============
class MsgStore:
    def __init__(self, base_dir: str | Path = "./msgs"):
        self.base = Path(base_dir)
        self.base.mkdir(parents=True, exist_ok=True)
        self.archive = self.base / "archive.jsonl"  # append-only
        self.trimmed = self.base / "trimmed.json"   # current context
        if not self.archive.exists():
            self.archive.write_text("", encoding="utf-8")
        if not self.trimmed.exists():
            self.trimmed.write_text("[]", encoding="utf-8")

    def load_trimmed(self) -> List[Dict[str, str]]:
        try:
            return json.loads(self.trimmed.read_text(encoding="utf-8"))
        except Exception:
            return []

    def save_trimmed(self, messages: List[Dict[str, str]]) -> None:
        write_json_overwrite(self.trimmed, messages)

    def append_archive(self, role: str, content: str, meta: dict | None = None) -> None:
        rec = {"ts": datetime.now(timezone.utc).isoformat(), "role": role, "content": content}
        if meta: rec["meta"] = meta
        with open(self.archive, "a", encoding="utf-8") as f:
            f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            f.flush(); os.fsync(f.fileno())

# ============ Explicit save (persists only when called) ============
def persist_messages(
    messages: List[Dict[str, str]],
    store_dir: str | Path = "./msgs",
    archive_last_turn: bool = True,
) -> None:
    store = MsgStore(store_dir)
    # _ensure_alternating(messages)

    # 1) Overwrite trimmed.json
    store.save_trimmed(messages)

    # 2) Append the latest turn to archive.jsonl (optional)
    if not archive_last_turn:
        return

    # Scan backwards for the most recent (user, assistant) pair
    pair = None
    for i in range(len(messages) - 2, -1, -1):
        if (
            messages[i]["role"] == "user"
            and i + 1 < len(messages)
            and messages[i + 1]["role"] == "assistant"
        ):
            pair = (messages[i]["content"], messages[i + 1]["content"])
            break

    if pair:
        u, a = pair
        store.append_archive("user", u)
        store.append_archive("assistant", a)
    # If no complete pair was found (e.g. persist was called before generation),
    # only trimmed.json is written; nothing is archived.