MarshallCN commited on
Commit
95b305d
·
0 Parent(s):

Clean initial commit without large files

Browse files
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Jupyter
2
+ **/.ipynb_checkpoints/
3
+ # any hidden Jupyter aux files like .ipynb_foo (gitignore has no inline comments)
+ .ipynb_*
4
+
5
+ # Python cache/bytecode
6
+ **/__pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ /old/
10
+ /old/*
11
+ models/
12
+ models/*
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: "Qwen2.5 Chat"
3
+ emoji: "🧠"
4
+ colorFrom: "indigo"
5
+ colorTo: "red"
6
+ sdk: "gradio"
7
+ sdk_version: "4.44.0"
8
+ app_file: "app_gguf.py"
9
+ pinned: false
10
+ Script path: "build.sh"
11
+ ---
app_awq.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# === Qwen 2.5 0.5B =========================
import os
os.environ.setdefault("TORCHDYNAMO_DISABLE", "1")
os.environ.setdefault("TORCHINDUCTOR_DISABLE", "1")
os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
import logging
# Silence accelerate's noisy device-placement warnings on a CPU-only Space.
for name in ("accelerate", "accelerate.utils", "accelerate.utils.modeling"):
    logging.getLogger(name).setLevel(logging.ERROR)

import gradio as gr
from gradio.themes.utils import fonts
import uuid
from pathlib import Path
import json
# BUGFIX: the AutoAWQ package is imported as `awq`, not `autoawq`.
from awq import AutoAWQForCausalLM
# BUGFIX: `pipeline` was used below but never imported.
from transformers import AutoTokenizer, pipeline
import torch
from datetime import datetime, timezone
from typing import List, Dict, Tuple, Optional
from utils import render, trim_by_tokens, mk_msg_dir, _as_dir, msg2hist, persist_messages

# NOTE(review): this repo id is a GPTQ quantisation but is loaded with the AWQ
# loader below — confirm the intended quant format / loader pairing.
REPO_ID = "Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4"
tok = AutoTokenizer.from_pretrained(REPO_ID, use_fast=True, trust_remote_code=True)
model = AutoAWQForCausalLM.from_quantized(
    REPO_ID,
    trust_remote_code=True,
    safetensors=True,
    device_map=None,  # CPU Space
)
model.to("cpu")
# Reuse EOS as PAD so batched generation does not warn/fail.
tok.pad_token = tok.eos_token
tok.padding_side = "left"
model.config.pad_token_id = tok.eos_token_id
model.generation_config.pad_token_id = tok.eos_token_id

pipe = pipeline("text-generation", model=model, tokenizer=tok, device=-1)

MAX_CONTEXT = 2048  # 8192
GEN_BUDGET = 256
assistant_name = "Nova"
user_name = "Marshall"
# BUGFIX: the last persona rule was garbled ("do return give empty feedback").
persona = f"""
- Your name is {assistant_name}.
- Address the user as "{user_name}" when appropriate.
- Do NOT prefix.
- Output Markdown; code in fenced blocks with a language tag.
- Answer concisely, but do not return empty feedback.
""".strip()
# ===============================================================

theme = gr.themes.Soft(
    font=[
        fonts.Font("Segoe UI"),
        fonts.Font("system-ui"),
        fonts.Font("sans-serif"),
    ],
    font_mono=[
        fonts.Font("Consolas"),
        fonts.Font("ui-monospace"),
        fonts.Font("monospace"),
    ],
)

css = """
#user_box textarea::-webkit-scrollbar { display: none; } /* Chrome/Safari */
#user_box textarea { scrollbar-width: none; -ms-overflow-style: none; } /* Firefox/Edge */
"""

# NOTE: max_context / max_new_tokens are consumed by chat_step's own
# parameters, not forwarded to the pipeline.
GEN_KWARGS = dict(
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    repetition_penalty=1.05,
    max_context=MAX_CONTEXT,
    max_new_tokens=GEN_BUDGET,
)

BASE_MSG_DIR = Path("./msgs/msgs_Qwen")
84
def export_messages_to_json(messages, msg_id):
    """Write the conversation to a JSON file and return its path (for gr.File).

    Uses /data/exports on a persistent-storage Space, ./exports otherwise.
    BUGFIX: msg_id was accepted but ignored — include it in the file name so
    exports from different sessions are distinguishable.
    """
    base = Path("/data/exports") if Path("/data").exists() else Path("./exports")
    base.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S-")
    fname = f"msgs_{stamp}{msg_id}.json" if msg_id else f"msgs_{stamp}.json"
    path = base / fname
    path.write_text(json.dumps(messages or [], ensure_ascii=False, indent=2), encoding="utf-8")
    return str(path)  # file path handed to gr.File
92
+
93
def on_click_download(messages, msg_id):
    """Export the conversation, then reveal the hidden gr.File pointing at it."""
    exported = export_messages_to_json(messages, msg_id)
    return gr.update(value=exported, visible=True)
97
+
98
+ # ============ Chat ============
99
# ============ Chat ============
def chat_step(
    user_prompt: str,
    pipe,                               # transformers text-generation pipeline
    tok,                                # AutoTokenizer
    messages: Optional[List[Dict[str, str]]] = None,
    mode: str = "continue",             # "new" | "continue" | "load"
    persona: Optional[str] = None,      # required when starting a new session
    max_context: int = 8192,
    max_new_tokens: int = 256,
    store_dir: str | Path = "./msgs",
    **gen_kwargs,                       # passed through: do_sample/temperature/top_p/...
) -> Tuple[str, List[Dict[str, str]], str]:
    """Run one dialogue turn without persisting it.

    Returns (reply, messages, mode). BUGFIX: the original docstring claimed the
    third element was the user content; the code returns the (possibly
    adjusted) mode.
    """
    user_prompt = (user_prompt or "").strip()
    if mode not in {"new", "continue", "load"}:
        raise ValueError("mode 必须是 'new' | 'continue' | 'load'")

    if mode == "new":
        if not persona:
            raise ValueError("mode='new' 时必须提供 persona")
        messages = [{"role": "system", "content": persona},
                    {"role": "user", "content": user_prompt}]

    elif mode == "continue":
        if not messages:
            if persona:
                # No existing session but a persona was given: treat as new.
                messages = [{"role": "system", "content": persona},
                            {"role": "user", "content": user_prompt}]
                mode = "new"
            else:
                raise ValueError("mode='continue' 需要传入非空 messages,或改用 mode='new' 并提供 persona")
        else:
            messages.append({"role": "user", "content": user_prompt})

    elif mode == "load":
        # BUGFIX: the original called `store.load_trimmed()` on an undefined
        # `store` (NameError) and never used store_dir. Load trimmed.json
        # from store_dir instead.
        trimmed = Path(store_dir, "trimmed.json")
        messages = (json.loads(trimmed.read_text(encoding="utf-8"))
                    if trimmed.exists() else None)
        if not messages:
            if not persona:
                # (mojibake in the original message repaired)
                raise ValueError("磁盘没有可加载的会话,且未提供 persona 以新建会话")
            messages = [{"role": "system", "content": persona},
                        {"role": "user", "content": user_prompt}]
            mode = "new"  # effectively a fresh session
        else:
            messages.append({"role": "user", "content": user_prompt})

    # trim -> render -> generate
    prompt_budget = max_context - max_new_tokens
    messages = trim_by_tokens(tok, messages, prompt_budget)
    text = render(tok, messages)
    out = pipe(
        text,
        max_new_tokens=max_new_tokens,
        return_full_text=False,
        clean_up_tokenization_spaces=False,
        **gen_kwargs,
    )
    reply = out[0]["generated_text"].strip()

    # Append the assistant turn, then trim again so persisted state fits.
    messages.append({"role": "assistant", "content": reply})
    messages = trim_by_tokens(tok, messages, prompt_budget)
    return reply, messages, mode
161
+
162
+ # ============ UI ============
163
+
164
def ui_submit(user_input, messages, msg_id, sessions):
    """Handle one send: run the model turn and update all UI states.

    Inputs: textbox value, message list (with persona), current msg_id, known
    session-id list. Returns 6 values matching the wired outputs:
    (user_box, messages, chat history, msg_id, session_list update, sessions).
    """
    user_input = (user_input or "").strip()
    chat_history = msg2hist(persona, messages)
    if not user_input:
        # BUGFIX: the original returned an undefined `msg_dir` (NameError) and
        # reset msg_id to "" — keep every state unchanged instead.
        return gr.update(), messages, chat_history, msg_id, gr.update(), sessions

    msg_id = msg_id or ""  # states always hold the ID string

    new_session = (not messages)

    if new_session and not msg_id:  # fresh UI load, no msg_id yet
        msg_id = mk_msg_dir(BASE_MSG_DIR)  # creates ./msgs/<ID>
        sessions = list(sessions or []) + [msg_id]

    # msg_id exists but is not yet in the sessions list
    if msg_id and msg_id not in (sessions or []):
        sessions = list(sessions or []) + [msg_id]

    sessions_update = gr.update(choices=sessions, value=msg_id)

    if new_session:
        reply, messages, mode = chat_step(
            user_input, pipe, tok,
            mode="new", persona=persona,
            **GEN_KWARGS,
        )
    else:
        reply, messages, mode = chat_step(
            user_input, pipe, tok, persona=persona,
            mode="continue", messages=messages,
            **GEN_KWARGS,
        )

    if messages:
        msg_dir = _as_dir(BASE_MSG_DIR, msg_id)
        persist_messages(messages, msg_dir, archive_last_turn=True)

    chat_history = (chat_history or []) + [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": reply},
    ]
    return "", messages, chat_history, msg_id, sessions_update, sessions
209
+
210
def _load_latest(msg_id: str) -> List[Dict[str, str]]:
    """Load the trimmed message history for a session; [] if none on disk.

    BUGFIX: the original implicitly returned None when trimmed.json was
    missing; return [] so callers can iterate/len() without a None check.
    """
    p = Path(_as_dir(BASE_MSG_DIR, msg_id), "trimmed.json")
    if p.exists():
        return json.loads(p.read_text(encoding="utf-8"))
    return []
215
+
216
def _init_sessions():
    """Populate the session radio on app load and select the newest session.

    BUGFIX: Path.iterdir() raises FileNotFoundError when BASE_MSG_DIR does not
    exist yet (first run) — create it first.
    """
    BASE_MSG_DIR.mkdir(parents=True, exist_ok=True)
    sessions = [p.name for p in BASE_MSG_DIR.iterdir() if p.is_dir()]  # IDs only
    if not sessions:
        return gr.update(choices=[], value=None), [], "", [], []
    sessions.sort(reverse=True)  # IDs are timestamp-prefixed: newest first
    msg_id = sessions[0]
    messages = _load_latest(msg_id)
    chat_history = msg2hist(persona, messages)
    sessions_update = gr.update(choices=sessions, value=msg_id)
    return sessions_update, sessions, msg_id, messages, chat_history
227
+
228
+
229
def load_session(session_list, sessions):
    """Switch to the session selected in the UI radio (its value is the msg_id)."""
    chosen = session_list
    history = _load_latest(chosen)
    shown = msg2hist(persona, history)
    radio = gr.update(choices=sessions, value=chosen)
    return chosen, history, shown, radio
235
+
236
def start_new_session(sessions):
    """Create a fresh session directory and reset all chat state."""
    new_id = mk_msg_dir(BASE_MSG_DIR)
    all_ids = list(sessions or []) + [new_id]
    radio = gr.update(choices=all_ids, value=new_id)
    # cleared messages, cleared chat, cleared textbox, then the new ID
    return [], [], "", new_id, radio, all_ids
241
+
242
with gr.Blocks(theme=theme, css=css) as demo:
    gr.Markdown("## 🧠 Qwen Chat")

    with gr.Row():
        # Left column: session management + export.
        with gr.Column(scale=3):
            session_list = gr.Radio(choices=[], value=None, label="Conversations", interactive=True)
            new_btn = gr.Button("New session", variant="secondary")
            dl_btn = gr.Button("Download JSON", variant="secondary")
            dl_file = gr.File(label="", interactive=False, visible=False, elem_id="dl-file")
        # Right column: the chat pane itself.
        with gr.Column(scale=9):
            chat = gr.Chatbot(
                label="Chat",
                height=560,
                render_markdown=True,
                type="messages",
                elem_id="chatpane",
            )
            user_box = gr.Textbox(
                label="Your message",
                placeholder="Type and press Enter…",
                autofocus=True,
                elem_id="user_box"
            )
            send = gr.Button("Send", variant="primary")



    # Server-side state, one copy per browser session.
    messages = gr.State([])      # full message list, including persona
    chat_history = gr.State([])  # display list, persona stripped
    msg_id = gr.State("")        # current session id
    sessions = gr.State([])      # all known session ids

    # Enter key and Send button run the same turn handler.
    user_box.submit(
        ui_submit,
        inputs=[user_box, messages, msg_id, sessions],
        outputs=[user_box, messages, chat, msg_id, session_list, sessions],
    )
    send.click(
        ui_submit,
        inputs=[user_box, messages, msg_id, sessions],
        outputs=[user_box, messages, chat, msg_id, session_list, sessions],
    )
    new_btn.click(
        start_new_session,
        inputs=[sessions],
        outputs=[messages, chat, user_box, msg_id, session_list, sessions],
    )
    session_list.change(load_session,
        inputs=[session_list, sessions],  # session_list is msg_id here
        outputs=[msg_id, messages, chat, session_list]
    )
    dl_btn.click(
        on_click_download,
        inputs=[messages, msg_id],
        outputs=[dl_file],
    )

    # On page load, discover existing sessions and show the newest.
    demo.load(_init_sessions, None,
              outputs=[session_list, sessions, msg_id, messages, chat])
demo.launch()
app_gguf.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 1) download a small GGUF once into the Space cache
REPO = "bartowski/Qwen2.5-0.5B-Instruct-GGUF"  # tiny + has chat_template
FNAME = "qwen2.5-0.5b-instruct-q4_k_m.gguf"    # ~398MB (good for CPU)
model_path = hf_hub_download(repo_id=REPO, filename=FNAME)


# Optional: force offline mode — use only the local cache even when online.
# os.environ["HF_HUB_OFFLINE"] = "1"
# model_path = r"./models/qwen25_0_5b/qwen2.5-0.5b-instruct-q4_k_m.gguf"
# 2) init llama.cpp
llm = Llama(model_path=model_path, n_ctx=4096, n_threads=4)  # adjust threads
16
+
17
def chat_fn(history, message):
    """Run one chat turn against llama.cpp.

    `history` is a list of {role, content} dicts (Gradio type='messages').
    Returns (updated history, "") — the empty string clears the textbox.
    """
    convo = [{"role": m["role"], "content": m["content"]} for m in history]
    convo.append({"role": "user", "content": message})
    # This GGUF embeds a chat_template, so llama.cpp formats the prompt itself.
    result = llm.create_chat_completion(messages=convo, max_tokens=256, temperature=0.7)
    answer = result["choices"][0]["message"]["content"]
    new_history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": answer},
    ]
    return new_history, ""
28
+
29
with gr.Blocks() as demo:
    gr.Markdown("## Qwen 0.5B (GGUF, CPU)")
    chat = gr.Chatbot(type="messages", height=500)
    msg = gr.Textbox(placeholder="Ask me anything…", container=True)
    send = gr.Button("Send", variant="primary")
    # Enter and the Send button run the same handler;
    # outputs: updated chat history and a cleared textbox.
    msg.submit(chat_fn, [chat, msg], [chat, msg])
    send.click(chat_fn, [chat, msg], [chat, msg])

demo.launch()
build.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euxo pipefail
3
+ export PIP_ONLY_BINARY=":all:" # 强制只装二进制轮子,避免源码编译
4
+ export HF_HUB_ENABLE_HF_TRANSFER=1
5
+ pip install --no-cache-dir -r requirements.txt
gguf.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py — Qwen 0.5B Instruct on CPU via ctransformers + Gradio.
import os
import json
from pathlib import Path
from datetime import datetime

import gradio as gr
from huggingface_hub import snapshot_download
from ctransformers import AutoModelForCausalLM

# ---------- Runtime knobs ----------
# Faster Hub downloads when the hf_transfer extra is installed.
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

REPO_ID = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
GGUF_FILE = "qwen2.5-0.5b-instruct-q4_0.gguf"
DEFAULT_CONTEXT = 4096                                 # llama context window (tokens)
DEFAULT_THREADS = int(os.getenv("NUM_THREADS", "2"))   # CPU threads; override via env
19
+ # ---------- Model download & load ----------
20
def download_single_gguf(repo_id: str, filename: str) -> tuple[Path, str]:
    """Fetch exactly one GGUF file from the Hub into ./models/qwen25_0_5b.

    Returns (local directory, filename) for ctransformers to load.
    NOTE(review): `local_dir_use_symlinks` is deprecated in recent
    huggingface_hub releases — confirm the pinned version still accepts it.
    """
    target = snapshot_download(
        repo_id,
        allow_patterns=[filename],         # download only this one file
        local_dir="./models/qwen25_0_5b",  # change to taste
        local_dir_use_symlinks=False,      # avoid symlinks (Windows-friendly)
    )
    return Path(target), filename
28
+
29
MODEL_DIR, MODEL_FILE = download_single_gguf(REPO_ID, GGUF_FILE)

# Try auto-detecting the architecture first (no model_type); older
# ctransformers builds may fail on that, so fall back to an explicit type.
try:
    model = AutoModelForCausalLM.from_pretrained(
        str(MODEL_DIR),
        model_file=MODEL_FILE,
        gpu_layers=0,  # CPU-only
        context_length=DEFAULT_CONTEXT,
        threads=DEFAULT_THREADS,
    )
except Exception:
    # Fallback: some ctransformers versions require an explicit model_type;
    # "qwen" also works when "qwen2" is not recognised.
    model = AutoModelForCausalLM.from_pretrained(
        str(MODEL_DIR),
        model_file=MODEL_FILE,
        model_type="qwen",
        gpu_layers=0,
        context_length=DEFAULT_CONTEXT,
        threads=DEFAULT_THREADS,
    )
50
+ # ---------- Qwen chat template (no tools) ----------
51
def render_qwen(messages, add_generation_prompt=True) -> str:
    """Render OpenAI-style messages into the Qwen2.5 Instruct (ChatML) format.

    Produces:
        <|im_start|>system ... <|im_end|>
        <|im_start|>user ... <|im_end|>
        <|im_start|>assistant            (left open for generation)

    Roles other than user/assistant (after the optional leading system
    message) are silently dropped.
    """
    has_system = bool(messages) and messages[0].get("role") == "system"
    system_text = messages[0]["content"] if has_system else "You are Qwen, a helpful assistant."
    turns = messages[1:] if has_system else messages

    pieces = [f"<|im_start|>system\n{system_text}<|im_end|>\n"]
    pieces.extend(
        f"<|im_start|>{t['role']}\n{t['content']}<|im_end|>\n"
        for t in turns
        if t["role"] in ("user", "assistant")
    )
    if add_generation_prompt:
        pieces.append("<|im_start|>assistant\n")
    return "".join(pieces)
77
+
78
+ STOP_TOKENS = ["<|im_end|>", "<|endoftext|>"]
79
+
80
+ # ---------- Gradio callbacks ----------
81
def ensure_system(messages, sys_prompt):
    """Return messages with a system entry at index 0 carrying sys_prompt.

    If there is no leading system message, the history is RESET to just the
    system prompt (original behaviour); otherwise the existing system entry is
    replaced, shallow-copying the outer list so the input is not mutated.
    """
    if not messages or messages[0].get("role") != "system":
        return [{"role": "system", "content": sys_prompt}]
    updated = messages.copy()
    updated[0] = {"role": "system", "content": sys_prompt}
    return updated
89
+
90
def visible_chat(messages):
    """Return only the turns shown in the chat pane (system stays hidden)."""
    shown_roles = ("user", "assistant")
    return [turn for turn in messages if turn["role"] in shown_roles]
93
+
94
def on_new_session(sys_prompt):
    """Reset the conversation to just the (stripped) system prompt."""
    fresh = [{"role": "system", "content": sys_prompt.strip()}]
    return fresh, visible_chat(fresh)
97
+
98
def on_send(user_text, messages, sys_prompt, temperature, top_p, max_new_tokens, repetition_penalty):
    """Generate one assistant reply.

    Returns (textbox value, full message state, visible chat history).
    An empty/whitespace-only input leaves everything unchanged.
    """
    prompt_text = (user_text or "").strip()
    if not prompt_text:
        return gr.update(), messages, visible_chat(messages)  # nothing to do

    # Keep the system message in sync with the textbox, then add the user turn.
    messages = ensure_system(messages, sys_prompt.strip())
    messages = messages + [{"role": "user", "content": prompt_text}]

    rendered = render_qwen(messages, add_generation_prompt=True)
    reply = model(
        rendered,
        temperature=float(temperature),
        top_p=float(top_p),
        max_new_tokens=int(max_new_tokens),
        repetition_penalty=float(repetition_penalty),
        stop=STOP_TOKENS,
    ).strip()

    messages = messages + [{"role": "assistant", "content": reply}]
    return "", messages, visible_chat(messages)
122
+
123
def on_download(messages):
    """Serialize the conversation (system message included) to ./exports.

    Returns the written file's path for gr.File.
    """
    from datetime import timezone  # local import: only this block needs it

    out_dir = Path("./exports")
    out_dir.mkdir(parents=True, exist_ok=True)
    # FIX: datetime.utcnow() is deprecated (Python 3.12+); use an aware UTC
    # timestamp — the rendered file name is unchanged.
    fname = datetime.now(timezone.utc).strftime("chat_%Y%m%dT%H%M%SZ.json")
    fpath = out_dir / fname
    with fpath.open("w", encoding="utf-8") as f:
        json.dump(messages, f, ensure_ascii=False, indent=2)
    return str(fpath)
132
+
133
+ # ---------- UI ----------
134
with gr.Blocks(title="Qwen 0.5B (CPU, GGUF via ctransformers)") as demo:
    gr.Markdown("## 🧠 Qwen 0.5B Instruct (CPU · GGUF · ctransformers)")

    with gr.Row():
        # Left column: system prompt, session controls, export, settings.
        with gr.Column(scale=3):
            sys_prompt = gr.Textbox(
                label="System prompt",
                value=(
                    "You are Qwen, a helpful assistant named Nova. "
                    "Use concise British English. Output Markdown."
                ),
                lines=6,
                show_label=True,
            )
            new_btn = gr.Button("New session", variant="secondary")
            dl_btn = gr.Button("Download JSON", variant="secondary")
            dl_file = gr.File(label="", visible=True)

            with gr.Accordion("Generation settings", open=False):
                temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.01, label="top_p")
                max_new_tokens = gr.Slider(16, 1024, value=256, step=16, label="max_new_tokens")
                repetition_penalty = gr.Slider(1.0, 2.0, value=1.07, step=0.01, label="repetition_penalty")

        # Right column: chat pane and input.
        with gr.Column(scale=9):
            chat = gr.Chatbot(
                label="Chat",
                height=560,
                render_markdown=True,
                type="messages",  # OpenAI-style: {role, content}
            )
            user_box = gr.Textbox(
                label="Your message",
                placeholder="Type and press Enter…",
                autofocus=True,
            )
            send = gr.Button("Send", variant="primary")

    # States
    messages = gr.State([])  # full conversation, including the system message

    # Events
    new_btn.click(
        on_new_session,
        inputs=[sys_prompt],
        outputs=[messages, chat],
    )

    # Enter key and Send button share the same handler.
    user_box.submit(
        on_send,
        inputs=[user_box, messages, sys_prompt, temperature, top_p, max_new_tokens, repetition_penalty],
        outputs=[user_box, messages, chat],
    )
    send.click(
        on_send,
        inputs=[user_box, messages, sys_prompt, temperature, top_p, max_new_tokens, repetition_penalty],
        outputs=[user_box, messages, chat],
    )

    dl_btn.click(
        on_download,
        inputs=[messages],
        outputs=[dl_file],
    )

if __name__ == "__main__":
    # Queue enables concurrency + progress; share not needed on Spaces
    demo.queue().launch()
msgs/msgs_Qwen/20251021-184023-718e4d/archive.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"ts": "2025-10-21T17:40:28.058026+00:00", "role": "user", "content": "give me a code that sort list in python"}
2
+ {"ts": "2025-10-21T17:40:28.072993+00:00", "role": "assistant", "content": "Here's a Python function to sort a list using the built-in `sorted()` function:\n```\ndef sort_list(lst):\n return sorted(lst)\n```"}
msgs/msgs_Qwen/20251021-184023-718e4d/trimmed.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "role": "system",
4
+ "content": "- Your name is Nova.\n- The user's name is Marshall.\n- Do NOT prefix with \"Q:\" or \"A:\"..\n- Output Markdown; code in fenced blocks with a language tag.\n- Answer concisely, but do return give empty feedback."
5
+ },
6
+ {
7
+ "role": "user",
8
+ "content": "give me a code that sort list in python"
9
+ },
10
+ {
11
+ "role": "assistant",
12
+ "content": "Here's a Python function to sort a list using the built-in `sorted()` function:\n```\ndef sort_list(lst):\n return sorted(lst)\n```"
13
+ }
14
+ ]
msgs/msgs_Qwen/20251021-184507-1b3177/archive.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"ts": "2025-10-21T17:45:14.346397+00:00", "role": "user", "content": "who are you?"}
2
+ {"ts": "2025-10-21T17:45:14.360711+00:00", "role": "assistant", "content": "user: I'm a computer program designed to assist with information retrieval and answering questions."}
3
+ {"ts": "2025-10-21T17:45:22.286771+00:00", "role": "user", "content": "ok"}
4
+ {"ts": "2025-10-21T17:45:22.300744+00:00", "role": "assistant", "content": "assistant\nYou are a helpful assistant. If you have any other questions, please ask."}
5
+ {"ts": "2025-10-21T17:51:29.255179+00:00", "role": "user", "content": "what is your name"}
6
+ {"ts": "2025-10-21T17:51:29.269669+00:00", "role": "assistant", "content": "assistant\nI am called Nova."}
7
+ {"ts": "2025-10-21T17:58:54.204216+00:00", "role": "user", "content": "hello"}
8
+ {"ts": "2025-10-21T17:58:54.207656+00:00", "role": "assistant", "content": "Hello! How can I assist you today?"}
9
+ {"ts": "2025-10-21T18:47:11.712140+00:00", "role": "user", "content": "what is sum of 890 and 334"}
10
+ {"ts": "2025-10-21T18:47:11.724123+00:00", "role": "assistant", "content": "The sum of 890 and 334 is 1224."}
msgs/msgs_Qwen/20251021-184507-1b3177/trimmed.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "role": "system",
4
+ "content": "- Your name is Nova. Refer to yourself as \"Nova\".\n- The user's name is Marshall. Address the user as \"Marshall\" when appropriate.\n- Do NOT prefix with \"Q:\" or \"A:\". Do NOT restate the user's question.\n- Output Markdown; code in fenced blocks with a language tag.\n- Answer concisely, but do return give empty feedback."
5
+ },
6
+ {
7
+ "role": "user",
8
+ "content": "who are you?"
9
+ },
10
+ {
11
+ "role": "assistant",
12
+ "content": "user: I'm a computer program designed to assist with information retrieval and answering questions."
13
+ },
14
+ {
15
+ "role": "user",
16
+ "content": "ok"
17
+ },
18
+ {
19
+ "role": "assistant",
20
+ "content": "assistant\nYou are a helpful assistant. If you have any other questions, please ask."
21
+ },
22
+ {
23
+ "role": "user",
24
+ "content": "what is your name"
25
+ },
26
+ {
27
+ "role": "assistant",
28
+ "content": "assistant\nI am called Nova."
29
+ },
30
+ {
31
+ "role": "user",
32
+ "content": "hello"
33
+ },
34
+ {
35
+ "role": "assistant",
36
+ "content": "Hello! How can I assist you today?"
37
+ },
38
+ {
39
+ "role": "user",
40
+ "content": "what is sum of 890 and 334"
41
+ },
42
+ {
43
+ "role": "assistant",
44
+ "content": "The sum of 890 and 334 is 1224."
45
+ }
46
+ ]
msgs/msgs_Qwen/20251021-185141-2e0843/archive.jsonl ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"ts": "2025-10-21T17:51:45.263874+00:00", "role": "user", "content": "what is your anme"}
2
+ {"ts": "2025-10-21T17:51:45.279853+00:00", "role": "assistant", "content": "Nova"}
3
+ {"ts": "2025-10-21T17:51:56.733888+00:00", "role": "user", "content": "who am i"}
4
+ {"ts": "2025-10-21T17:51:56.749178+00:00", "role": "assistant", "content": "Nova"}
5
+ {"ts": "2025-10-21T17:52:20.912518+00:00", "role": "user", "content": "who are you"}
6
+ {"ts": "2025-10-21T17:52:20.927428+00:00", "role": "assistant", "content": "I am Nova."}
7
+ {"ts": "2025-10-21T17:52:26.472739+00:00", "role": "user", "content": "who is the user"}
8
+ {"ts": "2025-10-21T17:52:26.484718+00:00", "role": "assistant", "content": "The user is named Marshall."}
9
+ {"ts": "2025-10-21T17:52:31.203857+00:00", "role": "user", "content": "who am i"}
10
+ {"ts": "2025-10-21T17:52:31.216050+00:00", "role": "assistant", "content": "I am Nova."}
11
+ {"ts": "2025-10-21T17:52:45.237195+00:00", "role": "user", "content": "I am the user"}
12
+ {"ts": "2025-10-21T17:52:45.249482+00:00", "role": "assistant", "content": "You are correct. You are the user."}
13
+ {"ts": "2025-10-21T17:52:50.746450+00:00", "role": "user", "content": "so who am i"}
14
+ {"ts": "2025-10-21T17:52:50.749891+00:00", "role": "assistant", "content": "You are the user."}
15
+ {"ts": "2025-10-21T17:52:56.134318+00:00", "role": "user", "content": "what is my name"}
16
+ {"ts": "2025-10-21T17:52:56.146091+00:00", "role": "assistant", "content": "Your name is Nova."}
17
+ {"ts": "2025-10-21T17:59:48.883744+00:00", "role": "user", "content": "can you give me a python code that combine two dataframe"}
18
+ {"ts": "2025-10-21T17:59:48.886736+00:00", "role": "assistant", "content": "Sure, here's a Python code that combines two pandas DataFrames:\n```python\nimport pandas as pd\n\n# Create the first DataFrame\ndf1 = pd.DataFrame({'Column1': ['A', 'B', 'C'],\n 'Column2': [1, 2, 3]})\n\n# Create the second DataFrame\ndf2 = pd.DataFrame({'Column1': [4, 5],\n 'Column2': [4.0, 5.0]})\n\n# Combine the two DataFrames using the 'left' parameter\nresult = pd.concat([df1, df2], axis=1)\n```\n\nThe resulting `result` DataFrame will have two columns and two rows, where the first row contains the data from the left DataFrame and the second row contains the data from the right DataFrame."}
19
+ {"ts": "2025-10-21T18:00:31.364693+00:00", "role": "user", "content": "can you give me a python code that combine two dataframe"}
20
+ {"ts": "2025-10-21T18:00:31.367685+00:00", "role": "assistant", "content": "Certainly! Here's a Python code that combines two pandas DataFrames:\n\n```python\nimport pandas as pd\n\n# Sample data for the first DataFrame (column 1)\ndata_frame_1 = pd.DataFrame({'Column1': ['A', 'B', 'C', 'D'], 'Column2': [1, 2, 3, 4]})\n\n# Sample data for the second DataFrame (column 1)\ndata_frame_2 = pd.DataFrame({'Column1': ['E', 'F', 'G', 'H'], 'Column2': [5, 6, 7, 8]})\n\n# Combining the two DataFrames\ncombined_df = pd.concat([data_frame_1, data_frame_2], axis=0)\n\nprint(combined_df)\n```\n\nThis code will output:\n\n```\n Column1 Column2\n0 A 1\n1 B 2\n2 C 3\n3 D 4\n4 E 5\n5 F 6\n6 G 7\n7 H 8\n```\n\nYou can replace the sample data with your own data and it will work correctly."}
msgs/msgs_Qwen/20251021-185141-2e0843/trimmed.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "role": "system",
4
+ "content": "- Your name is Nova. Refer to yourself as \"Nova\".\n- The user's name is Marshall. Address the user as \"Marshall\" when appropriate.\n- Do NOT prefix with \"Q:\" or \"A:\". Do NOT restate the user's question.\n- Output Markdown; code in fenced blocks with a language tag.\n- Answer concisely, but do return give empty feedback."
5
+ },
6
+ {
7
+ "role": "user",
8
+ "content": "what is your anme"
9
+ },
10
+ {
11
+ "role": "assistant",
12
+ "content": "Nova"
13
+ },
14
+ {
15
+ "role": "user",
16
+ "content": "who am i"
17
+ },
18
+ {
19
+ "role": "assistant",
20
+ "content": "Nova"
21
+ },
22
+ {
23
+ "role": "user",
24
+ "content": "who are you"
25
+ },
26
+ {
27
+ "role": "assistant",
28
+ "content": "I am Nova."
29
+ },
30
+ {
31
+ "role": "user",
32
+ "content": "who is the user"
33
+ },
34
+ {
35
+ "role": "assistant",
36
+ "content": "The user is named Marshall."
37
+ },
38
+ {
39
+ "role": "user",
40
+ "content": "who am i"
41
+ },
42
+ {
43
+ "role": "assistant",
44
+ "content": "I am Nova."
45
+ },
46
+ {
47
+ "role": "user",
48
+ "content": "I am the user"
49
+ },
50
+ {
51
+ "role": "assistant",
52
+ "content": "You are correct. You are the user."
53
+ },
54
+ {
55
+ "role": "user",
56
+ "content": "so who am i"
57
+ },
58
+ {
59
+ "role": "assistant",
60
+ "content": "You are the user."
61
+ },
62
+ {
63
+ "role": "user",
64
+ "content": "what is my name"
65
+ },
66
+ {
67
+ "role": "assistant",
68
+ "content": "Your name is Nova."
69
+ },
70
+ {
71
+ "role": "user",
72
+ "content": "can you give me a python code that combine two dataframe"
73
+ },
74
+ {
75
+ "role": "assistant",
76
+ "content": "Certainly! Here's a Python code that combines two pandas DataFrames:\n\n```python\nimport pandas as pd\n\n# Sample data for the first DataFrame (column 1)\ndata_frame_1 = pd.DataFrame({'Column1': ['A', 'B', 'C', 'D'], 'Column2': [1, 2, 3, 4]})\n\n# Sample data for the second DataFrame (column 1)\ndata_frame_2 = pd.DataFrame({'Column1': ['E', 'F', 'G', 'H'], 'Column2': [5, 6, 7, 8]})\n\n# Combining the two DataFrames\ncombined_df = pd.concat([data_frame_1, data_frame_2], axis=0)\n\nprint(combined_df)\n```\n\nThis code will output:\n\n```\n Column1 Column2\n0 A 1\n1 B 2\n2 C 3\n3 D 4\n4 E 5\n5 F 6\n6 G 7\n7 H 8\n```\n\nYou can replace the sample data with your own data and it will work correctly."
77
+ }
78
+ ]
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==5.49.1
2
+ huggingface_hub>=0.23
3
+ orjson
4
+ llama-cpp-python==0.2.90
requirements_awq.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cpu
2
+
3
+ pip>=24.2
4
+ setuptools>=68
5
+ wheel
6
+ numpy<2 # 避免部分包对 numpy 2.0 不兼容
7
+
8
+ torch==2.4.1
9
+ transformers==4.44.2
10
+ accelerate>=0.33.0
11
+ safetensors>=0.4.3
12
+ autoawq==0.2.7 # 这个版本在CPU+旧torch更稳
13
+ gradio==5.49.1
14
+ huggingface_hub>=0.23.0
utils.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from __future__ import annotations
2
+ from pathlib import Path
3
+ import uuid
4
+ from datetime import datetime, timezone
5
+ import json, os
6
+ from typing import List, Dict, Tuple, Optional
7
+
8
+ # ============ Utility helpers ============
9
def mk_msg_dir(BASE_MSG_DIR) -> str:
    """Create a fresh message directory under *BASE_MSG_DIR*.

    The directory name is a timestamp plus a short random suffix
    (``YYYYmmdd-HHMMSS-xxxxxx``).  Only the ID (not the full path)
    is returned.
    """
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S-")
    msg_id = stamp + uuid.uuid4().hex[:6]
    (Path(BASE_MSG_DIR) / msg_id).mkdir(parents=True, exist_ok=True)
    return msg_id
13
+
14
def _as_dir(BASE_MSG_DIR, m_id: str) -> Path:
    """Normalize a message ID to its directory path ``<BASE_MSG_DIR>/<m_id>``.

    NOTE(fix): the original annotation said ``-> str`` but the function has
    always returned a ``pathlib.Path``; the annotation is corrected here —
    no runtime behavior changes.
    """
    return Path(BASE_MSG_DIR, m_id)
17
+
18
def msg2hist(persona, msg):
    """Build a chat history from a stored message list, stripping the
    persona prefix from the first message's content.

    Returns a new outer list; only ``msg[0]`` is copied before mutation,
    the remaining dicts are shared with *msg*.  Returns ``[]`` when *msg*
    is ``None`` or empty.

    NOTE(review): only ``len(persona)`` characters are sliced off — this
    assumes ``msg[0]["content"]`` actually starts with *persona*; confirm
    callers guarantee that.
    """
    # Idiom fix: replaces `msg != None` + nested len() check with a single
    # truthiness guard (same behavior for None and []).
    if not msg:
        return []
    chat_history = msg.copy()          # shallow copy of the outer list
    chat_history[0] = msg[0].copy()    # copy the first dict before mutating it
    chat_history[0]["content"] = chat_history[0]["content"][len(persona):]
    return chat_history
26
+
27
def render(tok, messages: List[Dict[str, str]]) -> str:
    """Render *messages* into the final prompt text via the tokenizer's
    chat_template (no tokenization; generation prompt appended)."""
    prompt = tok.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    return prompt
30
+
31
def _ensure_alternating(messages):
    """Validate that *messages* starts with a 'user' turn and that 'user'
    appears exactly at the even indices (so user/assistant alternate).

    Raises ValueError on violation; returns None (empty input is accepted).
    """
    if not messages:
        return
    if messages[0]["role"] != "user":
        raise ValueError("messages[0] 必须是 'user'(你的模板要求从 user 开始)")
    for idx, msg in enumerate(messages):
        # 'user' must sit on even indices and nowhere else; non-'user'
        # roles at odd indices are accepted as-is.
        is_user = msg["role"] == "user"
        if is_user != (idx % 2 == 0):
            raise ValueError(f"对话必须严格交替 user/assistant,在索引 {idx} 处发现 {msg['role']}")
40
+
41
def trim_by_tokens(tok, messages, prompt_budget):
    """Trim history to fit *prompt_budget* tokens while preserving alternation.

    Keeps ``messages[0]`` (the persona user turn) plus the longest suffix
    that starts at an ODD index, found by binary search.  Odd start indices
    mean the suffix begins with an assistant turn, so re-joining it after
    the persona user turn keeps strict user/assistant alternation.
    """
    if not messages:
        return []
    if len(messages) == 1:
        # Only the persona message: nothing to trim.
        return messages

    # Candidate suffix start points: odd indices only.
    starts = [k for k in range(1, len(messages)) if k % 2 == 1]

    # Fallback when no suffix fits: keep just the persona message.
    best = [messages[0]]

    def _prompt_tokens(candidate):
        # Token cost of the fully rendered candidate (no special tokens).
        rendered = tok.apply_chat_template(candidate, tokenize=False)
        return len(tok(rendered, add_special_tokens=False).input_ids)

    # Binary search: an earlier start keeps more messages and therefore
    # costs more tokens (monotonic), so bisecting over the start index works.
    lo, hi = 0, len(starts) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        candidate = [messages[0]] + messages[starts[mid]:]
        if _prompt_tokens(candidate) <= prompt_budget:
            best = candidate  # fits: try to keep even more (move left)
            hi = mid - 1
        else:
            lo = mid + 1      # too big: drop more old messages (move right)

    return best
77
+
78
+ # ============ Atomic write (may conflict with OneDrive sync) ============
79
+ # def atomic_write_json(path: Path, data) -> None:
80
+ # tmp = path.with_suffix(path.suffix + ".tmp")
81
+ # with open(tmp, "w", encoding="utf-8") as f:
82
+ # json.dump(data, f, ensure_ascii=False, indent=2)
83
+ # f.flush()
84
+ # os.fsync(f.fileno())
85
+ # os.replace(tmp, path) # 同目录原子替换
86
+
87
+ # Plain (non-atomic) overwrite
88
def write_json_overwrite(path: Path, data) -> None:
    """Overwrite *path* with *data* serialized as pretty-printed UTF-8 JSON
    (non-ASCII kept verbatim, LF line endings)."""
    with open(path, "w", encoding="utf-8", newline="\n") as fh:
        fh.write(json.dumps(data, ensure_ascii=False, indent=2))
91
+
92
+ # ============ Storage layer ============
93
+ class MsgStore:
94
+ def __init__(self, base_dir: str | Path = "./msgs"):
95
+ self.base = Path(base_dir)
96
+ self.base.mkdir(parents=True, exist_ok=True)
97
+ self.archive = self.base / "archive.jsonl" # 只追加
98
+ self.trimmed = self.base / "trimmed.json" # 当前上下文
99
+ if not self.archive.exists():
100
+ self.archive.write_text("", encoding="utf-8")
101
+ if not self.trimmed.exists():
102
+ self.trimmed.write_text("[]", encoding="utf-8")
103
+
104
+ def load_trimmed(self) -> List[Dict[str, str]]:
105
+ try:
106
+ return json.loads(self.trimmed.read_text(encoding="utf-8"))
107
+ except Exception:
108
+ return []
109
+
110
+ def save_trimmed(self, messages: List[Dict[str, str]]) -> None:
111
+ write_json_overwrite(self.trimmed, messages)
112
+
113
+ def append_archive(self, role: str, content: str, meta: dict | None = None) -> None:
114
+ rec = {"ts": datetime.now(timezone.utc).isoformat(), "role": role, "content": content}
115
+ if meta: rec["meta"] = meta
116
+ with open(self.archive, "a", encoding="utf-8") as f:
117
+ f.write(json.dumps(rec, ensure_ascii=False) + "\n")
118
+ f.flush(); os.fsync(f.fileno())
119
+
120
+ # ============ Explicit save (persisted only when called manually) ============
121
def persist_messages(
    messages: List[Dict[str, str]],
    store_dir: str | Path = "./msgs",
    archive_last_turn: bool = True,
) -> None:
    """Persist *messages* to disk (nothing is saved unless this is called).

    1. Overwrites ``trimmed.json`` with the full message list.
    2. Optionally appends the most recent (user, assistant) pair to
       ``archive.jsonl``; if no complete pair exists (e.g. called before a
       reply was generated), only ``trimmed.json`` is written.
    """
    store = MsgStore(store_dir)
    store.save_trimmed(messages)

    if not archive_last_turn:
        return

    # Scan backwards for the most recent complete (user, assistant) pair.
    for idx in range(len(messages) - 2, -1, -1):
        if messages[idx]["role"] == "user" and messages[idx + 1]["role"] == "assistant":
            store.append_archive("user", messages[idx]["content"])
            store.append_archive("assistant", messages[idx + 1]["content"])
            return
    # No pair found: trimmed.json was written, nothing archived.