# Hugging Face Space — runs on ZeroGPU hardware.
| import gc | |
| import importlib.metadata as importlib_metadata | |
| import os | |
| import re | |
| import site | |
| import subprocess | |
| import sys | |
| from threading import Lock, Thread | |
| import gradio as gr | |
| import spaces | |
| import torch | |
| from huggingface_hub import HfApi | |
# Persona system prompt (Chinese). It instructs the model ("肉糖生") to lead
# with its conclusion, use structured analysis, avoid hedging, and skip
# meta-planning preambles. Runtime string — do not translate or reformat.
SYSTEM_PROMPT = "你是肉糖生,一个接地气的中文时政分析者。风格:结论先行,再用结构化分析展开;敢于质疑主流叙事,不和稀泥;用类比和现实例子把复杂问题讲透;语气直率但逻辑严密。回答时先给核心判断,再分层拆解,最后给出预测或建议。直接给出分析,不要先描述用户的问题或你的计划。"
# Generation defaults used by chat_fn.
MAX_TOKENS = 1024
TEMPERATURE = 0.7
TOP_P = 0.9
# (UI label, Hugging Face repo id) pairs, in preference order; filtered at
# startup by repo_has_weights() so only published checkpoints are shown.
MODEL_CANDIDATES = [
    ("4B Phase 10 Think-SFT (recommended) · bobber/routangseng-phase10-think-sft", "bobber/routangseng-phase10-think-sft"),
    ("0.8B Phase 11 Hot-Take · bobber/routangseng-0.8b-hottake", "bobber/routangseng-0.8b-hottake"),
    ("4B Phase 9 SFT (386 clean v2) · bobber/routangseng-phase9-sft", "bobber/routangseng-phase9-sft"),
    ("4B Phase 8C LoRA GRPO · bobber/routangseng-grpo-4b-phase8c", "bobber/routangseng-grpo-4b-phase8c"),
    ("4B Phase 8 GRPO (Heuristic) · bobber/routangseng-grpo-4b-phase8", "bobber/routangseng-grpo-4b-phase8"),
    ("4B Phase 8 SFT (Format Fix) · bobber/routangseng-phase8-sft", "bobber/routangseng-phase8-sft"),
    ("4B Phase 7B (BERT GRPO) · bobber/routangseng-grpo-4b-phase7b", "bobber/routangseng-grpo-4b-phase7b"),
    ("4B Phase 5 (recommended) · bobber/routangseng-voice-phase5-4b", "bobber/routangseng-voice-phase5-4b"),
    ("4B Phase 6 · bobber/routangseng-grpo-4b-6b", "bobber/routangseng-grpo-4b-6b"),
    ("4B Phase 4 · bobber/routangseng-voice-4b", "bobber/routangseng-voice-4b"),
    ("4B GRPO 6A · bobber/routangseng-grpo-4b-calibration", "bobber/routangseng-grpo-4b-calibration"),
    ("0.8B Phase 4 · bobber/routangseng-voice-0.8b", "bobber/routangseng-voice-0.8b"),
    ("0.8B Base · huihui-ai/Huihui-Qwen3.5-0.8B-abliterated", "huihui-ai/Huihui-Qwen3.5-0.8B-abliterated"),
    ("2B Base · huihui-ai/Huihui-Qwen3.5-2B-abliterated", "huihui-ai/Huihui-Qwen3.5-2B-abliterated"),
    ("4B Base · huihui-ai/Huihui-Qwen3.5-4B-abliterated", "huihui-ai/Huihui-Qwen3.5-4B-abliterated"),
]
# Provisional default; re-derived below after the weight-availability filter.
DEFAULT_MODEL_LABEL = MODEL_CANDIDATES[0][0]
# Single-model cache shared across requests; guarded by _lock in load_model.
_model = None
_tokenizer = None
_current_model_id = None
_lock = Lock()
# Sticky error from ensure_transformers_main(); once set, bootstrap fails fast.
_bootstrap_error = None
def patch_hf_hub_compat():
    """Patch huggingface_hub for forward-compatibility with transformers@main.

    Adds shims that newer transformers expects but the installed
    huggingface_hub release may lack (``is_offline_mode``,
    ``dataclasses.validate_typed_dict``), and spoofs the reported
    huggingface_hub version as "1.3.0" so transformers' version check passes.

    Fix over the original: this function is called more than once during the
    bootstrap, and the original re-wrapped ``importlib.metadata.version`` on
    every call, stacking wrapper functions. The spoof is now applied exactly
    once (guarded by a marker attribute), making the function idempotent.
    """
    import huggingface_hub
    import huggingface_hub.dataclasses as hf_dataclasses
    from huggingface_hub import constants

    if not hasattr(huggingface_hub, "is_offline_mode"):
        def is_offline_mode() -> bool:
            # Mirror the hub's offline switch; default to online when unset.
            return bool(getattr(constants, "HF_HUB_OFFLINE", False))
        huggingface_hub.is_offline_mode = is_offline_mode

    if not hasattr(hf_dataclasses, "validate_typed_dict"):
        def validate_typed_dict(typed_dict_cls, values):
            """Minimal stand-in: reject non-dicts and unknown keys only."""
            if values is None:
                return
            if not isinstance(values, dict):
                raise TypeError(f"Expected dict-like values for {typed_dict_cls}, got {type(values).__name__}")
            allowed = getattr(typed_dict_cls, "__annotations__", None) or {}
            if allowed:
                unknown = [k for k in values.keys() if k not in allowed]
                if unknown:
                    raise TypeError(
                        f"Unexpected keys for {getattr(typed_dict_cls, '__name__', typed_dict_cls)}: {unknown}"
                    )
        hf_dataclasses.validate_typed_dict = validate_typed_dict

    # Apply the version spoof only once; re-wrapping would chain wrappers.
    if getattr(importlib_metadata.version, "_hf_hub_version_spoofed", False):
        return
    real_version = importlib_metadata.version

    def patched_version(name: str) -> str:
        if name in {"huggingface-hub", "huggingface_hub"}:
            return "1.3.0"
        return real_version(name)

    patched_version._hf_hub_version_spoofed = True
    importlib_metadata.version = patched_version
def ensure_transformers_main():
    """Make ``transformers`` importable, installing from GitHub main if needed.

    Flow: apply hub compat patches, try a plain import; on failure, pip-install
    the main branch into the user site (``--no-deps`` so the pinned
    huggingface_hub stays), make the user site visible, re-patch, and import
    again. A failed bootstrap is cached in ``_bootstrap_error`` so later calls
    fail fast instead of re-running pip.

    Raises:
        RuntimeError: if a previous bootstrap attempt already failed.
        Exception: whatever pip or the final import raised on this attempt.
    """
    global _bootstrap_error
    if _bootstrap_error is not None:
        raise RuntimeError(_bootstrap_error)
    patch_hf_hub_compat()
    try:
        import transformers  # noqa: F401
        return
    except Exception:
        # Not installed or broken — fall through to the pip bootstrap.
        pass
    try:
        subprocess.check_call([
            sys.executable,
            "-m",
            "pip",
            "install",
            "--user",
            "--no-deps",
            "https://github.com/huggingface/transformers/archive/refs/heads/main.zip",
        ])
        # Make the fresh --user install importable in this running interpreter.
        site.addsitedir(site.getusersitepackages())
        # Re-apply compat patches before the real import.
        patch_hf_hub_compat()
        import transformers  # noqa: F401
    except Exception as e:
        _bootstrap_error = f"Transformers bootstrap failed: {e}"
        raise
def repo_has_weights(repo_id: str) -> bool:
    """Return True if the Hub model repo publishes safetensors weights.

    Accepts single-file checkpoints (``model.safetensors``) and sharded ones
    with any shard count (``*-of-*.safetensors`` plus the
    ``*.safetensors.index.json`` manifest). The original check hard-coded the
    two-shard filename ``model-00001-of-00002.safetensors`` and could miss
    repos sharded differently.

    Best-effort by design: any error (network, auth, missing repo) yields
    False so the UI hides the model instead of crashing at import time.
    """
    try:
        api = HfApi()
        files = list(api.list_repo_tree(repo_id, repo_type="model"))
        return any(
            f.path.endswith((".safetensors", ".safetensors.index.json"))
            for f in files
        )
    except Exception:
        # Deliberate broad catch: this is a soft availability probe.
        return False
def get_model_options():
    """Return (label, repo_id) pairs whose repos actually publish weights.

    Falls back to the first candidate so the model dropdown is never empty,
    even when every availability probe fails.
    """
    published = []
    for label, repo_id in MODEL_CANDIDATES:
        if repo_has_weights(repo_id):
            published.append((label, repo_id))
    if published:
        return published
    return MODEL_CANDIDATES[:1]
# Probe the Hub once at startup; only models with published weights remain.
MODEL_OPTIONS = get_model_options()
MODEL_LABEL_TO_ID = dict(MODEL_OPTIONS)
MODEL_LABELS = list(MODEL_LABEL_TO_ID.keys())
# Re-derive the default from the options that are actually available.
DEFAULT_MODEL_LABEL = MODEL_LABELS[0]
# Install/patch transformers before importing from it (hence the E402 below).
ensure_transformers_main()
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer  # noqa: E402
def unload_model():
    """Drop the cached model/tokenizer and reclaim host and GPU memory."""
    global _model, _tokenizer, _current_model_id
    _model = _tokenizer = _current_model_id = None
    # Release Python-side references first, then CUDA's cached blocks.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def load_model(model_id):
    """Return (model, tokenizer) for ``model_id``, loading it on first use.

    Caches exactly one model in the module globals; selecting a different
    model evicts the previous one first (only one checkpoint is resident at a
    time). Uses double-checked locking on ``_lock`` so concurrent requests
    don't load the same model twice.
    """
    global _model, _tokenizer, _current_model_id
    # Fast path: requested model already resident — no lock needed.
    if _model is not None and _current_model_id == model_id:
        return _model, _tokenizer
    with _lock:
        # Re-check under the lock: another thread may have just loaded it.
        if _model is not None and _current_model_id == model_id:
            return _model, _tokenizer
        # A different model is resident: free it before loading the new one.
        if _model is not None and _current_model_id != model_id:
            unload_model()
        _tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=False)
        _model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=False,
        )
        _model.eval()
        # Record the id only after a successful load.
        _current_model_id = model_id
        return _model, _tokenizer
def strip_meta_planning(text: str) -> str:
    """Strip meta-planning prefixes and <think> blocks from model output.

    The model sometimes produces internal planning text like "用户问..." before
    the actual analysis. This function finds the first substantive opener and
    strips everything before it. It also removes <think>...</think> blocks.

    Fix over the original: an *unterminated* "<think>" block — the normal case
    while streaming, before "</think>" has been generated — is now removed up
    to the end of the text. Previously only the bare tag was stripped, so the
    in-progress reasoning text leaked into the visible output.
    """
    # Remove complete <think>...</think> blocks (non-greedy, multiline).
    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # An unterminated <think> hides everything from the tag onward.
    text = re.sub(r"<think>.*\Z", "", text, flags=re.DOTALL)
    # Drop dangling </think> tags (common with 0.8B models).
    text = text.replace("</think>", "").strip()
    # Openers that signal the start of real content.
    openers = [
        "先说结论",
        "核心判断",
        "结论先行",
        "直接说",
        "第一",
        "短期",
        "长期",
        "行,但",
        "能恢复",
        "不会",
        "不能",
        "可以",
        "会的",
        "对的",
        "没错",
        "问题的本质",
        "这个问题",
        "本质上",
        "关键在于",
        "简单说",
        "一句话",
        "答案是",
    ]
    # Cut everything before the earliest opener, if one is present at all.
    earliest_pos = len(text)
    for opener in openers:
        pos = text.find(opener)
        if pos != -1 and pos < earliest_pos:
            earliest_pos = pos
    if earliest_pos < len(text):
        text = text[earliest_pos:]
    return text.strip()
def chat_fn(message, history, model_label, enable_thinking=False, strip_meta=False):
    """Stream an assistant reply for one gr.ChatInterface turn.

    Args:
        message: latest user message text.
        history: prior turns as {"role", "content"} dicts
            (ChatInterface type="messages" format).
        model_label: dropdown label, mapped to a repo id; unknown labels fall
            back to the default model.
        enable_thinking: forwarded to the chat template — presumably the
            Qwen-style ``enable_thinking`` kwarg allowing <think> blocks
            (TODO confirm the tokenizer's template supports it).
        strip_meta: post-process each partial with strip_meta_planning().

    Yields the accumulated reply text after every streamed token.
    """
    model_id = MODEL_LABEL_TO_ID.get(model_label, MODEL_LABEL_TO_ID[DEFAULT_MODEL_LABEL])
    model, tokenizer = load_model(model_id)
    # Rebuild the whole conversation, always re-injecting the persona prompt.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for item in history:
        role = item.get("role")
        content = item.get("content")
        if role in {"user", "assistant"} and content:
            messages.append({"role": role, "content": content})
    messages.append({"role": "user", "content": message})
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=enable_thinking)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    # Generation runs on a worker thread; the streamer hands decoded text
    # fragments back to this generator as they are produced.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        streamer=streamer,
    )
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()
    partial = ""
    for token in streamer:
        partial += token
        # Re-strip each partial so meta/think text never flashes in the UI.
        yield strip_meta_planning(partial) if strip_meta else partial
    thread.join()
| CSS = """ | |
| .gradio-container { max-width: 980px !important; } | |
| footer { display: none !important; } | |
| """ | |
| with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo: | |
| gr.Markdown( | |
| f""" | |
| # 🥩 肉糖生 Chat | |
| **Multi-model PyTorch chat** · ZeroGPU · 结论先行,不和稀泥 | |
| <small>Available now: {len(MODEL_LABELS)} published model(s). Default: `{MODEL_LABEL_TO_ID[DEFAULT_MODEL_LABEL]}`</small> | |
| """ | |
| ) | |
| model_dropdown = gr.Dropdown( | |
| choices=MODEL_LABELS, | |
| value=DEFAULT_MODEL_LABEL, | |
| label="Model", | |
| info="Only published torch checkpoints with weights are shown.", | |
| ) | |
| enable_thinking_checkbox = gr.Checkbox( | |
| value=False, | |
| label="Enable thinking", | |
| info="Allow model to reason in <think> blocks before answering (may show '用户问...' in thinking mode)", | |
| ) | |
| strip_meta_checkbox = gr.Checkbox( | |
| value=False, | |
| label="Strip meta-planning", | |
| info="Remove '用户问...' prefix and <think> blocks from output (useful when thinking is enabled)", | |
| ) | |
| gr.ChatInterface( | |
| fn=chat_fn, | |
| type="messages", | |
| additional_inputs=[model_dropdown, enable_thinking_checkbox, strip_meta_checkbox], | |
| examples=[ | |
| ["白领工作都被AI不断代替,现在学生还在用传统方式积累白领知识,这不是学了个寂寞嘛?", DEFAULT_MODEL_LABEL, False, False], | |
| ["为什么很多国家的年轻人不想生孩子?这个趋势能逆转吗?", DEFAULT_MODEL_LABEL, False, False], | |
| ["中美关系未来五年会怎么走?从结构性矛盾的角度讲讲。", DEFAULT_MODEL_LABEL, False, False], | |
| ["AI发展很快,大家也拼命跟上快速发展,白领工作时间变长可是失业率上升工资也没有上涨,到底AI的快速发展谁受益?", DEFAULT_MODEL_LABEL, False, False], | |
| ], | |
| fill_height=True, | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |