import gradio as gr import requests import threading import subprocess import time import os import base64 from pathlib import Path # ── 模型路径配置 ────────────────────────────────────────────── MODEL_DIR = "/home/user/app/models" MODEL_URL = ( "https://huggingface.co/HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive" "/resolve/main/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive-IQ4_XS.gguf" ) MMPROJ_URL = ( "https://huggingface.co/HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive" "/resolve/main/mmproj-Gemma-4-E4B-Uncensored-HauhauCS-Aggressive-f16.gguf" ) MODEL_PATH = os.path.join(MODEL_DIR, "model.gguf") MMPROJ_PATH = os.path.join(MODEL_DIR, "mmproj.gguf") SERVER_URL = "http://127.0.0.1:8080" server_ready = threading.Event() download_status = {"progress": "⏳ 正在初始化..."} # ── 下载 + 启动服务器 ────────────────────────────────────────── def download_file(url: str, dest: str, label: str): if os.path.exists(dest): download_status["progress"] = f"✅ {label} 已缓存,跳过下载" return download_status["progress"] = f"⬇️ 正在下载 {label}..." r = requests.get(url, stream=True) total = int(r.headers.get("content-length", 0)) done = 0 os.makedirs(os.path.dirname(dest), exist_ok=True) with open(dest, "wb") as f: for chunk in r.iter_content(chunk_size=1 << 20): f.write(chunk) done += len(chunk) if total: pct = done * 100 // total download_status["progress"] = f"⬇️ {label}: {pct}% ({done>>20} MB / {total>>20} MB)" def start_backend(): download_file(MODEL_URL, MODEL_PATH, "主模型 IQ4_XS") download_file(MMPROJ_URL, MMPROJ_PATH, "多模态投影层 mmproj") download_status["progress"] = "🚀 正在启动 llama-server..." cmd = [ "llama-server", "-m", MODEL_PATH, "--mmproj", MMPROJ_PATH, "--host", "127.0.0.1", "--port", "8080", "-c", "4096", # 上下文窗口 "--n-predict", "1024", "-t", str(os.cpu_count() or 4), # 使用全部 CPU 核心 "--cont-batching", # 连续批处理,提升吞吐 "--flash-attn", # Flash Attention(若支持) "-np", "1", ] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) for line in proc.stdout: text = line.decode(errors="ignore").strip() if "server listening" in text.lower() or "all slots are idle" in text.lower(): download_status["progress"] = "✅ 模型已就绪,可以开始对话!" server_ready.set() break threading.Thread(target=start_backend, daemon=True).start() # ── 推理函数(流式) ─────────────────────────────────────────── def encode_image(path: str) -> str: with open(path, "rb") as f: return base64.b64encode(f.read()).decode() def build_messages(history, system_prompt): msgs = [] if system_prompt.strip(): msgs.append({"role": "system", "content": system_prompt.strip()}) for turn in history: role = turn["role"] content = turn["content"] msgs.append({"role": role, "content": content}) return msgs def respond(message, image, history, system_prompt, max_tokens, temperature, top_p): if not server_ready.is_set(): yield history, download_status["progress"] return # 构造用户消息(支持图片) if image: user_content = [ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image)}"}}, {"type": "text", "text": message or "请描述这张图片"} ] else: user_content = message history = history + [{"role": "user", "content": user_content}] payload = { "model": "gemma", "messages": build_messages(history, system_prompt), "max_tokens": int(max_tokens), "temperature": float(temperature), "top_p": float(top_p), "stream": True, } assistant_text = "" history = history + [{"role": "assistant", "content": ""}] try: with requests.post(f"{SERVER_URL}/v1/chat/completions", json=payload, stream=True, timeout=120) as resp: for raw in resp.iter_lines(): if not raw: continue line = raw.decode("utf-8", errors="ignore") if line.startswith("data: "): line = line[6:] if line == "[DONE]": break try: import json delta = json.loads(line)["choices"][0]["delta"].get("content", "") assistant_text += delta history[-1]["content"] = assistant_text yield history, "" except Exception: continue except Exception as e: history[-1]["content"] = f"❌ 推理出错: {e}" yield history, "" # ── Gradio UI ───────────────────────────────────────────────── CSS = """ @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;600&family=Noto+Sans+SC:wght@300;400;500&display=swap'); :root { --bg: #0d0f14; --surface: #161923; --border: #252a36; --accent: #4fffff; --accent2: #7c6efa; --text: #dce3f0; --muted: #5a6480; --user-bg: #1a2540; --bot-bg: #111520; --radius: 12px; --font-mono: 'JetBrains Mono', monospace; --font-body: 'Noto Sans SC', sans-serif; } body, .gradio-container { background: var(--bg) !important; color: var(--text) !important; font-family: var(--font-body) !important; } /* Header */ #header { text-align: center; padding: 28px 0 16px; border-bottom: 1px solid var(--border); margin-bottom: 16px; } #header h1 { font-family: var(--font-mono); font-size: 1.6rem; font-weight: 600; background: linear-gradient(135deg, var(--accent), var(--accent2)); -webkit-background-clip: text; -webkit-text-fill-color: transparent; letter-spacing: 2px; margin: 0; } #header p { color: var(--muted); font-size: 0.82rem; margin-top: 6px; font-family: var(--font-mono); } /* Status bar */ #status-bar { font-family: var(--font-mono); font-size: 0.78rem; color: var(--accent); background: rgba(79,255,255,0.05); border: 1px solid rgba(79,255,255,0.15); border-radius: 8px; padding: 8px 14px; margin-bottom: 12px; } /* Chatbot */ #chatbot { background: var(--surface) !important; border: 1px solid var(--border) !important; border-radius: var(--radius) !important; min-height: 460px; } #chatbot .message.user { background: var(--user-bg) !important; border-radius: 10px 10px 2px 10px !important; } #chatbot .message.bot { background: var(--bot-bg) !important; border-radius: 10px 10px 10px 2px !important; } #chatbot .message { color: var(--text) !important; font-size: 0.9rem !important; line-height: 1.7 !important; } /* Input row */ #input-row { margin-top: 10px; } #msg-box textarea { background: var(--surface) !important; border: 1px solid var(--border) !important; color: var(--text) !important; border-radius: 10px !important; font-family: var(--font-body) !important; font-size: 0.9rem !important; resize: none !important; } #msg-box textarea:focus { border-color: var(--accent) !important; box-shadow: 0 0 0 2px rgba(79,255,255,0.1) !important; } /* Buttons */ #send-btn, #clear-btn { font-family: var(--font-mono) !important; font-size: 0.82rem !important; border-radius: 8px !important; transition: all 0.2s !important; } #send-btn { background: linear-gradient(135deg, #2a7fff, var(--accent2)) !important; color: #fff !important; border: none !important; } #send-btn:hover { filter: brightness(1.15) !important; transform: translateY(-1px) !important; } #clear-btn { background: transparent !important; border: 1px solid var(--border) !important; color: var(--muted) !important; } #clear-btn:hover { border-color: var(--accent) !important; color: var(--accent) !important; } /* Settings panel */ #settings-panel { background: var(--surface) !important; border: 1px solid var(--border) !important; border-radius: var(--radius) !important; padding: 16px !important; } #settings-panel label { color: var(--muted) !important; font-size: 0.78rem !important; font-family: var(--font-mono) !important; } #settings-panel input[type=range] { accent-color: var(--accent) !important; } /* Image upload */ #image-upload { border: 1px dashed var(--border) !important; border-radius: 10px !important; background: var(--bg) !important; } /* Accordion */ .gr-accordion { background: var(--surface) !important; border-color: var(--border) !important; } /* Scrollbar */ ::-webkit-scrollbar { width: 4px; } ::-webkit-scrollbar-track { background: var(--bg); } ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; } """ def get_status(): return download_status["progress"] with gr.Blocks(css=CSS, title="Gemma-4 Chat", theme=gr.themes.Base()) as demo: # Header gr.HTML("""
IQ4_XS · Multimodal · llama.cpp backend · HF Space