# gemma4-e4b / app.py
# wenyin's picture
# Update app.py
# c9b8758 verified
import base64
import json
import mimetypes
import os
import subprocess
import threading
import time
from pathlib import Path

import gradio as gr
import requests
# ── Model path configuration ──────────────────────────────────────────────
# Directory that holds the downloaded GGUF files.
MODEL_DIR = "/home/user/app/models"

# Both artifacts are fetched from the same Hugging Face repository.
_HF_RESOLVE_BASE = (
    "https://huggingface.co/HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive"
    "/resolve/main/"
)
MODEL_URL = _HF_RESOLVE_BASE + "Gemma-4-E4B-Uncensored-HauhauCS-Aggressive-IQ4_XS.gguf"
MMPROJ_URL = _HF_RESOLVE_BASE + "mmproj-Gemma-4-E4B-Uncensored-HauhauCS-Aggressive-f16.gguf"

# Local file names the two downloads are stored under.
MODEL_PATH, MMPROJ_PATH = (
    os.path.join(MODEL_DIR, file_name) for file_name in ("model.gguf", "mmproj.gguf")
)

# Address of the local llama-server that inference requests are sent to.
SERVER_URL = "http://127.0.0.1:8080"

# Set by the backend thread once llama-server reports that it is up.
server_ready = threading.Event()
# Shared, human-readable progress text shown in the UI status bar.
download_status = dict(progress="โณ ๆญฃๅœจๅˆๅง‹ๅŒ–...")
# ── Download + start the server ───────────────────────────────────────────
def download_file(url: str, dest: str, label: str):
    """Download *url* to *dest*, publishing progress in ``download_status``.

    If *dest* already exists the download is skipped. The payload is streamed
    into ``dest + ".part"`` and only renamed to *dest* after the transfer
    finished, so an interrupted or failed download is never mistaken for a
    cached file on the next start.

    Args:
        url: HTTP(S) URL of the file to fetch.
        dest: Local path the file is stored at.
        label: Human-readable name used in the progress text.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
        OSError: If the file cannot be written or the transfer ended before
            the announced ``Content-Length`` was received.
    """
    if os.path.exists(dest):
        download_status["progress"] = f"โœ… {label} ๅทฒ็ผ“ๅญ˜๏ผŒ่ทณ่ฟ‡ไธ‹่ฝฝ"
        return

    download_status["progress"] = f"โฌ‡๏ธ ๆญฃๅœจไธ‹่ฝฝ {label}..."

    # os.makedirs("") raises, so only create a directory when dest has one.
    parent = os.path.dirname(dest)
    if parent:
        os.makedirs(parent, exist_ok=True)

    tmp_path = dest + ".part"
    try:
        # (connect, read) timeouts: the read timeout applies between socket
        # reads, so a long transfer is fine while a stalled one is not.
        with requests.get(url, stream=True, timeout=(10, 60)) as r:
            # Without this an HTML error page would be saved as the model.
            r.raise_for_status()
            total = int(r.headers.get("content-length", 0))
            done = 0
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    if not chunk:
                        continue
                    f.write(chunk)
                    done += len(chunk)
                    if total:
                        pct = done * 100 // total
                        download_status["progress"] = f"โฌ‡๏ธ {label}: {pct}% ({done>>20} MB / {total>>20} MB)"
            # Content-Length counts encoded bytes; only compare when the body
            # was not transparently decompressed.
            if total and not r.headers.get("content-encoding") and done < total:
                raise OSError(
                    f"download of {label} ended early ({done} of {total} bytes)"
                )
        # Atomic publish: dest only ever exists as a complete file.
        os.replace(tmp_path, dest)
    except BaseException:
        # Drop the partial file, then let the caller see the original error.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
        raise
def start_backend():
    """Download the model files, launch ``llama-server`` and track its state.

    Intended to run in a background thread. Progress and failures are
    published through ``download_status``; ``server_ready`` is set once the
    server log shows it is accepting requests and cleared again if the
    server process ends.

    The server's combined stdout/stderr is read until EOF. Reading must not
    stop after the readiness line: the output goes to a pipe, and a child
    that keeps logging into an unread pipe blocks once the pipe is full.
    """
    try:
        download_file(MODEL_URL, MODEL_PATH, "ไธปๆจกๅž‹ IQ4_XS")
        download_file(MMPROJ_URL, MMPROJ_PATH, "ๅคšๆจกๆ€ๆŠ•ๅฝฑๅฑ‚ mmproj")
    except Exception as exc:
        # Surface the failure instead of letting the thread die silently
        # with a stale "downloading" status.
        download_status["progress"] = f"Model download failed: {exc}"
        return

    download_status["progress"] = "๐Ÿš€ ๆญฃๅœจๅฏๅŠจ llama-server..."
    cmd = [
        "llama-server",
        "-m", MODEL_PATH,
        "--mmproj", MMPROJ_PATH,
        "--host", "127.0.0.1",
        "--port", "8080",
        "-c", "4096",  # context window
        "--n-predict", "1024",
        "-t", str(os.cpu_count() or 4),  # use all CPU cores
        "--cont-batching",  # continuous batching, improves throughput
        "--flash-attn",  # Flash Attention (if supported)
        "-np", "1",
    ]
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError as exc:
        # E.g. the llama-server binary is missing or not executable.
        download_status["progress"] = f"Failed to launch llama-server: {exc}"
        return

    for line in proc.stdout:
        if server_ready.is_set():
            # Already up: keep consuming output so the pipe never fills.
            continue
        text = line.decode(errors="ignore").strip().lower()
        if "server listening" in text or "all slots are idle" in text:
            download_status["progress"] = "โœ… ๆจกๅž‹ๅทฒๅฐฑ็ปช๏ผŒๅฏไปฅๅผ€ๅง‹ๅฏน่ฏ๏ผ"
            server_ready.set()

    # EOF on the pipe: the server closed its output, i.e. it is going away.
    exit_code = proc.wait()
    server_ready.clear()
    download_status["progress"] = f"llama-server exited (code {exit_code})"
# Run download + server start-up on a daemon thread so the rest of the module
# keeps executing while the backend is being prepared.
_backend_thread = threading.Thread(target=start_backend, daemon=True)
_backend_thread.start()
# ── Inference functions (streaming) ───────────────────────────────────────
def encode_image(path: str) -> str:
    """Return the contents of the file at *path* as base64 text."""
    with open(path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
def build_messages(history, system_prompt):
    """Assemble the message list sent to the chat-completions endpoint.

    A system message is prepended when *system_prompt* is non-blank (it is
    stripped first). Every entry of *history* is reduced to its ``role`` and
    ``content`` keys; any other keys are dropped.
    """
    cleaned_prompt = system_prompt.strip()
    system_part = [{"role": "system", "content": cleaned_prompt}] if cleaned_prompt else []
    return system_part + [
        {"role": entry["role"], "content": entry["content"]} for entry in history
    ]
def respond(message, image, history, system_prompt, max_tokens, temperature, top_p):
    """Stream a chat completion from the local server into the chat history.

    This is a generator. Each item is a ``(history, status_text)`` tuple where
    *history* is a new list (the argument is not mutated) ending with the
    user turn and the assistant turn received so far.

    Args:
        message: Text typed by the user; may be empty when *image* is given.
        image: Path of an uploaded image file, or a falsy value for none.
        history: Previous turns as dicts with ``role`` and ``content``.
        system_prompt: Optional system prompt text.
        max_tokens: Generation limit, converted with ``int``.
        temperature: Sampling temperature, converted with ``float``.
        top_p: Nucleus sampling value, converted with ``float``.

    Yields:
        ``(history, status_text)``. While the backend is not ready a single
        item carrying the current progress text is produced and nothing is
        sent to the server. Request failures, including non-success HTTP
        statuses, are reported as the assistant message instead of raising.
    """
    if not server_ready.is_set():
        yield history, download_status["progress"]
        return

    # Build the user message (with the image attached when one was uploaded).
    if image:
        # Label the data URL with the file's real type instead of always
        # claiming JPEG; fall back to JPEG when the type cannot be guessed.
        mime_type = mimetypes.guess_type(image)[0] or "image/jpeg"
        # NOTE(review): this OpenAI-style content list also ends up in the
        # Chatbot history; confirm the "messages" Chatbot renders it as
        # intended.
        user_content = [
            {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{encode_image(image)}"}},
            {"type": "text", "text": message or "่ฏทๆ่ฟฐ่ฟ™ๅผ ๅ›พ็‰‡"},
        ]
    else:
        user_content = message

    history = history + [{"role": "user", "content": user_content}]
    payload = {
        "model": "gemma",
        "messages": build_messages(history, system_prompt),
        "max_tokens": int(max_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
        "stream": True,
    }
    assistant_text = ""
    history = history + [{"role": "assistant", "content": ""}]

    # Show the user's turn right away rather than after the first token.
    yield history, ""

    try:
        with requests.post(f"{SERVER_URL}/v1/chat/completions",
                           json=payload, stream=True, timeout=120) as resp:
            # An error response is not an event stream; without this check it
            # would be skipped line by line and leave an empty reply.
            resp.raise_for_status()
            for raw in resp.iter_lines():
                if not raw:
                    continue
                line = raw.decode("utf-8", errors="ignore")
                if line.startswith("data: "):
                    line = line[6:]
                if line == "[DONE]":
                    break
                try:
                    delta = json.loads(line)["choices"][0]["delta"].get("content")
                except (ValueError, KeyError, IndexError, TypeError, AttributeError):
                    # Not a content chunk (non-JSON line or unexpected shape).
                    continue
                if not delta:
                    # Chunks may carry a null/empty content (e.g. role-only).
                    continue
                assistant_text += delta
                history[-1]["content"] = assistant_text
                yield history, ""
    except Exception as e:
        # UI boundary: report the failure in the chat instead of raising.
        history[-1]["content"] = f"โŒ ๆŽจ็†ๅ‡บ้”™: {e}"
        yield history, ""
# ── Gradio UI ─────────────────────────────────────────────────────────────
# Stylesheet handed to gr.Blocks(css=...). The selectors target the elem_id
# values assigned to the components in the UI definition (#header,
# #status-bar, #chatbot, #input-row, #msg-box, #send-btn, #clear-btn,
# #settings-panel, #image-upload); colors and fonts are defined once as CSS
# variables in :root.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;600&family=Noto+Sans+SC:wght@300;400;500&display=swap');
:root {
--bg: #0d0f14;
--surface: #161923;
--border: #252a36;
--accent: #4fffff;
--accent2: #7c6efa;
--text: #dce3f0;
--muted: #5a6480;
--user-bg: #1a2540;
--bot-bg: #111520;
--radius: 12px;
--font-mono: 'JetBrains Mono', monospace;
--font-body: 'Noto Sans SC', sans-serif;
}
body, .gradio-container {
background: var(--bg) !important;
color: var(--text) !important;
font-family: var(--font-body) !important;
}
/* Header */
#header {
text-align: center;
padding: 28px 0 16px;
border-bottom: 1px solid var(--border);
margin-bottom: 16px;
}
#header h1 {
font-family: var(--font-mono);
font-size: 1.6rem;
font-weight: 600;
background: linear-gradient(135deg, var(--accent), var(--accent2));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
letter-spacing: 2px;
margin: 0;
}
#header p {
color: var(--muted);
font-size: 0.82rem;
margin-top: 6px;
font-family: var(--font-mono);
}
/* Status bar */
#status-bar {
font-family: var(--font-mono);
font-size: 0.78rem;
color: var(--accent);
background: rgba(79,255,255,0.05);
border: 1px solid rgba(79,255,255,0.15);
border-radius: 8px;
padding: 8px 14px;
margin-bottom: 12px;
}
/* Chatbot */
#chatbot {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: var(--radius) !important;
min-height: 460px;
}
#chatbot .message.user { background: var(--user-bg) !important; border-radius: 10px 10px 2px 10px !important; }
#chatbot .message.bot { background: var(--bot-bg) !important; border-radius: 10px 10px 10px 2px !important; }
#chatbot .message { color: var(--text) !important; font-size: 0.9rem !important; line-height: 1.7 !important; }
/* Input row */
#input-row { margin-top: 10px; }
#msg-box textarea {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
color: var(--text) !important;
border-radius: 10px !important;
font-family: var(--font-body) !important;
font-size: 0.9rem !important;
resize: none !important;
}
#msg-box textarea:focus { border-color: var(--accent) !important; box-shadow: 0 0 0 2px rgba(79,255,255,0.1) !important; }
/* Buttons */
#send-btn, #clear-btn {
font-family: var(--font-mono) !important;
font-size: 0.82rem !important;
border-radius: 8px !important;
transition: all 0.2s !important;
}
#send-btn { background: linear-gradient(135deg, #2a7fff, var(--accent2)) !important; color: #fff !important; border: none !important; }
#send-btn:hover { filter: brightness(1.15) !important; transform: translateY(-1px) !important; }
#clear-btn { background: transparent !important; border: 1px solid var(--border) !important; color: var(--muted) !important; }
#clear-btn:hover { border-color: var(--accent) !important; color: var(--accent) !important; }
/* Settings panel */
#settings-panel {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: var(--radius) !important;
padding: 16px !important;
}
#settings-panel label { color: var(--muted) !important; font-size: 0.78rem !important; font-family: var(--font-mono) !important; }
#settings-panel input[type=range] { accent-color: var(--accent) !important; }
/* Image upload */
#image-upload { border: 1px dashed var(--border) !important; border-radius: 10px !important; background: var(--bg) !important; }
/* Accordion */
.gr-accordion { background: var(--surface) !important; border-color: var(--border) !important; }
/* Scrollbar */
::-webkit-scrollbar { width: 4px; }
::-webkit-scrollbar-track { background: var(--bg); }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
"""
def get_status():
    """Return the progress text currently stored in ``download_status``."""
    current_text = download_status["progress"]
    return current_text
# Page layout: header, auto-refreshing status line, chat area with message
# box / buttons / image upload on the left, generation settings on the right.
with gr.Blocks(css=CSS, title="Gemma-4 Chat", theme=gr.themes.Base()) as demo:
    # Header
    gr.HTML("""
<div id="header">
<h1>โ—ˆ GEMMA-4 ยท UNCENSORED</h1>
<p>IQ4_XS ยท Multimodal ยท llama.cpp backend ยท HF Space</p>
</div>
""")
    # Status (value is a callable, re-evaluated every 2 seconds)
    status_box = gr.Markdown(value=get_status, every=2, elem_id="status-bar")
    with gr.Row():
        # ── Left column: main chat area ──────────────────────────────
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                elem_id="chatbot",
                type="messages",
                show_label=False,
                height=480,
                avatar_images=(None, "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"),
                render_markdown=True,
            )
            with gr.Row(elem_id="input-row"):
                with gr.Column(scale=5):
                    msg = gr.Textbox(
                        placeholder="่พ“ๅ…ฅๆถˆๆฏ๏ผŒShift+Enter ๆข่กŒ๏ผŒEnter ๅ‘้€...",
                        show_label=False,
                        lines=2,
                        elem_id="msg-box",
                        max_lines=6,
                    )
                with gr.Column(scale=1, min_width=80):
                    send_btn = gr.Button("ๅ‘ ้€ โ–ถ", variant="primary", elem_id="send-btn")
                    clear_btn = gr.Button("ๆธ… ็ฉบ", elem_id="clear-btn")
            image_input = gr.Image(
                label="๐Ÿ“Ž ไธŠไผ ๅ›พ็‰‡๏ผˆๅฏ้€‰๏ผŒๆ”ฏๆŒๅคšๆจกๆ€๏ผ‰",
                type="filepath",
                elem_id="image-upload",
                height=120,
            )
        # ── Right column: settings panel ─────────────────────────────
        with gr.Column(scale=1, min_width=220, elem_id="settings-panel"):
            gr.Markdown("### โš™ ๅ‚ๆ•ฐ่ฎพ็ฝฎ", elem_classes=["setting-title"])
            system_prompt = gr.Textbox(
                label="System Prompt",
                value="You are a helpful assistant.",
                lines=4,
                max_lines=8,
            )
            max_tokens = gr.Slider(
                label="Max Tokens",
                minimum=64, maximum=2048, value=512, step=64,
            )
            temperature = gr.Slider(
                label="Temperature",
                minimum=0.0, maximum=2.0, value=0.7, step=0.05,
            )
            top_p = gr.Slider(
                label="Top-P",
                minimum=0.1, maximum=1.0, value=0.9, step=0.05,
            )
            gr.Markdown("""
---
**ๅฟซๆท่ฏดๆ˜Ž**
- ๆ”ฏๆŒๅ›พๆ–‡ๆททๅˆ่พ“ๅ…ฅ
- ๆตๅผ้€ๅญ—่พ“ๅ‡บ
- ไธŠไธ‹ๆ–‡้•ฟๅบฆ 4096
- ๅ…จๆ ธ CPU ๆŽจ็†
""", elem_classes=["muted-text"])

    # ── Event bindings ───────────────────────────────────────────────
    def user_submit(message, image, history, system_prompt, max_tokens, temp, top_p):
        """Forward a submission to ``respond`` unless it is completely empty.

        This is a generator (it delegates with ``yield from``), so the empty
        case ends with a bare ``return``: a value returned from a generator
        is never delivered as an output, and the previous three-item return
        value did not match the two bound outputs anyway.
        """
        if not message and not image:
            return
        yield from respond(message, image, history, system_prompt, max_tokens, temp, top_p)

    # The send button and pressing Enter in the text box behave identically:
    # stream the reply, then reset the text box and the image upload.
    chat_inputs = [msg, image_input, chatbot, system_prompt, max_tokens, temperature, top_p]
    for bind_event in (send_btn.click, msg.submit):
        bind_event(
            user_submit,
            inputs=chat_inputs,
            outputs=[chatbot, status_box],
        ).then(lambda: ("", None), outputs=[msg, image_input])

    clear_btn.click(lambda: ([], "", None), outputs=[chatbot, msg, image_input])
# ── Password authentication (read from HF Secrets) ────────────────────────
# Credentials come from the environment; login is only enforced when a
# non-empty APP_PASSWORD is configured.
APP_USER = os.environ.get("APP_USER", "admin")
APP_PASSWORD = os.environ.get("APP_PASSWORD", "")

_launch_auth = None
if APP_PASSWORD:
    _launch_auth = (APP_USER, APP_PASSWORD)

# Queue at most 4 pending requests, then serve on all interfaces, port 7860.
_queued_demo = demo.queue(max_size=4)
_queued_demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    auth=_launch_auth,
)