Spaces:
Running on Zero
Running on Zero
File size: 7,130 Bytes
f78bbeb 61d526a f78bbeb 61d526a f78bbeb 61d526a f78bbeb 61d526a f78bbeb 61d526a f78bbeb 61d526a f78bbeb 61d526a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re
# --- Model Loading ---
# Tokenizer and weights are fetched from the same Hub repository at import time.
MODEL_ID = "reaperdoesntknow/DualMinded-Qwen3-1.7B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Keyword arguments for the weight loader, kept together so they are easy to audit.
_MODEL_LOAD_OPTIONS = {
    "dtype": torch.bfloat16,
    "device_map": "auto",
    "trust_remote_code": True,
}
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **_MODEL_LOAD_OPTIONS)
def parse_dualmind_output(text):
    """Separate the thinking trace from the final response.

    Handles four shapes of model output:

    * ``<think>trace</think>answer`` -- the first closed block is the trace and
      everything after the last ``</think>`` is the answer.
    * ``trace</think>answer`` -- no opening tag in the decoded text; whatever
      precedes the first ``</think>`` is treated as the trace.
    * ``<think>trace`` -- the trace was never closed (for example generation
      stopped at the token limit); the text after ``<think>`` is the trace and
      the answer is whatever preceded the tag (usually empty).
    * no tags at all -- the whole text is the answer.

    Args:
        text: Decoded model output.

    Returns:
        A ``(thinking, response)`` tuple of whitespace-stripped strings; either
        element may be empty.
    """
    open_tag, close_tag = "<think>", "</think>"

    closed_block = re.search(r'<think>(.*?)</think>', text, re.DOTALL)
    if closed_block:
        return closed_block.group(1).strip(), text.split(close_tag)[-1].strip()

    if close_tag in text:
        # Closing tag without a matching opener: keep the leading text as the
        # trace instead of silently discarding it.
        trace, _, _ = text.partition(close_tag)
        return trace.strip(), text.split(close_tag)[-1].strip()

    if open_tag in text:
        # Unterminated trace: do not leak the raw tag into the response.
        before, _, trace = text.partition(open_tag)
        return trace.strip(), before.strip()

    return "", text.strip()
# Fixed pieces of the chat-bubble layout produced by generate(). They are also
# used to recognise earlier assistant turns when the prompt is rebuilt.
_TRACE_HEADER = "🧠 **Explore → Examine**"
_RESPONSE_DIVIDER = "\n\n---\n\n💬 **Response**"


def _content_to_text(content):
    """Flatten a chat message's ``content`` into plain text.

    Plain strings pass through unchanged. Lists/tuples are flattened item by
    item and joined with newlines; dicts contribute their ``"text"`` value.
    NOTE(review): some Gradio versions appear to deliver content as a list of
    ``{"type": "text", "text": ...}`` blocks -- handled defensively here,
    confirm against the installed version.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        return _content_to_text(content.get("text"))
    if isinstance(content, (list, tuple)):
        pieces = (_content_to_text(item) for item in content)
        return "\n".join(piece for piece in pieces if piece)
    return str(content)


def _strip_displayed_trace(content):
    """Return only the final answer of a previously formatted assistant turn.

    Turns that do not carry the trace header and response divider are returned
    unchanged.
    """
    if content.startswith(_TRACE_HEADER) and _RESPONSE_DIVIDER in content:
        return content.split(_RESPONSE_DIVIDER, 1)[1].strip()
    return content


@spaces.GPU
def generate(
    message: str,
    history: list,
    system_prompt: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    repetition_penalty: float,
):
    """Generate one assistant reply and format it for the chat window.

    Args:
        message: The new user message.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
        system_prompt: System instruction; a default is used when empty.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        The response text. When the model produced a thinking trace, the trace
        is shown first under a header, followed by a divider and the response.
    """
    if not system_prompt:
        system_prompt = "You are a helpful assistant. Think carefully before responding."

    messages = [{"role": "system", "content": system_prompt}]
    # Gradio 6 messages format: list of {"role": ..., "content": ...}
    for turn in history:
        text = _content_to_text(turn["content"])
        if turn["role"] == "assistant":
            # Earlier replies were stored with the UI decoration and the full
            # reasoning trace; feed back only the final answer so the prompt is
            # not polluted with markdown headers and old reasoning.
            # NOTE(review): the Qwen3 base-model guidance recommends history
            # without thinking content -- confirm it applies to this fine-tune.
            text = _strip_displayed_trace(text)
        messages.append({"role": turn["role"], "content": text})
    messages.append({"role": "user", "content": _content_to_text(message)})

    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # UI sliders may hand over a float; the token budget must be an int.
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Drop the prompt tokens so that only the newly generated part is decoded.
    generated = outputs[0][inputs["input_ids"].shape[-1]:]
    raw_output = tokenizer.decode(generated, skip_special_tokens=True)

    thinking, response = parse_dualmind_output(raw_output)
    if not thinking:
        return response
    return f"{_TRACE_HEADER}\n\n{thinking}{_RESPONSE_DIVIDER}\n\n{response}"
# --- Custom CSS ---
# Stylesheet handed to demo.launch(css=...). It imports two web fonts, styles
# the page container, the .main-header and .info-banner blocks used by the
# gr.HTML snippets in the UI section, and hides the page <footer> element.
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Plus+Jakarta+Sans:wght@400;600;800&display=swap');
.gradio-container {
font-family: 'Plus Jakarta Sans', sans-serif !important;
background: #0a0a0f !important;
max-width: 900px !important;
margin: auto !important;
}
.main-header {
text-align: center;
padding: 2rem 1rem;
background: linear-gradient(135deg, #0a0a0f 0%, #1a1a2e 50%, #0a0a0f 100%);
border-bottom: 1px solid #2a2a3e;
margin-bottom: 1rem;
}
.main-header h1 {
font-family: 'Plus Jakarta Sans', sans-serif;
font-weight: 800;
font-size: 2.2rem;
background: linear-gradient(135deg, #00d4aa, #00a8e8, #7b68ee);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin: 0;
}
.main-header p {
color: #8888aa;
font-size: 0.95rem;
margin-top: 0.5rem;
font-family: 'JetBrains Mono', monospace;
}
.info-banner {
background: linear-gradient(135deg, rgba(0,212,170,0.08), rgba(0,168,232,0.08));
border: 1px solid rgba(0,212,170,0.2);
border-radius: 12px;
padding: 1rem 1.5rem;
margin: 0.5rem 0 1rem 0;
color: #ccccdd;
font-size: 0.85rem;
line-height: 1.6;
}
.info-banner a {
color: #00d4aa !important;
text-decoration: none;
}
footer { display: none !important; }
"""
# --- UI ---
# Layout, top to bottom: header, info banner, chat window, input row,
# collapsible generation settings, footer links.
with gr.Blocks() as demo:
    gr.HTML("""
<div class="main-header">
<h1>DualMind</h1>
<p>Explore → Examine → Response</p>
</div>
""")
    gr.HTML("""
<div class="info-banner">
<strong>One model, two voices.</strong> DualMind uses a three-phase cognitive loop:
the model <em>explores</em> the problem space, <em>examines</em> its own reasoning,
then produces a <em>response</em>. Watch the thinking trace unfold in real time.<br><br>
Built by <a href="https://huggingface.co/reaperdoesntknow">Convergent Intelligence LLC: Research Division</a> ·
<a href="https://huggingface.co/reaperdoesntknow/DualMind_Methodolgy">Paper (DOI: 10.57967/hf/8184)</a> ·
<a href="https://huggingface.co/reaperdoesntknow/Discrepancy_Calculus">DISC Foundations (DOI: 10.57967/hf/8194)</a>
</div>
""")
    chatbot = gr.Chatbot(
        height=500,
        show_label=False,
        container=True,
    )
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Ask DualMind something...",
            show_label=False,
            container=False,
            scale=8,
        )
        send_btn = gr.Button("Send", variant="primary", scale=1)
    with gr.Accordion("Settings", open=False):
        system_prompt = gr.Textbox(
            value="You are a helpful assistant. Think carefully before responding.",
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(64, 2048, value=1024, step=64, label="Max Tokens")
            temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
        with gr.Row():
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            rep_penalty = gr.Slider(1.0, 2.0, value=1.3, step=0.05, label="Repetition Penalty")
    gr.HTML("""
<div style="text-align:center; padding:1rem; color:#555; font-size:0.8rem; font-family:'JetBrains Mono',monospace;">
DualMinded-Qwen3-1.7B · Claude Opus 4.6 reasoning traces ·
<a href="https://huggingface.co/collections/reaperdoesntknow/dualmind-69c93f888c6e79ecc69cf41e" style="color:#00d4aa;">DualMind Collection</a> ·
<a href="https://huggingface.co/collections/reaperdoesntknow/distilqwen-69bf40ec669117e3f069ef1c" style="color:#00a8e8;">DistilQwen Collection</a>
</div>
""")

    def user_message(message, history):
        """Append the typed message to the chat history and clear the textbox.

        Blank or whitespace-only input is ignored: the textbox is still
        cleared, but the history is returned unchanged so that no empty user
        turn is created.
        """
        history = history or []
        if not message or not message.strip():
            return "", history
        return "", history + [{"role": "user", "content": message}]

    def bot_response(history, system_prompt, max_tokens, temperature, top_p, rep_penalty):
        """Generate the assistant reply for the last user turn in ``history``.

        Returns the history with the assistant turn appended. When the history
        is empty or does not end with a user turn (a blank submission was
        ignored), it is returned unchanged and no generation is run.
        """
        if not history or history[-1]["role"] != "user":
            return history
        *past, latest = history
        response = generate(
            latest["content"], past, system_prompt, max_tokens, temperature, top_p, rep_penalty
        )
        return [*history, {"role": "assistant", "content": response}]

    # Pressing Enter in the textbox and clicking "Send" run the same two-step
    # chain: record the user turn first, then generate the reply.
    for trigger in (msg.submit, send_btn.click):
        trigger(
            user_message, [msg, chatbot], [msg, chatbot]
        ).then(
            bot_response,
            [chatbot, system_prompt, max_tokens, temperature, top_p, rep_penalty],
            chatbot,
        )

demo.launch(css=css, theme=gr.themes.Base(primary_hue="teal", neutral_hue="slate"))
|