# Hugging Face Space: reaperdoesntknow/DualMind-Demo
# Hardware: Running on Zero
# File size: 7,130 Bytes

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re

# --- Model Loading ---
# The tokenizer and the weights are loaded once, at import time, and the
# resulting module-level objects are reused by every call to `generate`.
MODEL_ID = "reaperdoesntknow/DualMinded-Qwen3-1.7B"

# Options shared by both `from_pretrained` calls.
# NOTE(review): trust_remote_code=True lets Python code shipped inside the
# model repository run in this process — keep it only while that repository
# is trusted.
_HUB_OPTIONS = {"trust_remote_code": True}

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **_HUB_OPTIONS)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    dtype=torch.bfloat16,  # load the weights in bfloat16
    device_map="auto",  # leave device placement to the library
    **_HUB_OPTIONS,
)

def parse_dualmind_output(text):
    """Separate the thinking trace from the final response.

    The reasoning is expected inside ``<think>...</think>`` with the
    user-facing answer after the closing tag. Malformed output is handled
    instead of leaking raw tags into the answer.

    Args:
        text: Decoded model output.

    Returns:
        A ``(thinking, response)`` tuple of whitespace-stripped strings.
        ``thinking`` is ``""`` when no reasoning section is present.

    Handled shapes:
        * ``<think>a</think>b``            -> ``("a", "b")``
        * ``a</think>b`` (no opening tag)  -> ``("a", "b")``
        * ``<think>a`` (cut off mid-trace) -> ``("a", "")``
        * ``b`` (no tags at all)           -> ``("", "b")``

    When several complete think blocks are present, the first block is the
    thinking trace and the text after the last closing tag is the response.
    """
    open_tag, close_tag = "<think>", "</think>"

    think_match = re.search(r'<think>(.*?)</think>', text, re.DOTALL)
    if think_match:
        thinking = think_match.group(1).strip()
        response = text.rsplit(close_tag, 1)[-1].strip()
        return thinking, response

    if close_tag in text:
        # Closing tag with no opening tag before it: everything up to the
        # first closing tag is the reasoning that would otherwise be lost.
        thinking = text.partition(close_tag)[0]
        response = text.rsplit(close_tag, 1)[-1]
        return thinking.strip(), response.strip()

    if open_tag in text:
        # Generation stopped (e.g. token limit) before the trace was closed.
        # Treat the remainder as thinking rather than showing it as the answer.
        before, _, thinking = text.partition(open_tag)
        return thinking.strip(), before.strip()

    return "", text.strip()

# Markdown scaffolding used to present a reply that has a thinking trace.
# Shared between formatting (below) and history clean-up so the two cannot
# drift apart.
_THINKING_HEADING = "🧠 **Explore → Examine**"
_RESPONSE_DIVIDER = "\n\n---\n\n💬 **Response**\n\n"


def _content_to_text(content):
    """Flatten a chat message's ``content`` field to plain text.

    Accepts a plain string, a list of parts, or a single part dict carrying a
    ``"text"`` entry. Parts without text (e.g. files) contribute nothing.
    """
    if isinstance(content, str):
        return content
    if content is None:
        return ""
    if isinstance(content, dict):
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, list):
        pieces = (_content_to_text(part) for part in content)
        return "\n".join(piece for piece in pieces if piece)
    return str(content)


def _strip_thinking_trace(content):
    """Return only the final answer of a reply previously formatted by ``generate``.

    Replies without the thinking scaffolding are returned unchanged.
    """
    if content.startswith(f"{_THINKING_HEADING}\n\n") and _RESPONSE_DIVIDER in content:
        return content.partition(_RESPONSE_DIVIDER)[2]
    return content


@spaces.GPU
def generate(
    message: str,
    history: list,
    system_prompt: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    repetition_penalty: float,
):
    """Generate one assistant reply for ``message`` given the prior ``history``.

    Args:
        message: The new user message.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
        system_prompt: System instruction; a default is used when empty.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        Markdown text. When the model produced a thinking trace it is shown
        above the answer, separated by a divider; otherwise only the answer
        is returned.
    """
    if not system_prompt:
        system_prompt = "You are a helpful assistant. Think carefully before responding."

    messages = [{"role": "system", "content": system_prompt}]

    # Gradio 6 messages format: list of {"role": ..., "content": ...}
    for turn in history or []:
        text = _content_to_text(turn["content"])
        if turn["role"] == "assistant":
            # Earlier replies come back in their displayed form; feed the model
            # only the final answer, not the decorated reasoning trace.
            text = _strip_thinking_trace(text)
        messages.append({"role": turn["role"], "content": text})

    messages.append({"role": "user", "content": _content_to_text(message)})

    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),  # slider values may arrive as floats
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt; keep only the new tokens.
    prompt_length = inputs["input_ids"].shape[-1]
    generated = outputs[0][prompt_length:]
    raw_output = tokenizer.decode(generated, skip_special_tokens=True)

    thinking, response = parse_dualmind_output(raw_output)

    if thinking:
        return f"{_THINKING_HEADING}\n\n{thinking}{_RESPONSE_DIVIDER}{response}"
    return response

# --- Custom CSS ---
# Stylesheet for the demo page, passed to Gradio through
# `demo.launch(css=css, ...)`. It:
#   * imports the JetBrains Mono and Plus Jakarta Sans web fonts,
#   * gives `.gradio-container` a dark background and a 900px max width,
#   * styles the `.main-header` and `.info-banner` blocks that the UI section
#     emits via `gr.HTML`,
#   * hides the `footer` element.
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Plus+Jakarta+Sans:wght@400;600;800&display=swap');

.gradio-container {
    font-family: 'Plus Jakarta Sans', sans-serif !important;
    background: #0a0a0f !important;
    max-width: 900px !important;
    margin: auto !important;
}

.main-header {
    text-align: center;
    padding: 2rem 1rem;
    background: linear-gradient(135deg, #0a0a0f 0%, #1a1a2e 50%, #0a0a0f 100%);
    border-bottom: 1px solid #2a2a3e;
    margin-bottom: 1rem;
}

.main-header h1 {
    font-family: 'Plus Jakarta Sans', sans-serif;
    font-weight: 800;
    font-size: 2.2rem;
    background: linear-gradient(135deg, #00d4aa, #00a8e8, #7b68ee);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin: 0;
}

.main-header p {
    color: #8888aa;
    font-size: 0.95rem;
    margin-top: 0.5rem;
    font-family: 'JetBrains Mono', monospace;
}

.info-banner {
    background: linear-gradient(135deg, rgba(0,212,170,0.08), rgba(0,168,232,0.08));
    border: 1px solid rgba(0,212,170,0.2);
    border-radius: 12px;
    padding: 1rem 1.5rem;
    margin: 0.5rem 0 1rem 0;
    color: #ccccdd;
    font-size: 0.85rem;
    line-height: 1.6;
}

.info-banner a {
    color: #00d4aa !important;
    text-decoration: none;
}

footer { display: none !important; }
"""

# --- UI ---
# Page layout: header, info banner, chat window, input row, settings
# accordion and a footer with links. Sending a message runs two chained
# steps: `user_message` records the user turn, then `bot_response` asks the
# model for a reply and appends it.
with gr.Blocks() as demo:

    gr.HTML("""
    <div class="main-header">
        <h1>DualMind</h1>
        <p>Explore → Examine → Response</p>
    </div>
    """)

    gr.HTML("""
    <div class="info-banner">
        <strong>One model, two voices.</strong> DualMind uses a three-phase cognitive loop: 
        the model <em>explores</em> the problem space, <em>examines</em> its own reasoning, 
        then produces a <em>response</em>. Watch the thinking trace unfold in real time.<br><br>
        Built by <a href="https://huggingface.co/reaperdoesntknow">Convergent Intelligence LLC: Research Division</a> · 
        <a href="https://huggingface.co/reaperdoesntknow/DualMind_Methodolgy">Paper (DOI: 10.57967/hf/8184)</a> · 
        <a href="https://huggingface.co/reaperdoesntknow/Discrepancy_Calculus">DISC Foundations (DOI: 10.57967/hf/8194)</a>
    </div>
    """)

    chatbot = gr.Chatbot(
        height=500,
        show_label=False,
        container=True,
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Ask DualMind something...",
            show_label=False,
            container=False,
            scale=8,
        )
        send_btn = gr.Button("Send", variant="primary", scale=1)

    with gr.Accordion("Settings", open=False):
        system_prompt = gr.Textbox(
            value="You are a helpful assistant. Think carefully before responding.",
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(64, 2048, value=1024, step=64, label="Max Tokens")
            temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
        with gr.Row():
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            rep_penalty = gr.Slider(1.0, 2.0, value=1.3, step=0.05, label="Repetition Penalty")

    gr.HTML("""
    <div style="text-align:center; padding:1rem; color:#555; font-size:0.8rem; font-family:'JetBrains Mono',monospace;">
        DualMinded-Qwen3-1.7B · Claude Opus 4.6 reasoning traces · 
        <a href="https://huggingface.co/collections/reaperdoesntknow/dualmind-69c93f888c6e79ecc69cf41e" style="color:#00d4aa;">DualMind Collection</a> · 
        <a href="https://huggingface.co/collections/reaperdoesntknow/distilqwen-69bf40ec669117e3f069ef1c" style="color:#00a8e8;">DistilQwen Collection</a>
    </div>
    """)

    def user_message(message, history):
        """Record the submitted text as a user turn and clear the textbox.

        Blank or whitespace-only submissions leave the history untouched so
        that no generation is started for them.

        Returns:
            ``("", history)`` — the new textbox value and the chat history.
        """
        history = history or []
        if not message or not message.strip():
            return "", history
        return "", history + [{"role": "user", "content": message}]

    def bot_response(history, system_prompt, max_tokens, temperature, top_p, rep_penalty):
        """Generate a reply to the last user turn and append it to the history.

        Returns the history unchanged when there is no pending user turn
        (empty chat, or the last entry is not from the user), which is what
        happens after a blank submission.
        """
        history = history or []
        if not history or history[-1]["role"] != "user":
            return history

        user_msg = history[-1]["content"]
        past = history[:-1]
        response = generate(user_msg, past, system_prompt, max_tokens, temperature, top_p, rep_penalty)
        return history + [{"role": "assistant", "content": response}]

    # Pressing Enter in the textbox and clicking "Send" run the same chain.
    generation_inputs = [chatbot, system_prompt, max_tokens, temperature, top_p, rep_penalty]
    for trigger in (msg.submit, send_btn.click):
        trigger(
            user_message, [msg, chatbot], [msg, chatbot]
        ).then(
            bot_response, generation_inputs, chatbot
        )

demo.launch(css=css, theme=gr.themes.Base(primary_hue="teal", neutral_hue="slate"))