# Spaces: hikewa/dialectic-reasoning (status: Sleeping)
#
# File size: 3,598 Bytes

"""Dialectic Reasoning Chatbot — Gradio Space with ZeroGPU."""

import gc

import spaces
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Label of the model pre-selected in the UI; also the first registry entry.
DEFAULT_MODEL = "Qwen3-8B (recommended)"

# Registry of selectable models, keyed by the label shown to the user.
# Each entry names the base checkpoint ("base") and the adapter repo
# ("adapter") that load_model() reads.
MODELS = {
    DEFAULT_MODEL: dict(
        base="Qwen/Qwen3-8B",
        adapter="hikewa/dialectic-qwen3-8b-lora",
    ),
    "Qwen2.5-1.5B": dict(
        base="Qwen/Qwen2.5-1.5B-Instruct",
        adapter="hikewa/dialectic-qwen2.5-1.5b-lora",
    ),
}

# System message placed at the start of every conversation (two sentences,
# joined with a single space).
SYSTEM_PROMPT = " ".join(
    (
        "You reason carefully through problems by considering competing"
        " perspectives before reaching a conclusion.",
        "You identify genuine tensions, engage with the strongest form of"
        " each argument, and integrate insights rather than picking sides"
        " or hedging.",
    )
)

# Single-slot cache describing the currently loaded model; every field
# starts out as None and is filled in by load_model().
loaded = dict.fromkeys(("name", "model", "tokenizer"))


def load_model(model_name):
    """Return the ``(model, tokenizer)`` pair for *model_name*, loading on demand.

    The module-level ``loaded`` dict acts as a single-slot cache: asking for
    the model that is already resident returns it immediately, while asking
    for a different one evicts the current model before loading the new one.

    Args:
        model_name: A key of ``MODELS``.

    Returns:
        A tuple ``(model, tokenizer)``: the base model wrapped with its
        adapter, moved to ``"cuda"`` and switched to eval mode, plus the
        tokenizer loaded from the adapter repo.

    Raises:
        KeyError: If *model_name* is not a key of ``MODELS``. The cached
            model, if any, is left untouched in that case.
    """
    if loaded["name"] == model_name and loaded["model"] is not None:
        return loaded["model"], loaded["tokenizer"]

    # Resolve the config first so an unknown name fails before we throw away
    # a perfectly good resident model.
    cfg = MODELS[model_name]

    # Evict the previous model. Clear the whole cache entry, not just the
    # model: if the load below raises, a stale "name" would otherwise make
    # the next call for the old model return (None, tokenizer).
    if loaded["model"] is not None:
        loaded.update(name=None, model=None, tokenizer=None)
        gc.collect()
        torch.cuda.empty_cache()

    tokenizer = AutoTokenizer.from_pretrained(
        cfg["adapter"], trust_remote_code=True
    )
    base = AutoModelForCausalLM.from_pretrained(
        cfg["base"], torch_dtype=torch.float16, trust_remote_code=True
    )
    model = PeftModel.from_pretrained(base, cfg["adapter"])
    model = model.to("cuda")
    model.eval()

    # Publish to the cache only after every step succeeded.
    loaded.update(name=model_name, model=model, tokenizer=tokenizer)
    return model, tokenizer


def _chat_messages(message, history):
    """Build the chat-template input: system prompt, prior turns, new user turn."""
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history or ():
        if isinstance(turn, dict):
            # Forward only role/content so any extra keys carried by the
            # history entries never reach the chat template.
            role, content = turn.get("role"), turn.get("content")
            if role is not None and content is not None:
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            # Pair-style history: (user_text, assistant_text). Either side
            # may be None (e.g. no reply yet); skip the missing side.
            user_text, assistant_text = turn
            if user_text is not None:
                messages.append({"role": "user", "content": user_text})
            if assistant_text is not None:
                messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})
    return messages


@spaces.GPU
def respond(message, history, model_name):
    """Generate the assistant's reply to *message* with the selected model.

    Args:
        message: The new user message.
        history: Prior conversation, either as dicts with ``"role"`` and
            ``"content"`` keys or as 2-item ``(user, assistant)`` pairs.
            Entries of any other shape are ignored; ``None`` is treated as
            an empty history.
        model_name: A key of ``MODELS`` selecting which model to use.

    Returns:
        The decoded reply text (newly generated tokens only, special tokens
        removed, surrounding whitespace stripped).

    Raises:
        KeyError: If *model_name* is not a key of ``MODELS`` (propagated
            from ``load_model``).
    """
    model, tokenizer = load_model(model_name)

    text = tokenizer.apply_chat_template(
        _chat_messages(message, history),
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(text, return_tensors="pt")
    inputs = {k: v.to("cuda") for k, v in inputs.items()}

    # Fall back to EOS for padding when the tokenizer defines no pad token,
    # rather than handing generate() an explicit None.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=pad_token_id,
        )

    # generate() returns prompt + continuation; slice off the prompt tokens.
    prompt_len = inputs["input_ids"].shape[1]
    generated = outputs[0][prompt_len:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()


# Chat UI wired to respond(). The model dropdown is supplied as an additional
# input, matching respond's third parameter (model_name).
demo = gr.ChatInterface(
    respond,
    title="Dialectic Reasoning",
    description=(
        "Fine-tuned on 510 dialectic reasoning traces. "
        "Ask a question involving competing perspectives."
    ),
    additional_inputs=[
        gr.Dropdown(label="Model", choices=list(MODELS), value=DEFAULT_MODEL),
    ],
    # Each example is a one-element row holding only the message text.
    examples=[
        [prompt]
        for prompt in (
            "Should AI systems be transparent about their reasoning, even when transparency reduces performance?",
            "Is it better to optimize for individual freedom or collective wellbeing?",
            "When does pragmatic compromise become unprincipled capitulation?",
        )
    ],
    cache_examples=False,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)