"""Dialectic Reasoning Chatbot — Gradio Space with ZeroGPU."""
import gc
import spaces
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Selectable models, keyed by the label shown in the UI dropdown. For each
# entry, load_model() passes "base" to AutoModelForCausalLM.from_pretrained and
# "adapter" to both AutoTokenizer.from_pretrained and PeftModel.from_pretrained.
MODELS = {
    "Qwen3-8B (recommended)": dict(
        base="Qwen/Qwen3-8B",
        adapter="hikewa/dialectic-qwen3-8b-lora",
    ),
    "Qwen2.5-1.5B": dict(
        base="Qwen/Qwen2.5-1.5B-Instruct",
        adapter="hikewa/dialectic-qwen2.5-1.5b-lora",
    ),
}

# Label preselected in the model dropdown.
DEFAULT_MODEL = "Qwen3-8B (recommended)"

# System message placed at the start of every conversation sent to the model.
SYSTEM_PROMPT = " ".join(
    [
        "You reason carefully through problems by considering competing",
        "perspectives before reaching a conclusion. You identify genuine",
        "tensions, engage with the strongest form of each argument, and",
        "integrate insights rather than picking sides or hedging.",
    ]
)

# Single-slot cache describing the model currently held in memory; all fields
# are None until load_model() has populated them.
loaded = dict(name=None, model=None, tokenizer=None)
def load_model(model_name):
    """Return the ``(model, tokenizer)`` pair for ``model_name``, loading it if needed.

    At most one model is kept in the module-level ``loaded`` cache. Asking for
    the cached name returns the cached objects; asking for a different name
    evicts the cached model and loads the requested one.

    Args:
        model_name: Key into ``MODELS`` selecting the base checkpoint and adapter.

    Returns:
        A tuple ``(model, tokenizer)``: the adapter-wrapped model, moved to
        ``"cuda"`` and switched to eval mode, and the tokenizer loaded from the
        adapter repository.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``. The cached
            model, if any, is left untouched in that case.
    """
    # Require a live model as well as a matching name, so a cache left empty by
    # a failed load is never reported as a hit.
    if loaded["name"] == model_name and loaded["model"] is not None:
        return loaded["model"], loaded["tokenizer"]

    # Resolve the configuration before evicting anything: an unknown name must
    # not cost us the model that is already loaded.
    cfg = MODELS[model_name]

    if loaded["model"] is not None:
        # Clear every field, not just the model. If the load below raises, the
        # cache then reads as empty instead of naming a model that is gone.
        loaded.update(name=None, model=None, tokenizer=None)
        gc.collect()
        torch.cuda.empty_cache()

    tokenizer = AutoTokenizer.from_pretrained(
        cfg["adapter"], trust_remote_code=True
    )
    base = AutoModelForCausalLM.from_pretrained(
        cfg["base"], torch_dtype=torch.float16, trust_remote_code=True
    )
    model = PeftModel.from_pretrained(base, cfg["adapter"])
    model = model.to("cuda")
    model.eval()

    # Publish to the cache only once everything above has succeeded.
    loaded.update(name=model_name, model=model, tokenizer=tokenizer)
    return model, tokenizer
@spaces.GPU
def respond(message, history, model_name):
    """Generate one assistant reply for the chat interface.

    Builds a conversation from the system prompt, the prior turns and the new
    user message, renders it with the tokenizer's chat template, and samples a
    completion from the selected model.

    Args:
        message: The latest user message.
        history: Earlier turns. Each entry is either a message dict (forwarded
            unchanged) or a two-item ``(user, assistant)`` pair. Entries of any
            other shape are ignored. ``None`` is treated as no history.
        model_name: Key into ``MODELS`` selecting which model answers.

    Returns:
        The newly generated text only (prompt tokens excluded), decoded with
        special tokens skipped and surrounding whitespace stripped.
    """
    model, tokenizer = load_model(model_name)

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history or ():
        if isinstance(turn, dict):
            messages.append(turn)
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            # Either half of a pair may be None (a turn with no counterpart);
            # skip it rather than hand ``content=None`` to the chat template.
            for role, content in zip(("user", "assistant"), turn):
                if content is not None:
                    messages.append({"role": role, "content": content})
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {key: tensor.to("cuda") for key, tensor in inputs.items()}

    # Tokenizers without a dedicated pad token report None; fall back to EOS so
    # generate() always receives an explicit padding id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=pad_token_id,
        )

    # The returned sequence starts with the prompt tokens; keep only what
    # follows them.
    prompt_length = inputs["input_ids"].shape[1]
    generated = outputs[0][prompt_length:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
# Dropdown whose selected value is passed to respond() as ``model_name``.
_model_dropdown = gr.Dropdown(
    label="Model",
    value=DEFAULT_MODEL,
    choices=list(MODELS),
)

# Starter questions offered in the UI; each becomes a one-element example row.
_EXAMPLE_QUESTIONS = (
    "Should AI systems be transparent about their reasoning, even when transparency reduces performance?",
    "Is it better to optimize for individual freedom or collective wellbeing?",
    "When does pragmatic compromise become unprincipled capitulation?",
)

_DESCRIPTION = (
    "Fine-tuned on 510 dialectic reasoning traces. "
    "Ask a question involving competing perspectives."
)

# Chat UI wired to respond(); example caching is turned off.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[_model_dropdown],
    title="Dialectic Reasoning",
    description=_DESCRIPTION,
    examples=[[question] for question in _EXAMPLE_QUESTIONS],
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch(ssr_mode=False)
|