import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# ----------------------------------------------------------------------
# Model identifier, section markers and default system prompt
# ----------------------------------------------------------------------
# Hugging Face Hub identifier used for both the tokenizer and the model.
MODEL_ID = "SupraLabs/Supra-50M-Reasoning"

# Marker strings that delimit the reasoning and solution sections of the
# generated text: build_prompt() ends the prompt with THINK_START and
# parse_output() searches for all four.
THINK_START, THINK_END = "<|begin_of_thought|>", "<|end_of_thought|>"
SOL_START, SOL_END = "<|begin_of_solution|>", "<|end_of_solution|>"

# Default text for the system prompt (one sentence, fragments space-joined).
DEFAULT_SYSTEM_PROMPT = " ".join(
    (
        "Your role as an assistant involves thoroughly exploring questions through",
        "a systematic long thinking process before providing the final precise and",
        "accurate solutions.",
    )
)
# ----------------------------------------------------------------------
# Load model once
# ----------------------------------------------------------------------
# Load the tokenizer and the float32 CPU model a single time at import;
# generate() reuses these module-level objects on every call.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# nn.Module.eval() returns the module itself, so it is chained onto the load
# call to put the model in inference mode.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, dtype=torch.float32, device_map="cpu"
).eval()
print("Model ready.")
# ----------------------------------------------------------------------
# Prompt construction, output parsing and generation
# ----------------------------------------------------------------------
def build_prompt(question: str, system_prompt: str) -> str:
    """Assemble the text prompt handed to the model.

    The prompt consists of three sections tagged ``[SYSTEM]``, ``[USER]`` and
    ``[ASSISTANT]``, separated by blank lines. The assistant section is
    opened with ``THINK_START`` followed by a newline.

    Args:
        question: Text placed in the ``[USER]`` section.
        system_prompt: Text placed in the ``[SYSTEM]`` section.

    Returns:
        The assembled prompt string.
    """
    sections = (
        f"[SYSTEM]: {system_prompt}",
        f"[USER]: {question}",
        f"[ASSISTANT]: {THINK_START}\n",
    )
    return "\n\n".join(sections)
def parse_output(raw: str):
    """Split model text into its reasoning and its final answer.

    Closing markers are searched for only *after* their opening marker, so a
    stray closing marker earlier in the text cannot produce an empty slice.

    Args:
        raw: Text that may contain the THINK_*/SOL_* marker strings.

    Returns:
        A ``(thought, answer)`` tuple of strings.

        * ``thought`` is the stripped text between ``THINK_START`` and the
          first ``THINK_END`` that follows it, or ``""`` when no such pair
          exists.
        * ``answer`` is, in priority order: the stripped text between
          ``SOL_START`` and the first ``SOL_END`` after it; the stripped text
          after ``SOL_START`` when no closing marker follows; the stripped
          text after the first ``THINK_END`` when there is no ``SOL_START``;
          otherwise ``raw`` unchanged.
    """
    thought, answer = "", raw

    think_open = raw.find(THINK_START)
    if think_open != -1:
        think_from = think_open + len(THINK_START)
        think_to = raw.find(THINK_END, think_from)
        if think_to != -1:
            thought = raw[think_from:think_to].strip()

    sol_open = raw.find(SOL_START)
    if sol_open != -1:
        sol_from = sol_open + len(SOL_START)
        sol_to = raw.find(SOL_END, sol_from)
        # An unterminated solution (e.g. generation hit the token limit)
        # still yields everything produced after the opening marker.
        solution = raw[sol_from:] if sol_to == -1 else raw[sol_from:sol_to]
        answer = solution.strip()
    elif THINK_END in raw:
        # No solution markers at all: treat whatever follows the reasoning
        # section as the answer.
        answer = raw[raw.index(THINK_END) + len(THINK_END):].strip()

    return thought, answer
def generate(prompt, system_prompt, max_new_tokens, temperature, top_p, top_k, show_thinking):
    """Run one single-turn generation and split it into thought and answer.

    Args:
        prompt: The user's question. ``None``, empty or whitespace-only
            input short-circuits with a hint message and no model call.
        system_prompt: Text placed in the ``[SYSTEM]`` section of the prompt.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature; a value ``<= 0`` selects greedy
            decoding.
        top_p: Nucleus-sampling threshold, used only when sampling.
        top_k: Top-k cutoff (coerced to ``int``), used only when sampling.
        show_thinking: When falsy, the returned thought is replaced by ``""``.

    Returns:
        A ``(thought, answer)`` tuple of strings as produced by
        ``parse_output``.
    """
    if not prompt or not prompt.strip():
        return "", "Please enter a question."

    encoded = tokenizer(build_prompt(prompt, system_prompt), return_tensors="pt")
    input_ids = encoded["input_ids"]

    gen_kwargs = {
        # pad_token_id is set to the EOS id below, so pass the tokenizer's
        # mask explicitly rather than leaving generate() to infer one.
        "attention_mask": encoded.get("attention_mask"),
        # UI widgets may deliver numbers as floats; generate() needs an int.
        "max_new_tokens": int(max_new_tokens),
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        gen_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
        )
    else:
        # Greedy decoding: sampling-only arguments are omitted because they
        # have no effect when do_sample is False.
        gen_kwargs["do_sample"] = False

    with torch.no_grad():
        output_ids = model.generate(input_ids, **gen_kwargs)

    # Keep only the newly generated tokens (drop the echoed prompt).
    completion_ids = output_ids[0][input_ids.shape[-1]:]
    raw = tokenizer.decode(completion_ids, skip_special_tokens=False)

    # The previous chained `.replace("", "")` calls had an empty search
    # string and therefore removed nothing. Because decoding keeps special
    # tokens (the section markers must survive), strip the tokenizer's own
    # BOS/EOS/PAD strings here, never touching the four section markers.
    section_markers = {THINK_START, THINK_END, SOL_START, SOL_END}
    for attr in ("bos_token", "eos_token", "pad_token"):
        control_token = getattr(tokenizer, attr, None)
        if control_token and control_token not in section_markers:
            raw = raw.replace(control_token, "")

    # The prompt already ended with THINK_START, so put it back in front of
    # the completion for parse_output() to find the reasoning section.
    raw = THINK_START + "\n" + raw.strip()
    thought, answer = parse_output(raw)
    return (thought if show_thinking else ""), answer
# ----------------------------------------------------------------------
# Chat callback for Gradio
# ----------------------------------------------------------------------
def chat_generate(message, history, system_prompt, max_tokens, temperature, top_p, top_k, show_think):
    """Handle one chat submission and return the updated UI values.

    The model call itself is single-turn: only ``message`` and
    ``system_prompt`` are sent to ``generate``. ``history`` is used solely to
    keep earlier exchanges visible.

    Args:
        message: Text submitted by the user; ``None`` or blank input is a
            no-op that leaves the conversation as it was.
        history: Existing conversation as a list of
            ``{"role": ..., "content": ...}`` dicts, or ``None``/empty.
        system_prompt, max_tokens, temperature, top_p, top_k, show_think:
            Forwarded unchanged to ``generate``.

    Returns:
        A 4-tuple ``("", updated_history, thought, answer)``. The leading
        empty string presumably clears the input box -- the output wiring is
        not visible here, TODO confirm.
    """
    # Copy so the caller's list is never mutated in place.
    conversation = list(history) if history else []

    if not message or not message.strip():
        # Nothing to send: keep the existing conversation instead of wiping it.
        return "", conversation, "", ""

    thought, answer = generate(
        message, system_prompt, max_tokens, temperature, top_p, top_k, show_think
    )
    conversation.append({"role": "user", "content": message})
    conversation.append({"role": "assistant", "content": answer})
    return "", conversation, thought, answer
def clear_fn():
    """Return blank values for four outputs: text, empty history, text, text.

    A new empty list is created on every call.
    """
    blank = ""
    return blank, [], blank, blank
# ----------------------------------------------------------------------
# Custom CSS — classic, elegant dark theme with serif headings
# ----------------------------------------------------------------------
# Stylesheet passed to gr.Blocks(css=...). It applies a dark palette, loads the
# Playfair Display, Inter and JetBrains Mono fonts from fonts.googleapis.com via
# @import, restyles chat/input/button elements, and hides the <footer> element.
# NOTE(review): selectors such as #header-section, .lang-toggle, .model-card,
# .chatbot-wrap, .input-wrap, .thinking-box, .answer-box and .system-box only
# take effect if the layout assigns matching ids/classes -- confirm against the
# Blocks definition.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;600;700&family=Inter:wght@300;400;500;600&family=JetBrains+Mono&display=swap');
* { box-sizing: border-box; }
body, .gradio-container {
background: #1a1a1a !important;
color: #d4c5b2 !important;
font-family: 'Inter', sans-serif !important;
}
.gradio-container {
max-width: 1300px !important;
margin: 0 auto !important;
padding: 2rem 1.5rem !important;
}
/* Header with language toggle */
#header-section {
background: linear-gradient(145deg, #2a2118 0%, #1e1b15 100%);
border: 1px solid #5c4a32;
border-radius: 18px;
padding: 2rem;
margin-bottom: 2rem;
position: relative;
box-shadow: 0 8px 30px rgba(0,0,0,0.5);
}
#header-section h1 {
font-family: 'Playfair Display', serif;
font-size: 2.5rem;
color: #d4af37;
margin-top: 0;
font-weight: 700;
letter-spacing: 1px;
}
#header-section p {
font-size: 1.1rem;
color: #c0b09a;
line-height: 1.7;
}
.lang-toggle {
position: absolute;
top: 20px;
right: 20px;
background: #3e3525;
border: 1px solid #5c4a32;
color: #d4af37;
padding: 6px 16px;
border-radius: 30px;
font-family: 'Inter', sans-serif;
font-weight: 600;
font-size: 0.9rem;
cursor: pointer;
transition: all 0.3s;
}
.lang-toggle:hover {
background: #5c4a32;
color: #f5e6c8;
}
/* Model cards */
.model-card {
background: #2a241c;
border: 1px solid #4a3e2c;
border-radius: 14px;
padding: 1.2rem;
margin-bottom: 1rem;
transition: transform 0.2s, box-shadow 0.2s;
}
.model-card:hover {
transform: translateY(-3px);
box-shadow: 0 10px 25px rgba(0,0,0,0.7);
}
.model-card a {
color: #d4af37;
text-decoration: none;
font-weight: 600;
font-size: 1.15rem;
}
.model-card p {
color: #b9a88c;
margin: 0.5rem 0 0;
font-size: 0.9rem;
}
/* Focus list */
.focus-list {
list-style: none;
padding-left: 0;
}
.focus-list li {
padding: 0.3rem 0;
font-size: 1rem;
color: #c0b09a;
}
/* Resources table */
.resources-table {
width: 100%;
border-collapse: collapse;
margin-top: 1rem;
}
.resources-table td {
padding: 10px 0;
border-bottom: 1px solid #3e3525;
}
.resources-table a {
color: #d4af37;
text-decoration: none;
font-weight: 500;
}
.resources-table a:hover {
text-decoration: underline;
}
/* Footer */
.footer-text {
text-align: center;
color: #6b5e4a;
font-size: 0.85rem;
margin-top: 2rem;
padding-top: 1.5rem;
border-top: 1px solid #3e3525;
}
.footer-text a {
color: #d4af37;
text-decoration: none;
}
/* Gradio components restyling */
.chatbot-wrap .wrap {
background: #1e1b15 !important;
border: 1px solid #4a3e2c !important;
border-radius: 14px !important;
}
.message.user {
background: linear-gradient(135deg, #5c4a32, #7a5c3e) !important;
color: white !important;
border-radius: 18px 18px 4px 18px !important;
padding: 12px 16px !important;
}
.message.bot {
background: #2a241c !important;
color: #e8dcc8 !important;
border: 1px solid #5c4a32 !important;
border-radius: 18px 18px 18px 4px !important;
}
.input-wrap textarea {
background: #2a241c !important;
border: 1px solid #4a3e2c !important;
color: #e8dcc8 !important;
font-family: 'Inter', sans-serif !important;
}
.input-wrap textarea:focus {
border-color: #d4af37 !important;
box-shadow: 0 0 0 3px rgba(212,175,55,0.15) !important;
}
button.primary {
background: linear-gradient(135deg, #7a5c3e, #a67c46) !important;
border: none !important;
border-radius: 10px !important;
color: white !important;
font-weight: 600 !important;
transition: all 0.2s !important;
}
button.primary:hover {
transform: translateY(-1px) !important;
box-shadow: 0 4px 20px rgba(166,124,70,0.4) !important;
}
.thinking-box textarea {
font-family: 'JetBrains Mono', monospace !important;
background: #1a1510 !important;
border: 1px solid #3e3525 !important;
color: #b9a88c !important;
}
.answer-box textarea {
font-family: 'Inter', sans-serif !important;
background: #1a1e15 !important;
border: 1px solid #3e4a2c !important;
color: #c5d4af !important;
}
.system-box textarea {
background: #1a1510 !important;
border: 1px solid #5c4a32 !important;
color: #d4af37 !important;
}
input[type=range] {
accent-color: #d4af37 !important;
}
.accordion {
background: #1e1b15 !important;
border: 1px solid #4a3e2c !important;
}
footer { display: none !important; }
"""
# ----------------------------------------------------------------------
# Bilingual content for the header & info section
# ----------------------------------------------------------------------
# UI copy keyed by language code ("en", "it"). Both entries carry the same
# keys: title, intro, models_title, model_q135, model_q270, model_qmod,
# focus_title, focus_items (list of str), resources_title, resources (list of
# (label, url) tuples), dataset_link and footer.
# NOTE(review): many of these strings contain mojibake -- Thai-script
# characters where emoji, dashes, a middle dot and accented Italian letters
# would be expected. It looks like UTF-8 text that was decoded with a Thai
# single-byte code page, with some bytes lost, so the original glyphs cannot
# be recovered from this file alone. Restore the text from a correctly
# encoded copy rather than guessing.
# NOTE(review): the Quark-135M URL and dataset_link say "ThingAI" while the
# other links say "ThingsAI" -- confirm which organisation name is correct.
CONTENT = {
"en": {
"title": "Welcome to ThingsAI! ๐ค",
"intro": "Building efficient, bilingual AI models that run anywhere. ๐ฎ๐น ๐ฌ๐ง",
"models_title": "๐ค Our Models",
"model_q135": "A lightweight bilingual (Italian + English) language model with 135M parameters. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.",
"model_q270": "Our most powerful small model โ 270M parameters with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.",
"model_qmod": "A multi-label moderation model covering 9 categories: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.",
"focus_title": "๐ฏ What We Focus On",
"focus_items": [
"โก Small, efficient architectures โ GQA, weight tying, deepโthin design",
"๐ Bilingual training โ Italian + English from scratch",
"๐ Openโsource everything โ weights, code, datasets",
"๐ป Realโworld deployment โ runs on consumer hardware"
],
"resources_title": "๐ Resources",
"resources": [
("๐ Quark-135M-Bilingual", "https://huggingface.co/ThingAI/Quark-135m-Bilingual"),
("๐ก๏ธ Quark-Mod", "https://huggingface.co/ThingsAI/Quark-Mod"),
("๐ HuggingFace Community", "https://huggingface.co/ThingsAI"),
("๐ป GitHub", "https://github.com/overcastlab")
],
"dataset_link": "๐ Dataset: ThingAI/OmniBook",
"footer": "Made with โค๏ธ by ThingsAI ยท Website ยท GitHub"
},
"it": {
"title": "Benvenuti in ThingsAI! ๐ค",
"intro": "Costruiamo modelli AI bilingui efficienti che funzionano ovunque. ๐ฎ๐น ๐ฌ๐ง",
"models_title": "๐ค I Nostri Modelli",
"model_q135": "Un modello linguistico bilingue leggero (italiano + inglese) con 135M parametri. Caratteristiche: GQA, SwiGLU, RMSNorm, RoPE. Addestrato su 50B+ token.",
"model_q270": "Il nostro piccolo modello piรน potente โ 270M parametri con 32 strati, dimensione nascosta 768, vocabolario 65K. In addestramento attivo su 10B+ token, pianificato 135B token.",
"model_qmod": "Un modello di moderazione multiโetichetta che copre 9 categorie: tossico, gravemente_tossico, osceno, minaccia, insulto, odio_identitario, cyberbullismo, incitamento_all'odio, offensivo.",
"focus_title": "๐ฏ Su Cosa Ci Concentriamo",
"focus_items": [
"โก Architetture piccole ed efficienti โ GQA, weight tying, design deepโthin",
"๐ Addestramento bilingue โ italiano + inglese da zero",
"๐ Tutto openโsource โ pesi, codice, dataset",
"๐ป Implementazione reale โ funziona su hardware consumer"
],
"resources_title": "๐ Risorse",
"resources": [
("๐ Quark-135M-Bilingual", "https://huggingface.co/ThingAI/Quark-135m-Bilingual"),
("๐ก๏ธ Quark-Mod", "https://huggingface.co/ThingsAI/Quark-Mod"),
("๐ Comunitร HuggingFace", "https://huggingface.co/ThingsAI"),
("๐ป GitHub", "https://github.com/overcastlab")
],
"dataset_link": "๐ Dataset: ThingAI/OmniBook",
"footer": "Fatto con โค๏ธ da ThingsAI ยท Sito Web ยท GitHub"
}
}
# ----------------------------------------------------------------------
# Build the complete Gradio interface
# ----------------------------------------------------------------------
with gr.Blocks(
title="ThingsAI โ Chat & Models",
css=CUSTOM_CSS,
theme=gr.themes.Soft() # base theme overridden by our CSS
) as demo:
# --- Header + Language Toggle ---
gr.HTML("""
Building efficient, bilingual AI models that run anywhere. ๐ฎ๐น ๐ฌ๐ง
A lightweight bilingual (Italian + English) language model with 135M parameters. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.
Our most powerful small model โ 270M parameters with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.
A multi-label moderation model covering 9 categories: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.
Base model.
Lightweight 50M model.
๐ Dataset: ThingAI/OmniBook
""") # --- Focus & Resources --- gr.HTML("""| ๐ Quark-135M-Bilingual |
| ๐ก๏ธ Quark-Mod |
| ๐ HuggingFace Community |
| ๐ป GitHub |