# test / app.py
# Hosting-page residue kept for provenance (not part of the program):
# "usermma's picture" / "Create app.py" / commit f6dd924 (verified)
import json

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# ----------------------------------------------------------------------
# Model identifier, reasoning markers and default system prompt
# ----------------------------------------------------------------------
# Identifier handed to both from_pretrained() calls below.
MODEL_ID = "SupraLabs/Supra-50M-Reasoning"
# Marker strings that delimit the reasoning ("thought") span and the final
# answer ("solution") span.  build_prompt() ends the prompt with THINK_START
# and parse_output() splits generated text on all four markers.
THINK_START = "<|begin_of_thought|>"
THINK_END = "<|end_of_thought|>"
SOL_START = "<|begin_of_solution|>"
SOL_END = "<|end_of_solution|>"
# Initial value of the "System Prompt" textbox in the settings panel.
DEFAULT_SYSTEM_PROMPT = (
    "Your role as an assistant involves thoroughly exploring questions through "
    "a systematic long thinking process before providing the final precise and "
    "accurate solutions."
)
# ----------------------------------------------------------------------
# Load model once
# ----------------------------------------------------------------------
# These statements run at import time, so a single tokenizer/model pair is
# created and then shared by every generate() call.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# NOTE(review): older transformers releases spell the `dtype` keyword
# `torch_dtype` -- confirm against the pinned library version.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    dtype=torch.float32,
    device_map="cpu",
)
model.eval()  # put the module in evaluation mode before serving requests
print("Model ready.")
# ----------------------------------------------------------------------
# Prompt construction, output parsing and generation
# ----------------------------------------------------------------------
def build_prompt(question: str, system_prompt: str) -> str:
    """Assemble the text prompt fed to the model.

    The prompt has three labelled sections -- ``[SYSTEM]``, ``[USER]`` and
    ``[ASSISTANT]`` -- and ends with the opening thought marker followed by a
    newline, so the model continues from inside its reasoning span.
    """
    system_section = f"[SYSTEM]: {system_prompt}\n\n"
    user_section = f"[USER]: {question}\n\n"
    assistant_section = f"[ASSISTANT]: {THINK_START}\n"
    return system_section + user_section + assistant_section
def parse_output(raw: str):
    """Split raw model text into ``(thought, answer)``.

    Args:
        raw: Decoded model output, expected to contain the thought and
            solution marker strings.

    Returns:
        A ``(thought, answer)`` tuple of stripped strings.

        * ``thought`` is the text between THINK_START and the first THINK_END
          that follows it, or ``""`` when that pair is not present.
        * ``answer`` is, in order of preference: the text between SOL_START
          and the first SOL_END that follows it; everything after SOL_START
          when no closing marker follows; everything after THINK_END when
          there is no SOL_START at all; otherwise ``raw`` unchanged.
    """
    thought, answer = "", raw

    # Each closing marker is searched for *after* its opening marker.  Looking
    # it up from the start of the string would let a stray closing marker
    # emitted earlier produce an empty (end < start) slice.
    think_open = raw.find(THINK_START)
    if think_open != -1:
        think_from = think_open + len(THINK_START)
        think_to = raw.find(THINK_END, think_from)
        if think_to != -1:
            thought = raw[think_from:think_to].strip()

    sol_open = raw.find(SOL_START)
    if sol_open != -1:
        sol_from = sol_open + len(SOL_START)
        sol_to = raw.find(SOL_END, sol_from)
        # An unterminated solution (e.g. generation hit the token limit) is
        # still returned, running to the end of the text.
        answer = (raw[sol_from:] if sol_to == -1 else raw[sol_from:sol_to]).strip()
    elif THINK_END in raw:
        # No solution marker at all: treat whatever follows the reasoning
        # span as the answer.
        answer = raw[raw.index(THINK_END) + len(THINK_END):].strip()

    return thought, answer
def generate(prompt, system_prompt, max_new_tokens, temperature, top_p, top_k, show_thinking):
    """Generate a single-turn reply and split it into reasoning and answer.

    Args:
        prompt: The user's question.
        system_prompt: Text placed in the ``[SYSTEM]`` section of the prompt.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature; ``0`` or less selects greedy
            decoding.
        top_p: Nucleus-sampling threshold; values above ``1.0`` are clamped
            to ``1.0``.
        top_k: Top-k cut-off (coerced to ``int``).
        show_thinking: When false the reasoning text is replaced by ``""``.

    Returns:
        ``(thought, answer)`` strings.  A blank or missing prompt yields
        ``("", "Please enter a question.")`` without touching the model.
    """
    # Guard against None as well as blank input so the early exit never raises.
    if not prompt or not prompt.strip():
        return "", "Please enter a question."

    full_prompt = build_prompt(prompt, system_prompt)
    inputs = tokenizer(full_prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]

    sampling = temperature > 0
    generation_kwargs = {
        # UI sliders may deliver numeric values as floats; these two must be ints.
        "max_new_tokens": int(max_new_tokens),
        "top_k": int(top_k),
        "do_sample": sampling,
        "temperature": temperature if sampling else 1.0,
        # The Top-p slider can exceed 1.0, which the library rejects.
        "top_p": min(float(top_p), 1.0),
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    # Pass the tokenizer's attention mask explicitly: pad and EOS share an id
    # here, so the mask cannot be inferred reliably from the input ids.
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        generation_kwargs["attention_mask"] = attention_mask

    with torch.no_grad():
        output_ids = model.generate(input_ids, **generation_kwargs)

    # Keep only the newly generated tokens (drop the echoed prompt).
    generated = output_ids[0][input_ids.shape[-1]:]
    raw = tokenizer.decode(generated, skip_special_tokens=False)
    raw = raw.replace("<s>", "").replace("</s>", "").strip()
    # The prompt already ended with THINK_START, so the model's continuation
    # lacks it; restore it so parse_output() can find the thought span.
    raw = THINK_START + "\n" + raw

    thought, answer = parse_output(raw)
    return (thought if show_thinking else ""), answer
# ----------------------------------------------------------------------
# Chat callback for Gradio
# ----------------------------------------------------------------------
def chat_generate(message, history, system_prompt, max_tokens, temperature, top_p, top_k, show_think):
    """Gradio callback: answer ``message`` and append the exchange to the chat.

    The model is still queried with the latest message only (``history`` is
    not fed back into the prompt); the history is kept purely so the
    conversation panel accumulates turns instead of being replaced.

    Args:
        message: Text from the message box.
        history: Current chatbot value, a list of ``{"role", "content"}``
            dicts (or ``None`` / empty before the first turn).
        system_prompt, max_tokens, temperature, top_p, top_k, show_think:
            Forwarded unchanged to ``generate``.

    Returns:
        ``(cleared_message, updated_history, thought, answer)`` matching the
        four wired outputs: message box, chatbot, thinking box, answer box.
    """
    # Copy so the list Gradio handed in is never mutated in place.
    prior_turns = list(history) if history else []

    if not message or not message.strip():
        # Nothing to send: keep the existing conversation rather than wiping it.
        return "", prior_turns, "", ""

    thought, answer = generate(message, system_prompt, max_tokens, temperature, top_p, top_k, show_think)
    new_history = prior_turns + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": answer},
    ]
    return "", new_history, thought, answer
def clear_fn():
    """Return blank values for the message box, chatbot, thinking and answer boxes."""
    blank_message, blank_chat = "", []
    blank_thought = blank_answer = ""
    return blank_message, blank_chat, blank_thought, blank_answer
# ----------------------------------------------------------------------
# Custom CSS – classic, elegant, dark theme with serif headings
# ----------------------------------------------------------------------
# Stylesheet passed to gr.Blocks(css=...).  Its selectors target:
#   * the ids/classes used in the hand-written HTML sections
#     (#header-section, .lang-toggle, .model-card, .focus-list,
#     .resources-table, .footer-text), and
#   * the elem_classes attached to the Gradio components
#     (.chatbot-wrap, .input-wrap, .thinking-box, .answer-box, .system-box).
# The final rule hides the page's <footer> element.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;600;700&family=Inter:wght@300;400;500;600&family=JetBrains+Mono&display=swap');
* { box-sizing: border-box; }
body, .gradio-container {
background: #1a1a1a !important;
color: #d4c5b2 !important;
font-family: 'Inter', sans-serif !important;
}
.gradio-container {
max-width: 1300px !important;
margin: 0 auto !important;
padding: 2rem 1.5rem !important;
}
/* Header with language toggle */
#header-section {
background: linear-gradient(145deg, #2a2118 0%, #1e1b15 100%);
border: 1px solid #5c4a32;
border-radius: 18px;
padding: 2rem;
margin-bottom: 2rem;
position: relative;
box-shadow: 0 8px 30px rgba(0,0,0,0.5);
}
#header-section h1 {
font-family: 'Playfair Display', serif;
font-size: 2.5rem;
color: #d4af37;
margin-top: 0;
font-weight: 700;
letter-spacing: 1px;
}
#header-section p {
font-size: 1.1rem;
color: #c0b09a;
line-height: 1.7;
}
.lang-toggle {
position: absolute;
top: 20px;
right: 20px;
background: #3e3525;
border: 1px solid #5c4a32;
color: #d4af37;
padding: 6px 16px;
border-radius: 30px;
font-family: 'Inter', sans-serif;
font-weight: 600;
font-size: 0.9rem;
cursor: pointer;
transition: all 0.3s;
}
.lang-toggle:hover {
background: #5c4a32;
color: #f5e6c8;
}
/* Model cards */
.model-card {
background: #2a241c;
border: 1px solid #4a3e2c;
border-radius: 14px;
padding: 1.2rem;
margin-bottom: 1rem;
transition: transform 0.2s, box-shadow 0.2s;
}
.model-card:hover {
transform: translateY(-3px);
box-shadow: 0 10px 25px rgba(0,0,0,0.7);
}
.model-card a {
color: #d4af37;
text-decoration: none;
font-weight: 600;
font-size: 1.15rem;
}
.model-card p {
color: #b9a88c;
margin: 0.5rem 0 0;
font-size: 0.9rem;
}
/* Focus list */
.focus-list {
list-style: none;
padding-left: 0;
}
.focus-list li {
padding: 0.3rem 0;
font-size: 1rem;
color: #c0b09a;
}
/* Resources table */
.resources-table {
width: 100%;
border-collapse: collapse;
margin-top: 1rem;
}
.resources-table td {
padding: 10px 0;
border-bottom: 1px solid #3e3525;
}
.resources-table a {
color: #d4af37;
text-decoration: none;
font-weight: 500;
}
.resources-table a:hover {
text-decoration: underline;
}
/* Footer */
.footer-text {
text-align: center;
color: #6b5e4a;
font-size: 0.85rem;
margin-top: 2rem;
padding-top: 1.5rem;
border-top: 1px solid #3e3525;
}
.footer-text a {
color: #d4af37;
text-decoration: none;
}
/* Gradio components restyling */
.chatbot-wrap .wrap {
background: #1e1b15 !important;
border: 1px solid #4a3e2c !important;
border-radius: 14px !important;
}
.message.user {
background: linear-gradient(135deg, #5c4a32, #7a5c3e) !important;
color: white !important;
border-radius: 18px 18px 4px 18px !important;
padding: 12px 16px !important;
}
.message.bot {
background: #2a241c !important;
color: #e8dcc8 !important;
border: 1px solid #5c4a32 !important;
border-radius: 18px 18px 18px 4px !important;
}
.input-wrap textarea {
background: #2a241c !important;
border: 1px solid #4a3e2c !important;
color: #e8dcc8 !important;
font-family: 'Inter', sans-serif !important;
}
.input-wrap textarea:focus {
border-color: #d4af37 !important;
box-shadow: 0 0 0 3px rgba(212,175,55,0.15) !important;
}
button.primary {
background: linear-gradient(135deg, #7a5c3e, #a67c46) !important;
border: none !important;
border-radius: 10px !important;
color: white !important;
font-weight: 600 !important;
transition: all 0.2s !important;
}
button.primary:hover {
transform: translateY(-1px) !important;
box-shadow: 0 4px 20px rgba(166,124,70,0.4) !important;
}
.thinking-box textarea {
font-family: 'JetBrains Mono', monospace !important;
background: #1a1510 !important;
border: 1px solid #3e3525 !important;
color: #b9a88c !important;
}
.answer-box textarea {
font-family: 'Inter', sans-serif !important;
background: #1a1e15 !important;
border: 1px solid #3e4a2c !important;
color: #c5d4af !important;
}
.system-box textarea {
background: #1a1510 !important;
border: 1px solid #5c4a32 !important;
color: #d4af37 !important;
}
input[type=range] {
accent-color: #d4af37 !important;
}
.accordion {
background: #1e1b15 !important;
border: 1px solid #4a3e2c !important;
}
footer { display: none !important; }
"""
# ----------------------------------------------------------------------
# Bilingual content for the header & info section
# ----------------------------------------------------------------------
# Translatable text for the header and info sections, keyed by language code
# ("en" / "it").  Both languages carry the same keys; the page's language
# toggle reads this table and swaps the text of the matching HTML elements.
# Values may contain inline HTML because they are assigned via innerHTML.
#
# The emoji and punctuation below were restored from mis-decoded UTF-8
# (each character had been rendered as a run of Thai/Latin-1 glyphs).
# NOTE(review): two emoji had lost a byte and are best-guess reconstructions:
# the globe in the "Bilingual training" item and the folder in the
# "HuggingFace Community" resource -- confirm against the original file.
# NOTE(review): the URLs mix the `ThingAI` and `ThingsAI` organisation names;
# confirm which spelling each link should use.
CONTENT = {
    "en": {
        "title": "Welcome to ThingsAI! 🤗",
        "intro": "Building efficient, bilingual AI models that run anywhere. 🇮🇹 🇬🇧",
        "models_title": "🤖 Our Models",
        "model_q135": "A lightweight bilingual (Italian + English) language model with <b>135M parameters</b>. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.",
        "model_q270": "Our most powerful small model — <b>270M parameters</b> with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.",
        "model_qmod": "A multi-label moderation model covering <b>9 categories</b>: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.",
        "focus_title": "🎯 What We Focus On",
        "focus_items": [
            "⚡ Small, efficient architectures — GQA, weight tying, deep‑thin design",
            "🌍 Bilingual training — Italian + English from scratch",
            "🔓 Open‑source everything — weights, code, datasets",
            "💻 Real‑world deployment — runs on consumer hardware",
        ],
        "resources_title": "📂 Resources",
        # Each entry is ("<emoji> <label>", url).
        "resources": [
            ("📚 Quark-135M-Bilingual", "https://huggingface.co/ThingAI/Quark-135m-Bilingual"),
            ("🛡️ Quark-Mod", "https://huggingface.co/ThingsAI/Quark-Mod"),
            ("📁 HuggingFace Community", "https://huggingface.co/ThingsAI"),
            ("💻 GitHub", "https://github.com/overcastlab"),
        ],
        "dataset_link": "📊 Dataset: <a href='https://huggingface.co/datasets/ThingAI/OmniBook'>ThingAI/OmniBook</a>",
        "footer": "Made with ❤️ by ThingsAI · <a href='https://things-ai.org'>Website</a> · <a href='https://github.com/overcastlab'>GitHub</a>",
    },
    "it": {
        "title": "Benvenuti in ThingsAI! 🤗",
        "intro": "Costruiamo modelli AI bilingui efficienti che funzionano ovunque. 🇮🇹 🇬🇧",
        "models_title": "🤖 I Nostri Modelli",
        "model_q135": "Un modello linguistico bilingue leggero (italiano + inglese) con <b>135M parametri</b>. Caratteristiche: GQA, SwiGLU, RMSNorm, RoPE. Addestrato su 50B+ token.",
        "model_q270": "Il nostro piccolo modello più potente — <b>270M parametri</b> con 32 strati, dimensione nascosta 768, vocabolario 65K. In addestramento attivo su 10B+ token, pianificato 135B token.",
        "model_qmod": "Un modello di moderazione multi‑etichetta che copre <b>9 categorie</b>: tossico, gravemente_tossico, osceno, minaccia, insulto, odio_identitario, cyberbullismo, incitamento_all'odio, offensivo.",
        "focus_title": "🎯 Su Cosa Ci Concentriamo",
        "focus_items": [
            "⚡ Architetture piccole ed efficienti — GQA, weight tying, design deep‑thin",
            "🌍 Addestramento bilingue — italiano + inglese da zero",
            "🔓 Tutto open‑source — pesi, codice, dataset",
            "💻 Implementazione reale — funziona su hardware consumer",
        ],
        "resources_title": "📂 Risorse",
        "resources": [
            ("📚 Quark-135M-Bilingual", "https://huggingface.co/ThingAI/Quark-135m-Bilingual"),
            ("🛡️ Quark-Mod", "https://huggingface.co/ThingsAI/Quark-Mod"),
            ("📁 Comunità HuggingFace", "https://huggingface.co/ThingsAI"),
            ("💻 GitHub", "https://github.com/overcastlab"),
        ],
        "dataset_link": "📊 Dataset: <a href='https://huggingface.co/datasets/ThingAI/OmniBook'>ThingAI/OmniBook</a>",
        "footer": "Fatto con ❤️ da ThingsAI · <a href='https://things-ai.org'>Sito Web</a> · <a href='https://github.com/overcastlab'>GitHub</a>",
    },
}
# ----------------------------------------------------------------------
# Build the complete Gradio interface
# ----------------------------------------------------------------------
# ------------------------------------------------------------------
# Language-switch JavaScript – swaps all translatable text
# ------------------------------------------------------------------
# The script is injected through the Blocks `head` parameter.  A <script>
# element placed inside gr.HTML content is inserted as inert markup and is
# not executed by the browser, which would leave the toggle button's
# onclick="switchLanguage()" without a function to call.
#
# CONTENT is serialised with json.dumps rather than str(): the Python repr
# renders the resource tuples as `('label', 'url')`, which JavaScript parses
# as a comma expression (yielding only the URL).  "</" is escaped so that no
# string value can close the <script> element early.
_CONTENT_JSON = json.dumps(CONTENT).replace("</", "<\\/")
_LANG_SWITCH_HEAD = """
<script>
const content = """ + _CONTENT_JSON + """;
let currentLang = 'en';
function switchLanguage() {
  currentLang = currentLang === 'en' ? 'it' : 'en';
  const t = content[currentLang];
  // Update header
  document.getElementById('main-title').innerHTML = t.title;
  document.getElementById('main-intro').innerHTML = t.intro;
  document.getElementById('models-title').innerHTML = t.models_title;
  document.getElementById('focus-title').innerHTML = t.focus_title;
  document.getElementById('resources-title').innerHTML = t.resources_title;
  // Model descriptions
  document.getElementById('model-desc-135').innerHTML = t.model_q135;
  document.getElementById('model-desc-270').innerHTML = t.model_q270;
  document.getElementById('model-desc-mod').innerHTML = t.model_qmod;
  // Dataset paragraph
  document.getElementById('dataset-paragraph').innerHTML = t.dataset_link;
  // Focus list
  const focusList = document.getElementById('focus-list');
  focusList.innerHTML = t.focus_items.map(item => '<li>' + item + '</li>').join('');
  // Resources table (rebuild rows): each entry is ["<emoji> <label>", url];
  // render the emoji followed by the label as the link text, matching the
  // rows present in the initial markup.
  const resTable = document.getElementById('resources-table');
  resTable.innerHTML = t.resources.map(r => {
    const sep = r[0].indexOf(' ');
    const icon = sep === -1 ? '' : r[0].slice(0, sep);
    const label = sep === -1 ? r[0] : r[0].slice(sep + 1);
    return `<tr><td>${icon} <a href="${r[1]}" target="_blank">${label}</a></td></tr>`;
  }).join('');
  // Footer
  document.getElementById('footer-text').innerHTML = t.footer;
  // Toggle button text
  const btn = document.querySelector('.lang-toggle');
  btn.innerHTML = currentLang === 'en' ? '🇮🇹 Italiano' : '🇬🇧 English';
}
</script>
"""

# NOTE(review): recent Gradio releases also accept css/theme/head on
# launch(); confirm the preferred location for the installed version.
with gr.Blocks(
    title="ThingsAI – Chat & Models",
    css=CUSTOM_CSS,
    theme=gr.themes.Soft(),  # base theme overridden by our CSS
    head=_LANG_SWITCH_HEAD,
) as demo:
    # --- Header + Language Toggle ---
    gr.HTML("""
<div id="header-section">
<button class="lang-toggle" onclick="switchLanguage()">🇮🇹 Italiano</button>
<h1 id="main-title">Welcome to ThingsAI! 🤗</h1>
<p id="main-intro">Building efficient, bilingual AI models that run anywhere. 🇮🇹 🇬🇧</p>
</div>
""")

    # --- Model Cards (using HTML, IDs for translation) ---
    # NOTE(review): links mix the `ThingAI` and `ThingsAI` organisation names
    # (Quark-Mod appears under both on this page) -- confirm the right one.
    gr.HTML("""
<h2 id="models-title" style="color:#d4af37; font-family:'Playfair Display',serif;">🤖 Our Models</h2>
<div class="model-card">
<a href="https://huggingface.co/ThingAI/Quark-135m-Bilingual" target="_blank">Quark-135M</a>
<p id="model-desc-135">A lightweight bilingual (Italian + English) language model with <b>135M parameters</b>. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.</p>
</div>
<div class="model-card">
<a href="https://huggingface.co/ThingAI/Quark-270m-Instruct" target="_blank">Quark-270M (Instruct)</a>
<p id="model-desc-270">Our most powerful small model — <b>270M parameters</b> with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.</p>
</div>
<div class="model-card">
<a href="https://huggingface.co/ThingAI/Quark-Mod" target="_blank">Quark-Mod</a>
<p id="model-desc-mod">A multi-label moderation model covering <b>9 categories</b>: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.</p>
</div>
<div class="model-card">
<a href="https://huggingface.co/ThingAI/Quark-135m" target="_blank">Quark-135m (Base)</a>
<p>Base model.</p>
</div>
<div class="model-card">
<a href="https://huggingface.co/ThingAI/Quark-50m" target="_blank">Quark-50m</a>
<p>Lightweight 50M model.</p>
</div>
<p id="dataset-paragraph" style="margin-top:1rem; color:#c0b09a;">📊 Dataset: <a href="https://huggingface.co/datasets/ThingAI/OmniBook" style="color:#d4af37;">ThingAI/OmniBook</a></p>
""")

    # --- Focus & Resources ---
    # NOTE(review): the globe and folder emoji below are best-guess repairs of
    # characters that had lost a byte -- confirm against the original file.
    gr.HTML("""
<h2 id="focus-title" style="color:#d4af37; font-family:'Playfair Display',serif;">🎯 What We Focus On</h2>
<ul class="focus-list" id="focus-list">
<li>⚡ Small, efficient architectures — GQA, weight tying, deep‑thin design</li>
<li>🌍 Bilingual training — Italian + English from scratch</li>
<li>🔓 Open‑source everything — weights, code, datasets</li>
<li>💻 Real‑world deployment — runs on consumer hardware</li>
</ul>
<h2 id="resources-title" style="color:#d4af37; font-family:'Playfair Display',serif; margin-top:2rem;">📂 Resources</h2>
<table class="resources-table" id="resources-table">
<tr><td>📚 <a href="https://huggingface.co/ThingAI/Quark-135m-Bilingual" target="_blank">Quark-135M-Bilingual</a></td></tr>
<tr><td>🛡️ <a href="https://huggingface.co/ThingsAI/Quark-Mod" target="_blank">Quark-Mod</a></td></tr>
<tr><td>📁 <a href="https://huggingface.co/ThingsAI" target="_blank">HuggingFace Community</a></td></tr>
<tr><td>💻 <a href="https://github.com/overcastlab" target="_blank">GitHub</a></td></tr>
</table>
<p class="footer-text" id="footer-text">Made with ❤️ by ThingsAI · <a href="https://things-ai.org">Website</a> · <a href="https://github.com/overcastlab">GitHub</a></p>
""")

    # --- Chat interface ---
    # NOTE(review): component nesting was reconstructed from statement order
    # (left column: chat + input + buttons; right column: outputs + settings);
    # confirm the settings accordion belongs in the right-hand column.
    with gr.Row(equal_height=False):
        with gr.Column(scale=5):
            chatbot = gr.Chatbot(
                label="💬 Conversation",
                height=520,
                elem_classes=["chatbot-wrap"],
            )
            prompt_input = gr.Textbox(
                label="Your Message",
                placeholder="Ask anything... (hallucination may occur ⚠️)",
                lines=3,
                elem_classes=["input-wrap"],
            )
            with gr.Row():
                run_btn = gr.Button("⚡ Send", variant="primary", scale=3)
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1)
        with gr.Column(scale=4):
            thinking_out = gr.Textbox(
                label="🧠 Thinking Process",
                lines=10,
                interactive=False,
                elem_classes=["thinking-box"],
            )
            answer_out = gr.Textbox(
                label="✅ Final Answer",
                lines=6,
                interactive=False,
                elem_classes=["answer-box"],
            )
            with gr.Accordion("⚙️ Settings", open=False):
                system_prompt_input = gr.Textbox(
                    label="🔧 System Prompt",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=4,
                    elem_classes=["system-box"],
                )
                # NOTE(review): 4048 is not on the 64 + 32*k step grid; 4096
                # may have been intended.
                max_tokens = gr.Slider(64, 4096, value=4048, step=32, label="Max Tokens")
                temperature = gr.Slider(0.0, 4, value=0.9, step=0.05, label="Temperature")
                # top_p is a cumulative probability, so its valid upper bound
                # is 1.0 (the previous maximum of 5.0 produced invalid values).
                top_p = gr.Slider(0.1, 1.0, value=0.35, step=0.05, label="Top-p")
                top_k = gr.Slider(1, 500, value=61, step=1, label="Top-k")
                show_think = gr.Checkbox(value=True, label="Show Thinking Process")

    # Examples: clicking one fills the message box.
    gr.Examples(
        examples=[
            ["What is artificial intelligence?"],
            ["How does a large language model learn?"],
            ["Explain the water cycle in simple terms."],
            ["What is the meaning of life?"],
            ["Write a short poem about the universe."],
            ["What is Drugs?"],
        ],
        inputs=[prompt_input],
        label="💡 Example Questions",
    )

    # Wire events: the Send button and Enter in the message box run the same
    # callback; Clear resets the same four outputs.
    inputs_list = [prompt_input, chatbot, system_prompt_input, max_tokens, temperature, top_p, top_k, show_think]
    outputs_list = [prompt_input, chatbot, thinking_out, answer_out]
    run_btn.click(chat_generate, inputs=inputs_list, outputs=outputs_list)
    prompt_input.submit(chat_generate, inputs=inputs_list, outputs=outputs_list)
    clear_btn.click(clear_fn, outputs=outputs_list)
# ----------------------------------------------------------------------
# Launch
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Launch settings: listen address, port, and the error-display flag.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)