# Source: Hugging Face Space "usermma/test" (status: Paused)
# File size: 21,421 Bytes
# Revision: f6dd924

import json

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# ----------------------------------------------------------------------
# Model configuration
# ----------------------------------------------------------------------
# Hub identifier of the checkpoint loaded for both the tokenizer and the model.
MODEL_ID = "SupraLabs/Supra-50M-Reasoning"

# Marker strings that delimit the reasoning and the final answer in the
# generated text; the prompt builder and the output parser both rely on them.
THINK_START, THINK_END = "<|begin_of_thought|>", "<|end_of_thought|>"
SOL_START, SOL_END = "<|begin_of_solution|>", "<|end_of_solution|>"

# Pre-filled value of the "System Prompt" textbox.
DEFAULT_SYSTEM_PROMPT = "".join(
    [
        "Your role as an assistant involves thoroughly exploring questions through ",
        "a systematic long thinking process before providing the final precise and ",
        "accurate solutions.",
    ]
)

# ----------------------------------------------------------------------
# Load model once
# ----------------------------------------------------------------------
# Runs once at import time: fetch the tokenizer and the weights, keep the
# model on CPU in float32 and switch it to evaluation mode.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=torch.float32, device_map="cpu")
model.eval()
print("Model ready.")

# ----------------------------------------------------------------------
# Prompt construction, output parsing and generation
# ----------------------------------------------------------------------
def build_prompt(question: str, system_prompt: str) -> str:
    """Assemble the text prompt fed to the model.

    The prompt has three sections separated by blank lines: the system
    prompt, the user's question, and an assistant turn that is already
    opened with the thinking-start marker followed by a newline.
    """
    sections = (
        f"[SYSTEM]: {system_prompt}",
        f"[USER]: {question}",
        f"[ASSISTANT]: {THINK_START}\n",
    )
    return "\n\n".join(sections)

def parse_output(raw: str):
    """Split raw model text into ``(thought, answer)``.

    * ``thought`` is the stripped text between the thinking-start marker and
      the first thinking-end marker that follows it; ``""`` if there is no
      such pair.
    * ``answer`` is, in order of preference:
        1. the stripped text between the solution-start marker and the first
           solution-end marker that follows it;
        2. everything after the solution-start marker when it is never closed
           (e.g. generation stopped at the token limit);
        3. everything after the thinking-end marker when no solution marker
           is present;
        4. ``raw`` unchanged when none of the markers apply.

    Closing markers are searched for *after* their opening marker, so a stray
    closing tag earlier in the text cannot produce an inverted (empty) slice.
    """
    thought, answer = "", raw

    # --- reasoning section -------------------------------------------------
    think_open = raw.find(THINK_START)
    think_close = -1
    if think_open != -1:
        think_body = think_open + len(THINK_START)
        think_close = raw.find(THINK_END, think_body)
        if think_close != -1:
            thought = raw[think_body:think_close].strip()

    # --- answer section ----------------------------------------------------
    sol_open = raw.find(SOL_START)
    if sol_open != -1:
        sol_body = sol_open + len(SOL_START)
        sol_close = raw.find(SOL_END, sol_body)
        # An unterminated solution runs to the end of the text.
        answer = raw[sol_body:] if sol_close == -1 else raw[sol_body:sol_close]
        answer = answer.strip()
    else:
        # No solution marker: fall back to whatever follows the reasoning.
        # Prefer the closing tag matched above, else the first one present.
        fallback_close = think_close if think_close != -1 else raw.find(THINK_END)
        if fallback_close != -1:
            answer = raw[fallback_close + len(THINK_END):].strip()

    return thought, answer

def generate(prompt, system_prompt, max_new_tokens, temperature, top_p, top_k, show_thinking):
    """Run one single-turn generation and return ``(thought, answer)``.

    Args:
        prompt: The user's question. Blank input short-circuits with a hint.
        system_prompt: Text placed in the ``[SYSTEM]`` section of the prompt.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``
            because UI sliders may deliver floats).
        temperature: Sampling temperature; ``0`` (or less) selects greedy
            decoding.
        top_p: Nucleus-sampling mass; clamped into ``[0, 1]`` because the
            sampler rejects values above 1.
        top_k: Top-k cutoff (coerced to ``int``).
        show_thinking: When false, the returned thought is blanked.

    Returns:
        A ``(thought, answer)`` tuple of strings as produced by
        ``parse_output``.
    """
    if not prompt.strip():
        return "", "Please enter a question."

    full_prompt = build_prompt(prompt, system_prompt)
    inputs = tokenizer(full_prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]

    gen_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
        "do_sample": temperature > 0,
    }
    if gen_kwargs["do_sample"]:
        # Sampling knobs are only meaningful (and only validated cleanly)
        # when sampling is on; greedy decoding ignores them.
        gen_kwargs["temperature"] = float(temperature)
        gen_kwargs["top_p"] = min(max(float(top_p), 0.0), 1.0)
        gen_kwargs["top_k"] = int(top_k)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            # Forward the tokenizer's mask explicitly: pad and EOS share an
            # id here, so the model cannot infer the mask on its own.
            attention_mask=inputs.get("attention_mask"),
            **gen_kwargs,
        )

    # Keep only the newly generated tokens (drop the echoed prompt).
    generated = output_ids[0][input_ids.shape[-1]:]
    raw = tokenizer.decode(generated, skip_special_tokens=False)
    raw = raw.replace("<s>", "").replace("</s>", "").strip()
    # The prompt already opened the thinking section; restore the marker so
    # the parser sees a complete start/end pair.
    raw = THINK_START + "\n" + raw
    thought, answer = parse_output(raw)
    return (thought if show_thinking else ""), answer

# ----------------------------------------------------------------------
# Chat callback for Gradio
# ----------------------------------------------------------------------
def chat_generate(message, history, system_prompt, max_tokens, temperature, top_p, top_k, show_think):
    """Gradio callback for the Send button and the textbox submit event.

    Generates a reply for ``message`` and appends the user/assistant pair to
    the existing conversation instead of replacing it. The model itself is
    still prompted with the current message only.

    Args:
        message: Text from the input box.
        history: Current chatbot value (list of role/content dicts) or
            ``None`` when the conversation is empty.
        system_prompt, max_tokens, temperature, top_p, top_k, show_think:
            Forwarded unchanged to ``generate``.

    Returns:
        ``(textbox_value, chatbot_value, thought, answer)``. The textbox is
        always cleared. For a blank message the conversation is returned
        untouched and both output boxes are emptied.
    """
    # Copy so the list Gradio handed us is never mutated in place.
    conversation = list(history) if history else []

    if not message.strip():
        return "", conversation, "", ""

    thought, answer = generate(message, system_prompt, max_tokens, temperature, top_p, top_k, show_think)
    conversation.extend(
        [
            {"role": "user", "content": message},
            {"role": "assistant", "content": answer},
        ]
    )
    return "", conversation, thought, answer

def clear_fn():
    """Return blank values for the input box, chatbot, thinking box and answer box."""
    blank = ""
    return blank, [], blank, blank

# ----------------------------------------------------------------------
# Custom CSS – Classic, elegant, dark theme with serif headings
# ----------------------------------------------------------------------
# Stylesheet passed to gr.Blocks(css=...). It imports the Playfair Display,
# Inter and JetBrains Mono web fonts, styles the hand-written HTML sections
# (#header-section, .lang-toggle, .model-card, .focus-list, .resources-table,
# .footer-text) and the components tagged through elem_classes
# (.chatbot-wrap, .input-wrap, .thinking-box, .answer-box, .system-box).
# The final rule hides any <footer> element on the page.
# NOTE: everything inside the triple quotes is runtime text; CSS comments
# added there would change the served stylesheet.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;600;700&family=Inter:wght@300;400;500;600&family=JetBrains+Mono&display=swap');

* { box-sizing: border-box; }

body, .gradio-container {
    background: #1a1a1a !important;
    color: #d4c5b2 !important;
    font-family: 'Inter', sans-serif !important;
}

.gradio-container {
    max-width: 1300px !important;
    margin: 0 auto !important;
    padding: 2rem 1.5rem !important;
}

/* Header with language toggle */
#header-section {
    background: linear-gradient(145deg, #2a2118 0%, #1e1b15 100%);
    border: 1px solid #5c4a32;
    border-radius: 18px;
    padding: 2rem;
    margin-bottom: 2rem;
    position: relative;
    box-shadow: 0 8px 30px rgba(0,0,0,0.5);
}

#header-section h1 {
    font-family: 'Playfair Display', serif;
    font-size: 2.5rem;
    color: #d4af37;
    margin-top: 0;
    font-weight: 700;
    letter-spacing: 1px;
}

#header-section p {
    font-size: 1.1rem;
    color: #c0b09a;
    line-height: 1.7;
}

.lang-toggle {
    position: absolute;
    top: 20px;
    right: 20px;
    background: #3e3525;
    border: 1px solid #5c4a32;
    color: #d4af37;
    padding: 6px 16px;
    border-radius: 30px;
    font-family: 'Inter', sans-serif;
    font-weight: 600;
    font-size: 0.9rem;
    cursor: pointer;
    transition: all 0.3s;
}
.lang-toggle:hover {
    background: #5c4a32;
    color: #f5e6c8;
}

/* Model cards */
.model-card {
    background: #2a241c;
    border: 1px solid #4a3e2c;
    border-radius: 14px;
    padding: 1.2rem;
    margin-bottom: 1rem;
    transition: transform 0.2s, box-shadow 0.2s;
}
.model-card:hover {
    transform: translateY(-3px);
    box-shadow: 0 10px 25px rgba(0,0,0,0.7);
}
.model-card a {
    color: #d4af37;
    text-decoration: none;
    font-weight: 600;
    font-size: 1.15rem;
}
.model-card p {
    color: #b9a88c;
    margin: 0.5rem 0 0;
    font-size: 0.9rem;
}

/* Focus list */
.focus-list {
    list-style: none;
    padding-left: 0;
}
.focus-list li {
    padding: 0.3rem 0;
    font-size: 1rem;
    color: #c0b09a;
}

/* Resources table */
.resources-table {
    width: 100%;
    border-collapse: collapse;
    margin-top: 1rem;
}
.resources-table td {
    padding: 10px 0;
    border-bottom: 1px solid #3e3525;
}
.resources-table a {
    color: #d4af37;
    text-decoration: none;
    font-weight: 500;
}
.resources-table a:hover {
    text-decoration: underline;
}

/* Footer */
.footer-text {
    text-align: center;
    color: #6b5e4a;
    font-size: 0.85rem;
    margin-top: 2rem;
    padding-top: 1.5rem;
    border-top: 1px solid #3e3525;
}
.footer-text a {
    color: #d4af37;
    text-decoration: none;
}

/* Gradio components restyling */
.chatbot-wrap .wrap {
    background: #1e1b15 !important;
    border: 1px solid #4a3e2c !important;
    border-radius: 14px !important;
}

.message.user {
    background: linear-gradient(135deg, #5c4a32, #7a5c3e) !important;
    color: white !important;
    border-radius: 18px 18px 4px 18px !important;
    padding: 12px 16px !important;
}
.message.bot {
    background: #2a241c !important;
    color: #e8dcc8 !important;
    border: 1px solid #5c4a32 !important;
    border-radius: 18px 18px 18px 4px !important;
}

.input-wrap textarea {
    background: #2a241c !important;
    border: 1px solid #4a3e2c !important;
    color: #e8dcc8 !important;
    font-family: 'Inter', sans-serif !important;
}
.input-wrap textarea:focus {
    border-color: #d4af37 !important;
    box-shadow: 0 0 0 3px rgba(212,175,55,0.15) !important;
}

button.primary {
    background: linear-gradient(135deg, #7a5c3e, #a67c46) !important;
    border: none !important;
    border-radius: 10px !important;
    color: white !important;
    font-weight: 600 !important;
    transition: all 0.2s !important;
}
button.primary:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 20px rgba(166,124,70,0.4) !important;
}

.thinking-box textarea {
    font-family: 'JetBrains Mono', monospace !important;
    background: #1a1510 !important;
    border: 1px solid #3e3525 !important;
    color: #b9a88c !important;
}
.answer-box textarea {
    font-family: 'Inter', sans-serif !important;
    background: #1a1e15 !important;
    border: 1px solid #3e4a2c !important;
    color: #c5d4af !important;
}
.system-box textarea {
    background: #1a1510 !important;
    border: 1px solid #5c4a32 !important;
    color: #d4af37 !important;
}

input[type=range] {
    accent-color: #d4af37 !important;
}
.accordion {
    background: #1e1b15 !important;
    border: 1px solid #4a3e2c !important;
}

footer { display: none !important; }
"""

# ----------------------------------------------------------------------
# Bilingual content for the header & info section
# ----------------------------------------------------------------------
# Translation table for the static info sections, keyed by language code
# ("en", "it"). Both languages carry the same keys:
#   title, intro, models_title, focus_title, resources_title  -> heading text
#   model_q135, model_q270, model_qmod                         -> card descriptions (HTML)
#   focus_items                                                -> list of bullet strings
#   resources                                                  -> list of (label, url) pairs
#   dataset_link, footer                                       -> HTML snippets
# The values are embedded into the page's language-switch script, so they are
# inserted as HTML, not as plain text.
# NOTE(review): the URLs mix the organisation slugs "ThingAI" and "ThingsAI"
# (e.g. ThingAI/Quark-135m-Bilingual vs ThingsAI/Quark-Mod) — confirm which
# one is correct; at least one set of links is presumably broken.
CONTENT = {
    "en": {
        "title": "Welcome to ThingsAI! 🤗",
        "intro": "Building efficient, bilingual AI models that run anywhere. 🇮🇹 🇬🇧",
        "models_title": "🤖 Our Models",
        "model_q135": "A lightweight bilingual (Italian + English) language model with <b>135M parameters</b>. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.",
        "model_q270": "Our most powerful small model — <b>270M parameters</b> with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.",
        "model_qmod": "A multi-label moderation model covering <b>9 categories</b>: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.",
        "focus_title": "🎯 What We Focus On",
        "focus_items": [
            "⚡ Small, efficient architectures — GQA, weight tying, deep‑thin design",
            "🌍 Bilingual training — Italian + English from scratch",
            "🔓 Open‑source everything — weights, code, datasets",
            "💻 Real‑world deployment — runs on consumer hardware"
        ],
        "resources_title": "📂 Resources",
        "resources": [
            ("📚 Quark-135M-Bilingual", "https://huggingface.co/ThingAI/Quark-135m-Bilingual"),
            ("🛡️ Quark-Mod", "https://huggingface.co/ThingsAI/Quark-Mod"),
            ("📝 HuggingFace Community", "https://huggingface.co/ThingsAI"),
            ("💻 GitHub", "https://github.com/overcastlab")
        ],
        "dataset_link": "📊 Dataset: <a href='https://huggingface.co/datasets/ThingAI/OmniBook'>ThingAI/OmniBook</a>",
        "footer": "Made with ❤️ by ThingsAI · <a href='https://things-ai.org'>Website</a> · <a href='https://github.com/overcastlab'>GitHub</a>"
    },
    "it": {
        "title": "Benvenuti in ThingsAI! 🤗",
        "intro": "Costruiamo modelli AI bilingui efficienti che funzionano ovunque. 🇮🇹 🇬🇧",
        "models_title": "🤖 I Nostri Modelli",
        "model_q135": "Un modello linguistico bilingue leggero (italiano + inglese) con <b>135M parametri</b>. Caratteristiche: GQA, SwiGLU, RMSNorm, RoPE. Addestrato su 50B+ token.",
        "model_q270": "Il nostro piccolo modello più potente — <b>270M parametri</b> con 32 strati, dimensione nascosta 768, vocabolario 65K. In addestramento attivo su 10B+ token, pianificato 135B token.",
        "model_qmod": "Un modello di moderazione multi‑etichetta che copre <b>9 categorie</b>: tossico, gravemente_tossico, osceno, minaccia, insulto, odio_identitario, cyberbullismo, incitamento_all'odio, offensivo.",
        "focus_title": "🎯 Su Cosa Ci Concentriamo",
        "focus_items": [
            "⚡ Architetture piccole ed efficienti — GQA, weight tying, design deep‑thin",
            "🌍 Addestramento bilingue — italiano + inglese da zero",
            "🔓 Tutto open‑source — pesi, codice, dataset",
            "💻 Implementazione reale — funziona su hardware consumer"
        ],
        "resources_title": "📂 Risorse",
        "resources": [
            ("📚 Quark-135M-Bilingual", "https://huggingface.co/ThingAI/Quark-135m-Bilingual"),
            ("🛡️ Quark-Mod", "https://huggingface.co/ThingsAI/Quark-Mod"),
            ("📝 Comunità HuggingFace", "https://huggingface.co/ThingsAI"),
            ("💻 GitHub", "https://github.com/overcastlab")
        ],
        "dataset_link": "📊 Dataset: <a href='https://huggingface.co/datasets/ThingAI/OmniBook'>ThingAI/OmniBook</a>",
        "footer": "Fatto con ❤️ da ThingsAI · <a href='https://things-ai.org'>Sito Web</a> · <a href='https://github.com/overcastlab'>GitHub</a>"
    }
}

# ----------------------------------------------------------------------
# Build the complete Gradio interface
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
# Language-switch script.
#
# It is injected through the `head` parameter of gr.Blocks: <script> tags
# placed inside gr.HTML are inserted as inert markup and never executed, so
# the inline onclick="switchLanguage()" on the header button would find no
# such function.
#
# The translation table is serialised with json.dumps (not str()): a Python
# repr turns the (label, url) tuples into JavaScript comma expressions, which
# evaluate to the URL string alone. "</" is escaped so no translated snippet
# can terminate the surrounding <script> element early.
# ----------------------------------------------------------------------
_CONTENT_JSON = json.dumps(CONTENT, ensure_ascii=False).replace("</", "<\\/")

LANGUAGE_SWITCH_HEAD = """
<script>
(function () {
    const translations = __CONTENT_JSON__;
    let currentLang = 'en';

    // Replace an element's markup; ids missing from the page are ignored.
    function setHtml(id, html) {
        const el = document.getElementById(id);
        if (el) { el.innerHTML = html; }
    }

    // Global on purpose: the header button calls it from an inline onclick.
    window.switchLanguage = function () {
        currentLang = currentLang === 'en' ? 'it' : 'en';
        const t = translations[currentLang];

        // Headings
        setHtml('main-title', t.title);
        setHtml('main-intro', t.intro);
        setHtml('models-title', t.models_title);
        setHtml('focus-title', t.focus_title);
        setHtml('resources-title', t.resources_title);

        // Model descriptions
        setHtml('model-desc-135', t.model_q135);
        setHtml('model-desc-270', t.model_q270);
        setHtml('model-desc-mod', t.model_qmod);

        // Dataset paragraph
        setHtml('dataset-paragraph', t.dataset_link);

        // Focus list
        setHtml('focus-list', t.focus_items.map(item => '<li>' + item + '</li>').join(''));

        // Resources table: each label is "<emoji> <text>"; keep the emoji
        // outside the link and use the translated text as the link caption.
        setHtml('resources-table', t.resources.map(([label, url]) => {
            const cut = label.indexOf(' ');
            const icon = cut === -1 ? '' : label.slice(0, cut);
            const text = cut === -1 ? label : label.slice(cut + 1);
            return `<tr><td>${icon} <a href="${url}" target="_blank">${text}</a></td></tr>`;
        }).join(''));

        // Footer
        setHtml('footer-text', t.footer);

        // Toggle button shows the language you would switch TO.
        const btn = document.querySelector('.lang-toggle');
        if (btn) { btn.innerHTML = currentLang === 'en' ? '🇮🇹 Italiano' : '🇬🇧 English'; }
    };
})();
</script>
""".replace("__CONTENT_JSON__", _CONTENT_JSON)

with gr.Blocks(
    title="ThingsAI – Chat & Models",
    css=CUSTOM_CSS,
    head=LANGUAGE_SWITCH_HEAD,
    theme=gr.themes.Soft()   # base theme overridden by our CSS
) as demo:

    # --- Header + Language Toggle ---
    gr.HTML("""
    <div id="header-section">
        <button class="lang-toggle" onclick="switchLanguage()">🇮🇹 Italiano</button>
        <h1 id="main-title">Welcome to ThingsAI! 🤗</h1>
        <p id="main-intro">Building efficient, bilingual AI models that run anywhere. 🇮🇹 🇬🇧</p>
    </div>
    """)

    # --- Model Cards (using HTML, IDs for translation) ---
    gr.HTML("""
    <h2 id="models-title" style="color:#d4af37; font-family:'Playfair Display',serif;">🤖 Our Models</h2>
    <div class="model-card">
        <a href="https://huggingface.co/ThingAI/Quark-135m-Bilingual" target="_blank">Quark-135M</a>
        <p id="model-desc-135">A lightweight bilingual (Italian + English) language model with <b>135M parameters</b>. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.</p>
    </div>
    <div class="model-card">
        <a href="https://huggingface.co/ThingAI/Quark-270m-Instruct" target="_blank">Quark-270M (Instruct)</a>
        <p id="model-desc-270">Our most powerful small model — <b>270M parameters</b> with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.</p>
    </div>
    <div class="model-card">
        <a href="https://huggingface.co/ThingAI/Quark-Mod" target="_blank">Quark-Mod</a>
        <p id="model-desc-mod">A multi-label moderation model covering <b>9 categories</b>: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.</p>
    </div>
    <div class="model-card">
        <a href="https://huggingface.co/ThingAI/Quark-135m" target="_blank">Quark-135m (Base)</a>
        <p>Base model.</p>
    </div>
    <div class="model-card">
        <a href="https://huggingface.co/ThingAI/Quark-50m" target="_blank">Quark-50m</a>
        <p>Lightweight 50M model.</p>
    </div>
    <p id="dataset-paragraph" style="margin-top:1rem; color:#c0b09a;">📊 Dataset: <a href="https://huggingface.co/datasets/ThingAI/OmniBook" style="color:#d4af37;">ThingAI/OmniBook</a></p>
    """)

    # --- Focus & Resources ---
    gr.HTML("""
    <h2 id="focus-title" style="color:#d4af37; font-family:'Playfair Display',serif;">🎯 What We Focus On</h2>
    <ul class="focus-list" id="focus-list">
        <li>⚡ Small, efficient architectures — GQA, weight tying, deep‑thin design</li>
        <li>🌍 Bilingual training — Italian + English from scratch</li>
        <li>🔓 Open‑source everything — weights, code, datasets</li>
        <li>💻 Real‑world deployment — runs on consumer hardware</li>
    </ul>
    <h2 id="resources-title" style="color:#d4af37; font-family:'Playfair Display',serif; margin-top:2rem;">📂 Resources</h2>
    <table class="resources-table" id="resources-table">
        <tr><td>📚 <a href="https://huggingface.co/ThingAI/Quark-135m-Bilingual" target="_blank">Quark-135M-Bilingual</a></td></tr>
        <tr><td>🛡️ <a href="https://huggingface.co/ThingsAI/Quark-Mod" target="_blank">Quark-Mod</a></td></tr>
        <tr><td>📝 <a href="https://huggingface.co/ThingsAI" target="_blank">HuggingFace Community</a></td></tr>
        <tr><td>💻 <a href="https://github.com/overcastlab" target="_blank">GitHub</a></td></tr>
    </table>
    <p class="footer-text" id="footer-text">Made with ❤️ by ThingsAI · <a href="https://things-ai.org">Website</a> · <a href="https://github.com/overcastlab">GitHub</a></p>
    """)

    # --- Chat interface: conversation + input on the left, outputs and
    #     generation settings on the right ---
    with gr.Row(equal_height=False):
        with gr.Column(scale=5):
            chatbot = gr.Chatbot(
                label="💬 Conversation",
                height=520,
                elem_classes=["chatbot-wrap"]
            )
            prompt_input = gr.Textbox(
                label="Your Message",
                placeholder="Ask anything... (hallucination may occur ⚠️)",
                lines=3,
                elem_classes=["input-wrap"]
            )
            with gr.Row():
                run_btn = gr.Button("⚡ Send", variant="primary", scale=3)
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1)

        with gr.Column(scale=4):
            thinking_out = gr.Textbox(
                label="🧠 Thinking Process",
                lines=10,
                interactive=False,
                elem_classes=["thinking-box"]
            )
            answer_out = gr.Textbox(
                label="✅ Final Answer",
                lines=6,
                interactive=False,
                elem_classes=["answer-box"]
            )
            with gr.Accordion("⚙️ Settings", open=False):
                system_prompt_input = gr.Textbox(
                    label="🔧 System Prompt",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=4,
                    elem_classes=["system-box"]
                )
                max_tokens = gr.Slider(64, 4096, value=4048, step=32, label="Max Tokens")
                temperature = gr.Slider(0.0, 4, value=0.9, step=0.05, label="Temperature")
                # top_p is a probability mass: values above 1.0 are rejected
                # by the sampler, so the slider stops at 1.0.
                top_p = gr.Slider(0.1, 1.0, value=0.35, step=0.05, label="Top-p")
                top_k = gr.Slider(1, 500, value=61, step=1, label="Top-k")
                show_think = gr.Checkbox(value=True, label="Show Thinking Process")

    # Examples
    gr.Examples(
        examples=[
            ["What is artificial intelligence?"],
            ["How does a large language model learn?"],
            ["Explain the water cycle in simple terms."],
            ["What is the meaning of life?"],
            ["Write a short poem about the universe."],
            ["What is Drugs?"]
        ],
        inputs=[prompt_input],
        label="💡 Example Questions"
    )

    # Wire events: Send button and Enter in the textbox share one callback;
    # Clear resets the same four outputs.
    inputs_list = [prompt_input, chatbot, system_prompt_input, max_tokens, temperature, top_p, top_k, show_think]
    outputs_list = [prompt_input, chatbot, thinking_out, answer_out]

    run_btn.click(chat_generate, inputs=inputs_list, outputs=outputs_list)
    prompt_input.submit(chat_generate, inputs=inputs_list, outputs=outputs_list)
    clear_btn.click(clear_fn, outputs=outputs_list)

# ----------------------------------------------------------------------
# Launch
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Listen on all interfaces on port 7860 and surface callback errors in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)