"""Gradio chat demo for the Supra-50M reasoning model.

Loads the model once at import time, exposes helpers to build the prompt,
run generation and split the raw output into "thinking" and "answer" parts,
and wires everything into a themed ``gr.Blocks`` interface.
"""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# ----------------------------------------------------------------------
# Model (unchanged from your working code)
# ----------------------------------------------------------------------
MODEL_ID = "SupraLabs/Supra-50M-Reasoning"

# Marker tokens the model emits around its reasoning and its final solution.
THINK_START = "<|begin_of_thought|>"
THINK_END = "<|end_of_thought|>"
SOL_START = "<|begin_of_solution|>"
SOL_END = "<|end_of_solution|>"

DEFAULT_SYSTEM_PROMPT = (
    "Your role as an assistant involves thoroughly exploring questions through "
    "a systematic long thinking process before providing the final precise and "
    "accurate solutions."
)

# ----------------------------------------------------------------------
# Load model once
# ----------------------------------------------------------------------
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    dtype=torch.float32,
    device_map="cpu",
)
model.eval()
print("Model ready.")


# ----------------------------------------------------------------------
# Prompt construction (as provided)
# ----------------------------------------------------------------------
def build_prompt(question: str, system_prompt: str) -> str:
    """Return the single-turn prompt, ending with an opened thinking block."""
    return (
        f"[SYSTEM]: {system_prompt}\n\n"
        f"[USER]: {question}\n\n"
        f"[ASSISTANT]: {THINK_START}\n"
    )


def parse_output(raw: str):
    """Split raw model text into ``(thought, answer)``.

    ``thought`` is the text between ``THINK_START`` and ``THINK_END`` when both
    markers are present, otherwise ``""``.

    ``answer`` is, in order of preference:
      * the text between ``SOL_START`` and ``SOL_END``;
      * everything after ``SOL_START`` when ``SOL_END`` is missing;
      * everything after ``THINK_END`` when there is no solution marker;
      * ``raw`` itself when none of the markers apply.
    """
    thought, answer = "", raw

    if THINK_START in raw and THINK_END in raw:
        t0 = raw.index(THINK_START) + len(THINK_START)
        t1 = raw.index(THINK_END)
        thought = raw[t0:t1].strip()

    if SOL_START in raw and SOL_END in raw:
        s0 = raw.index(SOL_START) + len(SOL_START)
        s1 = raw.index(SOL_END)
        answer = raw[s0:s1].strip()
    elif SOL_START in raw:
        s0 = raw.index(SOL_START) + len(SOL_START)
        answer = raw[s0:].strip()
    elif THINK_END in raw:
        answer = raw[raw.index(THINK_END) + len(THINK_END):].strip()

    return thought, answer


def generate(prompt, system_prompt, max_new_tokens, temperature, top_p, top_k,
             show_thinking):
    """Run one generation and return ``(thought, answer)``.

    Args:
        prompt: The user's question; blank input short-circuits with a hint.
        system_prompt: Text placed in the ``[SYSTEM]`` slot of the prompt.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature; ``0`` or less switches to greedy
            decoding.
        top_p: Nucleus sampling mass; clamped into ``[0, 1]``.
        top_k: Top-k cutoff (coerced to ``int``).
        show_thinking: When false, the returned thought is ``""``.

    Returns:
        A ``(thought, answer)`` tuple of strings.
    """
    if not prompt.strip():
        return "", "Please enter a question."

    # UI sliders may hand over floats; transformers expects integer counts and
    # rejects top_p values outside [0, 1].
    max_new_tokens = int(max_new_tokens)
    top_k = int(top_k)
    top_p = min(max(float(top_p), 0.0), 1.0)

    full_prompt = build_prompt(prompt, system_prompt)
    inputs = tokenizer(full_prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            # pad == eos here, so the mask cannot be inferred from the ids.
            attention_mask=inputs.get("attention_mask"),
            max_new_tokens=max_new_tokens,
            do_sample=temperature > 0,
            temperature=temperature if temperature > 0 else 1.0,
            top_p=top_p,
            top_k=top_k,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Keep only the newly generated tokens (drop the echoed prompt).
    generated = output_ids[0][input_ids.shape[-1]:]
    raw = tokenizer.decode(generated, skip_special_tokens=False)
    # NOTE(review): both replace() calls strip the same literal; presumably they
    # once removed two different special tokens - confirm against the original.
    raw = raw.replace("~~", "").replace("~~", "").strip()
    # The prompt already opened the thinking block, so re-add the marker for
    # parse_output to find.
    raw = THINK_START + "\n" + raw

    thought, answer = parse_output(raw)
    return (thought if show_thinking else ""), answer


# ----------------------------------------------------------------------
# Chat callback for Gradio
# ----------------------------------------------------------------------
def chat_generate(message, history, system_prompt, max_tokens, temperature,
                  top_p, top_k, show_think):
    """Gradio callback: answer ``message`` and append the turn to ``history``.

    Returns a 4-tuple matching the wired outputs: the cleared input box text,
    the updated chat history, the thinking text and the final answer.
    """
    # Copy so the caller's list is never mutated; tolerate a missing history.
    previous_turns = list(history) if history else []

    if not message.strip():
        # Nothing to send: keep the existing conversation on screen.
        return "", previous_turns, "", ""

    thought, answer = generate(message, system_prompt, max_tokens, temperature,
                               top_p, top_k, show_think)
    new_history = previous_turns + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": answer},
    ]
    return "", new_history, thought, answer


def clear_fn():
    """Reset the input box, chat history, thinking box and answer box."""
    return "", [], "", ""


# ----------------------------------------------------------------------
# Custom CSS – Classic, elegant, dark theme with serif headings
# ----------------------------------------------------------------------
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;600;700&family=Inter:wght@300;400;500;600&family=JetBrains+Mono&display=swap');

* { box-sizing: border-box; }

body, .gradio-container {
    background: #1a1a1a !important;
    color: #d4c5b2 !important;
    font-family: 'Inter', sans-serif !important;
}

.gradio-container {
    max-width: 1300px !important;
    margin: 0 auto !important;
    padding: 2rem 1.5rem !important;
}

/* Header with language toggle */
#header-section {
    background: linear-gradient(145deg, #2a2118 0%, #1e1b15 100%);
    border: 1px solid #5c4a32;
    border-radius: 18px;
    padding: 2rem;
    margin-bottom: 2rem;
    position: relative;
    box-shadow: 0 8px 30px rgba(0,0,0,0.5);
}
#header-section h1 {
    font-family: 'Playfair Display', serif;
    font-size: 2.5rem;
    color: #d4af37;
    margin-top: 0;
    font-weight: 700;
    letter-spacing: 1px;
}
#header-section p {
    font-size: 1.1rem;
    color: #c0b09a;
    line-height: 1.7;
}
.lang-toggle {
    position: absolute;
    top: 20px;
    right: 20px;
    background: #3e3525;
    border: 1px solid #5c4a32;
    color: #d4af37;
    padding: 6px 16px;
    border-radius: 30px;
    font-family: 'Inter', sans-serif;
    font-weight: 600;
    font-size: 0.9rem;
    cursor: pointer;
    transition: all 0.3s;
}
.lang-toggle:hover {
    background: #5c4a32;
    color: #f5e6c8;
}

/* Model cards */
.model-card {
    background: #2a241c;
    border: 1px solid #4a3e2c;
    border-radius: 14px;
    padding: 1.2rem;
    margin-bottom: 1rem;
    transition: transform 0.2s, box-shadow 0.2s;
}
.model-card:hover {
    transform: translateY(-3px);
    box-shadow: 0 10px 25px rgba(0,0,0,0.7);
}
.model-card a {
    color: #d4af37;
    text-decoration: none;
    font-weight: 600;
    font-size: 1.15rem;
}
.model-card p {
    color: #b9a88c;
    margin: 0.5rem 0 0;
    font-size: 0.9rem;
}

/* Focus list */
.focus-list {
    list-style: none;
    padding-left: 0;
}
.focus-list li {
    padding: 0.3rem 0;
    font-size: 1rem;
    color: #c0b09a;
}

/* Resources table */
.resources-table {
    width: 100%;
    border-collapse: collapse;
    margin-top: 1rem;
}
.resources-table td {
    padding: 10px 0;
    border-bottom: 1px solid #3e3525;
}
.resources-table a {
    color: #d4af37;
    text-decoration: none;
    font-weight: 500;
}
.resources-table a:hover {
    text-decoration: underline;
}

/* Footer */
.footer-text {
    text-align: center;
    color: #6b5e4a;
    font-size: 0.85rem;
    margin-top: 2rem;
    padding-top: 1.5rem;
    border-top: 1px solid #3e3525;
}
.footer-text a {
    color: #d4af37;
    text-decoration: none;
}

/* Gradio components restyling */
.chatbot-wrap .wrap {
    background: #1e1b15 !important;
    border: 1px solid #4a3e2c !important;
    border-radius: 14px !important;
}
.message.user {
    background: linear-gradient(135deg, #5c4a32, #7a5c3e) !important;
    color: white !important;
    border-radius: 18px 18px 4px 18px !important;
    padding: 12px 16px !important;
}
.message.bot {
    background: #2a241c !important;
    color: #e8dcc8 !important;
    border: 1px solid #5c4a32 !important;
    border-radius: 18px 18px 18px 4px !important;
}
.input-wrap textarea {
    background: #2a241c !important;
    border: 1px solid #4a3e2c !important;
    color: #e8dcc8 !important;
    font-family: 'Inter', sans-serif !important;
}
.input-wrap textarea:focus {
    border-color: #d4af37 !important;
    box-shadow: 0 0 0 3px rgba(212,175,55,0.15) !important;
}
button.primary {
    background: linear-gradient(135deg, #7a5c3e, #a67c46) !important;
    border: none !important;
    border-radius: 10px !important;
    color: white !important;
    font-weight: 600 !important;
    transition: all 0.2s !important;
}
button.primary:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 20px rgba(166,124,70,0.4) !important;
}
.thinking-box textarea {
    font-family: 'JetBrains Mono', monospace !important;
    background: #1a1510 !important;
    border: 1px solid #3e3525 !important;
    color: #b9a88c !important;
}
.answer-box textarea {
    font-family: 'Inter', sans-serif !important;
    background: #1a1e15 !important;
    border: 1px solid #3e4a2c !important;
    color: #c5d4af !important;
}
.system-box textarea {
    background: #1a1510 !important;
    border: 1px solid #5c4a32 !important;
    color: #d4af37 !important;
}
input[type=range] {
    accent-color: #d4af37 !important;
}
.accordion {
    background: #1e1b15 !important;
    border: 1px solid #4a3e2c !important;
}
footer {
    display: none !important;
}
"""

# ----------------------------------------------------------------------
# Bilingual content for the header & info section
# ----------------------------------------------------------------------
# NOTE(review): some URLs use the "ThingAI" org and others "ThingsAI" - confirm
# which spelling is correct for each link.
CONTENT = {
    "en": {
        "title": "Welcome to ThingsAI! 🤗",
        "intro": "Building efficient, bilingual AI models that run anywhere. 🇮🇹 🇬🇧",
        "models_title": "🤖 Our Models",
        "model_q135": "A lightweight bilingual (Italian + English) language model with 135M parameters. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.",
        "model_q270": "Our most powerful small model — 270M parameters with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.",
        "model_qmod": "A multi-label moderation model covering 9 categories: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.",
        "focus_title": "🎯 What We Focus On",
        "focus_items": [
            "⚡ Small, efficient architectures — GQA, weight tying, deep‑thin design",
            "🌍 Bilingual training — Italian + English from scratch",
            "🔓 Open‑source everything — weights, code, datasets",
            "💻 Real‑world deployment — runs on consumer hardware",
        ],
        "resources_title": "📂 Resources",
        "resources": [
            ("📚 Quark-135M-Bilingual", "https://huggingface.co/ThingAI/Quark-135m-Bilingual"),
            ("🛡️ Quark-Mod", "https://huggingface.co/ThingsAI/Quark-Mod"),
            ("📝 HuggingFace Community", "https://huggingface.co/ThingsAI"),
            ("💻 GitHub", "https://github.com/overcastlab"),
        ],
        "dataset_link": "📊 Dataset: ThingAI/OmniBook",
        "footer": "Made with ❤️ by ThingsAI · Website · GitHub",
    },
    "it": {
        "title": "Benvenuti in ThingsAI! 🤗",
        "intro": "Costruiamo modelli AI bilingui efficienti che funzionano ovunque. 🇮🇹 🇬🇧",
        "models_title": "🤖 I Nostri Modelli",
        "model_q135": "Un modello linguistico bilingue leggero (italiano + inglese) con 135M parametri. Caratteristiche: GQA, SwiGLU, RMSNorm, RoPE. Addestrato su 50B+ token.",
        "model_q270": "Il nostro piccolo modello più potente — 270M parametri con 32 strati, dimensione nascosta 768, vocabolario 65K. In addestramento attivo su 10B+ token, pianificato 135B token.",
        "model_qmod": "Un modello di moderazione multi‑etichetta che copre 9 categorie: tossico, gravemente_tossico, osceno, minaccia, insulto, odio_identitario, cyberbullismo, incitamento_all'odio, offensivo.",
        "focus_title": "🎯 Su Cosa Ci Concentriamo",
        "focus_items": [
            "⚡ Architetture piccole ed efficienti — GQA, weight tying, design deep‑thin",
            "🌍 Addestramento bilingue — italiano + inglese da zero",
            "🔓 Tutto open‑source — pesi, codice, dataset",
            "💻 Implementazione reale — funziona su hardware consumer",
        ],
        "resources_title": "📂 Risorse",
        "resources": [
            ("📚 Quark-135M-Bilingual", "https://huggingface.co/ThingAI/Quark-135m-Bilingual"),
            ("🛡️ Quark-Mod", "https://huggingface.co/ThingsAI/Quark-Mod"),
            ("📝 Comunità HuggingFace", "https://huggingface.co/ThingsAI"),
            ("💻 GitHub", "https://github.com/overcastlab"),
        ],
        "dataset_link": "📊 Dataset: ThingAI/OmniBook",
        "footer": "Fatto con ❤️ da ThingsAI · Sito Web · GitHub",
    },
}

# ----------------------------------------------------------------------
# Build the complete Gradio interface
# ----------------------------------------------------------------------
# NOTE(review): the gr.HTML bodies below contain plain text only; the markup the
# CSS targets (#header-section, .model-card, ...) appears to have been stripped
# from this copy of the file - restore it from the original if available.
with gr.Blocks(
    title="ThingsAI – Chat & Models",
    css=CUSTOM_CSS,
    theme=gr.themes.Soft()  # base theme overridden by our CSS
) as demo:

    # --- Header + Language Toggle ---
    gr.HTML("""

Welcome to ThingsAI! 🤗

Building efficient, bilingual AI models that run anywhere. 🇮🇹 🇬🇧

""")

    # --- Model Cards (using HTML, IDs for translation) ---
    gr.HTML("""

🤖 Our Models

Quark-135M

A lightweight bilingual (Italian + English) language model with 135M parameters. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.

Quark-270M (Instruct)

Our most powerful small model — 270M parameters with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.

Quark-Mod

A multi-label moderation model covering 9 categories: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.

Quark-135m (Base)

Base model.

Quark-50m

Lightweight 50M model.

📊 Dataset: ThingAI/OmniBook

""")

    # --- Focus & Resources ---
    gr.HTML("""

🎯 What We Focus On

⚡ Small, efficient architectures — GQA, weight tying, deep‑thin design
🌍 Bilingual training — Italian + English from scratch
🔓 Open‑source everything — weights, code, datasets
💻 Real‑world deployment — runs on consumer hardware

📂 Resources

📚 Quark-135M-Bilingual

🛡️ Quark-Mod

📝 HuggingFace Community

💻 GitHub

Made with ❤️ by ThingsAI · Website · GitHub

""")

    # --- Chat interface (exactly your working code, only relocated inside Blocks) ---
    # NOTE(review): the nesting of rows/columns below is reconstructed; the
    # original indentation was lost - confirm the intended layout.
    with gr.Row(equal_height=False):
        with gr.Column(scale=5):
            chatbot = gr.Chatbot(
                label="💬 Conversation",
                height=520,
                elem_classes=["chatbot-wrap"]
            )
            prompt_input = gr.Textbox(
                label="Your Message",
                placeholder="Ask anything... (hallucination may occur ⚠️)",
                lines=3,
                elem_classes=["input-wrap"]
            )
            with gr.Row():
                run_btn = gr.Button("⚡ Send", variant="primary", scale=3)
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1)

        with gr.Column(scale=4):
            thinking_out = gr.Textbox(
                label="🧠 Thinking Process",
                lines=10,
                interactive=False,
                elem_classes=["thinking-box"]
            )
            answer_out = gr.Textbox(
                label="✅ Final Answer",
                lines=6,
                interactive=False,
                elem_classes=["answer-box"]
            )
            with gr.Accordion("⚙️ Settings", open=False):
                system_prompt_input = gr.Textbox(
                    label="🔧 System Prompt",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=4,
                    elem_classes=["system-box"]
                )
                # NOTE(review): 4048 is not on the 32-step grid starting at 64 -
                # confirm the intended default.
                max_tokens = gr.Slider(64, 4096, value=4048, step=32, label="Max Tokens")
                temperature = gr.Slider(0.0, 4, value=0.9, step=0.05, label="Temperature")
                # top_p is a probability mass: values above 1.0 are rejected by
                # transformers, so the slider stops at 1.0.
                top_p = gr.Slider(0.1, 1.0, value=0.35, step=0.05, label="Top-p")
                top_k = gr.Slider(1, 500, value=61, step=1, label="Top-k")
                show_think = gr.Checkbox(value=True, label="Show Thinking Process")

    # Examples
    gr.Examples(
        examples=[
            ["What is artificial intelligence?"],
            ["How does a large language model learn?"],
            ["Explain the water cycle in simple terms."],
            ["What is the meaning of life?"],
            ["Write a short poem about the universe."],
            ["What is Drugs?"]
        ],
        inputs=[prompt_input],
        label="💡 Example Questions"
    )

    # Wire events
    inputs_list = [prompt_input, chatbot, system_prompt_input, max_tokens,
                   temperature, top_p, top_k, show_think]
    outputs_list = [prompt_input, chatbot, thinking_out, answer_out]

    run_btn.click(chat_generate, inputs=inputs_list, outputs=outputs_list)
    prompt_input.submit(chat_generate, inputs=inputs_list, outputs=outputs_list)
    clear_btn.click(clear_fn, outputs=outputs_list)

    # ------------------------------------------------------------------
    # Language switch JavaScript – swaps all translatable text
    # ------------------------------------------------------------------
    # NOTE(review): this block is empty in this copy of the file; the script
    # that presumably consumed CONTENT is missing - confirm against the original.
    gr.HTML(""" """)

# ----------------------------------------------------------------------
# Launch
# ----------------------------------------------------------------------
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )