Spaces:

usermma
/

test

Paused

App Files Files Community

test / app.py

usermma

Create app.py

f6dd924 verified about 15 hours ago

raw

history blame contribute delete

21.4 kB

	import json

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# ----------------------------------------------------------------------
	# Model (unchanged from your working code)
	# ----------------------------------------------------------------------
	# Hugging Face Hub identifier of the checkpoint loaded by this app.
	MODEL_ID = "SupraLabs/Supra-50M-Reasoning"

	# Markers that delimit the reasoning section and the solution section in the
	# generated text. They are written with plain pipes: a backslash before "|"
	# is not a valid Python escape, so it would remain in the string and the
	# marker would never be found in the model output.
	THINK_START = "<|begin_of_thought|>"
	THINK_END = "<|end_of_thought|>"
	SOL_START = "<|begin_of_solution|>"
	SOL_END = "<|end_of_solution|>"

	# System prompt used as the initial value of the "System Prompt" setting.
	DEFAULT_SYSTEM_PROMPT = (
	    "Your role as an assistant involves thoroughly exploring questions through "
	    "a systematic long thinking process before providing the final precise and "
	    "accurate solutions."
	)

	# ----------------------------------------------------------------------
	# Load model once
	# ----------------------------------------------------------------------
	# Tokenizer and model are created once at import time and shared by every
	# request handled by generate().
	print("Loading model...")
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	# float32 weights placed on the CPU; eval() returns the module itself, so the
	# model is put into evaluation mode in the same expression.
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_ID, dtype=torch.float32, device_map="cpu"
	).eval()
	print("Model ready.")

	# ----------------------------------------------------------------------
	# Prompt construction (as provided)
	# ----------------------------------------------------------------------
	def build_prompt(question: str, system_prompt: str) -> str:
	    """Return the single-turn prompt text fed to the model.

	    The prompt has three sections separated by blank lines: the system
	    prompt, the user question, and an assistant section that already opens
	    the reasoning block with ``THINK_START`` followed by a newline.
	    """
	    sections = (
	        f"[SYSTEM]: {system_prompt}",
	        f"[USER]: {question}",
	        f"[ASSISTANT]: {THINK_START}\n",
	    )
	    return "\n\n".join(sections)

	def parse_output(raw: str) -> tuple[str, str]:
	    """Split generated text into its reasoning and its final answer.

	    Args:
	        raw: Generated text, possibly containing the thought and solution
	            markers.

	    Returns:
	        ``(thought, answer)``. ``thought`` is the stripped text between
	        ``THINK_START`` and the first ``THINK_END`` that follows it, or ``""``
	        when that pair is not present. ``answer`` is, in order of preference:
	        the stripped text between ``SOL_START`` and the following ``SOL_END``;
	        everything after ``SOL_START`` when the solution is not closed;
	        everything after ``THINK_END`` when there is no solution marker;
	        otherwise ``raw`` unchanged.
	    """
	    thought, answer = "", raw

	    think_open = raw.find(THINK_START)
	    if think_open != -1:
	        thought_from = think_open + len(THINK_START)
	        # Look for the closing marker only after the opening one, so a stray
	        # closing marker earlier in the text cannot produce an empty slice.
	        think_close = raw.find(THINK_END, thought_from)
	        if think_close != -1:
	            thought = raw[thought_from:think_close].strip()

	    sol_open = raw.find(SOL_START)
	    if sol_open != -1:
	        answer_from = sol_open + len(SOL_START)
	        sol_close = raw.find(SOL_END, answer_from)
	        if sol_close != -1:
	            answer = raw[answer_from:sol_close].strip()
	        else:
	            # Unterminated solution (e.g. generation stopped early): keep
	            # whatever was produced after the opening marker.
	            answer = raw[answer_from:].strip()
	    elif THINK_END in raw:
	        answer = raw[raw.index(THINK_END) + len(THINK_END):].strip()

	    return thought, answer

	def generate(prompt, system_prompt, max_new_tokens, temperature, top_p, top_k, show_thinking):
	    """Run one single-turn generation and split it into reasoning and answer.

	    Args:
	        prompt: User question; a blank question short-circuits with a hint.
	        system_prompt: Text placed in the ``[SYSTEM]`` section of the prompt.
	        max_new_tokens: Upper bound on generated tokens (cast to ``int``).
	        temperature: Sampling temperature; ``0`` or less selects greedy
	            decoding.
	        top_p: Nucleus sampling mass; values above ``1.0`` are capped at
	            ``1.0``.
	        top_k: Top-k cutoff used when sampling (cast to ``int``).
	        show_thinking: When false, the returned thought is ``""``.

	    Returns:
	        ``(thought, answer)`` as produced by ``parse_output``.
	    """
	    if not prompt.strip():
	        return "", "Please enter a question."

	    full_prompt = build_prompt(prompt, system_prompt)
	    inputs = tokenizer(full_prompt, return_tensors="pt")
	    input_ids = inputs["input_ids"]

	    gen_kwargs = {
	        # The pad id is set to the eos id below, so pass the tokenizer's
	        # attention mask explicitly instead of leaving it to be inferred.
	        "attention_mask": inputs.get("attention_mask"),
	        # UI sliders may deliver numbers as floats; token counts must be ints.
	        "max_new_tokens": int(max_new_tokens),
	        "pad_token_id": tokenizer.eos_token_id,
	        "eos_token_id": tokenizer.eos_token_id,
	    }
	    if temperature > 0:
	        gen_kwargs.update(
	            do_sample=True,
	            temperature=float(temperature),
	            # top_p is a probability mass; cap out-of-range UI values.
	            top_p=min(float(top_p), 1.0),
	            top_k=int(top_k),
	        )
	    else:
	        # Greedy decoding: sampling-only arguments are left out.
	        gen_kwargs["do_sample"] = False

	    with torch.no_grad():
	        output_ids = model.generate(input_ids, **gen_kwargs)

	    # Keep only the newly generated tokens (drop the echoed prompt).
	    generated = output_ids[0][input_ids.shape[-1]:]
	    raw = tokenizer.decode(generated, skip_special_tokens=False)
	    raw = raw.replace("<s>", "").replace("</s>", "").strip()
	    # The prompt already opened the reasoning block, so restore the marker
	    # before parsing.
	    raw = THINK_START + "\n" + raw
	    thought, answer = parse_output(raw)
	    return (thought if show_thinking else ""), answer

	# ----------------------------------------------------------------------
	# Chat callback for Gradio
	# ----------------------------------------------------------------------
	def chat_generate(message, history, system_prompt, max_tokens, temperature, top_p, top_k, show_think):
	    """Gradio callback: answer ``message`` and extend the conversation.

	    Each question is answered on its own; ``history`` is only used for
	    display and is not fed back into the prompt.

	    Args:
	        message: Text typed by the user.
	        history: Current chatbot value as a list of ``{"role", "content"}``
	            dicts, or ``None``.
	        system_prompt, max_tokens, temperature, top_p, top_k, show_think:
	            Forwarded unchanged to ``generate``.

	    Returns:
	        ``(textbox_value, chat_history, thought, answer)``. The textbox value
	        is always ``""`` so the input is cleared after sending.
	    """
	    # Copy so the list owned by the UI is never mutated in place.
	    conversation = list(history or [])

	    if not message.strip():
	        # An empty submit must not wipe the conversation shown so far.
	        return "", conversation, "", ""

	    thought, answer = generate(
	        message, system_prompt, max_tokens, temperature, top_p, top_k, show_think
	    )
	    conversation.extend(
	        [
	            {"role": "user", "content": message},
	            {"role": "assistant", "content": answer},
	        ]
	    )
	    return "", conversation, thought, answer

	def clear_fn():
	    """Return empty values for the input box, chat history, thought and answer."""
	    blank = ""
	    return blank, [], blank, blank

	# ----------------------------------------------------------------------
	# Custom CSS – dark theme with serif headings.
	# Passed to gr.Blocks(css=...). The selectors target the element ids and
	# classes used in the gr.HTML sections (#header-section, .lang-toggle,
	# .model-card, .focus-list, .resources-table, .footer-text) and the
	# elem_classes given to the Gradio components (.chatbot-wrap, .input-wrap,
	# .thinking-box, .answer-box, .system-box).
	# ----------------------------------------------------------------------
	CUSTOM_CSS = """
	@import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;600;700&family=Inter:wght@300;400;500;600&family=JetBrains+Mono&display=swap');

	* { box-sizing: border-box; }

	body, .gradio-container {
	background: #1a1a1a !important;
	color: #d4c5b2 !important;
	font-family: 'Inter', sans-serif !important;
	}

	.gradio-container {
	max-width: 1300px !important;
	margin: 0 auto !important;
	padding: 2rem 1.5rem !important;
	}

	/* Header with language toggle */
	#header-section {
	background: linear-gradient(145deg, #2a2118 0%, #1e1b15 100%);
	border: 1px solid #5c4a32;
	border-radius: 18px;
	padding: 2rem;
	margin-bottom: 2rem;
	position: relative;
	box-shadow: 0 8px 30px rgba(0,0,0,0.5);
	}

	#header-section h1 {
	font-family: 'Playfair Display', serif;
	font-size: 2.5rem;
	color: #d4af37;
	margin-top: 0;
	font-weight: 700;
	letter-spacing: 1px;
	}

	#header-section p {
	font-size: 1.1rem;
	color: #c0b09a;
	line-height: 1.7;
	}

	.lang-toggle {
	position: absolute;
	top: 20px;
	right: 20px;
	background: #3e3525;
	border: 1px solid #5c4a32;
	color: #d4af37;
	padding: 6px 16px;
	border-radius: 30px;
	font-family: 'Inter', sans-serif;
	font-weight: 600;
	font-size: 0.9rem;
	cursor: pointer;
	transition: all 0.3s;
	}
	.lang-toggle:hover {
	background: #5c4a32;
	color: #f5e6c8;
	}

	/* Model cards */
	.model-card {
	background: #2a241c;
	border: 1px solid #4a3e2c;
	border-radius: 14px;
	padding: 1.2rem;
	margin-bottom: 1rem;
	transition: transform 0.2s, box-shadow 0.2s;
	}
	.model-card:hover {
	transform: translateY(-3px);
	box-shadow: 0 10px 25px rgba(0,0,0,0.7);
	}
	.model-card a {
	color: #d4af37;
	text-decoration: none;
	font-weight: 600;
	font-size: 1.15rem;
	}
	.model-card p {
	color: #b9a88c;
	margin: 0.5rem 0 0;
	font-size: 0.9rem;
	}

	/* Focus list */
	.focus-list {
	list-style: none;
	padding-left: 0;
	}
	.focus-list li {
	padding: 0.3rem 0;
	font-size: 1rem;
	color: #c0b09a;
	}

	/* Resources table */
	.resources-table {
	width: 100%;
	border-collapse: collapse;
	margin-top: 1rem;
	}
	.resources-table td {
	padding: 10px 0;
	border-bottom: 1px solid #3e3525;
	}
	.resources-table a {
	color: #d4af37;
	text-decoration: none;
	font-weight: 500;
	}
	.resources-table a:hover {
	text-decoration: underline;
	}

	/* Footer */
	.footer-text {
	text-align: center;
	color: #6b5e4a;
	font-size: 0.85rem;
	margin-top: 2rem;
	padding-top: 1.5rem;
	border-top: 1px solid #3e3525;
	}
	.footer-text a {
	color: #d4af37;
	text-decoration: none;
	}

	/* Gradio components restyling */
	.chatbot-wrap .wrap {
	background: #1e1b15 !important;
	border: 1px solid #4a3e2c !important;
	border-radius: 14px !important;
	}

	.message.user {
	background: linear-gradient(135deg, #5c4a32, #7a5c3e) !important;
	color: white !important;
	border-radius: 18px 18px 4px 18px !important;
	padding: 12px 16px !important;
	}
	.message.bot {
	background: #2a241c !important;
	color: #e8dcc8 !important;
	border: 1px solid #5c4a32 !important;
	border-radius: 18px 18px 18px 4px !important;
	}

	.input-wrap textarea {
	background: #2a241c !important;
	border: 1px solid #4a3e2c !important;
	color: #e8dcc8 !important;
	font-family: 'Inter', sans-serif !important;
	}
	.input-wrap textarea:focus {
	border-color: #d4af37 !important;
	box-shadow: 0 0 0 3px rgba(212,175,55,0.15) !important;
	}

	button.primary {
	background: linear-gradient(135deg, #7a5c3e, #a67c46) !important;
	border: none !important;
	border-radius: 10px !important;
	color: white !important;
	font-weight: 600 !important;
	transition: all 0.2s !important;
	}
	button.primary:hover {
	transform: translateY(-1px) !important;
	box-shadow: 0 4px 20px rgba(166,124,70,0.4) !important;
	}

	.thinking-box textarea {
	font-family: 'JetBrains Mono', monospace !important;
	background: #1a1510 !important;
	border: 1px solid #3e3525 !important;
	color: #b9a88c !important;
	}
	.answer-box textarea {
	font-family: 'Inter', sans-serif !important;
	background: #1a1e15 !important;
	border: 1px solid #3e4a2c !important;
	color: #c5d4af !important;
	}
	.system-box textarea {
	background: #1a1510 !important;
	border: 1px solid #5c4a32 !important;
	color: #d4af37 !important;
	}

	input[type=range] {
	accent-color: #d4af37 !important;
	}
	.accordion {
	background: #1e1b15 !important;
	border: 1px solid #4a3e2c !important;
	}

	footer { display: none !important; }
	"""

	# ----------------------------------------------------------------------
	# Bilingual content for the header & info section
	# ----------------------------------------------------------------------
	# Link targets shared by the English and Italian resource lists.
	_URL_QUARK_135M = "https://huggingface.co/ThingAI/Quark-135m-Bilingual"
	_URL_QUARK_MOD = "https://huggingface.co/ThingsAI/Quark-Mod"
	_URL_HF_ORG = "https://huggingface.co/ThingsAI"
	_URL_GITHUB = "https://github.com/overcastlab"

	# The dataset line is the same in both languages.
	_DATASET_LINK = "📊 Dataset: <a href='https://huggingface.co/datasets/ThingAI/OmniBook'>ThingAI/OmniBook</a>"

	# Translatable page text keyed by language code ("en" / "it"). Both entries
	# carry the same keys; "resources" is a list of (label, url) pairs.
	CONTENT = {
	    "en": {
	        "title": "Welcome to ThingsAI! 🤗",
	        "intro": "Building efficient, bilingual AI models that run anywhere. 🇮🇹 🇬🇧",
	        "models_title": "🤖 Our Models",
	        "model_q135": "A lightweight bilingual (Italian + English) language model with <b>135M parameters</b>. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.",
	        "model_q270": "Our most powerful small model — <b>270M parameters</b> with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.",
	        "model_qmod": "A multi-label moderation model covering <b>9 categories</b>: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.",
	        "focus_title": "🎯 What We Focus On",
	        "focus_items": [
	            "⚡ Small, efficient architectures — GQA, weight tying, deep‑thin design",
	            "🌍 Bilingual training — Italian + English from scratch",
	            "🔓 Open‑source everything — weights, code, datasets",
	            "💻 Real‑world deployment — runs on consumer hardware",
	        ],
	        "resources_title": "📂 Resources",
	        "resources": [
	            ("📚 Quark-135M-Bilingual", _URL_QUARK_135M),
	            ("🛡️ Quark-Mod", _URL_QUARK_MOD),
	            ("📝 HuggingFace Community", _URL_HF_ORG),
	            ("💻 GitHub", _URL_GITHUB),
	        ],
	        "dataset_link": _DATASET_LINK,
	        "footer": "Made with ❤️ by ThingsAI · <a href='https://things-ai.org'>Website</a> · <a href='https://github.com/overcastlab'>GitHub</a>",
	    },
	    "it": {
	        "title": "Benvenuti in ThingsAI! 🤗",
	        "intro": "Costruiamo modelli AI bilingui efficienti che funzionano ovunque. 🇮🇹 🇬🇧",
	        "models_title": "🤖 I Nostri Modelli",
	        "model_q135": "Un modello linguistico bilingue leggero (italiano + inglese) con <b>135M parametri</b>. Caratteristiche: GQA, SwiGLU, RMSNorm, RoPE. Addestrato su 50B+ token.",
	        "model_q270": "Il nostro piccolo modello più potente — <b>270M parametri</b> con 32 strati, dimensione nascosta 768, vocabolario 65K. In addestramento attivo su 10B+ token, pianificato 135B token.",
	        "model_qmod": "Un modello di moderazione multi‑etichetta che copre <b>9 categorie</b>: tossico, gravemente_tossico, osceno, minaccia, insulto, odio_identitario, cyberbullismo, incitamento_all'odio, offensivo.",
	        "focus_title": "🎯 Su Cosa Ci Concentriamo",
	        "focus_items": [
	            "⚡ Architetture piccole ed efficienti — GQA, weight tying, design deep‑thin",
	            "🌍 Addestramento bilingue — italiano + inglese da zero",
	            "🔓 Tutto open‑source — pesi, codice, dataset",
	            "💻 Implementazione reale — funziona su hardware consumer",
	        ],
	        "resources_title": "📂 Risorse",
	        "resources": [
	            ("📚 Quark-135M-Bilingual", _URL_QUARK_135M),
	            ("🛡️ Quark-Mod", _URL_QUARK_MOD),
	            ("📝 Comunità HuggingFace", _URL_HF_ORG),
	            ("💻 GitHub", _URL_GITHUB),
	        ],
	        "dataset_link": _DATASET_LINK,
	        "footer": "Fatto con ❤️ da ThingsAI · <a href='https://things-ai.org'>Sito Web</a> · <a href='https://github.com/overcastlab'>GitHub</a>",
	    },
	}

	# ----------------------------------------------------------------------
	# Build the complete Gradio interface
	# ----------------------------------------------------------------------
	# Translations handed to the browser. json.dumps is used instead of str():
	# the Python repr of a tuple, ('a', 'b'), is a comma expression in JavaScript,
	# whereas JSON turns tuples into arrays and escapes strings correctly. "</" is
	# escaped so the payload can never close the surrounding <script> element.
	_CONTENT_JSON = json.dumps(CONTENT, ensure_ascii=False).replace("</", "<\\/")

	# Language-switch script. It is injected through gr.Blocks(head=...) because a
	# <script> placed inside a gr.HTML component is inserted as markup and is not
	# executed, which would leave switchLanguage() undefined for the button's
	# onclick handler.
	LANG_SWITCH_HEAD = """
	<script>
	const content = """ + _CONTENT_JSON + """;
	let currentLang = 'en';

	function setHtml(id, html) {
	  const el = document.getElementById(id);
	  if (el) { el.innerHTML = html; }
	}

	function switchLanguage() {
	  currentLang = currentLang === 'en' ? 'it' : 'en';
	  const t = content[currentLang];

	  // Headings
	  setHtml('main-title', t.title);
	  setHtml('main-intro', t.intro);
	  setHtml('models-title', t.models_title);
	  setHtml('focus-title', t.focus_title);
	  setHtml('resources-title', t.resources_title);

	  // Model descriptions
	  setHtml('model-desc-135', t.model_q135);
	  setHtml('model-desc-270', t.model_q270);
	  setHtml('model-desc-mod', t.model_qmod);

	  // Dataset paragraph
	  setHtml('dataset-paragraph', t.dataset_link);

	  // Focus list
	  setHtml('focus-list', t.focus_items.map(item => '<li>' + item + '</li>').join(''));

	  // Resources table: rebuild rows with the same shape as the static markup,
	  // i.e. the leading icon followed by the linked label.
	  setHtml('resources-table', t.resources.map(([label, url]) => {
	    const cut = label.indexOf(' ');
	    const icon = cut < 0 ? '' : label.slice(0, cut);
	    const text = cut < 0 ? label : label.slice(cut + 1);
	    return `<tr><td>${icon} <a href="${url}" target="_blank">${text}</a></td></tr>`;
	  }).join(''));

	  // Footer
	  setHtml('footer-text', t.footer);

	  // Toggle button shows the language that a click will switch to
	  const btn = document.querySelector('.lang-toggle');
	  if (btn) { btn.innerHTML = currentLang === 'en' ? '🇮🇹 Italiano' : '🇬🇧 English'; }
	}
	</script>
	"""

	with gr.Blocks(
	    title="ThingsAI – Chat & Models",
	    css=CUSTOM_CSS,
	    theme=gr.themes.Soft(),  # base theme overridden by our CSS
	    head=LANG_SWITCH_HEAD,
	) as demo:

	    # --- Header + language toggle (ids are rewritten by switchLanguage) ---
	    gr.HTML("""
	<div id="header-section">
	<button class="lang-toggle" onclick="switchLanguage()">🇮🇹 Italiano</button>
	<h1 id="main-title">Welcome to ThingsAI! 🤗</h1>
	<p id="main-intro">Building efficient, bilingual AI models that run anywhere. 🇮🇹 🇬🇧</p>
	</div>
	""")

	    # --- Model cards (static HTML; ids mark the translatable parts) ---
	    gr.HTML("""
	<h2 id="models-title" style="color:#d4af37; font-family:'Playfair Display',serif;">🤖 Our Models</h2>
	<div class="model-card">
	<a href="https://huggingface.co/ThingAI/Quark-135m-Bilingual" target="_blank">Quark-135M</a>
	<p id="model-desc-135">A lightweight bilingual (Italian + English) language model with <b>135M parameters</b>. Features GQA, SwiGLU, RMSNorm, and RoPE. Trained on 50B+ tokens.</p>
	</div>
	<div class="model-card">
	<a href="https://huggingface.co/ThingAI/Quark-270m-Instruct" target="_blank">Quark-270M (Instruct)</a>
	<p id="model-desc-270">Our most powerful small model — <b>270M parameters</b> with 32 layers, 768 hidden dimensions, and 65K vocabulary. Currently in active training on 10B+ tokens, planned 135B tokens.</p>
	</div>
	<div class="model-card">
	<a href="https://huggingface.co/ThingAI/Quark-Mod" target="_blank">Quark-Mod</a>
	<p id="model-desc-mod">A multi-label moderation model covering <b>9 categories</b>: toxic, severe_toxic, obscene, threat, insult, identity_hate, cyberbullying, hate_speech, offensive.</p>
	</div>
	<div class="model-card">
	<a href="https://huggingface.co/ThingAI/Quark-135m" target="_blank">Quark-135m (Base)</a>
	<p>Base model.</p>
	</div>
	<div class="model-card">
	<a href="https://huggingface.co/ThingAI/Quark-50m" target="_blank">Quark-50m</a>
	<p>Lightweight 50M model.</p>
	</div>
	<p id="dataset-paragraph" style="margin-top:1rem; color:#c0b09a;">📊 Dataset: <a href="https://huggingface.co/datasets/ThingAI/OmniBook" style="color:#d4af37;">ThingAI/OmniBook</a></p>
	""")

	    # --- Focus list, resources table and footer ---
	    gr.HTML("""
	<h2 id="focus-title" style="color:#d4af37; font-family:'Playfair Display',serif;">🎯 What We Focus On</h2>
	<ul class="focus-list" id="focus-list">
	<li>⚡ Small, efficient architectures — GQA, weight tying, deep‑thin design</li>
	<li>🌍 Bilingual training — Italian + English from scratch</li>
	<li>🔓 Open‑source everything — weights, code, datasets</li>
	<li>💻 Real‑world deployment — runs on consumer hardware</li>
	</ul>
	<h2 id="resources-title" style="color:#d4af37; font-family:'Playfair Display',serif; margin-top:2rem;">📂 Resources</h2>
	<table class="resources-table" id="resources-table">
	<tr><td>📚 <a href="https://huggingface.co/ThingAI/Quark-135m-Bilingual" target="_blank">Quark-135M-Bilingual</a></td></tr>
	<tr><td>🛡️ <a href="https://huggingface.co/ThingsAI/Quark-Mod" target="_blank">Quark-Mod</a></td></tr>
	<tr><td>📝 <a href="https://huggingface.co/ThingsAI" target="_blank">HuggingFace Community</a></td></tr>
	<tr><td>💻 <a href="https://github.com/overcastlab" target="_blank">GitHub</a></td></tr>
	</table>
	<p class="footer-text" id="footer-text">Made with ❤️ by ThingsAI · <a href="https://things-ai.org">Website</a> · <a href="https://github.com/overcastlab">GitHub</a></p>
	""")

	    # --- Chat interface: conversation on the left, outputs/settings on the right ---
	    with gr.Row(equal_height=False):
	        with gr.Column(scale=5):
	            chatbot = gr.Chatbot(
	                label="💬 Conversation",
	                height=520,
	                elem_classes=["chatbot-wrap"],
	            )
	            prompt_input = gr.Textbox(
	                label="Your Message",
	                placeholder="Ask anything... (hallucination may occur ⚠️)",
	                lines=3,
	                elem_classes=["input-wrap"],
	            )
	            with gr.Row():
	                run_btn = gr.Button("⚡ Send", variant="primary", scale=3)
	                clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1)

	        with gr.Column(scale=4):
	            thinking_out = gr.Textbox(
	                label="🧠 Thinking Process",
	                lines=10,
	                interactive=False,
	                elem_classes=["thinking-box"],
	            )
	            answer_out = gr.Textbox(
	                label="✅ Final Answer",
	                lines=6,
	                interactive=False,
	                elem_classes=["answer-box"],
	            )
	            with gr.Accordion("⚙️ Settings", open=False):
	                system_prompt_input = gr.Textbox(
	                    label="🔧 System Prompt",
	                    value=DEFAULT_SYSTEM_PROMPT,
	                    lines=4,
	                    elem_classes=["system-box"],
	                )
	                # NOTE(review): 4048 is not a multiple of the 32-token step
	                # counted from 64 — confirm the intended default.
	                max_tokens = gr.Slider(64, 4096, value=4048, step=32, label="Max Tokens")
	                temperature = gr.Slider(0.0, 4, value=0.9, step=0.05, label="Temperature")
	                # top_p is a probability mass, so the slider stops at 1.0.
	                top_p = gr.Slider(0.1, 1.0, value=0.35, step=0.05, label="Top-p")
	                top_k = gr.Slider(1, 500, value=61, step=1, label="Top-k")
	                show_think = gr.Checkbox(value=True, label="Show Thinking Process")

	    # Clicking an example fills the message box.
	    gr.Examples(
	        examples=[
	            ["What is artificial intelligence?"],
	            ["How does a large language model learn?"],
	            ["Explain the water cycle in simple terms."],
	            ["What is the meaning of life?"],
	            ["Write a short poem about the universe."],
	            ["What is Drugs?"],
	        ],
	        inputs=[prompt_input],
	        label="💡 Example Questions",
	    )

	    # Wire events: the Send button and Enter in the textbox run the same
	    # callback; Clear resets the same four outputs.
	    inputs_list = [prompt_input, chatbot, system_prompt_input, max_tokens, temperature, top_p, top_k, show_think]
	    outputs_list = [prompt_input, chatbot, thinking_out, answer_out]

	    run_btn.click(chat_generate, inputs=inputs_list, outputs=outputs_list)
	    prompt_input.submit(chat_generate, inputs=inputs_list, outputs=outputs_list)
	    clear_btn.click(clear_fn, outputs=outputs_list)

	# ----------------------------------------------------------------------
	# Launch
	# ----------------------------------------------------------------------
	if __name__ == "__main__":
	    # Start the web server only when the file is run as a script: listen on
	    # all interfaces, port 7860, with errors shown in the UI.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "show_error": True,
	    }
	    demo.launch(**launch_options)