Mosquito-Demo

Sleeping

App Files Files Community

Mosquito-Demo / app.py

ag14850

Update app.py

e2932cc verified 19 days ago

raw

history blame contribute delete

5.36 kB

	import gradio as gr
	import torch
	import numpy as np
	import struct
	import lzma
	import json
	from huggingface_hub import hf_hub_download
	from transformers import T5Config, T5ForConditionalGeneration, AutoTokenizer

	# Fetch the xz-compressed quantized checkpoint from the Hugging Face Hub.
	# The returned value is handed to load_quantized_model() further down.
	model_path = hf_hub_download(
	    repo_id="ag14850/Mosquito",
	    filename="mosquito_tiny.bin.xz",
	)

	def unpack_nbits(data, bits, count):
	    """Unpack ``count`` unsigned ``bits``-wide integers from packed bytes.

	    Bit layouts:

	    * 8 bits: one value per byte.
	    * 4 and 6 bits: values are stored back to back, most significant bit
	      first (the first value sits in the high bits of the first byte).
	    * 5 and 7 bits: values are stored back to back, least significant bit
	      first (every 8 values form one little-endian integer, with the first
	      value in its low bits).

	    Args:
	        data: Bytes-like object holding the packed values.
	        bits: Width of each value in bits; one of 4, 5, 6, 7 or 8.
	        count: Number of values to decode.

	    Returns:
	        A 1-D ``np.uint8`` array with exactly ``count`` elements.

	    Raises:
	        ValueError: If ``bits`` is not a supported width, or if ``data``
	            holds fewer than ``count * bits`` bits.
	    """
	    if bits not in (4, 5, 6, 7, 8):
	        raise ValueError(f"unsupported bit width: {bits}")
	    if count == 0:
	        return np.empty(0, dtype=np.uint8)

	    total_bits = count * bits
	    if len(data) * 8 < total_bits:
	        raise ValueError(
	            f"packed data too short: need {total_bits} bits for {count} "
	            f"{bits}-bit values, got {len(data) * 8}"
	        )

	    raw = np.frombuffer(data, dtype=np.uint8)
	    if bits == 8:
	        return raw[:count]

	    # Treat the buffer as one continuous bit stream and cut it into
	    # ``bits``-wide fields. Working on the stream (rather than on whole
	    # byte groups) also decodes values that live in a trailing, partially
	    # filled group instead of dropping them.
	    msb_first = bits in (4, 6)
	    needed_bytes = (total_bits + 7) // 8
	    stream = np.unpackbits(
	        raw[:needed_bytes],
	        bitorder="big" if msb_first else "little",
	    )[:total_bits]
	    fields = stream.reshape(count, bits)

	    # Per-column place values; reversed when the first bit of a field is
	    # its most significant one. Every sum is < 2**bits <= 128, so uint8
	    # arithmetic cannot overflow.
	    exponents = np.arange(bits)
	    if msb_first:
	        exponents = exponents[::-1]
	    weights = (1 << exponents).astype(np.uint8)
	    return fields @ weights

	def load_quantized_model(path):
	    """Rebuild a T5 model from an xz-compressed quantized checkpoint.

	    The decompressed stream is parsed sequentially (all fields little-endian):

	    * header: ``u8`` version, ``u8`` default bit width, ``u16`` parameter count
	      (the first two are read but not used here);
	    * per parameter: ``u16`` name length, UTF-8 name, ``u8`` rank, one ``u32``
	      per dimension, ``u8`` bit width, then either
	        - bit width < 16: ``f32`` scale, ``f32`` offset ``zp``, ``u32`` payload
	          length and the packed payload, dequantized as ``(q - zp) * scale``;
	        - otherwise: ``u32`` payload length and a float16 payload that is
	          upcast to float32;
	    * trailer: ``u32`` length followed by the model config as JSON.

	    Args:
	        path: Location of the ``.xz`` checkpoint file.

	    Returns:
	        A ``T5ForConditionalGeneration`` with the decoded weights loaded,
	        switched to eval mode.
	    """
	    with lzma.open(path, 'rb') as fh:
	        blob = fh.read()

	    cursor = 0

	    def read(fmt):
	        """Unpack ``fmt`` at the cursor and move the cursor past it."""
	        nonlocal cursor
	        fields = struct.unpack_from(fmt, blob, cursor)
	        cursor += struct.calcsize(fmt)
	        return fields

	    def read_bytes(length):
	        """Return the next ``length`` raw bytes and move the cursor past them."""
	        nonlocal cursor
	        chunk = blob[cursor:cursor + length]
	        cursor += length
	        return chunk

	    _version, _default_bits, num_params = read('<BBH')

	    weights = {}
	    for _ in range(num_params):
	        (name_len,) = read('<H')
	        name = read_bytes(name_len).decode('utf-8')

	        (ndim,) = read('<B')
	        shape = read(f'<{ndim}I')
	        numel = 1 if not shape else int(np.prod(shape))

	        (bits,) = read('<B')

	        if bits >= 16:
	            # Stored as raw float16; upcast so every tensor ends up float32.
	            (fp16_len,) = read('<I')
	            halves = np.frombuffer(read_bytes(fp16_len), dtype=np.float16)
	            values = halves.reshape(shape).astype(np.float32)
	        else:
	            # Affine-quantized payload: recover floats from the packed codes.
	            scale, zp = read('<ff')
	            (packed_len,) = read('<I')
	            codes = unpack_nbits(read_bytes(packed_len), bits, numel)
	            values = ((codes.astype(np.float32) - zp) * scale).reshape(shape)

	        weights[name] = torch.from_numpy(values)

	    # The serialized model configuration follows the last parameter.
	    (config_len,) = read('<I')
	    config_text = read_bytes(config_len).decode('utf-8')

	    net = T5ForConditionalGeneration(T5Config.from_dict(json.loads(config_text)))
	    net.load_state_dict(weights)
	    net.eval()
	    return net

	# Decode the downloaded checkpoint into a ready-to-use model, then fetch the
	# tokenizer published under "google/t5-v1_1-base".
	model = load_quantized_model(model_path)
	tokenizer = AutoTokenizer.from_pretrained(
	    "google/t5-v1_1-base",
	    legacy=False,
	)

	def ask(question):
	    """Return the model's generated answer to ``question`` as plain text.

	    The question is wrapped in a ``"question: ..."`` prompt, tokenized
	    (truncated to 128 tokens) and decoded with beam search.

	    Args:
	        question: The user's question text.

	    Returns:
	        The first generated sequence, decoded with special tokens removed.
	    """
	    prompt = f"question: {question}"
	    encoded = tokenizer(
	        prompt,
	        return_tensors="pt",
	        max_length=128,
	        truncation=True,
	    )

	    generation_args = {
	        "max_new_tokens": 24,
	        "num_beams": 6,
	        "no_repeat_ngram_size": 2,
	        "repetition_penalty": 20.0,
	        "early_stopping": True,
	    }
	    sequences = model.generate(**encoded, **generation_args)

	    first_sequence = sequences[0]
	    return tokenizer.decode(first_sequence, skip_special_tokens=True)

	# Sample Q&A to display.
	# The Markdown is assembled from explicit lines so that the table uses plain
	# `|` delimiters (a backslash before the pipe is an invalid string escape and
	# keeps Markdown from recognising the table) and so that source indentation
	# can never leak into the rendered text.
	sample_qa = "\n".join([
	    "",
	    "## 📊 Sample Questions & Answers",
	    "",
	    "| Question | Answer |",
	    "|----------|--------|",
	    "| How do vaccines work? | Vaccines stimulate the immune system to recognize and fight specific pathogens. |",
	    "| Why do we sneeze? | Sneezes clear irritants from the nasal passages. |",
	    "| What is empathy? | Empathy is the ability to understand and share the feelings of another person. |",
	    "",
	])

	# Gradio UI: intro text, the sample table, a question/answer row, an "Ask"
	# button wired to ask(), and a set of clickable example questions.
	# NOTE(review): the nesting below follows the blank-line grouping of the
	# statements — confirm the Row holds exactly the two textboxes.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🦟 Mosquito - Tiny Knowledge Model")
	    gr.Markdown("A 7.3M parameter model that answers general knowledge questions. Smaller than a mosquito's brain!")

	    gr.Markdown(sample_qa)

	    gr.Markdown("---")
	    gr.Markdown("## Try it yourself:")

	    # Question input and answer output side by side.
	    with gr.Row():
	        question = gr.Textbox(
	            label="Question",
	            placeholder="Why do we dream?",
	        )
	        answer = gr.Textbox(label="Answer")

	    submit_btn = gr.Button("Ask", variant="primary")
	    submit_btn.click(
	        fn=ask,
	        inputs=question,
	        outputs=answer,
	    )

	    # Each example is a one-element row feeding the question textbox.
	    gr.Examples(
	        examples=[
	            [prompt]
	            for prompt in (
	                "How do vaccines work?",
	                "Why do we sneeze?",
	                "What is empathy?",
	                "Why is the sky blue?",
	                "What causes earthquakes?",
	            )
	        ],
	        inputs=question,
	    )

	demo.launch()