# Mosquito-Demo / app.py
# ag14850's picture
# Update app.py
# e2932cc verified
import gradio as gr
import torch
import numpy as np
import struct
import lzma
import json
from huggingface_hub import hf_hub_download
from transformers import T5Config, T5ForConditionalGeneration, AutoTokenizer
# Fetch the compressed, quantized checkpoint from the Hugging Face Hub.
# The returned value is handed to load_quantized_model() further down.
model_path = hf_hub_download(
    repo_id="ag14850/Mosquito",
    filename="mosquito_tiny.bin.xz",
)
def unpack_nbits(data, bits, count):
    """Unpack unsigned ``bits``-wide integers from a packed byte buffer.

    Packed layouts, by width:

    * 8 bits: one value per byte.
    * 4 and 6 bits: values sit back to back in the byte stream, most
      significant bit first (4-bit: high nibble then low nibble; 6-bit:
      four values per three bytes).
    * 5 and 7 bits: every ``bits`` bytes form one little-endian integer
      that holds eight values, lowest-order value first.

    Trailing bytes that do not form a complete group are ignored.

    Args:
        data: Bytes-like object holding the packed values.
        bits: Width of each value in bits; one of 4, 5, 6, 7 or 8.
        count: Maximum number of values to return (padding values at the
            end of the buffer are cut off).

    Returns:
        One-dimensional ``uint8`` array with at most ``count`` values; it is
        shorter when ``data`` does not contain that many complete values.

    Raises:
        ValueError: If ``bits`` is not a supported width. (Previously an
            unsupported width silently produced an empty array.)
    """
    raw = np.frombuffer(data, dtype=np.uint8)
    if bits == 8:
        return raw[:count]

    # width -> (bytes per complete group, bit order of the packed stream)
    layouts = {
        4: (1, "big"),
        6: (3, "big"),
        5: (5, "little"),
        7: (7, "little"),
    }
    if bits not in layouts:
        raise ValueError(f"unsupported bit width: {bits}")
    group_bytes, bit_order = layouts[bits]

    # Only whole groups are decoded; a short tail is dropped.
    usable = raw.size - raw.size % group_bytes

    # Expand to individual bits in stream order, one row per packed value.
    bit_rows = np.unpackbits(raw[:usable], bitorder=bit_order).reshape(-1, bits)

    # Recombine each row of bits into its integer. For the little-endian
    # layouts the first bit in a row is the least significant one; for the
    # MSB-first layouts it is the most significant one. Values never exceed
    # 127, so uint8 arithmetic cannot overflow.
    weights = (1 << np.arange(bits)).astype(np.uint8)
    if bit_order == "big":
        weights = weights[::-1]

    return bit_rows.dot(weights)[:count]
def load_quantized_model(path):
    """Rebuild a T5 model from an LZMA-compressed, custom-packed checkpoint.

    File layout as parsed below (all integers little-endian):

    * header: version (u8), default bit width (u8), parameter count (u16);
      the first two are read but not otherwise used here.
    * per parameter: name length (u16) + UTF-8 name, ndim (u8) + ``ndim``
      u32 dimensions, bit width (u8), then either
        - width < 16: scale and zero point (two f32), payload length (u32),
          packed integers; dequantized as ``(q - zero_point) * scale``;
        - otherwise: payload length (u32) followed by raw float16 values.
    * trailer: JSON length (u32) + UTF-8 JSON holding the T5 config.

    Args:
        path: Location of the ``.xz`` checkpoint file.

    Returns:
        A ``T5ForConditionalGeneration`` with the decoded float32 weights
        loaded, switched to eval mode.
    """
    with lzma.open(path, 'rb') as stream:
        blob = stream.read()

    cursor = 0

    def take(fmt):
        # Unpack one struct record at the cursor and step past it.
        nonlocal cursor
        fields = struct.unpack_from(fmt, blob, cursor)
        cursor += struct.calcsize(fmt)
        return fields

    def take_bytes(length):
        # Slice `length` raw bytes at the cursor and step past them.
        nonlocal cursor
        chunk = blob[cursor:cursor + length]
        cursor += length
        return chunk

    _version, _default_bits, num_params = take('<BBH')

    weights = {}
    for _ in range(num_params):
        (name_len,) = take('<H')
        name = take_bytes(name_len).decode('utf-8')

        (ndim,) = take('<B')
        shape = take(f'<{ndim}I')
        numel = int(np.prod(shape)) if shape else 1

        (bits,) = take('<B')
        if bits >= 16:
            # Stored unquantized as float16; widen to float32 for torch.
            (fp16_len,) = take('<I')
            halves = np.frombuffer(take_bytes(fp16_len), dtype=np.float16)
            weights[name] = torch.from_numpy(
                halves.reshape(shape).astype(np.float32)
            )
            continue

        # Quantized tensor: affine parameters first, then the packed payload.
        scale, zp = take('<ff')
        (packed_len,) = take('<I')
        quantized = unpack_nbits(take_bytes(packed_len), bits, numel)
        dequantized = (quantized.astype(np.float32) - zp) * scale
        weights[name] = torch.from_numpy(dequantized.reshape(shape))

    # The model configuration is appended after the last parameter.
    (config_len,) = take('<I')
    config_dict = json.loads(take_bytes(config_len).decode('utf-8'))

    model = T5ForConditionalGeneration(T5Config.from_dict(config_dict))
    model.load_state_dict(weights)
    model.eval()
    return model
# Decode the downloaded checkpoint into a model once at startup, and load the
# tokenizer published with google/t5-v1_1-base to encode/decode text for it.
model = load_quantized_model(model_path)
tokenizer = AutoTokenizer.from_pretrained(
    "google/t5-v1_1-base",
    legacy=False,
)
def ask(question):
    """Answer ``question`` with the module-level model and tokenizer.

    The question is wrapped in a ``"question: ..."`` prompt, tokenized
    (truncated to 128 tokens) and decoded with 6-beam search limited to
    24 new tokens.

    Args:
        question: The user's question as plain text.

    Returns:
        The first generated sequence decoded to text, with special tokens
        removed.
    """
    prompt = f"question: {question}"
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=128,
        truncation=True,
    )

    # Decoding settings, kept together so they are easy to scan.
    generation_settings = {
        "max_new_tokens": 24,
        "num_beams": 6,
        "no_repeat_ngram_size": 2,
        "repetition_penalty": 20.0,
        "early_stopping": True,
    }
    generated = model.generate(**encoded, **generation_settings)

    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)
# Static Markdown (a heading plus a three-row table of example questions and
# answers) that is rendered near the top of the demo page via gr.Markdown.
sample_qa = """
## 📊 Sample Questions & Answers
| Question | Answer |
|----------|--------|
| How do vaccines work? | Vaccines stimulate the immune system to recognize and fight specific pathogens. |
| Why do we sneeze? | Sneezes clear irritants from the nasal passages. |
| What is empathy? | Empathy is the ability to understand and share the feelings of another person. |
"""
# Page layout: title and description, the sample Q&A table, then an
# interactive area where a question is sent to ask() and the answer is shown.
# NOTE(review): the source had lost its indentation; the Row is assumed to
# hold only the two textboxes, with the button and examples below it.
with gr.Blocks() as demo:
    gr.Markdown("# 🦟 Mosquito - Tiny Knowledge Model")
    gr.Markdown(
        "A **7.3M parameter** model that answers general knowledge questions. "
        "Smaller than a mosquito's brain!"
    )
    gr.Markdown(sample_qa)
    gr.Markdown("---")
    gr.Markdown("## Try it yourself:")

    # Question input and answer output side by side.
    with gr.Row():
        question = gr.Textbox(label="Question", placeholder="Why do we dream?")
        answer = gr.Textbox(label="Answer")

    # Clicking the button runs the model on the current question text.
    submit_btn = gr.Button("Ask", variant="primary")
    submit_btn.click(fn=ask, inputs=question, outputs=answer)

    # Example questions that fill the question box; each example is a
    # one-element row because there is a single input component.
    gr.Examples(
        examples=[
            [prompt]
            for prompt in (
                "How do vaccines work?",
                "Why do we sneeze?",
                "What is empathy?",
                "Why is the sky blue?",
                "What causes earthquakes?",
            )
        ],
        inputs=question,
    )

demo.launch()