# Hugging Face Space: Mosquito — tiny quantized T5 Q&A demo.
# (Removed non-code page-header residue "Spaces: / Sleeping" left over from a web scrape.)
import gradio as gr
import torch
import numpy as np
import struct
import lzma
import json
from huggingface_hub import hf_hub_download
from transformers import T5Config, T5ForConditionalGeneration, AutoTokenizer

# Fetch the LZMA-compressed, custom-quantized checkpoint from the Hub.
# hf_hub_download caches the file locally and returns its path.
model_path = hf_hub_download(repo_id="ag14850/Mosquito", filename="mosquito_tiny.bin.xz")
def unpack_nbits(data, bits, count):
    """Decode `count` unsigned integers of width `bits` from packed bytes.

    Supported widths: 8 (one value per byte), 4 (two per byte, high nibble
    first), 6 (four values per 3 bytes, big-endian bit order), and 5 or 7
    (eight values per 5- or 7-byte little-endian word, LSB-first).
    Trailing bytes that do not form a complete group are ignored; the
    result is truncated to `count`.

    Returns a 1-D np.uint8 array. An unsupported width yields an empty array.
    """
    if bits == 8:
        # Fast path: bytes map 1:1 onto uint8 values.
        return np.frombuffer(data, dtype=np.uint8)[:count]

    values = []
    if bits == 4:
        for byte in data:
            values.extend(((byte >> 4) & 0x0F, byte & 0x0F))
    elif bits == 6:
        # Consume complete 3-byte groups only (same as breaking on a partial group).
        pos, end = 0, len(data)
        while pos + 2 < end:
            b0, b1, b2 = data[pos:pos + 3]
            values.extend((
                (b0 >> 2) & 0x3F,
                ((b0 & 0x03) << 4) | ((b1 >> 4) & 0x0F),
                ((b1 & 0x0F) << 2) | ((b2 >> 6) & 0x03),
                b2 & 0x3F,
            ))
            pos += 3
    elif bits == 5:
        # Each complete 5-byte little-endian word holds eight 5-bit fields.
        for pos in range(0, len(data) - 4, 5):
            word = int.from_bytes(data[pos:pos + 5], 'little')
            values.extend((word >> (5 * j)) & 0x1F for j in range(8))
    elif bits == 7:
        # Each complete 7-byte little-endian word holds eight 7-bit fields.
        for pos in range(0, len(data) - 6, 7):
            word = int.from_bytes(data[pos:pos + 7], 'little')
            values.extend((word >> (7 * j)) & 0x7F for j in range(8))

    return np.array(values[:count], dtype=np.uint8)
def load_quantized_model(path):
    """Rebuild a T5ForConditionalGeneration from the custom quantized checkpoint.

    The file at `path` is LZMA-compressed. After decompression, the layout
    (little-endian, as read below) is:
      header:  <B version> <B default_bits> <H num_params>
      per tensor (num_params times):
        <H name_len> name-utf8  <B ndim> <I dim>*ndim  <B bits>
        if bits < 16:  <f scale> <f zero_point> <I packed_len> packed-bytes
        else:          <I byte_len> raw-float16-bytes
      trailer: <I config_len> JSON-serialized T5 config
    Returns the reconstructed model in eval mode.
    """
    with lzma.open(path, 'rb') as f:
        data = f.read()
    offset = 0
    # Header: format version, default bit width, number of tensors.
    version, default_bits, num_params = struct.unpack_from('<BBH', data, offset)
    offset += 4
    state_dict = {}
    for _ in range(num_params):
        # Length-prefixed UTF-8 tensor name.
        name_len = struct.unpack_from('<H', data, offset)[0]
        offset += 2
        name = data[offset:offset + name_len].decode('utf-8')
        offset += name_len
        # Shape: one byte for rank, then a uint32 per dimension.
        ndim = struct.unpack_from('<B', data, offset)[0]
        offset += 1
        shape = tuple(struct.unpack_from('<I', data, offset + i*4)[0] for i in range(ndim))
        offset += ndim * 4
        numel = int(np.prod(shape)) if shape else 1  # rank-0 tensors store one element
        bits = struct.unpack_from('<B', data, offset)[0]
        offset += 1
        if bits < 16:
            # Affine-quantized payload: dequantize as (q - zero_point) * scale.
            scale, zp = struct.unpack_from('<ff', data, offset)
            offset += 8
            packed_len = struct.unpack_from('<I', data, offset)[0]
            offset += 4
            packed_data = data[offset:offset + packed_len]
            offset += packed_len
            quantized = unpack_nbits(packed_data, bits, numel)
            tensor_data = ((quantized.astype(np.float32) - zp) * scale).reshape(shape)
            state_dict[name] = torch.from_numpy(tensor_data)
        else:
            # Unquantized payload stored as raw float16; widened to float32 here.
            fp16_len = struct.unpack_from('<I', data, offset)[0]
            offset += 4
            fp16_data = data[offset:offset + fp16_len]
            offset += fp16_len
            tensor_data = np.frombuffer(fp16_data, dtype=np.float16).reshape(shape)
            state_dict[name] = torch.from_numpy(tensor_data.astype(np.float32))
    # Trailer: the model config as JSON, used to build an empty T5 to load into.
    config_len = struct.unpack_from('<I', data, offset)[0]
    offset += 4
    config_json = data[offset:offset + config_len].decode('utf-8')
    config = T5Config.from_dict(json.loads(config_json))
    model = T5ForConditionalGeneration(config)
    model.load_state_dict(state_dict)
    model.eval()
    return model
# Load the model and tokenizer once at import time so the app serves instantly.
model = load_quantized_model(model_path)
# The checkpoint ships no tokenizer; reuse the base T5 v1.1 tokenizer.
# NOTE(review): assumes the quantized model was trained with this exact
# tokenizer/vocab — verify against the training repo.
tokenizer = AutoTokenizer.from_pretrained("google/t5-v1_1-base", legacy=False)
def ask(question):
    """Generate a short answer to `question` using the quantized T5 model.

    The question is wrapped in the "question: ..." prompt format, encoded
    (truncated to 128 tokens), decoded with beam search, and returned as a
    plain string with special tokens stripped.
    """
    encoded = tokenizer(f"question: {question}", return_tensors="pt", max_length=128, truncation=True)
    # Beam search with aggressive anti-repetition settings — presumably to
    # keep a 7.3M-parameter model from looping on itself.
    generated = model.generate(
        **encoded,
        max_new_tokens=24,
        num_beams=6,
        no_repeat_ngram_size=2,
        repetition_penalty=20.0,
        early_stopping=True,
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
# Static markdown table of example Q&A pairs, rendered verbatim in the UI below.
sample_qa = """
## 📊 Sample Questions & Answers
| Question | Answer |
|----------|--------|
| How do vaccines work? | Vaccines stimulate the immune system to recognize and fight specific pathogens. |
| Why do we sneeze? | Sneezes clear irritants from the nasal passages. |
| What is empathy? | Empathy is the ability to understand and share the feelings of another person. |
"""
# Build and launch the Gradio UI. Component order here defines page layout.
with gr.Blocks() as demo:
    # Title, blurb, and the static sample table.
    gr.Markdown("# 🦟 Mosquito - Tiny Knowledge Model")
    gr.Markdown("A **7.3M parameter** model that answers general knowledge questions. Smaller than a mosquito's brain!")
    gr.Markdown(sample_qa)
    gr.Markdown("---")
    gr.Markdown("## Try it yourself:")
    # Interactive question/answer pair, side by side.
    with gr.Row():
        question_box = gr.Textbox(label="Question", placeholder="Why do we dream?")
        answer_box = gr.Textbox(label="Answer")
    ask_button = gr.Button("Ask", variant="primary")
    ask_button.click(fn=ask, inputs=question_box, outputs=answer_box)
    # One-click example prompts that populate the question box.
    gr.Examples(
        examples=[
            ["How do vaccines work?"],
            ["Why do we sneeze?"],
            ["What is empathy?"],
            ["Why is the sky blue?"],
            ["What causes earthquakes?"],
        ],
        inputs=question_box,
    )
demo.launch()