import json
from pathlib import Path

import gradio as gr
import torch
from huggingface_hub import hf_hub_download

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MODEL_NAME = "FlameF0X/i3-80m"
LOCAL_SAFETENSORS = Path("model.safetensors")
LOCAL_BIN = Path("pytorch_model.bin")
VOCAB_JSON = Path("chunk_vocab_combined.json")

# Read the chunk vocabulary so the embedding table can be sized to match.
with open(VOCAB_JSON, "r") as f:
    vocab_data = json.load(f)
VOCAB_SIZE = vocab_data["vocab_size"]
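# Note: only the "vocab_size" field is consumed directly here; the
# ChunkTokenizer below reads its chunk-to-id mapping from the same file.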

# i3Model and ChunkTokenizer are defined in the local app_classes module.
from app_classes import i3Model, ChunkTokenizer

tokenizer = ChunkTokenizer()
tokenizer.load(VOCAB_JSON)
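# The tokenizer and the model are built from the same vocab file, so the
# token ids the tokenizer emits line up with the model's embedding table.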

model = i3Model(
    vocab_size=VOCAB_SIZE,
    d_model=512,
    n_heads=16,
    max_seq_len=256,
    d_state=32,
).to(DEVICE)
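# Sanity check on the hyperparameters: d_model=512 split across n_heads=16
# gives 32-dim heads, and d_state=32 suggests a state-space-style mixing
# layer; the actual architecture lives in app_classes.i3Model.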

# Weight resolution order: local safetensors, then local .bin, then a
# download from the Hugging Face Hub.
try:
    if LOCAL_SAFETENSORS.exists():
        from safetensors.torch import load_file

        state_dict = load_file(LOCAL_SAFETENSORS)
        model.load_state_dict(state_dict)
        print("✅ Loaded weights from local safetensors")
    elif LOCAL_BIN.exists():
        state_dict = torch.load(LOCAL_BIN, map_location=DEVICE, weights_only=False)
        model.load_state_dict(state_dict)
        print("✅ Loaded weights from local .bin")
    else:
        print("⚡ Downloading model from HuggingFace...")
        bin_file = hf_hub_download(repo_id=MODEL_NAME, filename="pytorch_model.bin")
        state_dict = torch.load(bin_file, map_location=DEVICE, weights_only=False)
        model.load_state_dict(state_dict)
        print("✅ Loaded weights from HuggingFace")
except Exception as e:
    raise RuntimeError(f"Failed to load model weights: {e}") from e
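# Security note: weights_only=False lets torch.load unpickle arbitrary
# objects, so only load checkpoints from trusted sources (here, local files
# or the model's own Hub repo).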

model.eval()

@torch.inference_mode()  # no gradients are needed at generation time
def generate_text(prompt, max_tokens=100, temperature=0.8, top_k=40):
    """Encode the prompt, autoregressively sample, and decode back to text."""
    idx = torch.tensor([tokenizer.encode(prompt)], dtype=torch.long).to(DEVICE)
    out_idx = model.generate(idx, max_new_tokens=max_tokens, temperature=temperature, top_k=top_k)
    return tokenizer.decode(out_idx[0].cpu())
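# Quick smoke test (uncomment to verify generation before launching the UI;
# the prompt string is purely illustrative):
# print(generate_text("Once upon a time", max_tokens=20))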

with gr.Blocks() as demo:
    gr.Markdown("### i3-80M Text Generation")

    with gr.Row():
        prompt_input = gr.Textbox(label="Prompt", placeholder="Type something...")
        max_tokens_input = gr.Slider(10, 500, value=100, step=10, label="Max Tokens")
        temp_input = gr.Slider(0.1, 2.0, value=0.8, step=0.05, label="Temperature")
        topk_input = gr.Slider(1, 100, value=40, step=1, label="Top-k Sampling")

    output_text = gr.Textbox(label="Generated Text")
    generate_btn = gr.Button("Generate")

    generate_btn.click(
        generate_text,
        inputs=[prompt_input, max_tokens_input, temp_input, topk_input],
        outputs=[output_text],
    )

    with gr.Accordion("Dev Panel: Model Info", open=False):
        gr.Markdown(f"**Device:** {DEVICE}")
        gr.Markdown(f"**Vocab size:** {VOCAB_SIZE}")
        total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        gr.Markdown(f"**Total Parameters:** {total_params:,} ({total_params / 1e6:.2f}M)")

if __name__ == "__main__":
    demo.launch()
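# Assumption: default launch settings suffice for this demo; when running
# locally, Gradio can also expose a temporary public URL via
# demo.launch(share=True).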