| """ |
Test the TinyV4 base model.

- Load from the HuggingFace Hub (ukung/tinyv4) or from a local folder.
- Run a forward pass and generate text (ID & EN).
| """ |
| import torch |
| import json |
| import os |
| import sys |
|
|
| |
| |
| |
# Source selection: True loads from the HuggingFace Hub repo named below,
# False loads from the directory that contains this script.
USE_HUB = True
HF_REPO = "ukung/tinyv4"

if not USE_HUB:
    # Local checkpoint: put this file's directory on sys.path so the
    # modeling module sitting next to it can be imported.
    MODEL_DIR = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, MODEL_DIR)
    from modeling_tinyv4 import TinyV4, TinyV4Config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    model = TinyV4.from_pretrained(MODEL_DIR)
else:
    from transformers import AutoTokenizer, AutoModel

    tokenizer = AutoTokenizer.from_pretrained(HF_REPO)
    # NOTE(review): trust_remote_code=True lets code hosted in the Hub repo
    # run locally; only use it with a repo you trust.
    model = AutoModel.from_pretrained(HF_REPO, trust_remote_code=True)

# Both sources get the same post-processing: make the output head use the
# embedding weight tensor, then switch the model to eval mode.
model.head.weight = model.embed.weight
model.eval()
|
|
# Sum the element counts of everything model.parameters() yields and report
# the total, both exact and in millions.
n_params = sum(p.numel() for p in model.parameters())
print(f"✅ Model loaded: {n_params:,} params ({n_params/1e6:.2f}M)")
|
|
| |
| |
| |
# Keep a module-level handle on the model config (later sections read
# cfg.vocab_size and cfg.max_len) and print the fields of interest.
cfg = model.config
print(f"✅ Config: dim={cfg.dim}, depth={cfg.depth}, vocab={cfg.vocab_size}")
print(f" MoE: {cfg.n_routed} routed + {cfg.n_shared} shared, {cfg.n_active} active")
print(f" MTP: depth={cfg.mtp_depth}, max_len={cfg.max_len}")
|
|
| |
| |
| |
# Check that the output head and the embedding point at the same storage.
# An explicit raise (rather than a bare `assert`) keeps the check active
# when Python runs with -O; the exception type is still AssertionError.
if model.head.weight.data_ptr() != model.embed.weight.data_ptr():
    raise AssertionError("❌ Embedding tie FAILED!")
print("✅ Embedding tie: OK")
|
|
| |
| |
| |
# Smoke-test the forward pass on random token ids of shape (2, 64) drawn
# from [0, cfg.vocab_size).
dummy = torch.randint(0, cfg.vocab_size, (2, 64))
with torch.no_grad():
    # The model call is unpacked into three values; the last two may be None
    # (both are None-checked below).
    logits, mtp, bal = model(dummy)

# NaN/Inf are only reported here; they do not abort the script.
has_nan = torch.isnan(logits).any().item()
has_inf = torch.isinf(logits).any().item()
print(f"✅ Forward pass: logits={logits.shape}, NaN={has_nan}, Inf={has_inf}")
if mtp is not None:
    print(f" MTP logits: {mtp.shape}, NaN={torch.isnan(mtp).any().item()}")
print(f" Balance loss: {bal.item():.6f}" if bal is not None else " Balance loss: None")
|
|
| |
| |
| |
@torch.no_grad()
def generate(prompt, max_new_tokens=60, temperature=0.8, top_k=40):
    """Generate a continuation of *prompt* with the module-level ``model``.

    Uses the module-level ``tokenizer``, ``model`` and ``cfg``.

    Args:
        prompt: Text to continue; encoded with ``tokenizer.encode``.
        max_new_tokens: Upper bound on the number of tokens appended.
        temperature: Divisor applied to the last-position logits before
            sampling. A value <= 0 selects the arg-max token (greedy
            decoding) instead of dividing by zero.
        top_k: Number of highest logits kept before sampling. ``None`` or a
            value <= 0 disables the filter.

    Returns:
        The decoded prompt plus generated tokens, with special tokens skipped.
    """
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    greedy = temperature <= 0
    use_top_k = top_k is not None and top_k > 0

    for _ in range(max_new_tokens):
        # Feed at most the last cfg.max_len tokens to the model.
        window = input_ids[:, -cfg.max_len:]
        logits, _, _ = model(window)
        logits = logits[:, -1, :]

        if greedy:
            next_token = torch.argmax(logits, dim=-1, keepdim=True)
        else:
            # The division yields a new tensor, so the in-place masking below
            # does not touch the model's output.
            logits = logits / temperature

            if use_top_k:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                # Everything below the k-th largest logit is excluded.
                logits[logits < v[:, [-1]]] = float("-inf")

            probs = torch.softmax(logits, dim=-1)

            # Best-effort fallback: if the distribution is not finite, sample
            # uniformly rather than letting torch.multinomial fail.
            if torch.isnan(probs).any() or torch.isinf(probs).any():
                probs = torch.ones_like(probs) / probs.size(-1)

            next_token = torch.multinomial(probs, 1)

        input_ids = torch.cat([input_ids, next_token], dim=1)

        # .item() relies on a single sequence being generated at a time.
        if next_token.item() == tokenizer.eos_token_id:
            break

    return tokenizer.decode(input_ids[0], skip_special_tokens=True)
|
|
# Generation demo: run a few English (EN) and Indonesian (ID) prompts through
# generate() and print each prompt with its continuation.
print()
print("=" * 60)
# NOTE(review): the leading "π" looks like a mis-decoded emoji; the original
# glyph cannot be recovered from this file, so the text is left untouched.
print("π GENERATION TEST")
print("=" * 60)

prompts = [
    ("EN", "Once upon a time,"),
    ("EN", "There was a little"),
    ("EN", "In a small village,"),
    ("ID", "Pada suatu hari,"),
    ("ID", "Di sebuah desa kecil,"),
    ("ID", "Alkisah, tersebutlah"),
]

for lang, prompt in prompts:
    output = generate(prompt, max_new_tokens=50, temperature=0.8)
    print(f" [{lang}] {prompt}")
    print(f" → {output}")
    print()

# Reached only if nothing above raised.
print("=" * 60)
print("✅ ALL TESTS PASSED")
print("=" * 60)
|
|