| """ |
| TERA V2 — Hugging Face Space (Inference Only) |
| """ |
|
|
| import os |
| import json |
| import numpy as np |
| import gradio as gr |
|
|
| os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" |
| import tensorflow as tf |
|
|
| from tokenizer import TeraTokenizer |
| from model import TeraLM |
|
|
# --- Model bootstrap --------------------------------------------------------
# Executed once at start-up: read the hyperparameters, restore the tokenizer,
# build the network and load the trained weights from disk.
print("Loading TERA V2...")


def _config_value(key, alias, default):
    """Look up *key* in the config, then *alias*, then fall back to *default*."""
    return config.get(key, config.get(alias, default))


with open("model_config.json", "r") as config_file:
    config = json.load(config_file)

# Each hyperparameter may be stored under either of two key names.
vocab_size = _config_value("vocab_size", "vocab", 1504)
d_model = _config_value("d_model", "dim", 128)
n_heads = _config_value("n_heads", "heads", 4)
n_layers = _config_value("n_layers", "layers", 3)
max_seq = _config_value("max_seq", "seq_len", 32)

print(f" Config: vocab={vocab_size}, d={d_model}, heads={n_heads}, layers={n_layers}, seq={max_seq}")

tokenizer = TeraTokenizer()
tokenizer.load("tokenizer.json")
print(f" Tokenizer vocab: {tokenizer.size()}")

# Dropout is disabled because this script only performs inference.
model = TeraLM(
    vocab_size=vocab_size,
    d_model=d_model,
    n_heads=n_heads,
    n_layers=n_layers,
    max_seq=max_seq,
    drop_rate=0.0,
)

# One forward pass on an all-zero batch before loading the checkpoint;
# presumably this makes the model create its variables so that
# load_weights() has something to restore into (confirm against TeraLM).
warmup_batch = tf.zeros((1, max_seq), dtype=tf.int32)
model(warmup_batch)

model.load_weights("model.weights.h5")
print(" Checkpoint restored ✅")

# Context-window length used by generate() to clip its input.
MAX_SEQ = max_seq
|
|
def generate(prompt, max_tokens=40, temperature=0.3, top_k=8, top_p=0.85, rep_penalty=1.4):
    """Sample an answer to *prompt* from the model, one token at a time.

    The prompt is wrapped as ``Q: <prompt>``, a newline, then ``A:``, encoded
    without special tokens, and clipped to its last ``MAX_SEQ`` tokens. Each
    step feeds the most recent ``MAX_SEQ`` tokens to the model and samples the
    next token from the final position's logits after applying, in order:
    repetition penalty, temperature, top-k filtering, softmax, and top-p
    (nucleus) filtering.

    Args:
        prompt: User text to answer.
        max_tokens: Upper bound on generated tokens (cast to ``int``).
        temperature: Logit divisor; values below 0.01 are treated as 0.01.
        top_k: Number of highest-scoring tokens kept (cast to ``int``);
            a value of 0 or less disables this filter.
        top_p: Cumulative-probability threshold for nucleus filtering.
        rep_penalty: Factor that lowers the score of every token already
            present in the prompt or in the output so far.

    Returns:
        The decoded continuation (prompt tokens excluded), stripped of
        leading and trailing whitespace.
    """
    prompt_ids = tokenizer.encode(f"Q: {prompt}\nA:", add_special=False)[-MAX_SEQ:]
    tokens = list(prompt_ids)
    penalized = set(prompt_ids)

    for _step in range(int(max_tokens)):
        # Only the newest MAX_SEQ tokens are shown to the model.
        window = tf.constant([tokens[-MAX_SEQ:]], dtype=tf.int32)
        scores = model(window, training=False)[0, -1, :].numpy().astype(np.float64)

        # Repetition penalty: positive scores shrink, the rest are pushed
        # further down, so an already-seen token always becomes less likely.
        for token_id in penalized:
            score = scores[token_id]
            scores[token_id] = score / rep_penalty if score > 0 else score * rep_penalty

        scores = scores / max(temperature, 0.01)

        # Top-k: everything outside the k best scores is set to -1e9.
        k = int(top_k)
        if k > 0:
            best = np.argsort(scores)[-k:]
            limited = np.full_like(scores, -1e9)
            limited[best] = scores[best]
            scores = limited

        # Softmax, shifted by the maximum for numerical stability.
        weights = np.exp(scores - np.max(scores))
        weights = weights / weights.sum()

        # Top-p: keep the shortest high-probability prefix whose cumulative
        # mass reaches top_p, then renormalise over the survivors.
        order = np.argsort(weights)[::-1]
        n_keep = np.searchsorted(np.cumsum(weights[order]), top_p) + 1
        nucleus = order[:n_keep]
        distribution = np.zeros_like(weights)
        distribution[nucleus] = weights[nucleus]
        distribution = distribution / distribution.sum()

        choice = np.random.choice(len(distribution), p=distribution)
        if choice == tokenizer.eos_id:
            break

        choice = int(choice)
        tokens.append(choice)
        penalized.add(choice)

    # Decode only what was generated, not the echoed prompt.
    return tokenizer.decode(tokens[len(prompt_ids):]).strip()
|
|
# --- Gradio interface -------------------------------------------------------
# Left column: prompt, answer and send button. Right column: sampling controls
# whose values are passed to generate() as its keyword-default parameters.
with gr.Blocks(title="TERA V2", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # ⭐ TERA V2
    **A language model built from scratch — no pretrained weights, no transformers.**

    *Architecture: Time Mix + Token Shift + GroupNorm + Channel Mix + Squared ReLU*
    """)

    with gr.Row():
        with gr.Column(scale=3):
            user_input = gr.Textbox(
                label="Your message",
                placeholder="Ask me anything...",
                lines=2,
            )
            output_box = gr.Textbox(label="TERA V2 says", lines=4, interactive=False)
            send_btn = gr.Button("Send ⭐", variant="primary")

        with gr.Column(scale=1):
            max_tokens = gr.Slider(minimum=10, maximum=60, value=40, step=5, label="Max tokens")
            temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.05, label="Temperature")
            top_k = gr.Slider(minimum=3, maximum=20, value=8, step=1, label="Top-K")
            top_p = gr.Slider(minimum=0.3, maximum=1.0, value=0.85, step=0.05, label="Top-P")
            rep_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.4, step=0.05, label="Rep penalty")

    # The button click and pressing Enter in the textbox run the same
    # generation call; the list order matches generate()'s parameter order.
    generate_inputs = [user_input, max_tokens, temperature, top_k, top_p, rep_penalty]
    for register in (send_btn.click, user_input.submit):
        register(fn=generate, inputs=generate_inputs, outputs=output_box)

    gr.Markdown("---\n*TERA V2 by Vedaco • ~929K parameters • Trained from scratch*")


demo.launch()
|
|