""" TERA V2 — Hugging Face Space (Inference Only) """ import os import json import numpy as np import gradio as gr os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" import tensorflow as tf from tokenizer import TeraTokenizer from model import TeraLM print("Loading TERA V2...") with open("model_config.json", "r") as f: config = json.load(f) vocab_size = config.get("vocab_size", config.get("vocab", 1504)) d_model = config.get("d_model", config.get("dim", 128)) n_heads = config.get("n_heads", config.get("heads", 4)) n_layers = config.get("n_layers", config.get("layers", 3)) max_seq = config.get("max_seq", config.get("seq_len", 32)) print(f" Config: vocab={vocab_size}, d={d_model}, heads={n_heads}, layers={n_layers}, seq={max_seq}") tokenizer = TeraTokenizer() tokenizer.load("tokenizer.json") print(f" Tokenizer vocab: {tokenizer.size()}") model = TeraLM( vocab_size=vocab_size, d_model=d_model, n_heads=n_heads, n_layers=n_layers, max_seq=max_seq, drop_rate=0.0, ) dummy = tf.zeros((1, max_seq), dtype=tf.int32) _ = model(dummy) # ---- LOAD H5 WEIGHTS ---- model.load_weights("model.weights.h5") print(" Checkpoint restored ✅") MAX_SEQ = max_seq def generate(prompt, max_tokens=40, temperature=0.3, top_k=8, top_p=0.85, rep_penalty=1.4): formatted = f"Q: {prompt}\nA:" ids = tokenizer.encode(formatted, add_special=False) input_ids = ids[-MAX_SEQ:] generated = list(input_ids) seen = set(input_ids) for _ in range(int(max_tokens)): inp = tf.constant([generated[-MAX_SEQ:]], dtype=tf.int32) logits = model(inp, training=False) next_logits = logits[0, -1, :].numpy().astype(np.float64) for tid in seen: if next_logits[tid] > 0: next_logits[tid] /= rep_penalty else: next_logits[tid] *= rep_penalty next_logits = next_logits / max(temperature, 0.01) top_k_int = int(top_k) if top_k_int > 0: top_k_idx = np.argsort(next_logits)[-top_k_int:] mask = np.full_like(next_logits, -1e9) mask[top_k_idx] = next_logits[top_k_idx] next_logits = mask probs = np.exp(next_logits - np.max(next_logits)) probs = probs / probs.sum() sorted_idx = np.argsort(probs)[::-1] cum = np.cumsum(probs[sorted_idx]) cutoff = np.searchsorted(cum, top_p) + 1 allowed = sorted_idx[:cutoff] filtered = np.zeros_like(probs) filtered[allowed] = probs[allowed] filtered = filtered / filtered.sum() next_id = np.random.choice(len(filtered), p=filtered) if next_id == tokenizer.eos_id: break generated.append(int(next_id)) seen.add(int(next_id)) response = tokenizer.decode(generated[len(input_ids):]) return response.strip() with gr.Blocks(title="TERA V2", theme=gr.themes.Soft()) as demo: gr.Markdown(""" # ⭐ TERA V2 **A language model built from scratch — no pretrained weights, no transformers.** *Architecture: Time Mix + Token Shift + GroupNorm + Channel Mix + Squared ReLU* """) with gr.Row(): with gr.Column(scale=3): user_input = gr.Textbox(label="Your message", placeholder="Ask me anything...", lines=2) output_box = gr.Textbox(label="TERA V2 says", lines=4, interactive=False) send_btn = gr.Button("Send ⭐", variant="primary") with gr.Column(scale=1): max_tokens = gr.Slider(10, 60, value=40, step=5, label="Max tokens") temperature = gr.Slider(0.1, 1.0, value=0.3, step=0.05, label="Temperature") top_k = gr.Slider(3, 20, value=8, step=1, label="Top-K") top_p = gr.Slider(0.3, 1.0, value=0.85, step=0.05, label="Top-P") rep_penalty = gr.Slider(1.0, 2.0, value=1.4, step=0.05, label="Rep penalty") send_btn.click(fn=generate, inputs=[user_input, max_tokens, temperature, top_k, top_p, rep_penalty], outputs=output_box) user_input.submit(fn=generate, inputs=[user_input, max_tokens, temperature, top_k, top_p, rep_penalty], outputs=output_box) gr.Markdown("---\n*TERA V2 by Vedaco • ~929K parameters • Trained from scratch*") demo.launch()