Tera.v2 / app.py
vedaco's picture
Upload app.py with huggingface_hub
4446404 verified
"""
TERA V2 — Hugging Face Space (Inference Only)
"""
import os
import json
import numpy as np
import gradio as gr
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
from tokenizer import TeraTokenizer
from model import TeraLM
# ---- LOAD CONFIG, TOKENIZER AND MODEL (runs once at import time) ----
print("Loading TERA V2...")

# JSON is UTF-8 by specification; pin the encoding so the read does not
# depend on the platform's default locale encoding.
with open("model_config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# Each hyperparameter is looked up under its primary key, then under an
# alternative key, and finally falls back to a built-in default.
vocab_size = config.get("vocab_size", config.get("vocab", 1504))
d_model = config.get("d_model", config.get("dim", 128))
n_heads = config.get("n_heads", config.get("heads", 4))
n_layers = config.get("n_layers", config.get("layers", 3))
max_seq = config.get("max_seq", config.get("seq_len", 32))
print(f" Config: vocab={vocab_size}, d={d_model}, heads={n_heads}, layers={n_layers}, seq={max_seq}")

tokenizer = TeraTokenizer()
tokenizer.load("tokenizer.json")
print(f" Tokenizer vocab: {tokenizer.size()}")

# Dropout is disabled because this Space only performs inference.
model = TeraLM(
    vocab_size=vocab_size,
    d_model=d_model,
    n_heads=n_heads,
    n_layers=n_layers,
    max_seq=max_seq,
    drop_rate=0.0,
)

# One forward pass on an all-zero batch before restoring the checkpoint.
# NOTE(review): presumably needed so the model's variables exist before
# load_weights is called — confirm against TeraLM.
dummy = tf.zeros((1, max_seq), dtype=tf.int32)
_ = model(dummy)

# ---- LOAD H5 WEIGHTS ----
model.load_weights("model.weights.h5")
print(" Checkpoint restored ✅")

# Context window length used by generate() to truncate its input.
MAX_SEQ = max_seq
def _sample_next_token(logits, seen, temperature, top_k, top_p, rep_penalty):
    """Sample one token id from a vector of raw next-token logits.

    The steps are applied in this order: repetition penalty on the ids in
    ``seen``, temperature scaling, top-k masking, softmax, top-p (nucleus)
    filtering, and finally a random draw from the renormalized distribution.

    Args:
        logits: 1-D array-like of unnormalized scores, one per token id.
        seen: Collection of token ids whose scores should be penalized.
        temperature: Divisor applied to the scores; clamped to at least 0.01.
        top_k: Number of highest-scoring tokens to keep; values <= 0 disable
            the top-k step.
        top_p: Cumulative probability threshold for nucleus filtering.
        rep_penalty: Factor used to shrink positive scores and push
            non-positive scores further down for tokens in ``seen``.

    Returns:
        The sampled token id as a Python ``int``.
    """
    # Work on a float64 copy so the caller's array is never modified.
    scores = np.array(logits, dtype=np.float64)

    # Repetition penalty: dividing a positive score and multiplying a
    # non-positive one both make the token less likely.
    if seen:
        seen_idx = list(seen)
        seen_scores = scores[seen_idx]
        scores[seen_idx] = np.where(
            seen_scores > 0,
            seen_scores / rep_penalty,
            seen_scores * rep_penalty,
        )

    # The floor of 0.01 avoids dividing by zero (or a negative temperature).
    scores = scores / max(temperature, 0.01)

    k = int(top_k)
    if k > 0:
        # Everything outside the k best scores is pushed to -1e9 so it gets
        # (numerically) zero probability after the softmax.
        keep = np.argsort(scores)[-k:]
        masked = np.full_like(scores, -1e9)
        masked[keep] = scores[keep]
        scores = masked

    # Softmax with the max subtracted for numerical stability.
    probs = np.exp(scores - np.max(scores))
    probs = probs / probs.sum()

    # Nucleus filtering: walk tokens in descending probability and keep the
    # shortest prefix whose cumulative mass reaches top_p.
    order = np.argsort(probs)[::-1]
    cumulative = np.cumsum(probs[order])
    cutoff = np.searchsorted(cumulative, top_p) + 1
    allowed = order[:cutoff]

    filtered = np.zeros_like(probs)
    filtered[allowed] = probs[allowed]
    filtered = filtered / filtered.sum()

    return int(np.random.choice(len(filtered), p=filtered))


def generate(prompt, max_tokens=40, temperature=0.3, top_k=8, top_p=0.85, rep_penalty=1.4):
    """Generate a reply to ``prompt`` by sampling tokens one at a time.

    The prompt is wrapped in the ``"Q: ...\\nA:"`` template, encoded, and
    truncated to the last ``MAX_SEQ`` tokens. Tokens are then sampled until
    ``max_tokens`` have been produced or the tokenizer's end-of-sequence id
    is drawn.

    Args:
        prompt: User text to answer.
        max_tokens: Maximum number of tokens to generate (converted to int).
        temperature: Sampling temperature; clamped to at least 0.01.
        top_k: Keep only the ``top_k`` highest-scoring tokens (<= 0 disables).
        top_p: Nucleus sampling cumulative-probability threshold.
        rep_penalty: Penalty applied to tokens already in the prompt or the
            generated text.

    Returns:
        The decoded generated tokens (without the prompt), stripped of
        leading and trailing whitespace.
    """
    formatted = f"Q: {prompt}\nA:"
    prompt_ids = tokenizer.encode(formatted, add_special=False)[-MAX_SEQ:]

    # With no context tokens there is no last position to read logits from,
    # so return an empty reply instead of failing inside the loop.
    if len(prompt_ids) == 0:
        return ""

    generated = list(prompt_ids)
    seen = set(prompt_ids)

    for _ in range(int(max_tokens)):
        # Only the most recent MAX_SEQ tokens are fed to the model.
        window = tf.constant([generated[-MAX_SEQ:]], dtype=tf.int32)
        logits = model(window, training=False)

        next_id = _sample_next_token(
            logits[0, -1, :].numpy(),
            seen,
            temperature,
            top_k,
            top_p,
            rep_penalty,
        )
        if next_id == tokenizer.eos_id:
            break

        generated.append(next_id)
        seen.add(next_id)

    # Decode only what was generated after the prompt.
    response = tokenizer.decode(generated[len(prompt_ids):])
    return response.strip()
# ---- GRADIO UI ----
with gr.Blocks(title="TERA V2", theme=gr.themes.Soft()) as demo:
    # Header banner; the text is kept flush-left so the literal is unchanged.
    gr.Markdown("""
# ⭐ TERA V2
**A language model built from scratch — no pretrained weights, no transformers.**
*Architecture: Time Mix + Token Shift + GroupNorm + Channel Mix + Squared ReLU*
""")

    with gr.Row():
        # Wide column: prompt, reply and the send button.
        with gr.Column(scale=3):
            user_input = gr.Textbox(
                label="Your message",
                placeholder="Ask me anything...",
                lines=2,
            )
            output_box = gr.Textbox(
                label="TERA V2 says",
                lines=4,
                interactive=False,
            )
            send_btn = gr.Button("Send ⭐", variant="primary")

        # Narrow column: sampling controls.
        with gr.Column(scale=1):
            max_tokens = gr.Slider(
                minimum=10, maximum=60, value=40, step=5, label="Max tokens"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.3, step=0.05, label="Temperature"
            )
            top_k = gr.Slider(
                minimum=3, maximum=20, value=8, step=1, label="Top-K"
            )
            top_p = gr.Slider(
                minimum=0.3, maximum=1.0, value=0.85, step=0.05, label="Top-P"
            )
            rep_penalty = gr.Slider(
                minimum=1.0, maximum=2.0, value=1.4, step=0.05, label="Rep penalty"
            )

    # The component order matches generate()'s positional parameters.
    generation_inputs = [
        user_input,
        max_tokens,
        temperature,
        top_k,
        top_p,
        rep_penalty,
    ]

    # Clicking the button and pressing Enter in the textbox run the same call.
    for register in (send_btn.click, user_input.submit):
        register(fn=generate, inputs=generation_inputs, outputs=output_box)

    gr.Markdown("---\n*TERA V2 by Vedaco • ~929K parameters • Trained from scratch*")

demo.launch()