"""Gradio front-end for the Tera.V3 multimodal model.

Builds the model, attempts to load trained weights from disk, and exposes a
small text + image interface. Tokenization and decoding are placeholders:
the handler runs a forward pass but always returns a fixed status string.
"""

from typing import Optional

import gradio as gr
import numpy as np
import PIL.Image
import tensorflow as tf

from model import TeraV3

# --- 1. LOAD SOVEREIGN CORE ---
# These values are shared between model construction and request handling so
# that the inputs built in `predict` always match what the model was built on.
VOCAB_SIZE = 100
SEQ_LEN = 32
IMAGE_SIZE = 224
WEIGHTS_PATH = 'stable.weights.h5'

# Initialize model with training-consistent dimensions.
model = TeraV3(vocab_size=VOCAB_SIZE, dim=512, depth=12)

# Run one forward pass on dummy inputs so the variables exist before
# `load_weights` is called.
_ = model(
    tf.zeros((1, SEQ_LEN), dtype=tf.int32),
    vision_inputs=tf.zeros((1, IMAGE_SIZE, IMAGE_SIZE, 3), dtype=tf.float32),
    training=False,
)

try:
    model.load_weights(WEIGHTS_PATH)
except Exception as exc:
    # Best-effort: the demo still starts with freshly initialized weights.
    # `Exception` (rather than a bare `except`) lets Ctrl-C / SystemExit
    # propagate, and the cause is reported instead of silently dropped.
    print("⚠️ Loading default weights (untrained).")
    print(f"   Reason: {exc!r}")
else:
    print("✅ Sovereign Weights Loaded.")


def predict(text: str, image: Optional[PIL.Image.Image]) -> str:
    """Run one forward pass for a UI request and return a status message.

    Args:
        text: The user's query. Currently unused: there is no tokenizer yet,
            so random token ids stand in for the encoded text.
        image: Optional PIL image from the Gradio image component, or None
            when the user supplied no image.

    Returns:
        A fixed placeholder string; the model output is not decoded yet.
    """
    # Preprocess text (dummy tokenizer for current architecture state).
    # In a full deployment, this would use a saved SentencePiece/ByteLevel
    # BPE model.
    text_ids = tf.random.uniform(
        [1, SEQ_LEN], maxval=VOCAB_SIZE, dtype=tf.int32
    )

    vis_in = None
    if image is not None:
        # Uploaded images may be RGBA, grayscale or palettized; the model was
        # built on 3-channel input, so normalize the mode before resizing.
        rgb = image.convert("RGB").resize((IMAGE_SIZE, IMAGE_SIZE))
        pixels = np.asarray(rgb, dtype=np.float32) / 255.0
        vis_in = np.expand_dims(pixels, axis=0)

    # NOTE(review): when no image is given, vision_inputs is None; this
    # assumes TeraV3 accepts a missing vision input — confirm against model.
    # The result is discarded until decoding is implemented; the call is kept
    # so the request still exercises the model.
    _ = model(text_ids, vision_inputs=vis_in, training=False)

    # Convert logits to a human-readable placeholder for this stage.
    # Real-world deployment would involve top-k sampling.
    return "[Tera.V3 Sovereign Response]: The neural pathway is active. The interface is processing your multimodal request."


# --- 2. GRADIO INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🪐 Tera.V3 Sovereign Interface")
    gr.Markdown("Interact with the Dense-Elite multimodal architecture.")

    with gr.Row():
        with gr.Column():
            txt = gr.Textbox(label="Sovereign Query", placeholder="Type your message...")
            img = gr.Image(type='pil', label="Visual Context")
            btn = gr.Button("Execute", variant="primary")
        with gr.Column():
            out = gr.Textbox(label="Tera.V3 Output")

    # Event listeners must be registered inside the Blocks context.
    btn.click(predict, inputs=[txt, img], outputs=out)

if __name__ == '__main__':
    demo.launch()