"""Gradio chat demo for MoireFormer, a 104.9M-parameter phase-interference LM.

Downloads pretrained weights from the Hugging Face Hub, rebuilds the custom
MoireGPT architecture, and serves a ChatInterface that formats history into
the "User: ... / Bot: ..." structure the model was trained on.
"""

import sys

import gradio as gr
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer

# Import your custom biological architecture
from moire_conv_trainer_v3 import MoireGPT, MoireGPTConfig

print("Downloading Moiré weights from HF Hub...")
# Automatically fetches the weights you uploaded earlier!
weights_path = hf_hub_download(
    repo_id="Aluode/MoireFormer",
    filename="moire_phase2_weights_final.pt",
)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on device: {device}")

print("Initializing Moiré wave-field (104.9M)...")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# We use the 'large' config from your script (8 layers, 8 heads, 768 embd)
config = MoireGPTConfig(n_layer=8, n_head=8, n_embd=768)
model = MoireGPT(config)

# Load the weights into the field. Some checkpoints wrap the weights in a
# trainer dict under 'model_state_dict'; unwrap if present.
# NOTE(review): torch.load without weights_only=True unpickles arbitrary
# objects — fine for your own checkpoint, unsafe for untrusted files.
state_dict = torch.load(weights_path, map_location=device)
if 'model_state_dict' in state_dict:
    state_dict = state_dict['model_state_dict']
model.load_state_dict(state_dict)
model.to(device)
model.eval()


def generate_text(prompt, max_new_tokens=80, temperature=0.7):
    """The raw physics generation loop.

    Autoregressively samples up to ``max_new_tokens`` tokens from the model,
    conditioning on ``prompt``. Returns the decoded full sequence
    (prompt + continuation).

    Args:
        prompt: Text to condition on.
        max_new_tokens: Maximum number of tokens to sample.
        temperature: Softmax temperature; lower is greedier.
    """
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    with torch.no_grad():
        for _ in range(max_new_tokens):
            # CROP THE INPUT: Only look at the most recent max_seq_len tokens
            # so the positional embeddings never go out of bounds (257)
            cond_input = input_ids[:, -config.max_seq_len:]

            # Pass the cropped signal through the Moiré field
            logits, _ = model(cond_input)

            # Grab the prediction for the last token
            next_token_logits = logits[:, -1, :] / temperature

            # Sample the next token
            probs = torch.nn.functional.softmax(next_token_logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)

            # Append it to the running sequence
            input_ids = torch.cat((input_ids, next_token), dim=1)

            # Stop if the field decides the thought is complete
            if next_token.item() == tokenizer.eos_token_id:
                break

    return tokenizer.decode(input_ids[0].tolist())


def chat_interface(message, history):
    """Formats the Gradio UI input to match your dataset's User/Bot structure.

    Args:
        message: The user's latest message.
        history: Prior turns, either Gradio 5 role/content dicts or
            Gradio 4 ``[user_msg, bot_msg]`` pairs.

    Returns:
        Only the bot's newly generated reply (prompt stripped off).
    """
    prompt = ""

    # Inject context memory (Handles both Gradio 4 and Gradio 5 formats)
    for msg in history:
        # Gradio 5 format: dictionaries with 'role' and 'content'
        if isinstance(msg, dict):
            if msg.get("role") == "user":
                prompt += f"User: {msg.get('content')}\n"
            elif msg.get("role") == "assistant":
                prompt += f"Bot: {msg.get('content')}\n"
        # Fallback for Gradio 4 format: [user_msg, bot_msg]
        elif isinstance(msg, (list, tuple)) and len(msg) == 2:
            prompt += f"User: {msg[0]}\nBot: {msg[1]}\n"

    # Inject current message
    prompt += f"User: {message}\nBot:"

    # Generate the wave-interference response
    full_response = generate_text(prompt)

    # Strip the prompt out so the UI only shows the Bot's new reply
    response_only = full_response[len(prompt):].strip()
    return response_only


# Build the beautiful Gradio Web UI
demo = gr.ChatInterface(
    fn=chat_interface,
    title="MoireFormer (104.9M) - Phase-Interference AI",
    description="This is not a standard Transformer. It does not use dot-product attention. It calculates language via biological **Moiré wave-interference math**, proving AI can run on continuous geometric phase-space. *Note: At 100M parameters, this is a proof-of-substrate for syntax and logic, but it will hallucinate specific facts.*",
    examples=["What is the best way to make a database schema?", "Who are you?", "What is the capital of India?"],
)

if __name__ == "__main__":
    demo.launch()