# app.py — MoireFormer Gradio demo (HF Space by Aluode, commit dff6149)
import gradio as gr
import torch
from transformers import AutoTokenizer
from huggingface_hub import hf_hub_download
import sys
# Import your custom biological architecture
from moire_conv_trainer_v3 import MoireGPT, MoireGPTConfig
print("Downloading Moiré weights from HF Hub...")
# Fetch the pre-trained checkpoint; hf_hub_download caches it locally.
weights_path = hf_hub_download(
    repo_id="Aluode/MoireFormer",
    filename="moire_phase2_weights_final.pt",
)

# Prefer GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on device: {device}")

print("Initializing Moiré wave-field (104.9M)...")
# GPT-2 BPE tokenizer — the vocabulary the model was trained against.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
# The 'large' configuration: 8 layers, 8 heads, 768-dim embeddings (~104.9M params).
config = MoireGPTConfig(n_layer=8, n_head=8, n_embd=768)
model = MoireGPT(config)

# NOTE(review): torch.load unpickles arbitrary objects — acceptable for this
# first-party checkpoint, but weights_only=True would be safer if supported.
checkpoint = torch.load(weights_path, map_location=device)
# Accept either a bare state dict or a full training checkpoint wrapping one.
state_dict = checkpoint.get('model_state_dict', checkpoint)
model.load_state_dict(state_dict)
model.to(device)
model.eval()
def generate_text(prompt, max_new_tokens=80, temperature=0.7):
    """Autoregressively sample up to ``max_new_tokens`` tokens from the model.

    Args:
        prompt: Seed text to condition generation on.
        max_new_tokens: Upper bound on the number of tokens to append.
        temperature: Softmax temperature; lower values sharpen the distribution.

    Returns:
        The decoded string containing the prompt followed by the sampled
        continuation (generation stops early at the tokenizer's EOS token).
    """
    tokens = tokenizer.encode(prompt, return_tensors='pt').to(device)
    eos_id = tokenizer.eos_token_id
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Keep only the most recent max_seq_len tokens so the positional
            # embeddings never index out of bounds.
            window = tokens[:, -config.max_seq_len:]
            logits, _ = model(window)
            # Temperature-scale the final position's logits and sample from
            # the resulting categorical distribution.
            dist = torch.nn.functional.softmax(logits[:, -1, :] / temperature, dim=-1)
            sampled = torch.multinomial(dist, num_samples=1)
            # Grow the running sequence with the newly sampled token.
            tokens = torch.cat((tokens, sampled), dim=1)
            # End-of-sequence: the model considers the thought complete.
            if sampled.item() == eos_id:
                break
    return tokenizer.decode(tokens[0].tolist())
def chat_interface(message, history):
"""Formats the Gradio UI input to match your dataset's User/Bot structure."""
prompt = ""
# Inject context memory (Handles both Gradio 4 and Gradio 5 formats)
for msg in history:
# Gradio 5 format: dictionaries with 'role' and 'content'
if isinstance(msg, dict):
if msg.get("role") == "user":
prompt += f"User: {msg.get('content')}\n"
elif msg.get("role") == "assistant":
prompt += f"Bot: {msg.get('content')}\n"
# Fallback for Gradio 4 format: [user_msg, bot_msg]
elif isinstance(msg, (list, tuple)) and len(msg) == 2:
prompt += f"User: {msg[0]}\nBot: {msg[1]}\n"
# Inject current message
prompt += f"User: {message}\nBot:"
# Generate the wave-interference response
full_response = generate_text(prompt)
# Strip the prompt out so the UI only shows the Bot's new reply
response_only = full_response[len(prompt):].strip()
return response_only
# Build the beautiful Gradio Web UI
# Build the Gradio chat web UI wired to the sampling callback above.
demo = gr.ChatInterface(
    fn=chat_interface,
    title="MoireFormer (104.9M) - Phase-Interference AI",
    description="This is not a standard Transformer. It does not use dot-product attention. It calculates language via biological **Moiré wave-interference math**, proving AI can run on continuous geometric phase-space. *Note: At 100M parameters, this is a proof-of-substrate for syntax and logic, but it will hallucinate specific facts.*",
    examples=["What is the best way to make a database schema?", "Who are you?", "What is the capital of India?"]
)

# Standard script entry point (used both locally and on HF Spaces).
if __name__ == "__main__":
    demo.launch()