# Hugging Face Space launcher for the MoireFormer demo.
import gradio as gr
import torch
from transformers import AutoTokenizer
from huggingface_hub import hf_hub_download
import sys

# Custom biological architecture shipped alongside this script.
from moire_conv_trainer_v3 import MoireGPT, MoireGPTConfig

print("Downloading Moiré weights from HF Hub...")
# Fetches the checkpoint previously uploaded to the model repo (cached locally).
weights_path = hf_hub_download(
    repo_id="Aluode/MoireFormer",
    filename="moire_phase2_weights_final.pt",
)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on device: {device}")

print("Initializing Moiré wave-field (104.9M)...")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# The 'large' config from the training script: 8 layers, 8 heads, 768-dim embeddings.
config = MoireGPTConfig(n_layer=8, n_head=8, n_embd=768)
model = MoireGPT(config)

# The checkpoint may be either a raw state dict or a training-checkpoint
# wrapper carrying a 'model_state_dict' key — unwrap the latter.
state_dict = torch.load(weights_path, map_location=device)
if 'model_state_dict' in state_dict:
    state_dict = state_dict['model_state_dict']
model.load_state_dict(state_dict)
model.to(device)
model.eval()
def generate_text(prompt, max_new_tokens=80, temperature=0.7):
    """Autoregressively sample a continuation of ``prompt`` from the Moiré field.

    Args:
        prompt: Text to condition generation on.
        max_new_tokens: Upper bound on the number of sampled tokens.
        temperature: Softmax temperature. Clamped to a small positive value so
            passing 0 (or a negative) no longer raises a division error or
            feeds invalid probabilities to ``torch.multinomial``.

    Returns:
        The decoded prompt plus the generated continuation as a single string.
    """
    # Guard against temperature <= 0 (division by zero / invalid sampling).
    temperature = max(float(temperature), 1e-6)
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Crop to the most recent max_seq_len tokens so the positional
            # embeddings never index out of bounds.
            cond_input = input_ids[:, -config.max_seq_len:]
            # Pass the cropped signal through the Moiré field.
            logits, _ = model(cond_input)
            # Only the prediction at the last position matters.
            next_token_logits = logits[:, -1, :] / temperature
            probs = torch.nn.functional.softmax(next_token_logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
            # Append the sampled token to the running sequence.
            input_ids = torch.cat((input_ids, next_token), dim=1)
            # Stop if the field emits end-of-sequence (some tokenizers have
            # no EOS token, in which case eos_token_id is None).
            if tokenizer.eos_token_id is not None and next_token.item() == tokenizer.eos_token_id:
                break
    return tokenizer.decode(input_ids[0].tolist())
def chat_interface(message, history):
    """Gradio chat handler: build a User:/Bot: prompt and return the reply.

    Supports both Gradio 5 history (list of {'role', 'content'} dicts) and
    the older Gradio 4 format (list of [user_msg, bot_msg] pairs).

    Args:
        message: The user's current message.
        history: Prior conversation turns in either Gradio format.

    Returns:
        The bot's newly generated reply with the prompt echo stripped.
    """
    prompt = ""
    # Inject context memory from previous turns.
    for msg in history:
        # Gradio 5 format: dictionaries with 'role' and 'content'.
        if isinstance(msg, dict):
            if msg.get("role") == "user":
                prompt += f"User: {msg.get('content')}\n"
            elif msg.get("role") == "assistant":
                prompt += f"Bot: {msg.get('content')}\n"
        # Fallback for Gradio 4 format: [user_msg, bot_msg].
        elif isinstance(msg, (list, tuple)) and len(msg) == 2:
            prompt += f"User: {msg[0]}\nBot: {msg[1]}\n"
    # Inject the current message.
    prompt += f"User: {message}\nBot:"
    # Generate the wave-interference response.
    full_response = generate_text(prompt)
    # Strip the prompt out so the UI only shows the Bot's new reply.
    response_only = full_response[len(prompt):].strip()
    # Generation only halts at EOS, so the model often keeps role-playing
    # extra "User:"/"Bot:" turns — cut the reply at the first such marker.
    response_only = response_only.split("\nUser:")[0].strip()
    return response_only
# Example prompts shown beneath the chat box.
_EXAMPLES = [
    "What is the best way to make a database schema?",
    "Who are you?",
    "What is the capital of India?",
]

# Assemble the Gradio web UI around the chat handler.
demo = gr.ChatInterface(
    fn=chat_interface,
    title="MoireFormer (104.9M) - Phase-Interference AI",
    description="This is not a standard Transformer. It does not use dot-product attention. It calculates language via biological **Moiré wave-interference math**, proving AI can run on continuous geometric phase-space. *Note: At 100M parameters, this is a proof-of-substrate for syntax and logic, but it will hallucinate specific facts.*",
    examples=_EXAMPLES,
)

if __name__ == "__main__":
    demo.launch()