# app.py — MoireFormer Gradio demo (HF Space by Aluode, commit dff6149)
import gradio as gr
import torch
from transformers import AutoTokenizer
from huggingface_hub import hf_hub_download
import sys
# Import your custom biological architecture
from moire_conv_trainer_v3 import MoireGPT, MoireGPTConfig
print("Downloading Moiré weights from HF Hub...")
# Fetch the pre-trained checkpoint; hf_hub_download caches it locally.
weights_path = hf_hub_download(
    repo_id="Aluode/MoireFormer",
    filename="moire_phase2_weights_final.pt",
)

# Prefer GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on device: {device}")

print("Initializing Moiré wave-field (104.9M)...")
# GPT-2 BPE tokenizer — the vocabulary the model was trained against.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
# The 'large' configuration: 8 layers, 8 heads, 768-dim embeddings (~104.9M params).
config = MoireGPTConfig(n_layer=8, n_head=8, n_embd=768)
model = MoireGPT(config)

# NOTE(review): torch.load unpickles arbitrary objects — acceptable for this
# first-party checkpoint, but weights_only=True would be safer if supported.
checkpoint = torch.load(weights_path, map_location=device)
# Accept either a bare state dict or a full training checkpoint wrapping one.
state_dict = checkpoint.get('model_state_dict', checkpoint)
model.load_state_dict(state_dict)
model.to(device)
model.eval()
def generate_text(prompt, max_new_tokens=80, temperature=0.7):
    """Autoregressively sample up to ``max_new_tokens`` tokens from the model.

    Args:
        prompt: Seed text to condition generation on.
        max_new_tokens: Upper bound on the number of tokens to append.
        temperature: Softmax temperature; lower values sharpen the distribution.

    Returns:
        The decoded string containing the prompt followed by the sampled
        continuation (generation stops early at the tokenizer's EOS token).
    """
    tokens = tokenizer.encode(prompt, return_tensors='pt').to(device)
    eos_id = tokenizer.eos_token_id
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Keep only the most recent max_seq_len tokens so the positional
            # embeddings never index out of bounds.
            window = tokens[:, -config.max_seq_len:]
            logits, _ = model(window)
            # Temperature-scale the final position's logits and sample from
            # the resulting categorical distribution.
            dist = torch.nn.functional.softmax(logits[:, -1, :] / temperature, dim=-1)
            sampled = torch.multinomial(dist, num_samples=1)
            # Grow the running sequence with the newly sampled token.
            tokens = torch.cat((tokens, sampled), dim=1)
            # End-of-sequence: the model considers the thought complete.
            if sampled.item() == eos_id:
                break
    return tokenizer.decode(tokens[0].tolist())
def chat_interface(message, history):
"""Formats the Gradio UI input to match your dataset's User/Bot structure."""
prompt = ""
# Inject context memory (Handles both Gradio 4 and Gradio 5 formats)
for msg in history:
# Gradio 5 format: dictionaries with 'role' and 'content'
if isinstance(msg, dict):
if msg.get("role") == "user":
prompt += f"User: {msg.get('content')}\n"
elif msg.get("role") == "assistant":
prompt += f"Bot: {msg.get('content')}\n"
# Fallback for Gradio 4 format: [user_msg, bot_msg]
elif isinstance(msg, (list, tuple)) and len(msg) == 2:
prompt += f"User: {msg[0]}\nBot: {msg[1]}\n"
# Inject current message
prompt += f"User: {message}\nBot:"
# Generate the wave-interference response
full_response = generate_text(prompt)
# Strip the prompt out so the UI only shows the Bot's new reply
response_only = full_response[len(prompt):].strip()
return response_only
# Build the beautiful Gradio Web UI
# Build the Gradio chat web UI wired to the sampling callback above.
demo = gr.ChatInterface(
    fn=chat_interface,
    title="MoireFormer (104.9M) - Phase-Interference AI",
    description="This is not a standard Transformer. It does not use dot-product attention. It calculates language via biological **Moiré wave-interference math**, proving AI can run on continuous geometric phase-space. *Note: At 100M parameters, this is a proof-of-substrate for syntax and logic, but it will hallucinate specific facts.*",
    examples=["What is the best way to make a database schema?", "Who are you?", "What is the capital of India?"]
)

# Standard script entry point (used both locally and on HF Spaces).
if __name__ == "__main__":
    demo.launch()