Spaces:

SupraLabs
/

Michael_Morgan_Models

Paused

App Files Files Community

Michael_Morgan_Models / app.py

Mmorgan-ML

Create app.py

59cdde3 verified 28 days ago

Raw

History Blame Contribute Delete

12.8 kB

	# ==============================================================================
	# Neuromodulatory Control Network (NCN) 2.0 Model Catalogue Interface
	# Copyright (c) 2026 Michael Morgan. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	# ============================================================================
	# MANDATORY ATTRIBUTION & ARCHITECTURAL CITATION RIDER:
	# ============================================================================
	# By using, copying, modifying, distributing, or re-implementing this model
	# architecture (the Neuromodulatory Control Network / NCN), you agree to
	# prominently credit and cite the original creator, Michael Morgan, in all
	# academic papers, technical reports, repositories, and commercial/for-profit
	# product documentation that leverage these ideas.
	#
	# The mandatory citation format must include:
	# - Author: Michael Morgan (2026)
	# - Project: Neuromodulatory Control Network (NCN) Architecture
	# - Links: GitHub: https://github.com/Mmorgan-ML
	# Hugging Face: https://huggingface.co/Mmorgan-ML
	# Twitter/X: @Mmorgan_ML
	# Email: mmorgankorea@gmail.com
	# ==============================================================================

	import os
	import torch
	import torch.nn.functional as F
	import gradio as gr
	from transformers import PreTrainedTokenizerFast
	from safetensors.torch import load_file

	# Import your dynamic architecture package
	from ncn_architecture.config import NCNConfig
	from ncn_architecture.model import ModulatedLLM

	# Pick the compute device once at import time: CUDA when PyTorch reports a
	# usable GPU, otherwise the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Process-wide caches keyed by the model name selected in the UI, so weights
	# and tokenizer files are read from disk only on the first request.
	MODEL_CACHE, TOKENIZER_CACHE = {}, {}

	def load_and_configure_model(model_choice):
	    """
	    Return the ``(model, tokenizer)`` pair for a catalogue entry, loading it on first use.

	    The architecture hyper-parameters are not read from a config file; they are
	    inferred from the tensor shapes stored in the safetensors checkpoint, so the
	    instantiated network is built to match the saved weights.

	    Args:
	        model_choice: Display name of the model selected in the UI.

	    Returns:
	        A tuple of the model (moved to ``device`` and put in eval mode) and its
	        tokenizer. Later calls with the same name return the cached pair.

	    Raises:
	        ValueError: If ``model_choice`` is not a registered profile.
	        FileNotFoundError: If the weights or tokenizer file is missing on disk.
	    """
	    if model_choice in MODEL_CACHE:
	        return MODEL_CACHE[model_choice], TOKENIZER_CACHE[model_choice]

	    # Only one profile is registered; reject anything else before touching disk.
	    if model_choice != "NCN 2M (TinyStories)":
	        raise ValueError("Selected model profile is not registered.")

	    checkpoint_file = "models/ncn_2m_tinystories/model.safetensors"
	    tokenizer_file = "models/ncn_2m_tinystories/tokenizer.json"

	    required_files = (checkpoint_file, tokenizer_file)
	    if not all(os.path.exists(path) for path in required_files):
	        raise FileNotFoundError("Model weight file or tokenizer file could not be located in the specified path.")

	    weights = load_file(checkpoint_file)

	    # Embedding tables give the vocabulary size, model width and context length;
	    # the first feed-forward projection gives the hidden width of the MLP.
	    embedding_shape = weights["token_embeddings.weight"].shape
	    vocab_size, d_model = embedding_shape
	    max_positions = weights["position_embeddings.weight"].shape[0]
	    ffn_width = weights["transformer_layers.0.feed_forward.linear1.weight"].shape[0]

	    # Count distinct layer indices appearing in "transformer_layers.<i>." keys;
	    # fall back to 12 when the checkpoint has no such keys.
	    layer_ids = {
	        int(name.split(".")[1])
	        for name in weights
	        if name.startswith("transformer_layers.")
	    }
	    layer_count = len(layer_ids) or 12

	    # Recover the head count from the NCN output bias length, using the relation
	    # ncn_output_dim = (2 * nhead + 1) * num_layers. Defaults to 12 heads.
	    head_count = 12
	    if "ncn.layer2.bias" in weights:
	        ncn_output_dim = weights["ncn.layer2.bias"].shape[0]
	        try:
	            head_count = int((ncn_output_dim / layer_count - 1) / 2)
	        except Exception:
	            head_count = 12

	    # Hidden width of the NCN's first layer, defaulting to 128 when absent.
	    if "ncn.layer1.weight" in weights:
	        tonic_hidden = weights["ncn.layer1.weight"].shape[0]
	    else:
	        tonic_hidden = 128

	    # Build the network from the inferred dimensions, then load the weights
	    # strictly so any mismatch surfaces as an error instead of a silent skip.
	    model = ModulatedLLM(
	        NCNConfig(
	            vocab_size=vocab_size,
	            d_model=d_model,
	            nhead=head_count,
	            num_layers=layer_count,
	            dim_feedforward=ffn_width,
	            max_position_embeddings=max_positions,
	            ncn_hidden_dim=tonic_hidden,
	        )
	    )
	    model.load_state_dict(weights, strict=True)
	    model.to(device)
	    model.eval()

	    # The tokenizer comes straight from the local JSON file; reuse EOS as the
	    # padding token when the file defines none.
	    tokenizer = PreTrainedTokenizerFast(tokenizer_file=tokenizer_file)
	    if tokenizer.pad_token is None:
	        tokenizer.pad_token = tokenizer.eos_token

	    MODEL_CACHE[model_choice] = model
	    TOKENIZER_CACHE[model_choice] = tokenizer
	    return model, tokenizer


	def _apply_repetition_penalty(logits, token_ids, penalty):
	    """Return ``logits`` with every token id present in ``token_ids`` made less likely."""
	    seen_scores = logits.gather(1, token_ids)
	    # Multiplying a negative score and dividing a non-negative one both push
	    # the token's probability down when penalty > 1.
	    seen_scores = torch.where(seen_scores < 0, seen_scores * penalty, seen_scores / penalty)
	    # Duplicate ids all write the same value, so the scatter result is well defined.
	    return logits.scatter(1, token_ids, seen_scores)


	def _filter_top_k(logits, top_k):
	    """Mask every logit below the ``top_k``-th largest value of its row with ``-inf``."""
	    # Clamp so a k larger than the vocabulary cannot make torch.topk raise.
	    k = min(top_k, logits.size(-1))
	    kth_best = torch.topk(logits, k)[0][..., -1:]
	    return logits.masked_fill(logits < kth_best, float("-inf"))


	def _filter_top_p(logits, top_p):
	    """Keep the most likely tokens up to and including the one that crosses ``top_p``."""
	    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
	    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
	    drop_sorted = cumulative_probs > top_p
	    # Shift the mask right by one so the token that first exceeds the
	    # threshold is kept, and never drop the single most likely token.
	    drop_sorted[..., 1:] = drop_sorted[..., :-1].clone()
	    drop_sorted[..., 0] = False
	    # Map the mask from sorted order back to vocabulary order.
	    drop = drop_sorted.scatter(1, sorted_indices, drop_sorted)
	    return logits.masked_fill(drop, float("-inf"))


	def _select_next_token(logits, generated_ids, temperature, top_p, top_k, repetition_penalty):
	    """Choose the next token id for each row of ``logits``; returns one id per row."""
	    if repetition_penalty != 1.0:
	        logits = _apply_repetition_penalty(logits, generated_ids, repetition_penalty)

	    # A non-positive temperature means deterministic (greedy) decoding.
	    if temperature <= 0.0:
	        return torch.argmax(logits, dim=-1, keepdim=True)

	    logits = logits / temperature
	    if top_k > 0:
	        logits = _filter_top_k(logits, top_k)
	    if top_p < 1.0:
	        logits = _filter_top_p(logits, top_p)
	    return torch.multinomial(F.softmax(logits, dim=-1), num_samples=1)


	@torch.no_grad()
	def generate_text(model_choice, prompt, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
	    """
	    Generate a continuation of ``prompt`` with the selected model.

	    Runs an autoregressive loop that feeds the full prompt once and afterwards
	    only the newest token, passing the model's returned cache and RNN state
	    back in on each step.

	    Args:
	        model_choice: Name of the model profile to load.
	        prompt: Text to continue. ``None``, empty or whitespace-only input
	            returns a help message instead of running the model.
	        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
	        temperature: Softmax temperature; values ``<= 0`` select greedy decoding.
	        top_p: Nucleus threshold; filtering is skipped when ``>= 1.0``.
	        top_k: Number of highest logits to keep (coerced to ``int``, clamped to
	            the vocabulary size); filtering is skipped when ``<= 0``.
	        repetition_penalty: Penalty applied to tokens already in the sequence;
	            ``1.0`` disables it.

	    Returns:
	        The decoded prompt plus continuation with special tokens removed, or a
	        human-readable message when the prompt is empty or loading fails.

	    NOTE(review): nothing here caps prompt length + ``max_new_tokens`` against
	    the model's maximum position count - confirm how the model handles
	    sequences longer than its positional table.
	    """
	    if not prompt or not prompt.strip():
	        return "Please enter a starting prompt to begin generating a story."

	    # Any load failure is reported to the UI as text rather than raised.
	    try:
	        model, tokenizer = load_and_configure_model(model_choice)
	    except Exception as exc:
	        return f"Error loading model: {exc}"

	    # UI sliders may deliver whole numbers as floats; range() and torch.topk
	    # both require real integers.
	    max_new_tokens = int(max_new_tokens)
	    top_k = int(top_k)

	    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
	    generated_ids = input_ids.clone()

	    past_key_values = None
	    past_rnn_state = None

	    for _ in range(max_new_tokens):
	        if past_key_values is None:
	            # No cache yet: run the whole sequence through the model.
	            outputs = model(
	                input_ids=generated_ids,
	                past_key_values=None,
	                use_cache=True,
	                past_rnn_state=None,
	            )
	        else:
	            # Cache available: only the most recent token needs processing.
	            outputs = model(
	                input_ids=generated_ids[:, -1:],
	                past_key_values=past_key_values,
	                use_cache=True,
	                past_rnn_state=past_rnn_state,
	            )

	        logits, past_key_values, _, past_rnn_state = outputs

	        next_token = _select_next_token(
	            logits[:, -1, :], generated_ids, temperature, top_p, top_k, repetition_penalty
	        )
	        generated_ids = torch.cat([generated_ids, next_token], dim=-1)

	        # Stop as soon as the end-of-sequence token is produced.
	        if next_token.item() == tokenizer.eos_token_id:
	            break

	    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)


	# --- GRADIO INTERFACE CONSTRUCTION ---

	# Custom styling: hides the default Gradio footer and colours the two action
	# buttons through the element classes assigned to them below.
	css = """
	footer {visibility: hidden}
	.primary-btn {background-color: #5c4ff2 !important; color: white !important;}
	.clear-btn {background-color: #374151 !important; color: white !important;}
	"""

	with gr.Blocks(title="Michael Morgan Model Catalogue", css=css) as demo:
	    gr.Markdown("# Michael Morgan Model Catalogue")
	    gr.Markdown("Select a model, enter a story starter, and adjust the generation settings.")

	    with gr.Row():
	        # Left-hand column: model choice, prompt, sampling settings and buttons.
	        with gr.Column(scale=1):
	            model_dropdown = gr.Dropdown(
	                choices=["NCN 2M (TinyStories)"],
	                value="NCN 2M (TinyStories)",
	                label="Model",
	                interactive=True,
	            )

	            prompt_input = gr.Textbox(
	                lines=5,
	                placeholder="Type your story starter here...",
	                label="Story starter",
	            )

	            # Sampling controls live in a collapsed accordion to keep the
	            # default layout uncluttered.
	            with gr.Accordion("Generation settings", open=False):
	                max_tokens = gr.Slider(
	                    minimum=1,
	                    maximum=512,
	                    value=128,
	                    step=1,
	                    label="Max new tokens",
	                )
	                temperature = gr.Slider(
	                    minimum=0.0,
	                    maximum=1.5,
	                    value=0.7,
	                    step=0.05,
	                    label="Temperature (0 = greedy)",
	                )
	                top_p = gr.Slider(
	                    minimum=0.1,
	                    maximum=1.0,
	                    value=0.8,
	                    step=0.05,
	                    label="Top-p",
	                )
	                top_k = gr.Slider(
	                    minimum=1,
	                    maximum=100,
	                    value=25,
	                    step=1,
	                    label="Top-k",
	                )
	                rep_penalty = gr.Slider(
	                    minimum=1.0,
	                    maximum=2.0,
	                    value=1.1,
	                    step=0.05,
	                    label="Repetition penalty",
	                )

	            with gr.Row():
	                clear_btn = gr.Button("Clear", elem_classes=["clear-btn"])
	                generate_btn = gr.Button("Generate", variant="primary", elem_classes=["primary-btn"])

	        # Right-hand column: read-only box that receives the generated text.
	        with gr.Column(scale=1):
	            output_display = gr.Textbox(
	                lines=12,
	                placeholder="The generated story will appear here...",
	                label="Generated story",
	                interactive=False,
	            )

	    # Clickable example prompts that fill the story-starter box.
	    gr.Markdown("### Try these examples")
	    gr.Examples(
	        examples=[
	            ["Once upon a time, there was a little dragon who"],
	            ["Lily found a tiny wooden key buried in the sand box. She wondered what"],
	            ["One sunny morning, a big friendly dog named Max decided to"],
	            ["Tom had a bright yellow balloon. When he let go of the string, the balloon"],
	        ],
	        inputs=prompt_input,
	    )

	    # Technical footer. Raw string: the backslashes are Markdown escapes for the
	    # pipe characters, not Python escapes ("\|" is an invalid escape sequence in
	    # a normal string literal and triggers a SyntaxWarning on recent Pythons).
	    gr.Markdown(
	        r"Model: SupraLabs/NCN-2M-TinyStories \| License: Apache 2.0 \| CPU-only \| © 2026 Michael Morgan"
	    )

	    # Generate: pass the model name, prompt and all sampling settings to
	    # generate_text and show its return value in the output box.
	    generate_btn.click(
	        fn=generate_text,
	        inputs=[model_dropdown, prompt_input, max_tokens, temperature, top_p, top_k, rep_penalty],
	        outputs=output_display,
	    )

	    # Clear: reset both the prompt and the output box to empty strings.
	    clear_btn.click(
	        fn=lambda: ("", ""),
	        inputs=None,
	        outputs=[prompt_input, output_display],
	    )

	# Start the web server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()