Mmorgan-ML's picture
Create app.py
59cdde3 verified
Raw
History Blame Contribute Delete
12.8 kB
# ==============================================================================
# Neuromodulatory Control Network (NCN) 2.0 Model Catalogue Interface
# Copyright (c) 2026 Michael Morgan. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ============================================================================
# MANDATORY ATTRIBUTION & ARCHITECTURAL CITATION RIDER:
# ============================================================================
# By using, copying, modifying, distributing, or re-implementing this model
# architecture (the Neuromodulatory Control Network / NCN), you agree to
# prominently credit and cite the original creator, Michael Morgan, in all
# academic papers, technical reports, repositories, and commercial/for-profit
# product documentation that leverage these ideas.
#
# The mandatory citation format must include:
# - Author: Michael Morgan (2026)
# - Project: Neuromodulatory Control Network (NCN) Architecture
# - Links: GitHub: https://github.com/Mmorgan-ML
# Hugging Face: https://huggingface.co/Mmorgan-ML
# Twitter/X: @Mmorgan_ML
# Email: mmorgankorea@gmail.com
# ==============================================================================
import os
import torch
import torch.nn.functional as F
import gradio as gr
from transformers import PreTrainedTokenizerFast
from safetensors.torch import load_file
# Import your dynamic architecture package
from ncn_architecture.config import NCNConfig
from ncn_architecture.model import ModulatedLLM
# Determine execution hardware
device = "cuda" if torch.cuda.is_available() else "cpu"
# Memory cache to prevent reloading models from disk on every click
MODEL_CACHE = {}
TOKENIZER_CACHE = {}
def load_and_configure_model(model_choice):
"""
Dynamically reconstructs model configurations from weight shapes
to guarantee absolute compatibility before loading.
"""
if model_choice in MODEL_CACHE:
return MODEL_CACHE[model_choice], TOKENIZER_CACHE[model_choice]
if model_choice == "NCN 2M (TinyStories)":
weights_path = "models/ncn_2m_tinystories/model.safetensors"
tokenizer_path = "models/ncn_2m_tinystories/tokenizer.json"
if not os.path.exists(weights_path) or not os.path.exists(tokenizer_path):
raise FileNotFoundError("Model weight file or tokenizer file could not be located in the specified path.")
# Load weights safely
state_dict = load_file(weights_path)
# Reconstruct hyper-parameters based on the structural shapes of the weights
vocab_size, d_model = state_dict["token_embeddings.weight"].shape
max_position_embeddings = state_dict["position_embeddings.weight"].shape[0]
dim_feedforward = state_dict["transformer_layers.0.feed_forward.linear1.weight"].shape[0]
# Calculate the number of layers
layer_indices = set()
for key in state_dict.keys():
if key.startswith("transformer_layers."):
layer_indices.add(int(key.split(".")[1]))
num_layers = len(layer_indices) if layer_indices else 12
# Solve for number of heads (nhead) mathematically using the NCN projection bias dimension
nhead = 12
if "ncn.layer2.bias" in state_dict:
bias_length = state_dict["ncn.layer2.bias"].shape[0]
try:
# ncn_output_dim = (2 * nhead + 1) * num_layers
nhead = int(((bias_length / num_layers) - 1) / 2)
except Exception:
nhead = 12
# Extract tonic hidden dimension
ncn_hidden_dim = 128
if "ncn.layer1.weight" in state_dict:
ncn_hidden_dim = state_dict["ncn.layer1.weight"].shape[0]
# 1. Initialize configuration with matched dimensions
config = NCNConfig(
vocab_size=vocab_size,
d_model=d_model,
nhead=nhead,
num_layers=num_layers,
dim_feedforward=dim_feedforward,
max_position_embeddings=max_position_embeddings,
ncn_hidden_dim=ncn_hidden_dim
)
# 2. Instantiate and map weights to architecture
model = ModulatedLLM(config)
model.load_state_dict(state_dict, strict=True)
model.to(device)
model.eval()
# 3. Load tokenizer directly from local JSON file
tokenizer = PreTrainedTokenizerFast(tokenizer_file=tokenizer_path)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
# Cache model instance
MODEL_CACHE[model_choice] = model
TOKENIZER_CACHE[model_choice] = tokenizer
return model, tokenizer
raise ValueError("Selected model profile is not registered.")
@torch.no_grad()
def generate_text(model_choice, prompt, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
"""
Autoregressive inference loop with standard KV caching and logits filtering.
"""
if not prompt.strip():
return "Please enter a starting prompt to begin generating a story."
try:
model, tokenizer = load_and_configure_model(model_choice)
except Exception as e:
return f"Error loading model: {str(e)}"
# Tokenize input
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
generated_ids = input_ids.clone()
past_key_values = None
past_rnn_state = None
for _ in range(max_new_tokens):
# KV Cache feeding optimization: process only newly added tokens
if past_key_values is None:
outputs = model(
input_ids=generated_ids,
past_key_values=None,
use_cache=True,
past_rnn_state=None
)
else:
outputs = model(
input_ids=generated_ids[:, -1:],
past_key_values=past_key_values,
use_cache=True,
past_rnn_state=past_rnn_state
)
logits, past_key_values, _, past_rnn_state = outputs
next_token_logits = logits[:, -1, :]
# Repetition Penalty logic
if repetition_penalty != 1.0:
for batch_idx in range(next_token_logits.shape[0]):
for prev_token_id in set(generated_ids[batch_idx].tolist()):
logit = next_token_logits[batch_idx, prev_token_id]
if logit < 0:
next_token_logits[batch_idx, prev_token_id] = logit * repetition_penalty
else:
next_token_logits[batch_idx, prev_token_id] = logit / repetition_penalty
# Temperature / Sampling Selection
if temperature == 0.0:
next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)
else:
next_token_logits = next_token_logits / temperature
# Top-K Filtering
if top_k > 0:
indices_to_remove = next_token_logits < torch.topk(next_token_logits, top_k)[0][..., -1:]
next_token_logits[indices_to_remove] = float("-inf")
# Top-P (Nucleus) Filtering
if top_p < 1.0:
sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
sorted_indices_to_remove = cumulative_probs > top_p
# Shift indices to protect first token exceeding the top-p boundary
sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
sorted_indices_to_remove[..., 0] = 0
indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
next_token_logits[indices_to_remove] = float("-inf")
# Sample token from filtered probability distribution
probs = F.softmax(next_token_logits, dim=-1)
next_token = torch.multinomial(probs, num_samples=1)
generated_ids = torch.cat([generated_ids, next_token], dim=-1)
# Halt generation if End-Of-Sequence token is hit
if next_token.item() == tokenizer.eos_token_id:
break
# Decode final output sequence
return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
# --- GRADIO INTERFACE CONSTRUCTION ---
css = """
footer {visibility: hidden}
.primary-btn {background-color: #5c4ff2 !important; color: white !important;}
.clear-btn {background-color: #374151 !important; color: white !important;}
"""
with gr.Blocks(title="Michael Morgan Model Catalogue", css=css) as demo:
gr.Markdown("# Michael Morgan Model Catalogue")
gr.Markdown("Select a model, enter a story starter, and adjust the generation settings.")
with gr.Row():
# Left-hand Interaction & Settings Column
with gr.Column(scale=1):
model_dropdown = gr.Dropdown(
choices=["NCN 2M (TinyStories)"],
value="NCN 2M (TinyStories)",
label="Model",
interactive=True
)
prompt_input = gr.Textbox(
lines=5,
placeholder="Type your story starter here...",
label="Story starter"
)
# Collapsible generation panel to preserve clean layout
with gr.Accordion("Generation settings", open=False):
max_tokens = gr.Slider(
minimum=1,
maximum=512,
value=128,
step=1,
label="Max new tokens"
)
temperature = gr.Slider(
minimum=0.0,
maximum=1.5,
value=0.7,
step=0.05,
label="Temperature (0 = greedy)"
)
top_p = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.8,
step=0.05,
label="Top-p"
)
top_k = gr.Slider(
minimum=1,
maximum=100,
value=25,
step=1,
label="Top-k"
)
rep_penalty = gr.Slider(
minimum=1.0,
maximum=2.0,
value=1.1,
step=0.05,
label="Repetition penalty"
)
with gr.Row():
clear_btn = gr.Button("Clear", elem_classes=["clear-btn"])
generate_btn = gr.Button("Generate", variant="primary", elem_classes=["primary-btn"])
# Right-hand Generation Output Column
with gr.Column(scale=1):
output_display = gr.Textbox(
lines=12,
placeholder="The generated story will appear here...",
label="Generated story",
interactive=False
)
# Examples Section matching TinyStories V2 style
gr.Markdown("### Try these examples")
gr.Examples(
examples=[
["Once upon a time, there was a little dragon who"],
["Lily found a tiny wooden key buried in the sand box. She wondered what"],
["One sunny morning, a big friendly dog named Max decided to"],
["Tom had a bright yellow balloon. When he let go of the string, the balloon"]
],
inputs=prompt_input
)
# Technical footer details
gr.Markdown(
"Model: SupraLabs/NCN-2M-TinyStories | License: Apache 2.0 | CPU-only | © 2026 Michael Morgan"
)
# Event Handlers
generate_btn.click(
fn=generate_text,
inputs=[model_dropdown, prompt_input, max_tokens, temperature, top_p, top_k, rep_penalty],
outputs=output_display
)
# Simple UI clear button callback
clear_btn.click(
fn=lambda: ("", ""),
inputs=None,
outputs=[prompt_input, output_display]
)
if __name__ == "__main__":
demo.launch()