RthItalia commited on
Commit
591ab0d
·
verified ·
1 Parent(s): 3f2be9c

Upload HF_SPACE_APP.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. HF_SPACE_APP.py +146 -0
HF_SPACE_APP.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ import numpy as np
6
+ import math
7
+ import os
8
+ import gc
9
+ from huggingface_hub import hf_hub_download
10
+
11
+ # --- MODEL ARCHITECTURE ---
12
+
13
class RMSNorm(nn.Module):
    """Root-mean-square layer normalization: scales by 1/RMS over the last
    dimension with a learnable per-channel gain (no mean-centering, no bias)."""

    def __init__(self, dim, eps=1e-6):
        super().__init__()
        # Learnable gain, initialised to identity; eps guards the rsqrt.
        self.w = nn.Parameter(torch.ones(dim))
        self.eps = eps

    def forward(self, x):
        # Compute the normalization in float32 for numerical stability,
        # then cast back to the input dtype before applying the gain.
        inv_rms = torch.rsqrt(x.float().pow(2).mean(dim=-1, keepdim=True) + self.eps)
        normalized = (x.float() * inv_rms).to(x.dtype)
        return normalized * self.w
21
+
22
class LoRA(nn.Module):
    """Low-rank adapter computing B(A x).

    A is small-random initialised, B is zero-initialised, so the adapter
    contributes exactly zero until trained — the standard LoRA setup."""

    def __init__(self, in_f, out_f, rank):
        super().__init__()
        self.A = nn.Parameter(torch.randn(rank, in_f) * 0.01)
        self.B = nn.Parameter(torch.zeros(out_f, rank))

    def forward(self, x):
        # Down-project to the low-rank space, then up-project back out.
        down = F.linear(x, self.A)
        return F.linear(down, self.B)
29
+
30
class TCNLayer(nn.Module):
    """Pre-norm gated causal TCN block with LoRA adapters.

    Pipeline: RMSNorm -> joint projection split into a conv branch and a
    gate -> causal depthwise conv1d -> SiLU(branch) * sigmoid(gate) ->
    output projection, added back to the residual with a learned scale."""

    def __init__(self, d_model, d_ff, kernel_size, dilation, lora_rank):
        super().__init__()
        self.dilation = dilation
        # Left-only padding of (k-1)*dilation keeps the conv strictly causal.
        self.padding = (kernel_size - 1) * dilation
        self.norm = RMSNorm(d_model)

        # Base weights are zero placeholders; real values are expected to
        # arrive via load_state_dict in the Space.
        self.w_in = nn.Parameter(torch.zeros(2 * d_ff, d_model))
        self.w_dw = nn.Parameter(torch.zeros(d_ff, 1, kernel_size))
        self.w_out = nn.Parameter(torch.zeros(d_model, d_ff))

        self.lora_in = LoRA(d_model, 2 * d_ff, lora_rank)
        self.lora_out = LoRA(d_ff, d_model, lora_rank)
        self.scale = nn.Parameter(torch.tensor(0.1))

    def forward(self, x):
        shortcut = x
        h = self.norm(x)
        # One fused projection, then split into conv branch and gate halves.
        projected = F.linear(h, self.w_in) + self.lora_in(h)
        branch, gate = projected.chunk(2, dim=-1)
        # (B, T, C) -> (B, C, T) for conv1d; pad the time axis on the left.
        branch = branch.transpose(1, 2)
        branch = F.pad(branch, (self.padding, 0))
        branch = F.conv1d(branch, self.w_dw, groups=branch.shape[1], dilation=self.dilation)
        branch = branch.transpose(1, 2)
        mixed = F.silu(branch) * torch.sigmoid(gate)
        delta = F.linear(mixed, self.w_out) + self.lora_out(mixed)
        return shortcut + delta * self.scale
58
+
59
class ZetaGrid25B(nn.Module):
    """Byte-level language model: byte + position embeddings feed a stack
    of dilated TCN layers; output logits share the input embedding matrix
    (weight tying), giving a 256-way byte distribution per position."""

    def __init__(self, n_layers=32, d_model=4096, d_ff=16384, ks=3, lora_r=128):
        super().__init__()
        self.emb = nn.Embedding(256, d_model)       # one row per byte value
        self.pos_emb = nn.Embedding(2048, d_model)  # learned absolute positions
        # Dilations cycle 1, 2, 4, ..., 128 every 8 layers.
        self.layers = nn.ModuleList(
            TCNLayer(d_model, d_ff, ks, 2 ** (i % 8), lora_r)
            for i in range(n_layers)
        )
        self.norm_f = RMSNorm(d_model)

    def forward(self, idx):
        _, seq_len = idx.shape
        positions = torch.arange(seq_len, device=idx.device).unsqueeze(0)
        h = self.emb(idx) + self.pos_emb(positions)
        for block in self.layers:
            h = block(h)
        h = self.norm_f(h)
        # Tied output head: project with the embedding weights.
        return F.linear(h, self.emb.weight)
77
+
78
# --- INFERENCE ENGINE ---

# Lazily-initialised singleton; populated on first call to load_model().
model = None
# Run on GPU when the Space provides one, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
82
+
83
def load_model():
    """Initialise the global model on first use; subsequent calls no-op.

    The real checkpoint download (hf_hub_download) is stubbed out below;
    a small randomly-initialised network is built instead, so the Space
    runs in lightweight demo mode. Failures are printed, leaving the
    global `model` as None.
    """
    global model
    if model is not None:
        return

    print("🚀 Loading RTH-LM weights from Hugging Face...")
    try:
        # Placeholder for real hub download
        # repo_id = "RthItalia/Rth-lm-25b"
        # ckpt_path = hf_hub_download(repo_id=repo_id, filename="soul_v1.pt")
        # genome_path = hf_hub_download(repo_id=repo_id, filename="genome_v1.npy")

        # For now, we initialize a "Small" 1B version if running on standard Space CPU
        model = ZetaGrid25B(n_layers=8, d_model=1024, d_ff=4096).to(DEVICE)
        model.eval()
        print("✅ Model initialized (Lightweight Demo Mode).")
    except Exception as e:
        print(f"❌ Load error: {e}")
100
+
101
@torch.no_grad()
def generate_rth(prompt, temp, top_k, max_len):
    """Autoregressively sample bytes from the model and decode as UTF-8.

    Args:
        prompt: conditioning text, encoded to UTF-8 bytes.
        temp: softmax temperature (> 0); lower values are greedier.
        top_k: number of highest-probability bytes kept per step.
        max_len: maximum number of bytes to generate.

    Returns:
        The generated continuation as a string (undecodable byte
        sequences are replaced), or an error message if the model
        failed to load.
    """
    load_model()
    if model is None:
        # load_model() already printed the cause; surface a clear message
        # instead of crashing on `None(...)` below.
        return "Model failed to load; check the Space logs."

    # Gradio sliders deliver floats — torch.topk and range need ints.
    top_k = max(1, int(top_k))
    max_len = int(max_len)

    prompt_bytes = list(prompt.encode('utf-8'))
    if not prompt_bytes:
        # A zero-length context would crash the causal conv stack;
        # seed generation with a single newline byte.
        prompt_bytes = [10]
    idx = torch.tensor([prompt_bytes], dtype=torch.long, device=DEVICE)

    output_bytes = []
    for _ in range(max_len):
        # Keep the context inside the model's 1024-byte demo window.
        logits = model(idx[:, -1024:])
        logits = logits[:, -1, :] / temp

        # Top-K filtering: mask everything below the k-th best logit.
        # (clamp k to the vocab size so a large slider value cannot crash)
        v, _ = torch.topk(logits, min(top_k, logits.shape[-1]))
        logits[logits < v[:, [-1]]] = -float('Inf')

        probs = F.softmax(logits, dim=-1)
        next_byte = torch.multinomial(probs, 1)

        idx = torch.cat([idx, next_byte], dim=1)
        output_bytes.append(next_byte.item())

        if next_byte.item() == 0:
            break  # byte 0 is treated as EOS

    return bytes(output_bytes).decode('utf-8', errors='replace')
125
+
126
# --- GRADIO UI ---
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# 🌌 RTH-LM: Gated TCN Interface")
    gr.Markdown("Direct byte-level generation using the Fractal architecture.")

    with gr.Row():
        # Left column: prompt entry plus sampling controls.
        with gr.Column():
            prompt_box = gr.Textbox(label="Input Prompt", placeholder="Write something...", lines=5)
            with gr.Row():
                temperature = gr.Slider(0.1, 1.5, 0.7, label="Temperature")
                topk_ctrl = gr.Slider(1, 100, 40, label="Top-K")
                length_ctrl = gr.Slider(10, 1000, 150, label="Max Bytes")
            generate_btn = gr.Button("Generate Energy", variant="primary")

        # Right column: model output.
        with gr.Column():
            response_box = gr.Textbox(label="RTH-LM Response", lines=12)

    generate_btn.click(
        generate_rth,
        inputs=[prompt_box, temperature, topk_ctrl, length_ctrl],
        outputs=response_box,
    )

if __name__ == "__main__":
    demo.launch()