File size: 10,973 Bytes
ca19627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
#!/usr/bin/env python3
"""
AETHER-Net 0.8B — Inference Test Space

Loads the private model and tests text generation.
HF Space: T4 GPU, requires the HF_TOKEN secret

Deploy: FINAL-Bench/aether-net-test
"""
import os
import sys
import time
import json
import torch
import torch.nn.functional as F
import gradio as gr
from pathlib import Path
from huggingface_hub import hf_hub_download, snapshot_download

# ── Config ──
# Repository that holds the AETHER-Net weights and config.json.
MODEL_REPO = "FINAL-Bench/AETHER-Net-0.8B"
DONOR_REPO = "Qwen/Qwen3.5-0.8B"  # For tokenizer
# Access token read from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Startup diagnostics: only whether the token is set is printed, never its value.
print(f"Device: {DEVICE}")
print(f"HF_TOKEN: {'set' if HF_TOKEN else 'NOT SET'}")

# ── Download model weights from private repo ──
print(f"Downloading AETHER-Net weights from {MODEL_REPO}...")

# model_dir stays None when the download fails; load_model() checks for that
# and falls back to default config / random init instead of crashing here.
model_dir = None
try:
    model_dir = snapshot_download(
        MODEL_REPO, token=HF_TOKEN,
        allow_patterns=["model.safetensors", "config.json"],
    )
    print(f"  Model downloaded to: {model_dir}")
except Exception as e:
    # Best-effort: the failure is logged and startup continues.
    print(f"  Download failed: {e}")

# Source files are co-located in the same directory
# (put it first on sys.path so the local `config` and `model` modules import).
APP_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, APP_DIR)

# ── Load model ──
# Module-level singletons populated by load_model().
MODEL = None
TOKENIZER = None


def load_model():
    """Load the tokenizer and the AETHER-Net model into the module globals.

    The tokenizer is fetched from ``DONOR_REPO``. The model is built from
    ``config.json`` inside ``model_dir`` (or from built-in defaults when that
    file is unavailable) and, when ``model.safetensors`` is present, populated
    from it. The call is a no-op when ``MODEL`` is already set.

    Returns:
        bool: True when ``MODEL`` is ready, False when the tokenizer or the
        model could not be loaded. Errors are printed, not raised.
    """
    global MODEL, TOKENIZER

    if MODEL is not None:
        return True

    # Load tokenizer from donor
    print("Loading tokenizer...")
    from transformers import AutoTokenizer
    try:
        TOKENIZER = AutoTokenizer.from_pretrained(
            DONOR_REPO, trust_remote_code=True, token=HF_TOKEN
        )
        print(f"  Tokenizer loaded: vocab_size={TOKENIZER.vocab_size}")
    except Exception as e:
        print(f"  Tokenizer failed: {e}")
        return False

    # Load AETHER-Net
    print("Loading AETHER-Net model...")
    try:
        from config import AetherNetConfig
        from model import AetherNetModel

        # Load config
        config_path = Path(model_dir) / "config.json" if model_dir else None
        if config_path and config_path.exists():
            with open(config_path, encoding="utf-8") as f:
                cfg_dict = json.load(f)
            # Keep only keys the config dataclass declares, so extra keys in
            # config.json do not break the constructor call.
            valid_fields = set(AetherNetConfig.__dataclass_fields__)
            filtered = {k: v for k, v in cfg_dict.items() if k in valid_fields}
            config = AetherNetConfig(**filtered)
            print(f"  Config loaded: hidden={config.hidden_size}, layers={config.num_layers}")
        else:
            print("  No config.json, using defaults")
            config = AetherNetConfig(
                hidden_size=1024, intermediate_size=3584,
                num_layers=25, num_attention_heads=16, num_kv_heads=2,
                head_dim=64, vocab_size=248320,
                max_position_embeddings=4096,
                expert_intermediate_size=716,
                overcome_gate_hidden=64,
                sliding_window_size=1024,
                gdn_state_size=64, mamba2_state_size=64,
                tie_word_embeddings=True,
            )

        model = AetherNetModel(config)

        # Load weights
        weights_path = Path(model_dir) / "model.safetensors" if model_dir else None
        if weights_path and weights_path.exists():
            from safetensors.torch import load_file
            state = load_file(str(weights_path), device="cpu")
            # strict=False tolerates key mismatches; report them so a
            # checkpoint that matches few (or no) parameters does not pass
            # silently as a successful load.
            result = model.load_state_dict(state, strict=False)
            print(f"  Weights loaded: {len(state)} tensors")
            missing = list(getattr(result, "missing_keys", None) or [])
            unexpected = list(getattr(result, "unexpected_keys", None) or [])
            if missing:
                print(f"  Missing in checkpoint (left at init): {len(missing)} keys, e.g. {missing[:5]}")
            if unexpected:
                print(f"  Unexpected in checkpoint (ignored): {len(unexpected)} keys, e.g. {unexpected[:5]}")
        else:
            print("  ⚠️ No weights found, using random init")

        model = model.to(DEVICE).eval()
        MODEL = model

        params = sum(p.numel() for p in model.parameters())
        # Report the footprint of the parameters as actually stored, rather
        # than assuming 2 bytes per parameter.
        mem = sum(p.numel() * p.element_size() for p in model.parameters()) / 1e9
        print(f"  Model ready: {params:,} params (~{mem:.1f}GB)")
        return True

    except Exception as e:
        import traceback
        print(f"  Model load failed: {e}")
        traceback.print_exc()
        return False


# ── Generation ──
def _sample_next_token(logits, temperature, top_k, top_p):
    """Choose the next token id from 2-D last-position logits (batch x vocab).

    A non-positive temperature selects the arg-max token. Otherwise the logits
    are temperature-scaled, optionally restricted by top-k and top-p, and one
    token is sampled from the resulting distribution.
    """
    if temperature > 0:
        logits = logits / temperature

        # Top-k: mask everything below the k-th largest logit. k is clamped to
        # the vocabulary size because torch.topk rejects a larger k.
        if top_k > 0:
            k = min(top_k, logits.size(-1))
            values, _ = torch.topk(logits, k)
            min_val = values[:, -1].unsqueeze(-1)
            logits = torch.where(logits < min_val, torch.full_like(logits, -float('inf')), logits)

        # Top-p (nucleus): drop a token when the probability mass of the
        # tokens ranked before it already exceeds top_p. The top-ranked token
        # is therefore always kept.
        if top_p < 1.0:
            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
            sorted_probs = F.softmax(sorted_logits, dim=-1)
            cum_probs = torch.cumsum(sorted_probs, dim=-1)
            mask = cum_probs - sorted_probs > top_p
            sorted_logits[mask] = -float('inf')
            # Scatter the filtered values back to their vocabulary positions.
            logits = sorted_logits.scatter(1, sorted_indices, sorted_logits)

        probs = F.softmax(logits, dim=-1)
        return torch.multinomial(probs, num_samples=1)

    return logits.argmax(dim=-1, keepdim=True)


@torch.no_grad()
def generate(prompt, max_tokens=128, temperature=0.8, top_k=50, top_p=0.9):
    """Generate a continuation of ``prompt`` and return it with timing stats.

    Args:
        prompt: Text to continue.
        max_tokens: Maximum number of new tokens; cast to int.
        temperature: Sampling temperature; a value <= 0 means greedy decoding.
        top_k: Keep only the k highest logits; 0 disables. Cast to int and
            clamped to the vocabulary size.
        top_p: Nucleus threshold; a value >= 1.0 disables it.

    Returns:
        str: The decoded prompt plus continuation followed by a stats footer,
        or an error message when the model cannot be loaded or the prompt is
        empty.
    """
    if MODEL is None:
        success = load_model()
        if not success:
            return "❌ Model failed to load. Check logs."

    if not prompt:
        return "❌ Prompt is empty. Enter a prompt to generate from."

    # UI sliders may deliver floats; range() and torch.topk() need ints.
    max_tokens = int(max_tokens)
    top_k = int(top_k)

    # Tokenize
    input_ids = TOKENIZER.encode(prompt, return_tensors="pt").to(DEVICE)
    if input_ids.shape[-1] == 0:
        return "❌ Prompt is empty. Enter a prompt to generate from."
    generated = input_ids.clone()

    # Never feed more than 4096 positions, and respect a smaller limit if the
    # model config declares one.
    max_context = min(4096, int(getattr(MODEL.config, "max_position_embeddings", 4096)))
    eos_id = TOKENIZER.eos_token_id

    t0 = time.time()

    for _ in range(max_tokens):
        # Window only the model input; `generated` keeps the full sequence so
        # the decoded text and the token count stay correct after truncation.
        context = generated[:, -max_context:]

        outputs = MODEL(input_ids=context)
        logits = outputs["logits"][:, -1, :]

        next_token = _sample_next_token(logits, temperature, top_k, top_p)
        generated = torch.cat([generated, next_token], dim=-1)

        # EOS check
        if next_token.item() == eos_id:
            break

    elapsed = time.time() - t0
    tokens_generated = generated.shape[1] - input_ids.shape[1]
    tps = tokens_generated / elapsed if elapsed > 0 else 0

    output_text = TOKENIZER.decode(generated[0], skip_special_tokens=True)
    stats = f"\n\n---\n📊 {tokens_generated} tokens | {tps:.1f} tok/s | {elapsed:.2f}s"

    return output_text + stats


def get_model_info():
    """Return a Markdown report describing the loaded model.

    The report contains a summary table (device, parameter count, layer count,
    vocabulary and hidden size), a per-layer table of layer type and element,
    and, per element, the mean sigmoid of the ``generate_boost.alpha`` entry
    for that element across the element's layers.

    Returns:
        str: The Markdown text, or ``"Model not loaded"`` when loading fails.
    """
    if MODEL is None:
        load_model()

    if MODEL is None:
        return "Model not loaded"

    # Layer types
    from config import LAYER_TYPES, LAYER_TO_ELEMENT, ELEMENTS

    # Use the real layer count everywhere instead of a hard-coded 25.
    num_layers = len(MODEL.layers)
    param_count = sum(p.numel() for p in MODEL.parameters())

    parts = [
        "## AETHER-Net 0.8B — Architecture Info\n\n",
        "| Item | Value |\n|---|---|\n",
        f"| Device | {DEVICE} |\n",
        f"| Parameters | {param_count:,} |\n",
        f"| Layers | {num_layers} |\n",
        f"| Vocab | {MODEL.config.vocab_size:,} |\n",
        f"| Hidden | {MODEL.config.hidden_size} |\n",
        "\n### Layer Map\n\n",
        "| Layer | Type | Element |\n|---|---|---|\n",
    ]
    parts.extend(
        f"| {i} | {LAYER_TYPES[i].upper()} | {LAYER_TO_ELEMENT[i]} |\n"
        for i in range(num_layers)
    )

    # Oheng status
    parts.append("\n### Oheng Status\n\n")
    for elem in ELEMENTS:
        eidx = ELEMENTS.index(elem)
        alphas = []
        for li in range(num_layers):
            if LAYER_TO_ELEMENT[li] != elem:
                continue
            gb = MODEL.layers[li].moe.generate_boost
            if gb is None:
                continue
            a = torch.sigmoid(gb.alpha).detach()
            # Skip layers whose alpha vector has no entry for this element.
            if eidx < a.shape[0]:
                alphas.append(a[eidx].item())
        avg = sum(alphas) / len(alphas) if alphas else 0
        parts.append(f"- {elem}: α={avg:.4f}\n")

    return "".join(parts)


# ── Gradio UI ──
# HTML banner rendered at the top of the page.
TITLE = """
<div style="text-align:center; padding:15px 0;">
    <h1>🌌 AETHER-Net 0.8B — Inference Test</h1>
    <p style="color:#666;">Cross-Architecture Knowledge Distillation from Qwen3.5-0.8B</p>
    <p style="color:#999; font-size:0.9em;">5×5 Magic Square | Oheng MoE | 5 Attention Types</p>
</div>
"""

# Three-tab UI: text generation, model architecture report, and static notes.
with gr.Blocks(title="AETHER-Net Test") as app:
    gr.HTML(TITLE)

    with gr.Tabs():
        with gr.Tab("💬 Generate"):
            gr.Markdown("프롬프트를 입력하면 AETHER-Net이 텍스트를 생성합니다.")

            with gr.Row():
                with gr.Column(scale=3):
                    prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Enter your prompt here...",
                        lines=3,
                        value="The theory of relativity explains that"
                    )
                with gr.Column(scale=1):
                    # Sampling controls; passed positionally to generate().
                    max_tokens = gr.Slider(16, 512, value=128, step=16, label="Max Tokens")
                    temperature = gr.Slider(0.0, 2.0, value=0.8, step=0.1, label="Temperature")
                    top_k = gr.Slider(0, 100, value=50, step=5, label="Top-K")
                    top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")

            gen_btn = gr.Button("🚀 Generate", variant="primary", size="lg")
            output = gr.Textbox(label="Output", lines=12, interactive=False)

            # Input order must match generate()'s parameter order.
            gen_btn.click(
                fn=generate,
                inputs=[prompt, max_tokens, temperature, top_k, top_p],
                outputs=output,
            )

            gr.Markdown("### Quick Prompts")
            # Clicking an example only fills the prompt box.
            examples = gr.Examples(
                examples=[
                    ["The theory of relativity explains that"],
                    ["In Python, the most efficient way to sort a list is"],
                    ["The five elements of nature are"],
                    ["Artificial general intelligence requires"],
                    ["한국의 수도는"],
                    ["def fibonacci(n):"],
                ],
                inputs=prompt,
            )

        with gr.Tab("🔍 Model Info"):
            info_btn = gr.Button("Load Model Info", variant="primary")
            info_output = gr.Markdown()
            info_btn.click(fn=get_model_info, outputs=info_output)

        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
## AETHER-Net 0.8B

**Cross-Architecture Knowledge Distillation from Qwen3.5-0.8B**

### Method
- **Weight Transplant**: Qwen3.5-0.8B → AETHER-Net (5×5 Magic Square layout)
- **3-Stage MOHAWK Distillation**: KLD → Hidden Alignment → Oheng Regularization
- **Cost**: ~$0 (CPU-only, 100 steps demo)

### Architecture
- 25 Layers: 5 attention types × 5 elements
- GDN, Full, Mamba2, Sliding Window, Cross Attention
- Oheng MoE: 25 experts, 상생(Generate) + 상극(Overcome)

### Source
- Model: [FINAL-Bench/AETHER-Net-0.8B](https://huggingface.co/FINAL-Bench/AETHER-Net-0.8B) (private)
- Space: [FINAL-Bench/agi-model-gen](https://huggingface.co/spaces/FINAL-Bench/agi-model-gen)

---
© 2026 VIDRAFT / Ginigen AI
""")


# ── Preload model on startup ──
# Load the model eagerly at import time. The return value is ignored here:
# generate() and get_model_info() retry load_model() while MODEL is still None.
print("\n=== Pre-loading model ===")
load_model()
print("=== Ready ===\n")


# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()