#!/usr/bin/env python3
"""
AETHER-Net 0.8B - Inference Test Space
Loads the private model and tests text generation.
HF Space: T4 GPU, HF_TOKEN secret required
Deploy: FINAL-Bench/aether-net-test
"""
import os
import sys
import time
import json
import torch
import torch.nn.functional as F
import gradio as gr
from pathlib import Path
from huggingface_hub import snapshot_download

# ── Config ──
MODEL_REPO = "FINAL-Bench/AETHER-Net-0.8B"
DONOR_REPO = "Qwen/Qwen3.5-0.8B"  # For tokenizer
HF_TOKEN = os.getenv("HF_TOKEN")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Device: {DEVICE}")
print(f"HF_TOKEN: {'set' if HF_TOKEN else 'NOT SET'}")

# ── Download model weights from private repo ──
print(f"Downloading AETHER-Net weights from {MODEL_REPO}...")
model_dir = None
try:
    model_dir = snapshot_download(
        MODEL_REPO, token=HF_TOKEN,
        allow_patterns=["model.safetensors", "config.json"],
    )
    print(f"  Model downloaded to: {model_dir}")
except Exception as e:
    print(f"  Download failed: {e}")

# Source files are co-located in the same directory
APP_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, APP_DIR)

# ── Load model ──
MODEL = None
TOKENIZER = None


def load_model():
    global MODEL, TOKENIZER
    if MODEL is not None:
        return True

    # Load tokenizer from donor
    print("Loading tokenizer...")
    from transformers import AutoTokenizer
    try:
        TOKENIZER = AutoTokenizer.from_pretrained(
            DONOR_REPO, trust_remote_code=True, token=HF_TOKEN
        )
        print(f"  Tokenizer loaded: vocab_size={TOKENIZER.vocab_size}")
    except Exception as e:
        print(f"  Tokenizer failed: {e}")
        return False

    # Load AETHER-Net
    print("Loading AETHER-Net model...")
    try:
        from config import AetherNetConfig
        from model import AetherNetModel

        # Load config
        config_path = Path(model_dir) / "config.json" if model_dir else None
        if config_path and config_path.exists():
            with open(config_path) as f:
                cfg_dict = json.load(f)
            # Keep only the fields the dataclass actually defines
            valid_fields = set(AetherNetConfig.__dataclass_fields__)
            filtered = {k: v for k, v in cfg_dict.items() if k in valid_fields}
            config = AetherNetConfig(**filtered)
            print(f"  Config loaded: hidden={config.hidden_size}, layers={config.num_layers}")
        else:
            print("  No config.json, using defaults")
            config = AetherNetConfig(
                hidden_size=1024, intermediate_size=3584,
                num_layers=25, num_attention_heads=16, num_kv_heads=2,
                head_dim=64, vocab_size=248320,
                max_position_embeddings=4096,
                expert_intermediate_size=716,
                overcome_gate_hidden=64,
                sliding_window_size=1024,
                gdn_state_size=64, mamba2_state_size=64,
                tie_word_embeddings=True,
            )
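
        # NOTE: if these fallback values drift out of sync with the checkpoint,
        # load_state_dict() below will raise on shape mismatch even with
        # strict=False, which only tolerates missing/unexpected keys.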

        model = AetherNetModel(config)

        # Load weights
        weights_path = Path(model_dir) / "model.safetensors" if model_dir else None
        if weights_path and weights_path.exists():
            from safetensors.torch import load_file
            state = load_file(str(weights_path), device="cpu")
            model.load_state_dict(state, strict=False)
            print(f"  Weights loaded: {len(state)} tensors")
        else:
            print("  ⚠️ No weights found, using random init")

        model = model.to(DEVICE).eval()
        MODEL = model
        params = sum(p.numel() for p in model.parameters())
        mem = params * 2 / 1e9  # rough estimate at 2 bytes/param (BF16)
        print(f"  Model ready: {params:,} params (~{mem:.1f}GB)")
        return True
    except Exception as e:
        import traceback
        print(f"  Model load failed: {e}")
        traceback.print_exc()
        return False


# ── Generation ──
@torch.no_grad()  # inference only; avoids building an autograd graph each step
def generate(prompt, max_tokens=128, temperature=0.8, top_k=50, top_p=0.9):
    """Generate text from prompt."""
    if MODEL is None:
        success = load_model()
        if not success:
            return "❌ Model failed to load. Check logs."

    # Tokenize
    input_ids = TOKENIZER.encode(prompt, return_tensors="pt").to(DEVICE)
    generated = input_ids.clone()

    t0 = time.time()
    for _ in range(max_tokens):
        # Truncate to the max position; with no KV cache, the full
        # sequence is re-run through the model on every step
        if generated.shape[1] > 4096:
            generated = generated[:, -4096:]
        outputs = MODEL(input_ids=generated)
        logits = outputs["logits"][:, -1, :]

        if temperature > 0:
            # Temperature scaling
            logits = logits / temperature
            # Top-k: mask everything below the k-th largest logit
            if top_k > 0:
                values, _ = torch.topk(logits, top_k)
                min_val = values[:, -1].unsqueeze(-1)
                logits = torch.where(logits < min_val, torch.full_like(logits, -float('inf')), logits)
            # Top-p (nucleus): keep the smallest prefix of the sorted
            # distribution whose cumulative probability reaches top_p
            if top_p < 1.0:
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                sorted_probs = F.softmax(sorted_logits, dim=-1)
                cum_probs = torch.cumsum(sorted_probs, dim=-1)
                # Subtracting the current token's prob shifts the cutoff so the
                # token that first crosses top_p is kept (at least one survives)
                mask = cum_probs - sorted_probs > top_p
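                # Worked example: sorted probs (0.5, 0.3, 0.2) with top_p=0.7
                # give cum=(0.5, 0.8, 1.0) and cum-prob=(0.0, 0.5, 0.8), so
                # only the third token is masked and the kept mass is 0.8.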
                sorted_logits[mask] = -float('inf')
                # Scatter back to the original (unsorted) vocab order
                logits = torch.full_like(logits, -float('inf')).scatter(1, sorted_indices, sorted_logits)
            probs = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
        else:
            # Greedy decoding when temperature is 0
            next_token = logits.argmax(dim=-1, keepdim=True)

        generated = torch.cat([generated, next_token], dim=-1)
        # Stop at end-of-sequence
        if next_token.item() == TOKENIZER.eos_token_id:
            break

    elapsed = time.time() - t0
    tokens_generated = generated.shape[1] - input_ids.shape[1]
    tps = tokens_generated / elapsed if elapsed > 0 else 0
    output_text = TOKENIZER.decode(generated[0], skip_special_tokens=True)
    stats = f"\n\n---\n📊 {tokens_generated} tokens | {tps:.1f} tok/s | {elapsed:.2f}s"
    return output_text + stats
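
# Quick smoke test (sketch, assuming weights and tokenizer load cleanly):
#   print(generate("The five elements of nature are", max_tokens=32))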

def get_model_info():
    """Return model architecture info."""
    if MODEL is None:
        load_model()
    if MODEL is None:
        return "Model not loaded"

    info = "## AETHER-Net 0.8B - Architecture Info\n\n"
    info += "| Item | Value |\n|---|---|\n"
    info += f"| Device | {DEVICE} |\n"
    info += f"| Parameters | {sum(p.numel() for p in MODEL.parameters()):,} |\n"
    info += f"| Layers | {len(MODEL.layers)} |\n"
    info += f"| Vocab | {MODEL.config.vocab_size:,} |\n"
    info += f"| Hidden | {MODEL.config.hidden_size} |\n"

    # Layer types
    from config import LAYER_TYPES, LAYER_TO_ELEMENT, ELEMENTS
    info += "\n### Layer Map\n\n"
    info += "| Layer | Type | Element |\n|---|---|---|\n"
    for i in range(len(MODEL.layers)):
        lt = LAYER_TYPES[i]
        elem = LAYER_TO_ELEMENT[i]
        info += f"| {i} | {lt.upper()} | {elem} |\n"

    # Oheng status: sigmoid of each layer's Generate-boost gate,
    # averaged over the layers assigned to each element
    info += "\n### Oheng Status\n\n"
    for elem in ELEMENTS:
        layers = [i for i in range(len(MODEL.layers)) if LAYER_TO_ELEMENT[i] == elem]
        alphas = []
        for li in layers:
            gb = MODEL.layers[li].moe.generate_boost
            if gb is not None:
                a = torch.sigmoid(gb.alpha).detach()
                eidx = ELEMENTS.index(elem)
                if eidx < a.shape[0]:
                    alphas.append(a[eidx].item())
        avg = sum(alphas) / len(alphas) if alphas else 0
        info += f"- {elem}: α={avg:.4f}\n"
    return info


# ── Gradio UI ──
TITLE = """
<div style="text-align:center; padding:15px 0;">
    <h1>🚀 AETHER-Net 0.8B - Inference Test</h1>
    <p style="color:#666;">Cross-Architecture Knowledge Distillation from Qwen3.5-0.8B</p>
    <p style="color:#999; font-size:0.9em;">5×5 Magic Square | Oheng MoE | 5 Attention Types</p>
</div>
"""

with gr.Blocks(title="AETHER-Net Test") as app:
    gr.HTML(TITLE)
    with gr.Tabs():
        with gr.Tab("💬 Generate"):
            gr.Markdown("Enter a prompt and AETHER-Net will generate text.")
            with gr.Row():
                with gr.Column(scale=3):
                    prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Enter your prompt here...",
                        lines=3,
                        value="The theory of relativity explains that",
                    )
                with gr.Column(scale=1):
                    max_tokens = gr.Slider(16, 512, value=128, step=16, label="Max Tokens")
                    temperature = gr.Slider(0.0, 2.0, value=0.8, step=0.1, label="Temperature")
                    top_k = gr.Slider(0, 100, value=50, step=5, label="Top-K")
                    top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
            gen_btn = gr.Button("🚀 Generate", variant="primary", size="lg")
            output = gr.Textbox(label="Output", lines=12, interactive=False)
            gen_btn.click(
                fn=generate,
                inputs=[prompt, max_tokens, temperature, top_k, top_p],
                outputs=output,
            )
            gr.Markdown("### Quick Prompts")
            examples = gr.Examples(
                examples=[
                    ["The theory of relativity explains that"],
                    ["In Python, the most efficient way to sort a list is"],
                    ["The five elements of nature are"],
                    ["Artificial general intelligence requires"],
                    ["한국의 수도는"],  # "The capital of Korea is" (Korean-generation check)
                    ["def fibonacci(n):"],
                ],
                inputs=prompt,
            )
        with gr.Tab("📊 Model Info"):
            info_btn = gr.Button("Load Model Info", variant="primary")
            info_output = gr.Markdown()
            info_btn.click(fn=get_model_info, outputs=info_output)
        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
## AETHER-Net 0.8B

**Cross-Architecture Knowledge Distillation from Qwen3.5-0.8B**

### Method
- **Weight Transplant**: Qwen3.5-0.8B → AETHER-Net (5×5 Magic Square layout)
- **3-Stage MOHAWK Distillation**: KLD → Hidden Alignment → Oheng Regularization
- **Cost**: ~$0 (CPU-only, 100 steps demo)

### Architecture
- 25 Layers: 5 attention types × 5 elements
- GDN, Full, Mamba2, Sliding Window, Cross Attention
- Oheng MoE: 25 experts, Sangsaeng (Generate) + Sanggeuk (Overcome)

### Source
- Model: [FINAL-Bench/AETHER-Net-0.8B](https://huggingface.co/FINAL-Bench/AETHER-Net-0.8B) (private)
- Space: [FINAL-Bench/agi-model-gen](https://huggingface.co/spaces/FINAL-Bench/agi-model-gen)

---
© 2026 VIDRAFT / Ginigen AI
""")

# ── Preload model on startup ──
print("\n=== Pre-loading model ===")
load_model()
print("=== Ready ===\n")

if __name__ == "__main__":
    app.launch()