Spaces:
Sleeping
Feat: Sync all features from main repository
Major feature updates and bug fixes:
**New Features:**
- Add HuggingFace Hub integration for fetching model metadata
- Add SGLang inference engine support
- Share preset models across all tabs (training, inference, multi-node)
- Add batch size optimizer API endpoint
- Add inference, multi-node, and exporter features to web UI
- Add framework config export (Accelerate, Lightning, Axolotl)
**Bug Fixes:**
- Fix critical calculation bugs and improve defensive programming
- Fix black formatting in formulas.py
- Handle null values in inference results display
- Restore two-column layout for tabbed interface
- Resolve CI failures in web/app.py
**Improvements:**
- Update dates to 2026 across codebase
- Improve error handling and validation
- Update web UI styling and accessibility
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- pyproject.toml +1 -0
- src/gpu_mem_calculator/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/config/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/config/__pycache__/parser.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/config/__pycache__/presets.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/core/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/core/__pycache__/calculator.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/core/__pycache__/formulas.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/core/__pycache__/models.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/core/__pycache__/multinode.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/engines/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/engines/__pycache__/base.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/engines/__pycache__/deepspeed.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/engines/__pycache__/fsdp.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/engines/__pycache__/megatron.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/engines/__pycache__/pytorch.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/exporters/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/exporters/__pycache__/accelerate.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/exporters/__pycache__/axolotl.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/exporters/__pycache__/lightning.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/exporters/__pycache__/manager.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/huggingface/__init__.py +19 -0
- src/gpu_mem_calculator/huggingface/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/huggingface/__pycache__/client.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/huggingface/__pycache__/exceptions.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/huggingface/__pycache__/mapper.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/huggingface/client.py +139 -0
- src/gpu_mem_calculator/huggingface/exceptions.py +25 -0
- src/gpu_mem_calculator/huggingface/mapper.py +167 -0
- src/gpu_mem_calculator/inference/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/inference/__pycache__/base.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/inference/__pycache__/calculator.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/inference/__pycache__/huggingface.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/inference/__pycache__/sglang.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/inference/__pycache__/tensorrt_llm.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/inference/__pycache__/tgi.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/inference/__pycache__/vllm.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gpu_mem_calculator/utils/__pycache__/precision.cpython-312.pyc +0 -0
- web/__pycache__/__init__.cpython-312.pyc +0 -0
- web/__pycache__/app.cpython-312.pyc +0 -0
- web/app.py +99 -1
- web/static/css/styles.css +76 -0
- web/static/js/app.js +178 -0
- web/templates/index.html +45 -17
|
@@ -45,6 +45,7 @@ dependencies = [
|
|
| 45 |
"click>=8.1.0",
|
| 46 |
"pydantic-settings>=2.0.0",
|
| 47 |
"rich>=13.0.0",
|
|
|
|
| 48 |
]
|
| 49 |
|
| 50 |
[project.optional-dependencies]
|
|
|
|
| 45 |
"click>=8.1.0",
|
| 46 |
"pydantic-settings>=2.0.0",
|
| 47 |
"rich>=13.0.0",
|
| 48 |
+
"httpx>=0.15.0",
|
| 49 |
]
|
| 50 |
|
| 51 |
[project.optional-dependencies]
|
|
Binary file (257 Bytes)
|
|
|
|
Binary file (375 Bytes)
|
|
|
|
Binary file (14.2 kB)
|
|
|
|
Binary file (3.35 kB)
|
|
|
|
Binary file (562 Bytes)
|
|
|
|
Binary file (6.51 kB)
|
|
|
|
Binary file (7.29 kB)
|
|
|
|
Binary file (24.4 kB)
|
|
|
|
Binary file (10.8 kB)
|
|
|
|
Binary file (688 Bytes)
|
|
|
|
Binary file (8.07 kB)
|
|
|
|
Binary file (11.2 kB)
|
|
|
|
Binary file (8.07 kB)
|
|
|
|
Binary file (8.5 kB)
|
|
|
|
Binary file (3.73 kB)
|
|
|
|
Binary file (628 Bytes)
|
|
|
|
Binary file (7.81 kB)
|
|
|
|
Binary file (9.07 kB)
|
|
|
|
Binary file (9.41 kB)
|
|
|
|
Binary file (9.29 kB)
|
|
|
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Hugging Face Hub integration for fetching model metadata."""

from gpu_mem_calculator.huggingface.client import HuggingFaceClient
from gpu_mem_calculator.huggingface.exceptions import (
    HuggingFaceError,
    InvalidConfigError,
    ModelNotFoundError,
    PrivateModelAccessError,
)
from gpu_mem_calculator.huggingface.mapper import HuggingFaceConfigMapper

# Public API of the huggingface subpackage.
__all__ = [
    "HuggingFaceClient",
    "HuggingFaceConfigMapper",
    "HuggingFaceError",
    "InvalidConfigError",
    "ModelNotFoundError",
    "PrivateModelAccessError",
]
|
Binary file (670 Bytes). View file
|
|
|
|
Binary file (6.87 kB). View file
|
|
|
|
Binary file (1.18 kB). View file
|
|
|
|
Binary file (5.23 kB). View file
|
|
|
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""HuggingFace Hub client for fetching model metadata."""

import asyncio
from typing import Any, cast

import httpx

from gpu_mem_calculator.huggingface.exceptions import (
    HuggingFaceError,
    InvalidConfigError,
    ModelNotFoundError,
    PrivateModelAccessError,
)


class HuggingFaceClient:
    """Client for interacting with HuggingFace Hub API."""

    def __init__(self, token: str | None = None, timeout: int = 30):
        """Initialize HF Hub client.

        Args:
            token: HF API token for private models (optional)
            timeout: HTTP timeout in seconds
        """
        self.token = token
        self.timeout = timeout
        self.api_base = "https://huggingface.co/api"
        self.raw_base = "https://huggingface.co"

    def _get_headers(self) -> dict[str, str]:
        """Get HTTP headers with optional authentication."""
        headers = {
            "User-Agent": "GPU-Mem-Calculator/0.1.0",
            "Accept": "application/json",
        }
        if self.token:
            headers["Authorization"] = f"Bearer {self.token}"
        return headers

    @staticmethod
    def _normalize_model_id(model_id: str) -> str:
        """Normalize a model ID: trim whitespace and surrounding slashes.

        Shared by both fetch paths so they validate identically
        (previously only ``get_model_info`` rejected empty IDs).

        Raises:
            ValueError: If the model ID is empty after normalization.
        """
        model_id = model_id.strip().strip("/")
        if not model_id:
            raise ValueError("Model ID cannot be empty")
        return model_id

    async def get_model_info(self, model_id: str) -> dict[str, Any]:
        """Get model metadata from HF Hub.

        Args:
            model_id: Model identifier (e.g., "meta-llama/Llama-2-7b-hf")

        Returns:
            Model metadata dict

        Raises:
            ValueError: If the model ID is empty
            ModelNotFoundError: If model doesn't exist
            PrivateModelAccessError: If authentication required
            HuggingFaceError: For network issues
        """
        model_id = self._normalize_model_id(model_id)

        url = f"{self.api_base}/models/{model_id}"

        # NOTE(review): the ``follow_redirects`` keyword requires httpx>=0.20;
        # the project pins ``httpx>=0.15.0`` — confirm the minimum version.
        async with httpx.AsyncClient(timeout=self.timeout, follow_redirects=True) as client:
            response = await client.get(url, headers=self._get_headers())

            if response.status_code == 401:
                raise PrivateModelAccessError(
                    f"Authentication required for model '{model_id}'. "
                    "Please provide a HuggingFace token."
                )
            elif response.status_code == 404:
                raise ModelNotFoundError(f"Model '{model_id}' not found on HuggingFace Hub")
            elif response.status_code != 200:
                raise HuggingFaceError(f"Failed to fetch model info: HTTP {response.status_code}")

            return cast(dict[str, Any], response.json())

    async def get_model_config(self, model_id: str) -> dict[str, Any]:
        """Get model config.json from HF Hub.

        Args:
            model_id: Model identifier

        Returns:
            Model configuration dict

        Raises:
            ValueError: If the model ID is empty
            PrivateModelAccessError: If authentication required
            InvalidConfigError: If config.json not found (including missing repos)
            HuggingFaceError: For network issues
        """
        model_id = self._normalize_model_id(model_id)

        # Try to fetch config.json from the default branch first.
        url = f"{self.raw_base}/{model_id}/raw/main/config.json"

        async with httpx.AsyncClient(timeout=self.timeout, follow_redirects=True) as client:
            response = await client.get(url, headers=self._get_headers())

            if response.status_code == 404:
                # Fall back to alternative branch names some repos use.
                for branch in ["base", "research"]:
                    url = f"{self.raw_base}/{model_id}/raw/{branch}/config.json"
                    response = await client.get(url, headers=self._get_headers())
                    if response.status_code == 200:
                        break

            if response.status_code == 404:
                raise InvalidConfigError(f"config.json not found for model '{model_id}'")
            elif response.status_code == 401:
                raise PrivateModelAccessError(f"Authentication required for model '{model_id}'")
            elif response.status_code != 200:
                raise HuggingFaceError(f"Failed to fetch model config: HTTP {response.status_code}")

            return cast(dict[str, Any], response.json())

    async def fetch_model_metadata(self, model_id: str) -> dict[str, Any]:
        """Fetch complete model metadata including info and config.

        The two requests are independent, so they are issued concurrently.

        Args:
            model_id: Model identifier

        Returns:
            Dictionary with 'model_info' and 'config' keys

        Raises:
            ModelNotFoundError: If model doesn't exist
            PrivateModelAccessError: If authentication required
            InvalidConfigError: If config.json not found
            HuggingFaceError: For other errors
        """
        model_info, model_config = await asyncio.gather(
            self.get_model_info(model_id),
            self.get_model_config(model_id),
        )

        return {
            "model_info": model_info,
            "config": model_config,
        }
|
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Custom exceptions for HuggingFace Hub integration.

All errors raised by the huggingface subpackage derive from
:class:`HuggingFaceError`, so callers can catch the base class to
handle any Hub-related failure.
"""


class HuggingFaceError(Exception):
    """Base exception for HuggingFace-related errors."""


class ModelNotFoundError(HuggingFaceError):
    """Raised when a model is not found on HuggingFace Hub."""


class PrivateModelAccessError(HuggingFaceError):
    """Raised when authentication is required for a private model."""


class InvalidConfigError(HuggingFaceError):
    """Raised when model config is invalid or missing required fields."""
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Map HuggingFace model configs to GPU Memory Calculator ModelConfig."""
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class HuggingFaceConfigMapper:
|
| 7 |
+
"""Map HF config.json to ModelConfig."""
|
| 8 |
+
|
| 9 |
+
# Mapping of HF config field names to ModelConfig fields
|
| 10 |
+
FIELD_MAPPINGS = {
|
| 11 |
+
# Direct mappings
|
| 12 |
+
"hidden_size": "hidden_size",
|
| 13 |
+
"num_hidden_layers": "num_layers",
|
| 14 |
+
"num_attention_heads": "num_attention_heads",
|
| 15 |
+
"vocab_size": "vocab_size",
|
| 16 |
+
"max_position_embeddings": "max_seq_len",
|
| 17 |
+
# Common alternatives
|
| 18 |
+
"n_layer": "num_layers",
|
| 19 |
+
"n_head": "num_attention_heads",
|
| 20 |
+
"n_embd": "hidden_size",
|
| 21 |
+
"n_positions": "max_seq_len",
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
def map_to_model_config(
|
| 25 |
+
self, hf_config: dict[str, Any], model_info: dict[str, Any] | None = None
|
| 26 |
+
) -> dict[str, Any]:
|
| 27 |
+
"""Map HF config to ModelConfig-compatible dictionary.
|
| 28 |
+
|
| 29 |
+
Args:
|
| 30 |
+
hf_config: HuggingFace config.json dict
|
| 31 |
+
model_info: Optional model metadata from HF API
|
| 32 |
+
|
| 33 |
+
Returns:
|
| 34 |
+
Dictionary with keys:
|
| 35 |
+
- 'config': ModelConfig-compatible dict
|
| 36 |
+
- 'missing_fields': List of required fields not found
|
| 37 |
+
- 'found_fields': List of fields that were mapped
|
| 38 |
+
"""
|
| 39 |
+
model_config = {}
|
| 40 |
+
missing_fields = []
|
| 41 |
+
found_fields = []
|
| 42 |
+
|
| 43 |
+
# Extract model name
|
| 44 |
+
if model_info:
|
| 45 |
+
model_config["name"] = model_info.get("modelId", "custom").replace("/", "-")
|
| 46 |
+
else:
|
| 47 |
+
model_config["name"] = "custom-hf-model"
|
| 48 |
+
|
| 49 |
+
# Map simple fields
|
| 50 |
+
for hf_field, our_field in self.FIELD_MAPPINGS.items():
|
| 51 |
+
if hf_field in hf_config:
|
| 52 |
+
value = hf_config[hf_field]
|
| 53 |
+
# Ensure type is int
|
| 54 |
+
if isinstance(value, (int, float)):
|
| 55 |
+
model_config[our_field] = int(value)
|
| 56 |
+
found_fields.append(our_field)
|
| 57 |
+
elif isinstance(value, str):
|
| 58 |
+
# Handle special cases like "32B"
|
| 59 |
+
if our_field == "num_parameters":
|
| 60 |
+
model_config[our_field] = value
|
| 61 |
+
found_fields.append(our_field)
|
| 62 |
+
|
| 63 |
+
# Extract MoE-specific fields
|
| 64 |
+
moe_config = self._extract_moe_config(hf_config)
|
| 65 |
+
if moe_config:
|
| 66 |
+
model_config.update(moe_config)
|
| 67 |
+
found_fields.extend(moe_config.keys())
|
| 68 |
+
|
| 69 |
+
# Handle num_parameters - compute if not provided
|
| 70 |
+
if "num_parameters" not in model_config:
|
| 71 |
+
# Try to compute from architecture
|
| 72 |
+
computed_params = self._estimate_num_parameters(hf_config, model_config)
|
| 73 |
+
if computed_params:
|
| 74 |
+
model_config["num_parameters"] = computed_params
|
| 75 |
+
found_fields.append("num_parameters")
|
| 76 |
+
|
| 77 |
+
# Identify missing fields
|
| 78 |
+
required_fields = [
|
| 79 |
+
"num_parameters",
|
| 80 |
+
"num_layers",
|
| 81 |
+
"hidden_size",
|
| 82 |
+
"num_attention_heads",
|
| 83 |
+
"vocab_size",
|
| 84 |
+
"max_seq_len",
|
| 85 |
+
]
|
| 86 |
+
|
| 87 |
+
for field in required_fields:
|
| 88 |
+
if field not in model_config:
|
| 89 |
+
missing_fields.append(field)
|
| 90 |
+
|
| 91 |
+
return {
|
| 92 |
+
"config": model_config,
|
| 93 |
+
"missing_fields": missing_fields,
|
| 94 |
+
"found_fields": found_fields,
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
def _extract_moe_config(self, hf_config: dict[str, Any]) -> dict[str, Any]:
|
| 98 |
+
"""Extract MoE-specific configuration from HF config.
|
| 99 |
+
|
| 100 |
+
Args:
|
| 101 |
+
hf_config: HuggingFace config
|
| 102 |
+
|
| 103 |
+
Returns:
|
| 104 |
+
Dict with moe_enabled, num_experts, top_k if MoE detected
|
| 105 |
+
"""
|
| 106 |
+
moe_config: dict[str, Any] = {}
|
| 107 |
+
|
| 108 |
+
# Common HF MoE field names
|
| 109 |
+
num_experts_val = hf_config.get(
|
| 110 |
+
"num_local_experts",
|
| 111 |
+
hf_config.get("num_experts", hf_config.get("n_expert")),
|
| 112 |
+
)
|
| 113 |
+
top_k_val = hf_config.get(
|
| 114 |
+
"expert_capacity",
|
| 115 |
+
hf_config.get("num_experts_per_tok", hf_config.get("top_k")),
|
| 116 |
+
)
|
| 117 |
+
|
| 118 |
+
# Type narrowing for MoE fields
|
| 119 |
+
if isinstance(num_experts_val, (int, float)) and num_experts_val > 1:
|
| 120 |
+
moe_config["moe_enabled"] = True
|
| 121 |
+
moe_config["num_experts"] = int(num_experts_val)
|
| 122 |
+
|
| 123 |
+
if isinstance(top_k_val, (int, float)):
|
| 124 |
+
moe_config["top_k"] = int(top_k_val)
|
| 125 |
+
else:
|
| 126 |
+
moe_config["top_k"] = 2 # Default
|
| 127 |
+
|
| 128 |
+
return moe_config
|
| 129 |
+
|
| 130 |
+
def _estimate_num_parameters(
|
| 131 |
+
self, hf_config: dict[str, Any], partial_config: dict[str, Any]
|
| 132 |
+
) -> int | None:
|
| 133 |
+
"""Estimate number of parameters if not provided.
|
| 134 |
+
|
| 135 |
+
Args:
|
| 136 |
+
hf_config: Full HF config
|
| 137 |
+
partial_config: Partially built config
|
| 138 |
+
|
| 139 |
+
Returns:
|
| 140 |
+
Estimated parameter count or None
|
| 141 |
+
"""
|
| 142 |
+
# Check if HF provides the count directly
|
| 143 |
+
if "num_parameters" in hf_config:
|
| 144 |
+
return int(hf_config["num_parameters"])
|
| 145 |
+
|
| 146 |
+
# Try to compute from model architecture
|
| 147 |
+
hidden_size = partial_config.get("hidden_size")
|
| 148 |
+
num_layers = partial_config.get("num_layers")
|
| 149 |
+
vocab_size = partial_config.get("vocab_size")
|
| 150 |
+
|
| 151 |
+
# Type narrowing for calculation
|
| 152 |
+
if (
|
| 153 |
+
isinstance(hidden_size, int)
|
| 154 |
+
and isinstance(num_layers, int)
|
| 155 |
+
and isinstance(vocab_size, int)
|
| 156 |
+
):
|
| 157 |
+
# Rough estimate for transformer models
|
| 158 |
+
# Based on: embeddings + transformer layers
|
| 159 |
+
embedding_params = vocab_size * hidden_size
|
| 160 |
+
layer_params = 4 * hidden_size * hidden_size * num_layers # FFN + attention
|
| 161 |
+
total = embedding_params + layer_params
|
| 162 |
+
|
| 163 |
+
# Apply scaling factor for real-world variance
|
| 164 |
+
# (accounting for biases, layernorm, etc.)
|
| 165 |
+
return int(total * 1.2)
|
| 166 |
+
|
| 167 |
+
return None
|
|
Binary file (527 Bytes)
|
|
|
|
Binary file (6.65 kB)
|
|
|
|
Binary file (4.26 kB)
|
|
|
|
Binary file (3.56 kB)
|
|
|
|
Binary file (6.34 kB)
|
|
|
|
Binary file (3.83 kB)
|
|
|
|
Binary file (3.91 kB)
|
|
|
|
Binary file (5.02 kB)
|
|
|
|
Binary file (360 Bytes)
|
|
|
|
Binary file (3.01 kB)
|
|
|
|
Binary file (212 Bytes)
|
|
|
|
Binary file (43.8 kB)
|
|
|
|
@@ -10,7 +10,7 @@ from fastapi import FastAPI, HTTPException
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from fastapi.staticfiles import StaticFiles
|
| 12 |
from fastapi.templating import Jinja2Templates
|
| 13 |
-
from pydantic import BaseModel, Field, field_validator, model_validator
|
| 14 |
from starlette.requests import Request
|
| 15 |
|
| 16 |
from gpu_mem_calculator.config.presets import load_presets
|
|
@@ -29,6 +29,14 @@ from gpu_mem_calculator.core.models import (
|
|
| 29 |
)
|
| 30 |
from gpu_mem_calculator.core.multinode import MultiNodeCalculator
|
| 31 |
from gpu_mem_calculator.exporters.manager import ExportFormat, ExportManager
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
from gpu_mem_calculator.inference.calculator import InferenceMemoryCalculator
|
| 33 |
|
| 34 |
# Configure logging
|
|
@@ -154,6 +162,15 @@ class PresetInfo(BaseModel):
|
|
| 154 |
config: dict[str, Any]
|
| 155 |
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
# Simple in-memory cache for calculation results
|
| 158 |
# In production, use Redis or similar
|
| 159 |
_calculation_cache: dict[str, tuple[MemoryResult, float]] = {} # key -> (result, timestamp)
|
|
@@ -278,6 +295,87 @@ async def get_preset(preset_name: str) -> dict[str, Any]:
|
|
| 278 |
return PRESETS[preset_name].config
|
| 279 |
|
| 280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
@app.post("/api/calculate")
|
| 282 |
async def calculate_memory(request: CalculateRequest) -> MemoryResult:
|
| 283 |
"""Calculate GPU memory requirements.
|
|
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from fastapi.staticfiles import StaticFiles
|
| 12 |
from fastapi.templating import Jinja2Templates
|
| 13 |
+
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
|
| 14 |
from starlette.requests import Request
|
| 15 |
|
| 16 |
from gpu_mem_calculator.config.presets import load_presets
|
|
|
|
| 29 |
)
|
| 30 |
from gpu_mem_calculator.core.multinode import MultiNodeCalculator
|
| 31 |
from gpu_mem_calculator.exporters.manager import ExportFormat, ExportManager
|
| 32 |
+
from gpu_mem_calculator.huggingface import (
|
| 33 |
+
HuggingFaceClient,
|
| 34 |
+
HuggingFaceConfigMapper,
|
| 35 |
+
HuggingFaceError,
|
| 36 |
+
InvalidConfigError,
|
| 37 |
+
ModelNotFoundError,
|
| 38 |
+
PrivateModelAccessError,
|
| 39 |
+
)
|
| 40 |
from gpu_mem_calculator.inference.calculator import InferenceMemoryCalculator
|
| 41 |
|
| 42 |
# Configure logging
|
|
|
|
| 162 |
config: dict[str, Any]
|
| 163 |
|
| 164 |
|
| 165 |
+
class HuggingFaceRequest(BaseModel):
|
| 166 |
+
"""Request for fetching HuggingFace model metadata."""
|
| 167 |
+
|
| 168 |
+
model_config = ConfigDict(protected_namespaces=())
|
| 169 |
+
|
| 170 |
+
model_id: str = Field(description="HuggingFace model ID (e.g., meta-llama/Llama-2-7b-hf)")
|
| 171 |
+
token: str | None = Field(default=None, description="HF token for private models")
|
| 172 |
+
|
| 173 |
+
|
| 174 |
# Simple in-memory cache for calculation results
|
| 175 |
# In production, use Redis or similar
|
| 176 |
_calculation_cache: dict[str, tuple[MemoryResult, float]] = {} # key -> (result, timestamp)
|
|
|
|
| 295 |
return PRESETS[preset_name].config
|
| 296 |
|
| 297 |
|
| 298 |
+
def _hf_http_error(status_code: int, error: str, message: str, error_type: str) -> HTTPException:
    """Build an HTTPException with the structured error payload the web UI expects."""
    return HTTPException(
        status_code=status_code,
        detail={
            "error": error,
            "message": message,
            "type": error_type,
        },
    )


@app.post("/api/hf/fetch")
async def fetch_huggingface_model(request: HuggingFaceRequest) -> dict[str, Any]:
    """Fetch model metadata from HuggingFace Hub.

    Args:
        request: Request with model_id and optional token

    Returns:
        Model config with fields filled from HF, plus list of missing fields

    Raises:
        HTTPException: If model not found, access denied, or invalid config
    """
    try:
        # Initialize HF client
        client = HuggingFaceClient(token=request.token)

        # Fetch metadata
        metadata = await client.fetch_model_metadata(request.model_id)

        # Map to ModelConfig
        mapper = HuggingFaceConfigMapper()
        result = mapper.map_to_model_config(metadata["config"], metadata.get("model_info"))

        return {
            "model_id": request.model_id,
            "config": result["config"],
            "missing_fields": result["missing_fields"],
            "found_fields": result["found_fields"],
            "warnings": [],
        }

    except PrivateModelAccessError as e:
        raise _hf_http_error(401, "Authentication required", str(e), "auth_error") from e
    except ModelNotFoundError as e:
        raise _hf_http_error(404, "Model not found", str(e), "not_found") from e
    except InvalidConfigError as e:
        raise _hf_http_error(422, "Invalid model configuration", str(e), "invalid_config") from e
    except HuggingFaceError as e:
        # Lazy %-args so the message is only formatted when actually emitted.
        logger.error("HuggingFace error: %s", e)
        raise _hf_http_error(500, "HuggingFace API error", str(e), "api_error") from e
    except Exception as e:
        # Catch-all boundary: log with traceback, hide details from the client.
        logger.error("Unexpected error fetching HF model: %s", e, exc_info=True)
        raise _hf_http_error(
            500, "Internal server error", "An unexpected error occurred", "server_error"
        ) from e
|
| 377 |
+
|
| 378 |
+
|
| 379 |
@app.post("/api/calculate")
|
| 380 |
async def calculate_memory(request: CalculateRequest) -> MemoryResult:
|
| 381 |
"""Calculate GPU memory requirements.
|
|
@@ -530,3 +530,79 @@ header h1 {
|
|
| 530 |
.formula-references a:hover {
|
| 531 |
text-decoration: underline;
|
| 532 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
.formula-references a:hover {
|
| 531 |
text-decoration: underline;
|
| 532 |
}
|

/* Hugging Face Integration */

/* Preset selector and the HF fetch button share one row. */
.preset-row {
    display: flex;
    gap: 10px;
    align-items: center;
}

.preset-row select {
    flex: 1;
}

/* Tertiary action button (triggers the HF fetch panel). */
.btn-tertiary {
    background-color: #ffd700;
    color: #1e293b;
    border: none;
    padding: 10px 16px;
    border-radius: 6px;
    font-size: 0.9rem;
    font-weight: 600;
    cursor: pointer;
    transition: all 0.2s;
    white-space: nowrap;
}

.btn-tertiary:hover {
    background-color: #f0c000;
}

.btn-tertiary:active {
    transform: scale(0.98);
}

/* Collapsible panel holding the model-id and token inputs. */
.hf-fetch-panel {
    background: #f0f9ff;
    border: 1px solid #2563eb;
    border-radius: 8px;
    padding: 20px;
    margin-top: 15px;
}

.hf-fetch-panel .form-group {
    margin-bottom: 15px;
}

.hf-fetch-panel .help-text {
    display: block;
    font-size: 0.85rem;
    color: var(--text-secondary);
    margin-top: 5px;
}

/* Status messages shown while and after fetching. */
.loading-message {
    color: var(--primary-color);
    font-style: italic;
    padding: 10px;
    text-align: center;
}

.error-message {
    background-color: #fee;
    color: #c00;
    padding: 12px;
    border-radius: 6px;
    border: 1px solid #fcc;
    margin-top: 10px;
}

.success-message {
    background-color: #efe;
    color: #0a0;
    padding: 12px;
    border-radius: 6px;
    border: 1px solid #cfc;
    margin-top: 10px;
}
|
|
@@ -32,6 +32,19 @@ class GPUMemCalculator {
|
|
| 32 |
}
|
| 33 |
});
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
// Batch size slider sync
|
| 36 |
const batchSizeInput = document.getElementById('batch-size');
|
| 37 |
const batchSizeSlider = document.getElementById('batch-size-slider');
|
|
@@ -475,6 +488,171 @@ class GPUMemCalculator {
|
|
| 475 |
}
|
| 476 |
}
|
| 477 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
applyConfig(config) {
|
| 479 |
// Set flag to prevent auto-calculation during config load
|
| 480 |
this.isApplyingConfig = true;
|
|
|
|
| 32 |
}
|
| 33 |
});
|
| 34 |
|
| 35 |
+
// Hugging Face fetch functionality
|
| 36 |
+
document.getElementById('fetch-hf-btn').addEventListener('click', () => {
|
| 37 |
+
this.showHFPFetchPanel();
|
| 38 |
+
});
|
| 39 |
+
|
| 40 |
+
document.getElementById('hf-fetch-submit').addEventListener('click', () => {
|
| 41 |
+
this.fetchFromHuggingFace();
|
| 42 |
+
});
|
| 43 |
+
|
| 44 |
+
document.getElementById('hf-fetch-cancel').addEventListener('click', () => {
|
| 45 |
+
this.hideHFFetchPanel();
|
| 46 |
+
});
|
| 47 |
+
|
| 48 |
// Batch size slider sync
|
| 49 |
const batchSizeInput = document.getElementById('batch-size');
|
| 50 |
const batchSizeSlider = document.getElementById('batch-size-slider');
|
|
|
|
| 488 |
}
|
| 489 |
}
|
| 490 |
|
| 491 |
+
showHFPFetchPanel() {
|
| 492 |
+
const panel = document.getElementById('hf-fetch-panel');
|
| 493 |
+
panel.style.display = 'block';
|
| 494 |
+
|
| 495 |
+
// Auto-focus model ID input
|
| 496 |
+
document.getElementById('hf-model-id').focus();
|
| 497 |
+
|
| 498 |
+
// Clear previous messages
|
| 499 |
+
document.getElementById('hf-error').style.display = 'none';
|
| 500 |
+
document.getElementById('hf-success').style.display = 'none';
|
| 501 |
+
}
|
| 502 |
+
|
| 503 |
+
hideHFFetchPanel() {
|
| 504 |
+
const panel = document.getElementById('hf-fetch-panel');
|
| 505 |
+
panel.style.display = 'none';
|
| 506 |
+
|
| 507 |
+
// Clear inputs
|
| 508 |
+
document.getElementById('hf-model-id').value = '';
|
| 509 |
+
document.getElementById('hf-token').value = '';
|
| 510 |
+
|
| 511 |
+
// Clear messages
|
| 512 |
+
document.getElementById('hf-loading').style.display = 'none';
|
| 513 |
+
document.getElementById('hf-error').style.display = 'none';
|
| 514 |
+
document.getElementById('hf-success').style.display = 'none';
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
async fetchFromHuggingFace() {
|
| 518 |
+
const modelId = document.getElementById('hf-model-id').value.trim();
|
| 519 |
+
const token = document.getElementById('hf-token').value.trim();
|
| 520 |
+
|
| 521 |
+
if (!modelId) {
|
| 522 |
+
document.getElementById('hf-error').textContent = 'Please enter a model ID';
|
| 523 |
+
document.getElementById('hf-error').style.display = 'block';
|
| 524 |
+
return;
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
const loadingEl = document.getElementById('hf-loading');
|
| 528 |
+
const errorEl = document.getElementById('hf-error');
|
| 529 |
+
const successEl = document.getElementById('hf-success');
|
| 530 |
+
const submitBtn = document.getElementById('hf-fetch-submit');
|
| 531 |
+
|
| 532 |
+
// Show loading state
|
| 533 |
+
loadingEl.style.display = 'block';
|
| 534 |
+
errorEl.style.display = 'none';
|
| 535 |
+
successEl.style.display = 'none';
|
| 536 |
+
submitBtn.disabled = true;
|
| 537 |
+
|
| 538 |
+
try {
|
| 539 |
+
const response = await fetch(`${this.apiBase}/hf/fetch`, {
|
| 540 |
+
method: 'POST',
|
| 541 |
+
headers: {
|
| 542 |
+
'Content-Type': 'application/json',
|
| 543 |
+
},
|
| 544 |
+
body: JSON.stringify({
|
| 545 |
+
model_id: modelId,
|
| 546 |
+
token: token || null,
|
| 547 |
+
}),
|
| 548 |
+
});
|
| 549 |
+
|
| 550 |
+
const result = await response.json();
|
| 551 |
+
|
| 552 |
+
if (!response.ok) {
|
| 553 |
+
throw new Error(result.detail?.message || result.detail || 'Failed to fetch model');
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
// Apply fetched config
|
| 557 |
+
this.applyHuggingFaceConfig(result.config);
|
| 558 |
+
|
| 559 |
+
// Show success message
|
| 560 |
+
let successMsg = `Successfully fetched ${modelId}`;
|
| 561 |
+
if (result.missing_fields.length > 0) {
|
| 562 |
+
const missingList = result.missing_fields.join(', ');
|
| 563 |
+
successMsg += `. Please provide manually: ${missingList}`;
|
| 564 |
+
// Highlight missing fields
|
| 565 |
+
result.missing_fields.forEach(field => {
|
| 566 |
+
const input = document.getElementById(this.getFieldIdFromConfigField(field));
|
| 567 |
+
if (input) {
|
| 568 |
+
input.style.borderColor = '#f59e0b';
|
| 569 |
+
input.style.borderWidth = '2px';
|
| 570 |
+
}
|
| 571 |
+
});
|
| 572 |
+
} else {
|
| 573 |
+
successMsg += '. All fields populated!';
|
| 574 |
+
}
|
| 575 |
+
successEl.textContent = successMsg;
|
| 576 |
+
successEl.style.display = 'block';
|
| 577 |
+
|
| 578 |
+
// Hide panel after 3 seconds
|
| 579 |
+
setTimeout(() => {
|
| 580 |
+
this.hideHFFetchPanel();
|
| 581 |
+
}, 3000);
|
| 582 |
+
|
| 583 |
+
} catch (error) {
|
| 584 |
+
errorEl.textContent = `Error: ${error.message}`;
|
| 585 |
+
errorEl.style.display = 'block';
|
| 586 |
+
} finally {
|
| 587 |
+
loadingEl.style.display = 'none';
|
| 588 |
+
submitBtn.disabled = false;
|
| 589 |
+
}
|
| 590 |
+
}
|
| 591 |
+
|
| 592 |
+
applyHuggingFaceConfig(config) {
|
| 593 |
+
// Set flag to prevent auto-calculation
|
| 594 |
+
this.isApplyingConfig = true;
|
| 595 |
+
|
| 596 |
+
// Apply model fields
|
| 597 |
+
if (config.name) {
|
| 598 |
+
document.getElementById('model-name').value = config.name;
|
| 599 |
+
}
|
| 600 |
+
if (config.num_parameters) {
|
| 601 |
+
document.getElementById('num-params').value = config.num_parameters;
|
| 602 |
+
}
|
| 603 |
+
if (config.num_layers) {
|
| 604 |
+
document.getElementById('num-layers').value = config.num_layers;
|
| 605 |
+
}
|
| 606 |
+
if (config.hidden_size) {
|
| 607 |
+
document.getElementById('hidden-size').value = config.hidden_size;
|
| 608 |
+
}
|
| 609 |
+
if (config.num_attention_heads) {
|
| 610 |
+
document.getElementById('num-heads').value = config.num_attention_heads;
|
| 611 |
+
}
|
| 612 |
+
if (config.vocab_size) {
|
| 613 |
+
document.getElementById('vocab-size').value = config.vocab_size;
|
| 614 |
+
}
|
| 615 |
+
if (config.max_seq_len) {
|
| 616 |
+
document.getElementById('seq-len').value = config.max_seq_len;
|
| 617 |
+
}
|
| 618 |
+
|
| 619 |
+
// Apply MoE configuration
|
| 620 |
+
if (config.moe_enabled) {
|
| 621 |
+
document.getElementById('moe-enabled').checked = true;
|
| 622 |
+
this.toggleMoEFields(true);
|
| 623 |
+
|
| 624 |
+
if (config.num_experts) {
|
| 625 |
+
document.getElementById('num-experts').value = config.num_experts;
|
| 626 |
+
}
|
| 627 |
+
if (config.top_k) {
|
| 628 |
+
document.getElementById('top-k').value = config.top_k;
|
| 629 |
+
}
|
| 630 |
+
this.updateMoEDisplay();
|
| 631 |
+
} else {
|
| 632 |
+
document.getElementById('moe-enabled').checked = false;
|
| 633 |
+
this.toggleMoEFields(false);
|
| 634 |
+
}
|
| 635 |
+
|
| 636 |
+
// Re-enable auto-calculation and trigger calculation
|
| 637 |
+
setTimeout(() => {
|
| 638 |
+
this.isApplyingConfig = false;
|
| 639 |
+
this.calculateMemory();
|
| 640 |
+
}, 100);
|
| 641 |
+
}
|
| 642 |
+
|
| 643 |
+
getFieldIdFromConfigField(fieldName) {
|
| 644 |
+
// Map config field names to input element IDs
|
| 645 |
+
const fieldMap = {
|
| 646 |
+
'num_parameters': 'num-params',
|
| 647 |
+
'num_layers': 'num-layers',
|
| 648 |
+
'hidden_size': 'hidden-size',
|
| 649 |
+
'num_attention_heads': 'num-heads',
|
| 650 |
+
'vocab_size': 'vocab-size',
|
| 651 |
+
'max_seq_len': 'seq-len',
|
| 652 |
+
};
|
| 653 |
+
return fieldMap[fieldName] || null;
|
| 654 |
+
}
|
| 655 |
+
|
| 656 |
applyConfig(config) {
|
| 657 |
// Set flag to prevent auto-calculation during config load
|
| 658 |
this.isApplyingConfig = true;
|
|
@@ -32,23 +32,51 @@
|
|
| 32 |
<h3>Model Settings</h3>
|
| 33 |
<div class="form-group">
|
| 34 |
<label for="preset-select">Preset Model:</label>
|
| 35 |
-
<
|
| 36 |
-
<
|
| 37 |
-
|
| 38 |
-
<
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
<
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
</div>
|
| 53 |
|
| 54 |
<div class="form-grid">
|
|
|
|
| 32 |
<h3>Model Settings</h3>
|
| 33 |
<div class="form-group">
|
| 34 |
<label for="preset-select">Preset Model:</label>
|
| 35 |
+
<div class="preset-row">
|
| 36 |
+
<select id="preset-select">
|
| 37 |
+
<option value="custom">Custom</option>
|
| 38 |
+
<optgroup label="Dense Models">
|
| 39 |
+
<option value="llama2-7b">LLaMA 2 7B</option>
|
| 40 |
+
<option value="llama2-13b">LLaMA 2 13B</option>
|
| 41 |
+
<option value="llama2-70b">LLaMA 2 70B</option>
|
| 42 |
+
<option value="gpt3-175b">GPT-3 175B</option>
|
| 43 |
+
</optgroup>
|
| 44 |
+
<optgroup label="MoE (Mixture of Experts) Models">
|
| 45 |
+
<option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option>
|
| 46 |
+
<option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option>
|
| 47 |
+
<option value="glm-4-9b">GLM-4 9B (MoE)</option>
|
| 48 |
+
<option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option>
|
| 49 |
+
<option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option>
|
| 50 |
+
<option value="deepseek-moe-16b">DeepSeek-MoE 16B</option>
|
| 51 |
+
</optgroup>
|
| 52 |
+
</select>
|
| 53 |
+
<button id="fetch-hf-btn" class="btn-tertiary" title="Fetch from HuggingFace Hub" type="button">
|
| 54 |
+
<span>🤗 Fetch from HF</span>
|
| 55 |
+
</button>
|
| 56 |
+
</div>
|
| 57 |
+
</div>
|
| 58 |
+
|
| 59 |
+
<!-- HF Fetch Panel (hidden by default) -->
|
| 60 |
+
<div id="hf-fetch-panel" style="display: none;" class="hf-fetch-panel">
|
| 61 |
+
<div class="form-group">
|
| 62 |
+
<label for="hf-model-id">HuggingFace Model ID:</label>
|
| 63 |
+
<input type="text" id="hf-model-id" placeholder="e.g., meta-llama/Llama-2-7b-hf" aria-describedby="hf-model-help">
|
| 64 |
+
<span id="hf-model-help" class="help-text">Enter the HuggingFace model repository ID (e.g., meta-llama/Llama-2-7b-hf)</span>
|
| 65 |
+
</div>
|
| 66 |
+
<div class="form-group">
|
| 67 |
+
<label for="hf-token">HF Token (optional, for private models):</label>
|
| 68 |
+
<input type="password" id="hf-token" placeholder="hf_xxxxxxxxxxxx" aria-describedby="hf-token-help">
|
| 69 |
+
<span id="hf-token-help" class="help-text">Leave empty for public models, provide token for gated/private models</span>
|
| 70 |
+
</div>
|
| 71 |
+
<div class="button-group">
|
| 72 |
+
<button id="hf-fetch-submit" class="btn-primary" type="button">Fetch Model</button>
|
| 73 |
+
<button id="hf-fetch-cancel" class="btn-secondary" type="button">Cancel</button>
|
| 74 |
+
</div>
|
| 75 |
+
<div id="hf-loading" style="display: none;" class="loading-message">
|
| 76 |
+
<p>Fetching model from HuggingFace Hub...</p>
|
| 77 |
+
</div>
|
| 78 |
+
<div id="hf-error" style="display: none;" class="error-message" aria-live="poloice"></div>
|
| 79 |
+
<div id="hf-success" style="display: none;" class="success-message" aria-live="police"></div>
|
| 80 |
</div>
|
| 81 |
|
| 82 |
<div class="form-grid">
|