Kyryll Kochkin committed on
Commit
d0fe290
1 Parent(s): 0f97387

finally added the new model in hopes this will work

Browse files
Files changed (1) hide show
  1. app/core/model_registry.py +19 -0
app/core/model_registry.py CHANGED
@@ -58,6 +58,25 @@ class ModelSpec:
58
 
59
 
60
  _DEFAULT_MODELS: List[ModelSpec] = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  ModelSpec(
62
  name="GPT3-dev-350m-2805",
63
  hf_repo="k050506koch/GPT3-dev-350m-2805",
 
58
 
59
 
60
  _DEFAULT_MODELS: List[ModelSpec] = [
61
+ ModelSpec(
62
+ name="GPT4-dev-177M-1511",
63
+ hf_repo="k050506koch/GPT4-dev-177M-1511",
64
+ dtype="float16",
65
+ device="auto",
66
+ max_context_tokens=512,
67
+ metadata=ModelMetadata(
68
+ description="117M parameter GPT-4-inspired checkpoint released on 15-11-2025.",
69
+ parameter_count="117M",
70
+ training_datasets="HuggingFaceFW/fineweb",
71
+ training_steps="78,000 steps · sequence length 512 · batch size 192 · Lion optimizer",
72
+ evaluation="29.30% MMLU (author reported)",
73
+ notes="Custom GPT-4-insopired architecture that requires trust_remote_code when loading.",
74
+ sources=(
75
+ "https://huggingface.co/k050506koch/GPT4-dev-177M-1511",
76
+ ),
77
+ sources=("https://huggingface.co/k050506koch/GPT4-dev-177M-1511",),
78
+ ),
79
+ ),
80
  ModelSpec(
81
  name="GPT3-dev-350m-2805",
82
  hf_repo="k050506koch/GPT3-dev-350m-2805",