Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -119,9 +119,9 @@ def repo_exists(repo_id: str) -> bool:
|
|
| 119 |
def get_name(models: list[pd.Series], username: str, version=0) -> str:
|
| 120 |
model_name = models[0]["Model"].split("/")[-1].split("-")[0].capitalize() \
|
| 121 |
+ models[1]["Model"].split("/")[-1].split("-")[0].capitalize() \
|
| 122 |
-
+ "-
|
| 123 |
if version > 0:
|
| 124 |
-
model_name = model_name.split("-")[0] + f"-v{version}-
|
| 125 |
|
| 126 |
if repo_exists(f"{username}/{model_name}"):
|
| 127 |
get_name(models, username, version+1)
|
|
@@ -144,74 +144,50 @@ def get_license(models: list[pd.Series]) -> str:
|
|
| 144 |
|
| 145 |
|
| 146 |
def create_config(models: list[pd.Series]) -> str:
|
| 147 |
-
slerp_config = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
models:
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
- model: mlabonne/OrpoLlama-3-8B
|
| 156 |
-
parameters:
|
| 157 |
-
density: 0.55
|
| 158 |
-
weight: 0.05
|
| 159 |
merge_method: dare_ties
|
| 160 |
-
base_model:
|
| 161 |
parameters:
|
| 162 |
-
|
| 163 |
-
dtype:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
"""
|
| 165 |
-
dare_config =
|
| 166 |
-
stock_config = slerp_config
|
| 167 |
-
# slerp_config = f"""
|
| 168 |
-
# slices:
|
| 169 |
-
# - sources:
|
| 170 |
-
# - model: {models[0]["Model"]}
|
| 171 |
-
# layer_range: [0, 32]
|
| 172 |
-
# - model: {models[1]["Model"]}
|
| 173 |
-
# layer_range: [0, 32]
|
| 174 |
-
# merge_method: slerp
|
| 175 |
-
# base_model: {models[0]["Model"]}
|
| 176 |
-
# parameters:
|
| 177 |
-
# t:
|
| 178 |
-
# - filter: self_attn
|
| 179 |
-
# value: [0, 0.5, 0.3, 0.7, 1]
|
| 180 |
-
# - filter: mlp
|
| 181 |
-
# value: [1, 0.5, 0.7, 0.3, 0]
|
| 182 |
-
# - value: 0.5
|
| 183 |
-
# dtype: bfloat16
|
| 184 |
-
# random_seed: 0
|
| 185 |
-
# """
|
| 186 |
-
# dare_config = f"""
|
| 187 |
-
# models:
|
| 188 |
-
# - model: mlabonne/Meta-Llama-3-8B
|
| 189 |
-
# # No parameters necessary for base model
|
| 190 |
-
# - model: {models[0]["Model"]}
|
| 191 |
-
# parameters:
|
| 192 |
-
# density: 0.53
|
| 193 |
-
# weight: 0.5
|
| 194 |
-
# - model: {models[1]["Model"]}
|
| 195 |
-
# parameters:
|
| 196 |
-
# density: 0.53
|
| 197 |
-
# weight: 0.5
|
| 198 |
-
# merge_method: dare_ties
|
| 199 |
-
# base_model: mlabonne/Meta-Llama-3-8B
|
| 200 |
-
# parameters:
|
| 201 |
-
# int8_mask: true
|
| 202 |
-
# dtype: bfloat16
|
| 203 |
-
# random_seed: 0
|
| 204 |
-
# """
|
| 205 |
-
# stock_config = f"""
|
| 206 |
-
# models:
|
| 207 |
-
# - model: mlabonne/Meta-Llama-3-8B
|
| 208 |
-
# - model: {models[0]["Model"]}
|
| 209 |
-
# - model: {models[1]["Model"]}
|
| 210 |
-
# merge_method: model_stock
|
| 211 |
-
# base_model: mlabonne/Meta-Llama-3-8B
|
| 212 |
-
# dtype: bfloat16
|
| 213 |
-
# """
|
| 214 |
-
yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.5, 0.4, 0.1], k=1)[0]
|
| 215 |
|
| 216 |
with open('config.yaml', 'w', encoding="utf-8") as f:
|
| 217 |
f.write(yaml_config)
|
|
|
|
| 119 |
def get_name(models: list[pd.Series], username: str, version=0) -> str:
    """Build a unique Hub repo name for a merge of two models.

    The name is the first hyphen-delimited token of each source model's short
    name (capitalized), concatenated, with an "-8B" suffix — e.g.
    "org/Foo-1" + "org/Bar-2" -> "FooBar-8B". If "{username}/{name}" already
    exists on the Hub, a "-v{version}" suffix is bumped recursively until a
    free name is found.

    Args:
        models: two rows (indexed by "Model", e.g. "org/Foo-7B-x"); only
            models[0] and models[1] are used.
        username: Hub namespace the merged model will be pushed under.
        version: internal recursion counter; callers leave it at 0.

    Returns:
        A model name not yet taken under `username`.
    """
    model_name = (
        models[0]["Model"].split("/")[-1].split("-")[0].capitalize()
        + models[1]["Model"].split("/")[-1].split("-")[0].capitalize()
        + "-8B"
    )
    if version > 0:
        # Re-suffix the combined base token with the attempt number.
        model_name = model_name.split("-")[0] + f"-v{version}-8B"

    if repo_exists(f"{username}/{model_name}"):
        # BUGFIX: the original discarded the recursive result (and returned
        # None overall); propagate the first available versioned name.
        return get_name(models, username, version + 1)
    return model_name
|
|
|
|
| 144 |
|
| 145 |
|
| 146 |
def create_config(models: list[pd.Series]) -> str:
    """Generate a mergekit YAML config for the two models and write it to disk.

    One of three merge recipes is picked at random (weighted): SLERP (0.3),
    DARE-TIES (0.6), or model stock (0.1). The chosen YAML is written to
    ./config.yaml and also returned.

    Args:
        models: two rows (indexed by "Model") naming the models to merge.

    Returns:
        The YAML config text that was written to config.yaml.
        (BUGFIX: the original was annotated `-> str` but returned None.)
    """
    slerp_config = f"""
slices:
  - sources:
      - model: {models[0]["Model"]}
        layer_range: [0, 32]
      - model: {models[1]["Model"]}
        layer_range: [0, 32]
merge_method: slerp
base_model: {models[0]["Model"]}
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
random_seed: 0
"""
    dare_config = f"""
models:
  - model: {models[0]["Model"]}
    # No parameters necessary for base model
  - model: {models[1]["Model"]}
    parameters:
      density: 0.53
      weight: 0.6
merge_method: dare_ties
base_model: {models[0]["Model"]}
parameters:
  int8_mask: true
dtype: bfloat16
random_seed: 0
"""
    stock_config = f"""
models:
  - model: mistralai/Mistral-7B-v0.1
  - model: {models[0]["Model"]}
  - model: {models[1]["Model"]}
merge_method: model_stock
base_model: mistralai/Mistral-7B-v0.1
dtype: bfloat16
"""
    # Weighted draw: DARE-TIES favored, model stock rare.
    yaml_config = random.choices(
        [slerp_config, dare_config, stock_config],
        weights=[0.3, 0.6, 0.1],
        k=1,
    )[0]

    with open('config.yaml', 'w', encoding="utf-8") as f:
        f.write(yaml_config)
    return yaml_config
|