Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,7 +25,8 @@ MODELS = [
|
|
| 25 |
'MihaiPopa-1/CinnabarLM-1.5M-Base', 'Harley-ml/Dillionv2-1.3M', 'Eclipse-Senpai/KeyLM-75M',
|
| 26 |
'SupraLabs/Supra-Mini-v6-1M', 'AxiomicLabs/GPT-S-1.4M', 'GODELEV/Archaea-74M',
|
| 27 |
'Sandroeth/cali-0.1B', 'veyra-ai/veyra3-5m-base', 'veyra-ai/veyra-30m-base-5b-tokens',
|
| 28 |
-
'ThingAI/Quark-50m', 'ThingAI/Quark-135m', 'HuggingFaceTB/SmolLM2-135M-Instruct'
|
|
|
|
| 29 |
]
|
| 30 |
|
| 31 |
# Global class to safely manage the loaded model and tokenizer in memory
|
|
@@ -52,6 +53,7 @@ def load_new_model(model_id):
|
|
| 52 |
# Clear old model from memory
|
| 53 |
model_manager.model = None
|
| 54 |
model_manager.tokenizer = None
|
|
|
|
| 55 |
gc.collect()
|
| 56 |
if torch.cuda.is_available():
|
| 57 |
torch.cuda.empty_cache()
|
|
@@ -64,9 +66,9 @@ def load_new_model(model_id):
|
|
| 64 |
model_manager.tokenizer = tokenizer
|
| 65 |
model_manager.model = model
|
| 66 |
|
| 67 |
-
|
| 68 |
except Exception as e:
|
| 69 |
-
|
| 70 |
|
| 71 |
def run_inference(user_prompt, max_tokens, temperature, top_k, top_p, rep_penalty, ngram_size, do_sample):
|
| 72 |
"""Generates text via streaming generator."""
|
|
|
|
| 25 |
'MihaiPopa-1/CinnabarLM-1.5M-Base', 'Harley-ml/Dillionv2-1.3M', 'Eclipse-Senpai/KeyLM-75M',
|
| 26 |
'SupraLabs/Supra-Mini-v6-1M', 'AxiomicLabs/GPT-S-1.4M', 'GODELEV/Archaea-74M',
|
| 27 |
'Sandroeth/cali-0.1B', 'veyra-ai/veyra3-5m-base', 'veyra-ai/veyra-30m-base-5b-tokens',
|
| 28 |
+
'ThingAI/Quark-50m', 'ThingAI/Quark-135m', 'HuggingFaceTB/SmolLM2-135M-Instruct',
|
| 29 |
+
'Aravindan/awesome-gpt-2-coder', 'Qwen/Qwen2.5-Coder-0.5B'
|
| 30 |
]
|
| 31 |
|
| 32 |
# Global class to safely manage the loaded model and tokenizer in memory
|
|
|
|
| 53 |
# Clear old model from memory
|
| 54 |
model_manager.model = None
|
| 55 |
model_manager.tokenizer = None
|
| 56 |
+
yield f"Loading {model_id}..."
|
| 57 |
gc.collect()
|
| 58 |
if torch.cuda.is_available():
|
| 59 |
torch.cuda.empty_cache()
|
|
|
|
| 66 |
model_manager.tokenizer = tokenizer
|
| 67 |
model_manager.model = model
|
| 68 |
|
| 69 |
+
yield f"Successfully loaded {model_id} on {model_manager.device.upper()}"
|
| 70 |
except Exception as e:
|
| 71 |
+
yield f"Error loading model: {str(e)}"
|
| 72 |
|
| 73 |
def run_inference(user_prompt, max_tokens, temperature, top_k, top_p, rep_penalty, ngram_size, do_sample):
|
| 74 |
"""Generates text via streaming generator."""
|