Spaces:

stanley-00
/

slm-testing

Running

stanley-00 committed 2 days ago

Commit

32f15bb

verified ·

1 Parent(s): a817aa1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,7 +25,8 @@ MODELS = [
     'MihaiPopa-1/CinnabarLM-1.5M-Base', 'Harley-ml/Dillionv2-1.3M', 'Eclipse-Senpai/KeyLM-75M',
     'SupraLabs/Supra-Mini-v6-1M', 'AxiomicLabs/GPT-S-1.4M', 'GODELEV/Archaea-74M',
     'Sandroeth/cali-0.1B', 'veyra-ai/veyra3-5m-base', 'veyra-ai/veyra-30m-base-5b-tokens',
-    'ThingAI/Quark-50m', 'ThingAI/Quark-135m', 'HuggingFaceTB/SmolLM2-135M-Instruct'
 ]
 # Global class to safely manage the loaded model and tokenizer in memory
@@ -52,6 +53,7 @@ def load_new_model(model_id):
     # Clear old model from memory
     model_manager.model = None
     model_manager.tokenizer = None
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
@@ -64,9 +66,9 @@ def load_new_model(model_id):
         model_manager.tokenizer = tokenizer
         model_manager.model = model
-        return f"Successfully loaded {model_id} on {model_manager.device.upper()}"
     except Exception as e:
-        return f"Error loading model: {str(e)}"
 def run_inference(user_prompt, max_tokens, temperature, top_k, top_p, rep_penalty, ngram_size, do_sample):
     """Generates text via streaming generator."""

     'MihaiPopa-1/CinnabarLM-1.5M-Base', 'Harley-ml/Dillionv2-1.3M', 'Eclipse-Senpai/KeyLM-75M',
     'SupraLabs/Supra-Mini-v6-1M', 'AxiomicLabs/GPT-S-1.4M', 'GODELEV/Archaea-74M',
     'Sandroeth/cali-0.1B', 'veyra-ai/veyra3-5m-base', 'veyra-ai/veyra-30m-base-5b-tokens',
+    'ThingAI/Quark-50m', 'ThingAI/Quark-135m', 'HuggingFaceTB/SmolLM2-135M-Instruct',
+    'Aravindan/awesome-gpt-2-coder', 'Qwen/Qwen2.5-Coder-0.5B'
 ]
 # Global class to safely manage the loaded model and tokenizer in memory
     # Clear old model from memory
     model_manager.model = None
     model_manager.tokenizer = None
+    yield f"Loading {model_id}..."
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
         model_manager.tokenizer = tokenizer
         model_manager.model = model
+        yield f"Successfully loaded {model_id} on {model_manager.device.upper()}"
     except Exception as e:
+        yield f"Error loading model: {str(e)}"
 def run_inference(user_prompt, max_tokens, temperature, top_k, top_p, rep_penalty, ngram_size, do_sample):
     """Generates text via streaming generator."""