Spaces:
Runtime error
Runtime error
kwabs22
committed on
Commit
·
276706e
1
Parent(s):
3976009
image model gated
Browse files- leveraging_machine_learning.py +31 -31
leveraging_machine_learning.py
CHANGED
|
@@ -20,12 +20,12 @@ modelnames = ["stvlynn/Gemma-2-2b-Chinese-it", "unsloth/Llama-3.2-1B-Instruct",
|
|
| 20 |
"Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen2-0.5B-Instruct", "Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-7B-Instruct", "Qwen/Qwen1.5-MoE-A2.7B-Chat", "HuggingFaceTB/SmolLM-135M-Instruct", "microsoft/Phi-3-mini-4k-instruct", "Groq/Llama-3-Groq-8B-Tool-Use", "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4",
|
| 21 |
"SpectraSuite/TriLM_3.9B_Unpacked", "h2oai/h2o-danube3-500m-chat", "OuteAI/Lite-Mistral-150M-v2-Instruct", "Zyphra/Zamba2-1.2B", "anthracite-org/magnum-v2-4b", ]
|
| 22 |
|
| 23 |
-
imagemodelnames = ["black-forest-labs/FLUX.1-schnell"]
|
| 24 |
|
| 25 |
current_model_index = 0
|
| 26 |
current_image_model_index = 0
|
| 27 |
modelname = modelnames[current_model_index]
|
| 28 |
-
imagemodelname = imagemodelnames[current_image_model_index]
|
| 29 |
lastmodelnameinloadfunction = None
|
| 30 |
lastimagemodelnameinloadfunction = None
|
| 31 |
|
|
@@ -35,7 +35,7 @@ embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
| 35 |
# Initialize model and tokenizer as global variables
|
| 36 |
model = None
|
| 37 |
tokenizer = None
|
| 38 |
-
flux_pipe = None
|
| 39 |
|
| 40 |
# Dictionary to store loaded models
|
| 41 |
loaded_models = {}
|
|
@@ -90,42 +90,42 @@ def load_model(model_name):
|
|
| 90 |
f"Tokenizer size: {get_size_str(tokenizer_size)}, "
|
| 91 |
f"GPU memory used: {get_size_str(memory_used)}")
|
| 92 |
|
| 93 |
-
def load_image_model(imagemodelname):
|
| 94 |
-
|
| 95 |
|
| 96 |
-
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
|
| 117 |
-
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
|
| 130 |
|
| 131 |
def clear_all_models():
|
|
@@ -160,7 +160,7 @@ def loaded_model_list():
|
|
| 160 |
|
| 161 |
# Initial model load
|
| 162 |
load_model(modelname)
|
| 163 |
-
load_image_model(imagemodelname)
|
| 164 |
|
| 165 |
# Create embeddings for the knowledge base
|
| 166 |
knowledge_base_embeddings = embedding_model.encode([doc["content"] for doc in knowledge_base])
|
|
|
|
| 20 |
"Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen2-0.5B-Instruct", "Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-7B-Instruct", "Qwen/Qwen1.5-MoE-A2.7B-Chat", "HuggingFaceTB/SmolLM-135M-Instruct", "microsoft/Phi-3-mini-4k-instruct", "Groq/Llama-3-Groq-8B-Tool-Use", "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4",
|
| 21 |
"SpectraSuite/TriLM_3.9B_Unpacked", "h2oai/h2o-danube3-500m-chat", "OuteAI/Lite-Mistral-150M-v2-Instruct", "Zyphra/Zamba2-1.2B", "anthracite-org/magnum-v2-4b", ]
|
| 22 |
|
| 23 |
+
# imagemodelnames = ["black-forest-labs/FLUX.1-schnell"]
|
| 24 |
|
| 25 |
current_model_index = 0
|
| 26 |
current_image_model_index = 0
|
| 27 |
modelname = modelnames[current_model_index]
|
| 28 |
+
# imagemodelname = imagemodelnames[current_image_model_index]
|
| 29 |
lastmodelnameinloadfunction = None
|
| 30 |
lastimagemodelnameinloadfunction = None
|
| 31 |
|
|
|
|
| 35 |
# Initialize model and tokenizer as global variables
|
| 36 |
model = None
|
| 37 |
tokenizer = None
|
| 38 |
+
# flux_pipe = None
|
| 39 |
|
| 40 |
# Dictionary to store loaded models
|
| 41 |
loaded_models = {}
|
|
|
|
| 90 |
f"Tokenizer size: {get_size_str(tokenizer_size)}, "
|
| 91 |
f"GPU memory used: {get_size_str(memory_used)}")
|
| 92 |
|
| 93 |
+
# def load_image_model(imagemodelname):
|
| 94 |
+
# global flux_pipe, lastimagemodelnameinloadfunction, loaded_models
|
| 95 |
|
| 96 |
+
# print(f"Loading image model: {imagemodelname}")
|
| 97 |
|
| 98 |
+
# # Record initial GPU memory usage
|
| 99 |
+
# initial_memory = torch.cuda.memory_allocated()
|
| 100 |
|
| 101 |
+
# if 'flux_pipe' in globals() and flux_pipe is not None:
|
| 102 |
+
# flux_pipe = None
|
| 103 |
|
| 104 |
+
# torch.cuda.empty_cache()
|
| 105 |
+
# gc.collect()
|
| 106 |
|
| 107 |
+
# flux_pipe = FluxPipeline.from_pretrained(imagemodelname, torch_dtype=torch.bfloat16)
|
| 108 |
+
# flux_pipe.enable_model_cpu_offload()
|
| 109 |
+
# model_size = sum(p.numel() * p.element_size() for p in flux_pipe.transformer.parameters())
|
| 110 |
+
# #tokenizer_size = 0 # FLUX doesn't use a separate tokenizer
|
| 111 |
+
# loaded_models[imagemodelname] = flux_pipe
|
| 112 |
|
| 113 |
+
# # Calculate memory usage
|
| 114 |
+
# final_memory = torch.cuda.memory_allocated()
|
| 115 |
+
# memory_used = final_memory - initial_memory
|
| 116 |
|
| 117 |
+
# loaded_models[imagemodelname] = [str(time.time()), memory_used]
|
| 118 |
|
| 119 |
+
# lastimagemodelnameinloadfunction = (imagemodelname, model_size) #, tokenizer_size)
|
| 120 |
+
# print(f"Model and tokenizer {imagemodelname} loaded successfully")
|
| 121 |
+
# print(f"Model size: {get_size_str(model_size)}")
|
| 122 |
+
# #print(f"Tokenizer size: {get_size_str(tokenizer_size)}")
|
| 123 |
+
# print(f"GPU memory used: {get_size_str(memory_used)}")
|
| 124 |
|
| 125 |
+
# return (f"Model and tokenizer {imagemodelname} loaded successfully. "
|
| 126 |
+
# f"Model size: {get_size_str(model_size)}, "
|
| 127 |
+
# #f"Tokenizer size: {get_size_str(tokenizer_size)}, "
|
| 128 |
+
# f"GPU memory used: {get_size_str(memory_used)}")
|
| 129 |
|
| 130 |
|
| 131 |
def clear_all_models():
|
|
|
|
| 160 |
|
| 161 |
# Initial model load
|
| 162 |
load_model(modelname)
|
| 163 |
+
# load_image_model(imagemodelname)
|
| 164 |
|
| 165 |
# Create embeddings for the knowledge base
|
| 166 |
knowledge_base_embeddings = embedding_model.encode([doc["content"] for doc in knowledge_base])
|