seanpedrickcase committed on
Commit
02721f3
·
1 Parent(s): cb7a4c9

Fix GPU tensors being returned to the main function after embedding with ZeroGPU. Representation model creation now runs under a ZeroGPU Spaces decorator

Browse files
funcs/embeddings.py CHANGED
@@ -3,16 +3,12 @@ import time
3
  import numpy as np
4
  import os
5
 
6
-
7
  from sentence_transformers import SentenceTransformer
8
  from sklearn.pipeline import make_pipeline
9
  from sklearn.decomposition import TruncatedSVD
10
  from sklearn.feature_extraction.text import TfidfVectorizer
11
  from funcs.helper_functions import GPU_SPACE_DURATION
12
 
13
-
14
-
15
-
16
  # If you want to disable cuda for testing purposes
17
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
18
 
@@ -121,9 +117,22 @@ def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndar
121
  embeddings_out = np.round(embeddings_out, 3)
122
  embeddings_out *= 100
123
 
 
 
 
 
 
 
 
124
  return embeddings_out, embedding_model
125
 
126
  else:
127
  print("Found pre-loaded embeddings.")
 
 
 
 
 
 
128
 
129
  return embeddings_out, embedding_model
 
3
  import numpy as np
4
  import os
5
 
 
6
  from sentence_transformers import SentenceTransformer
7
  from sklearn.pipeline import make_pipeline
8
  from sklearn.decomposition import TruncatedSVD
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  from funcs.helper_functions import GPU_SPACE_DURATION
11
 
 
 
 
12
  # If you want to disable cuda for testing purposes
13
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
14
 
 
117
  embeddings_out = np.round(embeddings_out, 3)
118
  embeddings_out *= 100
119
 
120
+ # Move model to CPU before returning to avoid CUDA initialization in main process
121
+ if high_quality_mode_opt == "Yes" and hasattr(embedding_model, 'to'):
122
+ try:
123
+ embedding_model = embedding_model.to('cpu')
124
+ except:
125
+ pass # If moving to CPU fails, continue anyway
126
+
127
  return embeddings_out, embedding_model
128
 
129
  else:
130
  print("Found pre-loaded embeddings.")
131
+
132
+ # Ensure embeddings are on CPU even when loaded from file
133
+ if hasattr(embeddings_out, 'cpu'):
134
+ embeddings_out = embeddings_out.cpu().numpy()
135
+ elif not isinstance(embeddings_out, np.ndarray):
136
+ embeddings_out = np.array(embeddings_out)
137
 
138
  return embeddings_out, embedding_model
funcs/representation_model.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from bertopic.representation import LlamaCPP
3
 
4
  from pydantic import BaseModel
@@ -9,7 +10,7 @@ from gradio import Warning
9
  from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, BaseRepresentation
10
  from funcs.embeddings import torch_device
11
  from funcs.prompts import phi3_prompt, phi3_start
12
- from funcs.helper_functions import get_or_create_env_var
13
 
14
  chosen_prompt = phi3_prompt #open_hermes_prompt # stablelm_prompt
15
  chosen_start_tag = phi3_start #open_hermes_start # stablelm_start
@@ -38,7 +39,7 @@ print(f'The value of USE_GPU is {USE_GPU}')
38
  if USE_GPU == "1":
39
  print("Using GPU for representation functions")
40
  torch_device = "gpu"
41
- print("Cuda version installed is: ", version.cuda)
42
  high_quality_mode = "Yes"
43
  os.system("nvidia-smi")
44
  else:
@@ -156,6 +157,7 @@ def find_model_file(hf_model_name: str, hf_model_file: str, search_folder: str,
156
 
157
  return found_file
158
 
 
159
  def create_representation_model(representation_type: str, llm_config: dict, hf_model_name: str, hf_model_file: str, chosen_start_tag: str, low_resource_mode: bool) -> dict:
160
  """
161
  Creates a representation model based on the specified type and configuration.
 
1
  import os
2
+ import spaces
3
  from bertopic.representation import LlamaCPP
4
 
5
  from pydantic import BaseModel
 
10
  from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, BaseRepresentation
11
  from funcs.embeddings import torch_device
12
  from funcs.prompts import phi3_prompt, phi3_start
13
+ from funcs.helper_functions import get_or_create_env_var, GPU_SPACE_DURATION
14
 
15
  chosen_prompt = phi3_prompt #open_hermes_prompt # stablelm_prompt
16
  chosen_start_tag = phi3_start #open_hermes_start # stablelm_start
 
39
  if USE_GPU == "1":
40
  print("Using GPU for representation functions")
41
  torch_device = "gpu"
42
+ #print("Cuda version installed is: ", version.cuda)
43
  high_quality_mode = "Yes"
44
  os.system("nvidia-smi")
45
  else:
 
157
 
158
  return found_file
159
 
160
+ @spaces.GPU(duration=GPU_SPACE_DURATION)
161
  def create_representation_model(representation_type: str, llm_config: dict, hf_model_name: str, hf_model_file: str, chosen_start_tag: str, low_resource_mode: bool) -> dict:
162
  """
163
  Creates a representation model based on the specified type and configuration.