Spaces:
Running
Running
Commit
·
8216d8c
1
Parent(s):
9e84863
Debugged reference to random_seed in vectorisation and reference to torch in representation_model.py
Browse files
- funcs/embeddings.py +2 -1
- funcs/topic_core_funcs.py +1 -1
funcs/embeddings.py
CHANGED
|
@@ -25,7 +25,7 @@ else:
|
|
| 25 |
|
| 26 |
|
| 27 |
@spaces.GPU
|
| 28 |
-
def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndarray, embeddings_super_compress: str, high_quality_mode_opt: str, embeddings_name:str="mixedbread-ai/mxbai-embed-xsmall-v1") -> np.ndarray:
|
| 29 |
"""
|
| 30 |
Create or load embeddings for the given documents.
|
| 31 |
|
|
@@ -35,6 +35,7 @@ def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndar
|
|
| 35 |
embeddings_out (np.ndarray): Array to store the embeddings.
|
| 36 |
embeddings_super_compress (str): Option to super compress embeddings ("Yes" or "No").
|
| 37 |
high_quality_mode_opt (str): Option for high quality mode ("Yes" or "No").
|
|
|
|
| 38 |
|
| 39 |
Returns:
|
| 40 |
np.ndarray: The generated or loaded embeddings.
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
@spaces.GPU
|
| 28 |
+
def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndarray, embeddings_super_compress: str, high_quality_mode_opt: str, embeddings_name:str="mixedbread-ai/mxbai-embed-xsmall-v1", random_seed:int=42) -> np.ndarray:
|
| 29 |
"""
|
| 30 |
Create or load embeddings for the given documents.
|
| 31 |
|
|
|
|
| 35 |
embeddings_out (np.ndarray): Array to store the embeddings.
|
| 36 |
embeddings_super_compress (str): Option to super compress embeddings ("Yes" or "No").
|
| 37 |
high_quality_mode_opt (str): Option for high quality mode ("Yes" or "No").
|
| 38 |
+
random_seed (int): Random seed for vectorisation
|
| 39 |
|
| 40 |
Returns:
|
| 41 |
np.ndarray: The generated or loaded embeddings.
|
funcs/topic_core_funcs.py
CHANGED
|
@@ -326,7 +326,7 @@ def extract_topics(
|
|
| 326 |
# UMAP model uses Bertopic defaults
|
| 327 |
umap_model = UMAP(n_neighbors=umap_n_neighbours, n_components=5, min_dist=umap_min_dist, metric=umap_metric, low_memory=True, random_state=random_seed)
|
| 328 |
|
| 329 |
-
embeddings_out, embedding_model = make_or_load_embeddings(docs, file_list, embeddings_out, embeddings_super_compress, high_quality_mode, embeddings_name)
|
| 330 |
|
| 331 |
# If you want to save your embedding files
|
| 332 |
if return_intermediate_files == "Yes":
|
|
|
|
| 326 |
# UMAP model uses Bertopic defaults
|
| 327 |
umap_model = UMAP(n_neighbors=umap_n_neighbours, n_components=5, min_dist=umap_min_dist, metric=umap_metric, low_memory=True, random_state=random_seed)
|
| 328 |
|
| 329 |
+
embeddings_out, embedding_model = make_or_load_embeddings(docs, file_list, embeddings_out, embeddings_super_compress, high_quality_mode, embeddings_name, random_seed)
|
| 330 |
|
| 331 |
# If you want to save your embedding files
|
| 332 |
if return_intermediate_files == "Yes":
|