Spaces:
Running
Running
Commit
·
9a4b420
1
Parent(s):
f42e3d1
Test update main requirements file for huggingface compatibility
Browse files
- funcs/topic_core_funcs.py +4 -2
- requirements.txt +25 -23
- requirements_aws.txt +1 -0
- requirements_gpu.txt +7 -6
funcs/topic_core_funcs.py
CHANGED
|
@@ -32,9 +32,9 @@ today_rev = datetime.now().strftime("%Y%m%d")
|
|
| 32 |
|
| 33 |
# Load embeddings
|
| 34 |
if RUNNING_ON_AWS=="0":
|
| 35 |
-
embeddings_name = "mixedbread-ai/mxbai-embed-xsmall-v1" #"mixedbread-ai/mxbai-embed-large-v1"
|
| 36 |
else:
|
| 37 |
-
embeddings_name = "mixedbread-ai/mxbai-embed-xsmall-v1"
|
| 38 |
|
| 39 |
# LLM model used for representing topics
|
| 40 |
hf_model_name = "bartowski/Llama-3.2-3B-Instruct-GGUF" #"bartowski/Phi-3.1-mini-128k-instruct-GGUF"
|
|
@@ -700,6 +700,8 @@ def visualise_topics(
|
|
| 700 |
try:
|
| 701 |
topics_vis = visualize_documents_custom(topic_model, docs, hover_labels = label_list, reduced_embeddings=reduced_embeddings, hide_annotations=True, hide_document_hover=False, custom_labels=True, sample = sample_prop, width= 1200, height = 750)
|
| 702 |
|
|
|
|
|
|
|
| 703 |
topics_vis_name = output_folder + data_file_name_no_ext + '_' + 'vis_topic_docs_' + today_rev + '.html'
|
| 704 |
topics_vis.write_html(topics_vis_name)
|
| 705 |
output_list.append(topics_vis_name)
|
|
|
|
| 32 |
|
| 33 |
# Load embeddings
|
| 34 |
if RUNNING_ON_AWS=="0":
|
| 35 |
+
embeddings_name = "mixedbread-ai/mxbai-embed-large-v1" #"mixedbread-ai/mxbai-embed-xsmall-v1" #"mixedbread-ai/mxbai-embed-large-v1"
|
| 36 |
else:
|
| 37 |
+
embeddings_name = "mixedbread-ai/mxbai-embed-large-v1" #"mixedbread-ai/mxbai-embed-xsmall-v1"
|
| 38 |
|
| 39 |
# LLM model used for representing topics
|
| 40 |
hf_model_name = "bartowski/Llama-3.2-3B-Instruct-GGUF" #"bartowski/Phi-3.1-mini-128k-instruct-GGUF"
|
|
|
|
| 700 |
try:
|
| 701 |
topics_vis = visualize_documents_custom(topic_model, docs, hover_labels = label_list, reduced_embeddings=reduced_embeddings, hide_annotations=True, hide_document_hover=False, custom_labels=True, sample = sample_prop, width= 1200, height = 750)
|
| 702 |
|
| 703 |
+
#topics_vis = topic_model.visualize_documents(docs, reduced_embeddings=reduced_embeddings, hide_annotations=True, hide_document_hover=False, custom_labels=True, sample = sample_prop, width= 1200, height = 750)
|
| 704 |
+
|
| 705 |
topics_vis_name = output_folder + data_file_name_no_ext + '_' + 'vis_topic_docs_' + today_rev + '.html'
|
| 706 |
topics_vis.write_html(topics_vis_name)
|
| 707 |
output_list.append(topics_vis_name)
|
requirements.txt
CHANGED
|
@@ -1,25 +1,27 @@
|
|
| 1 |
-
hdbscan==0.8.40
|
| 2 |
-
pandas==2.
|
| 3 |
-
plotly==
|
| 4 |
-
scikit-learn==1.
|
| 5 |
-
umap-learn==0.5.
|
| 6 |
-
gradio==5.
|
| 7 |
-
boto3==1.
|
| 8 |
-
transformers==4.
|
| 9 |
-
accelerate==1.
|
| 10 |
-
bertopic==0.
|
| 11 |
-
spacy==3.8.
|
| 12 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
| 13 |
-
pyarrow
|
| 14 |
-
openpyxl
|
| 15 |
-
Faker
|
| 16 |
-
presidio_analyzer==2.2.
|
| 17 |
-
presidio_anonymizer==2.2.
|
| 18 |
-
scipy
|
| 19 |
-
polars
|
| 20 |
-
sentence-transformers==
|
| 21 |
-
torch==2.
|
| 22 |
-
llama-cpp-python
|
|
|
|
| 23 |
# Specify exact llama_cpp wheel for huggingface compatibility
|
| 24 |
-
#https://github.com/abetlen/llama-cpp-python/releases/download/v0.
|
| 25 |
-
|
|
|
|
|
|
| 1 |
+
#hdbscan==0.8.40
|
| 2 |
+
pandas==2.3.3
|
| 3 |
+
plotly==6.3.1
|
| 4 |
+
scikit-learn==1.7.2
|
| 5 |
+
umap-learn==0.5.9.post2
|
| 6 |
+
gradio==5.49.1
|
| 7 |
+
boto3==1.40.55
|
| 8 |
+
transformers==4.57.1
|
| 9 |
+
accelerate==1.11.0
|
| 10 |
+
bertopic==0.17.3
|
| 11 |
+
spacy==3.8.7
|
| 12 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
| 13 |
+
pyarrow==21.0.0
|
| 14 |
+
openpyxl==3.1.5
|
| 15 |
+
Faker==37.11.0
|
| 16 |
+
presidio_analyzer==2.2.360
|
| 17 |
+
presidio_anonymizer==2.2.360
|
| 18 |
+
scipy==1.16.2
|
| 19 |
+
polars==1.34.0
|
| 20 |
+
sentence-transformers==5.1.1
|
| 21 |
+
torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124
|
| 22 |
+
#https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-cp311-win_amd64.whl # Exact wheel specified for windows
|
| 23 |
+
#llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
|
| 24 |
# Specify exact llama_cpp wheel for huggingface compatibility
|
| 25 |
+
#https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu121/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
|
| 26 |
+
spaces==0.42.1
|
| 27 |
+
numpy==2.2.6
|
requirements_aws.txt
CHANGED
|
@@ -15,4 +15,5 @@ presidio_anonymizer==2.2.35
|
|
| 15 |
scipy
|
| 16 |
polars
|
| 17 |
transformers==4.46.3
|
|
|
|
| 18 |
#numpy==1.26.4
|
|
|
|
| 15 |
scipy
|
| 16 |
polars
|
| 17 |
transformers==4.46.3
|
| 18 |
+
spaces
|
| 19 |
#numpy==1.26.4
|
requirements_gpu.txt
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
hdbscan==0.8.40
|
| 2 |
-
pandas==2.
|
| 3 |
plotly==5.24.1
|
| 4 |
scikit-learn==1.5.2
|
| 5 |
umap-learn==0.5.7
|
| 6 |
-
gradio==5.
|
| 7 |
-
boto3==1.
|
| 8 |
-
transformers==4.
|
| 9 |
accelerate==1.1.1
|
| 10 |
torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121
|
| 11 |
bertopic==0.16.4
|
| 12 |
-
spacy==3.8.
|
| 13 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
| 14 |
pyarrow
|
| 15 |
openpyxl
|
|
@@ -18,7 +18,8 @@ presidio_analyzer==2.2.355
|
|
| 18 |
presidio_anonymizer==2.2.355
|
| 19 |
scipy
|
| 20 |
polars
|
| 21 |
-
llama-cpp-python==0.
|
| 22 |
sentence-transformers==3.3.1
|
|
|
|
| 23 |
numpy==1.26.4
|
| 24 |
|
|
|
|
| 1 |
hdbscan==0.8.40
|
| 2 |
+
pandas==2.3.0
|
| 3 |
plotly==5.24.1
|
| 4 |
scikit-learn==1.5.2
|
| 5 |
umap-learn==0.5.7
|
| 6 |
+
gradio==5.34.2
|
| 7 |
+
boto3==1.38.43
|
| 8 |
+
transformers==4.51.1
|
| 9 |
accelerate==1.1.1
|
| 10 |
torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121
|
| 11 |
bertopic==0.16.4
|
| 12 |
+
spacy==3.8.4
|
| 13 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
| 14 |
pyarrow
|
| 15 |
openpyxl
|
|
|
|
| 18 |
presidio_anonymizer==2.2.355
|
| 19 |
scipy
|
| 20 |
polars
|
| 21 |
+
llama-cpp-python==0.3.4 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
|
| 22 |
sentence-transformers==3.3.1
|
| 23 |
+
spaces
|
| 24 |
numpy==1.26.4
|
| 25 |
|