Spaces:
Running
Running
Commit
·
9a4b420
1
Parent(s):
f42e3d1
Test update main requirements file for huggingface compatibility
Browse files
- funcs/topic_core_funcs.py +4 -2
- requirements.txt +25 -23
- requirements_aws.txt +1 -0
- requirements_gpu.txt +7 -6
funcs/topic_core_funcs.py
CHANGED
|
@@ -32,9 +32,9 @@ today_rev = datetime.now().strftime("%Y%m%d")
|
|
| 32 |
|
| 33 |
# Load embeddings
|
| 34 |
if RUNNING_ON_AWS=="0":
|
| 35 |
-
embeddings_name = "mixedbread-ai/mxbai-embed-xsmall-v1" #"mixedbread-ai/mxbai-embed-large-v1"
|
| 36 |
else:
|
| 37 |
-
embeddings_name = "mixedbread-ai/mxbai-embed-xsmall-v1"
|
| 38 |
|
| 39 |
# LLM model used for representing topics
|
| 40 |
hf_model_name = "bartowski/Llama-3.2-3B-Instruct-GGUF" #"bartowski/Phi-3.1-mini-128k-instruct-GGUF"
|
|
@@ -700,6 +700,8 @@ def visualise_topics(
|
|
| 700 |
try:
|
| 701 |
topics_vis = visualize_documents_custom(topic_model, docs, hover_labels = label_list, reduced_embeddings=reduced_embeddings, hide_annotations=True, hide_document_hover=False, custom_labels=True, sample = sample_prop, width= 1200, height = 750)
|
| 702 |
|
|
|
|
|
|
|
| 703 |
topics_vis_name = output_folder + data_file_name_no_ext + '_' + 'vis_topic_docs_' + today_rev + '.html'
|
| 704 |
topics_vis.write_html(topics_vis_name)
|
| 705 |
output_list.append(topics_vis_name)
|
|
|
|
| 32 |
|
| 33 |
# Load embeddings
|
| 34 |
if RUNNING_ON_AWS=="0":
|
| 35 |
+
embeddings_name = "mixedbread-ai/mxbai-embed-large-v1" #"mixedbread-ai/mxbai-embed-xsmall-v1" #"mixedbread-ai/mxbai-embed-large-v1"
|
| 36 |
else:
|
| 37 |
+
embeddings_name = "mixedbread-ai/mxbai-embed-large-v1" #"mixedbread-ai/mxbai-embed-xsmall-v1"
|
| 38 |
|
| 39 |
# LLM model used for representing topics
|
| 40 |
hf_model_name = "bartowski/Llama-3.2-3B-Instruct-GGUF" #"bartowski/Phi-3.1-mini-128k-instruct-GGUF"
|
|
|
|
| 700 |
try:
|
| 701 |
topics_vis = visualize_documents_custom(topic_model, docs, hover_labels = label_list, reduced_embeddings=reduced_embeddings, hide_annotations=True, hide_document_hover=False, custom_labels=True, sample = sample_prop, width= 1200, height = 750)
|
| 702 |
|
| 703 |
+
#topics_vis = topic_model.visualize_documents(docs, reduced_embeddings=reduced_embeddings, hide_annotations=True, hide_document_hover=False, custom_labels=True, sample = sample_prop, width= 1200, height = 750)
|
| 704 |
+
|
| 705 |
topics_vis_name = output_folder + data_file_name_no_ext + '_' + 'vis_topic_docs_' + today_rev + '.html'
|
| 706 |
topics_vis.write_html(topics_vis_name)
|
| 707 |
output_list.append(topics_vis_name)
|
requirements.txt
CHANGED
|
@@ -1,25 +1,27 @@
|
|
| 1 |
-
hdbscan==0.8.40
|
| 2 |
-
pandas==2.
|
| 3 |
-
plotly==
|
| 4 |
-
scikit-learn==1.
|
| 5 |
-
umap-learn==0.5.
|
| 6 |
-
gradio==5.
|
| 7 |
-
boto3==1.
|
| 8 |
-
transformers==4.
|
| 9 |
-
accelerate==1.
|
| 10 |
-
bertopic==0.
|
| 11 |
-
spacy==3.8.
|
| 12 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
| 13 |
-
pyarrow
|
| 14 |
-
openpyxl
|
| 15 |
-
Faker
|
| 16 |
-
presidio_analyzer==2.2.
|
| 17 |
-
presidio_anonymizer==2.2.
|
| 18 |
-
scipy
|
| 19 |
-
polars
|
| 20 |
-
sentence-transformers==
|
| 21 |
-
torch==2.
|
| 22 |
-
llama-cpp-python
|
|
|
|
| 23 |
# Specify exact llama_cpp wheel for huggingface compatibility
|
| 24 |
-
#https://github.com/abetlen/llama-cpp-python/releases/download/v0.
|
| 25 |
-
|
|
|
|
|
|
| 1 |
+
#hdbscan==0.8.40
|
| 2 |
+
pandas==2.3.3
|
| 3 |
+
plotly==6.3.1
|
| 4 |
+
scikit-learn==1.7.2
|
| 5 |
+
umap-learn==0.5.9.post2
|
| 6 |
+
gradio==5.49.1
|
| 7 |
+
boto3==1.40.55
|
| 8 |
+
transformers==4.57.1
|
| 9 |
+
accelerate==1.11.0
|
| 10 |
+
bertopic==0.17.3
|
| 11 |
+
spacy==3.8.7
|
| 12 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
| 13 |
+
pyarrow==21.0.0
|
| 14 |
+
openpyxl==3.1.5
|
| 15 |
+
Faker==37.11.0
|
| 16 |
+
presidio_analyzer==2.2.360
|
| 17 |
+
presidio_anonymizer==2.2.360
|
| 18 |
+
scipy==1.16.2
|
| 19 |
+
polars==1.34.0
|
| 20 |
+
sentence-transformers==5.1.1
|
| 21 |
+
torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124
|
| 22 |
+
#https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-cp311-win_amd64.whl # Exact wheel specified for windows
|
| 23 |
+
#llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
|
| 24 |
# Specify exact llama_cpp wheel for huggingface compatibility
|
| 25 |
+
#https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu121/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
|
| 26 |
+
spaces==0.42.1
|
| 27 |
+
numpy==2.2.6
|
requirements_aws.txt
CHANGED
|
@@ -15,4 +15,5 @@ presidio_anonymizer==2.2.35
|
|
| 15 |
scipy
|
| 16 |
polars
|
| 17 |
transformers==4.46.3
|
|
|
|
| 18 |
#numpy==1.26.4
|
|
|
|
| 15 |
scipy
|
| 16 |
polars
|
| 17 |
transformers==4.46.3
|
| 18 |
+
spaces
|
| 19 |
#numpy==1.26.4
|
requirements_gpu.txt
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
hdbscan==0.8.40
|
| 2 |
-
pandas==2.
|
| 3 |
plotly==5.24.1
|
| 4 |
scikit-learn==1.5.2
|
| 5 |
umap-learn==0.5.7
|
| 6 |
-
gradio==5.
|
| 7 |
-
boto3==1.
|
| 8 |
-
transformers==4.
|
| 9 |
accelerate==1.1.1
|
| 10 |
torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121
|
| 11 |
bertopic==0.16.4
|
| 12 |
-
spacy==3.8.
|
| 13 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
| 14 |
pyarrow
|
| 15 |
openpyxl
|
|
@@ -18,7 +18,8 @@ presidio_analyzer==2.2.355
|
|
| 18 |
presidio_anonymizer==2.2.355
|
| 19 |
scipy
|
| 20 |
polars
|
| 21 |
-
llama-cpp-python==0.
|
| 22 |
sentence-transformers==3.3.1
|
|
|
|
| 23 |
numpy==1.26.4
|
| 24 |
|
|
|
|
| 1 |
hdbscan==0.8.40
|
| 2 |
+
pandas==2.3.0
|
| 3 |
plotly==5.24.1
|
| 4 |
scikit-learn==1.5.2
|
| 5 |
umap-learn==0.5.7
|
| 6 |
+
gradio==5.34.2
|
| 7 |
+
boto3==1.38.43
|
| 8 |
+
transformers==4.51.1
|
| 9 |
accelerate==1.1.1
|
| 10 |
torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121
|
| 11 |
bertopic==0.16.4
|
| 12 |
+
spacy==3.8.4
|
| 13 |
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz
|
| 14 |
pyarrow
|
| 15 |
openpyxl
|
|
|
|
| 18 |
presidio_anonymizer==2.2.355
|
| 19 |
scipy
|
| 20 |
polars
|
| 21 |
+
llama-cpp-python==0.3.4 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
|
| 22 |
sentence-transformers==3.3.1
|
| 23 |
+
spaces
|
| 24 |
numpy==1.26.4
|
| 25 |
|