bertopic / TopicModelingApp.spec
Mars203020's picture
Upload 17 files
b7b041e verified
# topicmodelingapp.spec
import sys
import os
import site
from pathlib import Path
from PyInstaller.utils.hooks import collect_all
from PyInstaller.building.datastruct import Tree
# Add the script's directory to the path for local imports
sys.path.append(os.path.abspath(os.path.dirname(sys.argv[0])))
# --- Dynamic Path Logic (Makes the SPEC file generic) ---
def get_site_packages_path():
    """Return the current environment's site-packages directory as a Path.

    The interpreter's own install scheme is consulted first
    (``sysconfig.get_paths()['purelib']``). Indexing
    ``site.getsitepackages()[0]`` is not portable: on Windows the first entry
    is the installation prefix, not ``Lib\\site-packages``.

    Returns:
        pathlib.Path: the directory third-party pure-Python packages install
        into. The path is not checked for existence.
    """
    import sysconfig  # function-scope import keeps this block self-contained

    try:
        return Path(sysconfig.get_paths()['purelib'])
    except KeyError:
        # Install scheme without a 'purelib' entry; try the older lookups.
        pass

    try:
        # Pick the first entry that is actually a packages directory instead
        # of blindly taking index 0.
        for entry in site.getsitepackages():
            if Path(entry).name in ('site-packages', 'dist-packages'):
                return Path(entry)
    except AttributeError:
        # Not every `site` module provides getsitepackages().
        pass

    # Last resort: the conventional POSIX layout under the interpreter prefix.
    version_dir = f'python{sys.version_info.major}.{sys.version_info.minor}'
    return Path(sys.prefix) / 'lib' / version_dir / 'site-packages'
# String form of the site-packages directory with a trailing path separator,
# ready to be prefixed onto relative package paths.
SP_PATH_STR = f"{get_site_packages_path()}{os.sep}"
def get_model_path(model_name):
    """Return the absolute directory of an installed spaCy model package.

    The model is located through the import system rather than by guessing a
    site-packages directory, so it is found wherever it sits on ``sys.path``.
    The model package itself is not imported.

    Args:
        model_name: top-level package name of the model, e.g. 'en_core_web_sm'.

    Returns:
        str: path of the package directory.

    Raises:
        FileNotFoundError: if no package of that name can be located.
    """
    import importlib.util  # function-scope import keeps this block self-contained

    try:
        spec = importlib.util.find_spec(model_name)
    except (ImportError, ValueError):
        # find_spec raises for malformed names or broken module specs;
        # treat both the same as "not installed".
        spec = None

    # Only packages carry submodule_search_locations; a plain module or a
    # missing name leaves nothing that could be bundled as a directory tree.
    locations = list(spec.submodule_search_locations or []) if spec is not None else []
    if not locations:
        raise FileNotFoundError(
            f"spaCy model '{model_name}' not found: it is not installed as a "
            f"package in the build environment"
        )
    return str(Path(locations[0]))
# --- Core Dependency Collection (C-Extension Fix) ---
# collect_all() returns a 3-tuple per package: (datas, binaries, hiddenimports).
spacy_data = collect_all('spacy')
numpy_data = collect_all('numpy')
sklearn_data = collect_all('sklearn')
hdbscan_data = collect_all('hdbscan')
scipy_data = collect_all('scipy')

# One fixed package order, reused for every merged list below.
_collected = (spacy_data, numpy_data, sklearn_data, hdbscan_data, scipy_data)

# Hidden imports (slot 2): module-name strings.
all_collected_imports = [name for result in _collected for name in result[2]]

# Data files (slot 0): tuples.
all_collected_datas = [entry for result in _collected for entry in result[0]]

# Binaries (slot 1): tuples for C extensions / shared libraries.
all_collected_binaries = [entry for result in _collected for entry in result[1]]
# --- Analysis Setup ---
a = Analysis(
    # Every application source file is listed explicitly.
    [
        'run.py',
        'app.py',
        'text_preprocessor.py',
        'topic_modeling.py',
        'gini_calculator.py',
        'narrative_similarity.py',
        'resource_path.py',
        'topic_evolution.py',
    ],
    pathex=['.'],
    # Binaries gathered by collect_all() (C extensions / shared libraries).
    binaries=all_collected_binaries,
    # Data files: collected tuples followed by the manual entries.
    datas=all_collected_datas + [
        # Streamlit metadata (site-packages prefix plus wildcard).
        # NOTE(review): metadata lookups normally expect the .dist-info
        # directory under its own name; a 'streamlit_metadata' destination may
        # not be discoverable at runtime. Consider
        # PyInstaller.utils.hooks.copy_metadata('streamlit') -- confirm.
        (SP_PATH_STR + 'streamlit*.dist-info', 'streamlit_metadata'),
        (SP_PATH_STR + 'streamlit/static', 'streamlit/static'),
        # Application resources
        (os.path.abspath('app.py'), '.'),
        ('readme.md', '.'),
        ('requirements.txt', '.'),
    ],
    # Hidden imports: collected module names followed by the manual ones.
    hiddenimports=all_collected_imports + [
        'charset_normalizer',
        'streamlit.runtime.scriptrunner.magic_funcs',
        'spacy.parts_of_speech',
        'scipy.spatial.ckdtree',
        'thinc.extra.wrappers',
        'streamlit.web.cli',
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # collect_all() yields only (datas, binaries, hiddenimports). There is no
    # index 3, so the previous `*_data[3]` terms raised IndexError before the
    # build could start. Only the explicit excludes remain.
    excludes=['tkinter', 'matplotlib.pyplot'],
    noarchive=False,
    optimize=0,
)
# 4. Bundle each spaCy model's package directory as a Tree, placed in the
#    bundle under a folder named after the model.
for _model_name in ('en_core_web_sm', 'xx_ent_wiki_sm'):
    a.datas.extend(Tree(get_model_path(_model_name), prefix=_model_name))
# Archive of the pure-Python modules found by the analysis.
pyz = PYZ(a.pure)

# Keyword options for the executable, gathered in one mapping so the EXE call
# itself only shows what gets packed into it.
_exe_options = {
    'name': 'TopicModelingApp',
    'debug': False,
    'bootloader_ignore_signals': False,
    'strip': False,
    'upx': True,
    'upx_exclude': [],
    'runtime_tmpdir': None,
    'console': True,
    'disable_windowed_traceback': False,
    'argv_emulation': False,
    'target_arch': None,
    'codesign_identity': None,
    'entitlements_file': None,
}

# Scripts, binaries and data files are all handed directly to EXE.
exe = EXE(pyz, a.scripts, a.binaries, a.datas, [], **_exe_options)