# topicmodelingapp.spec
#
# PyInstaller build specification for TopicModelingApp.
#
# The file is executed by PyInstaller, which injects the names ``Analysis``,
# ``PYZ``, ``EXE`` and ``SPECPATH`` into the namespace before running it.
import sys
import os
import site
import sysconfig
from pathlib import Path

from PyInstaller.utils.hooks import collect_all, copy_metadata
from PyInstaller.building.datastruct import Tree

# Make modules that live next to this spec file importable.  ``SPECPATH`` is
# the directory of the spec file; ``sys.argv[0]`` (used previously) is the
# PyInstaller entry point, not this file.
sys.path.append(os.path.abspath(SPECPATH))


# --- Dynamic Path Logic (makes the spec file environment-independent) ---

def get_site_packages_path():
    """Return the site-packages directory of the current environment.

    Resolution order:

    1. ``sysconfig``'s ``purelib`` path, when that directory exists.
    2. The first entry of ``site.getsitepackages()``.
    3. ``<sys.prefix>/lib/pythonX.Y/site-packages`` as a last resort.

    Returns:
        pathlib.Path: The directory expected to hold installed packages.
    """
    purelib = sysconfig.get_path('purelib')
    if purelib and os.path.isdir(purelib):
        return Path(purelib)

    try:
        return Path(site.getsitepackages()[0])
    except (AttributeError, IndexError):
        # ``site.getsitepackages`` is unavailable or returned nothing;
        # build the conventional POSIX-style location by hand.
        version_dir = f'python{sys.version_info.major}.{sys.version_info.minor}'
        return Path(sys.prefix) / 'lib' / version_dir / 'site-packages'


# String form with a trailing separator so relative package paths can simply
# be appended to it.
SP_PATH_STR = str(get_site_packages_path()) + os.sep


def get_model_path(model_name):
    """Return the absolute path of an installed spaCy model package.

    Args:
        model_name: Directory name of the model inside site-packages
            (e.g. ``'en_core_web_sm'``).

    Returns:
        str: Path of the model directory.

    Raises:
        FileNotFoundError: If the directory does not exist.
    """
    model_dir = get_site_packages_path() / model_name
    if not model_dir.exists():
        raise FileNotFoundError(
            f"spaCy model '{model_name}' not found at expected location: {model_dir}"
        )
    return str(model_dir)


# --- Core Dependency Collection (C-extension fix) ---
# collect_all() returns a 3-tuple: (datas, binaries, hiddenimports).
spacy_data = collect_all('spacy')
numpy_data = collect_all('numpy')
sklearn_data = collect_all('sklearn')
hdbscan_data = collect_all('hdbscan')
scipy_data = collect_all('scipy')

_collected = (spacy_data, numpy_data, sklearn_data, hdbscan_data, scipy_data)

# 1. Consolidate all hidden imports (module-name strings).
all_collected_imports = [
    name for _, _, hiddenimports in _collected for name in hiddenimports
]

# 2. Consolidate all collected data files ((source, dest) tuples).
all_collected_datas = [
    entry for datas, _, _ in _collected for entry in datas
]

# 3. Consolidate all collected binaries (C-extensions / shared libraries).
all_collected_binaries = [
    entry for _, binaries, _ in _collected for entry in binaries
]


# --- Analysis Setup ---
a = Analysis(
    # 1. Explicitly list all application source files.
    [
        'run.py',
        'app.py',
        'text_preprocessor.py',
        'topic_modeling.py',
        'gini_calculator.py',
        'narrative_similarity.py',
        'resource_path.py',
        'topic_evolution.py',
    ],
    pathex=['.'],
    # Use the collected binaries list for C extensions / shared libraries.
    binaries=all_collected_binaries,
    # 2. Final datas list: collected tuples + package metadata + manual tuples.
    datas=all_collected_datas
    # Places streamlit's dist-info at the bundle root.
    # NOTE(review): streamlit presumably resolves its own version through
    # importlib.metadata at runtime, which needs this layout - confirm.
    + copy_metadata('streamlit')
    + [
        # Streamlit metadata (dynamic path and wildcard); kept in case
        # application code reads the 'streamlit_metadata' folder directly.
        (SP_PATH_STR + 'streamlit*.dist-info', 'streamlit_metadata'),
        (SP_PATH_STR + 'streamlit/static', 'streamlit/static'),
        # Application resources
        (os.path.abspath('app.py'), '.'),
        ('readme.md', '.'),
        ('requirements.txt', '.'),
    ],
    # 3. Final hiddenimports list: collected strings + manual strings.
    hiddenimports=all_collected_imports + [
        'charset_normalizer',
        'streamlit.runtime.scriptrunner.magic_funcs',
        'spacy.parts_of_speech',
        'scipy.spatial.ckdtree',
        'thinc.extra.wrappers',
        'streamlit.web.cli',
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # collect_all() provides no "excludes" element; indexing its result with
    # [3] raised IndexError, so only the manual excludes are listed here.
    excludes=['tkinter', 'matplotlib.pyplot'],
    noarchive=False,
    optimize=0,
)

# 4. Explicitly include the actual spaCy model directories using Tree.
a.datas.extend(
    Tree(get_model_path('en_core_web_sm'), prefix='en_core_web_sm')
)
a.datas.extend(
    Tree(get_model_path('xx_ent_wiki_sm'), prefix='xx_ent_wiki_sm')
)

pyz = PYZ(a.pure)

# Passing a.binaries and a.datas directly to EXE (no COLLECT step is defined
# in this file) bundles everything into the single executable.
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.datas,
    [],
    name='TopicModelingApp',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)