| # topicmodelingapp.spec | |
| import sys | |
| import os | |
| import site | |
| from pathlib import Path | |
| from PyInstaller.utils.hooks import collect_all | |
| from PyInstaller.building.datastruct import Tree | |
# Make the application's local modules importable during Analysis.
# Inside a spec file, sys.argv[0] is the *pyinstaller* launcher, not this
# spec, so its directory would be the environment's bin/Scripts folder.
# PyInstaller injects the spec's own directory as the global SPECPATH —
# prefer it, and fall back to the old behavior when run outside PyInstaller.
sys.path.append(os.path.abspath(globals().get('SPECPATH', os.path.dirname(sys.argv[0]))))
| # --- Dynamic Path Logic (Makes the SPEC file generic) --- | |
def get_site_packages_path():
    """Locate the site-packages directory of the current environment.

    Returns:
        Path: a site-packages directory. Prefers entries from
        ``site.getsitepackages()`` that actually contain 'site-packages',
        because on some platforms the first entry is the bare prefix
        (e.g. the environment root), which would make every derived
        data path wrong.

    Falls back to the conventional POSIX layout under ``sys.prefix`` for
    environments (e.g. some Conda setups) where ``site.getsitepackages()``
    is unavailable or raises.
    """
    try:
        candidates = site.getsitepackages()
        # Prefer a real 'site-packages' entry over e.g. the bare prefix.
        for candidate in candidates:
            if 'site-packages' in candidate:
                return Path(candidate)
        return Path(candidates[0])
    except Exception:
        # POSIX-style fallback for environments lacking getsitepackages().
        # NOTE(review): this layout is wrong on Windows (Lib\site-packages),
        # but Windows interpreters always provide getsitepackages().
        return Path(sys.prefix) / 'lib' / f'python{sys.version_info.major}.{sys.version_info.minor}' / 'site-packages'
| SP_PATH_STR = str(get_site_packages_path()) + os.sep | |
def get_model_path(model_name):
    """Return the absolute path of an installed spaCy model package.

    Args:
        model_name: package name of the model, e.g. 'en_core_web_sm'.

    Returns:
        str: absolute path to the model's package directory.

    Raises:
        FileNotFoundError: if the model package is not present in the
            environment's site-packages directory.
    """
    candidate = get_site_packages_path() / model_name
    if candidate.exists():
        return str(candidate)
    raise FileNotFoundError(
        f"spaCy model '{model_name}' not found at expected location: {candidate}"
    )
# --- Core Dependency Collection (C-Extension Fix) ---
# collect_all returns a 5-tuple:
#   (datas [0], binaries [1], hiddenimports [2], excludes [3], pathex [4])
# These scientific packages ship compiled extensions and data files that
# PyInstaller's static analysis misses, so collect everything explicitly.
spacy_data = collect_all('spacy')
numpy_data = collect_all('numpy')
sklearn_data = collect_all('sklearn')
hdbscan_data = collect_all('hdbscan')
scipy_data = collect_all('scipy')

_collected = (spacy_data, numpy_data, sklearn_data, hdbscan_data, scipy_data)

# Flatten each slot across all packages, preserving the package order above.
# 1. Hidden imports (index 2 - module name strings)
all_collected_imports = [mod for result in _collected for mod in result[2]]
# 2. Data files (index 0 - (src, dest) tuples)
all_collected_datas = [entry for result in _collected for entry in result[0]]
# 3. Binaries (index 1 - C-extension/dylib tuples)
all_collected_binaries = [entry for result in _collected for entry in result[1]]
# --- Analysis Setup ---
# Analysis scans the entry scripts, resolves imports, and produces the
# module/data/binary lists that PYZ and EXE consume below.
a = Analysis(
    # 1. Explicitly list all your source files
    ['run.py', 'app.py', 'text_preprocessor.py', 'topic_modeling.py', 'gini_calculator.py', 'narrative_similarity.py', 'resource_path.py', 'topic_evolution.py'],
    pathex=['.'],
    # *** CRITICAL FIX: Use the collected binaries list for C extensions/dylibs ***
    binaries=all_collected_binaries,
    # 2. The final datas list: collected tuples + manual tuples
    datas=all_collected_datas + [
        # Streamlit metadata (Dynamic path and wildcard)
        # NOTE(review): the dist-info directory is relocated into
        # 'streamlit_metadata/', but importlib.metadata searches for
        # '<name>.dist-info' at the search-path root — confirm Streamlit's
        # version lookup still works in the frozen app; PyInstaller's
        # copy_metadata('streamlit') is the usual way to bundle metadata.
        (SP_PATH_STR + 'streamlit*.dist-info', 'streamlit_metadata'),
        # Streamlit's bundled frontend assets (served at runtime).
        (SP_PATH_STR + 'streamlit/static', 'streamlit/static'),
        # Application resources
        (os.path.abspath('app.py'), '.'),
        ('readme.md', '.'),
        ('requirements.txt', '.'),
    ],
    # 3. The final hiddenimports list: collected strings + manual strings
    # (modules imported dynamically at runtime, invisible to static analysis)
    hiddenimports=all_collected_imports + [
        'charset_normalizer',
        'streamlit.runtime.scriptrunner.magic_funcs',
        'spacy.parts_of_speech',
        'scipy.spatial.ckdtree',
        'thinc.extra.wrappers',
        'streamlit.web.cli',
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # Add all collected excludes to the main excludes list
    excludes=['tkinter', 'matplotlib.pyplot'] + spacy_data[3] + numpy_data[3] + sklearn_data[3] + hdbscan_data[3] + scipy_data[3],
    noarchive=False,
    optimize=0,
)
# 4. Bundle the installed spaCy model packages wholesale. collect_all('spacy')
# grabs the library itself, but the trained models live in separate top-level
# packages, so their directory trees are added to the data list explicitly.
for _model in ('en_core_web_sm', 'xx_ent_wiki_sm'):
    a.datas.extend(Tree(get_model_path(_model), prefix=_model))

# Archive of the collected pure-Python modules.
pyz = PYZ(a.pure)

# Single-file executable (runtime_tmpdir=None => onefile extraction to temp).
# console=True keeps a terminal attached so the launcher's logging is visible.
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.datas,
    [],
    name='TopicModelingApp',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)