# -*- mode: python ; coding: utf-8 -*-
# topicmodelingapp.spec
import sys
import os
import site
from pathlib import Path
from PyInstaller.utils.hooks import collect_all
from PyInstaller.building.datastruct import Tree
# Make sibling modules importable while PyInstaller executes this spec:
# append the directory containing the invoked script to the search path.
_script_dir = os.path.dirname(sys.argv[0])
sys.path.append(os.path.abspath(_script_dir))
# --- Dynamic Path Logic (Makes the SPEC file generic) ---
def get_site_packages_path():
    """Return the site-packages directory of the current environment.

    Prefers ``site.getsitepackages()``; if that call is missing, empty,
    or raises (as in some Conda/virtualenv setups), falls back to the
    conventional layout under ``sys.prefix``.

    Returns:
        pathlib.Path: the site-packages directory (existence not checked).
    """
    try:
        # Standard CPython exposes the environment's site-packages here.
        return Path(site.getsitepackages()[0])
    except Exception:
        # Fallback for complex environments like Conda.  The layout is
        # platform-dependent: Windows uses <prefix>/Lib/site-packages,
        # POSIX uses <prefix>/lib/pythonX.Y/site-packages.
        if os.name == 'nt':
            return Path(sys.prefix) / 'Lib' / 'site-packages'
        return (
            Path(sys.prefix)
            / 'lib'
            / f'python{sys.version_info.major}.{sys.version_info.minor}'
            / 'site-packages'
        )


# Trailing separator so the constant can be concatenated directly with
# package-relative paths (e.g. SP_PATH_STR + 'streamlit/static').
SP_PATH_STR = str(get_site_packages_path()) + os.sep
def get_model_path(model_name):
    """Return the absolute path of an installed spaCy model directory.

    Raises:
        FileNotFoundError: if the model is not installed in site-packages.
    """
    candidate = get_site_packages_path() / model_name
    if candidate.exists():
        return str(candidate)
    raise FileNotFoundError(
        f"spaCy model '{model_name}' not found at expected location: {candidate}"
    )
# --- Core Dependency Collection (C-Extension Fix) ---
# collect_all returns a tuple:
#   (datas [0], binaries [1], hiddenimports [2], excludes [3], pathex [4])
# The per-package results are kept in named variables because the
# Analysis() call below also reads their excludes (index 3) individually.
spacy_data = collect_all('spacy')
numpy_data = collect_all('numpy')
sklearn_data = collect_all('sklearn')
hdbscan_data = collect_all('hdbscan')
scipy_data = collect_all('scipy')

# Aggregate once so each category is consolidated with a single
# comprehension instead of five copy-pasted extend() calls per list.
_all_collections = (spacy_data, numpy_data, sklearn_data, hdbscan_data, scipy_data)

# 1. All hidden imports (index 2 - module names/strings)
all_collected_imports = [name for coll in _all_collections for name in coll[2]]
# 2. All collected data files (index 0 - (src, dest) tuples)
all_collected_datas = [entry for coll in _all_collections for entry in coll[0]]
# 3. All collected binaries (index 1 - C-extension/dylib tuples)
all_collected_binaries = [entry for coll in _all_collections for entry in coll[1]]
# --- Analysis Setup ---
# Analysis scans the listed scripts, resolves their imports, and gathers
# everything the frozen app needs.  (Analysis, PYZ and EXE are injected
# into the namespace by PyInstaller when it executes this spec file.)
a = Analysis(
    # 1. Explicitly list all local source files so every application
    # module is traced, not just the entry script.
    ['run.py', 'app.py', 'text_preprocessor.py', 'topic_modeling.py', 'gini_calculator.py', 'narrative_similarity.py', 'resource_path.py', 'topic_evolution.py'],
    pathex=['.'],
    # 2. Collected C extensions / shared libraries (spacy, numpy,
    # sklearn, hdbscan, scipy) — without these the native modules of
    # those packages are missing at runtime.
    binaries=all_collected_binaries,
    # 3. Data files: everything collect_all found plus manual additions.
    datas=all_collected_datas + [
        # Streamlit metadata (dynamic site-packages path and wildcard);
        # Streamlit inspects its own dist-info and serves static assets.
        (SP_PATH_STR + 'streamlit*.dist-info', 'streamlit_metadata'),
        (SP_PATH_STR + 'streamlit/static', 'streamlit/static'),
        # Application resources shipped next to the executable.
        (os.path.abspath('app.py'), '.'),
        ('readme.md', '.'),
        ('requirements.txt', '.'),
    ],
    # 4. Hidden imports: collected module names plus modules that
    # PyInstaller's static analysis cannot see (imported dynamically).
    hiddenimports=all_collected_imports + [
        'charset_normalizer',
        'streamlit.runtime.scriptrunner.magic_funcs',
        'spacy.parts_of_speech',
        'scipy.spatial.ckdtree',
        'thinc.extra.wrappers',
        'streamlit.web.cli',
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # Merge our explicit excludes with those reported by collect_all
    # for each collected package (index 3 of each tuple).
    excludes=['tkinter', 'matplotlib.pyplot'] + spacy_data[3] + numpy_data[3] + sklearn_data[3] + hdbscan_data[3] + scipy_data[3],
    noarchive=False,
    optimize=0,
)
# 4. Bundle the installed spaCy model directories themselves: collect_all
# does not pick up models (they are separate packages), so add each one
# as a Tree rooted at a prefix matching its package name.
for _model_name in ('en_core_web_sm', 'xx_ent_wiki_sm'):
    a.datas.extend(Tree(get_model_path(_model_name), prefix=_model_name))
# Archive of all pure-Python modules discovered by the analysis.
pyz = PYZ(a.pure)

# One-file executable.  With runtime_tmpdir=None the bundled data and
# binaries are unpacked to the OS default temporary directory at launch.
# (Fix: removed a stray trailing '|' after the closing parenthesis that
# made the spec a SyntaxError.)
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.datas,
    [],
    name='TopicModelingApp',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,          # compress with UPX if available
    upx_exclude=[],
    runtime_tmpdir=None,
    console=True,      # keep a console window so Streamlit logs are visible
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)