File size: 4,649 Bytes
b7b041e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# topicmodelingapp.spec

import sys
import os
import site
from pathlib import Path 

from PyInstaller.utils.hooks import collect_all
from PyInstaller.building.datastruct import Tree 

# Add the script's directory to the path for local imports.
# NOTE(review): the directory is derived from sys.argv[0]. When this spec is
# executed through the `pyinstaller` command that is presumably the launcher's
# location, not the folder holding this spec -- confirm, and consider the
# SPECPATH global that PyInstaller exposes to spec files instead.
sys.path.append(os.path.abspath(os.path.dirname(sys.argv[0])))

# --- Dynamic Path Logic (Makes the SPEC file generic) ---
def get_site_packages_path():
    """Return the site-packages directory of the current environment.

    ``site.getsitepackages()`` does not always list the site-packages
    directory first (on Windows the first entry is the interpreter prefix),
    so the first entry that is actually named ``site-packages`` or
    ``dist-packages`` is used. If none is reported, or the function is not
    available in this environment, the interpreter's ``purelib`` install
    location from ``sysconfig`` is returned, which is correct on every
    platform layout.

    Returns:
        pathlib.Path: Directory third-party packages are installed into.
    """
    import sysconfig  # local import: only this helper needs it

    try:
        candidates = [Path(entry) for entry in site.getsitepackages()]
    except AttributeError:
        # Some virtual-environment tools ship a `site` module without
        # getsitepackages(); fall through to sysconfig.
        candidates = []

    for candidate in candidates:
        if candidate.name in ('site-packages', 'dist-packages'):
            return candidate

    return Path(sysconfig.get_paths()['purelib'])

# Site-packages directory as a string ending in the OS path separator, so
# relative package paths can simply be appended to it.
SP_PATH_STR = f"{get_site_packages_path()}{os.sep}"

def get_model_path(model_name):
    """Return the path to an installed spaCy model package.

    The model is looked up through the import system first, so it is found in
    whichever ``sys.path`` directory it was installed into (user site, a
    secondary site-packages directory, ...). If it cannot be located that
    way, ``<site-packages>/<model_name>`` is checked as a fallback.

    Args:
        model_name: Importable package name of the model, for example
            ``'en_core_web_sm'``.

    Returns:
        str: Directory that contains the model package.

    Raises:
        FileNotFoundError: If the model cannot be located by either lookup.
    """
    import importlib.util  # local import: only this helper needs it

    try:
        spec = importlib.util.find_spec(model_name)
    except (ImportError, ValueError):
        # A broken or half-initialised module should not abort the lookup;
        # the site-packages fallback below still gets its chance.
        spec = None

    if spec is not None and spec.submodule_search_locations:
        package_dirs = list(spec.submodule_search_locations)
        return str(Path(package_dirs[0]))

    model_dir = get_site_packages_path() / model_name
    if model_dir.exists():
        return str(model_dir)

    raise FileNotFoundError(
        f"spaCy model '{model_name}' not found at expected location: {model_dir}"
    )


# --- Core Dependency Collection (C-Extension Fix) ---

# collect_all() returns a 3-tuple per package:
#   (datas [0], binaries [1], hiddenimports [2])
spacy_data = collect_all('spacy')
numpy_data = collect_all('numpy')
sklearn_data = collect_all('sklearn')
hdbscan_data = collect_all('hdbscan')
scipy_data = collect_all('scipy')

# Fixed order in which the per-package results are merged below.
_collected_packages = (spacy_data, numpy_data, sklearn_data, hdbscan_data, scipy_data)

# 1. Hidden imports (index 2 - module name strings)
all_collected_imports = [name for pkg in _collected_packages for name in pkg[2]]

# 2. Data files (index 0 - tuples)
all_collected_datas = [entry for pkg in _collected_packages for entry in pkg[0]]

# 3. Binaries (index 1 - tuples of C-extensions/dylibs)
all_collected_binaries = [entry for pkg in _collected_packages for entry in pkg[1]]


# --- Analysis Setup ---

a = Analysis(
    # 1. Explicitly list all your source files (run.py is listed first)
    ['run.py', 'app.py', 'text_preprocessor.py', 'topic_modeling.py', 'gini_calculator.py', 'narrative_similarity.py', 'resource_path.py', 'topic_evolution.py'],
    pathex=['.'],

    # C extensions / shared libraries gathered by the collect_all() calls
    binaries=all_collected_binaries,

    # 2. The final datas list: collected tuples + manual tuples
    datas=all_collected_datas + [
        # Streamlit metadata (Dynamic path and wildcard)
        # NOTE(review): the dist-info content is placed under
        # 'streamlit_metadata'; if streamlit looks up its own package
        # metadata at runtime it may not find it there -- confirm, and
        # consider PyInstaller's copy_metadata('streamlit') helper.
        (SP_PATH_STR + 'streamlit*.dist-info', 'streamlit_metadata'),
        (SP_PATH_STR + 'streamlit/static', 'streamlit/static'),

        # Application resources
        (os.path.abspath('app.py'), '.'),
        ('readme.md', '.'),
        ('requirements.txt', '.'),

    ],

    # 3. The final hiddenimports list: collected strings + manual strings
    hiddenimports=all_collected_imports + [
        'charset_normalizer',
        'streamlit.runtime.scriptrunner.magic_funcs',
        'spacy.parts_of_speech',
        'scipy.spatial.ckdtree',
        'thinc.extra.wrappers',
        'streamlit.web.cli',
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # Only the manual excludes are listed. collect_all() yields just
    # (datas, binaries, hiddenimports) -- there is no "excludes" element, so
    # indexing its result with [3] raised IndexError and aborted the build.
    excludes=['tkinter', 'matplotlib.pyplot'],
    noarchive=False,
    optimize=0,
)

# 4. Bundle each installed spaCy model directory via Tree, placing it under a
#    top-level folder named after the model.
for _model_name in ('en_core_web_sm', 'xx_ent_wiki_sm'):
    _model_tree = Tree(get_model_path(_model_name), prefix=_model_name)
    a.datas.extend(_model_tree)

# Build the PYZ archive from the pure-Python modules found by the analysis.
pyz = PYZ(a.pure)

# Build the executable. The scripts, binaries and data files from the analysis
# are all passed to EXE directly; no separate collection step is visible in
# this part of the file.
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.datas,
    [],
    name='TopicModelingApp',  # name of the generated executable
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,  # UPX compression requested, with no per-file exclusions below
    upx_exclude=[],
    runtime_tmpdir=None,
    console=True,  # console application rather than a windowed one
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)