Spaces:

Mars203020
/

bertopic

Sleeping

File size: 4,649 Bytes

b7b041e

# topicmodelingapp.spec

import sys
import os
import site
from pathlib import Path 

from PyInstaller.utils.hooks import collect_all
from PyInstaller.building.datastruct import Tree 

# Append the absolute directory of the launching script (sys.argv[0]) to the
# import search path so modules located there can be imported.
# NOTE(review): when this spec is executed by PyInstaller, sys.argv[0] may be
# the PyInstaller launcher rather than this spec file -- confirm the intent.
_launch_dir = os.path.dirname(sys.argv[0])
sys.path.append(os.path.abspath(_launch_dir))

# --- Dynamic Path Logic (Makes the SPEC file generic) ---
def get_site_packages_path():
    """Return the site-packages directory of the current environment.

    The global site directories reported by ``site.getsitepackages()`` are
    scanned in order and the first existing directory that is actually named
    ``site-packages`` (or ``dist-packages``) wins.  Blindly taking element 0
    is wrong on Windows, where the first entry is the interpreter prefix
    rather than the package directory.

    If ``site.getsitepackages`` is unavailable or yields no usable entry, the
    interpreter's own "purelib" install location from ``sysconfig`` is used,
    which is correct on every platform (unlike a hard-coded
    ``lib/pythonX.Y/site-packages`` layout).

    Returns:
        pathlib.Path: The directory third-party packages are installed into.
    """
    import sysconfig

    try:
        candidates = site.getsitepackages()
    except AttributeError:
        # Some virtualenv-provided ``site`` modules lack getsitepackages().
        candidates = []

    for candidate in candidates:
        candidate_path = Path(candidate)
        if (candidate_path.name in ('site-packages', 'dist-packages')
                and candidate_path.is_dir()):
            return candidate_path

    # Platform-independent fallback: where pure-Python packages get installed.
    return Path(sysconfig.get_path('purelib'))

# String form of the site-packages directory with a trailing path separator,
# ready to be prefixed onto relative entries in the ``datas`` list.
SP_PATH_STR = f"{get_site_packages_path()}{os.sep}"

def get_model_path(model_name):
    """Return the absolute path to an installed spaCy model package.

    The model is first located through the import system, so it is found
    wherever it is importable from (user site, virtualenv, Windows layout,
    ...), not only in the first site-packages directory.  If the import
    system cannot resolve it, the original ``<site-packages>/<model_name>``
    lookup is used as a fallback.

    Args:
        model_name: Importable package name of the model, e.g.
            ``'en_core_web_sm'``.

    Returns:
        str: Absolute path of the model's package directory.

    Raises:
        FileNotFoundError: If the model cannot be located by either method.
    """
    import importlib.util

    try:
        # For a top-level name this only searches sys.path; nothing is imported.
        spec = importlib.util.find_spec(model_name)
    except (ImportError, ValueError):
        spec = None

    if spec is not None and spec.submodule_search_locations:
        for location in spec.submodule_search_locations:
            if os.path.isdir(location):
                return os.path.abspath(location)

    # Fallback: the conventional location inside site-packages.
    model_dir = get_site_packages_path() / model_name
    if not model_dir.exists():
        raise FileNotFoundError(
            f"spaCy model '{model_name}' not found at expected location: {model_dir}"
        )
    return str(model_dir)


# --- Core Dependency Collection (C-Extension Fix) ---

# collect_all(package) returns a 3-tuple: (datas, binaries, hiddenimports).
spacy_data = collect_all('spacy')
numpy_data = collect_all('numpy')
sklearn_data = collect_all('sklearn')
hdbscan_data = collect_all('hdbscan')
scipy_data = collect_all('scipy')

# Merge the per-package results into three flat lists, preserving the
# package order used above.
all_collected_datas = []      # index 0: data-file tuples
all_collected_binaries = []   # index 1: tuples for C-extensions/dylibs
all_collected_imports = []    # index 2: hidden-import module names (strings)
for _pkg_result in (spacy_data, numpy_data, sklearn_data, hdbscan_data, scipy_data):
    all_collected_datas.extend(_pkg_result[0])
    all_collected_binaries.extend(_pkg_result[1])
    all_collected_imports.extend(_pkg_result[2])


# --- Analysis Setup ---

# Build the dependency analysis for the application.
a = Analysis(
    # 1. Source files handed to PyInstaller as scripts.
    # NOTE(review): PyInstaller treats every entry here as an entry script;
    # confirm the helper modules are meant to be scripts rather than being
    # picked up through normal import analysis from 'run.py'.
    ['run.py', 'app.py', 'text_preprocessor.py', 'topic_modeling.py', 'gini_calculator.py', 'narrative_similarity.py', 'resource_path.py', 'topic_evolution.py'],
    pathex=['.'],

    # Collected binaries: the C extensions/dylibs gathered by collect_all.
    binaries=all_collected_binaries,

    # 2. The final datas list: collected tuples + manual (source, dest) tuples.
    datas=all_collected_datas + [
        # Streamlit metadata (dynamic path and wildcard) and static assets.
        # NOTE(review): the metadata lands in 'streamlit_metadata' rather than
        # a '*.dist-info' directory -- confirm the app looks for it there.
        (SP_PATH_STR + 'streamlit*.dist-info', 'streamlit_metadata'),
        (SP_PATH_STR + 'streamlit/static', 'streamlit/static'),

        # Application resources copied to the bundle root.
        (os.path.abspath('app.py'), '.'),
        ('readme.md', '.'),
        ('requirements.txt', '.'),

    ],

    # 3. The final hiddenimports list: collected strings + manual strings.
    hiddenimports=all_collected_imports + [
        'charset_normalizer',
        'streamlit.runtime.scriptrunner.magic_funcs',
        'spacy.parts_of_speech',
        'scipy.spatial.ckdtree',
        'thinc.extra.wrappers',
        'streamlit.web.cli',
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # collect_all() returns only (datas, binaries, hiddenimports); it has no
    # fourth "excludes" element, so indexing [3] on its results raised
    # IndexError.  Only the manual excludes are listed.
    excludes=['tkinter', 'matplotlib.pyplot'],
    noarchive=False,
    optimize=0,
)

# 4. Bundle each spaCy model directory as a Tree, placed in the bundle under
#    a prefix equal to the model's package name.
for _model_name in ('en_core_web_sm', 'xx_ent_wiki_sm'):
    a.datas.extend(Tree(get_model_path(_model_name), prefix=_model_name))

# Build the PYZ object from the analysis' pure-Python module list.
pyz = PYZ(a.pure)

# Keyword options for the executable, kept apart from the positional payload.
_exe_options = dict(
    name='TopicModelingApp',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)

# Scripts, binaries and data files are all passed straight to EXE.
# NOTE(review): no COLLECT step is visible here, which presumably means a
# one-file build -- confirm.
exe = EXE(pyz, a.scripts, a.binaries, a.datas, [], **_exe_options)