Spaces:
Running
Running
| import os | |
| import sys | |
| import subprocess | |
| from pathlib import Path | |
# 1. Retrieve the GitHub token
# Configured default model: ai4data/datause-extraction-v1 (with sample_pdfs fallback)
#
# GITHUB_TOKEN takes precedence; PAT is the fallback. Each candidate is
# stripped so that a value carrying surrounding whitespace or a trailing
# newline is not interpolated verbatim into the clone URL, and so that a
# blank GITHUB_TOKEN does not shadow a usable PAT.
token = os.environ.get("GITHUB_TOKEN", "").strip() or os.environ.get("PAT", "").strip()
if not token:
    # Fail fast: nothing below can work without credentials for the private repo.
    raise ValueError("Please set the GITHUB_TOKEN or PAT secret in Space settings.")
# 2. Clone/Pull private repository
# NOTE: the token is embedded in the remote URL, so git records it in the
# checkout's .git/config and it must be masked in anything we print or raise.
repo_url = f"https://oauth2:{token}@github.com/rafmacalaba/monitoring_of_datause.git"
repo_dir = Path("monitoring_of_datause")


def _run_git(args, action):
    """Run ``git <args>`` quietly and fail with a token-free error message.

    Args:
        args: Arguments passed to ``git`` (without the leading "git").
        action: Verb used in the error message, e.g. "clone" or "pull".

    Raises:
        RuntimeError: If git exits with a non-zero status. The original
            exception is suppressed (``from None``) because its command
            line contains the token.
    """
    try:
        subprocess.run(["git", *args], check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        stderr_text = e.stderr or ""
        stderr_clean = stderr_text.replace(token, "********") if token else stderr_text
        raise RuntimeError(f"Failed to {action} repository: {stderr_clean}") from None


if not repo_dir.exists():
    print("Cloning private repository...")
    _run_git(["clone", repo_url, str(repo_dir)], "clone")
else:
    print("Repository exists. Pulling updates...")
    # The existing checkout still holds the URL (and token) it was cloned
    # with; point origin at the current URL so a rotated secret is honoured.
    _run_git(["-C", str(repo_dir), "remote", "set-url", "origin", repo_url], "pull")
    _run_git(["-C", str(repo_dir), "pull"], "pull")
# 3. Install the gliner2 sources shipped inside the repository first.
#    This is done explicitly because standard pip doesn't read uv.sources.
gliner2_path = repo_dir / "gliner2-src"
if gliner2_path.exists():
    print("Installing local gliner2 library...")
    # Editable install, using the same interpreter that runs this script.
    editable_install_cmd = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-e",
        str(gliner2_path),
    ]
    subprocess.run(editable_install_cmd, check=True)
# 4. Install other dependencies and project package
requirements_file = repo_dir / "requirements.txt"
if requirements_file.exists():
    print("Installing dependencies...")
    # pip runs from inside the checkout and is given the bare file name;
    # presumably so relative entries in the file resolve against the repo
    # root -- confirm against the repository's requirements.txt.
    requirements_install_cmd = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-r",
        "requirements.txt",
    ]
    subprocess.run(requirements_install_cmd, cwd=str(repo_dir), check=True)
# 4.5. Pre-download models to cache during startup
# Best effort: any failure (including a missing huggingface_hub) is reported
# as a warning and startup continues.
print("Pre-downloading models to local cache...")
try:
    from huggingface_hub import snapshot_download

    # Downloaded in this order; the first failure skips the remaining ones.
    for model_repo in (
        "fastino/gliner2-large-v1",
        "ai4data/datause-extraction-v1",
        "ai4data-use/bert-base-uncased-data-use",
    ):
        snapshot_download(repo_id=model_repo)
    print("Models pre-downloaded successfully.")
except Exception as err:
    print(f"Warning: Failed to pre-download models: {err}")
# 5. Add to Python path and launch the app
# Each entry is pushed to the front of sys.path, so the directory inserted
# last ("datause_extract") is searched first, then "src", then the repo root.
for import_root in (repo_dir, repo_dir / "src", repo_dir / "datause_extract"):
    sys.path.insert(0, str(import_root))

# These imports must stay below the sys.path changes: `app` is looked up
# through the directories added above.
import gradio as gr
from app import demo, CUSTOM_CSS
if __name__ == "__main__":
    # Pre-load models into RAM to avoid first-use delay.
    # Best effort: a failure here is only reported, the UI still starts.
    print("Pre-loading models into RAM...")
    try:
        from ai4data import DatasetExtractor

        extractor = DatasetExtractor()
        # Reading these attributes is what triggers the loading -- presumably
        # they are lazily initialised; confirm against DatasetExtractor.
        for lazy_attr in ("model", "classifier"):
            getattr(extractor, lazy_attr)
        print("Models successfully pre-loaded into RAM.")
    except Exception as err:
        print(f"Warning: Failed to pre-load models: {err}")

    # Slate-coloured base theme using the Inter web font with generic fallbacks.
    font_stack = [gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"]
    theme = gr.themes.Base(primary_hue="slate", neutral_hue="slate", font=font_stack)

    demo.launch(css=CUSTOM_CSS, theme=theme)