File size: 3,249 Bytes
586ccc0
 
 
 
 
 
f9c5c24
586ccc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bea081
586ccc0
1baa73a
 
 
 
 
 
 
 
 
 
 
586ccc0
 
 
 
 
3bea081
 
586ccc0
 
1baa73a
 
 
 
 
 
 
 
 
 
 
3bea081
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import sys
import subprocess
from pathlib import Path

# 1. Retrieve the GitHub token
# GITHUB_TOKEN takes precedence; PAT is consulted only when GITHUB_TOKEN is
# unset or empty. Startup aborts when neither holds a non-empty value.
# Configured default model: ai4data/datause-extraction-v1 (with sample_pdfs fallback)
token = next(
    (os.environ[name] for name in ("GITHUB_TOKEN", "PAT") if os.environ.get(name)),
    None,
)
if token is None:
    raise ValueError("Please set the GITHUB_TOKEN or PAT secret in Space settings.")

# 2. Clone/Pull private repository
# The token is embedded in the remote URL, so any git output surfaced in an
# error message is redacted before it is raised.
repo_url = f"https://oauth2:{token}@github.com/rafmacalaba/monitoring_of_datause.git"
repo_dir = Path("monitoring_of_datause")


def _run_git(args, failure_message):
    """Run ``git`` with *args* and raise a token-free error on failure.

    Args:
        args: Command-line arguments passed after the ``git`` executable.
        failure_message: Prefix of the RuntimeError message; git's stderr is
            appended after ``": "``.

    Raises:
        RuntimeError: If git exits with a non-zero status. The message holds
            git's stderr with every occurrence of the token masked.
    """
    try:
        subprocess.run(["git", *args], check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        stderr_clean = (e.stderr or "").replace(token, "********")
        # "from None" keeps the CalledProcessError (whose command line contains
        # the token-bearing URL) out of the printed traceback.
        raise RuntimeError(f"{failure_message}: {stderr_clean}") from None


if not repo_dir.exists():
    print("Cloning private repository...")
    _run_git(["clone", repo_url, str(repo_dir)], "Failed to clone repository")
else:
    print("Repository exists. Pulling updates...")
    # The existing checkout still remembers the URL (and token) it was cloned
    # with; point "origin" at the current URL so a rotated token is honoured.
    _run_git(
        ["-C", str(repo_dir), "remote", "set-url", "origin", repo_url],
        "Failed to pull repository",
    )
    _run_git(["-C", str(repo_dir), "pull"], "Failed to pull repository")

# 3. Explicitly install local gliner2 first (since standard pip doesn't read uv.sources)
# Both install steps share the same interpreter-bound pip invocation.
_pip_install = [sys.executable, "-m", "pip", "install"]

gliner2_path = repo_dir / "gliner2-src"
if gliner2_path.exists():
    print("Installing local gliner2 library...")
    # Editable install of the gliner2 sources shipped inside the repository.
    subprocess.run(_pip_install + ["-e", str(gliner2_path)], check=True)

# 4. Install other dependencies and project package
requirements_file = repo_dir / "requirements.txt"
if requirements_file.exists():
    print("Installing dependencies...")
    # pip runs from inside the repository, so the bare file name is resolved
    # against repo_dir.
    subprocess.run(
        _pip_install + ["-r", requirements_file.name],
        cwd=str(repo_dir),
        check=True,
    )

# 4.5. Pre-download models to cache during startup
# Best effort: the first failure skips the remaining downloads and is only
# reported as a warning, so startup continues either way.
print("Pre-downloading models to local cache...")
try:
    from huggingface_hub import snapshot_download

    for _model_repo in (
        "fastino/gliner2-large-v1",
        "ai4data/datause-extraction-v1",
        "ai4data-use/bert-base-uncased-data-use",
    ):
        snapshot_download(repo_id=_model_repo)
    print("Models pre-downloaded successfully.")
except Exception as e:
    print(f"Warning: Failed to pre-download models: {e}")

# 5. Add to Python path and launch the app
# Prepend the repository locations in a single step; entries are listed in
# lookup-priority order (datause_extract first, repository root last).
sys.path[:0] = [
    str(repo_dir / "datause_extract"),
    str(repo_dir / "src"),
    str(repo_dir),
]

import gradio as gr
from app import demo, CUSTOM_CSS

if __name__ == "__main__":
    # Warm-up step: pre-load models into RAM to avoid a first-use delay.
    # Failures are reported as a warning and do not stop the launch.
    print("Pre-loading models into RAM...")
    try:
        from ai4data import DatasetExtractor

        extractor = DatasetExtractor()
        # Touch both attributes; presumably they load lazily on first access
        # (confirm against ai4data.DatasetExtractor).
        for _attr in ("model", "classifier"):
            getattr(extractor, _attr)
        print("Models successfully pre-loaded into RAM.")
    except Exception as e:
        print(f"Warning: Failed to pre-load models: {e}")

    # Slate-coloured base theme using Inter with generic sans-serif fallbacks.
    font_stack = [
        gr.themes.GoogleFont("Inter"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ]
    theme = gr.themes.Base(primary_hue="slate", neutral_hue="slate", font=font_stack)
    demo.launch(css=CUSTOM_CSS, theme=theme)