rafmacalaba's picture
Upload folder using huggingface_hub
1baa73a verified
Raw
History Blame Contribute Delete
3.25 kB
import os
import sys
import subprocess
from pathlib import Path
# 1. Retrieve the GitHub token
# Configured default model: ai4data/datause-extraction-v1 (with sample_pdfs fallback)
# GITHUB_TOKEN is consulted first; PAT is used only when GITHUB_TOKEN is
# unset or empty.
token = next(
    (os.environ[name] for name in ("GITHUB_TOKEN", "PAT") if os.environ.get(name)),
    None,
)
if not token:
    raise ValueError("Please set the GITHUB_TOKEN or PAT secret in Space settings.")
# 2. Clone/Pull private repository
# The token is part of the HTTPS remote URL, so every git failure message has
# to be scrubbed before it is surfaced.
repo_url = f"https://oauth2:{token}@github.com/rafmacalaba/monitoring_of_datause.git"
repo_dir = Path("monitoring_of_datause")


def _run_git(args, failure_prefix):
    """Run ``git <args>`` and raise a token-free error if it fails.

    Args:
        args: Arguments passed to the ``git`` executable.
        failure_prefix: Text placed before the scrubbed stderr in the error.

    Raises:
        RuntimeError: If git exits with a non-zero status. The message holds
            git's stderr with the token replaced by asterisks.
    """
    try:
        subprocess.run(["git", *args], check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        stderr_clean = e.stderr.replace(token, "********") if token else e.stderr
        # "from None" keeps the CalledProcessError out of the traceback; its
        # message includes the command line, which can carry the token URL.
        raise RuntimeError(f"{failure_prefix}: {stderr_clean}") from None


if not repo_dir.exists():
    print("Cloning private repository...")
    _run_git(["clone", repo_url, str(repo_dir)], "Failed to clone repository")
else:
    print("Repository exists. Pulling updates...")
    # An existing checkout still has the URL it was cloned with, including the
    # token that was valid back then. Re-point origin at the URL built from
    # the current token so a rotated secret does not break the pull.
    _run_git(
        ["-C", str(repo_dir), "remote", "set-url", "origin", repo_url],
        "Failed to pull repository",
    )
    _run_git(["-C", str(repo_dir), "pull"], "Failed to pull repository")
# 3. Explicitly install local gliner2 first (since standard pip doesn't read uv.sources)
# The step is skipped when the checkout has no "gliner2-src" entry.
gliner2_path = repo_dir / "gliner2-src"
if gliner2_path.exists():
    print("Installing local gliner2 library...")
    # Editable install, run with the interpreter that is executing this script.
    editable_install_cmd = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-e",
        str(gliner2_path),
    ]
    subprocess.run(editable_install_cmd, check=True)
# 4. Install other dependencies and project package
# The step is skipped when the checkout has no requirements.txt.
requirements_file = repo_dir / "requirements.txt"
if requirements_file.exists():
    print("Installing dependencies...")
    # pip is started inside the checkout, so the requirements file is passed
    # by its bare name rather than by its path from the current directory.
    requirements_install_cmd = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-r",
        requirements_file.name,
    ]
    subprocess.run(requirements_install_cmd, cwd=str(repo_dir), check=True)
# 4.5. Pre-download models to cache during startup
print("Pre-downloading models to local cache...")
# Fetched in this order; the first failure abandons the remaining ones.
model_repo_ids = (
    "fastino/gliner2-large-v1",
    "ai4data/datause-extraction-v1",
    "ai4data-use/bert-base-uncased-data-use",
)
try:
    # Imported here so that a missing huggingface_hub is reported as a
    # warning by the handler below instead of stopping the script.
    from huggingface_hub import snapshot_download

    for model_repo_id in model_repo_ids:
        snapshot_download(repo_id=model_repo_id)
    print("Models pre-downloaded successfully.")
except Exception as exc:
    # Best effort: any failure is printed and startup carries on.
    print(f"Warning: Failed to pre-download models: {exc}")
# 5. Add to Python path and launch the app
# Each entry is pushed to the front of sys.path, so the directory inserted
# last ("datause_extract") ends up being searched first.
for import_root in (repo_dir, repo_dir / "src", repo_dir / "datause_extract"):
    sys.path.insert(0, str(import_root))

import gradio as gr
from app import demo, CUSTOM_CSS
if __name__ == "__main__":
    # Pre-load models into RAM to avoid first-use delay
    print("Pre-loading models into RAM...")
    try:
        from ai4data import DatasetExtractor

        # Bound at module level, so the instance stays referenced after this
        # block finishes.
        extractor = DatasetExtractor()
        # The attributes are read only for the side effect of accessing them;
        # the values are discarded.
        # NOTE(review): presumably these are lazy loaders - confirm in ai4data.
        for preload_attr in ("model", "classifier"):
            getattr(extractor, preload_attr)
        print("Models successfully pre-loaded into RAM.")
    except Exception as exc:
        # Best effort: a failed pre-load is printed and the app still launches.
        print(f"Warning: Failed to pre-load models: {exc}")

    # Slate-coloured base theme with Inter first in the font list.
    font_stack = [
        gr.themes.GoogleFont("Inter"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ]
    theme = gr.themes.Base(
        primary_hue="slate",
        neutral_hue="slate",
        font=font_stack,
    )
    demo.launch(css=CUSTOM_CSS, theme=theme)