| | |
| | import os |
| | import shutil |
| | import json |
| | from pathlib import Path |
| | from huggingface_hub import HfApi, create_repo |
| | import tarfile |
| | import tempfile |
| |
|
class HFStorageSync:
    """Sync a local data directory with a Hugging Face dataset repository.

    The directory is stored in the repository as a single gzip-compressed
    tar archive (``data.tar.gz``). All public methods report problems by
    printing a message instead of raising, so they are safe to call from
    start-up and shut-down scripts.
    """

    # Name of the archive file inside the dataset repository.
    _ARCHIVE_NAME = "data.tar.gz"

    # Entries of the data directory that are never archived: the
    # writability probe created by download_data() and placeholder files.
    _EXCLUDED_NAMES = ("test_write", ".gitkeep")

    # README uploaded once, right after the repository has been created.
    _README_CONTENT = (
        "# Open WebUI Storage\n"
        "\n"
        "This dataset stores persistent data for Open WebUI deployment.\n"
        "\n"
        "## Contents\n"
        "\n"
        "- `data.tar.gz`: Compressed archive containing all Open WebUI data including:\n"
        "- User configurations\n"
        "- Chat histories\n"
        "- Uploaded files\n"
        "- Database files\n"
        "\n"
        "This repository is automatically managed by the Open WebUI sync system.\n"
    )

    def __init__(self, repo_id, token=None, data_dir="/tmp/open-webui-data"):
        """Store the sync configuration and build the Hub API client.

        Args:
            repo_id: Identifier of the dataset repository.
            token: Hugging Face access token; without one, downloads and
                uploads are skipped.
            data_dir: Local directory that is archived / restored.
        """
        self.repo_id = repo_id
        self.data_dir = Path(data_dir)
        self.token = token
        self.api = HfApi(token=token) if token else HfApi()

    @staticmethod
    def _remove_quietly(path):
        """Delete *path*; cleanup is best-effort, so OS errors are ignored."""
        try:
            os.unlink(path)
        except OSError:
            pass

    def _upload_readme(self):
        """Upload the initial README; a failure is reported but not fatal."""
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".md", delete=False, encoding="utf-8"
        ) as tmp:
            tmp.write(self._README_CONTENT)
        # The file is closed here, so the content is fully written before
        # the upload reads it back by name.
        try:
            self.api.upload_file(
                path_or_fileobj=tmp.name,
                path_in_repo="README.md",
                repo_id=self.repo_id,
                repo_type="dataset",
                commit_message="Initial repository setup",
                token=self.token,
            )
        except Exception as readme_error:
            print(f"Warning: could not upload README: {readme_error}")
        finally:
            # Always remove the temp file, even when the upload failed.
            self._remove_quietly(tmp.name)

    def _extract_archive(self, archive_path):
        """Extract the downloaded archive into the data directory."""
        with tarfile.open(archive_path, "r:gz") as tar:
            if hasattr(tarfile, "data_filter"):
                # The archive comes from a remote repository: refuse
                # members that would escape the destination directory.
                tar.extractall(self.data_dir, filter="data")
            else:
                # Interpreter without extraction filters: unfiltered
                # extraction is the only option available.
                tar.extractall(self.data_dir)

    def ensure_repo_exists(self):
        """Create the dataset repository if it doesn't exist.

        Returns:
            True when the repository exists or was created, False when no
            token is configured or the repository could not be created.
        """
        if not self.token:
            print("No token provided, cannot create repository")
            return False

        try:
            self.api.repo_info(repo_id=self.repo_id, repo_type="dataset")
        except Exception:
            print(f"Repository {self.repo_id} not found, attempting to create...")
        else:
            print(f"Repository {self.repo_id} exists")
            return True

        try:
            create_repo(
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token,
                private=True,
                exist_ok=True,
            )
        except Exception as create_error:
            print(f"Failed to create repository: {create_error}")
            return False
        print(f"Created repository {self.repo_id}")

        # The repository exists at this point; the README is a nicety and
        # must not turn a successful creation into a reported failure.
        self._upload_readme()
        return True

    def download_data(self):
        """Download and extract data from the HF dataset repo.

        Creates the data directory if needed and verifies it is writable
        before contacting the Hub. Never raises; every failure is printed.
        """
        try:
            print("Downloading data from Hugging Face...")

            self.data_dir.mkdir(parents=True, exist_ok=True)

            # Probe writability up front with a throw-away file.
            test_file = self.data_dir / "test_write"
            try:
                test_file.touch()
                test_file.unlink()
            except Exception as e:
                print(f"Warning: Data directory may not be writable: {e}")
                return
            print(f"Data directory {self.data_dir} is writable")

            if not self.token:
                print("No HF_TOKEN provided, skipping download")
                return

            if not self.ensure_repo_exists():
                print("Could not access or create repository")
                return

            try:
                file_path = self.api.hf_hub_download(
                    repo_id=self.repo_id,
                    filename=self._ARCHIVE_NAME,
                    repo_type="dataset",
                    token=self.token,
                )
            except Exception as e:
                print(f"No existing data found (this is normal for first run): {e}")
                return

            # Kept separate from the download so a corrupt or unsafe
            # archive is not misreported as "no existing data".
            try:
                self._extract_archive(file_path)
            except (tarfile.TarError, OSError) as e:
                print(f"Error extracting data: {e}")
                return

            print(f"Data extracted to {self.data_dir}")

        except Exception as e:
            print(f"Error during download: {e}")

    def upload_data(self):
        """Compress and upload data to the HF dataset repo.

        Skips the upload when no token is configured or when the data
        directory holds nothing besides excluded entries. Never raises;
        every failure is printed.
        """
        try:
            if not self.token:
                print("No HF_TOKEN provided, skipping upload")
                return

            print("Uploading data to Hugging Face...")

            if not self.data_dir.exists():
                print("No data to upload")
                return

            # Filter first: a directory holding only excluded entries must
            # not overwrite the stored archive with an empty one.
            items = sorted(
                entry
                for entry in self.data_dir.iterdir()
                if entry.name not in self._EXCLUDED_NAMES
            )
            if not items:
                print("No data to upload")
                return

            if not self.ensure_repo_exists():
                print("Could not access or create repository")
                return

            # mkstemp only reserves the name; close the descriptor so
            # tarfile is the sole writer of the archive.
            fd, archive_path = tempfile.mkstemp(suffix=".tar.gz")
            os.close(fd)
            try:
                with tarfile.open(archive_path, "w:gz") as tar:
                    for item in items:
                        tar.add(item, arcname=item.name)

                self.api.upload_file(
                    path_or_fileobj=archive_path,
                    path_in_repo=self._ARCHIVE_NAME,
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    commit_message="Update Open WebUI data",
                    token=self.token,
                )
            finally:
                # Always remove the archive, even when the upload failed.
                self._remove_quietly(archive_path)

            print("Data uploaded successfully")

        except Exception as e:
            print(f"Error uploading data: {e}")
| |
|
def main():
    """Command-line entry point: run ``download`` or ``upload``.

    The repository id, access token and data directory are read from the
    ``HF_STORAGE_REPO``, ``HF_TOKEN`` and ``DATA_DIR`` environment
    variables. A missing or unknown command prints the usage line.
    """
    import sys

    usage = "Usage: sync_storage.py [download|upload]"

    # The sync object is built before the command is inspected.
    sync = HFStorageSync(
        os.getenv("HF_STORAGE_REPO", "nxdev-org/open-webui-storage"),
        os.getenv("HF_TOKEN"),
        os.getenv("DATA_DIR", "/tmp/open-webui-data"),
    )

    command = sys.argv[1] if len(sys.argv) > 1 else None
    actions = {
        "download": sync.download_data,
        "upload": sync.upload_data,
    }

    action = actions.get(command)
    if action is None:
        print(usage)
        return
    action()
| |
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
| |
|