Spaces:
Paused
Paused
| import os | |
| import glob | |
| from filesplit.merge import Merge | |
| from huggingface_hub import HfApi, create_repo, upload_folder, notebook_login | |
| import shutil | |
| from pathlib import Path | |
# Configuration
# Local directory passed to reassemble_models() and upload_models().
MODELS_DIR = "models"
# Local directory passed to upload_data() and uploaded as a dataset repo.
DATA_DIR = "data/preprocessed"
# NOTE(review): this constant is not referenced anywhere in the visible code.
# upload_models() and upload_data() derive the account name from
# HfApi().whoami() instead -- confirm whether it can be removed.
HF_USERNAME = "Demosthene-OR"
# No token is passed explicitly to HfApi(); the script relies on the user
# already being logged in to Hugging Face (e.g. via the CLI).
def reassemble_models(models_dir, model_names=("best_rnn_model", "best_vgg16_model")):
    """Reassemble model files that were split into parts with filesplit.

    For each name in ``model_names``, the sub-directory
    ``<models_dir>/<name>`` is treated as the folder holding the split parts.
    When that folder exists, its parts are merged into
    ``<models_dir>/<name>.h5``. The split parts are kept on disk
    (``cleanup=False``).

    Args:
        models_dir: Directory that contains the split sub-directories and
            receives the merged ``.h5`` files.
        model_names: Base names of the models to look for. Defaults to the
            two models this script has always handled, so existing callers
            are unaffected.

    Returns:
        None.
    """
    print(f"Checking for split files in {models_dir}...")
    for model_name in model_names:
        split_dir = os.path.join(models_dir, model_name)
        # Only a directory can hold split parts; a plain file that happens to
        # share the name is skipped instead of being handed to Merge.
        if not os.path.isdir(split_dir):
            continue
        merged_name = f"{model_name}.h5"
        print(f"Reassembling {merged_name}...")
        Merge(split_dir, models_dir, merged_name).merge(cleanup=False)
def upload_models(models_dir, repo_name):
    """Push the known model artifacts from ``models_dir`` to a Hub model repo.

    The target repository id is ``<user>/<repo_name>``, where ``<user>`` is
    the ``name`` field returned by ``HfApi().whoami()``. The repository is
    created when missing (``exist_ok=True``); if that step raises, the error
    is printed and the function returns without uploading anything.

    Each artifact is uploaded individually under its own file name. Artifacts
    that are not present in ``models_dir`` are reported with a warning and
    skipped.

    Args:
        models_dir: Local directory containing the model artifacts.
        repo_name: Repository name (without the user prefix).

    Returns:
        None.
    """
    print(f"Uploading models to {repo_name}...")
    hub = HfApi()
    repo_id = "{}/{}".format(hub.whoami()['name'], repo_name)

    # Make sure the destination repository exists before sending any file.
    try:
        create_repo(repo_id, repo_type="model", exist_ok=True)
        print(f"Repository {repo_id} ready.")
    except Exception as err:
        print(f"Error creating/accessing repo: {err}")
        return

    # Artifacts expected in models_dir, uploaded in this order.
    artifact_names = (
        "best_rnn_model.h5",
        "best_vgg16_model.h5",
        "tokenizer_config.json",
        "best_weights.json",
        "mapper.json",
    )
    for artifact in artifact_names:
        local_path = os.path.join(models_dir, artifact)
        if not os.path.exists(local_path):
            print(f"Warning: {artifact} not found in {models_dir}")
            continue
        print(f"Uploading {artifact}...")
        hub.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=artifact,
            repo_id=repo_id,
            repo_type="model",
        )
def upload_data(data_dir, repo_name):
    """Push the whole ``data_dir`` folder to a Hub dataset repo.

    The target repository id is ``<user>/<repo_name>``, where ``<user>`` is
    the ``name`` field returned by ``HfApi().whoami()``. The repository is
    created when missing (``exist_ok=True``); if that step raises, the error
    is printed and the function returns without uploading anything.

    Args:
        data_dir: Local directory to upload in full.
        repo_name: Repository name (without the user prefix).

    Returns:
        None.
    """
    print(f"Uploading data to {repo_name}...")
    hub = HfApi()
    repo_id = "{}/{}".format(hub.whoami()['name'], repo_name)

    # Make sure the destination repository exists before sending anything.
    try:
        create_repo(repo_id, repo_type="dataset", exist_ok=True)
        print(f"Repository {repo_id} ready.")
    except Exception as err:
        print(f"Error creating/accessing repo: {err}")
        return

    # Nothing to send when the local folder is missing.
    if not os.path.exists(data_dir):
        print(f"Error: Data directory {data_dir} does not exist.")
        return

    # The folder is uploaded as a whole rather than file by file.
    upload_folder(
        folder_path=data_dir,
        repo_id=repo_id,
        repo_type="dataset",
    )
def _run_migration():
    """Run the three migration steps in order: reassemble, upload models, upload data.

    Requires the ``filesplit`` package (for merging the legacy split files)
    and ``huggingface_hub`` to be installed.
    """
    # Step 1: rebuild the split model files locally.
    reassemble_models(MODELS_DIR)

    # Step 2: upload the models (change the repo name here if needed).
    upload_models(MODELS_DIR, "rakuten-models")

    # Step 3: upload the data (change the repo name here if needed).
    upload_data(DATA_DIR, "rakuten-data")

    print("Migration complete!")


if __name__ == "__main__":
    _run_migration()