Spaces:
Paused
Paused
File size: 6,247 Bytes
663a4dd a823edb 663a4dd a823edb 663a4dd a823edb 663a4dd a823edb 663a4dd a823edb 663a4dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
#!/usr/bin/env python3
"""
Manual push script - run this in your HF Space to push the trained model.
Add this file to your Space and run it separately.
Usage in Space terminal or add a button in app.py:
python push_model.py
"""
import os
import sys
from pathlib import Path
def _find_adapter_path():
    """Locate the trained adapter directory, printing search diagnostics.

    Search priority (single, unified search — the original code ran two
    conflicting searches and the second clobbered the first's result):
      1. Known output locations containing an ``adapter_config.json``.
      2. A "final" adapter: any ``adapter_config.json`` found outside a
         ``checkpoint-*`` folder.
      3. Any ``adapter_config.json``, latest one found.
      4. The newest ``checkpoint-*`` directory (by step number).

    Returns:
        pathlib.Path | None: the adapter directory, or None if nothing
        resembling a trained adapter exists under the current directory.
    """
    # Known locations written by the training script.
    possible_locations = [
        Path("./trained_adapter"),  # New location
        Path("./output"),
        Path("OliverSlivka/qwen2.5-3b-itemset-extractor"),
        Path("OliverSlivka/qwen2.5-3b-itemset-test"),
    ]
    adapter_path = None
    for loc in possible_locations:
        if (loc / "adapter_config.json").exists():
            adapter_path = loc
            break

    # Fallback: scan the whole tree for checkpoints / adapter configs.
    checkpoint_dirs = list(Path(".").glob("**/checkpoint-*"))
    adapter_files = list(Path(".").glob("**/adapter_config.json"))

    print(f"\nπ Searching for trained model...")
    print(f" Adapter path found: {adapter_path}")
    print(f" Checkpoint dirs found: {len(checkpoint_dirs)}")
    print(f" Adapter configs found: {len(adapter_files)}")
    for cp in checkpoint_dirs[:5]:
        print(f" - {cp}")
    for af in adapter_files[:5]:
        print(f" - {af}")

    if adapter_path is None:
        # Prefer a final adapter, i.e. one NOT inside a checkpoint folder.
        for af in adapter_files:
            if "checkpoint" not in str(af.parent):
                adapter_path = af.parent
                break
    if adapter_path is None and adapter_files:
        adapter_path = adapter_files[-1].parent  # Use latest checkpoint copy
    if adapter_path is None and checkpoint_dirs:
        # Sort by the numeric step suffix so [-1] is the latest checkpoint;
        # non-numeric suffixes sort first (key 0).
        checkpoint_dirs.sort(
            key=lambda x: int(x.name.split("-")[-1]) if x.name.split("-")[-1].isdigit() else 0
        )
        adapter_path = checkpoint_dirs[-1]
    return adapter_path


def push_trained_model():
    """Push the trained LoRA adapter to the HuggingFace Hub.

    Checks the HF_TOKEN secret, locates the adapter directory on disk,
    logs in, and uploads the folder to the target model repo. Every
    failure path prints a diagnostic and returns False instead of raising.

    Returns:
        bool: True if the upload succeeded, False otherwise.
    """
    print("="*60)
    print("π Manual Model Push to HuggingFace Hub")
    print("="*60)

    # Check HF_TOKEN — a write-scoped token must be set as a Space secret.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("β HF_TOKEN not found in environment!")
        print(" Set it in Space secrets: https://huggingface.co/spaces/OliverSlivka/testrun2/settings")
        return False
    print(f"β HF_TOKEN found (length: {len(hf_token)} chars)")

    # Single unified search (bug fix: the original searched twice and the
    # second pass reset adapter_path to None, discarding a validated result).
    adapter_path = _find_adapter_path()
    if adapter_path is None or not adapter_path.exists():
        print("\nβ No trained model found!")
        print(" The model may have been cleared from memory.")
        print(" You need to run training again.")
        return False

    # Import heavy libraries lazily so the search diagnostics above still
    # run when they are missing.
    print("\nπ¦ Loading libraries...")
    try:
        from huggingface_hub import HfApi, login
        from peft import PeftModel
        from transformers import AutoModelForCausalLM, AutoTokenizer
        import torch
    except ImportError as e:
        print(f"β Missing library: {e}")
        print(" Run: pip install huggingface_hub peft transformers torch")
        return False

    # Login with token
    print("\nπ Logging in to HuggingFace...")
    try:
        login(token=hf_token)
        api = HfApi()
        user_info = api.whoami()
        print(f"β Logged in as: {user_info['name']}")
    except Exception as e:
        print(f"β Login failed: {e}")
        print(" Check that your token has WRITE permissions!")
        return False

    print(f"\nπ Using adapter from: {adapter_path}")
    # Verify adapter_config.json exists (bug fix: the original printed a
    # literal backslash-n here because the escape was doubled: "\\n").
    if not (adapter_path / "adapter_config.json").exists():
        print(f"\nβ adapter_config.json not found in {adapter_path}")
        return False

    # List files to be uploaded
    print("\nπ Files to upload:")
    for f in adapter_path.iterdir():
        size = f.stat().st_size if f.is_file() else 0
        print(f" {f.name}: {size/1024:.1f} KB")

    # Push to Hub
    target_repo = "OliverSlivka/qwen2.5-3b-itemset-extractor"
    print(f"\nβ¬οΈ Pushing to {target_repo}...")
    try:
        api.upload_folder(
            folder_path=str(adapter_path),
            repo_id=target_repo,
            repo_type="model",
        )
        print(f"\nβ SUCCESS! Model pushed to:")
        print(f" https://huggingface.co/{target_repo}")
        return True
    except Exception as e:
        print(f"\nβ Push failed: {e}")
        print("\n Possible causes:")
        print(" 1. Token doesn't have WRITE permission")
        print(" 2. You don't have write access to this repo")
        print(" 3. Network error")
        return False
def list_all_files():
    """Debug helper: print a size-annotated tree of files under the cwd.

    Skips hidden directories plus ``__pycache__`` and ``.git``, and shows
    at most 20 files per directory. Output goes to stdout; returns None.
    """
    print("\nπ All files in Space:")
    for root, dirs, files in os.walk(".", topdown=True):
        # Prune in place so os.walk never descends into skipped dirs
        # (only works with topdown=True).
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['__pycache__', '.git']]
        level = root.replace(".", "").count(os.sep)
        indent = " " * 2 * level
        print(f"{indent}{os.path.basename(root)}/")
        subindent = " " * 2 * (level + 1)
        for file in files[:20]:  # Limit files shown
            filepath = os.path.join(root, file)
            try:
                size = os.path.getsize(filepath)
                print(f"{subindent}{file}: {size/1024:.1f} KB")
            # Bug fix: was a bare `except:` that also caught SystemExit /
            # KeyboardInterrupt; os.path.getsize raises OSError on failure
            # (e.g. file deleted between walk and stat, broken symlink).
            except OSError:
                print(f"{subindent}{file}")
        if len(files) > 20:
            print(f"{subindent}... and {len(files)-20} more files")
if __name__ == "__main__":
    # The file listing is always printed first as a debugging aid; the
    # "--list" flag stops there instead of continuing to the push step.
    listing_only = len(sys.argv) > 1 and sys.argv[1] == "--list"
    list_all_files()
    if not listing_only:
        print("\n" + "="*60 + "\n")
        pushed_ok = push_trained_model()
        sys.exit(0 if pushed_ok else 1)
|