File size: 6,247 Bytes
663a4dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a823edb
 
 
 
 
 
 
663a4dd
a823edb
 
 
 
 
 
 
 
663a4dd
 
 
 
a823edb
663a4dd
 
 
 
 
 
 
 
a823edb
 
 
 
 
 
 
 
 
663a4dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a823edb
 
 
 
 
663a4dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/usr/bin/env python3
"""
Manual push script - run this in your HF Space to push the trained model.
Add this file to your Space and run it separately.

Usage in Space terminal or add a button in app.py:
    python push_model.py
"""

import os
import sys
from pathlib import Path

def push_trained_model():
    """Locate a trained LoRA adapter on disk and push it to the HuggingFace Hub.

    Search order: known output locations, then any adapter_config.json found
    outside a checkpoint folder, then the latest checkpoint-* directory.

    Returns:
        bool: True if the upload succeeded, False on any failure (missing
        HF_TOKEN, no adapter found, missing libraries, login or upload error).
    """
    
    print("="*60)
    print("πŸš€ Manual Model Push to HuggingFace Hub")
    print("="*60)
    
    # HF_TOKEN must be provided via the Space's secret settings.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("❌ HF_TOKEN not found in environment!")
        print("   Set it in Space secrets: https://huggingface.co/spaces/OliverSlivka/testrun2/settings")
        return False
    
    print(f"βœ“ HF_TOKEN found (length: {len(hf_token)} chars)")
    
    # Check for trained model - look in known locations first
    possible_locations = [
        Path("./trained_adapter"),  # New location
        Path("./output"),
        Path("OliverSlivka/qwen2.5-3b-itemset-extractor"),
        Path("OliverSlivka/qwen2.5-3b-itemset-test"),
    ]
    
    adapter_path = None
    for loc in possible_locations:
        if (loc / "adapter_config.json").exists():
            adapter_path = loc
            break
    
    # Also scan the whole tree for checkpoint dirs and adapter configs
    checkpoint_dirs = list(Path(".").glob("**/checkpoint-*"))
    adapter_files = list(Path(".").glob("**/adapter_config.json"))
    
    print(f"\nπŸ“ Searching for trained model...")
    print(f"   Adapter path found: {adapter_path}")
    print(f"   Checkpoint dirs found: {len(checkpoint_dirs)}")
    print(f"   Adapter configs found: {len(adapter_files)}")
    
    for cp in checkpoint_dirs[:5]:
        print(f"   - {cp}")
    for af in adapter_files[:5]:
        print(f"   - {af}")
    
    # Fall back to the search results. Priority: a final adapter (one NOT
    # inside a checkpoint-* folder) > latest adapter config > latest checkpoint.
    # NOTE: this replaces the old code that reset adapter_path to None after
    # the existence check, which could crash on `None / "adapter_config.json"`.
    if not adapter_path:
        for af in adapter_files:
            if "checkpoint" not in str(af.parent):
                adapter_path = af.parent
                break
        if not adapter_path and adapter_files:
            adapter_path = adapter_files[-1].parent
        if not adapter_path and checkpoint_dirs:
            # Sort numerically by the trailing step number in "checkpoint-<N>"
            checkpoint_dirs.sort(key=lambda x: int(x.name.split("-")[-1]) if x.name.split("-")[-1].isdigit() else 0)
            adapter_path = checkpoint_dirs[-1]
    
    if not adapter_path or not adapter_path.exists():
        print("\n❌ No trained model found!")
        print("   The model may have been cleared from memory.")
        print("   You need to run training again.")
        return False
    
    # Import required libraries (doubles as an availability check; the
    # model/tokenizer imports are intentionally unused here).
    print("\nπŸ“¦ Loading libraries...")
    try:
        from huggingface_hub import HfApi, login
        from peft import PeftModel  # noqa: F401
        from transformers import AutoModelForCausalLM, AutoTokenizer  # noqa: F401
        import torch  # noqa: F401
    except ImportError as e:
        print(f"❌ Missing library: {e}")
        print("   Run: pip install huggingface_hub peft transformers torch")
        return False
    
    # Login with token
    print("\nπŸ”‘ Logging in to HuggingFace...")
    try:
        login(token=hf_token)
        api = HfApi()
        user_info = api.whoami()
        print(f"βœ“ Logged in as: {user_info['name']}")
    except Exception as e:
        print(f"❌ Login failed: {e}")
        print("   Check that your token has WRITE permissions!")
        return False
    
    print(f"\nπŸ“‚ Using adapter from: {adapter_path}")
    
    # Verify adapter_config.json exists (guards the upload below).
    # BUG FIX: the old message used "\\n", printing a literal backslash-n.
    if not (adapter_path / "adapter_config.json").exists():
        print(f"\n❌ adapter_config.json not found in {adapter_path}")
        return False
    
    # List files to be uploaded
    print("\nπŸ“„ Files to upload:")
    for f in adapter_path.iterdir():
        size = f.stat().st_size if f.is_file() else 0
        print(f"   {f.name}: {size/1024:.1f} KB")
    
    # Push to Hub
    target_repo = "OliverSlivka/qwen2.5-3b-itemset-extractor"
    print(f"\n⬆️ Pushing to {target_repo}...")
    
    try:
        api.upload_folder(
            folder_path=str(adapter_path),
            repo_id=target_repo,
            repo_type="model",
        )
        print(f"\nβœ… SUCCESS! Model pushed to:")
        print(f"   https://huggingface.co/{target_repo}")
        return True
    except Exception as e:
        print(f"\n❌ Push failed: {e}")
        print("\n   Possible causes:")
        print("   1. Token doesn't have WRITE permission")
        print("   2. You don't have write access to this repo")
        print("   3. Network error")
        return False


def list_all_files():
    """Debug helper: print an indented tree of files under the current directory.

    Hidden directories, ``__pycache__`` and ``.git`` are pruned; at most 20
    files per directory are printed, followed by a summary line for the rest.
    """
    print("\nπŸ“ All files in Space:")
    for root, dirs, files in os.walk(".", topdown=True):
        # Prune hidden and large dirs in place so os.walk skips them entirely
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['__pycache__', '.git']]
        # Depth below "." is the number of separators in root ("." -> 0,
        # "./sub" -> 1, ...); equivalent to the old replace(".","") trick
        # but without the needless dot stripping.
        level = root.count(os.sep)
        indent = " " * 2 * level
        print(f"{indent}{os.path.basename(root)}/")
        subindent = " " * 2 * (level + 1)
        for file in files[:20]:  # Limit files shown
            filepath = os.path.join(root, file)
            try:
                size = os.path.getsize(filepath)
                print(f"{subindent}{file}: {size/1024:.1f} KB")
            except OSError:  # file vanished/unreadable: show name without size
                print(f"{subindent}{file}")
        if len(files) > 20:
            print(f"{subindent}... and {len(files)-20} more files")


if __name__ == "__main__":
    # "--list" only prints the file tree; the default path prints the tree,
    # then attempts the push and exits non-zero on failure.
    if sys.argv[1:2] == ["--list"]:
        list_all_files()
    else:
        list_all_files()  # Always show files first
        print("\n" + "="*60 + "\n")
        ok = push_trained_model()
        sys.exit(0 if ok else 1)