#!/usr/bin/env python3
"""
Manual push script - run this in your HF Space to push the trained model.

Add this file to your Space and run it separately.

Usage in Space terminal or add a button in app.py:
    python push_model.py

Pass ``--list`` as the first argument to only print the files in the Space
without pushing anything.
"""

import os
import sys
from pathlib import Path
from typing import List, Optional

# Hub repository that receives the adapter files.
TARGET_REPO = "OliverSlivka/qwen2.5-3b-itemset-extractor"

# File whose presence marks a directory as a saved adapter.
ADAPTER_CONFIG = "adapter_config.json"

# Directories checked first, in priority order, before the recursive search.
KNOWN_ADAPTER_DIRS = (
    Path("./trained_adapter"),  # New location
    Path("./output"),
    Path("OliverSlivka/qwen2.5-3b-itemset-extractor"),
    Path("OliverSlivka/qwen2.5-3b-itemset-test"),
)

# Maximum number of files printed per directory by list_all_files().
MAX_FILES_SHOWN = 20


def _checkpoint_step(directory: Path) -> int:
    """Return the numeric suffix of a ``checkpoint-<N>`` directory name, or 0."""
    suffix = directory.name.split("-")[-1]
    # isdecimal() (not isdigit()) so that int() can never raise on the suffix.
    return int(suffix) if suffix.isdecimal() else 0


def _select_adapter_dir(
    adapter_files: List[Path], checkpoint_dirs: List[Path]
) -> Optional[Path]:
    """Pick the directory to upload.

    Priority: known location > adapter outside any checkpoint folder (the
    final adapter) > adapter in the highest-numbered checkpoint > the
    highest-numbered checkpoint directory. Returns None when nothing matches.
    """
    for location in KNOWN_ADAPTER_DIRS:
        if (location / ADAPTER_CONFIG).exists():
            return location

    adapter_dirs = [config.parent for config in adapter_files]
    final_dirs = [d for d in adapter_dirs if "checkpoint" not in str(d)]
    if final_dirs:
        return final_dirs[0]
    if adapter_dirs:
        # Glob order is arbitrary, so choose by step number rather than position.
        return max(adapter_dirs, key=_checkpoint_step)
    if checkpoint_dirs:
        return max(checkpoint_dirs, key=_checkpoint_step)
    return None


def push_trained_model() -> bool:
    """Push the trained model to HuggingFace Hub.

    Reads the token from the ``HF_TOKEN`` environment variable, locates an
    adapter directory under the current working directory, and uploads that
    folder to ``TARGET_REPO``. Progress and errors are printed to stdout.

    All local checks (token, adapter directory, ``adapter_config.json``) run
    before ``huggingface_hub`` is imported or any login is attempted.

    Returns:
        True if the upload succeeded, False on any failure (missing token,
        no adapter found, missing library, login failure, upload failure).
    """
    print("=" * 60)
    print("🚀 Manual Model Push to HuggingFace Hub")
    print("=" * 60)

    # Check HF_TOKEN
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("❌ HF_TOKEN not found in environment!")
        print(" Set it in Space secrets: https://huggingface.co/spaces/OliverSlivka/testrun2/settings")
        return False
    print(f"✓ HF_TOKEN found (length: {len(hf_token)} chars)")

    # Search the working tree once; the same results drive both the report
    # below and the selection, so they cannot disagree.
    checkpoint_dirs = [p for p in Path(".").glob("**/checkpoint-*") if p.is_dir()]
    adapter_files = list(Path(".").glob(f"**/{ADAPTER_CONFIG}"))
    adapter_path = _select_adapter_dir(adapter_files, checkpoint_dirs)

    print("\n📁 Searching for trained model...")
    print(f" Adapter path found: {adapter_path}")
    print(f" Checkpoint dirs found: {len(checkpoint_dirs)}")
    print(f" Adapter configs found: {len(adapter_files)}")
    for checkpoint in checkpoint_dirs[:5]:
        print(f" - {checkpoint}")
    for config in adapter_files[:5]:
        print(f" - {config}")

    if adapter_path is None or not adapter_path.exists():
        print("\n❌ No trained model found!")
        print(" The model may have been cleared from memory.")
        print(" You need to run training again.")
        return False

    print(f"\n📂 Using adapter from: {adapter_path}")

    # Verify adapter_config.json exists (a bare checkpoint directory may lack it).
    if not (adapter_path / ADAPTER_CONFIG).exists():
        print(f"\n❌ adapter_config.json not found in {adapter_path}")
        return False

    # Only huggingface_hub is needed to upload a folder; imported lazily so
    # the checks above work even when it is not installed.
    print("\n📦 Loading libraries...")
    try:
        from huggingface_hub import HfApi, login
    except ImportError as e:
        print(f"❌ Missing library: {e}")
        print(" Run: pip install huggingface_hub")
        return False

    # Login with token
    print("\n🔑 Logging in to HuggingFace...")
    try:
        login(token=hf_token)
        api = HfApi()
        user_info = api.whoami()
        print(f"✓ Logged in as: {user_info['name']}")
    except Exception as e:  # top-level boundary: report and fail the push
        print(f"❌ Login failed: {e}")
        print(" Check that your token has WRITE permissions!")
        return False

    # List files to be uploaded (sub-directories are reported as 0.0 KB)
    print("\n📄 Files to upload:")
    for entry in sorted(adapter_path.iterdir()):
        size = entry.stat().st_size if entry.is_file() else 0
        print(f" {entry.name}: {size/1024:.1f} KB")

    # Push to Hub
    print(f"\n⬆️ Pushing to {TARGET_REPO}...")
    try:
        api.upload_folder(
            folder_path=str(adapter_path),
            repo_id=TARGET_REPO,
            repo_type="model",
        )
    except Exception as e:  # top-level boundary: report and fail the push
        print(f"\n❌ Push failed: {e}")
        print("\n Possible causes:")
        print(" 1. Token doesn't have WRITE permission")
        print(" 2. You don't have write access to this repo")
        print(" 3. Network error")
        return False

    print("\n✅ SUCCESS! Model pushed to:")
    print(f" https://huggingface.co/{TARGET_REPO}")
    return True


def list_all_files() -> None:
    """Debug: List all files in current directory.

    Walks the current working directory, skipping hidden directories and
    ``__pycache__``, and prints an indented tree with file sizes in KB.
    At most ``MAX_FILES_SHOWN`` files are printed per directory.
    """
    print("\n📁 All files in Space:")
    for root, dirs, files in os.walk(".", topdown=True):
        # Prune in place so os.walk does not descend into hidden/cache dirs;
        # sorting keeps the listing order stable between runs.
        dirs[:] = sorted(
            d for d in dirs if not d.startswith(".") and d != "__pycache__"
        )
        files.sort()

        # Roots look like "." or "./a/b", so the separator count is the depth.
        depth = root.count(os.sep)
        indent = "  " * depth
        print(f"{indent}{os.path.basename(root)}/")

        subindent = "  " * (depth + 1)
        for file in files[:MAX_FILES_SHOWN]:  # Limit files shown
            filepath = os.path.join(root, file)
            try:
                size = os.path.getsize(filepath)
            except OSError:
                # Broken symlink or file removed mid-walk: show the name only.
                print(f"{subindent}{file}")
            else:
                print(f"{subindent}{file}: {size/1024:.1f} KB")
        if len(files) > MAX_FILES_SHOWN:
            print(f"{subindent}... and {len(files)-MAX_FILES_SHOWN} more files")


def main() -> int:
    """Run the script and return the process exit code.

    The file listing is always printed first. With ``--list`` as the first
    argument nothing else happens; otherwise the push is attempted and the
    exit code is 0 on success, 1 on failure.
    """
    list_all_files()  # Always show files first
    if len(sys.argv) > 1 and sys.argv[1] == "--list":
        return 0
    print("\n" + "=" * 60 + "\n")
    return 0 if push_trained_model() else 1


if __name__ == "__main__":
    sys.exit(main())