# File size: 3,311 Bytes
# af9853e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import sys
import glob
import shutil
from huggingface_hub import HfApi, create_repo, upload_folder
from config import Config

def main():
    """Stage project files in a temporary folder and upload them to the Hub.

    Steps:
      1. Check that a Hugging Face login is available via ``HfApi().whoami()``;
         print a message and return early if that call fails.
      2. Recreate the local staging directory ``hf_upload_staging`` and copy
         into it: the listed project folders/files, ``results/images``,
         ``results/*.txt``, and the model/tokenizer files of the most recently
         modified ``checkpoint-*`` directory under ``Config.RESULTS_DIR``
         (placed at the staging root).
      3. Create the target model repo if needed and upload the staging folder.

    The staging directory is removed afterwards, including when copying or
    uploading raises, so a failed run does not leave it behind.
    """
    print("🚀 开始全量上传 (All-in-One) 到 robot4/sentiment-analysis-bert-finetuned ...")

    api = HfApi()
    try:
        user_info = api.whoami()
        username = user_info['name']
    except Exception as exc:
        # Catch Exception rather than using a bare ``except`` so Ctrl-C and
        # SystemExit still propagate, and show why the check failed.
        print("❌ Please login first.")
        print(f"   ({type(exc).__name__}: {exc})")
        return
    print(f"✅ User: {username}")

    # Target repository (specified by the user).
    target_repo_id = "robot4/sentiment-analysis-bert-finetuned"

    # 1. Prepare a fresh temporary staging directory.
    upload_dir = "hf_upload_staging"
    if os.path.exists(upload_dir):
        shutil.rmtree(upload_dir)
    os.makedirs(upload_dir)

    try:
        print(f"📦 正在打包所有文件到 {upload_dir}...")

        # A. Copy project code and resources. Each entry is a glob pattern
        #    resolved relative to the current working directory.
        items_to_copy = [
            "src", "notebooks", "docs", "demo", "data",
            "README.md", "requirements.txt", "*.pptx",
        ]

        for pattern in items_to_copy:
            for item in glob.glob(pattern):
                dest = os.path.join(upload_dir, item)
                print(f"   - Adding {item}...")
                if os.path.isdir(item):
                    shutil.copytree(item, dest, dirs_exist_ok=True)
                else:
                    shutil.copy2(item, dest)

        # B. Special handling for the results directory: only images and text
        #    metrics are staged here, not the checkpoint folders.
        # NOTE(review): this uses the literal "results" path while step C uses
        # Config.RESULTS_DIR - confirm both refer to the same directory.
        results_dest = os.path.join(upload_dir, "results")
        os.makedirs(results_dest, exist_ok=True)

        # Copy images.
        if os.path.exists("results/images"):
            shutil.copytree(
                "results/images",
                os.path.join(results_dest, "images"),
                dirs_exist_ok=True,
            )
        # Copy txt metrics.
        for txt in glob.glob("results/*.txt"):
            shutil.copy2(txt, results_dest)

        # C. Extract the latest model weights into the staging root so the
        #    uploaded repo can be loaded directly.
        candidates = [
            c
            for c in glob.glob(os.path.join(Config.RESULTS_DIR, "checkpoint-*"))
            if os.path.isdir(c)
        ]

        if candidates:
            # "Latest" means most recently modified on disk.
            latest_ckpt = sorted(candidates, key=os.path.getmtime)[-1]
            print(f"✅ 提取最新模型权重: {latest_ckpt} -> 根目录")

            model_files = {
                "config.json", "model.safetensors", "pytorch_model.bin",
                "tokenizer.json", "vocab.txt", "tokenizer_config.json",
                "special_tokens_map.json",
            }
            weight_suffixes = (".safetensors", ".bin")

            for fname in os.listdir(latest_ckpt):
                src_path = os.path.join(latest_ckpt, fname)
                # Skip sub-directories: copy2 only handles regular files.
                if not os.path.isfile(src_path):
                    continue
                if fname in model_files or fname.endswith(weight_suffixes):
                    shutil.copy2(src_path, os.path.join(upload_dir, fname))
        else:
            print("⚠️ 未找到 Checkpoint,仅上传代码和数据。")

        # 2. Perform the upload.
        print(f"\n⬆️ 正在上传所有文件到 https://huggingface.co/{target_repo_id}")
        create_repo(repo_id=target_repo_id, repo_type="model", exist_ok=True)

        upload_folder(
            folder_path=upload_dir,
            repo_id=target_repo_id,
            repo_type="model"
        )
    finally:
        # Cleanup: always remove the staging directory, even on failure.
        shutil.rmtree(upload_dir, ignore_errors=True)

    print("🎉 上传完毕!")

if __name__ == "__main__":
    # Append the directory one level above this script's folder to the import
    # search path, then run the upload.
    script_path = os.path.abspath(__file__)
    project_root = os.path.dirname(os.path.dirname(script_path))
    sys.path.append(project_root)
    main()