File size: 3,311 Bytes

bda2946
 
 
89f9a3e
bda2946
 
 
 
7c9ceb3
bda2946
 
 
 
 
89f9a3e
 
 
bda2946
 
7c9ceb3
 
bda2946
7c9ceb3
89f9a3e
 
 
 
 
7c9ceb3
 
 
 
 
 
 
 
89f9a3e
 
 
 
7c9ceb3
89f9a3e
 
 
 
7c9ceb3
 
 
 
 
 
 
 
 
 
 
 
 
bda2946
89f9a3e
 
bda2946
 
 
7c9ceb3
89f9a3e
 
 
 
 
 
bda2946
7c9ceb3
bda2946
7c9ceb3
 
 
89f9a3e
bda2946
89f9a3e
7c9ceb3
89f9a3e
bda2946
89f9a3e
 
 
7c9ceb3
 
bda2946

import os
import sys
import glob
import shutil
from huggingface_hub import HfApi, create_repo, upload_folder
from config import Config

# File names copied from the newest checkpoint into the repository root.
_MODEL_FILES = frozenset({
    "config.json",
    "model.safetensors",
    "pytorch_model.bin",
    "tokenizer.json",
    "vocab.txt",
    "tokenizer_config.json",
    "special_tokens_map.json",
})

# Any file with one of these suffixes is also treated as a weight file.
_WEIGHT_SUFFIXES = (".safetensors", ".bin")


def _stage_project_files(upload_dir: str) -> None:
    """Copy project code and resources (matched relative to the CWD) into *upload_dir*."""
    # Covers data, src, docs, notebooks, demo and top-level documents.
    items_to_copy = [
        "src", "notebooks", "docs", "demo", "data",
        "README.md", "requirements.txt", "*.pptx",
    ]

    for pattern in items_to_copy:
        # glob returns nothing for a missing path, so absent items are skipped.
        for item in glob.glob(pattern):
            dest = os.path.join(upload_dir, item)
            print(f"   - Adding {item}...")
            if os.path.isdir(item):
                shutil.copytree(item, dest, dirs_exist_ok=True)
            else:
                shutil.copy2(item, dest)


def _stage_results(upload_dir: str) -> None:
    """Copy only result images and ``*.txt`` metrics, not checkpoint folders."""
    # NOTE(review): this uses the literal "results" path while the checkpoint
    # lookup uses Config.RESULTS_DIR -- confirm both name the same directory.
    results_dest = os.path.join(upload_dir, "results")
    os.makedirs(results_dest, exist_ok=True)

    if os.path.exists("results/images"):
        shutil.copytree(
            "results/images",
            os.path.join(results_dest, "images"),
            dirs_exist_ok=True,
        )
    for txt in glob.glob("results/*.txt"):
        shutil.copy2(txt, results_dest)


def _stage_latest_checkpoint(upload_dir: str, results_dir: str) -> None:
    """Copy model/tokenizer files of the most recently modified checkpoint to the staging root."""
    candidates = [
        path
        for path in glob.glob(os.path.join(results_dir, "checkpoint-*"))
        if os.path.isdir(path)
    ]
    if not candidates:
        print("⚠️ 未找到 Checkpoint，仅上传代码和数据。")
        return

    # "Latest" is decided by directory modification time, not by step number.
    latest_ckpt = sorted(candidates, key=os.path.getmtime)[-1]
    print(f"✅ 提取最新模型权重: {latest_ckpt} -> 根目录")

    for fname in os.listdir(latest_ckpt):
        if fname in _MODEL_FILES or fname.endswith(_WEIGHT_SUFFIXES):
            shutil.copy2(
                os.path.join(latest_ckpt, fname),
                os.path.join(upload_dir, fname),
            )


def main():
    """Stage the project and the newest checkpoint, then upload everything to the Hub.

    Steps:
      1. Verify that a Hugging Face login is available (returns early if not).
      2. Rebuild the ``hf_upload_staging`` directory from scratch.
      3. Copy project files, result images/metrics and the latest checkpoint's
         model files into it.
      4. Create the target model repo if needed and upload the staging folder.
      5. Remove the staging directory, even when staging or upload fails.

    All relative paths are resolved against the current working directory.
    """
    # Target repository (chosen by the user).
    target_repo_id = "robot4/sentiment-analysis-bert-finetuned"
    print(f"🚀 开始全量上传 (All-in-One) 到 {target_repo_id} ...")

    api = HfApi()
    try:
        user_info = api.whoami()
        username = user_info['name']
    except Exception as exc:
        # Only ordinary errors are treated as "not logged in"; unlike a bare
        # except, Ctrl-C and SystemExit still propagate. The reason is shown
        # so network or token problems are not mistaken for a missing login.
        print("❌ Please login first.")
        print(f"   Reason: {exc}")
        return
    print(f"✅ User: {username}")

    # 1. Prepare a clean temporary staging directory.
    upload_dir = "hf_upload_staging"
    if os.path.exists(upload_dir):
        shutil.rmtree(upload_dir)
    os.makedirs(upload_dir)

    try:
        print(f"📦 正在打包所有文件到 {upload_dir}...")

        # A. Project code and resources.
        _stage_project_files(upload_dir)
        # B. Results: images and text metrics only.
        _stage_results(upload_dir)
        # C. Latest model weights go to the repo root for direct loading.
        _stage_latest_checkpoint(upload_dir, Config.RESULTS_DIR)

        # 2. Upload.
        print(f"\n⬆️ 正在上传所有文件到 https://huggingface.co/{target_repo_id}")
        create_repo(repo_id=target_repo_id, repo_type="model", exist_ok=True)

        upload_folder(
            folder_path=upload_dir,
            repo_id=target_repo_id,
            repo_type="model"
        )
    finally:
        # Always clean up so a failed run does not leave a large staging copy
        # behind; ignore_errors keeps cleanup from masking the original error.
        shutil.rmtree(upload_dir, ignore_errors=True)

    print("🎉 上传完毕！")

if __name__ == "__main__":
    # Append the directory that contains this script's folder to the module
    # search path, then run the upload.
    # NOTE(review): the module-level `from config import Config` has already
    # executed by the time this runs, so this path change cannot help that
    # import -- confirm whether it is still needed.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(os.path.dirname(script_dir))
    main()