Upload folder using huggingface_hub
Browse files
- data/processed_dataset/dataset_dict.json +1 -0
- data/processed_dataset/test/data-00000-of-00001.arrow +3 -0
- data/processed_dataset/test/dataset_info.json +33 -0
- data/processed_dataset/test/state.json +18 -0
- data/processed_dataset/train/data-00000-of-00001.arrow +3 -0
- data/processed_dataset/train/dataset_info.json +33 -0
- data/processed_dataset/train/state.json +18 -0
- results/images/data_distribution_2025-12-18_15-27-36.png +0 -0
- results/images/metrics_2025-12-18_15-06-59.txt +4 -0
- results/images/metrics_2025-12-18_15-19-18.txt +4 -0
- results/images/metrics_2025-12-18_15-25-36.txt +4 -0
- results/images/metrics_2025-12-18_15-27-41.txt +4 -0
- results/images/training_metrics_2025-12-18_15-06-59.png +0 -0
- results/images/training_metrics_2025-12-18_15-19-18.png +0 -0
- results/images/training_metrics_2025-12-18_15-25-36.png +0 -0
- results/images/training_metrics_2025-12-18_15-27-41.png +0 -0
- src/upload_to_hf.py +34 -36
data/processed_dataset/dataset_dict.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"splits": ["train", "test"]}
|
data/processed_dataset/test/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a4590634c3f9bb97b2fb2047cffcbdd00122eb564e6563b8ecb9673a7aa881b
|
| 3 |
+
size 44377040
|
data/processed_dataset/test/dataset_info.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"labels": {
|
| 6 |
+
"dtype": "int64",
|
| 7 |
+
"_type": "Value"
|
| 8 |
+
},
|
| 9 |
+
"input_ids": {
|
| 10 |
+
"feature": {
|
| 11 |
+
"dtype": "int32",
|
| 12 |
+
"_type": "Value"
|
| 13 |
+
},
|
| 14 |
+
"_type": "List"
|
| 15 |
+
},
|
| 16 |
+
"token_type_ids": {
|
| 17 |
+
"feature": {
|
| 18 |
+
"dtype": "int8",
|
| 19 |
+
"_type": "Value"
|
| 20 |
+
},
|
| 21 |
+
"_type": "List"
|
| 22 |
+
},
|
| 23 |
+
"attention_mask": {
|
| 24 |
+
"feature": {
|
| 25 |
+
"dtype": "int8",
|
| 26 |
+
"_type": "Value"
|
| 27 |
+
},
|
| 28 |
+
"_type": "List"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"homepage": "",
|
| 32 |
+
"license": ""
|
| 33 |
+
}
|
data/processed_dataset/test/state.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "e68a6594db5a153c",
|
| 8 |
+
"_format_columns": [
|
| 9 |
+
"attention_mask",
|
| 10 |
+
"input_ids",
|
| 11 |
+
"labels",
|
| 12 |
+
"token_type_ids"
|
| 13 |
+
],
|
| 14 |
+
"_format_kwargs": {},
|
| 15 |
+
"_format_type": null,
|
| 16 |
+
"_output_all_columns": false,
|
| 17 |
+
"_split": null
|
| 18 |
+
}
|
data/processed_dataset/train/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9f4e04f36632cfd2ae601cca3c4541ed2a2987279e320e5b6c544067f92871f
|
| 3 |
+
size 399379240
|
data/processed_dataset/train/dataset_info.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"labels": {
|
| 6 |
+
"dtype": "int64",
|
| 7 |
+
"_type": "Value"
|
| 8 |
+
},
|
| 9 |
+
"input_ids": {
|
| 10 |
+
"feature": {
|
| 11 |
+
"dtype": "int32",
|
| 12 |
+
"_type": "Value"
|
| 13 |
+
},
|
| 14 |
+
"_type": "List"
|
| 15 |
+
},
|
| 16 |
+
"token_type_ids": {
|
| 17 |
+
"feature": {
|
| 18 |
+
"dtype": "int8",
|
| 19 |
+
"_type": "Value"
|
| 20 |
+
},
|
| 21 |
+
"_type": "List"
|
| 22 |
+
},
|
| 23 |
+
"attention_mask": {
|
| 24 |
+
"feature": {
|
| 25 |
+
"dtype": "int8",
|
| 26 |
+
"_type": "Value"
|
| 27 |
+
},
|
| 28 |
+
"_type": "List"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"homepage": "",
|
| 32 |
+
"license": ""
|
| 33 |
+
}
|
data/processed_dataset/train/state.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "c52fbe1364b1bc3b",
|
| 8 |
+
"_format_columns": [
|
| 9 |
+
"attention_mask",
|
| 10 |
+
"input_ids",
|
| 11 |
+
"labels",
|
| 12 |
+
"token_type_ids"
|
| 13 |
+
],
|
| 14 |
+
"_format_kwargs": {},
|
| 15 |
+
"_format_type": null,
|
| 16 |
+
"_output_all_columns": false,
|
| 17 |
+
"_split": null
|
| 18 |
+
}
|
results/images/data_distribution_2025-12-18_15-27-36.png
ADDED
|
results/images/metrics_2025-12-18_15-06-59.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Timestamp: 2025-12-18_15-06-59
|
| 2 |
+
Final Validation Accuracy: 0.7683
|
| 3 |
+
Final Validation Loss: 0.5479554533958435
|
| 4 |
+
Plot saved to: training_metrics_2025-12-18_15-06-59.png
|
results/images/metrics_2025-12-18_15-19-18.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Timestamp: 2025-12-18_15-19-18
|
| 2 |
+
Final Validation Accuracy: 0.7719
|
| 3 |
+
Final Validation Loss: 0.538950502872467
|
| 4 |
+
Plot saved to: training_metrics_2025-12-18_15-19-18.png
|
results/images/metrics_2025-12-18_15-25-36.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Timestamp: 2025-12-18_15-25-36
|
| 2 |
+
Final Validation Accuracy: 0.7719
|
| 3 |
+
Final Validation Loss: 0.538950502872467
|
| 4 |
+
Plot saved to: training_metrics_2025-12-18_15-25-36.png
|
results/images/metrics_2025-12-18_15-27-41.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Timestamp: 2025-12-18_15-27-41
|
| 2 |
+
Final Validation Accuracy: 0.7746
|
| 3 |
+
Final Validation Loss: 0.5276312828063965
|
| 4 |
+
Plot saved to: training_metrics_2025-12-18_15-27-41.png
|
results/images/training_metrics_2025-12-18_15-06-59.png
ADDED
|
results/images/training_metrics_2025-12-18_15-19-18.png
ADDED
|
results/images/training_metrics_2025-12-18_15-25-36.png
ADDED
|
results/images/training_metrics_2025-12-18_15-27-41.png
ADDED
|
src/upload_to_hf.py
CHANGED
|
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi, create_repo, upload_folder
|
|
| 6 |
from config import Config
|
| 7 |
|
| 8 |
def main():
|
| 9 |
-
print("🚀
|
| 10 |
|
| 11 |
api = HfApi()
|
| 12 |
try:
|
|
@@ -17,77 +17,75 @@ def main():
|
|
| 17 |
print("❌ Please login first.")
|
| 18 |
return
|
| 19 |
|
| 20 |
-
|
|
|
|
| 21 |
|
| 22 |
-
# 1. 准备临时上传目录
|
| 23 |
-
# create a temp dir to combine everything before uploading to ensure structure is perfect
|
| 24 |
upload_dir = "hf_upload_staging"
|
| 25 |
if os.path.exists(upload_dir):
|
| 26 |
shutil.rmtree(upload_dir)
|
| 27 |
os.makedirs(upload_dir)
|
| 28 |
|
| 29 |
-
print(f"📦
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
# A. Copy Project Code (src, notebook, etc)
|
| 32 |
-
# We want these at the root
|
| 33 |
-
items_to_copy = ["src", "notebooks", "docs", "demo", "README.md", "requirements.txt", "*.pptx"]
|
| 34 |
for pattern in items_to_copy:
|
| 35 |
for item in glob.glob(pattern):
|
| 36 |
dest = os.path.join(upload_dir, item)
|
|
|
|
| 37 |
if os.path.isdir(item):
|
| 38 |
shutil.copytree(item, dest, dirs_exist_ok=True)
|
| 39 |
else:
|
| 40 |
shutil.copy2(item, dest)
|
| 41 |
-
|
| 42 |
-
# B.
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
candidates = glob.glob(os.path.join(Config.RESULTS_DIR, "checkpoint-*"))
|
| 45 |
-
# Filter out zip files if any
|
| 46 |
candidates = [c for c in candidates if os.path.isdir(c)]
|
| 47 |
|
| 48 |
if candidates:
|
| 49 |
candidates.sort(key=os.path.getmtime)
|
| 50 |
latest_ckpt = candidates[-1]
|
| 51 |
-
print(f"✅
|
| 52 |
|
| 53 |
-
# Files to copy from checkpoint to root
|
| 54 |
model_files = ["config.json", "model.safetensors", "pytorch_model.bin", "tokenizer.json", "vocab.txt", "tokenizer_config.json", "special_tokens_map.json"]
|
| 55 |
|
| 56 |
-
found_weights = False
|
| 57 |
for fname in os.listdir(latest_ckpt):
|
| 58 |
if fname in model_files or fname.endswith(".safetensors") or fname.endswith(".bin"):
|
| 59 |
-
# Copy to root of staging
|
| 60 |
shutil.copy2(os.path.join(latest_ckpt, fname), os.path.join(upload_dir, fname))
|
| 61 |
-
if "model" in fname or "pytorch" in fname:
|
| 62 |
-
found_weights = True
|
| 63 |
-
|
| 64 |
-
if not found_weights:
|
| 65 |
-
print("⚠️ WARNING: No model weights (.bin or .safetensors) found in checkpoint!")
|
| 66 |
else:
|
| 67 |
-
print("
|
| 68 |
|
| 69 |
-
# 2.
|
| 70 |
-
print(f"\n⬆️
|
| 71 |
-
create_repo(repo_id=
|
| 72 |
|
| 73 |
upload_folder(
|
| 74 |
folder_path=upload_dir,
|
| 75 |
-
repo_id=
|
| 76 |
repo_type="model"
|
| 77 |
)
|
| 78 |
|
| 79 |
# Cleanup
|
| 80 |
shutil.rmtree(upload_dir)
|
| 81 |
-
print("🎉
|
| 82 |
-
|
| 83 |
-
# Check dataset
|
| 84 |
-
dataset_repo_id = f"{username}/sentiment-analysis-dataset-processed"
|
| 85 |
-
data_path = os.path.join(Config.DATA_DIR, "processed_dataset")
|
| 86 |
-
if os.path.exists(data_path):
|
| 87 |
-
print(f"\n⬆️ Uploading dataset to https://huggingface.co/datasets/{dataset_repo_id}")
|
| 88 |
-
create_repo(repo_id=dataset_repo_id, repo_type="dataset", exist_ok=True)
|
| 89 |
-
upload_folder(folder_path=data_path, repo_id=dataset_repo_id, repo_type="dataset")
|
| 90 |
-
|
| 91 |
if __name__ == "__main__":
|
| 92 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 93 |
parent_dir = os.path.dirname(current_dir)
|
|
|
|
| 6 |
from config import Config
|
| 7 |
|
| 8 |
def main():
|
| 9 |
+
print("🚀 开始全量上传 (All-in-One) 到 robot4/sentiment-analysis-bert-finetuned ...")
|
| 10 |
|
| 11 |
api = HfApi()
|
| 12 |
try:
|
|
|
|
| 17 |
print("❌ Please login first.")
|
| 18 |
return
|
| 19 |
|
| 20 |
+
# 目标仓库 (用户指定)
|
| 21 |
+
target_repo_id = "robot4/sentiment-analysis-bert-finetuned"
|
| 22 |
|
| 23 |
+
# 1. 准备临时上传目录
|
|
|
|
| 24 |
upload_dir = "hf_upload_staging"
|
| 25 |
if os.path.exists(upload_dir):
|
| 26 |
shutil.rmtree(upload_dir)
|
| 27 |
os.makedirs(upload_dir)
|
| 28 |
|
| 29 |
+
print(f"📦 正在打包所有文件到 {upload_dir}...")
|
| 30 |
+
|
| 31 |
+
# A. 复制项目代码和资源
|
| 32 |
+
# 包含了 data, src, docs, notebooks, demo, results/images 等
|
| 33 |
+
items_to_copy = [
|
| 34 |
+
"src", "notebooks", "docs", "demo", "data",
|
| 35 |
+
"README.md", "requirements.txt", "*.pptx"
|
| 36 |
+
]
|
| 37 |
|
|
|
|
|
|
|
|
|
|
| 38 |
for pattern in items_to_copy:
|
| 39 |
for item in glob.glob(pattern):
|
| 40 |
dest = os.path.join(upload_dir, item)
|
| 41 |
+
print(f" - Adding {item}...")
|
| 42 |
if os.path.isdir(item):
|
| 43 |
shutil.copytree(item, dest, dirs_exist_ok=True)
|
| 44 |
else:
|
| 45 |
shutil.copy2(item, dest)
|
| 46 |
+
|
| 47 |
+
# B. 特殊处理 results 目录 (只传图片和 logs,不传所有 checkpoint 文件夹)
|
| 48 |
+
results_dest = os.path.join(upload_dir, "results")
|
| 49 |
+
os.makedirs(results_dest, exist_ok=True)
|
| 50 |
+
|
| 51 |
+
# 复制图片
|
| 52 |
+
if os.path.exists("results/images"):
|
| 53 |
+
shutil.copytree("results/images", os.path.join(results_dest, "images"), dirs_exist_ok=True)
|
| 54 |
+
# 复制 txt metrics
|
| 55 |
+
for txt in glob.glob("results/*.txt"):
|
| 56 |
+
shutil.copy2(txt, results_dest)
|
| 57 |
+
|
| 58 |
+
# C. 提取最新模型权重到根目录 (方便直接加载)
|
| 59 |
candidates = glob.glob(os.path.join(Config.RESULTS_DIR, "checkpoint-*"))
|
|
|
|
| 60 |
candidates = [c for c in candidates if os.path.isdir(c)]
|
| 61 |
|
| 62 |
if candidates:
|
| 63 |
candidates.sort(key=os.path.getmtime)
|
| 64 |
latest_ckpt = candidates[-1]
|
| 65 |
+
print(f"✅ 提取最新模型权重: {latest_ckpt} -> 根目录")
|
| 66 |
|
|
|
|
| 67 |
model_files = ["config.json", "model.safetensors", "pytorch_model.bin", "tokenizer.json", "vocab.txt", "tokenizer_config.json", "special_tokens_map.json"]
|
| 68 |
|
|
|
|
| 69 |
for fname in os.listdir(latest_ckpt):
|
| 70 |
if fname in model_files or fname.endswith(".safetensors") or fname.endswith(".bin"):
|
|
|
|
| 71 |
shutil.copy2(os.path.join(latest_ckpt, fname), os.path.join(upload_dir, fname))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
else:
|
| 73 |
+
print("⚠️ 未找到 Checkpoint,仅上传代码和数据。")
|
| 74 |
|
| 75 |
+
# 2. 执行上传
|
| 76 |
+
print(f"\n⬆️ 正在上传所有文件到 https://huggingface.co/{target_repo_id}")
|
| 77 |
+
create_repo(repo_id=target_repo_id, repo_type="model", exist_ok=True)
|
| 78 |
|
| 79 |
upload_folder(
|
| 80 |
folder_path=upload_dir,
|
| 81 |
+
repo_id=target_repo_id,
|
| 82 |
repo_type="model"
|
| 83 |
)
|
| 84 |
|
| 85 |
# Cleanup
|
| 86 |
shutil.rmtree(upload_dir)
|
| 87 |
+
print("🎉 上传完毕!")
|
| 88 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
if __name__ == "__main__":
|
| 90 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 91 |
parent_dir = os.path.dirname(current_dir)
|