robot4 committed on
Commit
7c9ceb3
·
verified ·
1 Parent(s): 89f9a3e

Upload folder using huggingface_hub

Browse files
data/processed_dataset/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "test"]}
data/processed_dataset/test/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4590634c3f9bb97b2fb2047cffcbdd00122eb564e6563b8ecb9673a7aa881b
3
+ size 44377040
data/processed_dataset/test/dataset_info.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "labels": {
6
+ "dtype": "int64",
7
+ "_type": "Value"
8
+ },
9
+ "input_ids": {
10
+ "feature": {
11
+ "dtype": "int32",
12
+ "_type": "Value"
13
+ },
14
+ "_type": "List"
15
+ },
16
+ "token_type_ids": {
17
+ "feature": {
18
+ "dtype": "int8",
19
+ "_type": "Value"
20
+ },
21
+ "_type": "List"
22
+ },
23
+ "attention_mask": {
24
+ "feature": {
25
+ "dtype": "int8",
26
+ "_type": "Value"
27
+ },
28
+ "_type": "List"
29
+ }
30
+ },
31
+ "homepage": "",
32
+ "license": ""
33
+ }
data/processed_dataset/test/state.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "e68a6594db5a153c",
8
+ "_format_columns": [
9
+ "attention_mask",
10
+ "input_ids",
11
+ "labels",
12
+ "token_type_ids"
13
+ ],
14
+ "_format_kwargs": {},
15
+ "_format_type": null,
16
+ "_output_all_columns": false,
17
+ "_split": null
18
+ }
data/processed_dataset/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f4e04f36632cfd2ae601cca3c4541ed2a2987279e320e5b6c544067f92871f
3
+ size 399379240
data/processed_dataset/train/dataset_info.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "labels": {
6
+ "dtype": "int64",
7
+ "_type": "Value"
8
+ },
9
+ "input_ids": {
10
+ "feature": {
11
+ "dtype": "int32",
12
+ "_type": "Value"
13
+ },
14
+ "_type": "List"
15
+ },
16
+ "token_type_ids": {
17
+ "feature": {
18
+ "dtype": "int8",
19
+ "_type": "Value"
20
+ },
21
+ "_type": "List"
22
+ },
23
+ "attention_mask": {
24
+ "feature": {
25
+ "dtype": "int8",
26
+ "_type": "Value"
27
+ },
28
+ "_type": "List"
29
+ }
30
+ },
31
+ "homepage": "",
32
+ "license": ""
33
+ }
data/processed_dataset/train/state.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "c52fbe1364b1bc3b",
8
+ "_format_columns": [
9
+ "attention_mask",
10
+ "input_ids",
11
+ "labels",
12
+ "token_type_ids"
13
+ ],
14
+ "_format_kwargs": {},
15
+ "_format_type": null,
16
+ "_output_all_columns": false,
17
+ "_split": null
18
+ }
results/images/data_distribution_2025-12-18_15-27-36.png ADDED
results/images/metrics_2025-12-18_15-06-59.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Timestamp: 2025-12-18_15-06-59
2
+ Final Validation Accuracy: 0.7683
3
+ Final Validation Loss: 0.5479554533958435
4
+ Plot saved to: training_metrics_2025-12-18_15-06-59.png
results/images/metrics_2025-12-18_15-19-18.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Timestamp: 2025-12-18_15-19-18
2
+ Final Validation Accuracy: 0.7719
3
+ Final Validation Loss: 0.538950502872467
4
+ Plot saved to: training_metrics_2025-12-18_15-19-18.png
results/images/metrics_2025-12-18_15-25-36.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Timestamp: 2025-12-18_15-25-36
2
+ Final Validation Accuracy: 0.7719
3
+ Final Validation Loss: 0.538950502872467
4
+ Plot saved to: training_metrics_2025-12-18_15-25-36.png
results/images/metrics_2025-12-18_15-27-41.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Timestamp: 2025-12-18_15-27-41
2
+ Final Validation Accuracy: 0.7746
3
+ Final Validation Loss: 0.5276312828063965
4
+ Plot saved to: training_metrics_2025-12-18_15-27-41.png
results/images/training_metrics_2025-12-18_15-06-59.png ADDED
results/images/training_metrics_2025-12-18_15-19-18.png ADDED
results/images/training_metrics_2025-12-18_15-25-36.png ADDED
results/images/training_metrics_2025-12-18_15-27-41.png ADDED
src/upload_to_hf.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi, create_repo, upload_folder
6
  from config import Config
7
 
8
  def main():
9
- print("🚀 开始重新上传 (Code + Model Combined)...")
10
 
11
  api = HfApi()
12
  try:
@@ -17,77 +17,75 @@ def main():
17
  print("❌ Please login first.")
18
  return
19
 
20
- model_repo_id = f"{username}/sentiment-analysis-bert-finetuned"
 
21
 
22
- # 1. 准备临时上传目录 (Merge Strategy)
23
- # create a temp dir to combine everything before uploading to ensure structure is perfect
24
  upload_dir = "hf_upload_staging"
25
  if os.path.exists(upload_dir):
26
  shutil.rmtree(upload_dir)
27
  os.makedirs(upload_dir)
28
 
29
- print(f"📦 Staging files to {upload_dir}...")
 
 
 
 
 
 
 
30
 
31
- # A. Copy Project Code (src, notebook, etc)
32
- # We want these at the root
33
- items_to_copy = ["src", "notebooks", "docs", "demo", "README.md", "requirements.txt", "*.pptx"]
34
  for pattern in items_to_copy:
35
  for item in glob.glob(pattern):
36
  dest = os.path.join(upload_dir, item)
 
37
  if os.path.isdir(item):
38
  shutil.copytree(item, dest, dirs_exist_ok=True)
39
  else:
40
  shutil.copy2(item, dest)
41
-
42
- # B. Copy Model Weights (Flattened to root)
43
- # Find latest checkpoint
 
 
 
 
 
 
 
 
 
 
44
  candidates = glob.glob(os.path.join(Config.RESULTS_DIR, "checkpoint-*"))
45
- # Filter out zip files if any
46
  candidates = [c for c in candidates if os.path.isdir(c)]
47
 
48
  if candidates:
49
  candidates.sort(key=os.path.getmtime)
50
  latest_ckpt = candidates[-1]
51
- print(f"✅ Found latest checkpoint: {latest_ckpt}")
52
 
53
- # Files to copy from checkpoint to root
54
  model_files = ["config.json", "model.safetensors", "pytorch_model.bin", "tokenizer.json", "vocab.txt", "tokenizer_config.json", "special_tokens_map.json"]
55
 
56
- found_weights = False
57
  for fname in os.listdir(latest_ckpt):
58
  if fname in model_files or fname.endswith(".safetensors") or fname.endswith(".bin"):
59
- # Copy to root of staging
60
  shutil.copy2(os.path.join(latest_ckpt, fname), os.path.join(upload_dir, fname))
61
- if "model" in fname or "pytorch" in fname:
62
- found_weights = True
63
-
64
- if not found_weights:
65
- print("⚠️ WARNING: No model weights (.bin or .safetensors) found in checkpoint!")
66
  else:
67
- print(" No checkpoints found in results/!")
68
 
69
- # 2. Upload the Staged Directory
70
- print(f"\n⬆️ Uploading entire {upload_dir} to https://huggingface.co/{model_repo_id}")
71
- create_repo(repo_id=model_repo_id, repo_type="model", exist_ok=True)
72
 
73
  upload_folder(
74
  folder_path=upload_dir,
75
- repo_id=model_repo_id,
76
  repo_type="model"
77
  )
78
 
79
  # Cleanup
80
  shutil.rmtree(upload_dir)
81
- print("🎉 Done! Model and Code are now together in the repo root.")
82
-
83
- # Check dataset
84
- dataset_repo_id = f"{username}/sentiment-analysis-dataset-processed"
85
- data_path = os.path.join(Config.DATA_DIR, "processed_dataset")
86
- if os.path.exists(data_path):
87
- print(f"\n⬆️ Uploading dataset to https://huggingface.co/datasets/{dataset_repo_id}")
88
- create_repo(repo_id=dataset_repo_id, repo_type="dataset", exist_ok=True)
89
- upload_folder(folder_path=data_path, repo_id=dataset_repo_id, repo_type="dataset")
90
-
91
  if __name__ == "__main__":
92
  current_dir = os.path.dirname(os.path.abspath(__file__))
93
  parent_dir = os.path.dirname(current_dir)
 
6
  from config import Config
7
 
8
  def main():
9
+ print("🚀 开始全量上传 (All-in-One) robot4/sentiment-analysis-bert-finetuned ...")
10
 
11
  api = HfApi()
12
  try:
 
17
  print("❌ Please login first.")
18
  return
19
 
20
+ # 目标仓库 (用户指定)
21
+ target_repo_id = "robot4/sentiment-analysis-bert-finetuned"
22
 
23
+ # 1. 准备临时上传目录
 
24
  upload_dir = "hf_upload_staging"
25
  if os.path.exists(upload_dir):
26
  shutil.rmtree(upload_dir)
27
  os.makedirs(upload_dir)
28
 
29
+ print(f"📦 正在打包所有文件到 {upload_dir}...")
30
+
31
+ # A. 复制项目代码和资源
32
+ # 包含了 data, src, docs, notebooks, demo, results/images 等
33
+ items_to_copy = [
34
+ "src", "notebooks", "docs", "demo", "data",
35
+ "README.md", "requirements.txt", "*.pptx"
36
+ ]
37
 
 
 
 
38
  for pattern in items_to_copy:
39
  for item in glob.glob(pattern):
40
  dest = os.path.join(upload_dir, item)
41
+ print(f" - Adding {item}...")
42
  if os.path.isdir(item):
43
  shutil.copytree(item, dest, dirs_exist_ok=True)
44
  else:
45
  shutil.copy2(item, dest)
46
+
47
+ # B. 特殊处理 results 目录 (只传图片和 logs,不传所有 checkpoint 文件夹)
48
+ results_dest = os.path.join(upload_dir, "results")
49
+ os.makedirs(results_dest, exist_ok=True)
50
+
51
+ # 复制图片
52
+ if os.path.exists("results/images"):
53
+ shutil.copytree("results/images", os.path.join(results_dest, "images"), dirs_exist_ok=True)
54
+ # 复制 txt metrics
55
+ for txt in glob.glob("results/*.txt"):
56
+ shutil.copy2(txt, results_dest)
57
+
58
+ # C. 提取最新模型权重到根目录 (方便直接加载)
59
  candidates = glob.glob(os.path.join(Config.RESULTS_DIR, "checkpoint-*"))
 
60
  candidates = [c for c in candidates if os.path.isdir(c)]
61
 
62
  if candidates:
63
  candidates.sort(key=os.path.getmtime)
64
  latest_ckpt = candidates[-1]
65
+ print(f"✅ 提取最新模型权重: {latest_ckpt} -> 根目录")
66
 
 
67
  model_files = ["config.json", "model.safetensors", "pytorch_model.bin", "tokenizer.json", "vocab.txt", "tokenizer_config.json", "special_tokens_map.json"]
68
 
 
69
  for fname in os.listdir(latest_ckpt):
70
  if fname in model_files or fname.endswith(".safetensors") or fname.endswith(".bin"):
 
71
  shutil.copy2(os.path.join(latest_ckpt, fname), os.path.join(upload_dir, fname))
 
 
 
 
 
72
  else:
73
+ print("⚠️ 未找到 Checkpoint,仅上传代码和数据。")
74
 
75
+ # 2. 执行上传
76
+ print(f"\n⬆️ 正在上传所有文件到 https://huggingface.co/{target_repo_id}")
77
+ create_repo(repo_id=target_repo_id, repo_type="model", exist_ok=True)
78
 
79
  upload_folder(
80
  folder_path=upload_dir,
81
+ repo_id=target_repo_id,
82
  repo_type="model"
83
  )
84
 
85
  # Cleanup
86
  shutil.rmtree(upload_dir)
87
+ print("🎉 上传完毕!")
88
+
 
 
 
 
 
 
 
 
89
  if __name__ == "__main__":
90
  current_dir = os.path.dirname(os.path.abspath(__file__))
91
  parent_dir = os.path.dirname(current_dir)