Naphula committed on
Commit
fac182a
·
verified ·
1 Parent(s): 4b427d3

Upload json_reverter.py

Browse files
Files changed (1) hide show
  1. json_reverter.py +87 -0
json_reverter.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import argparse
4
+ from colorama import init, Fore, Style
5
+ from tqdm import tqdm
6
+
7
+ init()
8
+
9
# File names fetched from each model's Hugging Face repo and written over
# the local copies, in the order they are processed.
TARGET_FILES = [
    "config.json",
    "tokenizer.json",
    "special_tokens_map.json",
    "tokenizer_config.json",
    "generation_config.json",
]
16
+
17
def download_original(repo_id, filename):
    """Download one file from the ``main`` branch of a Hugging Face repo.

    Args:
        repo_id: Repository identifier in ``author/model`` form.
        filename: Name of the file inside the repository.

    Returns:
        The file content as a string, or ``None`` when the request fails
        or the server answers with anything other than HTTP 200.
    """
    url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # Best effort: connection errors, timeouts and malformed URLs are
        # all reported to the caller as "no content".
        return None

    if response.status_code != 200:
        return None

    # Decode the raw bytes as UTF-8 instead of relying on ``response.text``:
    # when the server omits a charset, requests guesses one (ISO-8859-1 for
    # text/* responses), which would corrupt non-ASCII characters.
    try:
        return response.content.decode("utf-8")
    except UnicodeDecodeError:
        # Not valid UTF-8 after all; fall back to requests' own detection.
        return response.text
28
+
29
def _restore_file(file_path, original_content):
    """Write *original_content* to *file_path*, backing up what it replaces.

    Returns ``True`` when the file was written and ``False`` when the
    existing file already holds exactly this content (nothing is touched,
    so a ``.bak`` created by an earlier run is left intact).

    Raises:
        OSError: If the backup or the write fails.
    """
    payload = original_content.encode("utf-8")

    if os.path.exists(file_path):
        try:
            with open(file_path, "rb") as existing:
                if existing.read() == payload:
                    return False
        except OSError:
            # Unreadable (or not a regular file): treat it as different and
            # fall through to the backup + rewrite below.
            pass
        os.replace(file_path, file_path + ".bak")

    # Binary mode keeps the downloaded bytes intact; text mode would
    # translate "\n" to the platform line separator on Windows.
    with open(file_path, "wb") as out:
        out.write(payload)
    return True


def main():
    """Replace the tracked JSON files of local model folders with HF originals.

    Takes one positional command-line argument, ``base_dir``. Every
    sub-directory of it whose name starts with ``!models--`` is read as
    ``!models--<author>--<model>`` and mapped to the repo ``<author>/<model>``.
    For each name in ``TARGET_FILES`` the original is downloaded and written
    into the model folder (or into its ``fixed`` sub-directory when one
    exists); a differing existing file is first renamed to ``<name>.bak``.
    Files that cannot be downloaded are skipped, and a file-system error on
    one file is reported without stopping the run.
    """
    parser = argparse.ArgumentParser(description="Revert modified JSON files to their HF originals.")
    parser.add_argument("base_dir", help="Directory to scan (e.g., B:\\12B)")
    args = parser.parse_args()

    if not os.path.exists(args.base_dir):
        print(f"{Fore.RED}Error: Path {args.base_dir} does not exist.{Style.RESET_ALL}")
        return
    if not os.path.isdir(args.base_dir):
        print(f"{Fore.RED}Error: Path {args.base_dir} is not a directory.{Style.RESET_ALL}")
        return

    # Only directories qualify; a stray file with a matching name would
    # otherwise fail later when used as a target directory.
    model_folders = [
        entry
        for entry in os.listdir(args.base_dir)
        if entry.startswith("!models--")
        and os.path.isdir(os.path.join(args.base_dir, entry))
    ]

    print(f"{Fore.CYAN}--- JSON REVERTER: RESTORING ORIGINALS ---{Style.RESET_ALL}")
    print(f"Found {len(model_folders)} potential model folders.\n")

    for folder in tqdm(model_folders, desc="Processing Models"):
        # Parse repo ID: !models--author--model -> author/model.
        # Split on '--' and drop the first part (!models).
        parts = folder.split("--")
        if len(parts) < 3:
            continue

        author = parts[1]
        model_name = "--".join(parts[2:])  # Rejoin in case the model name has '--'
        repo_id = f"{author}/{model_name}"

        full_path = os.path.join(args.base_dir, folder)

        # Some model folders keep their working copies in a 'fixed' subdirectory.
        fixed_dir = os.path.join(full_path, "fixed")
        target_dir = fixed_dir if os.path.isdir(fixed_dir) else full_path

        tqdm.write(f"\n{Fore.YELLOW}Restoring: {repo_id}{Style.RESET_ALL}")

        for filename in TARGET_FILES:
            original_content = download_original(repo_id, filename)

            if not original_content:
                # Not every model ships every target file (e.g. missing
                # tokenizer.json); any failed download also lands here.
                tqdm.write(f"  {Fore.LIGHTBLACK_EX}- Skipped {filename} (Not found on HF){Style.RESET_ALL}")
                continue

            file_path = os.path.join(target_dir, filename)
            try:
                written = _restore_file(file_path, original_content)
            except OSError as exc:
                # Keep going: one locked or read-only file should not abort
                # the remaining files and models.
                tqdm.write(f"  {Fore.RED}✗ Failed {filename}: {exc}{Style.RESET_ALL}")
                continue

            if written:
                tqdm.write(f"  {Fore.GREEN}✓ Restored {filename}{Style.RESET_ALL}")
            else:
                tqdm.write(f"  {Fore.LIGHTBLACK_EX}= Unchanged {filename} (already matches original){Style.RESET_ALL}")

    print(f"\n{Fore.CYAN}--- REVERSION COMPLETE ---{Style.RESET_ALL}")
    print("All modified JSONs have been replaced with originals (Backups saved as .bak).")
    print("You can now run eos_scanner.py to start fresh.")
86
+ if __name__ == "__main__":
87
+ main()