alyex committed on
Commit
2c0753f
·
verified ·
1 Parent(s): 7ae51cc

Rename test.py to extract_rar.py

Browse files
Files changed (2) hide show
  1. extract_rar.py +128 -0
  2. test.py +0 -35
extract_rar.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Helper script to extract kiu_segment_metadata.json from RAR file
4
+
5
+ USAGE:
6
+ 1. Run locally: python extract_rar.py
7
+ 2. Upload the extracted JSON to your HF Space at: /data/kiu_segment_metadata/kiu_segment_metadata.json
8
+
9
+ OR add to your Space's Dockerfile:
10
+ RUN apt-get update && apt-get install -y unrar
11
+ """
12
+
13
+ import os
14
+ import shutil
15
+
16
+ try:
17
+ import rarfile
18
+ except ImportError:
19
+ print("Installing rarfile...")
20
+ os.system("pip install rarfile")
21
+ import rarfile
22
+
23
+ RAR_FILE = "kiu_segment_metadata.rar"
24
+ OUTPUT_DIR = "extracted"
25
+ OUTPUT_FILE = "kiu_segment_metadata.json"
26
+
27
def extract_with_unrar():
    """Extract the archive with the system ``unrar`` command.

    Unpacks ``RAR_FILE`` into ``OUTPUT_DIR``, then copies the first extracted
    file whose name ends in ``.json`` and contains ``segment_metadata`` to
    ``OUTPUT_FILE``.

    Returns:
        True if a matching JSON file was extracted and copied. False if
        ``unrar`` cannot be started, exits with a non-zero status, or no
        matching file is found under ``OUTPUT_DIR``.
    """
    # Function-scope import: only this fallback path needs subprocess.
    import subprocess

    print("Attempting extraction with system unrar...")
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Pass an argument list rather than a shell string so that paths with
    # spaces or shell metacharacters reach unrar verbatim. The trailing
    # slash marks the last argument as the destination directory.
    command = ["unrar", "x", "-y", RAR_FILE, f"{OUTPUT_DIR}/"]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as exc:
        # Typically FileNotFoundError when unrar is not installed.
        print(f"❌ Could not run unrar: {exc}")
        return False

    if completed.returncode != 0:
        return False

    # Locate the extracted JSON anywhere below the output directory.
    for root, _dirs, files in os.walk(OUTPUT_DIR):
        for name in files:
            if name.endswith(".json") and "segment_metadata" in name:
                shutil.copy(os.path.join(root, name), OUTPUT_FILE)
                print(f"✅ Extracted to: {OUTPUT_FILE}")
                print("📤 Upload this file to your HF Space")
                return True

    return False
47
+
48
def extract_with_rarfile():
    """Extract the metadata JSON using the ``rarfile`` library.

    Opens ``RAR_FILE``, selects the first member whose name ends in ``.json``
    and contains ``segment_metadata``, extracts it under ``OUTPUT_DIR`` and
    copies it to ``OUTPUT_FILE``.

    Returns:
        True on success. False if no matching member exists, if
        ``rarfile.RarCannotExec`` is raised, or if any other exception occurs
        (the error is printed rather than propagated).
    """
    print("Attempting extraction with rarfile library...")

    try:
        with rarfile.RarFile(RAR_FILE) as rf:
            # Read the member list once; it is used for both the listing
            # and the search below.
            member_names = [info.filename for info in rf.infolist()]
            print(f"Files in RAR: {member_names}")

            # First member that looks like the segment metadata JSON.
            json_file = next(
                (
                    name
                    for name in member_names
                    if name.endswith(".json") and "segment_metadata" in name
                ),
                None,
            )
            if not json_file:
                print("❌ No JSON file found in RAR")
                return False

            print(f"Found: {json_file}")

            os.makedirs(OUTPUT_DIR, exist_ok=True)
            rf.extract(json_file, OUTPUT_DIR)

            # The member keeps its archive-relative path below OUTPUT_DIR.
            src = os.path.join(OUTPUT_DIR, json_file)
            shutil.copy(src, OUTPUT_FILE)

            print(f"✅ Extracted to: {OUTPUT_FILE}")
            print(
                "📤 Upload this file to your HF Space at: "
                "/data/kiu_segment_metadata/kiu_segment_metadata.json"
            )
            return True

    except rarfile.RarCannotExec:
        print("❌ unrar tool not found")
        return False
    except Exception as e:
        # Best-effort helper: report the problem and let the caller fall
        # back to another extraction method.
        print(f"❌ Error: {e}")
        return False
87
+
88
def main():
    """Run the extraction workflow and print troubleshooting help on failure.

    Checks that ``RAR_FILE`` exists in the current directory, tries
    ``extract_with_rarfile()`` first and ``extract_with_unrar()`` second, and
    prints a list of manual solutions when both attempts return a falsy value.
    """
    if not os.path.exists(RAR_FILE):
        # Nothing to extract: show where we looked and what is there.
        print(f"❌ {RAR_FILE} not found in current directory")
        print(f"Current directory: {os.getcwd()}")
        print(f"Files: {os.listdir('.')}")
        return

    print(f"Found {RAR_FILE}")
    print(f"Size: {os.path.getsize(RAR_FILE):,} bytes")
    print()

    # Library-based extraction first, system command as the fallback.
    succeeded = extract_with_rarfile()
    if not succeeded:
        print()
        print("Trying system unrar command...")
        succeeded = extract_with_unrar()

    if succeeded:
        return

    # Both strategies failed: emit the help banner line by line.
    rule = "=" * 60
    help_lines = (
        "",
        rule,
        "❌ EXTRACTION FAILED",
        rule,
        "",
        "SOLUTIONS:",
        "",
        "1. Install unrar on your system:",
        " Ubuntu/Debian: sudo apt-get install unrar",
        " Mac: brew install unrar",
        " Windows: Download from https://www.rarlab.com/",
        "",
        "2. Extract manually using WinRAR/7-Zip",
        " Then upload kiu_segment_metadata.json to your HF Space",
        "",
        "3. For HF Spaces, add to Dockerfile:",
        " RUN apt-get update && apt-get install -y unrar",
        "",
    )
    for line in help_lines:
        print(line)
126
+
127
+ if __name__ == "__main__":
128
+ main()
test.py DELETED
@@ -1,35 +0,0 @@
1
- # test_download.py
2
- import os
3
- from huggingface_hub import hf_hub_download
4
-
5
- HF_TOKEN = os.getenv("HF_TOKEN")
6
- REPO_ID = "alyex/karnak_data"
7
-
8
- print("Testing download...")
9
-
10
- try:
11
- # Try with xet=True
12
- print("Trying with xet=True...")
13
- path = hf_hub_download(
14
- repo_id=REPO_ID,
15
- filename="kiu_segment_metadata.rar",
16
- token=HF_TOKEN,
17
- repo_type="dataset",
18
- xet=True
19
- )
20
- print(f"βœ… Success! File at: {path}")
21
- except Exception as e:
22
- print(f"❌ xet=True failed: {e}")
23
-
24
- try:
25
- # Try without xet
26
- print("\nTrying without xet...")
27
- path = hf_hub_download(
28
- repo_id=REPO_ID,
29
- filename="kiu_segment_metadata.rar",
30
- token=HF_TOKEN,
31
- repo_type="dataset"
32
- )
33
- print(f"βœ… Success! File at: {path}")
34
- except Exception as e2:
35
- print(f"❌ Standard download failed: {e2}")