syhrlhyn committed on
Commit
1cfad42
·
verified ·
1 Parent(s): 2ff2874

Upload folder using huggingface_hub

Files changed (4)
  1. aibys.model +3 -0
  2. aibys.vocab +0 -0
  3. push_tokenizer.py +21 -0
  4. tes.py +46 -0
aibys.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:af9a5e0fc0216a887c5aa8d8299fa59cadb7e72c51f3efc93c97a67587fd8058
+ size 770035
aibys.vocab ADDED
The diff for this file is too large to render.
push_tokenizer.py ADDED
@@ -0,0 +1,21 @@
+ from huggingface_hub import HfApi
+
+ api = HfApi()
+
+ # Change this to match your username
+ REPO_ID = "syhrlhyn/aibys-tokenizer"
+ # Folder containing the aibys.model and aibys.vocab files
+ FOLDER_TOKENIZER = "../tokenizer"
+
+ print(f"🚀 Starting upload of the Aibys tokenizer to {REPO_ID}...")
+
+ try:
+     api.upload_folder(
+         folder_path=FOLDER_TOKENIZER,
+         repo_id=REPO_ID,
+         repo_type="model",  # model repo type!
+     )
+     print("✅ Done! aibys.model and aibys.vocab are now online.")
+     print(f"Check it at: https://huggingface.co/{REPO_ID}")
+ except Exception as e:
+     print(f"❌ Oops, error: {e}")
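For a quick sanity check after the upload, the pushed files can be fetched back from the Hub with hf_hub_download and loaded with sentencepiece. A minimal sketch, assuming the files sit at the repo root and sentencepiece is installed locally (the sample sentence is arbitrary):

from huggingface_hub import hf_hub_download
import sentencepiece as spm

# Pull aibys.model back down from the repo the script above pushes to
model_path = hf_hub_download(repo_id="syhrlhyn/aibys-tokenizer", filename="aibys.model")

# Load it and tokenize an arbitrary sentence
sp = spm.SentencePieceProcessor()
sp.load(model_path)
print(sp.encode_as_pieces("Halo dunia"))  # sample input; any text works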
tes.py ADDED
@@ -0,0 +1,46 @@
+ import sentencepiece as spm
+ import os
+
+ def interactive_test(model_path="aibys.model"):
+     if not os.path.exists(model_path):
+         print(f"❌ Model file not found at: {model_path}")
+         return
+
+     # Load the model
+     sp = spm.SentencePieceProcessor()
+     sp.load(model_path)
+
+     print("="*60)
+     print(" 🤖 AIBYS TOKENIZER INTERACTIVE TESTER")
+     print(" Type 'exit' or 'keluar' to stop")
+     print("="*60)
+     print(f"Vocab Size: {sp.vocab_size()}")
+
+     while True:
+         print("\n" + "-"*50)
+         text = input("📝 Enter a sentence: ")
+
+         if text.lower() in ['exit', 'keluar', 'q']:
+             print("👋 See you later, Syahril!")
+             break
+
+         if not text.strip():
+             continue
+
+         # Tokenization
+         tokens = sp.encode_as_pieces(text)
+         ids = sp.encode_as_ids(text)
+
+         print(f"\n📊 Analysis:")
+         print(f"   Input     : {text}")
+         print(f"   Tokens    : {tokens}")
+         print(f"   Token IDs : {ids}")
+         print(f"   Count     : {len(ids)} tokens")
+
+         # Efficiency check (estimate: the fewer tokens relative to the word count, the better)
+         kata_count = len(text.split())
+         efisiensi = "VERY GOOD" if len(ids) <= kata_count + 2 else "STANDARD"
+         print(f"   Efficiency: {efisiensi}")
+
+ if __name__ == "__main__":
+     interactive_test()
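tes.py only exercises encoding; a round trip through decode_ids is a quick way to confirm the model also reconstructs text from IDs. A small non-interactive sketch, assuming aibys.model is in the working directory (the sample sentence is arbitrary):

import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.load("aibys.model")

text = "Tokenizer Aibys sudah online."  # arbitrary sample sentence
ids = sp.encode_as_ids(text)
print(ids)                 # token IDs, as in tes.py
print(sp.decode_ids(ids))  # should round-trip back to the original text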