hash-map committed on
Commit
d4c5dff
·
verified ·
1 Parent(s): c45c121

Upload 4 files

Browse files
Files changed (3) hide show
  1. got_qa_transformer.h5 +3 -0
  2. part5.h5 +3 -0
  3. usage.py +27 -0
got_qa_transformer.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fad12318a9e0ef690db12505c2ab0332b0b34d4938501dd13719b7cb300fa3b9
3
+ size 517216632
part5.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87388001d309ce5442316aa5058fcce1a1d84a6f64dd5e05dd2e2b281fa0e4a9
3
+ size 517216632
usage.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import json
4
+ import sentencepiece as spm
5
+ from tqdm import tqdm
6
+ MODEL_PREFIX = "icefire_spm"
7
+
8
+ sp = spm.SentencePieceProcessor(model_file=f"{MODEL_PREFIX}.model")
9
+ sample = "Daenerys Targaryen rides Drogon to Winterfell."
10
+ ids = sp.encode(sample)
11
+ pieces = sp.encode(sample, out_type=str)
12
+ print("\nTest encode:")
13
+ print(f" Text : {sample}")
14
+ print(f" IDs : {ids}")
15
+ print(f" Pieces: {pieces}")
16
+ print(f" Decode: {sp.decode(ids)}")
17
+
18
+
19
+
20
+ # check an entity
21
+ entity = "Winterfell"
22
+ e_pieces = sp.encode(entity, out_type=str)
23
+ print(f"\nEntity '{entity}' → {len(e_pieces)} piece(s): {e_pieces}")
24
+
25
+ entity = "Long Claw"
26
+ e_pieces = sp.encode(entity, out_type=str)
27
+ print(f"\nEntity '{entity}' → {len(e_pieces)} piece(s): {e_pieces}")