| license: apache-2.0 | |
| language: | |
| - zh | |
| - en | |
| pipeline_tag: text-generation | |
| inference: false | |
| tags: | |
| - chinese | |
| # Explanation | |
| - The base tokenizer comes from [baichuan-7B](https://github.com/baichuan-inc/baichuan-7B); this model adds some math symbols: | |
| ``` | |
| "approx": 64000, | |
| "arccos": 64001, | |
| "arcsin": 64002, | |
| "arctan": 64003, | |
| "backsim": 64004, | |
| "begin{matrix}": 64005, | |
| "begin{vmatrix}": 64006, | |
| "beta": 64007, | |
| "cdot": 64008, | |
| "cdots": 64009, | |
| "cong": 64010, | |
| "delta": 64011, | |
| "dot": 64012, | |
| "downarrow": 64013, | |
| "end{matrix}": 64014, | |
| "end{vmatrix}": 64015, | |
| "exists": 64016, | |
| "forall": 64017, | |
| "gamma": 64018, | |
| "geq": 64019, | |
| "infty": 64020, | |
| "lambda": 64021, | |
| "left.": 64022, | |
| "left[": 64023, | |
| "left{": 64024, | |
| "leftrightarrow": 64025, | |
| "leq": 64026, | |
| "lg": 64027, | |
| "neq": 64028, | |
| "notin": 64029, | |
| "omega": 64030, | |
| "overline": 64031, | |
| "overrightarrow": 64032, | |
| "prime": 64033, | |
| "psi": 64034, | |
| "rho": 64035, | |
| "right.": 64036, | |
| "right}": 64037, | |
| "right]": 64038, | |
| "rightarrow": 64039, | |
| "sigma": 64040, | |
| "subset": 64041, | |
| "subseteq": 64042, | |
| "supset": 64043, | |
| "supseteq": 64044, | |
| "tan": 64045, | |
| "textcircled": 64046, | |
| "text{": 64047, | |
| "therefore": 64048, | |
| "theta": 64049, | |
| "varepsilon": 64050, | |
| "varphi": 64051, | |
| "widehat": 64052, | |
| "xrightarrow": 64053, | |
| "…": 64054, | |
| "℃": 64055, | |
| "①": 64056, | |
| "②": 64057, | |
| "③": 64058, | |
| "④": 64059, | |
| "⑤": 64060, | |
| "⑥": 64061, | |
| "⑦": 64062, | |
| "⑧": 64063, | |
| "⑨": 64064, | |
| "⑩": 64065, | |
| "%": 64066, | |
| "(": 64067, | |
| ")": 64068, | |
| "+": 64069, | |
| "-": 64070, | |
| ".": 64071, | |
| ";": 64072, | |
| "<": 64073, | |
| "=": 64074, | |
| ">": 64075 | |
| ``` |