Delete deepseek-r1_tokenizer.py
Browse files- deepseek-r1_tokenizer.py +0 -133
deepseek-r1_tokenizer.py
DELETED
|
@@ -1,133 +0,0 @@
|
|
| 1 |
-
from transformers import AutoTokenizer, PreTrainedTokenizerFast
|
| 2 |
-
from http.server import HTTPServer, BaseHTTPRequestHandler
|
| 3 |
-
import json
|
| 4 |
-
import argparse
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
class Tokenizer_Http():
    """Thin wrapper around a Hugging Face tokenizer used by the HTTP handlers.

    The wrapper renders a two-message chat (system + user) through the
    tokenizer's chat template and exposes the BOS/EOS special tokens.
    """

    # Defaults kept identical to the previously hard-coded values.
    DEFAULT_MODEL_ID = "deepseek-r1_tokenizer"
    # NOTE(review): the default system prompt names Qwen although the tokenizer
    # directory is "deepseek-r1_tokenizer" -- confirm this is intentional.
    DEFAULT_SYSTEM_PROMPT = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."

    def __init__(self, model_id=DEFAULT_MODEL_ID, system_prompt=DEFAULT_SYSTEM_PROMPT):
        """Load the tokenizer.

        Args:
            model_id: Name or path handed to ``AutoTokenizer.from_pretrained``.
            system_prompt: Content of the system message prepended by ``encode``.
        """
        self.system_prompt = system_prompt
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)

    def encode(self, prompt):
        """Render ``prompt`` as a chat and return the token ids of that text.

        Args:
            prompt: User message content.

        Returns:
            Whatever ``self.tokenizer.encode`` returns for the rendered chat text.
        """
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt},
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # The rendered prompt is echoed to stdout, as before.
        print(text)
        # NOTE(review): encode() is applied to already-templated text; depending on
        # the tokenizer config this may add special tokens a second time -- confirm.
        token_ids = self.tokenizer.encode(text)
        return token_ids

    def decode(self, token_ids):
        """Return the text produced by the tokenizer for ``token_ids``."""
        return self.tokenizer.decode(token_ids)

    @property
    def bos_id(self):
        """Id of the beginning-of-sequence token as reported by the tokenizer."""
        return self.tokenizer.bos_token_id

    @property
    def eos_id(self):
        """Id of the end-of-sequence token as reported by the tokenizer."""
        return self.tokenizer.eos_token_id

    @property
    def bos_token(self):
        """Beginning-of-sequence token as reported by the tokenizer."""
        return self.tokenizer.bos_token

    @property
    def eos_token(self):
        """End-of-sequence token as reported by the tokenizer."""
        return self.tokenizer.eos_token
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
# Module-level tokenizer instance; the HTTP handler class reads this global.
tokenizer = Tokenizer_Http()

# Startup output: special-token ids and strings, then a sample encoding.
_special_tokens = (tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
print(*_special_tokens)
_sample_ids = tokenizer.encode("hello world")
print(_sample_ids)
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
class Request(BaseHTTPRequestHandler):
    """HTTP request handler that serves the module-level ``tokenizer``.

    Routes:
        GET  /bos_id -> ``{"bos_id": <id or -1>}``
        GET  /eos_id -> ``{"eos_id": <id or -1>}``
        POST /encode -> request ``{"text": ...}``, response ``{"token_ids": ...}``
        POST /decode -> request ``{"token_ids": ...}``, response ``{"text": ...}``

    Any other path is answered with status 200 and the plain body ``error``.
    A POST whose Content-Length, encoding, JSON body or required key is
    invalid is answered with status 400 and the body ``error``.
    """

    # Class attributes read by the base handler machinery.
    timeout = 5
    server_version = 'Apache'

    def _reply(self, kind, msg, status=200):
        """Send ``status``, the custom ``type`` header and ``msg`` as the body."""
        self.send_response(status)
        self.send_header("type", kind)  # custom response header
        self.end_headers()
        print(msg)
        # Convert to str, then to bytes, before writing to the client.
        self.wfile.write(str(msg).encode())

    def do_GET(self):
        """Answer GET requests for the BOS/EOS token ids."""
        print(self.path)

        if self.path == '/bos_id':
            bos_id = tokenizer.bos_id
            # -1 stands in for a missing id in the JSON reply.
            msg = json.dumps({'bos_id': -1 if bos_id is None else bos_id})
        elif self.path == '/eos_id':
            eos_id = tokenizer.eos_id
            msg = json.dumps({'eos_id': -1 if eos_id is None else eos_id})
        else:
            msg = 'error'

        self._reply("get", msg)

    def do_POST(self):
        """Answer POST requests that encode text or decode token ids."""
        # Read the request body; a missing, non-numeric or negative
        # Content-Length, or a body that cannot be decoded, is a client error.
        try:
            length = int(self.headers.get('content-length'))
            if length < 0:
                raise ValueError("negative content-length")
            data = self.rfile.read(length).decode()
        except (TypeError, ValueError):
            self._reply("post", 'error', status=400)
            return

        if self.path not in ('/encode', '/decode'):
            self._reply("post", 'error')
            return

        # Parse the JSON body and pull out the one field this route needs.
        field = 'text' if self.path == '/encode' else 'token_ids'
        try:
            payload = json.loads(data)[field]
        except (ValueError, KeyError, TypeError):
            self._reply("post", 'error', status=400)
            return

        if self.path == '/encode':
            token_ids = tokenizer.encode(payload)
            msg = json.dumps({'token_ids': -1 if token_ids is None else token_ids})
        else:
            text = tokenizer.decode(payload)
            msg = json.dumps({'text': "" if text is None else text})

        self._reply("post", msg)
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
if __name__ == "__main__":
    # Command-line options: address and port to listen on.
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', type=str, default='localhost')
    parser.add_argument('--port', type=int, default=8080)
    cli = parser.parse_args()

    # (host, port) pair; announce the URL before starting to serve.
    address = (cli.host, cli.port)
    print('http://%s:%s' % address)

    # Build the server around the Request handler and serve until interrupted.
    HTTPServer(address, Request).serve_forever()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|