Upload folder using huggingface_hub
Browse files
- modeling_chatglm.py +13 -9
- tokenization_chatglm.py +1 -1
modeling_chatglm.py
CHANGED
|
@@ -87,7 +87,7 @@ def split_tensor_along_last_dim(
|
|
| 87 |
class RotaryEmbedding(nn.Module):
|
| 88 |
def __init__(self, dim, original_impl=False, device=None, dtype=None):
|
| 89 |
super().__init__()
|
| 90 |
-
inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2, device=device, dtype=dtype) / dim))
|
| 91 |
self.register_buffer("inv_freq", inv_freq)
|
| 92 |
self.dim = dim
|
| 93 |
self.original_impl = original_impl
|
|
@@ -702,6 +702,9 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
|
|
| 702 |
dtype=config.torch_dtype, **init_kwargs)
|
| 703 |
self.gradient_checkpointing = False
|
| 704 |
|
|
|
|
|
|
|
|
|
|
| 705 |
def forward(
|
| 706 |
self,
|
| 707 |
input_ids,
|
|
@@ -932,7 +935,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
|
|
| 932 |
|
| 933 |
|
| 934 |
@torch.no_grad()
|
| 935 |
-
def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, max_length: int =
|
| 936 |
do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None, **kwargs):
|
| 937 |
if history is None:
|
| 938 |
history = []
|
|
@@ -951,7 +954,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
|
|
| 951 |
|
| 952 |
@torch.no_grad()
|
| 953 |
def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, past_key_values=None,
|
| 954 |
-
max_length: int =
|
| 955 |
return_past_key_values=False, **kwargs):
|
| 956 |
if history is None:
|
| 957 |
history = []
|
|
@@ -976,12 +979,13 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
|
|
| 976 |
outputs, past_key_values = outputs
|
| 977 |
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
|
| 978 |
response = tokenizer.decode(outputs)
|
| 979 |
-
response = self.process_response(response)
|
| 980 |
-
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
|
|
|
| 985 |
|
| 986 |
@torch.no_grad()
|
| 987 |
def stream_generate(
|
|
|
|
| 87 |
class RotaryEmbedding(nn.Module):
|
| 88 |
def __init__(self, dim, original_impl=False, device=None, dtype=None):
|
| 89 |
super().__init__()
|
| 90 |
+
inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2, device=device).to(dtype=dtype) / dim))
|
| 91 |
self.register_buffer("inv_freq", inv_freq)
|
| 92 |
self.dim = dim
|
| 93 |
self.original_impl = original_impl
|
|
|
|
| 702 |
dtype=config.torch_dtype, **init_kwargs)
|
| 703 |
self.gradient_checkpointing = False
|
| 704 |
|
| 705 |
+
def get_input_embeddings(self):
|
| 706 |
+
return self.embedding.word_embeddings
|
| 707 |
+
|
| 708 |
def forward(
|
| 709 |
self,
|
| 710 |
input_ids,
|
|
|
|
| 935 |
|
| 936 |
|
| 937 |
@torch.no_grad()
|
| 938 |
+
def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, max_length: int = 8192, num_beams=1,
|
| 939 |
do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None, **kwargs):
|
| 940 |
if history is None:
|
| 941 |
history = []
|
|
|
|
| 954 |
|
| 955 |
@torch.no_grad()
|
| 956 |
def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, past_key_values=None,
|
| 957 |
+
max_length: int = 8192, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None,
|
| 958 |
return_past_key_values=False, **kwargs):
|
| 959 |
if history is None:
|
| 960 |
history = []
|
|
|
|
| 979 |
outputs, past_key_values = outputs
|
| 980 |
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
|
| 981 |
response = tokenizer.decode(outputs)
|
| 982 |
+
if response and response[-1] != "�":
|
| 983 |
+
response = self.process_response(response)
|
| 984 |
+
new_history = history + [(query, response)]
|
| 985 |
+
if return_past_key_values:
|
| 986 |
+
yield response, new_history, past_key_values
|
| 987 |
+
else:
|
| 988 |
+
yield response, new_history
|
| 989 |
|
| 990 |
@torch.no_grad()
|
| 991 |
def stream_generate(
|
tokenization_chatglm.py
CHANGED
|
@@ -69,8 +69,8 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 69 |
super().__init__(padding_side=padding_side, **kwargs)
|
| 70 |
self.name = "GLMTokenizer"
|
| 71 |
|
| 72 |
-
self.tokenizer = SPTokenizer(vocab_file)
|
| 73 |
self.vocab_file = vocab_file
|
|
|
|
| 74 |
self.special_tokens = {
|
| 75 |
"<bos>": self.tokenizer.bos_id,
|
| 76 |
"<eos>": self.tokenizer.eos_id,
|
|
|
|
| 69 |
super().__init__(padding_side=padding_side, **kwargs)
|
| 70 |
self.name = "GLMTokenizer"
|
| 71 |
|
|
|
|
| 72 |
self.vocab_file = vocab_file
|
| 73 |
+
self.tokenizer = SPTokenizer(vocab_file)
|
| 74 |
self.special_tokens = {
|
| 75 |
"<bos>": self.tokenizer.bos_id,
|
| 76 |
"<eos>": self.tokenizer.eos_id,
|