Spaces:

Synthia
/

ChatGal

Runtime error

App Files Files Community

wanicca committed on Apr 17, 2023

Commit

8dad166

1 Parent(s): 96ec97c

add topk and typical

Browse files

Files changed (2) hide show

app.py +11 -7
utils.py +125 -0

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ if 'ON_COLAB' in os.environ and os.environ['ON_COLAB'] == '1':
     model = RWKV(model=model_path, strategy='cuda bf16')
 else:
     model = RWKV(model=model_path, strategy='cpu bf16')
-from rwkv.utils import PIPELINE, PIPELINE_ARGS
 pipeline = PIPELINE(model, "20B_tokenizer.json")
 def infer(
@@ -31,10 +31,12 @@ def infer(
         token_count=10,
         temperature=0.7,
         top_p=1.0,
         presencePenalty = 0.05,
         countPenalty = 0.05,
 ):
-    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
                      alpha_frequency = countPenalty,
                      alpha_presence = presencePenalty,
                      token_ban = [0], # ban the generation of some tokens
@@ -63,7 +65,7 @@ def infer(
         for n in occurrence:
             out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
-        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
         if token in args.token_stop:
             break
         all_tokens += [token]
@@ -88,8 +90,8 @@ examples = [
 女招待：是吗。那真是太好了
-我因为撰稿的需要，而造访了这间位于信州山间的温泉宿驿。""", 200, 2.0, 0.4, 0.1, 0.1],
-    ["翡翠：欢迎回来，志贵少爷。", 200, 2.0, 0.4, 0.1, 0.1],
     ["""莲华：你的目的，就是这个万华镜吧？
 莲华拿出了万华镜。
@@ -105,7 +107,7 @@ examples = [
 深见：请让我好好看看……
-我刚想把手伸过去，莲华就一下子把它收了回去。""", 200, 2.0, 0.4, 0.1, 0.1],
     ["""嘉祥：偶尔来一次也不错。
 我坐到客厅的沙发上，拍了拍自己的大腿。
@@ -122,7 +124,7 @@ examples = [
 我摸摸各自占据住我左右两腿的两颗猫头。
-嘉祥：开心归开心，拜托你们俩别一直乱动啊，很危险的。""", 200, 2.0, 0.4, 0.1, 0.1],
 ]
 iface = gr.Interface(
@@ -150,6 +152,8 @@ iface = gr.Interface(
         gr.Slider(10, 200, step=10, value=200, label="token_count 每次生成的长度"),  # token_count
         gr.Slider(0.2, 2.0, step=0.1, value=2, label="temperature 默认0.7，高则变化丰富，低则保守求稳"),  # temperature
         gr.Slider(0.0, 1.0, step=0.05, value=0.4, label="top_p 默认1.0，高则标新立异，低则循规蹈矩"),  # top_p
         gr.Slider(0.0, 1.0, step=0.1, value=0.1, label="presencePenalty 默认0.0，避免写过的类似字"),  # presencePenalty
         gr.Slider(0.0, 1.0, step=0.1, value=0.1, label="countPenalty 默认0.0，额外避免写过多次的类似字"),  # countPenalty
     ],

     model = RWKV(model=model_path, strategy='cuda bf16')
 else:
     model = RWKV(model=model_path, strategy='cpu bf16')
+from utils import PIPELINE, PIPELINE_ARGS
 pipeline = PIPELINE(model, "20B_tokenizer.json")
 def infer(
         token_count=10,
         temperature=0.7,
         top_p=1.0,
+        top_k=50,
+        typical_p=1.0,
         presencePenalty = 0.05,
         countPenalty = 0.05,
 ):
+    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p), top_k=int(top_k),typical_p=float(typical_p),
                      alpha_frequency = countPenalty,
                      alpha_presence = presencePenalty,
                      token_ban = [0], # ban the generation of some tokens
         for n in occurrence:
             out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
+        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p, top_k=args.top_k, typical_p=args.typical_p)
         if token in args.token_stop:
             break
         all_tokens += [token]
 女招待：是吗。那真是太好了
+我因为撰稿的需要，而造访了这间位于信州山间的温泉宿驿。""", 200, 2.0, 0.4, 0, 1.0, 0.1, 0.1],
+    ["翡翠：欢迎回来，志贵少爷。", 200, 2.0, 0.4, 0, 1.0, 0.1, 0.1],
     ["""莲华：你的目的，就是这个万华镜吧？
 莲华拿出了万华镜。
 深见：请让我好好看看……
+我刚想把手伸过去，莲华就一下子把它收了回去。""", 200, 2.0, 0.4, 0, 1.0, 0.1, 0.1],
     ["""嘉祥：偶尔来一次也不错。
 我坐到客厅的沙发上，拍了拍自己的大腿。
 我摸摸各自占据住我左右两腿的两颗猫头。
+嘉祥：开心归开心，拜托你们俩别一直乱动啊，很危险的。""", 200, 2.0, 0.4, 0, 1.0, 0.1, 0.1],
 ]
 iface = gr.Interface(
         gr.Slider(10, 200, step=10, value=200, label="token_count 每次生成的长度"),  # token_count
         gr.Slider(0.2, 2.0, step=0.1, value=2, label="temperature 默认0.7，高则变化丰富，低则保守求稳"),  # temperature
         gr.Slider(0.0, 1.0, step=0.05, value=0.4, label="top_p 默认1.0，高则标新立异，低则循规蹈矩"),  # top_p
+        gr.Slider(0, 500, step=1, value=0, label="top_k 默认0（不过滤），0以上时高则标新立异，低则循规蹈矩"),  # top_k
+        gr.Slider(0.05, 1.0, step=0.05, value=1.0, label="typical_p 默认1.0，高则保留模型天性，低则试图贴近人类典型习惯"),  # typical_p
         gr.Slider(0.0, 1.0, step=0.1, value=0.1, label="presencePenalty 默认0.0，避免写过的类似字"),  # presencePenalty
         gr.Slider(0.0, 1.0, step=0.1, value=0.1, label="countPenalty 默认0.0，额外避免写过多次的类似字"),  # countPenalty
     ],

utils.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import json, time, random, os
+import numpy as np
+import torch
+from torch.nn import functional as F
+class PIPELINE_ARGS():
+    """Plain container for the sampling and generation settings used by PIPELINE.
+
+    Every constructor argument is stored unchanged on the instance under the
+    same name. ``token_ban`` and ``token_stop`` default to a fresh empty list
+    per instance, so mutating one instance's list never leaks into another.
+    """
+    def __init__(self, temperature=1.0, top_p=0.85, top_k=0, typical_p=1, alpha_frequency=0.2, alpha_presence=0.2, token_ban=None, token_stop=None, chunk_len=256):
+        self.temperature = temperature
+        self.top_p = top_p
+        self.top_k = top_k # 0 disables top-k filtering in PIPELINE.sample_logits
+        self.typical_p = typical_p # values >= 1 disable typical filtering in PIPELINE.sample_logits
+        self.alpha_frequency = alpha_frequency # Frequency Penalty (as in GPT-3)
+        self.alpha_presence = alpha_presence # Presence Penalty (as in GPT-3)
+        # None (not a shared [] default) so each instance owns its own list
+        self.token_ban = [] if token_ban is None else token_ban # ban the generation of some tokens
+        self.token_stop = [] if token_stop is None else token_stop # stop generation whenever you see any token here
+        self.chunk_len = chunk_len # split input into chunks to save VRAM (shorter -> slower)
+class PIPELINE():
+    """Tokenizer plus sampling helpers wrapped around a language model.
+
+    The model only needs a ``forward(tokens, state)`` method returning
+    ``(logits, state)``; that is the sole way this class calls it.
+    """
+    def __init__(self, model, WORD_NAME):
+        """Store ``model`` and load the tokenizer selected by ``WORD_NAME``.
+
+        ``'cl100k_base'`` selects the tiktoken encoding of that name; any other
+        value is treated as a path to a ``tokenizers`` JSON file.
+        """
+        self.model = model
+        if WORD_NAME == 'cl100k_base':
+            import tiktoken
+            self.tokenizer = tiktoken.get_encoding(WORD_NAME)
+        else:
+            from tokenizers import Tokenizer
+            self.tokenizer = Tokenizer.from_file(WORD_NAME)
+    def refine_context(self, context):
+        """Normalise a prompt: trim every line, drop empty lines, prefix a newline.
+
+        The result always starts with ``'\\n'``; an empty or all-blank input
+        yields exactly ``'\\n'``.
+        """
+        lines = [line.strip().strip('\u3000').strip('\r') for line in context.strip().split('\n')]
+        lines = [line for line in lines if line != '']
+        return '\n' + '\n'.join(lines).strip()
+    def encode(self, x):
+        """Return the token ids for the string ``x``."""
+        if 'tiktoken' in str(type(self.tokenizer)):
+            return self.tokenizer.encode(x)
+        else:
+            # ``tokenizers`` returns an Encoding object; the ids live on ``.ids``
+            return self.tokenizer.encode(x).ids
+    def decode(self, x):
+        """Return the string for the token ids ``x``."""
+        return self.tokenizer.decode(x)
+    @staticmethod
+    def _first_index_above(cumulative, threshold):
+        """Index of the first entry of ``cumulative`` that exceeds ``threshold``.
+
+        Falls back to the last index when no entry exceeds it. A bare
+        ``np.argmax`` on an all-False mask returns 0, which would shrink the
+        kept set to a single token (e.g. greedy decoding at ``top_p=1.0``).
+        """
+        above = cumulative > threshold
+        if not above.any():
+            return len(cumulative) - 1
+        return int(np.argmax(above))
+    def _typical_drop_mask(self, probs, typical_p):
+        """Boolean tensor marking the tokens that typical sampling discards.
+
+        Tokens are ranked by ``|-log p - H|`` (distance of their information
+        content from the entropy ``H`` of ``probs``); the closest ones are kept
+        until their cumulative probability exceeds ``typical_p``.
+        """
+        neg_log_probs = -torch.log(probs)
+        # nansum: zero-probability tokens give inf * 0 = nan and must not count
+        entropy = torch.nansum(neg_log_probs * probs, dim=-1, keepdim=True)
+        typical_scores = torch.abs(neg_log_probs - entropy)
+        order = torch.argsort(typical_scores)
+        cumulative = torch.cumsum(probs[order], dim=-1).cpu().numpy()
+        cutoff = float(typical_scores[order][self._first_index_above(cumulative, typical_p)])
+        return typical_scores > cutoff
+    def sample_logits(self, logits, temperature=1.0, top_p=0.85, top_k=0,typical_p=1):
+        """Sample one token id from a 1-D ``logits`` tensor.
+
+        Filters are applied to the softmax probabilities in this order: top-p
+        (nucleus), top-k (only when ``0 < top_k < vocab size``), typical
+        (only when ``typical_p < 1``). The surviving probabilities are then
+        raised to ``1 / temperature`` and sampled.
+
+        Returns the sampled token id as a Python ``int``.
+        """
+        probs = F.softmax(logits.float(), dim=-1)
+        top_k = int(top_k)
+        # The typical mask is computed from the unfiltered distribution.
+        typical_drop = self._typical_drop_mask(probs, typical_p) if typical_p < 1 else None
+        if probs.device == torch.device('cpu'):
+            probs = probs.numpy()
+            sorted_ids = np.argsort(probs)
+            sorted_probs = probs[sorted_ids][::-1]
+            cumulative_probs = np.cumsum(sorted_probs)
+            cutoff = float(sorted_probs[self._first_index_above(cumulative_probs, top_p)])
+            probs[probs < cutoff] = 0
+            if 0 < top_k < len(probs):
+                probs[sorted_ids[:-top_k]] = 0
+            if typical_drop is not None:
+                typical_drop = typical_drop.numpy()
+                # Skip the typical filter if it would remove every remaining
+                # candidate (all-zero distribution -> division by zero below).
+                if np.any(probs[~typical_drop] > 0):
+                    probs[typical_drop] = 0
+            if temperature != 1.0:
+                probs = probs ** (1.0 / temperature)
+            probs = probs / np.sum(probs)
+            out = np.random.choice(a=len(probs), p=probs)
+            return int(out)
+        else:
+            sorted_ids = torch.argsort(probs)
+            sorted_probs = torch.flip(probs[sorted_ids], dims=(0,))
+            cumulative_probs = torch.cumsum(sorted_probs, dim=-1).cpu().numpy()
+            cutoff = float(sorted_probs[self._first_index_above(cumulative_probs, top_p)])
+            probs[probs < cutoff] = 0
+            if 0 < top_k < len(probs):
+                probs[sorted_ids[:-top_k]] = 0
+            if typical_drop is not None:
+                # Same guard as the CPU branch: multinomial rejects all-zero input.
+                if bool((probs[~typical_drop] > 0).any()):
+                    probs[typical_drop] = 0
+            if temperature != 1.0:
+                probs = probs ** (1.0 / temperature)
+            out = torch.multinomial(probs, num_samples=1)[0]
+            return int(out)
+    def generate(self, ctx, token_count=100, args=None, callback=None, state=None):
+        """Generate up to ``token_count`` tokens continuing ``ctx``.
+
+        ``args`` supplies the sampling settings; when omitted a fresh
+        ``PIPELINE_ARGS()`` is created for this call. ``callback``, if given,
+        is called with each newly decoded text piece. Generation stops early
+        when a token listed in ``args.token_stop`` is sampled.
+
+        Returns the concatenated decoded text.
+        """
+        if args is None:
+            # Built per call rather than once in the signature, so calls never
+            # share one mutable settings object.
+            args = PIPELINE_ARGS()
+        all_tokens = []
+        out_last = 0
+        out_str = ''
+        occurrence = {}
+        for i in range(token_count):
+            # forward & adjust prob.
+            tokens = self.encode(ctx) if i == 0 else [token]
+            while len(tokens) > 0:
+                # feed the prompt in chunks of chunk_len tokens
+                out, state = self.model.forward(tokens[:args.chunk_len], state)
+                tokens = tokens[args.chunk_len:]
+            for n in args.token_ban:
+                out[n] = -float('inf')
+            for n in occurrence:
+                out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
+            # sampler
+            token = self.sample_logits(out, temperature=args.temperature, top_p=args.top_p, top_k=args.top_k, typical_p=args.typical_p)
+            if token in args.token_stop:
+                break
+            all_tokens += [token]
+            occurrence[token] = occurrence.get(token, 0) + 1
+            # output
+            tmp = self.decode(all_tokens[out_last:])
+            if '\ufffd' not in tmp: # is valid utf-8 string?
+                if callback:
+                    callback(tmp)
+                out_str += tmp
+                # tokens that decode to an incomplete character stay pending
+                out_last = i + 1
+        return out_str