Spaces:

BlinkDL
/

RWKV-Gradio-2

Running on T4

App Files Files Community

BlinkDL committed on Jun 5, 2024

Commit

b761794

verified ·

1 Parent(s): 8919796

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -5

app.py CHANGED Viewed

@@ -25,24 +25,36 @@ pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
 args = model.args
 eng_name = 'rwkv-x060-eng_single_round_qa-7B-20240516-ctx2048'
-chn_name = 'rwkv-x060-chn_single_round_qa-7B-20240516-ctx2048'
 eng_file = hf_hub_download(repo_id="BlinkDL/temp-latest-training-models", filename=f"{eng_name}.pth")
-chn_file = hf_hub_download(repo_id="BlinkDL/temp-latest-training-models", filename=f"{chn_name}.pth")
 state_eng_raw = torch.load(eng_file)
-state_chn_raw = torch.load(chn_file)
 state_eng = [None] * args.n_layer * 3
 state_chn = [None] * args.n_layer * 3
 for i in range(args.n_layer):
     dd = model.strategy[i]
     dev = dd.device
     atype = dd.atype
     state_eng[i*3+0] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
-    state_chn[i*3+0] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
     state_eng[i*3+1] = state_eng_raw[f'blocks.{i}.att.time_state'].transpose(1,2).to(dtype=torch.float, device=dev).requires_grad_(False).contiguous()
-    state_chn[i*3+1] = state_chn_raw[f'blocks.{i}.att.time_state'].transpose(1,2).to(dtype=torch.float, device=dev).requires_grad_(False).contiguous()
     state_eng[i*3+2] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
     state_chn[i*3+2] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
 def generate_prompt(instruction, input=""):
     instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
     input = input.strip().replace('\r\n','\n').replace('\n\n','\n')
@@ -208,6 +220,56 @@ def evaluate_chn(
     torch.cuda.empty_cache()
     yield out_str.strip()
 examples = [
     ["Assistant: How can we craft an engaging story featuring vampires on Mars? Let's think step by step and provide an expert response.", gen_limit, 1, 0.3, 0.5, 0.5],
     ["Assistant: How can we persuade Elon Musk to follow you on Twitter? Let's think step by step and provide an expert response.", gen_limit, 1, 0.3, 0.5, 0.5],
@@ -242,6 +304,14 @@ examples_chn = [
     ["用HTML编写一个简单的网站。当用户点击按钮时，从4个笑话的列表中随机显示一个笑话。", gen_limit_long, 1, 0.2, 0.3, 0.3],
 ]
 ##########################################################################
 with gr.Blocks(title=title) as demo:
@@ -307,6 +377,26 @@ with gr.Blocks(title=title) as demo:
         clear.click(lambda: None, [], [output])
         data.click(lambda x: x, [data], [prompt, token_count, temperature, top_p, presence_penalty, count_penalty])
 demo.queue(concurrency_count=1, max_size=10)
 demo.launch(share=False)

 args = model.args
 eng_name = 'rwkv-x060-eng_single_round_qa-7B-20240516-ctx2048'
 eng_file = hf_hub_download(repo_id="BlinkDL/temp-latest-training-models", filename=f"{eng_name}.pth")
 state_eng_raw = torch.load(eng_file)
 state_eng = [None] * args.n_layer * 3
+chn_name = 'rwkv-x060-chn_single_round_qa-7B-20240516-ctx2048'
+chn_file = hf_hub_download(repo_id="BlinkDL/temp-latest-training-models", filename=f"{chn_name}.pth")
+state_chn_raw = torch.load(chn_file)
 state_chn = [None] * args.n_layer * 3
+wyw_name = 'rwkv-x060-chn_文言文和古典名著_single_round_qa-7B-20240601-ctx2048'
+wyw_file = hf_hub_download(repo_id="BlinkDL/temp-latest-training-models", filename=f"{wyw_name}.pth")
+state_wyw_raw = torch.load(wyw_file)
+state_wyw = [None] * args.n_layer * 3
 for i in range(args.n_layer):
     dd = model.strategy[i]
     dev = dd.device
     atype = dd.atype
     state_eng[i*3+0] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
     state_eng[i*3+1] = state_eng_raw[f'blocks.{i}.att.time_state'].transpose(1,2).to(dtype=torch.float, device=dev).requires_grad_(False).contiguous()
     state_eng[i*3+2] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+    state_chn[i*3+0] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+    state_chn[i*3+1] = state_chn_raw[f'blocks.{i}.att.time_state'].transpose(1,2).to(dtype=torch.float, device=dev).requires_grad_(False).contiguous()
     state_chn[i*3+2] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+    state_wyw[i*3+0] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+    state_wyw[i*3+1] = state_chn_raw[f'blocks.{i}.att.time_state'].transpose(1,2).to(dtype=torch.float, device=dev).requires_grad_(False).contiguous()
+    state_wyw[i*3+2] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
 def generate_prompt(instruction, input=""):
     instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
     input = input.strip().replace('\r\n','\n').replace('\n\n','\n')
     torch.cuda.empty_cache()
     yield out_str.strip()
+def evaluate_wyw(
+    ctx,
+    token_count=gen_limit,
+    temperature=1.0,
+    top_p=0.3,
+    presencePenalty=0.3,
+    countPenalty=0.3,
+):
+    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
+                     alpha_frequency = countPenalty,
+                     alpha_presence = presencePenalty,
+                     token_ban = [], # ban the generation of some tokens
+                     token_stop = [0]) # stop generation whenever you see any token here
+    ctx = qa_prompt(ctx)
+    all_tokens = []
+    out_last = 0
+    out_str = ''
+    occurrence = {}
+    state = copy.deepcopy(state_wyw)
+    for i in range(int(token_count)):
+        out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
+        for n in occurrence:
+            out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
+        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
+        if token in args.token_stop:
+            break
+        all_tokens += [token]
+        for xxx in occurrence:
+            occurrence[xxx] *= penalty_decay
+        if token not in occurrence:
+            occurrence[token] = 1
+        else:
+            occurrence[token] += 1
+        tmp = pipeline.decode(all_tokens[out_last:])
+        if '\ufffd' not in tmp:
+            out_str += tmp
+            yield out_str.strip()
+            out_last = i + 1
+    gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    print(f'{timestamp} - vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
+    del out
+    del state
+    gc.collect()
+    torch.cuda.empty_cache()
+    yield out_str.strip()
 examples = [
     ["Assistant: How can we craft an engaging story featuring vampires on Mars? Let's think step by step and provide an expert response.", gen_limit, 1, 0.3, 0.5, 0.5],
     ["Assistant: How can we persuade Elon Musk to follow you on Twitter? Let's think step by step and provide an expert response.", gen_limit, 1, 0.3, 0.5, 0.5],
     ["用HTML编写一个简单的网站。当用户点击按钮时，从4个笑话的列表中随机显示一个笑话。", gen_limit_long, 1, 0.2, 0.3, 0.3],
 ]
+examples_wyw = [
+    ["我和前男友分手了", gen_limit_long, 1, 0.2, 0.3, 0.3],
+    ["量子计算机的原理", gen_limit_long, 1, 0.2, 0.3, 0.3],
+    ["李白和杜甫的结拜故事", gen_limit_long, 1, 0.2, 0.3, 0.3],
+    ["林黛玉和伏地魔的关系是什么？", gen_limit_long, 1, 0.2, 0.3, 0.3],
+    ["我被同事陷害了，帮我写一篇文言文骂他", gen_limit_long, 1, 0.2, 0.3, 0.3],
+]
 ##########################################################################
 with gr.Blocks(title=title) as demo:
         clear.click(lambda: None, [], [output])
         data.click(lambda x: x, [data], [prompt, token_count, temperature, top_p, presence_penalty, count_penalty])
+    with gr.Tab("=== WenYanWen Q/A ==="):
+        gr.Markdown(f"This is [RWKV-6](https://huggingface.co/BlinkDL/rwkv-6-world) state-tuned to [WenYanWen 文言文 Q/A](https://huggingface.co/BlinkDL/temp-latest-training-models/blob/main/{wyw_name}.pth). RWKV is a 100% attention-free RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM), and we have [300+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). Demo limited to ctxlen {ctx_limit}.")
+        with gr.Row():
+            with gr.Column():
+                prompt = gr.Textbox(lines=2, label="Prompt", value="我和前男友分手了")
+                token_count = gr.Slider(10, gen_limit_long, label="Max Tokens", step=10, value=gen_limit_long)
+                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.0)
+                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.2)
+                presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0.3)
+                count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=0.3)
+            with gr.Column():
+                with gr.Row():
+                    submit = gr.Button("Submit", variant="primary")
+                    clear = gr.Button("Clear", variant="secondary")
+                output = gr.Textbox(label="Output", lines=30)
+        data = gr.Dataset(components=[prompt, token_count, temperature, top_p, presence_penalty, count_penalty], samples=examples_chn, samples_per_page=50, label="Examples", headers=["Prompt", "Max Tokens", "Temperature", "Top P", "Presence Penalty", "Count Penalty"])
+        submit.click(evaluate_wyw, [prompt, token_count, temperature, top_p, presence_penalty, count_penalty], [output])
+        clear.click(lambda: None, [], [output])
+        data.click(lambda x: x, [data], [prompt, token_count, temperature, top_p, presence_penalty, count_penalty])
 demo.queue(concurrency_count=1, max_size=10)
 demo.launch(share=False)