Spaces:

BlinkDL
/

RWKV-Gradio-1

Running on T4

App Files Community

BlinkDL committed 18 days ago

Commit

e692a01

verified ·

1 Parent(s): 848abf2

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -7

app.py CHANGED Viewed

@@ -77,12 +77,22 @@ def evaluate(
     state = None
     for i in range(int(token_count)):
-        input_ids = pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token]
-        out, state = model.forward(input_ids, state)
         for n in occurrence:
-            out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
-        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
         if token in args.token_stop:
             break
         all_tokens += [token]
@@ -91,8 +101,8 @@ def evaluate(
         ttt = pipeline.decode([token])
         www = 1
-        if ttt in ' \t0123456789':
-            www = 0
         #elif ttt in '\r\n,.;?!"\':+-*/=#@$%^&_`~|<>\\()[]{}，。；“”：？！（）【】':
         #    www = 0.5
         if token not in occurrence:
@@ -138,7 +148,7 @@ with gr.Blocks(title=title, theme=gr.themes.Base()) as demo:
     gr.HTML(f"<div style=\"text-align: center;\">\n<h1>{title}</h1>\n</div>")
     with gr.Tab("=== Base Model (Raw Generation) ==="):
-        gr.Markdown(f'This is [RWKV7 G-series](https://huggingface.co/BlinkDL/rwkv7-g1) 2.9B reasoning base LM - an attention-free pure RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM). Try topp0 penalty0 for math/code/translation. Supports 100+ world languages and code. Check [400+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Can try examples (bottom of page) *** (can edit them). Demo limited to ctxlen {ctx_limit}.')
         with gr.Row():
             with gr.Column():
                 prompt = gr.Textbox(lines=6, label="Prompt", value="User: simulate SpaceX mars landing using python\n\nAssistant: <think")

     state = None
     for i in range(int(token_count)):
+        if i == 0:
+            input_ids = pipeline.encode(ctx)[-ctx_limit:]
+            out, state = model.forward(input_ids, state)
+            for j in range(len(state)):
+                static_state_in[j].copy_(state[j])
+            static_output.copy_(out)
+        else:
+            static_input.copy_(model.z['emb.weight'][token])
+            graph.replay()
+            for j in range(len(state)):
+                static_state_in[j].copy_(static_state_out[j])
         for n in occurrence:
+            static_output[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
+        token = pipeline.sample_logits(static_output, temperature=args.temperature, top_p=args.top_p)
         if token in args.token_stop:
             break
         all_tokens += [token]
         ttt = pipeline.decode([token])
         www = 1
+        #if ttt in ' \t0123456789':
+        #    www = 0
         #elif ttt in '\r\n,.;?!"\':+-*/=#@$%^&_`~|<>\\()[]{}，。；“”：？！（）【】':
         #    www = 0.5
         if token not in occurrence:
     gr.HTML(f"<div style=\"text-align: center;\">\n<h1>{title}</h1>\n</div>")
     with gr.Tab("=== Base Model (Raw Generation) ==="):
+        gr.Markdown(f'This is [RWKV7 G-series](https://huggingface.co/BlinkDL/rwkv7-g1) 2.9B reasoning base LM - an attention-free pure RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM). Try topp0 penalty0 for math/code/translation. Supports 100+ world languages and code. Check [600+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Can try examples (bottom of page) *** (can edit them). Demo limited to ctxlen {ctx_limit}.')
         with gr.Row():
             with gr.Column():
                 prompt = gr.Textbox(lines=6, label="Prompt", value="User: simulate SpaceX mars landing using python\n\nAssistant: <think")