Spaces:

BlinkDL
/

RWKV-Gradio-2

Running on T4

BlinkDL committed on Jul 30

Commit

cb2163d

verified ·

1 Parent(s): 34ca90e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ nvmlInit()
 gpu_h = nvmlDeviceGetHandleByIndex(0)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-ctx_limit = 1000
 gen_limit = 1000
 ########################## text rwkv ################################################################
@@ -68,7 +68,11 @@ def evaluate(
     for i in range(int(token_count)):
         input_ids = pipeline_v6.encode(ctx)[-ctx_limit:] if i == 0 else [token]
-        out, state = model_v6.forward(input_ids, state)
         for n in occurrence:
             out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)

 gpu_h = nvmlDeviceGetHandleByIndex(0)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ctx_limit = 3000
 gen_limit = 1000
 ########################## text rwkv ################################################################
     for i in range(int(token_count)):
         input_ids = pipeline_v6.encode(ctx)[-ctx_limit:] if i == 0 else [token]
+        CHUNK_LEN = 512
+        # out, state = model_v6.forward(input_ids, state)
+        while len(input_ids) > 0:
+            out, state = model_v6.forward(input_ids[:CHUNK_LEN], state)
+            input_ids = input_ids[CHUNK_LEN:]
         for n in occurrence:
             out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)