Spaces:

DaniilAlpha
/

answerer-api

Paused

App Files Community

DaniilAlpha committed on Nov 23, 2023

Commit

91c3b11

1 Parent(s): 05dc8f9

Update answerer.py

Browse files

Files changed (1) hide show

answerer.py +27 -32

answerer.py CHANGED Viewed

@@ -5,19 +5,14 @@ from rwkv.model import RWKV
 from rwkv.utils import PIPELINE, PIPELINE_ARGS
 class Answerer:
-  def __init__(self, repo: str, filename: str, vocab: str, strategy: str, ctx_limit: int):
     os.environ["RWKV_JIT_ON"] = "1"
     # os.environ["RWKV_CUDA_ON"] = "1"
-    self.__model = RWKV(hf_hub_download(repo, filename), strategy=strategy)
     self.__pipeline = PIPELINE(self.__model, vocab)
     self.ctx_limit = ctx_limit
-  __model: RWKV
-  __pipeline: PIPELINE
-  ctx_limit: int
   def __call__(
     self,
     input: str,
@@ -45,35 +40,35 @@ class Answerer:
     current_token = None
     state = None
     for _ in range(max_output_length_tk):
-        out, state = self.__model.forward(
-          [current_token] if current_token else self.__pipeline.encode(input)[-self.ctx_limit:],
-          state,
-        )
-        for token in occurrences:
-            out[token] -= args.alpha_presence + occurrences[token] * args.alpha_frequency
-        current_token = self.__pipeline.sample_logits(
-          out,
-          temperature=args.temperature,
-          top_p=args.top_p,
-        )
-        if current_token in args.token_stop: break
-        tokens.append(current_token)
-        for token in occurrences:
-            occurrences[token] *= 0.996
-        if current_token in occurrences:
-          occurrences[current_token] += 1
-        else:
-          occurrences[current_token] = 1
-        tmp = self.__pipeline.decode(tokens)
-        if "\ufffd" not in tmp:
-            tokens.clear()
-            result += tmp
-            yield result.strip()
     tokens.clear()
     occurrences.clear()

 from rwkv.utils import PIPELINE, PIPELINE_ARGS
 class Answerer:
+  def __init__(self, repo: str, model: str, vocab: str, strategy: str, ctx_limit: int):
     os.environ["RWKV_JIT_ON"] = "1"
     # os.environ["RWKV_CUDA_ON"] = "1"
+    self.__model = RWKV(hf_hub_download(repo, f"{model}.pth"), strategy=strategy)
     self.__pipeline = PIPELINE(self.__model, vocab)
     self.ctx_limit = ctx_limit
   def __call__(
     self,
     input: str,
     current_token = None
     state = None
     for _ in range(max_output_length_tk):
+      out, state = self.__model.forward(
+        [current_token] if current_token else self.__pipeline.encode(input)[-self.ctx_limit:],
+        state,
+      )
+      for token in occurrences:
+        out[token] -= args.alpha_presence + occurrences[token] * args.alpha_frequency
+      current_token = self.__pipeline.sample_logits(
+        out,
+        temperature=args.temperature,
+        top_p=args.top_p,
+      )
+      if current_token in args.token_stop: break
+      tokens.append(current_token)
+      for token in occurrences:
+        occurrences[token] *= 0.996
+      if current_token in occurrences:
+        occurrences[current_token] += 1
+      else:
+        occurrences[current_token] = 1
+      tmp = self.__pipeline.decode(tokens)
+      if "\ufffd" not in tmp:
+        tokens.clear()
+        result += tmp
+        yield result.strip()
     tokens.clear()
     occurrences.clear()