Jellyfish042 committed on
Commit
8d6299f
·
1 Parent(s): 6e818da

Init RWKV compressor Space demo

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ models/*.pth
2
+ models/.cache/
3
+ __pycache__/
4
+ *.pyc
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: LLM Compressor
3
  emoji: 🐨
4
  colorFrom: gray
5
  colorTo: pink
@@ -9,4 +9,26 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: RWKV LLM Text Compressor
3
  emoji: 🐨
4
  colorFrom: gray
5
  colorTo: pink
 
9
  pinned: false
10
  ---
11
 
12
+ # RWKV LLM Text Compressor
13
+
14
+ This Space demonstrates LLM-based arithmetic coding using RWKV. It is a proof of
15
+ concept and is intentionally slow. The compressed output is only valid when the
16
+ same model, tokenizer, and context window are used for decompression.
17
+
18
+ ## Configuration
19
+
20
+ - `RWKV_MODEL_PATH`: Path to a local RWKV `.pth` file (or name without extension).
21
+ - `RWKV_TOKENIZER`: Path to `rwkv_vocab_v20230424.txt`. Default: `support/rwkv_vocab_v20230424.txt`.
22
+ - `RWKV_STRATEGY`: RWKV strategy string (example: `cpu fp32`, `cuda fp16`).
23
+
24
+ ## Notes
25
+
26
+ - CPU-only Spaces should keep `RWKV_STRATEGY=cpu fp32`. The app forces CPU when CUDA
27
+ is unavailable.
28
+ - The vocab file is not bundled; place `rwkv_vocab_v20230424.txt` in `support/` or
29
+ set `RWKV_TOKENIZER` to its path.
30
+ - The app auto-detects a `.pth` model under `models/` if `RWKV_MODEL_PATH` is not set.
31
+ - If no model is found, the app downloads `rwkv7-g1a-0.1b-20250728-ctx4096.pth` into `models/`.
32
+ - Input text is limited to 8192 characters.
33
+ - Compression and decompression are slow and not suitable for production use.
34
+ - Output is not portable across different models or tokenizers.
app.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ import shutil
4
+ import tempfile
5
+ import urllib.request
6
+ from pathlib import Path
7
+
8
+ import gradio as gr
9
+ import torch
10
+
11
+ from llm_compressor import compress_tokens, decompress_bytes, load_rwkv_model, tokenize_text
12
+
13
+ MAX_INPUT_CHARS = 8192
14
+ SCRIPT_DIR = Path(__file__).parent.absolute()
15
+ SUPPORT_DIR = SCRIPT_DIR / "support"
16
+ MODELS_DIR = SCRIPT_DIR / "models"
17
+ DEFAULT_MODEL_FILENAME = "rwkv7-g1a-0.1b-20250728-ctx4096.pth"
18
+ DEFAULT_MODEL_PATH = str(MODELS_DIR / DEFAULT_MODEL_FILENAME)
19
+ DEFAULT_MODEL_URL = "https://huggingface.co/BlinkDL/rwkv7-g1/resolve/main/" "rwkv7-g1a-0.1b-20250728-ctx4096.pth?download=true"
20
+ DEFAULT_TOKENIZER_PATH = str(SUPPORT_DIR / "rwkv_vocab_v20230424.txt")
21
+
22
+
23
def _patch_gradio_client_schema():
    """Monkey-patch gradio_client's JSON-schema helpers to tolerate boolean schemas.

    NOTE(review): presumably works around gradio_client versions that crash when a
    JSON schema is a bare bool (``True``/``False`` are valid JSON Schemas) — confirm
    against the pinned gradio version. Best-effort: silently skipped when
    gradio_client cannot be imported, and idempotent via the ``_rwkv_patch`` sentinel.
    """
    try:
        from gradio_client import utils as gr_client_utils
    except Exception:
        # gradio_client unavailable; nothing to patch.
        return

    # Already patched (e.g. after a hot-reload) — avoid wrapping the wrappers.
    if getattr(gr_client_utils, "_rwkv_patch", False):
        return

    # Keep references to the originals so non-bool schemas still go through them.
    original_get_type = gr_client_utils.get_type
    original_json_schema = gr_client_utils._json_schema_to_python_type

    def _patched_get_type(schema):
        if isinstance(schema, bool):
            return "Any"
        return original_get_type(schema)

    gr_client_utils.get_type = _patched_get_type
    gr_client_utils._json_schema_to_python_type = lambda schema, defs=None: "Any" if isinstance(schema, bool) else original_json_schema(schema, defs)
    gr_client_utils._rwkv_patch = True
43
+
44
+
45
+ _patch_gradio_client_schema()
46
+
47
+
48
+ def _write_temp_file(data, suffix=".llmc"):
49
+ tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
50
+ tmp.write(data)
51
+ tmp.flush()
52
+ tmp.close()
53
+ return tmp.name
54
+
55
+
56
def _resolve_default_model_path():
    """Pick the model path: env override, bundled default, download, or any local .pth.

    Returns "" when no checkpoint can be located so the caller can raise a
    user-facing error.
    """
    override = os.getenv("RWKV_MODEL_PATH")
    if override:
        return override

    bundled = Path(DEFAULT_MODEL_PATH)
    if bundled.is_file():
        return str(bundled)

    # Try fetching the default checkpoint when a download URL is configured.
    if DEFAULT_MODEL_URL:
        fetched = _download_default_model()
        if fetched:
            return fetched

    # Last resort: any checkpoint already present under models/.
    if MODELS_DIR.is_dir():
        found = sorted(MODELS_DIR.glob("*.pth"))
        if found:
            return str(found[0])

    return ""
72
+
73
+
74
def _resolve_default_tokenizer_path():
    """Pick the vocab path: env override, bundled default, or any vocab in support/.

    Unlike the model resolver, this always returns a string — possibly the
    (missing) default path — so the caller can report a precise error.
    """
    override = os.getenv("RWKV_TOKENIZER")
    if override:
        return override

    vocab = Path(DEFAULT_TOKENIZER_PATH)
    if vocab.is_file():
        return str(vocab)

    # Accept any rwkv_vocab_v*.txt dropped into support/ as a substitute.
    if SUPPORT_DIR.is_dir():
        found = sorted(SUPPORT_DIR.glob("rwkv_vocab_v*.txt"))
        if found:
            return str(found[0])

    return str(vocab)
86
+
87
+
88
def _download_default_model():
    """Download the default RWKV checkpoint into models/ unless already present.

    Streams to a ``.tmp`` sibling first and atomically renames on success, so a
    partial download never masquerades as a complete checkpoint.

    Returns:
        The checkpoint path as a string, or "" on failure. Best-effort by
        design: errors are printed, not raised, so callers can fall back.
    """
    if not DEFAULT_MODEL_URL:
        return ""
    dest_path = Path(DEFAULT_MODEL_PATH)
    if dest_path.is_file():
        return str(dest_path)
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = dest_path.with_suffix(dest_path.suffix + ".tmp")
    try:
        print(f"Downloading RWKV model to {dest_path}...")
        # Socket-level timeout so a stalled connection cannot hang the app
        # forever; it bounds individual blocking socket operations, not the
        # total transfer time, so large downloads still work.
        with urllib.request.urlopen(DEFAULT_MODEL_URL, timeout=60) as response, open(tmp_path, "wb") as f:
            shutil.copyfileobj(response, f)
        tmp_path.replace(dest_path)
        return str(dest_path)
    except Exception as exc:
        # Best-effort cleanup of the partial file; a secondary unlink failure
        # must not mask the original download error.
        try:
            if tmp_path.exists():
                tmp_path.unlink()
        except OSError:
            pass
        print(f"Failed to download RWKV model: {exc}")
        return ""
107
+
108
+
109
+ def _resolve_model_path(value):
110
+ if not value:
111
+ return None
112
+ path = Path(value).expanduser()
113
+ candidates = [path]
114
+ if path.suffix != ".pth":
115
+ candidates.append(path.with_suffix(".pth"))
116
+ if not path.is_absolute():
117
+ candidates.append(MODELS_DIR / path)
118
+ if path.suffix != ".pth":
119
+ candidates.append((MODELS_DIR / path).with_suffix(".pth"))
120
+ for candidate in candidates:
121
+ if candidate.is_file():
122
+ return candidate
123
+ return None
124
+
125
+
126
+ def _resolve_tokenizer_path(value):
127
+ if not value:
128
+ return None
129
+ path = Path(value).expanduser()
130
+ candidates = [path]
131
+ if not path.is_absolute():
132
+ candidates.append(SUPPORT_DIR / path)
133
+ for candidate in candidates:
134
+ if candidate.is_file():
135
+ return candidate
136
+ return None
137
+
138
+
139
def _resolve_strategy():
    """Effective RWKV strategy: RWKV_STRATEGY env var (default "cpu fp32"), CPU-forced when CUDA is absent."""
    requested = os.getenv("RWKV_STRATEGY", "cpu fp32")
    return _normalize_strategy(requested)
141
+
142
+
143
+ def _extract_file_bytes(file_data):
144
+ if file_data is None:
145
+ return None
146
+ if isinstance(file_data, (bytes, bytearray)):
147
+ return bytes(file_data)
148
+ if isinstance(file_data, dict) and "data" in file_data:
149
+ return file_data["data"]
150
+ if isinstance(file_data, str):
151
+ with open(file_data, "rb") as f:
152
+ return f.read()
153
+ if hasattr(file_data, "read"):
154
+ return file_data.read()
155
+ raise gr.Error("Unsupported uploaded file format.")
156
+
157
+
158
def _get_compressed_bytes(b64_data, file_data):
    """Return the compressed payload, preferring an uploaded file over the textbox.

    Raises gr.Error when neither source yields data or the base64 is invalid.
    """
    uploaded = _extract_file_bytes(file_data)
    if uploaded:
        return uploaded
    if not b64_data or not b64_data.strip():
        raise gr.Error("Compressed base64 data is empty.")
    try:
        # validate=True rejects non-alphabet characters instead of ignoring them.
        decoded = base64.b64decode(b64_data.encode("ascii"), validate=True)
    except Exception as exc:
        raise gr.Error(f"Invalid base64 data: {exc}") from exc
    return decoded
168
+
169
+
170
def _load_model_and_tokenizer(model_path, tokenizer_name, strategy):
    """Resolve both paths and load the (cached) model, mapping failures to gr.Error.

    Returns the ``(model, tokenizer)`` pair from load_rwkv_model. All failure
    modes surface as gr.Error so Gradio shows them to the user.
    """
    resolved_model = _resolve_model_path(model_path)
    if not resolved_model:
        raise gr.Error(f"RWKV model file not found: {model_path}. Put a .pth in {MODELS_DIR} or set RWKV_MODEL_PATH.")
    resolved_tokenizer = _resolve_tokenizer_path(tokenizer_name)
    if not resolved_tokenizer:
        raise gr.Error(f"Tokenizer vocab file not found: {tokenizer_name}. Put rwkv_vocab_v20230424.txt in {SUPPORT_DIR} " "or set RWKV_TOKENIZER.")
    try:
        # Stringify Paths so load_rwkv_model's lru_cache keys stay stable strings.
        return load_rwkv_model(str(resolved_model), str(resolved_tokenizer), strategy)
    except Exception as exc:
        raise gr.Error(f"Failed to load RWKV model: {exc}") from exc
181
+
182
+
183
+ def _format_compress_stats(stats):
184
+ return "\n".join(
185
+ [
186
+ f"- Tokens: {stats['tokens']}",
187
+ f"- Original bytes: {stats['original_bytes']}",
188
+ f"- Compressed bytes: {stats['compressed_bytes']}",
189
+ f"- Compression ratio: {stats['ratio'] * 100:.2f}%",
190
+ f"- Theoretical ratio: {stats['theoretical_ratio'] * 100:.2f}%",
191
+ f"- Time: {stats['duration_s']:.2f}s",
192
+ f"- Speed: {stats['speed_toks_per_s']:.2f} tokens/s",
193
+ ]
194
+ )
195
+
196
+
197
+ def _format_decompress_stats(stats):
198
+ return "\n".join(
199
+ [
200
+ f"- Tokens: {stats['tokens']}",
201
+ f"- Time: {stats['duration_s']:.2f}s",
202
+ ]
203
+ )
204
+
205
+
206
+ def _normalize_strategy(strategy):
207
+ if "cuda" in strategy and not torch.cuda.is_available():
208
+ return "cpu fp32"
209
+ return strategy
210
+
211
+
212
def compress_ui(text, context_window, progress=gr.Progress()):
    """Gradio handler: compress *text* with the RWKV arithmetic coder.

    Returns (base64 payload, stats Markdown, temp-file path for download).
    Raises gr.Error on empty or oversized input. Model/tokenizer/strategy are
    re-resolved per call; the heavy model load itself is cached upstream.
    """
    if not text or not text.strip():
        raise gr.Error("Input text is empty.")
    if len(text) > MAX_INPUT_CHARS:
        raise gr.Error(f"Input is too long ({len(text)} chars). Max is {MAX_INPUT_CHARS}.")

    model_path = _resolve_default_model_path()
    tokenizer_path = _resolve_default_tokenizer_path()
    # Track both requested and effective strategy so a CPU fallback is reported.
    requested_strategy = os.getenv("RWKV_STRATEGY", "cpu fp32")
    effective_strategy = _resolve_strategy()
    model, tokenizer = _load_model_and_tokenizer(model_path, tokenizer_path, effective_strategy)

    tokens = tokenize_text(tokenizer, text)
    if not tokens:
        raise gr.Error("Tokenized input is empty.")

    # UTF-8 byte length of the input is the baseline for the compression ratio.
    original_bytes = len(text.encode("utf-8"))
    data, stats = compress_tokens(
        tokens,
        model,
        context_window=context_window,
        original_bytes=original_bytes,
        progress=progress,
        progress_desc="Compressing",
    )

    b64 = base64.b64encode(data).decode("ascii")
    file_path = _write_temp_file(data)
    stats_text = _format_compress_stats(stats)
    # Surface when the CUDA strategy was silently downgraded to CPU.
    if effective_strategy != requested_strategy:
        stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
    else:
        stats_text += f"\n- Strategy: {effective_strategy}"
    return b64, stats_text, file_path
246
+
247
+
248
def decompress_ui(b64_data, file_data, context_window):
    """Gradio handler: decode compressed input (file upload or base64 textbox).

    Returns (decompressed text, stats Markdown). A correct round-trip requires
    the same model, tokenizer, and context window that produced the payload.
    """
    raw = _get_compressed_bytes(b64_data, file_data)
    model_path = _resolve_default_model_path()
    tokenizer_path = _resolve_default_tokenizer_path()
    # Track both requested and effective strategy so a CPU fallback is reported.
    requested_strategy = os.getenv("RWKV_STRATEGY", "cpu fp32")
    effective_strategy = _resolve_strategy()
    model, tokenizer = _load_model_and_tokenizer(model_path, tokenizer_path, effective_strategy)
    text, stats = decompress_bytes(raw, model, tokenizer, context_window=context_window)
    stats_text = _format_decompress_stats(stats)
    # Surface when the CUDA strategy was silently downgraded to CPU.
    if effective_strategy != requested_strategy:
        stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
    else:
        stats_text += f"\n- Strategy: {effective_strategy}"
    return text, stats_text
262
+
263
+
264
def build_ui():
    """Assemble and return the Gradio Blocks app with Compress/Decompress tabs."""
    with gr.Blocks() as demo:
        gr.Markdown("# RWKV LLM Text Compressor")
        gr.Markdown(
            "This is a proof-of-concept demo. Compression and decompression are slow, "
            "and the output is not portable across different models or tokenizers."
        )

        # Shared by both tabs: the value must match between compress and decompress.
        context_window = gr.Slider(
            label="Context window",
            minimum=128,
            maximum=4096,
            step=128,
            value=2048,
        )

        gr.Markdown(f"Max input size: {MAX_INPUT_CHARS} characters.")

        with gr.Tabs():
            with gr.Tab("Compress"):
                input_text = gr.Textbox(label="Input text", lines=10)
                compress_button = gr.Button("Compress")
                output_b64 = gr.Textbox(label="Compressed data (base64)", lines=6)
                compress_stats = gr.Markdown()
                output_file = gr.File(label="Download compressed file")

                compress_button.click(
                    compress_ui,
                    inputs=[input_text, context_window],
                    outputs=[output_b64, compress_stats, output_file],
                )

            with gr.Tab("Decompress"):
                input_b64 = gr.Textbox(label="Compressed data (base64)", lines=6)
                # type="binary" hands the handler raw bytes rather than a temp path.
                input_file = gr.File(label="Or upload compressed file", type="binary")
                decompress_button = gr.Button("Decompress")
                output_text = gr.Textbox(label="Decompressed text", lines=10)
                decompress_stats = gr.Markdown()

                decompress_button.click(
                    decompress_ui,
                    inputs=[input_b64, input_file, context_window],
                    outputs=[output_text, decompress_stats],
                )

    return demo
310
+
311
+
312
if __name__ == "__main__":
    # queue() bounds concurrent jobs; bind on all interfaces for the Space runtime.
    build_ui().queue(max_size=16).launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=False,
    )
llm_compressor.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import math
3
+ import os
4
+ import struct
5
+ import threading
6
+ import time
7
+ from functools import lru_cache
8
+
9
+ import torch
10
+
11
+ PROB_SCALE = 1 << 48
12
+ ARITHMETIC_PRECISION = 64
13
+
14
+
15
class BitOutputStream:
    """MSB-first bit writer that flushes to *file_obj* one byte at a time."""

    def __init__(self, file_obj):
        self.file_obj = file_obj
        self.byte = 0        # bits accumulated so far, most recent in the LSB
        self.bit_count = 0   # number of valid bits currently held in self.byte

    def write_bit(self, bit):
        """Append one bit (0 or 1); emits a byte once eight bits are buffered."""
        self.byte = (self.byte << 1) | bit
        self.bit_count += 1
        if self.bit_count < 8:
            return
        self.file_obj.write(bytes((self.byte,)))
        self.byte = 0
        self.bit_count = 0

    def close(self):
        """Flush any partial final byte, zero-padding it on the right."""
        if self.bit_count:
            padded = self.byte << (8 - self.bit_count)
            self.byte = padded
            self.file_obj.write(bytes((padded,)))
33
+
34
+
35
class BitInputStream:
    """MSB-first bit reader over *file_obj*; returns -1 at end of stream."""

    def __init__(self, file_obj):
        self.file_obj = file_obj
        self.byte = 0        # byte currently being consumed
        self.bit_count = 0   # bits of self.byte not yet handed out

    def read_bit(self):
        """Return the next bit (0/1), or -1 once the stream is exhausted."""
        if not self.bit_count:
            chunk = self.file_obj.read(1)
            if not chunk:
                return -1
            self.byte = chunk[0]
            self.bit_count = 8
        # Consume from the most significant remaining bit downwards.
        self.bit_count -= 1
        return (self.byte >> self.bit_count) & 1
52
+
53
+
54
class ArithmeticEncoder:
    """Integer arithmetic coder (encoder half) with underflow handling.

    Classic renormalizing implementation: [low, high] is an integer interval
    of `precision` bits; each symbol narrows it proportionally to its
    cumulative-frequency slice, and determined leading bits are emitted as
    soon as the interval settles into one half of the range.
    """

    def __init__(self, bit_output, precision=ARITHMETIC_PRECISION):
        self.bit_output = bit_output
        self.precision = precision
        self.max_val = (1 << precision) - 1          # top of the full range
        self.quarter_val = 1 << (precision - 2)      # 1/4 boundary
        self.half_val = 1 << (precision - 1)         # midpoint
        self.three_quarter_val = self.quarter_val * 3
        self.low = 0
        self.high = self.max_val
        self.pending_bits = 0                        # deferred underflow bits

    def encode_symbol(self, low_count, high_count, total_count):
        """Encode one symbol covering counts [low_count, high_count) of total_count."""
        # Narrow [low, high] to the symbol's proportional sub-interval.
        range_val = self.high - self.low + 1
        self.high = self.low + (range_val * high_count) // total_count - 1
        self.low = self.low + (range_val * low_count) // total_count

        # Renormalize: emit decided bits / record underflow, then rescale x2.
        while True:
            if self.high < self.half_val:
                # Entirely in the lower half -> next output bit is 0.
                self._write_bit(0)
            elif self.low >= self.half_val:
                # Entirely in the upper half -> next output bit is 1.
                self._write_bit(1)
                self.low -= self.half_val
                self.high -= self.half_val
            elif self.low >= self.quarter_val and self.high < self.three_quarter_val:
                # Straddling the midpoint inside the middle half: bit unknown
                # yet; defer it and expand around the midpoint.
                self.pending_bits += 1
                self.low -= self.quarter_val
                self.high -= self.quarter_val
            else:
                break

            self.low <<= 1
            self.high = (self.high << 1) | 1

    def _write_bit(self, bit):
        # Each decided bit is followed by all deferred (opposite) underflow bits.
        self.bit_output.write_bit(bit)
        while self.pending_bits > 0:
            self.bit_output.write_bit(1 - bit)
            self.pending_bits -= 1

    def finish(self):
        """Emit final bits pinning the decoder inside the last interval."""
        self.pending_bits += 1
        if self.low < self.quarter_val:
            self._write_bit(0)
        else:
            self._write_bit(1)
100
+
101
+
102
class ArithmeticDecoder:
    """Integer arithmetic coder (decoder half), mirroring ArithmeticEncoder.

    Tracks the same [low, high] interval the encoder maintained, plus `value`
    — the next `precision` bits of the stream — so the current stream
    position can be mapped back to a cumulative count.
    """

    def __init__(self, bit_input, precision=ARITHMETIC_PRECISION):
        self.bit_input = bit_input
        self.precision = precision
        self.max_val = (1 << precision) - 1
        self.quarter_val = 1 << (precision - 2)
        self.half_val = 1 << (precision - 1)
        self.three_quarter_val = self.quarter_val * 3
        self.low = 0
        self.high = self.max_val
        self.value = 0

        # Prime `value` with the first `precision` bits (zero-padded at EOF).
        for _ in range(precision):
            read_val = self.bit_input.read_bit()
            if read_val == -1:
                read_val = 0
            self.value = (self.value << 1) | read_val

    def decode_symbol_find_count(self, total_count):
        """Return the cumulative count the current stream position falls on."""
        range_val = self.high - self.low + 1
        count = ((self.value - self.low + 1) * total_count - 1) // range_val
        return count

    def update_range(self, low_count, high_count, total_count):
        """Narrow the interval for the decoded symbol; mirrors encode_symbol."""
        range_val = self.high - self.low + 1
        self.high = self.low + (range_val * high_count) // total_count - 1
        self.low = self.low + (range_val * low_count) // total_count

        while True:
            if self.high < self.half_val:
                pass  # lower half: no offset needed before rescaling
            elif self.low >= self.half_val:
                # Upper half: shift interval and value down together.
                self.value -= self.half_val
                self.low -= self.half_val
                self.high -= self.half_val
            elif self.low >= self.quarter_val and self.high < self.three_quarter_val:
                # Middle-half (underflow) case, matching the encoder.
                self.value -= self.quarter_val
                self.low -= self.quarter_val
                self.high -= self.quarter_val
            else:
                break

            self.low <<= 1
            self.high = (self.high << 1) | 1

            # Shift the next stream bit into `value` (zero-padded at EOF).
            bit = self.bit_input.read_bit()
            if bit == -1:
                bit = 0
            self.value = (self.value << 1) | bit
151
+
152
+
153
+ def _strip_pth(model_path):
154
+ return model_path[:-4] if model_path.endswith(".pth") else model_path
155
+
156
+
157
+ def _prepare_logits(logits):
158
+ if not isinstance(logits, torch.Tensor):
159
+ logits = torch.tensor(logits)
160
+ if logits.ndim > 1:
161
+ logits = logits[-1]
162
+ return logits.float()
163
+
164
+
165
def tokenize_text(tokenizer, text):
    """Encode *text* and return a plain list of int token ids.

    Works with tokenizers that return a raw id list as well as ones returning
    an Encoding-like object exposing ``.ids``.
    """
    encoded = tokenizer.encode(text)
    ids = encoded.ids if hasattr(encoded, "ids") else encoded
    return list(map(int, ids))
170
+
171
+
172
def decode_tokens(tokenizer, tokens):
    """Decode a token-id sequence back into text via the tokenizer."""
    text = tokenizer.decode(tokens)
    return text
174
+
175
+
176
# Serializes RWKV model construction — the loader below mutates process-global
# state (os.environ flags) before importing/instantiating the model.
_MODEL_LOCK = threading.Lock()
177
+
178
+
179
@lru_cache(maxsize=2)
def load_rwkv_model(model_path, tokenizer_name, strategy):
    """Load and cache an RWKV model plus its TRIE tokenizer.

    lru_cache(maxsize=2) keeps at most two (model, tokenizer) pairs alive,
    keyed on the exact argument strings — note the key is the *requested*
    strategy, so two strings that both normalize to "cpu fp32" would load
    the model twice.

    Raises ValueError for missing arguments; rwkv import/load errors propagate.
    """
    if not model_path:
        raise ValueError("RWKV model path is required.")
    if not tokenizer_name:
        raise ValueError("RWKV tokenizer name or path is required.")

    # Fall back to CPU when a CUDA strategy is requested on a CUDA-less host.
    if "cuda" in strategy and not torch.cuda.is_available():
        strategy = "cpu fp32"

    # The rwkv package reads these env vars when constructing the model.
    os.environ["RWKV_JIT_ON"] = "1"
    os.environ["RWKV_V7_ON"] = "1"
    os.environ["RWKV_CUDA_ON"] = "1" if "cuda" in strategy else "0"

    with _MODEL_LOCK:
        # Imported under the lock so the env-var setup above is not raced.
        from rwkv.model import RWKV
        from rwkv.rwkv_tokenizer import TRIE_TOKENIZER

        # RWKV() expects the checkpoint path without the .pth extension.
        model = RWKV(model=_strip_pth(model_path), strategy=strategy)
        tokenizer = TRIE_TOKENIZER(tokenizer_name)
        return model, tokenizer
200
+
201
+
202
def compress_tokens(
    tokens,
    model,
    context_window=2048,
    original_bytes=None,
    progress=None,
    progress_desc="Compressing",
):
    """Arithmetic-code *tokens* using the model's next-token distribution.

    Output layout: 4-byte big-endian token count, then the coded bitstream.
    Decompression must replay the identical model/context-window schedule
    (see decompress_bytes), since the probability tables are re-derived
    step by step on both sides.

    Args:
        tokens: Iterable of token ids to encode.
        model: RWKV model exposing ``forward([token], state)``.
        context_window: Tokens fed before the context/state is hard-reset.
        original_bytes: Input size in bytes, used only for ratio stats.
        progress: Optional Gradio-style callable for progress updates.
        progress_desc: Label passed to the progress callable.

    Returns:
        (data, stats) — the compressed bytes and a metrics dict.

    Raises:
        ValueError: on a non-positive context window or empty token list.
    """
    if context_window <= 0:
        raise ValueError("context_window must be positive.")

    token_ids = [int(token_id) for token_id in tokens]
    if not token_ids:
        raise ValueError("No tokens to compress.")

    # Header: token count, so the decoder knows when to stop.
    output = io.BytesIO()
    output.write(struct.pack(">I", len(token_ids)))
    bit_output = BitOutputStream(output)
    encoder = ArithmeticEncoder(bit_output, precision=ARITHMETIC_PRECISION)

    context_tokens = []
    state = None  # RWKV recurrent state; reset together with the context
    total_nll = 0.0  # accumulated -log p, for the theoretical-ratio stat
    start_time = time.time()
    total_tokens = len(token_ids)
    if progress is not None:
        progress((0, total_tokens), desc=progress_desc, unit="token")

    with torch.inference_mode():
        for idx, token_id in enumerate(token_ids):
            # Hard context reset; the decoder applies the identical rule.
            if len(context_tokens) >= context_window:
                context_tokens = []
                state = None

            # Feed the previous token (0 at a fresh context) to predict this one.
            input_token = context_tokens[-1] if context_tokens else 0
            logits, state = model.forward([input_token], state)
            next_logits = _prepare_logits(logits)

            # Integer frequency table: scaled probabilities clamped to >= 1 so
            # every token stays encodable even with near-zero probability.
            probs = torch.softmax(next_logits, dim=-1)
            counts = (probs * PROB_SCALE).to(torch.long)
            counts = torch.clamp(counts, min=1)

            cdf = torch.cumsum(counts, dim=-1)
            total_count = int(cdf[-1].item())

            prob_val = probs[token_id]
            total_nll += float((-torch.log(prob_val)).item())

            # The token's slice of the cumulative table drives the coder.
            low_val = int(cdf[token_id - 1].item()) if token_id > 0 else 0
            high_val = int(cdf[token_id].item())
            encoder.encode_symbol(low_val, high_val, total_count)

            context_tokens.append(token_id)
            if progress is not None:
                progress((idx + 1, total_tokens), desc=progress_desc, unit="token")

    encoder.finish()
    bit_output.close()
    data = output.getvalue()
    end_time = time.time()

    original_bytes = int(original_bytes or 0)
    compressed_bytes = len(data)
    ratio = compressed_bytes / original_bytes if original_bytes > 0 else 0.0

    # Shannon bound implied by the model's assigned probabilities.
    theoretical_bits = total_nll / math.log(2)
    theoretical_bytes = theoretical_bits / 8
    theoretical_ratio = theoretical_bytes / original_bytes if original_bytes > 0 else 0.0

    duration = end_time - start_time
    speed = len(token_ids) / duration if duration > 0 else 0.0

    stats = {
        "tokens": len(token_ids),
        "original_bytes": original_bytes,
        "compressed_bytes": compressed_bytes,
        "ratio": ratio,
        "theoretical_ratio": theoretical_ratio,
        "duration_s": duration,
        "speed_toks_per_s": speed,
    }
    return data, stats
284
+
285
+
286
def compress_text(text, model, tokenizer, context_window=2048):
    """Convenience wrapper: tokenize *text* and arithmetic-code the result."""
    byte_length = len(text.encode("utf-8"))
    token_ids = tokenize_text(tokenizer, text)
    return compress_tokens(token_ids, model, context_window=context_window, original_bytes=byte_length)
290
+
291
+
292
def decompress_bytes(data, model, tokenizer, context_window=2048):
    """Invert compress_tokens: arithmetic-decode *data* back into text.

    Expects the layout produced by compress_tokens (4-byte big-endian token
    count, then the bitstream) and must run with the same model, tokenizer,
    and context_window, since the probability tables are re-derived
    identically step by step.

    Returns:
        (text, stats) — the decoded string and a dict with "tokens" and
        "duration_s".

    Raises:
        ValueError: on a non-positive context window or truncated input.
    """
    if context_window <= 0:
        raise ValueError("context_window must be positive.")
    if not data or len(data) < 4:
        raise ValueError("Compressed data is empty or invalid.")

    buffer = io.BytesIO(data)
    total_tokens_bytes = buffer.read(4)
    total_tokens = struct.unpack(">I", total_tokens_bytes)[0]

    bit_input = BitInputStream(buffer)
    decoder = ArithmeticDecoder(bit_input, precision=ARITHMETIC_PRECISION)

    decoded_tokens = []
    context_tokens = []
    state = None  # RWKV recurrent state; reset together with the context
    start_time = time.time()

    with torch.inference_mode():
        for _ in range(total_tokens):
            # Hard context reset, mirroring the encoder's schedule exactly.
            if len(context_tokens) >= context_window:
                context_tokens = []
                state = None

            # Feed the previous token (0 at a fresh context) to predict the next.
            input_token = context_tokens[-1] if context_tokens else 0
            logits, state = model.forward([input_token], state)
            next_logits = _prepare_logits(logits)

            # Rebuild the exact integer frequency table the encoder used.
            probs = torch.softmax(next_logits, dim=-1)
            counts = (probs * PROB_SCALE).to(torch.long)
            counts = torch.clamp(counts, min=1)

            cdf = torch.cumsum(counts, dim=-1)
            total_count = int(cdf[-1].item())

            # Map the stream position to the token whose cumulative slice it hits.
            count_val = decoder.decode_symbol_find_count(total_count)
            count_val_tensor = torch.tensor(count_val, device=cdf.device)
            target_token_id = int(torch.searchsorted(cdf, count_val_tensor, right=True).item())

            decoded_tokens.append(target_token_id)
            context_tokens.append(target_token_id)

            # Consume the decoded symbol's interval, as the encoder did.
            low_val = int(cdf[target_token_id - 1].item()) if target_token_id > 0 else 0
            high_val = int(cdf[target_token_id].item())
            decoder.update_range(low_val, high_val, total_count)

    text = decode_tokens(tokenizer, decoded_tokens)
    duration = time.time() - start_time

    stats = {
        "tokens": total_tokens,
        "duration_s": duration,
    }
    return text, stats
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.0.0
2
+ rwkv
3
+ torch
support/README.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Place the RWKV vocab file here:
2
+ - rwkv_vocab_v20230424.txt
3
+
4
+ You can also set RWKV_TOKENIZER to point to a different vocab path.
support/rwkv_vocab_v20230424.txt ADDED
The diff for this file is too large to render. See raw diff