Upload 12 files

Browse files

Files changed (13) hide show

.gitattributes +4 -0
API_DEMO_CHAT.py +140 -0
MiniMind2_tokenizer/special_tokens_map.json +30 -0
MiniMind2_tokenizer/tokenizer.json +0 -0
MiniMind2_tokenizer/tokenizer_config.json +44 -0
README.md +85 -3
img/img_1.png +3 -0
img/img_2.png +3 -0
img/img_3.png +3 -0
miniGoose.png +3 -0
rwkv-final-sft-1024.pth +3 -0
rwkv-final-sft-2048.pth +3 -0
rwkv-final-sft-512.pth +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+img/img_1.png filter=lfs diff=lfs merge=lfs -text
+img/img_2.png filter=lfs diff=lfs merge=lfs -text
+img/img_3.png filter=lfs diff=lfs merge=lfs -text
+miniGoose.png filter=lfs diff=lfs merge=lfs -text

API_DEMO_CHAT.py ADDED Viewed

	@@ -0,0 +1,140 @@

+########################################################################################################
+# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
+########################################################################################################
+print("RWKV Chat Simple Demo")
+import os, copy, types, gc, sys, re
+import numpy as np
+from prompt_toolkit import prompt
+import torch
+from transformers import AutoTokenizer
+torch.backends.cudnn.benchmark = True
+torch.backends.cudnn.allow_tf32 = True
+torch.backends.cuda.matmul.allow_tf32 = True
+os.environ["RWKV_V7_ON"] = "1" # enable this for rwkv-7 models
+os.environ["RWKV_JIT_ON"] = "1"
+os.environ["RWKV_CUDA_ON"] = "0"  # !!! '1' to compile CUDA kernel (10x faster), requires c++ compiler & cuda libraries !!!
+from rwkv.model import RWKV
+from rwkv.utils import PIPELINE
+########################################################################################################
+args = types.SimpleNamespace()
+args.strategy = "cuda fp16"  # use CUDA, fp16
+args.MODEL_NAME = "./rwkv-final-sft-2048.pth"
+########################################################################################################
+STATE_NAME = None # use vanilla zero initial state?
+# use custom state? much better chat results (download from https://huggingface.co/BlinkDL/temp-latest-training-models/tree/main)
+# note: this is English Single-round QA state (will forget what you previously say)
+# STATE_NAME = "E://RWKV-Runner//models//rwkv-x060-eng_single_round_qa-1B6-20240516-ctx2048"
+########################################################################################################
+GEN_TEMP = 1.0
+GEN_TOP_P = 0.3
+GEN_alpha_presence = 0.5
+GEN_alpha_frequency = 0.5
+GEN_penalty_decay = 0.996
+if STATE_NAME != None:
+    GEN_TOP_P = 0.2
+    GEN_alpha_presence = 0.3
+    GEN_alpha_frequency = 0.3
+CHUNK_LEN = 16  # split input into chunks to save VRAM (shorter -> slower, but saves VRAM)
+########################################################################################################
+print(f"Loading model - {args.MODEL_NAME}")
+model = RWKV(model=args.MODEL_NAME, strategy=args.strategy)
+pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
+tokenizer = AutoTokenizer.from_pretrained("./MiniMind2_tokenizer")
+model_tokens = []
+model_state = None
+if STATE_NAME != None: # load custom state
+    args = model.args
+    state_raw = torch.load(STATE_NAME + '.pth')
+    state_init = [None for i in range(args.n_layer * 3)]
+    for i in range(args.n_layer):
+        dd = model.strategy[i]
+        dev = dd.device
+        atype = dd.atype
+        state_init[i*3+0] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+        state_init[i*3+1] = state_raw[f'blocks.{i}.att.time_state'].transpose(1,2).to(dtype=torch.float, device=dev).requires_grad_(False).contiguous()
+        state_init[i*3+2] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+    model_state = copy.deepcopy(state_init)
+def run_rnn(ctx):
+    global model_tokens, model_state
+    ctx = ctx.replace("\r\n", "\n")
+    tokens = tokenizer.encode(ctx)
+    tokens = [int(x) for x in tokens]
+    model_tokens += tokens
+    # print(f"### model ###\n{model_tokens}\n[{pipeline.decode(model_tokens)}]")  # debug
+    while len(tokens) > 0:
+        out, model_state = model.forward(tokens[:CHUNK_LEN], model_state)
+        tokens = tokens[CHUNK_LEN:]
+    return out
+if STATE_NAME == None: # use initial prompt if we are not loading a state
+    init_ctx = "User: hi" + "\n\n"
+    init_ctx += "Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it." + "\n\n"
+    # run_rnn(init_ctx)
+    # print(init_ctx, end="")
+while True:
+    msg = prompt("<|im_start|>user:")
+    msg = msg.strip()
+    msg = re.sub(r"\n+", "\n", msg)
+    if len(msg) > 0:
+        occurrence = {}
+        out_tokens = []
+        out_last = 0
+        out = run_rnn("<|im_start|>user\n" + msg + "<|im_end|>\n" + "<|im_start|>assistant\n")
+        print("\nAssistant:", end="")
+        eos_token_id = tokenizer.eos_token_id
+        pad_token_id = tokenizer.pad_token_id
+        for i in range(99999):
+            for n in occurrence:
+                out[n] -= GEN_alpha_presence + occurrence[n] * GEN_alpha_frequency  # repetition penalty
+            out[0] -= 1e10  # disable END_OF_TEXT
+            token = pipeline.sample_logits(out, temperature=GEN_TEMP, top_p=GEN_TOP_P)
+            out, model_state = model.forward([token], model_state)
+            model_tokens += [token]
+            out_tokens += [token]
+            for xxx in occurrence:
+                occurrence[xxx] *= GEN_penalty_decay
+            occurrence[token] = 1 + (occurrence[token] if token in occurrence else 0)
+            tmp = tokenizer.decode(out_tokens[out_last:])
+            if ("\ufffd" not in tmp) and (not tmp.endswith("\n")):
+                print(tmp, end="", flush=True)
+                out_last = i + 1
+            # 使用 token_id 判断是否为 eos_token
+            if token == eos_token_id:
+                print(tmp, end="\n\n", flush=True)
+                break
+    else:
+        print("!!! Error: please say something !!!")

MiniMind2_tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<|im_start|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

MiniMind2_tokenizer/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

MiniMind2_tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<|im_start|>",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{{ '<|im_start|>system\\n' + system_message + '<|im_end|>\\n' }}{% else %}{{ '<|im_start|>system\\nYou are a helpful assistant<|im_end|>\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '<|im_end|>\\n<|im_start|>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\\n' }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "extra_special_tokens": {},
+  "legacy": true,
+  "model_max_length": 32768,
+  "pad_token": "<|endoftext|>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "PreTrainedTokenizer",
+  "unk_token": "<|endoftext|>"
+}

README.md CHANGED Viewed

@@ -1,3 +1,85 @@
----
-license: apache-2.0
----

+---
+license: apache-2.0
+datasets:
+- jingyaogong/minimind_dataset
+language:
+- zh
+- en
+tags:
+- 34.2M
+---
+# 🪿 Mini-RWKV-V7-LM
+🚀 让我们来从头训练一个属于自己的Mini-RWKV-7吧~ 小小的鹅也能飞得很高喔~
+<div align="center">
+  <img src="./miniGoose.png" width="200" height="200" style="display: block; margin: auto;">
+</div>
+## 🌟 模型简介
+前往 [**Mini_RWKV_7**](https://github.com/Alic-Li/Mini_RWKV_7) 查看完整项目
+本模型是基于 **RWKV-V7 架构** 训练的一个 **34M 参数量** 的语言模型`Mini-RWKV-V7-LM-34M`。它在保持轻量的同时，具备良好的语言理解和生成能力，非常适合资源极其有限的设备部署和快速迭代开发。
+---
+## 📦 模型结构
+| 参数 | 数值 |
+|------|------|
+| 参数量 | 34.2M 🎯 |
+| 层数 | 8 🧱 |
+| 隐藏维度 | 512 📐 |
+| 上下文长度 | 512->1024->2048 📏 |
+| 词表大小 | 6400 📚 |
+- Vocab 和MiniMind的保持一致
+---
+## 🧪 训练信息
+- 🪿 架构：[RWKV-V7](https://github.com/BlinkDL/RWKV-LM)
+- 📚 数据源：[minimind_dataset](https://huggingface.co/datasets/jingyaogong/minimind_dataset) 特别感谢MiniMind的作者 [@jingyaogong](https://github.com/jingyaogong)开源了训练数据集 🤗
+- 📈 学习率：动态调整
+- 🖥️ 硬件：可以使用4060laptop等显卡进行训练，甚至Radeon 780M 核显也可以在轻薄本上进行训练 😜
+- 👀 我是在 AMD Instinct MI300X 上快速复现的（十分感谢 AMD 公司对我个人以及 RWKV 的云算力赞助）😊
+- 📦 模型大小：68.4MB 参数量 34.2M Params
+- 📊 预训练损失曲线：预训练收敛稳定，loss 在 2.12 左右波动（因为预训练数据量比较少）
+- 📊 SFT 训练损失曲线：SFT 训练最终 loss 在 0.5 左右波动
+---
+## 🎉 效果展示
+![火星旅行小说](./img/img_1.png)
+![冒泡排序代码](./img/img_2.png)
+![心理问答](./img/img_3.png)
+---
+## 🧰 推理方法
+### 🐍 安装依赖
+```bash
+pip install torch numpy prompt_toolkit transformers rwkv
+```
+- 如果你使用的是 AMD 显卡，请安装对应最新版本的 torch
+- 比如说```pip3 install torch --index-url https://download.pytorch.org/whl/rocm6.3```
+- 具体安装指令可以参考[Pytorch官网下载链接](https://pytorch.org/get-started/locally/)
+### 🧪 加载模型 & 推理示例
+```bash
+python3 ./API_DEMO_CHAT.py
+```
+## 📢 致谢
+- 🖥️ 感谢 AMD 公司对我个人以及 RWKV 的云算力赞助
+- 🙌 感谢 RWKV 社区提供的开源代码和训练框架！
+- 🚀 感谢 [MiniMind](https://github.com/jingyaogong/minimind) 提供的 README 模板灵感！
+- 如发现 bug 或有任何建议，欢迎提交 issue 或 PR 🛠️
+---
+🎉 感谢小伙伴们使用 **Mini_RWKV_7**！如果你喜欢这个项目，欢迎推给大家一起来玩！🌟
+---

img/img_1.png ADDED Viewed

Git LFS Details

SHA256: 342bec9c43999bc6ffdb3263398827b4cc6b3226a19312632db9eba5a1e9b716
Pointer size: 131 Bytes
Size of remote file: 260 kB

img/img_2.png ADDED Viewed

Git LFS Details

SHA256: 2bf560bd59577bfb5e3cf7182c8aaeefa983b19dd477fdd7e3245cb312b0e78b
Pointer size: 131 Bytes
Size of remote file: 272 kB

img/img_3.png ADDED Viewed

Git LFS Details

SHA256: d29f08a86171af93877b96597dd02a30f56956054a8fcf16291912c635e7a867
Pointer size: 131 Bytes
Size of remote file: 349 kB

miniGoose.png ADDED Viewed

Git LFS Details

SHA256: f3b3cdb84721cda5d1944473e2e1b37d4cfb7a078a05ef7211c5edcc17909a0c
Pointer size: 131 Bytes
Size of remote file: 862 kB

rwkv-final-sft-1024.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a03dd08fbbc44e93fda601a8db61e7018bfd10831c871c9b2c5beaed9dab4f28
+size 68354364

rwkv-final-sft-2048.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09459cc9b8cf413e71ab867d7be5673f4d5b554d8fb87cf8669e4aa34599152f
+size 68354364

rwkv-final-sft-512.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da5384f647c2eb6cebe067acce030d0590e047c61b54dee21179083a6d42b672
+size 68354364