josephilo commited on
Commit
ad505f5
·
verified ·
1 Parent(s): 7007a01

Delete Untitled.ipynb

Browse files
Files changed (1) hide show
  1. Untitled.ipynb +0 -178
Untitled.ipynb DELETED
@@ -1,178 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "1450e971-02a8-456f-927d-6a7cd1237fb0",
7
- "metadata": {
8
- "tags": []
9
- },
10
- "outputs": [],
11
- "source": [
12
- "import torch"
13
- ]
14
- },
15
- {
16
- "cell_type": "code",
17
- "execution_count": 2,
18
- "id": "ac7202ad-16e2-4b5c-b622-3b459d537844",
19
- "metadata": {
20
- "tags": []
21
- },
22
- "outputs": [],
23
- "source": [
24
- "a = torch.load(\"pytorch_model.bin\")"
25
- ]
26
- },
27
- {
28
- "cell_type": "code",
29
- "execution_count": 16,
30
- "id": "2f0a9726-609a-40ab-96ea-ebc783d2bd6d",
31
- "metadata": {
32
- "tags": []
33
- },
34
- "outputs": [
35
- {
36
- "data": {
37
- "text/plain": [
38
- "dict_keys(['language_model.model.tok_embeddings.weight', 'language_model.model.layers.0.attention.wqkv.weight', 'language_model.model.layers.0.attention.wo.weight', 'language_model.model.layers.0.feed_forward.w1.weight', 'language_model.model.layers.0.feed_forward.w3.weight', 'language_model.model.layers.0.feed_forward.w2.weight', 'language_model.model.layers.0.attention_norm.weight', 'language_model.model.layers.0.ffn_norm.weight', 'language_model.model.layers.1.attention.wqkv.weight', 'language_model.model.layers.1.attention.wo.weight', 'language_model.model.layers.1.feed_forward.w1.weight', 'language_model.model.layers.1.feed_forward.w3.weight', 'language_model.model.layers.1.feed_forward.w2.weight', 'language_model.model.layers.1.attention_norm.weight', 'language_model.model.layers.1.ffn_norm.weight', 'language_model.model.layers.2.attention.wqkv.weight', 'language_model.model.layers.2.attention.wo.weight', 'language_model.model.layers.2.feed_forward.w1.weight', 'language_model.model.layers.2.feed_forward.w3.weight', 'language_model.model.layers.2.feed_forward.w2.weight', 'language_model.model.layers.2.attention_norm.weight', 'language_model.model.layers.2.ffn_norm.weight', 'language_model.model.layers.3.attention.wqkv.weight', 'language_model.model.layers.3.attention.wo.weight', 'language_model.model.layers.3.feed_forward.w1.weight', 'language_model.model.layers.3.feed_forward.w3.weight', 'language_model.model.layers.3.feed_forward.w2.weight', 'language_model.model.layers.3.attention_norm.weight', 'language_model.model.layers.3.ffn_norm.weight', 'language_model.model.layers.4.attention.wqkv.weight', 'language_model.model.layers.4.attention.wo.weight', 'language_model.model.layers.4.feed_forward.w1.weight', 'language_model.model.layers.4.feed_forward.w3.weight', 'language_model.model.layers.4.feed_forward.w2.weight', 'language_model.model.layers.4.attention_norm.weight', 'language_model.model.layers.4.ffn_norm.weight', 'language_model.model.layers.5.attention.wqkv.weight', 'language_model.model.layers.5.attention.wo.weight', 'language_model.model.layers.5.feed_forward.w1.weight', 'language_model.model.layers.5.feed_forward.w3.weight', 'language_model.model.layers.5.feed_forward.w2.weight', 'language_model.model.layers.5.attention_norm.weight', 'language_model.model.layers.5.ffn_norm.weight', 'language_model.model.layers.6.attention.wqkv.weight', 'language_model.model.layers.6.attention.wo.weight', 'language_model.model.layers.6.feed_forward.w1.weight', 'language_model.model.layers.6.feed_forward.w3.weight', 'language_model.model.layers.6.feed_forward.w2.weight', 'language_model.model.layers.6.attention_norm.weight', 'language_model.model.layers.6.ffn_norm.weight', 'language_model.model.layers.7.attention.wqkv.weight', 'language_model.model.layers.7.attention.wo.weight', 'language_model.model.layers.7.feed_forward.w1.weight', 'language_model.model.layers.7.feed_forward.w3.weight', 'language_model.model.layers.7.feed_forward.w2.weight', 'language_model.model.layers.7.attention_norm.weight', 'language_model.model.layers.7.ffn_norm.weight', 'language_model.model.layers.8.attention.wqkv.weight', 'language_model.model.layers.8.attention.wo.weight', 'language_model.model.layers.8.feed_forward.w1.weight', 'language_model.model.layers.8.feed_forward.w3.weight', 'language_model.model.layers.8.feed_forward.w2.weight', 'language_model.model.layers.8.attention_norm.weight', 'language_model.model.layers.8.ffn_norm.weight', 'language_model.model.layers.9.attention.wqkv.weight', 'language_model.model.layers.9.attention.wo.weight', 'language_model.model.layers.9.feed_forward.w1.weight', 'language_model.model.layers.9.feed_forward.w3.weight', 'language_model.model.layers.9.feed_forward.w2.weight', 'language_model.model.layers.9.attention_norm.weight', 'language_model.model.layers.9.ffn_norm.weight', 'language_model.model.layers.10.attention.wqkv.weight', 'language_model.model.layers.10.attention.wo.weight', 'language_model.model.layers.10.feed_forward.w1.weight', 'language_model.model.layers.10.feed_forward.w3.weight', 'language_model.model.layers.10.feed_forward.w2.weight', 'language_model.model.layers.10.attention_norm.weight', 'language_model.model.layers.10.ffn_norm.weight', 'language_model.model.layers.11.attention.wqkv.weight', 'language_model.model.layers.11.attention.wo.weight', 'language_model.model.layers.11.feed_forward.w1.weight', 'language_model.model.layers.11.feed_forward.w3.weight', 'language_model.model.layers.11.feed_forward.w2.weight', 'language_model.model.layers.11.attention_norm.weight', 'language_model.model.layers.11.ffn_norm.weight', 'language_model.model.layers.12.attention.wqkv.weight', 'language_model.model.layers.12.attention.wo.weight', 'language_model.model.layers.12.feed_forward.w1.weight', 'language_model.model.layers.12.feed_forward.w3.weight', 'language_model.model.layers.12.feed_forward.w2.weight', 'language_model.model.layers.12.attention_norm.weight', 'language_model.model.layers.12.ffn_norm.weight', 'language_model.model.layers.13.attention.wqkv.weight', 'language_model.model.layers.13.attention.wo.weight', 'language_model.model.layers.13.feed_forward.w1.weight', 'language_model.model.layers.13.feed_forward.w3.weight', 'language_model.model.layers.13.feed_forward.w2.weight', 'language_model.model.layers.13.attention_norm.weight', 'language_model.model.layers.13.ffn_norm.weight', 'language_model.model.layers.14.attention.wqkv.weight', 'language_model.model.layers.14.attention.wo.weight', 'language_model.model.layers.14.feed_forward.w1.weight', 'language_model.model.layers.14.feed_forward.w3.weight', 'language_model.model.layers.14.feed_forward.w2.weight', 'language_model.model.layers.14.attention_norm.weight', 'language_model.model.layers.14.ffn_norm.weight', 'language_model.model.layers.15.attention.wqkv.weight', 'language_model.model.layers.15.attention.wo.weight', 'language_model.model.layers.15.feed_forward.w1.weight', 'language_model.model.layers.15.feed_forward.w3.weight', 'language_model.model.layers.15.feed_forward.w2.weight', 'language_model.model.layers.15.attention_norm.weight', 'language_model.model.layers.15.ffn_norm.weight', 'language_model.model.layers.16.attention.wqkv.weight', 'language_model.model.layers.16.attention.wo.weight', 'language_model.model.layers.16.feed_forward.w1.weight', 'language_model.model.layers.16.feed_forward.w3.weight', 'language_model.model.layers.16.feed_forward.w2.weight', 'language_model.model.layers.16.attention_norm.weight', 'language_model.model.layers.16.ffn_norm.weight', 'language_model.model.layers.17.attention.wqkv.weight', 'language_model.model.layers.17.attention.wo.weight', 'language_model.model.layers.17.feed_forward.w1.weight', 'language_model.model.layers.17.feed_forward.w3.weight', 'language_model.model.layers.17.feed_forward.w2.weight', 'language_model.model.layers.17.attention_norm.weight', 'language_model.model.layers.17.ffn_norm.weight', 'language_model.model.layers.18.attention.wqkv.weight', 'language_model.model.layers.18.attention.wo.weight', 'language_model.model.layers.18.feed_forward.w1.weight', 'language_model.model.layers.18.feed_forward.w3.weight', 'language_model.model.layers.18.feed_forward.w2.weight', 'language_model.model.layers.18.attention_norm.weight', 'language_model.model.layers.18.ffn_norm.weight', 'language_model.model.layers.19.attention.wqkv.weight', 'language_model.model.layers.19.attention.wo.weight', 'language_model.model.layers.19.feed_forward.w1.weight', 'language_model.model.layers.19.feed_forward.w3.weight', 'language_model.model.layers.19.feed_forward.w2.weight', 'language_model.model.layers.19.attention_norm.weight', 'language_model.model.layers.19.ffn_norm.weight', 'language_model.model.layers.20.attention.wqkv.weight', 'language_model.model.layers.20.attention.wo.weight', 'language_model.model.layers.20.feed_forward.w1.weight', 'language_model.model.layers.20.feed_forward.w3.weight', 'language_model.model.layers.20.feed_forward.w2.weight', 'language_model.model.layers.20.attention_norm.weight', 'language_model.model.layers.20.ffn_norm.weight', 'language_model.model.layers.21.attention.wqkv.weight', 'language_model.model.layers.21.attention.wo.weight', 'language_model.model.layers.21.feed_forward.w1.weight', 'language_model.model.layers.21.feed_forward.w3.weight', 'language_model.model.layers.21.feed_forward.w2.weight', 'language_model.model.layers.21.attention_norm.weight', 'language_model.model.layers.21.ffn_norm.weight', 'language_model.model.layers.22.attention.wqkv.weight', 'language_model.model.layers.22.attention.wo.weight', 'language_model.model.layers.22.feed_forward.w1.weight', 'language_model.model.layers.22.feed_forward.w3.weight', 'language_model.model.layers.22.feed_forward.w2.weight', 'language_model.model.layers.22.attention_norm.weight', 'language_model.model.layers.22.ffn_norm.weight', 'language_model.model.layers.23.attention.wqkv.weight', 'language_model.model.layers.23.attention.wo.weight', 'language_model.model.layers.23.feed_forward.w1.weight', 'language_model.model.layers.23.feed_forward.w3.weight', 'language_model.model.layers.23.feed_forward.w2.weight', 'language_model.model.layers.23.attention_norm.weight', 'language_model.model.layers.23.ffn_norm.weight', 'language_model.model.layers.24.attention.wqkv.weight', 'language_model.model.layers.24.attention.wo.weight', 'language_model.model.layers.24.feed_forward.w1.weight', 'language_model.model.layers.24.feed_forward.w3.weight', 'language_model.model.layers.24.feed_forward.w2.weight', 'language_model.model.layers.24.attention_norm.weight', 'language_model.model.layers.24.ffn_norm.weight', 'language_model.model.layers.25.attention.wqkv.weight', 'language_model.model.layers.25.attention.wo.weight', 'language_model.model.layers.25.feed_forward.w1.weight', 'language_model.model.layers.25.feed_forward.w3.weight', 'language_model.model.layers.25.feed_forward.w2.weight', 'language_model.model.layers.25.attention_norm.weight', 'language_model.model.layers.25.ffn_norm.weight', 'language_model.model.layers.26.attention.wqkv.weight', 'language_model.model.layers.26.attention.wo.weight', 'language_model.model.layers.26.feed_forward.w1.weight', 'language_model.model.layers.26.feed_forward.w3.weight', 'language_model.model.layers.26.feed_forward.w2.weight', 'language_model.model.layers.26.attention_norm.weight', 'language_model.model.layers.26.ffn_norm.weight', 'language_model.model.layers.27.attention.wqkv.weight', 'language_model.model.layers.27.attention.wo.weight', 'language_model.model.layers.27.feed_forward.w1.weight', 'language_model.model.layers.27.feed_forward.w3.weight', 'language_model.model.layers.27.feed_forward.w2.weight', 'language_model.model.layers.27.attention_norm.weight', 'language_model.model.layers.27.ffn_norm.weight', 'language_model.model.layers.28.attention.wqkv.weight', 'language_model.model.layers.28.attention.wo.weight', 'language_model.model.layers.28.feed_forward.w1.weight', 'language_model.model.layers.28.feed_forward.w3.weight', 'language_model.model.layers.28.feed_forward.w2.weight', 'language_model.model.layers.28.attention_norm.weight', 'language_model.model.layers.28.ffn_norm.weight', 'language_model.model.layers.29.attention.wqkv.weight', 'language_model.model.layers.29.attention.wo.weight', 'language_model.model.layers.29.feed_forward.w1.weight', 'language_model.model.layers.29.feed_forward.w3.weight', 'language_model.model.layers.29.feed_forward.w2.weight', 'language_model.model.layers.29.attention_norm.weight', 'language_model.model.layers.29.ffn_norm.weight', 'language_model.model.layers.30.attention.wqkv.weight', 'language_model.model.layers.30.attention.wo.weight', 'language_model.model.layers.30.feed_forward.w1.weight', 'language_model.model.layers.30.feed_forward.w3.weight', 'language_model.model.layers.30.feed_forward.w2.weight', 'language_model.model.layers.30.attention_norm.weight', 'language_model.model.layers.30.ffn_norm.weight', 'language_model.model.layers.31.attention.wqkv.weight', 'language_model.model.layers.31.attention.wo.weight', 'language_model.model.layers.31.feed_forward.w1.weight', 'language_model.model.layers.31.feed_forward.w3.weight', 'language_model.model.layers.31.feed_forward.w2.weight', 'language_model.model.layers.31.attention_norm.weight', 'language_model.model.layers.31.ffn_norm.weight', 'language_model.model.layers.32.attention.wqkv.weight', 'language_model.model.layers.32.attention.wo.weight', 'language_model.model.layers.32.feed_forward.w1.weight', 'language_model.model.layers.32.feed_forward.w3.weight', 'language_model.model.layers.32.feed_forward.w2.weight', 'language_model.model.layers.32.attention_norm.weight', 'language_model.model.layers.32.ffn_norm.weight', 'language_model.model.layers.33.attention.wqkv.weight', 'language_model.model.layers.33.attention.wo.weight', 'language_model.model.layers.33.feed_forward.w1.weight', 'language_model.model.layers.33.feed_forward.w3.weight', 'language_model.model.layers.33.feed_forward.w2.weight', 'language_model.model.layers.33.attention_norm.weight', 'language_model.model.layers.33.ffn_norm.weight', 'language_model.model.layers.34.attention.wqkv.weight', 'language_model.model.layers.34.attention.wo.weight', 'language_model.model.layers.34.feed_forward.w1.weight', 'language_model.model.layers.34.feed_forward.w3.weight', 'language_model.model.layers.34.feed_forward.w2.weight', 'language_model.model.layers.34.attention_norm.weight', 'language_model.model.layers.34.ffn_norm.weight', 'language_model.model.layers.35.attention.wqkv.weight', 'language_model.model.layers.35.attention.wo.weight', 'language_model.model.layers.35.feed_forward.w1.weight', 'language_model.model.layers.35.feed_forward.w3.weight', 'language_model.model.layers.35.feed_forward.w2.weight', 'language_model.model.layers.35.attention_norm.weight', 'language_model.model.layers.35.ffn_norm.weight', 'language_model.model.layers.36.attention.wqkv.weight', 'language_model.model.layers.36.attention.wo.weight', 'language_model.model.layers.36.feed_forward.w1.weight', 'language_model.model.layers.36.feed_forward.w3.weight', 'language_model.model.layers.36.feed_forward.w2.weight', 'language_model.model.layers.36.attention_norm.weight', 'language_model.model.layers.36.ffn_norm.weight', 'language_model.model.layers.37.attention.wqkv.weight', 'language_model.model.layers.37.attention.wo.weight', 'language_model.model.layers.37.feed_forward.w1.weight', 'language_model.model.layers.37.feed_forward.w3.weight', 'language_model.model.layers.37.feed_forward.w2.weight', 'language_model.model.layers.37.attention_norm.weight', 'language_model.model.layers.37.ffn_norm.weight', 'language_model.model.layers.38.attention.wqkv.weight', 'language_model.model.layers.38.attention.wo.weight', 'language_model.model.layers.38.feed_forward.w1.weight', 'language_model.model.layers.38.feed_forward.w3.weight', 'language_model.model.layers.38.feed_forward.w2.weight', 'language_model.model.layers.38.attention_norm.weight', 'language_model.model.layers.38.ffn_norm.weight', 'language_model.model.layers.39.attention.wqkv.weight', 'language_model.model.layers.39.attention.wo.weight', 'language_model.model.layers.39.feed_forward.w1.weight', 'language_model.model.layers.39.feed_forward.w3.weight', 'language_model.model.layers.39.feed_forward.w2.weight', 'language_model.model.layers.39.attention_norm.weight', 'language_model.model.layers.39.ffn_norm.weight', 'language_model.model.layers.40.attention.wqkv.weight', 'language_model.model.layers.40.attention.wo.weight', 'language_model.model.layers.40.feed_forward.w1.weight', 'language_model.model.layers.40.feed_forward.w3.weight', 'language_model.model.layers.40.feed_forward.w2.weight', 'language_model.model.layers.40.attention_norm.weight', 'language_model.model.layers.40.ffn_norm.weight', 'language_model.model.layers.41.attention.wqkv.weight', 'language_model.model.layers.41.attention.wo.weight', 'language_model.model.layers.41.feed_forward.w1.weight', 'language_model.model.layers.41.feed_forward.w3.weight', 'language_model.model.layers.41.feed_forward.w2.weight', 'language_model.model.layers.41.attention_norm.weight', 'language_model.model.layers.41.ffn_norm.weight', 'language_model.model.layers.42.attention.wqkv.weight', 'language_model.model.layers.42.attention.wo.weight', 'language_model.model.layers.42.feed_forward.w1.weight', 'language_model.model.layers.42.feed_forward.w3.weight', 'language_model.model.layers.42.feed_forward.w2.weight', 'language_model.model.layers.42.attention_norm.weight', 'language_model.model.layers.42.ffn_norm.weight', 'language_model.model.layers.43.attention.wqkv.weight', 'language_model.model.layers.43.attention.wo.weight', 'language_model.model.layers.43.feed_forward.w1.weight', 'language_model.model.layers.43.feed_forward.w3.weight', 'language_model.model.layers.43.feed_forward.w2.weight', 'language_model.model.layers.43.attention_norm.weight', 'language_model.model.layers.43.ffn_norm.weight', 'language_model.model.layers.44.attention.wqkv.weight', 'language_model.model.layers.44.attention.wo.weight', 'language_model.model.layers.44.feed_forward.w1.weight', 'language_model.model.layers.44.feed_forward.w3.weight', 'language_model.model.layers.44.feed_forward.w2.weight', 'language_model.model.layers.44.attention_norm.weight', 'language_model.model.layers.44.ffn_norm.weight', 'language_model.model.layers.45.attention.wqkv.weight', 'language_model.model.layers.45.attention.wo.weight', 'language_model.model.layers.45.feed_forward.w1.weight', 'language_model.model.layers.45.feed_forward.w3.weight', 'language_model.model.layers.45.feed_forward.w2.weight', 'language_model.model.layers.45.attention_norm.weight', 'language_model.model.layers.45.ffn_norm.weight', 'language_model.model.layers.46.attention.wqkv.weight', 'language_model.model.layers.46.attention.wo.weight', 'language_model.model.layers.46.feed_forward.w1.weight', 'language_model.model.layers.46.feed_forward.w3.weight', 'language_model.model.layers.46.feed_forward.w2.weight', 'language_model.model.layers.46.attention_norm.weight', 'language_model.model.layers.46.ffn_norm.weight', 'language_model.model.layers.47.attention.wqkv.weight', 'language_model.model.layers.47.attention.wo.weight', 'language_model.model.layers.47.feed_forward.w1.weight', 'language_model.model.layers.47.feed_forward.w3.weight', 'language_model.model.layers.47.feed_forward.w2.weight', 'language_model.model.layers.47.attention_norm.weight', 'language_model.model.layers.47.ffn_norm.weight', 'language_model.model.norm.weight', 'language_model.output.weight'])"
39
- ]
40
- },
41
- "execution_count": 16,
42
- "metadata": {},
43
- "output_type": "execute_result"
44
- }
45
- ],
46
- "source": [
47
- "a.keys()"
48
- ]
49
- },
50
- {
51
- "cell_type": "code",
52
- "execution_count": 24,
53
- "id": "297b187d-3e32-4422-bdad-5963d35b442b",
54
- "metadata": {
55
- "tags": []
56
- },
57
- "outputs": [
58
- {
59
- "data": {
60
- "text/plain": [
61
- "dict_keys(['model.tok_embeddings.weight', 'model.layers.0.attention.wqkv.weight', 'model.layers.0.attention.wo.weight', 'model.layers.0.feed_forward.w1.weight', 'model.layers.0.feed_forward.w3.weight', 'model.layers.0.feed_forward.w2.weight', 'model.layers.0.attention_norm.weight', 'model.layers.0.ffn_norm.weight', 'model.layers.1.attention.wqkv.weight', 'model.layers.1.attention.wo.weight', 'model.layers.1.feed_forward.w1.weight', 'model.layers.1.feed_forward.w3.weight', 'model.layers.1.feed_forward.w2.weight', 'model.layers.1.attention_norm.weight', 'model.layers.1.ffn_norm.weight', 'model.layers.2.attention.wqkv.weight', 'model.layers.2.attention.wo.weight', 'model.layers.2.feed_forward.w1.weight', 'model.layers.2.feed_forward.w3.weight', 'model.layers.2.feed_forward.w2.weight', 'model.layers.2.attention_norm.weight', 'model.layers.2.ffn_norm.weight', 'model.layers.3.attention.wqkv.weight', 'model.layers.3.attention.wo.weight', 'model.layers.3.feed_forward.w1.weight', 'model.layers.3.feed_forward.w3.weight', 'model.layers.3.feed_forward.w2.weight', 'model.layers.3.attention_norm.weight', 'model.layers.3.ffn_norm.weight', 'model.layers.4.attention.wqkv.weight', 'model.layers.4.attention.wo.weight', 'model.layers.4.feed_forward.w1.weight', 'model.layers.4.feed_forward.w3.weight', 'model.layers.4.feed_forward.w2.weight', 'model.layers.4.attention_norm.weight', 'model.layers.4.ffn_norm.weight', 'model.layers.5.attention.wqkv.weight', 'model.layers.5.attention.wo.weight', 'model.layers.5.feed_forward.w1.weight', 'model.layers.5.feed_forward.w3.weight', 'model.layers.5.feed_forward.w2.weight', 'model.layers.5.attention_norm.weight', 'model.layers.5.ffn_norm.weight', 'model.layers.6.attention.wqkv.weight', 'model.layers.6.attention.wo.weight', 'model.layers.6.feed_forward.w1.weight', 'model.layers.6.feed_forward.w3.weight', 'model.layers.6.feed_forward.w2.weight', 'model.layers.6.attention_norm.weight', 'model.layers.6.ffn_norm.weight', 'model.layers.7.attention.wqkv.weight', 'model.layers.7.attention.wo.weight', 'model.layers.7.feed_forward.w1.weight', 'model.layers.7.feed_forward.w3.weight', 'model.layers.7.feed_forward.w2.weight', 'model.layers.7.attention_norm.weight', 'model.layers.7.ffn_norm.weight', 'model.layers.8.attention.wqkv.weight', 'model.layers.8.attention.wo.weight', 'model.layers.8.feed_forward.w1.weight', 'model.layers.8.feed_forward.w3.weight', 'model.layers.8.feed_forward.w2.weight', 'model.layers.8.attention_norm.weight', 'model.layers.8.ffn_norm.weight', 'model.layers.9.attention.wqkv.weight', 'model.layers.9.attention.wo.weight', 'model.layers.9.feed_forward.w1.weight', 'model.layers.9.feed_forward.w3.weight', 'model.layers.9.feed_forward.w2.weight', 'model.layers.9.attention_norm.weight', 'model.layers.9.ffn_norm.weight', 'model.layers.10.attention.wqkv.weight', 'model.layers.10.attention.wo.weight', 'model.layers.10.feed_forward.w1.weight', 'model.layers.10.feed_forward.w3.weight', 'model.layers.10.feed_forward.w2.weight', 'model.layers.10.attention_norm.weight', 'model.layers.10.ffn_norm.weight', 'model.layers.11.attention.wqkv.weight', 'model.layers.11.attention.wo.weight', 'model.layers.11.feed_forward.w1.weight', 'model.layers.11.feed_forward.w3.weight', 'model.layers.11.feed_forward.w2.weight', 'model.layers.11.attention_norm.weight', 'model.layers.11.ffn_norm.weight', 'model.layers.12.attention.wqkv.weight', 'model.layers.12.attention.wo.weight', 'model.layers.12.feed_forward.w1.weight', 'model.layers.12.feed_forward.w3.weight', 'model.layers.12.feed_forward.w2.weight', 'model.layers.12.attention_norm.weight', 'model.layers.12.ffn_norm.weight', 'model.layers.13.attention.wqkv.weight', 'model.layers.13.attention.wo.weight', 'model.layers.13.feed_forward.w1.weight', 'model.layers.13.feed_forward.w3.weight', 'model.layers.13.feed_forward.w2.weight', 'model.layers.13.attention_norm.weight', 'model.layers.13.ffn_norm.weight', 'model.layers.14.attention.wqkv.weight', 'model.layers.14.attention.wo.weight', 'model.layers.14.feed_forward.w1.weight', 'model.layers.14.feed_forward.w3.weight', 'model.layers.14.feed_forward.w2.weight', 'model.layers.14.attention_norm.weight', 'model.layers.14.ffn_norm.weight', 'model.layers.15.attention.wqkv.weight', 'model.layers.15.attention.wo.weight', 'model.layers.15.feed_forward.w1.weight', 'model.layers.15.feed_forward.w3.weight', 'model.layers.15.feed_forward.w2.weight', 'model.layers.15.attention_norm.weight', 'model.layers.15.ffn_norm.weight', 'model.layers.16.attention.wqkv.weight', 'model.layers.16.attention.wo.weight', 'model.layers.16.feed_forward.w1.weight', 'model.layers.16.feed_forward.w3.weight', 'model.layers.16.feed_forward.w2.weight', 'model.layers.16.attention_norm.weight', 'model.layers.16.ffn_norm.weight', 'model.layers.17.attention.wqkv.weight', 'model.layers.17.attention.wo.weight', 'model.layers.17.feed_forward.w1.weight', 'model.layers.17.feed_forward.w3.weight', 'model.layers.17.feed_forward.w2.weight', 'model.layers.17.attention_norm.weight', 'model.layers.17.ffn_norm.weight', 'model.layers.18.attention.wqkv.weight', 'model.layers.18.attention.wo.weight', 'model.layers.18.feed_forward.w1.weight', 'model.layers.18.feed_forward.w3.weight', 'model.layers.18.feed_forward.w2.weight', 'model.layers.18.attention_norm.weight', 'model.layers.18.ffn_norm.weight', 'model.layers.19.attention.wqkv.weight', 'model.layers.19.attention.wo.weight', 'model.layers.19.feed_forward.w1.weight', 'model.layers.19.feed_forward.w3.weight', 'model.layers.19.feed_forward.w2.weight', 'model.layers.19.attention_norm.weight', 'model.layers.19.ffn_norm.weight', 'model.layers.20.attention.wqkv.weight', 'model.layers.20.attention.wo.weight', 'model.layers.20.feed_forward.w1.weight', 'model.layers.20.feed_forward.w3.weight', 'model.layers.20.feed_forward.w2.weight', 'model.layers.20.attention_norm.weight', 'model.layers.20.ffn_norm.weight', 'model.layers.21.attention.wqkv.weight', 'model.layers.21.attention.wo.weight', 'model.layers.21.feed_forward.w1.weight', 'model.layers.21.feed_forward.w3.weight', 'model.layers.21.feed_forward.w2.weight', 'model.layers.21.attention_norm.weight', 'model.layers.21.ffn_norm.weight', 'model.layers.22.attention.wqkv.weight', 'model.layers.22.attention.wo.weight', 'model.layers.22.feed_forward.w1.weight', 'model.layers.22.feed_forward.w3.weight', 'model.layers.22.feed_forward.w2.weight', 'model.layers.22.attention_norm.weight', 'model.layers.22.ffn_norm.weight', 'model.layers.23.attention.wqkv.weight', 'model.layers.23.attention.wo.weight', 'model.layers.23.feed_forward.w1.weight', 'model.layers.23.feed_forward.w3.weight', 'model.layers.23.feed_forward.w2.weight', 'model.layers.23.attention_norm.weight', 'model.layers.23.ffn_norm.weight', 'model.layers.24.attention.wqkv.weight', 'model.layers.24.attention.wo.weight', 'model.layers.24.feed_forward.w1.weight', 'model.layers.24.feed_forward.w3.weight', 'model.layers.24.feed_forward.w2.weight', 'model.layers.24.attention_norm.weight', 'model.layers.24.ffn_norm.weight', 'model.layers.25.attention.wqkv.weight', 'model.layers.25.attention.wo.weight', 'model.layers.25.feed_forward.w1.weight', 'model.layers.25.feed_forward.w3.weight', 'model.layers.25.feed_forward.w2.weight', 'model.layers.25.attention_norm.weight', 'model.layers.25.ffn_norm.weight', 'model.layers.26.attention.wqkv.weight', 'model.layers.26.attention.wo.weight', 'model.layers.26.feed_forward.w1.weight', 'model.layers.26.feed_forward.w3.weight', 'model.layers.26.feed_forward.w2.weight', 'model.layers.26.attention_norm.weight', 'model.layers.26.ffn_norm.weight', 'model.layers.27.attention.wqkv.weight', 'model.layers.27.attention.wo.weight', 'model.layers.27.feed_forward.w1.weight', 'model.layers.27.feed_forward.w3.weight', 'model.layers.27.feed_forward.w2.weight', 'model.layers.27.attention_norm.weight', 'model.layers.27.ffn_norm.weight', 'model.layers.28.attention.wqkv.weight', 'model.layers.28.attention.wo.weight', 'model.layers.28.feed_forward.w1.weight', 'model.layers.28.feed_forward.w3.weight', 'model.layers.28.feed_forward.w2.weight', 'model.layers.28.attention_norm.weight', 'model.layers.28.ffn_norm.weight', 'model.layers.29.attention.wqkv.weight', 'model.layers.29.attention.wo.weight', 'model.layers.29.feed_forward.w1.weight', 'model.layers.29.feed_forward.w3.weight', 'model.layers.29.feed_forward.w2.weight', 'model.layers.29.attention_norm.weight', 'model.layers.29.ffn_norm.weight', 'model.layers.30.attention.wqkv.weight', 'model.layers.30.attention.wo.weight', 'model.layers.30.feed_forward.w1.weight', 'model.layers.30.feed_forward.w3.weight', 'model.layers.30.feed_forward.w2.weight', 'model.layers.30.attention_norm.weight', 'model.layers.30.ffn_norm.weight', 'model.layers.31.attention.wqkv.weight', 'model.layers.31.attention.wo.weight', 'model.layers.31.feed_forward.w1.weight', 'model.layers.31.feed_forward.w3.weight', 'model.layers.31.feed_forward.w2.weight', 'model.layers.31.attention_norm.weight', 'model.layers.31.ffn_norm.weight', 'model.layers.32.attention.wqkv.weight', 'model.layers.32.attention.wo.weight', 'model.layers.32.feed_forward.w1.weight', 'model.layers.32.feed_forward.w3.weight', 'model.layers.32.feed_forward.w2.weight', 'model.layers.32.attention_norm.weight', 'model.layers.32.ffn_norm.weight', 'model.layers.33.attention.wqkv.weight', 'model.layers.33.attention.wo.weight', 'model.layers.33.feed_forward.w1.weight', 'model.layers.33.feed_forward.w3.weight', 'model.layers.33.feed_forward.w2.weight', 'model.layers.33.attention_norm.weight', 'model.layers.33.ffn_norm.weight', 'model.layers.34.attention.wqkv.weight', 'model.layers.34.attention.wo.weight', 'model.layers.34.feed_forward.w1.weight', 'model.layers.34.feed_forward.w3.weight', 'model.layers.34.feed_forward.w2.weight', 'model.layers.34.attention_norm.weight', 'model.layers.34.ffn_norm.weight', 'model.layers.35.attention.wqkv.weight', 'model.layers.35.attention.wo.weight', 'model.layers.35.feed_forward.w1.weight', 'model.layers.35.feed_forward.w3.weight', 'model.layers.35.feed_forward.w2.weight', 'model.layers.35.attention_norm.weight', 'model.layers.35.ffn_norm.weight', 'model.layers.36.attention.wqkv.weight', 'model.layers.36.attention.wo.weight', 'model.layers.36.feed_forward.w1.weight', 'model.layers.36.feed_forward.w3.weight', 'model.layers.36.feed_forward.w2.weight', 'model.layers.36.attention_norm.weight', 'model.layers.36.ffn_norm.weight', 'model.layers.37.attention.wqkv.weight', 'model.layers.37.attention.wo.weight', 'model.layers.37.feed_forward.w1.weight', 'model.layers.37.feed_forward.w3.weight', 'model.layers.37.feed_forward.w2.weight', 'model.layers.37.attention_norm.weight', 'model.layers.37.ffn_norm.weight', 'model.layers.38.attention.wqkv.weight', 'model.layers.38.attention.wo.weight', 'model.layers.38.feed_forward.w1.weight', 'model.layers.38.feed_forward.w3.weight', 'model.layers.38.feed_forward.w2.weight', 'model.layers.38.attention_norm.weight', 'model.layers.38.ffn_norm.weight', 'model.layers.39.attention.wqkv.weight', 'model.layers.39.attention.wo.weight', 'model.layers.39.feed_forward.w1.weight', 'model.layers.39.feed_forward.w3.weight', 'model.layers.39.feed_forward.w2.weight', 'model.layers.39.attention_norm.weight', 'model.layers.39.ffn_norm.weight', 'model.layers.40.attention.wqkv.weight', 'model.layers.40.attention.wo.weight', 'model.layers.40.feed_forward.w1.weight', 'model.layers.40.feed_forward.w3.weight', 'model.layers.40.feed_forward.w2.weight', 'model.layers.40.attention_norm.weight', 'model.layers.40.ffn_norm.weight', 'model.layers.41.attention.wqkv.weight', 'model.layers.41.attention.wo.weight', 'model.layers.41.feed_forward.w1.weight', 'model.layers.41.feed_forward.w3.weight', 'model.layers.41.feed_forward.w2.weight', 'model.layers.41.attention_norm.weight', 'model.layers.41.ffn_norm.weight', 'model.layers.42.attention.wqkv.weight', 'model.layers.42.attention.wo.weight', 'model.layers.42.feed_forward.w1.weight', 'model.layers.42.feed_forward.w3.weight', 'model.layers.42.feed_forward.w2.weight', 'model.layers.42.attention_norm.weight', 'model.layers.42.ffn_norm.weight', 'model.layers.43.attention.wqkv.weight', 'model.layers.43.attention.wo.weight', 'model.layers.43.feed_forward.w1.weight', 'model.layers.43.feed_forward.w3.weight', 'model.layers.43.feed_forward.w2.weight', 'model.layers.43.attention_norm.weight', 'model.layers.43.ffn_norm.weight', 'model.layers.44.attention.wqkv.weight', 'model.layers.44.attention.wo.weight', 'model.layers.44.feed_forward.w1.weight', 'model.layers.44.feed_forward.w3.weight', 'model.layers.44.feed_forward.w2.weight', 'model.layers.44.attention_norm.weight', 'model.layers.44.ffn_norm.weight', 'model.layers.45.attention.wqkv.weight', 'model.layers.45.attention.wo.weight', 'model.layers.45.feed_forward.w1.weight', 'model.layers.45.feed_forward.w3.weight', 'model.layers.45.feed_forward.w2.weight', 'model.layers.45.attention_norm.weight', 'model.layers.45.ffn_norm.weight', 'model.layers.46.attention.wqkv.weight', 'model.layers.46.attention.wo.weight', 'model.layers.46.feed_forward.w1.weight', 'model.layers.46.feed_forward.w3.weight', 'model.layers.46.feed_forward.w2.weight', 'model.layers.46.attention_norm.weight', 'model.layers.46.ffn_norm.weight', 'model.layers.47.attention.wqkv.weight', 'model.layers.47.attention.wo.weight', 'model.layers.47.feed_forward.w1.weight', 'model.layers.47.feed_forward.w3.weight', 'model.layers.47.feed_forward.w2.weight', 'model.layers.47.attention_norm.weight', 'model.layers.47.ffn_norm.weight', 'model.norm.weight', 'output.weight'])"
62
- ]
63
- },
64
- "execution_count": 24,
65
- "metadata": {},
66
- "output_type": "execute_result"
67
- }
68
- ],
69
- "source": [
70
- "b.keys()"
71
- ]
72
- },
73
- {
74
- "cell_type": "code",
75
- "execution_count": 17,
76
- "id": "b3725908-c731-41e0-92d2-3f7ed2926834",
77
- "metadata": {
78
- "tags": []
79
- },
80
- "outputs": [],
81
- "source": [
82
- "import torch\n",
83
- "torch.cuda.empty_cache() "
84
- ]
85
- },
86
- {
87
- "cell_type": "code",
88
- "execution_count": 22,
89
- "id": "18eb9972-f475-471e-9d3d-29b418dd01b4",
90
- "metadata": {
91
- "tags": []
92
- },
93
- "outputs": [],
94
- "source": [
95
- "b = {}"
96
- ]
97
- },
98
- {
99
- "cell_type": "code",
100
- "execution_count": 13,
101
- "id": "bb93ed26-2f16-4174-bd2b-93c7dd241daa",
102
- "metadata": {
103
- "tags": []
104
- },
105
- "outputs": [],
106
- "source": [
107
- "dic = []"
108
- ]
109
- },
110
- {
111
- "cell_type": "code",
112
- "execution_count": 14,
113
- "id": "57ef76ea-be23-48bd-8605-a8df99e3e991",
114
- "metadata": {
115
- "tags": []
116
- },
117
- "outputs": [],
118
- "source": [
119
- "for it in a.keys():\n",
120
- " dic.append(it)"
121
- ]
122
- },
123
- {
124
- "cell_type": "code",
125
- "execution_count": 23,
126
- "id": "db40fe6b-761f-4982-b3a3-f1d368be1f45",
127
- "metadata": {
128
- "tags": []
129
- },
130
- "outputs": [],
131
- "source": [
132
- "for it in a.keys():\n",
133
- " b[it.replace(\"language_model.\",\"\")] = a[it]"
134
- ]
135
- },
136
- {
137
- "cell_type": "code",
138
- "execution_count": 25,
139
- "id": "776afc7a-3237-419f-a808-fc37472de9ff",
140
- "metadata": {
141
- "tags": []
142
- },
143
- "outputs": [],
144
- "source": [
145
- "torch.save(b, \"pytorch_model.bin\")"
146
- ]
147
- },
148
- {
149
- "cell_type": "code",
150
- "execution_count": null,
151
- "id": "b16a09b9-c25f-432d-929c-256cd5f21a3b",
152
- "metadata": {},
153
- "outputs": [],
154
- "source": []
155
- }
156
- ],
157
- "metadata": {
158
- "kernelspec": {
159
- "display_name": "Python 3 (ipykernel)",
160
- "language": "python",
161
- "name": "python3"
162
- },
163
- "language_info": {
164
- "codemirror_mode": {
165
- "name": "ipython",
166
- "version": 3
167
- },
168
- "file_extension": ".py",
169
- "mimetype": "text/x-python",
170
- "name": "python",
171
- "nbconvert_exporter": "python",
172
- "pygments_lexer": "ipython3",
173
- "version": "3.11.7"
174
- }
175
- },
176
- "nbformat": 4,
177
- "nbformat_minor": 5
178
- }