IkIzma committed on
Commit
e76cc46
·
1 Parent(s): c22392b

Delete Generate_text_with_RuGPTs_HF.ipynb

Browse files
Files changed (1) hide show
  1. Generate_text_with_RuGPTs_HF.ipynb +0 -313
Generate_text_with_RuGPTs_HF.ipynb DELETED
@@ -1,313 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {
6
- "id": "BO7MEGbb6mtB"
7
- },
8
- "source": [
9
- "# Generate text with RuGPTs in Hugging Face\n",
10
- "How to generate text with pretrained RuGPTs models using the Hugging Face Transformers library.\n",
11
- "\n",
12
- "This notebook is valid for all RuGPTs models except RuGPT3XL.\n",
13
- "## Install env"
14
- ]
15
- },
16
- {
17
- "cell_type": "code",
18
- "execution_count": null,
19
- "metadata": {
20
- "id": "H73-Pizb6c8n"
21
- },
22
- "outputs": [],
23
- "source": [
24
- "!pip3 install transformers"
25
- ]
26
- },
27
- {
28
- "cell_type": "markdown",
29
- "metadata": {
30
- "id": "QvgntLymArg3"
31
- },
32
- "source": [
33
- "## Generate"
34
- ]
35
- },
36
- {
37
- "cell_type": "code",
38
- "execution_count": 3,
39
- "metadata": {
40
- "id": "csHcDJXFDdaW"
41
- },
42
- "outputs": [],
43
- "source": [
44
- "import numpy as np\n",
45
- "import torch"
46
- ]
47
- },
48
- {
49
- "cell_type": "code",
50
- "execution_count": null,
51
- "metadata": {
52
- "id": "TJxPg-cJDhAB"
53
- },
54
- "outputs": [],
55
- "source": [
56
- "np.random.seed(42)\n",
57
- "torch.manual_seed(42)"
58
- ]
59
- },
60
- {
61
- "cell_type": "code",
62
- "execution_count": 5,
63
- "metadata": {
64
- "id": "AkUrzKsy_16F"
65
- },
66
- "outputs": [],
67
- "source": [
68
- "from transformers import GPT2LMHeadModel, GPT2Tokenizer"
69
- ]
70
- },
71
- {
72
- "cell_type": "code",
73
- "execution_count": 6,
74
- "metadata": {
75
- "id": "tV7tt-t2FQc3"
76
- },
77
- "outputs": [],
78
- "source": [
79
- "def load_tokenizer_and_model(model_name_or_path):\n",
80
- " return GPT2Tokenizer.from_pretrained(model_name_or_path), GPT2LMHeadModel.from_pretrained(model_name_or_path).cuda()\n",
81
- "\n",
82
- "\n",
83
- "def generate(\n",
84
- " model, tok, text,\n",
85
- " do_sample=True, max_length=50, repetition_penalty=5.0,\n",
86
- " top_k=5, top_p=0.95, temperature=1,\n",
87
- " num_beams=None,\n",
88
- " no_repeat_ngram_size=3\n",
89
- " ):\n",
90
- " input_ids = tok.encode(text, return_tensors=\"pt\").cuda()\n",
91
- " out = model.generate(\n",
92
- " input_ids.cuda(),\n",
93
- " max_length=max_length,\n",
94
- " repetition_penalty=repetition_penalty,\n",
95
- " do_sample=do_sample,\n",
96
- " top_k=top_k, top_p=top_p, temperature=temperature,\n",
97
- " num_beams=num_beams, no_repeat_ngram_size=no_repeat_ngram_size\n",
98
- " )\n",
99
- " return list(map(tok.decode, out))"
100
- ]
101
- },
102
- {
103
- "cell_type": "markdown",
104
- "metadata": {
105
- "id": "7sPySei8FO_r"
106
- },
107
- "source": [
108
- "### RuGPT2Large"
109
- ]
110
- },
111
- {
112
- "cell_type": "code",
113
- "execution_count": null,
114
- "metadata": {
115
- "id": "x_EMbgO0BTvb"
116
- },
117
- "outputs": [],
118
- "source": [
119
- "tok, model = load_tokenizer_and_model(\"sberbank-ai/rugpt2large\")\n",
120
- "generated = generate(model, tok, \"Александр Сергеевич Пушкин родился в \", num_beams=10)\n",
121
- "generated[0]"
122
- ]
123
- },
124
- {
125
- "cell_type": "markdown",
126
- "metadata": {
127
- "id": "F4X-d7fIIZFC"
128
- },
129
- "source": [
130
- "### RuGPT3Small"
131
- ]
132
- },
133
- {
134
- "cell_type": "code",
135
- "execution_count": null,
136
- "metadata": {
137
- "id": "24oUrAfBIk6G"
138
- },
139
- "outputs": [],
140
- "source": [
141
- "tok, model = load_tokenizer_and_model(\"sberbank-ai/rugpt3small_based_on_gpt2\")\n",
142
- "generated = generate(model, tok, \"Александр Сергеевич Пушкин родился в \", num_beams=10)"
143
- ]
144
- },
145
- {
146
- "cell_type": "code",
147
- "execution_count": 9,
148
- "metadata": {
149
- "colab": {
150
- "base_uri": "https://localhost:8080/",
151
- "height": 53
152
- },
153
- "id": "SGTZin-JIu_N",
154
- "outputId": "52795a45-12ef-47f8-e7f9-84f10077f986"
155
- },
156
- "outputs": [
157
- {
158
- "output_type": "execute_result",
159
- "data": {
160
- "text/plain": [
161
- "'Александр Сергеевич Пушкин родился в 1825 г. в семье поэта Александра Сергеевича Пушкина и его жены Александры Николаевны Пушкиной (урожденной Пушкиных). В 1783 г. он поступил на юридический факультет Санкт-Петербургского университета'"
162
- ],
163
- "application/vnd.google.colaboratory.intrinsic+json": {
164
- "type": "string"
165
- }
166
- },
167
- "metadata": {},
168
- "execution_count": 9
169
- }
170
- ],
171
- "source": [
172
- "generated[0]"
173
- ]
174
- },
175
- {
176
- "cell_type": "markdown",
177
- "metadata": {
178
- "id": "GHrO9tovIyyj"
179
- },
180
- "source": [
181
- "### RuGPT3Medium"
182
- ]
183
- },
184
- {
185
- "cell_type": "code",
186
- "execution_count": null,
187
- "metadata": {
188
- "id": "2MVyT8zAIyys"
189
- },
190
- "outputs": [],
191
- "source": [
192
- "tok, model = load_tokenizer_and_model(\"sberbank-ai/rugpt3medium_based_on_gpt2\")\n",
193
- "generated = generate(model, tok, \"Александр Сергеевич Пушкин родился в \", num_beams=10)"
194
- ]
195
- },
196
- {
197
- "cell_type": "code",
198
- "execution_count": 11,
199
- "metadata": {
200
- "colab": {
201
- "base_uri": "https://localhost:8080/",
202
- "height": 53
203
- },
204
- "id": "W3SWmttlJHF7",
205
- "outputId": "d4e97e47-3ac0-4072-f9b2-bc0aca2b802c"
206
- },
207
- "outputs": [
208
- {
209
- "output_type": "execute_result",
210
- "data": {
211
- "text/plain": [
212
- "'Александр Сергеевич Пушкин родился в 1799 году, умер в 1837-м. Он был одним из самых образованных и одаренных людей своего времени. У него было много увлечений: он увлекался математикой, физикой, астрономией,'"
213
- ],
214
- "application/vnd.google.colaboratory.intrinsic+json": {
215
- "type": "string"
216
- }
217
- },
218
- "metadata": {},
219
- "execution_count": 11
220
- }
221
- ],
222
- "source": [
223
- "generated[0]"
224
- ]
225
- },
226
- {
227
- "cell_type": "markdown",
228
- "metadata": {
229
- "id": "HnU-9k3dIzfy"
230
- },
231
- "source": [
232
- "### RuGPT3Large"
233
- ]
234
- },
235
- {
236
- "cell_type": "code",
237
- "execution_count": null,
238
- "metadata": {
239
- "id": "Z14U66yuIzfz"
240
- },
241
- "outputs": [],
242
- "source": [
243
- "tok, model = load_tokenizer_and_model(\"sberbank-ai/rugpt3large_based_on_gpt2\")\n",
244
- "generated = generate(model, tok, \"Александр Сергеевич Пушкин родился в \", num_beams=10)"
245
- ]
246
- },
247
- {
248
- "cell_type": "code",
249
- "execution_count": 13,
250
- "metadata": {
251
- "colab": {
252
- "base_uri": "https://localhost:8080/",
253
- "height": 53
254
- },
255
- "id": "VFuy-V2xJmwu",
256
- "outputId": "c50acf5d-df76-4b06-a325-14a7148a24ee"
257
- },
258
- "outputs": [
259
- {
260
- "output_type": "execute_result",
261
- "data": {
262
- "text/plain": [
263
- "'Александр Сергеевич Пушкин родился в \\n1799 году. Его отец был крепостным крестьянином, а мать – крепостной крестьянкой. Детство и юность поэта прошли в селе Михайловском Пензенской губернии. В 1820-х годах семья переехала'"
264
- ],
265
- "application/vnd.google.colaboratory.intrinsic+json": {
266
- "type": "string"
267
- }
268
- },
269
- "metadata": {},
270
- "execution_count": 13
271
- }
272
- ],
273
- "source": [
274
- "generated[0]"
275
- ]
276
- },
277
- {
278
- "cell_type": "code",
279
- "source": [],
280
- "metadata": {
281
- "id": "WCfz5Cs5ENOo"
282
- },
283
- "execution_count": null,
284
- "outputs": []
285
- }
286
- ],
287
- "metadata": {
288
- "accelerator": "GPU",
289
- "colab": {
290
- "name": "Generate_text_with_RuGPTs_HF",
291
- "provenance": []
292
- },
293
- "kernelspec": {
294
- "display_name": "Python 3",
295
- "language": "python",
296
- "name": "python3"
297
- },
298
- "language_info": {
299
- "codemirror_mode": {
300
- "name": "ipython",
301
- "version": 3
302
- },
303
- "file_extension": ".py",
304
- "mimetype": "text/x-python",
305
- "name": "python",
306
- "nbconvert_exporter": "python",
307
- "pygments_lexer": "ipython3",
308
- "version": "3.7.8"
309
- }
310
- },
311
- "nbformat": 4,
312
- "nbformat_minor": 0
313
- }