sangdal commited on
Commit
850cfc6
·
1 Parent(s): 63d4b57

코랩 소스

Browse files
Files changed (1) hide show
  1. 허깅페이지_배포_확인.ipynb +175 -0
허깅페이지_배포_확인.ipynb ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "source": [
20
+ "!pip install torch transformers"
21
+ ],
22
+ "metadata": {
23
+ "colab": {
24
+ "base_uri": "https://localhost:8080/"
25
+ },
26
+ "id": "5YNG-J8gp0zo",
27
+ "outputId": "427f945a-a7ad-4bcf-8b75-2f205c95c1de"
28
+ },
29
+ "execution_count": 7,
30
+ "outputs": [
31
+ {
32
+ "output_type": "stream",
33
+ "name": "stdout",
34
+ "text": [
35
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n",
36
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.31.0)\n",
37
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.12.2)\n",
38
+ "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.7.1)\n",
39
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n",
40
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n",
41
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n",
42
+ "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n",
43
+ "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (3.25.2)\n",
44
+ "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.6)\n",
45
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.16.4)\n",
46
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n",
47
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n",
48
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
49
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n",
50
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n",
51
+ "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n",
52
+ "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.1)\n",
53
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n",
54
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n",
55
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n",
56
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n",
57
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n",
58
+ "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n",
59
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n",
60
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n"
61
+ ]
62
+ }
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "source": [
68
+ "import torch\n",
69
+ "from argparse import ArgumentParser\n",
70
+ "from tokenizers import SentencePieceBPETokenizer\n",
71
+ "from transformers import GPT2LMHeadModel, AutoModel, AutoTokenizer, AutoModelForCausalLM\n",
72
+ "\n",
73
+ "parser = ArgumentParser()\n",
74
+ "parser.add_argument(\"-m\", \"--model-path\", type=str, required=True)\n",
75
+ "parser.add_argument(\"-o\", \"--output-path\", type=str, required=True)\n",
76
+ "parser.add_argument(\"-b\", \"--num-beams\", type=int, default=5)\n",
77
+ "\n",
78
+ "'''\n",
79
+ "언어 생성 모델에서 빔서치(Beam Search)를 사용할 때 빔의 개수를 지정하는 옵션입니다.\n",
80
+ "빔서치는 생성 모델이 다음 단어를 예측할 때 사용되는 방법 중 하나로,\n",
81
+ "여러 개의 후보 단어를 유지하고 확률적으로 가장 적합한 단어를 선택합니다.\n",
82
+ "'''\n",
83
+ "\n",
84
+ "\n",
85
+ "def generate_question(context, num_beams=5):\n",
86
+ "\n",
87
+ " # 모델 경로 위치 잡기\n",
88
+ " model = GPT2LMHeadModel.from_pretrained(\"sangdal/ChatBot\")\n",
89
+ " device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
90
+ " model = model.to(device)\n",
91
+ "\n",
92
+ " # tokenizer폴더 경로위치 잡기\n",
93
+ " tokenizer = SentencePieceBPETokenizer.from_file(\n",
94
+ " vocab_filename=\"/content/drive/MyDrive/프로젝트(web)/tokenizer/vocab.json\", merges_filename=\"/content/drive/MyDrive/프로젝트(web)/tokenizer/merges.txt\", add_prefix_space=False\n",
95
+ " )\n",
96
+ "\n",
97
+ " example = {\"context\": context, \"question\": \"\", \"answer\": \"\"}\n",
98
+ " # example = {\"context\": context, \"question\": \"당신의 장점은? \", \"answer\": \"착함\"}\n",
99
+ "\n",
100
+ " inputs = tokenizer.encode(example[\"context\"])\n",
101
+ " input_ids = torch.tensor(inputs.ids, dtype=torch.long).unsqueeze(0).to(device) # Specify dtype as torch.long\n",
102
+ "\n",
103
+ "\n",
104
+ " model = model.to(device)\n",
105
+ " model.eval()\n",
106
+ "\n",
107
+ " generated_results = []\n",
108
+ "\n",
109
+ " origin_seq_len = input_ids.size(-1)\n",
110
+ "\n",
111
+ " decoded_sequences = model.generate(\n",
112
+ " input_ids=input_ids,\n",
113
+ " max_length=origin_seq_len + 100, # 질문의 최대길이\n",
114
+ " min_length=origin_seq_len + 5, # 질문의 최소길이\n",
115
+ " pad_token_id=0,\n",
116
+ " bos_token_id=1,\n",
117
+ " eos_token_id=2,\n",
118
+ " num_beams=num_beams,\n",
119
+ " repetition_penalty=1.3,\n",
120
+ " no_repeat_ngram_size=3,\n",
121
+ " num_return_sequences=1,\n",
122
+ " )\n",
123
+ "\n",
124
+ " for decoded_tokens in decoded_sequences.tolist():\n",
125
+ " decoded_question_text = tokenizer.decode(decoded_tokens[origin_seq_len:])\n",
126
+ " decoded_question_text = decoded_question_text.split(\"</s>\")[0].replace(\"<s>\", \"\")\n",
127
+ " decoded_question_text = decoded_question_text.split(\"질문:\")[-1]\n",
128
+ " generated_results.append(decoded_question_text)\n",
129
+ "\n",
130
+ " return generated_results\n",
131
+ "\n",
132
+ "\n",
133
+ "\n",
134
+ "if __name__ == \"__main__\":\n",
135
+ " context = input(\"문맥을 입력하세요: \")\n",
136
+ " # num_beams = int(input(\"num_beams를 입력하세요 (기본값: 5): \") or 5)\n",
137
+ "\n",
138
+ " generated_question = generate_question(context)\n",
139
+ "\n",
140
+ " print(f\"생성된 질문: {generated_question}\")\n",
141
+ "\n",
142
+ " # print(generated_question)\n",
143
+ " # print(type(generated_question))\n",
144
+ "\n"
145
+ ],
146
+ "metadata": {
147
+ "colab": {
148
+ "base_uri": "https://localhost:8080/"
149
+ },
150
+ "id": "_5CCqm65p1fG",
151
+ "outputId": "bd143207-0ed0-434c-aeb2-6a26f19f507e"
152
+ },
153
+ "execution_count": 9,
154
+ "outputs": [
155
+ {
156
+ "output_type": "stream",
157
+ "name": "stdout",
158
+ "text": [
159
+ "문맥을 입력하세요: 스스로가 학업에 집중하면서도 봉사, 파트타임직, 인턴 등 수많은 활동을 성실히 수행해 왔습니다. 1년간 빵집에서 제조 기사로 일하며 재료 발주부터 포장까지의 전 과정을 담당해 보기도 하고, 2년간 외국 학생들의 생활을 돕는 버디로서 커뮤니케이션 역량을 쌓기도 했습니다. 책임감과 높은 목표치를 기반으로 다양한 분야에서 활동해 왔습니다. 또한 제 성격으로, 저를 가장 잘 표현할 수 있는 단어는 ‘배려’입니다. 평범하지만 그만큼 정감 있고 누군가를 상대함에 있어 필수 요건입니다. 저는 대화의 자리에 임할 때 제 언행이 상대방에게 어떤 영향을 미칠지를 항상 생각합니다. 가끔은 이 배려가 지나쳐 상대에게 부담을 줄 수 있다는 점이 단점이겠으나, 그렇다고 협상의 자리에서 먼저 물러나거나 공동체 이익에 반하는 선택을 하진 않습니다. 제 자신의 의견은 확실히 전달하면서도 ‘상대를 위한 커뮤니케이션’이라는 나름의 원칙을 지키는 것입니다.\n",
160
+ "생성된 질문: ['주먹이 운다에서 2년간 무엇을 통해 커뮤니케이션 능력을 쌓았는가?']\n"
161
+ ]
162
+ }
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "source": [],
168
+ "metadata": {
169
+ "id": "78M6fCldqS7f"
170
+ },
171
+ "execution_count": null,
172
+ "outputs": []
173
+ }
174
+ ]
175
+ }