akashjoy committed on
Commit
86a0899
·
verified ·
1 Parent(s): 85c4732

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +256 -0
app.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #-------------------- Deployment Modules------------------------#
2
+ import flask
3
+ #from flask import Flask, jsonify, request, render_template
4
+ from flask import Flask, request, render_template
5
+ import joblib
6
+ # import jsonify
7
+ # import json
8
+ #-------------------- Deployment Modules------------------------#
9
+
10
+ #-------------------- Data Modules-----------------------------#
11
+ import numpy as np
12
+ import pandas as pd
13
+ import re
14
+ #import json
15
+ import random
16
+ import math
17
+ import time
18
+ import unicodedata
19
+ #import csv
20
+ import itertools
21
+ import os
22
+ import codecs
23
+ #-------------------- Data Modules-----------------------------#
24
+ #import spacy
25
+ #spacy_english = spacy.load('en_core_web_sm')
26
+
27
+ #-------------------- NLP Modules------------------------------#
28
+
29
+ #-----------------Machine Learning Modules--------------------#
30
+ import torch
31
+ from torch.jit import script, trace
32
+ import torch.nn as nn
33
+ import torch.nn.functional as F
34
+ import torch.optim as optim
35
+ from torch.utils.data import DataLoader, TensorDataset
36
+ #from __future__ import division
37
+ #from __future__ import print_function
38
+ #from __future__ import unicode_literals
39
+ #from __future__ import absolute_import
40
+ #-----------------Machine Learning Modules--------------------#
41
+
42
+ app = Flask(__name__)
43
@app.route('/')
def index():
    """Serve the chat landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
46
+
47
+ @app.route('/chat', methods = ['POST'])
48
+ def chat():
49
+
50
class Vocabulary:
    """Word <-> index mapping with per-word occurrence counts.

    Indices for the three special tokens are taken from ``PAD_token``,
    ``SOS_token`` and ``EOS_token``, which must be defined in the enclosing
    scope before an instance is created or trimmed. Regular words are
    numbered starting at 3.
    """

    def __init__(self, name):
        """Create an empty vocabulary labelled *name*."""
        self.name = name
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: 'PAD', SOS_token: 'SOS', EOS_token: 'EOS'}
        # The three special tokens above are already counted.
        self.num_words = 3

    def addWord(self, w):
        """Register one occurrence of *w*, assigning a new index on first sight."""
        if w not in self.word2index:
            self.word2index[w] = self.num_words
            self.index2word[self.num_words] = w
            self.word2count[w] = 1
            self.num_words += 1
        else:
            self.word2count[w] += 1

    def addSentence(self, sent):
        """Register every single-space-separated token of *sent*."""
        for word in sent.split(' '):
            self.addWord(word)

    def trim(self, min_cnt):
        """Drop words seen ``min_cnt`` times or fewer and renumber the rest.

        Only the first call has any effect. Words are kept when their count
        is strictly greater than *min_cnt*; every kept word is re-added via
        ``addWord``, so its count restarts at 1.
        """
        if self.trimmed:
            return
        self.trimmed = True

        words_to_keep = [
            word for word, count in self.word2count.items() if count > min_cnt
        ]
        total = len(self.word2count)
        # Guard the empty vocabulary (previously a ZeroDivisionError) and let
        # the ``%`` format spec scale the fraction so the printed figure is a
        # real percentage.
        kept_ratio = len(words_to_keep) / total if total else 0.0
        print('Words to Keep: {}/{} = {:.2%}'.format(len(words_to_keep), total, kept_ratio))

        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: 'PAD', SOS_token: 'SOS', EOS_token: 'EOS'}
        self.num_words = 3
        for w in words_to_keep:
            self.addWord(w)
88
+
89
+
90
class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder applied to embedded token indices."""

    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        """Store the shared embedding and build the bidirectional GRU.

        GRU dropout is forced to 0 when there is a single layer.
        """
        super().__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        gru_dropout = dropout if n_layers != 1 else 0
        self.gru = nn.GRU(
            hidden_size,
            hidden_size,
            n_layers,
            dropout=gru_dropout,
            bidirectional=True,
        )

    def forward(self, input_seq, input_lengths, hidden=None):
        """Encode a padded batch and return ``(outputs, hidden)``.

        The batch is embedded, packed using *input_lengths*, run through the
        GRU and unpacked again. The two directional halves of the last
        dimension are summed so the output feature size is ``hidden_size``.
        """
        packed_input = nn.utils.rnn.pack_padded_sequence(
            self.embedding(input_seq), input_lengths
        )
        packed_output, hidden = self.gru(packed_input, hidden)
        unpacked, _ = nn.utils.rnn.pad_packed_sequence(packed_output)

        # First hidden_size features are one direction, the rest the other.
        first_half = unpacked[:, :, :self.hidden_size]
        second_half = unpacked[:, :, self.hidden_size:]
        return first_half + second_half, hidden
110
+
111
class Attn(nn.Module):
    """Dot-product attention over encoder outputs."""

    def __init__(self, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

    def dot_score(self, hidden, encoder_output):
        """Return the element-wise product of the inputs summed over dim 2."""
        return (hidden * encoder_output).sum(dim=2)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights normalised with softmax.

        The 2-D score matrix is transposed, normalised along dim 1 and given
        an extra axis at position 1.
        NOTE(review): presumably ``hidden`` is (1, batch, hidden) and
        ``encoder_outputs`` is (seq, batch, hidden), giving (batch, 1, seq)
        -- confirm against the caller.
        """
        energies = self.dot_score(hidden, encoder_outputs).t()
        weights = F.softmax(energies, dim=1)
        return weights.unsqueeze(1)
123
+
124
class DecoderRNN(nn.Module):
    """Single-step GRU decoder with dot-product attention over encoder outputs."""

    def __init__(self, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
        """Build the decoder layers around the shared *embedding*.

        GRU dropout is forced to 0 when there is a single layer; the
        embedding dropout always uses *dropout*.
        """
        super().__init__()

        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        recurrent_dropout = dropout if n_layers != 1 else 0
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=recurrent_dropout)
        # Projects [GRU output ; attention context] back to hidden_size.
        self.concat = nn.Linear(2 * hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        """Decode one step and return ``(output, hidden)``.

        ``output`` is a softmax over the ``output_size`` classes (dim 1);
        ``hidden`` is the GRU state to feed into the next step.
        """
        step_embedding = self.embedding_dropout(self.embedding(input_step))
        rnn_output, hidden = self.gru(step_embedding, last_hidden)

        # Attention-weighted sum of the encoder outputs.
        attn_weights = self.attn(rnn_output, encoder_outputs)
        context = torch.bmm(attn_weights, encoder_outputs.transpose(0, 1)).squeeze(1)

        combined = torch.cat((rnn_output.squeeze(0), context), 1)
        projected = torch.tanh(self.concat(combined))
        probabilities = F.softmax(self.out(projected), dim=1)
        return probabilities, hidden
154
+
155
class GreedySearchDecoder(nn.Module):
    """Greedy decoding: at every step feed back the highest-scoring token."""

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        """Decode exactly *max_length* tokens for *input_seq*.

        Returns ``(all_tokens, all_scores)``: the chosen token index and its
        score for each step, concatenated along dim 0. Decoding does not stop
        early at an end-of-sequence token. ``SOS_token`` is read from the
        enclosing scope.
        """
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Use the decoder this module owns; the original read a `decoder`
        # variable from the surrounding scope, which breaks (or silently uses
        # the wrong model) whenever that name is missing or different.
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]

        # Create the working tensors on the same device as the input so the
        # module also works when the models are moved off the CPU.
        device = input_seq.device
        decoder_input = torch.ones(1, 1, dtype=torch.long, device=device) * SOS_token
        all_tokens = torch.zeros([0], dtype=torch.long, device=device)
        all_scores = torch.zeros([0], device=device)

        for _ in range(max_length):
            decoder_output, decoder_hidden = self.decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # Best score and its token index for this step.
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Restore the leading axis before feeding the token back in.
            decoder_input = torch.unsqueeze(decoder_input, 0)
        return all_tokens, all_scores
177
+
178
+
179
def unicodeToASCII(s):
    """Return *s* NFD-normalised with all nonspacing marks (category ``Mn``) removed.

    Characters that have no such decomposition are left unchanged.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
181
+
182
def cleanString(s):
    """Normalise raw text into space-separated lowercase tokens.

    Steps: lowercase and strip, remove accents via ``unicodeToASCII``, put a
    space before each of ``. ! ?``, replace every run of other non-letter
    characters with one space, then collapse whitespace and strip.
    """
    text = unicodeToASCII(s.lower().strip())
    substitutions = (
        (r"([.!?])", r" \1"),
        (r"[^a-zA-Z.!?]+", r" "),
        (r"\s+", r" "),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()
188
+
189
+
190
def indexFromSentence(voc, sent):
    """Map each single-space-separated word of *sent* to its index and append ``EOS_token``.

    Raises ``KeyError`` for a word missing from ``voc.word2index``.
    """
    lookup = voc.word2index
    ids = []
    for word in sent.split(' '):
        ids.append(lookup[word])
    ids.append(EOS_token)
    return ids
192
+
193
+
194
def evaluate(encoder, decoder, searcher, voc, sentence, max_length=10):
    """Run *searcher* on one cleaned sentence and return the decoded words.

    ``encoder`` and ``decoder`` are accepted for interface compatibility but
    are not used here; *searcher* does the work. The result contains one word
    per decoded token, so special tokens such as ``'EOS'`` or ``'PAD'`` may
    appear and are left for the caller to filter.

    Raises ``KeyError`` if a word of *sentence* is not in ``voc.word2index``
    or a decoded index is not in ``voc.index2word``.
    """
    token_ids = indexFromSentence(voc, sentence)
    lengths = torch.tensor([len(token_ids)])
    # Build a batch of one, then swap axes so the sequence axis comes first.
    input_batch = torch.tensor([token_ids], dtype=torch.long).transpose(0, 1)

    # Inference only: skip autograd bookkeeping to save time and memory.
    with torch.no_grad():
        tokens, _ = searcher(input_batch, lengths, max_length)
    return [voc.index2word[token.item()] for token in tokens]
203
+
204
+ PAD_token = 0
205
+ SOS_token = 1
206
+ EOS_token = 2
207
+ model_name = 'chatbot_model'
208
+ hidden_size = 500
209
+ encoder_n_layers = 2
210
+ decoder_n_layers = 2
211
+ dropout = 0.15
212
+ batch_size = 64
213
+ corpus_name = 'movie_corpus'
214
+ max_length = 10
215
+ voc = Vocabulary(corpus_name)
216
+ #loadFilename = 'D:\\PracticeProjects\\Chatbot\\chatbotAPI\\chatbot_model\\movie_corpus\\2-2_500\\4000_checkpoint.tar'
217
+ loadFilename = '/home/ubuntu/4000_checkpoint.tar'
218
+ checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
219
+ encoder_sd = checkpoint['en']
220
+ decoder_sd = checkpoint['de']
221
+ encoder_optimizer_sd = checkpoint['en_opt']
222
+ decoder_optimizer_sd = checkpoint['de_opt']
223
+ embedding_sd = checkpoint['embedding']
224
+ voc.__dict__ = checkpoint['voc_dict']
225
+ embedding_sd = checkpoint['embedding']
226
+ embedding = nn.Embedding(voc.num_words, hidden_size)
227
+ embedding.load_state_dict(embedding_sd)
228
+ encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
229
+ decoder = DecoderRNN(embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
230
+ encoder.load_state_dict(encoder_sd)
231
+ decoder.load_state_dict(decoder_sd)
232
+ encoder.eval()
233
+ decoder.eval()
234
+ searcher = GreedySearchDecoder(encoder, decoder)
235
+ #request_json = request.get_json(force=True)
236
+ #input_review = str(request_json["input"])
237
+ input_review = str(request.form.get('chatbox'))
238
+ input_sentence = ''
239
+ #while(1):
240
+ if input_review == 'quit':return 'exit'
241
+
242
+ try:
243
+ input_sentence = cleanString(input_review)
244
+ output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
245
+ output_words[:] = [x for x in output_words if not (x == 'EOS' or x == 'PAD')]
246
+ #response = json.dumps({'response':' '.join(output_words)})
247
+ response = ' '.join(output_words)
248
+ return render_template('index.html', response = response)
249
+ except KeyError:
250
+ #response = json.dumps({'response':"Error: Unknown Word"})
251
+ return render_template('index.html', response ='Error: Unknown Word')
252
+
253
+
254
if __name__ == '__main__':
    # Alternative kept from the original: app.run(port=5000, debug=True)
    # Bind to all interfaces on port 5000.
    server_options = {'host': '0.0.0.0', 'port': 5000}
    app.run(**server_options)