vinay0123 committed on
Commit
aa5d685
·
verified ·
1 Parent(s): d3ca118

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -126
app.py CHANGED
@@ -1,126 +1,126 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.optim as optim
4
- import pandas as pd
5
- from torch.utils.data import Dataset, DataLoader
6
- from flask import Flask, request, jsonify, Response, stream_with_context
7
- from sklearn.model_selection import train_test_split
8
- import os
9
- import time
10
- import json
11
-
12
- url = "https://drive.google.com/uc?id=1RCZShB5ohy1HdU-mogcP16TbeVv9txpY"
13
- df = pd.read_csv(url)
14
- # Tokenizer
15
- class ScratchTokenizer:
16
- def __init__(self):
17
- self.word2idx = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
18
- self.idx2word = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
19
- self.vocab_size = 4
20
-
21
- def build_vocab(self, texts):
22
- for text in texts:
23
- for word in text.split():
24
- if word not in self.word2idx:
25
- self.word2idx[word] = self.vocab_size
26
- self.idx2word[self.vocab_size] = word
27
- self.vocab_size += 1
28
-
29
- def encode(self, text, max_len=200):
30
- tokens = [self.word2idx.get(word, 3) for word in text.split()]
31
- tokens = [1] + tokens[:max_len - 2] + [2]
32
- return tokens + [0] * (max_len - len(tokens))
33
-
34
- def decode(self, tokens):
35
- return " ".join([self.idx2word.get(idx, "<UNK>") for idx in tokens if idx > 0])
36
-
37
- # Train-Test Split
38
- train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
39
-
40
- # Initialize Tokenizer
41
- tokenizer = ScratchTokenizer()
42
- tokenizer.build_vocab(train_data["instruction"].tolist() + train_data["response"].tolist())
43
-
44
- # Model
45
- class GPTModel(nn.Module):
46
- def __init__(self, vocab_size, embed_size=256, num_heads=8, num_layers=6, max_len=200):
47
- super(GPTModel, self).__init__()
48
- self.embedding = nn.Embedding(vocab_size, embed_size)
49
- self.pos_embedding = nn.Parameter(torch.randn(1, max_len, embed_size))
50
- self.transformer = nn.TransformerDecoder(
51
- nn.TransformerDecoderLayer(d_model=embed_size, nhead=num_heads),
52
- num_layers=num_layers
53
- )
54
- self.fc_out = nn.Linear(embed_size, vocab_size)
55
-
56
- def forward(self, src, tgt):
57
- src_emb = self.embedding(src) + self.pos_embedding[:, :src.size(1), :]
58
- tgt_emb = self.embedding(tgt) + self.pos_embedding[:, :tgt.size(1), :]
59
- tgt_mask = nn.Transformer.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)
60
- output = self.transformer(tgt_emb.permute(1, 0, 2), src_emb.permute(1, 0, 2), tgt_mask=tgt_mask)
61
- return self.fc_out(output.permute(1, 0, 2))
62
-
63
- # Load model
64
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
65
- model = GPTModel(tokenizer.vocab_size).to(device)
66
-
67
- def load_model(model, path=r"C:\Users\nandu\Documents\travis\models\gpt_model.pth"):
68
- if os.path.exists(path):
69
- model.load_state_dict(torch.load(path, map_location=device))
70
- model.eval()
71
- print("Model loaded successfully.")
72
- else:
73
- print("Model file not found!")
74
-
75
- load_model(model)
76
-
77
- def generate_response_stream(model, query, max_length=200):
78
- model.eval()
79
- with torch.no_grad():
80
- src = torch.tensor(tokenizer.encode(query)).unsqueeze(0).to(device)
81
- tgt = torch.tensor([[1]]).to(device) # <SOS>
82
-
83
- for _ in range(max_length):
84
- output = model(src, tgt)
85
- next_token = output[:, -1, :].argmax(dim=-1, keepdim=True)
86
- tgt = torch.cat([tgt, next_token], dim=1)
87
-
88
- # Get the current word
89
- current_word = tokenizer.idx2word.get(next_token.item(), "<UNK>")
90
- if current_word != "<PAD>":
91
- yield current_word + " "
92
-
93
- if next_token.item() == 2: # <EOS>
94
- break
95
-
96
- # Flask App
97
- app = Flask(__name__)
98
-
99
- @app.route("/")
100
- def home():
101
- return {"message": "Streaming Transformer-based Response Generator API is running!"}
102
-
103
- @app.route("/intent")
104
- def intents():
105
- return jsonify({"intents": list(set(df['intent'].dropna()))})
106
-
107
- @app.route("/query", methods=["POST"])
108
- def query_model():
109
- data = request.get_json()
110
- query = data.get("query", "")
111
- if not query:
112
- return jsonify({"error": "Query cannot be empty"}), 400
113
-
114
- def generate():
115
- start = time.time()
116
- for word in generate_response_stream(model, query):
117
- response_data = {
118
- "word": word,
119
- "timestamp": time.time() - start
120
- }
121
- yield f"data: {json.dumps(response_data)}\n\n"
122
-
123
- return Response(stream_with_context(generate()), mimetype='text/event-stream')
124
-
125
- if __name__ == "__main__":
126
- app.run(host="0.0.0.0", port=7861)
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import pandas as pd
5
+ from torch.utils.data import Dataset, DataLoader
6
+ from flask import Flask, request, jsonify, Response, stream_with_context
7
+ from sklearn.model_selection import train_test_split
8
+ import os
9
+ import time
10
+ import json
11
+
12
+ url = "https://drive.google.com/uc?id=1RCZShB5ohy1HdU-mogcP16TbeVv9txpY"
13
+ df = pd.read_csv(url)
14
+ # Tokenizer
15
+ class ScratchTokenizer:
16
+ def __init__(self):
17
+ self.word2idx = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
18
+ self.idx2word = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
19
+ self.vocab_size = 4
20
+
21
+ def build_vocab(self, texts):
22
+ for text in texts:
23
+ for word in text.split():
24
+ if word not in self.word2idx:
25
+ self.word2idx[word] = self.vocab_size
26
+ self.idx2word[self.vocab_size] = word
27
+ self.vocab_size += 1
28
+
29
+ def encode(self, text, max_len=200):
30
+ tokens = [self.word2idx.get(word, 3) for word in text.split()]
31
+ tokens = [1] + tokens[:max_len - 2] + [2]
32
+ return tokens + [0] * (max_len - len(tokens))
33
+
34
+ def decode(self, tokens):
35
+ return " ".join([self.idx2word.get(idx, "<UNK>") for idx in tokens if idx > 0])
36
+
37
+ # Train-Test Split
38
+ train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
39
+
40
+ # Initialize Tokenizer
41
+ tokenizer = ScratchTokenizer()
42
+ tokenizer.build_vocab(train_data["instruction"].tolist() + train_data["response"].tolist())
43
+
44
+ # Model
45
+ class GPTModel(nn.Module):
46
+ def __init__(self, vocab_size, embed_size=256, num_heads=8, num_layers=6, max_len=200):
47
+ super(GPTModel, self).__init__()
48
+ self.embedding = nn.Embedding(vocab_size, embed_size)
49
+ self.pos_embedding = nn.Parameter(torch.randn(1, max_len, embed_size))
50
+ self.transformer = nn.TransformerDecoder(
51
+ nn.TransformerDecoderLayer(d_model=embed_size, nhead=num_heads),
52
+ num_layers=num_layers
53
+ )
54
+ self.fc_out = nn.Linear(embed_size, vocab_size)
55
+
56
+ def forward(self, src, tgt):
57
+ src_emb = self.embedding(src) + self.pos_embedding[:, :src.size(1), :]
58
+ tgt_emb = self.embedding(tgt) + self.pos_embedding[:, :tgt.size(1), :]
59
+ tgt_mask = nn.Transformer.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)
60
+ output = self.transformer(tgt_emb.permute(1, 0, 2), src_emb.permute(1, 0, 2), tgt_mask=tgt_mask)
61
+ return self.fc_out(output.permute(1, 0, 2))
62
+
63
+ # Load model
64
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
65
+ model = GPTModel(tokenizer.vocab_size).to(device)
66
+
67
+ def load_model(model, path=r"C:\Users\nandu\Documents\travis\models\gpt_model.pth"):
68
+ if os.path.exists(path):
69
+ model.load_state_dict(torch.load(path, map_location=device))
70
+ model.eval()
71
+ print("Model loaded successfully.")
72
+ else:
73
+ print("Model file not found!")
74
+
75
+ load_model(model)
76
+
77
+ def generate_response_stream(model, query, max_length=200):
78
+ model.eval()
79
+ with torch.no_grad():
80
+ src = torch.tensor(tokenizer.encode(query)).unsqueeze(0).to(device)
81
+ tgt = torch.tensor([[1]]).to(device) # <SOS>
82
+
83
+ for _ in range(max_length):
84
+ output = model(src, tgt)
85
+ next_token = output[:, -1, :].argmax(dim=-1, keepdim=True)
86
+ tgt = torch.cat([tgt, next_token], dim=1)
87
+
88
+ # Get the current word
89
+ current_word = tokenizer.idx2word.get(next_token.item(), "<UNK>")
90
+ if current_word != "<PAD>":
91
+ yield current_word + " "
92
+
93
+ if next_token.item() == 2: # <EOS>
94
+ break
95
+
96
+ # Flask App
97
+ app = Flask(__name__)
98
+
99
+ @app.route("/")
100
+ def home():
101
+ return {"message": "Streaming Transformer-based Response Generator API is running!"}
102
+
103
+ @app.route("/intent")
104
+ def intents():
105
+ return jsonify({"intents": list(set(df['intent'].dropna()))})
106
+
107
+ @app.route("/query", methods=["POST"])
108
+ def query_model():
109
+ data = request.get_json()
110
+ query = data.get("query", "")
111
+ if not query:
112
+ return jsonify({"error": "Query cannot be empty"}), 400
113
+
114
+ def generate():
115
+ start = time.time()
116
+ for word in generate_response_stream(model, query):
117
+ response_data = {
118
+ "word": word,
119
+ "timestamp": time.time() - start
120
+ }
121
+ yield f"data: {json.dumps(response_data)}\n\n"
122
+
123
+ return Response(stream_with_context(generate()), mimetype='text/event-stream')
124
+
125
+ if __name__ == "__main__":
126
+ app.run(host="0.0.0.0", port=7860)