Akshitha1 committed
Commit f7dab62 · verified · 1 Parent(s): 65dfdd3

Create app.py

Files changed (1):
1. app.py (+160, -0)
app.py ADDED
@@ -0,0 +1,160 @@
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from flask import Flask, request, jsonify
from sklearn.model_selection import train_test_split
import os

PORT = 7001
url = "https://drive.google.com/uc?id=1RCZShB5ohy1HdU-mogcP16TbeVv9txpY"
df = pd.read_csv(url)
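# Note: the CSV is expected to expose "instruction", "response", and
# "intent" columns; "instruction"/"response" form the training pairs
# below, and "intent" is only printed for inspection before serving.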

# Tokenizer (built from scratch): whitespace splitting plus a dynamically
# grown vocabulary with the special tokens <PAD>, <SOS>, <EOS>, <UNK>.
class ScratchTokenizer:
    def __init__(self):
        self.word2idx = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
        self.idx2word = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
        self.vocab_size = 4

    def build_vocab(self, texts):
        for text in texts:
            for word in text.split():
                if word not in self.word2idx:
                    self.word2idx[word] = self.vocab_size
                    self.idx2word[self.vocab_size] = word
                    self.vocab_size += 1

    def encode(self, text, max_len=200):
        # Unknown words map to <UNK> (3); truncate to leave room for the
        # <SOS>/<EOS> markers, then right-pad with <PAD> (0) to max_len.
        tokens = [self.word2idx.get(word, 3) for word in text.split()]
        tokens = [1] + tokens[:max_len - 2] + [2]
        return tokens + [0] * (max_len - len(tokens))

    def decode(self, tokens):
        # Drop <PAD> (index 0) when mapping ids back to text.
        return " ".join([self.idx2word.get(idx, "<UNK>") for idx in tokens if idx > 0])

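# Round-trip sketch for the tokenizer above (illustrative comments only,
# not executed; the sample sentence is an assumption):
#   tok = ScratchTokenizer()
#   tok.build_vocab(["hello world"])
#   ids = tok.encode("hello world", max_len=6)   # [1, 4, 5, 2, 0, 0]
#   tok.decode(ids)                              # '<SOS> hello world <EOS>'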
# Train-test split
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Initialize the tokenizer; the vocabulary is built from the training split only
tokenizer = ScratchTokenizer()
tokenizer.build_vocab(train_data["instruction"].tolist() + train_data["response"].tolist())

# Dataset class: pairs each instruction with its response, both encoded
# to fixed-length id sequences
class TextDataset(Dataset):
    def __init__(self, data, tokenizer, max_len=200):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        src_text = self.data.iloc[idx]["instruction"]
        tgt_text = self.data.iloc[idx]["response"]
        src = torch.tensor(self.tokenizer.encode(src_text), dtype=torch.long)
        tgt = torch.tensor(self.tokenizer.encode(tgt_text), dtype=torch.long)
        return src, tgt

# Build datasets and loaders
train_dataset = TextDataset(train_data, tokenizer)
test_dataset = TextDataset(test_data, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8)

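# Shape sketch (illustrative, not executed): with batch_size=8 and the
# default max_len=200, each full batch from train_loader is a pair of
# LongTensors of shape (8, 200), src ids and tgt ids:
#   src, tgt = next(iter(train_loader))
#   src.shape, tgt.shape   # (torch.Size([8, 200]), torch.Size([8, 200]))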
# Improved GPT-style transformer model (decoder-only)

class GPTModel(nn.Module):
    def __init__(self, vocab_size, embed_size=256, num_heads=8, num_layers=6, max_len=200):
        super(GPTModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.pos_embedding = nn.Parameter(torch.randn(1, max_len, embed_size))
        # nn.Transformer with num_encoder_layers=0 tries to access a
        # non-existent encoder layer, so the encoder is removed entirely
        # and only a decoder stack is kept.
        self.transformer = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(d_model=embed_size, nhead=num_heads),
            num_layers=num_layers,
        )
        self.fc_out = nn.Linear(embed_size, vocab_size)

    def forward(self, src, tgt):
        src_emb = self.embedding(src) + self.pos_embedding[:, :src.size(1), :]
        tgt_emb = self.embedding(tgt) + self.pos_embedding[:, :tgt.size(1), :]

        # Causal mask for auto-regressive decoding
        tgt_mask = nn.Transformer.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)

        # With the encoder gone, the source embeddings act as the decoder's
        # memory; the layers default to sequence-first, hence the permutes.
        output = self.transformer(tgt_emb.permute(1, 0, 2), src_emb.permute(1, 0, 2), tgt_mask=tgt_mask)
        return self.fc_out(output.permute(1, 0, 2))

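# Forward-pass shape sketch (illustrative, not executed): logits come back
# as (batch, tgt_len, vocab_size), one distribution per target position:
#   src = torch.zeros(2, 200, dtype=torch.long)
#   tgt = torch.zeros(2, 199, dtype=torch.long)
#   GPTModel(vocab_size=100)(src, tgt).shape   # torch.Size([2, 199, 100])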
# Initialize model, optimizer, and loss
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPTModel(tokenizer.vocab_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=2e-4)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# Load pretrained weights if a checkpoint exists
def load_model(model, path="gpt_model.pth"):
    if os.path.exists(path):
        model.load_state_dict(torch.load(path, map_location=device))
        model.to(device)
        model.eval()
        print("Model loaded successfully.")
    else:
        print("Model file not found!")

load_model(model)

# Training function: teacher forcing with the target shifted by one
# (inputs tgt[:, :-1], labels tgt[:, 1:])
def train_epoch(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for src, tgt in loader:
        src, tgt = src.to(device), tgt.to(device)
        optimizer.zero_grad()
        output = model(src, tgt[:, :-1])
        loss = criterion(output.reshape(-1, tokenizer.vocab_size), tgt[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

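# train_epoch is defined but never called in this file; a minimal driver
# sketch (illustrative; the epoch count is an assumption, the checkpoint
# path matches load_model above):
#   for epoch in range(10):
#       loss = train_epoch(model, train_loader, optimizer, criterion, device)
#       print(f"Epoch {epoch + 1}: train loss {loss:.4f}")
#       torch.save(model.state_dict(), "gpt_model.pth")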
# Generate a response by greedy auto-regressive decoding: start from <SOS>
# and append the arg-max token until <EOS> or max_length
def generate_response(model, query, max_length=200):
    model.eval()
    src = torch.tensor(tokenizer.encode(query)).unsqueeze(0).to(device)
    tgt = torch.tensor([[1]]).to(device)  # <SOS>

    with torch.no_grad():  # inference only; no gradients needed
        for _ in range(max_length):
            output = model(src, tgt)
            next_word = output.argmax(-1)[:, -1].unsqueeze(1)
            tgt = torch.cat([tgt, next_word], dim=1)
            if next_word.item() == 2:  # <EOS>
                break

    return tokenizer.decode(tgt.squeeze(0).tolist())

# Debug: list the intents present in the dataset
print(set(df["intent"]))

# Flask API
app = Flask(__name__)

@app.route("/")
def home():
    return "Transformer-based Response Generator API is running!"

@app.route("/query", methods=["POST"])
def query_model():
    # silent=True returns None instead of raising on a malformed body
    data = request.get_json(silent=True) or {}
    query = data.get("query", "")

    if not query:
        return jsonify({"error": "Query cannot be empty"}), 400

    response = generate_response(model, query)
    return jsonify({"query": query, "response": response})
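
# PORT is defined at the top but never used, so the routes above are never
# served; a minimal entry point sketch (the host choice is an assumption):
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=PORT)

# Example request once the server is up (the query text is illustrative):
#   curl -X POST http://localhost:7001/query \
#        -H "Content-Type: application/json" \
#        -d '{"query": "how do I cancel my order"}'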