Update response_1.py

#1
by shalem007 - opened
Files changed (1) hide show
  1. response_1.py +30 -29
response_1.py CHANGED
@@ -1,13 +1,16 @@
1
  import torch
2
  import torch.nn as nn
3
- import torch.optim as optim
4
  import pandas as pd
5
- from torch.utils.data import Dataset, DataLoader
6
  from flask import Flask, request, jsonify
7
  from sklearn.model_selection import train_test_split
8
  import os
9
  import time
10
 
 
 
 
 
11
  # Load data
12
  url = "https://drive.google.com/uc?id=1RCZShB5ohy1HdU-mogcP16TbeVv9txpY"
13
  df = pd.read_csv(url)
@@ -42,7 +45,7 @@ train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
42
  tokenizer = ScratchTokenizer()
43
  tokenizer.build_vocab(train_data["instruction"].tolist() + train_data["response"].tolist())
44
 
45
- # Dataset Class
46
  class TextDataset(Dataset):
47
  def __init__(self, data, tokenizer, max_len=200):
48
  self.data = data
@@ -78,7 +81,7 @@ class GPTModel(nn.Module):
78
  output = self.transformer(tgt_emb.permute(1, 0, 2), src_emb.permute(1, 0, 2), tgt_mask=tgt_mask)
79
  return self.fc_out(output.permute(1, 0, 2))
80
 
81
- # Load model
82
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
83
  model = GPTModel(tokenizer.vocab_size).to(device)
84
 
@@ -92,35 +95,33 @@ def load_model(model, path="gpt_model.pth"):
92
 
93
  load_model(model)
94
 
95
- # Generate Response
96
- # def generate_response(model, query, max_length=200):
97
- # model.eval()
98
- # src = torch.tensor(tokenizer.encode(query)).unsqueeze(0).to(device)
99
- # tgt = torch.tensor([[1]]).to(device) # <SOS>
100
- # for _ in range(max_length):
101
- # output = model(src, tgt)
102
- # next_word = output.argmax(-1)[:, -1].unsqueeze(1)
103
- # tgt = torch.cat([tgt, next_word], dim=1)
104
- # if next_word.item() == 2: # <EOS>
105
- # break
106
- # return tokenizer.decode(tgt.squeeze(0).tolist())
107
-
108
- def generate_response(model, query, max_length=200):
109
  model.eval()
110
- with torch.no_grad(): # Disable gradient tracking
111
  src = torch.tensor(tokenizer.encode(query)).unsqueeze(0).to(device)
112
  tgt = torch.tensor([[1]]).to(device) # <SOS>
113
-
114
- for _ in range(max_length):
115
- output = model(src, tgt)
116
- next_token = output[:, -1, :].argmax(dim=-1, keepdim=True)
117
- tgt = torch.cat([tgt, next_token], dim=1)
118
- if next_token.item() == 2: # <EOS>
119
- break
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  return tokenizer.decode(tgt.squeeze(0).tolist())
122
 
123
-
124
  # Flask App
125
  app = Flask(__name__)
126
 
@@ -130,7 +131,7 @@ def home():
130
 
131
  @app.route("/intent")
132
  def intents():
133
- return jsonify({"intents" :list(set(df['intent'].dropna()))})
134
 
135
  @app.route("/query", methods=["POST"])
136
  def query_model():
@@ -141,7 +142,7 @@ def query_model():
141
  start = time.time()
142
  response = generate_response(model, query)
143
  end = time.time()
144
- return jsonify({"query": query, "response": response,"response_time":(end-start)})
145
 
146
  if __name__ == "__main__":
147
  load_model(model)
 
1
  import torch
2
  import torch.nn as nn
 
3
  import pandas as pd
4
+ from torch.utils.data import Dataset
5
  from flask import Flask, request, jsonify
6
  from sklearn.model_selection import train_test_split
7
  import os
8
  import time
9
 
10
+ # Enable cudnn benchmark for better GPU performance
11
+ if torch.cuda.is_available():
12
+ torch.backends.cudnn.benchmark = True
13
+
14
  # Load data
15
  url = "https://drive.google.com/uc?id=1RCZShB5ohy1HdU-mogcP16TbeVv9txpY"
16
  df = pd.read_csv(url)
 
45
  tokenizer = ScratchTokenizer()
46
  tokenizer.build_vocab(train_data["instruction"].tolist() + train_data["response"].tolist())
47
 
48
+ # Dataset Class (not used in this file but kept for completeness)
49
  class TextDataset(Dataset):
50
  def __init__(self, data, tokenizer, max_len=200):
51
  self.data = data
 
81
  output = self.transformer(tgt_emb.permute(1, 0, 2), src_emb.permute(1, 0, 2), tgt_mask=tgt_mask)
82
  return self.fc_out(output.permute(1, 0, 2))
83
 
84
+ # Device setup
85
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
86
  model = GPTModel(tokenizer.vocab_size).to(device)
87
 
 
95
 
96
  load_model(model)
97
 
98
def generate_response(model, query, max_length=100):
    """Greedily decode a response to ``query``.

    Decoding starts from the ``<SOS>`` token id and repeatedly appends the
    arg-max token of the last position until ``<EOS>`` is produced or
    ``max_length`` tokens have been generated. Autocast (mixed precision) is
    enabled only when the module-level ``device`` is a CUDA device.

    Args:
        model: Module called as ``model(src, tgt)``; the last position of its
            output along dim 1 is used as the next-token logits.
        query: Input text, encoded with the module-level ``tokenizer``.
        max_length: Maximum number of tokens to generate.

    Returns:
        The string produced by ``tokenizer.decode`` for the generated ids
        (the full ``tgt`` sequence, starting with the ``<SOS>`` id).
    """
    sos_id = 1  # <SOS>
    eos_id = 2  # <EOS>

    model.eval()
    use_amp = device.type == "cuda"

    # A single loop serves both devices: autocast is a no-op when disabled,
    # so the CPU path runs in full precision exactly as before.
    with torch.no_grad(), torch.autocast(device_type=device.type, enabled=use_amp):
        src = torch.tensor(tokenizer.encode(query)).unsqueeze(0).to(device)
        tgt = torch.tensor([[sos_id]]).to(device)

        for _ in range(max_length):
            output = model(src, tgt)
            logits = output[:, -1, :]
            next_token = torch.argmax(logits, dim=-1, keepdim=True)
            tgt = torch.cat([tgt, next_token], dim=1)
            if next_token.item() == eos_id:
                break

    return tokenizer.decode(tgt.squeeze(0).tolist())
124
 
 
125
  # Flask App
126
  app = Flask(__name__)
127
 
 
131
 
132
@app.route("/intent")
def intents():
    """Return the distinct non-null values of the ``intent`` column as JSON."""
    unique_intents = set(df['intent'].dropna())
    payload = {"intents": list(unique_intents)}
    return jsonify(payload)
135
 
136
  @app.route("/query", methods=["POST"])
137
  def query_model():
 
142
  start = time.time()
143
  response = generate_response(model, query)
144
  end = time.time()
145
+ return jsonify({"query": query, "response": response, "response_time": end - start})
146
 
147
  if __name__ == "__main__":
148
  load_model(model)