vinay0123 commited on
Commit
0fbd01a
·
verified ·
1 Parent(s): d932d5c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +126 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import pandas as pd
5
+ from torch.utils.data import Dataset, DataLoader
6
+ from flask import Flask, request, jsonify, Response, stream_with_context
7
+ from sklearn.model_selection import train_test_split
8
+ import os
9
+ import time
10
+ import json
11
+
12
# Download the chatbot training CSV from Google Drive at import time.
# NOTE(review): module-level network I/O — the app fails to start if this
# URL is unreachable; consider lazy/cached loading. TODO confirm intended.
url = "https://drive.google.com/uc?id=1RCZShB5ohy1HdU-mogcP16TbeVv9txpY"
df = pd.read_csv(url)
14
# Tokenizer
class ScratchTokenizer:
    """Minimal whitespace tokenizer with a dynamically grown vocabulary.

    Reserves four special ids: <PAD>=0, <SOS>=1, <EOS>=2, <UNK>=3; every
    new word seen by build_vocab() gets the next free index.
    """

    def __init__(self):
        specials = ["<PAD>", "<SOS>", "<EOS>", "<UNK>"]
        self.word2idx = {word: idx for idx, word in enumerate(specials)}
        self.idx2word = {idx: word for idx, word in enumerate(specials)}
        self.vocab_size = len(specials)

    def build_vocab(self, texts):
        """Assign an index to every previously unseen whitespace-separated word."""
        for sentence in texts:
            for token in sentence.split():
                if token in self.word2idx:
                    continue
                new_idx = self.vocab_size
                self.word2idx[token] = new_idx
                self.idx2word[new_idx] = token
                self.vocab_size = new_idx + 1

    def encode(self, text, max_len=200):
        """Encode `text` as a fixed-length id list.

        Layout: <SOS> + (up to max_len-2 word ids, unknowns -> <UNK>) + <EOS>,
        right-padded with <PAD> to exactly `max_len` entries.
        """
        ids = [self.word2idx.get(token, 3) for token in text.split()]
        ids = [1] + ids[: max_len - 2] + [2]
        padding = [0] * (max_len - len(ids))
        return ids + padding

    def decode(self, tokens):
        """Map ids back to a space-joined string, dropping only <PAD> (id 0)."""
        words = [self.idx2word.get(idx, "<UNK>") for idx in tokens if idx > 0]
        return " ".join(words)
36
+
37
# Hold out 20% of rows for evaluation; fixed seed for reproducibility.
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Build the vocabulary from the TRAINING split only (instruction + response
# columns), so words unseen in training map to <UNK> at inference time.
tokenizer = ScratchTokenizer()
tokenizer.build_vocab(train_data["instruction"].tolist() + train_data["response"].tolist())
43
+
44
# Model
class GPTModel(nn.Module):
    """Transformer text generator built on nn.TransformerDecoder.

    The embedded prompt (`src`) is used directly as the decoder "memory"
    (there is no separate encoder stack); the response (`tgt`) is decoded
    autoregressively under a causal mask.
    """

    def __init__(self, vocab_size, embed_size=256, num_heads=8, num_layers=6, max_len=200):
        super(GPTModel, self).__init__()
        # Token embedding table shared by prompt and response sequences.
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # Learned positional embedding: one row per position up to max_len.
        self.pos_embedding = nn.Parameter(torch.randn(1, max_len, embed_size))
        self.transformer = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(d_model=embed_size, nhead=num_heads),
            num_layers=num_layers
        )
        self.fc_out = nn.Linear(embed_size, vocab_size)

    def forward(self, src, tgt):
        """Return next-token logits with shape (batch, tgt_len, vocab_size)."""
        memory = self.embedding(src) + self.pos_embedding[:, : src.size(1), :]
        decoder_in = self.embedding(tgt) + self.pos_embedding[:, : tgt.size(1), :]
        # Causal mask: position i may not attend to positions > i.
        causal_mask = nn.Transformer.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)
        # nn.TransformerDecoder defaults to batch_first=False, hence the
        # (batch, seq, dim) -> (seq, batch, dim) permutes around the call.
        hidden = self.transformer(
            decoder_in.permute(1, 0, 2),
            memory.permute(1, 0, 2),
            tgt_mask=causal_mask,
        )
        return self.fc_out(hidden.permute(1, 0, 2))
62
+
63
# Load model
# Pick GPU when available; everything below is moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Global model instance sized to the built vocabulary; weights stay random
# until load_model() restores a checkpoint.
model = GPTModel(tokenizer.vocab_size).to(device)
66
+
67
def load_model(model, path="gpt_model.pth"):
    """Restore `model` weights from `path` if the checkpoint file exists.

    Best-effort: a missing checkpoint only prints a warning and leaves the
    randomly initialized weights in place. On success the model is switched
    to eval mode.
    """
    if not os.path.exists(path):
        print("Model file not found!")
        return
    state = torch.load(path, map_location=device)
    model.load_state_dict(state)
    model.eval()
    print("Model loaded successfully.")
74
+
75
+
76
def generate_response_stream(model, query, max_length=200):
    """Greedy-decode a response to `query`, yielding one word at a time.

    Encodes the query with the global `tokenizer`, then autoregressively
    feeds the growing target sequence through `model`, always taking the
    argmax token. Stops at <EOS> or after `max_length` steps.

    Yields:
        str: each generated word followed by a single space.
    """
    model.eval()
    with torch.no_grad():
        src = torch.tensor(tokenizer.encode(query)).unsqueeze(0).to(device)
        tgt = torch.tensor([[1]]).to(device)  # start with <SOS>

        for _ in range(max_length):
            output = model(src, tgt)
            # Greedy pick of the most likely next token.
            next_token = output[:, -1, :].argmax(dim=-1, keepdim=True)
            tgt = torch.cat([tgt, next_token], dim=1)

            token_id = next_token.item()
            # Fix: check for <EOS> BEFORE yielding — the original yielded
            # first, so the literal string "<EOS> " was streamed to clients.
            if token_id == 2:  # <EOS>
                break

            current_word = tokenizer.idx2word.get(token_id, "<UNK>")
            if current_word != "<PAD>":
                yield current_word + " "
94
+
95
# Flask App
# Flask application serving the streaming generation API below.
app = Flask(__name__)
97
+
98
@app.route("/")
def home():
    """Health-check endpoint confirming the API is up."""
    status = {"message": "Streaming Transformer-based Response Generator API is running!"}
    return status
101
+
102
@app.route("/intent")
def intents():
    """List the distinct non-null intents present in the dataset.

    NOTE: set ordering is arbitrary, so the list order may vary per call.
    """
    unique_intents = set(df['intent'].dropna())
    return jsonify({"intents": list(unique_intents)})
105
+
106
@app.route("/query", methods=["POST"])
def query_model():
    """POST {"query": ...} -> Server-Sent Events stream of generated words.

    Each SSE event is `data: {"word": ..., "timestamp": ...}` where
    `timestamp` is seconds elapsed since generation started. Returns 400
    when the body carries no non-empty "query".
    """
    # Fix: request.get_json() returns None (or raises) for a missing or
    # non-JSON body, which previously crashed on data.get(...) with a 500;
    # silent=True + empty-dict fallback turns that into the intended 400.
    data = request.get_json(silent=True) or {}
    query = data.get("query", "")
    if not query:
        return jsonify({"error": "Query cannot be empty"}), 400

    def generate():
        # Words are timestamped relative to the start of generation.
        start = time.time()
        for word in generate_response_stream(model, query):
            response_data = {
                "word": word,
                "timestamp": time.time() - start
            }
            # SSE wire format: "data: <json>\n\n" per event.
            yield f"data: {json.dumps(response_data)}\n\n"

    return Response(stream_with_context(generate()), mimetype='text/event-stream')
123
+
124
if __name__ == "__main__":
    # Restore checkpoint weights (best-effort) before serving requests.
    load_model(model)
    # Bind to all interfaces on port 7860 — presumably the Hugging Face
    # Spaces convention; confirm for other deployment targets.
    app.run(host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch>=2.0.0
2
+ pandas>=1.3.0
3
+ scikit-learn>=1.0
4
+ flask>=2.0
5
+ flask-cors>=3.0