vinay0123 committed on
Commit
efba25f
·
verified ·
1 Parent(s): 104c9ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -24
app.py CHANGED
@@ -20,6 +20,7 @@ import os
20
  import time
21
  import json
22
  import io
 
23
 
24
  # Set PyTorch to use all available CPU threads
25
  torch.set_num_threads(os.cpu_count())
@@ -98,17 +99,35 @@ test_dataset = TextDataset(test_data, tokenizer)
98
  train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
99
  test_loader = DataLoader(test_dataset, batch_size=8)
100
 
101
- # Improved GPT-Style Transformer Model with optimizations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  class GPTModel(nn.Module):
103
- def __init__(self, vocab_size, embed_size=256, num_heads=8, num_layers=6, max_len=200):
104
  super(GPTModel, self).__init__()
 
 
 
105
  self.embedding = nn.Embedding(vocab_size, embed_size)
106
  self.pos_embedding = nn.Parameter(torch.randn(1, max_len, embed_size))
107
  self.transformer = nn.TransformerDecoder(
108
  nn.TransformerDecoderLayer(
109
  d_model=embed_size,
110
  nhead=num_heads,
111
- dim_feedforward=embed_size * 4, # Increased feedforward dimension
112
  dropout=0.1,
113
  batch_first=True # Enable batch first for better performance
114
  ),
@@ -134,27 +153,36 @@ class GPTModel(nn.Module):
134
  output = self.transformer(tgt_emb, src_emb, tgt_mask=tgt_mask)
135
  return self.fc_out(output)
136
 
137
- # Initialize Model with optimizations
138
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
139
  print(f"Using device: {device}")
140
 
141
- model = GPTModel(tokenizer.vocab_size).to(device)
 
 
 
 
 
 
 
 
142
  optimizer = optim.AdamW(model.parameters(), lr=2e-4, weight_decay=0.01) # Added weight decay
143
  criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
144
 
145
- # Remove JIT compilation as it can cause issues with dynamic models
146
- # model = torch.jit.script(model) # Commented out
147
-
148
  def load_model(model, path="gpt_model.pth"):
149
  if os.path.exists(path):
150
  try:
151
- model.load_state_dict(torch.load(path, map_location=device))
 
152
  model.eval()
153
- print("Model loaded successfully.")
 
154
  except Exception as e:
155
- print(f"Error loading model: {e}")
 
156
  else:
157
- print("Model file not found!")
 
158
 
159
  load_model(model)
160
 
@@ -215,6 +243,18 @@ def build_vocab(sentences, tokenizer, min_freq):
215
  vocab[word] = len(vocab)
216
  return vocab
217
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  # ==== Dataset ====
219
  class TranslationDataset(Dataset):
220
  def __init__(self, df, en_vocab, te_vocab):
@@ -278,19 +318,32 @@ def translate(model, sentence, en_vocab, te_vocab, te_inv_vocab, max_len=MAX_LEN
278
  translated = [te_inv_vocab[idx.item()] for idx in tgt_ids[0][1:]]
279
  return ' '.join(translated[:-1]) if translated[-1] == '<eos>' else ' '.join(translated)
280
 
281
- # ==== Load Translation Data ====
282
  try:
283
  df_telugu = pd.read_csv("merged_translated_responses.csv")
284
  df_telugu = df_telugu.dropna(subset=['response', 'translated_response'])
285
  df_telugu['response'] = df_telugu['response'].astype(str)
286
  df_telugu['translated_response'] = df_telugu['translated_response'].astype(str)
287
 
288
- # Build vocabularies
289
- en_vocab = build_vocab(df_telugu['response'], tokenize_en, MIN_FREQ)
290
- te_vocab = build_vocab(df_telugu['translated_response'], tokenize_te, MIN_FREQ)
 
 
 
 
 
 
 
 
 
 
 
 
291
  te_inv_vocab = {idx: tok for tok, idx in te_vocab.items()}
292
 
293
- print("Telugu translation dataset loaded successfully")
 
294
  translation_available = True
295
  except Exception as e:
296
  print(f"Error loading Telugu dataset: {e}")
@@ -300,17 +353,47 @@ except Exception as e:
300
  te_inv_vocab = {idx: tok for tok, idx in te_vocab.items()}
301
  translation_available = False
302
 
303
- # Initialize Translation Model
304
  model_telugu = Seq2SeqTransformer(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS, EMB_SIZE,
305
  len(en_vocab), len(te_vocab), NHEAD, FFN_HID_DIM).to(DEVICE)
306
 
307
  # Load saved weights for translation model
308
- try:
309
- model_telugu.load_state_dict(torch.load("english_telugu_transformer.pth", map_location=torch.device('cpu')))
310
- model_telugu.eval()
311
- print("Telugu translation model loaded successfully")
312
- except Exception as e:
313
- print(f"Error loading Telugu translation model: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  translation_available = False
315
 
316
  # Flask App
@@ -378,6 +461,7 @@ def translate_text():
378
  def generate_text():
379
  data = request.get_json()
380
  query = data.get("query", "")
 
381
  if not query:
382
  return jsonify({"error": "Query cannot be empty"}), 400
383
 
@@ -513,4 +597,5 @@ def get_audio():
513
  if __name__ == "__main__":
514
  print("Starting Flask application...")
515
  print(f"Translation service available: {translation_available}")
 
516
  app.run(host="0.0.0.0", debug=True)
 
20
  import time
21
  import json
22
  import io
23
+ import pickle
24
 
25
  # Set PyTorch to use all available CPU threads
26
  torch.set_num_threads(os.cpu_count())
 
99
  train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
100
  test_loader = DataLoader(test_dataset, batch_size=8)
101
 
102
+ # Function to detect model architecture from saved file
103
def detect_model_architecture(model_path, default=(256, 1024)):
    """Infer ``(embed_size, feedforward_dim)`` from a saved state dict.

    The weight of the first transformer layer's ``linear1`` has shape
    ``(feedforward_dim, embed_size)``, so both values are read from it.

    Args:
        model_path: Path of the checkpoint handed to ``torch.load``.
        default: Pair returned when the checkpoint cannot be read or does
            not contain a matching key.

    Returns:
        A tuple ``(embed_size, feedforward_dim)``.
    """
    marker = 'transformer.layers.0.linear1.weight'
    try:
        # NOTE(review): torch.load unpickles the file, so only point this at
        # checkpoints from a trusted source.
        checkpoint = torch.load(model_path, map_location='cpu')
        for key, weight in checkpoint.items():
            # Substring match so prefixed keys are also recognised.
            if marker in key:
                feedforward_dim = weight.shape[0]  # output features of linear1
                embed_size = weight.shape[1]  # input features of linear1
                return embed_size, feedforward_dim
    except Exception as e:
        # Best-effort probe: report the problem and fall back to the default.
        print(f"Could not detect architecture: {e}")
    return default
116
+
117
+ # Improved GPT-Style Transformer Model with configurable architecture
118
  class GPTModel(nn.Module):
119
+ def __init__(self, vocab_size, embed_size=256, num_heads=8, num_layers=6, max_len=200, feedforward_dim=None):
120
  super(GPTModel, self).__init__()
121
+ if feedforward_dim is None:
122
+ feedforward_dim = embed_size * 4
123
+
124
  self.embedding = nn.Embedding(vocab_size, embed_size)
125
  self.pos_embedding = nn.Parameter(torch.randn(1, max_len, embed_size))
126
  self.transformer = nn.TransformerDecoder(
127
  nn.TransformerDecoderLayer(
128
  d_model=embed_size,
129
  nhead=num_heads,
130
+ dim_feedforward=feedforward_dim, # Use detected or provided feedforward dimension
131
  dropout=0.1,
132
  batch_first=True # Enable batch first for better performance
133
  ),
 
153
  output = self.transformer(tgt_emb, src_emb, tgt_mask=tgt_mask)
154
  return self.fc_out(output)
155
 
156
# Select the compute device, then build the GPT model. When a checkpoint is on
# disk, its dimensions are detected first and passed to the constructor.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model_path = "gpt_model.pth"
if not os.path.exists(model_path):
    # No checkpoint available: rely on the constructor defaults.
    model = GPTModel(tokenizer.vocab_size).to(device)
else:
    embed_size, feedforward_dim = detect_model_architecture(model_path)
    print(f"Detected model architecture: embed_size={embed_size}, feedforward_dim={feedforward_dim}")
    model = GPTModel(
        tokenizer.vocab_size,
        embed_size=embed_size,
        feedforward_dim=feedforward_dim,
    ).to(device)

# AdamW with weight decay, and a label-smoothed cross-entropy loss.
optimizer = optim.AdamW(model.parameters(), lr=2e-4, weight_decay=0.01)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
171
 
 
 
 
172
def load_model(model, path="gpt_model.pth"):
    """Load weights from *path* into *model* and switch it to eval mode.

    Returns True on success. Returns False, after printing a message, when
    the file does not exist or loading raises an exception.
    """
    if not os.path.exists(path):
        print("GPT Model file not found!")
        return False

    try:
        state = torch.load(path, map_location=device)
        model.load_state_dict(state)
        model.eval()
        print("GPT Model loaded successfully.")
    except Exception as err:
        print(f"Error loading GPT model: {err}")
        return False
    return True
186
 
187
  load_model(model)
188
 
 
243
  vocab[word] = len(vocab)
244
  return vocab
245
 
246
+ # Save and load vocabulary functions
247
def save_vocab(vocab, path):
    """Pickle *vocab* and write it to *path*, replacing any existing file."""
    with open(path, 'wb') as out:
        out.write(pickle.dumps(vocab))
250
+
251
def load_vocab(path):
    """Return the object pickled at *path*, or ``None`` if it cannot be loaded.

    A missing file is the normal "nothing cached yet" case and returns
    ``None`` silently. Any other failure (unreadable or corrupt file) is
    reported and also returns ``None``, so the caller can fall back.
    """
    try:
        with open(path, 'rb') as f:
            # NOTE(review): pickle.load can execute arbitrary code; only
            # load vocabulary files this application wrote itself.
            return pickle.load(f)
    except FileNotFoundError:
        return None
    except Exception as e:
        # Unpickling can raise many exception types. `Exception` (not a bare
        # `except`) keeps the best-effort behaviour while letting
        # KeyboardInterrupt/SystemExit propagate.
        print(f"Could not load vocabulary from {path}: {e}")
        return None
257
+
258
  # ==== Dataset ====
259
  class TranslationDataset(Dataset):
260
  def __init__(self, df, en_vocab, te_vocab):
 
318
  translated = [te_inv_vocab[idx.item()] for idx in tgt_ids[0][1:]]
319
  return ' '.join(translated[:-1]) if translated[-1] == '<eos>' else ' '.join(translated)
320
 
321
+ # ==== Load Translation Data and Vocabularies ====
322
  try:
323
  df_telugu = pd.read_csv("merged_translated_responses.csv")
324
  df_telugu = df_telugu.dropna(subset=['response', 'translated_response'])
325
  df_telugu['response'] = df_telugu['response'].astype(str)
326
  df_telugu['translated_response'] = df_telugu['translated_response'].astype(str)
327
 
328
+ # Try to load saved vocabularies first
329
+ en_vocab = load_vocab('en_vocab.pkl')
330
+ te_vocab = load_vocab('te_vocab.pkl')
331
+
332
+ if en_vocab is None or te_vocab is None:
333
+ print("Building new vocabularies...")
334
+ # Build vocabularies
335
+ en_vocab = build_vocab(df_telugu['response'], tokenize_en, MIN_FREQ)
336
+ te_vocab = build_vocab(df_telugu['translated_response'], tokenize_te, MIN_FREQ)
337
+ # Save vocabularies
338
+ save_vocab(en_vocab, 'en_vocab.pkl')
339
+ save_vocab(te_vocab, 'te_vocab.pkl')
340
+ else:
341
+ print("Loaded saved vocabularies")
342
+
343
  te_inv_vocab = {idx: tok for tok, idx in te_vocab.items()}
344
 
345
+ print(f"Telugu translation dataset loaded successfully")
346
+ print(f"English vocab size: {len(en_vocab)}, Telugu vocab size: {len(te_vocab)}")
347
  translation_available = True
348
  except Exception as e:
349
  print(f"Error loading Telugu dataset: {e}")
 
353
  te_inv_vocab = {idx: tok for tok, idx in te_vocab.items()}
354
  translation_available = False
355
 
356
+ # Initialize Translation Model with correct vocabulary sizes
357
  model_telugu = Seq2SeqTransformer(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS, EMB_SIZE,
358
  len(en_vocab), len(te_vocab), NHEAD, FFN_HID_DIM).to(DEVICE)
359
 
360
  # Load saved weights for translation model
361
def load_telugu_model():
    """Load the saved English-to-Telugu weights into the global ``model_telugu``.

    When the checkpoint's embedding tables have different vocabulary sizes
    than the current ``en_vocab``/``te_vocab``, ``model_telugu`` is rebuilt
    with the checkpoint's sizes before the weights are loaded.

    Returns:
        True when the weights were loaded; False when the file is missing or
        loading raised an exception (a message is printed in both cases).
    """
    # Declared up front because the model may be rebound below.
    global model_telugu

    model_path = "english_telugu_transformer.pth"
    if not os.path.exists(model_path):
        print("Telugu model file not found!")
        return False

    try:
        # NOTE(review): torch.load unpickles the file; use trusted checkpoints only.
        checkpoint = torch.load(model_path, map_location=torch.device('cpu'))

        src_key = 'src_tok_emb.weight'
        tgt_key = 'tgt_tok_emb.weight'
        # Compare sizes only when both embedding tables are present, so a
        # checkpoint with just one of them does not raise KeyError here.
        # NOTE(review): confirm these key names match Seq2SeqTransformer's
        # state dict; if they never match, this check is silently skipped.
        if src_key in checkpoint and tgt_key in checkpoint:
            saved_en_vocab_size = checkpoint[src_key].shape[0]
            saved_te_vocab_size = checkpoint[tgt_key].shape[0]
            current_en_vocab_size = len(en_vocab)
            current_te_vocab_size = len(te_vocab)

            print(f"Saved model vocabs - EN: {saved_en_vocab_size}, TE: {saved_te_vocab_size}")
            print(f"Current model vocabs - EN: {current_en_vocab_size}, TE: {current_te_vocab_size}")

            if (saved_en_vocab_size, saved_te_vocab_size) != (current_en_vocab_size, current_te_vocab_size):
                print("Vocabulary size mismatch! Creating new model with saved vocabulary sizes...")
                # Resizing lets the weights load, but it does not realign the
                # token ids in en_vocab/te_vocab with the saved embeddings.
                print("Warning: current vocabularies may not match the token ids the saved model was trained with.")
                model_telugu = Seq2SeqTransformer(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS, EMB_SIZE,
                                                  saved_en_vocab_size, saved_te_vocab_size,
                                                  NHEAD, FFN_HID_DIM).to(DEVICE)

        model_telugu.load_state_dict(checkpoint)
        model_telugu.eval()
        print("Telugu translation model loaded successfully")
        return True
    except Exception as e:
        print(f"Error loading Telugu translation model: {e}")
        return False
393
+
394
+ # Load Telugu model
395
+ telugu_model_loaded = load_telugu_model()
396
+ if not telugu_model_loaded:
397
  translation_available = False
398
 
399
  # Flask App
 
461
  def generate_text():
462
  data = request.get_json()
463
  query = data.get("query", "")
464
+ print("entered /generate")
465
  if not query:
466
  return jsonify({"error": "Query cannot be empty"}), 400
467
 
 
597
if __name__ == "__main__":
    print("Starting Flask application...")
    print(f"Translation service available: {translation_available}")
    print(f"Telugu model loaded: {telugu_model_loaded}")
    # The server listens on all interfaces, and debug mode enables the
    # Werkzeug interactive debugger, which allows arbitrary code execution.
    # Keep it off unless explicitly requested with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", debug=debug_enabled)