Spaces:

amaresh8053
/

ubuntu-chatbot-seq2seq

Sleeping

App Files Files Community

amaresh8053 committed on Dec 14, 2025

Commit

9391674

verified ·

1 Parent(s): 9aba057

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -16

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ nltk.download(['punkt', 'punkt_tab'], quiet=True)
 DEVICE = torch.device("cpu")
-CACHE_FILE = "ubuntu_data_cache.pt"       # To get the Vocab from cache
 MODEL_FILE_WITH_ATTN = "ubuntu_chatbot_with_attn.pt"     # trained model with attn
 MODEL_FILE_NO_ATTN = "ubuntu_chatbot_no_attn.pt"  # trained model without attn
@@ -53,7 +53,7 @@ class Vocab:
 # ------------- load vocab from cache -------------
 print("Loading vocab...")
-data = torch.load("ubuntu_vocab_only.pt", map_location="cpu", weights_only=False)
 vocab = data["vocab"]
 PAD_IDX = vocab.word2idx["<PAD>"]
@@ -115,18 +115,12 @@ class Decoder_with_attn(nn.Module):
         self.norm = nn.LayerNorm(512)
     def forward(self, inp, hidden, enc_out):
-        """
-        inp:    [B, 1]      token IDs
-        hidden: [2, B, 512] encoder hidden (num_layers, batch, hidden)
-        enc_out:[B, T, 512]
-        """
         e = self.dropout(self.emb(inp))
         # attention over encoder outputs
         energy = self.attn(enc_out)
         # use top layer hidden state for attention
-        attn_scores = torch.bmm(hidden[-1].unsqueeze(1), energy.transpose(1, 2))  # [B,1,T]
-        attn_weights = F.softmax(attn_scores.squeeze(1), dim=-1).unsqueeze(1)     # [B,1,T]
         ctx = torch.bmm(attn_weights, enc_out)
         x = torch.cat((e, ctx), dim=-1)
@@ -160,10 +154,10 @@ class Model_with_attn(nn.Module):
     def forward(self, src, tgt, tf=0.5):
         enc_out, h = self.encoder(src)
-        dec_in = tgt[:, 0]                # <SOS>
         outs = []
         for t in range(1, tgt.size(1)):
-            dec_in = dec_in.unsqueeze(1)  # [B,1]
             out, h = self.decoder(dec_in, h, enc_out)
             outs.append(out)
             use_tf = random.random() < tf
@@ -178,10 +172,10 @@ class Model_no_attn(nn.Module):
     def forward(self, src, tgt, tf=0.5):
         enc_out, h = self.encoder(src)
-        dec_in = tgt[:, 0]                # <SOS>
         outs = []
         for t in range(1, tgt.size(1)):
-            dec_in = dec_in.unsqueeze(1)  # [B,1]
             out, h = self.decoder(dec_in, h)
             outs.append(out)
             use_tf = random.random() < tf
@@ -266,7 +260,6 @@ def beam_generate_v2(model, src_tensor, beam=5, max_len=50, alpha=0.7):
 # ------------- wrapper to go from user text → reply -------------
 def generate_reply_attn(user_text: str) -> str:
-    # replicate notebook logic: reverse the input sentence
     user_text_rev = reverse(user_text)
     tokens = tokenize(user_text_rev)
     ids = [SOS_IDX] + [vocab.word2idx.get(w, UNK_IDX) for w in tokens] + [EOS_IDX]
@@ -277,7 +270,6 @@ def generate_reply_attn(user_text: str) -> str:
     return reply
 def generate_reply_no_attn(user_text: str) -> str:
-    # replicate notebook logic: reverse the input sentence
     user_text_rev = reverse(user_text)
     tokens = tokenize(user_text_rev)
     ids = [SOS_IDX] + [vocab.word2idx.get(w, UNK_IDX) for w in tokens] + [EOS_IDX]

 DEVICE = torch.device("cpu")
+VOCAB_FILE = "ubuntu_vocab_only.pt"       # To get the Vocab from cache
 MODEL_FILE_WITH_ATTN = "ubuntu_chatbot_with_attn.pt"     # trained model with attn
 MODEL_FILE_NO_ATTN = "ubuntu_chatbot_no_attn.pt"  # trained model without attn
 # ------------- load vocab from cache -------------
 print("Loading vocab...")
+data = torch.load(VOCAB_FILE, map_location="cpu", weights_only=False)
 vocab = data["vocab"]
 PAD_IDX = vocab.word2idx["<PAD>"]
         self.norm = nn.LayerNorm(512)
     def forward(self, inp, hidden, enc_out):
         e = self.dropout(self.emb(inp))
         # attention over encoder outputs
         energy = self.attn(enc_out)
         # use top layer hidden state for attention
+        attn_scores = torch.bmm(hidden[-1].unsqueeze(1), energy.transpose(1, 2))
+        attn_weights = F.softmax(attn_scores.squeeze(1), dim=-1).unsqueeze(1)
         ctx = torch.bmm(attn_weights, enc_out)
         x = torch.cat((e, ctx), dim=-1)
     def forward(self, src, tgt, tf=0.5):
         enc_out, h = self.encoder(src)
+        dec_in = tgt[:, 0]
         outs = []
         for t in range(1, tgt.size(1)):
+            dec_in = dec_in.unsqueeze(1)
             out, h = self.decoder(dec_in, h, enc_out)
             outs.append(out)
             use_tf = random.random() < tf
     def forward(self, src, tgt, tf=0.5):
         enc_out, h = self.encoder(src)
+        dec_in = tgt[:, 0]
         outs = []
         for t in range(1, tgt.size(1)):
+            dec_in = dec_in.unsqueeze(1)
             out, h = self.decoder(dec_in, h)
             outs.append(out)
             use_tf = random.random() < tf
 # ------------- wrapper to go from user text → reply -------------
 def generate_reply_attn(user_text: str) -> str:
     user_text_rev = reverse(user_text)
     tokens = tokenize(user_text_rev)
     ids = [SOS_IDX] + [vocab.word2idx.get(w, UNK_IDX) for w in tokens] + [EOS_IDX]
     return reply
 def generate_reply_no_attn(user_text: str) -> str:
     user_text_rev = reverse(user_text)
     tokens = tokenize(user_text_rev)
     ids = [SOS_IDX] + [vocab.word2idx.get(w, UNK_IDX) for w in tokens] + [EOS_IDX]