amaresh8053 committed on
Commit
5f9f2be
·
1 Parent(s): 12fcc4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -18
app.py CHANGED
@@ -14,8 +14,8 @@ nltk.download(['punkt', 'punkt_tab'], quiet=True)
14
 
15
  DEVICE = torch.device("cpu")
16
 
17
- CACHE_FILE = "ubuntu_data_cache.pt" # from your notebook
18
- MODEL_FILE = "ubuntu_chatbot_best.pt" # trained model checkpoint
19
 
20
 
21
  # ------------- tokenization + helpers -------------
@@ -93,21 +93,21 @@ class Encoder(nn.Module):
93
  )
94
  # projection from 1024 (2 * 512) back to 512
95
  self.fc = nn.Linear(1024, 512)
96
- self.norm = nn.LayerNorm(512) # defined in notebook (even if not used there)
97
 
98
  def forward(self, x):
99
  # x: [B, T]
100
- e = self.emb(x) # [B, T, 256]
101
- out, h = self.gru(e) # out:[B,T,1024], h:[4,B,512] (2 layers * 2 dirs)
102
 
103
- # project encoder outputs back to 512
104
- out = self.fc(out) # [B,T,512]
105
 
106
- # combine directions in h: reshape [layers*dirs, B, H] -> [layers, dirs, B, H]
107
- h = h.view(2, 2, h.size(1), -1) # [2,2,B,512]
108
- h = torch.sum(h, dim=1) # sum over directions -> [2,B,512]
109
 
110
- return out, h # enc_out:[B,T,512], h:[2,B,512]
111
 
112
 
113
  class Decoder(nn.Module):
@@ -132,19 +132,19 @@ class Decoder(nn.Module):
132
  hidden: [2, B, 512] encoder hidden (num_layers, batch, hidden)
133
  enc_out:[B, T, 512]
134
  """
135
- e = self.dropout(self.emb(inp)) # [B,1,256]
136
 
137
  # attention over encoder outputs
138
- energy = self.attn(enc_out) # [B,T,512]
139
  # use top layer hidden state for attention
140
  attn_scores = torch.bmm(hidden[-1].unsqueeze(1), energy.transpose(1, 2)) # [B,1,T]
141
  attn_weights = F.softmax(attn_scores.squeeze(1), dim=-1).unsqueeze(1) # [B,1,T]
142
- ctx = torch.bmm(attn_weights, enc_out) # [B,1,512]
143
 
144
- x = torch.cat((e, ctx), dim=-1) # [B,1,768]
145
- out, hidden = self.gru(x, hidden) # out:[B,1,512], hidden:[2,B,512]
146
- out = self.norm(out.squeeze(1)) # [B,512]
147
- logits = self.out(out) # [B,vocab]
148
  return logits, hidden
149
 
150
 
 
14
 
15
  DEVICE = torch.device("cpu")
16
 
17
+ CACHE_FILE = "ubuntu_data_cache.pt" # To get the Vocab from cache
18
+ MODEL_FILE = "ubuntu_chatbot_best.pt" # trained model
19
 
20
 
21
  # ------------- tokenization + helpers -------------
 
93
  )
94
  # projection from 1024 (2 * 512) back to 512
95
  self.fc = nn.Linear(1024, 512)
96
+ self.norm = nn.LayerNorm(512)
97
 
98
  def forward(self, x):
99
  # x: [B, T]
100
+ e = self.emb(x)
101
+ out, h = self.gru(e)
102
 
103
+
104
+ out = self.fc(out)
105
 
106
+
107
+ h = h.view(2, 2, h.size(1), -1)
108
+ h = torch.sum(h, dim=1)
109
 
110
+ return out, h
111
 
112
 
113
  class Decoder(nn.Module):
 
132
  hidden: [2, B, 512] encoder hidden (num_layers, batch, hidden)
133
  enc_out:[B, T, 512]
134
  """
135
+ e = self.dropout(self.emb(inp))
136
 
137
  # attention over encoder outputs
138
+ energy = self.attn(enc_out)
139
  # use top layer hidden state for attention
140
  attn_scores = torch.bmm(hidden[-1].unsqueeze(1), energy.transpose(1, 2)) # [B,1,T]
141
  attn_weights = F.softmax(attn_scores.squeeze(1), dim=-1).unsqueeze(1) # [B,1,T]
142
+ ctx = torch.bmm(attn_weights, enc_out)
143
 
144
+ x = torch.cat((e, ctx), dim=-1)
145
+ out, hidden = self.gru(x, hidden)
146
+ out = self.norm(out.squeeze(1))
147
+ logits = self.out(out)
148
  return logits, hidden
149
 
150