Upload 5 files
Browse files — Uploading Streamlit app, model files & dataset
- .gitattributes +1 -0
- app.py +95 -0
- load_data.py +84 -0
- pred.py +345 -0
- seq2seq_checkpoint.pt +3 -0
- test_data.csv +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
test_data.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np # linear algebra
|
| 2 |
+
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
|
| 3 |
+
import torch
|
| 4 |
+
from transformers import GPT2Tokenizer
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from typing import List, Dict, Any, Callable
|
| 8 |
+
from pred import *
|
| 9 |
+
from load_data import *
|
| 10 |
+
|
| 11 |
+
def main():
    """Streamlit UI: load the seq2seq checkpoint and generate footy commentary.

    Builds the encoder/decoder, restores weights from ./seq2seq_checkpoint.pt,
    and exposes decoding-mode controls in the sidebar.
    """
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2', add_bos_token=True)
    # GPT-2 has no pad token; reuse EOS so padding round-trips through decode.
    tokenizer.pad_token = tokenizer.eos_token

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    encoder = Encoder(h=64, n=2, e=64, a=4, o=64).to(device)
    decoder = Decoder(h=64, n=2, e=64, a=4, o=50257).to(device)
    model = Seq2Seq(encoder, decoder).to(device)

    checkpoint = torch.load('./seq2seq_checkpoint.pt', weights_only=True, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

    st.title("Footy Commentary Generator")
    # Sidebar for configuration
    st.sidebar.header("Configuration")
    tab_selection = st.sidebar.radio(
        "Select Input Method:",
        ["Random Sample from Test Set", "Custom Input"]
    )

    # Decoding configuration section.
    st.sidebar.header("Decoding Configuration")
    # FIX: 'top_k' (not 'top-k') so the value matches the branch genOp
    # dispatches on; the hyphenated spelling matched no branch, genOp returned
    # None, and tokenizer.decode(None) crashed.
    st.session_state.decoding_mode = st.sidebar.selectbox(
        "Decoding Mode",
        ["greedy", "sample", "top_k", "diverse-beam-search", "min-bayes-risk"]
    )

    # Parameters based on decoding mode.
    st.session_state.decoding_params = {}
    st.session_state.decoding_params['max_len'] = st.sidebar.slider('Max length', 1, 500, 50)
    # FIX: lower bound 0.01 (was 0.0) — temperature divides the logits, so a
    # user selecting 0.0 caused a division by zero in 'sample' mode.
    st.session_state.decoding_params['temperature'] = st.sidebar.slider('Temperature', 0.01, 1.0, 0.1)
    if st.session_state.decoding_mode == "top_k":
        st.session_state.decoding_params["k"] = st.sidebar.slider("k value", 1, 100, 5)
    elif st.session_state.decoding_mode == "diverse-beam-search":
        st.session_state.decoding_params["beam_width"] = st.sidebar.slider("beam width", 1, 10, 1)
        st.session_state.decoding_params["diversity_penalty"] = st.sidebar.slider("diversity penalty", 0.0, 1.0, 0.1)
    elif st.session_state.decoding_mode == "min-bayes-risk":
        st.session_state.decoding_params["num_candidates"] = st.sidebar.slider("Number of candidates", 1, 30, 4)

    if tab_selection == "Random Sample from Test Set":
        st.header("Generate from Test Dataset")

        col1, col2 = st.columns([3, 1])

        with col1:
            # Number of samples in the test dataset.
            st.write("Test dataset contains 5000 samples")

        with col2:
            # Button to generate a random sample.
            if st.button("Generate Random Sample"):
                random_idx = np.random.randint(1, 5000)
                st.session_state.random_idx = random_idx
                # FIX: fetch tensors on the model's device — previously this
                # defaulted to CPU, breaking generation when CUDA is available.
                st.session_state.ip, st.session_state.ip_mask, st.session_state.tg, st.session_state.tg_mask = get_sample(random_idx, device)

        # Display the selected sample.
        if 'random_idx' in st.session_state:
            st.subheader(f"Sample #{st.session_state.random_idx}")
            st.session_state.x = tokenizer.decode(st.session_state.ip.tolist()[0])
            st.session_state.y = tokenizer.decode(st.session_state.tg.tolist())
            # Display sample details in a table.
            df = pd.DataFrame.from_dict({'X': [st.session_state.x], 'y': [st.session_state.y]})
            st.dataframe(df.T.reset_index(), width=800)

            # Generate output.
            if st.button("Generate Sequence"):
                with st.spinner("Generating sequence..."):
                    st.session_state.tok_output = genOp(
                        encoder, decoder, device,
                        st.session_state.ip,
                        st.session_state.ip_mask,
                        mode=st.session_state.decoding_mode,
                        **st.session_state.decoding_params
                    )
                    st.session_state.output = tokenizer.decode(st.session_state.tok_output)

        # Display output.
        if 'output' in st.session_state:
            st.subheader("Generated Sequence")
            st.write(st.session_state.output)


if __name__ == "__main__":
    main()
|
load_data.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import torch
|
| 3 |
+
# import numpy as np
|
| 4 |
+
# import torch
|
| 5 |
+
from transformers import GPT2Tokenizer
|
| 6 |
+
|
| 7 |
+
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 8 |
+
# train_size = 50000
|
| 9 |
+
# test_size = 2500
|
| 10 |
+
# val_size = 2500
|
| 11 |
+
|
| 12 |
+
# Shared GPT-2 tokenizer; GPT-2 ships no pad token, so EOS doubles as PAD.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2', add_bos_token=True)
tokenizer.pad_token = tokenizer.eos_token

# One-off preprocessing that produced test_data.csv (kept for provenance):
# df = pd.read_csv('./prepro_data.csv')

# train_df = df[:train_size]
# test_df = df[train_size:train_size + test_size + val_size]

# test_df.to_csv('test_data.csv')
# print('Test df saved...')

# Held-out split, loaded once at import time (test_data.csv tracked via git-lfs).
test_df = pd.read_csv('./test_data.csv')
test_df = test_df.reset_index(drop=True)
# print(test_df.index)
| 26 |
+
|
| 27 |
+
class TextDataset(torch.utils.data.Dataset):
    """Paired-text dataset: item *idx* is the (source, target) string pair."""

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
|
| 40 |
+
|
| 41 |
+
def collate_fn(batch):
    """Tokenize and pad a batch of (X, y) string pairs into stacked tensors.

    Returns (input_ids, input_mask, target_ids, target_mask), each of shape
    (batch, seq) after vstack-ing the per-sample (1, seq) encodings.
    """
    X = [i[0] for i in batch]
    y = [i[1] for i in batch]

    lenX = []  # NOTE(review): never used — kept as-is
    # Pad each side of the batch to its longest tokenized sequence.
    # NOTE(review): every string is tokenized twice (once here for the length,
    # once below for the encoding), and tokenizer.tokenize() may not count the
    # BOS token that add_bos_token=True inserts — verify truncation does not
    # silently drop the last token of the longest sample.
    maxlen = max([len(tokenizer.tokenize(i)) for i in X])
    maylen = max([len(tokenizer.tokenize(i)) for i in y])  # max target length ('maylen' is the original name)

    # print(f'maxlen: {maxlen} | maylen: {maylen}')

    inputs = [tokenizer(i, max_length=maxlen, padding='max_length', truncation=True, return_tensors='pt', return_attention_mask=True) for i in X]
    targets = [tokenizer(i, max_length=maylen, padding='max_length', truncation=True, return_tensors='pt', return_attention_mask=True) for i in y]

    # Collect per-sample (1, seq) tensors before stacking.
    input_ids, input_mask = [], []
    for i in inputs:
        input_ids.append(i['input_ids'])
        input_mask.append(i['attention_mask'])
    target_ids, target_mask = [], []
    for i in targets:
        target_ids.append(i['input_ids'])
        target_mask.append(i['attention_mask'])

    return (torch.vstack(input_ids), torch.vstack(input_mask), torch.vstack(target_ids), torch.vstack(target_mask))
|
| 64 |
+
|
| 65 |
+
# Loader over the full test split; batch_size=5000 means the single batch
# tokenizes every row at once — get_sample() below pulls that one batch.
val_ds = TextDataset(test_df['X'].values, test_df['y'].values)
valloader = torch.utils.data.DataLoader(val_ds, batch_size=5000, shuffle=False, collate_fn=collate_fn)

# print(test_df.head())
| 69 |
+
|
| 70 |
+
def get_sample(i, device='cpu'):
    """Return (input_ids, input_mask, target_ids, target_mask) for test row *i*.

    input_ids / input_mask come back batched as (1, seq); the mask is cast to
    float32 as the downstream attention expects. target tensors stay 1-D.

    FIX: the batch is tokenized once and memoized on the function object — the
    original re-ran `next(iter(valloader))`, i.e. re-tokenized all ~5000 rows,
    on every single call.
    """
    if not hasattr(get_sample, '_batch'):
        get_sample._batch = next(iter(valloader))
    ids, mask, tg_ids, tg_mask = get_sample._batch
    return (ids[i].unsqueeze(dim=0).to(device),
            mask[i].unsqueeze(dim=0).type(torch.float32).to(device),
            tg_ids[i].to(device),
            tg_mask[i].to(device))
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# X, y, tok_X, tok_y = get_sample(1)
|
| 81 |
+
# print(f'X: {X} \n y: {y}')
|
| 82 |
+
# print(type(tok_X))
|
| 83 |
+
# print(tok_X)
|
| 84 |
+
# print(tok_X.shape, tok_y.shape)
|
pred.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np # linear algebra
|
| 2 |
+
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
|
| 3 |
+
import torch
|
| 4 |
+
import string
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import numpy as np
|
| 7 |
+
from torch import nn
|
| 8 |
+
from sklearn.model_selection import train_test_split
|
| 9 |
+
# from gensim.models import Word2Vec
|
| 10 |
+
from torch.nn.utils.rnn import pack_padded_sequence
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import argparse
|
| 13 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, Trainer, TrainingArguments, AdamW, GPT2Tokenizer, GPT2Model, GPT2LMHeadModel
|
| 14 |
+
from transformers import GPTNeoForCausalLM, GPT2Tokenizer ,GPTNeoConfig
|
| 15 |
+
from transformers import BertConfig, EncoderDecoderConfig, EncoderDecoderModel,BertTokenizer
|
| 16 |
+
from transformers import GPT2TokenizerFast
|
| 17 |
+
# from peft import LoraModel, LoraConfig
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
import datetime
|
| 20 |
+
from tqdm import tqdm
|
| 21 |
+
import random
|
| 22 |
+
from tqdm import tqdm
|
| 23 |
+
from torch.cuda.amp import autocast, GradScaler
|
| 24 |
+
import gc
|
| 25 |
+
import matplotlib.pyplot as plt
|
| 26 |
+
|
| 27 |
+
class Encoder(torch.nn.Module):
    """BiLSTM encoder with a residual self-attention pass over its states.

    forward(X) maps token ids (bs, seq) to (logits, hidden, cell) where
    logits is (bs, seq, o) and hidden/cell are the LSTM final states.
    """

    def __init__(self, h=128, n=8, e=64, a=4, o=1280):
        super(Encoder, self).__init__()
        self.embed = nn.Embedding(50257, e)  # GPT-2 vocab size
        self.lstm = nn.LSTM(input_size=e, hidden_size=h, num_layers=n,
                            batch_first=True, bidirectional=True)
        # Bidirectional LSTM doubles the feature dim, hence embed_dim = 2*h.
        self.sa = nn.MultiheadAttention(h * 2, a, dropout=0.1, batch_first=True)
        self.op = nn.Sequential(
            nn.Linear(2 * h, h // 2),
            nn.ReLU(),
            nn.Linear(h // 2, o),
        )

    def forward(self, X):
        embedded = self.embed(X)                        # (bs, seq, e)
        states, (hidden, cell) = self.lstm(embedded)    # (bs, seq, 2h)
        attended, _ = self.sa(states, states, states)   # self-attention over states
        # Residual connection before projecting to the output dimension.
        return self.op(attended + states), hidden, cell
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class Decoder(torch.nn.Module):
    """One-step decoder: BiLSTM over the current token plus cross-attention
    into the encoder output; projects the concatenation to vocab logits.
    """

    def __init__(self, h=128, n=8, e=64, a=4, o=50257):
        super(Decoder, self).__init__()
        # FIX: keep h/n so init_state() can size the LSTM states — the
        # original read self.h / self.n without ever assigning them.
        self.h = h
        self.n = n
        self.embed = nn.Embedding(50257, e)
        self.lstm = nn.LSTM(input_size=e, hidden_size=h, num_layers=n, batch_first=True, bidirectional=True)
        # NOTE(review): forward() queries with the embedding (dim e) while this
        # attention has embed_dim h, and self.op expects 2*h + e features from
        # concatenating dec (2h) with atOp (h) — both only line up when e == h,
        # as used in app.py (h=e=64). Confirm before changing dims.
        self.sa = nn.MultiheadAttention(h, a, dropout=0.1, batch_first=True)
        self.op = nn.Sequential(
            nn.Linear(2 * h + e, h // 2),
            nn.ReLU(),
            nn.Linear(h // 2, o),
        )

    def forward(self, ip, ho, co, enc, mask):
        """Decode one step.

        ip:   (bs, 1) token ids;  ho/co: LSTM states (2*n, bs, h)
        enc:  encoder output used as attention key/value; mask: key padding mask
        Returns (logits (bs, o), ho, co).
        """
        emb = self.embed(ip)                               # (bs, 1, e)
        dec, (ho, co) = self.lstm(emb, (ho, co))           # (bs, 1, 2h)
        # Cross-attend: current-token embedding queries the encoder output.
        atOp, atW = self.sa(emb, enc, enc, key_padding_mask=mask)
        op = torch.cat([dec.squeeze(dim=1), atOp.squeeze(dim=1)], dim=1)  # (bs, 2h + e)
        logits = self.op(op)                               # (bs, o)
        return logits, ho, co

    def init_state(self, batch_size, device='cpu'):
        """Zero (hidden, cell) states, each (2*num_layers, batch, h).

        FIX: previously referenced an undefined module-level `device` and the
        unset self.n / self.h; device is now an explicit keyword argument.
        """
        shape = (2 * self.n, batch_size, self.h)
        return (torch.zeros(shape).to(device), torch.zeros(shape).to(device))
|
| 107 |
+
|
| 108 |
+
class Seq2Seq(nn.Module):
    """Bundles an encoder and a one-step decoder; forward runs teacher forcing."""

    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, seq_ip, ip_mask, seq_tg):
        """Teacher-forced decode of the target sequence.

        Returns a list of per-step logits with len(seq_tg)-1 entries — the
        first target token is only ever fed as input, never predicted.
        """
        enc, hidden, cell = self.encoder(seq_ip)
        outputs = []
        dec_ip = seq_tg[:, 0].unsqueeze(dim=-1)  # start from the first target token
        for t in range(1, seq_tg.shape[1]):
            op, hidden, cell = self.decoder(dec_ip, hidden, cell, enc, ip_mask)
            outputs.append(op)
            dec_ip = seq_tg[:, t].unsqueeze(dim=-1)  # teacher forcing: feed gold token
        # FIX: removed a stray `torch.stack(outputs, dim=1)` whose result was
        # discarded; callers still receive the plain list, unchanged.
        return outputs
|
| 126 |
+
|
| 127 |
+
def diverse_beam_search(decoder, encoder_output, ip_mask, hidden, cell, device, beam_width=5, diversity_penalty=0.7, max_len=100):
    """Diverse beam search over the decoder's output distribution.

    Each beam is (score, token-id sequence, hidden, cell); within a step the
    i-th ranked expansion of a beam is penalized by diversity_penalty * i so
    beams don't all chase the same continuation. Returns the highest-scoring
    token-id list. 50256 is GPT-2's <|endoftext|>, used as both BOS and EOS.
    """
    dec_ip = torch.tensor([50256]).type(torch.int64).to(device)  # Start token
    beams = [(0.0, [dec_ip.item()], hidden.clone(), cell.clone())]  # (score, sequence, hidden, cell)
    # `count` guards the EOS test below: the very first token is 50256 (BOS),
    # which must not be mistaken for termination before any step has run.
    count = 0
    for _ in range(max_len):
        all_candidates = []
        for score, seq, h, c in beams:
            if seq[-1] == 50256 and count > 0:  # EOS reached — carry beam forward unchanged
                all_candidates.append((score, seq, h, c))
                continue
            dec_out, h_new, c_new = decoder(
                torch.tensor([seq[-1]]).unsqueeze(0).to(device), h, c, encoder_output, ip_mask
            )
            log_probs = torch.nn.functional.log_softmax(dec_out, dim=-1)  # Shape: [1, vocab_size]
            top_k_log_probs, top_k_tokens = torch.topk(log_probs, beam_width, dim=-1)

            for i in range(beam_width):
                # Rank-based diversity penalty: lower-ranked expansions pay more.
                new_score = score + top_k_log_probs[0, i].item() - (diversity_penalty * i)
                new_seq = seq + [top_k_tokens[0, i].item()]
                # States are cloned per candidate so beams evolve independently.
                all_candidates.append((new_score, new_seq, h_new.clone(), c_new.clone()))
        count = 1
        # Select top beam_width candidates.
        beams = sorted(all_candidates, key=lambda x: x[0], reverse=True)[:beam_width]
        if all(seq[-1] == 50256 for _, seq, _, _ in beams):  # All beams ended
            break

    return beams[0][1]  # Return highest-scoring sequence
|
| 154 |
+
|
| 155 |
+
def mbr_decoding(decoder, encoder_output, ip_mask, hidden, cell, device, num_candidates=10, max_len=100):
    """Minimum-Bayes-risk decoding: sample candidates, return the consensus one.

    Candidates are drawn with top-k sampling (k=5) for diversity; each is then
    scored by its mean positional token overlap with every other candidate,
    and the best-agreeing sequence wins. 50256 is GPT-2's <|endoftext|>.
    """
    # Generate candidate sequences using top-k sampling.
    candidates = []
    for _ in range(num_candidates):
        dec_ip = torch.tensor([50256]).type(torch.int64).to(device)
        seq = [dec_ip.item()]
        h, c = hidden.clone(), cell.clone()
        for _ in range(max_len):
            dec_out, h, c = decoder(dec_ip.unsqueeze(0), h, c, encoder_output, ip_mask)
            dec_ip = top_k_sampling(dec_out, k=5).unsqueeze(dim=0)  # Use top-k for diversity
            seq.append(dec_ip.item())
            if dec_ip.item() == 50256:  # EOS
                break
        candidates.append(seq)

    # FIX: with a single candidate there is nothing to compare against and the
    # original divided by zero below.
    if len(candidates) == 1:
        return candidates[0]

    best_seq, best_score = None, float('-inf')
    for cand in candidates:
        # Consensus score: mean positional overlap with every OTHER candidate.
        # FIX: the original compared by value (`other != cand`), which excluded
        # duplicate candidates from each other's score and so under-scored
        # exactly the most agreed-upon output; identity comparison keeps
        # duplicates in the consensus as MBR intends.
        score = sum(sum(1 for t1, t2 in zip(cand, other) if t1 == t2)
                    for other in candidates if other is not cand) / (len(candidates) - 1)
        if score > best_score:
            best_score, best_seq = score, cand
    return best_seq
|
| 178 |
+
|
| 179 |
+
def top_k_sampling(logits, k=10, temperature=1.0):
    """Sample one token id from the k most probable entries of *logits*.

    Expects logits shaped (1, vocab). temperature < 1 sharpens the
    distribution, > 1 flattens it. Returns a 0-d tensor holding the chosen
    vocabulary index.
    """
    probs = torch.nn.functional.softmax(logits / temperature, dim=-1)
    top_values, top_indices = torch.topk(probs, k, dim=-1)
    # Draw within the truncated distribution, then map back to vocab space.
    choice = torch.multinomial(top_values, num_samples=1).item()
    return top_indices[0, choice]
|
| 185 |
+
|
| 186 |
+
def _sample_loop(decoder, enc, ip_mask, hidden, cell, device, max_len, pick):
    # Shared autoregressive loop for greedy/sample/top-k: `pick` maps the
    # step's logits (1, vocab) to the next input token tensor of shape (1,).
    outputs = []
    dec_ip = torch.tensor([50256]).type(torch.int64).to(device)  # BOS == EOS for GPT-2
    for _ in range(max_len + 1):
        dec, hidden, cell = decoder(dec_ip.unsqueeze(dim=0), hidden, cell, enc, ip_mask)
        dec_ip = pick(dec)
        outputs.append(dec_ip.item())
        if dec_ip.item() == 50256:
            print('Self terminated !!!')
            break
    return outputs


def genOp(encoder, decoder, device, ip, ip_mask, mode='greedy', temperature=1.0, k=13, beam_width=5, diversity_penalty=0.7, num_candidates=10, max_len=100):
    """Generate a token-id list for input `ip` under the chosen decoding mode.

    Modes: 'greedy', 'sample' (temperature), 'top_k'/'top-k' (k),
    'diverse-beam-search' (beam_width, diversity_penalty),
    'min-bayes-risk' (num_candidates).

    FIXES: the UI sends 'top-k' but only 'top_k' was matched, so that mode
    fell through and returned None (crashing tokenizer.decode); both spellings
    are now accepted and an unknown mode raises ValueError. max_len is also
    forwarded to diverse_beam_search, which previously ignored it.
    """
    encoder.eval()
    decoder.eval()
    with torch.no_grad():
        enc, hidden, cell = encoder(ip)
        if mode == 'greedy':
            return _sample_loop(decoder, enc, ip_mask, hidden, cell, device, max_len,
                                lambda dec: torch.argmax(dec, dim=-1))
        if mode == 'sample':
            def pick(dec):
                probs = torch.nn.functional.softmax(dec / temperature, dim=-1)
                return torch.multinomial(input=probs, num_samples=1, replacement=True).squeeze(0)
            return _sample_loop(decoder, enc, ip_mask, hidden, cell, device, max_len, pick)
        if mode in ('top_k', 'top-k'):
            def pick(dec):
                probs = torch.nn.functional.softmax(dec, dim=-1)
                top_k_probs, top_k_indices = torch.topk(probs, k, dim=-1)
                choice = torch.multinomial(input=top_k_probs, num_samples=1, replacement=True).squeeze(0)
                return top_k_indices[0, choice.item()].unsqueeze(dim=0)
            return _sample_loop(decoder, enc, ip_mask, hidden, cell, device, max_len, pick)
        if mode == 'diverse-beam-search':
            return diverse_beam_search(decoder, enc, ip_mask, hidden, cell, device,
                                       beam_width=beam_width, diversity_penalty=diversity_penalty,
                                       max_len=max_len)
        if mode == 'min-bayes-risk':
            return mbr_decoding(decoder, enc, ip_mask, hidden, cell, device,
                                num_candidates=num_candidates, max_len=max_len)
        raise ValueError(f'Unknown decoding mode: {mode!r}')
|
| 254 |
+
|
| 255 |
+
# ip = torch.tensor([[50256, 11195, 318, 13837, 11, 8272, 318, 2688, 4345, 1578,
|
| 256 |
+
# 11, 4475, 318, 3909, 11, 3035, 767, 11, 1941, 318,
|
| 257 |
+
# 4793, 11, 2435, 357, 315, 66, 8, 318, 1478, 25,
|
| 258 |
+
# 405, 11, 1078, 437, 590, 318, 3126, 11, 2931, 23,
|
| 259 |
+
# 11, 4080, 318, 24880, 10499, 11, 3576, 11, 4492, 11,
|
| 260 |
+
# 19316, 318, 4793, 12, 12726, 37985, 9952, 4041, 11, 6057,
|
| 261 |
+
# 62, 13376, 318, 19446, 11, 30408, 448, 318, 10352, 11,
|
| 262 |
+
# 11195, 62, 26675, 318, 657, 11, 8272, 62, 26675, 318,
|
| 263 |
+
# 352, 11, 11195, 62, 79, 49809, 47, 310, 318, 5598,
|
| 264 |
+
# 7441, 8272, 62, 79, 49809, 47, 310, 318, 4570, 7441,
|
| 265 |
+
# 11195, 62, 20910, 22093, 318, 1542, 357, 1314, 828, 8272,
|
| 266 |
+
# 62, 20910, 22093, 318, 718, 357, 20, 828, 11195, 62,
|
| 267 |
+
# 69, 42033, 6935, 2175, 318, 838, 13, 15, 11, 8272,
|
| 268 |
+
# 62, 69, 42033, 6935, 2175, 318, 1315, 13, 15, 11,
|
| 269 |
+
# 11195, 62, 36022, 34, 1371, 318, 657, 13, 15, 11,
|
| 270 |
+
# 8272, 62, 36022, 34, 1371, 318, 352, 13, 15, 11,
|
| 271 |
+
# 11195, 62, 445, 34, 1371, 318, 657, 13, 15, 11,
|
| 272 |
+
# 8272, 62, 445, 34, 1371, 318, 657, 13, 15, 11,
|
| 273 |
+
# 11195, 62, 8210, 1460, 318, 657, 13, 15, 11, 8272,
|
| 274 |
+
# 62, 8210, 1460, 318, 604, 13, 15, 11, 11195, 62,
|
| 275 |
+
# 26502, 41389, 364, 318, 1478, 13, 15, 11, 8272, 62,
|
| 276 |
+
# 26502, 41389, 364, 318, 352, 13, 15, 11, 11195, 62,
|
| 277 |
+
# 82, 3080, 318, 642, 13, 15, 11, 8272, 62, 82,
|
| 278 |
+
# 3080, 318, 1596, 13, 15, 11, 11195, 62, 1161, 318,
|
| 279 |
+
# 16185, 11, 8272, 62, 1161, 318, 16185, 11, 24623, 318,
|
| 280 |
+
# 3594, 9952, 4041, 11, 16060, 62, 15592, 318, 449, 641,
|
| 281 |
+
# 29921, 9038, 11, 17121, 7096, 292, 11, 42, 14057, 9852,
|
| 282 |
+
# 2634, 11, 10161, 18713, 12119, 280, 2634, 11, 35389, 26689,
|
| 283 |
+
# 75, 1012, 488, 88, 11, 30847, 11979, 406, 73, 2150,
|
| 284 |
+
# 3900, 11, 13787, 292, 10018, 17479, 11, 40747, 32371, 23720,
|
| 285 |
+
# 11, 15309, 38142, 81, 367, 293, 65, 11, 34, 3798,
|
| 286 |
+
# 376, 24247, 65, 2301, 292, 11, 10161, 18713, 1215, 1765,
|
| 287 |
+
# 323, 273, 11, 5124, 2731, 978, 6199, 544, 11, 49680,
|
| 288 |
+
# 68, 311, 2194, 418, 11, 41, 21356, 48590, 18226, 12523,
|
| 289 |
+
# 11, 4826, 280, 6031, 3930, 11, 31579, 44871, 12104, 324,
|
| 290 |
+
# 13235, 11, 32, 1014, 62, 15592, 318, 5199, 3469, 11,
|
| 291 |
+
# 22946, 292, 3169, 359, 11, 20191, 44677, 11, 13217, 261,
|
| 292 |
+
# 44312, 11, 14731, 14006, 11, 24338, 9740, 9860, 11, 25372,
|
| 293 |
+
# 20017, 9557, 11, 45, 47709, 797, 78, 12, 34, 11020,
|
| 294 |
+
# 11, 9704, 20833, 11, 33, 11369, 38343, 5799, 11, 26886,
|
| 295 |
+
# 418, 1665, 33425, 11, 32027, 21298, 11, 31306, 6559, 19574,
|
| 296 |
+
# 1040, 11, 30365, 13058, 273, 11, 25596, 271, 3248, 64,
|
| 297 |
+
# 10788, 68, 11, 42, 538, 64, 11, 7575, 318, 4153,
|
| 298 |
+
# 6]])
|
| 299 |
+
# ip_mask = torch.tensor([[True, True, True, True, True, True, True, True, True, True, True, True,
|
| 300 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 301 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 302 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 303 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 304 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 305 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 306 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 307 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 308 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 309 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 310 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 311 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 312 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 313 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 314 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 315 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 316 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 317 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 318 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 319 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 320 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 321 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 322 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 323 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 324 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 325 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 326 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 327 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 328 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 329 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 330 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 331 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 332 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 333 |
+
# True, True, True, True, True, True, True, True, True, True, True, True,
|
| 334 |
+
# True, True, True, True, True, True, True, True, True, True, True]])
|
| 335 |
+
|
| 336 |
+
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 337 |
+
|
| 338 |
+
# encoder = Encoder(h=64,n=2, e=64, a=4, o=64).to(device)
|
| 339 |
+
# decoder = Decoder(h=64,n=2, e=64, a=4, o=50257).to(device)
|
| 340 |
+
# model = Seq2Seq(encoder, decoder).to(device)
|
| 341 |
+
|
| 342 |
+
# # checkpoint = torch.load('./seq2seq_checkpoint.pt', weights_only=True, map_location=device)
|
| 343 |
+
|
| 344 |
+
# # model.load_state_dict(checkpoint['model_state_dict'])
|
| 345 |
+
# print(genOp(model.encoder, model.decoder, device, ip, ip_mask, mode='greedy', temperature=1.0, k=13, beam_width=5, diversity_penalty=0.7, num_candidates=10, max_len=100))
|
seq2seq_checkpoint.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f17ff3f8e345cc56e5bb5dcdb329832e43d68f10043bcca999b831b14ac7926
|
| 3 |
+
size 102274136
|
test_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:838b1509bd748deb39f9cb52bd4c1d7e733e4195a29d5e6da19ed4cf641c97cc
|
| 3 |
+
size 13032378
|