Update modeling_prot2text.py
modeling_prot2text.py  (CHANGED: +2 -18)
```diff
@@ -323,8 +323,8 @@ class Prot2TextModel(PreTrainedModel):
         tok_ids = self.decoder.generate(input_ids=inputs['decoder_input_ids'],
                                         encoder_outputs=encoder_state,
                                         use_cache=True,
-                                        output_attentions=True,
-                                        output_scores=True,
+                                        output_attentions=False,
+                                        output_scores=False,
                                         return_dict_in_generate=True,
                                         encoder_attention_mask=inputs['attention_mask'],
                                         length_penalty=1.0,
@@ -333,22 +333,6 @@ class Prot2TextModel(PreTrainedModel):
                                         num_beams=1)
 
         generated = tokenizer.batch_decode(tok_ids.get('sequences'), skip_special_tokens=True)
-        print(tok_ids.get('scores')[0].size())
-        m = torch.nn.Softmax()
-        att_w = []
-        print(len(gpdb.sequence[0]))
-        score = 0
-        for i in range(len(tok_ids.get('cross_attentions'))):
-            att_w.append(torch.mul(tok_ids.get('cross_attentions')[i][-1].squeeze().mean(dim=0), inputs['attention_mask'][-1].squeeze())[:len(gpdb.sequence[0])].tolist())
-            score += np.log(torch.max(m(tok_ids.get('scores')[i]).squeeze()).item())
-        score = score / len(tok_ids.get('cross_attentions'))
-        # print(str(score))
-
-        # import seaborn as sns
-        # import matplotlib.pylab as plt
-        # plt.figure().set_figwidth(150)
-        # ax = sns.heatmap(att_w, cmap="YlGnBu", robust=True, xticklabels=gpdb.sequence[0])#, yticklabels=generated[0])
-        # plt.savefig("seaborn_plot.png")
 
         os.remove(structure_filename)
         os.remove(graph_filename)
```