"""Print summary statistics for an MLM benchmark results CSV.

Reads the results file and prints, one per line: the mean perplexity
(excluding sentinel rows), the mean cosine similarity, and the total
Hamming distance divided by the total sequence length.
"""
from pathlib import Path

import pandas as pd

RESULTS_DIR = Path("/home/sg666/MDpLM/benchmarks/MLM")
RESULTS_FILE = "mlm_uppercase_results.csv"

# Rows whose perplexity equals this value are left out of the mean.
# NOTE(review): presumably a placeholder written when perplexity could not
# be computed -- confirm against the code that produces the CSV.
PERPLEXITY_SENTINEL = 10000


def summarize_results(df: pd.DataFrame, ppl_sentinel=PERPLEXITY_SENTINEL) -> tuple:
    """Return ``(ppl_mean, cos_mean, hamming_mean)`` for a results frame.

    Args:
        df: Frame with the columns ``'Original Sequence'``, ``'Perplexity'``,
            ``'Cosine Similarity'`` and ``'Hamming Distance'``.
        ppl_sentinel: Perplexity value to exclude from the perplexity mean.

    Returns:
        A 3-tuple of:
        * mean of the perplexities that differ from ``ppl_sentinel``
          (NaN when no such row exists),
        * mean of the cosine-similarity column as computed by pandas,
        * sum of the Hamming distances divided by the summed length of all
          original sequences (NaN when that summed length is zero).
    """
    nan = float("nan")

    total_length = sum(len(seq) for seq in df['Original Sequence'].tolist())

    # Plain Python summation (not Series.mean) so NaN handling and float
    # rounding stay exactly as a list-based average would give.
    perplexities = [
        ppl for ppl in df['Perplexity'].tolist() if ppl != ppl_sentinel
    ]
    ppl_mean = sum(perplexities) / len(perplexities) if perplexities else nan

    cos_mean = df.loc[:, 'Cosine Similarity'].mean()

    # Normalised by total residues, not by row count, so longer sequences
    # weigh proportionally more.
    total_hamming = sum(df['Hamming Distance'].tolist())
    hamming_mean = total_hamming / total_length if total_length else nan

    return ppl_mean, cos_mean, hamming_mean


def main() -> None:
    """Load the benchmark CSV and print the three summary values."""
    df = pd.read_csv(RESULTS_DIR / RESULTS_FILE)
    ppl_mean, cos_mean, hamming_mean = summarize_results(df)
    print(ppl_mean)
    print(cos_mean)
    print(hamming_mean)


if __name__ == "__main__":
    main()