|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import matplotlib.pyplot as plt |
|
|
from fuson_plm.utils.visualizing import set_font |
|
|
|
|
|
# Maps the data column names accepted by plot_pos_or_id_pcnt_hist to the short
# label interpolated into the histogram's x-axis title ("Max % <label>").
# No `global` statement is needed: a name assigned at module level is already
# a module global.
pos_id_label_dict = {
    'top_UniProt_nIdentities': 'Identities',
    'top_UniProt_nPositives': 'Positives',
}
|
|
|
|
|
def plot_pos_or_id_pcnt_hist(data, column_name, save_path=None, ax=None):
    """
    Plot a histogram of per-row percent coverage for one alignment column.

    Coverage for each row is ``data[column_name] * 100 / data['aa_seq_len']``.
    Rows where either of the two columns is missing are dropped first. The
    mean (dashed line) and median (solid line) of the coverage values are
    drawn and labelled on the plot.

    Args:
        data: DataFrame containing an ``aa_seq_len`` column and ``column_name``.
        column_name: A key of ``pos_id_label_dict``
            (``'top_UniProt_nIdentities'`` or ``'top_UniProt_nPositives'``);
            it selects both the data column and the x-axis label.
        save_path: If not None, the figure holding ``ax`` is saved here at
            300 dpi.
        ax: Matplotlib Axes to draw on. If None, a new 10x7 figure is created
            and shown with ``plt.show()`` once drawing is finished.

    Raises:
        KeyError: If ``column_name`` is not a key of ``pos_id_label_dict`` or a
            required column is absent from ``data``.
    """
    set_font()

    # Resolve the axis label up front so an unsupported column fails before
    # any figure is created or modified.
    axis_label = pos_id_label_dict[column_name]

    # Record ownership now: `ax` is rebound just below, so re-testing
    # `ax is None` at the end of the function could never be true.
    owns_figure = ax is None
    if owns_figure:
        _, ax = plt.subplots(figsize=(10, 7))
    fig = ax.figure

    # Compute coverage as a standalone Series instead of assigning new columns
    # into a frame derived from a slice of `data`.
    subset = data[['aa_seq_len', column_name]].dropna()
    coverage = subset[column_name] * 100 / subset['aa_seq_len']

    mean_coverage = coverage.mean()
    median_coverage = coverage.median()

    ax.hist(coverage, bins=50, edgecolor='grey', alpha=0.8, color='mediumpurple')

    # Dashed line marks the mean, solid line marks the median.
    ax.axvline(mean_coverage, color='black', linestyle='--', linewidth=2)
    ax.axvline(median_coverage, color='black', linestyle='-', linewidth=2)

    # Place the two annotations at 90% and 80% of the current y-axis maximum
    # so they do not overlap each other.
    y_top = ax.get_ylim()[1]
    ax.text(mean_coverage, y_top * 0.9, f'Mean: {mean_coverage:.1f}%', color='black',
            ha='center', va='top', fontsize=40, backgroundcolor='white')
    ax.text(median_coverage, y_top * 0.8, f'Median: {median_coverage:.1f}%', color='black',
            ha='center', va='top', fontsize=40, backgroundcolor='white')

    # Style the axes that were actually drawn on; the pyplot-level helpers
    # would act on whichever axes/figure happens to be "current".
    ax.tick_params(axis='both', labelsize=24)
    ax.set_xlabel(f"Max % {axis_label}", fontsize=40)
    ax.set_ylabel("Count", fontsize=40)

    fig.tight_layout()

    if save_path is not None:
        fig.savefig(save_path, dpi=300)

    # Only display figures this function created; a caller that supplied `ax`
    # decides for itself when to show or save.
    if owns_figure:
        plt.show()
|
|
|
|
|
def group_pos_id_plot(data):
    """
    Draw and save the identities percent-coverage histogram for ``data``.

    Calls ``set_font()`` and then ``plot_pos_or_id_pcnt_hist`` for the
    ``'top_UniProt_nIdentities'`` column on a new figure, saving the result to
    ``figures/identities_hist.png``.

    Args:
        data: DataFrame forwarded unchanged to ``plot_pos_or_id_pcnt_hist``.
    """
    set_font()

    # NOTE(review): despite the function name, only the identities column is
    # plotted here; confirm whether a 'top_UniProt_nPositives' plot was also
    # intended.
    plot_pos_or_id_pcnt_hist(
        data,
        'top_UniProt_nIdentities',
        # Plain literal: there is nothing to interpolate, so no f-prefix.
        save_path="figures/identities_hist.png",
        ax=None,
    )
|
|
|
|
|
def main():
    """
    Load the top-alignment table and save the identities histogram.

    Reads ``blast_outputs/swissprot_top_alignments.csv`` and passes it to
    ``plot_pos_or_id_pcnt_hist`` for the ``'top_UniProt_nIdentities'`` column,
    saving the figure to ``figures/identities_hist.png``. Both paths are
    relative to the current working directory.
    """
    swissprot_top_alignments_df = pd.read_csv("blast_outputs/swissprot_top_alignments.csv")

    plot_pos_or_id_pcnt_hist(
        swissprot_top_alignments_df,
        'top_UniProt_nIdentities',
        # Plain literal: there is nothing to interpolate, so no f-prefix.
        save_path="figures/identities_hist.png",
        ax=None,
    )
|
|
|
|
|
# Script entry point: run the plotting pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|