| import numpy as np | |
| import pandas as pd | |
| import scipy.stats as stats | |
| import statsmodels.api as sm | |
| from statsmodels.formula.api import ols | |
| from statsmodels.regression.linear_model import RegressionResultsWrapper | |
| from statsmodels.stats.multicomp import pairwise_tukeyhsd | |
| from matplotlib.figure import Figure | |
| import seaborn as sns | |
| import panel as pn | |
| import com_const as cc | |
| import com_func as cf | |
| import com_image as ci | |
# Significance thresholds expressed on the -log(p) scale (natural log), for
# p = 0.05, 0.01, 0.001 and 0.0001. df_tukey_cmp_plot uses them as y tick
# positions labelled "*", "**", "***" and "****".
stars = [-np.log(p_value) for p_value in (0.05, 0.01, 0.001, 0.0001)]
def plot_single_progression(
    ax,
    df,
    target,
    title: str,
    hue="gen",
    style="gen",
    show_legend: bool = False,
):
    """Draw ``target`` against the ``dpi`` column as a marked line plot on ``ax``.

    Args:
        ax: Axes the seaborn line plot is drawn on.
        df: Data to plot; it is sorted by the ``hue`` column before plotting.
        target: Name of the column used for the y axis.
        title: Title set on ``ax``.
        hue: Column used to colour the lines (``tab10`` palette).
        style: Column used to pick the marker of each line.
        show_legend: Only the exact value ``True`` keeps the legend, which is
            then moved outside the axes to the upper-left of its right edge.
            Any other value hides the legend.

    Returns:
        None. The plot is drawn on ``ax`` as a side effect.
    """
    ordered = df.sort_values(hue)
    line_options = {
        "x": "dpi",
        "y": target,
        "hue": hue,
        "markers": True,
        "style": style,
        "dashes": False,
        "palette": "tab10",
        "markersize": 12,
        "ax": ax,
    }
    drawn_ax = sns.lineplot(ordered, **line_options)

    # NOTE(review): these labels are applied to whatever y ticks exist at this
    # point; presumably the scale runs over eight ticks from 2 to 9 - confirm.
    drawn_ax.set_yticklabels(["", "3", "", "5", "", "7", "", "9"])
    ax.set_title(title)

    if show_legend is not True:
        ax.get_legend().set_visible(False)
        return
    sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
def get_model(
    df: pd.DataFrame, target: str, formula: str, dpi: int = None
) -> RegressionResultsWrapper:
    """Fit an ordinary least squares model for ``target``.

    Args:
        df: Data the model is fitted on.
        target: Left-hand side of the model formula.
        formula: Remainder of the formula; it is appended to ``target`` after a
            single space (presumably it starts with ``~`` - verify against
            callers).
        dpi: When not ``None``, only the rows whose ``dpi`` column equals this
            value are used; otherwise all rows are used.

    Returns:
        The fitted results object returned by ``ols(...).fit()``.
    """
    if dpi is None:
        subset = df
    else:
        subset = df[df.dpi == dpi]
    full_formula = f"{target} {formula}"
    fitted = ols(full_formula, data=subset).fit()
    return fitted
def anova_table(aov, add_columns: bool = True):
    """Extend an ANOVA table with mean squares and effect sizes.

    The last row of ``aov`` is treated as the residual (error) row and every
    other row as an effect. The original note states that this was written for
    the one-way ANOVA table returned for Type II sums of squares.

    Args:
        aov: ANOVA table with at least the columns ``sum_sq``, ``df``, ``F``
            and ``PR(>F)``.
        add_columns: Only the exact value ``True`` computes ``mean_sq``,
            ``eta_sq`` and ``omega_sq``. These columns are added to ``aov``
            in place, so the caller's table gains them as well. With any
            other value the three columns must already be present.

    Returns:
        A table holding the columns ``sum_sq``, ``df``, ``mean_sq``, ``F``,
        ``PR(>F)``, ``eta_sq`` and ``omega_sq`` in that order. ``eta_sq`` and
        ``omega_sq`` are NaN on the residual row.

    Raises:
        KeyError: If one of the seven output columns is missing.
    """
    if add_columns is True:
        ss_total = aov["sum_sq"].sum()
        aov["mean_sq"] = aov["sum_sq"] / aov["df"]
        # The rows are labelled with strings, so the residual row has to be
        # fetched by position; a bare ``[-1]`` would be treated as a label.
        ms_error = aov["mean_sq"].iloc[-1]
        effects = aov.iloc[:-1]
        # Assigning the shorter effect-only series aligns on the index and
        # leaves NaN on the residual row.
        aov["eta_sq"] = effects["sum_sq"] / ss_total
        aov["omega_sq"] = (effects["sum_sq"] - effects["df"] * ms_error) / (
            ss_total + ms_error
        )
    cols = ["sum_sq", "df", "mean_sq", "F", "PR(>F)", "eta_sq", "omega_sq"]
    return aov[cols]
def plot_assumptions(models: list, titles: list, figsize=(12, 4)):
    """Draw one probability plot of the residuals per model, side by side.

    Args:
        models: Fitted models; each must expose its residuals as ``resid``.
        titles: Titles for the individual plots, paired with ``models`` in
            order. Pairing stops at the shorter of the two lists.
        figsize: Size of the created figure.

    Returns:
        The matplotlib ``Figure`` holding one row of probability plots. When
        ``models`` is empty the figure only carries the overall title.
    """
    fig = Figure(figsize=figsize)
    fig.suptitle("Probability plot of model residual's", fontsize="x-large")
    if not models:
        # A grid with zero columns cannot be created.
        return fig
    # squeeze=False always yields a 2-D array of axes; without it a single
    # model would give back a bare Axes that cannot be iterated.
    axes_row = fig.subplots(1, len(models), squeeze=False)[0]
    for ax, model, title in zip(axes_row, models, titles):
        stats.probplot(model.resid, plot=ax, rvalue=True)
        ax.set_title(title)
    return fig
def hghlight_rejection(s):
    """Build a frame of CSS background styles shaped like ``s``.

    Presumably meant for ``Styler.apply(..., axis=None)`` - confirm with the
    callers.

    Args:
        s: Frame with the boolean columns ``reject_pred`` and ``reject_obs``
            and the columns ``group1`` and ``group2``.

    Returns:
        A frame with the same index and columns as ``s``. ``group1`` and
        ``group2`` are green where both reject flags agree and red where they
        differ; each reject column is green where its flag is set and red
        where it is not. All other cells are left empty.
    """
    red = "background: red"
    green = "background: green"
    styles = pd.DataFrame(columns=s.columns, index=s.index)

    pair_columns = ["group1", "group2"]
    # (row mask, target columns, css) applied in this exact order.
    rules = [
        (s["reject_pred"].ne(s["reject_obs"]), pair_columns, red),
        (s["reject_pred"].eq(s["reject_obs"]), pair_columns, green),
        (s.reject_pred, ["reject_pred"], green),
        (~s.reject_pred, ["reject_pred"], red),
        (s.reject_obs, ["reject_obs"], green),
        (~s.reject_obs, ["reject_obs"], red),
    ]
    for row_mask, target_columns, css in rules:
        styles.loc[row_mask, target_columns] = css
    return styles
def get_tuckey_df(endog, groups, df_genotypes) -> pd.DataFrame:
    """Run a pairwise Tukey HSD test and return its result table as a frame.

    Args:
        endog: Values to compare, passed to ``pairwise_tukeyhsd``.
        groups: Group label of each value, passed to ``pairwise_tukeyhsd``.
        df_genotypes: Optional frame with the columns ``gen`` and ``rpvloci``.
            When given, ``rpvloci`` is looked up for both groups of every pair.

    Returns:
        One row per compared pair. ``group1`` and ``group2`` are strings,
        ``reject`` is boolean and ``p-adj`` holds the p-values reported by the
        test object. With ``df_genotypes`` the columns ``group1_rpvloci`` and
        ``group2_rpvloci`` are added.
    """
    tukey = pairwise_tukeyhsd(endog=endog, groups=groups)

    # The first row of the result table holds the column headers.
    raw = pd.DataFrame(tukey._results_table)
    raw.columns = [str(header) for header in raw.iloc[0]]
    pairs = raw.drop(raw.index[0])
    pairs = pairs.assign(
        group1=pairs.group1.astype(str),
        group2=pairs.group2.astype(str),
        reject=pairs.reject.astype(str) == "True",
    )
    # Replace the table's p-values with the ones exposed on the test object.
    pairs["p-adj"] = tukey.pvalues

    if df_genotypes is None:
        return pairs

    for side in ("group1", "group2"):
        pairs = (
            pairs.merge(right=df_genotypes, how="left", left_on=side, right_on="gen")
            .drop(["gen"], axis=1)
            .rename(columns={"rpvloci": f"{side}_rpvloci"})
        )
    return pairs
def get_tuckey_compare(df, df_genotypes=None, groups: str = "gen"):
    """Pair the Tukey HSD results for ``p_oiv`` with those for ``oiv``.

    Args:
        df: Frame with the columns ``p_oiv``, ``oiv`` and the grouping column.
        df_genotypes: Optional genotype frame forwarded to ``get_tuckey_df``;
            when given, the two ``*_rpvloci`` columns also act as join keys.
        groups: Name of the column holding the group labels.

    Returns:
        The inner join of both result tables on the group pair. Columns coming
        from the ``p_oiv`` test carry the suffix ``_pred``, those from the
        ``oiv`` test the suffix ``_obs``.
    """
    join_keys = ["group1", "group2"]
    if df_genotypes is not None:
        join_keys = join_keys + ["group1_rpvloci", "group2_rpvloci"]

    labels = df[groups]
    predicted = get_tuckey_df(df.p_oiv, labels, df_genotypes=df_genotypes)
    observed = get_tuckey_df(df.oiv, labels, df_genotypes=df_genotypes)
    return pd.merge(
        left=predicted,
        right=observed,
        on=join_keys,
        suffixes=["_pred", "_obs"],
    )
def df_tukey_cmp_plot(df, groups):
    """Plot predicted against observed Tukey HSD p-values for every group pair.

    The pairs are split into three panels sharing one y axis: "Rejected" (both
    tests reject), "Conflict" (the tests disagree) and "Accepted" (neither
    rejects). Each pair gets a green bar for the prediction and a blue bar for
    the scoring, with height ``-log(p-adj)``. Horizontal lines mark the
    module-level ``stars`` thresholds.

    Args:
        df: Frame forwarded to ``get_tuckey_compare``.
        groups: Name of the grouping column forwarded to
            ``get_tuckey_compare``.

    Returns:
        The matplotlib ``Figure`` holding the three panels.
    """
    df_tukey = (
        get_tuckey_compare(df=df, groups=groups, df_genotypes=None)
        .assign(pair_groups=lambda s: s.group1 + "\n" + s.group2)
        .sort_values("p-adj_obs")
    )
    # Panels in left-to-right display order.
    panels = [
        ("Rejected", df_tukey[df_tukey.reject_obs & df_tukey.reject_pred]),
        ("Conflict", df_tukey[df_tukey.reject_obs != df_tukey.reject_pred]),
        ("Accepted", df_tukey[~df_tukey.reject_obs & ~df_tukey.reject_pred]),
    ]

    fig = Figure(figsize=(20, 6))
    # Panel width follows the number of pairs. It is floored at 1 so an empty
    # category still gets a visible panel and the ratios never sum to zero.
    width_ratios = [max(len(pairs), 1) for _, pairs in panels]
    axes = fig.subplots(
        1,
        3,
        gridspec_kw={"width_ratios": width_ratios},
        sharey=True,
    )
    ax_reject, _, ax_accept = axes

    # (p-value column, bar width, colour, legend label); the negative width
    # puts the prediction bar to the left of the tick, the scoring bar goes
    # to the right.
    bar_specs = [
        ("p-adj_pred", -0.4, "green", "predictions"),
        ("p-adj_obs", 0.4, "blue", "scorings"),
    ]
    for ax, (panel_title, pairs) in zip(axes, panels):
        ax.set_yticks(ticks=stars, labels=["*", "**", "***", "****"])
        ax.grid(False)
        ax.set_title(panel_title)
        for star in stars:
            ax.axhline(y=star, linestyle="-", color="black", alpha=0.5)
        for column, width, color, label in bar_specs:
            ax.bar(
                x=pairs["pair_groups"],
                height=-np.log(pairs[column]),
                width=width,
                align="edge",
                color=color,
                label=label,
            )
        ax.margins(0.01)

    ax_accept.legend(loc="upper left", bbox_to_anchor=[0, 1], ncols=1, fancybox=True)
    ax_reject.set_ylabel("-log(p value)")
    ax_reject.tick_params(axis="y", which="major", labelsize=16)
    fig.subplots_adjust(wspace=0.05, hspace=0.05)
    return fig
def plot_patches(df, diff_only: bool = True):
    """Lay out the leaf patch images of ``df`` in a single-row panel grid.

    Args:
        df: Frame with the columns ``file_name``, ``oiv`` and ``p_oiv``.
        diff_only: Only the exact value ``True`` restricts the output to rows
            where ``oiv`` and ``p_oiv`` differ.

    Returns:
        A ``pn.GridBox`` with one column per row of the (filtered) frame. Each
        cell stacks a Markdown heading ``file_name|oiv->pp_oiv`` above the
        patch image, which is loaded from ``cc.path_to_leaf_patches`` and
        enhanced with a brightness of 1.5. Cells are ordered by whether the
        two values differ, then by ``oiv``, then by ``p_oiv``.
    """
    selected = df[(df.oiv != df.p_oiv)] if diff_only is True else df
    selected = selected.assign(diff=selected.oiv != selected.p_oiv)
    selected = selected.sort_values(["diff", "oiv", "p_oiv"])

    cells = []
    for _, row in selected.iterrows():
        heading = pn.pane.Markdown(f"### {row.file_name}|{row.oiv}->p{row.p_oiv}")
        patch = ci.load_image(
            file_name=row.file_name,
            path_to_images=cc.path_to_leaf_patches,
        )
        picture = pn.pane.Image(
            object=ci.enhance_pil_image(image=patch, brightness=1.5)
        )
        cells.append(pn.Column(heading, picture))

    return pn.GridBox(*cells, ncols=len(selected))