|
|
import math |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import plotly.graph_objects as go |
|
|
from plotly.express import colors |
|
|
from plotly.subplots import make_subplots |
|
|
import base64 |
|
|
|
|
|
|
|
|
class _CustomizedMarker:
    """Geometry helpers for hand-drawn scatter markers.

    Markers are described as closed outlines centred on the origin; they
    are later translated to data coordinates and drawn as filled line
    traces.
    """

    def regular_polygon_coords(self, corners: int) -> list[np.array]:
        """Return the closed outline of a regular polygon of radius 0.4.

        The first vertex sits at angle 0 measured from the positive y axis
        (x uses ``sin``, y uses ``cos``) and the remaining vertices follow at
        equal angular steps.  The first vertex is appended again at the end
        so that the outline is closed, giving ``corners + 1`` entries.

        Raises:
            ValueError: If ``corners`` is smaller than 3.
        """
        if corners < 3:
            raise ValueError("A polygon must have at least 3 corners.")

        radius = 0.4
        step = 2 * math.pi / corners
        vertices = [
            np.array([radius * math.sin(k * step), radius * math.cos(k * step)])
            for k in range(corners)
        ]
        # Close the path by repeating the very first vertex object.
        vertices.append(vertices[0])
        return vertices

    def clock_marker_coords(self, corners: int, target_corner: int) -> list[np.array]:
        """Return the closed outline of one wedge of a regular polygon.

        The wedge runs from the chosen polygon vertex to the midpoint of the
        edge towards the previous vertex, to the origin, to the midpoint of
        the edge towards the next vertex, and back to the chosen vertex.

        Args:
            corners: Number of vertices of the underlying polygon.
            target_corner: 1-based index of the vertex the wedge belongs to.

        Raises:
            ValueError: If ``target_corner`` is not within ``1..corners`` or
                if the underlying polygon cannot be built.
        """
        if target_corner <= 0 or target_corner > corners:
            raise ValueError("Target corner outside available value range.")

        position = target_corner - 1  # switch to a 0-based vertex index
        outline = self.regular_polygon_coords(corners)
        apex = outline[position]
        # Neighbours are looked up modulo ``corners`` so that the first and
        # the last vertex wrap around to each other.
        toward_next = (apex + outline[(position + 1) % corners]) / 2
        toward_previous = (apex + outline[(position - 1) % corners]) / 2
        origin = np.array([0, 0])

        return [apex, toward_previous, origin, toward_next, apex]

    def marker_to_scatter_line_coords(self, clock_marker_coords, x_coords, y_coords):
        """Stamp one marker outline at every ``(x, y)`` position.

        Each outline point is shifted by the position it is stamped at, and
        a ``None`` entry is written after every stamped outline in both
        returned lists.

        Returns:
            A ``(x_values, y_values)`` tuple of flat lists.
        """
        stamped_x = []
        stamped_y = []
        for centre_x, centre_y in zip(x_coords, y_coords):
            for point in clock_marker_coords:
                stamped_x.append(point[0] + centre_x)
                stamped_y.append(point[1] + centre_y)
            stamped_x.append(None)
            stamped_y.append(None)
        return stamped_x, stamped_y
|
|
|
|
|
|
|
|
class QCVisualizer:
    """Builds the two-panel quality-control overview figure."""

    @staticmethod
    def visualize(site_ins_qc_results: pd.DataFrame,
                  file_sets: list[str],
                  file_qc_results: pd.DataFrame,
                  qc_issues: list[str]
                  ) -> go.Figure:
        """Draw dataset-level and file-level QC results side by side.

        The left subplot ("Dataset QC") shows, for every dataset and file
        set, whether the file set is "Completed" or "Incomplete".  The right
        subplot ("File QC") shows one composite marker per (file, dataset)
        pair that has at least one QC issue: a filled polygon for
        "Missing file" and one filled wedge per other issue, with empty
        wedge outlines for the issues that were not raised.  A separate
        legend figure is rendered to PNG and embedded to the right of the
        plots.

        Args:
            site_ins_qc_results: Table with the columns "Dataset",
                "Site (anonymized)", "Instrument model" and one column per
                entry of ``file_sets`` whose values are compared against
                "Completed" / "Incomplete".
            file_sets: Column names of ``site_ins_qc_results`` to plot.
            file_qc_results: Table with the columns "Dataset",
                "Site (anonymized)", "Instrument model", "File" and one
                column per entry of ``qc_issues``; those columns are used as
                boolean masks.
            qc_issues: Issue column names.  The polygon/wedge geometry uses
                ``len(qc_issues) - 1`` corners, i.e. it presumably expects
                "Missing file" to be listed exactly once - verify against
                the caller.

        Returns:
            The assembled figure.

        Raises:
            ValueError: If ``len(qc_issues) - 1`` is smaller than 3, because
                the marker polygon cannot be built.
        """
        marker_factory = _CustomizedMarker()
        missing_file = "Missing file"
        corner_count = len(qc_issues) - 1

        # Integer plot coordinates for every dataset (y axis) and file (x axis).
        dataset_codes = file_qc_results["Dataset"].unique()
        file_codes = file_qc_results["File"].unique()
        file_counts = len(file_codes)
        site_ins_coord_mapping = dict(
            zip(list(dataset_codes), range(len(dataset_codes))))
        file_coord_mapping = dict(zip(file_codes, range(file_counts)))

        fig = make_subplots(rows=1, cols=2,
                            shared_yaxes=True)

        # ------------------------------------------------------------------
        # Left panel: file-set completion status per dataset.
        # ------------------------------------------------------------------
        site_ins_plot_table = pd.melt(site_ins_qc_results,
                                      id_vars=[
                                          "Dataset", "Site (anonymized)", "Instrument model"],
                                      value_vars=file_sets,
                                      var_name="File set",
                                      value_name="status")
        status_marker_mapping = {"Completed": "circle", "Incomplete": "x"}

        palette = colors.qualitative.Set1
        status_marker_color_mapping = {
            "Completed": palette[1], "Incomplete": palette[0]}
        # The first two palette entries are reserved for the status markers;
        # the remaining ones are cycled so that any number of issues gets a
        # colour instead of raising IndexError.
        default_colors = palette[2:]
        issue_colors = [default_colors[i % len(default_colors)]
                        for i in range(len(qc_issues))]

        for status in ["Completed", "Incomplete"]:
            status_rows = site_ins_plot_table[site_ins_plot_table["status"] == status]
            # ``assign`` returns a new frame, so the filtered slice is never
            # written to in place.
            sub_site_ins_plot_table = status_rows.assign(
                Dataset=status_rows["Dataset"].map(site_ins_coord_mapping))
            status_marker = dict(symbol=status_marker_mapping[status],
                                 color=status_marker_color_mapping[status],
                                 size=12,
                                 )

            if status == "Incomplete":
                # Red guide line across both panels for every dataset that
                # has at least one incomplete file set.
                for y in sub_site_ins_plot_table["Dataset"].unique():
                    fig.add_shape(
                        type="line",
                        y0=y, y1=y,
                        x0=-0.5, x1=1.5,
                        line=dict(color="red", width=1),
                        layer="between",
                        row=1, col=1
                    )
                    fig.add_shape(
                        type="line",
                        y0=y, y1=y,
                        x0=-2, x1=(file_counts-1)+2,
                        line=dict(color="red", width=1),
                        layer="between",
                        row=1, col=2
                    )

            if sub_site_ins_plot_table.empty:
                # Placeholder trace so the status still has a trace entry.
                fig.add_trace(go.Scatter(x=[None],
                                         y=[None],
                                         mode="markers",
                                         marker=status_marker,
                                         name=f"File set {status.lower()}",
                                         visible="legendonly"
                                         ),
                              row=1, col=1)
                continue

            hover_info = sub_site_ins_plot_table[
                ["Site (anonymized)", "Instrument model", "status"]].values.tolist()
            fig.add_trace(go.Scatter(x=sub_site_ins_plot_table["File set"],
                                     y=sub_site_ins_plot_table["Dataset"],
                                     mode="markers",
                                     marker=status_marker,
                                     name=f"File set {status.lower()}",
                                     customdata=hover_info,
                                     hovertemplate=("Dataset: %{y}<br>" +
                                                    "Site (anonymized): %{customdata[0]}<br>" +
                                                    "Instrument model: %{customdata[1]}<br>" +
                                                    "File set: %{x}<br>" +
                                                    "Status: %{customdata[2]}" +
                                                    "<extra></extra>"
                                                    )
                                     ),
                          row=1, col=1)

        fig.update_xaxes(title_text="File set",
                         range=[
                             0-0.5, (len(site_ins_plot_table["File set"].unique())-1)+0.5],
                         tickangle=90,
                         gridcolor="lightgray",
                         zeroline=False,
                         showline=False,
                         row=1, col=1)

        # ------------------------------------------------------------------
        # Right panel: per-file QC issues.
        # ------------------------------------------------------------------
        polygon_coords = marker_factory.regular_polygon_coords(corner_count)
        file_plot_table = file_qc_results[file_qc_results[qc_issues].any(
            axis=1)]
        hover_columns = ["x", "y", "Dataset", "Site (anonymized)",
                         "Instrument model", "File", "Issues"]
        hover_frames = []
        clock_marker_index = 1

        for issue_index, qc_issue in enumerate(qc_issues):
            is_missing_file = qc_issue == missing_file
            if not is_missing_file:
                # Every non-"Missing file" issue owns the next wedge, whether
                # or not any file actually shows the issue.
                issue_corner = clock_marker_index
                clock_marker_index += 1

            sub_file_plot_table = file_plot_table[file_plot_table[qc_issue]]
            if sub_file_plot_table.empty:
                fig.add_trace(go.Scatter(x=[None],
                                         y=[None],
                                         mode="lines",
                                         fill="toself",
                                         fillcolor=issue_colors[issue_index],
                                         line=dict(color="black", width=0.5),
                                         name=qc_issue,
                                         visible="legendonly"
                                         ),
                              row=1, col=2)
                continue

            x_coords = [file_coord_mapping[file_code]
                        for file_code in sub_file_plot_table["File"]]
            y_coords = [site_ins_coord_mapping[site_ins_code]
                        for site_ins_code in sub_file_plot_table["Dataset"]]
            hover_frames.append(pd.DataFrame({"x": x_coords,
                                              "y": y_coords,
                                              "Dataset": sub_file_plot_table["Dataset"],
                                              "Site (anonymized)": sub_file_plot_table["Site (anonymized)"],
                                              "Instrument model": sub_file_plot_table["Instrument model"],
                                              "File": sub_file_plot_table["File"],
                                              "Issues": qc_issue}))

            if is_missing_file:
                marker_coords = polygon_coords
            else:
                marker_coords = marker_factory.clock_marker_coords(
                    corner_count, issue_corner)
            issue_marker_x_coords, issue_marker_y_coords = \
                marker_factory.marker_to_scatter_line_coords(marker_coords,
                                                             x_coords,
                                                             y_coords)
            fig.add_trace(go.Scatter(x=issue_marker_x_coords,
                                     y=issue_marker_y_coords,
                                     mode="lines",
                                     fill="toself",
                                     fillcolor=issue_colors[issue_index],
                                     line=dict(color="black", width=0.5),
                                     name=qc_issue,
                                     hoverinfo="skip"
                                     ),
                          row=1, col=2)

        # Empty wedge outlines: for every file that has some other
        # non-"Missing file" issue but not this one.
        qc_issues_woMissingFile = [
            issue for issue in qc_issues if issue != missing_file]
        for issue_index, qc_issue in enumerate(qc_issues_woMissingFile):
            other_issues = [
                issue for issue in qc_issues_woMissingFile if issue != qc_issue]
            sub_file_plot_table = file_plot_table[(~file_plot_table[qc_issue]) & (
                file_plot_table[other_issues].any(axis=1))]
            x_coords = [file_coord_mapping[file_code]
                        for file_code in sub_file_plot_table["File"]]
            y_coords = [site_ins_coord_mapping[site_ins_code]
                        for site_ins_code in sub_file_plot_table["Dataset"]]
            marker_coords = marker_factory.clock_marker_coords(
                corner_count, issue_index+1)
            issue_marker_x_coords, issue_marker_y_coords = \
                marker_factory.marker_to_scatter_line_coords(marker_coords,
                                                             x_coords,
                                                             y_coords)
            fig.add_trace(go.Scatter(x=issue_marker_x_coords,
                                     y=issue_marker_y_coords,
                                     mode="lines",
                                     fill="toself",
                                     fillcolor="rgba(0,0,0,0)",
                                     line=dict(color="black", width=0.5),
                                     showlegend=False,
                                     hoverinfo="skip"
                                     ),
                          row=1, col=2)

        # One hover row per (file, dataset) listing all of its issues.
        if hover_frames:
            hover_information = pd.concat(hover_frames).groupby(
                hover_columns[:-1], dropna=False)["Issues"].apply(
                lambda issues: ", ".join(issues.astype(str))).reset_index()
        else:
            hover_information = pd.DataFrame(columns=hover_columns)

        # Invisible full-polygon trace that carries the hover text.
        hover_marker_x_coords = []
        hover_marker_y_coords = []
        hover_marker_customdata = []
        for _, row in hover_information.iterrows():
            hover_marker_x_coords.extend([marker_coord[0] + row["x"]
                                          for marker_coord in polygon_coords])
            hover_marker_x_coords.append(None)
            hover_marker_y_coords.extend([marker_coord[1] + row["y"]
                                          for marker_coord in polygon_coords])
            hover_marker_y_coords.append(None)
            # customdata needs one entry per coordinate, including the None
            # separator, to stay aligned with x / y.
            for _ in polygon_coords:
                hover_marker_customdata.append(
                    [row["Dataset"], row["Site (anonymized)"], row["Instrument model"], row["File"], row["Issues"]])
            hover_marker_customdata.append(None)
        fig.add_trace(go.Scatter(x=hover_marker_x_coords,
                                 y=hover_marker_y_coords,
                                 showlegend=False,
                                 mode="none",
                                 customdata=hover_marker_customdata,
                                 hovertemplate=("Dataset: %{customdata[0]}<br>" +
                                                "Site (anonymized): %{customdata[1]}<br>" +
                                                "Instrument model: %{customdata[2]}<br>" +
                                                "File: %{customdata[3]}<br>" +
                                                "Issues: %{customdata[4]}" +
                                                "<extra></extra>"
                                                )
                                 ),
                      row=1, col=2)

        fig.update_xaxes(title_text="File",
                         range=[
                             0-2, (file_counts-1)+2],
                         tickvals=list(file_coord_mapping.values()),
                         ticktext=list(file_coord_mapping.keys()),
                         tickangle=90,
                         gridcolor="lightgray",
                         zeroline=False,
                         showline=False,
                         row=1, col=2)

        # ------------------------------------------------------------------
        # Marker legend, drawn as its own figure and embedded as an image.
        # ------------------------------------------------------------------
        legend_fig = go.Figure()
        legend_list = ["Completed", "Incomplete"] + qc_issues
        for status_index, status in enumerate(["Completed", "Incomplete"]):
            legend_fig.add_trace(go.Scatter(x=[0], y=[status_index],
                                            marker=dict(symbol=status_marker_mapping[status],
                                                        color=status_marker_color_mapping[status],
                                                        size=12,
                                                        ),
                                            hoverinfo="skip"))
            legend_fig.add_annotation(x=0, y=status_index,
                                      text=status,
                                      xanchor="left",
                                      yanchor="middle",
                                      showarrow=False,
                                      xshift=15)

        clock_marker_count = 1
        # Legend rows that belong to wedge-style (non-"Missing file") issues.
        indexes = [row_position for row_position, label in enumerate(legend_list)
                   if label not in ["Completed", "Incomplete", missing_file]]
        for issue_index, qc_issue in enumerate(qc_issues):
            if qc_issue == missing_file:
                marker_coords = polygon_coords
            else:
                marker_coords = marker_factory.clock_marker_coords(corner_count,
                                                                   clock_marker_count)
                clock_marker_count += 1

            x, y = marker_factory.marker_to_scatter_line_coords(marker_coords,
                                                                [0], [legend_list.index(qc_issue)])
            legend_fig.add_trace(go.Scatter(x=x, y=y,
                                            mode="lines",
                                            fill="toself",
                                            fillcolor=issue_colors[issue_index],
                                            line=dict(
                                                color="black", width=0.5),
                                            hoverinfo="skip"))
            # Only wedge issues get their empty outline repeated on the other
            # wedge rows.  (The original compared against the misspelled
            # "Miising file", so the polygon outline was drawn there too.)
            if qc_issue != missing_file:
                x, y = marker_factory.marker_to_scatter_line_coords(marker_coords,
                                                                    [0] *
                                                                    (len(
                                                                        indexes)-1),
                                                                    [i for i in indexes
                                                                     if i != issue_index+2])
                legend_fig.add_trace(go.Scatter(x=x, y=y,
                                                mode="lines",
                                                fill="toself",
                                                fillcolor="rgba(0,0,0,0)",
                                                line=dict(
                                                    color="black", width=0.5),
                                                hoverinfo="skip"))
            legend_fig.add_annotation(x=0, y=issue_index+2,
                                      text=qc_issue,
                                      xanchor="left",
                                      yanchor="middle",
                                      showarrow=False,
                                      xshift=15)
        legend_fig.update_xaxes(visible=False,
                                range=[0-1, 0+7])
        legend_fig.update_yaxes(visible=False,
                                autorange="reversed")
        legend_fig_aspect = [25*8, 25*(2+len(qc_issues)+2)]
        legend_fig.update_layout(width=legend_fig_aspect[0],
                                 height=legend_fig_aspect[1],
                                 title="Marker Legend",
                                 margin=dict(l=0, r=0, t=50, b=0),
                                 showlegend=False,
                                 plot_bgcolor="white")

        # Static image export of the legend, inlined as a data URI.
        image_bytes = legend_fig.to_image(format="png", scale=2)
        base64_image_string = base64.b64encode(image_bytes).decode("utf-8")
        image_data_uri = f"data:image/png;base64,{base64_image_string}"

        # ------------------------------------------------------------------
        # Shared y axis and pixel-based layout.
        # ------------------------------------------------------------------
        fig.update_yaxes(title_text="Dataset",
                         showticklabels=True,
                         range=[0-2, (len(site_ins_coord_mapping)-1)+2],
                         tickvals=list(site_ins_coord_mapping.values()),
                         ticktext=list(site_ins_coord_mapping.keys()),
                         gridcolor="lightgray",
                         zeroline=False,
                         showline=False)

        def _normalize(values: list, value_range: list):
            """Rescale ``values`` linearly so that ``value_range`` maps to 0..1."""
            lower = min(value_range)
            span = max(value_range) - lower
            return [(value - lower) / span for value in values]

        # Horizontal layout in pixels.
        margin_left = 200
        subplot1_width = 85
        space12 = 200
        subplot2_width = 25*(file_counts+4)
        margin_right = 250

        center_plots_width = subplot1_width + space12 + subplot2_width
        figure_width = margin_left + center_plots_width + margin_right

        # Subplot x domains as fractions of the plotting area.
        subplot1_x_domain = _normalize([0,
                                        subplot1_width],
                                       [0, center_plots_width])
        subplot2_x_domain = _normalize([subplot1_width + space12,
                                        subplot1_width + space12 + subplot2_width],
                                       [0, center_plots_width])

        # Vertical layout in pixels.
        margin_top = 30
        center_plots_height = subplot12_height = 25 * \
            (len(site_ins_coord_mapping)+4)
        margin_bottom = 300
        figure_height = margin_top + subplot12_height + margin_bottom

        # Place the legend image 20 px to the right of the plotting area.
        legend_image_x = (center_plots_width + 20) / center_plots_width
        legend_image_sizex = legend_fig_aspect[0]*1.2
        legend_image_sizey = legend_image_sizex * \
            (legend_fig_aspect[1]/legend_fig_aspect[0])
        fig.add_layout_image(dict(source=image_data_uri,
                                  xref="paper", yref="paper",
                                  x=legend_image_x, y=0.98,
                                  sizex=legend_image_sizex/center_plots_width,
                                  sizey=legend_image_sizey/center_plots_height,
                                  sizing="contain",
                                  xanchor="left", yanchor="top",
                                  layer="below"
                                  ))

        # Panel titles, centred above each subplot.
        fig.add_annotation(x=np.mean(subplot1_x_domain), y=1, yshift=5,
                           xref="paper", yref="paper",
                           text="Dataset QC",
                           showarrow=False,
                           xanchor="center",
                           yanchor="bottom",
                           font=dict(size=16))
        fig.add_annotation(x=np.mean(subplot2_x_domain), y=1, yshift=5,
                           xref="paper", yref="paper",
                           text="File QC",
                           showarrow=False,
                           xanchor="center",
                           yanchor="bottom",
                           font=dict(size=16))

        fig.update_layout(height=figure_height, width=figure_width,
                          xaxis=dict(domain=subplot1_x_domain),
                          xaxis2=dict(domain=subplot2_x_domain),
                          yaxis=dict(automargin=False),
                          margin=dict(
                              t=margin_top, b=margin_bottom, l=margin_left, r=margin_right),
                          autosize=False,
                          showlegend=False,
                          hoverlabel=dict(bgcolor="white",
                                          font_color="black",
                                          align="left"),
                          plot_bgcolor="white"
                          )

        return fig
|
|
|
|
|
|
|
|
class AnalysisVisualizer:
    """Figures that summarise analysed gating results per dataset."""

    @staticmethod
    def visualize_barplot(analyzed_gating_results: pd.DataFrame,
                          visualized_results: list[str]
                          ) -> go.Figure:
        """Draw one horizontal bar chart per result, one bar per dataset.

        For every entry ``r`` of ``visualized_results`` the columns
        ``f"{r}_count"``, ``f"{r}_mean"`` and ``f"{r}_std"`` are read.  Bars
        show the mean with the standard deviation as error bar and are
        coloured by Tukey-style fences computed from the means (within
        1.5 IQR: "Normal", within 3 IQR: "Outlier", otherwise
        "Extreme outlier").  Quartiles and fences that fall inside the x
        range are drawn as vertical lines.  If every mean and std value of a
        result equals "Not reportable", the subplot only shows that text.

        Args:
            analyzed_gating_results: Table with the columns "Dataset",
                "Site (anonymized)", "Instrument model" and the three
                columns per result described above.
            visualized_results: Result names; one subplot column each.

        Returns:
            The assembled figure.
        """
        fig = make_subplots(rows=1, cols=len(visualized_results),
                            horizontal_spacing=0.4/len(visualized_results),
                            subplot_titles=visualized_results
                            )
        distribution_colors = {"Normal": "#636EFA",
                               "Outlier": "#FFA15A",
                               "Extreme outlier": "#EF553B"}
        statistic_line_colors = {"Q1": "blue", "Q2": "blue", "Q3": "blue",
                                 "Lower fence": "orange", "Upper fence": "orange",
                                 "Extreme lower fence": "red", "Extreme upper fence": "red"}

        dataset_counts = []
        for result_index, visualized_result in enumerate(visualized_results, start=1):
            count_column = f"{visualized_result}_count"
            mean_column = f"{visualized_result}_mean"
            std_column = f"{visualized_result}_std"

            df = analyzed_gating_results.copy()

            # y tick label "<dataset> (<count>)"; the first table row gets the
            # highest y position so it is drawn at the top.
            df[count_column] = df[count_column].astype(int)
            df["Dataset_wCount"] = df["Dataset"].astype(
                str) + " (" + df[count_column].astype(str) + ")"
            dataset_wCount = df["Dataset_wCount"].to_list()
            dataset_wCount_mapping = dict(
                zip(dataset_wCount[::-1], list(range(len(dataset_wCount)))))
            df["Dataset_wCount_index"] = df["Dataset_wCount"].map(
                dataset_wCount_mapping)
            y_positions = list(dataset_wCount_mapping.values())

            if (df[[mean_column, std_column]] == "Not reportable").all(axis=None):
                fig.add_annotation(x=0, y=np.percentile(y_positions, 50),
                                   text="Not reportable",
                                   xanchor="center",
                                   yanchor="middle",
                                   font=dict(size=24),
                                   showarrow=False,
                                   xshift=0,
                                   row=1, col=result_index)
                fig.update_xaxes(range=[-1, 1],
                                 showticklabels=False,
                                 row=1, col=result_index)
            else:
                df = df[df[count_column]
                        != 0].reset_index(drop=True)

                # A single measurement has no standard deviation.
                df[std_column] = df[std_column].replace({
                    "Only one data": None})

                for column in (mean_column, std_column):
                    df[column] = df[column].astype(
                        float).round(2)

                means = df[mean_column]
                q1 = means.quantile(0.25)
                q2 = means.quantile(0.5)
                q3 = means.quantile(0.75)
                iqr = q3 - q1
                # Insertion order determines the order of the line traces.
                statistic = {"Q1": q1,
                             "Q2": q2,
                             "Q3": q3,
                             "IQR": iqr,
                             "Lower fence": q1 - 1.5*iqr,
                             "Upper fence": q3 + 1.5*iqr,
                             "Extreme lower fence": q1 - 3*iqr,
                             "Extreme upper fence": q3 + 3*iqr}

                # Vectorised classification; rows failing both (inclusive)
                # tests, including NaN means, fall through to the default.
                within_fences = means.between(
                    statistic["Lower fence"], statistic["Upper fence"])
                within_extreme_fences = means.between(
                    statistic["Extreme lower fence"], statistic["Extreme upper fence"])
                df["distribution"] = np.select(
                    [within_fences, within_extreme_fences],
                    ["Normal", "Outlier"],
                    default="Extreme outlier")
                df["bar_color"] = df["distribution"].map(distribution_colors)

                if visualized_result == "Cell population (%)":
                    xmin = 0
                    xmax = 100
                else:
                    xmin = means.min()
                    xmax = means.max()
                # 2 % padding on both sides of the data range.
                x_range = [xmin-0.02*(xmax-xmin), xmax+0.02*(xmax-xmin)]

                for distribution in ["Normal", "Outlier", "Extreme outlier"]:
                    sub_df = df[
                        df["distribution"] == distribution]
                    hover_info = sub_df[["Dataset", "Site (anonymized)", "Instrument model",
                                         count_column, mean_column, std_column,
                                         "distribution"]].values.tolist()
                    # Plotly draws a bar from ``base`` to ``base + x``; the
                    # bar length is therefore the distance from the base to
                    # the mean so that the bar ends exactly at the mean.
                    fig.add_trace(go.Bar(x=sub_df[mean_column] - x_range[0],
                                         y=sub_df["Dataset_wCount_index"],
                                         base=[x_range[0]] * len(sub_df),
                                         orientation="h",
                                         error_x=dict(type="data",
                                                      array=sub_df[std_column],
                                                      visible=True),
                                         marker_color=sub_df["bar_color"],
                                         width=0.5,
                                         name=distribution,
                                         customdata=hover_info,
                                         hovertemplate=("Dataset: %{customdata[0]}<br>" +
                                                        "Site (anonymized): %{customdata[1]}<br>" +
                                                        "Instrument model: %{customdata[2]}<br>" +
                                                        "Data count: %{customdata[3]}<br>" +
                                                        "Mean (bar): %{customdata[4]}<br>" +
                                                        "STD (error bar): %{customdata[5]}<br>" +
                                                        "Distribution: %{customdata[6]}" +
                                                        "<extra></extra>"
                                                        )),
                                  row=1, col=result_index)

                # Vertical reference lines for quartiles and fences that are
                # inside the visible x range.
                line_y = ([min(y_positions)-0.5] +
                          y_positions +
                          [max(y_positions)+0.5])
                for statistic_key, statistic_value in statistic.items():
                    if statistic_key == "IQR":
                        continue
                    if not (x_range[0] <= statistic_value <= x_range[1]):
                        continue
                    fig.add_trace(go.Scatter(x=[statistic_value]*(len(dataset_wCount_mapping)+2),
                                             y=line_y,
                                             mode="lines",
                                             line=dict(
                                                 color=statistic_line_colors[statistic_key], width=2),
                                             showlegend=False,
                                             hoverinfo="text",
                                             hovertext=f"{statistic_key} ({statistic_value})"
                                             ),
                                  row=1, col=result_index
                                  )

                fig.update_xaxes(title_text=visualized_result,
                                 range=x_range,
                                 automargin=False,
                                 row=1, col=result_index)

            fig.update_yaxes(title_text="Dataset (Result counts)",
                             range=[min(y_positions)-1,
                                    max(y_positions)+1],
                             tickvals=y_positions,
                             ticktext=list(dataset_wCount_mapping.keys()),
                             autorange=False,
                             automargin=False,
                             row=1, col=result_index)

            dataset_counts.append(len(dataset_wCount_mapping))

        # Figure height grows with the largest number of datasets.
        margin_top = 30
        margin_bottom = 50
        margin_left = 200
        plot_height = 40 * max(dataset_counts)
        fig.update_layout(height=max([margin_top + plot_height + margin_bottom,
                                      250]),
                          width=800*len(visualized_results),
                          margin=dict(t=margin_top, b=margin_bottom, l=margin_left))

        return fig

    @staticmethod
    def visualize_heatmap(analyzed_gating_results: pd.DataFrame,
                          visualized_results: list[str]
                          ) -> go.Figure:
        """Draw one dataset-by-result-ID heatmap of standard deviations per result.

        For every entry ``r`` of ``visualized_results`` the table is pivoted
        to datasets (rows) by "Result ID" (columns) using ``f"{r}_std"`` for
        the cell value and ``f"{r}_count"`` for the hover text.  Columns are
        ordered by the integer in front of the first space of the result ID.
        The colour scale spans the 2nd to 98th percentile of the numeric
        cells; if a result has no numeric cell at all, a transparent
        placeholder heatmap carrying only the text labels is drawn.

        Args:
            analyzed_gating_results: Table with the columns "Dataset",
                "Site (anonymized)", "Instrument model", "Result ID" and the
                std / count columns described above.
            visualized_results: Result names; one subplot column each.

        Returns:
            The assembled figure.
        """
        pivot_index = ["Dataset", "Site (anonymized)", "Instrument model"]
        hovertemplate = ("Dataset: %{y}<br>" +
                         "Site (anonymized): %{customdata[0]}<br>" +
                         "Instrument model: %{customdata[1]}<br>" +
                         "Result ID: %{x}<br>" +
                         "Data count: %{customdata[2]}<br>" +
                         "STD value: %{z}" +
                         "<extra></extra>"
                         )

        def result_id_order(result_id):
            """Sort key: the integer in front of the first space."""
            return int(result_id.split(" ")[0])

        def scientific_anno(x):
            """Format a numeric cell for display; pass other values through."""
            try:
                x = float(x)
            except (TypeError, ValueError):
                # Text cells such as "Not reportable" are kept as they are.
                return x
            if x >= 1000:
                return f"{x:.1e}"
            return f"{round(x, 2)}"

        space_between_ratio = 0.4/(len(visualized_results))
        fig = make_subplots(rows=1, cols=len(visualized_results),
                            subplot_titles=visualized_results,
                            horizontal_spacing=space_between_ratio,
                            shared_xaxes=True, shared_yaxes=True
                            )
        # Width of one subplot as a fraction of the plotting area.
        subplot_ratio = (
            1 - space_between_ratio * (len(visualized_results)-1))/len(visualized_results)

        for result_index, visualized_result in enumerate(visualized_results, start=1):
            std_table = analyzed_gating_results.pivot(
                index=pivot_index,
                columns="Result ID", values=f"{visualized_result}_std")
            count_table = analyzed_gating_results.pivot(
                index=pivot_index,
                columns="Result ID", values=f"{visualized_result}_count")

            std_table = std_table[sorted(
                std_table.columns.to_list(), key=result_id_order)]
            count_table = count_table[sorted(
                count_table.columns.to_list(), key=result_id_order)]

            # Numeric cells only; text cells become NaN and are dropped.
            numeric_values = pd.to_numeric(
                std_table.values.flatten(), errors="coerce")
            numeric_values = numeric_values[~np.isnan(numeric_values)]

            annotation_table = pd.DataFrame(
                {col: std_table[col].apply(scientific_anno)
                 for col in std_table.columns})
            annotation_table = annotation_table.replace({"Not reportable": "Not<br>reportable",
                                                         "Only one data": "Only<br>one data"})

            # customdata[row][column] = [site, instrument model, count].
            row_labels = std_table.reset_index()
            column_count = len(std_table.columns)
            hover_info = [[[site, instrument, count_table.iloc[row_position, column_position]]
                           for column_position in range(column_count)]
                          for row_position, (site, instrument) in enumerate(
                              zip(row_labels["Site (anonymized)"], row_labels["Instrument model"]))]

            if len(numeric_values) > 0:
                fig.add_trace(go.Heatmap(z=std_table.values,
                                         text=annotation_table.values,
                                         texttemplate="%{text}",
                                         x=std_table.columns,
                                         y=row_labels["Dataset"],
                                         xgap=2, ygap=2,
                                         colorbar_x=(
                                             (subplot_ratio+space_between_ratio)*result_index - space_between_ratio*(8/9)),
                                         colorscale="Magma",
                                         zauto=False,
                                         zmin=np.percentile(numeric_values, 2),
                                         zmax=np.percentile(
                                             numeric_values, 98),
                                         customdata=hover_info,
                                         hovertemplate=hovertemplate
                                         ),
                              row=1, col=result_index)
            else:
                # No numeric cell: constant z of 0.5 maps to the transparent
                # midpoint of the colour scale, so only the text is visible.
                fig.add_trace(go.Heatmap(z=pd.DataFrame(0.5, index=std_table.index, columns=std_table.columns),
                                         text=annotation_table.values,
                                         texttemplate="%{text}",
                                         x=std_table.columns,
                                         y=row_labels["Dataset"],
                                         xgap=2, ygap=2,
                                         showscale=False,
                                         colorscale=[
                                             [0, "blue"], [0.5, "rgba(0,0,0,0)"], [1, "red"]],
                                         zauto=False, zmin=0, zmax=1,
                                         customdata=hover_info,
                                         hovertemplate=hovertemplate
                                         ),
                              row=1, col=result_index)

        # Axis styling uses the result IDs / datasets of the last pivot.
        result_ids = std_table.columns.to_list()
        dataset_labels = std_table.reset_index()["Dataset"].to_list()

        fig.update_xaxes(title_text="Result ID",
                         automargin=False,
                         showticklabels=True,
                         type="category",
                         tickmode="array",
                         tickvals=result_ids,
                         side="top"
                         )

        # One additional x axis per subplot, overlaid on the primary one.
        for col_index in range(1, len(visualized_results) + 1):
            fig.layout[f"xaxis{len(visualized_results) + col_index}"] = {
                "title_text": None,
                "automargin": False,
                "showticklabels": True,
                "type": "category",
                "tickmode": "array",
                "tickvals": result_ids,
                "mirror": "allticks",
                "overlaying": f"x{col_index}",
                "anchor": f"y{col_index}",
                "side": "top"
            }

        fig.update_yaxes(title_text="Dataset",
                         autorange="reversed",
                         automargin=False,
                         showticklabels=True,
                         type="category",
                         tickmode="array",
                         tickvals=dataset_labels)

        margin_top = 200
        margin_bottom = 30
        margin_left = 200
        margin_right = 30
        plot_height = 50 * len(dataset_labels)
        plot_width = 900*len(visualized_results)
        fig.update_layout(height=max([margin_top + plot_height + margin_bottom,
                                      200]),
                          width=plot_width,
                          margin=dict(t=margin_top, b=margin_bottom, l=margin_left, r=margin_right))

        # Shift the subplot titles created by make_subplots.
        for annotation in fig.layout.annotations:
            annotation.xshift = -400
            annotation.yshift = 5

        return fig
|
|
|