wenjun99 committed on
Commit
177e320
·
verified ·
1 Parent(s): f532ad3

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +224 -188
src/app.py CHANGED
@@ -10,7 +10,6 @@ import matplotlib
10
  import matplotlib.pyplot as plt
11
  import matplotlib.colors as mcolors
12
  from matplotlib.ticker import MultipleLocator
13
-
14
  from scipy.stats import gaussian_kde
15
  from PIL import Image
16
 
@@ -310,7 +309,7 @@ with tab1:
310
  index=0,
311
  key="enc_scheme",
312
  help=(
313
- "**6-bit LNS** – Custom 57-character table (A-Z, 0-9, punctuation). 6 bits/char.\n\n"
314
  "**Base64 (6-bit)** – Standard Base64 encoding of UTF-8 bytes. 6 bits/symbol.\n\n"
315
  "**ASCII (7-bit)** – Standard 7-bit ASCII. 7 bits/char.\n\n"
316
  "**UTF-8 (8-bit)** – Full UTF-8 byte encoding. 8 bits/byte. Supports all Unicode."
@@ -391,7 +390,7 @@ with tab1:
391
  columns = [f"Position {i+1}" for i in range(group_size)]
392
  df = pd.DataFrame(groups, columns=columns)
393
  df.insert(0, "Sample", range(1, len(df) + 1))
394
- st.dataframe(df, width="stretch")
395
 
396
  st.download_button(
397
  "⬇️ Download as CSV",
@@ -1015,213 +1014,250 @@ with tab2:
1015
  # --------------------------------------------------
1016
  with tab3:
1017
  st.header("πŸ“Š Data Analytics")
1018
- st.markdown("""
1019
- Upload your sample data file (Excel or CSV) for a quick exploratory assessment of the editing rates distribution.
1020
- The file should contain Reactions as rows and position columns with editing values.
1021
- This tab provides visualizations **before** any binary labelling.
1022
- """)
1023
 
1024
- analytics_uploaded = st.file_uploader(
1025
- "πŸ“€ Upload data file",
1026
- type=["xlsx", "csv"],
1027
- key="analytics_uploader"
1028
  )
1029
 
1030
- if analytics_uploaded is not None:
1031
- try:
1032
- if analytics_uploaded.name.endswith(".xlsx"):
1033
- adf = pd.read_excel(analytics_uploaded)
1034
- else:
1035
- adf = pd.read_csv(analytics_uploaded)
 
1036
 
1037
- st.success(f"βœ… Loaded file with {len(adf)} rows and {len(adf.columns)} columns")
1038
- adf.columns = [str(c).strip() for c in adf.columns]
 
 
 
1039
 
1040
- non_pos_keywords = {"sample", "description", "descritpion", "total edited",
1041
- 'volume per "1"', "volume per 1", "id", "name"}
1042
- position_cols = [c for c in adf.columns
1043
- if c.lower() not in non_pos_keywords
1044
- and pd.to_numeric(adf[c], errors="coerce").notna().any()]
 
1045
 
1046
- def pos_sort_key(col_name: str):
1047
- m = re.search(r"(\d+)", col_name)
1048
- return int(m.group(1)) if m else 10**9
1049
- position_cols = sorted(position_cols, key=pos_sort_key)
1050
 
1051
- if not position_cols:
1052
- st.error("No numeric position columns detected.")
1053
- st.stop()
 
 
1054
 
1055
- st.info(f"Detected **{len(position_cols)}** position columns and **{len(adf)}** Reactions.")
 
 
 
1056
 
1057
- pos_data = adf[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
 
 
1058
 
1059
- if "Total edited" in adf.columns:
1060
- total_edited = pd.to_numeric(adf["Total edited"], errors="coerce").fillna(0.0)
1061
- else:
1062
- total_edited = pos_data.sum(axis=1)
1063
 
1064
- st.markdown("### 1️⃣ Raw Data Distribution")
1065
- st.caption("Visualize editing values across all positions and Reactions - before any binary labelling.")
1066
 
1067
- transform_option = st.selectbox(
1068
- "Value transformation:",
1069
- ["Raw (linear)", "ln(1+x)", "ln(ln(1+x))"],
1070
- index=0,
1071
- key="transform_select",
1072
- help=(
1073
- "**Raw** - No transformation.\n\n"
1074
- "**ln(1+x)** - `ln(1 + x)`. Compresses high values, spreads low range.\n\n"
1075
- "**ln(ln(1+x))** - Double ln. Even stronger compression.\n\n"
 
 
 
 
 
 
 
 
 
1076
  )
1077
- )
1078
 
1079
- # def robust_pos_normalize_log1p(data: pd.DataFrame) -> pd.DataFrame:
1080
- # logged = np.log1p(data)
1081
- # result = logged.copy()
1082
- # for col in result.columns:
1083
- # med = result[col].median()
1084
- # q75, q25 = result[col].quantile(0.75), result[col].quantile(0.25)
1085
- # iqr = q75 - q25
1086
- # if iqr > 0:
1087
- # result[col] = (result[col] - med) / iqr
1088
- # else:
1089
- # result[col] = result[col] - med
1090
- # return result
1091
-
1092
- if transform_option == "ln(1+x)":
1093
- transformed = np.log1p(pos_data)
1094
- value_label = "Editing Value (ln(1+x))"
1095
- transform_tag = "ln(1+x)"
1096
- elif transform_option == "ln(ln(1+x))":
1097
- transformed = np.log1p(np.log1p(pos_data))
1098
- value_label = "Editing Value (ln(ln(1+x)))"
1099
- transform_tag = "ln(ln(1+x))"
1100
- # elif transform_option == "log1p β†’ pos. norm.":
1101
- # transformed = robust_pos_normalize_log1p(pos_data)
1102
- # value_label = "Editing Value (log1p β†’ pos. norm.)"
1103
- # transform_tag = "log1p_posnorm"
1104
- else:
1105
- transformed = pos_data
1106
- value_label = "Editing Value"
1107
- transform_tag = "raw"
1108
 
1109
- melted = transformed.melt(var_name="Position", value_name="Value")
1110
- melted["Position_idx"] = melted["Position"].apply(
1111
- lambda x: int(re.search(r"(\d+)", str(x)).group(1)) if re.search(r"(\d+)", str(x)) else 0
1112
- )
1113
 
 
1114
 
1115
- st.markdown("#### πŸ“Š Histogram - All Values")
1116
 
1117
- n_bins = st.number_input("Number of bins:", min_value=10, max_value=300, value=80, step=10, key="hist_bins")
 
1118
 
1119
- matplotlib.rcParams["font.family"] = "Arial"
1120
- matplotlib.rcParams["font.sans-serif"] = ["Arial", "DejaVu Sans", "Liberation Sans", "sans-serif"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1121
 
1122
- fig2, ax2 = plt.subplots(figsize=(7, 5))
1123
- all_vals = melted["Value"].values
1124
- ax2.hist(all_vals, bins=n_bins, color="#808080", alpha=0.8, label="All")
1125
- ax2.set_xlim(left=0.0)
1126
- val_range = np.nanmax(all_vals) - np.nanmin(all_vals)
1127
- if val_range <= 5:
1128
- major_step, minor_step = 0.5, 0.1
1129
- elif val_range <= 20:
1130
- major_step, minor_step = 2, 0.5
1131
- elif val_range <= 50:
1132
- major_step, minor_step = 5, 1
1133
- else:
1134
- major_step, minor_step = 10, 2
1135
- ax2.xaxis.set_major_locator(MultipleLocator(major_step))
1136
- ax2.xaxis.set_minor_locator(MultipleLocator(minor_step))
1137
- ax2.tick_params(axis="x", which="major", length=6)
1138
- ax2.tick_params(axis="x", which="minor", length=3)
1139
- ax2.legend(fontsize=8)
1140
- ax2.set_title(f"Raw Values Distribution ({transform_tag})")
1141
- ax2.set_xlabel(value_label)
1142
- ax2.set_ylabel("Counts")
1143
-
1144
- # Apply axis styling
1145
- for attr in [ax2.xaxis.label, ax2.yaxis.label, ax2.title]:
1146
- attr.set_fontsize(12)
1147
- attr.set_fontweight("bold")
1148
- attr.set_fontfamily("Arial")
1149
- for spine in ax2.spines.values():
1150
- spine.set_linewidth(1.5)
1151
- ax2.tick_params(axis="both", which="both", width=1.2, labelsize=10)
1152
- ax2.spines['top'].set_visible(False)
1153
- ax2.spines['right'].set_visible(False)
1154
- for ticklab in ax2.get_xticklabels() + ax2.get_yticklabels():
1155
- ticklab.set_fontfamily("Arial")
1156
- ticklab.set_fontweight("normal")
1157
-
1158
- fig2.tight_layout()
1159
- st.pyplot(fig2)
1160
-
1161
- # st.markdown("#### πŸ“Š Histogram - All Values")
1162
-
1163
- # n_bins = st.number_input("Number of bins:", min_value=10, max_value=300, value=80, step=10, key="hist_bins")
1164
-
1165
- # fig2, ax2 = plt.subplots(figsize=(10, 4))
1166
- # ax2.hist(melted["Value"].values, bins=n_bins, color="#4F46E5", edgecolor="white", linewidth=0.3)
1167
- # ax2.set_xlabel(value_label)
1168
- # ax2.set_ylabel("Count")
1169
- # ax2.set_title(f"Raw Values Distribution ({transform_tag})")
1170
- # val_min = melted["Value"].min()
1171
- # val_max = melted["Value"].max()
1172
- # val_range = val_max - val_min
1173
- # if val_range <= 2:
1174
- # tick_step = 0.1
1175
- # elif val_range <= 6:
1176
- # tick_step = 0.2
1177
- # elif val_range <= 20:
1178
- # tick_step = 1
1179
- # else:
1180
- # tick_step = 5
1181
- # ax2.set_xticks(np.arange(np.floor(val_min / tick_step) * tick_step,
1182
- # val_max + tick_step, tick_step))
1183
- # ax2.tick_params(axis='x', labelsize=8, rotation=45)
1184
- # ax2.grid(axis='y', alpha=0.3)
1185
- # fig2.tight_layout()
1186
- # st.pyplot(fig2)
1187
-
1188
- st.markdown("#### 2️⃣ Density Scatter Plot")
1189
- st.caption("Each dot = one measurement (sample Γ— position). Color = local point density.")
1190
-
1191
- x_vals = melted["Position_idx"].values.astype(float)
1192
- y_vals = melted["Value"].values.astype(float)
1193
-
1194
- x_jittered = x_vals + np.random.default_rng(42).uniform(-0.3, 0.3, size=len(x_vals))
1195
-
1196
- with st.spinner("Computing point density..."):
1197
- try:
1198
- xy = np.vstack([x_jittered, y_vals])
1199
- density = gaussian_kde(xy)(xy)
1200
- except np.linalg.LinAlgError:
1201
- density = np.ones(len(x_vals))
1202
-
1203
- sort_idx = density.argsort()
1204
- x_plot = x_jittered[sort_idx]
1205
- y_plot = y_vals[sort_idx]
1206
- d_plot = density[sort_idx]
1207
-
1208
- fig3, ax3 = plt.subplots(figsize=(12, 6))
1209
- scatter = ax3.scatter(x_plot, y_plot, c=d_plot, cmap="jet", s=8, alpha=0.7, edgecolors="none")
1210
- cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
1211
- ax3.set_xlabel("Position")
1212
- ax3.set_ylabel(value_label)
1213
- ax3.set_title(f"Density Scatter - {value_label} by Position")
1214
- ax3.set_xticks(sorted(melted["Position_idx"].unique()))
1215
- ax3.grid(alpha=0.2)
1216
- fig3.tight_layout()
1217
- st.pyplot(fig3)
1218
 
1219
- except Exception as e:
1220
- st.error(f"❌ Error processing file: {e}")
1221
- import traceback
1222
- st.code(traceback.format_exc())
1223
  else:
1224
- st.info("πŸ‘† Upload a data file (CSV or Excel) to start exploring.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1225
 
1226
  # --------------------------------------------------
1227
  # TAB 4: Pipetting Command Generator
 
10
  import matplotlib.pyplot as plt
11
  import matplotlib.colors as mcolors
12
  from matplotlib.ticker import MultipleLocator
 
13
  from scipy.stats import gaussian_kde
14
  from PIL import Image
15
 
 
309
  index=0,
310
  key="enc_scheme",
311
  help=(
312
+ "**6-bit LNS** – Custom 56-character table (A-Z, 0-9, punctuation). 6 bits/char.\n\n"
313
  "**Base64 (6-bit)** – Standard Base64 encoding of UTF-8 bytes. 6 bits/symbol.\n\n"
314
  "**ASCII (7-bit)** – Standard 7-bit ASCII. 7 bits/char.\n\n"
315
  "**UTF-8 (8-bit)** – Full UTF-8 byte encoding. 8 bits/byte. Supports all Unicode."
 
390
  columns = [f"Position {i+1}" for i in range(group_size)]
391
  df = pd.DataFrame(groups, columns=columns)
392
  df.insert(0, "Sample", range(1, len(df) + 1))
393
+ st.dataframe(df, hide_index=True, width="stretch")
394
 
395
  st.download_button(
396
  "⬇️ Download as CSV",
 
1014
  # --------------------------------------------------
1015
  with tab3:
1016
  st.header("πŸ“Š Data Analytics")
 
 
 
 
 
1017
 
1018
+ section_choice = st.radio(
1019
+ "Select analysis type:",
1020
+ ["Experimental Editing Values", "Binary Values"],
1021
+ horizontal=True
1022
  )
1023
 
1024
+ # ── Section 1: Experimental Editing Values ──
1025
+ if section_choice == "Experimental Editing Values":
1026
+ st.markdown("""
1027
+ Upload your sample data file (Excel or CSV) for a quick exploratory assessment of the editing rates distribution.
1028
+ The file should contain Reactions as rows and position columns with editing values.
1029
+ This section provides visualizations **before** any binary labelling.
1030
+ """)
1031
 
1032
+ analytics_uploaded = st.file_uploader(
1033
+ "πŸ“€ Upload data file",
1034
+ type=["xlsx", "csv"],
1035
+ key="analytics_uploader"
1036
+ )
1037
 
1038
+ if analytics_uploaded is not None:
1039
+ try:
1040
+ if analytics_uploaded.name.endswith(".xlsx"):
1041
+ adf = pd.read_excel(analytics_uploaded)
1042
+ else:
1043
+ adf = pd.read_csv(analytics_uploaded)
1044
 
1045
+ st.success(f"βœ… Loaded file with {len(adf)} rows and {len(adf.columns)} columns")
1046
+ adf.columns = [str(c).strip() for c in adf.columns]
 
 
1047
 
1048
+ non_pos_keywords = {"sample", "description", "descritpion", "total edited",
1049
+ 'volume per "1"', "volume per 1", "id", "name"}
1050
+ position_cols = [c for c in adf.columns
1051
+ if c.lower() not in non_pos_keywords
1052
+ and pd.to_numeric(adf[c], errors="coerce").notna().any()]
1053
 
1054
+ def pos_sort_key(col_name: str):
1055
+ m = re.search(r"(\d+)", col_name)
1056
+ return int(m.group(1)) if m else 10**9
1057
+ position_cols = sorted(position_cols, key=pos_sort_key)
1058
 
1059
+ if not position_cols:
1060
+ st.error("No numeric position columns detected.")
1061
+ st.stop()
1062
 
1063
+ st.info(f"Detected **{len(position_cols)}** position columns and **{len(adf)}** Reactions.")
 
 
 
1064
 
1065
+ pos_data = adf[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
 
1066
 
1067
+ if "Total edited" in adf.columns:
1068
+ total_edited = pd.to_numeric(adf["Total edited"], errors="coerce").fillna(0.0)
1069
+ else:
1070
+ total_edited = pos_data.sum(axis=1)
1071
+
1072
+ st.markdown("### 1️⃣ Raw Data Distribution")
1073
+ st.caption("Visualize editing values across all positions and Reactions - before any binary labelling.")
1074
+
1075
+ transform_option = st.selectbox(
1076
+ "Value transformation:",
1077
+ ["Raw (linear)", "log1p", "log1p β†’ log1p"],
1078
+ index=0,
1079
+ key="transform_select",
1080
+ help=(
1081
+ "**Raw** - No transformation.\n\n"
1082
+ "**log1p** - `log(1 + x)`. Compresses high values, spreads low range.\n\n"
1083
+ "**log1p β†’ log1p** - Double log1p. Even stronger compression.\n\n"
1084
+ )
1085
  )
 
1086
 
1087
+ if transform_option == "log1p":
1088
+ transformed = np.log1p(pos_data)
1089
+ value_label = "Editing Value (log1p)"
1090
+ transform_tag = "log1p"
1091
+ elif transform_option == "log1p β†’ log1p":
1092
+ transformed = np.log1p(np.log1p(pos_data))
1093
+ value_label = "Editing Value (log1p β†’ log1p)"
1094
+ transform_tag = "log1p_log1p"
1095
+ else:
1096
+ transformed = pos_data
1097
+ value_label = "Editing Value"
1098
+ transform_tag = "raw"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1099
 
1100
+ melted = transformed.melt(var_name="Position", value_name="Value")
1101
+ melted["Position_idx"] = melted["Position"].apply(
1102
+ lambda x: int(re.search(r"(\d+)", str(x)).group(1)) if re.search(r"(\d+)", str(x)) else 0
1103
+ )
1104
 
1105
+ st.markdown("#### πŸ“Š Histogram - All Values")
1106
 
1107
+ n_bins = st.number_input("Number of bins:", min_value=10, max_value=300, value=80, step=10, key="hist_bins")
1108
 
1109
+ matplotlib.rcParams["font.family"] = "Arial"
1110
+ matplotlib.rcParams["font.sans-serif"] = ["Arial", "DejaVu Sans", "Liberation Sans", "sans-serif"]
1111
 
1112
+ fig2, ax2 = plt.subplots(figsize=(7, 5))
1113
+ all_vals = melted["Value"].values
1114
+ ax2.hist(all_vals, bins=n_bins, color="#808080", alpha=0.8, label="All")
1115
+ ax2.set_xlim(left=0.0)
1116
+ val_range = np.nanmax(all_vals) - np.nanmin(all_vals)
1117
+ if val_range <= 5:
1118
+ major_step, minor_step = 0.5, 0.1
1119
+ elif val_range <= 20:
1120
+ major_step, minor_step = 2, 0.5
1121
+ elif val_range <= 50:
1122
+ major_step, minor_step = 5, 1
1123
+ else:
1124
+ major_step, minor_step = 10, 2
1125
+ ax2.xaxis.set_major_locator(MultipleLocator(major_step))
1126
+ ax2.xaxis.set_minor_locator(MultipleLocator(minor_step))
1127
+ ax2.tick_params(axis="x", which="major", length=6)
1128
+ ax2.tick_params(axis="x", which="minor", length=3)
1129
+ ax2.legend(fontsize=8)
1130
+ ax2.set_title(f"Raw Values Distribution ({transform_tag})")
1131
+ ax2.set_xlabel(value_label)
1132
+ ax2.set_ylabel("Counts")
1133
+
1134
+ for attr in [ax2.xaxis.label, ax2.yaxis.label, ax2.title]:
1135
+ attr.set_fontsize(12)
1136
+ attr.set_fontweight("bold")
1137
+ attr.set_fontfamily("Arial")
1138
+ for spine in ax2.spines.values():
1139
+ spine.set_linewidth(1.5)
1140
+ ax2.tick_params(axis="both", which="both", width=1.2, labelsize=10)
1141
+ ax2.spines['top'].set_visible(False)
1142
+ ax2.spines['right'].set_visible(False)
1143
+ for ticklab in ax2.get_xticklabels() + ax2.get_yticklabels():
1144
+ ticklab.set_fontfamily("Arial")
1145
+ ticklab.set_fontweight("normal")
1146
+
1147
+ fig2.tight_layout()
1148
+ st.pyplot(fig2)
1149
+
1150
+ st.markdown("#### 2️⃣ Density Scatter Plot")
1151
+ st.caption("Each dot = one measurement (sample Γ— position). Color = local point density.")
1152
+
1153
+ x_vals = melted["Position_idx"].values.astype(float)
1154
+ y_vals = melted["Value"].values.astype(float)
1155
+
1156
+ x_jittered = x_vals + np.random.default_rng(42).uniform(-0.3, 0.3, size=len(x_vals))
1157
+
1158
+ with st.spinner("Computing point density..."):
1159
+ try:
1160
+ xy = np.vstack([x_jittered, y_vals])
1161
+ density = gaussian_kde(xy)(xy)
1162
+ except np.linalg.LinAlgError:
1163
+ density = np.ones(len(x_vals))
1164
+
1165
+ sort_idx = density.argsort()
1166
+ x_plot = x_jittered[sort_idx]
1167
+ y_plot = y_vals[sort_idx]
1168
+ d_plot = density[sort_idx]
1169
+
1170
+ fig3, ax3 = plt.subplots(figsize=(12, 6))
1171
+ scatter = ax3.scatter(x_plot, y_plot, c=d_plot, cmap="jet", s=8, alpha=0.7, edgecolors="none")
1172
+ cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
1173
+ ax3.set_xlabel("Position")
1174
+ ax3.set_ylabel(value_label)
1175
+ ax3.set_title(f"Density Scatter - {value_label} by Position")
1176
+ ax3.set_xticks(sorted(melted["Position_idx"].unique()))
1177
+ ax3.grid(alpha=0.2)
1178
+ fig3.tight_layout()
1179
+ st.pyplot(fig3)
1180
 
1181
+ except Exception as e:
1182
+ st.error(f"❌ Error processing file: {e}")
1183
+ import traceback
1184
+ st.code(traceback.format_exc())
1185
+ else:
1186
+ st.info("πŸ‘† Upload a data file (CSV or Excel) to start exploring.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1187
 
1188
+ # ── Section 2: Binary Values ──
 
 
 
1189
  else:
1190
+ st.markdown("""
1191
+ Upload a binary labels CSV file (rows = reactions, columns = positions with 0/1 values).
1192
+ A **Total_Edited** column will be computed automatically as the sum of 1s per reaction,
1193
+ and a box plot of Total Edited counts will be displayed.
1194
+ """)
1195
+
1196
+ binary_uploaded = st.file_uploader(
1197
+ "πŸ“€ Upload binary labels CSV",
1198
+ type=["csv"],
1199
+ key="binary_uploader"
1200
+ )
1201
+
1202
+ if binary_uploaded is not None:
1203
+ try:
1204
+ bdf = pd.read_csv(binary_uploaded)
1205
+ st.success(f"βœ… Loaded file with {len(bdf)} rows and {len(bdf.columns)} columns")
1206
+
1207
+ # All columns should be position columns (0/1 values)
1208
+ bdf.columns = [str(c).strip() for c in bdf.columns]
1209
+ pos_cols = [c for c in bdf.columns
1210
+ if pd.to_numeric(bdf[c], errors="coerce").notna().any()]
1211
+ pos_data_bin = bdf[pos_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
1212
+
1213
+ # Compute Total_Edited
1214
+ pos_data_bin["Total_Edited"] = pos_data_bin.sum(axis=1)
1215
+
1216
+ st.info(f"Detected **{len(pos_cols)}** position columns across **{len(bdf)}** reactions.")
1217
+ st.dataframe(pos_data_bin, hide_index=True)
1218
+
1219
+ # Box plot
1220
+ matplotlib.rcParams["font.family"] = "Arial"
1221
+ matplotlib.rcParams["font.sans-serif"] = ["Arial", "DejaVu Sans", "Liberation Sans", "sans-serif"]
1222
+
1223
+ fig_box, ax_box = plt.subplots(figsize=(7, 5))
1224
+ bp = ax_box.boxplot(pos_data_bin["Total_Edited"].values, vert=True, patch_artist=True,
1225
+ boxprops=dict(facecolor="#808080", alpha=0.8),
1226
+ medianprops=dict(color="black", linewidth=1.5),
1227
+ whiskerprops=dict(linewidth=1.2),
1228
+ capprops=dict(linewidth=1.2))
1229
+ ax_box.set_ylabel("Total Edited (sum of 1s)")
1230
+ ax_box.set_title("Distribution of Total Edited per Reaction")
1231
+ ax_box.set_xticklabels(["All Reactions"])
1232
+
1233
+ # Apply axis styling
1234
+ for attr in [ax_box.xaxis.label, ax_box.yaxis.label, ax_box.title]:
1235
+ attr.set_fontsize(12)
1236
+ attr.set_fontweight("bold")
1237
+ attr.set_fontfamily("Arial")
1238
+ for spine in ax_box.spines.values():
1239
+ spine.set_linewidth(1.5)
1240
+ ax_box.tick_params(axis="both", which="both", width=1.2, labelsize=10)
1241
+ ax_box.spines['top'].set_visible(False)
1242
+ ax_box.spines['right'].set_visible(False)
1243
+ for ticklab in ax_box.get_xticklabels() + ax_box.get_yticklabels():
1244
+ ticklab.set_fontfamily("Arial")
1245
+ ticklab.set_fontweight("normal")
1246
+
1247
+ fig_box.tight_layout()
1248
+ st.pyplot(fig_box)
1249
+
1250
+ # Summary stats
1251
+ st.markdown("#### Summary Statistics")
1252
+ stats = pos_data_bin["Total_Edited"].describe()
1253
+ st.dataframe(stats.to_frame("Total_Edited").T)
1254
+
1255
+ except Exception as e:
1256
+ st.error(f"❌ Error processing file: {e}")
1257
+ import traceback
1258
+ st.code(traceback.format_exc())
1259
+ else:
1260
+ st.info("πŸ‘† Upload a binary labels CSV file to start exploring.")
1261
 
1262
  # --------------------------------------------------
1263
  # TAB 4: Pipetting Command Generator