Spaces:
Sleeping
Sleeping
Update src/app.py
Browse files- src/app.py +224 -188
src/app.py
CHANGED
|
@@ -10,7 +10,6 @@ import matplotlib
|
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
import matplotlib.colors as mcolors
|
| 12 |
from matplotlib.ticker import MultipleLocator
|
| 13 |
-
|
| 14 |
from scipy.stats import gaussian_kde
|
| 15 |
from PIL import Image
|
| 16 |
|
|
@@ -310,7 +309,7 @@ with tab1:
|
|
| 310 |
index=0,
|
| 311 |
key="enc_scheme",
|
| 312 |
help=(
|
| 313 |
-
"**6-bit LNS** β Custom
|
| 314 |
"**Base64 (6-bit)** β Standard Base64 encoding of UTF-8 bytes. 6 bits/symbol.\n\n"
|
| 315 |
"**ASCII (7-bit)** β Standard 7-bit ASCII. 7 bits/char.\n\n"
|
| 316 |
"**UTF-8 (8-bit)** β Full UTF-8 byte encoding. 8 bits/byte. Supports all Unicode."
|
|
@@ -391,7 +390,7 @@ with tab1:
|
|
| 391 |
columns = [f"Position {i+1}" for i in range(group_size)]
|
| 392 |
df = pd.DataFrame(groups, columns=columns)
|
| 393 |
df.insert(0, "Sample", range(1, len(df) + 1))
|
| 394 |
-
st.dataframe(df, width="stretch")
|
| 395 |
|
| 396 |
st.download_button(
|
| 397 |
"⬇️ Download as CSV",
|
|
@@ -1015,213 +1014,250 @@ with tab2:
|
|
| 1015 |
# --------------------------------------------------
|
| 1016 |
with tab3:
|
| 1017 |
st.header("π Data Analytics")
|
| 1018 |
-
st.markdown("""
|
| 1019 |
-
Upload your sample data file (Excel or CSV) for a quick exploratory assessment of the editing rates distribution.
|
| 1020 |
-
The file should contain Reactions as rows and position columns with editing values.
|
| 1021 |
-
This tab provides visualizations **before** any binary labelling.
|
| 1022 |
-
""")
|
| 1023 |
|
| 1024 |
-
|
| 1025 |
-
"
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
)
|
| 1029 |
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
|
| 1034 |
-
|
| 1035 |
-
|
|
|
|
| 1036 |
|
| 1037 |
-
|
| 1038 |
-
|
|
|
|
|
|
|
|
|
|
| 1039 |
|
| 1040 |
-
|
| 1041 |
-
|
| 1042 |
-
|
| 1043 |
-
|
| 1044 |
-
|
|
|
|
| 1045 |
|
| 1046 |
-
|
| 1047 |
-
|
| 1048 |
-
return int(m.group(1)) if m else 10**9
|
| 1049 |
-
position_cols = sorted(position_cols, key=pos_sort_key)
|
| 1050 |
|
| 1051 |
-
|
| 1052 |
-
|
| 1053 |
-
|
|
|
|
|
|
|
| 1054 |
|
| 1055 |
-
|
|
|
|
|
|
|
|
|
|
| 1056 |
|
| 1057 |
-
|
|
|
|
|
|
|
| 1058 |
|
| 1059 |
-
|
| 1060 |
-
total_edited = pd.to_numeric(adf["Total edited"], errors="coerce").fillna(0.0)
|
| 1061 |
-
else:
|
| 1062 |
-
total_edited = pos_data.sum(axis=1)
|
| 1063 |
|
| 1064 |
-
|
| 1065 |
-
st.caption("Visualize editing values across all positions and Reactions - before any binary labelling.")
|
| 1066 |
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
| 1074 |
-
|
| 1075 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1076 |
)
|
| 1077 |
-
)
|
| 1078 |
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
|
| 1083 |
-
|
| 1084 |
-
|
| 1085 |
-
|
| 1086 |
-
|
| 1087 |
-
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
|
| 1091 |
-
|
| 1092 |
-
if transform_option == "ln(1+x)":
|
| 1093 |
-
transformed = np.log1p(pos_data)
|
| 1094 |
-
value_label = "Editing Value (ln(1+x))"
|
| 1095 |
-
transform_tag = "ln(1+x)"
|
| 1096 |
-
elif transform_option == "ln(ln(1+x))":
|
| 1097 |
-
transformed = np.log1p(np.log1p(pos_data))
|
| 1098 |
-
value_label = "Editing Value (ln(ln(1+x)))"
|
| 1099 |
-
transform_tag = "ln(ln(1+x))"
|
| 1100 |
-
# elif transform_option == "log1p β pos. norm.":
|
| 1101 |
-
# transformed = robust_pos_normalize_log1p(pos_data)
|
| 1102 |
-
# value_label = "Editing Value (log1p β pos. norm.)"
|
| 1103 |
-
# transform_tag = "log1p_posnorm"
|
| 1104 |
-
else:
|
| 1105 |
-
transformed = pos_data
|
| 1106 |
-
value_label = "Editing Value"
|
| 1107 |
-
transform_tag = "raw"
|
| 1108 |
|
| 1109 |
-
|
| 1110 |
-
|
| 1111 |
-
|
| 1112 |
-
|
| 1113 |
|
|
|
|
| 1114 |
|
| 1115 |
-
|
| 1116 |
|
| 1117 |
-
|
|
|
|
| 1118 |
|
| 1119 |
-
|
| 1120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1121 |
|
| 1122 |
-
|
| 1123 |
-
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
|
| 1127 |
-
|
| 1128 |
-
major_step, minor_step = 0.5, 0.1
|
| 1129 |
-
elif val_range <= 20:
|
| 1130 |
-
major_step, minor_step = 2, 0.5
|
| 1131 |
-
elif val_range <= 50:
|
| 1132 |
-
major_step, minor_step = 5, 1
|
| 1133 |
-
else:
|
| 1134 |
-
major_step, minor_step = 10, 2
|
| 1135 |
-
ax2.xaxis.set_major_locator(MultipleLocator(major_step))
|
| 1136 |
-
ax2.xaxis.set_minor_locator(MultipleLocator(minor_step))
|
| 1137 |
-
ax2.tick_params(axis="x", which="major", length=6)
|
| 1138 |
-
ax2.tick_params(axis="x", which="minor", length=3)
|
| 1139 |
-
ax2.legend(fontsize=8)
|
| 1140 |
-
ax2.set_title(f"Raw Values Distribution ({transform_tag})")
|
| 1141 |
-
ax2.set_xlabel(value_label)
|
| 1142 |
-
ax2.set_ylabel("Counts")
|
| 1143 |
-
|
| 1144 |
-
# Apply axis styling
|
| 1145 |
-
for attr in [ax2.xaxis.label, ax2.yaxis.label, ax2.title]:
|
| 1146 |
-
attr.set_fontsize(12)
|
| 1147 |
-
attr.set_fontweight("bold")
|
| 1148 |
-
attr.set_fontfamily("Arial")
|
| 1149 |
-
for spine in ax2.spines.values():
|
| 1150 |
-
spine.set_linewidth(1.5)
|
| 1151 |
-
ax2.tick_params(axis="both", which="both", width=1.2, labelsize=10)
|
| 1152 |
-
ax2.spines['top'].set_visible(False)
|
| 1153 |
-
ax2.spines['right'].set_visible(False)
|
| 1154 |
-
for ticklab in ax2.get_xticklabels() + ax2.get_yticklabels():
|
| 1155 |
-
ticklab.set_fontfamily("Arial")
|
| 1156 |
-
ticklab.set_fontweight("normal")
|
| 1157 |
-
|
| 1158 |
-
fig2.tight_layout()
|
| 1159 |
-
st.pyplot(fig2)
|
| 1160 |
-
|
| 1161 |
-
# st.markdown("#### π Histogram - All Values")
|
| 1162 |
-
|
| 1163 |
-
# n_bins = st.number_input("Number of bins:", min_value=10, max_value=300, value=80, step=10, key="hist_bins")
|
| 1164 |
-
|
| 1165 |
-
# fig2, ax2 = plt.subplots(figsize=(10, 4))
|
| 1166 |
-
# ax2.hist(melted["Value"].values, bins=n_bins, color="#4F46E5", edgecolor="white", linewidth=0.3)
|
| 1167 |
-
# ax2.set_xlabel(value_label)
|
| 1168 |
-
# ax2.set_ylabel("Count")
|
| 1169 |
-
# ax2.set_title(f"Raw Values Distribution ({transform_tag})")
|
| 1170 |
-
# val_min = melted["Value"].min()
|
| 1171 |
-
# val_max = melted["Value"].max()
|
| 1172 |
-
# val_range = val_max - val_min
|
| 1173 |
-
# if val_range <= 2:
|
| 1174 |
-
# tick_step = 0.1
|
| 1175 |
-
# elif val_range <= 6:
|
| 1176 |
-
# tick_step = 0.2
|
| 1177 |
-
# elif val_range <= 20:
|
| 1178 |
-
# tick_step = 1
|
| 1179 |
-
# else:
|
| 1180 |
-
# tick_step = 5
|
| 1181 |
-
# ax2.set_xticks(np.arange(np.floor(val_min / tick_step) * tick_step,
|
| 1182 |
-
# val_max + tick_step, tick_step))
|
| 1183 |
-
# ax2.tick_params(axis='x', labelsize=8, rotation=45)
|
| 1184 |
-
# ax2.grid(axis='y', alpha=0.3)
|
| 1185 |
-
# fig2.tight_layout()
|
| 1186 |
-
# st.pyplot(fig2)
|
| 1187 |
-
|
| 1188 |
-
st.markdown("#### 2️⃣ Density Scatter Plot")
|
| 1189 |
-
st.caption("Each dot = one measurement (sample × position). Color = local point density.")
|
| 1190 |
-
|
| 1191 |
-
x_vals = melted["Position_idx"].values.astype(float)
|
| 1192 |
-
y_vals = melted["Value"].values.astype(float)
|
| 1193 |
-
|
| 1194 |
-
x_jittered = x_vals + np.random.default_rng(42).uniform(-0.3, 0.3, size=len(x_vals))
|
| 1195 |
-
|
| 1196 |
-
with st.spinner("Computing point density..."):
|
| 1197 |
-
try:
|
| 1198 |
-
xy = np.vstack([x_jittered, y_vals])
|
| 1199 |
-
density = gaussian_kde(xy)(xy)
|
| 1200 |
-
except np.linalg.LinAlgError:
|
| 1201 |
-
density = np.ones(len(x_vals))
|
| 1202 |
-
|
| 1203 |
-
sort_idx = density.argsort()
|
| 1204 |
-
x_plot = x_jittered[sort_idx]
|
| 1205 |
-
y_plot = y_vals[sort_idx]
|
| 1206 |
-
d_plot = density[sort_idx]
|
| 1207 |
-
|
| 1208 |
-
fig3, ax3 = plt.subplots(figsize=(12, 6))
|
| 1209 |
-
scatter = ax3.scatter(x_plot, y_plot, c=d_plot, cmap="jet", s=8, alpha=0.7, edgecolors="none")
|
| 1210 |
-
cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
|
| 1211 |
-
ax3.set_xlabel("Position")
|
| 1212 |
-
ax3.set_ylabel(value_label)
|
| 1213 |
-
ax3.set_title(f"Density Scatter - {value_label} by Position")
|
| 1214 |
-
ax3.set_xticks(sorted(melted["Position_idx"].unique()))
|
| 1215 |
-
ax3.grid(alpha=0.2)
|
| 1216 |
-
fig3.tight_layout()
|
| 1217 |
-
st.pyplot(fig3)
|
| 1218 |
|
| 1219 |
-
|
| 1220 |
-
st.error(f"β Error processing file: {e}")
|
| 1221 |
-
import traceback
|
| 1222 |
-
st.code(traceback.format_exc())
|
| 1223 |
else:
|
| 1224 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1225 |
|
| 1226 |
# --------------------------------------------------
|
| 1227 |
# TAB 4: Pipetting Command Generator
|
|
|
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
import matplotlib.colors as mcolors
|
| 12 |
from matplotlib.ticker import MultipleLocator
|
|
|
|
| 13 |
from scipy.stats import gaussian_kde
|
| 14 |
from PIL import Image
|
| 15 |
|
|
|
|
| 309 |
index=0,
|
| 310 |
key="enc_scheme",
|
| 311 |
help=(
|
| 312 |
+
"**6-bit LNS** β Custom 56-character table (A-Z, 0-9, punctuation). 6 bits/char.\n\n"
|
| 313 |
"**Base64 (6-bit)** β Standard Base64 encoding of UTF-8 bytes. 6 bits/symbol.\n\n"
|
| 314 |
"**ASCII (7-bit)** β Standard 7-bit ASCII. 7 bits/char.\n\n"
|
| 315 |
"**UTF-8 (8-bit)** β Full UTF-8 byte encoding. 8 bits/byte. Supports all Unicode."
|
|
|
|
| 390 |
columns = [f"Position {i+1}" for i in range(group_size)]
|
| 391 |
df = pd.DataFrame(groups, columns=columns)
|
| 392 |
df.insert(0, "Sample", range(1, len(df) + 1))
|
| 393 |
+
st.dataframe(df, hide_index=True, width="stretch")
|
| 394 |
|
| 395 |
st.download_button(
|
| 396 |
"⬇️ Download as CSV",
|
|
|
|
| 1014 |
# --------------------------------------------------
|
| 1015 |
with tab3:
|
| 1016 |
st.header("π Data Analytics")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1017 |
|
| 1018 |
+
section_choice = st.radio(
|
| 1019 |
+
"Select analysis type:",
|
| 1020 |
+
["Experimental Editing Values", "Binary Values"],
|
| 1021 |
+
horizontal=True
|
| 1022 |
)
|
| 1023 |
|
| 1024 |
+
# ββ Section 1: Experimental Editing Values ββ
|
| 1025 |
+
if section_choice == "Experimental Editing Values":
|
| 1026 |
+
st.markdown("""
|
| 1027 |
+
Upload your sample data file (Excel or CSV) for a quick exploratory assessment of the editing rates distribution.
|
| 1028 |
+
The file should contain Reactions as rows and position columns with editing values.
|
| 1029 |
+
This section provides visualizations **before** any binary labelling.
|
| 1030 |
+
""")
|
| 1031 |
|
| 1032 |
+
analytics_uploaded = st.file_uploader(
|
| 1033 |
+
"📤 Upload data file",
|
| 1034 |
+
type=["xlsx", "csv"],
|
| 1035 |
+
key="analytics_uploader"
|
| 1036 |
+
)
|
| 1037 |
|
| 1038 |
+
if analytics_uploaded is not None:
|
| 1039 |
+
try:
|
| 1040 |
+
if analytics_uploaded.name.endswith(".xlsx"):
|
| 1041 |
+
adf = pd.read_excel(analytics_uploaded)
|
| 1042 |
+
else:
|
| 1043 |
+
adf = pd.read_csv(analytics_uploaded)
|
| 1044 |
|
| 1045 |
+
st.success(f"✅ Loaded file with {len(adf)} rows and {len(adf.columns)} columns")
|
| 1046 |
+
adf.columns = [str(c).strip() for c in adf.columns]
|
|
|
|
|
|
|
| 1047 |
|
| 1048 |
+
non_pos_keywords = {"sample", "description", "descritpion", "total edited",
|
| 1049 |
+
'volume per "1"', "volume per 1", "id", "name"}
|
| 1050 |
+
position_cols = [c for c in adf.columns
|
| 1051 |
+
if c.lower() not in non_pos_keywords
|
| 1052 |
+
and pd.to_numeric(adf[c], errors="coerce").notna().any()]
|
| 1053 |
|
| 1054 |
+
def pos_sort_key(col_name: str):
|
| 1055 |
+
m = re.search(r"(\d+)", col_name)
|
| 1056 |
+
return int(m.group(1)) if m else 10**9
|
| 1057 |
+
position_cols = sorted(position_cols, key=pos_sort_key)
|
| 1058 |
|
| 1059 |
+
if not position_cols:
|
| 1060 |
+
st.error("No numeric position columns detected.")
|
| 1061 |
+
st.stop()
|
| 1062 |
|
| 1063 |
+
st.info(f"Detected **{len(position_cols)}** position columns and **{len(adf)}** Reactions.")
|
|
|
|
|
|
|
|
|
|
| 1064 |
|
| 1065 |
+
pos_data = adf[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
|
|
|
|
| 1066 |
|
| 1067 |
+
if "Total edited" in adf.columns:
|
| 1068 |
+
total_edited = pd.to_numeric(adf["Total edited"], errors="coerce").fillna(0.0)
|
| 1069 |
+
else:
|
| 1070 |
+
total_edited = pos_data.sum(axis=1)
|
| 1071 |
+
|
| 1072 |
+
st.markdown("### 1️⃣ Raw Data Distribution")
|
| 1073 |
+
st.caption("Visualize editing values across all positions and Reactions - before any binary labelling.")
|
| 1074 |
+
|
| 1075 |
+
transform_option = st.selectbox(
|
| 1076 |
+
"Value transformation:",
|
| 1077 |
+
["Raw (linear)", "log1p", "log1p β log1p"],
|
| 1078 |
+
index=0,
|
| 1079 |
+
key="transform_select",
|
| 1080 |
+
help=(
|
| 1081 |
+
"**Raw** - No transformation.\n\n"
|
| 1082 |
+
"**log1p** - `log(1 + x)`. Compresses high values, spreads low range.\n\n"
|
| 1083 |
+
"**log1p β log1p** - Double log1p. Even stronger compression.\n\n"
|
| 1084 |
+
)
|
| 1085 |
)
|
|
|
|
| 1086 |
|
| 1087 |
+
if transform_option == "log1p":
|
| 1088 |
+
transformed = np.log1p(pos_data)
|
| 1089 |
+
value_label = "Editing Value (log1p)"
|
| 1090 |
+
transform_tag = "log1p"
|
| 1091 |
+
elif transform_option == "log1p β log1p":
|
| 1092 |
+
transformed = np.log1p(np.log1p(pos_data))
|
| 1093 |
+
value_label = "Editing Value (log1p β log1p)"
|
| 1094 |
+
transform_tag = "log1p_log1p"
|
| 1095 |
+
else:
|
| 1096 |
+
transformed = pos_data
|
| 1097 |
+
value_label = "Editing Value"
|
| 1098 |
+
transform_tag = "raw"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1099 |
|
| 1100 |
+
melted = transformed.melt(var_name="Position", value_name="Value")
|
| 1101 |
+
melted["Position_idx"] = melted["Position"].apply(
|
| 1102 |
+
lambda x: int(re.search(r"(\d+)", str(x)).group(1)) if re.search(r"(\d+)", str(x)) else 0
|
| 1103 |
+
)
|
| 1104 |
|
| 1105 |
+
st.markdown("#### π Histogram - All Values")
|
| 1106 |
|
| 1107 |
+
n_bins = st.number_input("Number of bins:", min_value=10, max_value=300, value=80, step=10, key="hist_bins")
|
| 1108 |
|
| 1109 |
+
matplotlib.rcParams["font.family"] = "Arial"
|
| 1110 |
+
matplotlib.rcParams["font.sans-serif"] = ["Arial", "DejaVu Sans", "Liberation Sans", "sans-serif"]
|
| 1111 |
|
| 1112 |
+
fig2, ax2 = plt.subplots(figsize=(7, 5))
|
| 1113 |
+
all_vals = melted["Value"].values
|
| 1114 |
+
ax2.hist(all_vals, bins=n_bins, color="#808080", alpha=0.8, label="All")
|
| 1115 |
+
ax2.set_xlim(left=0.0)
|
| 1116 |
+
val_range = np.nanmax(all_vals) - np.nanmin(all_vals)
|
| 1117 |
+
if val_range <= 5:
|
| 1118 |
+
major_step, minor_step = 0.5, 0.1
|
| 1119 |
+
elif val_range <= 20:
|
| 1120 |
+
major_step, minor_step = 2, 0.5
|
| 1121 |
+
elif val_range <= 50:
|
| 1122 |
+
major_step, minor_step = 5, 1
|
| 1123 |
+
else:
|
| 1124 |
+
major_step, minor_step = 10, 2
|
| 1125 |
+
ax2.xaxis.set_major_locator(MultipleLocator(major_step))
|
| 1126 |
+
ax2.xaxis.set_minor_locator(MultipleLocator(minor_step))
|
| 1127 |
+
ax2.tick_params(axis="x", which="major", length=6)
|
| 1128 |
+
ax2.tick_params(axis="x", which="minor", length=3)
|
| 1129 |
+
ax2.legend(fontsize=8)
|
| 1130 |
+
ax2.set_title(f"Raw Values Distribution ({transform_tag})")
|
| 1131 |
+
ax2.set_xlabel(value_label)
|
| 1132 |
+
ax2.set_ylabel("Counts")
|
| 1133 |
+
|
| 1134 |
+
for attr in [ax2.xaxis.label, ax2.yaxis.label, ax2.title]:
|
| 1135 |
+
attr.set_fontsize(12)
|
| 1136 |
+
attr.set_fontweight("bold")
|
| 1137 |
+
attr.set_fontfamily("Arial")
|
| 1138 |
+
for spine in ax2.spines.values():
|
| 1139 |
+
spine.set_linewidth(1.5)
|
| 1140 |
+
ax2.tick_params(axis="both", which="both", width=1.2, labelsize=10)
|
| 1141 |
+
ax2.spines['top'].set_visible(False)
|
| 1142 |
+
ax2.spines['right'].set_visible(False)
|
| 1143 |
+
for ticklab in ax2.get_xticklabels() + ax2.get_yticklabels():
|
| 1144 |
+
ticklab.set_fontfamily("Arial")
|
| 1145 |
+
ticklab.set_fontweight("normal")
|
| 1146 |
+
|
| 1147 |
+
fig2.tight_layout()
|
| 1148 |
+
st.pyplot(fig2)
|
| 1149 |
+
|
| 1150 |
+
st.markdown("#### 2️⃣ Density Scatter Plot")
|
| 1151 |
+
st.caption("Each dot = one measurement (sample × position). Color = local point density.")
|
| 1152 |
+
|
| 1153 |
+
x_vals = melted["Position_idx"].values.astype(float)
|
| 1154 |
+
y_vals = melted["Value"].values.astype(float)
|
| 1155 |
+
|
| 1156 |
+
x_jittered = x_vals + np.random.default_rng(42).uniform(-0.3, 0.3, size=len(x_vals))
|
| 1157 |
+
|
| 1158 |
+
with st.spinner("Computing point density..."):
|
| 1159 |
+
try:
|
| 1160 |
+
xy = np.vstack([x_jittered, y_vals])
|
| 1161 |
+
density = gaussian_kde(xy)(xy)
|
| 1162 |
+
except np.linalg.LinAlgError:
|
| 1163 |
+
density = np.ones(len(x_vals))
|
| 1164 |
+
|
| 1165 |
+
sort_idx = density.argsort()
|
| 1166 |
+
x_plot = x_jittered[sort_idx]
|
| 1167 |
+
y_plot = y_vals[sort_idx]
|
| 1168 |
+
d_plot = density[sort_idx]
|
| 1169 |
+
|
| 1170 |
+
fig3, ax3 = plt.subplots(figsize=(12, 6))
|
| 1171 |
+
scatter = ax3.scatter(x_plot, y_plot, c=d_plot, cmap="jet", s=8, alpha=0.7, edgecolors="none")
|
| 1172 |
+
cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
|
| 1173 |
+
ax3.set_xlabel("Position")
|
| 1174 |
+
ax3.set_ylabel(value_label)
|
| 1175 |
+
ax3.set_title(f"Density Scatter - {value_label} by Position")
|
| 1176 |
+
ax3.set_xticks(sorted(melted["Position_idx"].unique()))
|
| 1177 |
+
ax3.grid(alpha=0.2)
|
| 1178 |
+
fig3.tight_layout()
|
| 1179 |
+
st.pyplot(fig3)
|
| 1180 |
|
| 1181 |
+
except Exception as e:
|
| 1182 |
+
st.error(f"β Error processing file: {e}")
|
| 1183 |
+
import traceback
|
| 1184 |
+
st.code(traceback.format_exc())
|
| 1185 |
+
else:
|
| 1186 |
+
st.info("π Upload a data file (CSV or Excel) to start exploring.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1187 |
|
| 1188 |
+
# ββ Section 2: Binary Values ββ
|
|
|
|
|
|
|
|
|
|
| 1189 |
else:
|
| 1190 |
+
st.markdown("""
|
| 1191 |
+
Upload a binary labels CSV file (rows = reactions, columns = positions with 0/1 values).
|
| 1192 |
+
A **Total_Edited** column will be computed automatically as the sum of 1s per reaction,
|
| 1193 |
+
and a box plot of Total Edited counts will be displayed.
|
| 1194 |
+
""")
|
| 1195 |
+
|
| 1196 |
+
binary_uploaded = st.file_uploader(
|
| 1197 |
+
"📤 Upload binary labels CSV",
|
| 1198 |
+
type=["csv"],
|
| 1199 |
+
key="binary_uploader"
|
| 1200 |
+
)
|
| 1201 |
+
|
| 1202 |
+
if binary_uploaded is not None:
|
| 1203 |
+
try:
|
| 1204 |
+
bdf = pd.read_csv(binary_uploaded)
|
| 1205 |
+
st.success(f"✅ Loaded file with {len(bdf)} rows and {len(bdf.columns)} columns")
|
| 1206 |
+
|
| 1207 |
+
# All columns should be position columns (0/1 values)
|
| 1208 |
+
bdf.columns = [str(c).strip() for c in bdf.columns]
|
| 1209 |
+
pos_cols = [c for c in bdf.columns
|
| 1210 |
+
if pd.to_numeric(bdf[c], errors="coerce").notna().any()]
|
| 1211 |
+
pos_data_bin = bdf[pos_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
|
| 1212 |
+
|
| 1213 |
+
# Compute Total_Edited
|
| 1214 |
+
pos_data_bin["Total_Edited"] = pos_data_bin.sum(axis=1)
|
| 1215 |
+
|
| 1216 |
+
st.info(f"Detected **{len(pos_cols)}** position columns across **{len(bdf)}** reactions.")
|
| 1217 |
+
st.dataframe(pos_data_bin, hide_index=True)
|
| 1218 |
+
|
| 1219 |
+
# Box plot
|
| 1220 |
+
matplotlib.rcParams["font.family"] = "Arial"
|
| 1221 |
+
matplotlib.rcParams["font.sans-serif"] = ["Arial", "DejaVu Sans", "Liberation Sans", "sans-serif"]
|
| 1222 |
+
|
| 1223 |
+
fig_box, ax_box = plt.subplots(figsize=(7, 5))
|
| 1224 |
+
bp = ax_box.boxplot(pos_data_bin["Total_Edited"].values, vert=True, patch_artist=True,
|
| 1225 |
+
boxprops=dict(facecolor="#808080", alpha=0.8),
|
| 1226 |
+
medianprops=dict(color="black", linewidth=1.5),
|
| 1227 |
+
whiskerprops=dict(linewidth=1.2),
|
| 1228 |
+
capprops=dict(linewidth=1.2))
|
| 1229 |
+
ax_box.set_ylabel("Total Edited (sum of 1s)")
|
| 1230 |
+
ax_box.set_title("Distribution of Total Edited per Reaction")
|
| 1231 |
+
ax_box.set_xticklabels(["All Reactions"])
|
| 1232 |
+
|
| 1233 |
+
# Apply axis styling
|
| 1234 |
+
for attr in [ax_box.xaxis.label, ax_box.yaxis.label, ax_box.title]:
|
| 1235 |
+
attr.set_fontsize(12)
|
| 1236 |
+
attr.set_fontweight("bold")
|
| 1237 |
+
attr.set_fontfamily("Arial")
|
| 1238 |
+
for spine in ax_box.spines.values():
|
| 1239 |
+
spine.set_linewidth(1.5)
|
| 1240 |
+
ax_box.tick_params(axis="both", which="both", width=1.2, labelsize=10)
|
| 1241 |
+
ax_box.spines['top'].set_visible(False)
|
| 1242 |
+
ax_box.spines['right'].set_visible(False)
|
| 1243 |
+
for ticklab in ax_box.get_xticklabels() + ax_box.get_yticklabels():
|
| 1244 |
+
ticklab.set_fontfamily("Arial")
|
| 1245 |
+
ticklab.set_fontweight("normal")
|
| 1246 |
+
|
| 1247 |
+
fig_box.tight_layout()
|
| 1248 |
+
st.pyplot(fig_box)
|
| 1249 |
+
|
| 1250 |
+
# Summary stats
|
| 1251 |
+
st.markdown("#### Summary Statistics")
|
| 1252 |
+
stats = pos_data_bin["Total_Edited"].describe()
|
| 1253 |
+
st.dataframe(stats.to_frame("Total_Edited").T)
|
| 1254 |
+
|
| 1255 |
+
except Exception as e:
|
| 1256 |
+
st.error(f"β Error processing file: {e}")
|
| 1257 |
+
import traceback
|
| 1258 |
+
st.code(traceback.format_exc())
|
| 1259 |
+
else:
|
| 1260 |
+
st.info("π Upload a binary labels CSV file to start exploring.")
|
| 1261 |
|
| 1262 |
# --------------------------------------------------
|
| 1263 |
# TAB 4: Pipetting Command Generator
|