Spaces:
Sleeping
Sleeping
Update src/app.py
Browse files- src/app.py +53 -79
src/app.py
CHANGED
|
@@ -20,10 +20,10 @@ st.title("Bitconverter")
|
|
| 20 |
# =========================
|
| 21 |
# Encoding Schemes
|
| 22 |
# =========================
|
| 23 |
-
ENCODING_OPTIONS = ["
|
| 24 |
|
| 25 |
BITS_PER_UNIT = {
|
| 26 |
-
"
|
| 27 |
"Base64 (6-bit)": 6,
|
| 28 |
"ASCII (7-bit)": 7,
|
| 29 |
"UTF-8 (8-bit)": 8,
|
|
@@ -39,7 +39,7 @@ voyager_table = {
|
|
| 39 |
'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2',
|
| 40 |
'3', '4', '5', '6', '7', '8', '9', '.', ',', '(',
|
| 41 |
')','+', '-', '*', '/', '=', '$', '!', ':', '%',
|
| 42 |
-
'"', '#', '@', "'", '?', '&'
|
| 43 |
])
|
| 44 |
}
|
| 45 |
reverse_voyager_table = {v: k for k, v in voyager_table.items()}
|
|
@@ -176,7 +176,7 @@ def encode_to_binary(text: str, scheme: str) -> tuple[list[int], list[str], list
|
|
| 176 |
- display_units: the encoded representation (Base64 symbol, hex byte, ASCII code, Voyager char)
|
| 177 |
- source_chars: the original text character each chunk maps to
|
| 178 |
"""
|
| 179 |
-
if scheme == "
|
| 180 |
bits = []
|
| 181 |
for char in text:
|
| 182 |
val = reverse_voyager_table.get(char.upper(), 0)
|
|
@@ -233,7 +233,7 @@ def encode_to_binary(text: str, scheme: str) -> tuple[list[int], list[str], list
|
|
| 233 |
# Decoding Functions
|
| 234 |
# =========================
|
| 235 |
def decode_from_binary(bits: list[int], scheme: str) -> str:
|
| 236 |
-
if scheme == "
|
| 237 |
chars = []
|
| 238 |
for i in range(0, len(bits), 6):
|
| 239 |
chunk = bits[i:i + 6]
|
|
@@ -307,7 +307,7 @@ with tab1:
|
|
| 307 |
index=0,
|
| 308 |
key="enc_scheme",
|
| 309 |
help=(
|
| 310 |
-
"**
|
| 311 |
"**Base64 (6-bit)** — Standard Base64 encoding of UTF-8 bytes. 6 bits/symbol.\n\n"
|
| 312 |
"**ASCII (7-bit)** — Standard 7-bit ASCII. 7 bits/char.\n\n"
|
| 313 |
"**UTF-8 (8-bit)** — Full UTF-8 byte encoding. 8 bits/byte. Supports all Unicode."
|
|
@@ -316,7 +316,7 @@ with tab1:
|
|
| 316 |
|
| 317 |
bits_per = BITS_PER_UNIT[encoding_scheme]
|
| 318 |
|
| 319 |
-
if encoding_scheme == "
|
| 320 |
supported = ''.join(voyager_table[i] for i in range(len(voyager_table)))
|
| 321 |
st.caption(f"Supported characters ({len(voyager_table)}): `{supported}`")
|
| 322 |
|
|
@@ -335,7 +335,7 @@ with tab1:
|
|
| 335 |
binary_concat = ''.join(map(str, binary_labels))
|
| 336 |
|
| 337 |
st.markdown("### Output 1 — Binary Labels per Character")
|
| 338 |
-
st.caption(f"Encoding: **{encoding_scheme}**
|
| 339 |
|
| 340 |
grouped_bits = [binary_labels[i:i + bits_per] for i in range(0, len(binary_labels), bits_per)]
|
| 341 |
scroll_html = (
|
|
@@ -345,7 +345,7 @@ with tab1:
|
|
| 345 |
for i, bits in enumerate(grouped_bits):
|
| 346 |
src = source_chars[i] if i < len(source_chars) else "?"
|
| 347 |
enc = display_units[i] if i < len(display_units) else "?"
|
| 348 |
-
if encoding_scheme == "
|
| 349 |
scroll_html += f"<div>'{src}' β {bits}</div>"
|
| 350 |
else:
|
| 351 |
scroll_html += f"<div>'{src}' β '{enc}' β {bits}</div>"
|
|
@@ -357,7 +357,7 @@ with tab1:
|
|
| 357 |
src = source_chars[i] if i < len(source_chars) else "?"
|
| 358 |
enc = display_units[i] if i < len(display_units) else "?"
|
| 359 |
bit_str = ''.join(map(str, bits))
|
| 360 |
-
if encoding_scheme == "
|
| 361 |
per_char_lines.append(f"'{src}' β {bit_str}")
|
| 362 |
else:
|
| 363 |
per_char_lines.append(f"'{src}' β '{enc}' β {bit_str}")
|
|
@@ -411,8 +411,8 @@ with tab1:
|
|
| 411 |
["Black & White (1-bit)", "Grayscale (4-bit)"],
|
| 412 |
key="enc_image_type",
|
| 413 |
help=(
|
| 414 |
-
"**Black & White (1-bit)**
|
| 415 |
-
"**Grayscale (4-bit)**
|
| 416 |
"Uniform quantization in sRGB/BT.601 luma space. 0 = black, 15 = white. "
|
| 417 |
"Two pixels per byte, high-nibble first; rows top-to-bottom, no row padding."
|
| 418 |
)
|
|
@@ -429,7 +429,7 @@ with tab1:
|
|
| 429 |
orig_w, orig_h = img.size
|
| 430 |
aspect = orig_h / orig_w
|
| 431 |
|
| 432 |
-
st.image(img, caption=f"Original (grayscale)
|
| 433 |
|
| 434 |
st.markdown("#### βοΈ Resolution")
|
| 435 |
target_width = st.slider(
|
|
@@ -456,7 +456,7 @@ with tab1:
|
|
| 456 |
|
| 457 |
binary_matrix = (img_array < threshold).astype(int)
|
| 458 |
|
| 459 |
-
st.markdown("### Preview
|
| 460 |
col_prev1, col_prev2 = st.columns(2)
|
| 461 |
with col_prev1:
|
| 462 |
st.image(img_resized, caption=f"Resized grayscale ({target_width}×{target_height})", use_container_width=True)
|
|
@@ -485,7 +485,7 @@ with tab1:
|
|
| 485 |
key="download_img_binary_txt"
|
| 486 |
)
|
| 487 |
|
| 488 |
-
st.markdown("### Output 2 β Binary Matrix by dimension (
|
| 489 |
columns = [f"Position {i+1}" for i in range(target_width)]
|
| 490 |
df_img = pd.DataFrame(binary_matrix, columns=columns)
|
| 491 |
df_img.insert(0, "Sample", range(1, len(df_img) + 1))
|
|
@@ -548,7 +548,7 @@ with tab1:
|
|
| 548 |
gray4_matrix = quantize_to_4bit(img_array)
|
| 549 |
gray8_preview = gray4_to_gray8(gray4_matrix)
|
| 550 |
|
| 551 |
-
st.markdown("### Preview
|
| 552 |
col_prev1, col_prev2 = st.columns(2)
|
| 553 |
with col_prev1:
|
| 554 |
st.image(img_resized, caption=f"Original resized ({target_width}×{target_height}, 256 levels)", use_container_width=True)
|
|
@@ -740,8 +740,8 @@ with tab2:
|
|
| 740 |
["Black & White (1-bit)", "Grayscale (4-bit)"],
|
| 741 |
key="dec_image_type",
|
| 742 |
help=(
|
| 743 |
-
"**Black & White**
|
| 744 |
-
"**Grayscale (4-bit)**
|
| 745 |
"(every 4 bits = one pixel), or a packed **.g4 file**."
|
| 746 |
)
|
| 747 |
)
|
|
@@ -817,7 +817,7 @@ with tab2:
|
|
| 817 |
display_w = img_width * display_scale
|
| 818 |
display_h = img_height * display_scale
|
| 819 |
pil_display = pil_img.resize((display_w, display_h), Image.NEAREST)
|
| 820 |
-
st.image(pil_display, caption=f"Binary image
|
| 821 |
|
| 822 |
ones = int(bits_matrix.sum())
|
| 823 |
st.markdown(
|
|
@@ -861,10 +861,10 @@ with tab2:
|
|
| 861 |
["Value matrix (0–15)", "Binary (4 bits per pixel)", "Packed .g4 file"],
|
| 862 |
key="g4_input_format",
|
| 863 |
help=(
|
| 864 |
-
"**Value matrix**
|
| 865 |
"Rows = pixel rows, columns = pixel columns.\n\n"
|
| 866 |
-
"**Binary**
|
| 867 |
-
"**Packed .g4 file**
|
| 868 |
"(two pixels per byte, high-nibble first)."
|
| 869 |
)
|
| 870 |
)
|
|
@@ -969,7 +969,7 @@ with tab2:
|
|
| 969 |
display_w = img_width * display_scale
|
| 970 |
display_h = img_height * display_scale
|
| 971 |
pil_display = pil_img.resize((display_w, display_h), Image.NEAREST)
|
| 972 |
-
st.image(pil_display, caption=f"4-bit grayscale
|
| 973 |
|
| 974 |
# Stats
|
| 975 |
unique_vals, counts = np.unique(gray4_matrix, return_counts=True)
|
|
@@ -1014,7 +1014,7 @@ with tab3:
|
|
| 1014 |
st.header("π Data Analytics")
|
| 1015 |
st.markdown("""
|
| 1016 |
Upload your sample data file (Excel or CSV) for a quick exploratory assessment of the editing rates distribution.
|
| 1017 |
-
The file should contain
|
| 1018 |
This tab provides visualizations **before** any binary labelling.
|
| 1019 |
""")
|
| 1020 |
|
|
@@ -1049,7 +1049,7 @@ with tab3:
|
|
| 1049 |
st.error("No numeric position columns detected.")
|
| 1050 |
st.stop()
|
| 1051 |
|
| 1052 |
-
st.info(f"Detected **{len(position_cols)}** position columns and **{len(adf)}**
|
| 1053 |
|
| 1054 |
pos_data = adf[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
|
| 1055 |
|
|
@@ -1059,34 +1059,32 @@ with tab3:
|
|
| 1059 |
total_edited = pos_data.sum(axis=1)
|
| 1060 |
|
| 1061 |
st.markdown("### 1οΈβ£ Raw Data Distribution")
|
| 1062 |
-
st.caption("Visualize editing values across all positions and
|
| 1063 |
|
| 1064 |
transform_option = st.selectbox(
|
| 1065 |
"Value transformation:",
|
| 1066 |
-
["Raw (linear)", "log1p", "log1p β log1p"
|
| 1067 |
index=0,
|
| 1068 |
key="transform_select",
|
| 1069 |
help=(
|
| 1070 |
-
"**Raw**
|
| 1071 |
-
"**log1p**
|
| 1072 |
-
"**log1p β log1p**
|
| 1073 |
-
"**log1p β pos. norm.** β log1p then robust per-position normalization "
|
| 1074 |
-
"(median / IQR scaling per position column)."
|
| 1075 |
)
|
| 1076 |
)
|
| 1077 |
|
| 1078 |
-
def robust_pos_normalize_log1p(data: pd.DataFrame) -> pd.DataFrame:
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
|
| 1083 |
-
|
| 1084 |
-
|
| 1085 |
-
|
| 1086 |
-
|
| 1087 |
-
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
|
| 1091 |
if transform_option == "log1p":
|
| 1092 |
transformed = np.log1p(pos_data)
|
|
@@ -1096,10 +1094,10 @@ with tab3:
|
|
| 1096 |
transformed = np.log1p(np.log1p(pos_data))
|
| 1097 |
value_label = "Editing Value (log1p β log1p)"
|
| 1098 |
transform_tag = "log1p_log1p"
|
| 1099 |
-
elif transform_option == "log1p β pos. norm.":
|
| 1100 |
-
|
| 1101 |
-
|
| 1102 |
-
|
| 1103 |
else:
|
| 1104 |
transformed = pos_data
|
| 1105 |
value_label = "Editing Value"
|
|
@@ -1110,7 +1108,7 @@ with tab3:
|
|
| 1110 |
lambda x: int(re.search(r"(\d+)", str(x)).group(1)) if re.search(r"(\d+)", str(x)) else 0
|
| 1111 |
)
|
| 1112 |
|
| 1113 |
-
st.markdown("#### π Histogram
|
| 1114 |
|
| 1115 |
n_bins = st.number_input("Number of bins:", min_value=10, max_value=300, value=80, step=10, key="hist_bins")
|
| 1116 |
|
|
@@ -1137,7 +1135,7 @@ with tab3:
|
|
| 1137 |
fig2.tight_layout()
|
| 1138 |
st.pyplot(fig2)
|
| 1139 |
|
| 1140 |
-
st.markdown("#### 2οΈβ£ Density Scatter Plot
|
| 1141 |
st.caption("Each dot = one measurement (sample × position). Color = local point density.")
|
| 1142 |
|
| 1143 |
x_vals = melted["Position_idx"].values.astype(float)
|
|
@@ -1162,36 +1160,12 @@ with tab3:
|
|
| 1162 |
cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
|
| 1163 |
ax3.set_xlabel("Position")
|
| 1164 |
ax3.set_ylabel(value_label)
|
| 1165 |
-
ax3.set_title(f"Density Scatter
|
| 1166 |
ax3.set_xticks(sorted(melted["Position_idx"].unique()))
|
| 1167 |
ax3.grid(alpha=0.2)
|
| 1168 |
fig3.tight_layout()
|
| 1169 |
st.pyplot(fig3)
|
| 1170 |
|
| 1171 |
-
st.markdown("#### 3οΈβ£ 2D Density Heatmap")
|
| 1172 |
-
st.caption("Binned heatmap of editing values by position β similar to a FACS density plot.")
|
| 1173 |
-
|
| 1174 |
-
y_bins = st.slider("Vertical bins:", min_value=20, max_value=150, value=60, key="heatmap_ybins")
|
| 1175 |
-
|
| 1176 |
-
positions_unique = sorted(melted["Position_idx"].unique())
|
| 1177 |
-
n_positions = len(positions_unique)
|
| 1178 |
-
|
| 1179 |
-
fig4, ax4 = plt.subplots(figsize=(12, 6))
|
| 1180 |
-
h = ax4.hist2d(
|
| 1181 |
-
x_vals, y_vals,
|
| 1182 |
-
bins=[n_positions, y_bins],
|
| 1183 |
-
cmap="jet",
|
| 1184 |
-
norm=mcolors.LogNorm() if melted["Value"].max() > 0 else None,
|
| 1185 |
-
)
|
| 1186 |
-
fig4.colorbar(h[3], ax=ax4, label="Count (log scale)")
|
| 1187 |
-
ax4.set_xlabel("Position")
|
| 1188 |
-
ax4.set_ylabel(value_label)
|
| 1189 |
-
ax4.set_title(f"2D Density Heatmap β Position vs. {value_label}")
|
| 1190 |
-
ax4.set_xticks(positions_unique)
|
| 1191 |
-
ax4.grid(alpha=0.15)
|
| 1192 |
-
fig4.tight_layout()
|
| 1193 |
-
st.pyplot(fig4)
|
| 1194 |
-
|
| 1195 |
except Exception as e:
|
| 1196 |
st.error(f"β Error processing file: {e}")
|
| 1197 |
import traceback
|
|
@@ -1334,7 +1308,7 @@ with tab4:
|
|
| 1334 |
|
| 1335 |
if not any(c.lower() == "sample" for c in df.columns):
|
| 1336 |
df.insert(0, "Sample", np.arange(1, len(df) + 1))
|
| 1337 |
-
st.info("`Sample` column missing
|
| 1338 |
|
| 1339 |
position_cols = [c for c in df.columns if re.match(r"(?i)^position\s*\d+", c)]
|
| 1340 |
if not position_cols:
|
|
@@ -1352,7 +1326,7 @@ with tab4:
|
|
| 1352 |
|
| 1353 |
if "Total edited" not in df.columns:
|
| 1354 |
df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
|
| 1355 |
-
st.info("`Total edited` column missing
|
| 1356 |
|
| 1357 |
st.markdown("#### βοΈ Volume Calculation Settings")
|
| 1358 |
default_total_vol = st.number_input(
|
|
@@ -1365,7 +1339,7 @@ with tab4:
|
|
| 1365 |
if not vol_candidates:
|
| 1366 |
df['Volume per "1"'] = default_total_vol / df["Total edited"].replace(0, np.nan)
|
| 1367 |
df['Volume per "1"'] = df['Volume per "1"'].fillna(0)
|
| 1368 |
-
st.info(f'`Volume per "1"` column missing
|
| 1369 |
volume_col = 'Volume per "1"'
|
| 1370 |
else:
|
| 1371 |
volume_col = vol_candidates[0]
|
|
@@ -1385,7 +1359,7 @@ with tab4:
|
|
| 1385 |
|
| 1386 |
st.markdown("### π Preview: Suggested Uniform Layout")
|
| 1387 |
if max_wells_per_source == 0:
|
| 1388 |
-
st.info("No edits detected
|
| 1389 |
st.stop()
|
| 1390 |
|
| 1391 |
st.write(
|
|
|
|
| 20 |
# =========================
|
| 21 |
# Encoding Schemes
|
| 22 |
# =========================
|
| 23 |
+
ENCODING_OPTIONS = ["6-bit LNS", "Base64 (6-bit)", "ASCII (7-bit)", "UTF-8 (8-bit)"]
|
| 24 |
|
| 25 |
BITS_PER_UNIT = {
|
| 26 |
+
"6-bit LNS": 6,
|
| 27 |
"Base64 (6-bit)": 6,
|
| 28 |
"ASCII (7-bit)": 7,
|
| 29 |
"UTF-8 (8-bit)": 8,
|
|
|
|
| 39 |
'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2',
|
| 40 |
'3', '4', '5', '6', '7', '8', '9', '.', ',', '(',
|
| 41 |
')','+', '-', '*', '/', '=', '$', '!', ':', '%',
|
| 42 |
+
'"', '#', '@', "'", '?', '&', '(Image)'
|
| 43 |
])
|
| 44 |
}
|
| 45 |
reverse_voyager_table = {v: k for k, v in voyager_table.items()}
|
|
|
|
| 176 |
- display_units: the encoded representation (Base64 symbol, hex byte, ASCII code, Voyager char)
|
| 177 |
- source_chars: the original text character each chunk maps to
|
| 178 |
"""
|
| 179 |
+
if scheme == "6-bit LNS":
|
| 180 |
bits = []
|
| 181 |
for char in text:
|
| 182 |
val = reverse_voyager_table.get(char.upper(), 0)
|
|
|
|
| 233 |
# Decoding Functions
|
| 234 |
# =========================
|
| 235 |
def decode_from_binary(bits: list[int], scheme: str) -> str:
|
| 236 |
+
if scheme == "6-bit LNS":
|
| 237 |
chars = []
|
| 238 |
for i in range(0, len(bits), 6):
|
| 239 |
chunk = bits[i:i + 6]
|
|
|
|
| 307 |
index=0,
|
| 308 |
key="enc_scheme",
|
| 309 |
help=(
|
| 310 |
+
"**6-bit LNS** — Custom 56-character table (A-Z, 0-9, punctuation). 6 bits/char.\n\n"
|
| 311 |
"**Base64 (6-bit)** — Standard Base64 encoding of UTF-8 bytes. 6 bits/symbol.\n\n"
|
| 312 |
"**ASCII (7-bit)** — Standard 7-bit ASCII. 7 bits/char.\n\n"
|
| 313 |
"**UTF-8 (8-bit)** — Full UTF-8 byte encoding. 8 bits/byte. Supports all Unicode."
|
|
|
|
| 316 |
|
| 317 |
bits_per = BITS_PER_UNIT[encoding_scheme]
|
| 318 |
|
| 319 |
+
if encoding_scheme == "6-bit LNS":
|
| 320 |
supported = ''.join(voyager_table[i] for i in range(len(voyager_table)))
|
| 321 |
st.caption(f"Supported characters ({len(voyager_table)}): `{supported}`")
|
| 322 |
|
|
|
|
| 335 |
binary_concat = ''.join(map(str, binary_labels))
|
| 336 |
|
| 337 |
st.markdown("### Output 1 — Binary Labels per Character")
|
| 338 |
+
st.caption(f"Encoding: **{encoding_scheme}** - {bits_per} bits per unit")
|
| 339 |
|
| 340 |
grouped_bits = [binary_labels[i:i + bits_per] for i in range(0, len(binary_labels), bits_per)]
|
| 341 |
scroll_html = (
|
|
|
|
| 345 |
for i, bits in enumerate(grouped_bits):
|
| 346 |
src = source_chars[i] if i < len(source_chars) else "?"
|
| 347 |
enc = display_units[i] if i < len(display_units) else "?"
|
| 348 |
+
if encoding_scheme == "6-bit LNS":
|
| 349 |
scroll_html += f"<div>'{src}' β {bits}</div>"
|
| 350 |
else:
|
| 351 |
scroll_html += f"<div>'{src}' β '{enc}' β {bits}</div>"
|
|
|
|
| 357 |
src = source_chars[i] if i < len(source_chars) else "?"
|
| 358 |
enc = display_units[i] if i < len(display_units) else "?"
|
| 359 |
bit_str = ''.join(map(str, bits))
|
| 360 |
+
if encoding_scheme == "6-bit LNS":
|
| 361 |
per_char_lines.append(f"'{src}' β {bit_str}")
|
| 362 |
else:
|
| 363 |
per_char_lines.append(f"'{src}' β '{enc}' β {bit_str}")
|
|
|
|
| 411 |
["Black & White (1-bit)", "Grayscale (4-bit)"],
|
| 412 |
key="enc_image_type",
|
| 413 |
help=(
|
| 414 |
+
"**Black & White (1-bit)** - Each pixel = 1 bit (0 or 1). Uses a brightness threshold.\n\n"
|
| 415 |
+
"**Grayscale (4-bit)** - Each pixel = 4 bits (0–15 levels). "
|
| 416 |
"Uniform quantization in sRGB/BT.601 luma space. 0 = black, 15 = white. "
|
| 417 |
"Two pixels per byte, high-nibble first; rows top-to-bottom, no row padding."
|
| 418 |
)
|
|
|
|
| 429 |
orig_w, orig_h = img.size
|
| 430 |
aspect = orig_h / orig_w
|
| 431 |
|
| 432 |
+
st.image(img, caption=f"Original (grayscale) - {orig_w}×{orig_h} px", use_container_width=True)
|
| 433 |
|
| 434 |
st.markdown("#### βοΈ Resolution")
|
| 435 |
target_width = st.slider(
|
|
|
|
| 456 |
|
| 457 |
binary_matrix = (img_array < threshold).astype(int)
|
| 458 |
|
| 459 |
+
st.markdown("### Preview - Black & White Output")
|
| 460 |
col_prev1, col_prev2 = st.columns(2)
|
| 461 |
with col_prev1:
|
| 462 |
st.image(img_resized, caption=f"Resized grayscale ({target_width}×{target_height})", use_container_width=True)
|
|
|
|
| 485 |
key="download_img_binary_txt"
|
| 486 |
)
|
| 487 |
|
| 488 |
+
st.markdown("### Output 2 — Binary Matrix by dimension (Reactions × Positions)")
|
| 489 |
columns = [f"Position {i+1}" for i in range(target_width)]
|
| 490 |
df_img = pd.DataFrame(binary_matrix, columns=columns)
|
| 491 |
df_img.insert(0, "Sample", range(1, len(df_img) + 1))
|
|
|
|
| 548 |
gray4_matrix = quantize_to_4bit(img_array)
|
| 549 |
gray8_preview = gray4_to_gray8(gray4_matrix)
|
| 550 |
|
| 551 |
+
st.markdown("### Preview - 4-bit Grayscale (16 levels)")
|
| 552 |
col_prev1, col_prev2 = st.columns(2)
|
| 553 |
with col_prev1:
|
| 554 |
st.image(img_resized, caption=f"Original resized ({target_width}×{target_height}, 256 levels)", use_container_width=True)
|
|
|
|
| 740 |
["Black & White (1-bit)", "Grayscale (4-bit)"],
|
| 741 |
key="dec_image_type",
|
| 742 |
help=(
|
| 743 |
+
"**Black & White** - Input is 0/1 binary data. Each value = 1 pixel.\n\n"
|
| 744 |
+
"**Grayscale (4-bit)** - Input is a **value matrix (0–15)**, **binary data** "
|
| 745 |
"(every 4 bits = one pixel), or a packed **.g4 file**."
|
| 746 |
)
|
| 747 |
)
|
|
|
|
| 817 |
display_w = img_width * display_scale
|
| 818 |
display_h = img_height * display_scale
|
| 819 |
pil_display = pil_img.resize((display_w, display_h), Image.NEAREST)
|
| 820 |
+
st.image(pil_display, caption=f"Binary image - {img_width}×{img_height} (1=black, 0=white)")
|
| 821 |
|
| 822 |
ones = int(bits_matrix.sum())
|
| 823 |
st.markdown(
|
|
|
|
| 861 |
["Value matrix (0–15)", "Binary (4 bits per pixel)", "Packed .g4 file"],
|
| 862 |
key="g4_input_format",
|
| 863 |
help=(
|
| 864 |
+
"**Value matrix** - CSV/XLSX where each cell is a pixel value 0–15. "
|
| 865 |
"Rows = pixel rows, columns = pixel columns.\n\n"
|
| 866 |
+
"**Binary** - 0/1 data where every 4 consecutive bits encode one pixel (0–15).\n\n"
|
| 867 |
+
"**Packed .g4 file** - Binary file with G4 header + packed 4bpp payload "
|
| 868 |
"(two pixels per byte, high-nibble first)."
|
| 869 |
)
|
| 870 |
)
|
|
|
|
| 969 |
display_w = img_width * display_scale
|
| 970 |
display_h = img_height * display_scale
|
| 971 |
pil_display = pil_img.resize((display_w, display_h), Image.NEAREST)
|
| 972 |
+
st.image(pil_display, caption=f"4-bit grayscale - {img_width}×{img_height} (0=black, 15=white)")
|
| 973 |
|
| 974 |
# Stats
|
| 975 |
unique_vals, counts = np.unique(gray4_matrix, return_counts=True)
|
|
|
|
| 1014 |
st.header("π Data Analytics")
|
| 1015 |
st.markdown("""
|
| 1016 |
Upload your sample data file (Excel or CSV) for a quick exploratory assessment of the editing rates distribution.
|
| 1017 |
+
The file should contain Reactions as rows and position columns with editing values.
|
| 1018 |
This tab provides visualizations **before** any binary labelling.
|
| 1019 |
""")
|
| 1020 |
|
|
|
|
| 1049 |
st.error("No numeric position columns detected.")
|
| 1050 |
st.stop()
|
| 1051 |
|
| 1052 |
+
st.info(f"Detected **{len(position_cols)}** position columns and **{len(adf)}** Reactions.")
|
| 1053 |
|
| 1054 |
pos_data = adf[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
|
| 1055 |
|
|
|
|
| 1059 |
total_edited = pos_data.sum(axis=1)
|
| 1060 |
|
| 1061 |
st.markdown("### 1οΈβ£ Raw Data Distribution")
|
| 1062 |
+
st.caption("Visualize editing values across all positions and Reactions - before any binary labelling.")
|
| 1063 |
|
| 1064 |
transform_option = st.selectbox(
|
| 1065 |
"Value transformation:",
|
| 1066 |
+
["Raw (linear)", "log1p", "log1p β log1p"],
|
| 1067 |
index=0,
|
| 1068 |
key="transform_select",
|
| 1069 |
help=(
|
| 1070 |
+
"**Raw** - No transformation.\n\n"
|
| 1071 |
+
"**log1p** - `log(1 + x)`. Compresses high values, spreads low range.\n\n"
|
| 1072 |
+
"**log1p β log1p** - Double log1p. Even stronger compression.\n\n"
|
|
|
|
|
|
|
| 1073 |
)
|
| 1074 |
)
|
| 1075 |
|
| 1076 |
+
# def robust_pos_normalize_log1p(data: pd.DataFrame) -> pd.DataFrame:
|
| 1077 |
+
# logged = np.log1p(data)
|
| 1078 |
+
# result = logged.copy()
|
| 1079 |
+
# for col in result.columns:
|
| 1080 |
+
# med = result[col].median()
|
| 1081 |
+
# q75, q25 = result[col].quantile(0.75), result[col].quantile(0.25)
|
| 1082 |
+
# iqr = q75 - q25
|
| 1083 |
+
# if iqr > 0:
|
| 1084 |
+
# result[col] = (result[col] - med) / iqr
|
| 1085 |
+
# else:
|
| 1086 |
+
# result[col] = result[col] - med
|
| 1087 |
+
# return result
|
| 1088 |
|
| 1089 |
if transform_option == "log1p":
|
| 1090 |
transformed = np.log1p(pos_data)
|
|
|
|
| 1094 |
transformed = np.log1p(np.log1p(pos_data))
|
| 1095 |
value_label = "Editing Value (log1p β log1p)"
|
| 1096 |
transform_tag = "log1p_log1p"
|
| 1097 |
+
# elif transform_option == "log1p β pos. norm.":
|
| 1098 |
+
# transformed = robust_pos_normalize_log1p(pos_data)
|
| 1099 |
+
# value_label = "Editing Value (log1p β pos. norm.)"
|
| 1100 |
+
# transform_tag = "log1p_posnorm"
|
| 1101 |
else:
|
| 1102 |
transformed = pos_data
|
| 1103 |
value_label = "Editing Value"
|
|
|
|
| 1108 |
lambda x: int(re.search(r"(\d+)", str(x)).group(1)) if re.search(r"(\d+)", str(x)) else 0
|
| 1109 |
)
|
| 1110 |
|
| 1111 |
+
st.markdown("#### π Histogram - All Values")
|
| 1112 |
|
| 1113 |
n_bins = st.number_input("Number of bins:", min_value=10, max_value=300, value=80, step=10, key="hist_bins")
|
| 1114 |
|
|
|
|
| 1135 |
fig2.tight_layout()
|
| 1136 |
st.pyplot(fig2)
|
| 1137 |
|
| 1138 |
+
st.markdown("#### 2οΈβ£ Density Scatter Plot")
|
| 1139 |
st.caption("Each dot = one measurement (sample × position). Color = local point density.")
|
| 1140 |
|
| 1141 |
x_vals = melted["Position_idx"].values.astype(float)
|
|
|
|
| 1160 |
cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
|
| 1161 |
ax3.set_xlabel("Position")
|
| 1162 |
ax3.set_ylabel(value_label)
|
| 1163 |
+
ax3.set_title(f"Density Scatter - {value_label} by Position")
|
| 1164 |
ax3.set_xticks(sorted(melted["Position_idx"].unique()))
|
| 1165 |
ax3.grid(alpha=0.2)
|
| 1166 |
fig3.tight_layout()
|
| 1167 |
st.pyplot(fig3)
|
| 1168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1169 |
except Exception as e:
|
| 1170 |
st.error(f"β Error processing file: {e}")
|
| 1171 |
import traceback
|
|
|
|
| 1308 |
|
| 1309 |
if not any(c.lower() == "sample" for c in df.columns):
|
| 1310 |
df.insert(0, "Sample", np.arange(1, len(df) + 1))
|
| 1311 |
+
st.info("`Sample` column missing - automatically generated 1..N.")
|
| 1312 |
|
| 1313 |
position_cols = [c for c in df.columns if re.match(r"(?i)^position\s*\d+", c)]
|
| 1314 |
if not position_cols:
|
|
|
|
| 1326 |
|
| 1327 |
if "Total edited" not in df.columns:
|
| 1328 |
df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
|
| 1329 |
+
st.info("`Total edited` column missing - calculated automatically as sum of 1s per row.")
|
| 1330 |
|
| 1331 |
st.markdown("#### βοΈ Volume Calculation Settings")
|
| 1332 |
default_total_vol = st.number_input(
|
|
|
|
| 1339 |
if not vol_candidates:
|
| 1340 |
df['Volume per "1"'] = default_total_vol / df["Total edited"].replace(0, np.nan)
|
| 1341 |
df['Volume per "1"'] = df['Volume per "1"'].fillna(0)
|
| 1342 |
+
st.info(f'`Volume per "1"` column missing - calculated automatically as {default_total_vol:.0f} µL (max per input well) / Total edited.')
|
| 1343 |
volume_col = 'Volume per "1"'
|
| 1344 |
else:
|
| 1345 |
volume_col = vol_candidates[0]
|
|
|
|
| 1359 |
|
| 1360 |
st.markdown("### π Preview: Suggested Uniform Layout")
|
| 1361 |
if max_wells_per_source == 0:
|
| 1362 |
+
st.info("No edits detected - nothing to allocate.")
|
| 1363 |
st.stop()
|
| 1364 |
|
| 1365 |
st.write(
|