wenjun99 committed on
Commit
b45976a
·
verified ·
1 Parent(s): dec936f

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +53 -79
src/app.py CHANGED
@@ -20,10 +20,10 @@ st.title("Bitconverter")
20
  # =========================
21
  # Encoding Schemes
22
  # =========================
23
- ENCODING_OPTIONS = ["Voyager 6-bit", "Base64 (6-bit)", "ASCII (7-bit)", "UTF-8 (8-bit)"]
24
 
25
  BITS_PER_UNIT = {
26
- "Voyager 6-bit": 6,
27
  "Base64 (6-bit)": 6,
28
  "ASCII (7-bit)": 7,
29
  "UTF-8 (8-bit)": 8,
@@ -39,7 +39,7 @@ voyager_table = {
39
  'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2',
40
  '3', '4', '5', '6', '7', '8', '9', '.', ',', '(',
41
  ')','+', '-', '*', '/', '=', '$', '!', ':', '%',
42
- '"', '#', '@', "'", '?', '&'
43
  ])
44
  }
45
  reverse_voyager_table = {v: k for k, v in voyager_table.items()}
@@ -176,7 +176,7 @@ def encode_to_binary(text: str, scheme: str) -> tuple[list[int], list[str], list
176
  - display_units: the encoded representation (Base64 symbol, hex byte, ASCII code, Voyager char)
177
  - source_chars: the original text character each chunk maps to
178
  """
179
- if scheme == "Voyager 6-bit":
180
  bits = []
181
  for char in text:
182
  val = reverse_voyager_table.get(char.upper(), 0)
@@ -233,7 +233,7 @@ def encode_to_binary(text: str, scheme: str) -> tuple[list[int], list[str], list
233
  # Decoding Functions
234
  # =========================
235
  def decode_from_binary(bits: list[int], scheme: str) -> str:
236
- if scheme == "Voyager 6-bit":
237
  chars = []
238
  for i in range(0, len(bits), 6):
239
  chunk = bits[i:i + 6]
@@ -307,7 +307,7 @@ with tab1:
307
  index=0,
308
  key="enc_scheme",
309
  help=(
310
- "**Voyager 6-bit** – Custom 56-character table (A-Z, 0-9, punctuation). 6 bits/char.\n\n"
311
  "**Base64 (6-bit)** – Standard Base64 encoding of UTF-8 bytes. 6 bits/symbol.\n\n"
312
  "**ASCII (7-bit)** – Standard 7-bit ASCII. 7 bits/char.\n\n"
313
  "**UTF-8 (8-bit)** – Full UTF-8 byte encoding. 8 bits/byte. Supports all Unicode."
@@ -316,7 +316,7 @@ with tab1:
316
 
317
  bits_per = BITS_PER_UNIT[encoding_scheme]
318
 
319
- if encoding_scheme == "Voyager 6-bit":
320
  supported = ''.join(voyager_table[i] for i in range(len(voyager_table)))
321
  st.caption(f"Supported characters ({len(voyager_table)}): `{supported}`")
322
 
@@ -335,7 +335,7 @@ with tab1:
335
  binary_concat = ''.join(map(str, binary_labels))
336
 
337
  st.markdown("### Output 1 – Binary Labels per Character")
338
- st.caption(f"Encoding: **{encoding_scheme}** β€” {bits_per} bits per unit")
339
 
340
  grouped_bits = [binary_labels[i:i + bits_per] for i in range(0, len(binary_labels), bits_per)]
341
  scroll_html = (
@@ -345,7 +345,7 @@ with tab1:
345
  for i, bits in enumerate(grouped_bits):
346
  src = source_chars[i] if i < len(source_chars) else "?"
347
  enc = display_units[i] if i < len(display_units) else "?"
348
- if encoding_scheme == "Voyager 6-bit":
349
  scroll_html += f"<div>'{src}' β†’ {bits}</div>"
350
  else:
351
  scroll_html += f"<div>'{src}' β†’ '{enc}' β†’ {bits}</div>"
@@ -357,7 +357,7 @@ with tab1:
357
  src = source_chars[i] if i < len(source_chars) else "?"
358
  enc = display_units[i] if i < len(display_units) else "?"
359
  bit_str = ''.join(map(str, bits))
360
- if encoding_scheme == "Voyager 6-bit":
361
  per_char_lines.append(f"'{src}' β†’ {bit_str}")
362
  else:
363
  per_char_lines.append(f"'{src}' β†’ '{enc}' β†’ {bit_str}")
@@ -411,8 +411,8 @@ with tab1:
411
  ["Black & White (1-bit)", "Grayscale (4-bit)"],
412
  key="enc_image_type",
413
  help=(
414
- "**Black & White (1-bit)** β€” Each pixel = 1 bit (0 or 1). Uses a brightness threshold.\n\n"
415
- "**Grayscale (4-bit)** β€” Each pixel = 4 bits (0–15 levels). "
416
  "Uniform quantization in sRGB/BT.601 luma space. 0 = black, 15 = white. "
417
  "Two pixels per byte, high-nibble first; rows top-to-bottom, no row padding."
418
  )
@@ -429,7 +429,7 @@ with tab1:
429
  orig_w, orig_h = img.size
430
  aspect = orig_h / orig_w
431
 
432
- st.image(img, caption=f"Original (grayscale) β€” {orig_w}Γ—{orig_h} px", use_container_width=True)
433
 
434
  st.markdown("#### βš™οΈ Resolution")
435
  target_width = st.slider(
@@ -456,7 +456,7 @@ with tab1:
456
 
457
  binary_matrix = (img_array < threshold).astype(int)
458
 
459
- st.markdown("### Preview β€” Black & White Output")
460
  col_prev1, col_prev2 = st.columns(2)
461
  with col_prev1:
462
  st.image(img_resized, caption=f"Resized grayscale ({target_width}Γ—{target_height})", use_container_width=True)
@@ -485,7 +485,7 @@ with tab1:
485
  key="download_img_binary_txt"
486
  )
487
 
488
- st.markdown("### Output 2 – Binary Matrix by dimension (Samples Γ— Positions)")
489
  columns = [f"Position {i+1}" for i in range(target_width)]
490
  df_img = pd.DataFrame(binary_matrix, columns=columns)
491
  df_img.insert(0, "Sample", range(1, len(df_img) + 1))
@@ -548,7 +548,7 @@ with tab1:
548
  gray4_matrix = quantize_to_4bit(img_array)
549
  gray8_preview = gray4_to_gray8(gray4_matrix)
550
 
551
- st.markdown("### Preview β€” 4-bit Grayscale (16 levels)")
552
  col_prev1, col_prev2 = st.columns(2)
553
  with col_prev1:
554
  st.image(img_resized, caption=f"Original resized ({target_width}Γ—{target_height}, 256 levels)", use_container_width=True)
@@ -740,8 +740,8 @@ with tab2:
740
  ["Black & White (1-bit)", "Grayscale (4-bit)"],
741
  key="dec_image_type",
742
  help=(
743
- "**Black & White** β€” Input is 0/1 binary data. Each value = 1 pixel.\n\n"
744
- "**Grayscale (4-bit)** β€” Input is a **value matrix (0–15)**, **binary data** "
745
  "(every 4 bits = one pixel), or a packed **.g4 file**."
746
  )
747
  )
@@ -817,7 +817,7 @@ with tab2:
817
  display_w = img_width * display_scale
818
  display_h = img_height * display_scale
819
  pil_display = pil_img.resize((display_w, display_h), Image.NEAREST)
820
- st.image(pil_display, caption=f"Binary image β€” {img_width}Γ—{img_height} (1=black, 0=white)")
821
 
822
  ones = int(bits_matrix.sum())
823
  st.markdown(
@@ -861,10 +861,10 @@ with tab2:
861
  ["Value matrix (0–15)", "Binary (4 bits per pixel)", "Packed .g4 file"],
862
  key="g4_input_format",
863
  help=(
864
- "**Value matrix** β€” CSV/XLSX where each cell is a pixel value 0–15. "
865
  "Rows = pixel rows, columns = pixel columns.\n\n"
866
- "**Binary** β€” 0/1 data where every 4 consecutive bits encode one pixel (0–15).\n\n"
867
- "**Packed .g4 file** β€” Binary file with G4 header + packed 4bpp payload "
868
  "(two pixels per byte, high-nibble first)."
869
  )
870
  )
@@ -969,7 +969,7 @@ with tab2:
969
  display_w = img_width * display_scale
970
  display_h = img_height * display_scale
971
  pil_display = pil_img.resize((display_w, display_h), Image.NEAREST)
972
- st.image(pil_display, caption=f"4-bit grayscale β€” {img_width}Γ—{img_height} (0=black, 15=white)")
973
 
974
  # Stats
975
  unique_vals, counts = np.unique(gray4_matrix, return_counts=True)
@@ -1014,7 +1014,7 @@ with tab3:
1014
  st.header("πŸ“Š Data Analytics")
1015
  st.markdown("""
1016
  Upload your sample data file (Excel or CSV) for a quick exploratory assessment of the editing rates distribution.
1017
- The file should contain samples as rows and position columns with editing values.
1018
  This tab provides visualizations **before** any binary labelling.
1019
  """)
1020
 
@@ -1049,7 +1049,7 @@ with tab3:
1049
  st.error("No numeric position columns detected.")
1050
  st.stop()
1051
 
1052
- st.info(f"Detected **{len(position_cols)}** position columns and **{len(adf)}** samples.")
1053
 
1054
  pos_data = adf[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
1055
 
@@ -1059,34 +1059,32 @@ with tab3:
1059
  total_edited = pos_data.sum(axis=1)
1060
 
1061
  st.markdown("### 1️⃣ Raw Data Distribution")
1062
- st.caption("Visualize editing values across all positions and samples β€” before any binary labelling.")
1063
 
1064
  transform_option = st.selectbox(
1065
  "Value transformation:",
1066
- ["Raw (linear)", "log1p", "log1p β†’ log1p", "log1p β†’ pos. norm."],
1067
  index=0,
1068
  key="transform_select",
1069
  help=(
1070
- "**Raw** β€” No transformation.\n\n"
1071
- "**log1p** β€” `log(1 + x)`. Compresses high values, spreads low range.\n\n"
1072
- "**log1p β†’ log1p** β€” Double log1p. Even stronger compression.\n\n"
1073
- "**log1p β†’ pos. norm.** β€” log1p then robust per-position normalization "
1074
- "(median / IQR scaling per position column)."
1075
  )
1076
  )
1077
 
1078
- def robust_pos_normalize_log1p(data: pd.DataFrame) -> pd.DataFrame:
1079
- logged = np.log1p(data)
1080
- result = logged.copy()
1081
- for col in result.columns:
1082
- med = result[col].median()
1083
- q75, q25 = result[col].quantile(0.75), result[col].quantile(0.25)
1084
- iqr = q75 - q25
1085
- if iqr > 0:
1086
- result[col] = (result[col] - med) / iqr
1087
- else:
1088
- result[col] = result[col] - med
1089
- return result
1090
 
1091
  if transform_option == "log1p":
1092
  transformed = np.log1p(pos_data)
@@ -1096,10 +1094,10 @@ with tab3:
1096
  transformed = np.log1p(np.log1p(pos_data))
1097
  value_label = "Editing Value (log1p β†’ log1p)"
1098
  transform_tag = "log1p_log1p"
1099
- elif transform_option == "log1p β†’ pos. norm.":
1100
- transformed = robust_pos_normalize_log1p(pos_data)
1101
- value_label = "Editing Value (log1p β†’ pos. norm.)"
1102
- transform_tag = "log1p_posnorm"
1103
  else:
1104
  transformed = pos_data
1105
  value_label = "Editing Value"
@@ -1110,7 +1108,7 @@ with tab3:
1110
  lambda x: int(re.search(r"(\d+)", str(x)).group(1)) if re.search(r"(\d+)", str(x)) else 0
1111
  )
1112
 
1113
- st.markdown("#### πŸ“Š Histogram β€” All Values")
1114
 
1115
  n_bins = st.number_input("Number of bins:", min_value=10, max_value=300, value=80, step=10, key="hist_bins")
1116
 
@@ -1137,7 +1135,7 @@ with tab3:
1137
  fig2.tight_layout()
1138
  st.pyplot(fig2)
1139
 
1140
- st.markdown("#### 2️⃣ Density Scatter Plot (FACS-style)")
1141
  st.caption("Each dot = one measurement (sample Γ— position). Color = local point density.")
1142
 
1143
  x_vals = melted["Position_idx"].values.astype(float)
@@ -1162,36 +1160,12 @@ with tab3:
1162
  cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
1163
  ax3.set_xlabel("Position")
1164
  ax3.set_ylabel(value_label)
1165
- ax3.set_title(f"Density Scatter β€” Position vs. {value_label}")
1166
  ax3.set_xticks(sorted(melted["Position_idx"].unique()))
1167
  ax3.grid(alpha=0.2)
1168
  fig3.tight_layout()
1169
  st.pyplot(fig3)
1170
 
1171
- st.markdown("#### 3️⃣ 2D Density Heatmap")
1172
- st.caption("Binned heatmap of editing values by position β€” similar to a FACS density plot.")
1173
-
1174
- y_bins = st.slider("Vertical bins:", min_value=20, max_value=150, value=60, key="heatmap_ybins")
1175
-
1176
- positions_unique = sorted(melted["Position_idx"].unique())
1177
- n_positions = len(positions_unique)
1178
-
1179
- fig4, ax4 = plt.subplots(figsize=(12, 6))
1180
- h = ax4.hist2d(
1181
- x_vals, y_vals,
1182
- bins=[n_positions, y_bins],
1183
- cmap="jet",
1184
- norm=mcolors.LogNorm() if melted["Value"].max() > 0 else None,
1185
- )
1186
- fig4.colorbar(h[3], ax=ax4, label="Count (log scale)")
1187
- ax4.set_xlabel("Position")
1188
- ax4.set_ylabel(value_label)
1189
- ax4.set_title(f"2D Density Heatmap β€” Position vs. {value_label}")
1190
- ax4.set_xticks(positions_unique)
1191
- ax4.grid(alpha=0.15)
1192
- fig4.tight_layout()
1193
- st.pyplot(fig4)
1194
-
1195
  except Exception as e:
1196
  st.error(f"❌ Error processing file: {e}")
1197
  import traceback
@@ -1334,7 +1308,7 @@ with tab4:
1334
 
1335
  if not any(c.lower() == "sample" for c in df.columns):
1336
  df.insert(0, "Sample", np.arange(1, len(df) + 1))
1337
- st.info("`Sample` column missing β€” automatically generated 1..N.")
1338
 
1339
  position_cols = [c for c in df.columns if re.match(r"(?i)^position\s*\d+", c)]
1340
  if not position_cols:
@@ -1352,7 +1326,7 @@ with tab4:
1352
 
1353
  if "Total edited" not in df.columns:
1354
  df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
1355
- st.info("`Total edited` column missing β€” calculated automatically as sum of 1s per row.")
1356
 
1357
  st.markdown("#### βš™οΈ Volume Calculation Settings")
1358
  default_total_vol = st.number_input(
@@ -1365,7 +1339,7 @@ with tab4:
1365
  if not vol_candidates:
1366
  df['Volume per "1"'] = default_total_vol / df["Total edited"].replace(0, np.nan)
1367
  df['Volume per "1"'] = df['Volume per "1"'].fillna(0)
1368
- st.info(f'`Volume per "1"` column missing β€” calculated automatically as {default_total_vol:.0f} Β΅L (max per input well) / Total edited.')
1369
  volume_col = 'Volume per "1"'
1370
  else:
1371
  volume_col = vol_candidates[0]
@@ -1385,7 +1359,7 @@ with tab4:
1385
 
1386
  st.markdown("### πŸ‘€ Preview: Suggested Uniform Layout")
1387
  if max_wells_per_source == 0:
1388
- st.info("No edits detected β€” nothing to allocate.")
1389
  st.stop()
1390
 
1391
  st.write(
 
20
  # =========================
21
  # Encoding Schemes
22
  # =========================
23
+ ENCODING_OPTIONS = ["6-bit LNS", "Base64 (6-bit)", "ASCII (7-bit)", "UTF-8 (8-bit)"]
24
 
25
  BITS_PER_UNIT = {
26
+ "6-bit LNS": 6,
27
  "Base64 (6-bit)": 6,
28
  "ASCII (7-bit)": 7,
29
  "UTF-8 (8-bit)": 8,
 
39
  'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2',
40
  '3', '4', '5', '6', '7', '8', '9', '.', ',', '(',
41
  ')','+', '-', '*', '/', '=', '$', '!', ':', '%',
42
+ '"', '#', '@', "'", '?', '&', '(Image)'
43
  ])
44
  }
45
  reverse_voyager_table = {v: k for k, v in voyager_table.items()}
 
176
  - display_units: the encoded representation (Base64 symbol, hex byte, ASCII code, Voyager char)
177
  - source_chars: the original text character each chunk maps to
178
  """
179
+ if scheme == "6-bit LNS":
180
  bits = []
181
  for char in text:
182
  val = reverse_voyager_table.get(char.upper(), 0)
 
233
  # Decoding Functions
234
  # =========================
235
  def decode_from_binary(bits: list[int], scheme: str) -> str:
236
+ if scheme == "6-bit LNS":
237
  chars = []
238
  for i in range(0, len(bits), 6):
239
  chunk = bits[i:i + 6]
 
307
  index=0,
308
  key="enc_scheme",
309
  help=(
310
+ "**6-bit LNS** – Custom 56-character table (A-Z, 0-9, punctuation). 6 bits/char.\n\n"
311
  "**Base64 (6-bit)** – Standard Base64 encoding of UTF-8 bytes. 6 bits/symbol.\n\n"
312
  "**ASCII (7-bit)** – Standard 7-bit ASCII. 7 bits/char.\n\n"
313
  "**UTF-8 (8-bit)** – Full UTF-8 byte encoding. 8 bits/byte. Supports all Unicode."
 
316
 
317
  bits_per = BITS_PER_UNIT[encoding_scheme]
318
 
319
+ if encoding_scheme == "6-bit LNS":
320
  supported = ''.join(voyager_table[i] for i in range(len(voyager_table)))
321
  st.caption(f"Supported characters ({len(voyager_table)}): `{supported}`")
322
 
 
335
  binary_concat = ''.join(map(str, binary_labels))
336
 
337
  st.markdown("### Output 1 – Binary Labels per Character")
338
+ st.caption(f"Encoding: **{encoding_scheme}** - {bits_per} bits per unit")
339
 
340
  grouped_bits = [binary_labels[i:i + bits_per] for i in range(0, len(binary_labels), bits_per)]
341
  scroll_html = (
 
345
  for i, bits in enumerate(grouped_bits):
346
  src = source_chars[i] if i < len(source_chars) else "?"
347
  enc = display_units[i] if i < len(display_units) else "?"
348
+ if encoding_scheme == "6-bit LNS":
349
  scroll_html += f"<div>'{src}' β†’ {bits}</div>"
350
  else:
351
  scroll_html += f"<div>'{src}' β†’ '{enc}' β†’ {bits}</div>"
 
357
  src = source_chars[i] if i < len(source_chars) else "?"
358
  enc = display_units[i] if i < len(display_units) else "?"
359
  bit_str = ''.join(map(str, bits))
360
+ if encoding_scheme == "6-bit LNS":
361
  per_char_lines.append(f"'{src}' β†’ {bit_str}")
362
  else:
363
  per_char_lines.append(f"'{src}' β†’ '{enc}' β†’ {bit_str}")
 
411
  ["Black & White (1-bit)", "Grayscale (4-bit)"],
412
  key="enc_image_type",
413
  help=(
414
+ "**Black & White (1-bit)** - Each pixel = 1 bit (0 or 1). Uses a brightness threshold.\n\n"
415
+ "**Grayscale (4-bit)** - Each pixel = 4 bits (0–15 levels). "
416
  "Uniform quantization in sRGB/BT.601 luma space. 0 = black, 15 = white. "
417
  "Two pixels per byte, high-nibble first; rows top-to-bottom, no row padding."
418
  )
 
429
  orig_w, orig_h = img.size
430
  aspect = orig_h / orig_w
431
 
432
+ st.image(img, caption=f"Original (grayscale) - {orig_w}Γ—{orig_h} px", use_container_width=True)
433
 
434
  st.markdown("#### βš™οΈ Resolution")
435
  target_width = st.slider(
 
456
 
457
  binary_matrix = (img_array < threshold).astype(int)
458
 
459
+ st.markdown("### Preview - Black & White Output")
460
  col_prev1, col_prev2 = st.columns(2)
461
  with col_prev1:
462
  st.image(img_resized, caption=f"Resized grayscale ({target_width}Γ—{target_height})", use_container_width=True)
 
485
  key="download_img_binary_txt"
486
  )
487
 
488
+ st.markdown("### Output 2 – Binary Matrix by dimension (Reactions Γ— Positions)")
489
  columns = [f"Position {i+1}" for i in range(target_width)]
490
  df_img = pd.DataFrame(binary_matrix, columns=columns)
491
  df_img.insert(0, "Sample", range(1, len(df_img) + 1))
 
548
  gray4_matrix = quantize_to_4bit(img_array)
549
  gray8_preview = gray4_to_gray8(gray4_matrix)
550
 
551
+ st.markdown("### Preview - 4-bit Grayscale (16 levels)")
552
  col_prev1, col_prev2 = st.columns(2)
553
  with col_prev1:
554
  st.image(img_resized, caption=f"Original resized ({target_width}Γ—{target_height}, 256 levels)", use_container_width=True)
 
740
  ["Black & White (1-bit)", "Grayscale (4-bit)"],
741
  key="dec_image_type",
742
  help=(
743
+ "**Black & White** - Input is 0/1 binary data. Each value = 1 pixel.\n\n"
744
+ "**Grayscale (4-bit)** - Input is a **value matrix (0–15)**, **binary data** "
745
  "(every 4 bits = one pixel), or a packed **.g4 file**."
746
  )
747
  )
 
817
  display_w = img_width * display_scale
818
  display_h = img_height * display_scale
819
  pil_display = pil_img.resize((display_w, display_h), Image.NEAREST)
820
+ st.image(pil_display, caption=f"Binary image - {img_width}Γ—{img_height} (1=black, 0=white)")
821
 
822
  ones = int(bits_matrix.sum())
823
  st.markdown(
 
861
  ["Value matrix (0–15)", "Binary (4 bits per pixel)", "Packed .g4 file"],
862
  key="g4_input_format",
863
  help=(
864
+ "**Value matrix** - CSV/XLSX where each cell is a pixel value 0–15. "
865
  "Rows = pixel rows, columns = pixel columns.\n\n"
866
+ "**Binary** - 0/1 data where every 4 consecutive bits encode one pixel (0–15).\n\n"
867
+ "**Packed .g4 file** - Binary file with G4 header + packed 4bpp payload "
868
  "(two pixels per byte, high-nibble first)."
869
  )
870
  )
 
969
  display_w = img_width * display_scale
970
  display_h = img_height * display_scale
971
  pil_display = pil_img.resize((display_w, display_h), Image.NEAREST)
972
+ st.image(pil_display, caption=f"4-bit grayscale - {img_width}Γ—{img_height} (0=black, 15=white)")
973
 
974
  # Stats
975
  unique_vals, counts = np.unique(gray4_matrix, return_counts=True)
 
1014
  st.header("πŸ“Š Data Analytics")
1015
  st.markdown("""
1016
  Upload your sample data file (Excel or CSV) for a quick exploratory assessment of the editing rates distribution.
1017
+ The file should contain Reactions as rows and position columns with editing values.
1018
  This tab provides visualizations **before** any binary labelling.
1019
  """)
1020
 
 
1049
  st.error("No numeric position columns detected.")
1050
  st.stop()
1051
 
1052
+ st.info(f"Detected **{len(position_cols)}** position columns and **{len(adf)}** Reactions.")
1053
 
1054
  pos_data = adf[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
1055
 
 
1059
  total_edited = pos_data.sum(axis=1)
1060
 
1061
  st.markdown("### 1️⃣ Raw Data Distribution")
1062
+ st.caption("Visualize editing values across all positions and Reactions - before any binary labelling.")
1063
 
1064
  transform_option = st.selectbox(
1065
  "Value transformation:",
1066
+ ["Raw (linear)", "log1p", "log1p β†’ log1p"],
1067
  index=0,
1068
  key="transform_select",
1069
  help=(
1070
+ "**Raw** - No transformation.\n\n"
1071
+ "**log1p** - `log(1 + x)`. Compresses high values, spreads low range.\n\n"
1072
+ "**log1p β†’ log1p** - Double log1p. Even stronger compression.\n\n"
 
 
1073
  )
1074
  )
1075
 
1076
+ # def robust_pos_normalize_log1p(data: pd.DataFrame) -> pd.DataFrame:
1077
+ # logged = np.log1p(data)
1078
+ # result = logged.copy()
1079
+ # for col in result.columns:
1080
+ # med = result[col].median()
1081
+ # q75, q25 = result[col].quantile(0.75), result[col].quantile(0.25)
1082
+ # iqr = q75 - q25
1083
+ # if iqr > 0:
1084
+ # result[col] = (result[col] - med) / iqr
1085
+ # else:
1086
+ # result[col] = result[col] - med
1087
+ # return result
1088
 
1089
  if transform_option == "log1p":
1090
  transformed = np.log1p(pos_data)
 
1094
  transformed = np.log1p(np.log1p(pos_data))
1095
  value_label = "Editing Value (log1p β†’ log1p)"
1096
  transform_tag = "log1p_log1p"
1097
+ # elif transform_option == "log1p β†’ pos. norm.":
1098
+ # transformed = robust_pos_normalize_log1p(pos_data)
1099
+ # value_label = "Editing Value (log1p β†’ pos. norm.)"
1100
+ # transform_tag = "log1p_posnorm"
1101
  else:
1102
  transformed = pos_data
1103
  value_label = "Editing Value"
 
1108
  lambda x: int(re.search(r"(\d+)", str(x)).group(1)) if re.search(r"(\d+)", str(x)) else 0
1109
  )
1110
 
1111
+ st.markdown("#### πŸ“Š Histogram - All Values")
1112
 
1113
  n_bins = st.number_input("Number of bins:", min_value=10, max_value=300, value=80, step=10, key="hist_bins")
1114
 
 
1135
  fig2.tight_layout()
1136
  st.pyplot(fig2)
1137
 
1138
+ st.markdown("#### 2️⃣ Density Scatter Plot")
1139
  st.caption("Each dot = one measurement (sample Γ— position). Color = local point density.")
1140
 
1141
  x_vals = melted["Position_idx"].values.astype(float)
 
1160
  cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
1161
  ax3.set_xlabel("Position")
1162
  ax3.set_ylabel(value_label)
1163
+ ax3.set_title(f"Density Scatter - {value_label} by Position")
1164
  ax3.set_xticks(sorted(melted["Position_idx"].unique()))
1165
  ax3.grid(alpha=0.2)
1166
  fig3.tight_layout()
1167
  st.pyplot(fig3)
1168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1169
  except Exception as e:
1170
  st.error(f"❌ Error processing file: {e}")
1171
  import traceback
 
1308
 
1309
  if not any(c.lower() == "sample" for c in df.columns):
1310
  df.insert(0, "Sample", np.arange(1, len(df) + 1))
1311
+ st.info("`Sample` column missing - automatically generated 1..N.")
1312
 
1313
  position_cols = [c for c in df.columns if re.match(r"(?i)^position\s*\d+", c)]
1314
  if not position_cols:
 
1326
 
1327
  if "Total edited" not in df.columns:
1328
  df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
1329
+ st.info("`Total edited` column missing - calculated automatically as sum of 1s per row.")
1330
 
1331
  st.markdown("#### βš™οΈ Volume Calculation Settings")
1332
  default_total_vol = st.number_input(
 
1339
  if not vol_candidates:
1340
  df['Volume per "1"'] = default_total_vol / df["Total edited"].replace(0, np.nan)
1341
  df['Volume per "1"'] = df['Volume per "1"'].fillna(0)
1342
+ st.info(f'`Volume per "1"` column missing - calculated automatically as {default_total_vol:.0f} Β΅L (max per input well) / Total edited.')
1343
  volume_col = 'Volume per "1"'
1344
  else:
1345
  volume_col = vol_candidates[0]
 
1359
 
1360
  st.markdown("### πŸ‘€ Preview: Suggested Uniform Layout")
1361
  if max_wells_per_source == 0:
1362
+ st.info("No edits detected - nothing to allocate.")
1363
  st.stop()
1364
 
1365
  st.write(