wenjun99 committed on
Commit
4be5ea2
·
verified ·
1 Parent(s): f7a3263

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -45
app.py CHANGED
@@ -169,9 +169,6 @@ with tab2:
169
  else:
170
  st.info("👆 Upload a file to start the reverse conversion.")
171
 
172
- # --------------------------------------------------
173
- # TAB 3: Pipetting Command Generator
174
- # --------------------------------------------------
175
  # --------------------------------------------------
176
  # TAB 3: Pipetting Command Generator
177
  # --------------------------------------------------
@@ -186,7 +183,8 @@ with tab3:
186
  Upload your sample file (Excel, CSV, or TXT) containing binary mutation data.
187
  The app will:
188
  - Auto-detect or create `Sample`, `Position#`, `Total edited`, and `Volume per "1"` columns
189
- - Calculate total demand per input and suggest a **uniform layout width** (consecutive wells per input)
 
190
  - **Preview** the layout on a plate map (with tooltips)
191
  - After confirmation, generate pipetting commands and a source volume summary
192
  """)
@@ -197,7 +195,7 @@ with tab3:
197
  min_value=10.0, max_value=2000.0, value=160.0, step=10.0
198
  )
199
 
200
- # ---------- Helpers (plate geometry & viz) ----------
201
  ROWS_96 = ["A", "B", "C", "D", "E", "F", "G", "H"]
202
  COLS_96 = list(range(1, 13))
203
 
@@ -211,7 +209,7 @@ with tab3:
211
  yield f"{r}{c}"
212
 
213
  def parse_well_name(well: str):
214
- """Split well name like 'A1' or 'H12' into (row_letter, numeric_col)."""
215
  m = re.match(r"([A-Ha-h])\s*([0-9]+)", str(well).strip())
216
  if not m:
217
  return ("A", 0)
@@ -242,10 +240,7 @@ with tab3:
242
  ]
243
 
244
  def render_plate_map_html(plates_used, well_to_input, max_wells_per_source, inputs_count):
245
- """
246
- Render HTML plates. well_to_input: dict[(plate, well)] = (input_idx, index_within_input_block)
247
- """
248
- # Legend HTML
249
  legend_spans = []
250
  for i in range(1, inputs_count + 1):
251
  color = PALETTE[(i-1) % len(PALETTE)]
@@ -256,7 +251,6 @@ with tab3:
256
  )
257
  legend_html = "<div style='margin:8px 0 16px 0'>" + "".join(legend_spans) + "</div>"
258
 
259
- # CSS for grid + tooltip
260
  css = """
261
  <style>
262
  .plate { margin: 10px 0 24px 0; }
@@ -305,40 +299,69 @@ with tab3:
305
  df = pd.read_excel(uploaded)
306
  elif uploaded.name.endswith(".csv"):
307
  df = pd.read_csv(uploaded)
308
- else:
309
- df = pd.read_csv(uploaded, sep="\t", engine="python")
 
 
 
310
 
311
  st.success(f"✅ Loaded file with {len(df)} rows and {len(df.columns)} columns")
312
 
 
313
  df.columns = [str(c).strip() for c in df.columns]
 
 
314
  if not any(c.lower() == "sample" for c in df.columns):
315
  df.insert(0, "Sample", np.arange(1, len(df) + 1))
316
  st.info("`Sample` column missing — automatically generated 1..N.")
317
 
318
- position_cols = [c for c in df.columns if re.match(r"(?i)^position\\s*\\d+", c)]
 
319
  if not position_cols:
320
  non_pos_cols = {"sample", "total edited", 'volume per "1"', "volume per 1"}
321
  candidate_cols = [c for c in df.columns if c.lower() not in non_pos_cols]
322
  position_cols = candidate_cols
323
  st.info(f"Position columns inferred automatically: {len(position_cols)} detected.")
324
 
 
 
 
 
 
 
325
  df[position_cols] = df[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
 
 
326
  if "Total edited" not in df.columns:
327
  df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
328
- st.info("`Total edited` calculated automatically.")
 
 
 
 
 
 
 
 
329
 
330
  vol_candidates = [c for c in df.columns if "volume per" in c.lower()]
331
  if not vol_candidates:
332
- df['Volume per "1"'] = 64 / df["Total edited"].replace(0, np.nan)
333
- df['Volume per "1"'] = df['Volume per "1"'].fillna(0)
 
334
  volume_col = 'Volume per "1"'
335
  else:
336
  volume_col = vol_candidates[0]
337
 
 
338
  if df[volume_col].max() > max_per_well_ul:
339
- st.error(f"❌ A row exceeds the max per-well cap ({max_per_well_ul} µL).")
 
 
 
340
  st.stop()
341
 
 
342
  vol_per_one_series = pd.to_numeric(df[volume_col], errors="coerce").fillna(0.0)
343
  total_volume_per_input = [float(vol_per_one_series[df[pos] == 1].sum()) for pos in position_cols]
344
  wells_needed_per_input = [int(ceil(tv / max_per_well_ul)) if tv > 0 else 0 for tv in total_volume_per_input]
@@ -350,17 +373,27 @@ with tab3:
350
  st.info("No edits detected — nothing to allocate.")
351
  st.stop()
352
 
 
 
 
 
 
 
353
  total_wells_needed_uniform = num_inputs * max_wells_per_source
354
  plates_needed = int(ceil(total_wells_needed_uniform / 96)) or 1
355
 
356
- # ✅ Correct well sorting (A1 → A2 → A12)
357
  global_wells = sorted(
358
  build_global_wells_list(plates_needed),
359
- key=lambda x: (x[0], ROWS_96.index(parse_well_name(x[1])[0]), parse_well_name(x[1])[1])
 
 
 
 
360
  )
361
  global_wells = global_wells[:total_wells_needed_uniform]
362
 
363
- # Assign blocks
364
  assigned_wells_map, well_to_input, preview_rows = {}, {}, []
365
  for i in range(1, num_inputs + 1):
366
  start, end = (i - 1) * max_wells_per_source, i * max_wells_per_source
@@ -372,7 +405,7 @@ with tab3:
372
  preview_rows.append({
373
  "Input (Position #)": i,
374
  "Total demand (µL)": round(total_volume_per_input[i-1], 2),
375
- "Wells needed": wells_needed_per_input[i-1],
376
  "Allocated (uniform)": max_wells_per_source,
377
  "Assigned wells": block_str
378
  })
@@ -387,6 +420,7 @@ with tab3:
387
  # --- Generate Commands ---
388
  st.markdown("### ✅ Generate Pipetting Commands")
389
  if st.button("Generate using this layout"):
 
390
  per_input_well_cum = {i: [0.0] * max_wells_per_source for i in range(1, num_inputs + 1)}
391
  commands, source_volume_totals = [], {}
392
 
@@ -403,33 +437,61 @@ with tab3:
403
  continue
404
  wells_for_input = assigned_wells_map[pos_idx]
405
  cum_list = per_input_well_cum[pos_idx]
 
 
406
  for j, ((src_plate, src_well), current_vol) in enumerate(zip(wells_for_input, cum_list)):
407
  if current_vol + vol_per_one <= max_per_well_ul:
408
- cum_list[j] += vol_per_one
409
- source_volume_totals[(src_plate, src_well)] = source_volume_totals.get((src_plate, src_well), 0) + vol_per_one
410
- commands.append({
411
- "Input #": pos_idx,
412
- "Source plate": src_plate,
413
- "Source well": src_well,
414
- "Destination plate": dest_plate,
415
- "Destination well": dest_well,
416
- "Volume": round(vol_per_one, 2),
417
- "Tool": tool
418
- })
419
  break
420
 
421
- # Sort commands with numeric logic
422
- commands_df = pd.DataFrame(commands).sort_values(
423
- by=["Input #", "Source plate", "Source well", "Destination plate", "Destination well"],
424
- key=lambda col: col.apply(
425
- lambda v: parse_well_name(v)[1] if col.name.endswith("well") else int(v)
426
- ) if col.name.endswith("well") or col.name in ["Input #", "Source plate", "Destination plate"] else col,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  kind="stable"
428
  )
429
 
 
 
 
 
 
 
430
  st.success(f"✅ Generated {len(commands_df)} commands across {num_inputs} inputs.")
431
 
432
- # ✅ Source summary numeric sort
433
  summary_rows = []
434
  for i in range(1, num_inputs + 1):
435
  for (p, w), used in zip(assigned_wells_map[i], per_input_well_cum[i]):
@@ -439,11 +501,15 @@ with tab3:
439
  "Total volume taken (µL)": round(total, 2),
440
  "Allocated capacity (µL)": round(max_per_well_ul, 2)
441
  })
442
- summary_df = pd.DataFrame(summary_rows).sort_values(
443
- by=["Source", "Source plate", "Source well"],
444
- key=lambda col: col.apply(lambda v: parse_well_name(v)[1]) if col.name == "Source well" else col,
 
 
445
  kind="stable"
446
- )
 
 
447
 
448
  # Display results
449
  st.markdown("### 💧 Pipetting Commands")
@@ -458,4 +524,3 @@ with tab3:
458
  st.error(f"❌ Error processing file: {e}")
459
  else:
460
  st.info("👆 Upload an Excel/CSV/TXT file to start.")
461
-
 
169
  else:
170
  st.info("👆 Upload a file to start the reverse conversion.")
171
 
 
 
 
172
  # --------------------------------------------------
173
  # TAB 3: Pipetting Command Generator
174
  # --------------------------------------------------
 
183
  Upload your sample file (Excel, CSV, or TXT) containing binary mutation data.
184
  The app will:
185
  - Auto-detect or create `Sample`, `Position#`, `Total edited`, and `Volume per "1"` columns
186
+ - Let you set the **Desired total volume per sample (µL)** used to compute `Volume per "1"`
187
+ - Calculate total demand per input and suggest a **uniform layout** (same # consecutive wells per input)
188
  - **Preview** the layout on a plate map (with tooltips)
189
  - After confirmation, generate pipetting commands and a source volume summary
190
  """)
 
195
  min_value=10.0, max_value=2000.0, value=160.0, step=10.0
196
  )
197
 
198
+ # ---------- Helpers (plate geometry, parsing, viz) ----------
199
  ROWS_96 = ["A", "B", "C", "D", "E", "F", "G", "H"]
200
  COLS_96 = list(range(1, 13))
201
 
 
209
  yield f"{r}{c}"
210
 
211
  def parse_well_name(well: str):
212
+ """Split 'A1'/'H12' (row_letter, col_num). Robust to stray spaces."""
213
  m = re.match(r"([A-Ha-h])\s*([0-9]+)", str(well).strip())
214
  if not m:
215
  return ("A", 0)
 
240
  ]
241
 
242
  def render_plate_map_html(plates_used, well_to_input, max_wells_per_source, inputs_count):
243
+ """Fancy HTML plate grids with tooltips."""
 
 
 
244
  legend_spans = []
245
  for i in range(1, inputs_count + 1):
246
  color = PALETTE[(i-1) % len(PALETTE)]
 
251
  )
252
  legend_html = "<div style='margin:8px 0 16px 0'>" + "".join(legend_spans) + "</div>"
253
 
 
254
  css = """
255
  <style>
256
  .plate { margin: 10px 0 24px 0; }
 
299
  df = pd.read_excel(uploaded)
300
  elif uploaded.name.endswith(".csv"):
301
  df = pd.read_csv(uploaded)
302
+ else: # TXT (tab-delimited try, else CSV)
303
+ try:
304
+ df = pd.read_csv(uploaded, sep="\t")
305
+ except Exception:
306
+ df = pd.read_csv(uploaded)
307
 
308
  st.success(f"✅ Loaded file with {len(df)} rows and {len(df.columns)} columns")
309
 
310
+ # --- Clean column names ---
311
  df.columns = [str(c).strip() for c in df.columns]
312
+
313
+ # --- Ensure Sample column ---
314
  if not any(c.lower() == "sample" for c in df.columns):
315
  df.insert(0, "Sample", np.arange(1, len(df) + 1))
316
  st.info("`Sample` column missing — automatically generated 1..N.")
317
 
318
+ # --- Detect & numerically sort Position columns ---
319
+ position_cols = [c for c in df.columns if re.match(r"(?i)^position\s*\d+", c)]
320
  if not position_cols:
321
  non_pos_cols = {"sample", "total edited", 'volume per "1"', "volume per 1"}
322
  candidate_cols = [c for c in df.columns if c.lower() not in non_pos_cols]
323
  position_cols = candidate_cols
324
  st.info(f"Position columns inferred automatically: {len(position_cols)} detected.")
325
 
326
+ def pos_key(col_name: str):
327
+ m = re.search(r"(\d+)", col_name)
328
+ return int(m.group(1)) if m else 10**9
329
+ position_cols = sorted(position_cols, key=pos_key)
330
+
331
+ # Normalize Position columns to numeric {0,1}
332
  df[position_cols] = df[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
333
+
334
+ # --- Ensure Total edited ---
335
  if "Total edited" not in df.columns:
336
  df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
337
+ st.info("`Total edited` column missing — calculated automatically as sum of 1s per row.")
338
+
339
+ # --- User setting for Volume per "1" calculation ---
340
+ st.markdown("#### ⚙️ Volume Calculation Settings")
341
+ default_total_vol = st.number_input(
342
+ "Desired total volume per sample (µL)",
343
+ min_value=1.0, max_value=10000.0, value=64.0, step=1.0,
344
+ help="Used to compute Volume per '1' as (Desired total volume / Total edited) when not provided."
345
+ )
346
 
347
  vol_candidates = [c for c in df.columns if "volume per" in c.lower()]
348
  if not vol_candidates:
349
+ df['Volume per "1"'] = default_total_vol / df["Total edited"].replace(0, np.nan)
350
+ df['Volume per "1"'] = df['Volume per "1"'].fillna(0) # rows with 0 edits → 0 µL
351
+ st.info(f'`Volume per "1"` column missing — calculated automatically as {default_total_vol:.0f} µL / Total edited.')
352
  volume_col = 'Volume per "1"'
353
  else:
354
  volume_col = vol_candidates[0]
355
 
356
+ # Safety: per-transfer must not exceed per-well cap
357
  if df[volume_col].max() > max_per_well_ul:
358
+ st.error(
359
+ f"❌ At least one row has `Volume per \"1\"` greater than the per-well cap ({max_per_well_ul} µL). "
360
+ "Increase the cap or reduce per-transfer volume."
361
+ )
362
  st.stop()
363
 
364
+ # --- Compute total demand per input ---
365
  vol_per_one_series = pd.to_numeric(df[volume_col], errors="coerce").fillna(0.0)
366
  total_volume_per_input = [float(vol_per_one_series[df[pos] == 1].sum()) for pos in position_cols]
367
  wells_needed_per_input = [int(ceil(tv / max_per_well_ul)) if tv > 0 else 0 for tv in total_volume_per_input]
 
373
  st.info("No edits detected — nothing to allocate.")
374
  st.stop()
375
 
376
+ st.write(
377
+ f"💡 Suggested layout: **{max_wells_per_source} consecutive wells per input** "
378
+ f"(cap {max_per_well_ul:.0f} µL/well)."
379
+ )
380
+
381
+ # Total wells and plates needed
382
  total_wells_needed_uniform = num_inputs * max_wells_per_source
383
  plates_needed = int(ceil(total_wells_needed_uniform / 96)) or 1
384
 
385
+ # ✅ Correct, robust well ordering for layout
386
  global_wells = sorted(
387
  build_global_wells_list(plates_needed),
388
+ key=lambda x: (
389
+ x[0], # plate
390
+ ROWS_96.index(parse_well_name(x[1])[0]), # row index
391
+ parse_well_name(x[1])[1] # column number
392
+ )
393
  )
394
  global_wells = global_wells[:total_wells_needed_uniform]
395
 
396
+ # Assign uniform blocks to each input
397
  assigned_wells_map, well_to_input, preview_rows = {}, {}, []
398
  for i in range(1, num_inputs + 1):
399
  start, end = (i - 1) * max_wells_per_source, i * max_wells_per_source
 
405
  preview_rows.append({
406
  "Input (Position #)": i,
407
  "Total demand (µL)": round(total_volume_per_input[i-1], 2),
408
+ "Wells needed (actual)": wells_needed_per_input[i-1],
409
  "Allocated (uniform)": max_wells_per_source,
410
  "Assigned wells": block_str
411
  })
 
420
  # --- Generate Commands ---
421
  st.markdown("### ✅ Generate Pipetting Commands")
422
  if st.button("Generate using this layout"):
423
+ # Track per-input per-well usage (µL)
424
  per_input_well_cum = {i: [0.0] * max_wells_per_source for i in range(1, num_inputs + 1)}
425
  commands, source_volume_totals = [], {}
426
 
 
437
  continue
438
  wells_for_input = assigned_wells_map[pos_idx]
439
  cum_list = per_input_well_cum[pos_idx]
440
+
441
+ chosen = None
442
  for j, ((src_plate, src_well), current_vol) in enumerate(zip(wells_for_input, cum_list)):
443
  if current_vol + vol_per_one <= max_per_well_ul:
444
+ chosen = (j, src_plate, src_well)
 
 
 
 
 
 
 
 
 
 
445
  break
446
 
447
+ if chosen is None:
448
+ st.error(
449
+ f"Allocation exhausted for Input {pos_idx} while creating commands. "
450
+ "Increase the max volume per well or review per-transfer volume."
451
+ )
452
+ st.stop()
453
+
454
+ j, src_plate, src_well = chosen
455
+ cum_list[j] += vol_per_one
456
+ per_input_well_cum[pos_idx] = cum_list
457
+ source_volume_totals[(src_plate, src_well)] = source_volume_totals.get((src_plate, src_well), 0.0) + vol_per_one
458
+
459
+ commands.append({
460
+ "Input #": pos_idx,
461
+ "Source plate": src_plate,
462
+ "Source well": src_well,
463
+ "Destination plate": dest_plate,
464
+ "Destination well": dest_well,
465
+ "Volume": round(vol_per_one, 2),
466
+ "Tool": tool
467
+ })
468
+
469
+ commands_df = pd.DataFrame(commands)
470
+
471
+ # ✅ Add helper sort columns to ensure Source/Destination wells sort A1→A12, B1→B12, ...
472
+ def row_idx_from_well(w): return ROWS_96.index(parse_well_name(w)[0])
473
+ def col_num_from_well(w): return parse_well_name(w)[1]
474
+
475
+ commands_df["Src_row_idx"] = commands_df["Source well"].apply(row_idx_from_well)
476
+ commands_df["Src_col_num"] = commands_df["Source well"].apply(col_num_from_well)
477
+ commands_df["Dst_row_idx"] = commands_df["Destination well"].apply(row_idx_from_well)
478
+ commands_df["Dst_col_num"] = commands_df["Destination well"].apply(col_num_from_well)
479
+
480
+ commands_df = commands_df.sort_values(
481
+ by=["Input #", "Source plate", "Src_row_idx", "Src_col_num",
482
+ "Destination plate", "Dst_row_idx", "Dst_col_num"],
483
  kind="stable"
484
  )
485
 
486
+ # Drop helper columns & order final columns
487
+ commands_df = commands_df[[
488
+ "Input #", "Source plate", "Source well",
489
+ "Destination plate", "Destination well", "Volume", "Tool"
490
+ ]]
491
+
492
  st.success(f"✅ Generated {len(commands_df)} commands across {num_inputs} inputs.")
493
 
494
+ # ✅ Source summary numeric sort by plate → row → col
495
  summary_rows = []
496
  for i in range(1, num_inputs + 1):
497
  for (p, w), used in zip(assigned_wells_map[i], per_input_well_cum[i]):
 
501
  "Total volume taken (µL)": round(total, 2),
502
  "Allocated capacity (µL)": round(max_per_well_ul, 2)
503
  })
504
+ summary_df = pd.DataFrame(summary_rows)
505
+ summary_df["Src_row_idx"] = summary_df["Source well"].apply(row_idx_from_well)
506
+ summary_df["Src_col_num"] = summary_df["Source well"].apply(col_num_from_well)
507
+ summary_df = summary_df.sort_values(
508
+ by=["Source", "Source plate", "Src_row_idx", "Src_col_num"],
509
  kind="stable"
510
+ )[
511
+ ["Source", "Source plate", "Source well", "Total volume taken (µL)", "Allocated capacity (µL)"]
512
+ ]
513
 
514
  # Display results
515
  st.markdown("### 💧 Pipetting Commands")
 
524
  st.error(f"❌ Error processing file: {e}")
525
  else:
526
  st.info("👆 Upload an Excel/CSV/TXT file to start.")