wenjun99 committed on
Commit
f7a3263
·
verified ·
1 Parent(s): f858297

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -124
app.py CHANGED
@@ -172,7 +172,9 @@ with tab2:
172
  # --------------------------------------------------
173
  # TAB 3: Pipetting Command Generator
174
  # --------------------------------------------------
175
-
 
 
176
  with tab3:
177
  import numpy as np
178
  import pandas as pd
@@ -208,6 +210,13 @@ with tab3:
208
  for c in COLS_96:
209
  yield f"{r}{c}"
210
 
 
 
 
 
 
 
 
211
  def sample_index_to_plate_and_well(sample_idx: int):
212
  """Destination mapping: 96-well plates in reading order, extends to multiple plates."""
213
  plate_num = ((sample_idx - 1) // 96) + 1
@@ -247,7 +256,7 @@ with tab3:
247
  )
248
  legend_html = "<div style='margin:8px 0 16px 0'>" + "".join(legend_spans) + "</div>"
249
 
250
- # CSS for grid + tooltip (title attribute works too; we use both)
251
  css = """
252
  <style>
253
  .plate { margin: 10px 0 24px 0; }
@@ -262,15 +271,12 @@ with tab3:
262
  """
263
 
264
  body = [css, legend_html]
265
- # Build each plate
266
  for p in range(1, plates_used + 1):
267
  body.append(f"<div class='plate'><div class='plate-title'>Plate {p}</div>")
268
- # header row
269
  body.append("<div class='grid'>")
270
  body.append("<div class='cell head'></div>")
271
  for c in COLS_96:
272
  body.append(f"<div class='cell head'>{c}</div>")
273
- # rows
274
  for r in ROWS_96:
275
  body.append(f"<div class='cell head'>{r}</div>")
276
  for c in COLS_96:
@@ -288,8 +294,7 @@ with tab3:
288
  else:
289
  cell_html = "<div class='cell'></div>"
290
  body.append(cell_html)
291
- body.append("</div></div>") # grid + plate
292
-
293
  return "".join(body)
294
 
295
  # ---------- Main flow ----------
@@ -300,113 +305,74 @@ with tab3:
300
  df = pd.read_excel(uploaded)
301
  elif uploaded.name.endswith(".csv"):
302
  df = pd.read_csv(uploaded)
303
- else: # TXT (tab-delimited fallback)
304
- try:
305
- df = pd.read_csv(uploaded, sep="\t")
306
- except Exception:
307
- df = pd.read_csv(uploaded)
308
 
309
  st.success(f"✅ Loaded file with {len(df)} rows and {len(df.columns)} columns")
310
 
311
- # --- Clean column names ---
312
  df.columns = [str(c).strip() for c in df.columns]
313
-
314
- # --- Ensure Sample column ---
315
  if not any(c.lower() == "sample" for c in df.columns):
316
  df.insert(0, "Sample", np.arange(1, len(df) + 1))
317
  st.info("`Sample` column missing — automatically generated 1..N.")
318
 
319
- # --- Detect Position columns ---
320
- position_cols = [c for c in df.columns if re.match(r"(?i)^position\s*\d+", c)]
321
  if not position_cols:
322
  non_pos_cols = {"sample", "total edited", 'volume per "1"', "volume per 1"}
323
  candidate_cols = [c for c in df.columns if c.lower() not in non_pos_cols]
324
- if not candidate_cols:
325
- st.error("❌ Could not detect any Position columns.")
326
- st.stop()
327
  position_cols = candidate_cols
328
  st.info(f"Position columns inferred automatically: {len(position_cols)} detected.")
329
 
330
- # Normalize Position columns to numeric {0,1}
331
  df[position_cols] = df[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
332
-
333
- # --- Ensure Total edited ---
334
  if "Total edited" not in df.columns:
335
  df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
336
- st.info("`Total edited` column missing — calculated automatically as sum of 1s per row.")
337
 
338
- # --- Ensure Volume per "1" ---
339
  vol_candidates = [c for c in df.columns if "volume per" in c.lower()]
340
  if not vol_candidates:
341
  df['Volume per "1"'] = 64 / df["Total edited"].replace(0, np.nan)
342
- df['Volume per "1"'] = df['Volume per "1"'].fillna(0) # rows with 0 edits → 0 µL
343
- st.info('`Volume per "1"` column missing — calculated automatically as 64 / Total edited.')
344
  volume_col = 'Volume per "1"'
345
  else:
346
  volume_col = vol_candidates[0]
347
 
348
- # Safety: per-transfer must not exceed per-well cap
349
  if df[volume_col].max() > max_per_well_ul:
350
- st.error(
351
- f"❌ At least one row has `Volume per \"1\"` greater than the per-well cap ({max_per_well_ul} µL). "
352
- "Increase the cap or reduce per-transfer volume."
353
- )
354
  st.stop()
355
 
356
- # --- Compute total demand per input ---
357
  vol_per_one_series = pd.to_numeric(df[volume_col], errors="coerce").fillna(0.0)
358
- total_volume_per_input = []
359
- for pos in position_cols:
360
- mask = df[pos] == 1
361
- total_vol = float(vol_per_one_series[mask].sum())
362
- total_volume_per_input.append(total_vol)
363
-
364
- wells_needed_per_input = [
365
- int(ceil(tv / max_per_well_ul)) if tv > 0 else 0
366
- for tv in total_volume_per_input
367
- ]
368
  num_inputs = len(position_cols)
369
  max_wells_per_source = max(wells_needed_per_input) if wells_needed_per_input else 0
370
 
371
  st.markdown("### 👀 Preview: Suggested Uniform Layout")
372
  if max_wells_per_source == 0:
373
- st.info("No edits detected (all inputs require 0 µL). Nothing to allocate.")
374
  st.stop()
375
 
376
- st.write(
377
- f"💡 Suggested layout: **{max_wells_per_source} consecutive wells per input** "
378
- f"(cap {max_per_well_ul:.0f} µL/well)."
379
- )
380
-
381
- # Total wells and plates needed
382
  total_wells_needed_uniform = num_inputs * max_wells_per_source
383
- plates_needed = int(ceil(total_wells_needed_uniform / 96)) if total_wells_needed_uniform > 0 else 1
384
 
385
- # Global wells list long enough to cover allocation
386
- global_wells = build_global_wells_list(plates_needed) # [(p, 'A1'), ...]
387
  global_wells = sorted(
388
  build_global_wells_list(plates_needed),
389
- key=lambda x: (x[0], ROWS_96.index(x[1][0]), int(x[1][1:]))
390
  )
391
- global_wells = global_wells[:total_wells_needed_uniform] # exact length
392
 
393
- # Assign blocks of size max_wells_per_source per input in order
394
- assigned_wells_map = {} # input_idx (1-based) -> list[(plate, well)]
395
- well_to_input = {} # (plate, well) -> (input_idx, within_block_index 1..max_wells_per_source)
396
- preview_rows = []
397
  for i in range(1, num_inputs + 1):
398
- start = (i - 1) * max_wells_per_source
399
- end = start + max_wells_per_source
400
  block = global_wells[start:end]
401
  assigned_wells_map[i] = block
402
  for j, (p, w) in enumerate(block, start=1):
403
  well_to_input[(p, w)] = (i, j)
404
- # Make a readable block string
405
  block_str = ", ".join([f"P{p}:{w}" for (p, w) in block])
406
  preview_rows.append({
407
  "Input (Position #)": i,
408
  "Total demand (µL)": round(total_volume_per_input[i-1], 2),
409
- "Wells needed (actual)": wells_needed_per_input[i-1],
410
  "Allocated (uniform)": max_wells_per_source,
411
  "Assigned wells": block_str
412
  })
@@ -414,20 +380,15 @@ with tab3:
414
  preview_df = pd.DataFrame(preview_rows)
415
  st.dataframe(preview_df, use_container_width=True, height=300)
416
 
417
- # Fancy Plate Map with tooltips
418
  st.markdown("#### Plate Map (hover cells for details)")
419
  plate_html = render_plate_map_html(plates_needed, well_to_input, max_wells_per_source, num_inputs)
420
  st.markdown(plate_html, unsafe_allow_html=True)
421
 
422
  # --- Generate Commands ---
423
  st.markdown("### ✅ Generate Pipetting Commands")
424
- generate = st.button("Generate using this layout")
425
-
426
- if generate:
427
- # Track per-input per-well usage (µL)
428
  per_input_well_cum = {i: [0.0] * max_wells_per_source for i in range(1, num_inputs + 1)}
429
- commands = []
430
- source_volume_totals = {} # (plate, well) -> total µL drawn
431
 
432
  for _, row in df.iterrows():
433
  sample_id = int(row["Sample"])
@@ -440,90 +401,58 @@ with tab3:
440
  for pos_idx, col in enumerate(position_cols, start=1):
441
  if int(row[col]) != 1:
442
  continue
443
-
444
  wells_for_input = assigned_wells_map[pos_idx]
445
  cum_list = per_input_well_cum[pos_idx]
446
-
447
- chosen = None
448
  for j, ((src_plate, src_well), current_vol) in enumerate(zip(wells_for_input, cum_list)):
449
  if current_vol + vol_per_one <= max_per_well_ul:
450
- chosen = (j, src_plate, src_well)
 
 
 
 
 
 
 
 
 
 
451
  break
452
 
453
- if chosen is None:
454
- # With uniform pre-allocation this shouldn't happen unless extreme rounding / cap too small
455
- st.error(
456
- f"Allocation exhausted for Input {pos_idx} while creating commands. "
457
- "Increase the max volume per well or review per-transfer volume."
458
- )
459
- st.stop()
460
-
461
- j, src_plate, src_well = chosen
462
- cum_list[j] += vol_per_one
463
- per_input_well_cum[pos_idx] = cum_list
464
- source_volume_totals[(src_plate, src_well)] = source_volume_totals.get((src_plate, src_well), 0.0) + vol_per_one
465
-
466
- commands.append({
467
- "Input #": pos_idx,
468
- "Source plate": src_plate,
469
- "Source well": src_well,
470
- "Destination plate": dest_plate,
471
- "Destination well": dest_well,
472
- "Volume": round(vol_per_one, 2),
473
- "Tool": tool
474
- })
475
-
476
- # Compile results
477
  commands_df = pd.DataFrame(commands).sort_values(
478
  by=["Input #", "Source plate", "Source well", "Destination plate", "Destination well"],
479
- key=lambda col: col.astype(int) if col.name == "Input #" else col,
 
 
480
  kind="stable"
481
  )
482
 
483
- commands_df = commands_df[["Input #", "Source plate", "Source well",
484
- "Destination plate", "Destination well", "Volume", "Tool"]]
485
 
486
- # Source summary (include allocated capacity per well)
487
  summary_rows = []
488
  for i in range(1, num_inputs + 1):
489
  for (p, w), used in zip(assigned_wells_map[i], per_input_well_cum[i]):
490
  total = source_volume_totals.get((p, w), 0.0)
491
  summary_rows.append({
492
- "Source": i,
493
- "Source plate": p,
494
- "Source well": w,
495
  "Total volume taken (µL)": round(total, 2),
496
  "Allocated capacity (µL)": round(max_per_well_ul, 2)
497
  })
498
- summary_df = pd.DataFrame(summary_rows)
499
-
500
  summary_df = pd.DataFrame(summary_rows).sort_values(
501
  by=["Source", "Source plate", "Source well"],
502
- key=lambda col: col.astype(int) if col.name == "Source" else col,
503
  kind="stable"
504
  )
505
 
506
-
507
- used_plates = max([p for wells in assigned_wells_map.values() for (p, _) in wells]) if assigned_wells_map else 1
508
- st.success(f"✅ Generated {len(commands_df)} commands across {num_inputs} inputs using {used_plates} plate(s).")
509
-
510
  st.markdown("### 💧 Pipetting Commands")
511
  st.dataframe(commands_df, use_container_width=True, height=400)
512
- st.download_button(
513
- "⬇️ Download Commands CSV",
514
- commands_df.to_csv(index=False),
515
- "pipetting_commands.csv",
516
- mime="text/csv"
517
- )
518
 
519
  st.markdown("### 📊 Source Volume Summary")
520
  st.dataframe(summary_df, use_container_width=True, height=400)
521
- st.download_button(
522
- "⬇️ Download Source Summary CSV",
523
- summary_df.to_csv(index=False),
524
- "source_volume_summary.csv",
525
- mime="text/csv"
526
- )
527
 
528
  except Exception as e:
529
  st.error(f"❌ Error processing file: {e}")
 
172
  # --------------------------------------------------
173
  # TAB 3: Pipetting Command Generator
174
  # --------------------------------------------------
175
+ # --------------------------------------------------
176
+ # TAB 3: Pipetting Command Generator
177
+ # --------------------------------------------------
178
  with tab3:
179
  import numpy as np
180
  import pandas as pd
 
210
  for c in COLS_96:
211
  yield f"{r}{c}"
212
 
213
def parse_well_name(well: str):
    """Split a well name such as 'A1' or 'H12' into (row_letter, column_number).

    The value is converted with ``str()`` and stripped. The row letter (A-H)
    is matched case-insensitively and returned upper-cased; optional
    whitespace between the letter and the digits is tolerated.

    Returns:
        A ``(row, column)`` tuple, e.g. ``("H", 12)``. Any input that is not
        exactly one row letter followed by digits (including names with
        trailing junk such as ``"A1x"``) yields the sentinel ``("A", 0)``
        instead of raising.
    """
    # fullmatch (not match) so that trailing garbage is rejected rather than
    # silently parsed as the leading well name.
    m = re.fullmatch(r"([A-Ha-h])\s*([0-9]+)", str(well).strip())
    if m is None:
        return ("A", 0)
    return (m.group(1).upper(), int(m.group(2)))
219
+
220
  def sample_index_to_plate_and_well(sample_idx: int):
221
  """Destination mapping: 96-well plates in reading order, extends to multiple plates."""
222
  plate_num = ((sample_idx - 1) // 96) + 1
 
256
  )
257
  legend_html = "<div style='margin:8px 0 16px 0'>" + "".join(legend_spans) + "</div>"
258
 
259
+ # CSS for grid + tooltip
260
  css = """
261
  <style>
262
  .plate { margin: 10px 0 24px 0; }
 
271
  """
272
 
273
  body = [css, legend_html]
 
274
  for p in range(1, plates_used + 1):
275
  body.append(f"<div class='plate'><div class='plate-title'>Plate {p}</div>")
 
276
  body.append("<div class='grid'>")
277
  body.append("<div class='cell head'></div>")
278
  for c in COLS_96:
279
  body.append(f"<div class='cell head'>{c}</div>")
 
280
  for r in ROWS_96:
281
  body.append(f"<div class='cell head'>{r}</div>")
282
  for c in COLS_96:
 
294
  else:
295
  cell_html = "<div class='cell'></div>"
296
  body.append(cell_html)
297
+ body.append("</div></div>")
 
298
  return "".join(body)
299
 
300
  # ---------- Main flow ----------
 
305
  df = pd.read_excel(uploaded)
306
  elif uploaded.name.endswith(".csv"):
307
  df = pd.read_csv(uploaded)
308
+ else:
309
+ df = pd.read_csv(uploaded, sep="\t", engine="python")
 
 
 
310
 
311
  st.success(f"✅ Loaded file with {len(df)} rows and {len(df.columns)} columns")
312
 
 
313
  df.columns = [str(c).strip() for c in df.columns]
 
 
314
  if not any(c.lower() == "sample" for c in df.columns):
315
  df.insert(0, "Sample", np.arange(1, len(df) + 1))
316
  st.info("`Sample` column missing — automatically generated 1..N.")
317
 
318
# Detect columns named like "Position 1", "position2", ... (case-insensitive).
# The pattern is a raw string, so the escapes must be single backslashes:
# a doubled backslash would match a literal "\" and no header would ever match.
position_cols = [c for c in df.columns if re.match(r"(?i)^position\s*\d+", c)]
 
319
  if not position_cols:
320
  non_pos_cols = {"sample", "total edited", 'volume per "1"', "volume per 1"}
321
  candidate_cols = [c for c in df.columns if c.lower() not in non_pos_cols]
 
 
 
322
  position_cols = candidate_cols
323
  st.info(f"Position columns inferred automatically: {len(position_cols)} detected.")
324
 
 
325
  df[position_cols] = df[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
 
 
326
  if "Total edited" not in df.columns:
327
  df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
328
+ st.info("`Total edited` calculated automatically.")
329
 
 
330
  vol_candidates = [c for c in df.columns if "volume per" in c.lower()]
331
  if not vol_candidates:
332
  df['Volume per "1"'] = 64 / df["Total edited"].replace(0, np.nan)
333
+ df['Volume per "1"'] = df['Volume per "1"'].fillna(0)
 
334
  volume_col = 'Volume per "1"'
335
  else:
336
  volume_col = vol_candidates[0]
337
 
 
338
  if df[volume_col].max() > max_per_well_ul:
339
+ st.error(f"❌ A row exceeds the max per-well cap ({max_per_well_ul} µL).")
 
 
 
340
  st.stop()
341
 
 
342
  vol_per_one_series = pd.to_numeric(df[volume_col], errors="coerce").fillna(0.0)
343
+ total_volume_per_input = [float(vol_per_one_series[df[pos] == 1].sum()) for pos in position_cols]
344
+ wells_needed_per_input = [int(ceil(tv / max_per_well_ul)) if tv > 0 else 0 for tv in total_volume_per_input]
 
 
 
 
 
 
 
 
345
  num_inputs = len(position_cols)
346
  max_wells_per_source = max(wells_needed_per_input) if wells_needed_per_input else 0
347
 
348
  st.markdown("### 👀 Preview: Suggested Uniform Layout")
349
  if max_wells_per_source == 0:
350
+ st.info("No edits detected nothing to allocate.")
351
  st.stop()
352
 
 
 
 
 
 
 
353
  total_wells_needed_uniform = num_inputs * max_wells_per_source
354
+ plates_needed = int(ceil(total_wells_needed_uniform / 96)) or 1
355
 
356
+ # Correct well sorting (A1 A2 → A12)
 
357
  global_wells = sorted(
358
  build_global_wells_list(plates_needed),
359
+ key=lambda x: (x[0], ROWS_96.index(parse_well_name(x[1])[0]), parse_well_name(x[1])[1])
360
  )
361
+ global_wells = global_wells[:total_wells_needed_uniform]
362
 
363
+ # Assign blocks
364
+ assigned_wells_map, well_to_input, preview_rows = {}, {}, []
 
 
365
  for i in range(1, num_inputs + 1):
366
+ start, end = (i - 1) * max_wells_per_source, i * max_wells_per_source
 
367
  block = global_wells[start:end]
368
  assigned_wells_map[i] = block
369
  for j, (p, w) in enumerate(block, start=1):
370
  well_to_input[(p, w)] = (i, j)
 
371
  block_str = ", ".join([f"P{p}:{w}" for (p, w) in block])
372
  preview_rows.append({
373
  "Input (Position #)": i,
374
  "Total demand (µL)": round(total_volume_per_input[i-1], 2),
375
+ "Wells needed": wells_needed_per_input[i-1],
376
  "Allocated (uniform)": max_wells_per_source,
377
  "Assigned wells": block_str
378
  })
 
380
  preview_df = pd.DataFrame(preview_rows)
381
  st.dataframe(preview_df, use_container_width=True, height=300)
382
 
 
383
  st.markdown("#### Plate Map (hover cells for details)")
384
  plate_html = render_plate_map_html(plates_needed, well_to_input, max_wells_per_source, num_inputs)
385
  st.markdown(plate_html, unsafe_allow_html=True)
386
 
387
  # --- Generate Commands ---
388
  st.markdown("### ✅ Generate Pipetting Commands")
389
+ if st.button("Generate using this layout"):
 
 
 
390
  per_input_well_cum = {i: [0.0] * max_wells_per_source for i in range(1, num_inputs + 1)}
391
+ commands, source_volume_totals = [], {}
 
392
 
393
  for _, row in df.iterrows():
394
  sample_id = int(row["Sample"])
 
401
  for pos_idx, col in enumerate(position_cols, start=1):
402
  if int(row[col]) != 1:
403
  continue
 
404
  wells_for_input = assigned_wells_map[pos_idx]
405
  cum_list = per_input_well_cum[pos_idx]
 
 
406
  for j, ((src_plate, src_well), current_vol) in enumerate(zip(wells_for_input, cum_list)):
407
  if current_vol + vol_per_one <= max_per_well_ul:
408
+ cum_list[j] += vol_per_one
409
+ source_volume_totals[(src_plate, src_well)] = source_volume_totals.get((src_plate, src_well), 0) + vol_per_one
410
+ commands.append({
411
+ "Input #": pos_idx,
412
+ "Source plate": src_plate,
413
+ "Source well": src_well,
414
+ "Destination plate": dest_plate,
415
+ "Destination well": dest_well,
416
+ "Volume": round(vol_per_one, 2),
417
+ "Tool": tool
418
+ })
419
  break
420
 
421
+ # Sort commands with numeric logic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  commands_df = pd.DataFrame(commands).sort_values(
423
  by=["Input #", "Source plate", "Source well", "Destination plate", "Destination well"],
424
+ key=lambda col: col.apply(
425
+ lambda v: parse_well_name(v)[1] if col.name.endswith("well") else int(v)
426
+ ) if col.name.endswith("well") or col.name in ["Input #", "Source plate", "Destination plate"] else col,
427
  kind="stable"
428
  )
429
 
430
+ st.success(f"✅ Generated {len(commands_df)} commands across {num_inputs} inputs.")
 
431
 
432
+ # Source summary numeric sort
433
  summary_rows = []
434
  for i in range(1, num_inputs + 1):
435
  for (p, w), used in zip(assigned_wells_map[i], per_input_well_cum[i]):
436
  total = source_volume_totals.get((p, w), 0.0)
437
  summary_rows.append({
438
+ "Source": i, "Source plate": p, "Source well": w,
 
 
439
  "Total volume taken (µL)": round(total, 2),
440
  "Allocated capacity (µL)": round(max_per_well_ul, 2)
441
  })
 
 
442
  summary_df = pd.DataFrame(summary_rows).sort_values(
443
  by=["Source", "Source plate", "Source well"],
444
+ key=lambda col: col.apply(lambda v: parse_well_name(v)[1]) if col.name == "Source well" else col,
445
  kind="stable"
446
  )
447
 
448
+ # Display results
 
 
 
449
  st.markdown("### 💧 Pipetting Commands")
450
  st.dataframe(commands_df, use_container_width=True, height=400)
451
+ st.download_button("⬇️ Download Commands CSV", commands_df.to_csv(index=False), "pipetting_commands.csv", mime="text/csv")
 
 
 
 
 
452
 
453
  st.markdown("### 📊 Source Volume Summary")
454
  st.dataframe(summary_df, use_container_width=True, height=400)
455
+ st.download_button("⬇️ Download Source Summary CSV", summary_df.to_csv(index=False), "source_volume_summary.csv", mime="text/csv")
 
 
 
 
 
456
 
457
  except Exception as e:
458
  st.error(f"❌ Error processing file: {e}")