bitconverter3

Sleeping

App Files Community

wenjun99 committed on Oct 23, 2025

Commit

4be5ea2

verified ·

1 Parent(s): f7a3263

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -45

app.py CHANGED Viewed

@@ -169,9 +169,6 @@ with tab2:
     else:
         st.info("👆 Upload a file to start the reverse conversion.")
-# --------------------------------------------------
-# TAB 3: Pipetting Command Generator
-# --------------------------------------------------
 # --------------------------------------------------
 # TAB 3: Pipetting Command Generator
 # --------------------------------------------------
@@ -186,7 +183,8 @@ with tab3:
     Upload your sample file (Excel, CSV, or TXT) containing binary mutation data.
     The app will:
     - Auto-detect or create `Sample`, `Position#`, `Total edited`, and `Volume per "1"` columns
-    - Calculate total demand per input and suggest a **uniform layout width** (consecutive wells per input)
     - **Preview** the layout on a plate map (with tooltips)
     - After confirmation, generate pipetting commands and a source volume summary
     """)
@@ -197,7 +195,7 @@ with tab3:
         min_value=10.0, max_value=2000.0, value=160.0, step=10.0
     )
-    # ---------- Helpers (plate geometry & viz) ----------
     ROWS_96 = ["A", "B", "C", "D", "E", "F", "G", "H"]
     COLS_96 = list(range(1, 13))
@@ -211,7 +209,7 @@ with tab3:
                 yield f"{r}{c}"
     def parse_well_name(well: str):
-        """Split well name like 'A1' or 'H12' into (row_letter, numeric_col)."""
         m = re.match(r"([A-Ha-h])\s*([0-9]+)", str(well).strip())
         if not m:
             return ("A", 0)
@@ -242,10 +240,7 @@ with tab3:
     ]
     def render_plate_map_html(plates_used, well_to_input, max_wells_per_source, inputs_count):
-        """
-        Render HTML plates. well_to_input: dict[(plate, well)] = (input_idx, index_within_input_block)
-        """
-        # Legend HTML
         legend_spans = []
         for i in range(1, inputs_count + 1):
             color = PALETTE[(i-1) % len(PALETTE)]
@@ -256,7 +251,6 @@ with tab3:
             )
         legend_html = "<div style='margin:8px 0 16px 0'>" + "".join(legend_spans) + "</div>"
-        # CSS for grid + tooltip
         css = """
         <style>
         .plate { margin: 10px 0 24px 0; }
@@ -305,40 +299,69 @@ with tab3:
                 df = pd.read_excel(uploaded)
             elif uploaded.name.endswith(".csv"):
                 df = pd.read_csv(uploaded)
-            else:
-                df = pd.read_csv(uploaded, sep="\t", engine="python")
             st.success(f"✅ Loaded file with {len(df)} rows and {len(df.columns)} columns")
             df.columns = [str(c).strip() for c in df.columns]
             if not any(c.lower() == "sample" for c in df.columns):
                 df.insert(0, "Sample", np.arange(1, len(df) + 1))
                 st.info("`Sample` column missing — automatically generated 1..N.")
-            position_cols = [c for c in df.columns if re.match(r"(?i)^position\\s*\\d+", c)]
             if not position_cols:
                 non_pos_cols = {"sample", "total edited", 'volume per "1"', "volume per 1"}
                 candidate_cols = [c for c in df.columns if c.lower() not in non_pos_cols]
                 position_cols = candidate_cols
                 st.info(f"Position columns inferred automatically: {len(position_cols)} detected.")
             df[position_cols] = df[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
             if "Total edited" not in df.columns:
                 df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
-                st.info("`Total edited` calculated automatically.")
             vol_candidates = [c for c in df.columns if "volume per" in c.lower()]
             if not vol_candidates:
-                df['Volume per "1"'] = 64 / df["Total edited"].replace(0, np.nan)
-                df['Volume per "1"'] = df['Volume per "1"'].fillna(0)
                 volume_col = 'Volume per "1"'
             else:
                 volume_col = vol_candidates[0]
             if df[volume_col].max() > max_per_well_ul:
-                st.error(f"❌ A row exceeds the max per-well cap ({max_per_well_ul} µL).")
                 st.stop()
             vol_per_one_series = pd.to_numeric(df[volume_col], errors="coerce").fillna(0.0)
             total_volume_per_input = [float(vol_per_one_series[df[pos] == 1].sum()) for pos in position_cols]
             wells_needed_per_input = [int(ceil(tv / max_per_well_ul)) if tv > 0 else 0 for tv in total_volume_per_input]
@@ -350,17 +373,27 @@ with tab3:
                 st.info("No edits detected — nothing to allocate.")
                 st.stop()
             total_wells_needed_uniform = num_inputs * max_wells_per_source
             plates_needed = int(ceil(total_wells_needed_uniform / 96)) or 1
-            # ✅ Correct well sorting (A1 → A2 → A12)
             global_wells = sorted(
                 build_global_wells_list(plates_needed),
-                key=lambda x: (x[0], ROWS_96.index(parse_well_name(x[1])[0]), parse_well_name(x[1])[1])
             )
             global_wells = global_wells[:total_wells_needed_uniform]
-            # Assign blocks
             assigned_wells_map, well_to_input, preview_rows = {}, {}, []
             for i in range(1, num_inputs + 1):
                 start, end = (i - 1) * max_wells_per_source, i * max_wells_per_source
@@ -372,7 +405,7 @@ with tab3:
                 preview_rows.append({
                     "Input (Position #)": i,
                     "Total demand (µL)": round(total_volume_per_input[i-1], 2),
-                    "Wells needed": wells_needed_per_input[i-1],
                     "Allocated (uniform)": max_wells_per_source,
                     "Assigned wells": block_str
                 })
@@ -387,6 +420,7 @@ with tab3:
             # --- Generate Commands ---
             st.markdown("### ✅ Generate Pipetting Commands")
             if st.button("Generate using this layout"):
                 per_input_well_cum = {i: [0.0] * max_wells_per_source for i in range(1, num_inputs + 1)}
                 commands, source_volume_totals = [], {}
@@ -403,33 +437,61 @@ with tab3:
                             continue
                         wells_for_input = assigned_wells_map[pos_idx]
                         cum_list = per_input_well_cum[pos_idx]
                         for j, ((src_plate, src_well), current_vol) in enumerate(zip(wells_for_input, cum_list)):
                             if current_vol + vol_per_one <= max_per_well_ul:
-                                cum_list[j] += vol_per_one
-                                source_volume_totals[(src_plate, src_well)] = source_volume_totals.get((src_plate, src_well), 0) + vol_per_one
-                                commands.append({
-                                    "Input #": pos_idx,
-                                    "Source plate": src_plate,
-                                    "Source well": src_well,
-                                    "Destination plate": dest_plate,
-                                    "Destination well": dest_well,
-                                    "Volume": round(vol_per_one, 2),
-                                    "Tool": tool
-                                })
                                 break
-                # ✅ Sort commands with numeric logic
-                commands_df = pd.DataFrame(commands).sort_values(
-                    by=["Input #", "Source plate", "Source well", "Destination plate", "Destination well"],
-                    key=lambda col: col.apply(
-                        lambda v: parse_well_name(v)[1] if col.name.endswith("well") else int(v)
-                    ) if col.name.endswith("well") or col.name in ["Input #", "Source plate", "Destination plate"] else col,
                     kind="stable"
                 )
                 st.success(f"✅ Generated {len(commands_df)} commands across {num_inputs} inputs.")
-                # ✅ Source summary numeric sort
                 summary_rows = []
                 for i in range(1, num_inputs + 1):
                     for (p, w), used in zip(assigned_wells_map[i], per_input_well_cum[i]):
@@ -439,11 +501,15 @@ with tab3:
                             "Total volume taken (µL)": round(total, 2),
                             "Allocated capacity (µL)": round(max_per_well_ul, 2)
                         })
-                summary_df = pd.DataFrame(summary_rows).sort_values(
-                    by=["Source", "Source plate", "Source well"],
-                    key=lambda col: col.apply(lambda v: parse_well_name(v)[1]) if col.name == "Source well" else col,
                     kind="stable"
-                )
                 # Display results
                 st.markdown("### 💧 Pipetting Commands")
@@ -458,4 +524,3 @@ with tab3:
             st.error(f"❌ Error processing file: {e}")
     else:
         st.info("👆 Upload an Excel/CSV/TXT file to start.")

     else:
         st.info("👆 Upload a file to start the reverse conversion.")
 # --------------------------------------------------
 # TAB 3: Pipetting Command Generator
 # --------------------------------------------------
     Upload your sample file (Excel, CSV, or TXT) containing binary mutation data.
     The app will:
     - Auto-detect or create `Sample`, `Position#`, `Total edited`, and `Volume per "1"` columns
+    - Let you set the **Desired total volume per sample (µL)** used to compute `Volume per "1"`
+    - Calculate total demand per input and suggest a **uniform layout** (same # consecutive wells per input)
     - **Preview** the layout on a plate map (with tooltips)
     - After confirmation, generate pipetting commands and a source volume summary
     """)
         min_value=10.0, max_value=2000.0, value=160.0, step=10.0
     )
+    # ---------- Helpers (plate geometry, parsing, viz) ----------
     ROWS_96 = ["A", "B", "C", "D", "E", "F", "G", "H"]
     COLS_96 = list(range(1, 13))
                 yield f"{r}{c}"
     def parse_well_name(well: str):
+        """Split 'A1'/'H12' → (row_letter, col_num). Robust to stray spaces."""
         m = re.match(r"([A-Ha-h])\s*([0-9]+)", str(well).strip())
         if not m:
             return ("A", 0)
     ]
     def render_plate_map_html(plates_used, well_to_input, max_wells_per_source, inputs_count):
+        """Fancy HTML plate grids with tooltips."""
         legend_spans = []
         for i in range(1, inputs_count + 1):
             color = PALETTE[(i-1) % len(PALETTE)]
             )
         legend_html = "<div style='margin:8px 0 16px 0'>" + "".join(legend_spans) + "</div>"
         css = """
         <style>
         .plate { margin: 10px 0 24px 0; }
                 df = pd.read_excel(uploaded)
             elif uploaded.name.endswith(".csv"):
                 df = pd.read_csv(uploaded)
+            else:  # TXT (tab-delimited try, else CSV)
+                try:
+                    df = pd.read_csv(uploaded, sep="\t")
+                except Exception:
+                    df = pd.read_csv(uploaded)
             st.success(f"✅ Loaded file with {len(df)} rows and {len(df.columns)} columns")
+            # --- Clean column names ---
             df.columns = [str(c).strip() for c in df.columns]
+            # --- Ensure Sample column ---
             if not any(c.lower() == "sample" for c in df.columns):
                 df.insert(0, "Sample", np.arange(1, len(df) + 1))
                 st.info("`Sample` column missing — automatically generated 1..N.")
+            # --- Detect & numerically sort Position columns ---
+            position_cols = [c for c in df.columns if re.match(r"(?i)^position\s*\d+", c)]
             if not position_cols:
                 non_pos_cols = {"sample", "total edited", 'volume per "1"', "volume per 1"}
                 candidate_cols = [c for c in df.columns if c.lower() not in non_pos_cols]
                 position_cols = candidate_cols
                 st.info(f"Position columns inferred automatically: {len(position_cols)} detected.")
+            def pos_key(col_name: str):
+                m = re.search(r"(\d+)", col_name)
+                return int(m.group(1)) if m else 10**9
+            position_cols = sorted(position_cols, key=pos_key)
+            # Coerce Position columns to integers (non-numeric/missing → 0); values are expected to be 0/1 but are not clamped
             df[position_cols] = df[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
+            # --- Ensure Total edited ---
             if "Total edited" not in df.columns:
                 df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
+                st.info("`Total edited` column missing — calculated automatically as sum of 1s per row.")
+            # --- User setting for Volume per "1" calculation ---
+            st.markdown("#### ⚙️ Volume Calculation Settings")
+            default_total_vol = st.number_input(
+                "Desired total volume per sample (µL)",
+                min_value=1.0, max_value=10000.0, value=64.0, step=1.0,
+                help="Used to compute Volume per '1' as (Desired total volume / Total edited) when not provided."
+            )
             vol_candidates = [c for c in df.columns if "volume per" in c.lower()]
             if not vol_candidates:
+                df['Volume per "1"'] = default_total_vol / df["Total edited"].replace(0, np.nan)
+                df['Volume per "1"'] = df['Volume per "1"'].fillna(0)  # rows with 0 edits → 0 µL
+                st.info(f'`Volume per "1"` column missing — calculated automatically as {default_total_vol:.0f} µL / Total edited.')
                 volume_col = 'Volume per "1"'
             else:
                 volume_col = vol_candidates[0]
+            # Safety: per-transfer must not exceed per-well cap
             if df[volume_col].max() > max_per_well_ul:
+                st.error(
+                    f"❌ At least one row has `Volume per \"1\"` greater than the per-well cap ({max_per_well_ul} µL). "
+                    "Increase the cap or reduce per-transfer volume."
+                )
                 st.stop()
+            # --- Compute total demand per input ---
             vol_per_one_series = pd.to_numeric(df[volume_col], errors="coerce").fillna(0.0)
             total_volume_per_input = [float(vol_per_one_series[df[pos] == 1].sum()) for pos in position_cols]
             wells_needed_per_input = [int(ceil(tv / max_per_well_ul)) if tv > 0 else 0 for tv in total_volume_per_input]
                 st.info("No edits detected — nothing to allocate.")
                 st.stop()
+            st.write(
+                f"💡 Suggested layout: **{max_wells_per_source} consecutive wells per input** "
+                f"(cap {max_per_well_ul:.0f} µL/well)."
+            )
+            # Total wells and plates needed
             total_wells_needed_uniform = num_inputs * max_wells_per_source
             plates_needed = int(ceil(total_wells_needed_uniform / 96)) or 1
+            # ✅ Correct, robust well ordering for layout
             global_wells = sorted(
                 build_global_wells_list(plates_needed),
+                key=lambda x: (
+                    x[0],  # plate
+                    ROWS_96.index(parse_well_name(x[1])[0]),  # row index
+                    parse_well_name(x[1])[1]  # column number
+                )
             )
             global_wells = global_wells[:total_wells_needed_uniform]
+            # Assign uniform blocks to each input
             assigned_wells_map, well_to_input, preview_rows = {}, {}, []
             for i in range(1, num_inputs + 1):
                 start, end = (i - 1) * max_wells_per_source, i * max_wells_per_source
                 preview_rows.append({
                     "Input (Position #)": i,
                     "Total demand (µL)": round(total_volume_per_input[i-1], 2),
+                    "Wells needed (actual)": wells_needed_per_input[i-1],
                     "Allocated (uniform)": max_wells_per_source,
                     "Assigned wells": block_str
                 })
             # --- Generate Commands ---
             st.markdown("### ✅ Generate Pipetting Commands")
             if st.button("Generate using this layout"):
+                # Track per-input per-well usage (µL)
                 per_input_well_cum = {i: [0.0] * max_wells_per_source for i in range(1, num_inputs + 1)}
                 commands, source_volume_totals = [], {}
                             continue
                         wells_for_input = assigned_wells_map[pos_idx]
                         cum_list = per_input_well_cum[pos_idx]
+                        chosen = None
                         for j, ((src_plate, src_well), current_vol) in enumerate(zip(wells_for_input, cum_list)):
                             if current_vol + vol_per_one <= max_per_well_ul:
+                                chosen = (j, src_plate, src_well)
                                 break
+                        if chosen is None:
+                            st.error(
+                                f"Allocation exhausted for Input {pos_idx} while creating commands. "
+                                "Increase the max volume per well or review per-transfer volume."
+                            )
+                            st.stop()
+                        j, src_plate, src_well = chosen
+                        cum_list[j] += vol_per_one
+                        per_input_well_cum[pos_idx] = cum_list
+                        source_volume_totals[(src_plate, src_well)] = source_volume_totals.get((src_plate, src_well), 0.0) + vol_per_one
+                        commands.append({
+                            "Input #": pos_idx,
+                            "Source plate": src_plate,
+                            "Source well": src_well,
+                            "Destination plate": dest_plate,
+                            "Destination well": dest_well,
+                            "Volume": round(vol_per_one, 2),
+                            "Tool": tool
+                        })
+                commands_df = pd.DataFrame(commands)
+                # ✅ Add helper sort columns to ensure Source/Destination wells sort A1→A12, B1→B12, ...
+                def row_idx_from_well(w): return ROWS_96.index(parse_well_name(w)[0])
+                def col_num_from_well(w): return parse_well_name(w)[1]
+                commands_df["Src_row_idx"] = commands_df["Source well"].apply(row_idx_from_well)
+                commands_df["Src_col_num"] = commands_df["Source well"].apply(col_num_from_well)
+                commands_df["Dst_row_idx"] = commands_df["Destination well"].apply(row_idx_from_well)
+                commands_df["Dst_col_num"] = commands_df["Destination well"].apply(col_num_from_well)
+                commands_df = commands_df.sort_values(
+                    by=["Input #", "Source plate", "Src_row_idx", "Src_col_num",
+                        "Destination plate", "Dst_row_idx", "Dst_col_num"],
                     kind="stable"
                 )
+                # Drop helper columns & order final columns
+                commands_df = commands_df[[
+                    "Input #", "Source plate", "Source well",
+                    "Destination plate", "Destination well", "Volume", "Tool"
+                ]]
                 st.success(f"✅ Generated {len(commands_df)} commands across {num_inputs} inputs.")
+                # ✅ Source summary numeric sort by plate → row → col
                 summary_rows = []
                 for i in range(1, num_inputs + 1):
                     for (p, w), used in zip(assigned_wells_map[i], per_input_well_cum[i]):
                             "Total volume taken (µL)": round(total, 2),
                             "Allocated capacity (µL)": round(max_per_well_ul, 2)
                         })
+                summary_df = pd.DataFrame(summary_rows)
+                summary_df["Src_row_idx"] = summary_df["Source well"].apply(row_idx_from_well)
+                summary_df["Src_col_num"] = summary_df["Source well"].apply(col_num_from_well)
+                summary_df = summary_df.sort_values(
+                    by=["Source", "Source plate", "Src_row_idx", "Src_col_num"],
                     kind="stable"
+                )[
+                    ["Source", "Source plate", "Source well", "Total volume taken (µL)", "Allocated capacity (µL)"]
+                ]
                 # Display results
                 st.markdown("### 💧 Pipetting Commands")
             st.error(f"❌ Error processing file: {e}")
     else:
         st.info("👆 Upload an Excel/CSV/TXT file to start.")