bitconverter3

Sleeping

App Files Files Community

wenjun99 committed on Oct 23, 2025

Commit

f7a3263

verified ·

1 Parent(s): f858297

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -124

app.py CHANGED Viewed

@@ -172,7 +172,9 @@ with tab2:
 # --------------------------------------------------
 # TAB 3: Pipetting Command Generator
 # --------------------------------------------------
 with tab3:
     import numpy as np
     import pandas as pd
@@ -208,6 +210,13 @@ with tab3:
             for c in COLS_96:
                 yield f"{r}{c}"
     def sample_index_to_plate_and_well(sample_idx: int):
         """Destination mapping: 96-well plates in reading order, extends to multiple plates."""
         plate_num = ((sample_idx - 1) // 96) + 1
@@ -247,7 +256,7 @@ with tab3:
             )
         legend_html = "<div style='margin:8px 0 16px 0'>" + "".join(legend_spans) + "</div>"
-        # CSS for grid + tooltip (title attribute works too; we use both)
         css = """
         <style>
         .plate { margin: 10px 0 24px 0; }
@@ -262,15 +271,12 @@ with tab3:
         """
         body = [css, legend_html]
-        # Build each plate
         for p in range(1, plates_used + 1):
             body.append(f"<div class='plate'><div class='plate-title'>Plate {p}</div>")
-            # header row
             body.append("<div class='grid'>")
             body.append("<div class='cell head'></div>")
             for c in COLS_96:
                 body.append(f"<div class='cell head'>{c}</div>")
-            # rows
             for r in ROWS_96:
                 body.append(f"<div class='cell head'>{r}</div>")
                 for c in COLS_96:
@@ -288,8 +294,7 @@ with tab3:
                     else:
                         cell_html = "<div class='cell'></div>"
                     body.append(cell_html)
-            body.append("</div></div>")  # grid + plate
         return "".join(body)
     # ---------- Main flow ----------
@@ -300,113 +305,74 @@ with tab3:
                 df = pd.read_excel(uploaded)
             elif uploaded.name.endswith(".csv"):
                 df = pd.read_csv(uploaded)
-            else:  # TXT (tab-delimited fallback)
-                try:
-                    df = pd.read_csv(uploaded, sep="\t")
-                except Exception:
-                    df = pd.read_csv(uploaded)
             st.success(f"✅ Loaded file with {len(df)} rows and {len(df.columns)} columns")
-            # --- Clean column names ---
             df.columns = [str(c).strip() for c in df.columns]
-            # --- Ensure Sample column ---
             if not any(c.lower() == "sample" for c in df.columns):
                 df.insert(0, "Sample", np.arange(1, len(df) + 1))
                 st.info("`Sample` column missing — automatically generated 1..N.")
-            # --- Detect Position columns ---
-            position_cols = [c for c in df.columns if re.match(r"(?i)^position\s*\d+", c)]
             if not position_cols:
                 non_pos_cols = {"sample", "total edited", 'volume per "1"', "volume per 1"}
                 candidate_cols = [c for c in df.columns if c.lower() not in non_pos_cols]
-                if not candidate_cols:
-                    st.error("❌ Could not detect any Position columns.")
-                    st.stop()
                 position_cols = candidate_cols
                 st.info(f"Position columns inferred automatically: {len(position_cols)} detected.")
-            # Normalize Position columns to numeric {0,1}
             df[position_cols] = df[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
-            # --- Ensure Total edited ---
             if "Total edited" not in df.columns:
                 df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
-                st.info("`Total edited` column missing — calculated automatically as sum of 1s per row.")
-            # --- Ensure Volume per "1" ---
             vol_candidates = [c for c in df.columns if "volume per" in c.lower()]
             if not vol_candidates:
                 df['Volume per "1"'] = 64 / df["Total edited"].replace(0, np.nan)
-                df['Volume per "1"'] = df['Volume per "1"'].fillna(0)  # rows with 0 edits → 0 µL
-                st.info('`Volume per "1"` column missing — calculated automatically as 64 / Total edited.')
                 volume_col = 'Volume per "1"'
             else:
                 volume_col = vol_candidates[0]
-            # Safety: per-transfer must not exceed per-well cap
             if df[volume_col].max() > max_per_well_ul:
-                st.error(
-                    f"❌ At least one row has `Volume per \"1\"` greater than the per-well cap ({max_per_well_ul} µL). "
-                    "Increase the cap or reduce per-transfer volume."
-                )
                 st.stop()
-            # --- Compute total demand per input ---
             vol_per_one_series = pd.to_numeric(df[volume_col], errors="coerce").fillna(0.0)
-            total_volume_per_input = []
-            for pos in position_cols:
-                mask = df[pos] == 1
-                total_vol = float(vol_per_one_series[mask].sum())
-                total_volume_per_input.append(total_vol)
-            wells_needed_per_input = [
-                int(ceil(tv / max_per_well_ul)) if tv > 0 else 0
-                for tv in total_volume_per_input
-            ]
             num_inputs = len(position_cols)
             max_wells_per_source = max(wells_needed_per_input) if wells_needed_per_input else 0
             st.markdown("### 👀 Preview: Suggested Uniform Layout")
             if max_wells_per_source == 0:
-                st.info("No edits detected (all inputs require 0 µL). Nothing to allocate.")
                 st.stop()
-            st.write(
-                f"💡 Suggested layout: **{max_wells_per_source} consecutive wells per input** "
-                f"(cap {max_per_well_ul:.0f} µL/well)."
-            )
-            # Total wells and plates needed
             total_wells_needed_uniform = num_inputs * max_wells_per_source
-            plates_needed = int(ceil(total_wells_needed_uniform / 96)) if total_wells_needed_uniform > 0 else 1
-            # Global wells list long enough to cover allocation
-            global_wells = build_global_wells_list(plates_needed)  # [(p, 'A1'), ...]
             global_wells = sorted(
                 build_global_wells_list(plates_needed),
-                key=lambda x: (x[0], ROWS_96.index(x[1][0]), int(x[1][1:]))
             )
-            global_wells = global_wells[:total_wells_needed_uniform]  # exact length
-            # Assign blocks of size max_wells_per_source per input in order
-            assigned_wells_map = {}  # input_idx (1-based) -> list[(plate, well)]
-            well_to_input = {}       # (plate, well) -> (input_idx, within_block_index 1..max_wells_per_source)
-            preview_rows = []
             for i in range(1, num_inputs + 1):
-                start = (i - 1) * max_wells_per_source
-                end = start + max_wells_per_source
                 block = global_wells[start:end]
                 assigned_wells_map[i] = block
                 for j, (p, w) in enumerate(block, start=1):
                     well_to_input[(p, w)] = (i, j)
-                # Make a readable block string
                 block_str = ", ".join([f"P{p}:{w}" for (p, w) in block])
                 preview_rows.append({
                     "Input (Position #)": i,
                     "Total demand (µL)": round(total_volume_per_input[i-1], 2),
-                    "Wells needed (actual)": wells_needed_per_input[i-1],
                     "Allocated (uniform)": max_wells_per_source,
                     "Assigned wells": block_str
                 })
@@ -414,20 +380,15 @@ with tab3:
             preview_df = pd.DataFrame(preview_rows)
             st.dataframe(preview_df, use_container_width=True, height=300)
-            # Fancy Plate Map with tooltips
             st.markdown("#### Plate Map (hover cells for details)")
             plate_html = render_plate_map_html(plates_needed, well_to_input, max_wells_per_source, num_inputs)
             st.markdown(plate_html, unsafe_allow_html=True)
             # --- Generate Commands ---
             st.markdown("### ✅ Generate Pipetting Commands")
-            generate = st.button("Generate using this layout")
-            if generate:
-                # Track per-input per-well usage (µL)
                 per_input_well_cum = {i: [0.0] * max_wells_per_source for i in range(1, num_inputs + 1)}
-                commands = []
-                source_volume_totals = {}  # (plate, well) -> total µL drawn
                 for _, row in df.iterrows():
                     sample_id = int(row["Sample"])
@@ -440,90 +401,58 @@ with tab3:
                     for pos_idx, col in enumerate(position_cols, start=1):
                         if int(row[col]) != 1:
                             continue
                         wells_for_input = assigned_wells_map[pos_idx]
                         cum_list = per_input_well_cum[pos_idx]
-                        chosen = None
                         for j, ((src_plate, src_well), current_vol) in enumerate(zip(wells_for_input, cum_list)):
                             if current_vol + vol_per_one <= max_per_well_ul:
-                                chosen = (j, src_plate, src_well)
                                 break
-                        if chosen is None:
-                            # With uniform pre-allocation this shouldn't happen unless extreme rounding / cap too small
-                            st.error(
-                                f"Allocation exhausted for Input {pos_idx} while creating commands. "
-                                "Increase the max volume per well or review per-transfer volume."
-                            )
-                            st.stop()
-                        j, src_plate, src_well = chosen
-                        cum_list[j] += vol_per_one
-                        per_input_well_cum[pos_idx] = cum_list
-                        source_volume_totals[(src_plate, src_well)] = source_volume_totals.get((src_plate, src_well), 0.0) + vol_per_one
-                        commands.append({
-                            "Input #": pos_idx,
-                            "Source plate": src_plate,
-                            "Source well": src_well,
-                            "Destination plate": dest_plate,
-                            "Destination well": dest_well,
-                            "Volume": round(vol_per_one, 2),
-                            "Tool": tool
-                        })
-                # Compile results
                 commands_df = pd.DataFrame(commands).sort_values(
                     by=["Input #", "Source plate", "Source well", "Destination plate", "Destination well"],
-                    key=lambda col: col.astype(int) if col.name == "Input #" else col,
                     kind="stable"
                 )
-                commands_df = commands_df[["Input #", "Source plate", "Source well",
-                                           "Destination plate", "Destination well", "Volume", "Tool"]]
-                # Source summary (include allocated capacity per well)
                 summary_rows = []
                 for i in range(1, num_inputs + 1):
                     for (p, w), used in zip(assigned_wells_map[i], per_input_well_cum[i]):
                         total = source_volume_totals.get((p, w), 0.0)
                         summary_rows.append({
-                            "Source": i,
-                            "Source plate": p,
-                            "Source well": w,
                             "Total volume taken (µL)": round(total, 2),
                             "Allocated capacity (µL)": round(max_per_well_ul, 2)
                         })
-                summary_df = pd.DataFrame(summary_rows)
                 summary_df = pd.DataFrame(summary_rows).sort_values(
                     by=["Source", "Source plate", "Source well"],
-                    key=lambda col: col.astype(int) if col.name == "Source" else col,
                     kind="stable"
                 )
-                used_plates = max([p for wells in assigned_wells_map.values() for (p, _) in wells]) if assigned_wells_map else 1
-                st.success(f"✅ Generated {len(commands_df)} commands across {num_inputs} inputs using {used_plates} plate(s).")
                 st.markdown("### 💧 Pipetting Commands")
                 st.dataframe(commands_df, use_container_width=True, height=400)
-                st.download_button(
-                    "⬇️ Download Commands CSV",
-                    commands_df.to_csv(index=False),
-                    "pipetting_commands.csv",
-                    mime="text/csv"
-                )
                 st.markdown("### 📊 Source Volume Summary")
                 st.dataframe(summary_df, use_container_width=True, height=400)
-                st.download_button(
-                    "⬇️ Download Source Summary CSV",
-                    summary_df.to_csv(index=False),
-                    "source_volume_summary.csv",
-                    mime="text/csv"
-                )
         except Exception as e:
             st.error(f"❌ Error processing file: {e}")

 # --------------------------------------------------
 # TAB 3: Pipetting Command Generator
 # --------------------------------------------------
+# --------------------------------------------------
+# TAB 3: Pipetting Command Generator
+# --------------------------------------------------
 with tab3:
     import numpy as np
     import pandas as pd
             for c in COLS_96:
                 yield f"{r}{c}"
+    def parse_well_name(well: str):
+        """Split well name like 'A1' or 'H12' into (row_letter, numeric_col).
+
+        The argument is passed through ``str()`` and stripped of surrounding
+        whitespace before matching, so non-string values are accepted.
+
+        Args:
+            well: Well label made of a row letter A-H (either case),
+                optional whitespace, then one or more digits.
+
+        Returns:
+            A ``(row_letter, column_number)`` tuple with the letter
+            upper-cased and the column converted to ``int``. When the label
+            does not match, the placeholder ``("A", 0)`` is returned instead
+            of raising.
+        """
+        # re.match anchors only at the start of the string, so any characters
+        # following the digits are ignored rather than rejected.
+        # NOTE(review): relies on `re` being imported elsewhere in app.py - confirm.
+        m = re.match(r"([A-Ha-h])\s*([0-9]+)", str(well).strip())
+        if not m:
+            # Unparseable label: fall back to row "A", column 0 (a column
+            # number lower than any real well column starting at 1).
+            return ("A", 0)
+        return (m.group(1).upper(), int(m.group(2)))
     def sample_index_to_plate_and_well(sample_idx: int):
         """Destination mapping: 96-well plates in reading order, extends to multiple plates."""
         plate_num = ((sample_idx - 1) // 96) + 1
             )
         legend_html = "<div style='margin:8px 0 16px 0'>" + "".join(legend_spans) + "</div>"
+        # CSS for grid + tooltip
         css = """
         <style>
         .plate { margin: 10px 0 24px 0; }
         """
         body = [css, legend_html]
         for p in range(1, plates_used + 1):
             body.append(f"<div class='plate'><div class='plate-title'>Plate {p}</div>")
             body.append("<div class='grid'>")
             body.append("<div class='cell head'></div>")
             for c in COLS_96:
                 body.append(f"<div class='cell head'>{c}</div>")
             for r in ROWS_96:
                 body.append(f"<div class='cell head'>{r}</div>")
                 for c in COLS_96:
                     else:
                         cell_html = "<div class='cell'></div>"
                     body.append(cell_html)
+            body.append("</div></div>")
         return "".join(body)
     # ---------- Main flow ----------
                 df = pd.read_excel(uploaded)
             elif uploaded.name.endswith(".csv"):
                 df = pd.read_csv(uploaded)
+            else:
+                df = pd.read_csv(uploaded, sep="\t", engine="python")
             st.success(f"✅ Loaded file with {len(df)} rows and {len(df.columns)} columns")
             df.columns = [str(c).strip() for c in df.columns]
             if not any(c.lower() == "sample" for c in df.columns):
                 df.insert(0, "Sample", np.arange(1, len(df) + 1))
                 st.info("`Sample` column missing — automatically generated 1..N.")
+            position_cols = [c for c in df.columns if re.match(r"(?i)^position\\s*\\d+", c)]
             if not position_cols:
                 non_pos_cols = {"sample", "total edited", 'volume per "1"', "volume per 1"}
                 candidate_cols = [c for c in df.columns if c.lower() not in non_pos_cols]
                 position_cols = candidate_cols
                 st.info(f"Position columns inferred automatically: {len(position_cols)} detected.")
             df[position_cols] = df[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0).astype(int)
             if "Total edited" not in df.columns:
                 df["Total edited"] = df[position_cols].sum(axis=1).astype(int)
+                st.info("`Total edited` calculated automatically.")
             vol_candidates = [c for c in df.columns if "volume per" in c.lower()]
             if not vol_candidates:
                 df['Volume per "1"'] = 64 / df["Total edited"].replace(0, np.nan)
+                df['Volume per "1"'] = df['Volume per "1"'].fillna(0)
                 volume_col = 'Volume per "1"'
             else:
                 volume_col = vol_candidates[0]
             if df[volume_col].max() > max_per_well_ul:
+                st.error(f"❌ A row exceeds the max per-well cap ({max_per_well_ul} µL).")
                 st.stop()
             vol_per_one_series = pd.to_numeric(df[volume_col], errors="coerce").fillna(0.0)
+            total_volume_per_input = [float(vol_per_one_series[df[pos] == 1].sum()) for pos in position_cols]
+            wells_needed_per_input = [int(ceil(tv / max_per_well_ul)) if tv > 0 else 0 for tv in total_volume_per_input]
             num_inputs = len(position_cols)
             max_wells_per_source = max(wells_needed_per_input) if wells_needed_per_input else 0
             st.markdown("### 👀 Preview: Suggested Uniform Layout")
             if max_wells_per_source == 0:
+                st.info("No edits detected — nothing to allocate.")
                 st.stop()
             total_wells_needed_uniform = num_inputs * max_wells_per_source
+            plates_needed = int(ceil(total_wells_needed_uniform / 96)) or 1
+            # ✅ Correct well sorting (A1 → A2 → A12)
             global_wells = sorted(
                 build_global_wells_list(plates_needed),
+                key=lambda x: (x[0], ROWS_96.index(parse_well_name(x[1])[0]), parse_well_name(x[1])[1])
             )
+            global_wells = global_wells[:total_wells_needed_uniform]
+            # Assign blocks
+            assigned_wells_map, well_to_input, preview_rows = {}, {}, []
             for i in range(1, num_inputs + 1):
+                start, end = (i - 1) * max_wells_per_source, i * max_wells_per_source
                 block = global_wells[start:end]
                 assigned_wells_map[i] = block
                 for j, (p, w) in enumerate(block, start=1):
                     well_to_input[(p, w)] = (i, j)
                 block_str = ", ".join([f"P{p}:{w}" for (p, w) in block])
                 preview_rows.append({
                     "Input (Position #)": i,
                     "Total demand (µL)": round(total_volume_per_input[i-1], 2),
+                    "Wells needed": wells_needed_per_input[i-1],
                     "Allocated (uniform)": max_wells_per_source,
                     "Assigned wells": block_str
                 })
             preview_df = pd.DataFrame(preview_rows)
             st.dataframe(preview_df, use_container_width=True, height=300)
             st.markdown("#### Plate Map (hover cells for details)")
             plate_html = render_plate_map_html(plates_needed, well_to_input, max_wells_per_source, num_inputs)
             st.markdown(plate_html, unsafe_allow_html=True)
             # --- Generate Commands ---
             st.markdown("### ✅ Generate Pipetting Commands")
+            if st.button("Generate using this layout"):
                 per_input_well_cum = {i: [0.0] * max_wells_per_source for i in range(1, num_inputs + 1)}
+                commands, source_volume_totals = [], {}
                 for _, row in df.iterrows():
                     sample_id = int(row["Sample"])
                     for pos_idx, col in enumerate(position_cols, start=1):
                         if int(row[col]) != 1:
                             continue
                         wells_for_input = assigned_wells_map[pos_idx]
                         cum_list = per_input_well_cum[pos_idx]
                         for j, ((src_plate, src_well), current_vol) in enumerate(zip(wells_for_input, cum_list)):
                             if current_vol + vol_per_one <= max_per_well_ul:
+                                cum_list[j] += vol_per_one
+                                source_volume_totals[(src_plate, src_well)] = source_volume_totals.get((src_plate, src_well), 0) + vol_per_one
+                                commands.append({
+                                    "Input #": pos_idx,
+                                    "Source plate": src_plate,
+                                    "Source well": src_well,
+                                    "Destination plate": dest_plate,
+                                    "Destination well": dest_well,
+                                    "Volume": round(vol_per_one, 2),
+                                    "Tool": tool
+                                })
                                 break
+                # ✅ Sort commands with numeric logic
                 commands_df = pd.DataFrame(commands).sort_values(
                     by=["Input #", "Source plate", "Source well", "Destination plate", "Destination well"],
+                    key=lambda col: col.apply(
+                        lambda v: parse_well_name(v)[1] if col.name.endswith("well") else int(v)
+                    ) if col.name.endswith("well") or col.name in ["Input #", "Source plate", "Destination plate"] else col,
                     kind="stable"
                 )
+                st.success(f"✅ Generated {len(commands_df)} commands across {num_inputs} inputs.")
+                # ✅ Source summary numeric sort
                 summary_rows = []
                 for i in range(1, num_inputs + 1):
                     for (p, w), used in zip(assigned_wells_map[i], per_input_well_cum[i]):
                         total = source_volume_totals.get((p, w), 0.0)
                         summary_rows.append({
+                            "Source": i, "Source plate": p, "Source well": w,
                             "Total volume taken (µL)": round(total, 2),
                             "Allocated capacity (µL)": round(max_per_well_ul, 2)
                         })
                 summary_df = pd.DataFrame(summary_rows).sort_values(
                     by=["Source", "Source plate", "Source well"],
+                    key=lambda col: col.apply(lambda v: parse_well_name(v)[1]) if col.name == "Source well" else col,
                     kind="stable"
                 )
+                # Display results
                 st.markdown("### 💧 Pipetting Commands")
                 st.dataframe(commands_df, use_container_width=True, height=400)
+                st.download_button("⬇️ Download Commands CSV", commands_df.to_csv(index=False), "pipetting_commands.csv", mime="text/csv")
                 st.markdown("### 📊 Source Volume Summary")
                 st.dataframe(summary_df, use_container_width=True, height=400)
+                st.download_button("⬇️ Download Source Summary CSV", summary_df.to_csv(index=False), "source_volume_summary.csv", mime="text/csv")
         except Exception as e:
             st.error(f"❌ Error processing file: {e}")