wenjun99 committed on
Commit
fc0d4a5
·
verified ·
1 Parent(s): 41b3afa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -1
app.py CHANGED
@@ -71,7 +71,7 @@ def binary_labels_to_string(bits: list[int]) -> str:
71
  # === Streamlit App ===
72
 
73
  st.title("ASCII & Binary Label Converter")
74
- tab1, tab2 = st.tabs(["Text to Binary Labels (31)", "EF → Binary (31)"])
75
 
76
  # Tab 1: Text to Binary
77
  with tab1:
@@ -182,3 +182,161 @@ with tab2:
182
  decoded_string = binary_labels_to_string(all_bits)
183
  st.subheader("Decoded String (continuous across rows)")
184
  st.write(decoded_string)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # === Streamlit App ===
72
 
73
  st.title("ASCII & Binary Label Converter")
74
+ tab1, tab2, tab3, tab4 = st.tabs(["Text to Binary Labels (31)", "EF → Binary (31)", "Text to Binary Labels (32)", "EF → Binary (2)"])
75
 
76
  # Tab 1: Text to Binary
77
  with tab1:
 
182
  decoded_string = binary_labels_to_string(all_bits)
183
  st.subheader("Decoded String (continuous across rows)")
184
  st.write(decoded_string)
185
+
186
+
187
# Mutation site headers in ascending order (site 3614 is kept in this list).
mutation_site_headers_actual_3614 = [
    3244, 3297, 3350, 3399, 3455, 3509, 3562, 3614,
    3665, 3720, 3773, 3824, 3879, 3933, 3985, 4039,
    4089, 4145, 4190, 4245, 4298, 4349, 4402, 4455,
    4510, 4561, 4615, 4668, 4720, 4773, 4828, 4882,
]

# Threshold for each mutation site, keyed by site number.
# NOTE(review): the earlier comment mentioned "3614: 0.091557752", presumably a
# previous threshold for site 3614 — confirm which value is intended.
thresholds_actual_3614 = pd.Series({
    3244: 1.096910677, 3297: 0.923658795, 3350: 0.668939037, 3399: 0.914305214,
    3455: 1.297392984, 3509: 1.812636208, 3562: 1.185047484, 3614: 0.157969131375,
    3665: 0.298007308, 3720: 0.58857544, 3773: 0.882561082, 3824: 1.149082617,
    3879: 0.816050702, 3933: 2.936517653, 3985: 1.597166791, 4039: 0.962108082,
    4089: 1.479783497, 4145: 0.305853225, 4190: 1.311869541, 4245: 1.707556905,
    4298: 0.875013076, 4349: 1.227704526, 4402: 0.593206446, 4455: 1.179633137,
    4510: 1.272477799, 4561: 1.293841573, 4615: 1.16821885, 4668: 1.40306,
    4720: 0.706530878, 4773: 1.483114072, 4828: 0.954939873, 4882: 1.47524328,
})

# Mutation site headers reordered: 4402 down to 3244, then 4882 down to 4455.
# The comma after 3614 is required; without it the literal is a syntax error.
mutation_site_headers_3614 = [
    4402, 4349, 4298, 4245, 4190, 4145, 4089, 4039,
    3985, 3933, 3879, 3824, 3773, 3720, 3665, 3614,
    3562, 3509, 3455, 3399, 3350, 3297, 3244,  # positions 1–23
    4882, 4828, 4773, 4720, 4668, 4615, 4561, 4510, 4455,  # positions 24–32
]

# Thresholds looked up per site and laid out in the reordered header order.
thresholds_3614 = pd.Series({h: thresholds_actual_3614[h] for h in mutation_site_headers_3614})
216
+
217
# === Utility functions ===

# Voyager 6-bit character table: each character's position in the sequence
# below is its integer code (0 = space, 1 = 'A', ...).
voyager_table = dict(enumerate((
    ' ', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
    'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
    'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2',
    '3', '4', '5', '6', '7', '8', '9', '.', '(', ')',
    '+', '-', '*', '/', '=', '$', '!', ':', '%', '"',
    '#', '@', '\'', '?', '&',
)))

# Inverse lookup: character -> integer code.
reverse_voyager_table = dict(zip(voyager_table.values(), voyager_table.keys()))
231
+
232
+
233
# Tab 3: Text to Binary (32)
# Converts the entered text to binary labels, lays them out in 32-site rows
# (reordered and ascending), and derives a robot pipetting script from the
# ascending table.
with tab3:
    # Explicit widget keys: an identically-configured widget elsewhere in the
    # app would otherwise share an auto-generated ID with the ones below.
    user_input = st.text_input("Enter text", value="DNA", key="tab3_text_input")
    if user_input:
        ascii_codes = [ord(c) for c in user_input]
        binary_labels = string_to_binary_labels(user_input)

        st.subheader("ASCII Codes")
        st.write(ascii_codes)

        st.subheader("Binary Labels per Character")
        grouped = [binary_labels[i:i+6] for i in range(0, len(binary_labels), 6)]
        for i, bits in enumerate(grouped):
            st.write(f"'{user_input[i]}' → {bits}")

        st.subheader("Binary Labels (32-bit groups)")
        groups = []
        for i in range(0, len(binary_labels), 32):
            group = binary_labels[i:i+32]
            # Zero-pad the last group so every row has exactly 32 labels.
            group += [0] * (32 - len(group))
            # The trailing value is the number of edited (1) sites in the row.
            groups.append(group + [sum(group)])

        df = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers_3614] + ["Edited Sites"])
        st.dataframe(df)
        st.download_button(
            "Download as CSV",
            df.to_csv(index=False),
            "text_32_binary_labels.csv",
            key="tab3_download_reordered",
        )

        # Same labels with the site columns in ascending order; the
        # "Edited Sites" column is not a site header and is dropped here.
        ascending_headers = sorted(mutation_site_headers_actual_3614)
        df_sorted = df[[str(h) for h in ascending_headers if str(h) in df.columns]]
        st.subheader("Binary Labels (Ascending Order 3244 → 4882)")
        st.dataframe(df_sorted)
        st.download_button(
            "Download Ascending Order CSV",
            df_sorted.to_csv(index=False),
            "text_binary_labels_ascending.csv",
            key="tab3_download_ascending",
        )

        # === Robot Preparation Script Generation ===
        st.subheader("Robot Preparation Script")
        robot_template = pd.read_csv("/home/user/app/Robot.csv", skiprows=3)
        robot_template.columns = ['Labware', 'Source', 'Labware_2', 'Destination', 'Volume', 'Tool', 'Name']

        # Work on an explicit copy so the helper columns below are not written
        # into a selection of `df`.
        robot_df = df_sorted.copy()
        site_columns = list(robot_df.columns)

        # Add Sample numbers for well referencing
        robot_df.insert(0, 'Sample', range(1, len(robot_df) + 1))

        # Step 1: Count the number of edited sites per row
        robot_df['# donors'] = robot_df[site_columns].sum(axis=1)

        # Step 2: Calculate volume per donor (32 / # donors).
        # Rows with zero donors get an infinite volume here, but they never
        # produce a transfer below because none of their sites equals 1.
        robot_df['volume donors (µl)'] = 32 / robot_df['# donors']

        # Step 3: Generate the robot script
        robot_script = []
        source_wells = robot_template['Source'].unique().tolist()[:32]

        if len(source_wells) < len(site_columns):
            # Indexing source_wells by site position would fail otherwise.
            st.error(
                f"Robot.csv provides {len(source_wells)} unique source wells, "
                f"but {len(site_columns)} are required."
            )
        else:
            # Outer loop over sites (sources), inner loop over samples, which
            # keeps the transfers grouped by source well.
            for source, col in zip(source_wells, site_columns):
                rows = zip(robot_df['Sample'], robot_df[col], robot_df['volume donors (µl)'])
                for sample_no, flag, volume in rows:
                    if flag == 1:
                        robot_script.append({
                            'Source': source,
                            # int() keeps the well label as "A1" rather than
                            # "A1.0"; iterrows() would upcast the sample
                            # number to float because the row mixes int and
                            # float columns.
                            'Destination': f"A{int(sample_no)}",
                            'Volume': round(volume, 2),
                        })

            # Explicit columns keep the CSV header even when there are no transfers.
            robot_script_df = pd.DataFrame(robot_script, columns=['Source', 'Destination', 'Volume'])
            st.dataframe(robot_script_df)
            st.download_button(
                "Download Robot Script CSV",
                robot_script_df.to_csv(index=False),
                "robot_script.csv",
                key="tab3_download_robot",
            )
294
+
295
+
296
# Tab 4: EF → Binary (32-site layout)
# Thresholds editing frequencies per site into binary labels and decodes them.
# This section is rendered in tab4: tab4 is created by st.tabs but was never
# used, while tab2 already has its own EF → Binary section.
with tab4:
    st.write("Upload an Editing Frequency CSV or enter manually:")
    st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4882.")
    # Keys are specific to this tab so they cannot clash with the widgets of
    # the other EF → Binary tab.
    ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef_32")

    ascending_sites = sorted(mutation_site_headers_actual_3614)
    ascending_columns = [str(site) for site in ascending_sites]

    ef_df = pd.DataFrame(columns=ascending_columns)
    if ef_file:
        uploaded_df = pd.read_csv(ef_file, header=None)
        if uploaded_df.shape[1] == len(ascending_columns):
            uploaded_df.columns = ascending_columns
            ef_df = uploaded_df
        else:
            # Assigning the site names to a frame of a different width would
            # raise; report the problem and fall back to the empty editor.
            st.error(
                f"Expected {len(ascending_columns)} columns in the uploaded CSV, "
                f"found {uploaded_df.shape[1]}."
            )

    edited_df = st.data_editor(ef_df, num_rows="dynamic", key="ef_32_editor")

    if st.button("Convert to Binary Labels", key="ef_32_convert"):
        # A site is labelled 1 when its editing frequency reaches the
        # site-specific threshold, else 0. Columns are in ascending order.
        binary_part = pd.DataFrame({
            str(site): (edited_df[str(site)].astype(float) >= thresholds_actual_3614[site]).astype(int)
            for site in ascending_sites
        })

        binary_reordered = binary_part[[str(h) for h in mutation_site_headers_3614 if str(h) in binary_part.columns]]

        def color_binary(val):
            """Return the cell CSS for a binary label (green for 1, coral for 0)."""
            if val == 1:
                return "background-color: lightgreen"
            if val == 0:
                return "background-color: lightcoral"
            return ""

        def style_binary(frame):
            """Apply color_binary to every cell of *frame* and return the Styler."""
            styler = frame.style
            # Styler.applymap is deprecated in newer pandas in favour of
            # Styler.map; use whichever this pandas version provides.
            apply_cells = getattr(styler, "map", None) or styler.applymap
            return apply_cells(color_binary)

        st.subheader("Binary Labels (Reordered 4402→3244, 4882→4455)")
        styled = style_binary(binary_reordered)
        st.dataframe(styled)
        st.download_button(
            "Download CSV",
            binary_reordered.to_csv(index=False),
            "ef_binary_labels.csv",
            key="ef_32_download_reordered",
        )

        # Decode the reordered labels, read row by row as one bit stream.
        all_bits = binary_reordered.values.flatten().tolist()
        decoded_string = binary_labels_to_string(all_bits)
        st.subheader("Decoded String (continuous across rows)")
        st.write(decoded_string)

        st.subheader("Binary Labels (Ascending 3244→4882)")
        st.dataframe(style_binary(binary_part))
        st.download_button(
            "Download Ascending Order CSV",
            binary_part.to_csv(index=False),
            "ef_binary_labels_ascending.csv",
            key="ef_32_download_ascending",
        )

        # Decode the ascending-order labels the same way.
        all_bits = binary_part.values.flatten().tolist()
        decoded_string = binary_labels_to_string(all_bits)
        st.subheader("Decoded String (continuous across rows)")
        st.write(decoded_string)
342
+