Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -71,7 +71,7 @@ def binary_labels_to_string(bits: list[int]) -> str:
|
|
| 71 |
# === Streamlit App ===
|
| 72 |
|
| 73 |
st.title("ASCII & Binary Label Converter")
|
| 74 |
-
tab1, tab2 = st.tabs(["Text to Binary Labels (31)", "EF → Binary (31)"])
|
| 75 |
|
| 76 |
# Tab 1: Text to Binary
|
| 77 |
with tab1:
|
|
@@ -182,3 +182,161 @@ with tab2:
|
|
| 182 |
decoded_string = binary_labels_to_string(all_bits)
|
| 183 |
st.subheader("Decoded String (continuous across rows)")
|
| 184 |
st.write(decoded_string)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
# === Streamlit App ===
|
| 72 |
|
| 73 |
st.title("ASCII & Binary Label Converter")

# Tabs 1-2 are labelled for the 31-site layout, tabs 3-4 for the 32-site layout.
tab1, tab2, tab3, tab4 = st.tabs([
    "Text to Binary Labels (31)",
    "EF → Binary (31)",
    "Text to Binary Labels (32)",
    # Was "EF → Binary (2)"; presumably a typo for the 32-site variant,
    # matching the "(32)" label of the tab next to it.
    "EF → Binary (32)",
])
|
| 75 |
|
| 76 |
# Tab 1: Text to Binary
|
| 77 |
with tab1:
|
|
|
|
| 182 |
decoded_string = binary_labels_to_string(all_bits)
|
| 183 |
st.subheader("Decoded String (continuous across rows)")
|
| 184 |
st.write(decoded_string)
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
# Mutation site positions in ascending order (3244 → 4882), 32 entries.
# This variant of the list keeps site 3614.
mutation_site_headers_actual_3614 = [
    3244, 3297, 3350, 3399,
    3455, 3509, 3562, 3614,
    3665, 3720, 3773, 3824,
    3879, 3933, 3985, 4039,
    4089, 4145, 4190, 4245,
    4298, 4349, 4402, 4455,
    4510, 4561, 4615, 4668,
    4720, 4773, 4828, 4882,
]
|
| 194 |
+
# Per-site threshold, keyed by mutation site position (ascending order).
# NOTE(review): the previous comment mentioned "3614: 0.091557752" —
# presumably an earlier threshold for that site; the value in use below is
# 0.157969131375. Confirm which one is intended.
thresholds_actual_3614 = pd.Series({
    3244: 1.096910677, 3297: 0.923658795,
    3350: 0.668939037, 3399: 0.914305214,
    3455: 1.297392984, 3509: 1.812636208,
    3562: 1.185047484, 3614: 0.157969131375,
    3665: 0.298007308, 3720: 0.58857544,
    3773: 0.882561082, 3824: 1.149082617,
    3879: 0.816050702, 3933: 2.936517653,
    3985: 1.597166791, 4039: 0.962108082,
    4089: 1.479783497, 4145: 0.305853225,
    4190: 1.311869541, 4245: 1.707556905,
    4298: 0.875013076, 4349: 1.227704526,
    4402: 0.593206446, 4455: 1.179633137,
    4510: 1.272477799, 4561: 1.293841573,
    4615: 1.16821885, 4668: 1.40306,
    4720: 0.706530878, 4773: 1.483114072,
    4828: 0.954939873, 4882: 1.47524328,
})
|
| 205 |
+
|
| 206 |
+
# Site order used for the bit layout: positions 1-23 run 4402 → 3244 and
# positions 24-32 run 4882 → 4455 (both descending).
mutation_site_headers_3614 = [
    # 1–23
    4402, 4349, 4298, 4245, 4190, 4145, 4089, 4039,
    # The comma after 3614 was missing, which made the literal a SyntaxError.
    3985, 3933, 3879, 3824, 3773, 3720, 3665, 3614,
    3562, 3509, 3455, 3399, 3350, 3297, 3244,
    # 24–32
    4882, 4828, 4773, 4720, 4668, 4615, 4561, 4510, 4455,
]
|
| 213 |
+
|
| 214 |
+
# The same thresholds as thresholds_actual_3614, re-indexed so that they
# follow the site order of mutation_site_headers_3614.
thresholds_3614 = pd.Series(
    dict(
        (site, thresholds_actual_3614[site])
        for site in mutation_site_headers_3614
    )
)
|
| 216 |
+
|
| 217 |
+
# === Utility functions ===
|
| 218 |
+
|
| 219 |
+
# Symbol table labelled "Voyager ASCII 6-bit" in the original source:
# code -> character for 55 symbols (codes 0-54): space, A-Z, 0-9, then
# punctuation.
voyager_table = dict(enumerate(
    " ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    ".()+-*/=$!:%\"#@'?&"
))
# Inverse lookup: character -> code.
reverse_voyager_table = dict(zip(voyager_table.values(), voyager_table.keys()))
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
# Tab 3: Text to Binary (32)
# Encodes the entered text into binary labels, lays them out in rows of 32
# mutation sites, and derives a robot pipetting script from the set bits.
with tab3:
    # Explicit widget keys keep these widgets from colliding with
    # identically-labelled widgets in other tabs (tab 1 presumably has the
    # same text box and download buttons — its body is not visible here).
    user_input = st.text_input("Enter text", value="DNA", key="text32_input")
    if user_input:
        ascii_codes = [ord(c) for c in user_input]
        binary_labels = string_to_binary_labels(user_input)

        st.subheader("ASCII Codes")
        st.write(ascii_codes)

        st.subheader("Binary Labels per Character")
        # Assumes string_to_binary_labels emits 6 bits per character — TODO confirm.
        grouped = [binary_labels[i:i + 6] for i in range(0, len(binary_labels), 6)]
        for i, bits in enumerate(grouped):
            st.write(f"'{user_input[i]}' → {bits}")

        st.subheader("Binary Labels (32-bit groups)")
        groups = []
        for i in range(0, len(binary_labels), 32):
            group = binary_labels[i:i + 32]
            # Zero-pad the last row to a full 32 sites.
            group += [0] * (32 - len(group))
            # Trailing cell is the number of set bits in the row.
            groups.append(group + [sum(group)])

        df = pd.DataFrame(
            groups,
            columns=[str(h) for h in mutation_site_headers_3614] + ["Edited Sites"],
        )
        st.dataframe(df)
        st.download_button(
            "Download as CSV",
            df.to_csv(index=False),
            "text_32_binary_labels.csv",
            key="text32_csv",
        )

        ascending_headers = sorted(mutation_site_headers_actual_3614)
        # .copy() gives an independent frame: columns are added to it below,
        # and doing that on a selection of `df` raises SettingWithCopyWarning.
        df_sorted = df[[str(h) for h in ascending_headers if str(h) in df.columns]].copy()
        st.subheader("Binary Labels (Ascending Order 3244 → 4882)")
        st.dataframe(df_sorted)
        st.download_button(
            "Download Ascending Order CSV",
            df_sorted.to_csv(index=False),
            "text_binary_labels_ascending.csv",
            key="text32_csv_ascending",
        )

        # === Robot Preparation Script Generation ===
        st.subheader("Robot Preparation Script")
        robot_template = pd.read_csv("/home/user/app/Robot.csv", skiprows=3)
        robot_template.columns = ['Labware', 'Source', 'Labware_2', 'Destination', 'Volume', 'Tool', 'Name']

        # Add Sample numbers for well referencing
        df_sorted.insert(0, 'Sample', range(1, len(df_sorted) + 1))

        # Step 1: Count the number of edited sites per row
        df_sorted['# donors'] = df_sorted.iloc[:, 1:].sum(axis=1)

        # Step 2: Calculate volume per donor (32 / # donors)
        # A row with no donors yields inf here, but such a row has no set
        # bits and therefore never contributes a line to the script below.
        df_sorted['volume donors (µl)'] = 32 / df_sorted['# donors']

        # Step 3: Generate the robot script
        robot_script = []
        # NOTE: source_wells[i] below raises IndexError if the template lists
        # fewer than 32 distinct Source wells.
        source_wells = robot_template['Source'].unique().tolist()[:32]

        # Iterate column-wise instead of with iterrows(): iterrows() upcasts
        # each row to float64 (because of the float volume column), which
        # turned the destination well into e.g. "A1.0" instead of "A1".
        for i, col in enumerate(df_sorted.columns[1:33]):
            for sample_no, bit, volume in zip(
                df_sorted['Sample'], df_sorted[col], df_sorted['volume donors (µl)']
            ):
                if bit == 1:
                    robot_script.append({
                        'Source': source_wells[i],
                        'Destination': f"A{int(sample_no)}",
                        'Volume': round(volume, 2),
                    })

        # Fixed columns so an empty script still produces a CSV header.
        robot_script_df = pd.DataFrame(robot_script, columns=['Source', 'Destination', 'Volume'])
        st.dataframe(robot_script_df)
        st.download_button(
            "Download Robot Script CSV",
            robot_script_df.to_csv(index=False),
            "robot_script.csv",
            key="text32_robot_csv",
        )
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
# Tab 4: EF → Binary (32 sites)
# Thresholds editing-frequency values per mutation site into 0/1 labels and
# decodes the resulting bit stream back into text.
# This block previously used `with tab2:`, which rendered it into the tab
# that already hosts the 31-site EF converter and left tab4 unused.
with tab4:
    st.write("Upload an Editing Frequency CSV or enter manually:")
    st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4882.")
    # Widget keys in this tab are unique so they cannot collide with the
    # widgets of the other EF tab (which presumably uses key="ef").
    ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef_32")

    site_columns = [str(site) for site in sorted(mutation_site_headers_actual_3614)]
    ef_df = pd.DataFrame(columns=site_columns)
    if ef_file:
        uploaded = pd.read_csv(ef_file, header=None)
        if uploaded.shape[1] == len(site_columns):
            uploaded.columns = site_columns
            ef_df = uploaded
        else:
            # Assigning column names to a frame of the wrong width raises
            # ValueError; report the problem and fall back to the empty table.
            st.error(
                f"Expected {len(site_columns)} columns (one per mutation site) "
                f"but the uploaded file has {uploaded.shape[1]}."
            )

    edited_df = st.data_editor(ef_df, num_rows="dynamic", key="ef_32_editor")

    if st.button("Convert to Binary Labels", key="ef_32_convert"):
        # 1 where the editing frequency reaches the site's threshold, else 0.
        binary_part = pd.DataFrame()
        for col in sorted(mutation_site_headers_actual_3614):
            col_str = str(col)
            threshold = thresholds_actual_3614[col]
            binary_part[col_str] = (edited_df[col_str].astype(float) >= threshold).astype(int)

        # Same labels, with columns in the encoding order.
        binary_reordered = binary_part[[str(h) for h in mutation_site_headers_3614 if str(h) in binary_part.columns]]

        def color_binary(val):
            """Return the cell CSS: green for 1, coral for 0, none otherwise."""
            if val == 1:
                return "background-color: lightgreen"
            if val == 0:
                return "background-color: lightcoral"
            return ""

        def style_binary(frame):
            """Apply color_binary to every cell of `frame` and return the Styler."""
            styler = frame.style
            # Styler.applymap was renamed to Styler.map in pandas 2.1; use
            # whichever the installed version provides.
            cellwise = getattr(styler, "map", None) or styler.applymap
            return cellwise(color_binary)

        st.subheader("Binary Labels (Reordered 4402→3244, 4882→4455)")
        styled = style_binary(binary_reordered)
        st.dataframe(styled)
        st.download_button(
            "Download CSV",
            binary_reordered.to_csv(index=False),
            "ef_binary_labels.csv",
            key="ef_32_csv",
        )

        # Decode the reordered bits, read row by row as one continuous stream.
        all_bits = binary_reordered.values.flatten().tolist()
        decoded_string = binary_labels_to_string(all_bits)
        st.subheader("Decoded String (continuous across rows)")
        st.write(decoded_string)

        st.subheader("Binary Labels (Ascending 3244→4882)")
        st.dataframe(style_binary(binary_part))
        st.download_button(
            "Download Ascending Order CSV",
            binary_part.to_csv(index=False),
            "ef_binary_labels_ascending.csv",
            key="ef_32_csv_ascending",
        )

        # Decode again, this time from the ascending-order layout.
        all_bits = binary_part.values.flatten().tolist()
        decoded_string = binary_labels_to_string(all_bits)
        st.subheader("Decoded String (continuous across rows)")
        st.write(decoded_string)
|
| 342 |
+
|