Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from PIL import Image, ImageFilter | |
| import numpy as np | |
| import pandas as pd | |
| from streamlit_cropper import st_cropper | |
# Mutation sites in ascending order (site 3614 has been removed).
mutation_site_headers_actual = [
    3244, 3297, 3350, 3399, 3455, 3509, 3562,
    3665, 3720, 3773, 3824, 3879, 3933, 3985, 4039,
    4089, 4145, 4190, 4245, 4298, 4349, 4402, 4455,
    4510, 4561, 4615, 4668, 4720, 4773, 4828, 4882,
]

# Per-site thresholds keyed by mutation site (the entry for the removed
# site 3614 was 0.091557752).
thresholds_actual = pd.Series({
    3244: 1.094293328,
    3297: 0.924916122,
    3350: 0.664586629,
    3399: 0.91573613,
    3455: 1.300869714,
    3509: 1.821975901,
    3562: 1.178862418,
    3665: 0.298697327,
    3720: 0.58379781,
    3773: 0.891088481,
    3824: 1.145509641,
    3879: 0.81833191,
    3933: 2.93084335,
    3985: 1.593758847,
    4039: 0.966055013,
    4089: 1.465671338,
    4145: 0.30309335,
    4190: 1.321615138,
    4245: 1.709752495,
    4298: 0.868534701,
    4349: 1.222907645,
    4402: 0.58873557,
    4455: 1.185522985,
    4510: 1.266797682,
    4561: 1.109913024,
    4615: 1.181106084,
    4668: 1.408533949,
    4720: 0.714151142,
    4773: 1.471959437,
    4828: 0.95879943,
    4882: 1.464503885,
})

# Display/output order of the same sites: 4402 down to 3244 first,
# then 4882 down to 4455.
mutation_site_headers = [
    4402, 4349, 4298, 4245, 4190, 4145, 4089, 4039,
    3985, 3933, 3879, 3824, 3773, 3720, 3665,
    3562, 3509, 3455, 3399, 3350, 3297, 3244,
    4882, 4828, 4773, 4720, 4668, 4615, 4561, 4510, 4455,
]

# Same thresholds as above, laid out in the reordered site order.
# Label-based selection raises KeyError if a site has no threshold.
thresholds = thresholds_actual.loc[mutation_site_headers]
| # === Utility functions === | |
# Voyager ASCII 6-bit conversion table: the position of a symbol in the
# alphabet below is its numeric code (space = 0, 'A' = 1, ..., '&' = 54).
voyager_table = dict(enumerate(
    " ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    ".()+-*/=$!:%\"#@'?&"
))

# Inverse lookup: symbol -> numeric code.
reverse_voyager_table = {symbol: code for code, symbol in voyager_table.items()}
| # === Utility functions === | |
def string_to_binary_labels(s: str) -> list[int]:
    """Encode a string as a flat list of bits, six per character.

    Each character is upper-cased and looked up in ``reverse_voyager_table``;
    characters that are not in the table are encoded as code 0. Every code is
    written most-significant bit first.
    """
    labels = []
    for symbol in s:
        code = reverse_voyager_table.get(symbol.upper(), 0)
        # Shifts 5..0 emit the six bits from most to least significant.
        labels += [(code >> shift) & 1 for shift in (5, 4, 3, 2, 1, 0)]
    return labels
def binary_labels_to_string(bits: list[int]) -> str:
    """Decode a flat list of bits into text, six bits per character.

    The bits are consumed in consecutive groups of six (most-significant bit
    first). A short final group is right-padded with zeros. Codes that are
    not present in ``voyager_table`` decode to '?'.
    """
    decoded = []
    for start in range(0, len(bits), 6):
        sextet = bits[start:start + 6]
        missing = 6 - len(sextet)
        if missing > 0:
            # Only the last group can be short; pad it out to six bits.
            sextet += [0] * missing
        code = sum(bit << shift for bit, shift in zip(sextet, range(5, -1, -1)))
        decoded.append(voyager_table.get(code, '?'))
    return ''.join(decoded)
| # def string_to_binary_labels(s: str) -> list[int]: | |
| # bits = [] | |
| # for char in s: | |
| # ascii_code = ord(char) | |
| # char_bits = [(ascii_code >> bit) & 1 for bit in range(7, -1, -1)] | |
| # bits.extend(char_bits) | |
| # return bits | |
| # def binary_labels_to_string(bits: list[int]) -> str: | |
| # chars = [] | |
| # for i in range(0, len(bits), 8): | |
| # byte = bits[i:i+8] | |
| # if len(byte) < 8: | |
| # byte += [0] * (8 - len(byte)) | |
| # ascii_val = sum(b << (7 - j) for j, b in enumerate(byte)) | |
| # chars.append(chr(ascii_val)) | |
| # return ''.join(chars) | |
def clean_image(img: Image.Image, min_size: int = 256) -> Image.Image:
    """Return an RGB, lightly blurred version of ``img``.

    If either dimension is below ``min_size`` the image is resized to exactly
    ``min_size`` x ``min_size`` (the aspect ratio is not preserved in that
    case). A Gaussian blur of radius 1 is always applied last.
    """
    rgb = img.convert("RGB")
    too_small = min(rgb.width, rgb.height) < min_size
    if too_small:
        rgb = rgb.resize((min_size, min_size))
    return rgb.filter(ImageFilter.GaussianBlur(radius=1))
def image_to_binary_labels_rgb(img: Image.Image, max_pixels: int = 256) -> list[int]:
    """Encode an image as a flat list of bits, 24 per pixel.

    The image is passed through ``clean_image`` and then shrunk in place with
    ``Image.thumbnail`` so that neither side exceeds ``int(sqrt(max_pixels))``.
    Pixels are emitted row by row, channels in R, G, B order, each channel as
    eight bits with the most-significant bit first.

    Args:
        img: Source image.
        max_pixels: Upper bound used to derive the thumbnail's maximum side.

    Returns:
        A list of plain Python ints (0 or 1) whose length is
        ``width * height * 24`` for the thumbnail actually produced.
    """
    img = clean_image(img)
    side = int(np.sqrt(max_pixels))
    img.thumbnail((side, side))

    # Flattening the (height, width, 3) array keeps the pixel-major,
    # channel-minor order; unpackbits expands each byte MSB-first by default.
    channel_bytes = np.array(img).reshape(-1)
    # tolist() converts numpy scalars to Python ints, matching the annotation.
    return np.unpackbits(channel_bytes).tolist()
def binary_labels_to_rgb_image(binary_labels: list[int], width: int = None, height: int = None) -> Image.Image:
    """Rebuild an RGB image from a flat list of bits (24 bits per pixel).

    Bits are read as consecutive R, G, B bytes, most-significant bit first.
    The input list is never modified; padding happens on a private buffer.

    Args:
        binary_labels: Sequence of 0/1 values. Any non-zero value is treated
            as a set bit.
        width: Image width in pixels.
        height: Image height in pixels. If either ``width`` or ``height`` is
            None, both are replaced by ``ceil(sqrt(len(binary_labels) // 24))``
            so the result is square.

    Returns:
        A ``width`` x ``height`` RGB image. Missing bits are filled with
        zeros (black); bits beyond ``width * height * 24`` are ignored.
    """
    total_pixels = len(binary_labels) // 24
    if width is None or height is None:
        side = int(np.ceil(np.sqrt(total_pixels)))
        width = height = side

    needed_bits = width * height * 24

    # Copy into a zero-initialised buffer instead of extending the caller's
    # list in place; the zeros double as padding for short inputs.
    bit_buffer = np.zeros(needed_bits, dtype=np.uint8)
    usable = min(len(binary_labels), needed_bits)
    if usable:
        bit_buffer[:usable] = binary_labels[:usable]

    # packbits folds every eight bits (MSB first) into one byte, giving
    # exactly width * height * 3 channel values.
    array = np.packbits(bit_buffer).reshape((height, width, 3))
    img = Image.fromarray(array, mode='RGB')
    return img
# === Streamlit App ===
st.title("ASCII & Binary Label Converter")
tab1, tab2, tab3 = st.tabs(["Text to Binary Labels", "Image to Binary Labels", "EF → Binary"])

# Tab 1: Text to Binary
with tab1:
    user_input = st.text_input("Enter text", value="DNA")
    if user_input:
        ascii_codes = [ord(c) for c in user_input]
        binary_labels = string_to_binary_labels(user_input)

        st.subheader("ASCII Codes")
        st.write(ascii_codes)

        st.subheader("Binary Labels per Character")
        # The encoder emits exactly six bits per input character.
        grouped = [binary_labels[i:i+6] for i in range(0, len(binary_labels), 6)]
        for i, bits in enumerate(grouped):
            st.write(f"'{user_input[i]}' → {bits}")

        st.subheader("Binary Labels (31-bit groups)")
        # One row per group of bits, one bit per mutation site; the row width
        # follows the header list so data and column labels cannot drift apart.
        site_count = len(mutation_site_headers)
        groups = []
        for i in range(0, len(binary_labels), site_count):
            group = binary_labels[i:i+site_count]
            group += [0] * (site_count - len(group))  # zero-pad the last row
            groups.append(group + [sum(group)])       # trailing count of set bits
        df_31 = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"])
        st.dataframe(df_31)
        st.download_button("Download as CSV", df_31.to_csv(index=False), "text_32_binary_labels.csv")

        # Additional table with ascending mutation site headers (3244 to 4455).
        # Built from df_31: no other DataFrame exists in this tab.
        ascending_headers = sorted(h for h in mutation_site_headers if h <= 4455)
        df_sorted = df_31[[str(h) for h in ascending_headers if str(h) in df_31.columns]]
        st.subheader("Binary Labels (Ascending Order 3244 → 4455)")
        st.dataframe(df_sorted)
        st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv")
| # st.subheader("Binary Labels (27-bit groups)") | |
| # groups = [] | |
| # for i in range(0, len(binary_labels), 27): | |
| # group = binary_labels[i:i+27] | |
| # group += [0] * (27 - len(group)) | |
| # groups.append(group + [sum(group)]) | |
| # df_27 = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"]) | |
| # st.dataframe(df_27) | |
| # st.download_button("Download as CSV", df_27.to_csv(index=False), "text_27_binary_labels.csv") | |
# Tab 2: Image to Binary
with tab2:
    uploaded = st.file_uploader("Upload an image (jpg/png)", type=["jpg", "jpeg", "png"])
    if uploaded:
        img = Image.open(uploaded)
        st.image(img, caption="Original", use_column_width=True)
        cropped = st_cropper(img, realtime_update=True, box_color="blue", aspect_ratio=None)
        st.image(cropped, caption="Cropped", use_column_width=True)
        max_pixels = st.slider("Max pixels to encode", 32, 1024, 256, 32)
        binary_labels = image_to_binary_labels_rgb(cropped, max_pixels=max_pixels)

        st.subheader("Binary Labels from Image")
        # Each row must hold exactly one bit per mutation-site column plus the
        # trailing "Edited Sites" count; a fixed width of 32 produced 33 values
        # for 32 column labels and made the DataFrame constructor fail.
        site_count = len(mutation_site_headers)
        groups = []
        for i in range(0, len(binary_labels), site_count):
            group = binary_labels[i:i+site_count]
            group += [0] * (site_count - len(group))  # zero-pad the last row
            groups.append(group + [sum(group)])
        df = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"])
        st.dataframe(df)

        st.subheader("Reconstructed Image")
        # A copy is passed so the reconstruction cannot alter binary_labels.
        # NOTE(review): no width/height is supplied, so the image is rebuilt as
        # a square; a non-square thumbnail will not line up row for row.
        recon = binary_labels_to_rgb_image(list(binary_labels))
        st.image(recon, caption="Reconstructed", use_column_width=True)
        st.download_button("Download CSV", df.to_csv(index=False), "image_binary_labels.csv")
# Tab 3: EF → Binary
# Render into the existing third tab; calling st.tabs() again would draw a
# second tab bar and leave the original "EF → Binary" tab empty.
with tab3:
    st.write("Upload an Editing Frequency CSV or enter manually:")
    st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4455.")
    ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")

    ascending_input_headers = sorted(h for h in mutation_site_headers if 3244 <= h <= 4455)
    ef_columns = [str(site) for site in ascending_input_headers]

    # Start from an empty table; replace it only if the upload has the
    # expected number of columns.
    ef_df = pd.DataFrame(columns=ef_columns)
    if ef_file:
        uploaded_df = pd.read_csv(ef_file, header=None)
        if uploaded_df.shape[1] == len(ef_columns):
            uploaded_df.columns = ef_columns
            ef_df = uploaded_df
        else:
            st.error(
                f"Expected {len(ef_columns)} columns (sites 3244 to 4455) "
                f"but the uploaded CSV has {uploaded_df.shape[1]}."
            )

    edited_df = st.data_editor(ef_df, num_rows="dynamic")

    if st.button("Convert to Binary Labels"):
        # Threshold each site column: 1 when the value is >= the site's
        # threshold, otherwise 0.
        binary_part = pd.DataFrame()
        for col in ascending_input_headers:
            col_str = str(col)
            threshold = thresholds[col]
            binary_part[col_str] = (edited_df[col_str].astype(float) >= threshold).astype(int)

        # Rearranged for output: custom order from mutation_site_headers
        binary_reordered = binary_part[[str(h) for h in mutation_site_headers if str(h) in binary_part.columns]]

        def color_binary(val):
            """Return the CSS background for a 1 (green) or 0 (coral) cell."""
            if val == 1:
                return "background-color: lightgreen"
            if val == 0:
                return "background-color: lightcoral"
            return ""

        st.subheader("Binary Labels (Reordered 4402→3244, 4882→4455)")
        # Newer pandas exposes the element-wise styler as Styler.map; fall
        # back to Styler.applymap where map is not available.
        styler = binary_reordered.style
        apply_cellwise = getattr(styler, "map", None) or styler.applymap
        styled = apply_cellwise(color_binary)
        st.dataframe(styled)
        st.download_button("Download CSV", binary_reordered.to_csv(index=False), "ef_binary_labels.csv")

        # Decode each row's bits back into text; rows are numbered by
        # position so the label does not depend on the index values.
        for row_number, (_, row) in enumerate(binary_reordered.iterrows(), start=1):
            binary_sequence = row.tolist()
            text = binary_labels_to_string(binary_sequence)
            st.write(f"Row {row_number} decoded string: {text}")
| # # Tab 3: EF → Binary | |
| # with tab3: | |
| # st.write("Upload an Editing Frequency CSV or enter manually:") | |
| # st.write("**Note:** Please upload CSV files **without column headers**. Just the 31 editing frequencies per row.") | |
| # ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef") | |
| # if ef_file: | |
| # # Read CSV without headers and assign mutation site headers | |
| # ef_df = pd.read_csv(ef_file, header=None) | |
| # ef_df.columns = [str(site) for site in mutation_site_headers] | |
| # else: | |
| # ef_df = pd.DataFrame(columns=[str(site) for site in mutation_site_headers]) | |
| # edited_df = st.data_editor(ef_df, num_rows="dynamic") | |
| # if st.button("Convert to Binary Labels"): | |
| # int_map = {str(k): k for k in thresholds.index} | |
| # matching_cols = [col for col in edited_df.columns if col in int_map] | |
| # binary_part = pd.DataFrame() | |
| # for col in matching_cols: | |
| # col_threshold = thresholds[int_map[col]] | |
| # binary_part[col] = (edited_df[col].astype(float) >= col_threshold).astype(int) | |
| # non_binary_part = edited_df.drop(columns=matching_cols, errors='ignore') | |
| # binary_df = pd.concat([non_binary_part, binary_part], axis=1) | |
| # def color_binary(val): | |
| # if val == 1: return "background-color: lightgreen" | |
| # if val == 0: return "background-color: lightcoral" | |
| # return "" | |
| # st.subheader("Binary Labels") | |
| # styled = binary_df.style.applymap(color_binary, subset=matching_cols) | |
| # st.dataframe(styled) | |
| # st.download_button("Download CSV", binary_df.to_csv(index=False), "ef_binary_labels.csv") | |
| # # Convert to bitstrings and strings | |
| # binary_strings = [] | |
| # decoded_strings = [] | |
| # for _, row in binary_part.iterrows(): | |
| # bitlist = row.values.tolist() | |
| # bitstring = ''.join(str(b) for b in bitlist) | |
| # binary_strings.append(bitstring) | |
| # decoded_strings.append(binary_labels_to_string(bitlist)) | |
| # st.subheader("Binary as Bitstrings") | |
| # for b in binary_strings: | |
| # st.code(b) | |
| # st.subheader("Decoded Voyager Strings") | |
| # for s in decoded_strings: | |
| # st.write(s) | |