Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,16 +33,8 @@ mutation_site_headers = [
|
|
| 33 |
]
|
| 34 |
|
| 35 |
# Thresholds reordered accordingly
|
| 36 |
-
thresholds = pd.Series({
|
| 37 |
-
|
| 38 |
-
4190: 1.321615138, 4145: 0.30309335, 4089: 1.465671338, 4039: 0.966055013,
|
| 39 |
-
3985: 1.593758847, 3933: 2.93084335, 3879: 0.81833191, 3824: 1.145509641,
|
| 40 |
-
3773: 0.891088481, 3720: 0.58379781, 3665: 0.298697327,
|
| 41 |
-
3562: 1.178862418, 3509: 1.821975901, 3455: 1.300869714, 3399: 0.91573613,
|
| 42 |
-
3350: 0.664586629, 3297: 0.924916122, 3244: 1.094293328,
|
| 43 |
-
4882: 1.464503885, 4828: 0.95879943, 4773: 1.471959437, 4720: 0.714151142,
|
| 44 |
-
4668: 1.408533949, 4615: 1.181106084, 4561: 1.109913024, 4510: 1.266797682, 4455: 1.185522985
|
| 45 |
-
})
|
| 46 |
# === Utility functions ===
|
| 47 |
|
| 48 |
# Voyager ASCII 6-bit conversion table
|
|
@@ -58,8 +50,6 @@ voyager_table = {
|
|
| 58 |
}
|
| 59 |
reverse_voyager_table = {v: k for k, v in voyager_table.items()}
|
| 60 |
|
| 61 |
-
# === Utility functions ===
|
| 62 |
-
|
| 63 |
def string_to_binary_labels(s: str) -> list[int]:
|
| 64 |
bits = []
|
| 65 |
for char in s:
|
|
@@ -77,25 +67,7 @@ def binary_labels_to_string(bits: list[int]) -> str:
|
|
| 77 |
val = sum(b << (5 - j) for j, b in enumerate(chunk))
|
| 78 |
chars.append(voyager_table.get(val, '?'))
|
| 79 |
return ''.join(chars)
|
| 80 |
-
|
| 81 |
-
# def string_to_binary_labels(s: str) -> list[int]:
|
| 82 |
-
# bits = []
|
| 83 |
-
# for char in s:
|
| 84 |
-
# ascii_code = ord(char)
|
| 85 |
-
# char_bits = [(ascii_code >> bit) & 1 for bit in range(7, -1, -1)]
|
| 86 |
-
# bits.extend(char_bits)
|
| 87 |
-
# return bits
|
| 88 |
-
|
| 89 |
-
# def binary_labels_to_string(bits: list[int]) -> str:
|
| 90 |
-
# chars = []
|
| 91 |
-
# for i in range(0, len(bits), 8):
|
| 92 |
-
# byte = bits[i:i+8]
|
| 93 |
-
# if len(byte) < 8:
|
| 94 |
-
# byte += [0] * (8 - len(byte))
|
| 95 |
-
# ascii_val = sum(b << (7 - j) for j, b in enumerate(byte))
|
| 96 |
-
# chars.append(chr(ascii_val))
|
| 97 |
-
# return ''.join(chars)
|
| 98 |
-
|
| 99 |
def clean_image(img: Image.Image, min_size: int = 256) -> Image.Image:
|
| 100 |
img = img.convert("RGB")
|
| 101 |
if img.width < min_size or img.height < min_size:
|
|
@@ -172,52 +144,32 @@ with tab1:
|
|
| 172 |
st.dataframe(df)
|
| 173 |
st.download_button("Download as CSV", df.to_csv(index=False), "text_31_binary_labels.csv")
|
| 174 |
|
| 175 |
-
|
| 176 |
-
ascending_headers = sorted([h for h in mutation_site_headers if h <= 4455])
|
| 177 |
df_sorted = df[[str(h) for h in ascending_headers if str(h) in df.columns]]
|
| 178 |
-
st.subheader("Binary Labels (Ascending Order 3244 →
|
| 179 |
st.dataframe(df_sorted)
|
| 180 |
st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv")
|
| 181 |
|
| 182 |
-
|
| 183 |
-
# st.subheader("Binary Labels (27-bit groups)")
|
| 184 |
-
# groups = []
|
| 185 |
-
# for i in range(0, len(binary_labels), 27):
|
| 186 |
-
# group = binary_labels[i:i+27]
|
| 187 |
-
# group += [0] * (27 - len(group))
|
| 188 |
-
# groups.append(group + [sum(group)])
|
| 189 |
-
|
| 190 |
-
# df_27 = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"])
|
| 191 |
-
# st.dataframe(df_27)
|
| 192 |
-
# st.download_button("Download as CSV", df_27.to_csv(index=False), "text_27_binary_labels.csv")
|
| 193 |
-
|
| 194 |
-
# Tab 3: EF → Binary
|
| 195 |
with tab2:
|
| 196 |
st.write("Upload an Editing Frequency CSV or enter manually:")
|
| 197 |
-
st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to
|
| 198 |
ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
|
| 199 |
|
| 200 |
-
ascending_input_headers = sorted([h for h in mutation_site_headers if 3244 <= h <= 4402])
|
| 201 |
-
high_index_headers = sorted([h for h in mutation_site_headers if h >= 4455])
|
| 202 |
-
|
| 203 |
if ef_file:
|
| 204 |
ef_df = pd.read_csv(ef_file, header=None)
|
| 205 |
-
ef_df.columns = [str(site) for site in
|
| 206 |
-
for h in high_index_headers:
|
| 207 |
-
ef_df[str(h)] = 0 # add dummy columns for high index as 0
|
| 208 |
else:
|
| 209 |
-
ef_df = pd.DataFrame(columns=[str(site) for site in
|
| 210 |
|
| 211 |
edited_df = st.data_editor(ef_df, num_rows="dynamic")
|
| 212 |
|
| 213 |
if st.button("Convert to Binary Labels"):
|
| 214 |
binary_part = pd.DataFrame()
|
| 215 |
-
for col in
|
| 216 |
col_str = str(col)
|
| 217 |
-
threshold =
|
| 218 |
binary_part[col_str] = (edited_df[col_str].astype(float) >= threshold).astype(int)
|
| 219 |
-
for col in high_index_headers:
|
| 220 |
-
binary_part[str(col)] = 0
|
| 221 |
|
| 222 |
binary_reordered = binary_part[[str(h) for h in mutation_site_headers if str(h) in binary_part.columns]]
|
| 223 |
|
|
@@ -236,90 +188,6 @@ with tab2:
|
|
| 236 |
st.subheader("Decoded String (continuous across rows)")
|
| 237 |
st.write(decoded_string)
|
| 238 |
|
| 239 |
-
|
| 240 |
-
st.
|
| 241 |
-
st.
|
| 242 |
-
st.download_button("Download Ascending Order CSV", binary_ascending.to_csv(index=False), "ef_binary_labels_ascending.csv")
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
# # Tab 3: EF → Binary
|
| 246 |
-
# with tab3:
|
| 247 |
-
# st.write("Upload an Editing Frequency CSV or enter manually:")
|
| 248 |
-
# st.write("**Note:** Please upload CSV files **without column headers**. Just the 31 editing frequencies per row.")
|
| 249 |
-
# ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
|
| 250 |
-
|
| 251 |
-
# if ef_file:
|
| 252 |
-
# # Read CSV without headers and assign mutation site headers
|
| 253 |
-
# ef_df = pd.read_csv(ef_file, header=None)
|
| 254 |
-
# ef_df.columns = [str(site) for site in mutation_site_headers]
|
| 255 |
-
# else:
|
| 256 |
-
# ef_df = pd.DataFrame(columns=[str(site) for site in mutation_site_headers])
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
# edited_df = st.data_editor(ef_df, num_rows="dynamic")
|
| 260 |
-
|
| 261 |
-
# if st.button("Convert to Binary Labels"):
|
| 262 |
-
# int_map = {str(k): k for k in thresholds.index}
|
| 263 |
-
# matching_cols = [col for col in edited_df.columns if col in int_map]
|
| 264 |
-
|
| 265 |
-
# binary_part = pd.DataFrame()
|
| 266 |
-
# for col in matching_cols:
|
| 267 |
-
# col_threshold = thresholds[int_map[col]]
|
| 268 |
-
# binary_part[col] = (edited_df[col].astype(float) >= col_threshold).astype(int)
|
| 269 |
-
|
| 270 |
-
# non_binary_part = edited_df.drop(columns=matching_cols, errors='ignore')
|
| 271 |
-
# binary_df = pd.concat([non_binary_part, binary_part], axis=1)
|
| 272 |
-
|
| 273 |
-
# def color_binary(val):
|
| 274 |
-
# if val == 1: return "background-color: lightgreen"
|
| 275 |
-
# if val == 0: return "background-color: lightcoral"
|
| 276 |
-
# return ""
|
| 277 |
-
|
| 278 |
-
# st.subheader("Binary Labels")
|
| 279 |
-
# styled = binary_df.style.applymap(color_binary, subset=matching_cols)
|
| 280 |
-
# st.dataframe(styled)
|
| 281 |
-
# st.download_button("Download CSV", binary_df.to_csv(index=False), "ef_binary_labels.csv")
|
| 282 |
-
|
| 283 |
-
# # Convert to bitstrings and strings
|
| 284 |
-
# binary_strings = []
|
| 285 |
-
# decoded_strings = []
|
| 286 |
-
# for _, row in binary_part.iterrows():
|
| 287 |
-
# bitlist = row.values.tolist()
|
| 288 |
-
# bitstring = ''.join(str(b) for b in bitlist)
|
| 289 |
-
# binary_strings.append(bitstring)
|
| 290 |
-
# decoded_strings.append(binary_labels_to_string(bitlist))
|
| 291 |
-
|
| 292 |
-
# st.subheader("Binary as Bitstrings")
|
| 293 |
-
# for b in binary_strings:
|
| 294 |
-
# st.code(b)
|
| 295 |
-
|
| 296 |
-
# st.subheader("Decoded Voyager Strings")
|
| 297 |
-
# for s in decoded_strings:
|
| 298 |
-
# st.write(s)
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
# # Tab 2: Image to Binary
|
| 302 |
-
# with tab2:
|
| 303 |
-
# uploaded = st.file_uploader("Upload an image (jpg/png)", type=["jpg", "jpeg", "png"])
|
| 304 |
-
# if uploaded:
|
| 305 |
-
# img = Image.open(uploaded)
|
| 306 |
-
# st.image(img, caption="Original", use_column_width=True)
|
| 307 |
-
# cropped = st_cropper(img, realtime_update=True, box_color="blue", aspect_ratio=None)
|
| 308 |
-
# st.image(cropped, caption="Cropped", use_column_width=True)
|
| 309 |
-
|
| 310 |
-
# max_pixels = st.slider("Max pixels to encode", 32, 1024, 256, 32)
|
| 311 |
-
# binary_labels = image_to_binary_labels_rgb(cropped, max_pixels=max_pixels)
|
| 312 |
-
|
| 313 |
-
# st.subheader("Binary Labels from Image")
|
| 314 |
-
# groups = []
|
| 315 |
-
# for i in range(0, len(binary_labels), 32):
|
| 316 |
-
# group = binary_labels[i:i+32]
|
| 317 |
-
# group += [0] * (32 - len(group))
|
| 318 |
-
# groups.append(group + [sum(group)])
|
| 319 |
-
# df = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"])
|
| 320 |
-
# st.dataframe(df)
|
| 321 |
-
|
| 322 |
-
# st.subheader("Reconstructed Image")
|
| 323 |
-
# recon = binary_labels_to_rgb_image(binary_labels)
|
| 324 |
-
# st.image(recon, caption="Reconstructed", use_column_width=True)
|
| 325 |
-
# st.download_button("Download CSV", df.to_csv(index=False), "image_binary_labels.csv")
|
|
|
|
| 33 |
]
|
| 34 |
|
| 35 |
# Thresholds reordered accordingly
|
| 36 |
+
thresholds = pd.Series({h: thresholds_actual[h] for h in mutation_site_headers})
|
| 37 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# === Utility functions ===
|
| 39 |
|
| 40 |
# Voyager ASCII 6-bit conversion table
|
|
|
|
| 50 |
}
|
| 51 |
reverse_voyager_table = {v: k for k, v in voyager_table.items()}
|
| 52 |
|
|
|
|
|
|
|
| 53 |
def string_to_binary_labels(s: str) -> list[int]:
|
| 54 |
bits = []
|
| 55 |
for char in s:
|
|
|
|
| 67 |
val = sum(b << (5 - j) for j, b in enumerate(chunk))
|
| 68 |
chars.append(voyager_table.get(val, '?'))
|
| 69 |
return ''.join(chars)
|
| 70 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
def clean_image(img: Image.Image, min_size: int = 256) -> Image.Image:
|
| 72 |
img = img.convert("RGB")
|
| 73 |
if img.width < min_size or img.height < min_size:
|
|
|
|
| 144 |
st.dataframe(df)
|
| 145 |
st.download_button("Download as CSV", df.to_csv(index=False), "text_31_binary_labels.csv")
|
| 146 |
|
| 147 |
+
ascending_headers = sorted(mutation_site_headers_actual)
|
|
|
|
| 148 |
df_sorted = df[[str(h) for h in ascending_headers if str(h) in df.columns]]
|
| 149 |
+
st.subheader("Binary Labels (Ascending Order 3244 → 4882)")
|
| 150 |
st.dataframe(df_sorted)
|
| 151 |
st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv")
|
| 152 |
|
| 153 |
+
# Tab 2: EF → Binary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
with tab2:
|
| 155 |
st.write("Upload an Editing Frequency CSV or enter manually:")
|
| 156 |
+
st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4882.")
|
| 157 |
ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
|
| 158 |
|
|
|
|
|
|
|
|
|
|
| 159 |
if ef_file:
|
| 160 |
ef_df = pd.read_csv(ef_file, header=None)
|
| 161 |
+
ef_df.columns = [str(site) for site in sorted(mutation_site_headers_actual)]
|
|
|
|
|
|
|
| 162 |
else:
|
| 163 |
+
ef_df = pd.DataFrame(columns=[str(site) for site in sorted(mutation_site_headers_actual)])
|
| 164 |
|
| 165 |
edited_df = st.data_editor(ef_df, num_rows="dynamic")
|
| 166 |
|
| 167 |
if st.button("Convert to Binary Labels"):
|
| 168 |
binary_part = pd.DataFrame()
|
| 169 |
+
for col in sorted(mutation_site_headers_actual):
|
| 170 |
col_str = str(col)
|
| 171 |
+
threshold = thresholds_actual[col]
|
| 172 |
binary_part[col_str] = (edited_df[col_str].astype(float) >= threshold).astype(int)
|
|
|
|
|
|
|
| 173 |
|
| 174 |
binary_reordered = binary_part[[str(h) for h in mutation_site_headers if str(h) in binary_part.columns]]
|
| 175 |
|
|
|
|
| 188 |
st.subheader("Decoded String (continuous across rows)")
|
| 189 |
st.write(decoded_string)
|
| 190 |
|
| 191 |
+
st.subheader("Binary Labels (Ascending 3244→4882)")
|
| 192 |
+
st.dataframe(binary_part)
|
| 193 |
+
st.download_button("Download Ascending Order CSV", binary_part.to_csv(index=False), "ef_binary_labels_ascending.csv")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|