Spaces:
Sleeping
Sleeping
| import os | |
| os.system("pip install streamlit pandas xlsxwriter openpyxl") | |
| import streamlit as st | |
| import pandas as pd | |
| from io import BytesIO | |
| st.set_page_config(page_title="Protein Repeat Comparator", layout="centered") | |
| st.title("𧬠Protein Repeat Comparator") | |
| st.write("Upload two Excel files with protein data. Frequency values should start from the first row (header).") | |
| uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"]) | |
| uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"]) | |
| if uploaded_file1 and uploaded_file2: | |
| try: | |
| df1 = pd.read_excel(uploaded_file1, header=0) | |
| df2 = pd.read_excel(uploaded_file2, header=0) | |
| df1.columns = df1.columns.astype(str) | |
| df2.columns = df2.columns.astype(str) | |
| id_col = df1.columns[0] | |
| name_col = df1.columns[1] | |
| repeat_columns = df1.columns[2:] | |
| differences = [] | |
| for _, row1 in df1.iterrows(): | |
| entry_id = row1[id_col] | |
| protein_name = row1[name_col] | |
| row2_match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)] | |
| if row2_match.empty: | |
| continue | |
| row2 = row2_match.iloc[0] | |
| for repeat_col in repeat_columns: | |
| freq1 = row1[repeat_col] | |
| freq2 = row2[repeat_col] | |
| if pd.isna(freq1) or pd.isna(freq2): | |
| continue | |
| if freq1 != freq2: | |
| if freq1 == 0: | |
| pct_change = "Infinity" | |
| else: | |
| pct_change = ((freq2 - freq1) / freq1) * 100 | |
| pct_change = round(pct_change, 2) | |
| diff = abs(freq1 - freq2) | |
| differences.append({ | |
| id_col: entry_id, | |
| name_col: protein_name, | |
| "Repeat": repeat_col, | |
| "Frequency File 1": freq1, | |
| "Frequency File 2": freq2, | |
| "Difference": diff, | |
| "%age Change": pct_change | |
| }) | |
| if differences: | |
| result_df = pd.DataFrame(differences) | |
| result_df = result_df.sort_values(by="Difference", ascending=False) | |
| # Show DataFrame in Streamlit app | |
| st.subheader("π View Changed Repeats") | |
| st.dataframe(result_df, use_container_width=True) | |
| # Apply styling | |
| def color_pct(val): | |
| if isinstance(val, str) and val == "Infinity": | |
| return 'color: green' | |
| elif isinstance(val, (int, float)): | |
| if val > 0: | |
| return 'color: green' | |
| elif val < 0: | |
| return 'color: red' | |
| return '' | |
| styled_df = result_df.style.applymap(color_pct, subset=["%age Change"]) | |
| # Save styled output | |
| output = BytesIO() | |
| with pd.ExcelWriter(output, engine='openpyxl') as writer: | |
| styled_df.to_excel(writer, index=False, sheet_name="Changed Repeats") | |
| output.seek(0) | |
| st.download_button( | |
| label="π₯ Download Excel File", | |
| data=output, | |
| file_name="changed_repeats_with_percentage.xlsx", | |
| mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | |
| ) | |
| else: | |
| st.info("No changes in repeat frequencies were found.") | |
| except Exception as e: | |
| st.error(f"β Error: {e}") |