Spaces:
Running
Running
| # !pip install streamlit | |
| # !pip install pandas | |
| import pandas as pd | |
| import streamlit as st | |
| import base64 | |
| import io | |
| import base64 | |
| # Functions | |
def map_data_to_template(mapping_df, template_df, data_df):
    """Build a frame shaped like the template, filled from the source data.

    For every column of ``template_df`` the conversion key is consulted for
    the source ("MEDLab") variables that map to it. The first such variable
    that exists in ``data_df`` and holds at least one non-null value supplies
    the column. Template columns without a usable source are filled with
    ``None``.

    Args:
        mapping_df: Conversion key with the columns ``'MEDLab Variable'``
            (source column name) and ``'NDA Variable'`` (template column
            name). A source variable may appear on several rows to feed
            several template columns.
        template_df: Frame whose columns define the names and order of the
            output columns. Its rows are not used.
        data_df: Source data. Its row index is reused for the output.

    Returns:
        A DataFrame with the columns of ``template_df`` and one row per row
        of ``data_df``.

    Raises:
        KeyError: If ``mapping_df`` lacks either of the two key columns.
    """
    # Group candidate source variables per target column once, in key-row
    # order. Iterating the rows (rather than building a source->target dict)
    # keeps every row even when one source variable maps to several targets.
    candidates = {}
    for medlab_var, nda_var in zip(mapping_df['MEDLab Variable'],
                                   mapping_df['NDA Variable']):
        candidates.setdefault(nda_var, []).append(medlab_var)

    row_count = len(data_df)
    mapped = {}
    for nda_var in template_df.columns:
        # Default: an all-None column carrying the source index so that it
        # lines up with the mapped columns.
        column = pd.Series([None] * row_count, index=data_df.index, dtype=object)

        # Take the first candidate that is present and has data.
        for medlab_var in candidates.get(nda_var, ()):
            if medlab_var not in data_df.columns:
                continue
            source = data_df[medlab_var]
            if source.isnull().all():
                continue
            # Source columns whose name contains 'date' are rendered as
            # MM/DD/YYYY; unparseable values become missing.
            if isinstance(medlab_var, str) and 'date' in medlab_var:
                column = pd.to_datetime(source, errors='coerce').dt.strftime('%m/%d/%Y')
            else:
                column = source
            break  # Stop checking once we've mapped one

        mapped[nda_var] = column

    # Assemble in one step, on the source index, in template column order.
    return pd.DataFrame(mapped, index=data_df.index, columns=template_df.columns)
| # Streamlit app | |
def main():
    """Render the Streamlit page for the data transformer.

    Shows three CSV uploaders (template, data, conversion key). Once all
    three files are provided, they are parsed, passed to
    ``map_data_to_template``, and the result is displayed and offered as a
    CSV download. Problems with the uploads are reported on the page
    instead of raising.
    """
    st.markdown("<h1 style='text-align: center; color: #E694FF;'>Data Transformer</h1>", unsafe_allow_html=True)

    # File Uploader for each CSV
    st.subheader("Upload Files")
    nimh_template_file = st.file_uploader("Choose NIMH Template CSV", type=['csv'])
    redcap_data_file = st.file_uploader("Choose REDCap Data CSV", type=['csv'])
    conversion_key_file = st.file_uploader("Choose Conversion Key CSV", type=['csv'])

    # Nothing to do until all three files have been uploaded.
    if not (nimh_template_file and redcap_data_file and conversion_key_file):
        return

    def read_uploaded_csv(uploaded_file, **read_kwargs):
        # 'utf-8-sig' decodes plain UTF-8 and also drops a leading byte-order
        # mark, which would otherwise end up in the first column name.
        text = uploaded_file.getvalue().decode('utf-8-sig')
        return pd.read_csv(io.StringIO(text), **read_kwargs)

    # Convert the file objects to DataFrames
    try:
        # skiprows=1: the template's header is taken from its second line.
        # NOTE(review): presumably the first line is a structure-name row;
        # confirm against a real template file.
        nimh_template_df = read_uploaded_csv(nimh_template_file, skiprows=1)
        redcap_data_df = read_uploaded_csv(redcap_data_file)
        conversion_key_df = read_uploaded_csv(conversion_key_file)
    except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Could not read the uploaded CSV files: {exc}")
        return

    # The mapping step looks these two columns up by name.
    required_columns = ('MEDLab Variable', 'NDA Variable')
    missing_columns = [name for name in required_columns if name not in conversion_key_df.columns]
    if missing_columns:
        st.error(f"Conversion Key CSV is missing required column(s): {', '.join(missing_columns)}")
        return

    transformed_data_df = map_data_to_template(
        conversion_key_df,
        nimh_template_df,
        redcap_data_df,
    )

    # Display transformed data
    st.subheader("Transformed Data")
    st.write(transformed_data_df)

    # Download button for transformed data
    st.subheader("Download Transformed Data")
    csv = transformed_data_df.to_csv(index=False)
    # A native download button replaces the hand-built base64 data-URI link,
    # which used the invalid MIME type 'file/csv' and required raw HTML.
    st.download_button(
        label="Download CSV File",
        data=csv,
        file_name="transformed_data.csv",
        mime="text/csv",
    )
# Start the app only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()