# Streamlit data-transformation tool: merges a "Yoman" event log with a
# "Kasefet" trip table and produces a highlighted final_output.xlsx report.
import streamlit as st
import pandas as pd
import numpy as np
from openpyxl.styles import PatternFill
from io import BytesIO

# Hebrew -> English column mapping for the Yoman event log.
_YOMAN_COLUMN_MAP = {
    '诪驻注讬诇': 'operator',
    '讗砖讻讜诇': 'cluster',
    '诪住驻专 讗讬专讜注': 'numevent',
    '转讗专讬讱 讜砖注转 讛转讞诇转 讗讬专讜注': 'startdate',
    '转讗专讬讱 讜砖注转 住讬讜诐 讗讬专讜注': 'enddate',
    '住讟讟讜住': 'status',
    '诪拽讟讬 拽讜+讻讬讜讜谞讬诐': 'makatdirection',
    '转讬讗讜专 讛讗讬专讜注': 'description',
    '住讜讙 讗讬专讜注': 'event_type',
    '讛注专转 讚讞讬讬讛': 'delay_note',
}

# Columns kept in the final report, in output order.
_FINAL_COLUMNS = [
    'numevent', 'startdate', 'enddate', 'status', 'event_type', 'Direction', 'TripId', 'delay_note',
    'trip_dt', 'ClusterId', 'OfficeLineId', 'LineAlternative', 'trip_time', 'rishui_time',
    'bitzua_history_start_dt', 'bitzua_history_end_dt', 'rishui_bitzua_departure_time_diff',
    'description', 'status_nesia_luz_nm', 'status_av_nesia_luz_nm', 'status_bakara_luz_nm',
]


def _prepare_yoman(yoman_file):
    """Load the Yoman workbook, translate its columns, and parse event dates."""
    yoman_df = pd.read_excel(yoman_file)
    yoman_df = yoman_df.rename(columns=_YOMAN_COLUMN_MAP)
    # Unparseable dates become NaT rather than raising.
    yoman_df['startdate'] = pd.to_datetime(yoman_df['startdate'], errors='coerce')
    yoman_df['enddate'] = pd.to_datetime(yoman_df['enddate'], errors='coerce')
    return yoman_df


def _prepare_kasefet(kasefet_file):
    """Load the Kasefet workbook and build the 'linedirection' join key."""
    kasefet_df = pd.read_excel(kasefet_file)
    # Join key matches Yoman's 'makatdirection' format: "<line>-<direction>".
    kasefet_df['linedirection'] = (
        kasefet_df['OfficeLineId'].astype(str) + "-" + kasefet_df['Direction'].astype(str)
    )
    return kasefet_df


def _merge_and_filter(yoman_df, kasefet_df):
    """Left-join Kasefet trips onto Yoman events and keep trips inside the event window."""
    merged_df = pd.merge(
        yoman_df,
        kasefet_df,
        how='left',  # keep every Yoman event even without a matching trip
        left_on='makatdirection',
        right_on='linedirection',
    )
    merged_df['trip_dt'] = pd.to_datetime(merged_df['trip_dt'], errors='coerce')
    # Keep only trips that fall within the event's [startdate, enddate] window.
    date_mask = merged_df['trip_dt'].between(
        merged_df['startdate'], merged_df['enddate'], inclusive='both'
    )
    return merged_df[date_mask].copy()


def _append_missing_events(final_df, yoman_df):
    """Re-add Yoman events that lost every row in the date filter, with trip fields blank."""
    missing_numevents = set(yoman_df['numevent']) - set(final_df['numevent'])
    if not missing_numevents:
        return final_df
    new_rows = []
    for numevent in missing_numevents:
        yoman_row = yoman_df[yoman_df['numevent'] == numevent].iloc[0]
        # Only Yoman-side fields are known; every Kasefet-side field stays NaN.
        row = {col: np.nan for col in _FINAL_COLUMNS}
        row.update({
            'numevent': numevent,
            'status': yoman_row['status'],
            'event_type': yoman_row['event_type'],
            'delay_note': yoman_row['delay_note'],
            'description': yoman_row['description'],
            'startdate': yoman_row['startdate'],
            'enddate': yoman_row['enddate'],
        })
        new_rows.append(row)
    new_rows_df = pd.DataFrame(new_rows)
    # Drop all-NaN columns so concat doesn't warn about empty/object columns.
    new_rows_df = new_rows_df.dropna(axis=1, how='all')
    return pd.concat([final_df, new_rows_df], ignore_index=True)


def _write_highlighted_excel(final_df, output_path):
    """Write the report and paint rows flagged 'not detected' with a pink fill."""
    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        final_df.to_excel(writer, index=False, sheet_name='Data')
        worksheet = writer.sheets['Data']
        pink_fill = PatternFill(start_color='FFB2D5', end_color='FFB2D5', fill_type='solid')
        n_cols = final_df.shape[1]
        # Only visit flagged rows; +2 skips the header row and converts to 1-based.
        flagged_positions = np.flatnonzero((final_df['not detected'] == 'no').to_numpy())
        for pos in flagged_positions:
            for col in range(n_cols):
                worksheet.cell(row=int(pos) + 2, column=col + 1).fill = pink_fill


def process_files(yoman_file, kasefet_file, output_path='final_output.xlsx'):
    """Merge the Yoman and Kasefet Excel files into a highlighted Excel report.

    Parameters
    ----------
    yoman_file : path or file-like
        Yoman event log (.xlsx) with Hebrew column headers.
    kasefet_file : path or file-like
        Kasefet trip table (.xlsx) with 'OfficeLineId', 'Direction', 'trip_dt', etc.
    output_path : str, optional
        Where to write the result; defaults to 'final_output.xlsx' as before.

    Returns
    -------
    str
        The path of the written workbook.
    """
    yoman_df = _prepare_yoman(yoman_file)
    kasefet_df = _prepare_kasefet(kasefet_file)

    filtered_df = _merge_and_filter(yoman_df, kasefet_df)
    final_df = filtered_df[_FINAL_COLUMNS].copy()
    final_df = _append_missing_events(final_df, yoman_df)

    # Electronic-control flag: descriptions starting with a "not for control"
    # prefix are excluded. na=False so a missing description does not become
    # NaN (which np.where treats as truthy) and wrongly exclude the row.
    final_df['讘拽专讛 讗诇拽讟专讜谞讬转'] = np.where(
        final_df['description'].str.startswith(('诇讗 诇讘拽专讛', '诇讗', '诇讗 讘拽专讛'), na=False),
        '诇讗 诇讘拽专讛 讗诇拽讟专讜谞讬转',
        '讻谉 诇讘拽专讛 讗诇拽讟专讜谞讬转'
    )

    # Events with no matching trip (TripId is NaN) are marked for highlighting.
    final_df['not detected'] = np.where(final_df['TripId'].isna(), 'no', '')

    _write_highlighted_excel(final_df, output_path)
    return output_path
# ---- Streamlit user interface ----
st.title('Data Transformation Tool')
st.markdown("""
Please upload the Yoman and Kasefet files for processing.
""")

# Collect the two input workbooks from the user.
yoman_file = st.file_uploader("Upload Yoman File", type=["xlsx"])
kasefet_file = st.file_uploader("Upload Kasefet File", type=["xlsx"])

# The button only appears once both workbooks are present; processing runs
# only when it is clicked.
if yoman_file and kasefet_file and st.button("Process and Download Output"):
    result_path = process_files(yoman_file, kasefet_file)
    st.success("Processing completed successfully!")
    # Stream the generated workbook back to the browser.
    with open(result_path, "rb") as fh:
        st.download_button(
            label="Download Final Output",
            data=fh,
            file_name="final_output.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )