Spaces:
Sleeping
Sleeping
Upload streamlit_app.py
Browse files- streamlit_app.py +141 -0
streamlit_app.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from openpyxl.styles import PatternFill
|
| 5 |
+
from io import BytesIO
|
| 6 |
+
|
| 7 |
+
# Function to handle file transformation
|
| 8 |
+
def process_files(yoman_file, kasefet_file):
    """Join the Yoman event log with the Kasefet trip registry and write a report.

    Parameters
    ----------
    yoman_file, kasefet_file : file-like or path
        Excel (.xlsx) sources readable by ``pandas.read_excel``.

    Returns
    -------
    str
        Path of the generated workbook (``'final_output.xlsx'``). Side effect:
        the file is written to the current working directory.
    """
    # Load the data from the two files (Excel format).
    yoman_df = pd.read_excel(yoman_file)
    kasefet_df = pd.read_excel(kasefet_file)

    # Build the join key on the kasefet side: "<OfficeLineId>-<Direction>".
    kasefet_df['linedirection'] = (
        kasefet_df['OfficeLineId'].astype(str) + "-" + kasefet_df['Direction'].astype(str)
    )

    # Normalize the Hebrew yoman headers to internal English names.
    yoman_df = yoman_df.rename(columns={
        '诪驻注讬诇': 'operator',
        '讗砖讻讜诇': 'cluster',
        '诪住驻专 讗讬专讜注': 'numevent',
        '转讗专讬讱 讜砖注转 讛转讞诇转 讗讬专讜注': 'startdate',
        '转讗专讬讱 讜砖注转 住讬讜诐 讗讬专讜注': 'enddate',
        '住讟讟讜住': 'status',
        '诪拽讟讬 拽讜+讻讬讜讜谞讬诐': 'makatdirection',
        '转讬讗讜专 讛讗讬专讜注': 'description',
        '住讜讙 讗讬专讜注': 'event_type',
        '讛注专转 讚讞讬讬讛': 'delay_note'
    })

    # Coerce event boundaries to datetime; unparseable values become NaT.
    yoman_df['startdate'] = pd.to_datetime(yoman_df['startdate'], errors='coerce')
    yoman_df['enddate'] = pd.to_datetime(yoman_df['enddate'], errors='coerce')

    # Left join keeps every yoman event even when no matching trip exists.
    merged_df = pd.merge(
        yoman_df,
        kasefet_df,
        how='left',
        left_on='makatdirection',
        right_on='linedirection'
    )

    # Keep only trips whose departure falls inside the event window (inclusive).
    merged_df['trip_dt'] = pd.to_datetime(merged_df['trip_dt'], errors='coerce')
    date_mask = merged_df['trip_dt'].between(
        merged_df['startdate'], merged_df['enddate'], inclusive='both'
    )
    filtered_df = merged_df[date_mask].copy()

    # Columns published in the final report, in output order.
    final_columns = [
        'numevent', 'startdate', 'enddate', 'status', 'event_type', 'Direction', 'TripId', 'delay_note',
        'trip_dt', 'ClusterId', 'OfficeLineId', 'LineAlternative', 'trip_time', 'rishui_time',
        'bitzua_history_start_dt', 'bitzua_history_end_dt', 'rishui_bitzua_departure_time_diff',
        'description', 'status_nesia_luz_nm', 'status_av_nesia_luz_nm', 'status_bakara_luz_nm'
    ]
    final_df = filtered_df[final_columns].copy()

    # Re-add yoman events that the merge/date filter dropped entirely, so every
    # numevent appears in the report (trip fields stay NaN for these rows).
    missing_numevents = set(yoman_df['numevent']) - set(final_df['numevent'])
    if missing_numevents:
        # Fields carried over from the yoman row; everything else is NaN.
        carried = ('status', 'event_type', 'delay_note', 'description',
                   'startdate', 'enddate')
        new_rows = []
        for numevent in missing_numevents:
            yoman_row = yoman_df[yoman_df['numevent'] == numevent].iloc[0]
            new_row = {col: np.nan for col in final_columns}
            new_row['numevent'] = numevent
            for col in carried:
                new_row[col] = yoman_row[col]
            new_rows.append(new_row)

        new_rows_df = pd.DataFrame(new_rows)
        # Drop all-NaN columns so concat does not introduce empty object columns.
        new_rows_df = new_rows_df.dropna(axis=1, how='all')
        final_df = pd.concat([final_df, new_rows_df], ignore_index=True)

    # Classify electronic control: descriptions starting with a "not for
    # control" prefix are excluded. BUGFIX: na=False — str.startswith yields
    # NaN for missing descriptions and np.where treats NaN as truthy, which
    # wrongly tagged NaN-description rows (e.g. the re-added events above) as
    # excluded; they now fall through to the default label.
    final_df['讘拽专讛 讗诇拽讟专讜谞讬转'] = np.where(
        final_df['description'].str.startswith(('诇讗 诇讘拽专讛', '诇讗', '诇讗 讘拽专讛'), na=False),
        '诇讗 诇讘拽专讛 讗诇拽讟专讜谞讬转',
        '讻谉 诇讘拽专讛 讗诇拽讟专讜谞讬转'
    )

    # Rows with no matched trip are flagged 'no' (i.e. "no find results").
    final_df['not detected'] = np.where(final_df['TripId'].isna(), 'no', '')

    # Write the report and highlight every 'not detected' row in pink.
    with pd.ExcelWriter('final_output.xlsx', engine='openpyxl') as writer:
        final_df.to_excel(writer, index=False, sheet_name='Data')
        worksheet = writer.sheets['Data']

        pink_fill = PatternFill(start_color='FFB2D5', end_color='FFB2D5', fill_type='solid')
        n_cols = final_df.shape[1]
        # Iterate the flag column directly instead of per-row .iloc lookups.
        # row_idx + 2: one offset for the header row, one for Excel's 1-based rows.
        for row_idx, flag in enumerate(final_df['not detected']):
            if flag == 'no':
                for col_idx in range(n_cols):
                    worksheet.cell(row=row_idx + 2, column=col_idx + 1).fill = pink_fill

    return 'final_output.xlsx'
|
| 117 |
+
|
| 118 |
+
# Streamlit interface: page header, two upload widgets, and a process button.
# This top-level script re-runs on every user interaction (Streamlit model).
st.title('Data Transformation Tool')

st.markdown("""
Please upload the Yoman and Kasefet files for processing.
""")

# Upload files — both restricted to .xlsx, matching what process_files reads.
yoman_file = st.file_uploader("Upload Yoman File", type=["xlsx"])
kasefet_file = st.file_uploader("Upload Kasefet File", type=["xlsx"])

# Only offer processing once both uploads are present.
if yoman_file and kasefet_file:
    if st.button("Process and Download Output"):
        # process_files writes 'final_output.xlsx' to the working directory
        # and returns its path.
        output_file = process_files(yoman_file, kasefet_file)
        st.success("Processing completed successfully!")

        # Provide download link for the final output file.
        # NOTE(review): the file handle is passed while the `with` block is
        # open; Streamlit reads it before the block exits.
        with open(output_file, "rb") as f:
            st.download_button(
                label="Download Final Output",
                data=f,
                file_name="final_output.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )
|