wweavishayaknin committed on
Commit
10f8273
verified
1 Parent(s): 5225a15

Upload streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +141 -0
streamlit_app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from openpyxl.styles import PatternFill
5
+ from io import BytesIO
6
+
7
# Function to handle file transformation
def process_files(yoman_file, kasefet_file, output_path='final_output.xlsx'):
    """Match Yoman events to Kasefet trips and write a highlighted Excel report.

    Each Yoman row is left-joined to the Kasefet rows whose
    ``"<OfficeLineId>-<Direction>"`` key equals the event's
    ``makatdirection`` value. Only joined rows whose ``trip_dt`` lies inside
    the event's ``[startdate, enddate]`` window (inclusive) are kept. Events
    that end up with no row at all are appended once with empty trip columns,
    flagged ``'no'`` in the ``'not detected'`` column and filled pink in the
    workbook.

    Args:
        yoman_file: Excel source for the event rows; anything
            ``pandas.read_excel`` accepts. Must contain the Hebrew headers
            renamed below.
        kasefet_file: Excel source for the trip rows; anything
            ``pandas.read_excel`` accepts. Must contain ``OfficeLineId``,
            ``Direction``, ``trip_dt`` and the other trip columns listed in
            ``final_columns``.
        output_path: Where the report workbook is written. Defaults to
            ``'final_output.xlsx'`` in the current working directory.

    Returns:
        ``output_path``, after the workbook has been written.

    Raises:
        KeyError: If an expected column is absent from either input.
    """
    # Load the data from the two files (Excel format)
    yoman_df = pd.read_excel(yoman_file)
    kasefet_df = pd.read_excel(kasefet_file)

    # Join key on the trip side: "<OfficeLineId>-<Direction>".
    kasefet_df['linedirection'] = (
        kasefet_df['OfficeLineId'].astype(str) + "-" + kasefet_df['Direction'].astype(str)
    )

    # Rename the Hebrew headers of the Yoman sheet to ASCII identifiers.
    # NOTE(review): these literals were restored from a mis-encoded copy of
    # the file - confirm they match the real spreadsheet headers exactly.
    yoman_df = yoman_df.rename(columns={
        'מפעיל': 'operator',
        'אשכול': 'cluster',
        'מספר אירוע': 'numevent',
        'תאריך ושעת התחלת אירוע': 'startdate',
        'תאריך ושעת סיום אירוע': 'enddate',
        'סטטוס': 'status',
        'מקטי קו+כיוונים': 'makatdirection',
        'תיאור האירוע': 'description',
        'סוג אירוע': 'event_type',
        'הערת דחייה': 'delay_note',
    })

    # Unparseable dates become NaT and therefore never satisfy the window test.
    yoman_df['startdate'] = pd.to_datetime(yoman_df['startdate'], errors='coerce')
    yoman_df['enddate'] = pd.to_datetime(yoman_df['enddate'], errors='coerce')

    # Left join keeps every Yoman row even when no trip shares its key.
    merged_df = pd.merge(
        yoman_df,
        kasefet_df,
        how='left',
        left_on='makatdirection',
        right_on='linedirection',
    )
    merged_df['trip_dt'] = pd.to_datetime(merged_df['trip_dt'], errors='coerce')

    # Keep only trips that fall inside their event's time window.
    in_window = merged_df['trip_dt'].between(
        merged_df['startdate'], merged_df['enddate'], inclusive='both'
    )

    # Select relevant columns for the final DataFrame
    final_columns = [
        'numevent', 'startdate', 'enddate', 'status', 'event_type', 'Direction', 'TripId', 'delay_note',
        'trip_dt', 'ClusterId', 'OfficeLineId', 'LineAlternative', 'trip_time', 'rishui_time',
        'bitzua_history_start_dt', 'bitzua_history_end_dt', 'rishui_bitzua_departure_time_diff',
        'description', 'status_nesia_luz_nm', 'status_av_nesia_luz_nm', 'status_bakara_luz_nm',
    ]
    final_df = merged_df.loc[in_window, final_columns].copy()

    # Re-add events that lost every row to the window filter: one row per
    # event, in Yoman order, carrying only the event-side columns. The trip
    # columns are left out so the concat below fills them with NaN/NaT.
    event_columns = [
        'numevent', 'status', 'event_type', 'delay_note',
        'description', 'startdate', 'enddate',
    ]
    unmatched = ~yoman_df['numevent'].isin(final_df['numevent'])
    missing_df = (
        yoman_df.loc[unmatched, event_columns]
        .drop_duplicates(subset='numevent')
        # All-NaN columns are dropped so they cannot influence the dtypes
        # chosen by the concat.
        .dropna(axis=1, how='all')
    )
    if not missing_df.empty:
        final_df = pd.concat([final_df, missing_df], ignore_index=True)

    # Add the electronic control column. na=False makes a missing description
    # count as "does not start with the prefix"; without it NaN is truthy in
    # np.where and such rows would be labelled "not for electronic control".
    no_control_prefixes = ('לא לבקרה', 'לא', 'לא בקרה')
    final_df['בקרה אלקטרונית'] = np.where(
        final_df['description'].str.startswith(no_control_prefixes, na=False),
        'לא לבקרה אלקטרונית',
        'כן לבקרה אלקטרונית',
    )

    # Flag rows that have no matching trip.
    final_df['not detected'] = np.where(final_df['TripId'].isna(), 'no', '')

    # Write the sheet, then fill every cell of the flagged rows in pink.
    pink_fill = PatternFill(start_color='FFB2D5', end_color='FFB2D5', fill_type='solid')
    column_count = final_df.shape[1]
    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        final_df.to_excel(writer, index=False, sheet_name='Data')
        worksheet = writer.sheets['Data']

        # Sheet rows are 1-based and row 1 holds the header, so the first
        # data row is sheet row 2.
        for sheet_row, flag in enumerate(final_df['not detected'], start=2):
            if flag != 'no':
                continue
            for sheet_col in range(1, column_count + 1):
                worksheet.cell(row=sheet_row, column=sheet_col).fill = pink_fill

    return output_path
117
+
118
# Streamlit interface: upload the two workbooks, run the transformation on
# demand and offer the resulting report for download.
st.title('Data Transformation Tool')

st.markdown("""
Please upload the Yoman and Kasefet files for processing.
""")

# Upload files
yoman_file = st.file_uploader("Upload Yoman File", type=["xlsx"])
kasefet_file = st.file_uploader("Upload Kasefet File", type=["xlsx"])

# The button is only rendered once both uploads are present.
if yoman_file and kasefet_file and st.button("Process and Download Output"):
    try:
        output_file = process_files(yoman_file, kasefet_file)
    except (KeyError, ValueError) as exc:
        # A workbook with missing columns or an unmergeable key would
        # otherwise surface as a raw traceback in the page.
        st.error(
            "Could not process the uploaded files. Check that both workbooks "
            f"contain the expected columns. Details: {exc}"
        )
    else:
        st.success("Processing completed successfully!")

        # Read the report into memory so the download does not depend on an
        # open handle to the file on disk.
        with open(output_file, "rb") as f:
            report_bytes = f.read()

        # Provide download link for the final output file
        st.download_button(
            label="Download Final Output",
            data=report_bytes,
            file_name="final_output.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )