# Streamlit data-transformation tool: merges a "Yoman" event log with a
# "Kasefet" trip table and produces a highlighted final_output.xlsx report.
import streamlit as st
import pandas as pd
import numpy as np
from openpyxl.styles import PatternFill
from io import BytesIO

# Hebrew -> English column mapping for the Yoman event log.
_YOMAN_COLUMN_MAP = {
    '诪驻注讬诇': 'operator',
    '讗砖讻讜诇': 'cluster',
    '诪住驻专 讗讬专讜注': 'numevent',
    '转讗专讬讱 讜砖注转 讛转讞诇转 讗讬专讜注': 'startdate',
    '转讗专讬讱 讜砖注转 住讬讜诐 讗讬专讜注': 'enddate',
    '住讟讟讜住': 'status',
    '诪拽讟讬 拽讜+讻讬讜讜谞讬诐': 'makatdirection',
    '转讬讗讜专 讛讗讬专讜注': 'description',
    '住讜讙 讗讬专讜注': 'event_type',
    '讛注专转 讚讞讬讬讛': 'delay_note',
}

# Columns kept in the final report, in output order.
_FINAL_COLUMNS = [
    'numevent', 'startdate', 'enddate', 'status', 'event_type', 'Direction', 'TripId', 'delay_note',
    'trip_dt', 'ClusterId', 'OfficeLineId', 'LineAlternative', 'trip_time', 'rishui_time',
    'bitzua_history_start_dt', 'bitzua_history_end_dt', 'rishui_bitzua_departure_time_diff',
    'description', 'status_nesia_luz_nm', 'status_av_nesia_luz_nm', 'status_bakara_luz_nm',
]


def _prepare_yoman(yoman_file):
    """Load the Yoman workbook, translate its columns, and parse event dates."""
    yoman_df = pd.read_excel(yoman_file)
    yoman_df = yoman_df.rename(columns=_YOMAN_COLUMN_MAP)
    # Unparseable dates become NaT rather than raising.
    yoman_df['startdate'] = pd.to_datetime(yoman_df['startdate'], errors='coerce')
    yoman_df['enddate'] = pd.to_datetime(yoman_df['enddate'], errors='coerce')
    return yoman_df


def _prepare_kasefet(kasefet_file):
    """Load the Kasefet workbook and build the 'linedirection' join key."""
    kasefet_df = pd.read_excel(kasefet_file)
    # Join key matches Yoman's 'makatdirection' format: "<line>-<direction>".
    kasefet_df['linedirection'] = (
        kasefet_df['OfficeLineId'].astype(str) + "-" + kasefet_df['Direction'].astype(str)
    )
    return kasefet_df


def _merge_and_filter(yoman_df, kasefet_df):
    """Left-join Kasefet trips onto Yoman events and keep trips inside the event window."""
    merged_df = pd.merge(
        yoman_df,
        kasefet_df,
        how='left',  # keep every Yoman event even without a matching trip
        left_on='makatdirection',
        right_on='linedirection',
    )
    merged_df['trip_dt'] = pd.to_datetime(merged_df['trip_dt'], errors='coerce')
    # Keep only trips that fall within the event's [startdate, enddate] window.
    date_mask = merged_df['trip_dt'].between(
        merged_df['startdate'], merged_df['enddate'], inclusive='both'
    )
    return merged_df[date_mask].copy()


def _append_missing_events(final_df, yoman_df):
    """Re-add Yoman events that lost every row in the date filter, with trip fields blank."""
    missing_numevents = set(yoman_df['numevent']) - set(final_df['numevent'])
    if not missing_numevents:
        return final_df
    new_rows = []
    for numevent in missing_numevents:
        yoman_row = yoman_df[yoman_df['numevent'] == numevent].iloc[0]
        # Only Yoman-side fields are known; every Kasefet-side field stays NaN.
        row = {col: np.nan for col in _FINAL_COLUMNS}
        row.update({
            'numevent': numevent,
            'status': yoman_row['status'],
            'event_type': yoman_row['event_type'],
            'delay_note': yoman_row['delay_note'],
            'description': yoman_row['description'],
            'startdate': yoman_row['startdate'],
            'enddate': yoman_row['enddate'],
        })
        new_rows.append(row)
    new_rows_df = pd.DataFrame(new_rows)
    # Drop all-NaN columns so concat doesn't warn about empty/object columns.
    new_rows_df = new_rows_df.dropna(axis=1, how='all')
    return pd.concat([final_df, new_rows_df], ignore_index=True)


def _write_highlighted_excel(final_df, output_path):
    """Write the report and paint rows flagged 'not detected' with a pink fill."""
    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        final_df.to_excel(writer, index=False, sheet_name='Data')
        worksheet = writer.sheets['Data']
        pink_fill = PatternFill(start_color='FFB2D5', end_color='FFB2D5', fill_type='solid')
        n_cols = final_df.shape[1]
        # Only visit flagged rows; +2 skips the header row and converts to 1-based.
        flagged_positions = np.flatnonzero((final_df['not detected'] == 'no').to_numpy())
        for pos in flagged_positions:
            for col in range(n_cols):
                worksheet.cell(row=int(pos) + 2, column=col + 1).fill = pink_fill


def process_files(yoman_file, kasefet_file, output_path='final_output.xlsx'):
    """Merge the Yoman and Kasefet Excel files into a highlighted Excel report.

    Parameters
    ----------
    yoman_file : path or file-like
        Yoman event log (.xlsx) with Hebrew column headers.
    kasefet_file : path or file-like
        Kasefet trip table (.xlsx) with 'OfficeLineId', 'Direction', 'trip_dt', etc.
    output_path : str, optional
        Where to write the result; defaults to 'final_output.xlsx' as before.

    Returns
    -------
    str
        The path of the written workbook.
    """
    yoman_df = _prepare_yoman(yoman_file)
    kasefet_df = _prepare_kasefet(kasefet_file)

    filtered_df = _merge_and_filter(yoman_df, kasefet_df)
    final_df = filtered_df[_FINAL_COLUMNS].copy()
    final_df = _append_missing_events(final_df, yoman_df)

    # Electronic-control flag: descriptions starting with a "not for control"
    # prefix are excluded. na=False so a missing description does not become
    # NaN (which np.where treats as truthy) and wrongly exclude the row.
    final_df['讘拽专讛 讗诇拽讟专讜谞讬转'] = np.where(
        final_df['description'].str.startswith(('诇讗 诇讘拽专讛', '诇讗', '诇讗 讘拽专讛'), na=False),
        '诇讗 诇讘拽专讛 讗诇拽讟专讜谞讬转',
        '讻谉 诇讘拽专讛 讗诇拽讟专讜谞讬转'
    )

    # Events with no matching trip (TripId is NaN) are marked for highlighting.
    final_df['not detected'] = np.where(final_df['TripId'].isna(), 'no', '')

    _write_highlighted_excel(final_df, output_path)
    return output_path
# ---- Streamlit user interface ----
st.title('Data Transformation Tool')
st.markdown("""
Please upload the Yoman and Kasefet files for processing.
""")

# Collect the two input workbooks from the user.
yoman_file = st.file_uploader("Upload Yoman File", type=["xlsx"])
kasefet_file = st.file_uploader("Upload Kasefet File", type=["xlsx"])

# The button only appears once both workbooks are present; processing runs
# only when it is clicked.
if yoman_file and kasefet_file and st.button("Process and Download Output"):
    result_path = process_files(yoman_file, kasefet_file)
    st.success("Processing completed successfully!")
    # Stream the generated workbook back to the browser.
    with open(result_path, "rb") as fh:
        st.download_button(
            label="Download Final Output",
            data=fh,
            file_name="final_output.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )