Spaces:

HyraXuna
/

Getaround_Project_YP

Sleeping

App Files Files Community

HyraXuna commited on Mar 18, 2025

Commit

eef5e54

verified ·

1 Parent(s): d423f81

Uplaod of files and app

Browse files

Files changed (7) hide show

.gitattributes +2 -0
.streamlit/config.toml +12 -0
Aventurine_3.png +3 -0
ChibiElf1.png +3 -0
Dockerfile +37 -0
app.py +637 -0
requirements.txt +8 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Aventurine_3.png filter=lfs diff=lfs merge=lfs -text
+ChibiElf1.png filter=lfs diff=lfs merge=lfs -text

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,12 @@

+# NOTE(review): 0.0.0.0 is normally a bind address rather than one a browser can open — confirm this value is intended for browser.serverAddress.
+[browser]
+serverAddress = '0.0.0.0'
+# NOTE(review): dataFrameSerialization = "legacy" looks like an option from older Streamlit releases — verify it is still accepted by the installed version.
+[global]
+dataFrameSerialization = "legacy"
+# Colour palette of the app; backgroundColor (#BDDFD6) is the same value app.py hard-codes as plot_bgcolor for its charts.
+[theme]
+base="light"
+primaryColor="#D59A6F"
+backgroundColor="#BDDFD6"
+secondaryBackgroundColor="#FFDEAD"

Aventurine_3.png ADDED Viewed

Git LFS Details

SHA256: e8b209c5d00a3da07c6b4fda75ef7a4fbfc9e07a67a4c091b53c603c39cbbaee
Pointer size: 131 Bytes
Size of remote file: 180 kB

ChibiElf1.png ADDED Viewed

Git LFS Details

SHA256: f1e518fa8058731da5799aa6c8a5a852450ba2c54eea86545f4fe9d867358220
Pointer size: 132 Bytes
Size of remote file: 2.78 MB

Dockerfile ADDED Viewed

	@@ -0,0 +1,37 @@

+# Start with a lightweight Linux Anaconda image
+FROM continuumio/miniconda3
+# Refresh the package index and install nano, unzip and curl in a single layer.
+# A standalone "RUN apt-get update" layer is cached by Docker, so a later
+# "apt-get install" could run against a stale index; the apt lists are removed
+# afterwards because they are not needed at runtime.
+RUN apt-get update \
+	&& apt-get install -y nano unzip curl \
+	&& rm -rf /var/lib/apt/lists/*
+# THIS IS SPECIFIC TO HUGGINGFACE
+# We create a new user named "user" with ID of 1000
+RUN useradd -m -u 1000 user
+# We switch from "root" (default user when creating an image) to "user"
+USER user
+# We set two environment variables:
+# HOME points to the home directory of "user", under which the app files are copied below;
+# PATH is extended with /home/user/.local/bin so that Linux knows where to look
+# for binaries installed with "user" (PATH lists the directories searched for binaries).
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# We set working directory to $HOME/app (<=> /home/user/app)
+WORKDIR $HOME/app
+# Copy all local files to /home/user/app with "user" as owner of these files
+# Always use --chown=user when using HUGGINGFACE to avoid permission errors
+COPY --chown=user . $HOME/app
+# Install basic dependencies
+RUN pip install -r requirements.txt
+# THIS IS SPECIFIC TO HUGGINGFACE AS WELL
+# expose port 7860 which is the port used by HuggingFace for Web Application
+EXPOSE 7860
+# Run streamlit server (exec form: streamlit is started directly instead of through "sh -c",
+# so it receives the container's stop signals itself)
+CMD ["streamlit", "run", "--server.port", "7860", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,637 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+#################################################################### PAGE CONFIGURATION ####################################################################
+st.set_page_config(page_title="Getaround Project Dashboard", page_icon="🚦", layout="wide")
+#################################################################### SIDEBAR MENU ####################################################################
+st.sidebar.title("Navigation")
+page = st.sidebar.radio("Go to", ["🏠 Home/Introduction", "📊 Delays Analysis", "🎉 The End & Thank You"])
+e = st.sidebar.empty()
+e.write("")
+st.sidebar.write("Made with 💖💗❤️‍🔥 by Youenn PATAT")
+e = st.sidebar.empty()
+e.write("")
+st.sidebar.image("Aventurine_3.png", use_container_width=True)
+st.sidebar.markdown("« 🥂 Cheers, dear reader! 🍷»")
+#################################################################### Loading data ####################################################################
+####################################################################       &      ####################################################################
+#################################################################### Cleaning data ####################################################################
+@st.cache_data
+def load_data():
+    data = pd.read_excel("https://full-stack-assets.s3.eu-west-3.amazonaws.com/Deployment/get_around_delay_analysis.xlsx")
+    return data
+@st.cache_data
+def load_data_price():
+    data_price = pd.read_csv("https://full-stack-assets.s3.eu-west-3.amazonaws.com/Deployment/get_around_pricing_project.csv", index_col=0)
+    return data_price
+data_load_state = st.text('Loading data...')
+data = load_data()
+data_price = load_data_price()
+data_load_state.text("")
+mean_rental_per_day = data_price["rental_price_per_day"].mean()
+# Count the number of entries with delay_at_checkout_in_minutes > mean + 3*std and < mean - 3*std
+mean_delay_checkout = data["delay_at_checkout_in_minutes"].mean()
+std_delay_checkout = data["delay_at_checkout_in_minutes"].std()
+outliers = data[(data['delay_at_checkout_in_minutes'] > (mean_delay_checkout + 3* std_delay_checkout)) |
+                 (data['delay_at_checkout_in_minutes'] < (mean_delay_checkout - 3* std_delay_checkout))]
+# Get the count of such entries
+num_outliers = len(outliers)
+# Filter out and remove the outliers
+data = data[(data['delay_at_checkout_in_minutes'] <= (mean_delay_checkout + 3* std_delay_checkout)) & (data['delay_at_checkout_in_minutes'] >= (mean_delay_checkout - 3* std_delay_checkout)) | (data['delay_at_checkout_in_minutes'].isna())]
+# We keep the Nan values to keep information of the cancel state of the rental, if not all the cancel state would be removed
+# Define a function to categorize delays
+def categorize_delay(delay):
+    if pd.isna(delay):
+        return "Unknown"
+    elif delay <= 0:
+        return "Early or in time"
+    elif delay < 60:
+        return "< 1 hour"
+    elif delay < 120:
+        return "1 to 2 hours"
+    elif delay < 180:
+        return "2 to 3 hours"
+    elif delay < 360:
+        return "3 to 6 hours"
+    elif delay < 720:
+        return "6 to 12 hours"
+    elif delay < 1440:
+        return "12 to 24 hours"
+    else:
+        return "1 day or more"
+# Apply function to create the new column
+data["checkout_delay_category"] = data["delay_at_checkout_in_minutes"].apply(categorize_delay)
+#################################################################### HOME PAGE ####################################################################
+if page == "🏠 Home/Introduction":
+    st.title("Welcome to the Getaround Project Dashboard ⌚🚗⌚")
+    st.image("https://lever-client-logos.s3.amazonaws.com/2bd4cdf9-37f2-497f-9096-c2793296a75f-1568844229943.png", use_container_width=True)
+    st.image("https://img.freepik.com/photos-gratuite/vue-du-modele-voiture-3d_23-2151138976.jpg?t=st=1742139826~exp=1742143426~hmac=a3191c31d2068646ebad17b88c52d572c57397c4d7bff718e2efa77cfaa87d07&w=1380", use_container_width=True)
+    st.markdown("""
+    ## Introduction
+    This project aims to analyze the impact of a new feature of threshold to deal with problematic cases when there are delays at the check-out for a rental.
+    🟠 **What you'll find in this app**:
+    * 📊 Data insights on rental delays & affected revenue.
+    * 📉 Strategies to mitigate issues.
+    * 🎯 Conclusion & recommendations.
+    **Use the sidebar** to navigate between pages. 🚀
+    In this first page, you will find out the presentation of data and first views of it. In the **Delays Analysis** page, you will find the analysis of the problem and answers.
+    And in the last page, some thanking and link for my other works.
+    """)
+    st.subheader("📌 - Basic analysis and view of data",  divider="orange")
+    # Display the raw delay data on demand
+    st.write("Raw Data")
+    if st.checkbox('Show raw data'):
+        st.subheader('Raw data')
+        st.write(data)
+    # Calculate the value counts of each delay category
+    delay_counts = data['checkout_delay_category'].value_counts()
+    # Calculate the percentage of each category
+    delay_percentages = (delay_counts / delay_counts.sum()) * 100
+    st.markdown("""
+    Firstly, we want to check the proportion of check-in type (`mobile` or `connect`) and the proportion of the rentals' states (`ended` or `canceled`).
+    """)
+    col1, col2 = st.columns([1, 2])
+    with col1:
+        #visualisation of the percentage of the mobile vs connect check rental
+        checkin_counts = data["checkin_type"].value_counts().reset_index()
+        checkin_counts.columns = ["checkin_type", "count"]
+        fig1 = px.pie(checkin_counts,
+                    names="checkin_type",
+                    values="count",
+                    title="Check-in Type Distribution",
+                    color_discrete_sequence=["#3CB371", "#FFA500"])
+        fig1.update_traces(textfont_color="black")
+        st.plotly_chart(fig1, use_container_width=True, key="1")
+    # Second column: pie chart of the rental states
+    with col2:
+        # Visualisation of the proportion of each rental state (ended vs canceled)
+        cancel_counts = data["state"].value_counts().reset_index()
+        cancel_counts.columns = ["state", "count"]
+        fig2 = px.pie(cancel_counts,
+                    names="state",
+                    values="count",
+                    title="Proportion of rentals' states",
+                    color_discrete_sequence=["#3CB371", "#FFA500"])
+        fig2.update_traces(textfont_color="black")
+        st.plotly_chart(fig2, use_container_width=True, key="2")
+    st.markdown("""
+    So, we see that the majority of check-in are made by mobile, only 20% are made by the connected car.
+    Moreover, in our case, with that dataset, we see that rentals are cancels for 15% of rentals.
+    """)
+    st.markdown("""
+                Now let's check the distribution of checkout delays in function of category of time.
+                """)
+    # Count occurrences of each category
+    delay_counts = data["checkout_delay_category"].value_counts().reset_index()
+    delay_counts.columns = ["Category", "Count"]
+    delay_counts["Percentage"] = (delay_counts["Count"] / delay_counts["Count"].sum()) * 100
+    # Define custom colors
+    custom_colors = {
+        "Early or in time": "#FFA500",  # Orange
+    }
+    # Assign green as the default color
+    for category in delay_counts["Category"]:
+        if category not in custom_colors:
+            custom_colors[category] = "#3CB371"  # Green
+    # Create a bar chart
+    fig3 = px.bar(
+        delay_counts,
+        x="Category",
+        y="Count",
+        title="Distribution of Checkout Delays",
+        labels={"Category": "Checkout Delay Category", "Count": "Number of Rentals"},
+        color="Category",
+        text=delay_counts["Percentage"].apply(lambda x: f"{x:.1f}%"),
+        color_discrete_map=custom_colors,
+    )
+    fig3.update_traces(textfont_color="black")
+    fig3.update_xaxes(showgrid=False, tickfont=dict(color='black'))
+    fig3.update_yaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'))
+    fig3.update_layout(xaxis_title="", yaxis_title="", title_font=dict(weight="bold"), showlegend=False, xaxis=dict(zeroline=True,zerolinecolor="black",zerolinewidth=2), plot_bgcolor="#BDDFD6")
+    st.plotly_chart(fig3, use_container_width=True, theme=None)
+    st.markdown("""
+                There is only 32.6% of rental checkout that are early or in time, without delay.
+                For 23.4% we don't have informations. And the majoruty of delays are less than 2 hours.
+                """)
+    # Count occurrences of each category grouped by checkin_type
+    delay_counts = data.groupby(["checkout_delay_category", "checkin_type"]).size().reset_index(name="Count")
+    delay_counts["Percentage"] = (delay_counts["Count"] / delay_counts["Count"].sum()) * 100
+    # Create a grouped bar chart
+    fig4 = px.bar(
+        delay_counts,
+        x="checkout_delay_category",
+        y="Count",
+        color="checkin_type",
+        title="Distribution of Checkout Delays by Check-in Type",
+        labels={"checkout_delay_category": "Checkout Delay Category", "Count": "Number of Rentals", "checkin_type": "Check-in Type"},
+        barmode="group",  # Groups bars side by side
+        #text="Count",
+        text=delay_counts["Percentage"].apply(lambda x: f"{x:.1f}%"),
+        color_discrete_sequence=["#FFA500", "#3CB371"]
+    )
+    # Improve layout by setting custom order for x-axis
+    fig4.update_traces(textfont_color="black")
+    fig4.update_xaxes(showgrid=False, tickfont=dict(color='black'))
+    fig4.update_yaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'))
+    fig4.update_layout(xaxis_title="", yaxis_title="", title_font=dict(weight="bold"), xaxis=dict(zeroline=True,zerolinecolor="black",zerolinewidth=2), plot_bgcolor="#BDDFD6")
+    fig4.update_layout(xaxis={'categoryorder':'array', 'categoryarray': [
+        "Early or in time", "< 1 hour", "1 to 2 hours", "2 to 3 hours",
+        "3 to 6 hours", "6 to 12 hours", "12 to 24 hours", "1 day or more", "Unknown"
+    ]})
+    st.plotly_chart(fig4, use_container_width=True,  theme=None)
+    st.markdown("""
+                There is much more delay problem with mobile checkin type than connect.
+                """)
+    st.markdown("""
+                Great ! Now for the following analysis, go to the next page "**📊 Delays Analysis**" !
+                """)
+#################################################################### DELAYS ANALYSIS ####################################################################
+elif page == "📊 Delays Analysis":
+    st.title("Analysis & Insights 📊")
+    st.markdown("""
+    Here, we analyze the delay problematic and how to solve it with threshold and a certain scope.
+    **Key Findings**:
+    - 🚗 A minimum delay of **X minutes** reduces scheduling conflicts.
+    - 💰 Potential revenue impact: **Y% of total revenue**.
+    - ✅ Solving **Z% of problematic cases** with the policy.
+    *Visuals and explanations go here.*
+    In the following, we will focus on the next steps and questions:
+                * How often are drivers late for the next check-in? How does it impact the next driver?
+                * Which share of our owner’s revenue would potentially be affected by the feature?
+                * How many rentals would be affected by the feature depending on the threshold and scope we choose?
+                * How many problematic cases will it solve depending on the chosen threshold and scope?
+    """)
+    st.subheader("📌 - How often are drivers late for the next check-in? How does it impact the next driver?",  divider="orange")
+    st.markdown("""
+                So, for the first question, here's the visualization of the check-out that are `late`, `early or in time` and the `unknown` data.
+                """)
+    # Count occurrences of category & group category as simple "late", "in time" or "unknown"
+    delay_drivers = data["checkout_delay_category"].apply(lambda x: "Early or in time" if x == "Early or in time"
+                                                                    else "Unkonwn" if x == "Unknown"
+                                                                    else "Late").value_counts().reset_index()
+    delay_drivers.columns = ["Category", "Count"]
+    delay_drivers["Percentage"] = (delay_drivers["Count"] / delay_drivers["Count"].sum()) * 100
+    # Create a bar chart
+    fig5 = px.bar(
+        delay_drivers,
+        x="Category",
+        y="Count",
+        labels={"Category": "Checkout Delay Category", "Count": "Number of Rentals"},
+        title="Distribution of Checkout Delays",
+        text=delay_drivers["Percentage"].apply(lambda x: f"{x:.1f}%"),
+        color_discrete_sequence=["#FFA500"],
+    )
+    fig5.update_traces(textfont_color="black")
+    fig5.update_xaxes(showgrid=False, tickfont=dict(color='black'))
+    fig5.update_yaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'))
+    fig5.update_layout(xaxis_title="", yaxis_title="", title_font=dict(weight="bold"), showlegend=False, xaxis=dict(zeroline=True,zerolinecolor="black",zerolinewidth=2), plot_bgcolor="#BDDFD6")
+    st.plotly_chart(fig5, use_container_width=True, theme=None)
+    # Count occurrences of each category
+    delay_counts = data["checkout_delay_category"].value_counts().reset_index()
+    delay_counts.columns = ["Category", "Count"]
+    delay_counts["Percentage"] = (delay_counts["Count"] / delay_counts["Count"].sum()) * 100
+    # Define custom colors
+    custom_colors = {
+        "Early or in time": "#FFA500",  # Orange
+    }
+    # Assign green as the default color
+    for category in delay_counts["Category"]:
+        if category not in custom_colors:
+            custom_colors[category] = "#3CB371"  # Green
+    # Create a bar chart
+    fig6 = px.bar(
+        delay_counts,
+        x="Category",
+        y="Count",
+        title="Distribution of Checkout Delays",
+        labels={"Category": "Checkout Delay Category", "Count": "Number of Rentals"},
+        color="Category",
+        text=delay_counts["Percentage"].apply(lambda x: f"{x:.1f}%"),
+        color_discrete_map=custom_colors,
+    )
+    fig6.update_traces(textfont_color="black")
+    fig6.update_xaxes(showgrid=False, tickfont=dict(color='black'))
+    fig6.update_yaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'))
+    fig6.update_layout(xaxis_title="", yaxis_title="", title_font=dict(weight="bold"), showlegend=False, xaxis=dict(zeroline=True,zerolinecolor="black",zerolinewidth=2), plot_bgcolor="#BDDFD6")
+    st.plotly_chart(fig6, use_container_width=True, theme=None)
+    st.markdown("""
+                Only 32.6% of the check-out are early or in time, whereas almost half of the check-out (44%) are late.
+                """)
+    st.markdown("""
+                Now, for the 2nd question, let's see how delays impact the next driver.
+                """)
+    mean_delay_impact = data["time_delta_with_previous_rental_in_minutes"].mean()
+    min_delay_impact = data["time_delta_with_previous_rental_in_minutes"].min()
+    max_delay_impact = data["time_delta_with_previous_rental_in_minutes"].max()
+    st.markdown("#### Delay impacting informations on the next driver 🚘:")
+    st.write(f"▪️*Average delay impacting next driver:* {mean_delay_impact:.2f} minutes")
+    st.write(f"▪️*Minimum delay impacting next driver:* {min_delay_impact:.2f} minutes")
+    st.write(f"▪️*Maximum delay impacting next driver:* {max_delay_impact:.2f} minutes")
+    delay_impact = data
+    delay_impact["delta-late_checkout"] = delay_impact["time_delta_with_previous_rental_in_minutes"] - delay_impact["delay_at_checkout_in_minutes"]
+    #if negative delta - late checkout, it means that the new rental cannot do its check-in
+    negative_delay_impact = delay_impact[delay_impact["delta-late_checkout"] < 0]
+    late_checkout = delay_drivers[delay_drivers["Category"] == "Late"]["Count"][0]
+    nb_problematic_checkin_late = len(negative_delay_impact)
+    # percentage calculation
+    problematic_delays_rate = nb_problematic_checkin_late*100/late_checkout
+    st.write(f"▪️Among all the delays ({late_checkout}), {problematic_delays_rate:.3f}% \n of delays caused problems to the next rental because the checkout\n was made later than the new rental checkin.")
+    # Calculate the average duration of problematic delays
+    average_problematic_delay = negative_delay_impact['delay_at_checkout_in_minutes'].mean()
+    # Calculate the average duration of non-problematic delays
+    average_non_problematic_delay = data[data['delay_at_checkout_in_minutes'] > 0]['delay_at_checkout_in_minutes'].mean()
+    # Compare the averages
+    st.write(f"▪️Average Duration of Problematic Delays: {average_problematic_delay:.0f} minutes")
+    st.write(f"▪️Average Duration of Non-Problematic Delays: {average_non_problematic_delay:.0f} minutes")
+    delay_impact["problematic_delay"] = delay_impact["delta-late_checkout"] < 0
+    delay_impact["problematic_delay"].value_counts()
+    fig7 = px.histogram(delay_impact, x="problematic_delay", color_discrete_sequence=["#FFA500"], title="Proportion of problematic delays"
+                )
+    fig7.update_xaxes(
+        categoryorder='array',
+        categoryarray=["Problematic", "Non-Problematic"],
+        showgrid=False, tickfont=dict(color='black')
+    )
+    fig7.update_yaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'))
+    fig7.add_annotation(x=3, y=10000,text=f"Avg Delay: {average_problematic_delay:.2f} min",showarrow=False)
+    fig7.add_annotation(x=2, y=10000,text=f"Avg Delay: {average_non_problematic_delay:.2f} min",showarrow=False)
+    fig7.update_layout(
+        xaxis=dict(
+            tickmode='array',
+            tickvals=[True, False],
+            ticktext=["Problematic Delay", "Non Problematic Delay"],
+            zeroline=True,zerolinecolor="black",zerolinewidth=2
+        ),
+        xaxis_title="",
+        yaxis_title="",
+        title_font=dict(weight="bold"),
+        showlegend=False,
+        plot_bgcolor="#BDDFD6"
+    )
+    fig7.update_traces(textfont_color="black")
+    st.plotly_chart(fig7, use_container_width=True, theme=None)
+    st.markdown("""
+                For the majority of cases, it poses no problem to have delay, but for 2.857% of the case it is problematic for the following rental.
+                """)
+    st.subheader("📌 - Which share of our owner’s revenue would potentially be affected by the feature?",  divider="orange")
+    # Define the treshold of minimum time between 2 locations (minutes)
+    thresholds = [30, 60, 90, 120, 180, 360, 720, 1440]  # Example : 1 hour
+    data["mean_price_per_rental"] = mean_rental_per_day
+    treshold_data = data
+    percentage_revenue_impacted = []
+    percentage_revenue_impacted_displaying = {}
+    for threshold in thresholds:
+        treshold_data[f"affected_rentals_{threshold}"] = data["time_delta_with_previous_rental_in_minutes"] <= threshold
+        affected_rentals = data[data["time_delta_with_previous_rental_in_minutes"] <= threshold]
+        affected_revenue = affected_rentals["mean_price_per_rental"].sum()
+        total_revenue = data["mean_price_per_rental"].sum()
+        revenue_impact = (affected_revenue / total_revenue) * 100
+        percentage_revenue_impacted.append(revenue_impact)
+        percentage_revenue_impacted_displaying[threshold] = round(revenue_impact, 3)
+    col1, col2 = st.columns([1, 2])
+    with col1:
+        # Select a threshold
+        selected_threshold = st.selectbox("Select a threshold ⏳ (in minutes):", thresholds, key="selectbox_1")
+        # Display impacted revenue percentage
+        st.metric(label="💰 Impacted Revenue", value=f"{percentage_revenue_impacted_displaying[selected_threshold]}%")
+    with col2:
+        affected_counts = [treshold_data[f"affected_rentals_{threshold}"].value_counts().get(True, 0) for threshold in thresholds]
+        affected_rentals_plot = pd.DataFrame({"Threshold (min)": thresholds, "Affected rentals": affected_counts})
+        fig8 = px.line(affected_rentals_plot, x="Threshold (min)", y="Affected rentals", text="Affected rentals",
+                    title="Number of rentals affected by the treshold",
+                    color_discrete_sequence=["#3CB371"],)
+        fig8.update_traces(textposition='top center', textfont_color="black")
+        fig8.update_xaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'), showline=True, linewidth=2, linecolor='black')
+        fig8.update_yaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'))
+        fig8.update_layout(xaxis_title="", yaxis_title="", title_font=dict(weight="bold"), showlegend=False, xaxis=dict(zeroline=True,zerolinecolor="black",zerolinewidth=2), plot_bgcolor="#BDDFD6")
+        st.plotly_chart(fig8, use_container_width=True, theme=None)
+    st.subheader("📌 - How many rentals would be affected by the feature depending on the threshold and scope we choose?",  divider="orange")
+    all_affected_list = []
+    all_affected_display = {}
+    connect_affected_list = []
+    connect_affected_display = {}
+    all_affected_percentage = {}
+    connect_affected_percentage = {}
+    for threshold in thresholds:
+        all_rentals = len(data)
+        all_affected = data[data["time_delta_with_previous_rental_in_minutes"] <= threshold].shape[0]
+        all_affected_list.append(all_affected)
+        connect_affected = data[(data["time_delta_with_previous_rental_in_minutes"] <= threshold) &
+                                (data["checkin_type"] == "connect")].shape[0]
+        connect_affected_list.append(connect_affected)
+        all_affected_display[threshold] = all_affected
+        connect_affected_display[threshold] = connect_affected
+        all_affected_percentage[threshold] = (all_affected / all_rentals) * 100
+        connect_affected_percentage[threshold] = (connect_affected / all_rentals) * 100
+    # Select a threshold
+    selected_threshold = st.selectbox("Select a threshold ⏳ (in minutes):", thresholds, key="selectbox_2")
+    # Add a title before metrics
+    st.markdown(f"#### 🚗 Rentals Affected by the {selected_threshold}-Minutes Threshold")
+    col1, col2 = st.columns(2)
+    # Display metrics side by side
+    with col1:
+        st.metric(label="📲 All check-ins affected in number ⇩", value=f"{all_affected_display[selected_threshold]}")
+        st.metric(label="📲 All check-ins affected in % ⇩", value=f"{all_affected_percentage[selected_threshold]:.3f}")
+    with col2:
+        st.metric(label="🛜 Connect check-ins affected in number ⇩", value=f"{connect_affected_display[selected_threshold]}")
+        st.metric(label="🛜 Connect check-ins affected in % ⇩", value=f"{connect_affected_percentage[selected_threshold]:.3f}")
+    data_affected = pd.DataFrame({ "thresholds" : thresholds,
+                 "all_affected" : all_affected_list,
+                 "connect_affected" : connect_affected_list})
+    fig9 = px.scatter(data_affected, x='thresholds', y='all_affected',
+                    color_discrete_sequence=["#FFA500"],
+                    labels={'all_affected': 'All Affected'},
+                    title="Rentals affected by Thresholds in function of the type of check-in")
+    # Add a line for 'all_affected'
+    fig9.add_trace(go.Scatter(x=data_affected['thresholds'], y=data_affected['all_affected'],
+        mode='lines+markers+text', line=dict(color='#FFA500'), name='All Affected', text=data_affected['all_affected']))
+    # Add a line for 'connect_affected'
+    fig9.add_trace(go.Scatter(x=data_affected['thresholds'], y=data_affected['connect_affected'],
+                        mode='lines+markers+text', marker_color='#3CB371', name='Connect Affected',
+                        text=data_affected['connect_affected'],))  # Text to display on the markers
+    fig9.update_traces(textposition='top center', textfont_color="black")
+    fig9.update_xaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'), showline=True, linewidth=2, linecolor='black')
+    fig9.update_yaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'))
+    fig9.update_layout(xaxis_title="", yaxis_title="", title_font=dict(weight="bold"), showlegend=True, xaxis=dict(zeroline=True,zerolinecolor="black",zerolinewidth=2), plot_bgcolor="#BDDFD6")
+    st.plotly_chart(fig9, use_container_width=True, theme=None)
+    st.markdown("""
+                There are less rentals affected with the scope only on connected check-in than all
+                (mobile + connect) check-in. Moreover, as it could be expected, more rentals are
+                impacted with an increasing of the threshold choice.""")
+    st.subheader("📌 - How many problematic cases will it solve depending on the chosen threshold and scope?",  divider="orange")
+    solved_cases_all_list = []
+    solved_cases_connect_list = []
+    for threshold, i in zip(thresholds, range(len(thresholds))):
+        problematic_cases = negative_delay_impact[(negative_delay_impact["delay_at_checkout_in_minutes"] <= threshold)]
+        problematic_connectec_case = negative_delay_impact[(negative_delay_impact["delay_at_checkout_in_minutes"] <= threshold) &
+                                                        (negative_delay_impact["checkin_type"] == "connect")]
+        total_problems_cases = len(negative_delay_impact)
+        total_connect_pb_cases = len(negative_delay_impact[negative_delay_impact["checkin_type"] == "connect"])
+        solved_cases = problematic_cases.shape[0]
+        solved_cases_all_list.append(solved_cases)
+        solved_cases_connect = problematic_connectec_case.shape[0]
+        solved_cases_connect_list.append(solved_cases_connect)
+        percentage_solved_all = (solved_cases / total_problems_cases) * 100
+        percentage_connect_solved = (solved_cases_connect / total_connect_pb_cases) * 100
+    # Convert to DataFrame
+    df_solved_cases = pd.DataFrame({
+        "Threshold (minutes)": thresholds,
+        "Solved Cases (All Check-ins)": solved_cases_all_list,
+        "Solved Cases (Connect Check-ins)": solved_cases_connect_list,
+        "Revenue Impacted (%)": percentage_revenue_impacted
+    })
+    # Select a threshold with a select box
+    selected_threshold = st.selectbox("Select a threshold ⏳ (in minutes):", thresholds, key="selectbox_3")
+    # Get values for selected threshold
+    selected_data = df_solved_cases[df_solved_cases["Threshold (minutes)"] == selected_threshold].iloc[0]
+    # Display metrics in three columns
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric(label="📲 All Check-ins Solved", value=f"{selected_data['Solved Cases (All Check-ins)']}")
+    with col2:
+        st.metric(label="🛜 Connect Check-ins Solved", value=f"{selected_data['Solved Cases (Connect Check-ins)']}")
+    with col3:
+        st.metric(label="💰 Revenue Impacted", value=f"{selected_data['Revenue Impacted (%)']:.2f} %")
+    # Create the figure
+    fig10 = go.Figure()
+    # Add line for "All Check-ins"
+    fig10.add_trace(go.Scatter(
+        x=thresholds,
+        y=solved_cases_all_list,
+        mode="lines+markers",
+        name="Solved Cases (All Check-ins)",
+        marker=dict(color="#FFA500")
+    ))
+    # Add line for "Connect Check-ins"
+    fig10.add_trace(go.Scatter(
+        x=thresholds,
+        y=solved_cases_connect_list,
+        mode="lines+markers",
+        name="Solved Cases (Connect Check-ins)",
+        marker=dict(color="#3CB371")
+    ))
+    # Add vertical dashed lines with text annotations
+    for i, threshold in enumerate(thresholds):
+        max_y_value = solved_cases_all_list[i]  # Ensure line stops at "Solved Cases (All Check-ins)"
+        # Add dashed line from y=0 to y=max_y_value
+        fig10.add_trace(go.Scatter(
+            x=[threshold, threshold],  # Vertical line at threshold
+            y=[0, max_y_value],  # Stop at max_y_value
+            mode="lines",
+            line=dict(color="red", width=1.5, dash="dash"),
+            name="Revenue Impact Annotation" if i == 0 else None,  # Show legend only once
+            showlegend=(i == 0)
+        ))
+        # Add text annotation slightly above the dashed line
+        fig10.add_annotation(
+            x=threshold,
+            y=max_y_value + 20,  # Position slightly above the dashed line
+            text=f"{percentage_revenue_impacted[i]:.2f}%",  # Format percentage
+            showarrow=False,
+            font=dict(size=10, color="red"),
+            align="center",
+        )
+    fig10.update_traces(textposition='top center', textfont_color="black")
+    fig10.update_xaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'), showline=True, linewidth=2, linecolor='black')
+    fig10.update_yaxes(showgrid=True, gridcolor='#A9A9A9', tickfont=dict(color='black'))
+    fig10.update_layout(title="Number of Problematic Cases Solved by Threshold",xaxis_title="",yaxis_title="", title_font=dict(weight="bold"),showlegend=True, xaxis=dict(zeroline=True,zerolinecolor="black",zerolinewidth=2), plot_bgcolor="#BDDFD6")
+    st.plotly_chart(fig10, use_container_width=True, theme=None)
+    st.markdown("""
+                #### 📊 Data Table""")
+    st.dataframe(df_solved_cases)
+    st.markdown("""
+                Now, we can see the problematic cases solved in function of the check-in type (connect or all {mobile📲 + connect🛜})
+                with the impacted revenue percentage of each threshold. For me the best choice to solve problem without too much
+                economical impact is to choose the threshold of **180** or **360** minutes, for the scope of all check-in type.""")
+    st.markdown("""
+                ✨ Thanks for reading all the way through! I hope you enjoyed it and found it interesting.
+                Go to the last page, `The End & Thank You`, for a little surprise and links to my other works‼️
+                """)
+#################################################################### END & THANK YOU PAGE ####################################################################
+elif page == "🎉 The End & Thank You":
+    st.title("Thank You for Exploring! 🎉")
+     # Create two columns
+    col1, col2 = st.columns([1, 2])  # Adjust column ratio (1:2 for image & text)
+    # Add an image in the first column
+    with col1:
+        st.image("ChibiElf1.png", use_container_width=True)
+    # Add text in the second column
+    with col2:
+        st.markdown("""
+        **Final Thoughts**
+        - 🚀 This analysis helps optimize the rental platform.
+        - 🔎 Finding the right balance between user experience and revenue impact is key.
+        **🙏 Thank you for your time!**
+        📩 Feel free to reach out for more insights.
+        Here are the links for my other works on **Github** & **Linkedin**:
+        """)
+        # Define the GitHub and LinkedIn URLs
+        github_url = "https://github.com/HyraXuna?tab=repositories"
+        linkedin_url = "https://www.linkedin.com/in/youenn-patat-46b59b246/"
+        # Display clickable images for GitHub and LinkedIn
+        st.markdown(
+            f"""
+            <div style="display: flex; justify-content: center; gap: 20px;">
+                <a href="{github_url}" target="_blank">
+                    <img src="https://cdn-icons-png.flaticon.com/512/25/25231.png" width="40">
+                </a>
+                <a href="{linkedin_url}" target="_blank">
+                    <img src="https://cdn-icons-png.flaticon.com/512/174/174857.png" width="40">
+                </a>
+            </div>
+            """,
+            unsafe_allow_html=True
+        )
+    st.balloons()  # 🎈 Fun effect for celebration!
+### Footer
+st.markdown("---")
+st.markdown(
+    """
+    <div style="text-align: center;">
+        <p>If you want to see more, check out my <strong>Github</strong> 📖</p>
+        <a href="https://github.com/HyraXuna?tab=repositories" target="_blank">
+            <img src="https://cdn-icons-png.flaticon.com/512/25/25231.png" width="40">
+        </a>
+    </div>
+    """,
+    unsafe_allow_html=True
+)
+st.markdown("---")

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+boto3
+pandas
+gunicorn
+streamlit
+scikit-learn
+matplotlib
+seaborn
+plotly
+# Excel engine pandas needs for read_excel() on the .xlsx dataset loaded in app.py
+openpyxl