llepogam committed on
Commit
e8a197f
·
1 Parent(s): 3abf26b

get around analysis app

Browse files
Files changed (5) hide show
  1. Dockerfile +18 -0
  2. app.py +591 -0
  3. get_around_delay_analysis.xlsx +0 -0
  4. requirements.txt +11 -0
  5. run.bat +8 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: Miniconda, which provides Python and pip.
FROM continuumio/miniconda3

WORKDIR /home/app

# Install the system tools in one layer: running update and install together
# avoids a stale package index from a cached layer, `-y` keeps the build
# non-interactive, and removing the apt lists keeps the layer small.
RUN apt-get update \
    && apt-get install -y nano unzip curl \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): app.py does not appear to use the Deta CLI, and piping a remote
# script into sh is risky - kept for parity with the original image; confirm it
# is still needed.
RUN curl -fsSL https://get.deta.dev/cli.sh | sh

# Copy the requirements first so the dependency layer is reused when only the
# application code changes.
COPY requirements.txt /dependencies/requirements.txt
RUN pip install --no-cache-dir -r /dependencies/requirements.txt

COPY . /home/app

# Shell form of CMD so ${PORT} is expanded at container start; default to 7860
# (the port used by run.bat) when PORT is not provided.
CMD streamlit run app.py --server.port=${PORT:-7860} --server.address=0.0.0.0
app.py ADDED
@@ -0,0 +1,591 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+
7
+ # Set page config
8
# Browser-tab title, icon and wide layout for the dashboard page.
PAGE_SETTINGS = {
    "page_title": "Getaround Delay Analysis",
    "page_icon": "🚗",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)

# Main heading displayed at the top of the app.
st.title("🚗 Getaround Rental Delay Analysis")
16
+
17
+ # Function to load and preprocess data
18
# Remote location of the dataset and the copy committed next to app.py.
DATA_URL = 'https://full-stack-assets.s3.eu-west-3.amazonaws.com/Deployment/get_around_delay_analysis.xlsx'
LOCAL_DATA_PATH = 'get_around_delay_analysis.xlsx'


def _read_rentals():
    """Return the raw rentals sheet, trying the URL first and the local file second.

    Raises whatever ``pd.read_excel`` raises when the local fallback fails too.
    """
    try:
        return pd.read_excel(DATA_URL)
    except Exception as e:
        # Any failure here (network, HTTP, parsing) just means "use the local copy".
        st.warning(f"Error loading data from URL: {e}. Falling back to {LOCAL_DATA_PATH}.")
    return pd.read_excel(LOCAL_DATA_PATH)


def _categorize_frequency(count):
    """Map a per-car rental count to its frequency bucket label."""
    if count == 1:
        return '1 rental'
    elif 2 <= count <= 3:
        return '2-3 rentals'
    elif 4 <= count <= 5:
        return '4-5 rentals'
    elif 6 <= count <= 10:
        return '6-10 rentals'
    else:
        return '>10 rentals'


def _add_derived_columns(df):
    """Add the category/helper columns used by the dashboard and return the frame.

    Columns added: ``time_vs_previous_rental_category``, ``checkout_status``,
    ``checkout_delay_category``, ``rental_frequency_category``,
    ``delay_previous_rental``, ``gap_between_checkin_chekout`` and ``late_checkin``.
    """
    delta_col = 'time_delta_with_previous_rental_in_minutes'
    delay_col = 'delay_at_checkout_in_minutes'

    # A missing time delta is assumed to mean "more than 12 hours" (721 minutes).
    df[delta_col] = df[delta_col].fillna(721)

    # Left-closed bins: exactly 720 minutes falls in '>12 hours'.
    df['time_vs_previous_rental_category'] = pd.cut(
        df[delta_col],
        bins=[-np.inf, 30, 60, 180, 720, np.inf],
        labels=['1. <30 minutes', '2. 30-60 minutes', '3. 1-3 hours', '4. 3-12 hours', '5. >12 hours'],
        right=False,
    )

    # NOTE(review): a positive delay is treated as a late return, which is what
    # the late_checkin derivation below relies on (a previous delay larger than
    # the gap to the next rental makes the check-in late) - confirm against the
    # dataset documentation. A missing delay is assumed to mean "no delay".
    df['checkout_status'] = np.where(df[delay_col] > 0, 'Late', 'On time')

    # Left-closed bins, ordered from the earliest return to the latest one.
    df['checkout_delay_category'] = pd.cut(
        df[delay_col],
        bins=[-np.inf, -720, -120, -60, -30, 0, 30, 60, np.inf],
        labels=['1. >12h early', '2. 2-12h early', '3. 1-2h early', '4. 30-60min early',
                '5. <30min early', '6. <30min late', '7. 30-60min late', '8. >1h late'],
        right=False,
    )

    # Bucket each car by how many rentals it has in the dataset.
    car_rental_counts = df['car_id'].value_counts().reset_index()
    car_rental_counts.columns = ['car_id', 'rental_count']
    car_rental_counts['rental_frequency_category'] = (
        car_rental_counts['rental_count'].apply(_categorize_frequency)
    )
    df = df.merge(car_rental_counts[['car_id', 'rental_frequency_category']], on='car_id', how='left')

    # Self-join to attach the checkout delay of the previous rental of the same car.
    df = df.merge(
        df[['rental_id', 'delay_at_checkout_in_minutes']],
        left_on='previous_ended_rental_id',
        right_on='rental_id',
        how='left',
        suffixes=('', '_previous')
    ).rename(columns={'delay_at_checkout_in_minutes_previous': 'delay_previous_rental'})

    # Time left between the previous checkout and this check-in; a negative
    # value means the previous rental ended after this one was due to start.
    df['gap_between_checkin_chekout'] = df[delta_col] - df['delay_previous_rental']
    df['late_checkin'] = pd.cut(
        df['gap_between_checkin_chekout'],
        bins=[-np.inf, 0, np.inf],
        labels=['Late', 'Not Late'],
        right=False,
    )

    return df


# Function to load and preprocess data
@st.cache_data
def load_data():
    """Load the rentals dataset and add the derived analysis columns.

    The data is read from DATA_URL, falling back to LOCAL_DATA_PATH when the
    URL cannot be read.

    Returns:
        The preprocessed DataFrame, or None when loading or preprocessing
        fails (the error is reported with ``st.error``).
    """
    try:
        df = _read_rentals()
        st.success("Data loaded successfully!")
        return _add_derived_columns(df)
    except Exception as e:
        # Top-level boundary: report the problem and let the caller show its fallback UI.
        st.error(f"Error loading data: {e}")
        return None
112
+
113
+ # Load data
114
+ df = load_data()
115
+
116
+ if df is not None:
117
+ # Create tabs - adding "Key Insights" as the first tab
118
+ tab0, tab1, tab2, tab3 = st.tabs(["Key Insights", "General Analysis", "Late Checkout Impact", "Threshold Analysis"])
119
+
120
+ # Tab 0: Key Insights
121
+ with tab0:
122
+ st.header("Key Insights")
123
+
124
+ # Calculate key metrics for insights
125
# Headline counts, derived from boolean masks over the full dataset.
total_rentals = len(df)

connect_rentals = int((df['checkin_type'] == 'connect').sum())
mobile_rentals = int((df['checkin_type'] == 'mobile').sum())
late_checkouts = int((df['checkout_status'] == 'Late').sum())
canceled_rentals = int((df['state'] == 'canceled').sum())
late_checkins = int((df['late_checkin'] == 'Late').sum())

# Shares of the total, expressed in percent.
connect_pct = connect_rentals / total_rentals * 100
mobile_pct = mobile_rentals / total_rentals * 100
late_checkout_pct = late_checkouts / total_rentals * 100
canceled_pct = canceled_rentals / total_rentals * 100

# Each section is a subheader followed by three metrics laid out side by side.
metric_sections = (
    ("Rental Overview", (
        ("Total Rentals", f"{total_rentals:,}"),
        ("Connect Rentals", f"{connect_rentals:,} ({connect_pct:.1f}%)"),
        ("Mobile Rentals", f"{mobile_rentals:,} ({mobile_pct:.1f}%)"),
    )),
    ("Delay Impact", (
        ("Late Checkouts", f"{late_checkouts:,} ({late_checkout_pct:.1f}%)"),
        ("Canceled Rentals", f"{canceled_rentals:,} ({canceled_pct:.1f}%)"),
        ("Late Check-ins due to Previous Rental", f"{late_checkins:,}"),
    )),
)
for section_title, section_metrics in metric_sections:
    st.subheader(section_title)
    for column, (metric_label, metric_value) in zip(st.columns(3), section_metrics):
        with column:
            st.metric(metric_label, metric_value)
157
+
158
+ # Summary text
159
+ st.markdown("""
160
+ ### Key Findings
161
+
162
+ 1. **Short time gaps between reservations represent a minor portion of business operations**:
163
+ - Out of 21k rentals, only 8% have a time gap below 12 hours between consecutive rentals
164
+ - On average, each car is rented fewer than 3 times, indicating moderate utilization
165
+ - Less than 400 rentals (approximately 2%) have a time gap below 1 hour from the previous rental
166
+
167
+ 2. **Late checkouts have limited impact on overall business operations**:
168
+ - Only 218 rentals were affected by late checkouts, where the car was not available at the scheduled time
169
+ - The cancellation rate for affected rentals is around 17%, which is comparable to the average cancellation rate of 15%
170
+ - Most delays were under 30 minutes, likely due to minor traffic issues, which wouldn't typically justify a cancellation
171
+
172
+ 3. **A buffer of 30-60 minutes between rentals appears sufficient to minimize scheduling conflicts**:
173
+ - Given the current rental frequency, aggressive time optimization does not appear necessary
174
+ - Most delays are less than 1 hour, and this buffer would prevent most potential issues
175
+ - Approximately 4% of reservations would be affected by implementing this threshold
176
+ """)
177
+
178
+ # Tab 1: General Analysis
179
with tab1:
    st.header("General Analysis")

    # Key figures
    st.subheader("Key Figures")

    total_rentals = len(df)
    # Rentals starting less than 12 hours (720 minutes) after the previous one.
    close_rentals = len(df[df['time_delta_with_previous_rental_in_minutes'] < 720])
    avg_rentals_per_car = df['car_id'].value_counts().mean()

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Rentals", f"{total_rentals:,}")
    with col2:
        st.metric("% Rentals with <12h Gap between 2 rentals", f"{close_rentals/total_rentals:.1%}")
    with col3:
        st.metric("Avg. Rentals per Car", f"{avg_rentals_per_car:.1f}")

    # Distribution of a user-selected column
    st.subheader("Column Distribution")
    allowed_columns = [
        'checkin_type',
        'state',
        'time_vs_previous_rental_category',
        'checkout_status',
        'checkout_delay_category',
        'rental_frequency_category'
    ]
    selected_column = st.selectbox("Select column to visualize", allowed_columns)

    if pd.api.types.is_numeric_dtype(df[selected_column]):
        fig = px.histogram(
            df,
            x=selected_column,
            title=f"Distribution of {selected_column}"
        )
    else:
        # For categorical columns, show a bar chart of the value counts instead
        value_counts = df[selected_column].value_counts().reset_index()
        value_counts.columns = ['Value', 'Count']
        fig = px.bar(
            value_counts,
            x='Value',
            y='Count',
            title=f"Distribution of {selected_column}"
        )

    st.plotly_chart(fig, use_container_width=True)

    # Two stacked percentage charts of the time-gap categories: one split by
    # rental state, one by check-in type. All categories are kept, including
    # '>12 hours'.
    for split_column, split_label in (('state', 'State'), ('checkin_type', 'Type')):
        st.subheader(f"Time Between Consecutive Rentals by {split_label}")

        # observed=True drops empty categories, which would otherwise produce
        # 0/0 percentages, and matches the groupby calls in the other tabs.
        time_split_dist = df.groupby(
            ['time_vs_previous_rental_category', split_column], observed=True
        ).size().reset_index()
        time_split_dist.columns = ['Time Category', split_label, 'Count']

        # Total per time category, used as the percentage denominator
        time_totals = df.groupby('time_vs_previous_rental_category', observed=True).size().reset_index()
        time_totals.columns = ['Time Category', 'Total']

        time_split_dist = time_split_dist.merge(time_totals, on='Time Category')
        time_split_dist['Percentage'] = time_split_dist['Count'] / time_split_dist['Total'] * 100

        fig = px.bar(
            time_split_dist,
            x='Time Category',
            y='Percentage',
            color=split_label,
            barmode='stack',
            text=time_split_dist['Percentage'].round(1),
            title=f"Distribution of Time Between Consecutive Rentals by {split_label}",
            labels={'Percentage': 'Percentage (%)'}
        )
        fig.update_traces(texttemplate='%{text}%', textposition='inside')
        fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
        st.plotly_chart(fig, use_container_width=True)
294
+
295
with tab2:
    # Late Checkouts Analysis
    st.subheader("Late Checkouts and Cancellations")

    # Rentals that reference a previous rental of the same car
    rentals_with_prev = df.dropna(subset=['previous_ended_rental_id'])
    total_with_prev = len(rentals_with_prev)

    # Share of all rentals that have previous-rental information. The total is
    # computed here so this tab does not depend on variables set in other tabs.
    rentals_total = len(df)
    pct_rental_with_infom_previous_rental = (
        total_with_prev / rentals_total * 100 if rentals_total > 0 else 0
    )

    # Rentals whose check-in was late because of the previous rental
    number_late_checking = df[df['late_checkin'] == "Late"]

    # Key figures
    st.markdown("### Key Figures")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Rentals with Previous Rental Info", f"{total_with_prev:,}")
    with col2:
        st.metric("Percentage of Rental with Previous Rental", f"{pct_rental_with_infom_previous_rental:.1f}%")
    with col3:
        st.metric("Number of Rental with Late Checkin due to Previous Rental", f"{len(number_late_checking):,}")

    st.markdown("### Rental State depending on Late Check-in")

    # Rentals per (late_checkin, state) pair
    grouped = df.groupby(['late_checkin', 'state'], observed=True)['rental_id'].count().reset_index()
    grouped.rename(columns={'rental_id': 'count'}, inplace=True)

    # Rentals per late_checkin value, used as the percentage denominator
    sum_grouped = df.groupby(['late_checkin'], observed=True)['rental_id'].count().reset_index()
    sum_grouped.rename(columns={'rental_id': 'sum'}, inplace=True)

    result = pd.merge(grouped, sum_grouped, on='late_checkin')
    result['percentage'] = result['count'] / result['sum'] * 100

    # The x axis is the late_checkin flag, so it is labelled as a check-in status.
    state_labels = {'late_checkin': 'Check-in Status', 'state': 'Rental State'}

    # Stacked bar chart of absolute counts
    fig = px.bar(
        result,
        x='late_checkin',
        y='count',
        color='state',
        barmode='stack',
        text=result['count'],
        title="Distribution of State by Type of Delay",
        labels={'count': 'Number of Rentals', **state_labels}
    )
    fig.update_traces(texttemplate='%{text}', textposition='inside')
    fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    st.plotly_chart(fig, use_container_width=True)

    # Stacked bar chart of percentages
    fig = px.bar(
        result,
        x='late_checkin',
        y='percentage',
        color='state',
        barmode='stack',
        text=result['percentage'].round(1),
        title="Percentage Distribution of State by Type of Delay",
        labels={'percentage': 'Percentage (%)', **state_labels}
    )
    fig.update_traces(texttemplate='%{text}%', textposition='inside')
    fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("### Split of Late Checkouts by Checkout Delay Category")

    # Keep only the rentals flagged as late checkouts
    df_late = df[df['checkout_status'] == 'Late']

    checkout_delay_counts = df_late.groupby('checkout_delay_category', observed=True)['rental_id'].count().reset_index()
    checkout_delay_counts.columns = ['Checkout Delay Category', 'Count']

    # Share of each delay category among the late checkouts
    total = checkout_delay_counts['Count'].sum()
    checkout_delay_counts['Percentage'] = checkout_delay_counts['Count'] / total * 100

    # Sort by category so the bars always appear in the same order
    checkout_delay_counts = checkout_delay_counts.sort_values('Checkout Delay Category')

    # Count chart
    fig1 = px.bar(
        checkout_delay_counts,
        x='Checkout Delay Category',
        y='Count',
        text='Count',
        title="Number of Late Rentals by Checkout Delay Category",
        labels={
            'Checkout Delay Category': 'Checkout Delay Category',
            'Count': 'Number of Rentals'
        },
        color='Count',
        color_continuous_scale='Blues'
    )
    fig1.update_traces(texttemplate='%{text}', textposition='inside')
    fig1.update_layout(
        uniformtext_minsize=8,
        uniformtext_mode='hide',
        xaxis_title="Checkout Delay Category",
        yaxis_title="Number of Rentals",
        coloraxis_showscale=False
    )
    st.plotly_chart(fig1, use_container_width=True)

    # Percentage chart
    fig2 = px.bar(
        checkout_delay_counts,
        x='Checkout Delay Category',
        y='Percentage',
        text=checkout_delay_counts['Percentage'].round(1),
        title="Percentage of Late Rentals by Checkout Delay Category",
        labels={
            'Checkout Delay Category': 'Checkout Delay Category',
            'Percentage': 'Percentage (%)'
        },
        color='Percentage',
        color_continuous_scale='Blues'
    )
    fig2.update_traces(texttemplate='%{text}%', textposition='inside')
    fig2.update_layout(
        uniformtext_minsize=8,
        uniformtext_mode='hide',
        xaxis_title="Checkout Delay Category",
        yaxis_title="Percentage of Rentals (%)",
        coloraxis_showscale=False
    )
    st.plotly_chart(fig2, use_container_width=True)
437
+
438
+
439
+ # Tab 3: Threshold Analysis
440
with tab3:
    st.header("Threshold Analysis")

    # Threshold selection
    threshold_options = [15, 30, 60, 90, 120, 180, 240, 300, 360]
    threshold = st.select_slider(
        "Select minimum delay threshold (minutes)",
        options=threshold_options,
        value=60
    )

    st.markdown(f"### Impact of {threshold}-minute Minimum Delay")

    # Thresholds to plot: every 30 minutes up to 6 hours, plus the selected
    # value when it is not already on that grid (e.g. 15).
    thresholds = sorted(set(range(0, 361, 30)) | {threshold})

    # Loop-invariant pieces, computed once instead of on every iteration.
    time_delta = df['time_delta_with_previous_rental_in_minutes']
    is_connect = df['checkin_type'] == 'connect'
    is_mobile = df['checkin_type'] == 'mobile'
    n_all = len(df)
    n_connect = int(is_connect.sum())
    n_mobile = int(is_mobile.sum())

    def _pct(part, whole):
        """Return part/whole as a percentage, or 0 when whole is empty."""
        return part / whole * 100 if whole > 0 else 0

    # A rental is affected by a threshold when its gap to the previous rental
    # is strictly below that threshold.
    threshold_impact = []
    for t in thresholds:
        below_threshold = time_delta < t
        all_affected = int(below_threshold.sum())
        connect_affected = int((below_threshold & is_connect).sum())
        mobile_affected = int((below_threshold & is_mobile).sum())

        threshold_impact.append({
            'threshold': t,
            'all_affected': all_affected,
            'connect_affected': connect_affected,
            'mobile_affected': mobile_affected,
            'all_pct': _pct(all_affected, n_all),
            'connect_pct': _pct(connect_affected, n_connect),
            'mobile_pct': _pct(mobile_affected, n_mobile),
        })

    threshold_df = pd.DataFrame(threshold_impact)

    # One line chart for absolute numbers and one for percentages; both share
    # the same legend names and the marker for the selected threshold.
    legend_names = ('All Cars', 'Connect Cars', 'Mobile Cars')
    chart_specs = (
        (['all_affected', 'connect_affected', 'mobile_affected'],
         'Number of Affected Rentals',
         "Number of Affected Rentals by Threshold"),
        (['all_pct', 'connect_pct', 'mobile_pct'],
         'Percentage of Affected Rentals (%)',
         "Percentage of Affected Rentals by Threshold"),
    )
    for y_columns, y_label, chart_title in chart_specs:
        fig = px.line(
            threshold_df,
            x='threshold',
            y=y_columns,
            labels={
                'threshold': 'Minimum Delay Threshold (minutes)',
                'value': y_label,
                'variable': 'Car Type'
            },
            title=chart_title
        )

        # Replace the raw column names in the legend with readable ones
        newnames = dict(zip(y_columns, legend_names))
        fig.for_each_trace(lambda trace: trace.update(name=newnames[trace.name]))

        fig.update_layout(hovermode="x unified")

        # Vertical line and annotation for the selected threshold
        fig.add_vline(x=threshold, line_dash="dash", line_color="red")
        fig.add_annotation(x=threshold, y=max(threshold_df[y_columns[0]]),
                           text=f"Selected: {threshold} min",
                           showarrow=True, arrowhead=1, ax=30, ay=-30)

        st.plotly_chart(fig, use_container_width=True)

    # Figures for the selected threshold
    st.subheader(f"Impact at Selected Threshold: {threshold} minutes")

    # The selected threshold is always part of `thresholds`, so the row exists.
    selected_row = threshold_df.loc[threshold_df['threshold'] == threshold].iloc[0]

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("All Cars Affected", f"{int(selected_row['all_affected']):,}")
    with col2:
        st.metric("Connect Cars Affected", f"{int(selected_row['connect_affected']):,}")
    with col3:
        st.metric("Mobile Cars Affected", f"{int(selected_row['mobile_affected']):,}")

    # Detailed breakdown of the rentals affected by the selected threshold
    affected_rentals = df[time_delta < threshold]

    if not affected_rentals.empty:
        st.subheader("Breakdown of Affected Rentals")

        # By check-in type
        checkin_breakdown = affected_rentals['checkin_type'].value_counts().reset_index()
        checkin_breakdown.columns = ['Check-in Type', 'Count']
        checkin_breakdown['Percentage'] = checkin_breakdown['Count'] / len(affected_rentals) * 100

        fig = px.pie(
            checkin_breakdown,
            values='Count',
            names='Check-in Type',
            title=f"Distribution of Affected Rentals by Check-in Type ({threshold} min threshold)",
            hole=0.4
        )
        st.plotly_chart(fig, use_container_width=True)

        # By state
        state_breakdown = affected_rentals['state'].value_counts().reset_index()
        state_breakdown.columns = ['State', 'Count']
        state_breakdown['Percentage'] = state_breakdown['Count'] / len(affected_rentals) * 100

        fig = px.bar(
            state_breakdown,
            x='State',
            y='Count',
            color='State',
            text_auto='.0f',
            title=f"State Distribution of Affected Rentals ({threshold} min threshold)"
        )
        fig.update_traces(textposition='outside')
        st.plotly_chart(fig, use_container_width=True)
585
+
586
+ else:
587
+ st.error("Failed to load data. The app tried loading from the URL (https://full-stack-assets.s3.eu-west-3.amazonaws.com/Deployment/get_around_delay_analysis.xlsx) and local paths without success. Please check your internet connection or upload the file manually.")
588
+
589
+ # Footer
590
+ st.markdown("---")
591
+ st.markdown("Getaround Rental Delay Analysis Dashboard - Developed by Louis Le Pogam")
get_around_delay_analysis.xlsx ADDED
Binary file (752 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ boto3
2
+ pandas
3
+ gunicorn
4
+ streamlit
5
+ scikit-learn
6
+ matplotlib
7
+ seaborn
8
+ plotly
9
+ huggingface_hub
10
+ numpy
11
+ openpyxl
run.bat ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ @echo off
2
+
3
+ :: Run the Docker command
4
+ docker run -it ^
5
+ -v "%cd%:/home/app" ^
6
+ -e PORT=7860 ^
7
+ -p 7860:7860 ^
8
+ getaround_streamlitapp