ralate2 committed on
Commit
522d8d9
·
verified ·
1 Parent(s): ed54ea1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -105
app.py CHANGED
@@ -441,8 +441,90 @@ elif viz_type == "Complaints by Housing Block and Type":
441
  The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
442
  """)
443
 
444
- elif viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
445
- st.subheader("Complaints by Housing Block and Type- Incorporating Suggestions Based on Professor's Feedback")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
 
447
  # Filtering the data based on the selected year and housing block
448
  filtered_data_time = data # Use filtered_data if date range is not needed
@@ -486,10 +568,21 @@ elif viz_type == "Complaints by Housing Block and Type (Incorporating Suggestion
486
  percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100
487
 
488
  # Plotting the data
489
- fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
  # Adding percentage labels to the plot
492
- ax = fig.gca()
493
  for idx, block in enumerate(complaint_pivot.index):
494
  cumulative_height = 0
495
  for i, complaint_type in enumerate(complaint_pivot.columns):
@@ -507,10 +600,15 @@ elif viz_type == "Complaints by Housing Block and Type (Incorporating Suggestion
507
  )
508
  cumulative_height += count
509
 
 
 
 
 
 
510
  # Display the plot in Streamlit
511
  st.pyplot(fig)
512
 
513
- # writeup
514
  st.write("""
515
  **What this visualization shows:**
516
  This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
@@ -521,107 +619,7 @@ elif viz_type == "Complaints by Housing Block and Type (Incorporating Suggestion
521
  **Color Scheme:**
522
  The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
523
  """)
524
- # In the above code, we incorporated all of the professor's suggestions and refined the chart to make it more useful for analysis while ensuring good aesthetics. Given that the data from block 3400 onwards is very sparse, we decided to exclude these records. This adjustment helped focus the visualization on the more relevant data, providing clearer insights and improving its overall effectiveness for analysis.
525
 
526
- # elif viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
527
- # st.subheader("Complaints by Housing Block and Type - Incorporating Suggestions Based on Professor's Feedback")
528
-
529
- # # Define blocks to be excluded
530
- # excluded_blocks = [
531
- # '3400 block', '3500 block', '3600 block', '3700 block', '3800 block', '3900 block',
532
- # '4000 block', '4100 block', '4200 block', '4300 block', '4400 block', '4500 block',
533
- # '4600 block', '4700 block', '4800 block', '4900 block', '5000 block'
534
- # ]
535
-
536
- # # Creating the desired order, excluding unwanted blocks
537
- # desired_order = [
538
- # '1 block', '100 block', '200 block', '300 block', '400 block', '500 block',
539
- # '600 block', '700 block', '800 block', '900 block', '1000 block', '1100 block',
540
- # '1200 block', '1300 block', '1400 block', '1500 block', '1600 block',
541
- # '1700 block', '1800 block', '1900 block', '2000 block', '2100 block',
542
- # '2200 block', '2300 block', '2400 block', '2500 block', '2600 block',
543
- # '2700 block', '2800 block', '2900 block', '3000 block', '3100 block',
544
- # '3200 block', '3300 block'
545
- # ]
546
-
547
- # # Filtering the data based on selected year
548
- # filtered_data_time = data # Use filtered_data if date range is not needed
549
- # if selected_year != 'All Time':
550
- # filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
551
-
552
- # # Exclude blocks from the data
553
- # filtered_data_time = filtered_data_time[~filtered_data_time['Housing Block'].isin(excluded_blocks)]
554
-
555
- # # Get the list of blocks excluding the unwanted ones
556
- # available_blocks = sorted(filtered_data_time['Housing Block'].unique().tolist())
557
-
558
- # # Dropdown for Housing Block (excluding unwanted blocks)
559
- # block_options = ['All Blocks'] + available_blocks
560
- # selected_block = st.sidebar.selectbox("Select Housing Block", options=block_options, key="block_select")
561
-
562
- # # Further filtering by selected Housing Block (if applicable)
563
- # if selected_block != 'All Blocks':
564
- # filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
565
-
566
- # # Pivoting the data
567
- # complaint_pivot = filtered_data_time.pivot_table(
568
- # index='Housing Block',
569
- # columns='Type of Complaint',
570
- # values='Disposition',
571
- # aggfunc='count',
572
- # fill_value=0
573
- # )
574
-
575
- # # Ensure the pivot data is numeric for plotting
576
- # complaint_pivot = complaint_pivot.astype(float)
577
-
578
- # # Reordering the pivot table by the desired order (excluding unwanted blocks)
579
- # complaint_pivot = complaint_pivot.reindex(desired_order)
580
-
581
- # # If a specific block is selected, only show that block on the x-axis
582
- # if selected_block != 'All Blocks':
583
- # complaint_pivot = complaint_pivot.loc[[selected_block]]
584
-
585
- # # Calculating percentages for each complaint type per housing block
586
- # percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100
587
-
588
- # # Plotting the data
589
- # fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()
590
-
591
- # # Adding percentage labels to the plot
592
- # ax = fig.gca()
593
- # for idx, block in enumerate(complaint_pivot.index):
594
- # cumulative_height = 0
595
- # for i, complaint_type in enumerate(complaint_pivot.columns):
596
- # count = complaint_pivot.iloc[idx, i]
597
- # percent = percentages.iloc[idx, i]
598
- # if count > 0:
599
- # # Compute the position for the percentage label
600
- # x_pos = idx - 0.4 + 0.8 / 2 # Adjusting the position of the label
601
- # y_pos = cumulative_height + count / 2
602
- # ax.text(
603
- # x_pos, y_pos, f"{percent:.1f}%",
604
- # ha='center', va='center',
605
- # fontsize=10, color='black',
606
- # bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
607
- # )
608
- # cumulative_height += count
609
-
610
- # # Display the plot in Streamlit
611
- # st.pyplot(fig)
612
-
613
- # # writeup
614
- # st.write("""
615
- # **What this visualization shows:**
616
- # This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage distribution of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
617
- # Given that the data from block 3400 onwards is very sparse, we decided to exclude these records. This adjustment helped focus the visualization on the more relevant data, providing clearer insights and improving its overall effectiveness for analysis.
618
-
619
- # **Why it's interesting:**
620
- # By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks.
621
-
622
- # **Color Scheme:**
623
- # The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
624
- # """)
625
 
626
 
627
  # Footer
 
441
  The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
442
  """)
443
 
444
+ # elif viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
445
+ # st.subheader("Complaints by Housing Block and Type- Incorporating Suggestions Based on Professor's Feedback")
446
+
447
+ # # Filtering the data based on the selected year and housing block
448
+ # filtered_data_time = data # Use filtered_data if date range is not needed
449
+ # if selected_year != 'All Time':
450
+ # filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
451
+
452
+ # # Further filtering by Housing Block (if applicable)
453
+ # if selected_block != 'All Blocks':
454
+ # filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
455
+
456
+ # # Pivoting the data based on the filtered data
457
+ # complaint_pivot = filtered_data_time.pivot_table(
458
+ # index='Housing Block',
459
+ # columns='Type of Complaint',
460
+ # values='Disposition',
461
+ # aggfunc='count',
462
+ # fill_value=0
463
+ # )
464
+
465
+ # # Ensuring the pivoted data is numeric for plotting
466
+ # complaint_pivot = complaint_pivot.astype(float)
467
+
468
+ # # Desired order for the housing blocks
469
+ # desired_order = [
470
+ # '1 block', '100 block', '200 block', '300 block', '400 block', '500 block',
471
+ # '600 block', '700 block', '800 block', '900 block', '1000 block', '1100 block',
472
+ # '1200 block', '1300 block', '1400 block', '1500 block', '1600 block',
473
+ # '1700 block', '1800 block', '1900 block', '2000 block', '2100 block',
474
+ # '2200 block', '2300 block', '2400 block', '2500 block', '2600 block',
475
+ # '2700 block', '2800 block', '2900 block', '3000 block', '3100 block',
476
+ # '3200 block', '3300 block', '3400 block', '3500 block', '3600 block',
477
+ # '3700 block', '3800 block', '3900 block', '4000 block', '4100 block',
478
+ # '4200 block', '4300 block', '4400 block', '4500 block', '4600 block',
479
+ # '4700 block', '4800 block', '4900 block', '5000 block'
480
+ # ]
481
+
482
+ # # Reordering the index of the pivot table according to the desired order
483
+ # complaint_pivot = complaint_pivot.reindex(desired_order)
484
+
485
+ # # Calculating percentages for each complaint type per housing block
486
+ # percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100
487
+
488
+ # # Plotting the data
489
+ # fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()
490
+
491
+ # # Adding percentage labels to the plot
492
+ # ax = fig.gca()
493
+ # for idx, block in enumerate(complaint_pivot.index):
494
+ # cumulative_height = 0
495
+ # for i, complaint_type in enumerate(complaint_pivot.columns):
496
+ # count = complaint_pivot.iloc[idx, i]
497
+ # percent = percentages.iloc[idx, i]
498
+ # if count > 0:
499
+ # # Compute the position for the percentage label
500
+ # x_pos = idx - 0.4 + 0.8 / 2 # Adjusting the position of the label
501
+ # y_pos = cumulative_height + count / 2
502
+ # ax.text(
503
+ # x_pos, y_pos, f"{percent:.1f}%",
504
+ # ha='center', va='center',
505
+ # fontsize=10, color='black',
506
+ # bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
507
+ # )
508
+ # cumulative_height += count
509
+
510
+ # # Display the plot in Streamlit
511
+ # st.pyplot(fig)
512
+
513
+ # # writeup
514
+ # st.write("""
515
+ # **What this visualization shows:**
516
+ # This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
517
+
518
+ # **Why it's interesting:**
519
+ # By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks.
520
+
521
+ # **Color Scheme:**
522
+ # The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
523
+ # """)
524
+ # In the above code, we incorporated all of the professor's suggestions and refined the chart to make it more useful for analysis while ensuring good aesthetics. Given that the data from block 3400 onwards is very sparse, we decided to exclude these records. This adjustment helped focus the visualization on the more relevant data, providing clearer insights and improving its overall effectiveness for analysis.
525
+
526
+ if viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
527
+ st.subheader("Complaints by Housing Block and Type - Incorporating Suggestions Based on Professor's Feedback")
528
 
529
  # Filtering the data based on the selected year and housing block
530
  filtered_data_time = data # Use filtered_data if date range is not needed
 
568
  percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100
569
 
570
  # Plotting the data
571
+ fig, ax = plt.subplots(figsize=(10, 6))
572
+ complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)
573
+
574
+ # Adjusting the x-axis ticks
575
+ if selected_block != 'All Blocks':
576
+ # Hide all x-axis labels if a single block is selected
577
+ ax.set_xticks([])
578
+ ax.set_xticklabels([])
579
+ else:
580
+ # Show every nth label to avoid overcrowding
581
+ tick_spacing = max(1, len(complaint_pivot) // 10) # Adjust based on the number of blocks
582
+ ax.set_xticks(range(0, len(complaint_pivot.index), tick_spacing))
583
+ ax.set_xticklabels(complaint_pivot.index[::tick_spacing], rotation=45, ha='right')
584
 
585
  # Adding percentage labels to the plot
 
586
  for idx, block in enumerate(complaint_pivot.index):
587
  cumulative_height = 0
588
  for i, complaint_type in enumerate(complaint_pivot.columns):
 
600
  )
601
  cumulative_height += count
602
 
603
+ # Setting labels and title
604
+ ax.set_xlabel('Housing Block')
605
+ ax.set_ylabel('Number of Complaints')
606
+ ax.set_title('Complaints by Housing Block and Type')
607
+
608
  # Display the plot in Streamlit
609
  st.pyplot(fig)
610
 
611
+ # Writeup
612
  st.write("""
613
  **What this visualization shows:**
614
  This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
 
619
  **Color Scheme:**
620
  The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
621
  """)
 
622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
 
624
 
625
  # Footer