joshdavham committed on
Commit
51ce8b5
·
1 Parent(s): 81850c8

get rid of get_zoomed_word_coverage_chart

Browse files
Files changed (1) hide show
  1. app.py +13 -166
app.py CHANGED
@@ -749,160 +749,14 @@ st.markdown("If we take all the words in CIJ, count them then order them from mo
749
  # word coverage chart
750
 
751
  @st.cache_data
752
- def get_word_coverage_chart():
753
 
754
  word_coverage_df = pd.read_csv('word_coverage_df_plot.tsv', sep='\t')
755
 
756
- # Data for vertical lines corresponding to each level
757
- line_data = pd.DataFrame({
758
- 'x': [4295, 5606, 6853, 9085],
759
- 'level': ['Complete Beginner', 'Beginner', 'Intermediate', 'Advanced'],
760
- 'text': ['Complete Beginner', 'Beginner', 'Intermediate', 'Advanced']
761
- })
762
-
763
- selection = alt.selection_point(fields=['level'], bind='legend', on='click')
764
-
765
- highlight = alt.selection_point(name="highlight", fields=['level'], on='mouseover', empty=False)
766
-
767
- line_chart = alt.Chart(word_coverage_df).mark_line(
768
- cursor='pointer',
769
- point=False,
770
- ).encode(
771
- x=alt.X(
772
- 'rank:Q',
773
- scale=alt.Scale(domain=[-10,16000]),
774
- #scale=alt.Scale(domain=[1000,16000]),
775
- title='Number of words known',
776
- axis=alt.Axis(
777
- labelFontSize=14,
778
- titleFontSize=18,
779
- #titleFont='Urbanist',
780
- titleColor='black',
781
- titleFontWeight='normal',
782
- #titleFontStyle='italic',
783
- titlePadding=20
784
- )
785
- ),
786
- y=alt.Y(
787
- 'coverage_perc:Q',
788
- scale=alt.Scale(domain=[0,105]),
789
- #scale=alt.Scale(domain=[90,101]),
790
- title='% of words understood',
791
- axis=alt.Axis(
792
- labelFontSize=14,
793
- titleFontSize=18,
794
- #titleFont='Urbanist',
795
- titleColor='black',
796
- titleFontWeight='normal',
797
- #titleFontStyle='italic',
798
- titlePadding=20,
799
- tickCount=5
800
- ),
801
- ),
802
- #x=alt.X('rank:Q', scale=alt.Scale(domain=[1000,16000])),
803
- #y=alt.Y('coverage_perc:Q', scale=alt.Scale(domain=[90,101])),
804
- color=alt.Color(
805
- 'level:N',
806
- scale=alt.Scale(range=['#a5bee4', '#9ad6d8', '#c7aecd', '#dd9e9e']),
807
- sort=['Complete Beginner', 'Beginner', 'Intermediate', 'Advanced'],
808
- legend=alt.Legend(
809
- title='CIJ Level',
810
- titleFontSize=18,
811
- titleFontWeight='bolder',
812
- labelFontSize=16,
813
- symbolType='circle',
814
- symbolSize=200,
815
- #symbolStrokeWidth=3,
816
- orient='right',
817
- direction='vertical',
818
- #fillColor='black',
819
- padding=10,
820
- cornerRadius=5,
821
- )
822
- ),
823
- tooltip=[
824
- alt.Tooltip('word:N', title='Word: '),
825
- alt.Tooltip('rank:Q', title="CIJ rank: "),
826
- alt.Tooltip('coverage_perc_str:N', title='Word coverage: '),
827
- alt.Tooltip('level:N', title='Level: ')
828
- ],
829
- opacity=alt.condition(selection, alt.value(1.0), alt.value(0.2)),
830
- strokeWidth=alt.condition(selection | highlight, alt.value(6), alt.value(2))
831
- ).properties(
832
- width='container',
833
- height=500,
834
- title=alt.TitleParams(
835
- text='Word coverage curves',
836
- offset=20,
837
- #subtitle='(clickable)',
838
- #font='Urbanist',
839
- fontSize=24,
840
- fontWeight='normal',
841
- anchor='middle',
842
- color='black',
843
- subtitleFontSize=15,
844
- subtitleColor='gray'
845
- )
846
- ).add_params(
847
- selection,
848
- highlight
849
- )
850
-
851
- # Vertical lines corresponding to each level
852
- vertical_lines = alt.Chart(line_data).mark_rule(
853
- color='red',
854
- strokeWidth=4,
855
- strokeDash = [10, 2], # first arg is length, second is gap
856
- ).encode(
857
- x='x:Q',
858
- tooltip=[
859
- alt.Tooltip('x:N', title='Words needed to reach 98%:'),
860
- alt.Tooltip('level:N', title='Level:')
861
- ],
862
- #color=alt.condition(select, 'level:N', alt.value('gray')), # Link the color with the selection
863
- color=alt.Color(
864
- 'level:N',
865
- scale=alt.Scale(range=['red', 'green', 'blue', 'yellow']), # Use the same color scale as the histogram
866
- sort=['Complete Beginner', 'Beginner', 'Intermediate', 'Advanced'],
867
- legend=None # No legend for lines, it is already shown in the histogram
868
- ),
869
- opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)), # Link opacity with selection
870
- strokeWidth=alt.condition(highlight, alt.value(20), alt.value(1))
871
- ).add_params(
872
- selection,
873
- highlight
874
- )#.interactive()
875
-
876
- text_labels = alt.Chart(line_data).mark_text(
877
- align='center', # Align text to the left of the line
878
- dx=0, # Offset the text to the right by 5 pixels
879
- dy=-10, # Adjust vertical positioning
880
- fontSize=16,
881
- fontWeight='bold'
882
- ).encode(
883
- x='x:Q',
884
- y=alt.value(0), # Positioning y at the top of the chart, can be adjusted as needed
885
- text=alt.Text('x:Q', format='.0f'), # Display the x value, formatted as an integer
886
- color=alt.Color(
887
- 'level:N',
888
- scale=alt.Scale(range=['red', 'green', 'blue', 'orange']),
889
- sort=['Complete Beginner', 'Beginner', 'Intermediate', 'Advanced'],
890
- legend=None
891
- ),
892
- opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)), # Link opacity with selection
893
- )
894
-
895
- #layered_chart = alt.layer(line_chart, background='#f6f8fb')
896
- layered_chart = alt.layer(line_chart, vertical_lines, text_labels, background='white')
897
-
898
- return layered_chart
899
-
900
- @st.cache_data
901
- def get_zoomed_word_coverage_chart():
902
-
903
- word_coverage_df = pd.read_csv('word_coverage_df_plot.tsv', sep='\t')
904
-
905
- word_coverage_df_sub = word_coverage_df.loc[word_coverage_df['coverage_perc']>=90]
906
 
907
  # Data for vertical lines corresponding to each level
908
  line_data = pd.DataFrame({
@@ -918,12 +772,10 @@ def get_zoomed_word_coverage_chart():
918
  line_chart = alt.Chart(word_coverage_df_sub).mark_line(
919
  cursor='pointer',
920
  point=False,
921
- strokeWidth=6
922
  ).encode(
923
  x=alt.X(
924
- 'rank:Q',
925
- #scale=alt.Scale(domain=[-10,16000]),
926
- scale=alt.Scale(domain=[1000,16000]),
927
  title='Number of words known',
928
  axis=alt.Axis(
929
  labelFontSize=14,
@@ -936,9 +788,8 @@ def get_zoomed_word_coverage_chart():
936
  )
937
  ),
938
  y=alt.Y(
939
- 'coverage_perc:Q',
940
- #scale=alt.Scale(domain=[0,105]),
941
- scale=alt.Scale(domain=[90,101]),
942
  title='% of words understood',
943
  axis=alt.Axis(
944
  labelFontSize=14,
@@ -951,8 +802,6 @@ def get_zoomed_word_coverage_chart():
951
  tickCount=5
952
  ),
953
  ),
954
- #x=alt.X('rank:Q', scale=alt.Scale(domain=[1000,16000])),
955
- #y=alt.Y('coverage_perc:Q', scale=alt.Scale(domain=[90,101])),
956
  color=alt.Color(
957
  'level:N',
958
  scale=alt.Scale(range=['#a5bee4', '#9ad6d8', '#c7aecd', '#dd9e9e']),
@@ -979,7 +828,7 @@ def get_zoomed_word_coverage_chart():
979
  alt.Tooltip('level:N', title='Level: ')
980
  ],
981
  opacity=alt.condition(selection, alt.value(1.0), alt.value(0.2)),
982
- #strokeWidth=alt.condition(selection | highlight, alt.value(6), alt.value(2))
983
  ).properties(
984
  width='container',
985
  height=500,
@@ -1019,7 +868,7 @@ def get_zoomed_word_coverage_chart():
1019
  legend=None # No legend for lines, it is already shown in the histogram
1020
  ),
1021
  opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)), # Link opacity with selection
1022
- #strokeWidth=alt.condition(highlight, alt.value(20), alt.value(1))
1023
  ).add_params(
1024
  selection,
1025
  highlight
@@ -1044,19 +893,17 @@ def get_zoomed_word_coverage_chart():
1044
  opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)), # Link opacity with selection
1045
  )
1046
 
1047
- #layered_chart = alt.layer(line_chart, background='#f6f8fb')
1048
  layered_chart = alt.layer(line_chart, vertical_lines, text_labels, background='white')
1049
 
1050
  return layered_chart
1051
 
1052
-
1053
  if st.checkbox('Zoom in'):
1054
 
1055
- word_coverage_chart = get_zoomed_word_coverage_chart()
1056
 
1057
  else:
1058
 
1059
- word_coverage_chart = get_word_coverage_chart()
1060
 
1061
  st.altair_chart(word_coverage_chart, use_container_width=True)
1062
 
 
749
  # word coverage chart
750
 
751
  @st.cache_data
752
+ def get_word_coverage_chart(zoom=False):
753
 
754
  word_coverage_df = pd.read_csv('word_coverage_df_plot.tsv', sep='\t')
755
 
756
+ if zoom:
757
+ word_coverage_df_sub = word_coverage_df.loc[word_coverage_df['coverage_perc']>=90]
758
+ else:
759
+ word_coverage_df_sub = word_coverage_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
 
761
  # Data for vertical lines corresponding to each level
762
  line_data = pd.DataFrame({
 
772
  line_chart = alt.Chart(word_coverage_df_sub).mark_line(
773
  cursor='pointer',
774
  point=False,
 
775
  ).encode(
776
  x=alt.X(
777
+ 'rank:Q',
778
+ scale=alt.Scale(domain=[1000,16000]) if zoom else alt.Scale(domain=[-10,16000]),
 
779
  title='Number of words known',
780
  axis=alt.Axis(
781
  labelFontSize=14,
 
788
  )
789
  ),
790
  y=alt.Y(
791
+ 'coverage_perc:Q',
792
+ scale=alt.Scale(domain=[90,101]) if zoom else alt.Scale(domain=[0,105]),
 
793
  title='% of words understood',
794
  axis=alt.Axis(
795
  labelFontSize=14,
 
802
  tickCount=5
803
  ),
804
  ),
 
 
805
  color=alt.Color(
806
  'level:N',
807
  scale=alt.Scale(range=['#a5bee4', '#9ad6d8', '#c7aecd', '#dd9e9e']),
 
828
  alt.Tooltip('level:N', title='Level: ')
829
  ],
830
  opacity=alt.condition(selection, alt.value(1.0), alt.value(0.2)),
831
+ strokeWidth=alt.condition(selection | highlight, alt.value(6), alt.value(2))
832
  ).properties(
833
  width='container',
834
  height=500,
 
868
  legend=None # No legend for lines, it is already shown in the histogram
869
  ),
870
  opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)), # Link opacity with selection
871
+ strokeWidth=alt.condition(highlight, alt.value(20), alt.value(1))
872
  ).add_params(
873
  selection,
874
  highlight
 
893
  opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)), # Link opacity with selection
894
  )
895
 
 
896
  layered_chart = alt.layer(line_chart, vertical_lines, text_labels, background='white')
897
 
898
  return layered_chart
899
 
 
900
  if st.checkbox('Zoom in'):
901
 
902
+ word_coverage_chart = get_word_coverage_chart(zoom=True)
903
 
904
  else:
905
 
906
+ word_coverage_chart = get_word_coverage_chart(zoom=False)
907
 
908
  st.altair_chart(word_coverage_chart, use_container_width=True)
909