joshdavham committed on
Commit
5229ea9
·
1 Parent(s): 4b92973

add sconj hist

Browse files
Files changed (1) hide show
  1. app.py +169 -1
app.py CHANGED
@@ -1412,11 +1412,179 @@ st.markdown("(It's okay ff the above didn't quite make sense to you - just know
1412
 
1413
  # grammar table
1414
 
 
 
 
1415
  st.markdown("## Grammar")
1416
 
1417
  st.markdown("Easier videos tend to use less [subordinating conjunctions](https://universaldependencies.org/u/pos/SCONJ.html) than harder videos.")
1418
 
1419
- st.markdown("[TODO]: Add sconj histogram")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1420
 
1421
  st.markdown("We also notice differences in the use of other types of words.")
1422
 
 
1412
 
1413
  # grammar table
1414
 
1415
+ ###
1416
+ # GRAMMAR
1417
+ ###
1418
  st.markdown("## Grammar")
1419
 
1420
  st.markdown("Easier videos tend to use less [subordinating conjunctions](https://universaldependencies.org/u/pos/SCONJ.html) than harder videos.")
1421
 
1422
def get_sconj_hist(show_medians=False):
    """Build the histogram of subordinating-conjunction percentages per level.

    Reads the module-level ``video_df`` and, as a side effect, writes a
    ``sconj_props_perc`` column onto it (``sconj_props`` scaled by 100).

    Args:
        show_medians: When true, overlay one dashed vertical rule per level
            at that level's median percentage, plus a numeric label above it.

    Returns:
        A layered Altair chart with a white background: the histogram alone,
        or the histogram with the median rules and labels on top.
    """
    # Single source of truth for level ordering, reused by every colour scale.
    level_order = ['Complete Beginner', 'Beginner', 'Intermediate', 'Advanced']

    # One palette for both the median rules and their labels, so a rule and
    # its label are always declared with the same colour.
    # NOTE(review): layers of a layered chart share their colour scale by
    # default, so this range may be superseded by the histogram's palette --
    # confirm visually.
    median_colors = ['red', 'green', 'blue', 'orange']

    def _axis(title_padding, **extra):
        # Shared axis styling; only the title padding (and extras) differ.
        return alt.Axis(
            labelFontSize=14,
            titleFontSize=18,
            titleColor='black',
            titleFontWeight='normal',
            titlePadding=title_padding,
            **extra,
        )

    def _median_color():
        # Colour encoding shared by the median rules and labels; the legend
        # is suppressed because the histogram already provides one.
        return alt.Color(
            'level:N',
            scale=alt.Scale(range=median_colors),
            sort=level_order,
            legend=None,
        )

    # Proportions are stored as fractions; the chart shows percentages.
    video_df['sconj_props_perc'] = 100.0 * video_df['sconj_props']

    # Hard-coded median percentage for each level (x positions of the rules).
    # These are not derived from video_df here, so they must be updated by
    # hand if the underlying data changes.
    line_data = pd.DataFrame({
        'x': [2.64, 4.73, 6.63, 7.67],
        'level': level_order,
    })

    # Clicking a legend entry isolates that level.
    selection = alt.selection_point(fields=['level'], bind='legend', on='click')

    # Hovering a mark thickens the stroke of marks in the same level.
    highlight = alt.selection_point(
        name="highlight", fields=['level'], on='mouseover', empty=False
    )

    # NOTE(review): the mark-level opacity/strokeWidth below appear to be
    # superseded by the conditional encodings further down -- confirm before
    # removing them.
    histogram = alt.Chart(video_df).mark_bar(
        opacity=0.5,
        binSpacing=3,
        stroke='black',
        strokeWidth=0,
        cornerRadius=5,
        cursor="pointer",
    ).encode(
        alt.X(
            'sconj_props_perc:Q',
            bin=alt.Bin(maxbins=30),
            title='Percentage of words',
            axis=_axis(30),
        ),
        alt.Y(
            'count()',
            title="Num. videos",
            axis=_axis(20, tickCount=5),
            scale=alt.Scale(domain=[0, 50]),
        ).stack(None),  # overlay the levels instead of stacking them
        alt.Color(
            'level:N',
            scale=alt.Scale(range=['#a5bee4', '#9ad6d8', '#c7aecd', '#dd9e9e']),
            sort=level_order,
            legend=alt.Legend(
                title='CIJ Level',
                titleFontSize=18,
                titleFontWeight='bolder',
                labelFontSize=16,
                symbolType='circle',
                symbolSize=200,
                symbolStrokeWidth=0,
                orient='right',
                direction='vertical',
                fillColor='white',
                padding=10,
                cornerRadius=5,
            ),
        ),
        tooltip=[
            # bin=True so the tooltip reports the bin range, not a raw value.
            alt.Tooltip(
                'sconj_props_perc:Q',
                title='Percentage of subordinating conjunctions:',
                bin=True,
            ),
            alt.Tooltip('level:N', title='Level:'),
            alt.Tooltip('count()', title='Video count:'),
        ],
        opacity=alt.condition(selection, alt.value(0.75), alt.value(0.1)),
        strokeWidth=alt.condition(highlight, alt.value(2), alt.value(1)),
    ).properties(
        width='container',
        height=500,
        title=alt.TitleParams(
            text='Percentages of subordinating conjunctions',
            offset=20,
            fontSize=24,
            fontWeight='normal',
            anchor='middle',
            color='black',
            subtitleFontSize=15,
            subtitleColor='gray',
        ),
    ).add_params(
        selection,
        highlight,
    )

    # Dashed vertical rule at each level's median.
    # NOTE(review): mark-level color/strokeWidth appear to be superseded by
    # the encodings below -- confirm before removing them.
    vertical_lines = alt.Chart(line_data).mark_rule(
        color='red',
        strokeWidth=6,
        strokeDash=[10, 2],  # dash length, gap length
    ).encode(
        x='x:Q',
        tooltip=[
            # Quantitative with two decimals, matching the text label.
            alt.Tooltip(
                'x:Q',
                title='Median percentage of subordinating conjunctions:',
                format='.2f',
            ),
            alt.Tooltip('level:N', title='Level:'),
        ],
        color=_median_color(),
        # Fade rules whose level is not selected in the legend.
        opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)),
        strokeWidth=alt.condition(highlight, alt.value(20), alt.value(1)),
    ).add_params(
        selection,
        highlight,
    )

    # Numeric label for each median rule.
    text_labels = alt.Chart(line_data).mark_text(
        align='center',  # centre the label horizontally on the rule
        dx=0,            # no horizontal offset
        dy=-10,          # shift the label 10 px upwards
        fontSize=16,
        fontWeight='bold',
    ).encode(
        x='x:Q',
        y=alt.value(0),  # pixel 0, i.e. the top edge of the plot area
        text=alt.Text('x:Q', format='.2f'),  # median value, two decimals
        color=_median_color(),
        # Fade labels whose level is not selected in the legend.
        opacity=alt.condition(selection, alt.value(1.0), alt.value(0.1)),
    )

    layers = [histogram]
    if show_medians:
        layers += [vertical_lines, text_labels]

    return alt.layer(*layers, background='white')
1578
+
1579
# The checkbox state directly decides whether the median overlays are drawn.
sconj_hist = get_sconj_hist(
    show_medians=st.checkbox('Show medians', key='sconj')
)

st.altair_chart(sconj_hist, use_container_width=True)
1588
 
1589
  st.markdown("We also notice differences in the use of other types of words.")
1590