emsesc committed on
Commit
fb3bf5f
·
1 Parent(s): 9eeefaa

add font and fix country issue

Browse files
Files changed (3) hide show
  1. app.py +113 -50
  2. assets/styles.css +2 -0
  3. graphs/leaderboard.py +113 -49
app.py CHANGED
@@ -665,57 +665,120 @@ def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view="all_do
665
  end = pd.to_datetime(slider_value[1], unit="s")
666
  time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"
667
 
668
- # Build the aggregation query to get top N with all needed metadata
669
- # This query groups by the target column and aggregates downloads
670
- # while collecting all metadata we need for chips
671
- query = f"""
672
- WITH base_data AS (
673
- SELECT
674
- {group_col},
675
- CASE
676
- WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
677
- WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
678
- ELSE org_country_single
679
- END AS org_country_single,
680
- author,
681
- derived_author,
682
- merged_country_groups_single,
683
- merged_modality,
684
- downloads,
685
- model
686
- FROM {view}
687
- {time_clause}
688
- ),
689
-
690
- -- Compute the total downloads for all rows in the time range
691
- total_downloads_cte AS (
692
- SELECT SUM(downloads) AS total_downloads_all
693
- FROM base_data
694
- ),
695
-
696
- -- Compute per-group totals and their percentage of all downloads
697
- top_items AS (
698
- SELECT
699
- b.{group_col} AS name,
700
- SUM(b.downloads) AS total_downloads,
701
- ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
702
- -- Pick first non-null metadata values for reference
703
- ANY_VALUE(b.org_country_single) AS org_country_single,
704
- ANY_VALUE(b.author) AS author,
705
- ANY_VALUE(b.derived_author) AS derived_author,
706
- ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
707
- ANY_VALUE(b.merged_modality) AS merged_modality,
708
- ANY_VALUE(b.model) AS model
709
- FROM base_data b
710
- CROSS JOIN total_downloads_cte t
711
- GROUP BY b.{group_col}, t.total_downloads_all
712
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713
 
714
- SELECT *
715
- FROM top_items
716
- ORDER BY total_downloads DESC
717
- LIMIT {top_n};
718
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
 
720
  return con.execute(query).fetchdf()
721
 
 
665
  end = pd.to_datetime(slider_value[1], unit="s")
666
  time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"
667
 
668
+ # If grouping by country, group by the transformed country column
669
+ if group_col == "org_country_single":
670
+ group_expr = """CASE
671
+ WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
672
+ WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
673
+ ELSE org_country_single
674
+ END"""
675
+ else:
676
+ group_expr = group_col
677
+
678
+ # Build a lookup for author -> country mapping
679
+ # When grouping by derived_author, we need to find the country where derived_author = author
680
+ if group_col == "derived_author":
681
+ query = f"""
682
+ WITH base_data AS (
683
+ SELECT
684
+ {group_expr} AS group_key,
685
+ CASE
686
+ WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
687
+ WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
688
+ ELSE org_country_single
689
+ END AS org_country_single,
690
+ author,
691
+ derived_author,
692
+ merged_country_groups_single,
693
+ merged_modality,
694
+ downloads,
695
+ model
696
+ FROM {view}
697
+ {time_clause}
698
+ ),
699
+
700
+ -- Create a lookup table for derived_author -> country
701
+ author_country_lookup AS (
702
+ SELECT DISTINCT
703
+ author,
704
+ FIRST_VALUE(org_country_single) OVER (PARTITION BY author ORDER BY downloads DESC) AS author_country
705
+ FROM base_data
706
+ WHERE author IS NOT NULL
707
+ ),
708
+
709
+ total_downloads_cte AS (
710
+ SELECT SUM(downloads) AS total_downloads_all
711
+ FROM base_data
712
+ ),
713
+
714
+ top_items AS (
715
+ SELECT
716
+ b.group_key AS name,
717
+ SUM(b.downloads) AS total_downloads,
718
+ ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
719
+ COALESCE(acl.author_country, ANY_VALUE(b.org_country_single)) AS org_country_single,
720
+ ANY_VALUE(b.author) AS author,
721
+ ANY_VALUE(b.derived_author) AS derived_author,
722
+ ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
723
+ ANY_VALUE(b.merged_modality) AS merged_modality,
724
+ ANY_VALUE(b.model) AS model
725
+ FROM base_data b
726
+ CROSS JOIN total_downloads_cte t
727
+ LEFT JOIN author_country_lookup acl ON b.group_key = acl.author
728
+ GROUP BY b.group_key, acl.author_country, t.total_downloads_all
729
+ )
730
 
731
+ SELECT *
732
+ FROM top_items
733
+ ORDER BY total_downloads DESC
734
+ LIMIT {top_n};
735
+ """
736
+ else:
737
+ query = f"""
738
+ WITH base_data AS (
739
+ SELECT
740
+ {group_expr} AS group_key,
741
+ CASE
742
+ WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
743
+ WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
744
+ ELSE org_country_single
745
+ END AS org_country_single,
746
+ author,
747
+ derived_author,
748
+ merged_country_groups_single,
749
+ merged_modality,
750
+ downloads,
751
+ model
752
+ FROM {view}
753
+ {time_clause}
754
+ ),
755
+
756
+ total_downloads_cte AS (
757
+ SELECT SUM(downloads) AS total_downloads_all
758
+ FROM base_data
759
+ ),
760
+
761
+ top_items AS (
762
+ SELECT
763
+ b.group_key AS name,
764
+ SUM(b.downloads) AS total_downloads,
765
+ ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
766
+ ANY_VALUE(b.org_country_single) AS org_country_single,
767
+ ANY_VALUE(b.author) AS author,
768
+ ANY_VALUE(b.derived_author) AS derived_author,
769
+ ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
770
+ ANY_VALUE(b.merged_modality) AS merged_modality,
771
+ ANY_VALUE(b.model) AS model
772
+ FROM base_data b
773
+ CROSS JOIN total_downloads_cte t
774
+ GROUP BY b.group_key, t.total_downloads_all
775
+ )
776
+
777
+ SELECT *
778
+ FROM top_items
779
+ ORDER BY total_downloads DESC
780
+ LIMIT {top_n};
781
+ """
782
 
783
  return con.execute(query).fetchdf()
784
 
assets/styles.css CHANGED
@@ -1,3 +1,5 @@
 
 
1
  /* Header links: transparent background, white text, grow on hover */
2
  .no-bg-link {
3
  background-color: transparent !important;
 
1
+ @import url("https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap");
2
+
3
  /* Header links: transparent background, white text, grow on hover */
4
  .no-bg-link {
5
  background-color: transparent !important;
graphs/leaderboard.py CHANGED
@@ -424,56 +424,120 @@ def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_
424
  start = pd.to_datetime(time_filter[0], unit="s")
425
  end = pd.to_datetime(time_filter[1], unit="s")
426
  time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"
427
-
428
- # Optimized query: first find top N, then get only those rows
429
- query = f"""
430
- WITH base_data AS (
431
- SELECT
432
- {group_col},
433
- CASE
434
- WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
435
- WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
436
- ELSE org_country_single
437
- END AS org_country_single,
438
- author,
439
- derived_author,
440
- merged_country_groups_single,
441
- merged_modality,
442
- downloads,
443
- model
444
- FROM {view}
445
- {time_clause}
446
- ),
447
-
448
- -- Compute the total downloads for all rows in the time range
449
- total_downloads_cte AS (
450
- SELECT SUM(downloads) AS total_downloads_all
451
- FROM base_data
452
- ),
453
-
454
- -- Compute per-group totals and their percentage of all downloads
455
- top_items AS (
456
- SELECT
457
- b.{group_col} AS name,
458
- SUM(b.downloads) AS total_downloads,
459
- ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
460
- -- Pick first non-null metadata values for reference
461
- ANY_VALUE(b.org_country_single) AS org_country_single,
462
- ANY_VALUE(b.author) AS author,
463
- ANY_VALUE(b.derived_author) AS derived_author,
464
- ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
465
- ANY_VALUE(b.merged_modality) AS merged_modality,
466
- ANY_VALUE(b.model) AS model
467
- FROM base_data b
468
- CROSS JOIN total_downloads_cte t
469
- GROUP BY b.{group_col}, t.total_downloads_all
470
- )
471
 
472
- SELECT *
473
- FROM top_items
474
- ORDER BY total_downloads DESC
475
- LIMIT {top_n};
476
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
  try:
479
  return con.execute(query).fetchdf()
 
424
  start = pd.to_datetime(time_filter[0], unit="s")
425
  end = pd.to_datetime(time_filter[1], unit="s")
426
  time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
 
428
+ # If grouping by country, group by the transformed country column
429
+ if group_col == "org_country_single":
430
+ group_expr = """CASE
431
+ WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
432
+ WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
433
+ ELSE org_country_single
434
+ END"""
435
+ else:
436
+ group_expr = group_col
437
+
438
+ # When grouping by derived_author, lookup the country where derived_author = author
439
+ if group_col == "derived_author":
440
+ query = f"""
441
+ WITH base_data AS (
442
+ SELECT
443
+ {group_expr} AS group_key,
444
+ CASE
445
+ WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
446
+ WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
447
+ ELSE org_country_single
448
+ END AS org_country_single,
449
+ author,
450
+ derived_author,
451
+ merged_country_groups_single,
452
+ merged_modality,
453
+ downloads,
454
+ model
455
+ FROM {view}
456
+ {time_clause}
457
+ ),
458
+
459
+ -- Create a lookup table for derived_author -> country
460
+ author_country_lookup AS (
461
+ SELECT DISTINCT
462
+ author,
463
+ FIRST_VALUE(org_country_single) OVER (PARTITION BY author ORDER BY downloads DESC) AS author_country
464
+ FROM base_data
465
+ WHERE author IS NOT NULL
466
+ ),
467
+
468
+ total_downloads_cte AS (
469
+ SELECT SUM(downloads) AS total_downloads_all
470
+ FROM base_data
471
+ ),
472
+
473
+ top_items AS (
474
+ SELECT
475
+ b.group_key AS name,
476
+ SUM(b.downloads) AS total_downloads,
477
+ ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
478
+ COALESCE(acl.author_country, ANY_VALUE(b.org_country_single)) AS org_country_single,
479
+ ANY_VALUE(b.author) AS author,
480
+ ANY_VALUE(b.derived_author) AS derived_author,
481
+ ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
482
+ ANY_VALUE(b.merged_modality) AS merged_modality,
483
+ ANY_VALUE(b.model) AS model
484
+ FROM base_data b
485
+ CROSS JOIN total_downloads_cte t
486
+ LEFT JOIN author_country_lookup acl ON b.group_key = acl.author
487
+ GROUP BY b.group_key, acl.author_country, t.total_downloads_all
488
+ )
489
+
490
+ SELECT *
491
+ FROM top_items
492
+ ORDER BY total_downloads DESC
493
+ LIMIT {top_n};
494
+ """
495
+ else:
496
+ query = f"""
497
+ WITH base_data AS (
498
+ SELECT
499
+ {group_expr} AS group_key,
500
+ CASE
501
+ WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
502
+ WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
503
+ ELSE org_country_single
504
+ END AS org_country_single,
505
+ author,
506
+ derived_author,
507
+ merged_country_groups_single,
508
+ merged_modality,
509
+ downloads,
510
+ model
511
+ FROM {view}
512
+ {time_clause}
513
+ ),
514
+
515
+ total_downloads_cte AS (
516
+ SELECT SUM(downloads) AS total_downloads_all
517
+ FROM base_data
518
+ ),
519
+
520
+ top_items AS (
521
+ SELECT
522
+ b.group_key AS name,
523
+ SUM(b.downloads) AS total_downloads,
524
+ ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
525
+ ANY_VALUE(b.org_country_single) AS org_country_single,
526
+ ANY_VALUE(b.author) AS author,
527
+ ANY_VALUE(b.derived_author) AS derived_author,
528
+ ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
529
+ ANY_VALUE(b.merged_modality) AS merged_modality,
530
+ ANY_VALUE(b.model) AS model
531
+ FROM base_data b
532
+ CROSS JOIN total_downloads_cte t
533
+ GROUP BY b.group_key, t.total_downloads_all
534
+ )
535
+
536
+ SELECT *
537
+ FROM top_items
538
+ ORDER BY total_downloads DESC
539
+ LIMIT {top_n};
540
+ """
541
 
542
  try:
543
  return con.execute(query).fetchdf()