Spaces:
Running
Running
add paperverse code
Browse files- README.md +4 -4
- app.py +766 -66
- integrated_ml_taxonomy.json +488 -0
README.md
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
emoji: 😻
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.36.2
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: apache-2.0
|
| 11 |
-
short_description:
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: PaperVerse Explorer
|
| 3 |
emoji: 😻
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: red
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.36.2
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: apache-2.0
|
| 11 |
+
short_description: Research Paper Stats on Hugging Face
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
@@ -2,46 +2,366 @@ import gradio as gr
|
|
| 2 |
import pandas as pd
|
| 3 |
import plotly.express as px
|
| 4 |
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
from datasets import load_dataset
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# --- Constants ---
|
| 8 |
TOP_K_CHOICES = list(range(5, 51, 5))
|
| 9 |
-
HF_DATASET_ID = "evijit/
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
]
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def load_datasets_data():
|
| 16 |
-
"""Load the
|
| 17 |
start_time = time.time()
|
| 18 |
print(f"Attempting to load dataset from Hugging Face Hub: {HF_DATASET_ID}")
|
| 19 |
try:
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
msg = f"Successfully loaded dataset in {time.time() - start_time:.2f}s."
|
| 23 |
print(msg)
|
| 24 |
return df, True, msg
|
| 25 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
err_msg = f"Failed to load dataset. Error: {e}"
|
| 27 |
print(err_msg)
|
| 28 |
return pd.DataFrame(), False, err_msg
|
| 29 |
|
| 30 |
-
def make_treemap_data(df, count_by, top_k=25, tag_filter=None, skip_cats=None):
|
| 31 |
"""
|
| 32 |
Filter data and prepare it for a multi-level treemap.
|
| 33 |
- Preserves individual datasets for the top K organizations.
|
| 34 |
- Groups all other organizations into a single "Other" category.
|
|
|
|
| 35 |
"""
|
| 36 |
if df is None or df.empty:
|
| 37 |
return pd.DataFrame()
|
| 38 |
|
| 39 |
filtered_df = df.copy()
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
}
|
| 46 |
|
| 47 |
if tag_filter and tag_filter != "None" and tag_filter in col_map:
|
|
@@ -55,63 +375,209 @@ def make_treemap_data(df, count_by, top_k=25, tag_filter=None, skip_cats=None):
|
|
| 55 |
filtered_df[count_by] = 0.0
|
| 56 |
filtered_df[count_by] = pd.to_numeric(filtered_df[count_by], errors='coerce').fillna(0.0)
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
|
|
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
def create_treemap(treemap_data, count_by, title=None):
|
| 77 |
"""Generate the Plotly treemap figure from the prepared data."""
|
| 78 |
if treemap_data.empty or treemap_data[count_by].sum() <= 0:
|
| 79 |
fig = px.treemap(names=["No data matches filters"], parents=[""], values=[1])
|
| 80 |
fig.update_layout(title="No data matches the selected filters", margin=dict(t=50, l=25, r=25, b=25))
|
| 81 |
return fig
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
|
|
|
|
|
|
|
|
|
|
| 86 |
fig.update_traces(
|
| 87 |
-
textinfo="label+value
|
| 88 |
-
hovertemplate=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
)
|
| 90 |
return fig
|
| 91 |
|
| 92 |
# --- Gradio UI Blocks ---
|
| 93 |
-
with gr.Blocks(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
datasets_data_state = gr.State(pd.DataFrame())
|
| 95 |
loading_complete_state = gr.State(False)
|
|
|
|
| 96 |
|
| 97 |
with gr.Row():
|
| 98 |
-
gr.Markdown("#
|
| 99 |
|
| 100 |
-
with gr.
|
| 101 |
-
with gr.
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
def _update_button_interactivity(is_loaded_flag):
|
| 114 |
return gr.update(interactive=is_loaded_flag)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
## CHANGE: New combined function to load data and generate the initial plot on startup.
|
| 117 |
def load_and_generate_initial_plot(progress=gr.Progress()):
|
|
@@ -126,57 +592,140 @@ with gr.Blocks(title="🤗 Dataverse Explorer", fill_width=True) as demo:
|
|
| 126 |
ts = pd.to_datetime(current_df['data_download_timestamp'].iloc[0], utc=True)
|
| 127 |
date_display = ts.strftime('%B %d, %Y, %H:%M:%S %Z')
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
data_info_text = (f"### Data Information\n- Source: `{HF_DATASET_ID}`\n"
|
| 130 |
f"- Status: {status_msg_from_load}\n"
|
| 131 |
-
f"- Total
|
| 132 |
f"- Data as of: {date_display}\n")
|
| 133 |
else:
|
| 134 |
data_info_text = f"### Data Load Failed\n- {status_msg_from_load}"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
except Exception as e:
|
| 136 |
status_msg_from_load = f"An unexpected error occurred: {str(e)}"
|
| 137 |
data_info_text = f"### Critical Error\n- {status_msg_from_load}"
|
| 138 |
load_success_flag = False
|
| 139 |
current_df = pd.DataFrame() # Ensure df is empty on failure
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
print(f"Critical error in load_and_generate_initial_plot: {e}")
|
| 141 |
|
| 142 |
# --- Part 2: Generate Initial Plot ---
|
| 143 |
progress(0.6, desc="Generating initial plot...")
|
| 144 |
-
#
|
| 145 |
-
default_metric = "
|
| 146 |
default_tag = "None"
|
| 147 |
default_k = 25
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
# Reuse the existing controller function for plotting
|
| 151 |
initial_plot, initial_status = ui_generate_plot_controller(
|
| 152 |
-
default_metric,
|
| 153 |
)
|
| 154 |
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
def ui_generate_plot_controller(metric_choice,
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
if df_current_datasets is None or df_current_datasets.empty:
|
| 160 |
return create_treemap(pd.DataFrame(), metric_choice), "Dataset data is not loaded. Cannot generate plot."
|
| 161 |
|
| 162 |
progress(0.1, desc="Aggregating data...")
|
| 163 |
cats_to_skip = [cat.strip() for cat in skip_cats_input.split(',') if cat.strip()]
|
| 164 |
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
progress(0.7, desc="Generating plot...")
|
| 168 |
-
title_labels = {
|
| 169 |
-
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
if treemap_df.empty:
|
| 173 |
plot_stats_md = "No data matches the selected filters. Please try different options."
|
| 174 |
else:
|
| 175 |
total_value_in_plot = treemap_df[metric_choice].sum()
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
return plotly_fig, plot_stats_md
|
| 182 |
|
|
@@ -186,7 +735,19 @@ with gr.Blocks(title="🤗 Dataverse Explorer", fill_width=True) as demo:
|
|
| 186 |
demo.load(
|
| 187 |
fn=load_and_generate_initial_plot,
|
| 188 |
inputs=[],
|
| 189 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
)
|
| 191 |
|
| 192 |
loading_complete_state.change(
|
|
@@ -195,12 +756,151 @@ with gr.Blocks(title="🤗 Dataverse Explorer", fill_width=True) as demo:
|
|
| 195 |
outputs=generate_plot_button
|
| 196 |
)
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
generate_plot_button.click(
|
| 199 |
fn=ui_generate_plot_controller,
|
| 200 |
-
inputs=[
|
| 201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
outputs=[plot_output, status_message_md]
|
| 203 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
if __name__ == "__main__":
|
| 206 |
print("Application starting...")
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import plotly.express as px
|
| 4 |
import time
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
import requests
|
| 8 |
+
import duckdb
|
| 9 |
+
import json
|
| 10 |
from datasets import load_dataset
|
| 11 |
+
from huggingface_hub import logout as hf_logout
|
| 12 |
+
from gradio_rangeslider import RangeSlider
|
| 13 |
|
| 14 |
# --- Constants ---
# Selectable "top K" values offered by the UI: 5, 10, ..., 50.
TOP_K_CHOICES = list(range(5, 51, 5))

# Hugging Face Hub dataset that backs the app.
HF_DATASET_ID = "evijit/paperverse_daily_data"

# Direct parquet file URL (public). Derived from HF_DATASET_ID so the two
# cannot drift apart if the dataset is ever renamed.
PARQUET_URL = (
    f"https://huggingface.co/datasets/{HF_DATASET_ID}"
    "/resolve/main/papers_with_semantic_taxonomy.parquet"
)

# Taxonomy file path, relative to the working directory.
TAXONOMY_JSON_PATH = "integrated_ml_taxonomy.json"

# Simple content filters derived from the new dataset
TAG_FILTER_CHOICES = [
    "None",
    "Has Code",
    "Has Media",
    "Has Organization",
]
|
| 28 |
|
| 29 |
+
# Load taxonomy from JSON file
|
| 30 |
+
def load_taxonomy():
    """Load the ML taxonomy from the JSON file and derive dropdown choices.

    The file at ``TAXONOMY_JSON_PATH`` is read as UTF-8 JSON. It is traversed
    as ``{category: {subcategory: iterable_of_topics}}``.

    Returns:
        dict with four keys:
          - ``'categories'``: ``["All"]`` followed by the sorted top-level keys.
          - ``'subcategories'``: ``["All"]`` followed by every second-level
            key, de-duplicated and sorted.
          - ``'topics'``: ``["All"]`` followed by every topic, de-duplicated
            and sorted.
          - ``'taxonomy'``: the parsed JSON object itself.

        On any failure (missing file, invalid JSON, unexpected structure) the
        error is printed and a fallback with only ``"All"`` choices and an
        empty taxonomy is returned, so this function never raises.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(TAXONOMY_JSON_PATH, 'r', encoding='utf-8') as f:
            taxonomy = json.load(f)

        # Extract choices for dropdowns
        categories = sorted(taxonomy.keys())

        # Build subcategories and topics
        all_subcategories = set()
        all_topics = set()
        for subcats in taxonomy.values():
            for subcat, topics in subcats.items():
                all_subcategories.add(subcat)
                all_topics.update(topics)

        return {
            'categories': ["All"] + categories,
            'subcategories': ["All"] + sorted(all_subcategories),
            'topics': ["All"] + sorted(all_topics),
            'taxonomy': taxonomy,
        }
    except Exception as e:
        # Deliberately broad: a broken or missing taxonomy degrades the
        # dropdowns to "All" only instead of raising to the caller.
        print(f"Error loading taxonomy from JSON: {e}")
        return {
            'categories': ["All"],
            'subcategories': ["All"],
            'topics': ["All"],
            'taxonomy': {},
        }
|
| 62 |
+
|
| 63 |
+
TAXONOMY_DATA = load_taxonomy()
|
| 64 |
+
|
| 65 |
+
def _first_non_null(*values):
|
| 66 |
+
for v in values:
|
| 67 |
+
if v is None:
|
| 68 |
+
continue
|
| 69 |
+
# treat empty strings as null-ish
|
| 70 |
+
if isinstance(v, str) and v.strip() == "":
|
| 71 |
+
continue
|
| 72 |
+
return v
|
| 73 |
+
return None
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def _get_nested(row, *paths):
|
| 77 |
+
"""Try multiple dotted paths in a row that may contain dicts; return first non-null."""
|
| 78 |
+
for path in paths:
|
| 79 |
+
cur = row
|
| 80 |
+
ok = True
|
| 81 |
+
for key in path.split('.'):
|
| 82 |
+
if isinstance(cur, dict) and key in cur:
|
| 83 |
+
cur = cur[key]
|
| 84 |
+
else:
|
| 85 |
+
ok = False
|
| 86 |
+
break
|
| 87 |
+
if ok and cur is not None:
|
| 88 |
+
return cur
|
| 89 |
+
return None
|
| 90 |
+
|
| 91 |
+
|
| 92 |
def _download_parquet_df():
    """Download ``PARQUET_URL`` to a temp file and read it into a DataFrame via DuckDB."""
    print(f"Trying direct parquet download: {PARQUET_URL}")
    tmp_path = None
    try:
        with requests.get(PARQUET_URL, stream=True, timeout=120) as resp:
            resp.raise_for_status()
            with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmpf:
                # Record the path before streaming so that a download that
                # fails half-way still gets its temp file removed below.
                tmp_path = tmpf.name
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        tmpf.write(chunk)
        # Use DuckDB to read parquet to avoid pyarrow decoding issues.
        # Single quotes are doubled so the path is a valid SQL string literal.
        sql_path = tmp_path.replace("'", "''")
        df = duckdb.query(f"SELECT * FROM read_parquet('{sql_path}')").df()
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass
    print("Loaded DataFrame from direct parquet download via DuckDB.")
    return df


def _load_dataset_anonymously(**kwargs):
    """Call ``load_dataset(HF_DATASET_ID, ...)`` without credentials.

    Tries the ``token=None`` keyword first and retries with
    ``use_auth_token=False`` when the installed ``datasets`` rejects the
    keyword with ``TypeError``.
    """
    try:
        return load_dataset(HF_DATASET_ID, token=None, **kwargs)
    except TypeError:
        return load_dataset(HF_DATASET_ID, use_auth_token=False, **kwargs)


def _dataset_to_frame(dataset_obj):
    """Convert a ``Dataset`` or the first split of a ``DatasetDict`` to a DataFrame."""
    try:
        # If it's a Dataset (single split), this will work
        return dataset_obj.to_pandas()
    except AttributeError:
        # Otherwise assume DatasetDict and take the first split
        first_split = list(dataset_obj.keys())[0]
        return dataset_obj[first_split].to_pandas()


def _load_via_datasets_library():
    """Fallback loader that goes through ``datasets.load_dataset`` anonymously."""
    # Force anonymous access in case an invalid cached token is present.
    # Clear any token environment variables that could inject a bad Authorization header.
    for env_key in ("HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "HF_HUB_TOKEN"):
        if os.environ.pop(env_key, None) is not None:
            print(f"Cleared env var: {env_key}")

    # Prefer explicit train split when available
    try:
        dataset_obj = _load_dataset_anonymously(split="train")
    except Exception:
        # Fallback: load all splits and pick the first available
        dataset_obj = _load_dataset_anonymously()
    return _dataset_to_frame(dataset_obj)


def _is_real_link(value):
    """True for a non-blank string that is not the 'N/A' placeholder."""
    if not isinstance(value, str):
        return False
    text = value.strip()
    return text != '' and text.lower() != 'n/a'


def _is_non_empty_sequence(value):
    """True for a non-string, non-dict container with at least one element."""
    if value is None or isinstance(value, (str, bytes, dict)):
        return False
    try:
        # NOTE(review): list-typed columns may arrive as numpy arrays rather
        # than Python lists depending on the loader; len() covers both.
        return len(value) > 0
    except TypeError:
        return False


def _clean_text(value):
    """Return ``str(value)`` unless the value is missing or blank, else ``None``."""
    if value is None:
        return None
    try:
        if pd.isna(value):
            return None
    except (TypeError, ValueError):
        # Array-like values have no single truth value; fall through to str().
        pass
    text = str(value)
    return text if text.strip() != '' else None


def _row_has_code(rec):
    """True when the record has a usable GitHub repo or project page link."""
    return _is_real_link(rec.get('paper_githubRepo')) or _is_real_link(rec.get('paper_projectPage'))


def _row_has_media(rec):
    """True when either media-URL field of the record holds at least one entry."""
    for col in ('paper_mediaUrls', 'mediaUrls'):
        v = rec.get(col)
        if isinstance(v, str):
            # some providers store arrays as strings like "[... ]"
            text = v.strip()
            if text.startswith('[') and len(text) > 2:
                return True
            continue
        if _is_non_empty_sequence(v):
            return True
    return False


def _row_title(rec):
    """Pick ``paper_title``, then ``title``, else ``'Untitled'``."""
    for col in ('paper_title', 'title'):
        text = _clean_text(rec.get(col))
        if text is not None:
            return text
    return 'Untitled'


def _row_org_avatar(rec):
    """Return the organization avatar URL from the nested org structs, if any."""
    for path in ('paper_organization.avatar', 'organization.avatar'):
        av = _get_nested(rec, path)
        if isinstance(av, str) and av.strip():
            return av
    return None


def _normalize_papers_df(df):
    """Add/normalize the columns the visualization relies on (mutates and returns ``df``).

    Columns written: ``organization``, ``organization_avatar``, ``id``,
    ``paper_upvotes``, ``numComments``, ``paper_githubStars``, ``has_code``,
    ``has_media``, ``has_organization``, ``publishedAt_dt``,
    ``primary_category``, ``primary_subcategory``, ``primary_topic``,
    ``paper_label``, ``topic_chain``, ``paper_githubRepo``,
    ``paper_projectPage``.
    """
    # Snapshot rows as plain dicts BEFORE any column is overwritten: the raw
    # `organization` struct (if present) is replaced by a string column below,
    # and the avatar lookup still needs the original nested value.
    records = df.to_dict('records')

    def _per_row(func, dtype=object):
        # Build a Series aligned to df.index so assignment never misaligns.
        return pd.Series([func(rec) for rec in records], index=df.index, dtype=dtype)

    # organization: prefer top-level organization_name, then
    # paper_organization.name/fullname, else Unknown
    if 'organization_name' in df.columns:
        org_series = df['organization_name']
    else:
        # try nested dicts commonly produced by HF datasets
        org_series = _per_row(
            lambda rec: _first_non_null(
                _get_nested(rec, 'paper_organization.name'),
                _get_nested(rec, 'paper_organization.fullname'),
                _get_nested(rec, 'organization.name'),
                _get_nested(rec, 'organization.fullname'),
            )
        )
    df['organization'] = org_series.fillna('Unknown')

    # Extract organization avatar/logo (None when no nested struct provides one)
    df['organization_avatar'] = _per_row(_row_org_avatar)

    # id for each paper row
    positions = pd.Series(range(len(df)), index=df.index).astype(str)
    id_source = next(
        (c for c in ('paper_id', 'paper_discussionId', 'key') if c in df.columns),
        None,
    )
    if id_source is not None:
        df['id'] = df[id_source].astype(str)
    else:
        # fallback to title + row position
        title_col = next((c for c in ('paper_title', 'title') if c in df.columns), None)
        if title_col is not None:
            df['id'] = df[title_col].astype(str) + '_' + positions
        else:
            df['id'] = positions

    # numeric metrics used for aggregation
    for metric in ('paper_upvotes', 'numComments', 'paper_githubStars'):
        if metric in df.columns:
            df[metric] = pd.to_numeric(df[metric], errors='coerce').fillna(0.0)
        else:
            df[metric] = 0.0

    # computed boolean filters
    df['has_code'] = _per_row(_row_has_code, dtype=bool)
    df['has_media'] = _per_row(_row_has_media, dtype=bool)
    df['has_organization'] = df['organization'].astype(str).str.strip().ne('Unknown')

    # Process publishedAt field for date filtering
    if 'publishedAt' in df.columns:
        df['publishedAt_dt'] = pd.to_datetime(df['publishedAt'], errors='coerce')
    else:
        df['publishedAt_dt'] = pd.NaT

    # Ensure topic hierarchy columns exist and are strings
    for col_name, default_val in [
        ('primary_category', 'Unknown'),
        ('primary_subcategory', 'Unknown'),
        ('primary_topic', 'Unknown'),
    ]:
        if col_name not in df.columns:
            df[col_name] = default_val
        else:
            df[col_name] = df[col_name].fillna(default_val).astype(str).replace({'': default_val})

    # Human-friendly paper label for treemap leaves (the paper title)
    df['paper_label'] = _per_row(_row_title).astype(str)

    # Build a Topic Chain for hover details
    df['topic_chain'] = (
        df['primary_category'].astype(str) + ' > ' +
        df['primary_subcategory'].astype(str) + ' > ' +
        df['primary_topic'].astype(str)
    )

    # Ensure link fields exist for hover details
    for link_col in ['paper_githubRepo', 'paper_projectPage']:
        if link_col not in df.columns:
            df[link_col] = 'N/A'
        else:
            df[link_col] = df[link_col].fillna('N/A').replace({'': 'N/A'})

    return df


def load_datasets_data():
    """Load the PaperVerse Daily dataset from the Hugging Face Hub and normalize columns used by the app.

    Strategy:
      1. Direct download of ``PARQUET_URL`` read through DuckDB.
      2. On any failure, ``datasets.load_dataset`` with credentials cleared.
      3. If the overall attempt fails with an auth-looking error, log out of
         the Hub and retry once anonymously.

    Every successful path returns a normalized frame (see
    ``_normalize_papers_df``), including the retry path.

    Returns:
        tuple ``(df, success, message)``. On failure ``df`` is an empty
        DataFrame, ``success`` is ``False`` and ``message`` describes the
        error. This function does not raise.
    """
    start_time = time.time()
    print(f"Attempting to load dataset from Hugging Face Hub: {HF_DATASET_ID}")
    try:
        # First try: direct parquet download (avoids any auth header issues)
        try:
            df = _download_parquet_df()
        except Exception as direct_e:
            print(f"Direct parquet load failed: {direct_e}. Falling back to datasets loader...")
            df = _load_via_datasets_library()

        # --- Normalize expected columns for the visualization ---
        df = _normalize_papers_df(df)

        msg = f"Successfully loaded dataset in {time.time() - start_time:.2f}s."
        print(msg)
        return df, True, msg
    except Exception as e:
        # If we encountered invalid credentials, try logging out programmatically and retry once anonymously
        if "Invalid credentials" in str(e) or "401 Client Error" in str(e):
            try:
                print("Encountered auth error; attempting to clear cached token and retry anonymously...")
                hf_logout()
                # Normalize here as well so downstream code sees the same columns.
                df = _normalize_papers_df(_dataset_to_frame(_load_dataset_anonymously()))
                msg = f"Successfully loaded dataset after clearing token in {time.time() - start_time:.2f}s."
                print(msg)
                return df, True, msg
            except Exception as e2:
                err_msg = f"Failed to load dataset after retry. Error: {e2} (initial: {e})"
                print(err_msg)
                return pd.DataFrame(), False, err_msg
        err_msg = f"Failed to load dataset. Error: {e}"
        print(err_msg)
        return pd.DataFrame(), False, err_msg
|
| 334 |
|
| 335 |
+
def make_treemap_data(df, count_by, top_k=25, tag_filter=None, skip_cats=None, group_by='organization', date_range=None):
|
| 336 |
"""
|
| 337 |
Filter data and prepare it for a multi-level treemap.
|
| 338 |
- Preserves individual datasets for the top K organizations.
|
| 339 |
- Groups all other organizations into a single "Other" category.
|
| 340 |
+
- date_range: tuple of (min_timestamp, max_timestamp) in seconds since epoch
|
| 341 |
"""
|
| 342 |
if df is None or df.empty:
|
| 343 |
return pd.DataFrame()
|
| 344 |
|
| 345 |
filtered_df = df.copy()
|
| 346 |
|
| 347 |
+
# Apply date range filter
|
| 348 |
+
if date_range is not None and 'publishedAt_dt' in filtered_df.columns:
|
| 349 |
+
min_ts, max_ts = date_range
|
| 350 |
+
min_date = pd.to_datetime(min_ts, unit='s')
|
| 351 |
+
max_date = pd.to_datetime(max_ts, unit='s')
|
| 352 |
+
# Remove timezone info for comparison if publishedAt_dt is tz-naive
|
| 353 |
+
if filtered_df['publishedAt_dt'].dt.tz is None:
|
| 354 |
+
min_date = min_date.tz_localize(None)
|
| 355 |
+
max_date = max_date.tz_localize(None)
|
| 356 |
+
filtered_df = filtered_df[
|
| 357 |
+
(filtered_df['publishedAt_dt'] >= min_date) &
|
| 358 |
+
(filtered_df['publishedAt_dt'] <= max_date)
|
| 359 |
+
]
|
| 360 |
+
|
| 361 |
+
col_map = {
|
| 362 |
+
"Has Code": "has_code",
|
| 363 |
+
"Has Media": "has_media",
|
| 364 |
+
"Has Organization": "has_organization",
|
| 365 |
}
|
| 366 |
|
| 367 |
if tag_filter and tag_filter != "None" and tag_filter in col_map:
|
|
|
|
| 375 |
filtered_df[count_by] = 0.0
|
| 376 |
filtered_df[count_by] = pd.to_numeric(filtered_df[count_by], errors='coerce').fillna(0.0)
|
| 377 |
|
| 378 |
+
if group_by == 'organization':
|
| 379 |
+
all_org_totals = filtered_df.groupby("organization")[count_by].sum()
|
| 380 |
+
top_org_names = all_org_totals.nlargest(top_k, keep='first').index.tolist()
|
| 381 |
|
| 382 |
+
top_orgs_df = filtered_df[filtered_df['organization'].isin(top_org_names)].copy()
|
| 383 |
+
other_total = all_org_totals[~all_org_totals.index.isin(top_org_names)].sum()
|
| 384 |
+
|
| 385 |
+
final_df_for_plot = top_orgs_df
|
| 386 |
+
|
| 387 |
+
if other_total > 0:
|
| 388 |
+
other_row = pd.DataFrame([{
|
| 389 |
+
'organization': 'Other',
|
| 390 |
+
'paper_label': 'Other',
|
| 391 |
+
'primary_category': 'Other',
|
| 392 |
+
'primary_subcategory': 'Other',
|
| 393 |
+
'primary_topic': 'Other',
|
| 394 |
+
'topic_chain': 'Other > Other > Other',
|
| 395 |
+
'paper_githubRepo': 'N/A',
|
| 396 |
+
'paper_projectPage': 'N/A',
|
| 397 |
+
'organization_avatar': None,
|
| 398 |
+
count_by: other_total
|
| 399 |
+
}])
|
| 400 |
+
final_df_for_plot = pd.concat([final_df_for_plot, other_row], ignore_index=True)
|
| 401 |
|
| 402 |
+
if skip_cats and len(skip_cats) > 0:
|
| 403 |
+
final_df_for_plot = final_df_for_plot[~final_df_for_plot['organization'].isin(skip_cats)]
|
| 404 |
|
| 405 |
+
final_df_for_plot["root"] = "papers"
|
| 406 |
+
return final_df_for_plot
|
| 407 |
+
else:
|
| 408 |
+
# Topic grouping: apply top-k to topic combinations and handle skip list
|
| 409 |
+
topic_totals = filtered_df.groupby(['primary_category', 'primary_subcategory', 'primary_topic'])[count_by].sum()
|
| 410 |
+
top_topics = topic_totals.nlargest(top_k, keep='first').index.tolist()
|
| 411 |
+
|
| 412 |
+
# Filter to top topics
|
| 413 |
+
top_topics_df = filtered_df[
|
| 414 |
+
filtered_df.apply(
|
| 415 |
+
lambda r: (r['primary_category'], r['primary_subcategory'], r['primary_topic']) in top_topics,
|
| 416 |
+
axis=1
|
| 417 |
+
)
|
| 418 |
+
].copy()
|
| 419 |
+
|
| 420 |
+
# Apply skip filter (skip by primary_topic name)
|
| 421 |
+
if skip_cats and len(skip_cats) > 0:
|
| 422 |
+
top_topics_df = top_topics_df[~top_topics_df['primary_topic'].isin(skip_cats)]
|
| 423 |
+
|
| 424 |
+
top_topics_df["root"] = "papers"
|
| 425 |
+
return top_topics_df
|
| 426 |
|
| 427 |
+
def create_treemap(treemap_data, count_by, title=None, path=None, metric_label=None):
    """Generate the Plotly treemap figure from the prepared data.

    Args:
        treemap_data: DataFrame prepared for plotting. An
            ``organization_avatar`` column is added to it in place when
            missing (kept from the previous behavior).
        count_by: name of the numeric column used for tile sizes.
        title: optional figure title.
        path: hierarchy columns from root to leaf; defaults to
            ``["root", "organization", "paper_label"]``.
        metric_label: human-readable metric name for the hover text;
            defaults to ``count_by``.

    Returns:
        A Plotly figure. When the data is empty, the metric column is absent,
        or the metric sums to zero or less, a placeholder "no data" treemap
        is returned instead.
    """
    has_no_data = (
        treemap_data is None
        or treemap_data.empty
        or count_by not in treemap_data.columns
        or treemap_data[count_by].sum() <= 0
    )
    if has_no_data:
        fig = px.treemap(names=["No data matches filters"], parents=[""], values=[1])
        fig.update_layout(title="No data matches the selected filters", margin=dict(t=50, l=25, r=25, b=25))
        return fig

    if path is None:
        path = ["root", "organization", "paper_label"]

    # Ensure organization_avatar column exists (for search details, not hover)
    if 'organization_avatar' not in treemap_data.columns:
        treemap_data['organization_avatar'] = None

    # The order here fixes the customdata[...] indices used in the hover template.
    hover_columns = [
        'primary_category',
        'primary_subcategory',
        'primary_topic',
        'paper_githubRepo',
        'paper_projectPage',
    ]
    missing_hover = [col for col in hover_columns if col not in treemap_data.columns]
    plot_df = treemap_data
    if missing_hover:
        # Fill placeholders on a copy so px.treemap does not fail on absent columns.
        plot_df = treemap_data.copy()
        for col in missing_hover:
            plot_df[col] = 'N/A'

    fig = px.treemap(
        plot_df,
        path=path,
        values=count_by,
        hover_data={col: True for col in hover_columns},
        title=title,
        color_discrete_sequence=px.colors.qualitative.Plotly,
    )
    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    display_metric = metric_label if metric_label else count_by

    # Clean hover without organization avatar (images shown in search details instead)
    fig.update_traces(
        textinfo="label+value",
        hovertemplate=(
            "<b>%{label}</b><br>"
            + "%{value:,} " + display_metric +
            "<br><br><b>Topic Hierarchy:</b><br>"
            + "%{customdata[0]} > %{customdata[1]} > %{customdata[2]}<br>"
            + "<br><b>Links:</b><br>"
            + "GitHub: %{customdata[3]}<br>"
            + "Project: %{customdata[4]}"
            + "<extra></extra>"
        ),
    )
    return fig
|
| 473 |
|
| 474 |
# --- Gradio UI Blocks ---
|
| 475 |
+
with gr.Blocks(
|
| 476 |
+
title="📚 PaperVerse Daily Explorer",
|
| 477 |
+
fill_width=True,
|
| 478 |
+
css="""
|
| 479 |
+
/* Hide the timestamp numbers on the range slider */
|
| 480 |
+
#date-range-slider-wrapper .head,
|
| 481 |
+
#date-range-slider-wrapper div[data-testid="range-slider"] > span {
|
| 482 |
+
display: none !important;
|
| 483 |
+
}
|
| 484 |
+
"""
|
| 485 |
+
) as demo:
|
| 486 |
datasets_data_state = gr.State(pd.DataFrame())
|
| 487 |
loading_complete_state = gr.State(False)
|
| 488 |
+
date_range_state = gr.State(None) # Store min/max timestamps
|
| 489 |
|
| 490 |
with gr.Row():
|
| 491 |
+
gr.Markdown("# 📚 PaperVerse Daily Explorer")
|
| 492 |
|
| 493 |
+
with gr.Tabs():
|
| 494 |
+
with gr.Tab("📊 Treemap Visualization"):
|
| 495 |
+
with gr.Row():
|
| 496 |
+
with gr.Column(scale=1):
|
| 497 |
+
count_by_dropdown = gr.Dropdown(
|
| 498 |
+
label="Metric",
|
| 499 |
+
choices=[
|
| 500 |
+
("Upvotes", "paper_upvotes"),
|
| 501 |
+
("Comments", "numComments"),
|
| 502 |
+
],
|
| 503 |
+
value="paper_upvotes",
|
| 504 |
+
)
|
| 505 |
+
group_by_dropdown = gr.Dropdown(
|
| 506 |
+
label="Group by",
|
| 507 |
+
choices=[("Organization", "organization"), ("Topic", "topic")],
|
| 508 |
+
value="organization",
|
| 509 |
+
)
|
| 510 |
+
gr.Markdown("**Filters**")
|
| 511 |
+
filter_code = gr.Checkbox(label="Has Code", value=False)
|
| 512 |
+
filter_media = gr.Checkbox(label="Has Media", value=False)
|
| 513 |
+
filter_org = gr.Checkbox(label="Has Organization", value=False)
|
| 514 |
+
|
| 515 |
+
gr.Markdown("**Date Range**")
|
| 516 |
+
date_range_slider = RangeSlider(
|
| 517 |
+
minimum=0,
|
| 518 |
+
maximum=100,
|
| 519 |
+
value=(0, 100),
|
| 520 |
+
label="Paper Release Date Range",
|
| 521 |
+
interactive=True,
|
| 522 |
+
elem_id="date-range-slider-wrapper"
|
| 523 |
+
)
|
| 524 |
+
date_range_display = gr.Markdown("Loading date range...")
|
| 525 |
+
|
| 526 |
+
top_k_dropdown = gr.Dropdown(label="Number of Top Organizations", choices=TOP_K_CHOICES, value=25)
|
| 527 |
+
category_filter_dropdown = gr.Dropdown(label="Primary Category", choices=["All"], value="All")
|
| 528 |
+
subcategory_filter_dropdown = gr.Dropdown(label="Primary Subcategory", choices=["All"], value="All")
|
| 529 |
+
topic_filter_dropdown = gr.Dropdown(label="Primary Topic", choices=["All"], value="All")
|
| 530 |
+
skip_cats_textbox = gr.Textbox(label="Organizations to Skip", value="unaffiliated, Other")
|
| 531 |
+
generate_plot_button = gr.Button(value="Generate Plot", variant="primary", interactive=False)
|
| 532 |
+
|
| 533 |
+
with gr.Column(scale=3):
|
| 534 |
+
plot_output = gr.Plot()
|
| 535 |
+
status_message_md = gr.Markdown("Initializing...")
|
| 536 |
+
data_info_md = gr.Markdown("")
|
| 537 |
+
|
| 538 |
+
with gr.Tab("🔍 Paper Search"):
    # Search tab: a query box plus button on one row, with the rendered
    # details panel underneath.
    with gr.Column():
        # The heading previously contained U+FFFD (a corrupted emoji) which
        # rendered as "�" in the UI; use the same magnifier as the tab title.
        gr.Markdown("### 🔍 Search Papers and Organizations")
        with gr.Row():
            search_item = gr.Textbox(
                label="Search Organization or Paper",
                placeholder="Type organization name or paper title to see details...",
                scale=4
            )
            search_button = gr.Button("Show Details", scale=1, variant="secondary")
        # NOTE(review): original indentation was lost in this view; the panel is
        # assumed to sit below the search row rather than inside it — confirm.
        selected_info_html = gr.HTML(value="<p style='color: gray;'>Enter an organization name or paper title above to see details</p>")
|
| 549 |
|
| 550 |
def _update_button_interactivity(is_loaded_flag):
    """Build a Gradio update that sets a component's ``interactive`` flag.

    Args:
        is_loaded_flag: Value forwarded unchanged as ``interactive``.

    Returns:
        The result of ``gr.update(interactive=is_loaded_flag)``.
    """
    interactivity_update = gr.update(interactive=is_loaded_flag)
    return interactivity_update
|
| 552 |
+
|
| 553 |
+
def _format_date_range(date_range_tuple, date_range_value):
|
| 554 |
+
"""Convert slider values to readable date range text"""
|
| 555 |
+
if date_range_tuple is None:
|
| 556 |
+
return "Date range unavailable"
|
| 557 |
+
min_ts, max_ts = date_range_tuple
|
| 558 |
+
selected_min, selected_max = date_range_value
|
| 559 |
+
|
| 560 |
+
# Convert slider values to timestamps
|
| 561 |
+
# The slider values are already timestamps
|
| 562 |
+
min_date = pd.to_datetime(selected_min, unit='s')
|
| 563 |
+
max_date = pd.to_datetime(selected_max, unit='s')
|
| 564 |
+
|
| 565 |
+
return f"**Selected Range:** {min_date.strftime('%B %d, %Y')} to {max_date.strftime('%B %d, %Y')}"
|
| 566 |
+
|
| 567 |
+
def _toggle_labels_by_grouping(group_by_value):
    """Produce label updates for the top-k dropdown and the skip textbox.

    Args:
        group_by_value: The selected grouping mode; ``'topic'`` selects the
            topic wording, anything else selects the organization wording.

    Returns:
        A 2-tuple of Gradio updates: a new label for the top-k control, and a
        new label plus value for the skip textbox.
    """
    topic_wording = ("Number of Top Topics", "Topics to Skip", "")
    organization_wording = ("Number of Top Organizations", "Organizations to Skip", "unaffiliated, Other")

    # Topic mode clears the skip box; organization mode restores its defaults.
    chosen = topic_wording if group_by_value == 'topic' else organization_wording
    top_k_label, skip_label, skip_value = chosen

    top_k_update = gr.update(label=top_k_label)
    skip_update = gr.update(label=skip_label, value=skip_value)
    return (top_k_update, skip_update)
|
| 581 |
|
| 582 |
## CHANGE: New combined function to load data and generate the initial plot on startup.
|
| 583 |
def load_and_generate_initial_plot(progress=gr.Progress()):
|
|
|
|
| 592 |
ts = pd.to_datetime(current_df['data_download_timestamp'].iloc[0], utc=True)
|
| 593 |
date_display = ts.strftime('%B %d, %Y, %H:%M:%S %Z')
|
| 594 |
|
| 595 |
+
# Calculate date range from publishedAt_dt
|
| 596 |
+
min_ts = 0
|
| 597 |
+
max_ts = 100
|
| 598 |
+
date_range_text = "Date range unavailable"
|
| 599 |
+
date_range_tuple = None
|
| 600 |
+
|
| 601 |
+
if 'publishedAt_dt' in current_df.columns:
|
| 602 |
+
valid_dates = current_df['publishedAt_dt'].dropna()
|
| 603 |
+
if len(valid_dates) > 0:
|
| 604 |
+
min_date = valid_dates.min()
|
| 605 |
+
max_date = valid_dates.max()
|
| 606 |
+
min_ts = int(min_date.timestamp())
|
| 607 |
+
max_ts = int(max_date.timestamp())
|
| 608 |
+
date_range_tuple = (min_ts, max_ts)
|
| 609 |
+
date_range_text = f"**Full Range:** {min_date.strftime('%B %d, %Y')} to {max_date.strftime('%B %d, %Y')}"
|
| 610 |
+
|
| 611 |
data_info_text = (f"### Data Information\n- Source: `{HF_DATASET_ID}`\n"
|
| 612 |
f"- Status: {status_msg_from_load}\n"
|
| 613 |
+
f"- Total records loaded: {len(current_df):,}\n"
|
| 614 |
f"- Data as of: {date_display}\n")
|
| 615 |
else:
|
| 616 |
data_info_text = f"### Data Load Failed\n- {status_msg_from_load}"
|
| 617 |
+
min_ts = 0
|
| 618 |
+
max_ts = 100
|
| 619 |
+
date_range_text = "Date range unavailable"
|
| 620 |
+
date_range_tuple = None
|
| 621 |
except Exception as e:
|
| 622 |
status_msg_from_load = f"An unexpected error occurred: {str(e)}"
|
| 623 |
data_info_text = f"### Critical Error\n- {status_msg_from_load}"
|
| 624 |
load_success_flag = False
|
| 625 |
current_df = pd.DataFrame() # Ensure df is empty on failure
|
| 626 |
+
min_ts = 0
|
| 627 |
+
max_ts = 100
|
| 628 |
+
date_range_text = "Date range unavailable"
|
| 629 |
+
date_range_tuple = None
|
| 630 |
print(f"Critical error in load_and_generate_initial_plot: {e}")
|
| 631 |
|
| 632 |
# --- Part 2: Generate Initial Plot ---
|
| 633 |
progress(0.6, desc="Generating initial plot...")
|
| 634 |
+
# Defaults matching UI definitions
|
| 635 |
+
default_metric = "paper_upvotes"
|
| 636 |
default_tag = "None"
|
| 637 |
default_k = 25
|
| 638 |
+
default_group_by = "organization"
|
| 639 |
+
default_skip_cats = "unaffiliated, Other"
|
| 640 |
+
|
| 641 |
+
# Use taxonomy from JSON instead of calculating from dataset
|
| 642 |
+
cat_choices = TAXONOMY_DATA['categories']
|
| 643 |
+
subcat_choices = TAXONOMY_DATA['subcategories']
|
| 644 |
+
topic_choices = TAXONOMY_DATA['topics']
|
| 645 |
|
| 646 |
+
# Reuse the existing controller function for plotting (with date range set to None for initial load)
|
| 647 |
initial_plot, initial_status = ui_generate_plot_controller(
|
| 648 |
+
default_metric, False, False, False, default_k, default_group_by, "All", "All", "All", default_skip_cats, None, current_df, progress
|
| 649 |
)
|
| 650 |
|
| 651 |
+
# Also update taxonomy dropdown choices
|
| 652 |
+
return (
|
| 653 |
+
current_df,
|
| 654 |
+
load_success_flag,
|
| 655 |
+
data_info_text,
|
| 656 |
+
initial_status,
|
| 657 |
+
initial_plot,
|
| 658 |
+
gr.update(choices=cat_choices, value="All"),
|
| 659 |
+
gr.update(choices=subcat_choices, value="All"),
|
| 660 |
+
gr.update(choices=topic_choices, value="All"),
|
| 661 |
+
gr.update(minimum=min_ts, maximum=max_ts, value=(min_ts, max_ts)),
|
| 662 |
+
date_range_text,
|
| 663 |
+
date_range_tuple,
|
| 664 |
+
)
|
| 665 |
|
| 666 |
+
def ui_generate_plot_controller(metric_choice, has_code, has_media, has_org,
                                k_orgs, group_by_choice,
                                category_choice, subcategory_choice, topic_choice,
                                skip_cats_input, date_range, df_current_datasets, progress=gr.Progress()):
    """Filter the loaded data, build the treemap, and summarise what is shown.

    Args:
        metric_choice: Column name used as the treemap value.
        has_code, has_media, has_org: When truthy, keep only rows whose
            ``has_code`` / ``has_media`` / ``has_organization`` column is set.
        k_orgs: Top-k count forwarded to ``make_treemap_data``.
        group_by_choice: ``"topic"`` for the taxonomy hierarchy; any other
            value groups by organization.
        category_choice, subcategory_choice, topic_choice: Taxonomy filters;
            an empty value or ``'All'`` disables the filter.
        skip_cats_input: Comma-separated names to skip.
        date_range: Forwarded unchanged to ``make_treemap_data``.
        df_current_datasets: The loaded DataFrame (may be ``None`` or empty).
        progress: Gradio progress callback.

    Returns:
        ``(figure, markdown)``: the Plotly figure and a status/statistics text.
    """
    if df_current_datasets is None or df_current_datasets.empty:
        placeholder_fig = create_treemap(pd.DataFrame(), metric_choice)
        return placeholder_fig, "Dataset data is not loaded. Cannot generate plot."

    progress(0.1, desc="Aggregating data...")
    skip_names = []
    for raw_name in skip_cats_input.split(','):
        cleaned_name = raw_name.strip()
        if cleaned_name:
            skip_names.append(cleaned_name)

    working = df_current_datasets.copy()

    # Checkbox filters: each enabled flag keeps only rows where its column is set.
    flag_filters = (
        (has_code, 'has_code'),
        (has_media, 'has_media'),
        (has_org, 'has_organization'),
    )
    for enabled, flag_column in flag_filters:
        if enabled:
            working = working[working[flag_column]]

    # Taxonomy filters: 'All' (or an empty selection) means "do not filter".
    taxonomy_filters = (
        ('primary_category', category_choice),
        ('primary_subcategory', subcategory_choice),
        ('primary_topic', topic_choice),
    )
    for taxonomy_column, wanted in taxonomy_filters:
        if wanted and wanted != 'All':
            working = working[working[taxonomy_column] == wanted]

    treemap_df = make_treemap_data(working, metric_choice, k_orgs, None, skip_names, group_by_choice, date_range)

    progress(0.7, desc="Generating plot...")
    title_labels = {
        "paper_upvotes": "Upvotes",
        "numComments": "Comments",
    }
    metric_label = title_labels.get(metric_choice, metric_choice)
    grouped_by_topic = group_by_choice == "topic"

    if grouped_by_topic:
        chart_title = f"PaperVerse Daily - {metric_label} by Topic"
        path = ["root", "primary_category", "primary_subcategory", "primary_topic", "paper_label"]
    else:
        chart_title = f"PaperVerse Daily - {metric_label} by Organization"
        path = ["root", "organization", "paper_label"]

    plotly_fig = create_treemap(
        treemap_df,
        metric_choice,
        chart_title,
        path=path,
        metric_label=metric_label,
    )

    if treemap_df.empty:
        return plotly_fig, "No data matches the selected filters. Please try different options."

    metric_total = treemap_df[metric_choice].sum()
    # The aggregated 'Other' bucket is not an individual paper, so leave it out.
    named_papers = treemap_df.loc[treemap_df['paper_label'] != 'Other', 'paper_label']
    paper_count = named_papers.nunique()

    if grouped_by_topic:
        hierarchy_columns = ["primary_category", "primary_subcategory", "primary_topic"]
        group_count = treemap_df[hierarchy_columns].drop_duplicates().shape[0]
        group_line = f"**Topics Shown**: {group_count:,} unique triplets"
    else:
        group_line = f"**Organizations Shown**: {treemap_df['organization'].nunique():,}"

    plot_stats_md = (
        f"## Plot Statistics\n- {group_line}\n"
        f"- **Individual Papers Shown**: {paper_count:,}\n"
        f"- **Total {metric_label} in plot**: {int(metric_total):,}"
    )
    return plotly_fig, plot_stats_md
|
| 731 |
|
|
|
|
| 735 |
demo.load(
|
| 736 |
fn=load_and_generate_initial_plot,
|
| 737 |
inputs=[],
|
| 738 |
+
outputs=[
|
| 739 |
+
datasets_data_state,
|
| 740 |
+
loading_complete_state,
|
| 741 |
+
data_info_md,
|
| 742 |
+
status_message_md,
|
| 743 |
+
plot_output,
|
| 744 |
+
category_filter_dropdown,
|
| 745 |
+
subcategory_filter_dropdown,
|
| 746 |
+
topic_filter_dropdown,
|
| 747 |
+
date_range_slider,
|
| 748 |
+
date_range_display,
|
| 749 |
+
date_range_state,
|
| 750 |
+
]
|
| 751 |
)
|
| 752 |
|
| 753 |
loading_complete_state.change(
|
|
|
|
| 756 |
outputs=generate_plot_button
|
| 757 |
)
|
| 758 |
|
| 759 |
+
# Update labels based on grouping mode
|
| 760 |
+
group_by_dropdown.change(
|
| 761 |
+
fn=_toggle_labels_by_grouping,
|
| 762 |
+
inputs=group_by_dropdown,
|
| 763 |
+
outputs=[top_k_dropdown, skip_cats_textbox],
|
| 764 |
+
)
|
| 765 |
+
|
| 766 |
+
# Update date range display when slider changes
|
| 767 |
+
date_range_slider.change(
|
| 768 |
+
fn=_format_date_range,
|
| 769 |
+
inputs=[date_range_state, date_range_slider],
|
| 770 |
+
outputs=date_range_display,
|
| 771 |
+
show_progress="hidden"
|
| 772 |
+
)
|
| 773 |
+
|
| 774 |
+
def handle_search_details(search_text, df_current):
    """Search for an organization or paper and return an HTML details panel.

    The query is matched as a literal, case-insensitive substring against the
    ``organization``, ``paper_label`` and ``paper_title`` columns (whichever
    exist). At most the first 20 matching rows are rendered.

    Args:
        search_text: Raw text typed by the user.
        df_current: DataFrame of papers; may be ``None`` or empty.

    Returns:
        An HTML string: the results panel, or a short message when the query
        is blank, no data is loaded, nothing matches, or rendering fails.
    """
    import html  # local import: only this handler needs HTML escaping

    max_results = 20
    image_placeholders = ('none', 'null', 'n/a', '')
    link_placeholders = ('n/a', 'none')

    def _esc(value):
        """Escape a value for use in HTML text or a quoted attribute."""
        return html.escape(str(value), quote=True)

    def _usable(value, placeholders):
        """Return True for a non-blank string that is not a placeholder."""
        if not isinstance(value, str):
            return False
        stripped = value.strip()
        return bool(stripped) and stripped.lower() not in placeholders

    def _count(value):
        """Format a metric with thousands separators; missing values become 0."""
        try:
            return f"{int(value):,}"
        except (TypeError, ValueError):
            return "0"

    if not search_text or not search_text.strip():
        return "<p style='color: gray;'>Please enter a search term</p>"

    if df_current is None or df_current.empty:
        return "<p style='color: gray;'>No data available</p>"

    search_text = search_text.strip()
    # The query is user input and is echoed back, so it must be escaped.
    safe_query = _esc(search_text)

    try:
        # regex=False: treat the query literally so inputs such as "C++" or "("
        # match as typed instead of raising a regex compilation error.
        mask = pd.Series(False, index=df_current.index)
        for column in ('organization', 'paper_label', 'paper_title'):
            if column in df_current.columns:
                mask |= df_current[column].str.contains(
                    search_text, case=False, na=False, regex=False
                )
        matching_rows = df_current[mask]

        if matching_rows.empty:
            return f"<p style='color: orange;'>No results found for: <b>{safe_query}</b></p><p style='color: gray;'>Try searching for an organization name (e.g., 'Qwen', 'Meta') or paper title keyword</p>"

        num_results = len(matching_rows)
        html_parts = [
            "<div style='padding: 15px; border: 1px solid #ddd; border-radius: 8px; background: #f9f9f9; max-height: 600px; overflow-y: auto;'>",
            f"<h3 style='margin: 0 0 15px 0; color: #333;'>🔍 Found {num_results} result{'s' if num_results > 1 else ''} for: <span style='color: #0366d6;'>{safe_query}</span></h3>"
        ]

        # Cap the rendered rows so a broad query does not produce a huge panel.
        display_rows = matching_rows.head(max_results)

        for idx, (_, row) in enumerate(display_rows.iterrows()):
            if idx > 0:
                html_parts.append("<hr style='margin: 15px 0; border: none; border-top: 1px solid #ddd;'/>")

            html_parts.append("<div style='margin-bottom: 10px; overflow: auto;'>")

            # Organization logo (precomputed column), floated left when present.
            org_avatar = row.get('organization_avatar')
            if _usable(org_avatar, image_placeholders):
                html_parts.append(f"<img src='{_esc(org_avatar)}' style='max-width: 60px; max-height: 60px; border-radius: 50%; margin-bottom: 8px; float: left; margin-right: 12px; border: 2px solid #ddd;' onerror=\"this.style.display='none'\"/>")

            # Paper thumbnail, floated right when present.
            paper_thumbnail = row.get('thumbnail')
            if _usable(paper_thumbnail, image_placeholders):
                html_parts.append(f"<img src='{_esc(paper_thumbnail)}' style='max-width: 120px; max-height: 120px; border-radius: 8px; margin-bottom: 8px; float: right; margin-left: 12px; border: 1px solid #ddd;' onerror=\"this.style.display='none'\"/>")

            org_name = row.get('organization', 'Unknown')
            html_parts.append(f"<p style='margin: 0 0 5px 0; font-weight: bold; color: #333;'>🏢 {_esc(org_name)}</p>")

            paper_title = row.get('paper_title', row.get('title', 'Untitled'))
            html_parts.append(f"<p style='margin: 0 0 5px 0; color: #555; font-size: 0.95em;'>📄 {_esc(paper_title)}</p>")

            category = row.get('primary_category', 'Unknown')
            subcategory = row.get('primary_subcategory', 'Unknown')
            topic = row.get('primary_topic', 'Unknown')
            html_parts.append(f"<p style='margin: 0 0 5px 0; font-size: 0.9em; color: #666;'><b>Topics:</b> {_esc(category)} → {_esc(subcategory)} → {_esc(topic)}</p>")

            upvotes = row.get('paper_upvotes', 0)
            comments = row.get('numComments', 0)
            html_parts.append(f"<p style='margin: 0 0 5px 0; font-size: 0.9em;'><b>Metrics:</b> ⬆️ {_count(upvotes)} upvotes | 💬 {_count(comments)} comments</p>")

            github = row.get('paper_githubRepo')
            project = row.get('paper_projectPage')

            links = []
            if _usable(github, link_placeholders):
                links.append(f"<a href='{_esc(github)}' target='_blank' style='color: #0366d6; margin-right: 15px;'>🔗 GitHub</a>")
            if _usable(project, link_placeholders):
                links.append(f"<a href='{_esc(project)}' target='_blank' style='color: #0366d6;'>🔗 Project</a>")

            if links:
                html_parts.append(f"<p style='margin: 0; font-size: 0.9em;'>{' '.join(links)}</p>")

            # Clear the floated images before closing this result's container.
            html_parts.append("<div style='clear: both;'></div>")
            html_parts.append("</div>")

        if num_results > max_results:
            html_parts.append(f"<p style='margin-top: 15px; color: #666; font-style: italic;'>Showing first {max_results} of {num_results} results. Refine your search for fewer results.</p>")

        html_parts.append("</div>")

        return "".join(html_parts)

    except Exception as e:
        # UI boundary: report the failure in the panel rather than crashing
        # the event handler.
        return f"<p style='color: red;'>Error displaying details: {_esc(e)}</p>"
|
| 871 |
+
|
| 872 |
generate_plot_button.click(
|
| 873 |
fn=ui_generate_plot_controller,
|
| 874 |
+
inputs=[
|
| 875 |
+
count_by_dropdown,
|
| 876 |
+
filter_code,
|
| 877 |
+
filter_media,
|
| 878 |
+
filter_org,
|
| 879 |
+
top_k_dropdown,
|
| 880 |
+
group_by_dropdown,
|
| 881 |
+
category_filter_dropdown,
|
| 882 |
+
subcategory_filter_dropdown,
|
| 883 |
+
topic_filter_dropdown,
|
| 884 |
+
skip_cats_textbox,
|
| 885 |
+
date_range_slider,
|
| 886 |
+
datasets_data_state,
|
| 887 |
+
],
|
| 888 |
outputs=[plot_output, status_message_md]
|
| 889 |
)
|
| 890 |
+
|
| 891 |
+
# Handle search button for showing details
|
| 892 |
+
search_button.click(
|
| 893 |
+
fn=handle_search_details,
|
| 894 |
+
inputs=[search_item, datasets_data_state],
|
| 895 |
+
outputs=[selected_info_html]
|
| 896 |
+
)
|
| 897 |
+
|
| 898 |
+
# Also trigger on Enter key in search box
|
| 899 |
+
search_item.submit(
|
| 900 |
+
fn=handle_search_details,
|
| 901 |
+
inputs=[search_item, datasets_data_state],
|
| 902 |
+
outputs=[selected_info_html]
|
| 903 |
+
)
|
| 904 |
|
| 905 |
if __name__ == "__main__":
|
| 906 |
print("Application starting...")
|
integrated_ml_taxonomy.json
ADDED
|
@@ -0,0 +1,488 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Algorithms and Learning Methods": {
|
| 3 |
+
"Supervised Learning": [
|
| 4 |
+
"Classification",
|
| 5 |
+
"Regression",
|
| 6 |
+
"Structured Prediction",
|
| 7 |
+
"Ranking and Preference Learning"
|
| 8 |
+
],
|
| 9 |
+
"Unsupervised Learning": [
|
| 10 |
+
"Clustering",
|
| 11 |
+
"Density Estimation",
|
| 12 |
+
"Unsupervised Representation Learning"
|
| 13 |
+
],
|
| 14 |
+
"Semi-Supervised and Self-Supervised Learning": [
|
| 15 |
+
"Semi-Supervised Learning",
|
| 16 |
+
"Self-Supervised Learning"
|
| 17 |
+
],
|
| 18 |
+
"Reinforcement Learning and Planning": [
|
| 19 |
+
"Reinforcement Learning",
|
| 20 |
+
"Reinforcement Learning with Human Feedback (RLHF)",
|
| 21 |
+
"Markov Decision Processes",
|
| 22 |
+
"Model-Based RL",
|
| 23 |
+
"Multi-Agent RL",
|
| 24 |
+
"Hierarchical RL",
|
| 25 |
+
"Exploration",
|
| 26 |
+
"Decision and Control",
|
| 27 |
+
"Planning",
|
| 28 |
+
"Planning Algorithms",
|
| 29 |
+
"Navigation"
|
| 30 |
+
],
|
| 31 |
+
"Transfer and Adaptation": [
|
| 32 |
+
"Transfer Learning",
|
| 33 |
+
"Meta-Learning",
|
| 34 |
+
"Multitask Learning",
|
| 35 |
+
"Lifelong Learning",
|
| 36 |
+
"Continual Learning",
|
| 37 |
+
"Few-Shot Learning",
|
| 38 |
+
"Domain Adaptation",
|
| 39 |
+
"Model Mixing Methods"
|
| 40 |
+
],
|
| 41 |
+
"Representation Learning": [
|
| 42 |
+
"Representation Learning",
|
| 43 |
+
"Embedding Approaches",
|
| 44 |
+
"Metric Learning",
|
| 45 |
+
"Similarity and Distance Learning",
|
| 46 |
+
"Nonlinear Dimensionality Reduction and Manifold Learning",
|
| 47 |
+
"Components Analysis (CCA, ICA, LDA, PCA)",
|
| 48 |
+
"Sparse Coding and Dimensionality Expansion",
|
| 49 |
+
"Sparsity and Compressed Sensing"
|
| 50 |
+
],
|
| 51 |
+
"Model Alignment and Adaptation": [
|
| 52 |
+
"Fine-Tuning",
|
| 53 |
+
"Instruction-Tuning",
|
| 54 |
+
"Prompt Tuning",
|
| 55 |
+
"In-Context Learning",
|
| 56 |
+
"Value Alignment and Human Feedback",
|
| 57 |
+
"Alignment Methods"
|
| 58 |
+
],
|
| 59 |
+
"Adversarial and Robust Learning": [
|
| 60 |
+
"Adversarial Learning",
|
| 61 |
+
"AI Red Teaming and Adversarial Testing",
|
| 62 |
+
"Adversarial Attacks and Defenses",
|
| 63 |
+
"Threat Models and Mitigations"
|
| 64 |
+
],
|
| 65 |
+
"Active and Interactive Learning": [
|
| 66 |
+
"Active Learning",
|
| 67 |
+
"Online Learning",
|
| 68 |
+
"Interactive Learning",
|
| 69 |
+
"Bandit Algorithms",
|
| 70 |
+
"Dialog- or Communication-Based Learning"
|
| 71 |
+
],
|
| 72 |
+
"Ensemble and Boosting Methods": [
|
| 73 |
+
"Boosting and Ensemble Methods"
|
| 74 |
+
],
|
| 75 |
+
"Specialized Learning Paradigms": [
|
| 76 |
+
"AutoML",
|
| 77 |
+
"Multimodal Learning",
|
| 78 |
+
"Relational Learning",
|
| 79 |
+
"Collaborative Filtering",
|
| 80 |
+
"Adaptive Data Analysis",
|
| 81 |
+
"Communication- or Memory-Bounded Learning",
|
| 82 |
+
"Large Scale Learning",
|
| 83 |
+
"Program Induction",
|
| 84 |
+
"Learning and Unlearning"
|
| 85 |
+
],
|
| 86 |
+
"Data Handling": [
|
| 87 |
+
"Missing Data",
|
| 88 |
+
"Data Compression",
|
| 89 |
+
"Model Selection and Structure Learning"
|
| 90 |
+
]
|
| 91 |
+
},
|
| 92 |
+
"Deep Learning": {
|
| 93 |
+
"Architectures": [
|
| 94 |
+
"CNN Architectures",
|
| 95 |
+
"Recurrent Networks",
|
| 96 |
+
"Attention Models",
|
| 97 |
+
"Transformer Architectures",
|
| 98 |
+
"Memory-Augmented Neural Networks",
|
| 99 |
+
"Interaction-Based Deep Networks",
|
| 100 |
+
"Biologically Plausible Deep Networks"
|
| 101 |
+
],
|
| 102 |
+
"Model Types": [
|
| 103 |
+
"Deep Autoencoders",
|
| 104 |
+
"Adversarial Networks",
|
| 105 |
+
"Generative Models",
|
| 106 |
+
"Predictive Models",
|
| 107 |
+
"Supervised Deep Networks"
|
| 108 |
+
],
|
| 109 |
+
"Training and Optimization": [
|
| 110 |
+
"Efficient Training Methods",
|
| 111 |
+
"Distributed Training and Inference",
|
| 112 |
+
"Training Dynamics",
|
| 113 |
+
"Optimization Instability",
|
| 114 |
+
"Efficient Inference Methods",
|
| 115 |
+
"Optimization for Deep Networks"
|
| 116 |
+
],
|
| 117 |
+
"Model Efficiency": [
|
| 118 |
+
"Model Distillation",
|
| 119 |
+
"Model Compression",
|
| 120 |
+
"Quantization",
|
| 121 |
+
"Sample Efficient Methods",
|
| 122 |
+
"Memory Efficient Methods"
|
| 123 |
+
],
|
| 124 |
+
"Inference and Decoding": [
|
| 125 |
+
"Decoding Algorithms",
|
| 126 |
+
"Reasoning Algorithms",
|
| 127 |
+
"Search Algorithms"
|
| 128 |
+
],
|
| 129 |
+
"Analysis and Interpretation": [
|
| 130 |
+
"Analysis and Understanding of Deep Networks",
|
| 131 |
+
"Visualization or Exposition Techniques for Deep Networks",
|
| 132 |
+
"Interpretability and Explainability",
|
| 133 |
+
"Demystification",
|
| 134 |
+
"Scaling Laws",
|
| 135 |
+
"Emergent Capabilities",
|
| 136 |
+
"Grokking"
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
"Probabilistic Methods": {
|
| 140 |
+
"Bayesian Methods": [
|
| 141 |
+
"Bayesian Theory",
|
| 142 |
+
"Bayesian Nonparametrics",
|
| 143 |
+
"Gaussian Processes"
|
| 144 |
+
],
|
| 145 |
+
"Inference": [
|
| 146 |
+
"Variational Inference",
|
| 147 |
+
"MCMC",
|
| 148 |
+
"Belief Propagation",
|
| 149 |
+
"Distributed Inference",
|
| 150 |
+
"Uncertainty Estimation"
|
| 151 |
+
],
|
| 152 |
+
"Models": [
|
| 153 |
+
"Graphical Models",
|
| 154 |
+
"Hierarchical Models",
|
| 155 |
+
"Latent Variable Models",
|
| 156 |
+
"Topic Models",
|
| 157 |
+
"Causal Inference",
|
| 158 |
+
"Causal Reasoning"
|
| 159 |
+
],
|
| 160 |
+
"Probabilistic Programming": [
|
| 161 |
+
"Probabilistic Programming"
|
| 162 |
+
]
|
| 163 |
+
},
|
| 164 |
+
"Optimization": {
|
| 165 |
+
"Continuous Optimization": [
|
| 166 |
+
"Convex Optimization",
|
| 167 |
+
"Non-Convex Optimization",
|
| 168 |
+
"Stochastic Optimization",
|
| 169 |
+
"Stochastic Methods"
|
| 170 |
+
],
|
| 171 |
+
"Discrete Optimization": [
|
| 172 |
+
"Discrete Optimization",
|
| 173 |
+
"Submodular Optimization"
|
| 174 |
+
],
|
| 175 |
+
"Evolutionary Methods": [
|
| 176 |
+
"Evolutionary Computation"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
"Theory": {
|
| 180 |
+
"Learning Theory": [
|
| 181 |
+
"Computational Learning Theory",
|
| 182 |
+
"Statistical Learning Theory",
|
| 183 |
+
"Models of Learning and Generalization",
|
| 184 |
+
"Hardness of Learning and Approximations",
|
| 185 |
+
"Regularization",
|
| 186 |
+
"Fundamental Limitations of Learning",
|
| 187 |
+
"Complexity of Learning Systems"
|
| 188 |
+
],
|
| 189 |
+
"Statistical Theory": [
|
| 190 |
+
"Frequentist Statistics",
|
| 191 |
+
"High-Dimensional Inference",
|
| 192 |
+
"Large Deviations and Asymptotic Analysis"
|
| 193 |
+
],
|
| 194 |
+
"Mathematical Foundations": [
|
| 195 |
+
"Information Theory",
|
| 196 |
+
"Control Theory",
|
| 197 |
+
"Game Theory and Computational Economics",
|
| 198 |
+
"Statistical Physics of Learning",
|
| 199 |
+
"Spaces of Functions and Kernels",
|
| 200 |
+
"Spectral Methods",
|
| 201 |
+
"Kernel Methods",
|
| 202 |
+
"Large Margin Methods"
|
| 203 |
+
],
|
| 204 |
+
"Algorithmic Theory": [
|
| 205 |
+
"Data-driven Algorithm Design"
|
| 206 |
+
]
|
| 207 |
+
},
|
| 208 |
+
"Knowledge and Reasoning": {
|
| 209 |
+
"Knowledge Representation": [
|
| 210 |
+
"Knowledge Models",
|
| 211 |
+
"World Models",
|
| 212 |
+
"Factuality"
|
| 213 |
+
],
|
| 214 |
+
"Reasoning": [
|
| 215 |
+
"Commonsense Reasoning",
|
| 216 |
+
"Theory of Mind",
|
| 217 |
+
"Social Norms Understanding",
|
| 218 |
+
"Pragmatics"
|
| 219 |
+
],
|
| 220 |
+
"Knowledge Integration": [
|
| 221 |
+
"Retrieval-Augmented Models",
|
| 222 |
+
"Tool Use and API Integration",
|
| 223 |
+
"Neurosymbolic and Hybrid AI Systems (Physics-Informed, Logic, Formal Reasoning)"
|
| 224 |
+
]
|
| 225 |
+
},
|
| 226 |
+
"Evaluation and Benchmarking": {
|
| 227 |
+
"Evaluation Methods": [
|
| 228 |
+
"Benchmarks",
|
| 229 |
+
"Evaluation Protocols and Metrics",
|
| 230 |
+
"Human Evaluation",
|
| 231 |
+
"Machine Evaluation",
|
| 232 |
+
"Scalable Oversight"
|
| 233 |
+
],
|
| 234 |
+
"Simulation and Testing": [
|
| 235 |
+
"Simulation Environments",
|
| 236 |
+
"Assurance Testing and Deployment Policies"
|
| 237 |
+
]
|
| 238 |
+
},
|
| 239 |
+
"Applications": {
|
| 240 |
+
"Vision": [
|
| 241 |
+
"Computer Vision",
|
| 242 |
+
"Object Detection",
|
| 243 |
+
"Object Recognition",
|
| 244 |
+
"Image Segmentation",
|
| 245 |
+
"Body Pose, Face, and Gesture Analysis",
|
| 246 |
+
"Tracking and Motion in Video",
|
| 247 |
+
"Video Analysis",
|
| 248 |
+
"Visual Question Answering",
|
| 249 |
+
"Visual Scene Analysis and Interpretation",
|
| 250 |
+
"Computational Photography",
|
| 251 |
+
"Denoising"
|
| 252 |
+
],
|
| 253 |
+
"Language": [
|
| 254 |
+
"Natural Language Processing",
|
| 255 |
+
"Language Representation Learning",
|
| 256 |
+
"Dialog Systems",
|
| 257 |
+
"Conversational AI"
|
| 258 |
+
],
|
| 259 |
+
"Audio and Speech": [
|
| 260 |
+
"Audio and Speech Processing",
|
| 261 |
+
"Speech Recognition",
|
| 262 |
+
"Music Modeling and Analysis"
|
| 263 |
+
],
|
| 264 |
+
"Multimodal": [
|
| 265 |
+
"Multimodal Models",
|
| 266 |
+
"Vision-Language Models",
|
| 267 |
+
"Audio-Visual Learning",
|
| 268 |
+
"Cross-Modal Learning"
|
| 269 |
+
],
|
| 270 |
+
"Robotics and Embodied AI": [
|
| 271 |
+
"Robotics",
|
| 272 |
+
"Motor Control",
|
| 273 |
+
"Autonomous Systems",
|
| 274 |
+
"Perception and Action",
|
| 275 |
+
"Embodied AI"
|
| 276 |
+
],
|
| 277 |
+
"Code and Software": [
|
| 278 |
+
"Program Understanding and Generation",
|
| 279 |
+
"Code Generation",
|
| 280 |
+
"Software Engineering with AI",
|
| 281 |
+
"Automated Reasoning and Formal Methods"
|
| 282 |
+
],
|
| 283 |
+
"Science and Engineering": [
|
| 284 |
+
"Computational Biology and Bioinformatics",
|
| 285 |
+
"Physical Sciences (Physics, Chemistry, Biology)",
|
| 286 |
+
"Scientific Discovery",
|
| 287 |
+
"Quantum Learning"
|
| 288 |
+
],
|
| 289 |
+
"Mathematics": [
|
| 290 |
+
"Mathematical Reasoning",
|
| 291 |
+
"Theorem Proving",
|
| 292 |
+
"Symbolic Mathematics"
|
| 293 |
+
],
|
| 294 |
+
"Health and Medicine": [
|
| 295 |
+
"Medical Applications",
|
| 296 |
+
"Clinical Decision Support",
|
| 297 |
+
"Drug Discovery",
|
| 298 |
+
"Healthcare AI"
|
| 299 |
+
],
|
| 300 |
+
"Education": [
|
| 301 |
+
"Educational Applications",
|
| 302 |
+
"Intelligent Tutoring Systems",
|
| 303 |
+
"Educational Technology"
|
| 304 |
+
],
|
| 305 |
+
"Social and Web": [
|
| 306 |
+
"Computational Social Science",
|
| 307 |
+
"Recommender Systems",
|
| 308 |
+
"Information Retrieval",
|
| 309 |
+
"Web Applications and Internet Data",
|
| 310 |
+
"Network Analysis"
|
| 311 |
+
],
|
| 312 |
+
"Interactive Systems": [
|
| 313 |
+
"Game Playing",
|
| 314 |
+
"Multi-Agent Systems",
|
| 315 |
+
"Human-AI Interaction"
|
| 316 |
+
],
|
| 317 |
+
"Data and Signals": [
|
| 318 |
+
"Signal Processing",
|
| 319 |
+
"Time Series Analysis",
|
| 320 |
+
"Matrix and Tensor Factorization",
|
| 321 |
+
"Database Applications"
|
| 322 |
+
],
|
| 323 |
+
"Finance and Economics": [
|
| 324 |
+
"Quantitative Finance and Econometrics",
|
| 325 |
+
"Economic Modeling"
|
| 326 |
+
],
|
| 327 |
+
"Activity and Recognition": [
|
| 328 |
+
"Activity and Event Recognition"
|
| 329 |
+
],
|
| 330 |
+
"Infrastructure": [
|
| 331 |
+
"Hardware and Systems",
|
| 332 |
+
"Sustainability"
|
| 333 |
+
]
|
| 334 |
+
},
|
| 335 |
+
"Data": {
|
| 336 |
+
"Data Collection and Curation": [
|
| 337 |
+
"Pre-Training Data",
|
| 338 |
+
"Data Curation and Analysis",
|
| 339 |
+
"Manual and Algorithmic Data Processing",
|
| 340 |
+
"Responsible Data Management"
|
| 341 |
+
],
|
| 342 |
+
"Data Generation": [
|
| 343 |
+
"Synthetic Data Generation",
|
| 344 |
+
"Data Augmentation"
|
| 345 |
+
],
|
| 346 |
+
"Data Resources": [
|
| 347 |
+
"Benchmarks",
|
| 348 |
+
"Data Sets or Data Repositories",
|
| 349 |
+
"Datasets and Benchmarks"
|
| 350 |
+
]
|
| 351 |
+
},
|
| 352 |
+
"Infrastructure and Tools": {
|
| 353 |
+
"Software and Libraries": [
|
| 354 |
+
"Software Toolkits",
|
| 355 |
+
"Infrastructure, Software Libraries",
|
| 356 |
+
"Virtual Environments"
|
| 357 |
+
],
|
| 358 |
+
"Hardware and Systems": [
|
| 359 |
+
"Hardware Setups for Large-Scale Training",
|
| 360 |
+
"Distributed Systems",
|
| 361 |
+
"Specialized Hardware"
|
| 362 |
+
]
|
| 363 |
+
},
|
| 364 |
+
"Neuroscience and Cognitive Science": {
|
| 365 |
+
"Brain Studies": [
|
| 366 |
+
"Brain Imaging",
|
| 367 |
+
"Brain Mapping",
|
| 368 |
+
"Brain Segmentation",
|
| 369 |
+
"Connectomics",
|
| 370 |
+
"Neural Coding",
|
| 371 |
+
"Spike Train Generation",
|
| 372 |
+
"Synaptic Modulation"
|
| 373 |
+
],
|
| 374 |
+
"Cognitive Functions": [
|
| 375 |
+
"Cognitive Science",
|
| 376 |
+
"Memory",
|
| 377 |
+
"Perception",
|
| 378 |
+
"Visual Perception",
|
| 379 |
+
"Auditory Perception",
|
| 380 |
+
"Problem Solving",
|
| 381 |
+
"Reasoning",
|
| 382 |
+
"Linguistics",
|
| 383 |
+
"Psycholinguistics"
|
| 384 |
+
],
|
| 385 |
+
"Learning and Adaptation": [
|
| 386 |
+
"Human or Animal Learning",
|
| 387 |
+
"Plasticity and Adaptation",
|
| 388 |
+
"Neuropsychology"
|
| 389 |
+
],
|
| 390 |
+
"Brain-Computer Interfaces": [
|
| 391 |
+
"Brain-Computer Interfaces and Neural Prostheses"
|
| 392 |
+
],
|
| 393 |
+
"Philosophy": [
|
| 394 |
+
"Philosophical Perspectives on AI",
|
| 395 |
+
"Philosophy of Mind and Language",
|
| 396 |
+
"Cognitive Philosophy"
|
| 397 |
+
]
|
| 398 |
+
},
|
| 399 |
+
"Structured Data": {
|
| 400 |
+
"Graphs and Geometry": [
|
| 401 |
+
"Learning on Graphs",
|
| 402 |
+
"Geometric Deep Learning",
|
| 403 |
+
"Topology and Manifold Learning"
|
| 404 |
+
]
|
| 405 |
+
},
|
| 406 |
+
"Societal Considerations": {
|
| 407 |
+
"Fairness and Equity": [
|
| 408 |
+
"Algorithmic Fairness and Bias",
|
| 409 |
+
"Bias in AI Systems",
|
| 410 |
+
"Equity",
|
| 411 |
+
"Algorithmic Recourse",
|
| 412 |
+
"Justice, Power, and Inequality"
|
| 413 |
+
],
|
| 414 |
+
"Safety and Security": [
|
| 415 |
+
"AI Safety",
|
| 416 |
+
"Security",
|
| 417 |
+
"Adversarial Robustness",
|
| 418 |
+
"Risks, Harms, and Failures",
|
| 419 |
+
"Safe and Trustworthy AI"
|
| 420 |
+
],
|
| 421 |
+
"Privacy": [
|
| 422 |
+
"Privacy, Anonymity, and Security",
|
| 423 |
+
"Data Protection",
|
| 424 |
+
"Privacy-Preserving Methods"
|
| 425 |
+
],
|
| 426 |
+
"Misinformation and Content": [
|
| 427 |
+
"Misinformation and Disinformation",
|
| 428 |
+
"Content Moderation",
|
| 429 |
+
"Information Integrity"
|
| 430 |
+
],
|
| 431 |
+
"Transparency and Accountability": [
|
| 432 |
+
"Fairness, Accountability, and Transparency",
|
| 433 |
+
"Transparency Documentation",
|
| 434 |
+
"Audits of AI Systems",
|
| 435 |
+
"Explainability for Accountability"
|
| 436 |
+
],
|
| 437 |
+
"Human Factors": [
|
| 438 |
+
"Human-AI Interaction",
|
| 439 |
+
"Trust in AI Systems",
|
| 440 |
+
"Human-Centered AI",
|
| 441 |
+
"Participatory and Deliberative Methods"
|
| 442 |
+
],
|
| 443 |
+
"Design and Development": [
|
| 444 |
+
"Sociotechnical Design and Development",
|
| 445 |
+
"Value-Sensitive Design",
|
| 446 |
+
"Diversity in Design and Development",
|
| 447 |
+
"Responsible Development Practices"
|
| 448 |
+
],
|
| 449 |
+
"Societal Impacts": [
|
| 450 |
+
"Cultural Impacts",
|
| 451 |
+
"Environmental Impacts and Climate Change",
|
| 452 |
+
"Labor and Economic Impacts",
|
| 453 |
+
"Job Displacement and Automation",
|
| 454 |
+
"Misuse of AI Systems"
|
| 455 |
+
],
|
| 456 |
+
"Governance and Policy": [
|
| 457 |
+
"Regulation and Governance",
|
| 458 |
+
"Legal Topics in AI",
|
| 459 |
+
"Policy and Law",
|
| 460 |
+
"Licensing and Liability",
|
| 461 |
+
"Organizational Factors"
|
| 462 |
+
],
|
| 463 |
+
"Critical Perspectives": [
|
| 464 |
+
"Critical and Sociotechnical Foresight",
|
| 465 |
+
"Historical and Humanistic Perspectives",
|
| 466 |
+
"Social Scientific Perspectives",
|
| 467 |
+
"Resistance and Contestation",
|
| 468 |
+
"Social Epistemology"
|
| 469 |
+
],
|
| 470 |
+
"Values and Ethics": [
|
| 471 |
+
"Moral and Political Philosophy of AI",
|
| 472 |
+
"Ethics in AI",
|
| 473 |
+
"Values in Technology Design"
|
| 474 |
+
],
|
| 475 |
+
"Cross-Cultural and Multilingual": [
|
| 476 |
+
"Multi-Linguality",
|
| 477 |
+
"Low-Resource Languages",
|
| 478 |
+
"Vernacular Languages",
|
| 479 |
+
"Multiculturalism",
|
| 480 |
+
"Value Pluralism",
|
| 481 |
+
"Cross-Cultural AI"
|
| 482 |
+
],
|
| 483 |
+
"Interdisciplinary Approaches": [
|
| 484 |
+
"Interdisciplinarity and Cross-Functional Teams",
|
| 485 |
+
"Industry, Government, and Civil Society Collaboration"
|
| 486 |
+
]
|
| 487 |
+
}
|
| 488 |
+
}
|