Spaces:
Sleeping
Sleeping
Dynamic confidence thresholds: percentile-based dropdown adapts to any model
Browse files- Dockerfile +22 -13
- app.py +35 -12
- requirements.in +10 -0
- requirements.txt +203 -11
- templates/base.html +6 -1
- templates/index.html +5 -3
- templates/partials/paper_card.html +30 -8
Dockerfile
CHANGED
|
@@ -1,26 +1,35 @@
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
-
|
|
|
|
| 4 |
|
| 5 |
-
# Enable HF transfer for faster downloads
|
| 6 |
ENV HF_HUB_ENABLE_HF_TRANSFER=1
|
| 7 |
|
| 8 |
-
# Install
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
#
|
| 12 |
-
COPY requirements.txt .
|
| 13 |
-
RUN uv pip install --system --no-cache -r requirements.txt
|
| 14 |
-
|
| 15 |
-
# Copy application
|
| 16 |
-
COPY . .
|
| 17 |
-
|
| 18 |
-
# Create non-root user for security
|
| 19 |
RUN useradd -m -u 1000 user
|
| 20 |
USER user
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# HF Spaces expects port 7860
|
| 23 |
-
ENV PORT=7860
|
| 24 |
EXPOSE 7860
|
| 25 |
|
| 26 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
+
# Install uv from official image (fast, no pip bootstrap needed)
|
| 4 |
+
COPY --from=ghcr.io/astral-sh/uv:0.9.30 /uv /bin/uv
|
| 5 |
|
|
|
|
| 6 |
ENV HF_HUB_ENABLE_HF_TRANSFER=1
|
| 7 |
|
| 8 |
+
# Install dependencies from requirements.in - resolved on the target platform,
|
| 9 |
+
# no cross-compilation flags needed. CPU-only torch via extra index.
|
| 10 |
+
COPY requirements.in /tmp/requirements.in
|
| 11 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 12 |
+
uv pip install --system \
|
| 13 |
+
--extra-index-url https://download.pytorch.org/whl/cpu \
|
| 14 |
+
-r /tmp/requirements.in
|
| 15 |
|
| 16 |
+
# Create non-root user (HF Spaces runs as user ID 1000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
RUN useradd -m -u 1000 user
|
| 18 |
USER user
|
| 19 |
|
| 20 |
+
# Set home and path
|
| 21 |
+
ENV HOME=/home/user \
|
| 22 |
+
PATH=/home/user/.local/bin:$PATH
|
| 23 |
+
|
| 24 |
+
WORKDIR $HOME/app
|
| 25 |
+
|
| 26 |
+
# Copy application (as user)
|
| 27 |
+
COPY --chown=user . .
|
| 28 |
+
|
| 29 |
+
# Create data directory for dataset download
|
| 30 |
+
RUN mkdir -p $HOME/app/data
|
| 31 |
+
|
| 32 |
# HF Spaces expects port 7860
|
|
|
|
| 33 |
EXPOSE 7860
|
| 34 |
|
| 35 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
CHANGED
|
@@ -3,6 +3,7 @@ FastAPI + HTMX app for browsing arxiv papers with new ML datasets.
|
|
| 3 |
Downloads Lance dataset from HuggingFace Hub and loads locally.
|
| 4 |
"""
|
| 5 |
|
|
|
|
| 6 |
import re
|
| 7 |
from datetime import date, timedelta
|
| 8 |
from functools import lru_cache
|
|
@@ -46,9 +47,18 @@ def highlight_search(text: str, search: str) -> Markup:
|
|
| 46 |
return Markup(highlighted)
|
| 47 |
|
| 48 |
|
| 49 |
-
# Register custom
|
| 50 |
templates.env.filters["highlight"] = highlight_search
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# Dataset config
|
| 53 |
DATASET_REPO = "librarian-bots/arxiv-cs-papers-lance"
|
| 54 |
|
|
@@ -65,8 +75,10 @@ _model_cache: dict = {}
|
|
| 65 |
def get_lance_dataset():
|
| 66 |
"""Download dataset from HF Hub (cached) and return Lance connection."""
|
| 67 |
if "ds" not in _lance_cache:
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
| 70 |
print(f"Downloading dataset from {DATASET_REPO} to {local_dir}...")
|
| 71 |
snapshot_download(
|
| 72 |
DATASET_REPO,
|
|
@@ -126,18 +138,27 @@ def get_categories() -> list[str]:
|
|
| 126 |
|
| 127 |
|
| 128 |
@lru_cache(maxsize=1)
|
| 129 |
-
def
|
| 130 |
-
"""
|
| 131 |
|
| 132 |
-
|
| 133 |
"""
|
| 134 |
df = get_dataframe()
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
|
| 143 |
@lru_cache(maxsize=1)
|
|
@@ -359,6 +380,7 @@ async def home(
|
|
| 359 |
df = get_dataframe()
|
| 360 |
categories = get_categories()
|
| 361 |
histogram_data = get_histogram_data()
|
|
|
|
| 362 |
|
| 363 |
# Get stats
|
| 364 |
total_papers = len(df)
|
|
@@ -372,6 +394,7 @@ async def home(
|
|
| 372 |
"total_papers": total_papers,
|
| 373 |
"new_dataset_count": new_dataset_count,
|
| 374 |
"histogram_data": histogram_data,
|
|
|
|
| 375 |
# Pass filter state for URL persistence
|
| 376 |
"search": search or "",
|
| 377 |
"search_type": search_type,
|
|
|
|
| 3 |
Downloads Lance dataset from HuggingFace Hub and loads locally.
|
| 4 |
"""
|
| 5 |
|
| 6 |
+
import math
|
| 7 |
import re
|
| 8 |
from datetime import date, timedelta
|
| 9 |
from functools import lru_cache
|
|
|
|
| 47 |
return Markup(highlighted)
|
| 48 |
|
| 49 |
|
| 50 |
+
# Register custom filters
|
| 51 |
templates.env.filters["highlight"] = highlight_search
|
| 52 |
|
| 53 |
+
|
| 54 |
+
def confidence_fmt(score):
    """Render a 0-1 confidence score as a percentage string with one decimal.

    The percentage is truncated (floored) to one decimal place instead of
    being rounded, so a score such as 0.9995 is displayed as "99.9" rather
    than being rounded up to "100.0".

    Args:
        score: Numeric confidence value; multiplied by 100 for display.

    Returns:
        The percentage formatted with exactly one decimal digit and no
        percent sign (the template appends it).
    """
    # Work in tenths of a percent so a single floor() performs the truncation.
    tenths_of_percent = math.floor(score * 1000)
    return format(tenths_of_percent / 10, ".1f")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
templates.env.filters["confidence"] = confidence_fmt
|
| 61 |
+
|
| 62 |
# Dataset config
|
| 63 |
DATASET_REPO = "librarian-bots/arxiv-cs-papers-lance"
|
| 64 |
|
|
|
|
| 75 |
def get_lance_dataset():
|
| 76 |
"""Download dataset from HF Hub (cached) and return Lance connection."""
|
| 77 |
if "ds" not in _lance_cache:
|
| 78 |
+
import os
|
| 79 |
+
# Use HF_HOME or /tmp for Spaces compatibility (./data not writable on Spaces)
|
| 80 |
+
cache_base = os.environ.get("HF_HOME", "/tmp/hf_cache")
|
| 81 |
+
local_dir = f"{cache_base}/arxiv-lance"
|
| 82 |
print(f"Downloading dataset from {DATASET_REPO} to {local_dir}...")
|
| 83 |
snapshot_download(
|
| 84 |
DATASET_REPO,
|
|
|
|
| 138 |
|
| 139 |
|
| 140 |
@lru_cache(maxsize=1)
def get_confidence_options() -> list[dict]:
    """Compute confidence filter options from the actual data distribution.

    Uses percentiles of the confidence scores of rows flagged
    ``is_new_dataset`` so the UI adapts to any model's score range.

    Returns:
        A list of option dicts, each with the keys ``"value"`` (threshold as
        a string), ``"label"`` (text shown in the dropdown) and ``"count"``
        (number of matching rows), in this order:

        1. ``"All new datasets"`` with the fixed value ``"0.5"``.
        2. ``"Top 75%"``, ``"Top 50%"`` and ``"Top 25%"``, whose values are
           the 0.25 / 0.50 / 0.75 quantiles of the scores. These are omitted
           when there are no scores to rank.
        3. ``"All papers"`` with value ``"0"`` and the full row count.

        The result is memoized by ``lru_cache``, so it is computed once per
        process.
    """
    df = get_dataframe()
    scores = df.filter(pl.col("is_new_dataset"))["confidence_score"]

    options = [{"value": "0.5", "label": "All new datasets", "count": len(scores)}]

    percentile_specs = (("Top 75%", 0.25), ("Top 50%", 0.50), ("Top 25%", 0.75))
    for pct_label, quantile in percentile_specs:
        raw_threshold = scores.quantile(quantile)
        if raw_threshold is None:
            # polars returns None for the quantile of an empty (or all-null)
            # series; float(None) would raise, so there is nothing to rank
            # and the percentile options are skipped entirely.
            break
        threshold = float(raw_threshold)
        count = scores.filter(scores >= threshold).len()
        options.append({
            # NOTE(review): the emitted value is rounded to two decimals while
            # the count uses the unrounded threshold, so the two can disagree
            # slightly for tightly clustered scores - confirm this is acceptable.
            "value": f"{threshold:.2f}",
            "label": pct_label,
            "count": int(count),
        })

    options.append({"value": "0", "label": "All papers", "count": len(df)})
    return options
|
| 162 |
|
| 163 |
|
| 164 |
@lru_cache(maxsize=1)
|
|
|
|
| 380 |
df = get_dataframe()
|
| 381 |
categories = get_categories()
|
| 382 |
histogram_data = get_histogram_data()
|
| 383 |
+
confidence_options = get_confidence_options()
|
| 384 |
|
| 385 |
# Get stats
|
| 386 |
total_papers = len(df)
|
|
|
|
| 394 |
"total_papers": total_papers,
|
| 395 |
"new_dataset_count": new_dataset_count,
|
| 396 |
"histogram_data": histogram_data,
|
| 397 |
+
"confidence_options": confidence_options,
|
| 398 |
# Pass filter state for URL persistence
|
| 399 |
"search": search or "",
|
| 400 |
"search_type": search_type,
|
requirements.in
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
pylance
|
| 3 |
+
polars
|
| 4 |
+
cachetools
|
| 5 |
+
python-dotenv
|
| 6 |
+
Jinja2
|
| 7 |
+
markupsafe
|
| 8 |
+
huggingface-hub
|
| 9 |
+
sentence-transformers
|
| 10 |
+
uvicorn
|
requirements.txt
CHANGED
|
@@ -1,11 +1,203 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file was autogenerated by uv via the following command:
|
| 2 |
+
# uv pip compile requirements.in -o requirements.txt --python-platform linux --python-version 3.11
|
| 3 |
+
annotated-doc==0.0.4
|
| 4 |
+
# via fastapi
|
| 5 |
+
annotated-types==0.7.0
|
| 6 |
+
# via pydantic
|
| 7 |
+
anyio==4.12.1
|
| 8 |
+
# via
|
| 9 |
+
# httpx
|
| 10 |
+
# starlette
|
| 11 |
+
cachetools==7.0.0
|
| 12 |
+
# via -r requirements.in
|
| 13 |
+
certifi==2026.1.4
|
| 14 |
+
# via
|
| 15 |
+
# httpcore
|
| 16 |
+
# httpx
|
| 17 |
+
click==8.3.1
|
| 18 |
+
# via
|
| 19 |
+
# typer-slim
|
| 20 |
+
# uvicorn
|
| 21 |
+
cuda-bindings==12.9.4
|
| 22 |
+
# via torch
|
| 23 |
+
cuda-pathfinder==1.3.3
|
| 24 |
+
# via cuda-bindings
|
| 25 |
+
fastapi==0.128.1
|
| 26 |
+
# via -r requirements.in
|
| 27 |
+
filelock==3.20.3
|
| 28 |
+
# via
|
| 29 |
+
# huggingface-hub
|
| 30 |
+
# torch
|
| 31 |
+
# transformers
|
| 32 |
+
fsspec==2026.1.0
|
| 33 |
+
# via
|
| 34 |
+
# huggingface-hub
|
| 35 |
+
# torch
|
| 36 |
+
h11==0.16.0
|
| 37 |
+
# via
|
| 38 |
+
# httpcore
|
| 39 |
+
# uvicorn
|
| 40 |
+
hf-xet==1.2.0
|
| 41 |
+
# via huggingface-hub
|
| 42 |
+
httpcore==1.0.9
|
| 43 |
+
# via httpx
|
| 44 |
+
httpx==0.28.1
|
| 45 |
+
# via huggingface-hub
|
| 46 |
+
huggingface-hub==1.4.0
|
| 47 |
+
# via
|
| 48 |
+
# -r requirements.in
|
| 49 |
+
# sentence-transformers
|
| 50 |
+
# tokenizers
|
| 51 |
+
# transformers
|
| 52 |
+
idna==3.11
|
| 53 |
+
# via
|
| 54 |
+
# anyio
|
| 55 |
+
# httpx
|
| 56 |
+
jinja2==3.1.6
|
| 57 |
+
# via
|
| 58 |
+
# -r requirements.in
|
| 59 |
+
# torch
|
| 60 |
+
joblib==1.5.3
|
| 61 |
+
# via scikit-learn
|
| 62 |
+
lance-namespace==0.4.5
|
| 63 |
+
# via pylance
|
| 64 |
+
lance-namespace-urllib3-client==0.4.5
|
| 65 |
+
# via lance-namespace
|
| 66 |
+
markupsafe==3.0.3
|
| 67 |
+
# via
|
| 68 |
+
# -r requirements.in
|
| 69 |
+
# jinja2
|
| 70 |
+
mpmath==1.3.0
|
| 71 |
+
# via sympy
|
| 72 |
+
networkx==3.6.1
|
| 73 |
+
# via torch
|
| 74 |
+
numpy==2.4.2
|
| 75 |
+
# via
|
| 76 |
+
# pylance
|
| 77 |
+
# scikit-learn
|
| 78 |
+
# scipy
|
| 79 |
+
# sentence-transformers
|
| 80 |
+
# transformers
|
| 81 |
+
nvidia-cublas-cu12==12.8.4.1
|
| 82 |
+
# via
|
| 83 |
+
# nvidia-cudnn-cu12
|
| 84 |
+
# nvidia-cusolver-cu12
|
| 85 |
+
# torch
|
| 86 |
+
nvidia-cuda-cupti-cu12==12.8.90
|
| 87 |
+
# via torch
|
| 88 |
+
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 89 |
+
# via torch
|
| 90 |
+
nvidia-cuda-runtime-cu12==12.8.90
|
| 91 |
+
# via torch
|
| 92 |
+
nvidia-cudnn-cu12==9.10.2.21
|
| 93 |
+
# via torch
|
| 94 |
+
nvidia-cufft-cu12==11.3.3.83
|
| 95 |
+
# via torch
|
| 96 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 97 |
+
# via torch
|
| 98 |
+
nvidia-curand-cu12==10.3.9.90
|
| 99 |
+
# via torch
|
| 100 |
+
nvidia-cusolver-cu12==11.7.3.90
|
| 101 |
+
# via torch
|
| 102 |
+
nvidia-cusparse-cu12==12.5.8.93
|
| 103 |
+
# via
|
| 104 |
+
# nvidia-cusolver-cu12
|
| 105 |
+
# torch
|
| 106 |
+
nvidia-cusparselt-cu12==0.7.1
|
| 107 |
+
# via torch
|
| 108 |
+
nvidia-nccl-cu12==2.27.5
|
| 109 |
+
# via torch
|
| 110 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 111 |
+
# via
|
| 112 |
+
# nvidia-cufft-cu12
|
| 113 |
+
# nvidia-cusolver-cu12
|
| 114 |
+
# nvidia-cusparse-cu12
|
| 115 |
+
# torch
|
| 116 |
+
nvidia-nvshmem-cu12==3.4.5
|
| 117 |
+
# via torch
|
| 118 |
+
nvidia-nvtx-cu12==12.8.90
|
| 119 |
+
# via torch
|
| 120 |
+
packaging==26.0
|
| 121 |
+
# via
|
| 122 |
+
# huggingface-hub
|
| 123 |
+
# transformers
|
| 124 |
+
polars==1.38.0
|
| 125 |
+
# via -r requirements.in
|
| 126 |
+
polars-runtime-32==1.38.0
|
| 127 |
+
# via polars
|
| 128 |
+
pyarrow==23.0.0
|
| 129 |
+
# via pylance
|
| 130 |
+
pydantic==2.12.5
|
| 131 |
+
# via
|
| 132 |
+
# fastapi
|
| 133 |
+
# lance-namespace-urllib3-client
|
| 134 |
+
pydantic-core==2.41.5
|
| 135 |
+
# via pydantic
|
| 136 |
+
pylance==1.0.4
|
| 137 |
+
# via -r requirements.in
|
| 138 |
+
python-dateutil==2.9.0.post0
|
| 139 |
+
# via lance-namespace-urllib3-client
|
| 140 |
+
python-dotenv==1.2.1
|
| 141 |
+
# via -r requirements.in
|
| 142 |
+
pyyaml==6.0.3
|
| 143 |
+
# via
|
| 144 |
+
# huggingface-hub
|
| 145 |
+
# transformers
|
| 146 |
+
regex==2026.1.15
|
| 147 |
+
# via transformers
|
| 148 |
+
safetensors==0.7.0
|
| 149 |
+
# via transformers
|
| 150 |
+
scikit-learn==1.8.0
|
| 151 |
+
# via sentence-transformers
|
| 152 |
+
scipy==1.17.0
|
| 153 |
+
# via
|
| 154 |
+
# scikit-learn
|
| 155 |
+
# sentence-transformers
|
| 156 |
+
sentence-transformers==5.2.2
|
| 157 |
+
# via -r requirements.in
|
| 158 |
+
shellingham==1.5.4
|
| 159 |
+
# via huggingface-hub
|
| 160 |
+
six==1.17.0
|
| 161 |
+
# via python-dateutil
|
| 162 |
+
starlette==0.50.0
|
| 163 |
+
# via fastapi
|
| 164 |
+
sympy==1.14.0
|
| 165 |
+
# via torch
|
| 166 |
+
threadpoolctl==3.6.0
|
| 167 |
+
# via scikit-learn
|
| 168 |
+
tokenizers==0.22.2
|
| 169 |
+
# via transformers
|
| 170 |
+
torch==2.10.0
|
| 171 |
+
# via sentence-transformers
|
| 172 |
+
tqdm==4.67.3
|
| 173 |
+
# via
|
| 174 |
+
# huggingface-hub
|
| 175 |
+
# sentence-transformers
|
| 176 |
+
# transformers
|
| 177 |
+
transformers==5.0.0
|
| 178 |
+
# via sentence-transformers
|
| 179 |
+
triton==3.6.0
|
| 180 |
+
# via torch
|
| 181 |
+
typer-slim==0.21.1
|
| 182 |
+
# via
|
| 183 |
+
# huggingface-hub
|
| 184 |
+
# transformers
|
| 185 |
+
typing-extensions==4.15.0
|
| 186 |
+
# via
|
| 187 |
+
# anyio
|
| 188 |
+
# fastapi
|
| 189 |
+
# huggingface-hub
|
| 190 |
+
# lance-namespace-urllib3-client
|
| 191 |
+
# pydantic
|
| 192 |
+
# pydantic-core
|
| 193 |
+
# sentence-transformers
|
| 194 |
+
# starlette
|
| 195 |
+
# torch
|
| 196 |
+
# typer-slim
|
| 197 |
+
# typing-inspection
|
| 198 |
+
typing-inspection==0.4.2
|
| 199 |
+
# via pydantic
|
| 200 |
+
urllib3==2.6.3
|
| 201 |
+
# via lance-namespace-urllib3-client
|
| 202 |
+
uvicorn==0.40.0
|
| 203 |
+
# via -r requirements.in
|
templates/base.html
CHANGED
|
@@ -11,7 +11,12 @@
|
|
| 11 |
<!-- HTMX -->
|
| 12 |
<script src="https://unpkg.com/htmx.org@1.9.12"></script>
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
<style>
|
|
|
|
|
|
|
| 15 |
/* Loading indicator - subtle */
|
| 16 |
.htmx-indicator { display: none; }
|
| 17 |
.htmx-request .htmx-indicator,
|
|
@@ -35,7 +40,7 @@
|
|
| 35 |
|
| 36 |
<footer class="border-t border-gray-100 mt-12">
|
| 37 |
<div class="max-w-3xl mx-auto px-4 py-4 text-gray-400 text-xs">
|
| 38 |
-
<a href="https://huggingface.co/datasets/
|
| 39 |
<span class="mx-2">·</span>
|
| 40 |
<a href="https://huggingface.co/davanstrien/ModernBERT-base-is-new-arxiv-dataset" class="hover:text-gray-600">Model</a>
|
| 41 |
</div>
|
|
|
|
| 11 |
<!-- HTMX -->
|
| 12 |
<script src="https://unpkg.com/htmx.org@1.9.12"></script>
|
| 13 |
|
| 14 |
+
<!-- Alpine.js (for expand/collapse) -->
|
| 15 |
+
<script defer src="https://unpkg.com/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
| 16 |
+
|
| 17 |
<style>
|
| 18 |
+
/* Alpine.js cloak (hide until loaded) */
|
| 19 |
+
[x-cloak] { display: none !important; }
|
| 20 |
/* Loading indicator - subtle */
|
| 21 |
.htmx-indicator { display: none; }
|
| 22 |
.htmx-request .htmx-indicator,
|
|
|
|
| 40 |
|
| 41 |
<footer class="border-t border-gray-100 mt-12">
|
| 42 |
<div class="max-w-3xl mx-auto px-4 py-4 text-gray-400 text-xs">
|
| 43 |
+
<a href="https://huggingface.co/datasets/librarian-bots/arxiv-cs-papers-lance" class="hover:text-gray-600">Data source</a>
|
| 44 |
<span class="mx-2">·</span>
|
| 45 |
<a href="https://huggingface.co/davanstrien/ModernBERT-base-is-new-arxiv-dataset" class="hover:text-gray-600">Model</a>
|
| 46 |
</div>
|
templates/index.html
CHANGED
|
@@ -84,9 +84,11 @@
|
|
| 84 |
hx-include="#filter-form, #search-input, #category-select, #since-filter, #sort-select, #search-type-toggle"
|
| 85 |
hx-indicator="#loading-indicator"
|
| 86 |
hx-push-url="true">
|
| 87 |
-
|
| 88 |
-
<option value="
|
| 89 |
-
|
|
|
|
|
|
|
| 90 |
</select>
|
| 91 |
|
| 92 |
<!-- Since filter dropdown -->
|
|
|
|
| 84 |
hx-include="#filter-form, #search-input, #category-select, #since-filter, #sort-select, #search-type-toggle"
|
| 85 |
hx-indicator="#loading-indicator"
|
| 86 |
hx-push-url="true">
|
| 87 |
+
{% for opt in confidence_options %}
|
| 88 |
+
<option value="{{ opt.value }}" {% if min_confidence == opt.value %}selected{% endif %}>
|
| 89 |
+
{{ opt.label }}
|
| 90 |
+
</option>
|
| 91 |
+
{% endfor %}
|
| 92 |
</select>
|
| 93 |
|
| 94 |
<!-- Since filter dropdown -->
|
templates/partials/paper_card.html
CHANGED
|
@@ -36,7 +36,7 @@
|
|
| 36 |
</span>
|
| 37 |
</span>
|
| 38 |
<span class="text-gray-400 inline-flex items-center gap-1">
|
| 39 |
-
{{
|
| 40 |
<span class="cursor-help" title="Model confidence this paper introduces a new dataset">
|
| 41 |
<svg class="w-3.5 h-3.5 text-gray-300" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
| 42 |
<circle cx="12" cy="12" r="10" stroke-width="1.5"></circle>
|
|
@@ -47,7 +47,7 @@
|
|
| 47 |
</span>
|
| 48 |
{% else %}
|
| 49 |
<span class="{% if paper.confidence_score < 0.8 %}text-gray-400{% else %}text-gray-500{% endif %} inline-flex items-center gap-1">
|
| 50 |
-
{{
|
| 51 |
<span class="cursor-help" title="Model confidence this paper introduces a new dataset">
|
| 52 |
<svg class="w-3.5 h-3.5 {% if paper.confidence_score < 0.8 %}text-gray-300{% else %}text-gray-400{% endif %}" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
| 53 |
<circle cx="12" cy="12" r="10" stroke-width="1.5"></circle>
|
|
@@ -59,12 +59,34 @@
|
|
| 59 |
{% endif %}
|
| 60 |
</div>
|
| 61 |
|
| 62 |
-
<!--
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
<p class="mt-2 text-gray-600 text-sm leading-relaxed">
|
| 64 |
-
{% if search %}
|
| 65 |
-
{{ paper.abstract[:400]|highlight(search) }}{% if paper.abstract|length > 400 %}...{% endif %}
|
| 66 |
-
{% else %}
|
| 67 |
-
{{ paper.abstract[:400] }}{% if paper.abstract|length > 400 %}...{% endif %}
|
| 68 |
-
{% endif %}
|
| 69 |
</p>
|
|
|
|
| 70 |
</article>
|
|
|
|
| 36 |
</span>
|
| 37 |
</span>
|
| 38 |
<span class="text-gray-400 inline-flex items-center gap-1">
|
| 39 |
+
{{ paper.confidence_score|confidence }}% conf.
|
| 40 |
<span class="cursor-help" title="Model confidence this paper introduces a new dataset">
|
| 41 |
<svg class="w-3.5 h-3.5 text-gray-300" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
| 42 |
<circle cx="12" cy="12" r="10" stroke-width="1.5"></circle>
|
|
|
|
| 47 |
</span>
|
| 48 |
{% else %}
|
| 49 |
<span class="{% if paper.confidence_score < 0.8 %}text-gray-400{% else %}text-gray-500{% endif %} inline-flex items-center gap-1">
|
| 50 |
+
{{ paper.confidence_score|confidence }}% conf.
|
| 51 |
<span class="cursor-help" title="Model confidence this paper introduces a new dataset">
|
| 52 |
<svg class="w-3.5 h-3.5 {% if paper.confidence_score < 0.8 %}text-gray-300{% else %}text-gray-400{% endif %}" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
| 53 |
<circle cx="12" cy="12" r="10" stroke-width="1.5"></circle>
|
|
|
|
| 59 |
{% endif %}
|
| 60 |
</div>
|
| 61 |
|
| 62 |
+
<!-- Authors -->
|
| 63 |
+
{% if paper.authors %}
|
| 64 |
+
{% set author_list = paper.authors.split(', ') %}
|
| 65 |
+
<p class="mt-1 text-sm text-gray-500">
|
| 66 |
+
{{ author_list[:3]|join(', ') }}{% if author_list|length > 3 %} <span class="text-gray-400">et al.</span>{% endif %}
|
| 67 |
+
</p>
|
| 68 |
+
{% endif %}
|
| 69 |
+
|
| 70 |
+
<!-- Abstract (expandable) -->
|
| 71 |
+
{% if paper.abstract|length > 400 %}
|
| 72 |
+
<div class="mt-2" x-data="{ expanded: false }">
|
| 73 |
+
<p class="text-gray-600 text-sm leading-relaxed">
|
| 74 |
+
<span x-show="!expanded">
|
| 75 |
+
{% if search %}{{ paper.abstract[:400]|highlight(search) }}{% else %}{{ paper.abstract[:400] }}{% endif %}…
|
| 76 |
+
</span>
|
| 77 |
+
<span x-show="expanded" x-cloak>
|
| 78 |
+
{% if search %}{{ paper.abstract|highlight(search) }}{% else %}{{ paper.abstract }}{% endif %}
|
| 79 |
+
</span>
|
| 80 |
+
<button @click="expanded = !expanded"
|
| 81 |
+
class="ml-1 text-gray-400 hover:text-gray-600 text-sm">
|
| 82 |
+
<span x-show="!expanded">Show more</span>
|
| 83 |
+
<span x-show="expanded" x-cloak>Show less</span>
|
| 84 |
+
</button>
|
| 85 |
+
</p>
|
| 86 |
+
</div>
|
| 87 |
+
{% else %}
|
| 88 |
<p class="mt-2 text-gray-600 text-sm leading-relaxed">
|
| 89 |
+
{% if search %}{{ paper.abstract|highlight(search) }}{% else %}{{ paper.abstract }}{% endif %}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
</p>
|
| 91 |
+
{% endif %}
|
| 92 |
</article>
|