Spaces:
Sleeping
Sleeping
Upload 63 files
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- app.py +81 -0
- app/__init__.py +1 -0
- app/__pycache__/__init__.cpython-312.pyc +0 -0
- app/core/__init__.py +1 -0
- app/core/__pycache__/__init__.cpython-312.pyc +0 -0
- app/core/__pycache__/config.cpython-312.pyc +0 -0
- app/core/__pycache__/database.cpython-312.pyc +0 -0
- app/core/__pycache__/models.cpython-312.pyc +0 -0
- app/core/__pycache__/notifier.cpython-312.pyc +0 -0
- app/core/__pycache__/scheduler.cpython-312.pyc +0 -0
- app/core/__pycache__/telegram_bot.cpython-312.pyc +0 -0
- app/core/config.py +135 -0
- app/core/database.py +359 -0
- app/core/models.py +80 -0
- app/core/notifier.py +77 -0
- app/core/scheduler.py +135 -0
- app/core/telegram_bot.py +294 -0
- app/fetcher/__init__.py +1 -0
- app/fetcher/__pycache__/__init__.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/arxiv_client.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/fetch_pipeline.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/http_session.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/pubmed_client.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/semantic_scholar.cpython-312.pyc +0 -0
- app/fetcher/arxiv_client.py +159 -0
- app/fetcher/crossref_client.py +69 -0
- app/fetcher/fetch_pipeline.py +225 -0
- app/fetcher/http_session.py +223 -0
- app/fetcher/pubmed_client.py +213 -0
- app/fetcher/semantic_scholar.py +181 -0
- app/ranker/__init__.py +1 -0
- app/ranker/__pycache__/__init__.cpython-312.pyc +0 -0
- app/ranker/__pycache__/citation_scorer.cpython-312.pyc +0 -0
- app/ranker/__pycache__/composite_ranker.cpython-312.pyc +0 -0
- app/ranker/__pycache__/tfidf_ranker.cpython-312.pyc +0 -0
- app/ranker/citation_scorer.py +49 -0
- app/ranker/composite_ranker.py +88 -0
- app/ranker/tfidf_ranker.py +182 -0
- app/summarizer/__pycache__/groq_client.cpython-312.pyc +0 -0
- app/summarizer/groq_client.py +101 -0
- app/ui/__init__.py +1 -0
- app/ui/__pycache__/__init__.cpython-312.pyc +0 -0
- app/ui/__pycache__/detail_screen.cpython-312.pyc +0 -0
- app/ui/__pycache__/home_screen.cpython-312.pyc +0 -0
- app/ui/__pycache__/settings_screen.cpython-312.pyc +0 -0
- app/ui/detail_screen.py +135 -0
- app/ui/home_screen.py +123 -0
- app/ui/kv/detail.kv +243 -0
- app/ui/kv/home.kv +162 -0
- app/ui/kv/settings.kv +318 -0
app.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""ResearchRadar-HF — Streamlit front-end with a background daily scheduler.

Runs a daemon thread that triggers ``run_daily.py --now`` every day at
05:00 EEST, plus a small dashboard with a manual-trigger button.
"""

import os
import subprocess
import threading
import time
from datetime import datetime, timedelta

import pytz
import streamlit as st

st.set_page_config(page_title="ResearchRadar-HF", page_icon="📡")

st.title("📡 ResearchRadar Bot")
st.markdown("Your daily research digest is running in the background.")

# Timezone processing for EEST (UTC+3)
TIMEZONE = pytz.timezone('Europe/Bucharest')  # or any UTC+3 region
LATEST_LOG = "Logs will appear here once a fetch starts..."

status_placeholder = st.empty()
log_placeholder = st.empty()

# Process-wide guard. st.session_state is scoped to a single *browser
# session*, so relying on it alone starts one scheduler thread per visitor
# (duplicate daily fetches). Module globals survive reruns and sessions
# within the same process, so this flag is checked under a lock instead.
_worker_lock = threading.Lock()
_worker_started = False


def _seconds_until_target(now: datetime) -> float:
    """Return seconds from *now* until the next 05:00 in TIMEZONE."""
    target = now.replace(hour=5, minute=0, second=0, microsecond=0)
    if target <= now:
        target += timedelta(days=1)
    return (target - now).total_seconds()


def run_worker() -> None:
    """Background thread: trigger the fetch script at 05:00 EEST daily."""
    while True:
        wait_seconds = _seconds_until_target(datetime.now(TIMEZONE))

        # Poll once a minute; when less than a minute remains, sleep the
        # exact remainder so the fetch never fires early.
        if wait_seconds > 60:
            time.sleep(60)
            continue
        time.sleep(max(wait_seconds, 0))

        # Execute the fetch
        print(f"[{datetime.now()}] Triggering fetch...")
        subprocess.run(["python", "run_daily.py", "--now"])

        # Sleep for a bit to avoid double-triggering within the same minute
        time.sleep(120)


def _ensure_worker() -> None:
    """Start the scheduler thread exactly once per process."""
    global _worker_started
    with _worker_lock:
        if not _worker_started:
            threading.Thread(target=run_worker, daemon=True).start()
            _worker_started = True


_ensure_worker()
st.session_state['worker_started'] = True  # kept for backward compatibility

# Dashboard UI
with status_placeholder.container():
    now_eest = datetime.now(TIMEZONE)
    st.info(f"🕒 Current EEST Time: **{now_eest.strftime('%H:%M:%S')}**")

    target = now_eest.replace(hour=5, minute=0, second=0, microsecond=0)
    if target <= now_eest:
        target += timedelta(days=1)

    diff = target - now_eest
    st.success(f"⏳ Next fetch in: **{diff}** (at 05:00 AM)")

st.divider()
if st.button("🔄 Trigger Manual Fetch Now"):
    with st.spinner("Fetching papers... this takes a few minutes (Groq rate-limits apply)"):
        res = subprocess.run(["python", "run_daily.py", "--now"], capture_output=True, text=True)
        st.code(res.stdout)
        if res.stderr:
            st.error(res.stderr)

st.markdown("""
### 📌 How it works on Hugging Face:
- This Space runs **24/7**.
- At **05:00 AM EEST**, it triggers `run_daily.py --now`.
- It reads your `GROQ_API_KEY` and `TELEGRAM` tokens from your **Space Secrets**.
""")

# Persistent storage check (optional)
if not os.path.exists(".researchradar"):
    os.makedirs(".researchradar", exist_ok=True)
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ResearchRadar β Weekly AI & Neuroscience Papers
|
app/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (139 Bytes). View file
|
|
|
app/core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Core business logic β framework-agnostic
|
app/core/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (144 Bytes). View file
|
|
|
app/core/__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (3.36 kB). View file
|
|
|
app/core/__pycache__/database.cpython-312.pyc
ADDED
|
Binary file (14 kB). View file
|
|
|
app/core/__pycache__/models.cpython-312.pyc
ADDED
|
Binary file (3.83 kB). View file
|
|
|
app/core/__pycache__/notifier.cpython-312.pyc
ADDED
|
Binary file (3.18 kB). View file
|
|
|
app/core/__pycache__/scheduler.cpython-312.pyc
ADDED
|
Binary file (5.38 kB). View file
|
|
|
app/core/__pycache__/telegram_bot.cpython-312.pyc
ADDED
|
Binary file (11.3 kB). View file
|
|
|
app/core/config.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
ResearchRadar — App-wide constants and environment configuration.

All magic values live here. Never hard-code strings or numbers in other modules.
Environment variables are read at startup using os.getenv() with documented defaults.
"""

import os
import logging

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
LOG_LEVEL = os.getenv('RESEARCHRADAR_LOG_LEVEL', 'INFO').upper()
logging.basicConfig(
    # Unknown level names fall back to INFO instead of crashing at import.
    level=getattr(logging, LOG_LEVEL, logging.INFO),
    format='[%(asctime)s] %(name)s %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger('researchradar')

# ---------------------------------------------------------------------------
# Data Source URLs
# ---------------------------------------------------------------------------
# HTTPS: the arXiv export API supports TLS — avoid plaintext HTTP.
ARXIV_BASE_URL = 'https://export.arxiv.org/api/query'
ARXIV_MAX_RESULTS = 50

SEMSCHOLAR_BASE_URL = 'https://api.semanticscholar.org/graph/v1'
PUBMED_BASE_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils'
CROSSREF_BASE_URL = 'https://api.crossref.org/works'

# ---------------------------------------------------------------------------
# HTTP / Retry Configuration
# ---------------------------------------------------------------------------
HTTP_TIMEOUT = 20        # seconds per request
HTTP_MAX_RETRIES = 4
HTTP_BACKOFF_BASE = 2    # exponential: 2^attempt seconds
HTTP_BACKOFF_MAX = 64    # cap at 64 seconds
RETRY_STATUS_CODES = {429, 500, 502, 503, 504}

# ---------------------------------------------------------------------------
# Scheduler
# ---------------------------------------------------------------------------
SCHEDULE_DAY = 'sun'
SCHEDULE_HOUR = 8
SCHEDULE_MINUTE = 0

# ---------------------------------------------------------------------------
# Ranking & Display
# ---------------------------------------------------------------------------
TOP_N_PER_CATEGORY = 5   # papers to surface in each digest card
CITATION_NORM = 50       # citation_score = min(citations / CITATION_NORM, 1.0)
RECENCY_BONUS = 0.2      # added to papers < 3 days old

# Default composite weights (user-adjustable in settings)
WEIGHT_RELEVANCE = 0.60
WEIGHT_CITATION = 0.30
WEIGHT_RECENCY = 0.10

# ---------------------------------------------------------------------------
# Database
# ---------------------------------------------------------------------------
DB_VERSION = 2  # increment on schema change (added summary_llm)
DB_PATH = os.getenv('RESEARCHRADAR_DB_PATH', '')  # resolved at runtime

# ---------------------------------------------------------------------------
# Category Mapping
# ---------------------------------------------------------------------------
ARXIV_CATEGORY_MAP = {
    'ml': ['cs.LG', 'stat.ML'],
    'ai': ['cs.AI', 'cs.CL', 'cs.CV'],
    'cs': ['cs.SE', 'cs.PL', 'cs.DS', 'cs.AR'],
    'neuroscience': ['q-bio.NC'],
    'bci': ['eess.SP', 'cs.HC'],
}

CATEGORY_LABELS = {
    'ml': 'Machine Learning',
    'ai': 'Artificial Intelligence',
    'cs': 'Computer Science',
    'neuroscience': 'Neuroscience',
    'bci': 'Brain-Computer Interface',
}

# Keyword map used by Semantic Scholar fallback searches
KEYWORD_MAP = {
    'ml': ['machine learning', 'deep learning', 'neural network'],
    'ai': ['artificial intelligence', 'natural language processing',
           'computer vision', 'reinforcement learning', 'Transformers'],
    'cs': ['software engineering', 'programming languages',
           'data structures', 'algorithms'],
    'neuroscience': ['neuroscience', 'synaptic plasticity', 'cortex',
                     'neural circuits', 'speech recognition', 'autism',
                     'dementia', 'alzheimer', 'parkinson'],
    'bci': ['brain computer interface', 'EEG', 'neural decoding',
            'neuroprosthetics'],
}

# PubMed MeSH terms for supplemental queries
PUBMED_MESH_MAP = {
    'neuroscience': 'Neurosciences[MeSH]',
    'bci': 'Brain-Computer Interfaces[MeSH]',
}

# ---------------------------------------------------------------------------
# Groq (LLM Summarization)
# ---------------------------------------------------------------------------
GROQ_API_KEY = os.getenv('GROQ_API_KEY', '')
GROQ_BASE_URL = 'https://api.groq.com/openai/v1/chat/completions'
GROQ_MODEL = 'llama-3.1-8b-instant'

# Rate Limits (llama-3.1-8b-instant)
GROQ_RPM = 30    # 1 request / 2 seconds
GROQ_TPM = 6000
GROQ_DELAY = 2.1  # seconds between requests to be safe

# ---------------------------------------------------------------------------
# Filtering
# ---------------------------------------------------------------------------
# Neuro/BCI papers MUST have these keywords to be included
AI_FILTERS = [
    'ai', 'machine learning', 'neural network', 'deep learning',
    'reinforcement learning', 'transformer', 'algorithm', 'artificial intelligence',
    'decoder', 'encoder', 'brain computer interface', 'classifier'
]

# ---------------------------------------------------------------------------
# Optional API Keys (never required)
# ---------------------------------------------------------------------------
SEMANTIC_SCHOLAR_API_KEY = os.getenv('SEMANTIC_SCHOLAR_API_KEY', '')
NCBI_API_KEY = os.getenv('NCBI_API_KEY', '')

# ---------------------------------------------------------------------------
# User-Agent — required by arXiv fair-use policy
# ---------------------------------------------------------------------------
USER_AGENT = 'ResearchRadar/1.0 (contact: app@example.com)'
app/core/database.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β SQLite wrapper with migrations.
|
| 3 |
+
|
| 4 |
+
All write operations use parameterised queries exclusively.
|
| 5 |
+
Never format SQL strings with user or API data.
|
| 6 |
+
"""
|
| 7 |
+
|
from __future__ import annotations

import functools
import json
import logging
import os
import sqlite3
import time
from datetime import date, datetime
from typing import List, Optional

from app.core.config import DB_VERSION
from app.core.models import Digest, Paper
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
# ---------------------------------------------------------------------------
# Schema DDL (Version 1)
# ---------------------------------------------------------------------------

# Baseline schema, executed idempotently (IF NOT EXISTS) on every startup.
# Later versions are layered on via run_migrations(); summary_llm also
# appears here so fresh databases don't need the v2 ALTER.
# List-valued fields (authors, categories, fetch_errors) are stored as JSON
# text; dates/datetimes as ISO-8601 text; booleans as 0/1 INTEGER.
_SCHEMA_V1 = """
CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS papers (
    paper_id TEXT PRIMARY KEY,
    source TEXT NOT NULL,
    title TEXT NOT NULL,
    abstract TEXT NOT NULL,
    summary_llm TEXT,
    authors TEXT NOT NULL,
    published_date TEXT NOT NULL,
    categories TEXT NOT NULL,
    app_category TEXT NOT NULL,
    pdf_url TEXT,
    abstract_url TEXT NOT NULL,
    citation_count INTEGER DEFAULT 0,
    relevance_score REAL DEFAULT 0.0,
    composite_score REAL DEFAULT 0.0,
    fetched_at TEXT NOT NULL,
    is_bookmarked INTEGER DEFAULT 0,
    is_read INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS digests (
    digest_id TEXT PRIMARY KEY,
    week_start TEXT NOT NULL,
    generated_at TEXT NOT NULL,
    total_fetched INTEGER,
    total_ranked INTEGER,
    fetch_errors TEXT
);

CREATE TABLE IF NOT EXISTS digest_papers (
    digest_id TEXT NOT NULL,
    paper_id TEXT NOT NULL,
    rank_order INTEGER NOT NULL,
    PRIMARY KEY (digest_id, paper_id),
    FOREIGN KEY (digest_id) REFERENCES digests(digest_id),
    FOREIGN KEY (paper_id) REFERENCES papers(paper_id)
);
"""
| 71 |
+
|
| 72 |
+
# ---------------------------------------------------------------------------
|
| 73 |
+
# Connection
|
| 74 |
+
# ---------------------------------------------------------------------------
|
| 75 |
+
|
| 76 |
+
_DB_RETRY_MAX = 3
|
| 77 |
+
_DB_RETRY_SLEEP = 0.5
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def get_connection(db_path: str) -> sqlite3.Connection:
|
| 81 |
+
"""Return a connection with row_factory and WAL mode enabled."""
|
| 82 |
+
conn = sqlite3.connect(db_path)
|
| 83 |
+
conn.row_factory = sqlite3.Row
|
| 84 |
+
conn.execute('PRAGMA journal_mode=WAL')
|
| 85 |
+
conn.execute('PRAGMA foreign_keys=ON')
|
| 86 |
+
return conn
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _retry_on_locked(func):
|
| 90 |
+
"""Decorator: retry up to _DB_RETRY_MAX times on 'database is locked'."""
|
| 91 |
+
def wrapper(*args, **kwargs):
|
| 92 |
+
for attempt in range(_DB_RETRY_MAX):
|
| 93 |
+
try:
|
| 94 |
+
return func(*args, **kwargs)
|
| 95 |
+
except sqlite3.OperationalError as exc:
|
| 96 |
+
if 'database is locked' in str(exc) and attempt < _DB_RETRY_MAX - 1:
|
| 97 |
+
logger.warning('DB locked β retrying (%d/%d)', attempt + 1, _DB_RETRY_MAX)
|
| 98 |
+
time.sleep(_DB_RETRY_SLEEP)
|
| 99 |
+
else:
|
| 100 |
+
raise
|
| 101 |
+
return wrapper
|
| 102 |
+
|
| 103 |
+
|
# ---------------------------------------------------------------------------
# Initialisation & Migrations
# ---------------------------------------------------------------------------

def initialize(db_path: str) -> None:
    """Create tables and run any pending migrations.

    Safe to call on every startup: the schema script is idempotent and
    migrations only run when the stored version lags DB_VERSION.
    """
    conn = get_connection(db_path)
    try:
        conn.executescript(_SCHEMA_V1)
        # First run: stamp the database with the current schema version.
        row = conn.execute(
            "SELECT value FROM meta WHERE key = 'db_version'"
        ).fetchone()
        if row is not None:
            stored = int(row['value'])
            if stored < DB_VERSION:
                run_migrations(conn, stored, DB_VERSION)
        else:
            conn.execute(
                "INSERT INTO meta (key, value) VALUES ('db_version', ?)",
                (str(DB_VERSION),),
            )
        conn.commit()
    finally:
        conn.close()
| 129 |
+
|
| 130 |
+
|
def run_migrations(conn: sqlite3.Connection, current: int, target: int) -> None:
    """Apply sequential migrations from *current* to *target* version.

    Each migration step is guarded so re-running against a partially
    migrated database is harmless.
    """
    logger.info('Migrating DB from v%d to v%d', current, target)

    if current < 2:
        # v2: add the LLM summary column; tolerate re-runs where it exists.
        try:
            conn.execute("ALTER TABLE papers ADD COLUMN summary_llm TEXT")
            logger.info('V2 Migration: Added summary_llm column to papers table.')
        except sqlite3.OperationalError as e:
            if 'duplicate column name' not in str(e).lower():
                raise

    # Record the new schema version last, after all steps succeeded.
    conn.execute(
        "UPDATE meta SET value = ? WHERE key = 'db_version'",
        (str(target),),
    )
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# ---------------------------------------------------------------------------
|
| 152 |
+
# Paper helpers
|
| 153 |
+
# ---------------------------------------------------------------------------
|
| 154 |
+
|
| 155 |
+
def _paper_to_row(paper: Paper) -> tuple:
|
| 156 |
+
return (
|
| 157 |
+
paper.paper_id,
|
| 158 |
+
paper.source,
|
| 159 |
+
paper.title,
|
| 160 |
+
paper.abstract,
|
| 161 |
+
paper.summary_llm,
|
| 162 |
+
json.dumps(paper.authors),
|
| 163 |
+
paper.published_date.isoformat(),
|
| 164 |
+
json.dumps(paper.categories),
|
| 165 |
+
paper.app_category,
|
| 166 |
+
paper.pdf_url,
|
| 167 |
+
paper.abstract_url,
|
| 168 |
+
paper.citation_count,
|
| 169 |
+
paper.relevance_score,
|
| 170 |
+
paper.composite_score,
|
| 171 |
+
paper.fetched_at.isoformat(),
|
| 172 |
+
int(paper.is_bookmarked),
|
| 173 |
+
int(paper.is_read),
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
|
def _row_to_paper(row: sqlite3.Row) -> Paper:
    """Deserialize a papers-table row back into a Paper (inverse of _paper_to_row)."""
    fields = dict(
        paper_id=row['paper_id'],
        source=row['source'],
        title=row['title'],
        abstract=row['abstract'],
        summary_llm=row['summary_llm'],
        authors=json.loads(row['authors']),
        published_date=date.fromisoformat(row['published_date']),
        categories=json.loads(row['categories']),
        app_category=row['app_category'],
        pdf_url=row['pdf_url'],
        abstract_url=row['abstract_url'],
        citation_count=row['citation_count'],
        relevance_score=row['relevance_score'],
        composite_score=row['composite_score'],
        fetched_at=datetime.fromisoformat(row['fetched_at']),
        is_bookmarked=bool(row['is_bookmarked']),
        is_read=bool(row['is_read']),
    )
    return Paper(**fields)
| 197 |
+
|
| 198 |
+
|
# ---------------------------------------------------------------------------
# CRUD Operations
# ---------------------------------------------------------------------------

@_retry_on_locked
def save_digest(db_path: str, digest: Digest) -> None:
    """Transactional insert of a digest + all its papers.

    Either the digest row, every paper row, and every digest->paper link
    commit together, or (on any error) the whole write rolls back.
    INSERT OR REPLACE makes re-saving the same digest_id idempotent.
    """
    conn = get_connection(db_path)
    try:
        conn.execute('BEGIN')

        # Insert digest record
        conn.execute(
            """INSERT OR REPLACE INTO digests
               (digest_id, week_start, generated_at, total_fetched,
                total_ranked, fetch_errors)
               VALUES (?, ?, ?, ?, ?, ?)""",
            (
                digest.digest_id,
                digest.week_start.isoformat(),
                digest.generated_at.isoformat(),
                digest.total_fetched,
                digest.total_ranked,
                json.dumps(digest.fetch_errors),
            ),
        )

        # Insert papers and link to digest
        # NOTE(review): `rank` is 1-based and keeps increasing across *all*
        # categories, so rank_order records overall insertion order rather
        # than a per-category rank — confirm readers expect that.
        rank = 0
        for category, papers in digest.papers.items():
            for paper in papers:
                conn.execute(
                    """INSERT OR REPLACE INTO papers
                       (paper_id, source, title, abstract, summary_llm, authors,
                        published_date, categories, app_category, pdf_url,
                        abstract_url, citation_count, relevance_score,
                        composite_score, fetched_at, is_bookmarked, is_read)
                       VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                    _paper_to_row(paper),
                )
                rank += 1
                conn.execute(
                    """INSERT OR REPLACE INTO digest_papers
                       (digest_id, paper_id, rank_order) VALUES (?, ?, ?)""",
                    (digest.digest_id, paper.paper_id, rank),
                )

        conn.commit()
        # After the loop, `rank` equals the total number of papers written.
        logger.info('Saved digest %s with %d papers', digest.digest_id, rank)
    except Exception:
        conn.rollback()
        logger.exception('Failed to save digest β rolled back')
        raise
    finally:
        conn.close()
| 254 |
+
|
| 255 |
+
|
@_retry_on_locked
def get_latest_digest(db_path: str) -> Optional[Digest]:
    """Load the most recent digest, or None if the database is empty.

    Recency is decided by the digests.generated_at ISO timestamp; the
    digest's papers are reattached grouped by app_category, preserving
    the stored rank_order within each group.
    """
    conn = get_connection(db_path)
    try:
        row = conn.execute(
            'SELECT * FROM digests ORDER BY generated_at DESC LIMIT 1'
        ).fetchone()
        if row is None:
            return None

        # Rebuild the Digest shell from its row (papers attached below).
        digest = Digest(
            digest_id=row['digest_id'],
            week_start=date.fromisoformat(row['week_start']),
            generated_at=datetime.fromisoformat(row['generated_at']),
            total_fetched=row['total_fetched'],
            total_ranked=row['total_ranked'],
            # fetch_errors may be NULL for old rows — treat as empty list.
            fetch_errors=json.loads(row['fetch_errors'] or '[]'),
        )

        # Load papers linked to this digest
        paper_rows = conn.execute(
            """SELECT p.* FROM papers p
               INNER JOIN digest_papers dp ON p.paper_id = dp.paper_id
               WHERE dp.digest_id = ?
               ORDER BY dp.rank_order""",
            (digest.digest_id,),
        ).fetchall()

        # Group by category; rank_order ordering is preserved within groups.
        papers_by_cat: dict = {}
        for pr in paper_rows:
            paper = _row_to_paper(pr)
            papers_by_cat.setdefault(paper.app_category, []).append(paper)
        digest.papers = papers_by_cat
        return digest
    finally:
        conn.close()
| 293 |
+
|
| 294 |
+
|
@_retry_on_locked
def get_papers(db_path: str, category: str, limit: int = 10) -> List[Paper]:
    """Get papers for a category, ordered by composite score (best first)."""
    conn = get_connection(db_path)
    try:
        cursor = conn.execute(
            """SELECT * FROM papers
               WHERE app_category = ?
               ORDER BY composite_score DESC
               LIMIT ?""",
            (category, limit),
        )
        return [_row_to_paper(record) for record in cursor.fetchall()]
    finally:
        conn.close()
| 310 |
+
|
| 311 |
+
|
@_retry_on_locked
def toggle_bookmark(db_path: str, paper_id: str) -> bool:
    """Toggle bookmark state; returns the new state.

    Returns False when the paper_id does not exist (nothing to toggle).
    """
    conn = get_connection(db_path)
    try:
        # Flip the flag in SQL so the update is a single atomic statement.
        conn.execute(
            """UPDATE papers
               SET is_bookmarked = CASE WHEN is_bookmarked = 0 THEN 1 ELSE 0 END
               WHERE paper_id = ?""",
            (paper_id,),
        )
        conn.commit()
        result = conn.execute(
            'SELECT is_bookmarked FROM papers WHERE paper_id = ?',
            (paper_id,),
        ).fetchone()
        if result is None:
            return False
        return bool(result['is_bookmarked'])
    finally:
        conn.close()
| 331 |
+
|
| 332 |
+
|
@_retry_on_locked
def mark_read(db_path: str, paper_id: str) -> None:
    """Mark a paper as read. A no-op for unknown paper_ids."""
    conn = get_connection(db_path)
    try:
        conn.execute('UPDATE papers SET is_read = 1 WHERE paper_id = ?', (paper_id,))
        conn.commit()
    finally:
        conn.close()
| 345 |
+
|
| 346 |
+
|
@_retry_on_locked
def get_bookmarked_papers(db_path: str) -> List[Paper]:
    """Return all bookmarked papers ordered by composite score (best first)."""
    conn = get_connection(db_path)
    try:
        records = conn.execute(
            """SELECT * FROM papers
               WHERE is_bookmarked = 1
               ORDER BY composite_score DESC"""
        ).fetchall()
        return list(map(_row_to_paper, records))
    finally:
        conn.close()
app/core/models.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Pure data models.
|
| 3 |
+
|
| 4 |
+
All models are standard Python dataclasses with no external dependencies,
|
| 5 |
+
making them fully testable in isolation.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import uuid
|
| 11 |
+
from dataclasses import dataclass, field
|
| 12 |
+
from datetime import date, datetime
|
| 13 |
+
from typing import Dict, List, Optional
|
| 14 |
+
|
| 15 |
+
|
@dataclass
class Paper:
    """A single research paper from any source.

    Serialized to/from SQLite by app.core.database (lists as JSON text,
    dates/datetimes as ISO-8601 text, booleans as 0/1).
    """

    paper_id: str                 # arXiv ID or PubMed PMID — primary key
    source: str                   # 'arxiv' | 'semantic_scholar' | 'pubmed'
    title: str
    abstract: str
    authors: List[str]
    published_date: date          # UTC
    categories: List[str]         # e.g. ['cs.LG', 'stat.ML']
    app_category: str             # mapped app category slug
    summary_llm: Optional[str] = None  # Brief summary (Idea, Method, Results) via Groq
    pdf_url: Optional[str] = None      # direct PDF link if available
    abstract_url: str = ''             # canonical web page
    citation_count: int = 0
    relevance_score: float = 0.0       # set by ranker
    composite_score: float = 0.0       # set by ranker
    # NOTE(review): utcnow() yields a *naive* datetime (and is deprecated in
    # 3.12) — consider datetime.now(timezone.utc) if tz-awareness is wanted.
    fetched_at: datetime = field(default_factory=datetime.utcnow)
    is_bookmarked: bool = False
    is_read: bool = False
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@dataclass
class Digest:
    """A weekly digest containing ranked papers per category.

    Attributes:
        digest_id: UUID4 hex identifier.
        week_start: Monday of the fetched week (ISO week convention).
        generated_at: naive UTC timestamp of creation.
        papers: app-category slug -> ranked list of Paper.
        total_fetched: number of papers fetched before ranking.
        total_ranked: number of papers that survived ranking.
        fetch_errors: human-readable non-fatal error messages.
    """

    digest_id: str                  # UUID4 hex
    week_start: date                # Monday of the fetched week (ISO)
    generated_at: datetime
    papers: Dict[str, List[Paper]] = field(default_factory=dict)
    total_fetched: int = 0
    total_ranked: int = 0
    fetch_errors: List[str] = field(default_factory=list)

    @classmethod
    def create_new(cls) -> 'Digest':
        """Factory: create a fresh Digest anchored to this ISO week's Monday."""
        now = datetime.utcnow()
        today = now.date()
        # ISO weeks start on Monday (weekday() == 0): subtracting the weekday
        # from the ordinal lands on Monday directly.  (The original reached
        # date.fromordinal via monday.__class__ — same behavior, clearer name.)
        monday = date.fromordinal(today.toordinal() - today.weekday())
        return cls(
            digest_id=uuid.uuid4().hex,
            week_start=monday,
            generated_at=now,
        )
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
@dataclass
class UserProfile:
    """User interest profile used by the ranker.

    interests maps an app-category slug to a free-text keyword string
    (presumably matched against paper text by the relevance ranker —
    confirm against app.ranker).  The three weight_* values sum to 1.0.
    """

    interests: Dict[str, str] = field(default_factory=lambda: {
        'ml': 'deep learning transformers attention',
        'ai': 'artificial intelligence language models',
        'cs': 'software engineering algorithms',
        'neuroscience': 'synaptic plasticity cortex neurons',
        'bci': 'brain computer interface EEG decoding',
    })
    weight_relevance: float = 0.60   # weight of text relevance in the composite
    weight_citation: float = 0.30    # weight of citation count
    weight_recency: float = 0.10     # weight of publication recency
    top_n_per_category: int = 5      # papers kept per category in a digest
|
app/core/notifier.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Notification wrapper.
|
| 3 |
+
|
| 4 |
+
Primary: Telegram Bot notifications (works on any phone).
|
| 5 |
+
Fallback: plyer local notifications (desktop / Kivy builds).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import os
|
| 12 |
+
from typing import Optional
|
| 13 |
+
|
| 14 |
+
from app.core.models import Digest
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def send_digest_notification(digest: Digest, data_dir: str = '') -> None:
    """Notify the user about *digest*.

    Delivery order: Telegram (requires *data_dir* for credentials) first,
    then a local plyer notification as the fallback.  Never raises.
    """
    delivered = False

    if data_dir:
        try:
            from app.core.telegram_bot import send_digest_notification as tg_send
        except ImportError:
            tg_send = None

        if tg_send is not None:
            try:
                delivered = bool(tg_send(digest, data_dir))
            except ImportError:
                pass
            except Exception:
                logger.debug('Telegram notification failed', exc_info=True)

    # Telegram unavailable or unsuccessful: local notification instead.
    if not delivered:
        _send_plyer_notification(digest)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _send_plyer_notification(digest: Digest) -> None:
    """Show a local notification via plyer (desktop / mobile Kivy builds)."""
    try:
        from plyer import notification
    except ImportError:
        logger.info('plyer not installed — skipping notification')
        return

    # Build one "<Category>: N papers" line per category; remember the first
    # available paper title as a headline.
    summary_lines = []
    headline = ''
    for cat, papers in digest.papers.items():
        n = len(papers)
        pretty = cat.replace('_', ' ').title()
        suffix = 's' if n != 1 else ''
        summary_lines.append(f'{pretty}: {n} paper{suffix}')
        if papers and not headline:
            headline = papers[0].title

    if not summary_lines:
        summary_lines.append('No new papers this week.')

    body = '\n'.join(summary_lines)
    if headline:
        # Keep the notification compact: truncate long titles to 80 chars.
        if len(headline) > 80:
            headline = headline[:77] + '...'
        body += f'\n\n📄 {headline}'

    try:
        notification.notify(
            title='ResearchRadar — New Papers!',
            message=body,
            app_name='ResearchRadar',
            timeout=10,
        )
        logger.info('Notification sent for digest %s', digest.digest_id)
    except NotImplementedError:
        logger.warning('Notifications not supported on this platform')
    except Exception:
        logger.warning('Notification failed', exc_info=True)
|
app/core/scheduler.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Job scheduling.
|
| 3 |
+
|
| 4 |
+
Uses APScheduler with CronTrigger for the weekly fetch job.
|
| 5 |
+
On Android, uses AlarmManager via pyjnius to wake the app if backgrounded.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
from typing import Callable, Optional
|
| 12 |
+
|
| 13 |
+
from app.core.config import SCHEDULE_DAY, SCHEDULE_HOUR, SCHEDULE_MINUTE
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
# ---------------------------------------------------------------------------
|
| 18 |
+
# APScheduler setup
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
|
| 21 |
+
def setup_scheduler(
    db_path: str,
    fetch_callback: Optional[Callable] = None,
) -> Optional[object]:
    """
    Initialise and start the APScheduler BackgroundScheduler.

    - CronTrigger: fires weekly at SCHEDULE_DAY SCHEDULE_HOUR:SCHEDULE_MINUTE.
    - misfire_grace_time: 3600s (fires within 1 hour of missed time).
    - max_instances: 1 (prevent overlapping fetch jobs).

    Args:
        db_path: SQLite file path; used by the default fetch callback and,
            when SQLAlchemy is installed, as the persistent job store.
        fetch_callback: zero-argument callable run on schedule; defaults to
            running the weekly fetch pipeline against *db_path*.

    Returns:
        The started BackgroundScheduler, or None when APScheduler is not
        installed.
    """
    try:
        from apscheduler.schedulers.background import BackgroundScheduler
        from apscheduler.triggers.cron import CronTrigger
    except ImportError:
        logger.warning('APScheduler not installed — scheduler disabled')
        return None

    if fetch_callback is None:
        # Imported lazily so the fetch pipeline stays optional at import time.
        from app.fetcher.fetch_pipeline import run_weekly_fetch

        def _default_callback():
            run_weekly_fetch(db_path)

        fetch_callback = _default_callback

    scheduler = BackgroundScheduler()

    # Try to use SQLAlchemy job store for persistence (jobs survive restarts);
    # otherwise fall back to the in-memory default store.
    try:
        from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
        jobstore = SQLAlchemyJobStore(url=f'sqlite:///{db_path}')
        scheduler.add_jobstore(jobstore, 'default')
    except ImportError:
        logger.info('SQLAlchemy not available — using memory job store')

    scheduler.add_job(
        fetch_callback,
        CronTrigger(
            day_of_week=SCHEDULE_DAY,
            hour=SCHEDULE_HOUR,
            minute=SCHEDULE_MINUTE,
        ),
        id='weekly_fetch',
        name='Weekly Paper Fetch',
        misfire_grace_time=3600,   # still fire up to 1h after a missed slot
        max_instances=1,           # never run two fetches concurrently
        replace_existing=True,     # idempotent across restarts with a job store
    )

    try:
        scheduler.start()
        logger.info(
            'Scheduler started — next fetch: %s %02d:%02d',
            SCHEDULE_DAY.upper(), SCHEDULE_HOUR, SCHEDULE_MINUTE,
        )
    except Exception as exc:
        # SchedulerAlreadyRunningError or other — log and continue
        logger.warning('Scheduler start issue (non-fatal): %s', exc)

    return scheduler
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ---------------------------------------------------------------------------
|
| 85 |
+
# Android AlarmManager integration (Android-only)
|
| 86 |
+
# ---------------------------------------------------------------------------
|
| 87 |
+
|
| 88 |
+
def setup_android_alarm() -> None:
    """
    Schedule an exact alarm via Android's AlarmManager to wake the app at
    Sunday SCHEDULE_HOUR:SCHEDULE_MINUTE.

    NOTE: setExactAndAllowWhileIdle() fires ONCE — it is not repeating.
    The app must re-arm the alarm after handling it.  (The previous version
    computed an unused weekly-interval value and claimed to be repeating.)

    Only called on Android; guarded by a platform check in main.py.
    Never raises: all failures are logged and swallowed.
    """
    try:
        from jnius import autoclass

        Context = autoclass('android.content.Context')
        Intent = autoclass('android.content.Intent')
        PendingIntent = autoclass('android.app.PendingIntent')
        AlarmManager = autoclass('android.app.AlarmManager')
        Calendar = autoclass('java.util.Calendar')

        from android import mActivity  # type: ignore[import]

        context = mActivity.getApplicationContext()
        alarm_mgr = context.getSystemService(Context.ALARM_SERVICE)

        # Relaunch the current activity when the alarm fires.
        intent = Intent(context, mActivity.getClass())
        pending = PendingIntent.getActivity(
            context, 0, intent,
            PendingIntent.FLAG_UPDATE_CURRENT | PendingIntent.FLAG_IMMUTABLE,
        )

        # Target: Sunday at the configured fetch time.
        # NOTE(review): if that moment already passed this week the alarm
        # fires immediately — confirm whether a +7-day roll-over is wanted.
        cal = Calendar.getInstance()
        cal.set(Calendar.DAY_OF_WEEK, Calendar.SUNDAY)
        cal.set(Calendar.HOUR_OF_DAY, SCHEDULE_HOUR)
        cal.set(Calendar.MINUTE, SCHEDULE_MINUTE)
        cal.set(Calendar.SECOND, 0)

        alarm_mgr.setExactAndAllowWhileIdle(
            AlarmManager.RTC_WAKEUP,
            cal.getTimeInMillis(),
            pending,
        )
        logger.info('Android AlarmManager set for Sunday %02d:%02d',
                    SCHEDULE_HOUR, SCHEDULE_MINUTE)

    except ImportError:
        logger.debug('pyjnius not available — not on Android')
    except Exception:
        logger.warning('Failed to set Android alarm', exc_info=True)
|
app/core/telegram_bot.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Telegram Bot notification system.
|
| 3 |
+
|
| 4 |
+
Sends formatted paper digests to the user's Telegram chat.
|
| 5 |
+
Replaces plyer notifications for phone delivery.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import logging
|
| 12 |
+
import os
|
| 13 |
+
from typing import Dict, List, Optional
|
| 14 |
+
|
| 15 |
+
import requests
|
| 16 |
+
|
| 17 |
+
from app.core.models import Digest, Paper
|
| 18 |
+
from app.core.config import CATEGORY_LABELS
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
# Configuration
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
|
| 26 |
+
_CONFIG_KEYS = ('telegram_bot_token', 'telegram_chat_id')
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _load_telegram_config(data_dir: str) -> dict:
|
| 30 |
+
"""Load Telegram config from settings.json."""
|
| 31 |
+
path = os.path.join(data_dir, 'settings.json')
|
| 32 |
+
if not os.path.exists(path):
|
| 33 |
+
return {}
|
| 34 |
+
try:
|
| 35 |
+
with open(path, 'r', encoding='utf-8') as f:
|
| 36 |
+
return json.load(f)
|
| 37 |
+
except (json.JSONDecodeError, OSError):
|
| 38 |
+
return {}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _get_credentials(data_dir: str) -> tuple:
|
| 42 |
+
"""
|
| 43 |
+
Get bot token and chat ID from settings or environment variables.
|
| 44 |
+
|
| 45 |
+
Priority: env vars > settings.json
|
| 46 |
+
"""
|
| 47 |
+
config = _load_telegram_config(data_dir)
|
| 48 |
+
|
| 49 |
+
token = (
|
| 50 |
+
os.getenv('TELEGRAM_BOT_TOKEN')
|
| 51 |
+
or config.get('telegram_bot_token', '')
|
| 52 |
+
)
|
| 53 |
+
chat_id = (
|
| 54 |
+
os.getenv('TELEGRAM_CHAT_ID')
|
| 55 |
+
or config.get('telegram_chat_id', '')
|
| 56 |
+
)
|
| 57 |
+
return token, chat_id
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# ---------------------------------------------------------------------------
|
| 61 |
+
# Message formatting
|
| 62 |
+
# ---------------------------------------------------------------------------
|
| 63 |
+
|
| 64 |
+
def _format_paper(rank: int, paper: Paper) -> str:
    """Format a single paper as a Telegram message block.

    Layout: linked title, italic author line, metadata line
    (date / composite score / citations), optional indented AI summary,
    optional PDF link.  Output targets Telegram 'Markdown' parse mode.
    """
    # Authors (first author + et al. when more than two names)
    if paper.authors:
        if len(paper.authors) > 2:
            authors = f"{paper.authors[0]} et al."
        else:
            authors = ", ".join(paper.authors)
    else:
        authors = "Unknown"

    # Score badge (composite score, 2 decimals)
    score = f"{paper.composite_score:.2f}"

    lines = [
        f"*{rank}.* [{paper.title}]({paper.abstract_url})",
        f"   👤 _{authors}_",
        f"   📅 {paper.published_date.isoformat()} • 📊 Score: {score} • 📈 Citations: {paper.citation_count}",
    ]

    # LLM Summary (Structured)
    if paper.summary_llm:
        lines.append("")
        lines.append(f"🤖 *AI Summary:*")
        # Indent the summary for readability
        for slink in paper.summary_llm.split('\n'):
            if slink.strip():
                lines.append(f"   _{slink.strip()}_")

    if paper.pdf_url:
        lines.append("")
        lines.append(f"   📄 [PDF]({paper.pdf_url})")

    return "\n".join(lines)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def format_digest_message(digest: Digest) -> str:
    """Format a full digest as a Telegram-ready Markdown message.

    Sections: header (week / generation time), one block per non-empty
    category with its papers, then a footer with pipeline counters and a
    non-fatal-error count.
    """
    lines = [
        "📡 *ResearchRadar — Daily Paper Digest*",
        f"📅 Week of {digest.week_start.isoformat()}",
        f"🕐 Generated: {digest.generated_at.strftime('%Y-%m-%d %H:%M UTC')}",
        "",
    ]

    total_papers = 0

    for cat_slug, papers in digest.papers.items():
        # Empty categories are omitted entirely.
        if not papers:
            continue

        cat_name = CATEGORY_LABELS.get(cat_slug, cat_slug.title())
        total_papers += len(papers)

        lines.append(f"━━━━━━━━━━━━━━━━━━━━")
        lines.append(f"🔬 *{cat_name}* ({len(papers)} papers)")
        lines.append("")

        for i, paper in enumerate(papers, 1):
            lines.append(_format_paper(i, paper))
            lines.append("")

    if total_papers == 0:
        lines.append("_No new papers found this cycle. Check back tomorrow!_")

    # Summary footer
    lines.append("━━━━━━━━━━━━━━━━━━━━")
    lines.append(
        f"📊 *Summary:* {digest.total_fetched} fetched → "
        f"{digest.total_ranked} ranked → {total_papers} delivered"
    )

    if digest.fetch_errors:
        lines.append(f"⚠️ {len(digest.fetch_errors)} non-fatal errors logged")

    return "\n".join(lines)
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def format_short_notification(digest: Digest) -> str:
    """Format a short notification summary.

    Returns a headline plus per-category counts, or a fixed 'no new papers'
    line when every category is empty.
    """
    counts = []
    for cat_slug, papers in digest.papers.items():
        if papers:
            label = CATEGORY_LABELS.get(cat_slug, cat_slug.title())
            counts.append(f"{label}: {len(papers)}")

    if not counts:
        return "📡 ResearchRadar: No new papers found today."

    summary = " | ".join(counts)
    total = sum(len(p) for p in digest.papers.values())
    return f"📡 *ResearchRadar* — {total} new papers!\n{summary}"
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
# ---------------------------------------------------------------------------
|
| 159 |
+
# Sending
|
| 160 |
+
# ---------------------------------------------------------------------------
|
| 161 |
+
|
| 162 |
+
def send_message(
    token: str,
    chat_id: str,
    text: str,
    parse_mode: str = 'Markdown',
    disable_preview: bool = True,
) -> bool:
    """
    Deliver *text* to *chat_id* through the Telegram Bot API.

    Messages above Telegram's ~4096-char limit are split at line boundaries
    and sent sequentially.  Returns True on success, False on failure
    (never raises).
    """
    # Telegram has a 4096 char limit per message
    if len(text) > 4000:
        return _send_chunked(token, chat_id, text, parse_mode, disable_preview)

    endpoint = f"https://api.telegram.org/bot{token}/sendMessage"
    payload = {
        'chat_id': chat_id,
        'text': text,
        'parse_mode': parse_mode,
        'disable_web_page_preview': disable_preview,
    }

    try:
        resp = requests.post(endpoint, json=payload, timeout=15)
        if resp.status_code != 200:
            logger.error('Telegram HTTP %d: %s', resp.status_code, resp.text[:200])
            return False
        # resp.json() may raise requests.exceptions.JSONDecodeError,
        # a RequestException subclass — keep it inside this try.
        data = resp.json()
    except requests.exceptions.RequestException as exc:
        logger.error('Telegram send failed: %s', exc)
        return False

    if data.get('ok'):
        logger.info('Telegram message sent to chat %s', chat_id)
        return True

    logger.error('Telegram API error: %s', data.get('description'))
    return False
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def _send_chunked(
    token: str,
    chat_id: str,
    text: str,
    parse_mode: str,
    disable_preview: bool,
) -> bool:
    """
    Split a long message at line boundaries and send the chunks in order.

    Chunks are capped at ~3800 chars (safely below Telegram's 4096 limit)
    with a short pause between sends as rate-limiting courtesy.  Returns
    True only when every chunk was delivered.
    """
    import time  # hoisted: previously re-imported inside the send loop

    chunks = []
    current = ""

    for line in text.split("\n"):
        # Close the current chunk once adding this line would exceed the cap.
        if len(current) + len(line) + 1 > 3800 and current:
            chunks.append(current)
            current = line
        else:
            current = current + "\n" + line if current else line

    if current:
        chunks.append(current)

    success = True
    for i, chunk in enumerate(chunks):
        if i > 0:
            time.sleep(0.5)  # Rate limiting courtesy

        ok = send_message(token, chat_id, chunk, parse_mode, disable_preview)
        if not ok:
            success = False

    return success
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# ---------------------------------------------------------------------------
|
| 244 |
+
# High-level API
|
| 245 |
+
# ---------------------------------------------------------------------------
|
| 246 |
+
|
| 247 |
+
def send_digest_notification(digest: Digest, data_dir: str) -> bool:
    """
    Push the digest to the configured Telegram chat.

    A short headline message is sent first, followed by the full digest.
    Credentials come from env vars or settings.json (see _get_credentials).
    Returns True on success, False on failure (never raises).
    """
    token, chat_id = _get_credentials(data_dir)

    if not (token and chat_id):
        logger.warning(
            'Telegram not configured — set TELEGRAM_BOT_TOKEN and '
            'TELEGRAM_CHAT_ID in environment or settings.json'
        )
        return False

    # Headline first; its delivery status is intentionally ignored — only
    # the full digest determines the reported outcome.
    send_message(token, chat_id, format_short_notification(digest))

    return send_message(token, chat_id, format_digest_message(digest))
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def send_test_message(data_dir: str) -> bool:
    """Send a test message to verify Telegram setup.

    CLI helper: prints setup guidance / the outcome to stdout and returns
    the delivery result from send_message.
    """
    token, chat_id = _get_credentials(data_dir)

    if not token or not chat_id:
        print("❌ Telegram not configured!")
        print("   Set TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID in settings.json")
        print("   or as environment variables.")
        return False

    text = (
        "✅ *ResearchRadar — Test Message*\n\n"
        "Your Telegram notifications are working!\n"
        "You'll receive daily paper digests at your configured time."
    )
    success = send_message(token, chat_id, text)

    if success:
        print("✅ Test message sent! Check your Telegram.")
    else:
        print("❌ Failed to send test message. Check your bot token and chat ID.")

    return success
|
app/fetcher/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Data acquisition layer
|
app/fetcher/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (147 Bytes). View file
|
|
|
app/fetcher/__pycache__/arxiv_client.cpython-312.pyc
ADDED
|
Binary file (6.43 kB). View file
|
|
|
app/fetcher/__pycache__/fetch_pipeline.cpython-312.pyc
ADDED
|
Binary file (9.61 kB). View file
|
|
|
app/fetcher/__pycache__/http_session.cpython-312.pyc
ADDED
|
Binary file (8.26 kB). View file
|
|
|
app/fetcher/__pycache__/pubmed_client.cpython-312.pyc
ADDED
|
Binary file (7.59 kB). View file
|
|
|
app/fetcher/__pycache__/semantic_scholar.cpython-312.pyc
ADDED
|
Binary file (6.79 kB). View file
|
|
|
app/fetcher/arxiv_client.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β arXiv Atom API client.
|
| 3 |
+
|
| 4 |
+
Fetches papers submitted/updated within the last N days for given arXiv
|
| 5 |
+
categories. Uses xml.etree.ElementTree (stdlib) β no lxml needed.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import xml.etree.ElementTree as ET
|
| 12 |
+
from datetime import date, datetime, timedelta
|
| 13 |
+
from typing import List
|
| 14 |
+
|
| 15 |
+
from app.core.config import ARXIV_BASE_URL, ARXIV_MAX_RESULTS
|
| 16 |
+
from app.core.models import Paper
|
| 17 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
# arXiv Atom namespace
|
| 22 |
+
_NS = {
|
| 23 |
+
'atom': 'http://www.w3.org/2005/Atom',
|
| 24 |
+
'arxiv': 'http://arxiv.org/schemas/atom',
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def fetch_papers(
    category_slug: str,
    arxiv_cats: List[str],
    session: RetrySession,
    days_back: int = 7,
) -> List[Paper]:
    """
    Fetch papers submitted/updated within *days_back* days across all
    arXiv categories in *arxiv_cats*.

    Args:
        category_slug: app-level category slug the results are tagged with.
        arxiv_cats: arXiv taxonomy codes, e.g. ['cs.LG', 'stat.ML'].
        session: retrying HTTP session (app.fetcher.http_session).
        days_back: size of the date window ending today, passed to
            _parse_entry as [start, end].

    Returns a list of Paper instances. Never raises — returns [] on error.
    """
    today = date.today()
    start = today - timedelta(days=days_back)
    end = today

    # Boolean OR over all requested arXiv categories.
    query = '(' + ' OR '.join(f'cat:{c}' for c in arxiv_cats) + ')'

    params = {
        'search_query': query,
        'start': 0,                        # result offset (no pagination here)
        'max_results': ARXIV_MAX_RESULTS,
        'sortBy': 'submittedDate',         # newest submissions first
        'sortOrder': 'descending',
    }

    try:
        response = session.get(ARXIV_BASE_URL, params=params)
    except FetchError as exc:
        logger.error('arXiv fetch failed for %s: %s', category_slug, exc)
        return []

    try:
        root = ET.fromstring(response.text)
    except ET.ParseError as exc:
        logger.error(
            'arXiv XML parse error: %s — snippet: %s',
            exc, response.text[:300],
        )
        return []

    papers: List[Paper] = []

    # Each Atom <entry> is parsed independently; malformed ones are skipped
    # without aborting the whole fetch.
    for entry in root.findall('atom:entry', _NS):
        try:
            paper = _parse_entry(entry, category_slug, start, end)
            if paper is not None:
                papers.append(paper)
        except Exception:
            logger.debug('Skipping malformed arXiv entry', exc_info=True)

    logger.info(
        'arXiv: fetched %d papers for [%s] (%s)',
        len(papers), category_slug, ', '.join(arxiv_cats),
    )
    return papers
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _parse_entry(
    entry: ET.Element,
    category_slug: str,
    start: date,
    end: date,
) -> Paper | None:
    """Parse a single Atom <entry> into a Paper, or return None.

    Returns None when the entry lacks a title, abstract or arXiv ID, or
    when its published date falls outside the inclusive [start, end] window.

    Args:
        entry: <entry> element from the arXiv Atom feed.
        category_slug: app category slug the Paper is tagged with.
        start, end: inclusive date window to keep papers in.
    """
    title_el = entry.find('atom:title', _NS)
    abstract_el = entry.find('atom:summary', _NS)
    if title_el is None or abstract_el is None:
        return None

    # Collapse the internal newlines/indentation arXiv inserts.
    title = ' '.join((title_el.text or '').split())
    abstract = ' '.join((abstract_el.text or '').split())
    if not title or not abstract:
        logger.debug('Skipping entry with empty title/abstract')
        return None

    # arXiv ID (strip the canonical abs-URL prefix)
    id_el = entry.find('atom:id', _NS)
    raw_id = (id_el.text or '') if id_el is not None else ''
    arxiv_id = raw_id.replace('http://arxiv.org/abs/', '').strip()
    if not arxiv_id:
        return None
    paper_id = f'arxiv:{arxiv_id}'

    # Authors
    authors = []
    for author_el in entry.findall('atom:author', _NS):
        name_el = author_el.find('atom:name', _NS)
        if name_el is not None and name_el.text:
            authors.append(name_el.text.strip())

    # Published date (ISO-8601 with trailing 'Z'); fall back to today.
    pub_el = entry.find('atom:published', _NS)
    pub_text = (pub_el.text or '') if pub_el is not None else ''
    try:
        published = datetime.fromisoformat(
            pub_text.replace('Z', '+00:00')
        ).date()
    except (ValueError, TypeError):
        published = date.today()

    # BUGFIX: the [start, end] window was accepted but never applied, so
    # stale results from low-volume categories slipped into the digest.
    if not (start <= published <= end):
        return None

    # Categories
    categories = []
    for cat_el in entry.findall('atom:category', _NS):
        term = cat_el.get('term', '')
        if term:
            categories.append(term)

    # PDF link: prefer the feed's own link, else the canonical pdf URL.
    pdf_url = None
    for link_el in entry.findall('atom:link', _NS):
        if link_el.get('title') == 'pdf':
            pdf_url = link_el.get('href')
            break
    if pdf_url is None and arxiv_id:
        pdf_url = f'https://arxiv.org/pdf/{arxiv_id}'

    abstract_url = f'https://arxiv.org/abs/{arxiv_id}'

    return Paper(
        paper_id=paper_id,
        source='arxiv',
        title=title,
        abstract=abstract,
        authors=authors,
        published_date=published,
        categories=categories,
        app_category=category_slug,
        pdf_url=pdf_url,
        abstract_url=abstract_url,
    )
|
app/fetcher/crossref_client.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β CrossRef DOI client.
|
| 3 |
+
|
| 4 |
+
DOI resolution & citation metadata fallback.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
from datetime import date
|
| 11 |
+
from typing import Optional
|
| 12 |
+
|
| 13 |
+
from app.core.config import CROSSREF_BASE_URL
|
| 14 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_citation_count(doi: str, session: RetrySession) -> Optional[int]:
    """
    Look up the CrossRef 'is-referenced-by-count' field for *doi*.

    Best-effort enrichment: any fetch or parse problem yields ``None``
    instead of raising.
    """
    try:
        payload = session.get(
            f'{CROSSREF_BASE_URL}/{doi}',
            headers={'Accept': 'application/json'},
        ).json()
    except (FetchError, ValueError, KeyError) as exc:
        logger.debug('CrossRef lookup failed for DOI %s: %s', doi, exc)
        return None
    return payload.get('message', {}).get('is-referenced-by-count')
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def resolve_doi(doi: str, session: RetrySession) -> Optional[dict]:
    """
    Resolve *doi* via CrossRef and return basic metadata.

    Returns a dict with 'doi', 'title', 'authors' and 'citation_count',
    or ``None`` when the lookup fails (best-effort).
    """
    try:
        payload = session.get(
            f'{CROSSREF_BASE_URL}/{doi}',
            headers={'Accept': 'application/json'},
        ).json()
        record = payload.get('message', {})

        titles = record.get('title', [])
        author_names = [
            f"{person.get('given', '')} {person.get('family', '')}".strip()
            for person in record.get('author', [])
        ]

        return {
            'doi': doi,
            'title': titles[0] if titles else '',
            'authors': author_names,
            'citation_count': record.get('is-referenced-by-count', 0),
        }
    except (FetchError, ValueError, KeyError) as exc:
        logger.debug('CrossRef resolve failed for DOI %s: %s', doi, exc)
        return None
|
app/fetcher/fetch_pipeline.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Fetch pipeline orchestration.
|
| 3 |
+
|
| 4 |
+
Contains the main Sunday job logic. Coordinates all API clients,
|
| 5 |
+
handles fallback, deduplication, ranking, storage, and notification.
|
| 6 |
+
|
| 7 |
+
This function must **never raise** β all exceptions are caught and
|
| 8 |
+
logged into ``Digest.fetch_errors``.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import logging
|
| 14 |
+
from difflib import SequenceMatcher
|
| 15 |
+
from typing import Dict, List
|
| 16 |
+
|
| 17 |
+
from app.core.config import (
|
| 18 |
+
ARXIV_CATEGORY_MAP,
|
| 19 |
+
KEYWORD_MAP,
|
| 20 |
+
PUBMED_MESH_MAP,
|
| 21 |
+
TOP_N_PER_CATEGORY,
|
| 22 |
+
AI_FILTERS,
|
| 23 |
+
)
|
| 24 |
+
from app.core.models import Digest, Paper, UserProfile
|
| 25 |
+
from app.core import database
|
| 26 |
+
from app.fetcher import arxiv_client, pubmed_client, semantic_scholar
|
| 27 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 28 |
+
from app.ranker import composite_ranker
|
| 29 |
+
from app.summarizer.groq_client import GroqSummarizer
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ---------------------------------------------------------------------------
|
| 35 |
+
# Public entry point
|
| 36 |
+
# ---------------------------------------------------------------------------
|
| 37 |
+
|
| 38 |
+
def run_weekly_fetch(
    db_path: str,
    profile: UserProfile | None = None,
) -> Digest:
    """
    Main weekly pipeline. Called by the scheduler every Sunday.

    1. Fetch papers from arXiv (primary) with Semantic Scholar fallback.
    2. For neuro/BCI categories, additionally fetch from PubMed and merge.
    3. Enrich citation counts (best-effort).
    4. Rank papers via composite ranker.
    5. Save digest to DB and send notification.
    6. Return the Digest.

    Never raises: every fetch/persist/notify failure is caught and
    recorded in ``digest.fetch_errors`` (or only logged, for the
    notification step).
    """
    if profile is None:
        profile = UserProfile()

    digest = Digest.create_new()
    # One shared HTTP session (retry/back-off) for all sources this run.
    session = RetrySession()
    all_papers: Dict[str, List[Paper]] = {}

    for category, arxiv_cats in ARXIV_CATEGORY_MAP.items():
        papers = _fetch_category(category, arxiv_cats, session, digest)

        # PubMed supplement for neuroscience & BCI
        if category in PUBMED_MESH_MAP:
            pubmed_papers = _fetch_pubmed(category, session, digest)
            # Merge the two sources; _deduplicate keeps the preferred
            # source (arXiv > Semantic Scholar > PubMed) on collisions.
            papers = _deduplicate(papers + pubmed_papers)

        # Enforce AI filter for neuro categories
        # "I want only those papers in neuroscience and BCI which has in someway AI or ML"
        # NOTE(review): this filter runs for EVERY category (it is outside
        # the PUBMED_MESH_MAP branch), not just neuro/BCI as the quote
        # above suggests β confirm whether that is intended.
        papers = _ai_filter(papers)

        all_papers[category] = papers

    # Enrich citation counts (best-effort)
    flat = [p for cat_list in all_papers.values() for p in cat_list]
    try:
        semantic_scholar.enrich_citations(flat, session)
    except Exception as exc:
        logger.warning('Citation enrichment failed: %s', exc)
        digest.fetch_errors.append(f'Citation enrichment: {exc}')

    # Rank
    # total_fetched is counted after dedup/filter, before ranking trims.
    digest.total_fetched = sum(len(v) for v in all_papers.values())
    ranked = composite_ranker.rank_all(all_papers, profile)

    # After ranking, summarize the top papers for the digest
    # (Only summarizes top N results that appear in the final ranked lists)
    _summarize_top_papers(ranked)

    digest.papers = ranked
    digest.total_ranked = sum(len(v) for v in ranked.values())

    # Persist
    try:
        database.save_digest(db_path, digest)
    except Exception as exc:
        logger.error('Failed to save digest: %s', exc)
        digest.fetch_errors.append(f'DB save error: {exc}')

    # Notification (best-effort)
    try:
        # Imported lazily so a broken notifier cannot break the pipeline
        # at module-import time.
        from app.core.notifier import send_digest_notification
        send_digest_notification(digest)
    except Exception as exc:
        logger.warning('Notification failed: %s', exc)

    return digest
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# ---------------------------------------------------------------------------
|
| 110 |
+
# Internal helpers
|
| 111 |
+
# ---------------------------------------------------------------------------
|
| 112 |
+
|
| 113 |
+
def _fetch_category(
    category: str,
    arxiv_cats: list,
    session: RetrySession,
    digest: Digest,
) -> List[Paper]:
    """Fetch from arXiv; fall back to Semantic Scholar on error or empty result.

    Errors from either source are logged and recorded in
    ``digest.fetch_errors``; this helper itself never raises.
    """
    results: List[Paper] = []

    try:
        results = arxiv_client.fetch_papers(category, arxiv_cats, session)
    except Exception as exc:
        error_text = f'arXiv error [{category}]: {exc}'
        logger.warning(error_text)
        digest.fetch_errors.append(error_text)

    if results:
        return results

    # Primary source came back empty β try the fallback.
    logger.info('arXiv empty for [%s] β trying Semantic Scholar', category)
    try:
        results = semantic_scholar.fetch_papers(
            category, KEYWORD_MAP.get(category, [category]), session
        )
    except Exception as exc:
        error_text = f'Semantic Scholar error [{category}]: {exc}'
        logger.warning(error_text)
        digest.fetch_errors.append(error_text)

    if not results:
        logger.info('No papers found for [%s] from any source', category)

    return results
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _fetch_pubmed(
    category: str,
    session: RetrySession,
    digest: Digest,
) -> List[Paper]:
    """Fetch supplemental papers from PubMed for *category*.

    Returns [] when the category has no MeSH mapping or when the
    fetch fails; failures are recorded in ``digest.fetch_errors``.
    """
    mesh_query = PUBMED_MESH_MAP.get(category, '')
    if not mesh_query:
        return []

    try:
        return pubmed_client.fetch_papers(category, mesh_query, session)
    except Exception as exc:
        error_text = f'PubMed error [{category}]: {exc}'
        logger.warning(error_text)
        digest.fetch_errors.append(error_text)
        return []
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def _summarize_top_papers(papers_by_cat: Dict[str, List[Paper]]):
    """Call Groq to summarize papers in the final digest list (in place)."""
    summarizer = GroqSummarizer()
    for category, paper_list in papers_by_cat.items():
        if not paper_list:
            continue
        logger.info("Summarizing %d papers for category [%s]...", len(paper_list), category)
        summarizer.summarize_many(paper_list)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def _ai_filter(papers: List[Paper]) -> List[Paper]:
|
| 173 |
+
"""Filter to only include papers mentioning AI/ML keywords in title or abstract."""
|
| 174 |
+
if not papers:
|
| 175 |
+
return []
|
| 176 |
+
|
| 177 |
+
result = []
|
| 178 |
+
for p in papers:
|
| 179 |
+
text = (p.title + " " + p.abstract).lower()
|
| 180 |
+
if any(f in text for f in AI_FILTERS):
|
| 181 |
+
result.append(p)
|
| 182 |
+
return result
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _deduplicate(papers: List[Paper]) -> List[Paper]:
|
| 186 |
+
"""
|
| 187 |
+
Remove duplicate papers.
|
| 188 |
+
|
| 189 |
+
Two papers are considered duplicates if:
|
| 190 |
+
- Their paper_id matches, OR
|
| 191 |
+
- Their title similarity (SequenceMatcher ratio) > 0.92
|
| 192 |
+
|
| 193 |
+
When merging, prefer arXiv > Semantic Scholar > PubMed.
|
| 194 |
+
"""
|
| 195 |
+
SOURCE_PRIORITY = {'arxiv': 0, 'semantic_scholar': 1, 'pubmed': 2}
|
| 196 |
+
seen_ids: set = set()
|
| 197 |
+
seen_titles: List[str] = []
|
| 198 |
+
result: List[Paper] = []
|
| 199 |
+
|
| 200 |
+
# Sort by source priority so preferred sources come first
|
| 201 |
+
papers.sort(key=lambda p: SOURCE_PRIORITY.get(p.source, 9))
|
| 202 |
+
|
| 203 |
+
for paper in papers:
|
| 204 |
+
if paper.paper_id in seen_ids:
|
| 205 |
+
continue
|
| 206 |
+
|
| 207 |
+
is_dup = False
|
| 208 |
+
for existing_title in seen_titles:
|
| 209 |
+
if SequenceMatcher(None, paper.title.lower(), existing_title).ratio() > 0.92:
|
| 210 |
+
is_dup = True
|
| 211 |
+
break
|
| 212 |
+
|
| 213 |
+
if is_dup:
|
| 214 |
+
continue
|
| 215 |
+
|
| 216 |
+
seen_ids.add(paper.paper_id)
|
| 217 |
+
seen_titles.append(paper.title.lower())
|
| 218 |
+
result.append(paper)
|
| 219 |
+
|
| 220 |
+
if len(papers) != len(result):
|
| 221 |
+
logger.info(
|
| 222 |
+
'Deduplication: %d β %d papers', len(papers), len(result),
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
return result
|
app/fetcher/http_session.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β RetrySession.
|
| 3 |
+
|
| 4 |
+
Single point of contact for all outbound HTTP.
|
| 5 |
+
No other module calls `requests` directly.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import random
|
| 12 |
+
import time
|
| 13 |
+
from typing import Optional, Set
|
| 14 |
+
|
| 15 |
+
import requests
|
| 16 |
+
|
| 17 |
+
from app.core.config import (
|
| 18 |
+
HTTP_BACKOFF_BASE,
|
| 19 |
+
HTTP_BACKOFF_MAX,
|
| 20 |
+
HTTP_MAX_RETRIES,
|
| 21 |
+
HTTP_TIMEOUT,
|
| 22 |
+
RETRY_STATUS_CODES,
|
| 23 |
+
USER_AGENT,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
# ---------------------------------------------------------------------------
|
| 29 |
+
# Custom Exceptions
|
| 30 |
+
# ---------------------------------------------------------------------------
|
| 31 |
+
|
| 32 |
+
class FetchError(Exception):
    """Base exception for all fetch-related errors."""


class FetchTimeoutError(FetchError):
    """Raised when a request times out."""


class FetchNetworkError(FetchError):
    """Raised on connection / DNS errors."""


class SourceNotFoundError(FetchError):
    """Raised on HTTP 404."""


class SourceAuthError(FetchError):
    """Raised on HTTP 401 / 403."""


class MaxRetriesExceeded(FetchError):
    """Raised when all retry attempts are exhausted."""
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# ---------------------------------------------------------------------------
|
| 63 |
+
# RetrySession
|
| 64 |
+
# ---------------------------------------------------------------------------
|
| 65 |
+
|
| 66 |
+
class RetrySession:
    """HTTP GET/POST wrapper with exponential back-off, retries, and error mapping.

    All outbound HTTP goes through this class; no other module calls
    `requests` directly. Non-200 responses are mapped to typed
    `FetchError` subclasses.
    """

    def __init__(
        self,
        max_retries: int = HTTP_MAX_RETRIES,
        backoff_base: int = HTTP_BACKOFF_BASE,
        backoff_max: int = HTTP_BACKOFF_MAX,
        timeout: int = HTTP_TIMEOUT,
        retry_status_codes: Optional[Set[int]] = None,
    ):
        self.max_retries = max_retries
        self.backoff_base = backoff_base
        self.backoff_max = backoff_max
        self.timeout = timeout
        self.retry_status_codes = retry_status_codes or RETRY_STATUS_CODES
        self._session = requests.Session()
        self._session.headers.update({'User-Agent': USER_AGENT})

    # ------------------------------------------------------------------
    def get(
        self,
        url: str,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
    ) -> requests.Response:
        """
        GET *url* with automatic retries and exponential back-off.

        Returns a `requests.Response` with status 200 on success.
        Raises a typed `FetchError` subclass on failure.
        """
        return self._request('GET', url, params=params, headers=headers)

    # ------------------------------------------------------------------
    def post(
        self,
        url: str,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
    ) -> requests.Response:
        """POST with the same retry / error logic as GET."""
        return self._request('POST', url, json=json, headers=headers)

    # ------------------------------------------------------------------
    def _request(
        self,
        method: str,
        url: str,
        *,
        params: Optional[dict] = None,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
    ) -> requests.Response:
        """Shared retry / back-off / error-mapping loop for GET and POST.

        Previously this logic was duplicated verbatim in get() and post();
        keeping it in one place keeps the two methods in sync.
        """
        merged_headers = dict(self._session.headers)
        if headers:
            merged_headers.update(headers)

        for attempt in range(self.max_retries + 1):
            try:
                if method == 'GET':
                    resp = self._session.get(
                        url,
                        params=params,
                        headers=merged_headers,
                        timeout=self.timeout,
                    )
                else:
                    resp = self._session.post(
                        url,
                        json=json,
                        headers=merged_headers,
                        timeout=self.timeout,
                    )

                if resp.status_code == 200:
                    return resp

                # Transient server-side statuses: back off and retry.
                if resp.status_code in self.retry_status_codes:
                    wait = min(
                        self.backoff_base ** attempt + random.uniform(0, 1),
                        self.backoff_max,
                    )
                    logger.warning(
                        '%s %d from %s β retrying in %.1fs (attempt %d/%d)',
                        method, resp.status_code, url, wait,
                        attempt + 1, self.max_retries,
                    )
                    time.sleep(wait)
                    continue

                if resp.status_code == 404:
                    raise SourceNotFoundError(f'404 Not Found: {url}')

                if resp.status_code in {400, 401, 403}:
                    raise SourceAuthError(
                        f'HTTP {resp.status_code} from {url}'
                    )

                # Other 4xx / unexpected codes
                raise FetchError(
                    f'HTTP {resp.status_code} from {url}: '
                    f'{resp.text[:200]}'
                )

            except requests.exceptions.Timeout as exc:
                raise FetchTimeoutError(f'Timeout on {url}') from exc

            except requests.exceptions.ConnectionError as exc:
                raise FetchNetworkError(f'Connection error on {url}') from exc

            except requests.exceptions.RequestException as exc:
                raise FetchError(f'Request error on {url}: {exc}') from exc

            except FetchError:
                raise  # re-raise our own typed exceptions unchanged

        label = url if method == 'GET' else f'POST {url}'
        raise MaxRetriesExceeded(
            f'All {self.max_retries} retries exhausted for {label}'
        )
|
app/fetcher/pubmed_client.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β PubMed E-utilities client.
|
| 3 |
+
|
| 4 |
+
Supplemental source for Neuroscience and BCI categories only.
|
| 5 |
+
Two-step process: ESearch to get IDs, then EFetch to get abstracts.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import xml.etree.ElementTree as ET
|
| 12 |
+
from datetime import date, datetime
|
| 13 |
+
from typing import List, Optional
|
| 14 |
+
|
| 15 |
+
from app.core.config import NCBI_API_KEY, PUBMED_BASE_URL
|
| 16 |
+
from app.core.models import Paper
|
| 17 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def fetch_papers(
    category_slug: str,
    mesh_terms: str,
    session: RetrySession,
    days_back: int = 7,
) -> List[Paper]:
    """
    Fetch recent papers from PubMed matching *mesh_terms*.

    Two-step E-utilities flow: ESearch (with history server) to collect
    IDs, then EFetch to pull the abstracts. Returns a list of Paper
    instances; never raises, returning [] on any error.
    """

    # ---------------------------------------------------------------
    # Step 1 β ESearch
    # ---------------------------------------------------------------
    search_params: dict = {
        'db': 'pubmed',
        'term': f'{mesh_terms} AND ("last {days_back} days"[PDat])',
        'retmax': 50,
        'retmode': 'json',
        'usehistory': 'y',
    }
    if NCBI_API_KEY:
        search_params['api_key'] = NCBI_API_KEY

    try:
        search_resp = session.get(
            f'{PUBMED_BASE_URL}/esearch.fcgi', params=search_params
        )
    except FetchError as exc:
        logger.error('PubMed ESearch failed for %s: %s', category_slug, exc)
        return []

    try:
        search_payload = search_resp.json()
    except ValueError:
        logger.error('PubMed ESearch returned invalid JSON')
        return []

    search_result = search_payload.get('esearchresult', {})
    if int(search_result.get('count', 0)) == 0:
        logger.info('PubMed: 0 results for %s', category_slug)
        return []

    web_env = search_result.get('webenv', '')
    query_key = search_result.get('querykey', '')
    if not (web_env and query_key):
        logger.error('PubMed ESearch missing WebEnv / query_key')
        return []

    # ---------------------------------------------------------------
    # Step 2 β EFetch
    # ---------------------------------------------------------------
    fetch_params: dict = {
        'db': 'pubmed',
        'WebEnv': web_env,
        'query_key': query_key,
        'retmax': 50,
        'retmode': 'xml',
        'rettype': 'abstract',
    }
    if NCBI_API_KEY:
        fetch_params['api_key'] = NCBI_API_KEY

    try:
        fetch_resp = session.get(
            f'{PUBMED_BASE_URL}/efetch.fcgi', params=fetch_params
        )
    except FetchError as exc:
        logger.error('PubMed EFetch failed for %s: %s', category_slug, exc)
        return []

    try:
        xml_root = ET.fromstring(fetch_resp.text)
    except ET.ParseError as exc:
        logger.error('PubMed XML parse error: %s', exc)
        return []

    papers: List[Paper] = []
    for article_el in xml_root.findall('.//PubmedArticle'):
        try:
            parsed = _parse_article(article_el, category_slug)
        except Exception:
            logger.debug('Skipping malformed PubMed article', exc_info=True)
            continue
        if parsed is not None:
            papers.append(parsed)

    logger.info('PubMed: fetched %d papers for [%s]', len(papers), category_slug)
    return papers
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# ---------------------------------------------------------------------------
|
| 115 |
+
# XML parsing helpers
|
| 116 |
+
# ---------------------------------------------------------------------------
|
| 117 |
+
|
| 118 |
+
def _parse_article(el: ET.Element, category_slug: str) -> Optional[Paper]:
    """Parse a single <PubmedArticle> element into a Paper.

    Returns None when the article lacks a PMID, title, or abstract.
    """

    # PMID — required; it forms the stable paper_id.
    pmid_el = el.find('.//PMID')
    if pmid_el is None or not pmid_el.text:
        return None
    pmid = pmid_el.text.strip()
    paper_id = f'pubmed:{pmid}'

    # Title. Use itertext() rather than .text: Element.text stops at the
    # first child element, so inline markup (<i>, <sup>, ...) common in
    # PubMed titles would otherwise truncate the text.
    title_el = el.find('.//ArticleTitle')
    title = ''.join(title_el.itertext()).strip() if title_el is not None else ''
    if not title:
        return None

    # Abstract — may be structured (Background, Methods, etc.).
    # itertext() again, for the same inline-markup reason as the title.
    abstract_parts: List[str] = []
    for abs_el in el.findall('.//AbstractText'):
        label = abs_el.get('Label', '')
        text = ''.join(abs_el.itertext()).strip()
        if label and text:
            abstract_parts.append(f'{label}: {text}')
        elif text:
            abstract_parts.append(text)
    abstract = '\n'.join(abstract_parts)
    if not abstract:
        return None

    # Authors (LastName is required; ForeName is optional)
    authors: List[str] = []
    for author_el in el.findall('.//Author'):
        last = author_el.findtext('LastName', '').strip()
        fore = author_el.findtext('ForeName', '').strip()
        if last:
            name = f'{fore} {last}'.strip()
            authors.append(name)

    # Publication date (best-effort)
    pub_date = _parse_pub_date(el)

    abstract_url = f'https://pubmed.ncbi.nlm.nih.gov/{pmid}/'

    return Paper(
        paper_id=paper_id,
        source='pubmed',
        title=title,
        abstract=abstract,
        authors=authors,
        published_date=pub_date,
        categories=[],
        app_category=category_slug,
        pdf_url=None,
        abstract_url=abstract_url,
    )
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def _parse_pub_date(el: ET.Element) -> date:
|
| 176 |
+
"""Best-effort parse of PubMed date (Year, Month, Day may be partial)."""
|
| 177 |
+
pub_date_el = el.find('.//PubDate')
|
| 178 |
+
if pub_date_el is None:
|
| 179 |
+
return date.today()
|
| 180 |
+
|
| 181 |
+
year_text = pub_date_el.findtext('Year', '')
|
| 182 |
+
month_text = pub_date_el.findtext('Month', '')
|
| 183 |
+
day_text = pub_date_el.findtext('Day', '')
|
| 184 |
+
|
| 185 |
+
try:
|
| 186 |
+
year = int(year_text)
|
| 187 |
+
except (ValueError, TypeError):
|
| 188 |
+
return date.today()
|
| 189 |
+
|
| 190 |
+
# Month may be numeric or abbreviated text
|
| 191 |
+
month = 1
|
| 192 |
+
if month_text:
|
| 193 |
+
try:
|
| 194 |
+
month = int(month_text)
|
| 195 |
+
except ValueError:
|
| 196 |
+
_MONTH_ABBREV = {
|
| 197 |
+
'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4,
|
| 198 |
+
'may': 5, 'jun': 6, 'jul': 7, 'aug': 8,
|
| 199 |
+
'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
|
| 200 |
+
}
|
| 201 |
+
month = _MONTH_ABBREV.get(month_text.lower()[:3], 1)
|
| 202 |
+
|
| 203 |
+
day = 1
|
| 204 |
+
if day_text:
|
| 205 |
+
try:
|
| 206 |
+
day = int(day_text)
|
| 207 |
+
except ValueError:
|
| 208 |
+
pass
|
| 209 |
+
|
| 210 |
+
try:
|
| 211 |
+
return date(year, month, day)
|
| 212 |
+
except ValueError:
|
| 213 |
+
return date(year, 1, 1)
|
app/fetcher/semantic_scholar.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Semantic Scholar REST client.
|
| 3 |
+
|
| 4 |
+
Used as a fallback fetch source and to enrich citation counts for arXiv papers.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
from datetime import date, datetime, timedelta
|
| 11 |
+
from typing import List, Optional
|
| 12 |
+
|
| 13 |
+
from app.core.config import SEMSCHOLAR_BASE_URL, SEMANTIC_SCHOLAR_API_KEY
|
| 14 |
+
from app.core.models import Paper
|
| 15 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def fetch_papers(
    category_slug: str,
    keywords: List[str],
    session: RetrySession,
    days_back: int = 7,
) -> List[Paper]:
    """
    Search Semantic Scholar for recent papers matching *keywords*.

    Args:
        category_slug: App-level category slug the results are tagged with.
        keywords: Search terms; joined with ' OR ' into a single query string.
        session: Retrying HTTP session used for the request.
        days_back: Only papers published within this many days are kept.

    Returns:
        A list of Paper instances. Never raises -- returns [] on any
        network or JSON-parsing error (this is a best-effort source).
    """
    # S2 takes one free-text query string; OR-join the keyword list.
    query_text = ' OR '.join(keywords)
    url = f'{SEMSCHOLAR_BASE_URL}/paper/search'
    params = {
        'query': query_text,
        # Request only the fields _parse_item consumes, to keep payloads small.
        'fields': (
            'paperId,title,abstract,authors,year,citationCount,'
            'externalIds,publicationDate,openAccessPdf'
        ),
        'publicationTypes': 'JournalArticle,Conference',
        'limit': 50,
    }

    headers = {}
    if SEMANTIC_SCHOLAR_API_KEY:
        # The key is optional; unauthenticated requests get a lower rate limit.
        headers['x-api-key'] = SEMANTIC_SCHOLAR_API_KEY

    try:
        response = session.get(url, params=params, headers=headers)
    except FetchError as exc:
        logger.error('Semantic Scholar fetch failed for %s: %s', category_slug, exc)
        return []

    try:
        data = response.json()
    except ValueError:
        logger.error('Semantic Scholar returned invalid JSON')
        return []

    # Papers older than this cutoff are dropped by _parse_item.
    cutoff = date.today() - timedelta(days=days_back)
    papers: List[Paper] = []

    for item in data.get('data', []):
        try:
            paper = _parse_item(item, category_slug, cutoff)
            if paper is not None:
                papers.append(paper)
        except Exception:
            # A malformed item skips only itself, never the whole fetch.
            logger.debug('Skipping malformed Semantic Scholar item', exc_info=True)

    logger.info(
        'Semantic Scholar: fetched %d papers for [%s]',
        len(papers), category_slug,
    )
    return papers
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _parse_item(item: dict, category_slug: str, cutoff: date) -> Optional[Paper]:
    """Convert one S2 search hit into a Paper; None if stale or incomplete."""
    raw_date = item.get('publicationDate', '')
    if not raw_date:
        return None
    try:
        published = date.fromisoformat(raw_date)
    except ValueError:
        return None
    if published < cutoff:
        return None

    title = (item.get('title') or '').strip()
    abstract = (item.get('abstract') or '').strip()
    if not (title and abstract):
        # Both fields are required downstream (ranking and summarization).
        return None

    s2_id = item.get('paperId', '')
    external = item.get('externalIds', {}) or {}
    arxiv_id = external.get('ArXiv', '')
    # Prefer a stable arXiv-based ID so citation enrichment can match it.
    paper_id = f'arxiv:{arxiv_id}' if arxiv_id else f's2:{s2_id}'

    author_names = []
    for author in (item.get('authors') or []):
        name = author.get('name')
        if name:
            author_names.append(name)

    open_pdf = item.get('openAccessPdf') or {}

    return Paper(
        paper_id=paper_id,
        source='semantic_scholar',
        title=title,
        abstract=abstract,
        authors=author_names,
        published_date=published,
        categories=[],
        app_category=category_slug,
        pdf_url=open_pdf.get('url'),
        abstract_url=f'https://www.semanticscholar.org/paper/{s2_id}',
        citation_count=item.get('citationCount', 0) or 0,
    )
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# ---------------------------------------------------------------------------
|
| 127 |
+
# Citation enrichment
|
| 128 |
+
# ---------------------------------------------------------------------------
|
| 129 |
+
|
| 130 |
+
def enrich_citations(papers: List[Paper], session: RetrySession) -> List[Paper]:
    """
    Batch-enrich citation counts from Semantic Scholar.

    Papers with ``arxiv:`` / ``s2:`` prefixed IDs are resolved through the
    S2 ``/paper/batch`` endpoint and have ``citation_count`` updated in place.

    This is best-effort: on failure the papers are returned unchanged.
    """
    if not papers:
        return papers

    # Build lookup of arXiv IDs (strip prefix)
    ids = []
    for p in papers:
        if p.paper_id.startswith('arxiv:'):
            # The batch API accepts the 'ArXiv:<id>' namespace form.
            ids.append(f'ArXiv:{p.paper_id[6:]}')
        elif p.paper_id.startswith('s2:'):
            ids.append(p.paper_id[3:])

    if not ids:
        return papers

    url = f'{SEMSCHOLAR_BASE_URL}/paper/batch'
    headers = {}
    if SEMANTIC_SCHOLAR_API_KEY:
        headers['x-api-key'] = SEMANTIC_SCHOLAR_API_KEY

    try:
        response = session.post(
            url,
            json={'ids': ids},
            headers=headers,
        )
        results = response.json()
    except (FetchError, ValueError) as exc:
        # Enrichment must never break the pipeline; keep the original counts.
        logger.warning('Citation enrichment failed (best-effort): %s', exc)
        return papers

    # Map S2 results back to papers
    result_map: dict = {}
    for item in results:
        # S2 returns null entries for IDs it could not resolve; skip them.
        if item and 'paperId' in item:
            ext = item.get('externalIds', {}) or {}
            arxiv = ext.get('ArXiv')
            if arxiv:
                # Index under both key styles so either prefix form matches.
                result_map[f'arxiv:{arxiv}'] = item.get('citationCount', 0) or 0
            result_map[f's2:{item["paperId"]}'] = item.get('citationCount', 0) or 0

    for paper in papers:
        if paper.paper_id in result_map:
            paper.citation_count = result_map[paper.paper_id]

    logger.info('Enriched citations for %d papers', len(papers))
    return papers
|
app/ranker/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Paper scoring & ranking
|
app/ranker/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (146 Bytes). View file
|
|
|
app/ranker/__pycache__/citation_scorer.cpython-312.pyc
ADDED
|
Binary file (2.11 kB). View file
|
|
|
app/ranker/__pycache__/composite_ranker.cpython-312.pyc
ADDED
|
Binary file (3.18 kB). View file
|
|
|
app/ranker/__pycache__/tfidf_ranker.cpython-312.pyc
ADDED
|
Binary file (9.25 kB). View file
|
|
|
app/ranker/citation_scorer.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Citation velocity scorer.
|
| 3 |
+
|
| 4 |
+
Normalises raw citation counts into a [0.0, 1.0] score and applies a
|
| 5 |
+
recency bonus for very fresh papers.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from datetime import date, timedelta
|
| 11 |
+
from typing import List
|
| 12 |
+
|
| 13 |
+
from app.core.config import CITATION_NORM, RECENCY_BONUS
|
| 14 |
+
from app.core.models import Paper
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def score(paper: Paper) -> float:
    """
    Return a citation score in [0.0, 1.0].

    The raw citation count is divided by ``CITATION_NORM`` (floored at 1
    to avoid division by zero) and clipped to 1.0; papers published less
    than 3 days ago additionally receive ``RECENCY_BONUS``, still clipped
    to 1.0.
    """
    normalised = min(paper.citation_count / max(CITATION_NORM, 1), 1.0)

    age_days = (date.today() - paper.published_date).days
    if age_days < 3:
        # Fresh papers have had little time to accumulate citations.
        normalised = min(normalised + RECENCY_BONUS, 1.0)

    return normalised
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def score_many(papers: List[Paper]) -> List[Paper]:
    """Attach a transient ``_citation_score`` to each paper and return the list.

    The attribute is not a declared Paper field; the composite ranker
    reads it as a scratch value.
    """
    for paper in papers:
        paper._citation_score = score(paper)  # type: ignore[attr-defined]
    return papers
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def recency_score(paper: Paper) -> float:
    """
    Return a linear recency score in [0.0, 1.0].

    1.0 means published today; the score decays linearly to 0.0 at seven
    or more days old. Dates in the future clamp to 1.0.
    """
    age = (date.today() - paper.published_date).days
    if age <= 0:
        return 1.0
    return max(0.0, 1.0 - age / 7.0)
|
app/ranker/composite_ranker.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Composite ranker.
|
| 3 |
+
|
| 4 |
+
Combines relevance, citation, and recency scores with user-configurable
|
| 5 |
+
weights to produce a final ``composite_score`` for each paper.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
from typing import Dict, List
|
| 12 |
+
|
| 13 |
+
from app.core.config import (
|
| 14 |
+
TOP_N_PER_CATEGORY,
|
| 15 |
+
WEIGHT_CITATION,
|
| 16 |
+
WEIGHT_RECENCY,
|
| 17 |
+
WEIGHT_RELEVANCE,
|
| 18 |
+
)
|
| 19 |
+
from app.core.models import Paper, UserProfile
|
| 20 |
+
from app.ranker import citation_scorer
|
| 21 |
+
from app.ranker.tfidf_ranker import TfidfRanker
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def rank_all(
    papers_by_category: Dict[str, List[Paper]],
    profile: UserProfile,
    cache_dir: str = '',
) -> Dict[str, List[Paper]]:
    """
    Score and sort papers per category.

    Composite score = w_rel * relevance + w_cit * citation + w_rec * recency,
    with the weights and top-N taken from *profile* (weights are renormalised
    if they do not sum to 1.0).

    Returns a dict ``{category: [Paper, ...]}`` with each list sorted by
    ``composite_score`` descending and sliced to ``top_n``.
    """
    w_rel = profile.weight_relevance
    w_cit = profile.weight_citation
    w_rec = profile.weight_recency
    top_n = profile.top_n_per_category

    # Validate weights: renormalise rather than fail on a bad user config.
    total = w_rel + w_cit + w_rec
    if abs(total - 1.0) > 0.01:
        logger.warning(
            'Ranking weights sum to %.2f (expected 1.0) β normalising', total
        )
        w_rel /= total
        w_cit /= total
        w_rec /= total

    # Build TF-IDF ranker; prefer the pickled cache, else fit from interests.
    ranker = TfidfRanker(cache_dir=cache_dir)
    if not ranker.load_cache():
        ranker.fit_profile(profile.interests)

    ranked: Dict[str, List[Paper]] = {}

    for category, papers in papers_by_category.items():
        if not papers:
            ranked[category] = []
            continue

        # Relevance scores (sets paper.relevance_score in place)
        ranker.score_many(papers)

        # Citation + recency scores
        for paper in papers:
            cit_score = citation_scorer.score(paper)
            rec_score = citation_scorer.recency_score(paper)

            paper.composite_score = (
                w_rel * paper.relevance_score
                + w_cit * cit_score
                + w_rec * rec_score
            )

        # Sort and slice
        papers.sort(key=lambda p: p.composite_score, reverse=True)
        ranked[category] = papers[:top_n]

        logger.info(
            'Ranked [%s]: %d β top %d (best=%.3f)',
            category, len(papers), min(top_n, len(papers)),
            papers[0].composite_score if papers else 0.0,
        )

    return ranked
|
app/ranker/tfidf_ranker.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β TF-IDF relevance scorer.
|
| 3 |
+
|
| 4 |
+
Computes cosine similarity between paper text and the user interest profile.
|
| 5 |
+
Falls back to a hand-written bag-of-words implementation if scikit-learn
|
| 6 |
+
is not available (mobile build edge case).
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
import os
|
| 13 |
+
import pickle
|
| 14 |
+
from typing import Dict, List, Optional
|
| 15 |
+
|
| 16 |
+
from app.core.models import Paper
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
# ---------------------------------------------------------------------------
|
| 21 |
+
# Try scikit-learn; fall back to pure-Python BoW
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
try:
|
| 24 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 25 |
+
from sklearn.metrics.pairwise import cosine_similarity as _cosine
|
| 26 |
+
|
| 27 |
+
_HAS_SKLEARN = True
|
| 28 |
+
except ImportError:
|
| 29 |
+
_HAS_SKLEARN = False
|
| 30 |
+
logger.warning('scikit-learn not available β using fallback BoW scorer')
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class TfidfRanker:
    """Score papers against a user interest profile using TF-IDF cosine similarity.

    When scikit-learn is available, a TfidfVectorizer is fitted on the user's
    interest texts and papers are scored by cosine similarity against their
    category's profile vector; the fitted model can be pickled to *cache_dir*.
    Without scikit-learn, a pure-Python bag-of-words cosine fallback is used
    (no caching).
    """

    def __init__(self, cache_dir: str = ''):
        # Directory for the pickled vectorizer cache; '' disables caching.
        self._cache_dir = cache_dir
        # Fitted sklearn TfidfVectorizer, or None before fitting / in BoW mode.
        self._vectorizer = None
        # Per-category TF-IDF vectors of the user's interest text.
        self._profile_vectors: Dict[str, object] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def fit_profile(self, interests: Dict[str, str]) -> None:
        """
        Build / rebuild the TF-IDF model from user interest keywords.

        *interests*: ``{'ml': 'deep learning transformers', ...}``
        """
        if _HAS_SKLEARN:
            self._fit_sklearn(interests)
        else:
            self._fit_bow(interests)

    def score(self, paper: Paper) -> float:
        """
        Return relevance score in [0.0, 1.0] for a paper against its
        category's profile vector.

        Scores 0.0 when the model has not been fitted or the paper's
        category has no profile vector.
        """
        cat = paper.app_category
        # Title and abstract together form the document being scored.
        text = f'{paper.title} {paper.abstract}'

        if _HAS_SKLEARN:
            return self._score_sklearn(text, cat)
        else:
            return self._score_bow(text, cat)

    def score_many(self, papers: List[Paper]) -> List[Paper]:
        """Set ``relevance_score`` on each paper in-place and return the list."""
        for p in papers:
            p.relevance_score = self.score(p)
        return papers

    # ------------------------------------------------------------------
    # scikit-learn implementation
    # ------------------------------------------------------------------

    def _fit_sklearn(self, interests: Dict[str, str]) -> None:
        # Fit the vocabulary on all interest texts, then store one sparse
        # vector per category; persist the fitted model for later runs.
        corpus = list(interests.values())
        self._vectorizer = TfidfVectorizer(
            max_features=5000, stop_words='english'
        )
        self._vectorizer.fit(corpus)
        self._profile_vectors = {}
        for cat, text in interests.items():
            vec = self._vectorizer.transform([text])
            self._profile_vectors[cat] = vec
        self._save_cache()

    def _score_sklearn(self, text: str, category: str) -> float:
        if self._vectorizer is None or category not in self._profile_vectors:
            return 0.0
        paper_vec = self._vectorizer.transform([text])
        sim = _cosine(paper_vec, self._profile_vectors[category])
        # Clamp to [0, 1] to guard against floating-point drift.
        return float(max(0.0, min(sim[0][0], 1.0)))

    # ------------------------------------------------------------------
    # Pure-Python bag-of-words fallback
    # ------------------------------------------------------------------

    def _fit_bow(self, interests: Dict[str, str]) -> None:
        # Term-frequency profile per category (no IDF weighting).
        self._bow_profiles: Dict[str, Dict[str, int]] = {}
        for cat, text in interests.items():
            self._bow_profiles[cat] = _word_freq(text.lower())

    def _score_bow(self, text: str, category: str) -> float:
        # getattr guard: _bow_profiles only exists after _fit_bow has run.
        profile = getattr(self, '_bow_profiles', {}).get(category)
        if not profile:
            return 0.0
        paper_freq = _word_freq(text.lower())
        return _cosine_bow(paper_freq, profile)

    # ------------------------------------------------------------------
    # Cache management
    # ------------------------------------------------------------------

    def _save_cache(self) -> None:
        # Caching only applies to the sklearn model.
        if not self._cache_dir or not _HAS_SKLEARN:
            return
        path = os.path.join(self._cache_dir, 'tfidf_cache.pkl')
        try:
            with open(path, 'wb') as f:
                pickle.dump(
                    (self._vectorizer, self._profile_vectors), f
                )
        except Exception:
            # Persistence is best-effort; scoring still works without a cache.
            logger.debug('Could not save TF-IDF cache', exc_info=True)

    def load_cache(self) -> bool:
        """Attempt to load a cached vectorizer. Returns True on success."""
        if not self._cache_dir or not _HAS_SKLEARN:
            return False
        path = os.path.join(self._cache_dir, 'tfidf_cache.pkl')
        if not os.path.exists(path):
            return False
        try:
            with open(path, 'rb') as f:
                self._vectorizer, self._profile_vectors = pickle.load(f)
            return True
        except Exception:
            # Corrupt or incompatible pickle: delete it and signal a rebuild.
            logger.warning('TF-IDF cache corrupt β rebuilding', exc_info=True)
            try:
                os.remove(path)
            except OSError:
                pass
            return False
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# ---------------------------------------------------------------------------
|
| 151 |
+
# BoW helpers
|
| 152 |
+
# ---------------------------------------------------------------------------
|
| 153 |
+
|
| 154 |
+
_STOPWORDS = frozenset(
|
| 155 |
+
'a an the is are was were be been being have has had do does did '
|
| 156 |
+
'will would shall should may might can could of in to for on with '
|
| 157 |
+
'at by from and or but not no nor so yet both either neither '
|
| 158 |
+
'each every all any few more most other some such that this these '
|
| 159 |
+
'those i me my we our you your he him his she her it its they them '
|
| 160 |
+
'their what which who whom when where why how'.split()
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def _word_freq(text: str) -> Dict[str, int]:
|
| 165 |
+
freq: Dict[str, int] = {}
|
| 166 |
+
for word in text.split():
|
| 167 |
+
w = ''.join(c for c in word if c.isalnum())
|
| 168 |
+
if w and w not in _STOPWORDS and len(w) > 2:
|
| 169 |
+
freq[w] = freq.get(w, 0) + 1
|
| 170 |
+
return freq
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def _cosine_bow(a: Dict[str, int], b: Dict[str, int]) -> float:
|
| 174 |
+
common = set(a) & set(b)
|
| 175 |
+
if not common:
|
| 176 |
+
return 0.0
|
| 177 |
+
dot = sum(a[k] * b[k] for k in common)
|
| 178 |
+
mag_a = sum(v * v for v in a.values()) ** 0.5
|
| 179 |
+
mag_b = sum(v * v for v in b.values()) ** 0.5
|
| 180 |
+
if mag_a == 0 or mag_b == 0:
|
| 181 |
+
return 0.0
|
| 182 |
+
return dot / (mag_a * mag_b)
|
app/summarizer/__pycache__/groq_client.cpython-312.pyc
ADDED
|
Binary file (4.35 kB). View file
|
|
|
app/summarizer/groq_client.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Groq LLM summarizer.
|
| 3 |
+
|
| 4 |
+
Summarizes papers using Groq API (llama-3.1-8b-instant).
|
| 5 |
+
Follows user's requested structural (Idea, Method, Results) and
|
| 6 |
+
enforces rate limit delays (30 RPM).
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
import time
|
| 13 |
+
from typing import List, Optional
|
| 14 |
+
|
| 15 |
+
import requests
|
| 16 |
+
from app.core.config import (
|
| 17 |
+
GROQ_API_KEY, GROQ_BASE_URL, GROQ_MODEL, GROQ_DELAY
|
| 18 |
+
)
|
| 19 |
+
from app.core.models import Paper
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
class GroqSummarizer:
    """Handles LLM calls to Groq with rate-limiting and structured prompts."""

    def __init__(self, api_key: str = GROQ_API_KEY):
        # Key may be empty; summarize_paper then becomes a no-op returning None.
        self.api_key = api_key
        # Timestamp of the last API call, used to enforce GROQ_DELAY between calls.
        self.last_call_time = 0.0

    def summarize_paper(self, paper: Paper) -> Optional[str]:
        """
        Produce a structured summary of the paper.

        Structure:
        - Idea: The core concept.
        - Method: The approach or architecture.
        - Results: The outcome or findings.

        Returns:
            The summary text, or None when no API key is configured, the API
            returns a non-200 status, or the request/parsing fails.
        """
        if not self.api_key:
            logger.info("Skip Groq summarization: NO API KEY.")
            return None

        # Prepare prompt
        prompt = (
            f"Please summarize the following research paper abstract into three brief sections:\n"
            f"1. Idea: (The core concept)\n"
            f"2. Method: (The proposed approach)\n"
            f"3. Results: (Key findings)\n\n"
            f"Title: {paper.title}\n"
            f"Abstract: {paper.abstract}\n\n"
            "Keep it concise and professional. Respond in plain text with those three labels."
        )

        # Enforce rate limit delay between consecutive calls.
        elapsed = time.time() - self.last_call_time
        if elapsed < GROQ_DELAY:
            sleep_time = GROQ_DELAY - elapsed
            # Lazy %-args: the message is only formatted if DEBUG is enabled.
            logger.debug("Groq Rate Limit: Sleeping for %.2fs", sleep_time)
            time.sleep(sleep_time)

        try:
            logger.info("Summarizing paper [%s] via Groq...", paper.paper_id)
            response = requests.post(
                GROQ_BASE_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": GROQ_MODEL,
                    "messages": [
                        {"role": "system", "content": "You are a scientific research assistant summarizing papers."},
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3,
                    "max_tokens": 300
                },
                timeout=30
            )

            # Record the call time even for error responses so failed calls
            # still count against the rate limit.
            self.last_call_time = time.time()

            if response.status_code == 200:
                data = response.json()
                summary = data['choices'][0]['message']['content'].strip()
                return summary
            else:
                logger.error("Groq API error (%s): %s", response.status_code, response.text)
                return None

        except Exception as exc:
            # Summarization is best-effort: never let an API or parsing
            # failure propagate into the pipeline.
            logger.exception("Unexpected error during Groq summarization: %s", exc)
            return None

    def summarize_many(self, papers: List[Paper]) -> None:
        """
        Iterate through papers and update their summary_llm field.
        """
        for p in papers:
            # We only summarize if it doesn't already have a summary
            if not p.summary_llm:
                p.summary_llm = self.summarize_paper(p)
|
app/ui/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Kivy UI screens & widgets
|
app/ui/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (142 Bytes). View file
|
|
|
app/ui/__pycache__/detail_screen.cpython-312.pyc
ADDED
|
Binary file (6.43 kB). View file
|
|
|
app/ui/__pycache__/home_screen.cpython-312.pyc
ADDED
|
Binary file (5.99 kB). View file
|
|
|
app/ui/__pycache__/settings_screen.cpython-312.pyc
ADDED
|
Binary file (6.95 kB). View file
|
|
|
app/ui/detail_screen.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β DetailScreen.
|
| 3 |
+
|
| 4 |
+
Displays ranked papers for a single category with bookmark & read
|
| 5 |
+
functionality. Tapping a paper opens a modal with the full abstract.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import webbrowser
|
| 12 |
+
|
| 13 |
+
from kivy.lang import Builder
|
| 14 |
+
from kivy.properties import BooleanProperty, ListProperty, StringProperty
|
| 15 |
+
from kivy.uix.boxlayout import BoxLayout
|
| 16 |
+
from kivy.uix.modalview import ModalView
|
| 17 |
+
from kivy.uix.screenmanager import Screen
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
# Load this screen's KV layout from the sibling kv/ directory; silently skip
# when the file is absent (e.g. packaging without kv assets) so importing the
# module never fails.
import os
_KV_PATH = os.path.join(os.path.dirname(__file__), 'kv', 'detail.kv')
if os.path.exists(_KV_PATH):
    Builder.load_file(_KV_PATH)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class PaperRow(BoxLayout):
    """A single paper row in the detail list.

    All display data is pushed in as Kivy properties by
    ``DetailScreen.load_papers``; the KV rules bind to them.
    """

    rank = StringProperty('1')            # 1-based position in the ranked list
    title = StringProperty('')
    authors = StringProperty('')          # pre-abbreviated, e.g. 'X et al.'
    date_str = StringProperty('')         # ISO publication date
    score_text = StringProperty('0.00')   # formatted composite score
    is_bookmarked = BooleanProperty(False)
    paper_id = StringProperty('')
    abstract_url = StringProperty('')
    pdf_url = StringProperty('')
    abstract_text = StringProperty('')

    def toggle_bookmark(self):
        """Flip the bookmark state via the running app and mirror it locally."""
        # Imported lazily to avoid a kivy.app import at module load time.
        from kivy.app import App
        app = App.get_running_app()
        if app:
            new_state = app.toggle_bookmark(self.paper_id)
            self.is_bookmarked = new_state

    def show_detail(self):
        """Open a modal popup showing this paper's full details."""
        popup = PaperDetailPopup()
        popup.paper_title = self.title
        popup.paper_authors = self.authors
        popup.paper_abstract = self.abstract_text
        popup.paper_url = self.abstract_url
        popup.paper_pdf = self.pdf_url
        popup.open()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class PaperDetailPopup(ModalView):
    """Modal dialog presenting one paper's full title, authors and abstract."""

    paper_title = StringProperty('')
    paper_authors = StringProperty('')
    paper_abstract = StringProperty('')
    paper_url = StringProperty('')
    paper_pdf = StringProperty('')

    def open_in_browser(self):
        """Open the abstract page in the system browser (best-effort)."""
        if not self.paper_url:
            return
        try:
            webbrowser.open(self.paper_url)
        except Exception:
            logger.warning('Could not open browser')

    def open_pdf(self):
        """Open the PDF link in the system browser (best-effort)."""
        if not self.paper_pdf:
            return
        try:
            webbrowser.open(self.paper_pdf)
        except Exception:
            logger.warning('Could not open PDF')
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class DetailScreen(Screen):
    """Screen showing papers for a single category."""

    category_slug = StringProperty('')
    category_name = StringProperty('')   # human-readable label from CATEGORY_LABELS
    week_range = StringProperty('')      # header text, e.g. 'Week of 2024-01-01'
    paper_rows = ListProperty([])

    def load_papers(self, category_slug: str):
        """Populate the screen with papers from the latest digest.

        Clears the 'paper_container' widget and adds one PaperRow per ranked
        paper in *category_slug*; shows 'No data' when no digest exists yet.
        """
        from kivy.app import App
        app = App.get_running_app()
        if not app:
            return

        from app.core.config import CATEGORY_LABELS
        self.category_slug = category_slug
        # Fall back to a title-cased slug for unknown categories.
        self.category_name = CATEGORY_LABELS.get(category_slug, category_slug.title())

        digest = app.get_latest_digest()
        container = self.ids.get('paper_container')
        if container is None:
            # KV rules not loaded (kv file missing) -- nothing to populate.
            return
        container.clear_widgets()

        if digest is None:
            self.week_range = 'No data'
            return

        self.week_range = f'Week of {digest.week_start.isoformat()}'
        papers = digest.papers.get(category_slug, [])

        for i, paper in enumerate(papers, 1):
            row = PaperRow()
            row.rank = str(i)
            row.paper_id = paper.paper_id
            row.title = paper.title
            row.abstract_text = paper.abstract

            # Abbreviate long author lists for the compact row layout.
            if paper.authors:
                if len(paper.authors) > 2:
                    row.authors = f'{paper.authors[0]} et al.'
                else:
                    row.authors = ', '.join(paper.authors)
            else:
                row.authors = 'Unknown'

            row.date_str = paper.published_date.isoformat()
            row.score_text = f'{paper.composite_score:.2f}'
            row.is_bookmarked = paper.is_bookmarked
            row.abstract_url = paper.abstract_url
            row.pdf_url = paper.pdf_url or ''

            container.add_widget(row)
app/ui/home_screen.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β HomeScreen.
|
| 3 |
+
|
| 4 |
+
Displays the latest digest as a scrollable list of DigestCard widgets,
|
| 5 |
+
one per category. Includes a "Refresh Now" FAB and empty-state onboarding.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import threading
|
| 12 |
+
|
| 13 |
+
from kivy.clock import Clock
|
| 14 |
+
from kivy.lang import Builder
|
| 15 |
+
from kivy.properties import (
|
| 16 |
+
BooleanProperty,
|
| 17 |
+
ListProperty,
|
| 18 |
+
ObjectProperty,
|
| 19 |
+
StringProperty,
|
| 20 |
+
)
|
| 21 |
+
from kivy.uix.boxlayout import BoxLayout
|
| 22 |
+
from kivy.uix.screenmanager import Screen
|
| 23 |
+
|
| 24 |
+
from app.core.config import CATEGORY_LABELS
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
# Load the KV layout for this screen at import time so the <HomeScreen>
# and <DigestCard> rules exist before any widget is instantiated.
# The existence check lets the module import cleanly when the KV asset
# is absent (e.g. in unit tests or partial installs).
import os
_KV_PATH = os.path.join(os.path.dirname(__file__), 'kv', 'home.kv')
if os.path.exists(_KV_PATH):
    Builder.load_file(_KV_PATH)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class DigestCard(BoxLayout):
    """A single category card showing paper count and top paper title.

    Tapping the card navigates to the detail view for its category via
    ``app.show_detail``.
    """

    # Category identifier used for digest lookups (e.g. 'machine_learning').
    category_slug = StringProperty('')
    # Human-readable category label shown in the card header.
    category_name = StringProperty('')
    # Paper count, pre-formatted as a string for direct KV binding.
    paper_count = StringProperty('0')
    # Title of the highest-ranked paper in this category.
    top_paper_title = StringProperty('No papers yet')
    # Composite score of the top paper; placeholder until populated.
    # NOTE(review): glyph appears mojibake'd — confirm the intended dash.
    top_score = StringProperty('β')

    def on_touch_up(self, touch):
        """Navigate to the category detail view when the card is tapped.

        Mouse-wheel scroll events are also dispatched as touches; without
        the ``is_mouse_scrolling`` guard, scrolling the list while the
        pointer is over a card would trigger navigation.
        """
        if not touch.is_mouse_scrolling and self.collide_point(*touch.pos):
            app = self._get_app()
            if app:
                app.show_detail(self.category_slug)
        return super().on_touch_up(touch)

    def _get_app(self):
        # Imported lazily so module import does not pull in the App machinery.
        from kivy.app import App
        return App.get_running_app()
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class HomeScreen(Screen):
    """Main screen showing the latest weekly digest.

    Builds one :class:`DigestCard` per category from the app's latest
    digest, and exposes a manual "Fetch Now" action that runs in a
    daemon thread so the UI stays responsive.
    """

    # True while a background fetch is running (disables the Fetch button in KV).
    is_fetching = BooleanProperty(False)
    # Human-readable status/timestamp shown in the bottom bar.
    last_fetched = StringProperty('Never')
    # Reserved card-state list; not used by load_digest itself.
    digest_cards = ListProperty([])

    def on_enter(self):
        """Load digest when screen becomes visible."""
        self.load_digest()

    def load_digest(self):
        """Load the latest digest from the database and populate cards."""
        from kivy.app import App
        app = App.get_running_app()
        if not app:
            return

        digest = app.get_latest_digest()
        container = self.ids.get('card_container')
        if container is None:
            # KV rules not applied yet; nothing to populate.
            return

        container.clear_widgets()

        if digest is None:
            # NOTE(review): glyph appears mojibake'd — confirm intended dash.
            self.last_fetched = 'Never β tap Fetch Now!'
            return

        self.last_fetched = digest.generated_at.strftime('%Y-%m-%d %H:%M')

        for cat_slug, papers in digest.papers.items():
            container.add_widget(self._build_card(cat_slug, papers))

    @staticmethod
    def _build_card(cat_slug, papers):
        """Build one DigestCard summarising a category's ranked papers."""
        card = DigestCard()
        card.category_slug = cat_slug
        card.category_name = CATEGORY_LABELS.get(cat_slug, cat_slug.title())
        card.paper_count = str(len(papers))
        if papers:
            title = papers[0].title
            if len(title) > 70:  # keep the preview to a single card line
                title = title[:67] + '...'
            card.top_paper_title = title
            card.top_score = f'{papers[0].composite_score:.2f}'
        return card

    def trigger_fetch(self):
        """Run the weekly fetch in a background thread.

        Re-entrant calls while a fetch is in flight are ignored.  If no
        App instance is running there is nothing to fetch against, so we
        bail out *before* flipping ``is_fetching`` — the original set the
        flag first, leaving the UI stuck in the fetching state when the
        thread immediately failed.
        """
        if self.is_fetching:
            return

        from kivy.app import App
        app = App.get_running_app()
        if app is None:
            logger.warning('trigger_fetch called with no running app')
            return

        self.is_fetching = True
        self.last_fetched = 'Fetching...'

        def _run():
            try:
                app.run_fetch()
            except Exception:
                # Broad catch is deliberate: a background failure must never
                # skip the state-reset scheduled below.
                logger.exception('Background fetch failed')
            finally:
                # Hop back to the main thread before touching widget state.
                Clock.schedule_once(lambda dt: self._on_fetch_done(), 0)

        threading.Thread(target=_run, daemon=True).start()

    def _on_fetch_done(self):
        # Runs on the main thread via Clock; safe to update properties here.
        self.is_fetching = False
        self.load_digest()
|
app/ui/kv/detail.kv
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#:kivy 2.3.0

# =====================================================================
# DetailScreen — ranked paper list for a single category
# =====================================================================

# One row per ranked paper: rank + title + bookmark toggle on the first
# line, authors/date below, then a score bar and a "View" button.
<PaperRow>:
    orientation: 'vertical'
    size_hint_y: None
    height: dp(100)
    padding: dp(14), dp(8)
    spacing: dp(4)

    # Rounded card background, inset slightly from the row bounds.
    canvas.before:
        Color:
            rgba: 0.14, 0.16, 0.22, 1
        RoundedRectangle:
            pos: self.x + dp(4), self.y + dp(2)
            size: self.width - dp(8), self.height - dp(4)
            radius: [dp(10)]

    BoxLayout:
        size_hint_y: None
        height: dp(24)

        Label:
            text: '#' + root.rank
            font_size: sp(16)
            bold: True
            color: 0.55, 0.78, 1.0, 1
            size_hint_x: None
            width: dp(36)
            halign: 'center'
            text_size: self.size

        Label:
            text: root.title
            font_size: sp(14)
            color: 0.92, 0.92, 0.95, 1
            halign: 'left'
            text_size: self.width, None
            shorten: True
            shorten_from: 'right'

        # Bookmark toggle.
        # NOTE(review): both glyphs below appear mojibake'd in this copy —
        # confirm the intended filled/empty star characters.
        Button:
            text: 'β' if root.is_bookmarked else 'β'
            font_size: sp(20)
            size_hint_x: None
            width: dp(40)
            background_color: 0, 0, 0, 0
            color: (1, 0.85, 0.2, 1) if root.is_bookmarked else (0.5, 0.5, 0.5, 1)
            on_release: root.toggle_bookmark()

    Label:
        text: root.authors + ' Β· ' + root.date_str
        font_size: sp(11)
        color: 0.55, 0.55, 0.6, 1
        halign: 'left'
        text_size: self.size
        size_hint_y: None
        height: dp(18)

    BoxLayout:
        size_hint_y: None
        height: dp(24)

        # Score bar: grey track plus a fill whose width is proportional to
        # the composite score (score_text parsed back to float, clamped to 1).
        BoxLayout:
            size_hint_x: 0.6
            Widget:
                canvas:
                    Color:
                        rgba: 0.2, 0.2, 0.3, 1
                    RoundedRectangle:
                        pos: self.pos
                        size: self.width, dp(6)
                        radius: [dp(3)]
                    Color:
                        rgba: 0.3, 0.75, 0.5, 1
                    RoundedRectangle:
                        pos: self.pos
                        size: self.width * min(float(root.score_text or 0), 1.0), dp(6)
                        radius: [dp(3)]

        Label:
            text: root.score_text
            font_size: sp(12)
            color: 0.4, 0.85, 0.6, 1
            size_hint_x: 0.2
            halign: 'right'
            text_size: self.size

        Button:
            text: 'View'
            font_size: sp(12)
            size_hint_x: 0.2
            background_normal: ''
            background_color: 0.2, 0.4, 0.7, 1
            color: 1, 1, 1, 1
            on_release: root.show_detail()


# Modal popup with the full title, author list and scrollable abstract.
<PaperDetailPopup>:
    size_hint: 0.92, 0.85
    auto_dismiss: True

    canvas.before:
        Color:
            rgba: 0.1, 0.12, 0.16, 0.98
        RoundedRectangle:
            pos: self.pos
            size: self.size
            radius: [dp(16)]

    BoxLayout:
        orientation: 'vertical'
        padding: dp(20)
        spacing: dp(12)

        Label:
            text: root.paper_title
            font_size: sp(17)
            bold: True
            color: 1, 1, 1, 1
            halign: 'left'
            valign: 'top'
            text_size: self.width, None
            size_hint_y: None
            height: self.texture_size[1]

        Label:
            text: root.paper_authors
            font_size: sp(12)
            color: 0.6, 0.6, 0.7, 1
            halign: 'left'
            text_size: self.size
            size_hint_y: None
            height: dp(20)

        ScrollView:
            Label:
                text: root.paper_abstract
                font_size: sp(13)
                color: 0.85, 0.85, 0.9, 1
                halign: 'left'
                valign: 'top'
                text_size: self.width, None
                size_hint_y: None
                height: self.texture_size[1]
                markup: False

        # Action row.
        # NOTE(review): leading button icons appear mojibake'd — confirm the
        # intended emoji for the abstract/PDF/close actions.
        BoxLayout:
            size_hint_y: None
            height: dp(44)
            spacing: dp(10)

            Button:
                text: 'π Open Abstract'
                font_size: sp(13)
                background_normal: ''
                background_color: 0.2, 0.45, 0.8, 1
                color: 1, 1, 1, 1
                on_release: root.open_in_browser()

            Button:
                text: 'π Open PDF'
                font_size: sp(13)
                background_normal: ''
                background_color: 0.3, 0.65, 0.4, 1
                color: 1, 1, 1, 1
                on_release: root.open_pdf()

            Button:
                text: 'β Close'
                font_size: sp(13)
                background_normal: ''
                background_color: 0.5, 0.2, 0.2, 1
                color: 1, 1, 1, 1
                on_release: root.dismiss()


<DetailScreen>:
    name: 'detail'

    BoxLayout:
        orientation: 'vertical'

        canvas.before:
            Color:
                rgba: 0.08, 0.09, 0.12, 1
            Rectangle:
                pos: self.pos
                size: self.size

        # ── Header ──────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(60)
            padding: dp(12), dp(10)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            # Back button (glyph appears mojibake'd — confirm arrow char).
            Button:
                text: 'β'
                font_size: sp(22)
                size_hint_x: None
                width: dp(48)
                background_color: 0, 0, 0, 0
                color: 0.7, 0.7, 0.8, 1
                on_release: app.go_home()

            BoxLayout:
                orientation: 'vertical'
                Label:
                    text: root.category_name
                    font_size: sp(18)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                Label:
                    text: root.week_range
                    font_size: sp(12)
                    color: 0.5, 0.5, 0.6, 1
                    halign: 'left'
                    text_size: self.size

        # ── Paper list ──────────────────────────────
        # PaperRow widgets are added from Python into paper_container.
        ScrollView:
            do_scroll_x: False

            BoxLayout:
                id: paper_container
                orientation: 'vertical'
                size_hint_y: None
                height: self.minimum_height
                padding: dp(6)
                spacing: dp(8)
|
app/ui/kv/home.kv
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#:kivy 2.3.0

# =====================================================================
# HomeScreen — digest card list with FAB and status bar
# =====================================================================

<DigestCard>:
    orientation: 'vertical'
    size_hint_y: None
    height: dp(120)
    padding: dp(16)
    spacing: dp(6)

    # Two stacked rounded rectangles give the card a subtle bottom shadow.
    canvas.before:
        Color:
            rgba: 0.15, 0.17, 0.22, 1
        RoundedRectangle:
            pos: self.x + dp(4), self.y + dp(2)
            size: self.width - dp(8), self.height - dp(4)
            radius: [dp(14)]
        Color:
            rgba: 0.22, 0.25, 0.32, 1
        RoundedRectangle:
            pos: self.x + dp(4), self.y + dp(4)
            size: self.width - dp(8), self.height - dp(4)
            radius: [dp(14)]

    BoxLayout:
        size_hint_y: None
        height: dp(28)
        Label:
            text: root.category_name
            font_size: sp(18)
            bold: True
            color: 0.55, 0.78, 1.0, 1
            halign: 'left'
            text_size: self.size
        Label:
            text: root.paper_count + ' papers'
            font_size: sp(14)
            color: 0.6, 0.6, 0.7, 1
            halign: 'right'
            text_size: self.size
            size_hint_x: 0.3

    Label:
        text: root.top_paper_title
        font_size: sp(14)
        color: 0.85, 0.85, 0.9, 1
        halign: 'left'
        valign: 'top'
        text_size: self.width, None
        shorten: True
        shorten_from: 'right'

    BoxLayout:
        size_hint_y: None
        height: dp(22)
        Label:
            text: 'Score: ' + root.top_score
            font_size: sp(12)
            color: 0.4, 0.85, 0.6, 1
            halign: 'left'
            text_size: self.size
        # NOTE(review): trailing glyph appears mojibake'd — confirm arrow.
        Label:
            text: 'Tap to explore β'
            font_size: sp(11)
            color: 0.5, 0.5, 0.6, 1
            halign: 'right'
            text_size: self.size


<HomeScreen>:
    name: 'home'

    BoxLayout:
        orientation: 'vertical'
        padding: 0
        spacing: 0

        canvas.before:
            Color:
                rgba: 0.08, 0.09, 0.12, 1
            Rectangle:
                pos: self.pos
                size: self.size

        # ── Header ──────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(64)
            padding: dp(16), dp(12)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            # NOTE(review): title emoji and gear glyph appear mojibake'd —
            # confirm intended characters.
            Label:
                text: 'π‘ ResearchRadar'
                font_size: sp(22)
                bold: True
                color: 1, 1, 1, 1
                halign: 'left'
                text_size: self.size
                valign: 'center'

            Button:
                text: 'β'
                font_size: sp(22)
                size_hint_x: None
                width: dp(48)
                background_color: 0, 0, 0, 0
                color: 0.7, 0.7, 0.8, 1
                on_release: app.show_settings()

        # ── Digest cards ────────────────────────────
        # DigestCard widgets are added from HomeScreen.load_digest().
        ScrollView:
            do_scroll_x: False

            BoxLayout:
                id: card_container
                orientation: 'vertical'
                size_hint_y: None
                height: self.minimum_height
                padding: dp(8)
                spacing: dp(10)

        # ── Bottom bar ──────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(56)
            padding: dp(12), dp(8)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            Label:
                text: 'Last: ' + root.last_fetched
                font_size: sp(12)
                color: 0.5, 0.5, 0.6, 1
                halign: 'left'
                text_size: self.size
                valign: 'center'

            # Disabled while HomeScreen.is_fetching is True.
            Button:
                text: 'π Fetch Now'
                font_size: sp(14)
                bold: True
                size_hint_x: None
                width: dp(130)
                background_normal: ''
                background_color: 0.2, 0.5, 0.9, 1
                color: 1, 1, 1, 1
                disabled: root.is_fetching
                on_release: root.trigger_fetch()
|
app/ui/kv/settings.kv
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#:kivy 2.3.0

# =====================================================================
# SettingsScreen — interest keywords, weights, API keys
# =====================================================================

<SettingsScreen>:
    name: 'settings'

    BoxLayout:
        orientation: 'vertical'

        canvas.before:
            Color:
                rgba: 0.08, 0.09, 0.12, 1
            Rectangle:
                pos: self.pos
                size: self.size

        # ── Header ──────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(60)
            padding: dp(12), dp(10)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            # Back button (glyph appears mojibake'd — confirm arrow char).
            Button:
                text: 'β'
                font_size: sp(22)
                size_hint_x: None
                width: dp(48)
                background_color: 0, 0, 0, 0
                color: 0.7, 0.7, 0.8, 1
                on_release: app.go_home()

            # NOTE(review): leading glyph appears mojibake'd — confirm gear emoji.
            Label:
                text: 'β Settings'
                font_size: sp(20)
                bold: True
                color: 1, 1, 1, 1
                halign: 'left'
                text_size: self.size
                valign: 'center'

        # ── Scrollable content ──────────────────────
        ScrollView:
            do_scroll_x: False

            BoxLayout:
                orientation: 'vertical'
                size_hint_y: None
                height: self.minimum_height
                padding: dp(16)
                spacing: dp(14)

                # ── Interest Keywords ───────────────
                # One labelled TextInput per category; each is two-way
                # bound to the corresponding *_keywords screen property.
                Label:
                    text: 'Interest Keywords'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                Label:
                    text: 'Machine Learning'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.ml_keywords
                    on_text: root.ml_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Artificial Intelligence'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.ai_keywords
                    on_text: root.ai_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Computer Science'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.cs_keywords
                    on_text: root.cs_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Neuroscience'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.neuro_keywords
                    on_text: root.neuro_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Brain-Computer Interface'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.bci_keywords
                    on_text: root.bci_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                # ── Ranking Weights ─────────────────
                # Sliders are two-way bound to the weight_* properties;
                # labels show the current value as a percentage.
                Label:
                    text: 'Ranking Weights'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                Label:
                    text: 'Relevance: ' + '{:.0%}'.format(root.weight_relevance)
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                Slider:
                    value: root.weight_relevance
                    on_value: root.weight_relevance = self.value
                    min: 0
                    max: 1
                    step: 0.05
                    size_hint_y: None
                    height: dp(36)
                    cursor_size: dp(20), dp(20)

                Label:
                    text: 'Citations: ' + '{:.0%}'.format(root.weight_citation)
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                Slider:
                    value: root.weight_citation
                    on_value: root.weight_citation = self.value
                    min: 0
                    max: 1
                    step: 0.05
                    size_hint_y: None
                    height: dp(36)
                    cursor_size: dp(20), dp(20)

                Label:
                    text: 'Recency: ' + '{:.0%}'.format(root.weight_recency)
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                Slider:
                    value: root.weight_recency
                    on_value: root.weight_recency = self.value
                    min: 0
                    max: 1
                    step: 0.05
                    size_hint_y: None
                    height: dp(36)
                    cursor_size: dp(20), dp(20)

                # ── Papers per category ─────────────
                Label:
                    text: 'Papers per Category'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                # Falls back to 5 if the spinner text is ever empty.
                Spinner:
                    text: str(int(root.top_n))
                    values: ['3', '5', '7', '10']
                    on_text: root.top_n = int(self.text) if self.text else 5
                    size_hint_y: None
                    height: dp(40)
                    font_size: sp(14)
                    background_color: 0.18, 0.2, 0.28, 1
                    color: 0.9, 0.9, 0.95, 1

                # ── API Keys (optional) ─────────────
                # Masked inputs; keys are optional and raise rate limits.
                Label:
                    text: 'API Keys (optional)'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                Label:
                    text: 'Semantic Scholar API Key'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.semantic_scholar_key
                    on_text: root.semantic_scholar_key = self.text
                    multiline: False
                    password: True
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1

                Label:
                    text: 'PubMed (NCBI) API Key'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.pubmed_key
                    on_text: root.pubmed_key = self.text
                    multiline: False
                    password: True
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1

                # ── Save button ─────────────────────
                # NOTE(review): leading glyph appears mojibake'd — confirm
                # the intended save/floppy emoji.
                Button:
                    text: 'πΎ Save Settings'
                    font_size: sp(16)
                    bold: True
                    size_hint_y: None
                    height: dp(50)
                    background_normal: ''
                    background_color: 0.2, 0.55, 0.35, 1
                    color: 1, 1, 1, 1
                    on_release: root.save_settings()

                # Spacer so the save button clears the bottom edge.
                Widget:
                    size_hint_y: None
                    height: dp(40)
|