ak0601 committed on
Commit
fdcd9e5
·
verified ·
1 Parent(s): d2bb178

Upload 63 files

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. app.py +81 -0
  2. app/__init__.py +1 -0
  3. app/__pycache__/__init__.cpython-312.pyc +0 -0
  4. app/core/__init__.py +1 -0
  5. app/core/__pycache__/__init__.cpython-312.pyc +0 -0
  6. app/core/__pycache__/config.cpython-312.pyc +0 -0
  7. app/core/__pycache__/database.cpython-312.pyc +0 -0
  8. app/core/__pycache__/models.cpython-312.pyc +0 -0
  9. app/core/__pycache__/notifier.cpython-312.pyc +0 -0
  10. app/core/__pycache__/scheduler.cpython-312.pyc +0 -0
  11. app/core/__pycache__/telegram_bot.cpython-312.pyc +0 -0
  12. app/core/config.py +135 -0
  13. app/core/database.py +359 -0
  14. app/core/models.py +80 -0
  15. app/core/notifier.py +77 -0
  16. app/core/scheduler.py +135 -0
  17. app/core/telegram_bot.py +294 -0
  18. app/fetcher/__init__.py +1 -0
  19. app/fetcher/__pycache__/__init__.cpython-312.pyc +0 -0
  20. app/fetcher/__pycache__/arxiv_client.cpython-312.pyc +0 -0
  21. app/fetcher/__pycache__/fetch_pipeline.cpython-312.pyc +0 -0
  22. app/fetcher/__pycache__/http_session.cpython-312.pyc +0 -0
  23. app/fetcher/__pycache__/pubmed_client.cpython-312.pyc +0 -0
  24. app/fetcher/__pycache__/semantic_scholar.cpython-312.pyc +0 -0
  25. app/fetcher/arxiv_client.py +159 -0
  26. app/fetcher/crossref_client.py +69 -0
  27. app/fetcher/fetch_pipeline.py +225 -0
  28. app/fetcher/http_session.py +223 -0
  29. app/fetcher/pubmed_client.py +213 -0
  30. app/fetcher/semantic_scholar.py +181 -0
  31. app/ranker/__init__.py +1 -0
  32. app/ranker/__pycache__/__init__.cpython-312.pyc +0 -0
  33. app/ranker/__pycache__/citation_scorer.cpython-312.pyc +0 -0
  34. app/ranker/__pycache__/composite_ranker.cpython-312.pyc +0 -0
  35. app/ranker/__pycache__/tfidf_ranker.cpython-312.pyc +0 -0
  36. app/ranker/citation_scorer.py +49 -0
  37. app/ranker/composite_ranker.py +88 -0
  38. app/ranker/tfidf_ranker.py +182 -0
  39. app/summarizer/__pycache__/groq_client.cpython-312.pyc +0 -0
  40. app/summarizer/groq_client.py +101 -0
  41. app/ui/__init__.py +1 -0
  42. app/ui/__pycache__/__init__.cpython-312.pyc +0 -0
  43. app/ui/__pycache__/detail_screen.cpython-312.pyc +0 -0
  44. app/ui/__pycache__/home_screen.cpython-312.pyc +0 -0
  45. app/ui/__pycache__/settings_screen.cpython-312.pyc +0 -0
  46. app/ui/detail_screen.py +135 -0
  47. app/ui/home_screen.py +123 -0
  48. app/ui/kv/detail.kv +243 -0
  49. app/ui/kv/home.kv +162 -0
  50. app/ui/kv/settings.kv +318 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit dashboard + background scheduler for the ResearchRadar bot.

Runs a daemon thread that triggers `run_daily.py --now` every day at
05:00 EEST, and renders a small status/manual-trigger UI.
"""

import streamlit as st
import os
import subprocess
import threading
import time
from datetime import datetime, timedelta
import pytz

st.set_page_config(page_title="ResearchRadar-HF", page_icon="📡")

st.title("📡 ResearchRadar Bot")
st.markdown("Your daily research digest is running in the background.")

# Timezone processing for EEST (UTC+3)
TIMEZONE = pytz.timezone('Europe/Bucharest')  # or any UTC+3 region
LATEST_LOG = "Logs will appear here once a fetch starts..."

status_placeholder = st.empty()
log_placeholder = st.empty()


def run_worker():
    """Background thread that triggers the fetch script at 05:00 EEST daily."""
    while True:
        now = datetime.now(TIMEZONE)

        # Target time: 05:00 AM (EEST). If that moment already passed
        # today, aim for tomorrow.
        target = now.replace(hour=5, minute=0, second=0, microsecond=0)
        if target <= now:
            target += timedelta(days=1)

        wait_seconds = (target - now).total_seconds()

        # Far from the target: poll again in a minute.
        if wait_seconds > 60:
            time.sleep(60)
            continue

        # BUGFIX: previously the fetch fired as soon as wait_seconds
        # dropped below 60, i.e. up to a minute EARLY. Sleep out the
        # exact remainder so we trigger at (not before) 05:00.
        if wait_seconds > 0:
            time.sleep(wait_seconds)

        # Execute the fetch
        print(f"[{datetime.now()}] Triggering fetch...")
        subprocess.run(["python", "run_daily.py", "--now"])

        # Sleep for a bit to avoid double-triggering
        time.sleep(120)


# Start background thread only once per Streamlit session.
# NOTE(review): session_state does not survive a full process restart —
# on restart a fresh daemon thread is started, which is the desired behavior.
if 'worker_started' not in st.session_state:
    thread = threading.Thread(target=run_worker, daemon=True)
    thread.start()
    st.session_state['worker_started'] = True

# Dashboard UI
with status_placeholder.container():
    now_eest = datetime.now(TIMEZONE)
    st.info(f"🕒 Current EEST Time: **{now_eest.strftime('%H:%M:%S')}**")

    target = now_eest.replace(hour=5, minute=0, second=0, microsecond=0)
    if target <= now_eest:
        target += timedelta(days=1)

    diff = target - now_eest
    st.success(f"⌛ Next fetch in: **{diff}** (at 05:00 AM)")

st.divider()
if st.button("🔄 Trigger Manual Fetch Now"):
    with st.spinner("Fetching papers... this takes a few minutes (Groq rate-limits apply)"):
        res = subprocess.run(["python", "run_daily.py", "--now"], capture_output=True, text=True)
        st.code(res.stdout)
        if res.stderr:
            st.error(res.stderr)

st.markdown("""
### 🛠 How it works on Hugging Face:
- This Space runs **24/7**.
- At **05:00 AM EEST**, it triggers `run_daily.py --now`.
- It reads your `GROQ_API_KEY` and `TELEGRAM` tokens from your **Space Secrets**.
""")

# Persistent storage check (optional)
if not os.path.exists(".researchradar"):
    os.makedirs(".researchradar", exist_ok=True)
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ResearchRadar β€” Weekly AI & Neuroscience Papers
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (139 Bytes). View file
 
app/core/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Core business logic β€” framework-agnostic
app/core/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (144 Bytes). View file
 
app/core/__pycache__/config.cpython-312.pyc ADDED
Binary file (3.36 kB). View file
 
app/core/__pycache__/database.cpython-312.pyc ADDED
Binary file (14 kB). View file
 
app/core/__pycache__/models.cpython-312.pyc ADDED
Binary file (3.83 kB). View file
 
app/core/__pycache__/notifier.cpython-312.pyc ADDED
Binary file (3.18 kB). View file
 
app/core/__pycache__/scheduler.cpython-312.pyc ADDED
Binary file (5.38 kB). View file
 
app/core/__pycache__/telegram_bot.cpython-312.pyc ADDED
Binary file (11.3 kB). View file
 
app/core/config.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
ResearchRadar — App-wide constants and environment configuration.

All magic values live here. Never hard-code strings or numbers in other modules.
Environment variables are read at startup using os.getenv() with documented defaults.
"""

import os
import logging

# === Logging =================================================================
# Level is environment-driven so deployments can switch to DEBUG without a
# code change; unrecognised values silently fall back to INFO.
LOG_LEVEL = os.getenv('RESEARCHRADAR_LOG_LEVEL', 'INFO').upper()
logging.basicConfig(
    level=getattr(logging, LOG_LEVEL, logging.INFO),
    format='[%(asctime)s] %(name)s %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger('researchradar')

# === Data source URLs ========================================================
ARXIV_BASE_URL = 'http://export.arxiv.org/api/query'
ARXIV_MAX_RESULTS = 50

SEMSCHOLAR_BASE_URL = 'https://api.semanticscholar.org/graph/v1'
PUBMED_BASE_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils'
CROSSREF_BASE_URL = 'https://api.crossref.org/works'

# === HTTP / retry configuration ==============================================
HTTP_TIMEOUT = 20       # seconds per request
HTTP_MAX_RETRIES = 4
HTTP_BACKOFF_BASE = 2   # exponential: 2^attempt seconds
HTTP_BACKOFF_MAX = 64   # cap at 64 seconds
RETRY_STATUS_CODES = {429, 500, 502, 503, 504}

# === Scheduler ===============================================================
SCHEDULE_DAY = 'sun'
SCHEDULE_HOUR = 8
SCHEDULE_MINUTE = 0

# === Ranking & display =======================================================
TOP_N_PER_CATEGORY = 5  # papers to surface in each digest card
CITATION_NORM = 50      # citation_score = min(citations / CITATION_NORM, 1.0)
RECENCY_BONUS = 0.2     # added to papers < 3 days old

# Default composite weights (user-adjustable in settings).
WEIGHT_RELEVANCE = 0.60
WEIGHT_CITATION = 0.30
WEIGHT_RECENCY = 0.10

# === Database ================================================================
DB_VERSION = 2  # increment on schema change (added summary_llm)
DB_PATH = os.getenv('RESEARCHRADAR_DB_PATH', '')  # resolved at runtime

# === Category mapping ========================================================
# App-category slug -> arXiv category codes queried for it.
ARXIV_CATEGORY_MAP = {
    'ml': ['cs.LG', 'stat.ML'],
    'ai': ['cs.AI', 'cs.CL', 'cs.CV'],
    'cs': ['cs.SE', 'cs.PL', 'cs.DS', 'cs.AR'],
    'neuroscience': ['q-bio.NC'],
    'bci': ['eess.SP', 'cs.HC'],
}

# Human-readable labels for the UI.
CATEGORY_LABELS = {
    'ml': 'Machine Learning',
    'ai': 'Artificial Intelligence',
    'cs': 'Computer Science',
    'neuroscience': 'Neuroscience',
    'bci': 'Brain-Computer Interface',
}

# Keyword map used by Semantic Scholar fallback searches.
KEYWORD_MAP = {
    'ml': ['machine learning', 'deep learning', 'neural network'],
    'ai': ['artificial intelligence', 'natural language processing',
           'computer vision', 'reinforcement learning', 'Transformers'],
    'cs': ['software engineering', 'programming languages',
           'data structures', 'algorithms'],
    'neuroscience': ['neuroscience', 'synaptic plasticity', 'cortex',
                     'neural circuits', 'speech recognition', 'autism',
                     'dementia', 'alzheimer', 'parkinson'],
    'bci': ['brain computer interface', 'EEG', 'neural decoding',
            'neuroprosthetics'],
}

# PubMed MeSH terms for supplemental queries.
PUBMED_MESH_MAP = {
    'neuroscience': 'Neurosciences[MeSH]',
    'bci': 'Brain-Computer Interfaces[MeSH]',
}

# === Groq (LLM summarization) ================================================
GROQ_API_KEY = os.getenv('GROQ_API_KEY', '')
GROQ_BASE_URL = 'https://api.groq.com/openai/v1/chat/completions'
GROQ_MODEL = 'llama-3.1-8b-instant'

# Rate limits for llama-3.1-8b-instant.
GROQ_RPM = 30     # 1 request / 2 seconds
GROQ_TPM = 6000
GROQ_DELAY = 2.1  # seconds between requests to be safe

# === Filtering ===============================================================
# Neuro/BCI papers MUST contain one of these keywords to be included.
AI_FILTERS = [
    'ai', 'machine learning', 'neural network', 'deep learning',
    'reinforcement learning', 'transformer', 'algorithm', 'artificial intelligence',
    'decoder', 'encoder', 'brain computer interface', 'classifier',
]

# === Optional API keys (never required) ======================================
SEMANTIC_SCHOLAR_API_KEY = os.getenv('SEMANTIC_SCHOLAR_API_KEY', '')
NCBI_API_KEY = os.getenv('NCBI_API_KEY', '')

# === User-Agent — required by arXiv fair-use policy ==========================
USER_AGENT = 'ResearchRadar/1.0 (contact: app@example.com)'
app/core/database.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
ResearchRadar — SQLite wrapper with migrations.

All write operations use parameterised queries exclusively.
Never format SQL strings with user or API data.
"""

from __future__ import annotations

import json
import logging
import os
import sqlite3
import time
from datetime import date, datetime
from typing import List, Optional

from app.core.config import DB_VERSION
from app.core.models import Digest, Paper

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Schema DDL (Version 1)
# ---------------------------------------------------------------------------
# Idempotent (IF NOT EXISTS) so executescript() is safe on every startup.
# papers.authors / papers.categories hold JSON-encoded lists; dates and
# timestamps are stored as ISO-8601 TEXT. Changes beyond v1 are applied
# incrementally by run_migrations().

_SCHEMA_V1 = """
CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS papers (
    paper_id TEXT PRIMARY KEY,
    source TEXT NOT NULL,
    title TEXT NOT NULL,
    abstract TEXT NOT NULL,
    summary_llm TEXT,
    authors TEXT NOT NULL,
    published_date TEXT NOT NULL,
    categories TEXT NOT NULL,
    app_category TEXT NOT NULL,
    pdf_url TEXT,
    abstract_url TEXT NOT NULL,
    citation_count INTEGER DEFAULT 0,
    relevance_score REAL DEFAULT 0.0,
    composite_score REAL DEFAULT 0.0,
    fetched_at TEXT NOT NULL,
    is_bookmarked INTEGER DEFAULT 0,
    is_read INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS digests (
    digest_id TEXT PRIMARY KEY,
    week_start TEXT NOT NULL,
    generated_at TEXT NOT NULL,
    total_fetched INTEGER,
    total_ranked INTEGER,
    fetch_errors TEXT
);

CREATE TABLE IF NOT EXISTS digest_papers (
    digest_id TEXT NOT NULL,
    paper_id TEXT NOT NULL,
    rank_order INTEGER NOT NULL,
    PRIMARY KEY (digest_id, paper_id),
    FOREIGN KEY (digest_id) REFERENCES digests(digest_id),
    FOREIGN KEY (paper_id) REFERENCES papers(paper_id)
);
"""

# ---------------------------------------------------------------------------
# Connection
# ---------------------------------------------------------------------------

# Retry policy for transient 'database is locked' errors (used by the
# _retry_on_locked decorator below).
_DB_RETRY_MAX = 3
_DB_RETRY_SLEEP = 0.5
80
def get_connection(db_path: str) -> sqlite3.Connection:
    """Open *db_path* and return a connection configured for this app.

    Configures dict-style row access (sqlite3.Row), WAL journaling for
    better read concurrency, and foreign-key enforcement.
    """
    connection = sqlite3.connect(db_path)
    connection.row_factory = sqlite3.Row
    for pragma in ('PRAGMA journal_mode=WAL', 'PRAGMA foreign_keys=ON'):
        connection.execute(pragma)
    return connection
87
+
88
+
89
def _retry_on_locked(func):
    """Decorator: retry up to _DB_RETRY_MAX times on 'database is locked'.

    SQLite raises OperationalError('database is locked') when another
    connection holds the write lock; a short sleep and retry usually
    resolves it. Any other OperationalError — and the final locked
    attempt — propagates to the caller.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)  # FIX: preserve wrapped function's name/docstring
    def wrapper(*args, **kwargs):
        for attempt in range(_DB_RETRY_MAX):
            try:
                return func(*args, **kwargs)
            except sqlite3.OperationalError as exc:
                if 'database is locked' in str(exc) and attempt < _DB_RETRY_MAX - 1:
                    logger.warning('DB locked — retrying (%d/%d)', attempt + 1, _DB_RETRY_MAX)
                    time.sleep(_DB_RETRY_SLEEP)
                else:
                    raise
    return wrapper
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # Initialisation & Migrations
106
+ # ---------------------------------------------------------------------------
107
+
108
def initialize(db_path: str) -> None:
    """Create tables and bring the schema up to DB_VERSION.

    Safe to call on every startup: the DDL is idempotent and migrations
    run only when the stored version lags behind DB_VERSION.
    """
    conn = get_connection(db_path)
    try:
        conn.executescript(_SCHEMA_V1)
        version_row = conn.execute(
            "SELECT value FROM meta WHERE key = 'db_version'"
        ).fetchone()
        if version_row is None:
            # Fresh database — stamp it with the current version.
            conn.execute(
                "INSERT INTO meta (key, value) VALUES ('db_version', ?)",
                (str(DB_VERSION),),
            )
        elif int(version_row['value']) < DB_VERSION:
            run_migrations(conn, int(version_row['value']), DB_VERSION)
        conn.commit()
    finally:
        conn.close()
129
+
130
+
131
def run_migrations(conn: sqlite3.Connection, current: int, target: int) -> None:
    """Apply sequential schema migrations from *current* up to *target*."""
    logger.info('Migrating DB from v%d to v%d', current, target)

    if current < 2:
        # v2: papers gained a summary_llm column for LLM summaries.
        try:
            conn.execute("ALTER TABLE papers ADD COLUMN summary_llm TEXT")
            logger.info('V2 Migration: Added summary_llm column to papers table.')
        except sqlite3.OperationalError as e:
            # Tolerate a partially-applied migration (column already there);
            # anything else is a real failure.
            if 'duplicate column name' not in str(e).lower():
                raise

    conn.execute(
        "UPDATE meta SET value = ? WHERE key = 'db_version'",
        (str(target),),
    )
149
+
150
+
151
+ # ---------------------------------------------------------------------------
152
+ # Paper helpers
153
+ # ---------------------------------------------------------------------------
154
+
155
def _paper_to_row(paper: Paper) -> tuple:
    """Flatten a Paper into the column order used by the INSERT statements.

    Lists (authors, categories) are JSON-encoded; date/datetime fields are
    stored as ISO-8601 strings; bool flags become 0/1 integers.
    """
    encoded_authors = json.dumps(paper.authors)
    encoded_categories = json.dumps(paper.categories)
    return (
        paper.paper_id,
        paper.source,
        paper.title,
        paper.abstract,
        paper.summary_llm,
        encoded_authors,
        paper.published_date.isoformat(),
        encoded_categories,
        paper.app_category,
        paper.pdf_url,
        paper.abstract_url,
        paper.citation_count,
        paper.relevance_score,
        paper.composite_score,
        paper.fetched_at.isoformat(),
        int(paper.is_bookmarked),
        int(paper.is_read),
    )
175
+
176
+
177
def _row_to_paper(row: sqlite3.Row) -> Paper:
    """Inverse of _paper_to_row: rebuild a Paper from a database row.

    JSON-packed columns (authors, categories) are decoded, ISO-8601
    strings are parsed back to date/datetime, and 0/1 flags become bools.
    """
    return Paper(
        paper_id=row['paper_id'],
        source=row['source'],
        title=row['title'],
        abstract=row['abstract'],
        summary_llm=row['summary_llm'],
        authors=json.loads(row['authors']),
        published_date=date.fromisoformat(row['published_date']),
        categories=json.loads(row['categories']),
        app_category=row['app_category'],
        pdf_url=row['pdf_url'],
        abstract_url=row['abstract_url'],
        citation_count=row['citation_count'],
        relevance_score=row['relevance_score'],
        composite_score=row['composite_score'],
        fetched_at=datetime.fromisoformat(row['fetched_at']),
        is_bookmarked=bool(row['is_bookmarked']),
        is_read=bool(row['is_read']),
    )
197
+
198
+
199
+ # ---------------------------------------------------------------------------
200
+ # CRUD Operations
201
+ # ---------------------------------------------------------------------------
202
+
203
@_retry_on_locked
def save_digest(db_path: str, digest: Digest) -> None:
    """Transactional insert of a digest + all its papers.

    Either the digest row, every paper, and every digest→paper link are
    all persisted, or (on any error) the whole write is rolled back and
    the exception re-raised. INSERT OR REPLACE makes re-saving the same
    digest idempotent.
    """
    conn = get_connection(db_path)
    try:
        conn.execute('BEGIN')

        # Insert digest record
        conn.execute(
            """INSERT OR REPLACE INTO digests
               (digest_id, week_start, generated_at, total_fetched,
                total_ranked, fetch_errors)
               VALUES (?, ?, ?, ?, ?, ?)""",
            (
                digest.digest_id,
                digest.week_start.isoformat(),
                digest.generated_at.isoformat(),
                digest.total_fetched,
                digest.total_ranked,
                json.dumps(digest.fetch_errors),  # list stored as JSON text
            ),
        )

        # Insert papers and link to digest.
        # NOTE: rank is one counter running across ALL categories, so
        # rank_order encodes category iteration order first, then the
        # in-category order — readers sort by it to recover both.
        rank = 0
        for category, papers in digest.papers.items():
            for paper in papers:
                conn.execute(
                    """INSERT OR REPLACE INTO papers
                       (paper_id, source, title, abstract, summary_llm, authors,
                        published_date, categories, app_category, pdf_url,
                        abstract_url, citation_count, relevance_score,
                        composite_score, fetched_at, is_bookmarked, is_read)
                       VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                    _paper_to_row(paper),
                )
                rank += 1
                conn.execute(
                    """INSERT OR REPLACE INTO digest_papers
                       (digest_id, paper_id, rank_order) VALUES (?, ?, ?)""",
                    (digest.digest_id, paper.paper_id, rank),
                )

        conn.commit()
        logger.info('Saved digest %s with %d papers', digest.digest_id, rank)
    except Exception:
        conn.rollback()
        logger.exception('Failed to save digest — rolled back')
        raise
    finally:
        conn.close()
254
+
255
+
256
@_retry_on_locked
def get_latest_digest(db_path: str) -> Optional[Digest]:
    """Load the most recent digest (by generated_at), or None if none exists.

    Linked papers are re-grouped into digest.papers keyed by app_category,
    preserving the stored rank_order within each category.
    """
    conn = get_connection(db_path)
    try:
        row = conn.execute(
            'SELECT * FROM digests ORDER BY generated_at DESC LIMIT 1'
        ).fetchone()
        if row is None:
            return None

        digest = Digest(
            digest_id=row['digest_id'],
            week_start=date.fromisoformat(row['week_start']),
            generated_at=datetime.fromisoformat(row['generated_at']),
            total_fetched=row['total_fetched'],
            total_ranked=row['total_ranked'],
            # fetch_errors may be NULL on legacy rows — default to [].
            fetch_errors=json.loads(row['fetch_errors'] or '[]'),
        )

        # Load papers linked to this digest in stored rank order.
        paper_rows = conn.execute(
            """SELECT p.* FROM papers p
               INNER JOIN digest_papers dp ON p.paper_id = dp.paper_id
               WHERE dp.digest_id = ?
               ORDER BY dp.rank_order""",
            (digest.digest_id,),
        ).fetchall()

        # Re-group by app_category; rank order is preserved within each list.
        papers_by_cat: dict = {}
        for pr in paper_rows:
            paper = _row_to_paper(pr)
            papers_by_cat.setdefault(paper.app_category, []).append(paper)
        digest.papers = papers_by_cat
        return digest
    finally:
        conn.close()
293
+
294
+
295
@_retry_on_locked
def get_papers(db_path: str, category: str, limit: int = 10) -> List[Paper]:
    """Return up to *limit* papers in *category*, best composite score first."""
    conn = get_connection(db_path)
    try:
        cursor = conn.execute(
            """SELECT * FROM papers
               WHERE app_category = ?
               ORDER BY composite_score DESC
               LIMIT ?""",
            (category, limit),
        )
        return [_row_to_paper(record) for record in cursor.fetchall()]
    finally:
        conn.close()
310
+
311
+
312
@_retry_on_locked
def toggle_bookmark(db_path: str, paper_id: str) -> bool:
    """Flip the bookmark flag on *paper_id* and return the new state.

    Returns False when no paper with that id exists.
    """
    conn = get_connection(db_path)
    try:
        conn.execute(
            """UPDATE papers
               SET is_bookmarked = CASE WHEN is_bookmarked = 0 THEN 1 ELSE 0 END
               WHERE paper_id = ?""",
            (paper_id,),
        )
        conn.commit()
        # Read back the (possibly unchanged) row to report the new state.
        state_row = conn.execute(
            'SELECT is_bookmarked FROM papers WHERE paper_id = ?',
            (paper_id,),
        ).fetchone()
        if state_row is None:
            return False
        return bool(state_row['is_bookmarked'])
    finally:
        conn.close()
331
+
332
+
333
@_retry_on_locked
def mark_read(db_path: str, paper_id: str) -> None:
    """Flag the paper identified by *paper_id* as read (no-op if absent)."""
    conn = get_connection(db_path)
    try:
        conn.execute('UPDATE papers SET is_read = 1 WHERE paper_id = ?', (paper_id,))
        conn.commit()
    finally:
        conn.close()
345
+
346
+
347
@_retry_on_locked
def get_bookmarked_papers(db_path: str) -> List[Paper]:
    """Return every bookmarked paper, best composite score first."""
    conn = get_connection(db_path)
    try:
        cursor = conn.execute(
            """SELECT * FROM papers
               WHERE is_bookmarked = 1
               ORDER BY composite_score DESC"""
        )
        return [_row_to_paper(record) for record in cursor.fetchall()]
    finally:
        conn.close()
app/core/models.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” Pure data models.
3
+
4
+ All models are standard Python dataclasses with no external dependencies,
5
+ making them fully testable in isolation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import uuid
11
+ from dataclasses import dataclass, field
12
+ from datetime import date, datetime
13
+ from typing import Dict, List, Optional
14
+
15
+
16
@dataclass
class Paper:
    """A single research paper from any source.

    Score fields default to 0.0 and are filled in by the ranker;
    summary_llm is populated later by the Groq summarizer.
    """

    paper_id: str                # arXiv ID or PubMed PMID — primary key
    source: str                  # 'arxiv' | 'semantic_scholar' | 'pubmed'
    title: str
    abstract: str
    authors: List[str]
    published_date: date         # UTC
    categories: List[str]        # e.g. ['cs.LG', 'stat.ML']
    app_category: str            # mapped app category slug
    summary_llm: Optional[str] = None  # brief summary (Idea, Method, Results) via Groq
    pdf_url: Optional[str] = None      # direct PDF link if available
    abstract_url: str = ''             # canonical web page
    citation_count: int = 0
    relevance_score: float = 0.0       # set by ranker
    composite_score: float = 0.0       # set by ranker
    fetched_at: datetime = field(default_factory=datetime.utcnow)  # naive UTC timestamp
    is_bookmarked: bool = False
    is_read: bool = False
37
+
38
+
39
@dataclass
class Digest:
    """A weekly digest containing ranked papers per category."""

    digest_id: str       # UUID4 hex
    week_start: date     # Monday of the fetched week (ISO)
    generated_at: datetime
    papers: Dict[str, List[Paper]] = field(default_factory=dict)
    total_fetched: int = 0
    total_ranked: int = 0
    fetch_errors: List[str] = field(default_factory=list)

    @classmethod
    def create_new(cls) -> 'Digest':
        """Factory: build a fresh, empty Digest for the current ISO week."""
        now = datetime.utcnow()
        today = now.date()
        # Roll back to Monday (weekday 0) of the current ISO week.
        week_monday = date.fromordinal(today.toordinal() - today.weekday())
        return cls(
            digest_id=uuid.uuid4().hex,
            week_start=week_monday,
            generated_at=now,
        )
64
+
65
+
66
@dataclass
class UserProfile:
    """User interest profile used by the ranker.

    The three weight_* defaults mirror the config defaults and sum to 1.0;
    each interests value is a free-text keyword string for relevance scoring.
    """

    # Per-category free-text interest keywords (slug -> keyword string).
    interests: Dict[str, str] = field(default_factory=lambda: {
        'ml': 'deep learning transformers attention',
        'ai': 'artificial intelligence language models',
        'cs': 'software engineering algorithms',
        'neuroscience': 'synaptic plasticity cortex neurons',
        'bci': 'brain computer interface EEG decoding',
    })
    weight_relevance: float = 0.60
    weight_citation: float = 0.30
    weight_recency: float = 0.10
    top_n_per_category: int = 5  # papers shown per digest category
app/core/notifier.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” Notification wrapper.
3
+
4
+ Primary: Telegram Bot notifications (works on any phone).
5
+ Fallback: plyer local notifications (desktop / Kivy builds).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import os
12
+ from typing import Optional
13
+
14
+ from app.core.models import Digest
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def send_digest_notification(digest: Digest, data_dir: str = '') -> None:
    """
    Send a notification about the latest digest.

    Tries Telegram first (phone notifications), then falls back to plyer.
    Telegram is only attempted when *data_dir* is given, since the
    credentials are read from files under it.
    """
    # Try Telegram first
    if data_dir:
        try:
            from app.core.telegram_bot import send_digest_notification as tg_send
            if tg_send(digest, data_dir):
                return  # Telegram succeeded
        except ImportError:
            # telegram_bot (or a dependency) unavailable — fall through to plyer
            pass
        except Exception:
            # Best-effort delivery: log quietly and fall back instead of crashing
            logger.debug('Telegram notification failed', exc_info=True)

    # Fallback: plyer local notification
    _send_plyer_notification(digest)
+ _send_plyer_notification(digest)
38
+
39
+
40
def _send_plyer_notification(digest: Digest) -> None:
    """Send a local notification via plyer (desktop / mobile Kivy).

    Silently skips when plyer is missing or the platform does not support
    notifications; failures are logged, never raised.
    """
    try:
        from plyer import notification
    except ImportError:
        logger.info('plyer not installed — skipping notification')
        return

    # Build one "Label: N papers" line per category, remembering the first
    # available paper title as a headline.
    summary_lines = []
    headline = ''
    for category, papers in digest.papers.items():
        paper_count = len(papers)
        pretty_label = category.replace('_', ' ').title()
        plural = 's' if paper_count != 1 else ''
        summary_lines.append(f'{pretty_label}: {paper_count} paper{plural}')
        if not headline and papers:
            headline = papers[0].title

    if not summary_lines:
        summary_lines = ['No new papers this week.']

    message = '\n'.join(summary_lines)
    if headline:
        if len(headline) > 80:
            headline = headline[:77] + '...'
        message += f'\n\n📄 {headline}'

    try:
        notification.notify(
            title='ResearchRadar — New Papers!',
            message=message,
            app_name='ResearchRadar',
            timeout=10,
        )
        logger.info('Notification sent for digest %s', digest.digest_id)
    except NotImplementedError:
        logger.warning('Notifications not supported on this platform')
    except Exception:
        logger.warning('Notification failed', exc_info=True)
+ logger.warning('Notification failed', exc_info=True)
app/core/scheduler.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” Job scheduling.
3
+
4
+ Uses APScheduler with CronTrigger for the weekly fetch job.
5
+ On Android, uses AlarmManager via pyjnius to wake the app if backgrounded.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from typing import Callable, Optional
12
+
13
+ from app.core.config import SCHEDULE_DAY, SCHEDULE_HOUR, SCHEDULE_MINUTE
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # APScheduler setup
19
+ # ---------------------------------------------------------------------------
20
+
21
def setup_scheduler(
    db_path: str,
    fetch_callback: Optional[Callable] = None,
) -> object:
    """
    Initialise and start the APScheduler BackgroundScheduler.

    - CronTrigger: every Sunday at 08:00 local time (from config constants).
    - misfire_grace_time: 3600s (fires within 1 hour of missed time).
    - max_instances: 1 (prevent overlapping fetch jobs).

    Returns the running scheduler object, or None when APScheduler is not
    installed. *fetch_callback* overrides the default weekly-fetch job
    (useful for tests).
    """
    try:
        from apscheduler.schedulers.background import BackgroundScheduler
        from apscheduler.triggers.cron import CronTrigger
    except ImportError:
        # Optional dependency — the rest of the app still works unscheduled.
        logger.warning('APScheduler not installed — scheduler disabled')
        return None

    if fetch_callback is None:
        # Default job: run the full weekly fetch pipeline against db_path.
        from app.fetcher.fetch_pipeline import run_weekly_fetch

        def _default_callback():
            run_weekly_fetch(db_path)

        fetch_callback = _default_callback

    scheduler = BackgroundScheduler()

    # Try to use SQLAlchemy job store so scheduled jobs persist across restarts.
    try:
        from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
        jobstore = SQLAlchemyJobStore(url=f'sqlite:///{db_path}')
        scheduler.add_jobstore(jobstore, 'default')
    except ImportError:
        logger.info('SQLAlchemy not available — using memory job store')

    scheduler.add_job(
        fetch_callback,
        CronTrigger(
            day_of_week=SCHEDULE_DAY,
            hour=SCHEDULE_HOUR,
            minute=SCHEDULE_MINUTE,
        ),
        id='weekly_fetch',
        name='Weekly Paper Fetch',
        misfire_grace_time=3600,  # still fire if we wake within an hour
        max_instances=1,          # never run two fetches concurrently
        replace_existing=True,    # idempotent when the job store persisted it
    )

    try:
        scheduler.start()
        logger.info(
            'Scheduler started — next fetch: %s %02d:%02d',
            SCHEDULE_DAY.upper(), SCHEDULE_HOUR, SCHEDULE_MINUTE,
        )
    except Exception as exc:
        # SchedulerAlreadyRunningError or other — log and continue
        logger.warning('Scheduler start issue (non-fatal): %s', exc)

    return scheduler
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Android AlarmManager integration (Android-only)
86
+ # ---------------------------------------------------------------------------
87
+
88
def setup_android_alarm() -> None:
    """
    Arm Android's AlarmManager to wake the app at Sunday 08:00.

    NOTE: setExactAndAllowWhileIdle() schedules a ONE-SHOT exact alarm
    (it can fire in Doze mode but does NOT repeat) — the app must call
    this again after each wake-up to arm the following week. The previous
    docstring claimed a repeating alarm; an unused weekly-interval
    constant left over from that design has been removed.

    Only called on Android. Guarded by platform check in main.py.
    Degrades to a log message when pyjnius is unavailable or any
    Android call fails.
    """
    try:
        from jnius import autoclass

        Context = autoclass('android.content.Context')
        Intent = autoclass('android.content.Intent')
        PendingIntent = autoclass('android.app.PendingIntent')
        AlarmManager = autoclass('android.app.AlarmManager')
        Calendar = autoclass('java.util.Calendar')

        from android import mActivity  # type: ignore[import]

        context = mActivity.getApplicationContext()
        alarm_mgr = context.getSystemService(Context.ALARM_SERVICE)

        # Relaunch the current activity when the alarm fires.
        intent = Intent(context, mActivity.getClass())
        pending = PendingIntent.getActivity(
            context, 0, intent,
            PendingIntent.FLAG_UPDATE_CURRENT | PendingIntent.FLAG_IMMUTABLE,
        )

        # Target wall-clock time: Sunday at SCHEDULE_HOUR:SCHEDULE_MINUTE.
        cal = Calendar.getInstance()
        cal.set(Calendar.DAY_OF_WEEK, Calendar.SUNDAY)
        cal.set(Calendar.HOUR_OF_DAY, SCHEDULE_HOUR)
        cal.set(Calendar.MINUTE, SCHEDULE_MINUTE)
        cal.set(Calendar.SECOND, 0)

        # One-shot exact alarm that is allowed to fire while idle (Doze).
        alarm_mgr.setExactAndAllowWhileIdle(
            AlarmManager.RTC_WAKEUP,
            cal.getTimeInMillis(),
            pending,
        )
        logger.info('Android AlarmManager set for Sunday %02d:%02d',
                    SCHEDULE_HOUR, SCHEDULE_MINUTE)

    except ImportError:
        logger.debug('pyjnius not available — not on Android')
    except Exception:
        logger.warning('Failed to set Android alarm', exc_info=True)
app/core/telegram_bot.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” Telegram Bot notification system.
3
+
4
+ Sends formatted paper digests to the user's Telegram chat.
5
+ Replaces plyer notifications for phone delivery.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ import os
13
+ from typing import Dict, List, Optional
14
+
15
+ import requests
16
+
17
+ from app.core.models import Digest, Paper
18
+ from app.core.config import CATEGORY_LABELS
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Configuration
24
+ # ---------------------------------------------------------------------------
25
+
26
+ _CONFIG_KEYS = ('telegram_bot_token', 'telegram_chat_id')
27
+
28
+
29
+ def _load_telegram_config(data_dir: str) -> dict:
30
+ """Load Telegram config from settings.json."""
31
+ path = os.path.join(data_dir, 'settings.json')
32
+ if not os.path.exists(path):
33
+ return {}
34
+ try:
35
+ with open(path, 'r', encoding='utf-8') as f:
36
+ return json.load(f)
37
+ except (json.JSONDecodeError, OSError):
38
+ return {}
39
+
40
+
41
def _get_credentials(data_dir: str) -> tuple:
    """
    Resolve the Telegram bot token and chat ID.

    Environment variables (TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID) take
    precedence over values stored in settings.json; either value may be
    an empty string when nothing is configured.
    """
    stored = _load_telegram_config(data_dir)
    token = os.getenv('TELEGRAM_BOT_TOKEN') or stored.get('telegram_bot_token', '')
    chat_id = os.getenv('TELEGRAM_CHAT_ID') or stored.get('telegram_chat_id', '')
    return token, chat_id
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Message formatting
62
+ # ---------------------------------------------------------------------------
63
+
64
+ def _format_paper(rank: int, paper: Paper) -> str:
65
+ """Format a single paper as a Telegram message block."""
66
+ # Authors (first author + et al.)
67
+ if paper.authors:
68
+ if len(paper.authors) > 2:
69
+ authors = f"{paper.authors[0]} et al."
70
+ else:
71
+ authors = ", ".join(paper.authors)
72
+ else:
73
+ authors = "Unknown"
74
+
75
+ # Score badge
76
+ score = f"{paper.composite_score:.2f}"
77
+
78
+ lines = [
79
+ f"*{rank}.* [{paper.title}]({paper.abstract_url})",
80
+ f" πŸ‘€ _{authors}_",
81
+ f" πŸ“… {paper.published_date.isoformat()} β€’ πŸ“Š Score: {score} β€’ πŸ“ Citations: {paper.citation_count}",
82
+ ]
83
+
84
+ # LLM Summary (Structured)
85
+ if paper.summary_llm:
86
+ lines.append("")
87
+ lines.append(f"πŸ€– *AI Summary:*")
88
+ # Indent the summary for readability
89
+ for slink in paper.summary_llm.split('\n'):
90
+ if slink.strip():
91
+ lines.append(f" _{slink.strip()}_")
92
+
93
+ if paper.pdf_url:
94
+ lines.append("")
95
+ lines.append(f" πŸ“„ [PDF]({paper.pdf_url})")
96
+
97
+ return "\n".join(lines)
98
+
99
+
100
def format_digest_message(digest: Digest) -> str:
    """Build the complete digest as a single Telegram-ready Markdown string."""
    out = [
        "πŸ“‘ *ResearchRadar β€” Daily Paper Digest*",
        f"πŸ“… Week of {digest.week_start.isoformat()}",
        f"πŸ• Generated: {digest.generated_at.strftime('%Y-%m-%d %H:%M UTC')}",
        "",
    ]

    delivered = 0
    for slug, cat_papers in digest.papers.items():
        if not cat_papers:
            continue
        delivered += len(cat_papers)
        label = CATEGORY_LABELS.get(slug, slug.title())
        out.append("━━━━━━━━━━━━━━━━━━━━")
        out.append(f"πŸ”¬ *{label}* ({len(cat_papers)} papers)")
        out.append("")
        for rank, paper in enumerate(cat_papers, 1):
            out.append(_format_paper(rank, paper))
            out.append("")

    if delivered == 0:
        out.append("_No new papers found this cycle. Check back tomorrow!_")

    # Pipeline statistics footer.
    out.append("━━━━━━━━━━━━━━━━━━━━")
    out.append(
        f"πŸ“Š *Summary:* {digest.total_fetched} fetched β†’ "
        f"{digest.total_ranked} ranked β†’ {delivered} delivered"
    )
    if digest.fetch_errors:
        out.append(f"⚠️ {len(digest.fetch_errors)} non-fatal errors logged")

    return "\n".join(out)
140
+
141
+
142
def format_short_notification(digest: Digest) -> str:
    """One-line push-style summary of per-category paper counts."""
    per_category = [
        f"{CATEGORY_LABELS.get(slug, slug.title())}: {len(cat_papers)}"
        for slug, cat_papers in digest.papers.items()
        if cat_papers
    ]

    if not per_category:
        return "πŸ“‘ ResearchRadar: No new papers found today."

    total = sum(len(cat_papers) for cat_papers in digest.papers.values())
    summary = " | ".join(per_category)
    return f"πŸ“‘ *ResearchRadar* β€” {total} new papers!\n{summary}"
156
+
157
+
158
+ # ---------------------------------------------------------------------------
159
+ # Sending
160
+ # ---------------------------------------------------------------------------
161
+
162
def send_message(
    token: str,
    chat_id: str,
    text: str,
    parse_mode: str = 'Markdown',
    disable_preview: bool = True,
) -> bool:
    """
    Deliver *text* to *chat_id* via the Telegram Bot API.

    Over-long messages are delegated to the chunked sender to respect
    Telegram's 4096-character cap. Returns True on success, False on
    failure (never raises).
    """
    # Stay safely under Telegram's 4096-char hard limit per message.
    if len(text) > 4000:
        return _send_chunked(token, chat_id, text, parse_mode, disable_preview)

    endpoint = f"https://api.telegram.org/bot{token}/sendMessage"
    payload = {
        'chat_id': chat_id,
        'text': text,
        'parse_mode': parse_mode,
        'disable_web_page_preview': disable_preview,
    }

    try:
        resp = requests.post(endpoint, json=payload, timeout=15)
        if resp.status_code != 200:
            logger.error('Telegram HTTP %d: %s', resp.status_code, resp.text[:200])
            return False
        body = resp.json()
        if body.get('ok'):
            logger.info('Telegram message sent to chat %s', chat_id)
            return True
        logger.error('Telegram API error: %s', body.get('description'))
        return False
    except requests.exceptions.RequestException as exc:
        logger.error('Telegram send failed: %s', exc)
        return False
207
+
208
+
209
def _send_chunked(
    token: str,
    chat_id: str,
    text: str,
    parse_mode: str,
    disable_preview: bool,
) -> bool:
    """
    Split an over-long message at line boundaries (~3800 chars per chunk)
    and send each chunk in order via :func:`send_message`.

    Returns True only when every chunk was delivered successfully.
    """
    # Hoisted out of the send loop β€” previously `import time` executed on
    # every delayed iteration.
    import time

    chunks = []
    current = ""
    for line in text.split("\n"):
        # Start a new chunk once appending this line would exceed the budget.
        if current and len(current) + len(line) + 1 > 3800:
            chunks.append(current)
            current = line
        else:
            current = f"{current}\n{line}" if current else line
    if current:
        chunks.append(current)

    success = True
    for index, chunk in enumerate(chunks):
        if index > 0:
            time.sleep(0.5)  # Rate limiting courtesy
        if not send_message(token, chat_id, chunk, parse_mode, disable_preview):
            success = False
    return success
241
+
242
+
243
+ # ---------------------------------------------------------------------------
244
+ # High-level API
245
+ # ---------------------------------------------------------------------------
246
+
247
def send_digest_notification(digest: Digest, data_dir: str) -> bool:
    """
    Push the digest to the configured Telegram chat.

    Credentials come from env vars or settings.json. Returns True when the
    full digest was delivered; False otherwise (never raises).
    """
    token, chat_id = _get_credentials(data_dir)
    if not (token and chat_id):
        logger.warning(
            'Telegram not configured β€” set TELEGRAM_BOT_TOKEN and '
            'TELEGRAM_CHAT_ID in environment or settings.json'
        )
        return False

    # A brief heads-up first, then the full digest body.
    send_message(token, chat_id, format_short_notification(digest))
    return send_message(token, chat_id, format_digest_message(digest))
270
+
271
+
272
def send_test_message(data_dir: str) -> bool:
    """Send a one-off message so the user can verify their Telegram setup."""
    token, chat_id = _get_credentials(data_dir)

    if not (token and chat_id):
        print("❌ Telegram not configured!")
        print(" Set TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID in settings.json")
        print(" or as environment variables.")
        return False

    body = (
        "βœ… *ResearchRadar β€” Test Message*\n\n"
        "Your Telegram notifications are working!\n"
        "You'll receive daily paper digests at your configured time."
    )
    ok = send_message(token, chat_id, body)

    if ok:
        print("βœ… Test message sent! Check your Telegram.")
    else:
        print("❌ Failed to send test message. Check your bot token and chat ID.")
    return ok
app/fetcher/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Data acquisition layer
app/fetcher/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (147 Bytes). View file
 
app/fetcher/__pycache__/arxiv_client.cpython-312.pyc ADDED
Binary file (6.43 kB). View file
 
app/fetcher/__pycache__/fetch_pipeline.cpython-312.pyc ADDED
Binary file (9.61 kB). View file
 
app/fetcher/__pycache__/http_session.cpython-312.pyc ADDED
Binary file (8.26 kB). View file
 
app/fetcher/__pycache__/pubmed_client.cpython-312.pyc ADDED
Binary file (7.59 kB). View file
 
app/fetcher/__pycache__/semantic_scholar.cpython-312.pyc ADDED
Binary file (6.79 kB). View file
 
app/fetcher/arxiv_client.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” arXiv Atom API client.
3
+
4
+ Fetches papers submitted/updated within the last N days for given arXiv
5
+ categories. Uses xml.etree.ElementTree (stdlib) β€” no lxml needed.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import xml.etree.ElementTree as ET
12
+ from datetime import date, datetime, timedelta
13
+ from typing import List
14
+
15
+ from app.core.config import ARXIV_BASE_URL, ARXIV_MAX_RESULTS
16
+ from app.core.models import Paper
17
+ from app.fetcher.http_session import FetchError, RetrySession
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # arXiv Atom namespace
22
+ _NS = {
23
+ 'atom': 'http://www.w3.org/2005/Atom',
24
+ 'arxiv': 'http://arxiv.org/schemas/atom',
25
+ }
26
+
27
+
28
def fetch_papers(
    category_slug: str,
    arxiv_cats: List[str],
    session: RetrySession,
    days_back: int = 7,
) -> List[Paper]:
    """
    Query the arXiv Atom API for papers in *arxiv_cats* (newest first)
    and map them onto app category *category_slug*.

    Never raises β€” any fetch or parse failure yields an empty list.
    """
    window_end = date.today()
    window_start = window_end - timedelta(days=days_back)

    search_query = '(' + ' OR '.join(f'cat:{c}' for c in arxiv_cats) + ')'
    request_params = {
        'search_query': search_query,
        'start': 0,
        'max_results': ARXIV_MAX_RESULTS,
        'sortBy': 'submittedDate',
        'sortOrder': 'descending',
    }

    try:
        response = session.get(ARXIV_BASE_URL, params=request_params)
    except FetchError as exc:
        logger.error('arXiv fetch failed for %s: %s', category_slug, exc)
        return []

    try:
        feed = ET.fromstring(response.text)
    except ET.ParseError as exc:
        logger.error(
            'arXiv XML parse error: %s β€” snippet: %s',
            exc, response.text[:300],
        )
        return []

    results: List[Paper] = []
    for entry in feed.findall('atom:entry', _NS):
        try:
            parsed = _parse_entry(entry, category_slug, window_start, window_end)
        except Exception:
            logger.debug('Skipping malformed arXiv entry', exc_info=True)
            continue
        if parsed is not None:
            results.append(parsed)

    logger.info(
        'arXiv: fetched %d papers for [%s] (%s)',
        len(results), category_slug, ', '.join(arxiv_cats),
    )
    return results
84
+
85
+
86
def _parse_entry(
    entry: ET.Element,
    category_slug: str,
    start: date,
    end: date,
) -> Paper | None:
    """
    Parse one Atom ``<entry>`` into a :class:`Paper`.

    Returns None when the entry is malformed (missing title, abstract, or
    arXiv ID) or when its publication date falls outside the
    [*start*, *end*] window. Previously the window arguments were accepted
    but never used, so a "last N days" fetch could include older papers.
    """
    title_el = entry.find('atom:title', _NS)
    abstract_el = entry.find('atom:summary', _NS)
    if title_el is None or abstract_el is None:
        return None

    # Collapse internal whitespace/newlines that arXiv embeds in fields.
    title = ' '.join((title_el.text or '').split())
    abstract = ' '.join((abstract_el.text or '').split())
    if not title or not abstract:
        logger.debug('Skipping entry with empty title/abstract')
        return None

    # arXiv ID, e.g. "2401.01234v2"
    id_el = entry.find('atom:id', _NS)
    raw_id = (id_el.text or '') if id_el is not None else ''
    arxiv_id = raw_id.replace('http://arxiv.org/abs/', '').strip()
    if not arxiv_id:
        return None
    paper_id = f'arxiv:{arxiv_id}'

    # Authors
    authors = []
    for author_el in entry.findall('atom:author', _NS):
        name_el = author_el.find('atom:name', _NS)
        if name_el is not None and name_el.text:
            authors.append(name_el.text.strip())

    # Published date; fall back to today (inside the window) when absent.
    pub_el = entry.find('atom:published', _NS)
    pub_text = (pub_el.text or '') if pub_el is not None else ''
    try:
        published = datetime.fromisoformat(
            pub_text.replace('Z', '+00:00')
        ).date()
    except (ValueError, TypeError):
        published = date.today()

    # Enforce the requested recency window (fixes ignored start/end args).
    if not (start <= published <= end):
        return None

    # Categories
    categories = []
    for cat_el in entry.findall('atom:category', _NS):
        term = cat_el.get('term', '')
        if term:
            categories.append(term)

    # PDF link: prefer the feed's explicit link, else derive from the ID.
    pdf_url = None
    for link_el in entry.findall('atom:link', _NS):
        if link_el.get('title') == 'pdf':
            pdf_url = link_el.get('href')
            break
    if pdf_url is None:
        pdf_url = f'https://arxiv.org/pdf/{arxiv_id}'

    abstract_url = f'https://arxiv.org/abs/{arxiv_id}'

    return Paper(
        paper_id=paper_id,
        source='arxiv',
        title=title,
        abstract=abstract,
        authors=authors,
        published_date=published,
        categories=categories,
        app_category=category_slug,
        pdf_url=pdf_url,
        abstract_url=abstract_url,
    )
app/fetcher/crossref_client.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” CrossRef DOI client.
3
+
4
+ DOI resolution & citation metadata fallback.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from datetime import date
11
+ from typing import Optional
12
+
13
+ from app.core.config import CROSSREF_BASE_URL
14
+ from app.fetcher.http_session import FetchError, RetrySession
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def get_citation_count(doi: str, session: RetrySession) -> Optional[int]:
    """
    Look up CrossRef's 'is-referenced-by-count' for *doi*.

    Best-effort enrichment: any failure returns None instead of raising.
    """
    try:
        response = session.get(
            f'{CROSSREF_BASE_URL}/{doi}',
            headers={'Accept': 'application/json'},
        )
        return response.json().get('message', {}).get('is-referenced-by-count')
    except (FetchError, ValueError, KeyError) as exc:
        logger.debug('CrossRef lookup failed for DOI %s: %s', doi, exc)
        return None
37
+
38
+
39
def resolve_doi(doi: str, session: RetrySession) -> Optional[dict]:
    """
    Resolve *doi* against CrossRef and return a small metadata dict
    (doi, title, authors, citation_count), or None on any failure.
    """
    try:
        response = session.get(
            f'{CROSSREF_BASE_URL}/{doi}',
            headers={'Accept': 'application/json'},
        )
        record = response.json().get('message', {})

        titles = record.get('title', [])
        author_names = [
            f"{entry.get('given', '')} {entry.get('family', '')}".strip()
            for entry in record.get('author', [])
        ]

        return {
            'doi': doi,
            'title': titles[0] if titles else '',
            'authors': author_names,
            'citation_count': record.get('is-referenced-by-count', 0),
        }
    except (FetchError, ValueError, KeyError) as exc:
        logger.debug('CrossRef resolve failed for DOI %s: %s', doi, exc)
        return None
app/fetcher/fetch_pipeline.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” Fetch pipeline orchestration.
3
+
4
+ Contains the main Sunday job logic. Coordinates all API clients,
5
+ handles fallback, deduplication, ranking, storage, and notification.
6
+
7
+ This function must **never raise** β€” all exceptions are caught and
8
+ logged into ``Digest.fetch_errors``.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ from difflib import SequenceMatcher
15
+ from typing import Dict, List
16
+
17
+ from app.core.config import (
18
+ ARXIV_CATEGORY_MAP,
19
+ KEYWORD_MAP,
20
+ PUBMED_MESH_MAP,
21
+ TOP_N_PER_CATEGORY,
22
+ AI_FILTERS,
23
+ )
24
+ from app.core.models import Digest, Paper, UserProfile
25
+ from app.core import database
26
+ from app.fetcher import arxiv_client, pubmed_client, semantic_scholar
27
+ from app.fetcher.http_session import FetchError, RetrySession
28
+ from app.ranker import composite_ranker
29
+ from app.summarizer.groq_client import GroqSummarizer
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Public entry point
36
+ # ---------------------------------------------------------------------------
37
+
38
def run_weekly_fetch(
    db_path: str,
    profile: UserProfile | None = None,
) -> Digest:
    """
    Main weekly pipeline. Called by the scheduler every Sunday.

    1. Fetch papers from arXiv (primary) with Semantic Scholar fallback.
    2. For neuro/BCI categories, additionally fetch from PubMed and merge.
    3. Enrich citation counts (best-effort).
    4. Rank papers via composite ranker.
    5. Save digest to DB and send notification.
    6. Return the Digest.

    Never raises: every failure is caught, logged, and recorded in
    ``digest.fetch_errors`` so the scheduler always receives a Digest.

    Args:
        db_path: Path to the database consumed by ``database.save_digest``.
        profile: Ranking preferences; a fresh ``UserProfile`` when omitted.
    """
    if profile is None:
        profile = UserProfile()

    digest = Digest.create_new()
    # One shared HTTP session (retry/back-off policy) reused by all clients.
    session = RetrySession()
    all_papers: Dict[str, List[Paper]] = {}

    for category, arxiv_cats in ARXIV_CATEGORY_MAP.items():
        papers = _fetch_category(category, arxiv_cats, session, digest)

        # PubMed supplement for neuroscience & BCI categories only.
        if category in PUBMED_MESH_MAP:
            pubmed_papers = _fetch_pubmed(category, session, digest)
            papers = _deduplicate(papers + pubmed_papers)

            # Product requirement: neuro/BCI results must involve AI or ML
            # in some way, so filter the merged set down to those papers.
            papers = _ai_filter(papers)

        all_papers[category] = papers

    # Enrich citation counts across all categories (best-effort; a failure
    # here only degrades ranking quality, it never aborts the run).
    flat = [p for cat_list in all_papers.values() for p in cat_list]
    try:
        semantic_scholar.enrich_citations(flat, session)
    except Exception as exc:
        logger.warning('Citation enrichment failed: %s', exc)
        digest.fetch_errors.append(f'Citation enrichment: {exc}')

    # Rank every category's papers with the composite ranker.
    digest.total_fetched = sum(len(v) for v in all_papers.values())
    ranked = composite_ranker.rank_all(all_papers, profile)

    # Summarize only AFTER ranking so LLM calls are spent solely on the
    # top-N papers that survive into the final digest.
    _summarize_top_papers(ranked)

    digest.papers = ranked
    digest.total_ranked = sum(len(v) for v in ranked.values())

    # Persist β€” a DB failure is recorded but does not abort the run.
    try:
        database.save_digest(db_path, digest)
    except Exception as exc:
        logger.error('Failed to save digest: %s', exc)
        digest.fetch_errors.append(f'DB save error: {exc}')

    # Notification (best-effort; imported lazily so a missing/broken
    # notifier module cannot break the fetch pipeline at import time).
    try:
        from app.core.notifier import send_digest_notification
        send_digest_notification(digest)
    except Exception as exc:
        logger.warning('Notification failed: %s', exc)

    return digest
107
+
108
+
109
+ # ---------------------------------------------------------------------------
110
+ # Internal helpers
111
+ # ---------------------------------------------------------------------------
112
+
113
def _fetch_category(
    category: str,
    arxiv_cats: list,
    session: RetrySession,
    digest: Digest,
) -> List[Paper]:
    """Fetch from arXiv; when that yields nothing, try Semantic Scholar."""
    fetched: List[Paper] = []

    try:
        fetched = arxiv_client.fetch_papers(category, arxiv_cats, session)
    except Exception as exc:
        message = f'arXiv error [{category}]: {exc}'
        logger.warning(message)
        digest.fetch_errors.append(message)

    if not fetched:
        logger.info('arXiv empty for [%s] β€” trying Semantic Scholar', category)
        try:
            search_terms = KEYWORD_MAP.get(category, [category])
            fetched = semantic_scholar.fetch_papers(category, search_terms, session)
        except Exception as exc:
            message = f'Semantic Scholar error [{category}]: {exc}'
            logger.warning(message)
            digest.fetch_errors.append(message)

    if not fetched:
        logger.info('No papers found for [%s] from any source', category)

    return fetched
143
+
144
+
145
def _fetch_pubmed(
    category: str,
    session: RetrySession,
    digest: Digest,
) -> List[Paper]:
    """Best-effort supplemental fetch from PubMed for *category*."""
    mesh_query = PUBMED_MESH_MAP.get(category, '')
    if not mesh_query:
        return []
    try:
        return pubmed_client.fetch_papers(category, mesh_query, session)
    except Exception as exc:
        message = f'PubMed error [{category}]: {exc}'
        logger.warning(message)
        digest.fetch_errors.append(message)
        return []
161
+
162
+
163
def _summarize_top_papers(papers_by_cat: Dict[str, List[Paper]]):
    """Generate LLM summaries for every paper kept in the final digest."""
    summarizer = GroqSummarizer()
    for category, selected in papers_by_cat.items():
        if not selected:
            continue
        logger.info("Summarizing %d papers for category [%s]...", len(selected), category)
        summarizer.summarize_many(selected)
170
+
171
+
172
def _ai_filter(papers: List[Paper]) -> List[Paper]:
    """Keep only papers whose title or abstract mentions an AI/ML keyword."""
    if not papers:
        return []

    matched: List[Paper] = []
    for paper in papers:
        haystack = f"{paper.title} {paper.abstract}".lower()
        if any(keyword in haystack for keyword in AI_FILTERS):
            matched.append(paper)
    return matched
183
+
184
+
185
+ def _deduplicate(papers: List[Paper]) -> List[Paper]:
186
+ """
187
+ Remove duplicate papers.
188
+
189
+ Two papers are considered duplicates if:
190
+ - Their paper_id matches, OR
191
+ - Their title similarity (SequenceMatcher ratio) > 0.92
192
+
193
+ When merging, prefer arXiv > Semantic Scholar > PubMed.
194
+ """
195
+ SOURCE_PRIORITY = {'arxiv': 0, 'semantic_scholar': 1, 'pubmed': 2}
196
+ seen_ids: set = set()
197
+ seen_titles: List[str] = []
198
+ result: List[Paper] = []
199
+
200
+ # Sort by source priority so preferred sources come first
201
+ papers.sort(key=lambda p: SOURCE_PRIORITY.get(p.source, 9))
202
+
203
+ for paper in papers:
204
+ if paper.paper_id in seen_ids:
205
+ continue
206
+
207
+ is_dup = False
208
+ for existing_title in seen_titles:
209
+ if SequenceMatcher(None, paper.title.lower(), existing_title).ratio() > 0.92:
210
+ is_dup = True
211
+ break
212
+
213
+ if is_dup:
214
+ continue
215
+
216
+ seen_ids.add(paper.paper_id)
217
+ seen_titles.append(paper.title.lower())
218
+ result.append(paper)
219
+
220
+ if len(papers) != len(result):
221
+ logger.info(
222
+ 'Deduplication: %d β†’ %d papers', len(papers), len(result),
223
+ )
224
+
225
+ return result
app/fetcher/http_session.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” RetrySession.
3
+
4
+ Single point of contact for all outbound HTTP.
5
+ No other module calls `requests` directly.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import random
12
+ import time
13
+ from typing import Optional, Set
14
+
15
+ import requests
16
+
17
+ from app.core.config import (
18
+ HTTP_BACKOFF_BASE,
19
+ HTTP_BACKOFF_MAX,
20
+ HTTP_MAX_RETRIES,
21
+ HTTP_TIMEOUT,
22
+ RETRY_STATUS_CODES,
23
+ USER_AGENT,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Custom Exceptions
30
+ # ---------------------------------------------------------------------------
31
+
32
class FetchError(Exception):
    """Base exception for all fetch-related errors.

    Catching this single type covers every failure RetrySession can raise.
    """
    pass


class FetchTimeoutError(FetchError):
    """Raised when a request exceeds the configured timeout."""
    pass


class FetchNetworkError(FetchError):
    """Raised on connection-level failures (DNS, refused, reset)."""
    pass


class SourceNotFoundError(FetchError):
    """Raised on HTTP 404."""
    pass


class SourceAuthError(FetchError):
    """Raised on HTTP 400 / 401 / 403 (client or auth errors that a
    retry cannot fix β€” see RetrySession's status-code handling)."""
    pass


class MaxRetriesExceeded(FetchError):
    """Raised when all retry attempts are exhausted without success."""
    pass
60
+
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # RetrySession
64
+ # ---------------------------------------------------------------------------
65
+
66
class RetrySession:
    """
    HTTP wrapper with exponential back-off, retries, and typed error mapping.

    All outbound HTTP in the app goes through this class; no other module
    calls ``requests`` directly. Retryable status codes are re-attempted
    with jittered exponential back-off; everything else is mapped onto a
    :class:`FetchError` subclass.
    """

    def __init__(
        self,
        max_retries: int = HTTP_MAX_RETRIES,
        backoff_base: int = HTTP_BACKOFF_BASE,
        backoff_max: int = HTTP_BACKOFF_MAX,
        timeout: int = HTTP_TIMEOUT,
        retry_status_codes: Optional[Set[int]] = None,
    ):
        self.max_retries = max_retries
        self.backoff_base = backoff_base
        self.backoff_max = backoff_max
        self.timeout = timeout
        self.retry_status_codes = retry_status_codes or RETRY_STATUS_CODES
        self._session = requests.Session()
        self._session.headers.update({'User-Agent': USER_AGENT})

    # ------------------------------------------------------------------
    def get(
        self,
        url: str,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
    ) -> requests.Response:
        """
        GET *url* with automatic retries and exponential back-off.

        Returns a `requests.Response` with status 200 on success.
        Raises a typed `FetchError` subclass on failure.
        """
        return self._request('GET', url, params=params, headers=headers)

    # ------------------------------------------------------------------
    def post(
        self,
        url: str,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
    ) -> requests.Response:
        """POST with the same retry / error logic as GET."""
        return self._request('POST', url, json=json, headers=headers)

    # ------------------------------------------------------------------
    def _request(
        self,
        method: str,
        url: str,
        params: Optional[dict] = None,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
    ) -> requests.Response:
        """
        Shared retry loop behind :meth:`get` and :meth:`post`.

        Previously this logic was duplicated verbatim in both methods
        (each carrying a dead, never-read ``last_exc`` local); it is
        factored out so the retry policy lives in exactly one place.
        """
        merged_headers = dict(self._session.headers)
        if headers:
            merged_headers.update(headers)

        for attempt in range(self.max_retries + 1):
            try:
                resp = self._session.request(
                    method,
                    url,
                    params=params,
                    json=json,
                    headers=merged_headers,
                    timeout=self.timeout,
                )

                if resp.status_code == 200:
                    return resp

                if resp.status_code in self.retry_status_codes:
                    # Jittered exponential back-off, capped at backoff_max.
                    wait = min(
                        self.backoff_base ** attempt + random.uniform(0, 1),
                        self.backoff_max,
                    )
                    logger.warning(
                        '%s %d from %s β€” retrying in %.1fs (attempt %d/%d)',
                        method, resp.status_code, url, wait,
                        attempt + 1, self.max_retries,
                    )
                    time.sleep(wait)
                    continue

                if resp.status_code == 404:
                    raise SourceNotFoundError(f'404 Not Found: {url}')

                if resp.status_code in {400, 401, 403}:
                    raise SourceAuthError(
                        f'HTTP {resp.status_code} from {url}'
                    )

                # Other 4xx / unexpected codes
                raise FetchError(
                    f'HTTP {resp.status_code} from {url}: '
                    f'{resp.text[:200]}'
                )

            except requests.exceptions.Timeout as exc:
                raise FetchTimeoutError(f'Timeout on {url}') from exc

            except requests.exceptions.ConnectionError as exc:
                raise FetchNetworkError(f'Connection error on {url}') from exc

            except FetchError:
                raise  # re-raise our own typed exceptions untouched

            except requests.exceptions.RequestException as exc:
                raise FetchError(f'Request error on {url}: {exc}') from exc

        raise MaxRetriesExceeded(
            f'All {self.max_retries} retries exhausted for {method} {url}'
        )
app/fetcher/pubmed_client.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” PubMed E-utilities client.
3
+
4
+ Supplemental source for Neuroscience and BCI categories only.
5
+ Two-step process: ESearch to get IDs, then EFetch to get abstracts.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import xml.etree.ElementTree as ET
12
+ from datetime import date, datetime
13
+ from typing import List, Optional
14
+
15
+ from app.core.config import NCBI_API_KEY, PUBMED_BASE_URL
16
+ from app.core.models import Paper
17
+ from app.fetcher.http_session import FetchError, RetrySession
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
def fetch_papers(
    category_slug: str,
    mesh_terms: str,
    session: RetrySession,
    days_back: int = 7,
) -> List[Paper]:
    """
    Fetch recent papers from PubMed matching *mesh_terms*.

    Two-step E-utilities flow: ESearch (with history server) resolves the
    query, EFetch pulls the matching abstracts as XML.

    Returns a list of Paper instances. Never raises — returns [] on error.
    """
    # ---------------------------------------------------------------
    # Step 1 — ESearch: resolve query to a history-server handle
    # ---------------------------------------------------------------
    search_query: dict = {
        'db': 'pubmed',
        'term': f'{mesh_terms} AND ("last {days_back} days"[PDat])',
        'retmax': 50,
        'retmode': 'json',
        'usehistory': 'y',
    }
    if NCBI_API_KEY:
        search_query['api_key'] = NCBI_API_KEY

    try:
        search_response = session.get(
            f'{PUBMED_BASE_URL}/esearch.fcgi', params=search_query
        )
    except FetchError as exc:
        logger.error('PubMed ESearch failed for %s: %s', category_slug, exc)
        return []

    try:
        payload = search_response.json()
    except ValueError:
        logger.error('PubMed ESearch returned invalid JSON')
        return []

    search_result = payload.get('esearchresult', {})
    if int(search_result.get('count', 0)) == 0:
        logger.info('PubMed: 0 results for %s', category_slug)
        return []

    history_env = search_result.get('webenv', '')
    history_key = search_result.get('querykey', '')
    if not (history_env and history_key):
        logger.error('PubMed ESearch missing WebEnv / query_key')
        return []

    # ---------------------------------------------------------------
    # Step 2 — EFetch: pull abstracts via the history server
    # ---------------------------------------------------------------
    fetch_query: dict = {
        'db': 'pubmed',
        'WebEnv': history_env,
        'query_key': history_key,
        'retmax': 50,
        'retmode': 'xml',
        'rettype': 'abstract',
    }
    if NCBI_API_KEY:
        fetch_query['api_key'] = NCBI_API_KEY

    try:
        fetch_response = session.get(
            f'{PUBMED_BASE_URL}/efetch.fcgi', params=fetch_query
        )
    except FetchError as exc:
        logger.error('PubMed EFetch failed for %s: %s', category_slug, exc)
        return []

    try:
        tree_root = ET.fromstring(fetch_response.text)
    except ET.ParseError as exc:
        logger.error('PubMed XML parse error: %s', exc)
        return []

    # One malformed article must not abort the whole batch.
    collected: List[Paper] = []
    for node in tree_root.findall('.//PubmedArticle'):
        try:
            parsed = _parse_article(node, category_slug)
        except Exception:
            logger.debug('Skipping malformed PubMed article', exc_info=True)
            continue
        if parsed is not None:
            collected.append(parsed)

    logger.info('PubMed: fetched %d papers for [%s]', len(collected), category_slug)
    return collected
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # XML parsing helpers
116
+ # ---------------------------------------------------------------------------
117
+
118
def _parse_article(el: ET.Element, category_slug: str) -> Optional[Paper]:
    """Parse a single <PubmedArticle> element.

    Returns None when a required field (PMID, title, abstract) is missing.
    """
    # PMID — required; becomes the stable paper identifier.
    pmid_el = el.find('.//PMID')
    if pmid_el is None or not pmid_el.text:
        return None
    pmid = pmid_el.text.strip()
    paper_id = f'pubmed:{pmid}'

    # Title — BUG FIX: use itertext() so inline markup (<i>, <sup>, …)
    # inside <ArticleTitle> does not truncate the text; Element.text
    # stops at the first child element.
    title_el = el.find('.//ArticleTitle')
    title = ''.join(title_el.itertext()).strip() if title_el is not None else ''
    if not title:
        return None

    # Abstract — may be structured (Background, Methods, etc.); same
    # itertext() treatment for inline markup inside each section.
    abstract_parts: List[str] = []
    for abs_el in el.findall('.//AbstractText'):
        label = abs_el.get('Label', '')
        text = ''.join(abs_el.itertext()).strip()
        if label and text:
            abstract_parts.append(f'{label}: {text}')
        elif text:
            abstract_parts.append(text)
    abstract = '\n'.join(abstract_parts)
    if not abstract:
        return None

    # Authors — "Fore Last", skipping entries without a surname
    # (collective names carry no LastName element).
    authors: List[str] = []
    for author_el in el.findall('.//Author'):
        last = author_el.findtext('LastName', '').strip()
        fore = author_el.findtext('ForeName', '').strip()
        if last:
            authors.append(f'{fore} {last}'.strip())

    # Publication date (best-effort)
    pub_date = _parse_pub_date(el)

    abstract_url = f'https://pubmed.ncbi.nlm.nih.gov/{pmid}/'

    return Paper(
        paper_id=paper_id,
        source='pubmed',
        title=title,
        abstract=abstract,
        authors=authors,
        published_date=pub_date,
        categories=[],
        app_category=category_slug,
        pdf_url=None,
        abstract_url=abstract_url,
    )
173
+
174
+
175
+ def _parse_pub_date(el: ET.Element) -> date:
176
+ """Best-effort parse of PubMed date (Year, Month, Day may be partial)."""
177
+ pub_date_el = el.find('.//PubDate')
178
+ if pub_date_el is None:
179
+ return date.today()
180
+
181
+ year_text = pub_date_el.findtext('Year', '')
182
+ month_text = pub_date_el.findtext('Month', '')
183
+ day_text = pub_date_el.findtext('Day', '')
184
+
185
+ try:
186
+ year = int(year_text)
187
+ except (ValueError, TypeError):
188
+ return date.today()
189
+
190
+ # Month may be numeric or abbreviated text
191
+ month = 1
192
+ if month_text:
193
+ try:
194
+ month = int(month_text)
195
+ except ValueError:
196
+ _MONTH_ABBREV = {
197
+ 'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4,
198
+ 'may': 5, 'jun': 6, 'jul': 7, 'aug': 8,
199
+ 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
200
+ }
201
+ month = _MONTH_ABBREV.get(month_text.lower()[:3], 1)
202
+
203
+ day = 1
204
+ if day_text:
205
+ try:
206
+ day = int(day_text)
207
+ except ValueError:
208
+ pass
209
+
210
+ try:
211
+ return date(year, month, day)
212
+ except ValueError:
213
+ return date(year, 1, 1)
app/fetcher/semantic_scholar.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” Semantic Scholar REST client.
3
+
4
+ Used as a fallback fetch source and to enrich citation counts for arXiv papers.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from datetime import date, datetime, timedelta
11
+ from typing import List, Optional
12
+
13
+ from app.core.config import SEMSCHOLAR_BASE_URL, SEMANTIC_SCHOLAR_API_KEY
14
+ from app.core.models import Paper
15
+ from app.fetcher.http_session import FetchError, RetrySession
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def fetch_papers(
    category_slug: str,
    keywords: List[str],
    session: RetrySession,
    days_back: int = 7,
) -> List[Paper]:
    """
    Search Semantic Scholar for recent papers matching *keywords*.

    Returns a list of Paper instances. Never raises — returns [] on error.
    """
    endpoint = f'{SEMSCHOLAR_BASE_URL}/paper/search'
    request_params = {
        'query': ' OR '.join(keywords),
        'fields': (
            'paperId,title,abstract,authors,year,citationCount,'
            'externalIds,publicationDate,openAccessPdf'
        ),
        'publicationTypes': 'JournalArticle,Conference',
        'limit': 50,
    }

    extra_headers = (
        {'x-api-key': SEMANTIC_SCHOLAR_API_KEY}
        if SEMANTIC_SCHOLAR_API_KEY
        else {}
    )

    try:
        resp = session.get(endpoint, params=request_params, headers=extra_headers)
    except FetchError as exc:
        logger.error('Semantic Scholar fetch failed for %s: %s', category_slug, exc)
        return []

    try:
        payload = resp.json()
    except ValueError:
        logger.error('Semantic Scholar returned invalid JSON')
        return []

    # Anything published before the cutoff is dropped by _parse_item.
    cutoff = date.today() - timedelta(days=days_back)

    results: List[Paper] = []
    for entry in payload.get('data', []):
        try:
            parsed = _parse_item(entry, category_slug, cutoff)
        except Exception:
            logger.debug('Skipping malformed Semantic Scholar item', exc_info=True)
            continue
        if parsed is not None:
            results.append(parsed)

    logger.info(
        'Semantic Scholar: fetched %d papers for [%s]',
        len(results), category_slug,
    )
    return results
75
+
76
+
77
def _parse_item(item: dict, category_slug: str, cutoff: date) -> Optional[Paper]:
    """Parse a single S2 search result into a Paper, or None."""
    # Require a parseable ISO publication date on/after the cutoff.
    raw_date = item.get('publicationDate', '')
    if not raw_date:
        return None
    try:
        published = date.fromisoformat(raw_date)
    except ValueError:
        return None
    if published < cutoff:
        return None

    # Title and abstract are both mandatory.
    title = (item.get('title') or '').strip()
    abstract = (item.get('abstract') or '').strip()
    if not (title and abstract):
        return None

    # Prefer a stable arXiv-based id when S2 knows the arXiv mapping,
    # so arXiv-sourced duplicates collapse onto one identifier.
    s2_id = item.get('paperId', '')
    external = item.get('externalIds', {}) or {}
    arxiv_id = external.get('ArXiv', '')
    paper_id = f'arxiv:{arxiv_id}' if arxiv_id else f's2:{s2_id}'

    author_names = [
        entry.get('name', '')
        for entry in (item.get('authors') or [])
        if entry.get('name')
    ]

    open_access = item.get('openAccessPdf') or {}

    return Paper(
        paper_id=paper_id,
        source='semantic_scholar',
        title=title,
        abstract=abstract,
        authors=author_names,
        published_date=published,
        categories=[],
        app_category=category_slug,
        pdf_url=open_access.get('url'),
        abstract_url=f'https://www.semanticscholar.org/paper/{s2_id}',
        citation_count=item.get('citationCount', 0) or 0,
    )
124
+
125
+
126
# ---------------------------------------------------------------------------
# Citation enrichment
# ---------------------------------------------------------------------------

def enrich_citations(papers: List[Paper], session: RetrySession) -> List[Paper]:
    """
    Batch-enrich citation counts from Semantic Scholar.

    This is best-effort: on failure the papers are returned unchanged.
    """
    if not papers:
        return papers

    # Build lookup of arXiv IDs (strip prefix)
    ids = []
    for p in papers:
        if p.paper_id.startswith('arxiv:'):
            ids.append(f'ArXiv:{p.paper_id[6:]}')
        elif p.paper_id.startswith('s2:'):
            ids.append(p.paper_id[3:])

    if not ids:
        return papers

    # BUG FIX: /paper/batch only returns paperId/title unless the wanted
    # fields are requested explicitly — without this, citationCount and
    # externalIds were always absent and enrichment silently did nothing.
    # RetrySession.post takes no params argument, so encode them in the URL.
    url = (
        f'{SEMSCHOLAR_BASE_URL}/paper/batch'
        '?fields=externalIds,citationCount'
    )
    headers = {}
    if SEMANTIC_SCHOLAR_API_KEY:
        headers['x-api-key'] = SEMANTIC_SCHOLAR_API_KEY

    try:
        response = session.post(
            url,
            json={'ids': ids},
            headers=headers,
        )
        results = response.json()
    except (FetchError, ValueError) as exc:
        logger.warning('Citation enrichment failed (best-effort): %s', exc)
        return papers

    # The endpoint returns a list aligned with the request; entries may be
    # null for unknown ids, and an error payload is a dict — guard both.
    if not isinstance(results, list):
        logger.warning('Citation enrichment: unexpected response shape')
        return papers

    # Map S2 results back to papers
    result_map: dict = {}
    for item in results:
        if isinstance(item, dict) and 'paperId' in item:
            count = item.get('citationCount', 0) or 0
            ext = item.get('externalIds', {}) or {}
            arxiv = ext.get('ArXiv')
            if arxiv:
                result_map[f'arxiv:{arxiv}'] = count
            result_map[f's2:{item["paperId"]}'] = count

    for paper in papers:
        if paper.paper_id in result_map:
            paper.citation_count = result_map[paper.paper_id]

    logger.info('Enriched citations for %d papers', len(papers))
    return papers
app/ranker/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Paper scoring & ranking
app/ranker/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (146 Bytes). View file
 
app/ranker/__pycache__/citation_scorer.cpython-312.pyc ADDED
Binary file (2.11 kB). View file
 
app/ranker/__pycache__/composite_ranker.cpython-312.pyc ADDED
Binary file (3.18 kB). View file
 
app/ranker/__pycache__/tfidf_ranker.cpython-312.pyc ADDED
Binary file (9.25 kB). View file
 
app/ranker/citation_scorer.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” Citation velocity scorer.
3
+
4
+ Normalises raw citation counts into a [0.0, 1.0] score and applies a
5
+ recency bonus for very fresh papers.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from datetime import date, timedelta
11
+ from typing import List
12
+
13
+ from app.core.config import CITATION_NORM, RECENCY_BONUS
14
+ from app.core.models import Paper
15
+
16
+
17
def score(paper: Paper) -> float:
    """
    Return a citation score in [0.0, 1.0].

    - ``citation_score = min(citation_count / CITATION_NORM, 1.0)``
    - Papers published < 3 days ago get a recency bonus.
    """
    # Normalise the raw count; max(…, 1) guards a zero/negative norm.
    value = min(paper.citation_count / max(CITATION_NORM, 1), 1.0)

    # Very fresh papers (under 3 days old) get a bonus, still capped at 1.0.
    age_days = (date.today() - paper.published_date).days
    if age_days < 3:
        value = min(value + RECENCY_BONUS, 1.0)

    return value
31
+
32
+
33
def score_many(papers: List[Paper]) -> List[Paper]:
    """Set ``citation_score`` on each paper and return the list (in-place)."""
    # The value is stashed on a transient attribute (Paper declares no
    # field for it); the composite ranker reads it later.
    for paper in papers:
        paper._citation_score = score(paper)  # type: ignore[attr-defined]
    return papers
40
+
41
+
42
def recency_score(paper: Paper) -> float:
    """
    Return a recency score in [0.0, 1.0].

    1.0 = published today, 0.0 = published ≥ 7 days ago.
    """
    # Clamp negative ages (future-dated papers) to zero, then apply a
    # linear decay over one week, floored at 0.
    age = max((date.today() - paper.published_date).days, 0)
    return max(1.0 - age / 7.0, 0.0)
app/ranker/composite_ranker.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” Composite ranker.
3
+
4
+ Combines relevance, citation, and recency scores with user-configurable
5
+ weights to produce a final ``composite_score`` for each paper.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from typing import Dict, List
12
+
13
+ from app.core.config import (
14
+ TOP_N_PER_CATEGORY,
15
+ WEIGHT_CITATION,
16
+ WEIGHT_RECENCY,
17
+ WEIGHT_RELEVANCE,
18
+ )
19
+ from app.core.models import Paper, UserProfile
20
+ from app.ranker import citation_scorer
21
+ from app.ranker.tfidf_ranker import TfidfRanker
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def rank_all(
    papers_by_category: Dict[str, List[Paper]],
    profile: UserProfile,
    cache_dir: str = '',
) -> Dict[str, List[Paper]]:
    """
    Score and sort papers per category.

    Blends relevance (TF-IDF vs. the user's interests), citation and
    recency scores using the profile's weights.

    Returns a dict ``{category: [Paper, ...]}`` with each list sorted by
    ``composite_score`` descending and sliced to ``top_n``.
    """
    w_rel = profile.weight_relevance
    w_cit = profile.weight_citation
    w_rec = profile.weight_recency
    top_n = profile.top_n_per_category

    # Validate weights. BUG FIX: an all-zero weight profile previously
    # caused a ZeroDivisionError during normalisation; fall back to a
    # relevance-only ranking in that case.
    total = w_rel + w_cit + w_rec
    if total <= 0:
        logger.warning(
            'Ranking weights sum to %.2f — falling back to relevance only', total
        )
        w_rel, w_cit, w_rec = 1.0, 0.0, 0.0
    elif abs(total - 1.0) > 0.01:
        logger.warning(
            'Ranking weights sum to %.2f (expected 1.0) — normalising', total
        )
        w_rel /= total
        w_cit /= total
        w_rec /= total

    # Build TF-IDF ranker, reusing the on-disk cache when present.
    ranker = TfidfRanker(cache_dir=cache_dir)
    if not ranker.load_cache():
        ranker.fit_profile(profile.interests)

    ranked: Dict[str, List[Paper]] = {}

    for category, papers in papers_by_category.items():
        if not papers:
            ranked[category] = []
            continue

        # Relevance scores (sets paper.relevance_score in-place).
        ranker.score_many(papers)

        # Citation + recency scores, blended into the composite.
        for paper in papers:
            cit_score = citation_scorer.score(paper)
            rec_score = citation_scorer.recency_score(paper)

            paper.composite_score = (
                w_rel * paper.relevance_score
                + w_cit * cit_score
                + w_rec * rec_score
            )

        # Sort and slice to the per-category budget.
        papers.sort(key=lambda p: p.composite_score, reverse=True)
        ranked[category] = papers[:top_n]

        logger.info(
            'Ranked [%s]: %d → top %d (best=%.3f)',
            category, len(papers), min(top_n, len(papers)),
            papers[0].composite_score if papers else 0.0,
        )

    return ranked
app/ranker/tfidf_ranker.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” TF-IDF relevance scorer.
3
+
4
+ Computes cosine similarity between paper text and the user interest profile.
5
+ Falls back to a hand-written bag-of-words implementation if scikit-learn
6
+ is not available (mobile build edge case).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import os
13
+ import pickle
14
+ from typing import Dict, List, Optional
15
+
16
+ from app.core.models import Paper
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
# ---------------------------------------------------------------------------
# Try scikit-learn; fall back to pure-Python BoW
# ---------------------------------------------------------------------------
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity as _cosine

    _HAS_SKLEARN = True
except ImportError:
    # Optional dependency (may be absent on mobile builds per the module
    # docstring); the BoW fallback below needs nothing beyond the stdlib.
    _HAS_SKLEARN = False
    logger.warning('scikit-learn not available — using fallback BoW scorer')
31
+
32
+
33
class TfidfRanker:
    """Score papers against a user interest profile using TF-IDF cosine similarity.

    With scikit-learn present, one vectorizer is fitted over all interest
    texts and a profile vector is kept per category; the fitted state is
    pickled to ``cache_dir``. Without scikit-learn, a pure-Python
    bag-of-words cosine is used instead (no caching on that path).
    """

    def __init__(self, cache_dir: str = ''):
        # '' disables the on-disk cache entirely.
        self._cache_dir = cache_dir
        # Fitted TfidfVectorizer (sklearn path only; None until fit/load).
        self._vectorizer = None
        # category slug -> TF-IDF vector of that category's interest text.
        self._profile_vectors: Dict[str, object] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def fit_profile(self, interests: Dict[str, str]) -> None:
        """
        Build / rebuild the TF-IDF model from user interest keywords.

        *interests*: ``{'ml': 'deep learning transformers', ...}``
        """
        if _HAS_SKLEARN:
            self._fit_sklearn(interests)
        else:
            self._fit_bow(interests)

    def score(self, paper: Paper) -> float:
        """
        Return relevance score in [0.0, 1.0] for a paper against its
        category's profile vector.
        """
        cat = paper.app_category
        # Title + abstract together form the document to compare.
        text = f'{paper.title} {paper.abstract}'

        if _HAS_SKLEARN:
            return self._score_sklearn(text, cat)
        else:
            return self._score_bow(text, cat)

    def score_many(self, papers: List[Paper]) -> List[Paper]:
        """Set ``relevance_score`` on each paper in-place and return the list."""
        for p in papers:
            p.relevance_score = self.score(p)
        return papers

    # ------------------------------------------------------------------
    # scikit-learn implementation
    # ------------------------------------------------------------------

    def _fit_sklearn(self, interests: Dict[str, str]) -> None:
        # Fit the vocabulary over ALL categories' texts so every
        # per-category vector shares one feature space.
        corpus = list(interests.values())
        self._vectorizer = TfidfVectorizer(
            max_features=5000, stop_words='english'
        )
        self._vectorizer.fit(corpus)
        self._profile_vectors = {}
        for cat, text in interests.items():
            vec = self._vectorizer.transform([text])
            self._profile_vectors[cat] = vec
        # Persist so later runs can skip refitting.
        self._save_cache()

    def _score_sklearn(self, text: str, category: str) -> float:
        # Unknown category / unfitted model -> neutral zero score.
        if self._vectorizer is None or category not in self._profile_vectors:
            return 0.0
        paper_vec = self._vectorizer.transform([text])
        sim = _cosine(paper_vec, self._profile_vectors[category])
        # Clamp against float drift before returning.
        return float(max(0.0, min(sim[0][0], 1.0)))

    # ------------------------------------------------------------------
    # Pure-Python bag-of-words fallback
    # ------------------------------------------------------------------

    def _fit_bow(self, interests: Dict[str, str]) -> None:
        # Lazily-created attribute: only exists on the BoW path.
        self._bow_profiles: Dict[str, Dict[str, int]] = {}
        for cat, text in interests.items():
            self._bow_profiles[cat] = _word_freq(text.lower())

    def _score_bow(self, text: str, category: str) -> float:
        # getattr guards the case where fit_profile was never called.
        profile = getattr(self, '_bow_profiles', {}).get(category)
        if not profile:
            return 0.0
        paper_freq = _word_freq(text.lower())
        return _cosine_bow(paper_freq, profile)

    # ------------------------------------------------------------------
    # Cache management
    # ------------------------------------------------------------------

    def _save_cache(self) -> None:
        # Only the sklearn state is worth caching.
        if not self._cache_dir or not _HAS_SKLEARN:
            return
        path = os.path.join(self._cache_dir, 'tfidf_cache.pkl')
        try:
            with open(path, 'wb') as f:
                pickle.dump(
                    (self._vectorizer, self._profile_vectors), f
                )
        except Exception:
            # The cache is an optimisation; failing to write is non-fatal.
            logger.debug('Could not save TF-IDF cache', exc_info=True)

    def load_cache(self) -> bool:
        """Attempt to load a cached vectorizer. Returns True on success.

        NOTE: uses pickle — only point ``cache_dir`` at app-owned storage,
        never at untrusted data.
        """
        if not self._cache_dir or not _HAS_SKLEARN:
            return False
        path = os.path.join(self._cache_dir, 'tfidf_cache.pkl')
        if not os.path.exists(path):
            return False
        try:
            with open(path, 'rb') as f:
                self._vectorizer, self._profile_vectors = pickle.load(f)
            return True
        except Exception:
            # Corrupt/incompatible pickle: remove it and force a refit.
            logger.warning('TF-IDF cache corrupt — rebuilding', exc_info=True)
            try:
                os.remove(path)
            except OSError:
                pass
            return False
148
+
149
+
150
+ # ---------------------------------------------------------------------------
151
+ # BoW helpers
152
+ # ---------------------------------------------------------------------------
153
+
154
+ _STOPWORDS = frozenset(
155
+ 'a an the is are was were be been being have has had do does did '
156
+ 'will would shall should may might can could of in to for on with '
157
+ 'at by from and or but not no nor so yet both either neither '
158
+ 'each every all any few more most other some such that this these '
159
+ 'those i me my we our you your he him his she her it its they them '
160
+ 'their what which who whom when where why how'.split()
161
+ )
162
+
163
+
164
+ def _word_freq(text: str) -> Dict[str, int]:
165
+ freq: Dict[str, int] = {}
166
+ for word in text.split():
167
+ w = ''.join(c for c in word if c.isalnum())
168
+ if w and w not in _STOPWORDS and len(w) > 2:
169
+ freq[w] = freq.get(w, 0) + 1
170
+ return freq
171
+
172
+
173
+ def _cosine_bow(a: Dict[str, int], b: Dict[str, int]) -> float:
174
+ common = set(a) & set(b)
175
+ if not common:
176
+ return 0.0
177
+ dot = sum(a[k] * b[k] for k in common)
178
+ mag_a = sum(v * v for v in a.values()) ** 0.5
179
+ mag_b = sum(v * v for v in b.values()) ** 0.5
180
+ if mag_a == 0 or mag_b == 0:
181
+ return 0.0
182
+ return dot / (mag_a * mag_b)
app/summarizer/__pycache__/groq_client.cpython-312.pyc ADDED
Binary file (4.35 kB). View file
 
app/summarizer/groq_client.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” Groq LLM summarizer.
3
+
4
+ Summarizes papers using Groq API (llama-3.1-8b-instant).
5
+ Follows the user's requested structure (Idea, Method, Results) and
6
+ enforces rate-limit delays (30 RPM).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import time
13
+ from typing import List, Optional
14
+
15
+ import requests
16
+ from app.core.config import (
17
+ GROQ_API_KEY, GROQ_BASE_URL, GROQ_MODEL, GROQ_DELAY
18
+ )
19
+ from app.core.models import Paper
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
class GroqSummarizer:
    """Handles LLM calls to Groq with rate-limiting and structured prompts."""

    def __init__(self, api_key: str = GROQ_API_KEY):
        # Empty key turns summarize_paper into a logged no-op.
        self.api_key = api_key
        # Wall-clock timestamp of the last API request, used to space
        # calls by GROQ_DELAY seconds.
        self.last_call_time = 0.0

    def summarize_paper(self, paper: Paper) -> Optional[str]:
        """
        Produce a structured summary of the paper.

        Structure:
          - Idea: The core concept.
          - Method: The approach or architecture.
          - Results: The outcome or findings.

        Returns the summary text, or None when no API key is configured
        or the request fails.
        """
        if not self.api_key:
            logger.info("Skip Groq summarization: NO API KEY.")
            return None

        # Prepare prompt
        prompt = (
            f"Please summarize the following research paper abstract into three brief sections:\n"
            f"1. Idea: (The core concept)\n"
            f"2. Method: (The proposed approach)\n"
            f"3. Results: (Key findings)\n\n"
            f"Title: {paper.title}\n"
            f"Abstract: {paper.abstract}\n\n"
            "Keep it concise and professional. Respond in plain text with those three labels."
        )

        # Enforce rate limit delay before firing the next request.
        elapsed = time.time() - self.last_call_time
        if elapsed < GROQ_DELAY:
            sleep_time = GROQ_DELAY - elapsed
            logger.debug(f"Groq Rate Limit: Sleeping for {sleep_time:.2f}s")
            time.sleep(sleep_time)

        try:
            logger.info(f"Summarizing paper [{paper.paper_id}] via Groq...")
            response = requests.post(
                GROQ_BASE_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": GROQ_MODEL,
                    "messages": [
                        {"role": "system", "content": "You are a scientific research assistant summarizing papers."},
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3,
                    "max_tokens": 300
                },
                timeout=30
            )

            if response.status_code == 200:
                data = response.json()
                return data['choices'][0]['message']['content'].strip()

            logger.error(f"Groq API error ({response.status_code}): {response.text}")
            return None

        except Exception as exc:
            logger.exception(f"Unexpected error during Groq summarization: {exc}")
            return None

        finally:
            # BUG FIX: the timestamp was previously advanced only after a
            # successful round-trip, so a request that raised (timeout,
            # connection reset) did not count toward the rate window and
            # the next call could fire immediately. The request hit the
            # API either way, so always advance the clock.
            self.last_call_time = time.time()

    def summarize_many(self, papers: List[Paper]) -> None:
        """
        Iterate through papers and fill their ``summary_llm`` field in-place.

        Papers that already carry a summary are left untouched.
        """
        for p in papers:
            # We only summarize if it doesn't already have a summary
            if not p.summary_llm:
                p.summary_llm = self.summarize_paper(p)
app/ui/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Kivy UI screens & widgets
app/ui/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (142 Bytes). View file
 
app/ui/__pycache__/detail_screen.cpython-312.pyc ADDED
Binary file (6.43 kB). View file
 
app/ui/__pycache__/home_screen.cpython-312.pyc ADDED
Binary file (5.99 kB). View file
 
app/ui/__pycache__/settings_screen.cpython-312.pyc ADDED
Binary file (6.95 kB). View file
 
app/ui/detail_screen.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” DetailScreen.
3
+
4
+ Displays ranked papers for a single category with bookmark & read
5
+ functionality. Tapping a paper opens a modal with the full abstract.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import webbrowser
12
+
13
+ from kivy.lang import Builder
14
+ from kivy.properties import BooleanProperty, ListProperty, StringProperty
15
+ from kivy.uix.boxlayout import BoxLayout
16
+ from kivy.uix.modalview import ModalView
17
+ from kivy.uix.screenmanager import Screen
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
import os
# Load the companion KV layout when it exists (it may be absent in
# headless/test environments — the screen then renders nothing).
_KV_PATH = os.path.join(os.path.dirname(__file__), 'kv', 'detail.kv')
if os.path.exists(_KV_PATH):
    Builder.load_file(_KV_PATH)
25
+
26
+
27
class PaperRow(BoxLayout):
    """A single paper row in the detail list.

    All fields are Kivy properties so the KV layout can bind to them.
    """

    rank = StringProperty('1')             # 1-based display rank
    title = StringProperty('')
    authors = StringProperty('')           # pre-formatted by DetailScreen
    date_str = StringProperty('')          # ISO date string
    score_text = StringProperty('0.00')    # composite score, 2 decimals
    is_bookmarked = BooleanProperty(False)
    paper_id = StringProperty('')
    abstract_url = StringProperty('')
    pdf_url = StringProperty('')
    abstract_text = StringProperty('')

    def toggle_bookmark(self):
        """Flip the bookmark state via the running app and mirror it locally."""
        # Local import avoids importing kivy.app at module load.
        from kivy.app import App
        app = App.get_running_app()
        if app:
            new_state = app.toggle_bookmark(self.paper_id)
            self.is_bookmarked = new_state

    def show_detail(self):
        """Open the full-abstract modal populated from this row's fields."""
        popup = PaperDetailPopup()
        popup.paper_title = self.title
        popup.paper_authors = self.authors
        popup.paper_abstract = self.abstract_text
        popup.paper_url = self.abstract_url
        popup.paper_pdf = self.pdf_url
        popup.open()
56
+
57
+
58
class PaperDetailPopup(ModalView):
    """Modal showing full paper details."""

    paper_title = StringProperty('')
    paper_authors = StringProperty('')
    paper_abstract = StringProperty('')
    paper_url = StringProperty('')        # abstract landing page
    paper_pdf = StringProperty('')        # direct PDF link ('' if none)

    def open_in_browser(self):
        """Open the abstract page in the system browser (best-effort)."""
        if self.paper_url:
            try:
                webbrowser.open(self.paper_url)
            except Exception:
                # webbrowser may be unavailable on some platforms; non-fatal.
                logger.warning('Could not open browser')

    def open_pdf(self):
        """Open the PDF link in the system browser (best-effort)."""
        if self.paper_pdf:
            try:
                webbrowser.open(self.paper_pdf)
            except Exception:
                logger.warning('Could not open PDF')
80
+
81
+
82
class DetailScreen(Screen):
    """Screen showing papers for a single category."""

    category_slug = StringProperty('')
    category_name = StringProperty('')   # human-readable category label
    week_range = StringProperty('')      # "Week of YYYY-MM-DD" caption
    paper_rows = ListProperty([])

    def load_papers(self, category_slug: str):
        """Populate the screen with papers from the latest digest.

        Clears and rebuilds the ``paper_container`` widget list; shows
        'No data' when no digest is available yet.
        """
        # Local import avoids importing kivy.app at module load.
        from kivy.app import App
        app = App.get_running_app()
        if not app:
            return

        from app.core.config import CATEGORY_LABELS
        self.category_slug = category_slug
        self.category_name = CATEGORY_LABELS.get(category_slug, category_slug.title())

        digest = app.get_latest_digest()
        container = self.ids.get('paper_container')
        if container is None:
            # KV layout not loaded (e.g. headless run) — nothing to render into.
            return
        container.clear_widgets()

        if digest is None:
            self.week_range = 'No data'
            return

        self.week_range = f'Week of {digest.week_start.isoformat()}'
        papers = digest.papers.get(category_slug, [])

        # One PaperRow per paper, ranked 1..N in digest order.
        for i, paper in enumerate(papers, 1):
            row = PaperRow()
            row.rank = str(i)
            row.paper_id = paper.paper_id
            row.title = paper.title
            row.abstract_text = paper.abstract

            # Compact author display: "First et al." beyond two authors.
            if paper.authors:
                if len(paper.authors) > 2:
                    row.authors = f'{paper.authors[0]} et al.'
                else:
                    row.authors = ', '.join(paper.authors)
            else:
                row.authors = 'Unknown'

            row.date_str = paper.published_date.isoformat()
            row.score_text = f'{paper.composite_score:.2f}'
            row.is_bookmarked = paper.is_bookmarked
            row.abstract_url = paper.abstract_url
            row.pdf_url = paper.pdf_url or ''

            container.add_widget(row)
app/ui/home_screen.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ResearchRadar β€” HomeScreen.
3
+
4
+ Displays the latest digest as a scrollable list of DigestCard widgets,
5
+ one per category. Includes a "Refresh Now" FAB and empty-state onboarding.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import threading
12
+
13
+ from kivy.clock import Clock
14
+ from kivy.lang import Builder
15
+ from kivy.properties import (
16
+ BooleanProperty,
17
+ ListProperty,
18
+ ObjectProperty,
19
+ StringProperty,
20
+ )
21
+ from kivy.uix.boxlayout import BoxLayout
22
+ from kivy.uix.screenmanager import Screen
23
+
24
+ from app.core.config import CATEGORY_LABELS
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
# Load KV
# Load this screen's layout (app/ui/kv/home.kv) at import time; the
# existence check means a missing asset simply skips Builder loading
# instead of raising.
import os
_KV_PATH = os.path.join(os.path.dirname(__file__), 'kv', 'home.kv')
if os.path.exists(_KV_PATH):
    Builder.load_file(_KV_PATH)
33
+
34
+
35
class DigestCard(BoxLayout):
    """A single category card showing paper count and top paper title."""

    category_slug = StringProperty('')
    category_name = StringProperty('')
    paper_count = StringProperty('0')
    top_paper_title = StringProperty('No papers yet')
    top_score = StringProperty('β€”')

    def on_touch_up(self, touch):
        """Navigate to this card's category detail view when tapped."""
        tapped_inside = self.collide_point(*touch.pos)
        if tapped_inside:
            running = self._get_app()
            if running:
                running.show_detail(self.category_slug)
        # Always defer to the default handler so normal dispatch continues.
        return super().on_touch_up(touch)

    def _get_app(self):
        """Return the running Kivy App instance (or None outside a run)."""
        from kivy.app import App
        return App.get_running_app()
54
+
55
+
56
class HomeScreen(Screen):
    """Main screen showing the latest weekly digest.

    Renders one DigestCard per category from the most recent digest and
    exposes a "Fetch Now" action that runs the fetch pipeline off the
    UI thread, hopping back via Clock for property updates.
    """

    # True while a background fetch is running (disables the Fetch button in KV).
    is_fetching = BooleanProperty(False)
    # Human-readable label for the last fetch shown in the bottom bar.
    last_fetched = StringProperty('Never')
    digest_cards = ListProperty([])

    def on_enter(self):
        """Load digest when screen becomes visible."""
        self.load_digest()

    def load_digest(self):
        """Load the latest digest from the database and populate cards."""
        from kivy.app import App
        app = App.get_running_app()
        if not app:
            return

        digest = app.get_latest_digest()
        container = self.ids.get('card_container')
        if container is None:
            # KV rule not applied yet; nothing to populate.
            return

        container.clear_widgets()

        if digest is None:
            self.last_fetched = 'Never β€” tap Fetch Now!'
            return

        self.last_fetched = digest.generated_at.strftime('%Y-%m-%d %H:%M')

        for cat_slug, papers in digest.papers.items():
            card = DigestCard()
            card.category_slug = cat_slug
            card.category_name = CATEGORY_LABELS.get(cat_slug, cat_slug.title())
            card.paper_count = str(len(papers))
            if papers:
                # Truncate long titles so the card stays single-height.
                title = papers[0].title
                if len(title) > 70:
                    title = title[:67] + '...'
                card.top_paper_title = title
                card.top_score = f'{papers[0].composite_score:.2f}'
            container.add_widget(card)

    def trigger_fetch(self):
        """Run the weekly fetch in a background thread."""
        if self.is_fetching:
            return

        from kivy.app import App
        app = App.get_running_app()
        if app is None:
            # Bug fix: previously the worker thread was started even with no
            # running app, so it always died with AttributeError (logged as a
            # fetch failure). Bail out before touching any UI state.
            logger.warning('No running app; skipping fetch')
            return

        self.is_fetching = True
        self.last_fetched = 'Fetching...'

        def _run():
            try:
                app.run_fetch()
            except Exception:
                logger.exception('Background fetch failed')
            finally:
                # Kivy properties must be mutated on the UI thread.
                Clock.schedule_once(lambda dt: self._on_fetch_done(), 0)

        threading.Thread(target=_run, daemon=True).start()

    def _on_fetch_done(self):
        """UI-thread callback: clear the busy flag and reload the cards."""
        self.is_fetching = False
        self.load_digest()
app/ui/kv/detail.kv ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #:kivy 2.3.0
2
+
3
+ # =====================================================================
4
+ # DetailScreen β€” ranked paper list for a single category
5
+ # =====================================================================
6
+
7
# One ranked paper: rank + title + bookmark star, author/date line, score bar.
<PaperRow>:
    orientation: 'vertical'
    size_hint_y: None
    height: dp(100)
    padding: dp(14), dp(8)
    spacing: dp(4)

    canvas.before:
        Color:
            rgba: 0.14, 0.16, 0.22, 1
        RoundedRectangle:
            pos: self.x + dp(4), self.y + dp(2)
            size: self.width - dp(8), self.height - dp(4)
            radius: [dp(10)]

    # Top line: rank badge, shortened title, bookmark toggle.
    BoxLayout:
        size_hint_y: None
        height: dp(24)

        Label:
            text: '#' + root.rank
            font_size: sp(16)
            bold: True
            color: 0.55, 0.78, 1.0, 1
            size_hint_x: None
            width: dp(36)
            halign: 'center'
            text_size: self.size

        Label:
            text: root.title
            font_size: sp(14)
            color: 0.92, 0.92, 0.95, 1
            halign: 'left'
            text_size: self.width, None
            shorten: True
            shorten_from: 'right'

        Button:
            text: 'β˜…' if root.is_bookmarked else 'β˜†'
            font_size: sp(20)
            size_hint_x: None
            width: dp(40)
            background_color: 0, 0, 0, 0
            color: (1, 0.85, 0.2, 1) if root.is_bookmarked else (0.5, 0.5, 0.5, 1)
            on_release: root.toggle_bookmark()

    Label:
        text: root.authors + ' Β· ' + root.date_str
        font_size: sp(11)
        color: 0.55, 0.55, 0.6, 1
        halign: 'left'
        text_size: self.size
        size_hint_y: None
        height: dp(18)

    BoxLayout:
        size_hint_y: None
        height: dp(24)

        # Score bar
        # NOTE(review): float(root.score_text or 0) assumes score_text is
        # numeric text (e.g. '0.85') or empty; a non-numeric value would
        # raise ValueError during layout β€” confirm upstream always formats
        # a float here.
        BoxLayout:
            size_hint_x: 0.6
            Widget:
                canvas:
                    Color:
                        rgba: 0.2, 0.2, 0.3, 1
                    RoundedRectangle:
                        pos: self.pos
                        size: self.width, dp(6)
                        radius: [dp(3)]
                    Color:
                        rgba: 0.3, 0.75, 0.5, 1
                    RoundedRectangle:
                        pos: self.pos
                        size: self.width * min(float(root.score_text or 0), 1.0), dp(6)
                        radius: [dp(3)]

        Label:
            text: root.score_text
            font_size: sp(12)
            color: 0.4, 0.85, 0.6, 1
            size_hint_x: 0.2
            halign: 'right'
            text_size: self.size

        Button:
            text: 'View'
            font_size: sp(12)
            size_hint_x: 0.2
            background_normal: ''
            background_color: 0.2, 0.4, 0.7, 1
            color: 1, 1, 1, 1
            on_release: root.show_detail()
101
+
102
+
103
# Modal popup with full title, authors, scrollable abstract, and link buttons.
<PaperDetailPopup>:
    size_hint: 0.92, 0.85
    auto_dismiss: True

    canvas.before:
        Color:
            rgba: 0.1, 0.12, 0.16, 0.98
        RoundedRectangle:
            pos: self.pos
            size: self.size
            radius: [dp(16)]

    BoxLayout:
        orientation: 'vertical'
        padding: dp(20)
        spacing: dp(12)

        # Title grows to fit its wrapped text (height bound to texture).
        Label:
            text: root.paper_title
            font_size: sp(17)
            bold: True
            color: 1, 1, 1, 1
            halign: 'left'
            valign: 'top'
            text_size: self.width, None
            size_hint_y: None
            height: self.texture_size[1]

        Label:
            text: root.paper_authors
            font_size: sp(12)
            color: 0.6, 0.6, 0.7, 1
            halign: 'left'
            text_size: self.size
            size_hint_y: None
            height: dp(20)

        # Abstract scrolls independently of the rest of the popup.
        ScrollView:
            Label:
                text: root.paper_abstract
                font_size: sp(13)
                color: 0.85, 0.85, 0.9, 1
                halign: 'left'
                valign: 'top'
                text_size: self.width, None
                size_hint_y: None
                height: self.texture_size[1]
                markup: False

        # Action row: open abstract page, open PDF, dismiss.
        BoxLayout:
            size_hint_y: None
            height: dp(44)
            spacing: dp(10)

            Button:
                text: '🌐 Open Abstract'
                font_size: sp(13)
                background_normal: ''
                background_color: 0.2, 0.45, 0.8, 1
                color: 1, 1, 1, 1
                on_release: root.open_in_browser()

            Button:
                text: 'πŸ“„ Open PDF'
                font_size: sp(13)
                background_normal: ''
                background_color: 0.3, 0.65, 0.4, 1
                color: 1, 1, 1, 1
                on_release: root.open_pdf()

            Button:
                text: 'βœ• Close'
                font_size: sp(13)
                background_normal: ''
                background_color: 0.5, 0.2, 0.2, 1
                color: 1, 1, 1, 1
                on_release: root.dismiss()
180
+
181
+
182
# Category detail: header with back button + category/week labels, then the
# scrollable PaperRow list populated by DetailScreen.load_papers().
<DetailScreen>:
    name: 'detail'

    BoxLayout:
        orientation: 'vertical'

        canvas.before:
            Color:
                rgba: 0.08, 0.09, 0.12, 1
            Rectangle:
                pos: self.pos
                size: self.size

        # ── Header ──────────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(60)
            padding: dp(12), dp(10)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            Button:
                text: '←'
                font_size: sp(22)
                size_hint_x: None
                width: dp(48)
                background_color: 0, 0, 0, 0
                color: 0.7, 0.7, 0.8, 1
                on_release: app.go_home()

            BoxLayout:
                orientation: 'vertical'
                Label:
                    text: root.category_name
                    font_size: sp(18)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                Label:
                    text: root.week_range
                    font_size: sp(12)
                    color: 0.5, 0.5, 0.6, 1
                    halign: 'left'
                    text_size: self.size

        # ── Paper list ──────────────────────────────────
        ScrollView:
            do_scroll_x: False

            # Rows are added here by id lookup ('paper_container') in Python.
            BoxLayout:
                id: paper_container
                orientation: 'vertical'
                size_hint_y: None
                height: self.minimum_height
                padding: dp(6)
                spacing: dp(8)
app/ui/kv/home.kv ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #:kivy 2.3.0
2
+
3
+ # =====================================================================
4
+ # HomeScreen β€” digest card list with FAB and status bar
5
+ # =====================================================================
6
+
7
# One category card: name + paper count, top paper title, score footer.
# Tap handling lives in DigestCard.on_touch_up (Python side).
<DigestCard>:
    orientation: 'vertical'
    size_hint_y: None
    height: dp(120)
    padding: dp(16)
    spacing: dp(6)

    # Two stacked rounded rectangles give a subtle drop-shadow effect.
    canvas.before:
        Color:
            rgba: 0.15, 0.17, 0.22, 1
        RoundedRectangle:
            pos: self.x + dp(4), self.y + dp(2)
            size: self.width - dp(8), self.height - dp(4)
            radius: [dp(14)]
        Color:
            rgba: 0.22, 0.25, 0.32, 1
        RoundedRectangle:
            pos: self.x + dp(4), self.y + dp(4)
            size: self.width - dp(8), self.height - dp(4)
            radius: [dp(14)]

    BoxLayout:
        size_hint_y: None
        height: dp(28)
        Label:
            text: root.category_name
            font_size: sp(18)
            bold: True
            color: 0.55, 0.78, 1.0, 1
            halign: 'left'
            text_size: self.size
        Label:
            text: root.paper_count + ' papers'
            font_size: sp(14)
            color: 0.6, 0.6, 0.7, 1
            halign: 'right'
            text_size: self.size
            size_hint_x: 0.3

    Label:
        text: root.top_paper_title
        font_size: sp(14)
        color: 0.85, 0.85, 0.9, 1
        halign: 'left'
        valign: 'top'
        text_size: self.width, None
        shorten: True
        shorten_from: 'right'

    BoxLayout:
        size_hint_y: None
        height: dp(22)
        Label:
            text: 'Score: ' + root.top_score
            font_size: sp(12)
            color: 0.4, 0.85, 0.6, 1
            halign: 'left'
            text_size: self.size
        Label:
            text: 'Tap to explore β†’'
            font_size: sp(11)
            color: 0.5, 0.5, 0.6, 1
            halign: 'right'
            text_size: self.size
+
72
+
73
# Home layout: app header, scrollable DigestCard list, bottom status/fetch bar.
<HomeScreen>:
    name: 'home'

    BoxLayout:
        orientation: 'vertical'
        padding: 0
        spacing: 0

        canvas.before:
            Color:
                rgba: 0.08, 0.09, 0.12, 1
            Rectangle:
                pos: self.pos
                size: self.size

        # ── Header ──────────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(64)
            padding: dp(16), dp(12)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            Label:
                text: 'πŸ“‘ ResearchRadar'
                font_size: sp(22)
                bold: True
                color: 1, 1, 1, 1
                halign: 'left'
                text_size: self.size
                valign: 'center'

            Button:
                text: 'βš™'
                font_size: sp(22)
                size_hint_x: None
                width: dp(48)
                background_color: 0, 0, 0, 0
                color: 0.7, 0.7, 0.8, 1
                on_release: app.show_settings()

        # ── Digest cards ────────────────────────────────
        ScrollView:
            do_scroll_x: False

            # Cards are added here by id lookup ('card_container') in Python.
            BoxLayout:
                id: card_container
                orientation: 'vertical'
                size_hint_y: None
                height: self.minimum_height
                padding: dp(8)
                spacing: dp(10)

        # ── Bottom bar ──────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(56)
            padding: dp(12), dp(8)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            Label:
                text: 'Last: ' + root.last_fetched
                font_size: sp(12)
                color: 0.5, 0.5, 0.6, 1
                halign: 'left'
                text_size: self.size
                valign: 'center'

            # Disabled while HomeScreen.is_fetching is True.
            Button:
                text: 'πŸ”„ Fetch Now'
                font_size: sp(14)
                bold: True
                size_hint_x: None
                width: dp(130)
                background_normal: ''
                background_color: 0.2, 0.5, 0.9, 1
                color: 1, 1, 1, 1
                disabled: root.is_fetching
                on_release: root.trigger_fetch()
app/ui/kv/settings.kv ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #:kivy 2.3.0
2
+
3
+ # =====================================================================
4
+ # SettingsScreen β€” interest keywords, weights, API keys
5
+ # =====================================================================
6
+
7
# Settings form: keyword inputs per category, ranking weight sliders,
# papers-per-category spinner, optional API keys, and a save button.
# All fields two-way bind to SettingsScreen properties via on_text/on_value.
<SettingsScreen>:
    name: 'settings'

    BoxLayout:
        orientation: 'vertical'

        canvas.before:
            Color:
                rgba: 0.08, 0.09, 0.12, 1
            Rectangle:
                pos: self.pos
                size: self.size

        # ── Header ──────────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(60)
            padding: dp(12), dp(10)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            Button:
                text: '←'
                font_size: sp(22)
                size_hint_x: None
                width: dp(48)
                background_color: 0, 0, 0, 0
                color: 0.7, 0.7, 0.8, 1
                on_release: app.go_home()

            Label:
                text: 'βš™ Settings'
                font_size: sp(20)
                bold: True
                color: 1, 1, 1, 1
                halign: 'left'
                text_size: self.size
                valign: 'center'

        # ── Scrollable content ──────────────────────────
        ScrollView:
            do_scroll_x: False

            BoxLayout:
                orientation: 'vertical'
                size_hint_y: None
                height: self.minimum_height
                padding: dp(16)
                spacing: dp(14)

                # ── Interest Keywords ───────────────────
                Label:
                    text: 'Interest Keywords'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                Label:
                    text: 'Machine Learning'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.ml_keywords
                    on_text: root.ml_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Artificial Intelligence'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.ai_keywords
                    on_text: root.ai_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Computer Science'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.cs_keywords
                    on_text: root.cs_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Neuroscience'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.neuro_keywords
                    on_text: root.neuro_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Brain-Computer Interface'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.bci_keywords
                    on_text: root.bci_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                # ── Ranking Weights ─────────────────────
                Label:
                    text: 'Ranking Weights'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                Label:
                    text: 'Relevance: ' + '{:.0%}'.format(root.weight_relevance)
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                Slider:
                    value: root.weight_relevance
                    on_value: root.weight_relevance = self.value
                    min: 0
                    max: 1
                    step: 0.05
                    size_hint_y: None
                    height: dp(36)
                    cursor_size: dp(20), dp(20)

                Label:
                    text: 'Citations: ' + '{:.0%}'.format(root.weight_citation)
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                Slider:
                    value: root.weight_citation
                    on_value: root.weight_citation = self.value
                    min: 0
                    max: 1
                    step: 0.05
                    size_hint_y: None
                    height: dp(36)
                    cursor_size: dp(20), dp(20)

                Label:
                    text: 'Recency: ' + '{:.0%}'.format(root.weight_recency)
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                Slider:
                    value: root.weight_recency
                    on_value: root.weight_recency = self.value
                    min: 0
                    max: 1
                    step: 0.05
                    size_hint_y: None
                    height: dp(36)
                    cursor_size: dp(20), dp(20)

                # ── Papers per category ─────────────────
                Label:
                    text: 'Papers per Category'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                # Falls back to 5 if the spinner text is ever empty.
                Spinner:
                    text: str(int(root.top_n))
                    values: ['3', '5', '7', '10']
                    on_text: root.top_n = int(self.text) if self.text else 5
                    size_hint_y: None
                    height: dp(40)
                    font_size: sp(14)
                    background_color: 0.18, 0.2, 0.28, 1
                    color: 0.9, 0.9, 0.95, 1

                # ── API Keys (optional) ─────────────────
                Label:
                    text: 'API Keys (optional)'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                Label:
                    text: 'Semantic Scholar API Key'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.semantic_scholar_key
                    on_text: root.semantic_scholar_key = self.text
                    multiline: False
                    password: True
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1

                Label:
                    text: 'PubMed (NCBI) API Key'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.pubmed_key
                    on_text: root.pubmed_key = self.text
                    multiline: False
                    password: True
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1

                # ── Save Button ─────────────────────────
                Button:
                    text: 'πŸ’Ύ Save Settings'
                    font_size: sp(16)
                    bold: True
                    size_hint_y: None
                    height: dp(50)
                    background_normal: ''
                    background_color: 0.2, 0.55, 0.35, 1
                    color: 1, 1, 1, 1
                    on_release: root.save_settings()

                # Spacer
                Widget:
                    size_hint_y: None
                    height: dp(40)