Spaces:
Sleeping
Sleeping
Upload 63 files
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- app.py +81 -0
- app/__init__.py +1 -0
- app/__pycache__/__init__.cpython-312.pyc +0 -0
- app/core/__init__.py +1 -0
- app/core/__pycache__/__init__.cpython-312.pyc +0 -0
- app/core/__pycache__/config.cpython-312.pyc +0 -0
- app/core/__pycache__/database.cpython-312.pyc +0 -0
- app/core/__pycache__/models.cpython-312.pyc +0 -0
- app/core/__pycache__/notifier.cpython-312.pyc +0 -0
- app/core/__pycache__/scheduler.cpython-312.pyc +0 -0
- app/core/__pycache__/telegram_bot.cpython-312.pyc +0 -0
- app/core/config.py +135 -0
- app/core/database.py +359 -0
- app/core/models.py +80 -0
- app/core/notifier.py +77 -0
- app/core/scheduler.py +135 -0
- app/core/telegram_bot.py +294 -0
- app/fetcher/__init__.py +1 -0
- app/fetcher/__pycache__/__init__.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/arxiv_client.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/fetch_pipeline.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/http_session.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/pubmed_client.cpython-312.pyc +0 -0
- app/fetcher/__pycache__/semantic_scholar.cpython-312.pyc +0 -0
- app/fetcher/arxiv_client.py +159 -0
- app/fetcher/crossref_client.py +69 -0
- app/fetcher/fetch_pipeline.py +225 -0
- app/fetcher/http_session.py +223 -0
- app/fetcher/pubmed_client.py +213 -0
- app/fetcher/semantic_scholar.py +181 -0
- app/ranker/__init__.py +1 -0
- app/ranker/__pycache__/__init__.cpython-312.pyc +0 -0
- app/ranker/__pycache__/citation_scorer.cpython-312.pyc +0 -0
- app/ranker/__pycache__/composite_ranker.cpython-312.pyc +0 -0
- app/ranker/__pycache__/tfidf_ranker.cpython-312.pyc +0 -0
- app/ranker/citation_scorer.py +49 -0
- app/ranker/composite_ranker.py +88 -0
- app/ranker/tfidf_ranker.py +182 -0
- app/summarizer/__pycache__/groq_client.cpython-312.pyc +0 -0
- app/summarizer/groq_client.py +101 -0
- app/ui/__init__.py +1 -0
- app/ui/__pycache__/__init__.cpython-312.pyc +0 -0
- app/ui/__pycache__/detail_screen.cpython-312.pyc +0 -0
- app/ui/__pycache__/home_screen.cpython-312.pyc +0 -0
- app/ui/__pycache__/settings_screen.cpython-312.pyc +0 -0
- app/ui/detail_screen.py +135 -0
- app/ui/home_screen.py +123 -0
- app/ui/kv/detail.kv +243 -0
- app/ui/kv/home.kv +162 -0
- app/ui/kv/settings.kv +318 -0
app.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""ResearchRadar-HF — Streamlit front-end with a background daily scheduler.

Runs a daemon thread that triggers ``run_daily.py --now`` every day at
05:00 EEST, plus a small dashboard with a manual-trigger button.
"""

import os
import subprocess
import threading
import time
from datetime import datetime, timedelta

import pytz
import streamlit as st

st.set_page_config(page_title="ResearchRadar-HF", page_icon="📡")

st.title("📡 ResearchRadar Bot")
st.markdown("Your daily research digest is running in the background.")

# Timezone processing for EEST (UTC+3)
TIMEZONE = pytz.timezone('Europe/Bucharest')  # or any UTC+3 region
LATEST_LOG = "Logs will appear here once a fetch starts..."

status_placeholder = st.empty()
log_placeholder = st.empty()

# Process-wide guard. st.session_state is scoped to a single *browser
# session*, so relying on it alone starts one scheduler thread per visitor
# (duplicate daily fetches). Module globals survive reruns and sessions
# within the same process, so this flag is checked under a lock instead.
_worker_lock = threading.Lock()
_worker_started = False


def _seconds_until_target(now: datetime) -> float:
    """Return seconds from *now* until the next 05:00 in TIMEZONE."""
    target = now.replace(hour=5, minute=0, second=0, microsecond=0)
    if target <= now:
        target += timedelta(days=1)
    return (target - now).total_seconds()


def run_worker() -> None:
    """Background thread: trigger the fetch script at 05:00 EEST daily."""
    while True:
        wait_seconds = _seconds_until_target(datetime.now(TIMEZONE))

        # Poll once a minute; when less than a minute remains, sleep the
        # exact remainder so the fetch never fires early.
        if wait_seconds > 60:
            time.sleep(60)
            continue
        time.sleep(max(wait_seconds, 0))

        # Execute the fetch
        print(f"[{datetime.now()}] Triggering fetch...")
        subprocess.run(["python", "run_daily.py", "--now"])

        # Sleep for a bit to avoid double-triggering within the same minute
        time.sleep(120)


def _ensure_worker() -> None:
    """Start the scheduler thread exactly once per process."""
    global _worker_started
    with _worker_lock:
        if not _worker_started:
            threading.Thread(target=run_worker, daemon=True).start()
            _worker_started = True


_ensure_worker()
st.session_state['worker_started'] = True  # kept for backward compatibility

# Dashboard UI
with status_placeholder.container():
    now_eest = datetime.now(TIMEZONE)
    st.info(f"🕒 Current EEST Time: **{now_eest.strftime('%H:%M:%S')}**")

    target = now_eest.replace(hour=5, minute=0, second=0, microsecond=0)
    if target <= now_eest:
        target += timedelta(days=1)

    diff = target - now_eest
    st.success(f"⏳ Next fetch in: **{diff}** (at 05:00 AM)")

st.divider()
if st.button("🔄 Trigger Manual Fetch Now"):
    with st.spinner("Fetching papers... this takes a few minutes (Groq rate-limits apply)"):
        res = subprocess.run(["python", "run_daily.py", "--now"], capture_output=True, text=True)
        st.code(res.stdout)
        if res.stderr:
            st.error(res.stderr)

st.markdown("""
### 📌 How it works on Hugging Face:
- This Space runs **24/7**.
- At **05:00 AM EEST**, it triggers `run_daily.py --now`.
- It reads your `GROQ_API_KEY` and `TELEGRAM` tokens from your **Space Secrets**.
""")

# Persistent storage check (optional)
if not os.path.exists(".researchradar"):
    os.makedirs(".researchradar", exist_ok=True)
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ResearchRadar β Weekly AI & Neuroscience Papers
|
app/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (139 Bytes). View file
|
|
|
app/core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Core business logic β framework-agnostic
|
app/core/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (144 Bytes). View file
|
|
|
app/core/__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (3.36 kB). View file
|
|
|
app/core/__pycache__/database.cpython-312.pyc
ADDED
|
Binary file (14 kB). View file
|
|
|
app/core/__pycache__/models.cpython-312.pyc
ADDED
|
Binary file (3.83 kB). View file
|
|
|
app/core/__pycache__/notifier.cpython-312.pyc
ADDED
|
Binary file (3.18 kB). View file
|
|
|
app/core/__pycache__/scheduler.cpython-312.pyc
ADDED
|
Binary file (5.38 kB). View file
|
|
|
app/core/__pycache__/telegram_bot.cpython-312.pyc
ADDED
|
Binary file (11.3 kB). View file
|
|
|
app/core/config.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
ResearchRadar — App-wide constants and environment configuration.

All magic values live here. Never hard-code strings or numbers in other modules.
Environment variables are read at startup using os.getenv() with documented defaults.
"""

import os
import logging

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
LOG_LEVEL = os.getenv('RESEARCHRADAR_LOG_LEVEL', 'INFO').upper()
logging.basicConfig(
    # Unknown level names fall back to INFO instead of crashing at import.
    level=getattr(logging, LOG_LEVEL, logging.INFO),
    format='[%(asctime)s] %(name)s %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger('researchradar')

# ---------------------------------------------------------------------------
# Data Source URLs
# ---------------------------------------------------------------------------
# HTTPS: the arXiv export API supports TLS — avoid plaintext HTTP.
ARXIV_BASE_URL = 'https://export.arxiv.org/api/query'
ARXIV_MAX_RESULTS = 50

SEMSCHOLAR_BASE_URL = 'https://api.semanticscholar.org/graph/v1'
PUBMED_BASE_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils'
CROSSREF_BASE_URL = 'https://api.crossref.org/works'

# ---------------------------------------------------------------------------
# HTTP / Retry Configuration
# ---------------------------------------------------------------------------
HTTP_TIMEOUT = 20        # seconds per request
HTTP_MAX_RETRIES = 4
HTTP_BACKOFF_BASE = 2    # exponential: 2^attempt seconds
HTTP_BACKOFF_MAX = 64    # cap at 64 seconds
RETRY_STATUS_CODES = {429, 500, 502, 503, 504}

# ---------------------------------------------------------------------------
# Scheduler
# ---------------------------------------------------------------------------
SCHEDULE_DAY = 'sun'
SCHEDULE_HOUR = 8
SCHEDULE_MINUTE = 0

# ---------------------------------------------------------------------------
# Ranking & Display
# ---------------------------------------------------------------------------
TOP_N_PER_CATEGORY = 5   # papers to surface in each digest card
CITATION_NORM = 50       # citation_score = min(citations / CITATION_NORM, 1.0)
RECENCY_BONUS = 0.2      # added to papers < 3 days old

# Default composite weights (user-adjustable in settings)
WEIGHT_RELEVANCE = 0.60
WEIGHT_CITATION = 0.30
WEIGHT_RECENCY = 0.10

# ---------------------------------------------------------------------------
# Database
# ---------------------------------------------------------------------------
DB_VERSION = 2  # increment on schema change (added summary_llm)
DB_PATH = os.getenv('RESEARCHRADAR_DB_PATH', '')  # resolved at runtime

# ---------------------------------------------------------------------------
# Category Mapping
# ---------------------------------------------------------------------------
ARXIV_CATEGORY_MAP = {
    'ml': ['cs.LG', 'stat.ML'],
    'ai': ['cs.AI', 'cs.CL', 'cs.CV'],
    'cs': ['cs.SE', 'cs.PL', 'cs.DS', 'cs.AR'],
    'neuroscience': ['q-bio.NC'],
    'bci': ['eess.SP', 'cs.HC'],
}

CATEGORY_LABELS = {
    'ml': 'Machine Learning',
    'ai': 'Artificial Intelligence',
    'cs': 'Computer Science',
    'neuroscience': 'Neuroscience',
    'bci': 'Brain-Computer Interface',
}

# Keyword map used by Semantic Scholar fallback searches
KEYWORD_MAP = {
    'ml': ['machine learning', 'deep learning', 'neural network'],
    'ai': ['artificial intelligence', 'natural language processing',
           'computer vision', 'reinforcement learning', 'Transformers'],
    'cs': ['software engineering', 'programming languages',
           'data structures', 'algorithms'],
    'neuroscience': ['neuroscience', 'synaptic plasticity', 'cortex',
                     'neural circuits', 'speech recognition', 'autism',
                     'dementia', 'alzheimer', 'parkinson'],
    'bci': ['brain computer interface', 'EEG', 'neural decoding',
            'neuroprosthetics'],
}

# PubMed MeSH terms for supplemental queries
PUBMED_MESH_MAP = {
    'neuroscience': 'Neurosciences[MeSH]',
    'bci': 'Brain-Computer Interfaces[MeSH]',
}

# ---------------------------------------------------------------------------
# Groq (LLM Summarization)
# ---------------------------------------------------------------------------
GROQ_API_KEY = os.getenv('GROQ_API_KEY', '')
GROQ_BASE_URL = 'https://api.groq.com/openai/v1/chat/completions'
GROQ_MODEL = 'llama-3.1-8b-instant'

# Rate Limits (llama-3.1-8b-instant)
GROQ_RPM = 30    # 1 request / 2 seconds
GROQ_TPM = 6000
GROQ_DELAY = 2.1  # seconds between requests to be safe

# ---------------------------------------------------------------------------
# Filtering
# ---------------------------------------------------------------------------
# Neuro/BCI papers MUST have these keywords to be included
AI_FILTERS = [
    'ai', 'machine learning', 'neural network', 'deep learning',
    'reinforcement learning', 'transformer', 'algorithm', 'artificial intelligence',
    'decoder', 'encoder', 'brain computer interface', 'classifier'
]

# ---------------------------------------------------------------------------
# Optional API Keys (never required)
# ---------------------------------------------------------------------------
SEMANTIC_SCHOLAR_API_KEY = os.getenv('SEMANTIC_SCHOLAR_API_KEY', '')
NCBI_API_KEY = os.getenv('NCBI_API_KEY', '')

# ---------------------------------------------------------------------------
# User-Agent — required by arXiv fair-use policy
# ---------------------------------------------------------------------------
USER_AGENT = 'ResearchRadar/1.0 (contact: app@example.com)'
app/core/database.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β SQLite wrapper with migrations.
|
| 3 |
+
|
| 4 |
+
All write operations use parameterised queries exclusively.
|
| 5 |
+
Never format SQL strings with user or API data.
|
| 6 |
+
"""
|
| 7 |
+
|
from __future__ import annotations

import functools
import json
import logging
import os
import sqlite3
import time
from datetime import date, datetime
from typing import List, Optional

from app.core.config import DB_VERSION
from app.core.models import Digest, Paper
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
# ---------------------------------------------------------------------------
# Schema DDL (Version 1)
# ---------------------------------------------------------------------------

# Baseline schema, executed idempotently (IF NOT EXISTS) on every startup.
# Later versions are layered on via run_migrations(); summary_llm also
# appears here so fresh databases don't need the v2 ALTER.
# List-valued fields (authors, categories, fetch_errors) are stored as JSON
# text; dates/datetimes as ISO-8601 text; booleans as 0/1 INTEGER.
_SCHEMA_V1 = """
CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS papers (
    paper_id TEXT PRIMARY KEY,
    source TEXT NOT NULL,
    title TEXT NOT NULL,
    abstract TEXT NOT NULL,
    summary_llm TEXT,
    authors TEXT NOT NULL,
    published_date TEXT NOT NULL,
    categories TEXT NOT NULL,
    app_category TEXT NOT NULL,
    pdf_url TEXT,
    abstract_url TEXT NOT NULL,
    citation_count INTEGER DEFAULT 0,
    relevance_score REAL DEFAULT 0.0,
    composite_score REAL DEFAULT 0.0,
    fetched_at TEXT NOT NULL,
    is_bookmarked INTEGER DEFAULT 0,
    is_read INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS digests (
    digest_id TEXT PRIMARY KEY,
    week_start TEXT NOT NULL,
    generated_at TEXT NOT NULL,
    total_fetched INTEGER,
    total_ranked INTEGER,
    fetch_errors TEXT
);

CREATE TABLE IF NOT EXISTS digest_papers (
    digest_id TEXT NOT NULL,
    paper_id TEXT NOT NULL,
    rank_order INTEGER NOT NULL,
    PRIMARY KEY (digest_id, paper_id),
    FOREIGN KEY (digest_id) REFERENCES digests(digest_id),
    FOREIGN KEY (paper_id) REFERENCES papers(paper_id)
);
"""
| 71 |
+
|
| 72 |
+
# ---------------------------------------------------------------------------
|
| 73 |
+
# Connection
|
| 74 |
+
# ---------------------------------------------------------------------------
|
| 75 |
+
|
| 76 |
+
_DB_RETRY_MAX = 3
|
| 77 |
+
_DB_RETRY_SLEEP = 0.5
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def get_connection(db_path: str) -> sqlite3.Connection:
|
| 81 |
+
"""Return a connection with row_factory and WAL mode enabled."""
|
| 82 |
+
conn = sqlite3.connect(db_path)
|
| 83 |
+
conn.row_factory = sqlite3.Row
|
| 84 |
+
conn.execute('PRAGMA journal_mode=WAL')
|
| 85 |
+
conn.execute('PRAGMA foreign_keys=ON')
|
| 86 |
+
return conn
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _retry_on_locked(func):
|
| 90 |
+
"""Decorator: retry up to _DB_RETRY_MAX times on 'database is locked'."""
|
| 91 |
+
def wrapper(*args, **kwargs):
|
| 92 |
+
for attempt in range(_DB_RETRY_MAX):
|
| 93 |
+
try:
|
| 94 |
+
return func(*args, **kwargs)
|
| 95 |
+
except sqlite3.OperationalError as exc:
|
| 96 |
+
if 'database is locked' in str(exc) and attempt < _DB_RETRY_MAX - 1:
|
| 97 |
+
logger.warning('DB locked β retrying (%d/%d)', attempt + 1, _DB_RETRY_MAX)
|
| 98 |
+
time.sleep(_DB_RETRY_SLEEP)
|
| 99 |
+
else:
|
| 100 |
+
raise
|
| 101 |
+
return wrapper
|
| 102 |
+
|
| 103 |
+
|
# ---------------------------------------------------------------------------
# Initialisation & Migrations
# ---------------------------------------------------------------------------

def initialize(db_path: str) -> None:
    """Create tables and run any pending migrations.

    Safe to call on every startup: the schema script is idempotent and
    migrations only run when the stored version lags DB_VERSION.
    """
    conn = get_connection(db_path)
    try:
        conn.executescript(_SCHEMA_V1)
        # First run: stamp the database with the current schema version.
        row = conn.execute(
            "SELECT value FROM meta WHERE key = 'db_version'"
        ).fetchone()
        if row is not None:
            stored = int(row['value'])
            if stored < DB_VERSION:
                run_migrations(conn, stored, DB_VERSION)
        else:
            conn.execute(
                "INSERT INTO meta (key, value) VALUES ('db_version', ?)",
                (str(DB_VERSION),),
            )
        conn.commit()
    finally:
        conn.close()
| 129 |
+
|
| 130 |
+
|
def run_migrations(conn: sqlite3.Connection, current: int, target: int) -> None:
    """Apply sequential migrations from *current* to *target* version.

    Each migration step is guarded so re-running against a partially
    migrated database is harmless.
    """
    logger.info('Migrating DB from v%d to v%d', current, target)

    if current < 2:
        # v2: add the LLM summary column; tolerate re-runs where it exists.
        try:
            conn.execute("ALTER TABLE papers ADD COLUMN summary_llm TEXT")
            logger.info('V2 Migration: Added summary_llm column to papers table.')
        except sqlite3.OperationalError as e:
            if 'duplicate column name' not in str(e).lower():
                raise

    # Record the new schema version last, after all steps succeeded.
    conn.execute(
        "UPDATE meta SET value = ? WHERE key = 'db_version'",
        (str(target),),
    )
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# ---------------------------------------------------------------------------
|
| 152 |
+
# Paper helpers
|
| 153 |
+
# ---------------------------------------------------------------------------
|
| 154 |
+
|
| 155 |
+
def _paper_to_row(paper: Paper) -> tuple:
|
| 156 |
+
return (
|
| 157 |
+
paper.paper_id,
|
| 158 |
+
paper.source,
|
| 159 |
+
paper.title,
|
| 160 |
+
paper.abstract,
|
| 161 |
+
paper.summary_llm,
|
| 162 |
+
json.dumps(paper.authors),
|
| 163 |
+
paper.published_date.isoformat(),
|
| 164 |
+
json.dumps(paper.categories),
|
| 165 |
+
paper.app_category,
|
| 166 |
+
paper.pdf_url,
|
| 167 |
+
paper.abstract_url,
|
| 168 |
+
paper.citation_count,
|
| 169 |
+
paper.relevance_score,
|
| 170 |
+
paper.composite_score,
|
| 171 |
+
paper.fetched_at.isoformat(),
|
| 172 |
+
int(paper.is_bookmarked),
|
| 173 |
+
int(paper.is_read),
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
|
def _row_to_paper(row: sqlite3.Row) -> Paper:
    """Deserialize a papers-table row back into a Paper (inverse of _paper_to_row)."""
    fields = dict(
        paper_id=row['paper_id'],
        source=row['source'],
        title=row['title'],
        abstract=row['abstract'],
        summary_llm=row['summary_llm'],
        authors=json.loads(row['authors']),
        published_date=date.fromisoformat(row['published_date']),
        categories=json.loads(row['categories']),
        app_category=row['app_category'],
        pdf_url=row['pdf_url'],
        abstract_url=row['abstract_url'],
        citation_count=row['citation_count'],
        relevance_score=row['relevance_score'],
        composite_score=row['composite_score'],
        fetched_at=datetime.fromisoformat(row['fetched_at']),
        is_bookmarked=bool(row['is_bookmarked']),
        is_read=bool(row['is_read']),
    )
    return Paper(**fields)
| 197 |
+
|
| 198 |
+
|
# ---------------------------------------------------------------------------
# CRUD Operations
# ---------------------------------------------------------------------------

@_retry_on_locked
def save_digest(db_path: str, digest: Digest) -> None:
    """Transactional insert of a digest + all its papers.

    Either the digest row, every paper row, and every digest->paper link
    commit together, or (on any error) the whole write rolls back.
    INSERT OR REPLACE makes re-saving the same digest_id idempotent.
    """
    conn = get_connection(db_path)
    try:
        conn.execute('BEGIN')

        # Insert digest record
        conn.execute(
            """INSERT OR REPLACE INTO digests
               (digest_id, week_start, generated_at, total_fetched,
                total_ranked, fetch_errors)
               VALUES (?, ?, ?, ?, ?, ?)""",
            (
                digest.digest_id,
                digest.week_start.isoformat(),
                digest.generated_at.isoformat(),
                digest.total_fetched,
                digest.total_ranked,
                json.dumps(digest.fetch_errors),
            ),
        )

        # Insert papers and link to digest
        # NOTE(review): `rank` is 1-based and keeps increasing across *all*
        # categories, so rank_order records overall insertion order rather
        # than a per-category rank — confirm readers expect that.
        rank = 0
        for category, papers in digest.papers.items():
            for paper in papers:
                conn.execute(
                    """INSERT OR REPLACE INTO papers
                       (paper_id, source, title, abstract, summary_llm, authors,
                        published_date, categories, app_category, pdf_url,
                        abstract_url, citation_count, relevance_score,
                        composite_score, fetched_at, is_bookmarked, is_read)
                       VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                    _paper_to_row(paper),
                )
                rank += 1
                conn.execute(
                    """INSERT OR REPLACE INTO digest_papers
                       (digest_id, paper_id, rank_order) VALUES (?, ?, ?)""",
                    (digest.digest_id, paper.paper_id, rank),
                )

        conn.commit()
        # After the loop, `rank` equals the total number of papers written.
        logger.info('Saved digest %s with %d papers', digest.digest_id, rank)
    except Exception:
        conn.rollback()
        logger.exception('Failed to save digest β rolled back')
        raise
    finally:
        conn.close()
| 254 |
+
|
| 255 |
+
|
@_retry_on_locked
def get_latest_digest(db_path: str) -> Optional[Digest]:
    """Load the most recent digest, or None if the database is empty.

    Recency is decided by the digests.generated_at ISO timestamp; the
    digest's papers are reattached grouped by app_category, preserving
    the stored rank_order within each group.
    """
    conn = get_connection(db_path)
    try:
        row = conn.execute(
            'SELECT * FROM digests ORDER BY generated_at DESC LIMIT 1'
        ).fetchone()
        if row is None:
            return None

        # Rebuild the Digest shell from its row (papers attached below).
        digest = Digest(
            digest_id=row['digest_id'],
            week_start=date.fromisoformat(row['week_start']),
            generated_at=datetime.fromisoformat(row['generated_at']),
            total_fetched=row['total_fetched'],
            total_ranked=row['total_ranked'],
            # fetch_errors may be NULL for old rows — treat as empty list.
            fetch_errors=json.loads(row['fetch_errors'] or '[]'),
        )

        # Load papers linked to this digest
        paper_rows = conn.execute(
            """SELECT p.* FROM papers p
               INNER JOIN digest_papers dp ON p.paper_id = dp.paper_id
               WHERE dp.digest_id = ?
               ORDER BY dp.rank_order""",
            (digest.digest_id,),
        ).fetchall()

        # Group by category; rank_order ordering is preserved within groups.
        papers_by_cat: dict = {}
        for pr in paper_rows:
            paper = _row_to_paper(pr)
            papers_by_cat.setdefault(paper.app_category, []).append(paper)
        digest.papers = papers_by_cat
        return digest
    finally:
        conn.close()
| 293 |
+
|
| 294 |
+
|
@_retry_on_locked
def get_papers(db_path: str, category: str, limit: int = 10) -> List[Paper]:
    """Get papers for a category, ordered by composite score (best first)."""
    conn = get_connection(db_path)
    try:
        cursor = conn.execute(
            """SELECT * FROM papers
               WHERE app_category = ?
               ORDER BY composite_score DESC
               LIMIT ?""",
            (category, limit),
        )
        return [_row_to_paper(record) for record in cursor.fetchall()]
    finally:
        conn.close()
| 310 |
+
|
| 311 |
+
|
@_retry_on_locked
def toggle_bookmark(db_path: str, paper_id: str) -> bool:
    """Toggle bookmark state; returns the new state.

    Returns False when the paper_id does not exist (nothing to toggle).
    """
    conn = get_connection(db_path)
    try:
        # Flip the flag in SQL so the update is a single atomic statement.
        conn.execute(
            """UPDATE papers
               SET is_bookmarked = CASE WHEN is_bookmarked = 0 THEN 1 ELSE 0 END
               WHERE paper_id = ?""",
            (paper_id,),
        )
        conn.commit()
        result = conn.execute(
            'SELECT is_bookmarked FROM papers WHERE paper_id = ?',
            (paper_id,),
        ).fetchone()
        if result is None:
            return False
        return bool(result['is_bookmarked'])
    finally:
        conn.close()
| 331 |
+
|
| 332 |
+
|
@_retry_on_locked
def mark_read(db_path: str, paper_id: str) -> None:
    """Mark a paper as read. A no-op for unknown paper_ids."""
    conn = get_connection(db_path)
    try:
        conn.execute('UPDATE papers SET is_read = 1 WHERE paper_id = ?', (paper_id,))
        conn.commit()
    finally:
        conn.close()
| 345 |
+
|
| 346 |
+
|
@_retry_on_locked
def get_bookmarked_papers(db_path: str) -> List[Paper]:
    """Return all bookmarked papers ordered by composite score (best first)."""
    conn = get_connection(db_path)
    try:
        records = conn.execute(
            """SELECT * FROM papers
               WHERE is_bookmarked = 1
               ORDER BY composite_score DESC"""
        ).fetchall()
        return list(map(_row_to_paper, records))
    finally:
        conn.close()
app/core/models.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Pure data models.
|
| 3 |
+
|
| 4 |
+
All models are standard Python dataclasses with no external dependencies,
|
| 5 |
+
making them fully testable in isolation.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import uuid
|
| 11 |
+
from dataclasses import dataclass, field
|
| 12 |
+
from datetime import date, datetime
|
| 13 |
+
from typing import Dict, List, Optional
|
| 14 |
+
|
| 15 |
+
|
@dataclass
class Paper:
    """A single research paper from any source.

    Serialized to/from SQLite by app.core.database (lists as JSON text,
    dates/datetimes as ISO-8601 text, booleans as 0/1).
    """

    paper_id: str                 # arXiv ID or PubMed PMID — primary key
    source: str                   # 'arxiv' | 'semantic_scholar' | 'pubmed'
    title: str
    abstract: str
    authors: List[str]
    published_date: date          # UTC
    categories: List[str]         # e.g. ['cs.LG', 'stat.ML']
    app_category: str             # mapped app category slug
    summary_llm: Optional[str] = None  # Brief summary (Idea, Method, Results) via Groq
    pdf_url: Optional[str] = None      # direct PDF link if available
    abstract_url: str = ''             # canonical web page
    citation_count: int = 0
    relevance_score: float = 0.0       # set by ranker
    composite_score: float = 0.0       # set by ranker
    # NOTE(review): utcnow() yields a *naive* datetime (and is deprecated in
    # 3.12) — consider datetime.now(timezone.utc) if tz-awareness is wanted.
    fetched_at: datetime = field(default_factory=datetime.utcnow)
    is_bookmarked: bool = False
    is_read: bool = False
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@dataclass
class Digest:
    """A weekly digest containing ranked papers per category.

    Attributes:
        digest_id: UUID4 hex identifier.
        week_start: Monday of the fetched week (ISO week convention).
        generated_at: naive UTC timestamp of creation.
        papers: app-category slug -> ranked list of Paper.
        total_fetched: number of papers fetched before ranking.
        total_ranked: number of papers that survived ranking.
        fetch_errors: human-readable non-fatal error messages.
    """

    digest_id: str                  # UUID4 hex
    week_start: date                # Monday of the fetched week (ISO)
    generated_at: datetime
    papers: Dict[str, List[Paper]] = field(default_factory=dict)
    total_fetched: int = 0
    total_ranked: int = 0
    fetch_errors: List[str] = field(default_factory=list)

    @classmethod
    def create_new(cls) -> 'Digest':
        """Factory: create a fresh Digest anchored to this ISO week's Monday."""
        now = datetime.utcnow()
        today = now.date()
        # ISO weeks start on Monday (weekday() == 0): subtracting the weekday
        # from the ordinal lands on Monday directly.  (The original reached
        # date.fromordinal via monday.__class__ — same behavior, clearer name.)
        monday = date.fromordinal(today.toordinal() - today.weekday())
        return cls(
            digest_id=uuid.uuid4().hex,
            week_start=monday,
            generated_at=now,
        )
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
@dataclass
class UserProfile:
    """User interest profile used by the ranker.

    interests maps an app-category slug to a free-text keyword string
    (presumably matched against paper text by the relevance ranker —
    confirm against app.ranker).  The three weight_* values sum to 1.0.
    """

    interests: Dict[str, str] = field(default_factory=lambda: {
        'ml': 'deep learning transformers attention',
        'ai': 'artificial intelligence language models',
        'cs': 'software engineering algorithms',
        'neuroscience': 'synaptic plasticity cortex neurons',
        'bci': 'brain computer interface EEG decoding',
    })
    weight_relevance: float = 0.60   # weight of text relevance in the composite
    weight_citation: float = 0.30    # weight of citation count
    weight_recency: float = 0.10     # weight of publication recency
    top_n_per_category: int = 5      # papers kept per category in a digest
|
app/core/notifier.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Notification wrapper.
|
| 3 |
+
|
| 4 |
+
Primary: Telegram Bot notifications (works on any phone).
|
| 5 |
+
Fallback: plyer local notifications (desktop / Kivy builds).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import os
|
| 12 |
+
from typing import Optional
|
| 13 |
+
|
| 14 |
+
from app.core.models import Digest
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def send_digest_notification(digest: Digest, data_dir: str = '') -> None:
    """Notify the user about *digest*.

    Delivery order: Telegram (requires *data_dir* for credentials) first,
    then a local plyer notification as the fallback.  Never raises.
    """
    delivered = False

    if data_dir:
        try:
            from app.core.telegram_bot import send_digest_notification as tg_send
        except ImportError:
            tg_send = None

        if tg_send is not None:
            try:
                delivered = bool(tg_send(digest, data_dir))
            except ImportError:
                pass
            except Exception:
                logger.debug('Telegram notification failed', exc_info=True)

    # Telegram unavailable or unsuccessful: local notification instead.
    if not delivered:
        _send_plyer_notification(digest)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _send_plyer_notification(digest: Digest) -> None:
    """Show a local notification via plyer (desktop / mobile Kivy builds)."""
    try:
        from plyer import notification
    except ImportError:
        logger.info('plyer not installed — skipping notification')
        return

    # Build one "<Category>: N papers" line per category; remember the first
    # available paper title as a headline.
    summary_lines = []
    headline = ''
    for cat, papers in digest.papers.items():
        n = len(papers)
        pretty = cat.replace('_', ' ').title()
        suffix = 's' if n != 1 else ''
        summary_lines.append(f'{pretty}: {n} paper{suffix}')
        if papers and not headline:
            headline = papers[0].title

    if not summary_lines:
        summary_lines.append('No new papers this week.')

    body = '\n'.join(summary_lines)
    if headline:
        # Keep the notification compact: truncate long titles to 80 chars.
        if len(headline) > 80:
            headline = headline[:77] + '...'
        body += f'\n\n📄 {headline}'

    try:
        notification.notify(
            title='ResearchRadar — New Papers!',
            message=body,
            app_name='ResearchRadar',
            timeout=10,
        )
        logger.info('Notification sent for digest %s', digest.digest_id)
    except NotImplementedError:
        logger.warning('Notifications not supported on this platform')
    except Exception:
        logger.warning('Notification failed', exc_info=True)
|
app/core/scheduler.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Job scheduling.
|
| 3 |
+
|
| 4 |
+
Uses APScheduler with CronTrigger for the weekly fetch job.
|
| 5 |
+
On Android, uses AlarmManager via pyjnius to wake the app if backgrounded.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
from typing import Callable, Optional
|
| 12 |
+
|
| 13 |
+
from app.core.config import SCHEDULE_DAY, SCHEDULE_HOUR, SCHEDULE_MINUTE
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
# ---------------------------------------------------------------------------
|
| 18 |
+
# APScheduler setup
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
|
| 21 |
+
def setup_scheduler(
    db_path: str,
    fetch_callback: Optional[Callable] = None,
) -> Optional[object]:
    """
    Initialise and start the APScheduler BackgroundScheduler.

    - CronTrigger: fires weekly at SCHEDULE_DAY SCHEDULE_HOUR:SCHEDULE_MINUTE.
    - misfire_grace_time: 3600s (fires within 1 hour of missed time).
    - max_instances: 1 (prevent overlapping fetch jobs).

    Args:
        db_path: SQLite file path; used by the default fetch callback and,
            when SQLAlchemy is installed, as the persistent job store.
        fetch_callback: zero-argument callable run on schedule; defaults to
            running the weekly fetch pipeline against *db_path*.

    Returns:
        The started BackgroundScheduler, or None when APScheduler is not
        installed.
    """
    try:
        from apscheduler.schedulers.background import BackgroundScheduler
        from apscheduler.triggers.cron import CronTrigger
    except ImportError:
        logger.warning('APScheduler not installed — scheduler disabled')
        return None

    if fetch_callback is None:
        # Imported lazily so the fetch pipeline stays optional at import time.
        from app.fetcher.fetch_pipeline import run_weekly_fetch

        def _default_callback():
            run_weekly_fetch(db_path)

        fetch_callback = _default_callback

    scheduler = BackgroundScheduler()

    # Try to use SQLAlchemy job store for persistence (jobs survive restarts);
    # otherwise fall back to the in-memory default store.
    try:
        from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
        jobstore = SQLAlchemyJobStore(url=f'sqlite:///{db_path}')
        scheduler.add_jobstore(jobstore, 'default')
    except ImportError:
        logger.info('SQLAlchemy not available — using memory job store')

    scheduler.add_job(
        fetch_callback,
        CronTrigger(
            day_of_week=SCHEDULE_DAY,
            hour=SCHEDULE_HOUR,
            minute=SCHEDULE_MINUTE,
        ),
        id='weekly_fetch',
        name='Weekly Paper Fetch',
        misfire_grace_time=3600,   # still fire up to 1h after a missed slot
        max_instances=1,           # never run two fetches concurrently
        replace_existing=True,     # idempotent across restarts with a job store
    )

    try:
        scheduler.start()
        logger.info(
            'Scheduler started — next fetch: %s %02d:%02d',
            SCHEDULE_DAY.upper(), SCHEDULE_HOUR, SCHEDULE_MINUTE,
        )
    except Exception as exc:
        # SchedulerAlreadyRunningError or other — log and continue
        logger.warning('Scheduler start issue (non-fatal): %s', exc)

    return scheduler
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ---------------------------------------------------------------------------
|
| 85 |
+
# Android AlarmManager integration (Android-only)
|
| 86 |
+
# ---------------------------------------------------------------------------
|
| 87 |
+
|
| 88 |
+
def setup_android_alarm() -> None:
    """
    Schedule an exact alarm via Android's AlarmManager to wake the app at
    Sunday SCHEDULE_HOUR:SCHEDULE_MINUTE.

    NOTE: setExactAndAllowWhileIdle() fires ONCE — it is not repeating.
    The app must re-arm the alarm after handling it.  (The previous version
    computed an unused weekly-interval value and claimed to be repeating.)

    Only called on Android; guarded by a platform check in main.py.
    Never raises: all failures are logged and swallowed.
    """
    try:
        from jnius import autoclass

        Context = autoclass('android.content.Context')
        Intent = autoclass('android.content.Intent')
        PendingIntent = autoclass('android.app.PendingIntent')
        AlarmManager = autoclass('android.app.AlarmManager')
        Calendar = autoclass('java.util.Calendar')

        from android import mActivity  # type: ignore[import]

        context = mActivity.getApplicationContext()
        alarm_mgr = context.getSystemService(Context.ALARM_SERVICE)

        # Relaunch the current activity when the alarm fires.
        intent = Intent(context, mActivity.getClass())
        pending = PendingIntent.getActivity(
            context, 0, intent,
            PendingIntent.FLAG_UPDATE_CURRENT | PendingIntent.FLAG_IMMUTABLE,
        )

        # Target: Sunday at the configured fetch time.
        # NOTE(review): if that moment already passed this week the alarm
        # fires immediately — confirm whether a +7-day roll-over is wanted.
        cal = Calendar.getInstance()
        cal.set(Calendar.DAY_OF_WEEK, Calendar.SUNDAY)
        cal.set(Calendar.HOUR_OF_DAY, SCHEDULE_HOUR)
        cal.set(Calendar.MINUTE, SCHEDULE_MINUTE)
        cal.set(Calendar.SECOND, 0)

        alarm_mgr.setExactAndAllowWhileIdle(
            AlarmManager.RTC_WAKEUP,
            cal.getTimeInMillis(),
            pending,
        )
        logger.info('Android AlarmManager set for Sunday %02d:%02d',
                    SCHEDULE_HOUR, SCHEDULE_MINUTE)

    except ImportError:
        logger.debug('pyjnius not available — not on Android')
    except Exception:
        logger.warning('Failed to set Android alarm', exc_info=True)
|
app/core/telegram_bot.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Telegram Bot notification system.
|
| 3 |
+
|
| 4 |
+
Sends formatted paper digests to the user's Telegram chat.
|
| 5 |
+
Replaces plyer notifications for phone delivery.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import logging
|
| 12 |
+
import os
|
| 13 |
+
from typing import Dict, List, Optional
|
| 14 |
+
|
| 15 |
+
import requests
|
| 16 |
+
|
| 17 |
+
from app.core.models import Digest, Paper
|
| 18 |
+
from app.core.config import CATEGORY_LABELS
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
# Configuration
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
|
| 26 |
+
_CONFIG_KEYS = ('telegram_bot_token', 'telegram_chat_id')
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _load_telegram_config(data_dir: str) -> dict:
|
| 30 |
+
"""Load Telegram config from settings.json."""
|
| 31 |
+
path = os.path.join(data_dir, 'settings.json')
|
| 32 |
+
if not os.path.exists(path):
|
| 33 |
+
return {}
|
| 34 |
+
try:
|
| 35 |
+
with open(path, 'r', encoding='utf-8') as f:
|
| 36 |
+
return json.load(f)
|
| 37 |
+
except (json.JSONDecodeError, OSError):
|
| 38 |
+
return {}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _get_credentials(data_dir: str) -> tuple:
|
| 42 |
+
"""
|
| 43 |
+
Get bot token and chat ID from settings or environment variables.
|
| 44 |
+
|
| 45 |
+
Priority: env vars > settings.json
|
| 46 |
+
"""
|
| 47 |
+
config = _load_telegram_config(data_dir)
|
| 48 |
+
|
| 49 |
+
token = (
|
| 50 |
+
os.getenv('TELEGRAM_BOT_TOKEN')
|
| 51 |
+
or config.get('telegram_bot_token', '')
|
| 52 |
+
)
|
| 53 |
+
chat_id = (
|
| 54 |
+
os.getenv('TELEGRAM_CHAT_ID')
|
| 55 |
+
or config.get('telegram_chat_id', '')
|
| 56 |
+
)
|
| 57 |
+
return token, chat_id
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# ---------------------------------------------------------------------------
|
| 61 |
+
# Message formatting
|
| 62 |
+
# ---------------------------------------------------------------------------
|
| 63 |
+
|
| 64 |
+
def _format_paper(rank: int, paper: Paper) -> str:
    """Format a single paper as a Telegram message block.

    Layout: linked title, italic author line, metadata line
    (date / composite score / citations), optional indented AI summary,
    optional PDF link.  Output targets Telegram 'Markdown' parse mode.
    """
    # Authors (first author + et al. when more than two names)
    if paper.authors:
        if len(paper.authors) > 2:
            authors = f"{paper.authors[0]} et al."
        else:
            authors = ", ".join(paper.authors)
    else:
        authors = "Unknown"

    # Score badge (composite score, 2 decimals)
    score = f"{paper.composite_score:.2f}"

    lines = [
        f"*{rank}.* [{paper.title}]({paper.abstract_url})",
        f"   👤 _{authors}_",
        f"   📅 {paper.published_date.isoformat()} • 📊 Score: {score} • 📈 Citations: {paper.citation_count}",
    ]

    # LLM Summary (Structured)
    if paper.summary_llm:
        lines.append("")
        lines.append(f"🤖 *AI Summary:*")
        # Indent the summary for readability
        for slink in paper.summary_llm.split('\n'):
            if slink.strip():
                lines.append(f"   _{slink.strip()}_")

    if paper.pdf_url:
        lines.append("")
        lines.append(f"   📄 [PDF]({paper.pdf_url})")

    return "\n".join(lines)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def format_digest_message(digest: Digest) -> str:
    """Format a full digest as a Telegram-ready Markdown message.

    Sections: header (week / generation time), one block per non-empty
    category with its papers, then a footer with pipeline counters and a
    non-fatal-error count.
    """
    lines = [
        "📡 *ResearchRadar — Daily Paper Digest*",
        f"📅 Week of {digest.week_start.isoformat()}",
        f"🕐 Generated: {digest.generated_at.strftime('%Y-%m-%d %H:%M UTC')}",
        "",
    ]

    total_papers = 0

    for cat_slug, papers in digest.papers.items():
        # Empty categories are omitted entirely.
        if not papers:
            continue

        cat_name = CATEGORY_LABELS.get(cat_slug, cat_slug.title())
        total_papers += len(papers)

        lines.append(f"━━━━━━━━━━━━━━━━━━━━")
        lines.append(f"🔬 *{cat_name}* ({len(papers)} papers)")
        lines.append("")

        for i, paper in enumerate(papers, 1):
            lines.append(_format_paper(i, paper))
            lines.append("")

    if total_papers == 0:
        lines.append("_No new papers found this cycle. Check back tomorrow!_")

    # Summary footer
    lines.append("━━━━━━━━━━━━━━━━━━━━")
    lines.append(
        f"📊 *Summary:* {digest.total_fetched} fetched → "
        f"{digest.total_ranked} ranked → {total_papers} delivered"
    )

    if digest.fetch_errors:
        lines.append(f"⚠️ {len(digest.fetch_errors)} non-fatal errors logged")

    return "\n".join(lines)
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def format_short_notification(digest: Digest) -> str:
    """Format a short notification summary.

    Returns a headline plus per-category counts, or a fixed 'no new papers'
    line when every category is empty.
    """
    counts = []
    for cat_slug, papers in digest.papers.items():
        if papers:
            label = CATEGORY_LABELS.get(cat_slug, cat_slug.title())
            counts.append(f"{label}: {len(papers)}")

    if not counts:
        return "📡 ResearchRadar: No new papers found today."

    summary = " | ".join(counts)
    total = sum(len(p) for p in digest.papers.values())
    return f"📡 *ResearchRadar* — {total} new papers!\n{summary}"
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
# ---------------------------------------------------------------------------
|
| 159 |
+
# Sending
|
| 160 |
+
# ---------------------------------------------------------------------------
|
| 161 |
+
|
| 162 |
+
def send_message(
    token: str,
    chat_id: str,
    text: str,
    parse_mode: str = 'Markdown',
    disable_preview: bool = True,
) -> bool:
    """
    Deliver *text* to *chat_id* through the Telegram Bot API.

    Messages above Telegram's ~4096-char limit are split at line boundaries
    and sent sequentially.  Returns True on success, False on failure
    (never raises).
    """
    # Telegram has a 4096 char limit per message
    if len(text) > 4000:
        return _send_chunked(token, chat_id, text, parse_mode, disable_preview)

    endpoint = f"https://api.telegram.org/bot{token}/sendMessage"
    payload = {
        'chat_id': chat_id,
        'text': text,
        'parse_mode': parse_mode,
        'disable_web_page_preview': disable_preview,
    }

    try:
        resp = requests.post(endpoint, json=payload, timeout=15)
        if resp.status_code != 200:
            logger.error('Telegram HTTP %d: %s', resp.status_code, resp.text[:200])
            return False
        # resp.json() may raise requests.exceptions.JSONDecodeError,
        # a RequestException subclass — keep it inside this try.
        data = resp.json()
    except requests.exceptions.RequestException as exc:
        logger.error('Telegram send failed: %s', exc)
        return False

    if data.get('ok'):
        logger.info('Telegram message sent to chat %s', chat_id)
        return True

    logger.error('Telegram API error: %s', data.get('description'))
    return False
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def _send_chunked(
    token: str,
    chat_id: str,
    text: str,
    parse_mode: str,
    disable_preview: bool,
) -> bool:
    """
    Split a long message at line boundaries and send the chunks in order.

    Chunks are capped at ~3800 chars (safely below Telegram's 4096 limit)
    with a short pause between sends as rate-limiting courtesy.  Returns
    True only when every chunk was delivered.
    """
    import time  # hoisted: previously re-imported inside the send loop

    chunks = []
    current = ""

    for line in text.split("\n"):
        # Close the current chunk once adding this line would exceed the cap.
        if len(current) + len(line) + 1 > 3800 and current:
            chunks.append(current)
            current = line
        else:
            current = current + "\n" + line if current else line

    if current:
        chunks.append(current)

    success = True
    for i, chunk in enumerate(chunks):
        if i > 0:
            time.sleep(0.5)  # Rate limiting courtesy

        ok = send_message(token, chat_id, chunk, parse_mode, disable_preview)
        if not ok:
            success = False

    return success
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# ---------------------------------------------------------------------------
|
| 244 |
+
# High-level API
|
| 245 |
+
# ---------------------------------------------------------------------------
|
| 246 |
+
|
| 247 |
+
def send_digest_notification(digest: Digest, data_dir: str) -> bool:
    """
    Push the digest to the configured Telegram chat.

    A short headline message is sent first, followed by the full digest.
    Credentials come from env vars or settings.json (see _get_credentials).
    Returns True on success, False on failure (never raises).
    """
    token, chat_id = _get_credentials(data_dir)

    if not (token and chat_id):
        logger.warning(
            'Telegram not configured — set TELEGRAM_BOT_TOKEN and '
            'TELEGRAM_CHAT_ID in environment or settings.json'
        )
        return False

    # Headline first; its delivery status is intentionally ignored — only
    # the full digest determines the reported outcome.
    send_message(token, chat_id, format_short_notification(digest))

    return send_message(token, chat_id, format_digest_message(digest))
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def send_test_message(data_dir: str) -> bool:
    """Send a test message to verify Telegram setup.

    CLI helper: prints setup guidance / the outcome to stdout and returns
    the delivery result from send_message.
    """
    token, chat_id = _get_credentials(data_dir)

    if not token or not chat_id:
        print("❌ Telegram not configured!")
        print("   Set TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID in settings.json")
        print("   or as environment variables.")
        return False

    text = (
        "✅ *ResearchRadar — Test Message*\n\n"
        "Your Telegram notifications are working!\n"
        "You'll receive daily paper digests at your configured time."
    )
    success = send_message(token, chat_id, text)

    if success:
        print("✅ Test message sent! Check your Telegram.")
    else:
        print("❌ Failed to send test message. Check your bot token and chat ID.")

    return success
|
app/fetcher/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Data acquisition layer
|
app/fetcher/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (147 Bytes). View file
|
|
|
app/fetcher/__pycache__/arxiv_client.cpython-312.pyc
ADDED
|
Binary file (6.43 kB). View file
|
|
|
app/fetcher/__pycache__/fetch_pipeline.cpython-312.pyc
ADDED
|
Binary file (9.61 kB). View file
|
|
|
app/fetcher/__pycache__/http_session.cpython-312.pyc
ADDED
|
Binary file (8.26 kB). View file
|
|
|
app/fetcher/__pycache__/pubmed_client.cpython-312.pyc
ADDED
|
Binary file (7.59 kB). View file
|
|
|
app/fetcher/__pycache__/semantic_scholar.cpython-312.pyc
ADDED
|
Binary file (6.79 kB). View file
|
|
|
app/fetcher/arxiv_client.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β arXiv Atom API client.
|
| 3 |
+
|
| 4 |
+
Fetches papers submitted/updated within the last N days for given arXiv
|
| 5 |
+
categories. Uses xml.etree.ElementTree (stdlib) β no lxml needed.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import xml.etree.ElementTree as ET
|
| 12 |
+
from datetime import date, datetime, timedelta
|
| 13 |
+
from typing import List
|
| 14 |
+
|
| 15 |
+
from app.core.config import ARXIV_BASE_URL, ARXIV_MAX_RESULTS
|
| 16 |
+
from app.core.models import Paper
|
| 17 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
# arXiv Atom namespace
|
| 22 |
+
_NS = {
|
| 23 |
+
'atom': 'http://www.w3.org/2005/Atom',
|
| 24 |
+
'arxiv': 'http://arxiv.org/schemas/atom',
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def fetch_papers(
    category_slug: str,
    arxiv_cats: List[str],
    session: RetrySession,
    days_back: int = 7,
) -> List[Paper]:
    """
    Fetch papers submitted/updated within *days_back* days across all
    arXiv categories in *arxiv_cats*.

    Args:
        category_slug: app-level category slug the results are tagged with.
        arxiv_cats: arXiv taxonomy codes, e.g. ['cs.LG', 'stat.ML'].
        session: retrying HTTP session (app.fetcher.http_session).
        days_back: size of the date window ending today, passed to
            _parse_entry as [start, end].

    Returns a list of Paper instances. Never raises — returns [] on error.
    """
    today = date.today()
    start = today - timedelta(days=days_back)
    end = today

    # Boolean OR over all requested arXiv categories.
    query = '(' + ' OR '.join(f'cat:{c}' for c in arxiv_cats) + ')'

    params = {
        'search_query': query,
        'start': 0,                        # result offset (no pagination here)
        'max_results': ARXIV_MAX_RESULTS,
        'sortBy': 'submittedDate',         # newest submissions first
        'sortOrder': 'descending',
    }

    try:
        response = session.get(ARXIV_BASE_URL, params=params)
    except FetchError as exc:
        logger.error('arXiv fetch failed for %s: %s', category_slug, exc)
        return []

    try:
        root = ET.fromstring(response.text)
    except ET.ParseError as exc:
        logger.error(
            'arXiv XML parse error: %s — snippet: %s',
            exc, response.text[:300],
        )
        return []

    papers: List[Paper] = []

    # Each Atom <entry> is parsed independently; malformed ones are skipped
    # without aborting the whole fetch.
    for entry in root.findall('atom:entry', _NS):
        try:
            paper = _parse_entry(entry, category_slug, start, end)
            if paper is not None:
                papers.append(paper)
        except Exception:
            logger.debug('Skipping malformed arXiv entry', exc_info=True)

    logger.info(
        'arXiv: fetched %d papers for [%s] (%s)',
        len(papers), category_slug, ', '.join(arxiv_cats),
    )
    return papers
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _parse_entry(
    entry: ET.Element,
    category_slug: str,
    start: date,
    end: date,
) -> Paper | None:
    """Parse a single Atom <entry> into a Paper, or return None.

    Returns None when the entry lacks a title, abstract or arXiv ID, or
    when its published date falls outside the inclusive [start, end] window.

    Args:
        entry: <entry> element from the arXiv Atom feed.
        category_slug: app category slug the Paper is tagged with.
        start, end: inclusive date window to keep papers in.
    """
    title_el = entry.find('atom:title', _NS)
    abstract_el = entry.find('atom:summary', _NS)
    if title_el is None or abstract_el is None:
        return None

    # Collapse the internal newlines/indentation arXiv inserts.
    title = ' '.join((title_el.text or '').split())
    abstract = ' '.join((abstract_el.text or '').split())
    if not title or not abstract:
        logger.debug('Skipping entry with empty title/abstract')
        return None

    # arXiv ID (strip the canonical abs-URL prefix)
    id_el = entry.find('atom:id', _NS)
    raw_id = (id_el.text or '') if id_el is not None else ''
    arxiv_id = raw_id.replace('http://arxiv.org/abs/', '').strip()
    if not arxiv_id:
        return None
    paper_id = f'arxiv:{arxiv_id}'

    # Authors
    authors = []
    for author_el in entry.findall('atom:author', _NS):
        name_el = author_el.find('atom:name', _NS)
        if name_el is not None and name_el.text:
            authors.append(name_el.text.strip())

    # Published date (ISO-8601 with trailing 'Z'); fall back to today.
    pub_el = entry.find('atom:published', _NS)
    pub_text = (pub_el.text or '') if pub_el is not None else ''
    try:
        published = datetime.fromisoformat(
            pub_text.replace('Z', '+00:00')
        ).date()
    except (ValueError, TypeError):
        published = date.today()

    # BUGFIX: the [start, end] window was accepted but never applied, so
    # stale results from low-volume categories slipped into the digest.
    if not (start <= published <= end):
        return None

    # Categories
    categories = []
    for cat_el in entry.findall('atom:category', _NS):
        term = cat_el.get('term', '')
        if term:
            categories.append(term)

    # PDF link: prefer the feed's own link, else the canonical pdf URL.
    pdf_url = None
    for link_el in entry.findall('atom:link', _NS):
        if link_el.get('title') == 'pdf':
            pdf_url = link_el.get('href')
            break
    if pdf_url is None and arxiv_id:
        pdf_url = f'https://arxiv.org/pdf/{arxiv_id}'

    abstract_url = f'https://arxiv.org/abs/{arxiv_id}'

    return Paper(
        paper_id=paper_id,
        source='arxiv',
        title=title,
        abstract=abstract,
        authors=authors,
        published_date=published,
        categories=categories,
        app_category=category_slug,
        pdf_url=pdf_url,
        abstract_url=abstract_url,
    )
|
app/fetcher/crossref_client.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β CrossRef DOI client.
|
| 3 |
+
|
| 4 |
+
DOI resolution & citation metadata fallback.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
from datetime import date
|
| 11 |
+
from typing import Optional
|
| 12 |
+
|
| 13 |
+
from app.core.config import CROSSREF_BASE_URL
|
| 14 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_citation_count(doi: str, session: RetrySession) -> Optional[int]:
    """
    Look up the CrossRef 'is-referenced-by-count' field for *doi*.

    Best-effort enrichment: any fetch or parse problem yields ``None``
    instead of raising.
    """
    try:
        payload = session.get(
            f'{CROSSREF_BASE_URL}/{doi}',
            headers={'Accept': 'application/json'},
        ).json()
    except (FetchError, ValueError, KeyError) as exc:
        logger.debug('CrossRef lookup failed for DOI %s: %s', doi, exc)
        return None
    return payload.get('message', {}).get('is-referenced-by-count')
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def resolve_doi(doi: str, session: RetrySession) -> Optional[dict]:
    """
    Resolve *doi* via CrossRef and return basic metadata.

    Returns a dict with 'doi', 'title', 'authors' and 'citation_count',
    or ``None`` when the lookup fails (best-effort).
    """
    try:
        payload = session.get(
            f'{CROSSREF_BASE_URL}/{doi}',
            headers={'Accept': 'application/json'},
        ).json()
        record = payload.get('message', {})

        titles = record.get('title', [])
        author_names = [
            f"{person.get('given', '')} {person.get('family', '')}".strip()
            for person in record.get('author', [])
        ]

        return {
            'doi': doi,
            'title': titles[0] if titles else '',
            'authors': author_names,
            'citation_count': record.get('is-referenced-by-count', 0),
        }
    except (FetchError, ValueError, KeyError) as exc:
        logger.debug('CrossRef resolve failed for DOI %s: %s', doi, exc)
        return None
|
app/fetcher/fetch_pipeline.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Fetch pipeline orchestration.
|
| 3 |
+
|
| 4 |
+
Contains the main Sunday job logic. Coordinates all API clients,
|
| 5 |
+
handles fallback, deduplication, ranking, storage, and notification.
|
| 6 |
+
|
| 7 |
+
This function must **never raise** β all exceptions are caught and
|
| 8 |
+
logged into ``Digest.fetch_errors``.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import logging
|
| 14 |
+
from difflib import SequenceMatcher
|
| 15 |
+
from typing import Dict, List
|
| 16 |
+
|
| 17 |
+
from app.core.config import (
|
| 18 |
+
ARXIV_CATEGORY_MAP,
|
| 19 |
+
KEYWORD_MAP,
|
| 20 |
+
PUBMED_MESH_MAP,
|
| 21 |
+
TOP_N_PER_CATEGORY,
|
| 22 |
+
AI_FILTERS,
|
| 23 |
+
)
|
| 24 |
+
from app.core.models import Digest, Paper, UserProfile
|
| 25 |
+
from app.core import database
|
| 26 |
+
from app.fetcher import arxiv_client, pubmed_client, semantic_scholar
|
| 27 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 28 |
+
from app.ranker import composite_ranker
|
| 29 |
+
from app.summarizer.groq_client import GroqSummarizer
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ---------------------------------------------------------------------------
|
| 35 |
+
# Public entry point
|
| 36 |
+
# ---------------------------------------------------------------------------
|
| 37 |
+
|
| 38 |
+
def run_weekly_fetch(
    db_path: str,
    profile: UserProfile | None = None,
) -> Digest:
    """
    Main weekly pipeline. Called by the scheduler every Sunday.

    1. Fetch papers from arXiv (primary) with Semantic Scholar fallback.
    2. For neuro/BCI categories, additionally fetch from PubMed and merge.
    3. Enrich citation counts (best-effort).
    4. Rank papers via composite ranker.
    5. Save digest to DB and send notification.
    6. Return the Digest.

    Never raises: every fetch/persist/notify failure is caught and
    recorded in ``digest.fetch_errors`` (or only logged, for the
    notification step).
    """
    if profile is None:
        profile = UserProfile()

    digest = Digest.create_new()
    # One shared HTTP session (retry/back-off) for all sources this run.
    session = RetrySession()
    all_papers: Dict[str, List[Paper]] = {}

    for category, arxiv_cats in ARXIV_CATEGORY_MAP.items():
        papers = _fetch_category(category, arxiv_cats, session, digest)

        # PubMed supplement for neuroscience & BCI
        if category in PUBMED_MESH_MAP:
            pubmed_papers = _fetch_pubmed(category, session, digest)
            # Merge the two sources; _deduplicate keeps the preferred
            # source (arXiv > Semantic Scholar > PubMed) on collisions.
            papers = _deduplicate(papers + pubmed_papers)

        # Enforce AI filter for neuro categories
        # "I want only those papers in neuroscience and BCI which has in someway AI or ML"
        # NOTE(review): this filter runs for EVERY category (it is outside
        # the PUBMED_MESH_MAP branch), not just neuro/BCI as the quote
        # above suggests β confirm whether that is intended.
        papers = _ai_filter(papers)

        all_papers[category] = papers

    # Enrich citation counts (best-effort)
    flat = [p for cat_list in all_papers.values() for p in cat_list]
    try:
        semantic_scholar.enrich_citations(flat, session)
    except Exception as exc:
        logger.warning('Citation enrichment failed: %s', exc)
        digest.fetch_errors.append(f'Citation enrichment: {exc}')

    # Rank
    # total_fetched is counted after dedup/filter, before ranking trims.
    digest.total_fetched = sum(len(v) for v in all_papers.values())
    ranked = composite_ranker.rank_all(all_papers, profile)

    # After ranking, summarize the top papers for the digest
    # (Only summarizes top N results that appear in the final ranked lists)
    _summarize_top_papers(ranked)

    digest.papers = ranked
    digest.total_ranked = sum(len(v) for v in ranked.values())

    # Persist
    try:
        database.save_digest(db_path, digest)
    except Exception as exc:
        logger.error('Failed to save digest: %s', exc)
        digest.fetch_errors.append(f'DB save error: {exc}')

    # Notification (best-effort)
    try:
        # Imported lazily so a broken notifier cannot break the pipeline
        # at module-import time.
        from app.core.notifier import send_digest_notification
        send_digest_notification(digest)
    except Exception as exc:
        logger.warning('Notification failed: %s', exc)

    return digest
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# ---------------------------------------------------------------------------
|
| 110 |
+
# Internal helpers
|
| 111 |
+
# ---------------------------------------------------------------------------
|
| 112 |
+
|
| 113 |
+
def _fetch_category(
    category: str,
    arxiv_cats: list,
    session: RetrySession,
    digest: Digest,
) -> List[Paper]:
    """Fetch from arXiv; fall back to Semantic Scholar on error or empty result.

    Errors from either source are logged and recorded in
    ``digest.fetch_errors``; this helper itself never raises.
    """
    results: List[Paper] = []

    try:
        results = arxiv_client.fetch_papers(category, arxiv_cats, session)
    except Exception as exc:
        error_text = f'arXiv error [{category}]: {exc}'
        logger.warning(error_text)
        digest.fetch_errors.append(error_text)

    if results:
        return results

    # Primary source came back empty β try the fallback.
    logger.info('arXiv empty for [%s] β trying Semantic Scholar', category)
    try:
        results = semantic_scholar.fetch_papers(
            category, KEYWORD_MAP.get(category, [category]), session
        )
    except Exception as exc:
        error_text = f'Semantic Scholar error [{category}]: {exc}'
        logger.warning(error_text)
        digest.fetch_errors.append(error_text)

    if not results:
        logger.info('No papers found for [%s] from any source', category)

    return results
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _fetch_pubmed(
    category: str,
    session: RetrySession,
    digest: Digest,
) -> List[Paper]:
    """Fetch supplemental papers from PubMed for *category*.

    Returns [] when the category has no MeSH mapping or when the
    fetch fails; failures are recorded in ``digest.fetch_errors``.
    """
    mesh_query = PUBMED_MESH_MAP.get(category, '')
    if not mesh_query:
        return []

    try:
        return pubmed_client.fetch_papers(category, mesh_query, session)
    except Exception as exc:
        error_text = f'PubMed error [{category}]: {exc}'
        logger.warning(error_text)
        digest.fetch_errors.append(error_text)
        return []
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def _summarize_top_papers(papers_by_cat: Dict[str, List[Paper]]):
    """Call Groq to summarize papers in the final digest list (in place)."""
    summarizer = GroqSummarizer()
    for category, paper_list in papers_by_cat.items():
        if not paper_list:
            continue
        logger.info("Summarizing %d papers for category [%s]...", len(paper_list), category)
        summarizer.summarize_many(paper_list)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def _ai_filter(papers: List[Paper]) -> List[Paper]:
|
| 173 |
+
"""Filter to only include papers mentioning AI/ML keywords in title or abstract."""
|
| 174 |
+
if not papers:
|
| 175 |
+
return []
|
| 176 |
+
|
| 177 |
+
result = []
|
| 178 |
+
for p in papers:
|
| 179 |
+
text = (p.title + " " + p.abstract).lower()
|
| 180 |
+
if any(f in text for f in AI_FILTERS):
|
| 181 |
+
result.append(p)
|
| 182 |
+
return result
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _deduplicate(papers: List[Paper]) -> List[Paper]:
|
| 186 |
+
"""
|
| 187 |
+
Remove duplicate papers.
|
| 188 |
+
|
| 189 |
+
Two papers are considered duplicates if:
|
| 190 |
+
- Their paper_id matches, OR
|
| 191 |
+
- Their title similarity (SequenceMatcher ratio) > 0.92
|
| 192 |
+
|
| 193 |
+
When merging, prefer arXiv > Semantic Scholar > PubMed.
|
| 194 |
+
"""
|
| 195 |
+
SOURCE_PRIORITY = {'arxiv': 0, 'semantic_scholar': 1, 'pubmed': 2}
|
| 196 |
+
seen_ids: set = set()
|
| 197 |
+
seen_titles: List[str] = []
|
| 198 |
+
result: List[Paper] = []
|
| 199 |
+
|
| 200 |
+
# Sort by source priority so preferred sources come first
|
| 201 |
+
papers.sort(key=lambda p: SOURCE_PRIORITY.get(p.source, 9))
|
| 202 |
+
|
| 203 |
+
for paper in papers:
|
| 204 |
+
if paper.paper_id in seen_ids:
|
| 205 |
+
continue
|
| 206 |
+
|
| 207 |
+
is_dup = False
|
| 208 |
+
for existing_title in seen_titles:
|
| 209 |
+
if SequenceMatcher(None, paper.title.lower(), existing_title).ratio() > 0.92:
|
| 210 |
+
is_dup = True
|
| 211 |
+
break
|
| 212 |
+
|
| 213 |
+
if is_dup:
|
| 214 |
+
continue
|
| 215 |
+
|
| 216 |
+
seen_ids.add(paper.paper_id)
|
| 217 |
+
seen_titles.append(paper.title.lower())
|
| 218 |
+
result.append(paper)
|
| 219 |
+
|
| 220 |
+
if len(papers) != len(result):
|
| 221 |
+
logger.info(
|
| 222 |
+
'Deduplication: %d β %d papers', len(papers), len(result),
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
return result
|
app/fetcher/http_session.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β RetrySession.
|
| 3 |
+
|
| 4 |
+
Single point of contact for all outbound HTTP.
|
| 5 |
+
No other module calls `requests` directly.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import random
|
| 12 |
+
import time
|
| 13 |
+
from typing import Optional, Set
|
| 14 |
+
|
| 15 |
+
import requests
|
| 16 |
+
|
| 17 |
+
from app.core.config import (
|
| 18 |
+
HTTP_BACKOFF_BASE,
|
| 19 |
+
HTTP_BACKOFF_MAX,
|
| 20 |
+
HTTP_MAX_RETRIES,
|
| 21 |
+
HTTP_TIMEOUT,
|
| 22 |
+
RETRY_STATUS_CODES,
|
| 23 |
+
USER_AGENT,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
# ---------------------------------------------------------------------------
|
| 29 |
+
# Custom Exceptions
|
| 30 |
+
# ---------------------------------------------------------------------------
|
| 31 |
+
|
| 32 |
+
class FetchError(Exception):
    """Base exception for all fetch-related errors."""


class FetchTimeoutError(FetchError):
    """Raised when a request times out."""


class FetchNetworkError(FetchError):
    """Raised on connection / DNS errors."""


class SourceNotFoundError(FetchError):
    """Raised on HTTP 404."""


class SourceAuthError(FetchError):
    """Raised on HTTP 401 / 403."""


class MaxRetriesExceeded(FetchError):
    """Raised when all retry attempts are exhausted."""
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# ---------------------------------------------------------------------------
|
| 63 |
+
# RetrySession
|
| 64 |
+
# ---------------------------------------------------------------------------
|
| 65 |
+
|
| 66 |
+
class RetrySession:
    """HTTP GET/POST wrapper with exponential back-off, retries, and error mapping.

    All outbound HTTP goes through this class; no other module calls
    `requests` directly. Non-200 responses are mapped to typed
    `FetchError` subclasses.
    """

    def __init__(
        self,
        max_retries: int = HTTP_MAX_RETRIES,
        backoff_base: int = HTTP_BACKOFF_BASE,
        backoff_max: int = HTTP_BACKOFF_MAX,
        timeout: int = HTTP_TIMEOUT,
        retry_status_codes: Optional[Set[int]] = None,
    ):
        self.max_retries = max_retries
        self.backoff_base = backoff_base
        self.backoff_max = backoff_max
        self.timeout = timeout
        self.retry_status_codes = retry_status_codes or RETRY_STATUS_CODES
        self._session = requests.Session()
        self._session.headers.update({'User-Agent': USER_AGENT})

    # ------------------------------------------------------------------
    def get(
        self,
        url: str,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
    ) -> requests.Response:
        """
        GET *url* with automatic retries and exponential back-off.

        Returns a `requests.Response` with status 200 on success.
        Raises a typed `FetchError` subclass on failure.
        """
        return self._request('GET', url, params=params, headers=headers)

    # ------------------------------------------------------------------
    def post(
        self,
        url: str,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
    ) -> requests.Response:
        """POST with the same retry / error logic as GET."""
        return self._request('POST', url, json=json, headers=headers)

    # ------------------------------------------------------------------
    def _request(
        self,
        method: str,
        url: str,
        *,
        params: Optional[dict] = None,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
    ) -> requests.Response:
        """Shared retry / back-off / error-mapping loop for GET and POST.

        Previously this logic was duplicated verbatim in get() and post();
        keeping it in one place keeps the two methods in sync.
        """
        merged_headers = dict(self._session.headers)
        if headers:
            merged_headers.update(headers)

        for attempt in range(self.max_retries + 1):
            try:
                if method == 'GET':
                    resp = self._session.get(
                        url,
                        params=params,
                        headers=merged_headers,
                        timeout=self.timeout,
                    )
                else:
                    resp = self._session.post(
                        url,
                        json=json,
                        headers=merged_headers,
                        timeout=self.timeout,
                    )

                if resp.status_code == 200:
                    return resp

                # Transient server-side statuses: back off and retry.
                if resp.status_code in self.retry_status_codes:
                    wait = min(
                        self.backoff_base ** attempt + random.uniform(0, 1),
                        self.backoff_max,
                    )
                    logger.warning(
                        '%s %d from %s β retrying in %.1fs (attempt %d/%d)',
                        method, resp.status_code, url, wait,
                        attempt + 1, self.max_retries,
                    )
                    time.sleep(wait)
                    continue

                if resp.status_code == 404:
                    raise SourceNotFoundError(f'404 Not Found: {url}')

                if resp.status_code in {400, 401, 403}:
                    raise SourceAuthError(
                        f'HTTP {resp.status_code} from {url}'
                    )

                # Other 4xx / unexpected codes
                raise FetchError(
                    f'HTTP {resp.status_code} from {url}: '
                    f'{resp.text[:200]}'
                )

            except requests.exceptions.Timeout as exc:
                raise FetchTimeoutError(f'Timeout on {url}') from exc

            except requests.exceptions.ConnectionError as exc:
                raise FetchNetworkError(f'Connection error on {url}') from exc

            except requests.exceptions.RequestException as exc:
                raise FetchError(f'Request error on {url}: {exc}') from exc

            except FetchError:
                raise  # re-raise our own typed exceptions unchanged

        label = url if method == 'GET' else f'POST {url}'
        raise MaxRetriesExceeded(
            f'All {self.max_retries} retries exhausted for {label}'
        )
|
app/fetcher/pubmed_client.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β PubMed E-utilities client.
|
| 3 |
+
|
| 4 |
+
Supplemental source for Neuroscience and BCI categories only.
|
| 5 |
+
Two-step process: ESearch to get IDs, then EFetch to get abstracts.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import xml.etree.ElementTree as ET
|
| 12 |
+
from datetime import date, datetime
|
| 13 |
+
from typing import List, Optional
|
| 14 |
+
|
| 15 |
+
from app.core.config import NCBI_API_KEY, PUBMED_BASE_URL
|
| 16 |
+
from app.core.models import Paper
|
| 17 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def fetch_papers(
    category_slug: str,
    mesh_terms: str,
    session: RetrySession,
    days_back: int = 7,
) -> List[Paper]:
    """
    Fetch recent papers from PubMed matching *mesh_terms*.

    Two-step E-utilities flow: ESearch (with history server) to collect
    IDs, then EFetch to pull the abstracts. Returns a list of Paper
    instances; never raises, returning [] on any error.
    """

    # ---------------------------------------------------------------
    # Step 1 β ESearch
    # ---------------------------------------------------------------
    search_params: dict = {
        'db': 'pubmed',
        'term': f'{mesh_terms} AND ("last {days_back} days"[PDat])',
        'retmax': 50,
        'retmode': 'json',
        'usehistory': 'y',
    }
    if NCBI_API_KEY:
        search_params['api_key'] = NCBI_API_KEY

    try:
        search_resp = session.get(
            f'{PUBMED_BASE_URL}/esearch.fcgi', params=search_params
        )
    except FetchError as exc:
        logger.error('PubMed ESearch failed for %s: %s', category_slug, exc)
        return []

    try:
        search_payload = search_resp.json()
    except ValueError:
        logger.error('PubMed ESearch returned invalid JSON')
        return []

    search_result = search_payload.get('esearchresult', {})
    if int(search_result.get('count', 0)) == 0:
        logger.info('PubMed: 0 results for %s', category_slug)
        return []

    web_env = search_result.get('webenv', '')
    query_key = search_result.get('querykey', '')
    if not (web_env and query_key):
        logger.error('PubMed ESearch missing WebEnv / query_key')
        return []

    # ---------------------------------------------------------------
    # Step 2 β EFetch
    # ---------------------------------------------------------------
    fetch_params: dict = {
        'db': 'pubmed',
        'WebEnv': web_env,
        'query_key': query_key,
        'retmax': 50,
        'retmode': 'xml',
        'rettype': 'abstract',
    }
    if NCBI_API_KEY:
        fetch_params['api_key'] = NCBI_API_KEY

    try:
        fetch_resp = session.get(
            f'{PUBMED_BASE_URL}/efetch.fcgi', params=fetch_params
        )
    except FetchError as exc:
        logger.error('PubMed EFetch failed for %s: %s', category_slug, exc)
        return []

    try:
        xml_root = ET.fromstring(fetch_resp.text)
    except ET.ParseError as exc:
        logger.error('PubMed XML parse error: %s', exc)
        return []

    papers: List[Paper] = []
    for article_el in xml_root.findall('.//PubmedArticle'):
        try:
            parsed = _parse_article(article_el, category_slug)
        except Exception:
            logger.debug('Skipping malformed PubMed article', exc_info=True)
            continue
        if parsed is not None:
            papers.append(parsed)

    logger.info('PubMed: fetched %d papers for [%s]', len(papers), category_slug)
    return papers
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# ---------------------------------------------------------------------------
|
| 115 |
+
# XML parsing helpers
|
| 116 |
+
# ---------------------------------------------------------------------------
|
| 117 |
+
|
| 118 |
+
def _parse_article(el: ET.Element, category_slug: str) -> Optional[Paper]:
    """Parse a single <PubmedArticle> element into a Paper.

    Returns None when the article lacks a PMID, title, or abstract.
    """

    # PMID — required; it forms the stable paper_id.
    pmid_el = el.find('.//PMID')
    if pmid_el is None or not pmid_el.text:
        return None
    pmid = pmid_el.text.strip()
    paper_id = f'pubmed:{pmid}'

    # Title. Use itertext() rather than .text: Element.text stops at the
    # first child element, so inline markup (<i>, <sup>, ...) common in
    # PubMed titles would otherwise truncate the text.
    title_el = el.find('.//ArticleTitle')
    title = ''.join(title_el.itertext()).strip() if title_el is not None else ''
    if not title:
        return None

    # Abstract — may be structured (Background, Methods, etc.).
    # itertext() again, for the same inline-markup reason as the title.
    abstract_parts: List[str] = []
    for abs_el in el.findall('.//AbstractText'):
        label = abs_el.get('Label', '')
        text = ''.join(abs_el.itertext()).strip()
        if label and text:
            abstract_parts.append(f'{label}: {text}')
        elif text:
            abstract_parts.append(text)
    abstract = '\n'.join(abstract_parts)
    if not abstract:
        return None

    # Authors (LastName is required; ForeName is optional)
    authors: List[str] = []
    for author_el in el.findall('.//Author'):
        last = author_el.findtext('LastName', '').strip()
        fore = author_el.findtext('ForeName', '').strip()
        if last:
            name = f'{fore} {last}'.strip()
            authors.append(name)

    # Publication date (best-effort)
    pub_date = _parse_pub_date(el)

    abstract_url = f'https://pubmed.ncbi.nlm.nih.gov/{pmid}/'

    return Paper(
        paper_id=paper_id,
        source='pubmed',
        title=title,
        abstract=abstract,
        authors=authors,
        published_date=pub_date,
        categories=[],
        app_category=category_slug,
        pdf_url=None,
        abstract_url=abstract_url,
    )
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def _parse_pub_date(el: ET.Element) -> date:
|
| 176 |
+
"""Best-effort parse of PubMed date (Year, Month, Day may be partial)."""
|
| 177 |
+
pub_date_el = el.find('.//PubDate')
|
| 178 |
+
if pub_date_el is None:
|
| 179 |
+
return date.today()
|
| 180 |
+
|
| 181 |
+
year_text = pub_date_el.findtext('Year', '')
|
| 182 |
+
month_text = pub_date_el.findtext('Month', '')
|
| 183 |
+
day_text = pub_date_el.findtext('Day', '')
|
| 184 |
+
|
| 185 |
+
try:
|
| 186 |
+
year = int(year_text)
|
| 187 |
+
except (ValueError, TypeError):
|
| 188 |
+
return date.today()
|
| 189 |
+
|
| 190 |
+
# Month may be numeric or abbreviated text
|
| 191 |
+
month = 1
|
| 192 |
+
if month_text:
|
| 193 |
+
try:
|
| 194 |
+
month = int(month_text)
|
| 195 |
+
except ValueError:
|
| 196 |
+
_MONTH_ABBREV = {
|
| 197 |
+
'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4,
|
| 198 |
+
'may': 5, 'jun': 6, 'jul': 7, 'aug': 8,
|
| 199 |
+
'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
|
| 200 |
+
}
|
| 201 |
+
month = _MONTH_ABBREV.get(month_text.lower()[:3], 1)
|
| 202 |
+
|
| 203 |
+
day = 1
|
| 204 |
+
if day_text:
|
| 205 |
+
try:
|
| 206 |
+
day = int(day_text)
|
| 207 |
+
except ValueError:
|
| 208 |
+
pass
|
| 209 |
+
|
| 210 |
+
try:
|
| 211 |
+
return date(year, month, day)
|
| 212 |
+
except ValueError:
|
| 213 |
+
return date(year, 1, 1)
|
app/fetcher/semantic_scholar.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Semantic Scholar REST client.
|
| 3 |
+
|
| 4 |
+
Used as a fallback fetch source and to enrich citation counts for arXiv papers.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
from datetime import date, datetime, timedelta
|
| 11 |
+
from typing import List, Optional
|
| 12 |
+
|
| 13 |
+
from app.core.config import SEMSCHOLAR_BASE_URL, SEMANTIC_SCHOLAR_API_KEY
|
| 14 |
+
from app.core.models import Paper
|
| 15 |
+
from app.fetcher.http_session import FetchError, RetrySession
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def fetch_papers(
    category_slug: str,
    keywords: List[str],
    session: RetrySession,
    days_back: int = 7,
) -> List[Paper]:
    """
    Search Semantic Scholar for recent papers matching *keywords*.

    Args:
        category_slug: App-level category slug the results are tagged with.
        keywords: Search terms; joined with ' OR ' into a single query string.
        session: Retrying HTTP session used for the request.
        days_back: Only papers published within this many days are kept.

    Returns:
        A list of Paper instances. Never raises -- returns [] on any
        network or JSON-parsing error (this is a best-effort source).
    """
    # S2 takes one free-text query string; OR-join the keyword list.
    query_text = ' OR '.join(keywords)
    url = f'{SEMSCHOLAR_BASE_URL}/paper/search'
    params = {
        'query': query_text,
        # Request only the fields _parse_item consumes, to keep payloads small.
        'fields': (
            'paperId,title,abstract,authors,year,citationCount,'
            'externalIds,publicationDate,openAccessPdf'
        ),
        'publicationTypes': 'JournalArticle,Conference',
        'limit': 50,
    }

    headers = {}
    if SEMANTIC_SCHOLAR_API_KEY:
        # The key is optional; unauthenticated requests get a lower rate limit.
        headers['x-api-key'] = SEMANTIC_SCHOLAR_API_KEY

    try:
        response = session.get(url, params=params, headers=headers)
    except FetchError as exc:
        logger.error('Semantic Scholar fetch failed for %s: %s', category_slug, exc)
        return []

    try:
        data = response.json()
    except ValueError:
        logger.error('Semantic Scholar returned invalid JSON')
        return []

    # Papers older than this cutoff are dropped by _parse_item.
    cutoff = date.today() - timedelta(days=days_back)
    papers: List[Paper] = []

    for item in data.get('data', []):
        try:
            paper = _parse_item(item, category_slug, cutoff)
            if paper is not None:
                papers.append(paper)
        except Exception:
            # A malformed item skips only itself, never the whole fetch.
            logger.debug('Skipping malformed Semantic Scholar item', exc_info=True)

    logger.info(
        'Semantic Scholar: fetched %d papers for [%s]',
        len(papers), category_slug,
    )
    return papers
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _parse_item(item: dict, category_slug: str, cutoff: date) -> Optional[Paper]:
    """Convert one S2 search hit into a Paper; None if stale or incomplete."""
    raw_date = item.get('publicationDate', '')
    if not raw_date:
        return None
    try:
        published = date.fromisoformat(raw_date)
    except ValueError:
        return None
    if published < cutoff:
        return None

    title = (item.get('title') or '').strip()
    abstract = (item.get('abstract') or '').strip()
    if not (title and abstract):
        # Both fields are required downstream (ranking and summarization).
        return None

    s2_id = item.get('paperId', '')
    external = item.get('externalIds', {}) or {}
    arxiv_id = external.get('ArXiv', '')
    # Prefer a stable arXiv-based ID so citation enrichment can match it.
    paper_id = f'arxiv:{arxiv_id}' if arxiv_id else f's2:{s2_id}'

    author_names = []
    for author in (item.get('authors') or []):
        name = author.get('name')
        if name:
            author_names.append(name)

    open_pdf = item.get('openAccessPdf') or {}

    return Paper(
        paper_id=paper_id,
        source='semantic_scholar',
        title=title,
        abstract=abstract,
        authors=author_names,
        published_date=published,
        categories=[],
        app_category=category_slug,
        pdf_url=open_pdf.get('url'),
        abstract_url=f'https://www.semanticscholar.org/paper/{s2_id}',
        citation_count=item.get('citationCount', 0) or 0,
    )
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# ---------------------------------------------------------------------------
|
| 127 |
+
# Citation enrichment
|
| 128 |
+
# ---------------------------------------------------------------------------
|
| 129 |
+
|
| 130 |
+
def enrich_citations(papers: List[Paper], session: RetrySession) -> List[Paper]:
    """
    Batch-enrich citation counts from Semantic Scholar.

    Papers with ``arxiv:`` / ``s2:`` prefixed IDs are resolved through the
    S2 ``/paper/batch`` endpoint and have ``citation_count`` updated in place.

    This is best-effort: on failure the papers are returned unchanged.
    """
    if not papers:
        return papers

    # Build lookup of arXiv IDs (strip prefix)
    ids = []
    for p in papers:
        if p.paper_id.startswith('arxiv:'):
            # The batch API accepts the 'ArXiv:<id>' namespace form.
            ids.append(f'ArXiv:{p.paper_id[6:]}')
        elif p.paper_id.startswith('s2:'):
            ids.append(p.paper_id[3:])

    if not ids:
        return papers

    url = f'{SEMSCHOLAR_BASE_URL}/paper/batch'
    headers = {}
    if SEMANTIC_SCHOLAR_API_KEY:
        headers['x-api-key'] = SEMANTIC_SCHOLAR_API_KEY

    try:
        response = session.post(
            url,
            json={'ids': ids},
            headers=headers,
        )
        results = response.json()
    except (FetchError, ValueError) as exc:
        # Enrichment must never break the pipeline; keep the original counts.
        logger.warning('Citation enrichment failed (best-effort): %s', exc)
        return papers

    # Map S2 results back to papers
    result_map: dict = {}
    for item in results:
        # S2 returns null entries for IDs it could not resolve; skip them.
        if item and 'paperId' in item:
            ext = item.get('externalIds', {}) or {}
            arxiv = ext.get('ArXiv')
            if arxiv:
                # Index under both key styles so either prefix form matches.
                result_map[f'arxiv:{arxiv}'] = item.get('citationCount', 0) or 0
            result_map[f's2:{item["paperId"]}'] = item.get('citationCount', 0) or 0

    for paper in papers:
        if paper.paper_id in result_map:
            paper.citation_count = result_map[paper.paper_id]

    logger.info('Enriched citations for %d papers', len(papers))
    return papers
|
app/ranker/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Paper scoring & ranking
|
app/ranker/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (146 Bytes). View file
|
|
|
app/ranker/__pycache__/citation_scorer.cpython-312.pyc
ADDED
|
Binary file (2.11 kB). View file
|
|
|
app/ranker/__pycache__/composite_ranker.cpython-312.pyc
ADDED
|
Binary file (3.18 kB). View file
|
|
|
app/ranker/__pycache__/tfidf_ranker.cpython-312.pyc
ADDED
|
Binary file (9.25 kB). View file
|
|
|
app/ranker/citation_scorer.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Citation velocity scorer.
|
| 3 |
+
|
| 4 |
+
Normalises raw citation counts into a [0.0, 1.0] score and applies a
|
| 5 |
+
recency bonus for very fresh papers.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from datetime import date, timedelta
|
| 11 |
+
from typing import List
|
| 12 |
+
|
| 13 |
+
from app.core.config import CITATION_NORM, RECENCY_BONUS
|
| 14 |
+
from app.core.models import Paper
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def score(paper: Paper) -> float:
    """
    Return a citation score in [0.0, 1.0].

    The raw citation count is divided by ``CITATION_NORM`` (floored at 1
    to avoid division by zero) and clipped to 1.0; papers published less
    than 3 days ago additionally receive ``RECENCY_BONUS``, still clipped
    to 1.0.
    """
    normalised = min(paper.citation_count / max(CITATION_NORM, 1), 1.0)

    age_days = (date.today() - paper.published_date).days
    if age_days < 3:
        # Fresh papers have had little time to accumulate citations.
        normalised = min(normalised + RECENCY_BONUS, 1.0)

    return normalised
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def score_many(papers: List[Paper]) -> List[Paper]:
    """Attach a transient ``_citation_score`` to each paper and return the list.

    The attribute is not a declared Paper field; the composite ranker
    reads it as a scratch value.
    """
    for paper in papers:
        paper._citation_score = score(paper)  # type: ignore[attr-defined]
    return papers
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def recency_score(paper: Paper) -> float:
    """
    Return a linear recency score in [0.0, 1.0].

    1.0 means published today; the score decays linearly to 0.0 at seven
    or more days old. Dates in the future clamp to 1.0.
    """
    age = (date.today() - paper.published_date).days
    if age <= 0:
        return 1.0
    return max(0.0, 1.0 - age / 7.0)
|
app/ranker/composite_ranker.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Composite ranker.
|
| 3 |
+
|
| 4 |
+
Combines relevance, citation, and recency scores with user-configurable
|
| 5 |
+
weights to produce a final ``composite_score`` for each paper.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
from typing import Dict, List
|
| 12 |
+
|
| 13 |
+
from app.core.config import (
|
| 14 |
+
TOP_N_PER_CATEGORY,
|
| 15 |
+
WEIGHT_CITATION,
|
| 16 |
+
WEIGHT_RECENCY,
|
| 17 |
+
WEIGHT_RELEVANCE,
|
| 18 |
+
)
|
| 19 |
+
from app.core.models import Paper, UserProfile
|
| 20 |
+
from app.ranker import citation_scorer
|
| 21 |
+
from app.ranker.tfidf_ranker import TfidfRanker
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def rank_all(
    papers_by_category: Dict[str, List[Paper]],
    profile: UserProfile,
    cache_dir: str = '',
) -> Dict[str, List[Paper]]:
    """
    Score and sort papers per category.

    Composite score = w_rel * relevance + w_cit * citation + w_rec * recency,
    with the weights and top-N taken from *profile* (weights are renormalised
    if they do not sum to 1.0).

    Returns a dict ``{category: [Paper, ...]}`` with each list sorted by
    ``composite_score`` descending and sliced to ``top_n``.
    """
    w_rel = profile.weight_relevance
    w_cit = profile.weight_citation
    w_rec = profile.weight_recency
    top_n = profile.top_n_per_category

    # Validate weights: renormalise rather than fail on a bad user config.
    total = w_rel + w_cit + w_rec
    if abs(total - 1.0) > 0.01:
        logger.warning(
            'Ranking weights sum to %.2f (expected 1.0) β normalising', total
        )
        w_rel /= total
        w_cit /= total
        w_rec /= total

    # Build TF-IDF ranker; prefer the pickled cache, else fit from interests.
    ranker = TfidfRanker(cache_dir=cache_dir)
    if not ranker.load_cache():
        ranker.fit_profile(profile.interests)

    ranked: Dict[str, List[Paper]] = {}

    for category, papers in papers_by_category.items():
        if not papers:
            ranked[category] = []
            continue

        # Relevance scores (sets paper.relevance_score in place)
        ranker.score_many(papers)

        # Citation + recency scores
        for paper in papers:
            cit_score = citation_scorer.score(paper)
            rec_score = citation_scorer.recency_score(paper)

            paper.composite_score = (
                w_rel * paper.relevance_score
                + w_cit * cit_score
                + w_rec * rec_score
            )

        # Sort and slice
        papers.sort(key=lambda p: p.composite_score, reverse=True)
        ranked[category] = papers[:top_n]

        logger.info(
            'Ranked [%s]: %d β top %d (best=%.3f)',
            category, len(papers), min(top_n, len(papers)),
            papers[0].composite_score if papers else 0.0,
        )

    return ranked
|
app/ranker/tfidf_ranker.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β TF-IDF relevance scorer.
|
| 3 |
+
|
| 4 |
+
Computes cosine similarity between paper text and the user interest profile.
|
| 5 |
+
Falls back to a hand-written bag-of-words implementation if scikit-learn
|
| 6 |
+
is not available (mobile build edge case).
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
import os
|
| 13 |
+
import pickle
|
| 14 |
+
from typing import Dict, List, Optional
|
| 15 |
+
|
| 16 |
+
from app.core.models import Paper
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
# ---------------------------------------------------------------------------
|
| 21 |
+
# Try scikit-learn; fall back to pure-Python BoW
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
try:
|
| 24 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 25 |
+
from sklearn.metrics.pairwise import cosine_similarity as _cosine
|
| 26 |
+
|
| 27 |
+
_HAS_SKLEARN = True
|
| 28 |
+
except ImportError:
|
| 29 |
+
_HAS_SKLEARN = False
|
| 30 |
+
logger.warning('scikit-learn not available β using fallback BoW scorer')
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class TfidfRanker:
    """Score papers against a user interest profile using TF-IDF cosine similarity.

    When scikit-learn is available, a TfidfVectorizer is fitted on the user's
    interest texts and papers are scored by cosine similarity against their
    category's profile vector; the fitted model can be pickled to *cache_dir*.
    Without scikit-learn, a pure-Python bag-of-words cosine fallback is used
    (no caching).
    """

    def __init__(self, cache_dir: str = ''):
        # Directory for the pickled vectorizer cache; '' disables caching.
        self._cache_dir = cache_dir
        # Fitted sklearn TfidfVectorizer, or None before fitting / in BoW mode.
        self._vectorizer = None
        # Per-category TF-IDF vectors of the user's interest text.
        self._profile_vectors: Dict[str, object] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def fit_profile(self, interests: Dict[str, str]) -> None:
        """
        Build / rebuild the TF-IDF model from user interest keywords.

        *interests*: ``{'ml': 'deep learning transformers', ...}``
        """
        if _HAS_SKLEARN:
            self._fit_sklearn(interests)
        else:
            self._fit_bow(interests)

    def score(self, paper: Paper) -> float:
        """
        Return relevance score in [0.0, 1.0] for a paper against its
        category's profile vector.

        Scores 0.0 when the model has not been fitted or the paper's
        category has no profile vector.
        """
        cat = paper.app_category
        # Title and abstract together form the document being scored.
        text = f'{paper.title} {paper.abstract}'

        if _HAS_SKLEARN:
            return self._score_sklearn(text, cat)
        else:
            return self._score_bow(text, cat)

    def score_many(self, papers: List[Paper]) -> List[Paper]:
        """Set ``relevance_score`` on each paper in-place and return the list."""
        for p in papers:
            p.relevance_score = self.score(p)
        return papers

    # ------------------------------------------------------------------
    # scikit-learn implementation
    # ------------------------------------------------------------------

    def _fit_sklearn(self, interests: Dict[str, str]) -> None:
        # Fit the vocabulary on all interest texts, then store one sparse
        # vector per category; persist the fitted model for later runs.
        corpus = list(interests.values())
        self._vectorizer = TfidfVectorizer(
            max_features=5000, stop_words='english'
        )
        self._vectorizer.fit(corpus)
        self._profile_vectors = {}
        for cat, text in interests.items():
            vec = self._vectorizer.transform([text])
            self._profile_vectors[cat] = vec
        self._save_cache()

    def _score_sklearn(self, text: str, category: str) -> float:
        if self._vectorizer is None or category not in self._profile_vectors:
            return 0.0
        paper_vec = self._vectorizer.transform([text])
        sim = _cosine(paper_vec, self._profile_vectors[category])
        # Clamp to [0, 1] to guard against floating-point drift.
        return float(max(0.0, min(sim[0][0], 1.0)))

    # ------------------------------------------------------------------
    # Pure-Python bag-of-words fallback
    # ------------------------------------------------------------------

    def _fit_bow(self, interests: Dict[str, str]) -> None:
        # Term-frequency profile per category (no IDF weighting).
        self._bow_profiles: Dict[str, Dict[str, int]] = {}
        for cat, text in interests.items():
            self._bow_profiles[cat] = _word_freq(text.lower())

    def _score_bow(self, text: str, category: str) -> float:
        # getattr guard: _bow_profiles only exists after _fit_bow has run.
        profile = getattr(self, '_bow_profiles', {}).get(category)
        if not profile:
            return 0.0
        paper_freq = _word_freq(text.lower())
        return _cosine_bow(paper_freq, profile)

    # ------------------------------------------------------------------
    # Cache management
    # ------------------------------------------------------------------

    def _save_cache(self) -> None:
        # Caching only applies to the sklearn model.
        if not self._cache_dir or not _HAS_SKLEARN:
            return
        path = os.path.join(self._cache_dir, 'tfidf_cache.pkl')
        try:
            with open(path, 'wb') as f:
                pickle.dump(
                    (self._vectorizer, self._profile_vectors), f
                )
        except Exception:
            # Persistence is best-effort; scoring still works without a cache.
            logger.debug('Could not save TF-IDF cache', exc_info=True)

    def load_cache(self) -> bool:
        """Attempt to load a cached vectorizer. Returns True on success."""
        if not self._cache_dir or not _HAS_SKLEARN:
            return False
        path = os.path.join(self._cache_dir, 'tfidf_cache.pkl')
        if not os.path.exists(path):
            return False
        try:
            with open(path, 'rb') as f:
                self._vectorizer, self._profile_vectors = pickle.load(f)
            return True
        except Exception:
            # Corrupt or incompatible pickle: delete it and signal a rebuild.
            logger.warning('TF-IDF cache corrupt β rebuilding', exc_info=True)
            try:
                os.remove(path)
            except OSError:
                pass
            return False
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# ---------------------------------------------------------------------------
|
| 151 |
+
# BoW helpers
|
| 152 |
+
# ---------------------------------------------------------------------------
|
| 153 |
+
|
| 154 |
+
_STOPWORDS = frozenset(
|
| 155 |
+
'a an the is are was were be been being have has had do does did '
|
| 156 |
+
'will would shall should may might can could of in to for on with '
|
| 157 |
+
'at by from and or but not no nor so yet both either neither '
|
| 158 |
+
'each every all any few more most other some such that this these '
|
| 159 |
+
'those i me my we our you your he him his she her it its they them '
|
| 160 |
+
'their what which who whom when where why how'.split()
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def _word_freq(text: str) -> Dict[str, int]:
|
| 165 |
+
freq: Dict[str, int] = {}
|
| 166 |
+
for word in text.split():
|
| 167 |
+
w = ''.join(c for c in word if c.isalnum())
|
| 168 |
+
if w and w not in _STOPWORDS and len(w) > 2:
|
| 169 |
+
freq[w] = freq.get(w, 0) + 1
|
| 170 |
+
return freq
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def _cosine_bow(a: Dict[str, int], b: Dict[str, int]) -> float:
|
| 174 |
+
common = set(a) & set(b)
|
| 175 |
+
if not common:
|
| 176 |
+
return 0.0
|
| 177 |
+
dot = sum(a[k] * b[k] for k in common)
|
| 178 |
+
mag_a = sum(v * v for v in a.values()) ** 0.5
|
| 179 |
+
mag_b = sum(v * v for v in b.values()) ** 0.5
|
| 180 |
+
if mag_a == 0 or mag_b == 0:
|
| 181 |
+
return 0.0
|
| 182 |
+
return dot / (mag_a * mag_b)
|
app/summarizer/__pycache__/groq_client.cpython-312.pyc
ADDED
|
Binary file (4.35 kB). View file
|
|
|
app/summarizer/groq_client.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β Groq LLM summarizer.
|
| 3 |
+
|
| 4 |
+
Summarizes papers using Groq API (llama-3.1-8b-instant).
|
| 5 |
+
Follows user's requested structural (Idea, Method, Results) and
|
| 6 |
+
enforces rate limit delays (30 RPM).
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
import time
|
| 13 |
+
from typing import List, Optional
|
| 14 |
+
|
| 15 |
+
import requests
|
| 16 |
+
from app.core.config import (
|
| 17 |
+
GROQ_API_KEY, GROQ_BASE_URL, GROQ_MODEL, GROQ_DELAY
|
| 18 |
+
)
|
| 19 |
+
from app.core.models import Paper
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
class GroqSummarizer:
    """Handles LLM calls to Groq with rate-limiting and structured prompts."""

    def __init__(self, api_key: str = GROQ_API_KEY):
        # Key may be empty; summarize_paper then becomes a no-op returning None.
        self.api_key = api_key
        # Timestamp of the last API call, used to enforce GROQ_DELAY between calls.
        self.last_call_time = 0.0

    def summarize_paper(self, paper: Paper) -> Optional[str]:
        """
        Produce a structured summary of the paper.

        Structure:
        - Idea: The core concept.
        - Method: The approach or architecture.
        - Results: The outcome or findings.

        Returns:
            The summary text, or None when no API key is configured, the API
            returns a non-200 status, or the request/parsing fails.
        """
        if not self.api_key:
            logger.info("Skip Groq summarization: NO API KEY.")
            return None

        # Prepare prompt
        prompt = (
            f"Please summarize the following research paper abstract into three brief sections:\n"
            f"1. Idea: (The core concept)\n"
            f"2. Method: (The proposed approach)\n"
            f"3. Results: (Key findings)\n\n"
            f"Title: {paper.title}\n"
            f"Abstract: {paper.abstract}\n\n"
            "Keep it concise and professional. Respond in plain text with those three labels."
        )

        # Enforce rate limit delay between consecutive calls.
        elapsed = time.time() - self.last_call_time
        if elapsed < GROQ_DELAY:
            sleep_time = GROQ_DELAY - elapsed
            # Lazy %-args: the message is only formatted if DEBUG is enabled.
            logger.debug("Groq Rate Limit: Sleeping for %.2fs", sleep_time)
            time.sleep(sleep_time)

        try:
            logger.info("Summarizing paper [%s] via Groq...", paper.paper_id)
            response = requests.post(
                GROQ_BASE_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": GROQ_MODEL,
                    "messages": [
                        {"role": "system", "content": "You are a scientific research assistant summarizing papers."},
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3,
                    "max_tokens": 300
                },
                timeout=30
            )

            # Record the call time even for error responses so failed calls
            # still count against the rate limit.
            self.last_call_time = time.time()

            if response.status_code == 200:
                data = response.json()
                summary = data['choices'][0]['message']['content'].strip()
                return summary
            else:
                logger.error("Groq API error (%s): %s", response.status_code, response.text)
                return None

        except Exception as exc:
            # Summarization is best-effort: never let an API or parsing
            # failure propagate into the pipeline.
            logger.exception("Unexpected error during Groq summarization: %s", exc)
            return None

    def summarize_many(self, papers: List[Paper]) -> None:
        """
        Iterate through papers and update their summary_llm field.
        """
        for p in papers:
            # We only summarize if it doesn't already have a summary
            if not p.summary_llm:
                p.summary_llm = self.summarize_paper(p)
|
app/ui/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Kivy UI screens & widgets
|
app/ui/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (142 Bytes). View file
|
|
|
app/ui/__pycache__/detail_screen.cpython-312.pyc
ADDED
|
Binary file (6.43 kB). View file
|
|
|
app/ui/__pycache__/home_screen.cpython-312.pyc
ADDED
|
Binary file (5.99 kB). View file
|
|
|
app/ui/__pycache__/settings_screen.cpython-312.pyc
ADDED
|
Binary file (6.95 kB). View file
|
|
|
app/ui/detail_screen.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β DetailScreen.
|
| 3 |
+
|
| 4 |
+
Displays ranked papers for a single category with bookmark & read
|
| 5 |
+
functionality. Tapping a paper opens a modal with the full abstract.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import webbrowser
|
| 12 |
+
|
| 13 |
+
from kivy.lang import Builder
|
| 14 |
+
from kivy.properties import BooleanProperty, ListProperty, StringProperty
|
| 15 |
+
from kivy.uix.boxlayout import BoxLayout
|
| 16 |
+
from kivy.uix.modalview import ModalView
|
| 17 |
+
from kivy.uix.screenmanager import Screen
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
# Load this screen's KV layout from the sibling kv/ directory; silently skip
# when the file is absent (e.g. packaging without kv assets) so importing the
# module never fails.
import os
_KV_PATH = os.path.join(os.path.dirname(__file__), 'kv', 'detail.kv')
if os.path.exists(_KV_PATH):
    Builder.load_file(_KV_PATH)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class PaperRow(BoxLayout):
    """A single paper row in the detail list.

    All display data is pushed in as Kivy properties by
    ``DetailScreen.load_papers``; the KV rules bind to them.
    """

    rank = StringProperty('1')            # 1-based position in the ranked list
    title = StringProperty('')
    authors = StringProperty('')          # pre-abbreviated, e.g. 'X et al.'
    date_str = StringProperty('')         # ISO publication date
    score_text = StringProperty('0.00')   # formatted composite score
    is_bookmarked = BooleanProperty(False)
    paper_id = StringProperty('')
    abstract_url = StringProperty('')
    pdf_url = StringProperty('')
    abstract_text = StringProperty('')

    def toggle_bookmark(self):
        """Flip the bookmark state via the running app and mirror it locally."""
        # Imported lazily to avoid a kivy.app import at module load time.
        from kivy.app import App
        app = App.get_running_app()
        if app:
            new_state = app.toggle_bookmark(self.paper_id)
            self.is_bookmarked = new_state

    def show_detail(self):
        """Open a modal popup showing this paper's full details."""
        popup = PaperDetailPopup()
        popup.paper_title = self.title
        popup.paper_authors = self.authors
        popup.paper_abstract = self.abstract_text
        popup.paper_url = self.abstract_url
        popup.paper_pdf = self.pdf_url
        popup.open()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class PaperDetailPopup(ModalView):
    """Modal dialog presenting one paper's full title, authors and abstract."""

    paper_title = StringProperty('')
    paper_authors = StringProperty('')
    paper_abstract = StringProperty('')
    paper_url = StringProperty('')
    paper_pdf = StringProperty('')

    def open_in_browser(self):
        """Open the abstract page in the system browser (best-effort)."""
        if not self.paper_url:
            return
        try:
            webbrowser.open(self.paper_url)
        except Exception:
            logger.warning('Could not open browser')

    def open_pdf(self):
        """Open the PDF link in the system browser (best-effort)."""
        if not self.paper_pdf:
            return
        try:
            webbrowser.open(self.paper_pdf)
        except Exception:
            logger.warning('Could not open PDF')
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class DetailScreen(Screen):
    """Screen showing papers for a single category."""

    category_slug = StringProperty('')
    category_name = StringProperty('')   # human-readable label from CATEGORY_LABELS
    week_range = StringProperty('')      # header text, e.g. 'Week of 2024-01-01'
    paper_rows = ListProperty([])

    def load_papers(self, category_slug: str):
        """Populate the screen with papers from the latest digest.

        Clears the 'paper_container' widget and adds one PaperRow per ranked
        paper in *category_slug*; shows 'No data' when no digest exists yet.
        """
        from kivy.app import App
        app = App.get_running_app()
        if not app:
            return

        from app.core.config import CATEGORY_LABELS
        self.category_slug = category_slug
        # Fall back to a title-cased slug for unknown categories.
        self.category_name = CATEGORY_LABELS.get(category_slug, category_slug.title())

        digest = app.get_latest_digest()
        container = self.ids.get('paper_container')
        if container is None:
            # KV rules not loaded (kv file missing) -- nothing to populate.
            return
        container.clear_widgets()

        if digest is None:
            self.week_range = 'No data'
            return

        self.week_range = f'Week of {digest.week_start.isoformat()}'
        papers = digest.papers.get(category_slug, [])

        for i, paper in enumerate(papers, 1):
            row = PaperRow()
            row.rank = str(i)
            row.paper_id = paper.paper_id
            row.title = paper.title
            row.abstract_text = paper.abstract

            # Abbreviate long author lists for the compact row layout.
            if paper.authors:
                if len(paper.authors) > 2:
                    row.authors = f'{paper.authors[0]} et al.'
                else:
                    row.authors = ', '.join(paper.authors)
            else:
                row.authors = 'Unknown'

            row.date_str = paper.published_date.isoformat()
            row.score_text = f'{paper.composite_score:.2f}'
            row.is_bookmarked = paper.is_bookmarked
            row.abstract_url = paper.abstract_url
            row.pdf_url = paper.pdf_url or ''

            container.add_widget(row)
app/ui/home_screen.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ResearchRadar β HomeScreen.
|
| 3 |
+
|
| 4 |
+
Displays the latest digest as a scrollable list of DigestCard widgets,
|
| 5 |
+
one per category. Includes a "Refresh Now" FAB and empty-state onboarding.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import threading
|
| 12 |
+
|
| 13 |
+
from kivy.clock import Clock
|
| 14 |
+
from kivy.lang import Builder
|
| 15 |
+
from kivy.properties import (
|
| 16 |
+
BooleanProperty,
|
| 17 |
+
ListProperty,
|
| 18 |
+
ObjectProperty,
|
| 19 |
+
StringProperty,
|
| 20 |
+
)
|
| 21 |
+
from kivy.uix.boxlayout import BoxLayout
|
| 22 |
+
from kivy.uix.screenmanager import Screen
|
| 23 |
+
|
| 24 |
+
from app.core.config import CATEGORY_LABELS
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
# Load the KV layout for this screen at import time so the <HomeScreen>
# and <DigestCard> rules exist before any widget is instantiated.
# The existence check lets the module import cleanly when the KV asset
# is absent (e.g. in unit tests or partial installs).
import os
_KV_PATH = os.path.join(os.path.dirname(__file__), 'kv', 'home.kv')
if os.path.exists(_KV_PATH):
    Builder.load_file(_KV_PATH)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class DigestCard(BoxLayout):
    """A single category card showing paper count and top paper title.

    Tapping the card navigates to the detail view for its category via
    ``app.show_detail``.
    """

    # Category identifier used for digest lookups (e.g. 'machine_learning').
    category_slug = StringProperty('')
    # Human-readable category label shown in the card header.
    category_name = StringProperty('')
    # Paper count, pre-formatted as a string for direct KV binding.
    paper_count = StringProperty('0')
    # Title of the highest-ranked paper in this category.
    top_paper_title = StringProperty('No papers yet')
    # Composite score of the top paper; placeholder until populated.
    # NOTE(review): glyph appears mojibake'd — confirm the intended dash.
    top_score = StringProperty('β')

    def on_touch_up(self, touch):
        """Navigate to the category detail view when the card is tapped.

        Mouse-wheel scroll events are also dispatched as touches; without
        the ``is_mouse_scrolling`` guard, scrolling the list while the
        pointer is over a card would trigger navigation.
        """
        if not touch.is_mouse_scrolling and self.collide_point(*touch.pos):
            app = self._get_app()
            if app:
                app.show_detail(self.category_slug)
        return super().on_touch_up(touch)

    def _get_app(self):
        # Imported lazily so module import does not pull in the App machinery.
        from kivy.app import App
        return App.get_running_app()
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class HomeScreen(Screen):
    """Main screen showing the latest weekly digest.

    Builds one :class:`DigestCard` per category from the app's latest
    digest, and exposes a manual "Fetch Now" action that runs in a
    daemon thread so the UI stays responsive.
    """

    # True while a background fetch is running (disables the Fetch button in KV).
    is_fetching = BooleanProperty(False)
    # Human-readable status/timestamp shown in the bottom bar.
    last_fetched = StringProperty('Never')
    # Reserved card-state list; not used by load_digest itself.
    digest_cards = ListProperty([])

    def on_enter(self):
        """Load digest when screen becomes visible."""
        self.load_digest()

    def load_digest(self):
        """Load the latest digest from the database and populate cards."""
        from kivy.app import App
        app = App.get_running_app()
        if not app:
            return

        digest = app.get_latest_digest()
        container = self.ids.get('card_container')
        if container is None:
            # KV rules not applied yet; nothing to populate.
            return

        container.clear_widgets()

        if digest is None:
            # NOTE(review): glyph appears mojibake'd — confirm intended dash.
            self.last_fetched = 'Never β tap Fetch Now!'
            return

        self.last_fetched = digest.generated_at.strftime('%Y-%m-%d %H:%M')

        for cat_slug, papers in digest.papers.items():
            container.add_widget(self._build_card(cat_slug, papers))

    @staticmethod
    def _build_card(cat_slug, papers):
        """Build one DigestCard summarising a category's ranked papers."""
        card = DigestCard()
        card.category_slug = cat_slug
        card.category_name = CATEGORY_LABELS.get(cat_slug, cat_slug.title())
        card.paper_count = str(len(papers))
        if papers:
            title = papers[0].title
            if len(title) > 70:  # keep the preview to a single card line
                title = title[:67] + '...'
            card.top_paper_title = title
            card.top_score = f'{papers[0].composite_score:.2f}'
        return card

    def trigger_fetch(self):
        """Run the weekly fetch in a background thread.

        Re-entrant calls while a fetch is in flight are ignored.  If no
        App instance is running there is nothing to fetch against, so we
        bail out *before* flipping ``is_fetching`` — the original set the
        flag first, leaving the UI stuck in the fetching state when the
        thread immediately failed.
        """
        if self.is_fetching:
            return

        from kivy.app import App
        app = App.get_running_app()
        if app is None:
            logger.warning('trigger_fetch called with no running app')
            return

        self.is_fetching = True
        self.last_fetched = 'Fetching...'

        def _run():
            try:
                app.run_fetch()
            except Exception:
                # Broad catch is deliberate: a background failure must never
                # skip the state-reset scheduled below.
                logger.exception('Background fetch failed')
            finally:
                # Hop back to the main thread before touching widget state.
                Clock.schedule_once(lambda dt: self._on_fetch_done(), 0)

        threading.Thread(target=_run, daemon=True).start()

    def _on_fetch_done(self):
        # Runs on the main thread via Clock; safe to update properties here.
        self.is_fetching = False
        self.load_digest()
|
app/ui/kv/detail.kv
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#:kivy 2.3.0

# =====================================================================
# DetailScreen — ranked paper list for a single category
# =====================================================================

# One row per ranked paper: rank + title + bookmark toggle on the first
# line, authors/date below, then a score bar and a "View" button.
<PaperRow>:
    orientation: 'vertical'
    size_hint_y: None
    height: dp(100)
    padding: dp(14), dp(8)
    spacing: dp(4)

    # Rounded card background, inset slightly from the row bounds.
    canvas.before:
        Color:
            rgba: 0.14, 0.16, 0.22, 1
        RoundedRectangle:
            pos: self.x + dp(4), self.y + dp(2)
            size: self.width - dp(8), self.height - dp(4)
            radius: [dp(10)]

    BoxLayout:
        size_hint_y: None
        height: dp(24)

        Label:
            text: '#' + root.rank
            font_size: sp(16)
            bold: True
            color: 0.55, 0.78, 1.0, 1
            size_hint_x: None
            width: dp(36)
            halign: 'center'
            text_size: self.size

        Label:
            text: root.title
            font_size: sp(14)
            color: 0.92, 0.92, 0.95, 1
            halign: 'left'
            text_size: self.width, None
            shorten: True
            shorten_from: 'right'

        # Bookmark toggle.
        # NOTE(review): both glyphs below appear mojibake'd in this copy —
        # confirm the intended filled/empty star characters.
        Button:
            text: 'β' if root.is_bookmarked else 'β'
            font_size: sp(20)
            size_hint_x: None
            width: dp(40)
            background_color: 0, 0, 0, 0
            color: (1, 0.85, 0.2, 1) if root.is_bookmarked else (0.5, 0.5, 0.5, 1)
            on_release: root.toggle_bookmark()

    Label:
        text: root.authors + ' Β· ' + root.date_str
        font_size: sp(11)
        color: 0.55, 0.55, 0.6, 1
        halign: 'left'
        text_size: self.size
        size_hint_y: None
        height: dp(18)

    BoxLayout:
        size_hint_y: None
        height: dp(24)

        # Score bar: grey track plus a fill whose width is proportional to
        # the composite score (score_text parsed back to float, clamped to 1).
        BoxLayout:
            size_hint_x: 0.6
            Widget:
                canvas:
                    Color:
                        rgba: 0.2, 0.2, 0.3, 1
                    RoundedRectangle:
                        pos: self.pos
                        size: self.width, dp(6)
                        radius: [dp(3)]
                    Color:
                        rgba: 0.3, 0.75, 0.5, 1
                    RoundedRectangle:
                        pos: self.pos
                        size: self.width * min(float(root.score_text or 0), 1.0), dp(6)
                        radius: [dp(3)]

        Label:
            text: root.score_text
            font_size: sp(12)
            color: 0.4, 0.85, 0.6, 1
            size_hint_x: 0.2
            halign: 'right'
            text_size: self.size

        Button:
            text: 'View'
            font_size: sp(12)
            size_hint_x: 0.2
            background_normal: ''
            background_color: 0.2, 0.4, 0.7, 1
            color: 1, 1, 1, 1
            on_release: root.show_detail()


# Modal popup with the full title, author list and scrollable abstract.
<PaperDetailPopup>:
    size_hint: 0.92, 0.85
    auto_dismiss: True

    canvas.before:
        Color:
            rgba: 0.1, 0.12, 0.16, 0.98
        RoundedRectangle:
            pos: self.pos
            size: self.size
            radius: [dp(16)]

    BoxLayout:
        orientation: 'vertical'
        padding: dp(20)
        spacing: dp(12)

        Label:
            text: root.paper_title
            font_size: sp(17)
            bold: True
            color: 1, 1, 1, 1
            halign: 'left'
            valign: 'top'
            text_size: self.width, None
            size_hint_y: None
            height: self.texture_size[1]

        Label:
            text: root.paper_authors
            font_size: sp(12)
            color: 0.6, 0.6, 0.7, 1
            halign: 'left'
            text_size: self.size
            size_hint_y: None
            height: dp(20)

        ScrollView:
            Label:
                text: root.paper_abstract
                font_size: sp(13)
                color: 0.85, 0.85, 0.9, 1
                halign: 'left'
                valign: 'top'
                text_size: self.width, None
                size_hint_y: None
                height: self.texture_size[1]
                markup: False

        # Action row.
        # NOTE(review): leading button icons appear mojibake'd — confirm the
        # intended emoji for the abstract/PDF/close actions.
        BoxLayout:
            size_hint_y: None
            height: dp(44)
            spacing: dp(10)

            Button:
                text: 'π Open Abstract'
                font_size: sp(13)
                background_normal: ''
                background_color: 0.2, 0.45, 0.8, 1
                color: 1, 1, 1, 1
                on_release: root.open_in_browser()

            Button:
                text: 'π Open PDF'
                font_size: sp(13)
                background_normal: ''
                background_color: 0.3, 0.65, 0.4, 1
                color: 1, 1, 1, 1
                on_release: root.open_pdf()

            Button:
                text: 'β Close'
                font_size: sp(13)
                background_normal: ''
                background_color: 0.5, 0.2, 0.2, 1
                color: 1, 1, 1, 1
                on_release: root.dismiss()


<DetailScreen>:
    name: 'detail'

    BoxLayout:
        orientation: 'vertical'

        canvas.before:
            Color:
                rgba: 0.08, 0.09, 0.12, 1
            Rectangle:
                pos: self.pos
                size: self.size

        # ── Header ──────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(60)
            padding: dp(12), dp(10)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            # Back button (glyph appears mojibake'd — confirm arrow char).
            Button:
                text: 'β'
                font_size: sp(22)
                size_hint_x: None
                width: dp(48)
                background_color: 0, 0, 0, 0
                color: 0.7, 0.7, 0.8, 1
                on_release: app.go_home()

            BoxLayout:
                orientation: 'vertical'
                Label:
                    text: root.category_name
                    font_size: sp(18)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                Label:
                    text: root.week_range
                    font_size: sp(12)
                    color: 0.5, 0.5, 0.6, 1
                    halign: 'left'
                    text_size: self.size

        # ── Paper list ──────────────────────────────
        # PaperRow widgets are added from Python into paper_container.
        ScrollView:
            do_scroll_x: False

            BoxLayout:
                id: paper_container
                orientation: 'vertical'
                size_hint_y: None
                height: self.minimum_height
                padding: dp(6)
                spacing: dp(8)
|
app/ui/kv/home.kv
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#:kivy 2.3.0

# =====================================================================
# HomeScreen — digest card list with FAB and status bar
# =====================================================================

<DigestCard>:
    orientation: 'vertical'
    size_hint_y: None
    height: dp(120)
    padding: dp(16)
    spacing: dp(6)

    # Two stacked rounded rectangles give the card a subtle bottom shadow.
    canvas.before:
        Color:
            rgba: 0.15, 0.17, 0.22, 1
        RoundedRectangle:
            pos: self.x + dp(4), self.y + dp(2)
            size: self.width - dp(8), self.height - dp(4)
            radius: [dp(14)]
        Color:
            rgba: 0.22, 0.25, 0.32, 1
        RoundedRectangle:
            pos: self.x + dp(4), self.y + dp(4)
            size: self.width - dp(8), self.height - dp(4)
            radius: [dp(14)]

    BoxLayout:
        size_hint_y: None
        height: dp(28)
        Label:
            text: root.category_name
            font_size: sp(18)
            bold: True
            color: 0.55, 0.78, 1.0, 1
            halign: 'left'
            text_size: self.size
        Label:
            text: root.paper_count + ' papers'
            font_size: sp(14)
            color: 0.6, 0.6, 0.7, 1
            halign: 'right'
            text_size: self.size
            size_hint_x: 0.3

    Label:
        text: root.top_paper_title
        font_size: sp(14)
        color: 0.85, 0.85, 0.9, 1
        halign: 'left'
        valign: 'top'
        text_size: self.width, None
        shorten: True
        shorten_from: 'right'

    BoxLayout:
        size_hint_y: None
        height: dp(22)
        Label:
            text: 'Score: ' + root.top_score
            font_size: sp(12)
            color: 0.4, 0.85, 0.6, 1
            halign: 'left'
            text_size: self.size
        # NOTE(review): trailing glyph appears mojibake'd — confirm arrow.
        Label:
            text: 'Tap to explore β'
            font_size: sp(11)
            color: 0.5, 0.5, 0.6, 1
            halign: 'right'
            text_size: self.size


<HomeScreen>:
    name: 'home'

    BoxLayout:
        orientation: 'vertical'
        padding: 0
        spacing: 0

        canvas.before:
            Color:
                rgba: 0.08, 0.09, 0.12, 1
            Rectangle:
                pos: self.pos
                size: self.size

        # ── Header ──────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(64)
            padding: dp(16), dp(12)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            # NOTE(review): title emoji and gear glyph appear mojibake'd —
            # confirm intended characters.
            Label:
                text: 'π‘ ResearchRadar'
                font_size: sp(22)
                bold: True
                color: 1, 1, 1, 1
                halign: 'left'
                text_size: self.size
                valign: 'center'

            Button:
                text: 'β'
                font_size: sp(22)
                size_hint_x: None
                width: dp(48)
                background_color: 0, 0, 0, 0
                color: 0.7, 0.7, 0.8, 1
                on_release: app.show_settings()

        # ── Digest cards ────────────────────────────
        # DigestCard widgets are added from HomeScreen.load_digest().
        ScrollView:
            do_scroll_x: False

            BoxLayout:
                id: card_container
                orientation: 'vertical'
                size_hint_y: None
                height: self.minimum_height
                padding: dp(8)
                spacing: dp(10)

        # ── Bottom bar ──────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(56)
            padding: dp(12), dp(8)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            Label:
                text: 'Last: ' + root.last_fetched
                font_size: sp(12)
                color: 0.5, 0.5, 0.6, 1
                halign: 'left'
                text_size: self.size
                valign: 'center'

            # Disabled while HomeScreen.is_fetching is True.
            Button:
                text: 'π Fetch Now'
                font_size: sp(14)
                bold: True
                size_hint_x: None
                width: dp(130)
                background_normal: ''
                background_color: 0.2, 0.5, 0.9, 1
                color: 1, 1, 1, 1
                disabled: root.is_fetching
                on_release: root.trigger_fetch()
|
app/ui/kv/settings.kv
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#:kivy 2.3.0

# =====================================================================
# SettingsScreen — interest keywords, weights, API keys
# =====================================================================

<SettingsScreen>:
    name: 'settings'

    BoxLayout:
        orientation: 'vertical'

        canvas.before:
            Color:
                rgba: 0.08, 0.09, 0.12, 1
            Rectangle:
                pos: self.pos
                size: self.size

        # ── Header ──────────────────────────────────
        BoxLayout:
            size_hint_y: None
            height: dp(60)
            padding: dp(12), dp(10)

            canvas.before:
                Color:
                    rgba: 0.10, 0.12, 0.18, 1
                Rectangle:
                    pos: self.pos
                    size: self.size

            # Back button (glyph appears mojibake'd — confirm arrow char).
            Button:
                text: 'β'
                font_size: sp(22)
                size_hint_x: None
                width: dp(48)
                background_color: 0, 0, 0, 0
                color: 0.7, 0.7, 0.8, 1
                on_release: app.go_home()

            # NOTE(review): leading glyph appears mojibake'd — confirm gear emoji.
            Label:
                text: 'β Settings'
                font_size: sp(20)
                bold: True
                color: 1, 1, 1, 1
                halign: 'left'
                text_size: self.size
                valign: 'center'

        # ── Scrollable content ──────────────────────
        ScrollView:
            do_scroll_x: False

            BoxLayout:
                orientation: 'vertical'
                size_hint_y: None
                height: self.minimum_height
                padding: dp(16)
                spacing: dp(14)

                # ── Interest Keywords ───────────────
                # One labelled TextInput per category; each is two-way
                # bound to the corresponding *_keywords screen property.
                Label:
                    text: 'Interest Keywords'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                Label:
                    text: 'Machine Learning'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.ml_keywords
                    on_text: root.ml_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Artificial Intelligence'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.ai_keywords
                    on_text: root.ai_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Computer Science'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.cs_keywords
                    on_text: root.cs_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Neuroscience'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.neuro_keywords
                    on_text: root.neuro_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                Label:
                    text: 'Brain-Computer Interface'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.bci_keywords
                    on_text: root.bci_keywords = self.text
                    multiline: False
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1
                    cursor_color: 0.55, 0.78, 1.0, 1

                # ── Ranking Weights ─────────────────
                # Sliders are two-way bound to the weight_* properties;
                # labels show the current value as a percentage.
                Label:
                    text: 'Ranking Weights'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                Label:
                    text: 'Relevance: ' + '{:.0%}'.format(root.weight_relevance)
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                Slider:
                    value: root.weight_relevance
                    on_value: root.weight_relevance = self.value
                    min: 0
                    max: 1
                    step: 0.05
                    size_hint_y: None
                    height: dp(36)
                    cursor_size: dp(20), dp(20)

                Label:
                    text: 'Citations: ' + '{:.0%}'.format(root.weight_citation)
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                Slider:
                    value: root.weight_citation
                    on_value: root.weight_citation = self.value
                    min: 0
                    max: 1
                    step: 0.05
                    size_hint_y: None
                    height: dp(36)
                    cursor_size: dp(20), dp(20)

                Label:
                    text: 'Recency: ' + '{:.0%}'.format(root.weight_recency)
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                Slider:
                    value: root.weight_recency
                    on_value: root.weight_recency = self.value
                    min: 0
                    max: 1
                    step: 0.05
                    size_hint_y: None
                    height: dp(36)
                    cursor_size: dp(20), dp(20)

                # ── Papers per category ─────────────
                Label:
                    text: 'Papers per Category'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                # Falls back to 5 if the spinner text is ever empty.
                Spinner:
                    text: str(int(root.top_n))
                    values: ['3', '5', '7', '10']
                    on_text: root.top_n = int(self.text) if self.text else 5
                    size_hint_y: None
                    height: dp(40)
                    font_size: sp(14)
                    background_color: 0.18, 0.2, 0.28, 1
                    color: 0.9, 0.9, 0.95, 1

                # ── API Keys (optional) ─────────────
                # Masked inputs; keys are optional and raise rate limits.
                Label:
                    text: 'API Keys (optional)'
                    font_size: sp(16)
                    bold: True
                    color: 0.55, 0.78, 1.0, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(30)

                Label:
                    text: 'Semantic Scholar API Key'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.semantic_scholar_key
                    on_text: root.semantic_scholar_key = self.text
                    multiline: False
                    password: True
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1

                Label:
                    text: 'PubMed (NCBI) API Key'
                    font_size: sp(13)
                    color: 0.7, 0.7, 0.8, 1
                    halign: 'left'
                    text_size: self.size
                    size_hint_y: None
                    height: dp(20)
                TextInput:
                    text: root.pubmed_key
                    on_text: root.pubmed_key = self.text
                    multiline: False
                    password: True
                    size_hint_y: None
                    height: dp(38)
                    font_size: sp(13)
                    background_color: 0.15, 0.17, 0.22, 1
                    foreground_color: 0.9, 0.9, 0.95, 1

                # ── Save button ─────────────────────
                # NOTE(review): leading glyph appears mojibake'd — confirm
                # the intended save/floppy emoji.
                Button:
                    text: 'πΎ Save Settings'
                    font_size: sp(16)
                    bold: True
                    size_hint_y: None
                    height: dp(50)
                    background_normal: ''
                    background_color: 0.2, 0.55, 0.35, 1
                    color: 1, 1, 1, 1
                    on_release: root.save_settings()

                # Spacer so the save button clears the bottom edge.
                Widget:
                    size_hint_y: None
                    height: dp(40)
|