elfsong committed on
Commit
c65838f
·
1 Parent(s): ba9bdfb

Enhance Streamlit app with new features and UI improvements

Browse files

- Added padding to the block container for better layout.
- Introduced a new Hugging Face trending repository for enhanced data integration.
- Refactored data handling in push_to_hf_dataset and pull_from_hf_dataset functions for improved readability.
- Updated the fetch_daily_papers function to streamline paper retrieval.
- Added a new TRENDING_SYSTEM_PROMPT for identifying key research trends in papers.

Files changed (1) hide show
  1. src/streamlit_app.py +334 -54
src/streamlit_app.py CHANGED
@@ -28,6 +28,7 @@ st.markdown(
28
  /* ---------- global ---------- */
29
  [data-testid="stAppViewContainer"] { background: #f6f8fa; }
30
  [data-testid="stHeader"] { background: #f6f8fa; }
 
31
 
32
  h1, h2, h3, h4 { color: #1f2328 !important; }
33
  p, li, span, label { color: #424a53; }
@@ -228,10 +229,12 @@ div[data-testid="stHorizontalBlock"] > div[data-testid="stColumn"] > div > div[d
228
  # ---------------------------------------------------------------------------
229
  DATA_DIR = Path(__file__).resolve().parent.parent / "data"
230
  HF_DATASET_REPO = "Elfsong/hf_paper_summary"
 
231
 
232
 
233
  def _get_hf_token() -> str | None:
234
  import os
 
235
  token = os.getenv("HF_TOKEN", "")
236
  if token:
237
  return token
@@ -256,27 +259,34 @@ def _split_to_date(split_name: str) -> str:
256
  def push_to_hf_dataset(papers: list[dict], date_str: str):
257
  """Push papers list to HuggingFace dataset as a date split."""
258
  from datasets import Dataset
 
259
  token = _get_hf_token()
260
  if not token:
261
  return
262
 
263
  rows = []
264
  for p in papers:
265
- rows.append({
266
- "title": p.get("title", ""),
267
- "paper_id": p.get("paper_id", ""),
268
- "hf_url": p.get("hf_url", ""),
269
- "arxiv_url": p.get("arxiv_url", ""),
270
- "pdf_url": p.get("pdf_url", ""),
271
- "authors": p.get("authors", []),
272
- "summary": p.get("summary", ""),
273
- "upvotes": p.get("upvotes", 0),
274
- "published_at": p.get("published_at", ""),
275
- "concise_summary": p.get("concise_summary", ""),
276
- "concise_summary_zh": p.get("concise_summary_zh", ""),
277
- "detailed_analysis": json.dumps(p.get("detailed_analysis", {}), ensure_ascii=False),
278
- "detailed_analysis_zh": json.dumps(p.get("detailed_analysis_zh", {}), ensure_ascii=False),
279
- })
 
 
 
 
 
 
280
 
281
  ds = Dataset.from_list(rows)
282
  split_name = _date_to_split(date_str)
@@ -286,6 +296,7 @@ def push_to_hf_dataset(papers: list[dict], date_str: str):
286
  def _list_dataset_splits() -> list[str]:
287
  """List available date splits from the HF dataset repo without loading data."""
288
  from huggingface_hub import HfApi
 
289
  token = _get_hf_token()
290
  api = HfApi(token=token)
291
  try:
@@ -307,6 +318,7 @@ def pull_from_hf_dataset(target_date: str | None = None) -> dict[str, list[dict]
307
  """Load a date split from HF dataset. If target_date is None, load the latest.
308
  Returns {date_str: papers_list}."""
309
  from datasets import load_dataset
 
310
  token = _get_hf_token()
311
 
312
  splits = _list_dataset_splits()
@@ -331,7 +343,9 @@ def pull_from_hf_dataset(target_date: str | None = None) -> dict[str, list[dict]
331
  for row in ds:
332
  paper = dict(row)
333
  paper["detailed_analysis"] = json.loads(paper.get("detailed_analysis", "{}"))
334
- paper["detailed_analysis_zh"] = json.loads(paper.get("detailed_analysis_zh", "{}"))
 
 
335
  papers.append(paper)
336
  return {date_str: papers}
337
 
@@ -373,6 +387,7 @@ def load_papers(source) -> list[dict]:
373
  SSL_CTX = ssl.create_default_context()
374
  try:
375
  import certifi
 
376
  SSL_CTX.load_verify_locations(certifi.where())
377
  except ImportError:
378
  SSL_CTX.check_hostname = False
@@ -401,6 +416,29 @@ with the same structure: "summary", "pros", "cons".
401
 
402
  Reply with ONLY valid JSON — no markdown fences, no extra text."""
403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
 
405
  def fetch_daily_papers(date_str: str) -> list[dict]:
406
  url = f"{HF_API_URL}?date={date_str}"
@@ -416,23 +454,26 @@ def fetch_daily_papers(date_str: str) -> list[dict]:
416
  paper = item.get("paper", {})
417
  paper_id = paper.get("id", "")
418
  authors = [a.get("name", "") for a in paper.get("authors", [])]
419
- papers.append({
420
- "title": paper.get("title", ""),
421
- "paper_id": paper_id,
422
- "hf_url": f"https://huggingface.co/papers/{paper_id}",
423
- "arxiv_url": f"https://arxiv.org/abs/{paper_id}",
424
- "pdf_url": f"https://arxiv.org/pdf/{paper_id}",
425
- "authors": authors,
426
- "summary": paper.get("summary", ""),
427
- "upvotes": paper.get("upvotes", 0),
428
- "published_at": paper.get("publishedAt", ""),
429
- })
 
 
430
  papers.sort(key=lambda x: x["upvotes"], reverse=True)
431
  return papers
432
 
433
 
434
  def _get_gemini_key() -> str:
435
  import os
 
436
  api_key = os.getenv("GEMINI_API_KEY", "")
437
  if api_key:
438
  return api_key
@@ -441,11 +482,14 @@ def _get_gemini_key() -> str:
441
  for line in env_path.read_text().splitlines():
442
  if line.startswith("GEMINI_API_KEY="):
443
  return line.split("=", 1)[1].strip()
444
- raise RuntimeError("GEMINI_API_KEY not found. Set it as a HF Space secret or in .env")
 
 
445
 
446
 
447
  def summarize_paper_gemini(title: str, abstract: str) -> dict:
448
  from google import genai
 
449
  api_key = _get_gemini_key()
450
  client = genai.Client(api_key=api_key)
451
  resp = client.models.generate_content(
@@ -514,6 +558,189 @@ def crawl_and_summarize(date_str: str) -> Path:
514
  return output_path
515
 
516
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  # ---------------------------------------------------------------------------
518
  # Summary dialog
519
  # ---------------------------------------------------------------------------
@@ -539,7 +766,9 @@ def show_summary(paper: dict):
539
 
540
  # TL;DR
541
  if lang:
542
- concise = paper.get("concise_summary_zh", "") or paper.get("concise_summary", "")
 
 
543
  else:
544
  concise = paper.get("concise_summary", "")
545
  if concise:
@@ -548,7 +777,9 @@ def show_summary(paper: dict):
548
 
549
  # Detailed Analysis
550
  if lang:
551
- analysis = paper.get("detailed_analysis_zh", {}) or paper.get("detailed_analysis", {})
 
 
552
  else:
553
  analysis = paper.get("detailed_analysis", {})
554
  if analysis:
@@ -619,7 +850,11 @@ with col_date:
619
  available_dates = list_available_dates()
620
  selected_date = st.date_input(
621
  "Select date",
622
- value=datetime.strptime(available_dates[0], "%Y-%m-%d").date() if available_dates else (datetime.now(timezone.utc) - timedelta(days=1)).date(),
 
 
 
 
623
  format="YYYY-MM-DD",
624
  label_visibility="collapsed",
625
  )
@@ -630,38 +865,35 @@ with col_lang:
630
 
631
  latest_date = selected_date_str
632
 
633
- # Try HF dataset for selected date
634
- hf_data = pull_from_hf_dataset(target_date=selected_date_str)
635
- if hf_data:
636
- papers = hf_data[selected_date_str]
637
 
638
- # Fall back to local files
639
- if not papers:
640
- json_files = find_json_files()
641
- if selected_date_str in json_files:
642
- papers = load_papers(json_files[selected_date_str])
643
 
644
- # Auto-fetch if no data for selected date
645
- if not papers:
646
- st.toast(f"No cached data for {selected_date_str}. Fetching and summarizing...", icon="🔄")
647
- result_path = crawl_and_summarize(selected_date_str)
648
- if result_path:
649
- papers = load_papers(result_path)
650
 
651
  if not papers:
652
- st.info("No papers found. Please check back later.")
653
  st.stop()
654
 
655
  papers.sort(key=lambda p: p.get("upvotes", 0), reverse=True)
656
 
657
  date_label = latest_date
658
- st.markdown(
659
- f"""<div class="stats-bar">
660
- <div class="stat-item"><span class="stat-value">{date_label}</span></div>
661
- <div class="stat-item"><span class="stat-value">{len(papers)}</span> Papers</div>
662
- </div>""",
663
- unsafe_allow_html=True,
664
- )
665
 
666
  # --- Render paper grid (3 columns) ---
667
  NUM_COLS = 3
@@ -673,3 +905,51 @@ for row_start in range(0, len(papers), NUM_COLS):
673
  break
674
  with col:
675
  render_card(papers[paper_idx], rank=paper_idx + 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  /* ---------- global ---------- */
29
  [data-testid="stAppViewContainer"] { background: #f6f8fa; }
30
  [data-testid="stHeader"] { background: #f6f8fa; }
31
+ .block-container { padding-top: 1rem !important; }
32
 
33
  h1, h2, h3, h4 { color: #1f2328 !important; }
34
  p, li, span, label { color: #424a53; }
 
229
  # ---------------------------------------------------------------------------
230
  DATA_DIR = Path(__file__).resolve().parent.parent / "data"
231
  HF_DATASET_REPO = "Elfsong/hf_paper_summary"
232
+ HF_TRENDING_REPO = "Elfsong/hf_paper_trending"
233
 
234
 
235
  def _get_hf_token() -> str | None:
236
  import os
237
+
238
  token = os.getenv("HF_TOKEN", "")
239
  if token:
240
  return token
 
259
  def push_to_hf_dataset(papers: list[dict], date_str: str):
260
  """Push papers list to HuggingFace dataset as a date split."""
261
  from datasets import Dataset
262
+
263
  token = _get_hf_token()
264
  if not token:
265
  return
266
 
267
  rows = []
268
  for p in papers:
269
+ rows.append(
270
+ {
271
+ "title": p.get("title", ""),
272
+ "paper_id": p.get("paper_id", ""),
273
+ "hf_url": p.get("hf_url", ""),
274
+ "arxiv_url": p.get("arxiv_url", ""),
275
+ "pdf_url": p.get("pdf_url", ""),
276
+ "authors": p.get("authors", []),
277
+ "summary": p.get("summary", ""),
278
+ "upvotes": p.get("upvotes", 0),
279
+ "published_at": p.get("published_at", ""),
280
+ "concise_summary": p.get("concise_summary", ""),
281
+ "concise_summary_zh": p.get("concise_summary_zh", ""),
282
+ "detailed_analysis": json.dumps(
283
+ p.get("detailed_analysis", {}), ensure_ascii=False
284
+ ),
285
+ "detailed_analysis_zh": json.dumps(
286
+ p.get("detailed_analysis_zh", {}), ensure_ascii=False
287
+ ),
288
+ }
289
+ )
290
 
291
  ds = Dataset.from_list(rows)
292
  split_name = _date_to_split(date_str)
 
296
  def _list_dataset_splits() -> list[str]:
297
  """List available date splits from the HF dataset repo without loading data."""
298
  from huggingface_hub import HfApi
299
+
300
  token = _get_hf_token()
301
  api = HfApi(token=token)
302
  try:
 
318
  """Load a date split from HF dataset. If target_date is None, load the latest.
319
  Returns {date_str: papers_list}."""
320
  from datasets import load_dataset
321
+
322
  token = _get_hf_token()
323
 
324
  splits = _list_dataset_splits()
 
343
  for row in ds:
344
  paper = dict(row)
345
  paper["detailed_analysis"] = json.loads(paper.get("detailed_analysis", "{}"))
346
+ paper["detailed_analysis_zh"] = json.loads(
347
+ paper.get("detailed_analysis_zh", "{}")
348
+ )
349
  papers.append(paper)
350
  return {date_str: papers}
351
 
 
387
  SSL_CTX = ssl.create_default_context()
388
  try:
389
  import certifi
390
+
391
  SSL_CTX.load_verify_locations(certifi.where())
392
  except ImportError:
393
  SSL_CTX.check_hostname = False
 
416
 
417
  Reply with ONLY valid JSON — no markdown fences, no extra text."""
418
 
419
+ TRENDING_SYSTEM_PROMPT = """\
420
+ You are a senior AI researcher. Given a collection of top papers from the last several days, \
421
+ identify the key research trends and produce a JSON object with exactly six keys:
422
+
423
+ 1. "trending_summary": A 2-3 sentence English summary of the dominant research trends \
424
+ and themes across these papers. Focus on emerging patterns, hot topics, and notable shifts.
425
+
426
+ 2. "trending_summary_zh": The same trending summary translated into Chinese (简体中文).
427
+
428
+ 3. "top_topics": A list of 3-5 short topic labels (e.g. "Multimodal LLMs", "Efficient Fine-tuning") \
429
+ representing the most prominent themes, in English.
430
+
431
+ 4. "top_topics_zh": The same topic labels translated into Chinese (简体中文).
432
+
433
+ 5. "keywords": A list of 5-10 specific technical keywords or terms that appear frequently \
434
+ or are central to the papers (e.g. "LoRA", "RLHF", "diffusion", "chain-of-thought", "MoE", \
435
+ "RAG", "MLLM", "DPO"). Use the canonical technical term, not a paraphrase.
436
+
437
+ 6. "keywords_zh": The same technical keywords translated into Chinese where applicable \
438
+ (keep English acronyms as-is, e.g. "LoRA", "RLHF", "扩散模型", "思维链").
439
+
440
+ Reply with ONLY valid JSON — no markdown fences, no extra text."""
441
+
442
 
443
  def fetch_daily_papers(date_str: str) -> list[dict]:
444
  url = f"{HF_API_URL}?date={date_str}"
 
454
  paper = item.get("paper", {})
455
  paper_id = paper.get("id", "")
456
  authors = [a.get("name", "") for a in paper.get("authors", [])]
457
+ papers.append(
458
+ {
459
+ "title": paper.get("title", ""),
460
+ "paper_id": paper_id,
461
+ "hf_url": f"https://huggingface.co/papers/{paper_id}",
462
+ "arxiv_url": f"https://arxiv.org/abs/{paper_id}",
463
+ "pdf_url": f"https://arxiv.org/pdf/{paper_id}",
464
+ "authors": authors,
465
+ "summary": paper.get("summary", ""),
466
+ "upvotes": paper.get("upvotes", 0),
467
+ "published_at": paper.get("publishedAt", ""),
468
+ }
469
+ )
470
  papers.sort(key=lambda x: x["upvotes"], reverse=True)
471
  return papers
472
 
473
 
474
  def _get_gemini_key() -> str:
475
  import os
476
+
477
  api_key = os.getenv("GEMINI_API_KEY", "")
478
  if api_key:
479
  return api_key
 
482
  for line in env_path.read_text().splitlines():
483
  if line.startswith("GEMINI_API_KEY="):
484
  return line.split("=", 1)[1].strip()
485
+ raise RuntimeError(
486
+ "GEMINI_API_KEY not found. Set it as a HF Space secret or in .env"
487
+ )
488
 
489
 
490
  def summarize_paper_gemini(title: str, abstract: str) -> dict:
491
  from google import genai
492
+
493
  api_key = _get_gemini_key()
494
  client = genai.Client(api_key=api_key)
495
  resp = client.models.generate_content(
 
558
  return output_path
559
 
560
 
561
+ # ---------------------------------------------------------------------------
562
+ # Trending summary
563
+ # ---------------------------------------------------------------------------
564
+ def _load_recent_papers(n_days: int = 5) -> tuple[list[dict], str, str]:
565
+ """Load top papers from the most recent n_days splits.
566
+ Returns (papers, earliest_date, latest_date)."""
567
+ from datasets import load_dataset
568
+
569
+ token = _get_hf_token()
570
+ splits = _list_dataset_splits()[:n_days]
571
+ all_papers = []
572
+ loaded_dates = []
573
+ for split in splits:
574
+ try:
575
+ ds = load_dataset(HF_DATASET_REPO, split=split, token=token)
576
+ date = _split_to_date(split)
577
+ loaded_dates.append(date)
578
+ for row in ds:
579
+ paper = dict(row)
580
+ paper["_date"] = date
581
+ all_papers.append(paper)
582
+ except Exception:
583
+ continue
584
+ all_papers.sort(key=lambda p: p.get("upvotes", 0), reverse=True)
585
+ earliest = min(loaded_dates) if loaded_dates else ""
586
+ latest = max(loaded_dates) if loaded_dates else ""
587
+ return all_papers, earliest, latest
588
+
589
+
590
+ def generate_trending_summary(papers: list[dict]) -> dict:
591
+ """Call Gemini to produce a trending summary from recent papers."""
592
+ from google import genai
593
+
594
+ api_key = _get_gemini_key()
595
+ client = genai.Client(api_key=api_key)
596
+
597
+ # Build input: title + concise_summary + detailed analysis for each paper
598
+ lines = []
599
+ for p in papers:
600
+ date = p.get("_date", "")
601
+ title = p.get("title", "")
602
+ summary = p.get("concise_summary", "") or p.get("summary", "")
603
+ upvotes = p.get("upvotes", 0)
604
+ parts = [f"[{date}] (upvotes: {upvotes}) {title}", summary]
605
+ analysis = p.get("detailed_analysis", {})
606
+ if isinstance(analysis, str):
607
+ try:
608
+ analysis = json.loads(analysis)
609
+ except Exception:
610
+ analysis = {}
611
+ if analysis:
612
+ if analysis.get("summary"):
613
+ parts.append(f"Analysis: {analysis['summary']}")
614
+ pros = analysis.get("pros", [])
615
+ if pros:
616
+ parts.append("Strengths: " + "; ".join(pros))
617
+ cons = analysis.get("cons", [])
618
+ if cons:
619
+ parts.append("Limitations: " + "; ".join(cons))
620
+ lines.append("\n".join(parts))
621
+ content = "\n\n".join(lines)
622
+
623
+ resp = client.models.generate_content(
624
+ model="gemini-2.5-flash",
625
+ contents=content,
626
+ config=genai.types.GenerateContentConfig(
627
+ system_instruction=TRENDING_SYSTEM_PROMPT,
628
+ temperature=0.3,
629
+ max_output_tokens=4096*6,
630
+ response_mime_type="application/json",
631
+ ),
632
+ )
633
+ return json.loads(resp.text)
634
+
635
+
636
+ def push_trending_to_hf(trending: dict, date_str: str):
637
+ """Push trending summary to HF dataset."""
638
+ from datasets import Dataset
639
+
640
+ token = _get_hf_token()
641
+ if not token:
642
+ return
643
+ row = {
644
+ "trending_summary": trending.get("trending_summary", ""),
645
+ "trending_summary_zh": trending.get("trending_summary_zh", ""),
646
+ "top_topics": json.dumps(trending.get("top_topics", []), ensure_ascii=False),
647
+ "top_topics_zh": json.dumps(
648
+ trending.get("top_topics_zh", []), ensure_ascii=False
649
+ ),
650
+ "keywords": json.dumps(trending.get("keywords", []), ensure_ascii=False),
651
+ "keywords_zh": json.dumps(trending.get("keywords_zh", []), ensure_ascii=False),
652
+ "date_range": trending.get("date_range", ""),
653
+ "generated_date": date_str,
654
+ }
655
+ ds = Dataset.from_list([row])
656
+ split_name = _date_to_split(date_str)
657
+ ds.push_to_hub(HF_TRENDING_REPO, split=split_name, token=token)
658
+
659
+
660
+ def pull_trending_from_hf(target_date: str | None = None) -> dict | None:
661
+ """Load trending summary from HF dataset. Returns dict or None."""
662
+ from huggingface_hub import HfApi
663
+ from datasets import load_dataset
664
+
665
+ token = _get_hf_token()
666
+ api = HfApi(token=token)
667
+ try:
668
+ files = api.list_repo_files(HF_TRENDING_REPO, repo_type="dataset")
669
+ except Exception:
670
+ return None
671
+
672
+ splits = set()
673
+ for f in files:
674
+ name = f.split("/")[-1]
675
+ for part in name.replace(".parquet", "").replace(".arrow", "").split("-"):
676
+ if part.startswith("date_"):
677
+ splits.add(part)
678
+ break
679
+ splits = sorted(splits, reverse=True)
680
+ if not splits:
681
+ return None
682
+
683
+ if target_date:
684
+ target_split = _date_to_split(target_date)
685
+ if target_split not in splits:
686
+ return None
687
+ split_to_load = target_split
688
+ else:
689
+ split_to_load = splits[0]
690
+
691
+ try:
692
+ ds = load_dataset(HF_TRENDING_REPO, split=split_to_load, token=token)
693
+ except Exception:
694
+ return None
695
+
696
+ row = dict(ds[0])
697
+ row["top_topics"] = json.loads(row.get("top_topics", "[]"))
698
+ row["top_topics_zh"] = json.loads(row.get("top_topics_zh", "[]"))
699
+ row["keywords"] = json.loads(row.get("keywords", "[]"))
700
+ row["keywords_zh"] = json.loads(row.get("keywords_zh", "[]"))
701
+ return row
702
+
703
+
704
+ def get_or_generate_trending(date_str: str, status=None) -> tuple[dict | None, str]:
705
+ """Get trending from HF cache, or generate and push it.
706
+ Returns (trending_dict, date_range_str)."""
707
+ if status:
708
+ status.info("Checking cached trending summary...")
709
+ trending = pull_trending_from_hf(target_date=date_str)
710
+ if trending:
711
+ date_range = trending.get("date_range", "")
712
+ return trending, date_range
713
+
714
+ # Generate fresh trending
715
+ if status:
716
+ status.info("Loading recent papers for trending analysis...")
717
+ recent_papers, earliest, latest = _load_recent_papers(n_days=5)
718
+ if not recent_papers:
719
+ if status:
720
+ status.warning("No recent papers available for trending analysis.")
721
+ return None, ""
722
+ date_range = f"{earliest} ~ {latest}" if earliest and latest else ""
723
+ try:
724
+ if status:
725
+ status.info("Generating trending summary with Gemini...")
726
+ trending = generate_trending_summary(recent_papers)
727
+ trending["date_range"] = date_range
728
+ except Exception as e:
729
+ if status:
730
+ status.error(f"Trending generation failed: {e}")
731
+ return None, ""
732
+
733
+ try:
734
+ if status:
735
+ status.info("Saving trending summary to HuggingFace...")
736
+ push_trending_to_hf(trending, date_str)
737
+ except Exception as e:
738
+ if status:
739
+ status.warning(f"HF push failed: {e}")
740
+
741
+ return trending, date_range
742
+
743
+
744
  # ---------------------------------------------------------------------------
745
  # Summary dialog
746
  # ---------------------------------------------------------------------------
 
766
 
767
  # TL;DR
768
  if lang:
769
+ concise = paper.get("concise_summary_zh", "") or paper.get(
770
+ "concise_summary", ""
771
+ )
772
  else:
773
  concise = paper.get("concise_summary", "")
774
  if concise:
 
777
 
778
  # Detailed Analysis
779
  if lang:
780
+ analysis = paper.get("detailed_analysis_zh", {}) or paper.get(
781
+ "detailed_analysis", {}
782
+ )
783
  else:
784
  analysis = paper.get("detailed_analysis", {})
785
  if analysis:
 
850
  available_dates = list_available_dates()
851
  selected_date = st.date_input(
852
  "Select date",
853
+ value=(
854
+ datetime.strptime(available_dates[0], "%Y-%m-%d").date()
855
+ if available_dates
856
+ else (datetime.now(timezone.utc) - timedelta(days=1)).date()
857
+ ),
858
  format="YYYY-MM-DD",
859
  label_visibility="collapsed",
860
  )
 
865
 
866
  latest_date = selected_date_str
867
 
868
+ with st.spinner("Loading papers..."):
869
+ hf_data = pull_from_hf_dataset(target_date=selected_date_str)
870
+ if hf_data:
871
+ papers = hf_data[selected_date_str]
872
 
873
+ if not papers:
874
+ json_files = find_json_files()
875
+ if selected_date_str in json_files:
876
+ papers = load_papers(json_files[selected_date_str])
 
877
 
878
+ if not papers:
879
+ result_path = crawl_and_summarize(selected_date_str)
880
+ if result_path:
881
+ papers = load_papers(result_path)
 
 
882
 
883
  if not papers:
884
+ st.error("No papers found. Please check back later.")
885
  st.stop()
886
 
887
  papers.sort(key=lambda p: p.get("upvotes", 0), reverse=True)
888
 
889
  date_label = latest_date
890
+ lang = st.session_state.get("global_lang_toggle", False)
891
+
892
+ # --- Trending status (spinner under title, filled later) ---
893
+ trending_spinner = st.empty()
894
+
895
+ # --- Trending summary placeholder (filled after papers render) ---
896
+ trending_placeholder = st.empty()
897
 
898
  # --- Render paper grid (3 columns) ---
899
  NUM_COLS = 3
 
905
  break
906
  with col:
907
  render_card(papers[paper_idx], rank=paper_idx + 1)
908
+
909
+ # --- Trending summary (loaded after papers are displayed) ---
910
+ with trending_spinner.container():
911
+ with st.spinner("Loading trending summary..."):
912
+ trending, trending_date_range = get_or_generate_trending(
913
+ selected_date_str, status=None
914
+ )
915
+ trending_spinner.empty()
916
+
917
+ if trending:
918
+ if lang:
919
+ summary_text = trending.get("trending_summary_zh", "") or trending.get(
920
+ "trending_summary", ""
921
+ )
922
+ topics = trending.get("top_topics_zh", []) or trending.get("top_topics", [])
923
+ keywords = trending.get("keywords_zh", []) or trending.get("keywords", [])
924
+ else:
925
+ summary_text = trending.get("trending_summary", "")
926
+ topics = trending.get("top_topics", [])
927
+ keywords = trending.get("keywords", [])
928
+ topics_html = " ".join(
929
+ f'<span style="background:#eef1f5;padding:2px 10px;border-radius:12px;'
930
+ f'font-size:12px;font-weight:600;color:#2563eb;">{t}</span>'
931
+ for t in topics
932
+ )
933
+ keywords_html = " ".join(
934
+ f'<span style="background:#fff8e1;padding:2px 10px;border-radius:12px;'
935
+ f'font-size:11px;font-weight:500;color:#9a6700;border:1px solid #f0d060;">{k}</span>'
936
+ for k in keywords
937
+ )
938
+ date_range_label = (
939
+ f'<span style="font-size:12px;color:#9a6700;font-weight:600;">({trending_date_range})</span>'
940
+ if trending_date_range
941
+ else ""
942
+ )
943
+ trending_placeholder.markdown(
944
+ f"""<div class="stats-bar">
945
+ <div style="flex:1;min-width:200px;">
946
+ <div style="font-size:13px;color:#656d76;margin-bottom:4px;">
947
+ {"🔥 趋势" if lang else "🔥 Trending"} {date_range_label}
948
+ </div>
949
+ <div style="font-size:13px;color:#424a53;line-height:1.5;">{summary_text}</div>
950
+ <div style="display:flex;gap:6px;flex-wrap:wrap;margin-top:8px;">{topics_html}</div>
951
+ <div style="display:flex;gap:6px;flex-wrap:wrap;margin-top:8px;">{keywords_html}</div>
952
+ </div>
953
+ </div>""",
954
+ unsafe_allow_html=True,
955
+ )