Elfsong committed on
Commit
58a637f
·
1 Parent(s): c293d96

chore: initialize virtual environment and install project dependencies

Browse files
src/daily_retrieve.py CHANGED
@@ -31,7 +31,7 @@ load_dotenv(ROOT / ".env")
31
  # ---------------------------------------------------------------------------
32
  DATA_DIR = ROOT / "data"
33
  HF_DATASET_REPO = "Elfsong/hf_paper_summary"
34
- HF_TRENDING_REPO = "Elfsong/hf_paper_trending"
35
  HF_API_URL = "https://huggingface.co/api/daily_papers"
36
 
37
  SSL_CTX = ssl.create_default_context()
 
31
  # ---------------------------------------------------------------------------
32
  DATA_DIR = ROOT / "data"
33
  HF_DATASET_REPO = "Elfsong/hf_paper_summary"
34
+ HF_TRENDING_REPO = "Elfsong/hf_paper_daily_trending"
35
  HF_API_URL = "https://huggingface.co/api/daily_papers"
36
 
37
  SSL_CTX = ssl.create_default_context()
src/monthly_retrieve.py CHANGED
@@ -195,26 +195,33 @@ def _build_paper_prompt_content(papers: list[dict]) -> str:
195
  # ---------------------------------------------------------------------------
196
  # Gemini call
197
  # ---------------------------------------------------------------------------
198
- def generate_monthly_trending(papers: list[dict]) -> dict:
199
  from google import genai
200
  api_key = _get_env("GEMINI_API_KEY")
201
  if not api_key:
202
  raise RuntimeError("GEMINI_API_KEY not set")
203
  content = _build_paper_prompt_content(papers)
204
  client = genai.Client(api_key=api_key)
205
- resp = client.models.generate_content(
206
- model="gemini-3.1-pro-preview",
207
- contents=content,
208
- config=genai.types.GenerateContentConfig(
209
- system_instruction=MONTHLY_TRENDING_SYSTEM_PROMPT,
210
- temperature=0.3,
211
- max_output_tokens=4096 * 6,
212
- response_mime_type="application/json",
213
- ),
214
- )
215
- decoder = json.JSONDecoder()
216
- result, _ = decoder.raw_decode(resp.text.strip())
217
- return result
 
 
 
 
 
 
 
218
 
219
 
220
  # ---------------------------------------------------------------------------
@@ -461,9 +468,10 @@ def run(month_str: str, no_push: bool = False):
461
  topics = trending.get("top_topics", [])
462
  display.complete_step(3, f"{len(topics)} topics, {len(trending.get('keywords', []))} keywords")
463
  except Exception as e:
464
- display.error_step(3, str(e)[:60])
465
  display.skip_step(4)
466
  display.finish()
 
467
  return
468
 
469
  # --- Step 4: Push ---
@@ -475,7 +483,10 @@ def run(month_str: str, no_push: bool = False):
475
  push_monthly_trending_to_hf(trending, month_str)
476
  display.complete_step(4, f"split={target_split}")
477
  except Exception as e:
478
- display.error_step(4, str(e)[:60])
 
 
 
479
 
480
  display.finish()
481
 
 
195
  # ---------------------------------------------------------------------------
196
  # Gemini call
197
  # ---------------------------------------------------------------------------
198
+ def generate_monthly_trending(papers: list[dict], max_retries: int = 3) -> dict:
199
  from google import genai
200
  api_key = _get_env("GEMINI_API_KEY")
201
  if not api_key:
202
  raise RuntimeError("GEMINI_API_KEY not set")
203
  content = _build_paper_prompt_content(papers)
204
  client = genai.Client(api_key=api_key)
205
+ for attempt in range(max_retries):
206
+ try:
207
+ resp = client.models.generate_content(
208
+ model="gemini-3.1-pro-preview",
209
+ contents=content,
210
+ config=genai.types.GenerateContentConfig(
211
+ system_instruction=MONTHLY_TRENDING_SYSTEM_PROMPT,
212
+ temperature=0.3,
213
+ max_output_tokens=65536,
214
+ response_mime_type="application/json",
215
+ ),
216
+ )
217
+ decoder = json.JSONDecoder()
218
+ result, _ = decoder.raw_decode(resp.text.strip())
219
+ return result
220
+ except Exception as e:
221
+ if attempt < max_retries - 1:
222
+ time.sleep((attempt + 1) * 5)
223
+ else:
224
+ raise
225
 
226
 
227
  # ---------------------------------------------------------------------------
 
468
  topics = trending.get("top_topics", [])
469
  display.complete_step(3, f"{len(topics)} topics, {len(trending.get('keywords', []))} keywords")
470
  except Exception as e:
471
+ display.error_step(3, "failed")
472
  display.skip_step(4)
473
  display.finish()
474
+ print(f"\n {_YELLOW}{_BOLD}Error:{_RESET} {e}\n")
475
  return
476
 
477
  # --- Step 4: Push ---
 
483
  push_monthly_trending_to_hf(trending, month_str)
484
  display.complete_step(4, f"split={target_split}")
485
  except Exception as e:
486
+ display.error_step(4, "failed")
487
+ display.finish()
488
+ print(f"\n {_YELLOW}{_BOLD}Error:{_RESET} {e}\n")
489
+ return
490
 
491
  display.finish()
492
 
src/streamlit_app.py CHANGED
@@ -280,7 +280,7 @@ div[data-testid="stHorizontalBlock"] > div[data-testid="stColumn"] > div > div[d
280
  # ---------------------------------------------------------------------------
281
  DATA_DIR = Path(__file__).resolve().parent.parent / "data"
282
  HF_DATASET_REPO = "Elfsong/hf_paper_summary"
283
- HF_TRENDING_REPO = "Elfsong/hf_paper_trending"
284
  HF_MONTHLY_TRENDING_REPO = "Elfsong/hf_paper_monthly_trending"
285
  MONTH_RANGE = 6
286
 
@@ -1452,13 +1452,12 @@ def _load_trending_and_render(
1452
  # ---------------------------------------------------------------------------
1453
  yesterday_str = (datetime.now(timezone.utc) - timedelta(days=1)).strftime("%Y-%m-%d")
1454
 
1455
- # --- Tabs: Daily / Monthly ---
1456
- tab_daily, tab_monthly = st.tabs(["Daily", "Monthly"])
1457
-
1458
  today = datetime.now(timezone.utc).date()
 
1459
 
1460
  # ---- Daily tab ----
1461
- with tab_daily:
1462
  col_date_d, col_spacer_d, col_lang_d = st.columns([0.15, 0.75, 0.1], vertical_alignment="center")
1463
  with col_date_d:
1464
  available_dates = list_available_dates()
@@ -1514,7 +1513,7 @@ with tab_daily:
1514
  _render_papers_section(papers, lang, selected_date_str, "daily")
1515
 
1516
  # ---- Monthly tab ----
1517
- with tab_monthly:
1518
  # Discover available monthly trending splits on HF
1519
  _monthly_splits_key = "monthly_available_splits"
1520
  if _monthly_splits_key not in st.session_state:
 
280
  # ---------------------------------------------------------------------------
281
  DATA_DIR = Path(__file__).resolve().parent.parent / "data"
282
  HF_DATASET_REPO = "Elfsong/hf_paper_summary"
283
+ HF_TRENDING_REPO = "Elfsong/hf_paper_daily_trending"
284
  HF_MONTHLY_TRENDING_REPO = "Elfsong/hf_paper_monthly_trending"
285
  MONTH_RANGE = 6
286
 
 
1452
  # ---------------------------------------------------------------------------
1453
  yesterday_str = (datetime.now(timezone.utc) - timedelta(days=1)).strftime("%Y-%m-%d")
1454
 
1455
+ # --- Tab selection ---
 
 
1456
  today = datetime.now(timezone.utc).date()
1457
+ active_tab = st.segmented_control("", ["Daily", "Monthly"], default="Daily", key="active_tab")
1458
 
1459
  # ---- Daily tab ----
1460
+ if active_tab == "Daily":
1461
  col_date_d, col_spacer_d, col_lang_d = st.columns([0.15, 0.75, 0.1], vertical_alignment="center")
1462
  with col_date_d:
1463
  available_dates = list_available_dates()
 
1513
  _render_papers_section(papers, lang, selected_date_str, "daily")
1514
 
1515
  # ---- Monthly tab ----
1516
+ elif active_tab == "Monthly":
1517
  # Discover available monthly trending splits on HF
1518
  _monthly_splits_key = "monthly_available_splits"
1519
  if _monthly_splits_key not in st.session_state: