Spaces:
Running
Running
chore: initialize virtual environment and install project dependencies
Browse files
- src/daily_retrieve.py +1 -1
- src/monthly_retrieve.py +27 -16
- src/streamlit_app.py +5 -6
src/daily_retrieve.py
CHANGED
|
@@ -31,7 +31,7 @@ load_dotenv(ROOT / ".env")
|
|
| 31 |
# ---------------------------------------------------------------------------
|
| 32 |
DATA_DIR = ROOT / "data"
|
| 33 |
HF_DATASET_REPO = "Elfsong/hf_paper_summary"
|
| 34 |
-
HF_TRENDING_REPO = "Elfsong/
|
| 35 |
HF_API_URL = "https://huggingface.co/api/daily_papers"
|
| 36 |
|
| 37 |
SSL_CTX = ssl.create_default_context()
|
|
|
|
| 31 |
# ---------------------------------------------------------------------------
|
| 32 |
DATA_DIR = ROOT / "data"
|
| 33 |
HF_DATASET_REPO = "Elfsong/hf_paper_summary"
|
| 34 |
+
HF_TRENDING_REPO = "Elfsong/hf_paper_daily_trending"
|
| 35 |
HF_API_URL = "https://huggingface.co/api/daily_papers"
|
| 36 |
|
| 37 |
SSL_CTX = ssl.create_default_context()
|
src/monthly_retrieve.py
CHANGED
|
@@ -195,26 +195,33 @@ def _build_paper_prompt_content(papers: list[dict]) -> str:
|
|
| 195 |
# ---------------------------------------------------------------------------
|
| 196 |
# Gemini call
|
| 197 |
# ---------------------------------------------------------------------------
|
| 198 |
-
def generate_monthly_trending(papers: list[dict]) -> dict:
|
| 199 |
from google import genai
|
| 200 |
api_key = _get_env("GEMINI_API_KEY")
|
| 201 |
if not api_key:
|
| 202 |
raise RuntimeError("GEMINI_API_KEY not set")
|
| 203 |
content = _build_paper_prompt_content(papers)
|
| 204 |
client = genai.Client(api_key=api_key)
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
|
| 219 |
|
| 220 |
# ---------------------------------------------------------------------------
|
|
@@ -461,9 +468,10 @@ def run(month_str: str, no_push: bool = False):
|
|
| 461 |
topics = trending.get("top_topics", [])
|
| 462 |
display.complete_step(3, f"{len(topics)} topics, {len(trending.get('keywords', []))} keywords")
|
| 463 |
except Exception as e:
|
| 464 |
-
display.error_step(3,
|
| 465 |
display.skip_step(4)
|
| 466 |
display.finish()
|
|
|
|
| 467 |
return
|
| 468 |
|
| 469 |
# --- Step 4: Push ---
|
|
@@ -475,7 +483,10 @@ def run(month_str: str, no_push: bool = False):
|
|
| 475 |
push_monthly_trending_to_hf(trending, month_str)
|
| 476 |
display.complete_step(4, f"split={target_split}")
|
| 477 |
except Exception as e:
|
| 478 |
-
display.error_step(4,
|
|
|
|
|
|
|
|
|
|
| 479 |
|
| 480 |
display.finish()
|
| 481 |
|
|
|
|
| 195 |
# ---------------------------------------------------------------------------
|
| 196 |
# Gemini call
|
| 197 |
# ---------------------------------------------------------------------------
|
| 198 |
+
def generate_monthly_trending(papers: list[dict], max_retries: int = 3) -> dict:
|
| 199 |
from google import genai
|
| 200 |
api_key = _get_env("GEMINI_API_KEY")
|
| 201 |
if not api_key:
|
| 202 |
raise RuntimeError("GEMINI_API_KEY not set")
|
| 203 |
content = _build_paper_prompt_content(papers)
|
| 204 |
client = genai.Client(api_key=api_key)
|
| 205 |
+
for attempt in range(max_retries):
|
| 206 |
+
try:
|
| 207 |
+
resp = client.models.generate_content(
|
| 208 |
+
model="gemini-3.1-pro-preview",
|
| 209 |
+
contents=content,
|
| 210 |
+
config=genai.types.GenerateContentConfig(
|
| 211 |
+
system_instruction=MONTHLY_TRENDING_SYSTEM_PROMPT,
|
| 212 |
+
temperature=0.3,
|
| 213 |
+
max_output_tokens=65536,
|
| 214 |
+
response_mime_type="application/json",
|
| 215 |
+
),
|
| 216 |
+
)
|
| 217 |
+
decoder = json.JSONDecoder()
|
| 218 |
+
result, _ = decoder.raw_decode(resp.text.strip())
|
| 219 |
+
return result
|
| 220 |
+
except Exception as e:
|
| 221 |
+
if attempt < max_retries - 1:
|
| 222 |
+
time.sleep((attempt + 1) * 5)
|
| 223 |
+
else:
|
| 224 |
+
raise
|
| 225 |
|
| 226 |
|
| 227 |
# ---------------------------------------------------------------------------
|
|
|
|
| 468 |
topics = trending.get("top_topics", [])
|
| 469 |
display.complete_step(3, f"{len(topics)} topics, {len(trending.get('keywords', []))} keywords")
|
| 470 |
except Exception as e:
|
| 471 |
+
display.error_step(3, "failed")
|
| 472 |
display.skip_step(4)
|
| 473 |
display.finish()
|
| 474 |
+
print(f"\n {_YELLOW}{_BOLD}Error:{_RESET} {e}\n")
|
| 475 |
return
|
| 476 |
|
| 477 |
# --- Step 4: Push ---
|
|
|
|
| 483 |
push_monthly_trending_to_hf(trending, month_str)
|
| 484 |
display.complete_step(4, f"split={target_split}")
|
| 485 |
except Exception as e:
|
| 486 |
+
display.error_step(4, "failed")
|
| 487 |
+
display.finish()
|
| 488 |
+
print(f"\n {_YELLOW}{_BOLD}Error:{_RESET} {e}\n")
|
| 489 |
+
return
|
| 490 |
|
| 491 |
display.finish()
|
| 492 |
|
src/streamlit_app.py
CHANGED
|
@@ -280,7 +280,7 @@ div[data-testid="stHorizontalBlock"] > div[data-testid="stColumn"] > div > div[d
|
|
| 280 |
# ---------------------------------------------------------------------------
|
| 281 |
DATA_DIR = Path(__file__).resolve().parent.parent / "data"
|
| 282 |
HF_DATASET_REPO = "Elfsong/hf_paper_summary"
|
| 283 |
-
HF_TRENDING_REPO = "Elfsong/
|
| 284 |
HF_MONTHLY_TRENDING_REPO = "Elfsong/hf_paper_monthly_trending"
|
| 285 |
MONTH_RANGE = 6
|
| 286 |
|
|
@@ -1452,13 +1452,12 @@ def _load_trending_and_render(
|
|
| 1452 |
# ---------------------------------------------------------------------------
|
| 1453 |
yesterday_str = (datetime.now(timezone.utc) - timedelta(days=1)).strftime("%Y-%m-%d")
|
| 1454 |
|
| 1455 |
-
# ---
|
| 1456 |
-
tab_daily, tab_monthly = st.tabs(["Daily", "Monthly"])
|
| 1457 |
-
|
| 1458 |
today = datetime.now(timezone.utc).date()
|
|
|
|
| 1459 |
|
| 1460 |
# ---- Daily tab ----
|
| 1461 |
-
|
| 1462 |
col_date_d, col_spacer_d, col_lang_d = st.columns([0.15, 0.75, 0.1], vertical_alignment="center")
|
| 1463 |
with col_date_d:
|
| 1464 |
available_dates = list_available_dates()
|
|
@@ -1514,7 +1513,7 @@ with tab_daily:
|
|
| 1514 |
_render_papers_section(papers, lang, selected_date_str, "daily")
|
| 1515 |
|
| 1516 |
# ---- Monthly tab ----
|
| 1517 |
-
|
| 1518 |
# Discover available monthly trending splits on HF
|
| 1519 |
_monthly_splits_key = "monthly_available_splits"
|
| 1520 |
if _monthly_splits_key not in st.session_state:
|
|
|
|
| 280 |
# ---------------------------------------------------------------------------
|
| 281 |
DATA_DIR = Path(__file__).resolve().parent.parent / "data"
|
| 282 |
HF_DATASET_REPO = "Elfsong/hf_paper_summary"
|
| 283 |
+
HF_TRENDING_REPO = "Elfsong/hf_paper_daily_trending"
|
| 284 |
HF_MONTHLY_TRENDING_REPO = "Elfsong/hf_paper_monthly_trending"
|
| 285 |
MONTH_RANGE = 6
|
| 286 |
|
|
|
|
| 1452 |
# ---------------------------------------------------------------------------
|
| 1453 |
yesterday_str = (datetime.now(timezone.utc) - timedelta(days=1)).strftime("%Y-%m-%d")
|
| 1454 |
|
| 1455 |
+
# --- Tab selection ---
|
|
|
|
|
|
|
| 1456 |
today = datetime.now(timezone.utc).date()
|
| 1457 |
+
active_tab = st.segmented_control("", ["Daily", "Monthly"], default="Daily", key="active_tab")
|
| 1458 |
|
| 1459 |
# ---- Daily tab ----
|
| 1460 |
+
if active_tab == "Daily":
|
| 1461 |
col_date_d, col_spacer_d, col_lang_d = st.columns([0.15, 0.75, 0.1], vertical_alignment="center")
|
| 1462 |
with col_date_d:
|
| 1463 |
available_dates = list_available_dates()
|
|
|
|
| 1513 |
_render_papers_section(papers, lang, selected_date_str, "daily")
|
| 1514 |
|
| 1515 |
# ---- Monthly tab ----
|
| 1516 |
+
elif active_tab == "Monthly":
|
| 1517 |
# Discover available monthly trending splits on HF
|
| 1518 |
_monthly_splits_key = "monthly_available_splits"
|
| 1519 |
if _monthly_splits_key not in st.session_state:
|