# grantforge-api / backend / core / zus_client.py
# GrantForge Bot
# Deploy to Hugging Face
# afd56bc
import os
import json
import logging
import hashlib
from datetime import datetime, timedelta, timezone
from typing import Optional
from pathlib import Path
import httpx
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Cache directory: a "cache" folder located two directory levels above this
# file. It is created at import time if it does not exist yet.
CACHE_DIR = Path(__file__).parent.parent / "cache"
CACHE_DIR.mkdir(exist_ok=True)
# JSON file in which the fetched ZUS calls are cached.
ZUS_CACHE_FILE = CACHE_DIR / "zus_nabory.json"
# Maximum age of the cache file, in hours, before it is considered stale.
ZUS_CACHE_TTL_HOURS = 24
# ZUS typically runs a competition for co-financing occupational health and
# safety (BHP) improvements; this is the page that gets scraped.
ZUS_BHP_URL = "https://bip.zus.pl/konkurs-bhp"
class ZUSClient:
    """Client that retrieves current ZUS support programmes.

    The page scraped is the ZUS competition for co-financing occupational
    health and safety (BHP) improvements (``ZUS_BHP_URL``). Results are
    stored in ``ZUS_CACHE_FILE`` and reused for ``ZUS_CACHE_TTL_HOURS`` hours.
    """

    def _load_cache(self) -> Optional[dict]:
        """Return the cached payload if it exists, is well-formed and fresh.

        Returns:
            The cached dict (with ``fetched_at`` and a ``nabory`` list), or
            ``None`` when the file is missing, unreadable, malformed or
            older than ``ZUS_CACHE_TTL_HOURS``.
        """
        if not ZUS_CACHE_FILE.exists():
            return None
        try:
            with open(ZUS_CACHE_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)
            # Callers index data["nabory"] directly, so reject payloads that
            # do not carry a list there instead of letting them fail later.
            if not isinstance(data, dict) or not isinstance(data.get("nabory"), list):
                raise ValueError("cache payload has no 'nabory' list")
            # A missing timestamp falls back to a date far in the past, which
            # makes the entry count as expired.
            fetched_at = datetime.fromisoformat(data.get("fetched_at", "2000-01-01"))
            if fetched_at.tzinfo is None:
                # Naive timestamps are treated as UTC so they can be compared
                # with the timezone-aware "now" below.
                fetched_at = fetched_at.replace(tzinfo=timezone.utc)
            if datetime.now(timezone.utc) - fetched_at < timedelta(hours=ZUS_CACHE_TTL_HOURS):
                return data
        except Exception as e:
            # The cache is best-effort: any read problem is a cache miss.
            logger.warning("Błąd odczytu ZUS cache: %s", e)
        return None

    def _save_cache(self, nabory: list) -> None:
        """Write ``nabory`` to the cache file together with a UTC timestamp.

        Failures are logged and swallowed; caching is best-effort.
        """
        try:
            payload = {
                "fetched_at": datetime.now(timezone.utc).isoformat(),
                "nabory": nabory,
            }
            with open(ZUS_CACHE_FILE, "w", encoding="utf-8") as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
        except Exception as e:
            logger.warning("Błąd zapisu ZUS cache: %s", e)

    async def _fetch_live(self) -> list:
        """Scrape the ZUS competition page through Firecrawl and parse it.

        Returns:
            The parsed grant dicts after they have been passed through
            ``filter_outdated_grants``. The list handed to that filter is
            empty when ``FIRECRAWL_API_KEY`` is unset, the request fails, or
            no markdown comes back.
        """
        from core.date_utils import filter_outdated_grants

        logger.info("Rozpoczynam pobieranie na żywo naborów ZUS...")
        api_key = os.getenv("FIRECRAWL_API_KEY")
        all_grants = []
        if api_key:
            logger.info("Używam Firecrawl do ominięcia zabezpieczeń ZUS...")
            try:
                # Use the async HTTP client so the (up to 30 s) request does
                # not block the event loop this coroutine runs on.
                async with httpx.AsyncClient(timeout=30.0) as client:
                    resp = await client.post(
                        "https://api.firecrawl.dev/v1/scrape",
                        headers={"Authorization": f"Bearer {api_key}"},
                        json={"url": ZUS_BHP_URL, "formats": ["markdown"]},
                    )
                if resp.status_code == 200:
                    data = resp.json()
                    md = data.get("data", {}).get("markdown", "")
                    if md:
                        all_grants = await self._parse_firecrawl_markdown(md)
                        logger.info("Firecrawl zwrócił %s naborów z ZUS.", len(all_grants))
                else:
                    logger.warning(
                        "Błąd Firecrawl API (ZUS): %s - %s", resp.status_code, resp.text
                    )
            except Exception as e:
                logger.error("Wyjątek podczas wywołania Firecrawl API (ZUS): %s", e)
        else:
            logger.warning("Brak klucza FIRECRAWL_API_KEY. Brak naborów z ZUS.")
        # Filter out entries with outdated dates.
        return filter_outdated_grants(all_grants)

    async def _parse_firecrawl_markdown(self, md: str) -> list:
        """Extract the list of ZUS calls from scraped markdown using an LLM.

        Args:
            md: Markdown text of the scraped page.

        Returns:
            One dict per grant reported by the LLM, or an empty list when
            anything in the extraction fails (the error is logged).
        """
        try:
            from core.llm_router import get_llm
            from pydantic import BaseModel, Field
            from typing import List

            class Grant(BaseModel):
                name: str = Field(description="Tytuł konkursu/naboru ZUS")
                deadline: str = Field(
                    default="",
                    description="Termin składania wniosków (deadline) w formacie YYYY-MM-DD. Jeśli brak, zostaw puste.",
                )

            class GrantsList(BaseModel):
                grants: List[Grant]

            llm = get_llm("fast").with_structured_output(GrantsList)
            # Only the first 10,000 characters are sent to the model.
            md_subset = md[:10000]
            prompt = f"Wydobądź listę aktualnych konkursów lub dofinansowań ZUS z poniższego tekstu Markdown:\n\n{md_subset}"
            result = await llm.ainvoke(prompt)

            # One timestamp for the whole batch: every record comes from the
            # same fetch.
            fetched_at = datetime.now(timezone.utc).isoformat()
            nabory = []
            for g in result.grants:
                # Identifier derived from the grant name only, so the same
                # name always maps to the same id.
                uid = hashlib.md5(g.name.encode()).hexdigest()[:12]
                nabory.append({
                    "id": uid,
                    "name": g.name,
                    "program": "ZUS",
                    "type": "Bezpieczeństwo pracy",
                    "status": "active",
                    "url": ZUS_BHP_URL,
                    "deadline": g.deadline,
                    # The funding figures below are fixed values, not data
                    # extracted from the page.
                    "max_dofinansowanie_pln": 300000,
                    "min_dofinansowanie_pln": 10000,
                    "dofinansowanie_pct_max": 80,
                    "eligible_regions": ["Cała Polska"],
                    "eligible_company_sizes": ["mikro", "małe", "średnie", "duże"],
                    "description": "Program wsparcia ZUS dla płatników składek na inwestycje zmniejszające ryzyko wypadków przy pracy (BHP).",
                    "legal_source": "Regulamin Konkursu na dofinansowanie przez ZUS",
                    "source": "zus_scrape",
                    "fetched_at": fetched_at,
                })
            return nabory
        except Exception as e:
            logger.warning("Błąd parsowania markdowna z LLM (ZUS): %s", e)
            return []

    def _enrich_urls(self, nabory: list) -> None:
        """Add ``official_doc_url`` and ``eurlex_url`` to records lacking them.

        The records are modified in place; keys that are already present are
        left untouched.
        """
        import urllib.parse

        for n in nabory:
            # A cached record may carry "name": null; quote() rejects None.
            q_gov = n.get("name") or ""
            if "official_doc_url" not in n:
                n["official_doc_url"] = f"https://bip.zus.pl/wyszukiwarka?query={urllib.parse.quote(q_gov)}"
            if "eurlex_url" not in n:
                n["eurlex_url"] = ""  # ZUS programmes have no link to EU law

    async def get_active_nabory(self, force_refresh: bool = False) -> list:
        """Return the ZUS calls, served from cache unless a refresh is needed.

        Args:
            force_refresh: When true, skip the cache and fetch live data.

        Returns:
            A list of grant dicts, each with the URL fields filled in.
        """
        if not force_refresh:
            cached = self._load_cache()
            if cached is not None:
                nabory = cached["nabory"]
                self._enrich_urls(nabory)
                return nabory
        nabory = await self._fetch_live()
        self._enrich_urls(nabory)
        # NOTE(review): an empty result (missing API key, failed request) is
        # cached as well and is then served until the TTL expires - confirm
        # that this is intended.
        self._save_cache(nabory)
        return nabory
# Module-level ZUSClient instance, created once when this module is imported.
zus_client = ZUSClient()