Mbonea committed on
Commit
ab5728a
·
1 Parent(s): d4e1918

Skip existing bond details during refresh

Browse files
Files changed (2) hide show
  1. App/routers/bonds/utils.py +9 -1
  2. App/scheduler.py +9 -2
App/routers/bonds/utils.py CHANGED
@@ -230,7 +230,11 @@ class BondDataScraper:
230
  isin=json_data.get("ISIN")
231
  )
232
 
233
- async def scrape_all_bond_data(self) -> AsyncGenerator[BondCreate, None]:
 
 
 
 
234
  async with AsyncSession() as session:
235
  # First GET request to establish session cookies if necessary
236
  await session.get(self.TBONDS_URL, headers=self.headers, impersonate=self.IMPERSONATE_PROFILE,timeout=60*5)
@@ -245,6 +249,10 @@ class BondDataScraper:
245
  print(f"Found {len(initial_bond_rows)} initial bond rows from main table.")
246
 
247
  for row_data in initial_bond_rows:
 
 
 
 
248
  print(f"Fetching details for au_no: {row_data['au_no']}...")
249
  await asyncio.sleep(0.5) # Small delay to be polite
250
 
 
230
  isin=json_data.get("ISIN")
231
  )
232
 
233
+ async def scrape_all_bond_data(
234
+ self, existing_auction_numbers: Optional[set[int]] = None
235
+ ) -> AsyncGenerator[BondCreate, None]:
236
+ existing_auction_numbers = existing_auction_numbers or set()
237
+
238
  async with AsyncSession() as session:
239
  # First GET request to establish session cookies if necessary
240
  await session.get(self.TBONDS_URL, headers=self.headers, impersonate=self.IMPERSONATE_PROFILE,timeout=60*5)
 
249
  print(f"Found {len(initial_bond_rows)} initial bond rows from main table.")
250
 
251
  for row_data in initial_bond_rows:
252
+ if row_data["au_no"] in existing_auction_numbers:
253
+ print(f"Skipping existing bond details for au_no: {row_data['au_no']}")
254
+ continue
255
+
256
  print(f"Fetching details for au_no: {row_data['au_no']}...")
257
  await asyncio.sleep(0.5) # Small delay to be polite
258
 
App/scheduler.py CHANGED
@@ -117,13 +117,19 @@ async def refresh_bonds() -> None:
117
  from App.routers.bonds.models import Bond
118
  from App.routers.bonds.utils import BondDataScraper
119
 
120
- logger.info("[scheduler] Bond refresh: scraping bot.go.tz/TBonds")
 
 
 
 
121
  scraper = BondDataScraper()
122
  created = updated = failed = 0
123
  processed_isins: set[str] = set()
124
 
125
  try:
126
- async for bond_data in scraper.scrape_all_bond_data():
 
 
127
  if not bond_data:
128
  failed += 1
129
  continue
@@ -150,6 +156,7 @@ async def refresh_bonds() -> None:
150
  else:
151
  await Bond.create(**bond_data.dict())
152
  created += 1
 
153
 
154
  if bond_data.isin:
155
  processed_isins.add(bond_data.isin)
 
117
  from App.routers.bonds.models import Bond
118
  from App.routers.bonds.utils import BondDataScraper
119
 
120
+ existing_auction_numbers = set(await Bond.all().values_list("auction_number", flat=True))
121
+ logger.info(
122
+ "[scheduler] Bond refresh: checking bot.go.tz/TBonds, %s auction(s) already stored",
123
+ len(existing_auction_numbers),
124
+ )
125
  scraper = BondDataScraper()
126
  created = updated = failed = 0
127
  processed_isins: set[str] = set()
128
 
129
  try:
130
+ async for bond_data in scraper.scrape_all_bond_data(
131
+ existing_auction_numbers=existing_auction_numbers
132
+ ):
133
  if not bond_data:
134
  failed += 1
135
  continue
 
156
  else:
157
  await Bond.create(**bond_data.dict())
158
  created += 1
159
+ existing_auction_numbers.add(bond_data.auction_number)
160
 
161
  if bond_data.isin:
162
  processed_isins.add(bond_data.isin)