seawolf2357 committed on
Commit
033dc30
·
verified ·
1 Parent(s): 0e3c1f2

Update cache_db.py

Browse files
Files changed (1) hide show
  1. cache_db.py +26 -2
cache_db.py CHANGED
@@ -511,8 +511,27 @@ def get_content_cache() -> ContentVectorCache:
511
  return _content_cache_instance
512
  _indexing_thread = None
513
  _indexing_stop_flag = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
  def background_content_indexer():
515
- """백그라운드 본문 인덱싱 (서비스 무중단)"""
516
  global _indexing_stop_flag
517
  from file_api import download_file, extract_text_from_file
518
  content_cache = get_content_cache()
@@ -526,10 +545,15 @@ def background_content_indexer():
526
  all_items = meta_cache.get_all()
527
  indexed_ids = content_cache.get_indexed_ids()
528
  items_to_index = []
 
529
  for item in all_items:
530
  pblanc_id = safe_str(item.get("pblancId") or item.get("seq"))
531
  print_url = safe_str(item.get("printFlpthNm"))
532
  print_name = safe_str(item.get("printFileNm"))
 
 
 
 
533
  if pblanc_id and print_url and print_name:
534
  if pblanc_id not in indexed_ids:
535
  items_to_index.append({
@@ -542,7 +566,7 @@ def background_content_indexer():
542
  content_cache.index_status["progress_total"] = total
543
  content_cache.index_status["progress_current"] = 0
544
  content_cache._save_index_status()
545
- logger.info(f"Starting background content indexing: {total} items to process")
546
  for i, item in enumerate(items_to_index):
547
  if _indexing_stop_flag:
548
  logger.info("Content indexing stopped by flag")
 
511
  return _content_cache_instance
512
  _indexing_thread = None
513
  _indexing_stop_flag = False
514
def is_ongoing_check(req_dt: str) -> bool:
    """Return whether a posting is still open for applications (cache_db internal use).

    The application-period string is interpreted leniently: anything that
    cannot be parsed into a concrete end date is treated as still ongoing.

    Args:
        req_dt: Application period text. Either a single date or a
            "start ~ end" range; separators inside the date are ignored.

    Returns:
        True when the period is empty, contains one of the keywords that
        this function treats as open-ended, has no parseable 8-digit end
        date, or ends today or later. False only when a valid end date
        lies strictly before today.
    """
    import re
    from datetime import datetime

    if not req_dt:
        return True

    # Periods containing any of these keywords are always treated as ongoing.
    if any(kw in req_dt for kw in ("소진", "추후", "상시", "별도")):
        return True

    # For a "start ~ end" range the end date is the last segment; a string
    # without "~" yields itself as the only segment.
    end_date_str = req_dt.split("~")[-1].strip()
    digits = re.sub(r'[^0-9]', '', end_date_str)
    if len(digits) < 8:
        return True

    try:
        deadline = datetime.strptime(digits[:8], "%Y%m%d")
    except ValueError:
        # Eight digits that do not form a real calendar date: keep the item.
        return True

    # Compare calendar dates, not timestamps: the parsed deadline is midnight
    # of the end date, so comparing against the current time would mark a
    # posting as expired for the whole of its final day.
    return deadline.date() >= datetime.now().date()
533
  def background_content_indexer():
534
+ """백그라운드 본문 인덱싱 (서비스 무중단, 진행중 공고만)"""
535
  global _indexing_stop_flag
536
  from file_api import download_file, extract_text_from_file
537
  content_cache = get_content_cache()
 
545
  all_items = meta_cache.get_all()
546
  indexed_ids = content_cache.get_indexed_ids()
547
  items_to_index = []
548
+ skipped_expired = 0
549
  for item in all_items:
550
  pblanc_id = safe_str(item.get("pblancId") or item.get("seq"))
551
  print_url = safe_str(item.get("printFlpthNm"))
552
  print_name = safe_str(item.get("printFileNm"))
553
+ req_dt = safe_str(item.get("reqstDt") or item.get("reqstBeginEndDe"))
554
+ if not is_ongoing_check(req_dt):
555
+ skipped_expired += 1
556
+ continue
557
  if pblanc_id and print_url and print_name:
558
  if pblanc_id not in indexed_ids:
559
  items_to_index.append({
 
566
  content_cache.index_status["progress_total"] = total
567
  content_cache.index_status["progress_current"] = 0
568
  content_cache._save_index_status()
569
+ logger.info(f"Starting background content indexing: {total} items (skipped {skipped_expired} expired)")
570
  for i, item in enumerate(items_to_index):
571
  if _indexing_stop_flag:
572
  logger.info("Content indexing stopped by flag")