Update cache_db.py
Browse files- cache_db.py +26 -2
cache_db.py
CHANGED
|
@@ -511,8 +511,27 @@ def get_content_cache() -> ContentVectorCache:
|
|
| 511 |
return _content_cache_instance
|
| 512 |
_indexing_thread = None
|
| 513 |
_indexing_stop_flag = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
def background_content_indexer():
|
| 515 |
-
"""백그라운드 본문 인덱싱 (서비스
|
| 516 |
global _indexing_stop_flag
|
| 517 |
from file_api import download_file, extract_text_from_file
|
| 518 |
content_cache = get_content_cache()
|
|
@@ -526,10 +545,15 @@ def background_content_indexer():
|
|
| 526 |
all_items = meta_cache.get_all()
|
| 527 |
indexed_ids = content_cache.get_indexed_ids()
|
| 528 |
items_to_index = []
|
|
|
|
| 529 |
for item in all_items:
|
| 530 |
pblanc_id = safe_str(item.get("pblancId") or item.get("seq"))
|
| 531 |
print_url = safe_str(item.get("printFlpthNm"))
|
| 532 |
print_name = safe_str(item.get("printFileNm"))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
if pblanc_id and print_url and print_name:
|
| 534 |
if pblanc_id not in indexed_ids:
|
| 535 |
items_to_index.append({
|
|
@@ -542,7 +566,7 @@ def background_content_indexer():
|
|
| 542 |
content_cache.index_status["progress_total"] = total
|
| 543 |
content_cache.index_status["progress_current"] = 0
|
| 544 |
content_cache._save_index_status()
|
| 545 |
-
logger.info(f"Starting background content indexing: {total} items
|
| 546 |
for i, item in enumerate(items_to_index):
|
| 547 |
if _indexing_stop_flag:
|
| 548 |
logger.info("Content indexing stopped by flag")
|
|
|
|
| 511 |
return _content_cache_instance
|
| 512 |
_indexing_thread = None
|
| 513 |
_indexing_stop_flag = False
|
| 514 |
+
def is_ongoing_check(req_dt: str) -> bool:
    """Return whether an announcement is still open (cache_db internal helper).

    The application-period string is interpreted as follows:

    * An empty value is treated as ongoing.
    * A value containing one of the open-ended keywords (the Korean
      literals in ``open_ended_keywords`` below) is treated as ongoing.
    * Otherwise the text after the last ``~`` (or the whole string when
      there is no ``~``) is stripped of every non-digit character and its
      first eight digits are parsed as ``YYYYMMDD``. The announcement is
      ongoing when that date is today or later.
    * Anything that cannot be parsed as a date is treated as ongoing, so
      an unreadable period never causes an item to be dropped.

    Args:
        req_dt: Raw application-period string, e.g. ``"20240101 ~ 20240131"``.

    Returns:
        ``False`` only when a deadline date was parsed and lies strictly
        before today's date; ``True`` in every other case.
    """
    import re

    if not req_dt:
        return True

    # Runtime keywords that mark a period without a fixed end date.
    open_ended_keywords = ("소진", "추후", "상시", "별도")
    if any(keyword in req_dt for keyword in open_ended_keywords):
        return True

    # For a "start ~ end" range, only the end part decides expiry.
    end_part = req_dt.split("~")[-1] if "~" in req_dt else req_dt
    digits = re.sub(r'[^0-9]', '', end_part.strip())
    if len(digits) < 8:
        # Not enough digits for a full YYYYMMDD date.
        return True

    try:
        deadline = datetime.strptime(digits[:8], "%Y%m%d")
    except ValueError:
        # Eight digits that do not form a valid calendar date.
        return True

    # Compare calendar dates, not timestamps: the parsed deadline is at
    # 00:00, so comparing it with the current time would mark an
    # announcement as expired during its own closing day.
    return deadline.date() >= datetime.now().date()
|
| 533 |
def background_content_indexer():
|
| 534 |
+
"""백그라운드 본문 인덱싱 (서비스 무중단, 진행중 공고만)"""
|
| 535 |
global _indexing_stop_flag
|
| 536 |
from file_api import download_file, extract_text_from_file
|
| 537 |
content_cache = get_content_cache()
|
|
|
|
| 545 |
all_items = meta_cache.get_all()
|
| 546 |
indexed_ids = content_cache.get_indexed_ids()
|
| 547 |
items_to_index = []
|
| 548 |
+
skipped_expired = 0
|
| 549 |
for item in all_items:
|
| 550 |
pblanc_id = safe_str(item.get("pblancId") or item.get("seq"))
|
| 551 |
print_url = safe_str(item.get("printFlpthNm"))
|
| 552 |
print_name = safe_str(item.get("printFileNm"))
|
| 553 |
+
req_dt = safe_str(item.get("reqstDt") or item.get("reqstBeginEndDe"))
|
| 554 |
+
if not is_ongoing_check(req_dt):
|
| 555 |
+
skipped_expired += 1
|
| 556 |
+
continue
|
| 557 |
if pblanc_id and print_url and print_name:
|
| 558 |
if pblanc_id not in indexed_ids:
|
| 559 |
items_to_index.append({
|
|
|
|
| 566 |
content_cache.index_status["progress_total"] = total
|
| 567 |
content_cache.index_status["progress_current"] = 0
|
| 568 |
content_cache._save_index_status()
|
| 569 |
+
logger.info(f"Starting background content indexing: {total} items (skipped {skipped_expired} expired)")
|
| 570 |
for i, item in enumerate(items_to_index):
|
| 571 |
if _indexing_stop_flag:
|
| 572 |
logger.info("Content indexing stopped by flag")
|