AI_Menu_Search / scripts /00_import_csv.py
Juhaha
HF Spaces 데모 배포 (Streamlit + Qdrant 임베디드, 색인 빌드타임 생성)
fbd1091
Raw
History Blame Contribute Delete
4.68 kB
"""
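Step 0: Convert menu.csv into data/raw/real_menus.json.
Usage: python scripts/00_import_csv.py
A row is kept only when all of the following hold:
- IS_VISIBLE_MENU == "true" (compared case-insensitively)
- DEPTH1_NAME is non-empty
- DEPTH4_NAME does not contain "팝업" (popup)
- SCREEN_NUM is non-empty and has not already been seen
menu_id = SCR_{SCREEN_NUM}
menu_path: the DEPTH2 level is dropped when it duplicates DEPTH1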
"""
import csv
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from config import RAW_DIR
# Input CSV: menu.csv is looked up two directory levels above the directory
# that contains this script.
CSV_PATH = Path(__file__).parent.parent.parent / "menu.csv"
# Output JSON: written into RAW_DIR, which is imported from config.
OUTPUT_PATH = RAW_DIR / "real_menus.json"
def build_menu_path(row: dict) -> str:
    """Join the DEPTH1..DEPTH4 names of a row into a " > "-separated path.

    Missing or blank levels are left out. When DEPTH2 repeats DEPTH1 the
    DEPTH2 level is dropped so the top level does not appear twice.
    """
    depth_keys = ("DEPTH1_NAME", "DEPTH2_NAME", "DEPTH3_NAME", "DEPTH4_NAME")
    levels = [(row.get(key) or "").strip() for key in depth_keys]

    # A second level identical to the first is a redundant tier: blank it out.
    if levels[1] == levels[0]:
        levels[1] = ""

    return " > ".join(level for level in levels if level)
def parse_keywords(raw: str) -> list:
    """Split a comma-separated MENU_KEYWORD value into a list of keywords.

    Each keyword is stripped of surrounding whitespace and empty entries are
    discarded. A missing or blank input yields an empty list.
    """
    if not raw:
        return []
    trimmed = (piece.strip() for piece in raw.split(","))
    return [keyword for keyword in trimmed if keyword]
def parse_search_count(raw: str) -> int:
    """Convert a SEARCH_COUNT value to an int.

    Returns 0 when the value is missing, blank, or not a valid integer.
    """
    text = raw.strip() if raw else ""
    if not text:
        return 0
    try:
        return int(text)
    except ValueError:
        return 0
def main():
    """Convert menu.csv into the JSON menu list and print summary statistics.

    Reads CSV_PATH and keeps a row only when it is visible
    (IS_VISIBLE_MENU == "true"), has a DEPTH1_NAME, is not a popup
    (DEPTH4_NAME containing "팝업"), and carries a non-empty SCREEN_NUM that
    has not been seen before. The kept rows are written to OUTPUT_PATH as a
    JSON array, followed by per-filter skip counts, a per-category breakdown,
    and keyword / search_count coverage.

    Exits the process with status 1 when CSV_PATH does not exist.
    """
    from collections import Counter

    if not CSV_PATH.exists():
        print(f"[오류] menu.csv 파일을 찾을 수 없습니다: {CSV_PATH}")
        print("menu.csv를 프로젝트 루트(prototype 상위 폴더)에 위치시켜 주세요.")
        sys.exit(1)

    RAW_DIR.mkdir(parents=True, exist_ok=True)

    menus = []
    skipped = {
        "invisible": 0,
        "no_depth1": 0,
        "popup": 0,
        "no_screen_num": 0,
        "duplicate": 0,
    }
    seen_screen_nums = set()

    with open(CSV_PATH, encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            # Filter 1: only visible menus. `or ""` is required because
            # DictReader fills fields missing from a short row with None,
            # which a .get() default does not replace.
            visible = (row.get("IS_VISIBLE_MENU") or "").strip().lower()
            if visible != "true":
                skipped["invisible"] += 1
                continue

            # Filter 2: DEPTH1_NAME must be present.
            d1 = (row.get("DEPTH1_NAME") or "").strip()
            if not d1:
                skipped["no_depth1"] += 1
                continue

            # Filter 3: exclude popup screens.
            d4 = (row.get("DEPTH4_NAME") or "").strip()
            if "팝업" in d4:
                skipped["popup"] += 1
                continue

            # Filter 4: a menu_id cannot be built without SCREEN_NUM.
            screen_num = (row.get("SCREEN_NUM") or "").strip()
            if not screen_num:
                skipped["no_screen_num"] += 1
                continue

            # Strip each level before falling back, so a whitespace-only
            # DEPTH4_NAME does not produce an empty menu name.
            d3 = (row.get("DEPTH3_NAME") or "").strip()
            menu_name = d4 or d3 or d1

            # Duplicate SCREEN_NUM should not occur; keep the first row only.
            if screen_num in seen_screen_nums:
                print(f" [경고] SCREEN_NUM 중복: {screen_num} ({menu_name}) 스킵")
                skipped["duplicate"] += 1
                continue
            seen_screen_nums.add(screen_num)

            menus.append({
                "menu_id": f"SCR_{screen_num}",
                "menu_name": menu_name,
                "menu_path": build_menu_path(row),
                "category": d1,
                "screen_num": screen_num,
                "search_count": parse_search_count(row.get("SEARCH_COUNT") or ""),
                "keywords": parse_keywords(row.get("MENU_KEYWORD") or ""),
            })

    OUTPUT_PATH.write_text(
        json.dumps(menus, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    total = len(menus)
    print(f"변환 완료: {total}개 메뉴")
    print(f" 스킵 - IS_VISIBLE_MENU 제외: {skipped['invisible']}개")
    print(f" 스킵 - DEPTH1 없음: {skipped['no_depth1']}개")
    print(f" 스킵 - 팝업 제외: {skipped['popup']}개")
    print(f" 스킵 - SCREEN_NUM 없음: {skipped['no_screen_num']}개")
    print(f" 스킵 - SCREEN_NUM 중복: {skipped['duplicate']}개")
    print(f"저장: {OUTPUT_PATH}")

    # Menu count per category.
    cat_counts = Counter(m["category"] for m in menus)
    print("\n카테고리별 메뉴 수:")
    for cat, cnt in cat_counts.most_common():
        print(f" {cat}: {cnt}개")

    def percent(count: int) -> float:
        # Report 0.0% instead of dividing by zero when no rows were kept.
        return count / total * 100 if total else 0.0

    # Share of menus that have at least one keyword.
    has_kw = sum(1 for m in menus if m["keywords"])
    print(f"\nkeywords 보유: {has_kw}/{total} ({percent(has_kw):.1f}%)")

    # Share of menus with a positive search_count.
    has_sc = sum(1 for m in menus if m["search_count"] > 0)
    print(f"search_count > 0: {has_sc}/{total} ({percent(has_sc):.1f}%)")
# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()