capstone_backend_v2 / scripts /update_pipeline.py
dongchan21
Fixed LFS tracking for index file and removed unnecessary excels
c9ace58
import os
import subprocess
import sys
import time
from datetime import datetime

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
# Folders watched for new or modified input files
EXCEL_DIR = "data/raw_excels"
CSV_DIR = "data/raw_csv"
TXT_DIR = "data/raw_txt"
# Log file path and the temporary JSON path that is deleted after each run
LOG_PATH = "logs/update_log.txt"
JSON_PATH = "data/deposit_docs.json"
def log(msg):
    """Print a timestamped message and append it to the log file.

    Args:
        msg: Text to record. It is prefixed with the current local time
            formatted as ``[YYYY-MM-DD HH:MM:SS]``.
    """
    msg_full = f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {msg}"
    print(msg_full)
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when LOG_PATH actually has one.
    log_dir = os.path.dirname(LOG_PATH)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    with open(LOG_PATH, "a", encoding="utf-8") as f:
        f.write(msg_full + "\n")
# file_type -> (progress message, converter script that produces the JSON)
_CONVERTERS = {
    "excel": ("πŸ“„ Excel β†’ JSON λ³€ν™˜ 쀑 ...", "scripts/convert_excel_to_json.py"),
    "csv": ("πŸ“„ CSV β†’ JSON λ³€ν™˜ 쀑 ...", "scripts/convert_csv_to_json.py"),
    "txt": ("πŸ“„ TXT β†’ JSON λ³€ν™˜ 쀑 ...", "scripts/convert_txt_to_json.py"),
}


def _run_script(script, *args):
    """Run a pipeline script with the current interpreter; return True on success."""
    # An argument list (no shell) passes file names containing quotes, spaces
    # or shell metacharacters through verbatim instead of letting a shell
    # re-interpret them.
    command = [sys.executable or "python", script, *args]
    try:
        result = subprocess.run(command, check=False)
    except OSError as e:
        log(f"Failed to launch {script}: {e}")
        return False
    if result.returncode != 0:
        log(f"{script} exited with code {result.returncode}")
        return False
    return True


def run_pipeline(file_path, file_type="excel"):
    """Convert a detected data file to JSON, rebuild the index, and clean up.

    Args:
        file_path: Path of the file that triggered the run; passed to the
            converter script as its single argument.
        file_type: One of ``"excel"``, ``"csv"`` or ``"txt"``. Any other
            value skips the conversion step and only rebuilds the index.

    Failures of the child scripts are logged. When the conversion fails the
    index rebuild is skipped, so the index is not rebuilt from missing or
    partial JSON. The temporary JSON file is removed in every case.
    """
    log(f"πŸ“‚ μƒˆ {file_type.upper()} 파일 감지됨: {file_path}")
    log("πŸ”„ λ³€ν™˜ 및 인덱싱 νŒŒμ΄ν”„λΌμΈ μ‹œμž‘")

    succeeded = True

    # 1) Excel / CSV / TXT -> JSON conversion
    converter = _CONVERTERS.get(file_type)
    if converter is not None:
        message, script = converter
        log(message)
        if not _run_script(script, file_path):
            log("Conversion failed; skipping index rebuild")
            succeeded = False

    # 2) Rebuild the vector index (only on top of a successful conversion)
    if succeeded:
        log("🧠 벑터 인덱슀 μž¬μƒμ„± 쀑 ...")
        if not _run_script("scripts/build_index.py"):
            succeeded = False

    # 3) Delete the temporary JSON cache
    try:
        os.remove(JSON_PATH)
    except FileNotFoundError:
        # Nothing to clean up.
        pass
    except OSError as e:
        log(f"⚠️ JSON μ‚­μ œ 쀑 였λ₯˜ λ°œμƒ: {e}")
    else:
        log(f"🧹 μž„μ‹œ JSON 파일 μ‚­μ œ μ™„λ£Œ β†’ {JSON_PATH}")

    # Completion log
    if succeeded:
        log("βœ… μ—…λ°μ΄νŠΈ μ™„λ£Œ!\n")
    else:
        log("Update aborted because a pipeline step failed\n")
class DataEventHandler(FileSystemEventHandler):
    """Run the update pipeline when a data file in a watched folder changes.

    Reacts to created and modified events for ``.xlsx`` / ``.xls`` (Excel),
    ``.csv`` and ``.txt`` files. Directory events and files with any other
    suffix are ignored. Suffix matching is case-insensitive, so names such as
    ``REPORT.XLSX`` are picked up as well.
    """

    # (suffixes, file_type handed to run_pipeline), checked in order
    _SUFFIX_TYPES = (
        ((".xlsx", ".xls"), "excel"),
        ((".csv",), "csv"),
        ((".txt",), "txt"),
    )

    def _dispatch(self, event):
        """Start the pipeline for *event* if it refers to a supported file."""
        if event.is_directory:
            return
        # Compare against a lower-cased copy, but pass the original path on.
        lowered = event.src_path.lower()
        for suffixes, file_type in self._SUFFIX_TYPES:
            if lowered.endswith(suffixes):
                run_pipeline(event.src_path, file_type)
                return

    def on_modified(self, event):
        """Handle a file-modified event."""
        self._dispatch(event)

    def on_created(self, event):
        """Handle a file-created event."""
        self._dispatch(event)
if __name__ == "__main__":
    # The three input folders, in the order they are created and registered.
    watched_dirs = (EXCEL_DIR, CSV_DIR, TXT_DIR)

    # Make sure every watched folder exists before observing it.
    for folder in watched_dirs:
        os.makedirs(folder, exist_ok=True)

    log("πŸ‘€ Excel & CSV & TXT 폴더 κ°μ‹œ μ‹œμž‘ ... (Ctrl+C둜 μ’…λ£Œ)")

    observer = Observer()
    handler = DataEventHandler()

    # Register all three folders (non-recursive) with the same handler.
    for folder in watched_dirs:
        observer.schedule(handler, path=folder, recursive=False)
    observer.start()

    # Keep the main thread alive until the user presses Ctrl+C.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
        log("πŸ›‘ 폴더 κ°μ‹œ 쀑단됨")

    # Wait for the observer thread to finish.
    observer.join()