Spaces:
Sleeping
Sleeping
File size: 3,434 Bytes
c9ace58 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import os
import subprocess
import sys
import time
from datetime import datetime

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
# Folders watched for new or changed input files
EXCEL_DIR = "data/raw_excels"
CSV_DIR = "data/raw_csv"
TXT_DIR = "data/raw_txt"
# Log file path and intermediate JSON path
LOG_PATH = "logs/update_log.txt"
JSON_PATH = "data/deposit_docs.json"
def log(msg):
    """Print a timestamped message to the terminal and append it to the log file.

    Args:
        msg: Text to record; it is prefixed with the current local time.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    line = f"[{timestamp}] {msg}"
    print(line)

    # Make sure the log directory exists before appending to the file.
    log_dir = os.path.dirname(LOG_PATH)
    os.makedirs(log_dir, exist_ok=True)
    with open(LOG_PATH, "a", encoding="utf-8") as log_file:
        log_file.write(f"{line}\n")
# Conversion step for each supported file_type: (log message, converter script).
_CONVERTERS = {
    "excel": ("π Excel β JSON λ³ν μ€ ...", "scripts/convert_excel_to_json.py"),
    "csv": ("π CSV β JSON λ³ν μ€ ...", "scripts/convert_csv_to_json.py"),
    "txt": ("π TXT β JSON λ³ν μ€ ...", "scripts/convert_txt_to_json.py"),
}


def _run_script(script, *args):
    """Run a helper script in a child process and return its exit code.

    The command is passed as an argument list (no shell), so a path that
    contains quotes, spaces or shell metacharacters reaches the script
    unchanged instead of being interpreted by the shell.
    """
    # Use the interpreter running this watcher so the helper scripts see the
    # same environment; fall back to "python" if it cannot be determined.
    interpreter = sys.executable or "python"
    return subprocess.run([interpreter, script, *args], check=False).returncode


def run_pipeline(file_path, file_type="excel"):
    """Convert a detected file to JSON, rebuild the index and clean up.

    Steps:
        1. Run the converter script matching ``file_type`` on ``file_path``.
        2. Run ``scripts/build_index.py``.
        3. Delete the JSON file at ``JSON_PATH``.

    If a script exits with a non-zero status, the failure is logged and the
    remaining steps are skipped, so a failed run is never reported as
    complete.

    Args:
        file_path: Path of the file that triggered the pipeline.
        file_type: One of ``"excel"``, ``"csv"`` or ``"txt"``. Any other
            value skips the conversion step and goes straight to indexing.
    """
    log(f"π μ {file_type.upper()} νμΌ κ°μ§λ¨: {file_path}")
    log("π λ³ν λ° μΈλ±μ± νμ΄νλΌμΈ μμ")

    # Step 1: Excel / CSV / TXT -> JSON conversion.
    converter = _CONVERTERS.get(file_type)
    if converter is not None:
        message, script = converter
        log(message)
        exit_code = _run_script(script, file_path)
        if exit_code != 0:
            # Do not index (or report success) after a failed conversion.
            log(f"Conversion script {script} failed with exit code {exit_code}; pipeline aborted")
            return

    # Step 2: rebuild the index.
    log("π§ λ²‘ν° μΈλ±μ€ μ¬μμ± μ€ ...")
    exit_code = _run_script("scripts/build_index.py")
    if exit_code != 0:
        log(f"Index build failed with exit code {exit_code}; pipeline aborted")
        return

    # Step 3: delete the JSON file (temporary cache clean-up).
    # Attempt the removal directly instead of checking existence first, so a
    # file that disappears in between is simply treated as already removed.
    try:
        os.remove(JSON_PATH)
    except FileNotFoundError:
        pass
    except OSError as e:
        log(f"β οΈ JSON μμ μ€ μ€λ₯ λ°μ: {e}")
    else:
        log(f"π§Ή μμ JSON νμΌ μμ μλ£ β {JSON_PATH}")

    # Completion log. The literal was split across physical lines in the
    # source (a syntax error); the pieces are rejoined here without adding
    # line breaks.
    log(
        "β"
        "μ"
        "λ°μ΄νΈ μλ£!\n"
    )
class DataEventHandler(FileSystemEventHandler):
    """Run the pipeline when a watched .xlsx / .xls / .csv / .txt file is created or modified."""

    # (suffixes, file_type) pairs, checked in order against the lower-cased
    # file name so that e.g. "REPORT.XLSX" is recognised as well.
    _SUFFIX_TYPES = (
        ((".xlsx", ".xls"), "excel"),
        ((".csv",), "csv"),
        ((".txt",), "txt"),
    )

    def _handle(self, event):
        """Start the pipeline for ``event`` if it refers to a supported file."""
        if event.is_directory:
            return

        file_name = os.path.basename(event.src_path)
        # Microsoft Office keeps a "~$<name>" lock file next to an open
        # workbook; it is not a real data file, so it is ignored.
        if file_name.startswith("~$"):
            return

        lowered = file_name.lower()
        for suffixes, file_type in self._SUFFIX_TYPES:
            if lowered.endswith(suffixes):
                run_pipeline(event.src_path, file_type)
                return

    # NOTE(review): a newly created file may produce both a "created" and a
    # "modified" event, in which case the pipeline runs twice — consider
    # debouncing if that turns out to be a problem.
    def on_modified(self, event):
        """Handle a file-modified event."""
        self._handle(event)

    def on_created(self, event):
        """Handle a file-created event."""
        self._handle(event)
if __name__ == "__main__":
    # Script entry point: make sure the watched folders exist, register one
    # shared handler for all of them and block until interrupted.
    watched_dirs = (EXCEL_DIR, CSV_DIR, TXT_DIR)
    for folder in watched_dirs:
        os.makedirs(folder, exist_ok=True)

    # The literal was split across two physical lines in the source (a syntax
    # error); the pieces are rejoined here without adding a line break.
    log(
        "π Excel & CSV & TXT ν΄λ κ°μ μμ ... (Ctrl+Cλ‘ μ’"
        "λ£)"
    )

    observer = Observer()
    handler = DataEventHandler()

    # Register all three folders with the observer (non-recursive).
    for folder in watched_dirs:
        observer.schedule(handler, path=folder, recursive=False)
    observer.start()

    try:
        # Keep the main thread alive while the observer thread does the work.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to end the watch loop.
        pass
    finally:
        # Stop and join the observer however the loop ended, so the watcher
        # thread is shut down on every exit path, not just on Ctrl+C.
        observer.stop()
        log("π ν΄λ κ°μ μ€λ¨λ¨")
        observer.join()
|