Spaces:
Sleeping
Sleeping
import os
import subprocess
import sys
import time
from datetime import datetime

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
# Directories that the observer watches for incoming data files.
EXCEL_DIR = "data/raw_excels"
CSV_DIR = "data/raw_csv"
TXT_DIR = "data/raw_txt"
# Log file path, and the intermediate JSON file that the pipeline deletes
# after the index has been rebuilt.
LOG_PATH = "logs/update_log.txt"
JSON_PATH = "data/deposit_docs.json"
def log(msg):
    """Print a timestamped message and append it to the log file.

    The line has the form ``[YYYY-MM-DD HH:MM:SS] msg`` (local time). It is
    written to stdout and appended, UTF-8 encoded, to ``LOG_PATH``. The
    parent directory of ``LOG_PATH`` is created on demand.

    Args:
        msg: Text to record.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{stamp}] {msg}"
    print(line)

    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when LOG_PATH actually has a directory component.
    log_dir = os.path.dirname(LOG_PATH)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    with open(LOG_PATH, "a", encoding="utf-8") as f:
        f.write(line + "\n")
def _run_script(script, *args):
    """Run a helper script with the current interpreter and return its exit code.

    The child is started from an argument list (no shell), so quotes, ``$``,
    ``;`` and similar characters in ``args`` are passed through literally.
    A non-zero exit status or a failure to start the process is logged and
    reported through the return value instead of raising.
    """
    # Fall back to "python" on PATH when the interpreter path is unknown.
    command = [sys.executable or "python", script, *args]
    try:
        exit_code = subprocess.run(command, check=False).returncode
    except OSError as exc:
        log(f"Could not start {script}: {exc}")
        return -1
    if exit_code != 0:
        log(f"{script} exited with status {exit_code}")
    return exit_code


def run_pipeline(file_path, file_type="excel"):
    """Convert a detected data file to JSON and rebuild the index.

    Steps, in order:
      1. Run the converter script that matches ``file_type`` on ``file_path``.
      2. Run ``scripts/build_index.py``.
      3. Delete the intermediate file at ``JSON_PATH`` if it exists.

    Args:
        file_path: Path of the file that triggered the pipeline.
        file_type: ``"excel"``, ``"csv"`` or ``"txt"``. Any other value skips
            the conversion step; the remaining steps still run.

    Returns:
        None. A failing child script is logged and the pipeline carries on,
        keeping the original best-effort behaviour.
    """
    # NOTE(review): the log message literals below look like mis-decoded
    # UTF-8 text; they are kept exactly as found - confirm against upstream.
    converters = {
        "excel": ("π Excel β JSON λ³ν μ€ ...", "scripts/convert_excel_to_json.py"),
        "csv": ("π CSV β JSON λ³ν μ€ ...", "scripts/convert_csv_to_json.py"),
        "txt": ("π TXT β JSON λ³ν μ€ ...", "scripts/convert_txt_to_json.py"),
    }

    log(f"π μ {file_type.upper()} νμΌ κ°μ§λ¨: {file_path}")
    log("π λ³ν λ° μΈλ±μ± νμ΄νλΌμΈ μμ")

    # Step 1: source file -> JSON.
    step = converters.get(file_type)
    if step is not None:
        message, script = step
        log(message)
        _run_script(script, file_path)

    # Step 2: rebuild the index.
    log("π§ λ²‘ν° μΈλ±μ€ μ¬μμ± μ€ ...")
    _run_script("scripts/build_index.py")

    # Step 3: remove the intermediate JSON file. Attempt the removal directly
    # rather than checking for existence first, which avoids a race between
    # the check and the delete.
    try:
        os.remove(JSON_PATH)
    except FileNotFoundError:
        pass  # Nothing to clean up.
    except OSError as e:
        log(f"β οΈ JSON μμ μ€ μ€λ₯ λ°μ: {e}")
    else:
        log(f"π§Ή μμ JSON νμΌ μμ μλ£ β {JSON_PATH}")

    log("β μ λ°μ΄νΈ μλ£!\n")
class DataEventHandler(FileSystemEventHandler):
    """Run the pipeline when a supported file is created or modified.

    Supported files are ``.xlsx`` / ``.xls`` (handled as ``"excel"``),
    ``.csv`` and ``.txt``. Directory events and other extensions are ignored.
    Extension matching is case-insensitive, so ``REPORT.XLSX`` is handled the
    same way as ``report.xlsx``.
    """

    # (lower-case suffixes, ``file_type`` argument for ``run_pipeline``),
    # checked in this order.
    _FILE_TYPES = (
        ((".xlsx", ".xls"), "excel"),
        ((".csv",), "csv"),
        ((".txt",), "txt"),
    )

    def _dispatch(self, event):
        """Start the pipeline for ``event`` if it refers to a supported file."""
        if event.is_directory:
            return
        lowered = event.src_path.lower()
        for suffixes, file_type in self._FILE_TYPES:
            if lowered.endswith(suffixes):
                # Pass the original path, not the lower-cased copy.
                run_pipeline(event.src_path, file_type)
                return

    def on_modified(self, event):
        """Handle a file-modified event."""
        self._dispatch(event)

    def on_created(self, event):
        """Handle a file-created event."""
        self._dispatch(event)
if __name__ == "__main__":
    # Make sure every watched directory exists before observing it.
    watch_dirs = (EXCEL_DIR, CSV_DIR, TXT_DIR)
    for directory in watch_dirs:
        os.makedirs(directory, exist_ok=True)

    log("π Excel & CSV & TXT ν΄λ κ°μ μμ ... (Ctrl+Cλ‘ μ’ λ£)")

    observer = Observer()
    handler = DataEventHandler()

    # Register one non-recursive watch per directory, all sharing one handler.
    for directory in watch_dirs:
        observer.schedule(handler, path=directory, recursive=False)
    observer.start()

    # Keep the main thread alive until the user presses Ctrl+C.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
        log("π ν΄λ κ°μ μ€λ¨λ¨")
    observer.join()