import json from pathlib import Path from openpyxl import load_workbook # ========================================================= # FILE PATHS # ========================================================= # Fixed JSON FIXED_JSON = r"C:\Users\vinay\OneDrive\Desktop\minors\topic-modelling\methodology_output\computational_techniques_master_fixed.json" # Existing Excel INPUT_XLSX = r"C:\Users\vinay\OneDrive\Desktop\minors\topic-modelling\methodology_output\computational_techniques_master.xlsx" # Output Excel OUTPUT_XLSX = r"C:\Users\vinay\OneDrive\Desktop\minors\topic-modelling\methodology_output\computational_techniques_master_fixed.xlsx" # ========================================================= # LOAD FIXED JSON # ========================================================= with open(FIXED_JSON, "r", encoding="utf-8") as f: json_data = json.load(f) # ========================================================= # LOAD EXCEL # ========================================================= wb = load_workbook(INPUT_XLSX) ws = wb.active # ========================================================= # FIND COLUMN INDEXES # ========================================================= headers = {} for col in range(1, ws.max_column + 1): header = ws.cell(row=1, column=col).value if header: headers[str(header).strip()] = col paper_title_col = headers.get("Paper_Title") source_file_col = headers.get("Source_File") if not paper_title_col or not source_file_col: raise Exception("Paper_Title or Source_File column not found!") # ========================================================= # BUILD SOURCE_FILE -> PAPER_TITLE MAP # ========================================================= title_map = {} for item in json_data: source_file = str(item.get("Source_File", "")).strip().lower() paper_title = item.get("Paper_Title", "") if source_file: title_map[source_file] = paper_title # ========================================================= # UPDATE EXCEL # ========================================================= updated = 0 for row in range(2, ws.max_row + 1): source_file = ws.cell(row=row, column=source_file_col).value if not source_file: continue normalized_source = str(source_file).strip().lower() if normalized_source in title_map: ws.cell(row=row, column=paper_title_col).value = title_map[normalized_source] updated += 1 # ========================================================= # SAVE # ========================================================= wb.save(OUTPUT_XLSX) print(f"[DONE] Updated rows: {updated}") print(f"[SAVED] {OUTPUT_XLSX}")