Spaces:
Running
Running
| import json | |
| from pathlib import Path | |
| from openpyxl import load_workbook | |
| # ========================================================= | |
| # FILE PATHS | |
| # ========================================================= | |
| # Fixed JSON | |
| FIXED_JSON = r"C:\Users\vinay\OneDrive\Desktop\minors\topic-modelling\methodology_output\computational_techniques_master_fixed.json" | |
| # Existing Excel | |
| INPUT_XLSX = r"C:\Users\vinay\OneDrive\Desktop\minors\topic-modelling\methodology_output\computational_techniques_master.xlsx" | |
| # Output Excel | |
| OUTPUT_XLSX = r"C:\Users\vinay\OneDrive\Desktop\minors\topic-modelling\methodology_output\computational_techniques_master_fixed.xlsx" | |
| # ========================================================= | |
| # LOAD FIXED JSON | |
| # ========================================================= | |
| with open(FIXED_JSON, "r", encoding="utf-8") as f: | |
| json_data = json.load(f) | |
| # ========================================================= | |
| # LOAD EXCEL | |
| # ========================================================= | |
| wb = load_workbook(INPUT_XLSX) | |
| ws = wb.active | |
| # ========================================================= | |
| # FIND COLUMN INDEXES | |
| # ========================================================= | |
| headers = {} | |
| for col in range(1, ws.max_column + 1): | |
| header = ws.cell(row=1, column=col).value | |
| if header: | |
| headers[str(header).strip()] = col | |
| paper_title_col = headers.get("Paper_Title") | |
| source_file_col = headers.get("Source_File") | |
| if not paper_title_col or not source_file_col: | |
| raise Exception("Paper_Title or Source_File column not found!") | |
| # ========================================================= | |
| # BUILD SOURCE_FILE -> PAPER_TITLE MAP | |
| # ========================================================= | |
| title_map = {} | |
| for item in json_data: | |
| source_file = str(item.get("Source_File", "")).strip().lower() | |
| paper_title = item.get("Paper_Title", "") | |
| if source_file: | |
| title_map[source_file] = paper_title | |
| # ========================================================= | |
| # UPDATE EXCEL | |
| # ========================================================= | |
| updated = 0 | |
| for row in range(2, ws.max_row + 1): | |
| source_file = ws.cell(row=row, column=source_file_col).value | |
| if not source_file: | |
| continue | |
| normalized_source = str(source_file).strip().lower() | |
| if normalized_source in title_map: | |
| ws.cell(row=row, column=paper_title_col).value = title_map[normalized_source] | |
| updated += 1 | |
| # ========================================================= | |
| # SAVE | |
| # ========================================================= | |
| wb.save(OUTPUT_XLSX) | |
| print(f"[DONE] Updated rows: {updated}") | |
| print(f"[SAVED] {OUTPUT_XLSX}") |