File size: 2,661 Bytes
55deb86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import json
from pathlib import Path
from openpyxl import load_workbook

# =========================================================
# FILE PATHS
# =========================================================

# All three files live in the same folder, so the folder is written once and
# each path below is that folder plus a file name.
_OUTPUT_DIR = r"C:\Users\vinay\OneDrive\Desktop\minors\topic-modelling\methodology_output"

# JSON records that supply the Paper_Title values
FIXED_JSON = _OUTPUT_DIR + "\\computational_techniques_master_fixed.json"

# Workbook that is loaded and edited in memory
INPUT_XLSX = _OUTPUT_DIR + "\\computational_techniques_master.xlsx"

# Where the edited workbook is written (a different file from INPUT_XLSX)
OUTPUT_XLSX = _OUTPUT_DIR + "\\computational_techniques_master_fixed.xlsx"


# =========================================================
# LOAD INPUTS
# =========================================================

# Read the fixed JSON file as UTF-8 text and parse it in one step.
json_data = json.loads(Path(FIXED_JSON).read_text(encoding="utf-8"))

# Open the existing workbook; everything below works on its active sheet.
wb = load_workbook(INPUT_XLSX)
ws = wb.active

# =========================================================
# FIND COLUMN INDEXES
# =========================================================

# Map every non-empty header in row 1 (whitespace-trimmed) to its 1-based
# column number. If two columns share a header text, the right-most one wins.
headers = {}

for col in range(1, ws.max_column + 1):
    header = ws.cell(row=1, column=col).value
    if header:
        headers[str(header).strip()] = col

paper_title_col = headers.get("Paper_Title")
source_file_col = headers.get("Source_File")

# Stop before any data is touched, and say exactly which column is absent so
# the workbook can be corrected without guessing.
missing_columns = [
    name for name in ("Paper_Title", "Source_File") if name not in headers
]
if missing_columns:
    raise ValueError(
        f"Required column(s) not found in header row: {', '.join(missing_columns)}"
    )

# =========================================================
# BUILD SOURCE_FILE -> PAPER_TITLE MAP
# =========================================================

# Keys are Source_File values trimmed and lower-cased, the same normalisation
# the update loop applies to worksheet cells. When several records share a
# key, the last one wins.
# NOTE(review): assumes json_data is a list of JSON objects (dicts) - confirm.
title_map = {}

for item in json_data:
    raw_source = item.get("Source_File")
    paper_title = item.get("Paper_Title")

    # A JSON null must not be stringified into the bogus key "none".
    if raw_source is None:
        continue

    source_file = str(raw_source).strip().lower()
    if not source_file:
        continue

    # Skip records with no usable title so an existing title in the workbook
    # is never replaced by an empty value.
    if paper_title is None or not str(paper_title).strip():
        continue

    title_map[source_file] = paper_title

# =========================================================
# UPDATE EXCEL
# =========================================================

# Walk the data rows (row 1 holds the headers). When a row's Source_File,
# trimmed and lower-cased, is a key of title_map, write the mapped title into
# that row's Paper_Title cell and count the row.
updated = 0

for row_number in range(2, ws.max_row + 1):
    cell_value = ws.cell(row=row_number, column=source_file_col).value

    # Rows with an empty Source_File cell are left untouched.
    if cell_value:
        lookup_key = str(cell_value).strip().lower()

        if lookup_key in title_map:
            ws.cell(row=row_number, column=paper_title_col).value = title_map[lookup_key]
            updated += 1

# =========================================================
# SAVE
# =========================================================

# Write the edited workbook to the output path, then report how many rows
# were counted as updated and where the file was written.
wb.save(OUTPUT_XLSX)

print(
    f"[DONE] Updated rows: {updated}",
    f"[SAVED] {OUTPUT_XLSX}",
    sep="\n",
)