"""Build a slide index from the slide-directory markdown table.

Reads the markdown table in ``MD_FILE``, writes the parsed rows to
``OUT_JSON`` and refreshes the ``page_section_map`` of the matching entry
in ``DOCS_FILE``.
"""
import json
import os
import re

DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data")
MD_FILE = os.path.join(DATA_DIR, "slide_directory.md")
DOCS_FILE = os.path.join(DATA_DIR, "documents.json")
OUT_JSON = os.path.join(DATA_DIR, "slide_directory_index.json")

# Document entry in DOCS_FILE whose page_section_map is refreshed by default.
TARGET_DOC_FILENAME = "emiratesnbd_investor_presentation_2026_q1.pdf"

# A pipe separates cells unless it is escaped with a backslash.
_CELL_SEPARATOR = re.compile(r"(?<!\\)\|")

# Minimum number of cells a row needs to be treated as a slide row.
_MIN_CELLS = 7


def _split_table_row(line):
    """Split one stripped markdown table row (starting with ``|``) into cells.

    The trailing pipe is optional, and ``\\|`` inside a cell is kept as a
    literal ``|`` instead of being treated as a cell boundary.
    """
    inner = line[1:]
    if inner.endswith("|") and not inner.endswith("\\|"):
        inner = inner[:-1]
    return [
        cell.strip().replace("\\|", "|")
        for cell in _CELL_SEPARATOR.split(inner)
    ]


def parse_markdown_table(file_path) -> list:
    """Parse the slide rows of a markdown table into a list of dicts.

    A line is a slide row when it starts with ``|``, has at least seven
    cells, and its first cell is a decimal number (optionally wrapped in
    ``**`` bold markers). Header and ``| --- |`` separator rows never have a
    numeric first cell, so they are skipped by the same check.

    Args:
        file_path: Path of the UTF-8 markdown file to read.

    Returns:
        One dict per slide row with the keys ``slide`` (int), ``file_name``,
        ``period``, ``topics``, ``kpis``, ``synonyms``, ``description`` and
        ``visual_layout`` (empty string when the row has no eighth cell).
    """
    table_data = []
    with open(file_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith("|"):
                continue

            cells = _split_table_row(line)
            if len(cells) < _MIN_CELLS:
                continue

            slide_num_raw = cells[0].replace("**", "").strip()
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscript digits that int() rejects.
            if not slide_num_raw.isdecimal():
                continue

            table_data.append({
                "slide": int(slide_num_raw),
                "file_name": cells[1],
                "period": cells[2],
                "topics": cells[3],
                "kpis": cells[4],
                "synonyms": cells[5],
                "description": cells[6],
                "visual_layout": cells[7] if len(cells) > 7 else "",
            })
    return table_data


def update_documents_json(table_data, docs_file=None,
                          target_filename=TARGET_DOC_FILENAME) -> bool:
    """Replace ``page_section_map`` of the target document in the docs file.

    Each slide becomes one map entry keyed by the slide number as a string,
    with the value ``"<topics> | <description> | Visuals: <visual_layout>"``.

    Args:
        table_data: Rows as returned by ``parse_markdown_table``.
        docs_file: Path of the documents JSON file; defaults to ``DOCS_FILE``.
        target_filename: ``filename`` value of the document entry to update.

    Returns:
        True when at least one document entry was updated and the file was
        rewritten; False when the file does not exist or contains no entry
        for ``target_filename`` (the file is left untouched in that case).
    """
    if docs_file is None:
        docs_file = DOCS_FILE
    if not os.path.exists(docs_file):
        return False

    with open(docs_file, "r", encoding="utf-8") as f:
        docs = json.load(f)

    # Combine topics and description for a richer section map.
    new_map = {
        str(row["slide"]): (
            f"{row['topics']} | {row['description']} | "
            f"Visuals: {row.get('visual_layout', '')}"
        )
        for row in table_data
    }

    updated = False
    for doc in docs:
        if doc.get("filename") == target_filename:
            # Give every matching entry its own dict so they never alias.
            doc["page_section_map"] = dict(new_map)
            updated = True

    if not updated:
        # Nothing changed, so do not rewrite the file.
        return False

    with open(docs_file, "w", encoding="utf-8") as f:
        json.dump(docs, f, indent=2)
    return True


def main():
    """Parse ``MD_FILE``, save the index to ``OUT_JSON``, update ``DOCS_FILE``."""
    if not os.path.exists(MD_FILE):
        print(f"Error: {MD_FILE} not found.")
        return

    print(f"Parsing {MD_FILE}...")
    table_data = parse_markdown_table(MD_FILE)
    print(f"Parsed {len(table_data)} slides.")

    # Save the detailed index
    with open(OUT_JSON, "w", encoding="utf-8") as f:
        json.dump(table_data, f, indent=2)
    print(f"Saved detailed index to {OUT_JSON}")

    # Update documents.json, reporting honestly whether anything was written.
    if update_documents_json(table_data):
        print(f"Updated page_section_map in {DOCS_FILE}")
    else:
        print(
            f"Warning: page_section_map not updated; {DOCS_FILE} is missing "
            f"or has no entry for {TARGET_DOC_FILENAME}."
        )


if __name__ == "__main__":
    main()