Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import re | |
# All data files live in "../data" relative to the directory of this script.
_SCRIPT_DIR = os.path.dirname(__file__)
DATA_DIR = os.path.join(_SCRIPT_DIR, "..", "data")

# Markdown slide directory that gets parsed.
MD_FILE = os.path.join(DATA_DIR, "slide_directory.md")
# Document registry whose page_section_map gets rewritten.
DOCS_FILE = os.path.join(DATA_DIR, "documents.json")
# Destination of the detailed per-slide index.
OUT_JSON = os.path.join(DATA_DIR, "slide_directory_index.json")
def parse_markdown_table(file_path):
    """Parse the slide-directory Markdown table into a list of row dicts.

    A line is treated as a data row when it starts with ``|``, has at least
    seven cells, and its first cell (with any ``**`` bold markers removed)
    is a decimal number. Header and separator rows fail the numeric check
    and are therefore skipped, as is any non-table text.

    Args:
        file_path: Path to the UTF-8 encoded Markdown file.

    Returns:
        A list of dicts, in file order, with the keys ``slide`` (int),
        ``file_name``, ``period``, ``topics``, ``kpis``, ``synonyms``,
        ``description`` and ``visual_layout``. ``visual_layout`` is an
        empty string when the row has no eighth cell.

    Note:
        Cells are split on every ``|``; pipes escaped inside a cell are
        not supported.
    """
    table_data = []
    with open(file_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith("|"):
                continue

            # Remove exactly one leading pipe and, when present, one trailing
            # pipe. Rows written without a closing pipe keep their last cell.
            inner = line[1:]
            if inner.endswith("|"):
                inner = inner[:-1]
            cells = [cell.strip() for cell in inner.split("|")]
            if len(cells) < 7:
                continue

            slide_num_raw = cells[0].replace("**", "")
            # isdecimal() only accepts characters int() can convert;
            # isdigit() would also accept e.g. superscript digits, which
            # make int() raise ValueError.
            if not slide_num_raw.isdecimal():
                continue

            table_data.append({
                "slide": int(slide_num_raw),
                "file_name": cells[1],
                "period": cells[2],
                "topics": cells[3],
                "kpis": cells[4],
                "synonyms": cells[5],
                "description": cells[6],
                "visual_layout": cells[7] if len(cells) > 7 else "",
            })
    return table_data
def update_documents_json(
    table_data,
    target_filename="emiratesnbd_investor_presentation_2026_q1.pdf",
    docs_file=None,
):
    """Rewrite ``page_section_map`` for matching entries of the documents file.

    Every entry whose ``filename`` equals ``target_filename`` gets its
    ``page_section_map`` replaced by a mapping from the slide number (as a
    string) to ``"<topics> | <description> | Visuals: <visual_layout>"``.
    The whole documents list is then written back with two-space
    indentation, whether or not any entry matched.

    Args:
        table_data: Iterable of row dicts with the keys ``slide``,
            ``topics`` and ``description``; ``visual_layout`` is optional
            and defaults to an empty string.
        target_filename: Value of the ``filename`` field identifying the
            entries to update.
        docs_file: Path of the JSON documents file. Defaults to the
            module-level ``DOCS_FILE`` when ``None``.

    Returns:
        None. The function returns without doing anything when the
        documents file does not exist.
    """
    if docs_file is None:
        docs_file = DOCS_FILE
    if not os.path.exists(docs_file):
        return

    with open(docs_file, "r", encoding="utf-8") as f:
        docs = json.load(f)

    for doc in docs:
        if doc.get("filename") != target_filename:
            continue
        # Combine topics, description and layout for a richer section map.
        doc["page_section_map"] = {
            str(row["slide"]): (
                f"{row['topics']} | {row['description']} | "
                f"Visuals: {row.get('visual_layout', '')}"
            )
            for row in table_data
        }

    with open(docs_file, "w", encoding="utf-8") as f:
        json.dump(docs, f, indent=2)
def main():
    """Build the slide index from the Markdown directory and sync documents.json.

    Steps:
        1. Abort with an error message if ``MD_FILE`` does not exist.
        2. Parse the Markdown table and write the rows to ``OUT_JSON``.
        3. Refresh ``page_section_map`` in ``DOCS_FILE``, or report that the
           step was skipped when that file is missing.
    """
    if not os.path.exists(MD_FILE):
        print(f"Error: {MD_FILE} not found.")
        return

    print(f"Parsing {MD_FILE}...")
    table_data = parse_markdown_table(MD_FILE)
    print(f"Parsed {len(table_data)} slides.")

    # Save the detailed index.
    with open(OUT_JSON, "w", encoding="utf-8") as f:
        json.dump(table_data, f, indent=2)
    print(f"Saved detailed index to {OUT_JSON}")

    # update_documents_json() returns silently when DOCS_FILE is absent, so
    # check here to avoid reporting an update that never happened.
    if not os.path.exists(DOCS_FILE):
        print(f"Skipped page_section_map update: {DOCS_FILE} not found.")
        return
    update_documents_json(table_data)
    print(f"Updated page_section_map in {DOCS_FILE}")


if __name__ == "__main__":
    main()