import re import os import html import pandas as pd from docx import Document from docx.shared import Pt, Cm, Inches, RGBColor from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_TAB_ALIGNMENT, WD_TAB_LEADER from docx.enum.table import WD_ALIGN_VERTICAL, WD_TABLE_ALIGNMENT from docx.enum.style import WD_STYLE_TYPE from docx.enum.section import WD_SECTION from docx.oxml import parse_xml from docx.oxml.ns import nsdecls from docx.oxml.shared import OxmlElement, qn import zipfile from collections import defaultdict import tempfile THEME_COLOR_HEX = "5FFFDF" # Hex version for XML elements THEME_COLOR = RGBColor.from_string(THEME_COLOR_HEX) # Common paper sizes (width x height in inches) PAPER_SIZES = { 'LETTER': (8.5, 11), # US Letter 'A4': (8.27, 11.69), # A4 'A4_WIDE': (8.77, 11.69), 'A3': (11.69, 16.54), # A3 'A5': (5.83, 8.27), # A5 'LEGAL': (8.5, 14), # US Legal 'TABLOID': (11, 17), # Tabloid 'LEDGER': (17, 11), # Ledger } def get_circled_number(num): """Convert a number to its circled Unicode equivalent""" # Unicode circled numbers 1-50 circled_numbers = { 1: '①', 2: '②', 3: '③', 4: '④', 5: '⑤', 6: '⑥', 7: '⑦', 8: '⑧', 9: '⑨', 10: '⑩', 11: '⑪', 12: '⑫', 13: '⑬', 14: '⑭', 15: '⑮', 16: '⑯', 17: '⑰', 18: '⑱', 19: '⑲', 20: '⑳', 21: '㉑', 22: '㉒', 23: '㉓', 24: '㉔', 25: '㉕', 26: '㉖', 27: '㉗', 28: '㉘', 29: '㉙', 30: '㉚', 31: '㉛', 32: '㉜', 33: '㉝', 34: '㉞', 35: '㉟', 36: '㊱', 37: '㊲', 38: '㊳', 39: '㊴', 40: '㊵', 41: '㊶', 42: '㊷', 43: '㊸', 44: '㊹', 45: '㊺', 46: '㊻', 47: '㊼', 48: '㊽', 49: '㊾', 50: '㊿' } if num in circled_numbers: return circled_numbers[num] else: # For numbers > 50, use parentheses as fallback return f"({num})" def prepare_image_folder(path): """ Prepare the image folder. If it's a zip file, extract it to a temporary folder. Returns None gracefully if path is None or invalid. """ # Handle None or empty path if path is None or str(path).strip() == '': print("ℹ️ No image folder provided - images will be skipped") return None, False, None path = str(path).strip() # Check if it's a zip file if path.lower().endswith('.zip') and os.path.isfile(path): print(f"📦 Detected ZIP file: {os.path.basename(path)}") print(f" Extracting to temporary folder...") try: # Create temporary directory temp_dir = tempfile.TemporaryDirectory() # Extract zip file with zipfile.ZipFile(path, 'r') as zip_ref: zip_ref.extractall(temp_dir.name) # Count extracted files all_files = [] for root, dirs, files in os.walk(temp_dir.name): all_files.extend([os.path.join(root, f) for f in files]) image_files = [f for f in all_files if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'))] print(f" ✓ Extracted {len(all_files)} files ({len(image_files)} images)") print(f" Using folder: {temp_dir.name}") return temp_dir.name, True, temp_dir except Exception as e: print(f" ✗ Error extracting ZIP: {e}") return None, False, None # Check if it's a regular folder elif os.path.isdir(path): print(f"📁 Using folder: {path}") return path, False, None else: print(f"⚠️ WARNING: Path is neither a folder nor a ZIP file: {path}") print(f"ℹ️ Images will be skipped") return None, False, None def map_images_from_excel(excel_path, image_folder): """ Map images to questions based on Photo Q and Photo C columns in Excel. Returns empty dict if image_folder is None. """ # If no image folder, return empty dict immediately if image_folder is None: print("ℹ️ No image folder available - skipping image mapping") return {} xls = pd.ExcelFile(excel_path) first_sheet = xls.sheet_names[0] df = pd.read_excel(excel_path, sheet_name=first_sheet) # Dictionary to store question -> image mappings question_images = defaultdict(lambda: {'photo_q': None, 'photo_c': None}) # Check if Photo Q and Photo C columns exist has_photo_q = 'Photo Q' in df.columns has_photo_c = 'Photo C' in df.columns if not has_photo_q and not has_photo_c: print("ℹ️ No 'Photo Q' or 'Photo C' columns found in Excel") return {} print(f"\n=== MAPPING IMAGES FROM FOLDER ===") print(f"Image folder: {image_folder}") print(f"Folder exists: {os.path.exists(image_folder)}") if os.path.exists(image_folder): try: images_in_folder = [f for f in os.listdir(image_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'))] print(f"Images found in folder: {len(images_in_folder)}") except Exception as e: print(f"Error reading folder: {e}") return {} else: print(f"ERROR: Folder does not exist!") return {} current_question = None # Scan through all rows for idx, row in df.iterrows(): # Detect new question if pd.notna(row.get('Numero')): current_question = row['Numero'] if current_question is None: continue # Check Photo Q on this row if has_photo_q and pd.notna(row['Photo Q']): photo_q_value = str(row['Photo Q']).strip() if photo_q_value and photo_q_value.lower() not in ['nan', 'none', ''] and not photo_q_value.startswith('='): # Only set if not already set (first occurrence wins) if not question_images[current_question]['photo_q']: image_path = find_image_in_folder(photo_q_value, image_folder) if image_path: question_images[current_question]['photo_q'] = image_path print(f"Q{current_question}: Photo Q -> {os.path.basename(image_path)}") # Check Photo C on this row if has_photo_c and pd.notna(row['Photo C']): photo_c_value = str(row['Photo C']).strip() if photo_c_value and photo_c_value.lower() not in ['nan', 'none', ''] and not photo_c_value.startswith('='): # Only set if not already set (first occurrence wins) if not question_images[current_question]['photo_c']: image_path = find_image_in_folder(photo_c_value, image_folder) if image_path: question_images[current_question]['photo_c'] = image_path print(f"Q{current_question}: Photo C -> {os.path.basename(image_path)}") print(f"\n✓ Mapped images to {len(question_images)} questions") return dict(question_images) def find_image_in_folder(filename, image_folder): """ Find an image file in the specified folder. Returns None if image_folder is None or if image not found. """ if image_folder is None: return None if not filename or str(filename).strip().lower() in ['nan', 'none', '']: return None filename = str(filename).strip() # If the filename already has the full path and exists, return it if os.path.isabs(filename) and os.path.exists(filename): return filename # Common image extensions to try image_extensions = ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'] # Get the filename without extension (if it has one) name_without_ext = os.path.splitext(filename)[0] original_ext = os.path.splitext(filename)[1].lower() # Function to search in a directory (including subdirectories) def search_in_dir(search_dir): # Try exact match first in this directory exact_path = os.path.join(search_dir, filename) if os.path.exists(exact_path): return exact_path # Try case-insensitive match in this directory try: files_in_dir = os.listdir(search_dir) for file in files_in_dir: if file.lower() == filename.lower(): found_path = os.path.join(search_dir, file) return found_path # If no extension provided, try all common extensions if not original_ext: for ext in image_extensions: test_path = os.path.join(search_dir, name_without_ext + ext) if os.path.exists(test_path): return test_path # Also try case-insensitive for file in files_in_dir: if file.lower() == (name_without_ext + ext).lower(): found_path = os.path.join(search_dir, file) return found_path except Exception: pass return None # Search in main folder first result = search_in_dir(image_folder) if result: print(f" ✓ Found: {os.path.relpath(result, image_folder)}") return result # Search in all subdirectories try: for root, dirs, files in os.walk(image_folder): result = search_in_dir(root) if result: print(f" ✓ Found in subfolder: {os.path.relpath(result, image_folder)}") return result except Exception as e: print(f" ✗ Error searching subfolders: {e}") print(f" ✗ Not found: {filename}") return None def process_excel_to_word(excel_file_path, output_word_path, image_folder=None, display_name=None, use_two_columns=True, add_separator_line=True, balance_method="dynamic", theme_hex=None): """Main function to process Excel and create a Word document with TOC on the first page""" if theme_hex is None: theme_hex = THEME_COLOR_HEX theme_color = RGBColor.from_string(theme_hex) # Prepare image folder (extract if ZIP) - gracefully handle None actual_image_folder, is_temp, temp_dir_obj = prepare_image_folder(image_folder) # Map images from the prepared folder (returns empty dict if None) question_photos = map_images_from_excel(excel_file_path, actual_image_folder) # ... rest of the function remains the same ... # The code will now handle missing images gracefully since question_photos will be empty # At the end, clean up temporary folder if it was created if is_temp and temp_dir_obj is not None: print(f"\n🧹 Cleaning up temporary folder...") try: temp_dir_obj.cleanup() print(f" ✓ Temporary files removed") except Exception as e: print(f" ⚠️ Could not clean up: {e}") def preview_image_mapping(question_images): """Preview the image mapping for verification""" print("\n" + "=" * 60) print("IMAGE MAPPING PREVIEW") print("=" * 60) for q_num in sorted(question_images.keys()): photos = question_images[q_num] print(f"\nQuestion {q_num}:") if photos['photo_q']: exists = "✓" if os.path.exists(photos['photo_q']) else "✗" print(f" Photo Q: {exists} {os.path.basename(photos['photo_q'])}") else: print(f" Photo Q: (none)") if photos['photo_c']: exists = "✓" if os.path.exists(photos['photo_c']) else "✗" print(f" Photo C: {exists} {os.path.basename(photos['photo_c'])}") else: print(f" Photo C: (none)") print("=" * 60 + "\n") def is_only_x_string(text): """Check if a string contains only X's (case insensitive)""" if not text or pd.isna(text): return False cleaned_text = str(text).strip() if not cleaned_text: return False return all(c in ('x', 'X') for c in cleaned_text) def set_page_size(section, width_inches, height_inches): """Set custom page size for a section""" sectPr = section._sectPr # Create or get pgSz element pgSz = sectPr.find(qn('w:pgSz')) if pgSz is None: pgSz = OxmlElement('w:pgSz') sectPr.insert(0, pgSz) # Convert inches to twentieths of a point (1 inch = 1440 twips) width_twips = int(width_inches * 1440) height_twips = int(height_inches * 1440) pgSz.set(qn('w:w'), str(width_twips)) pgSz.set(qn('w:h'), str(height_twips)) def set_two_column_layout(doc, add_separator_line=True, balance_columns=True): """Set the document to use a two-column layout with optional separator line and column balancing""" # Get the current section section = doc.sections[0] # Create sectPr element if it doesn't exist sectPr = section._sectPr # Create cols element for columns cols = sectPr.find(qn('w:cols')) if cols is None: cols = OxmlElement('w:cols') sectPr.append(cols) # Set number of columns to 2 cols.set(qn('w:num'), '2') # Set space between columns (reduced for better space utilization) cols.set(qn('w:space'), '432') # 0.3 inch in twentieths of a point (was 708) # Enable column balancing if requested if balance_columns: cols.set(qn('w:equalWidth'), '1') # Equal width columns return doc def set_cell_borders(cell, top=False, bottom=False, left=False, right=False): """Set specific borders for a table cell""" from docx.oxml import parse_xml from docx.oxml.ns import nsdecls # Get the cell's table cell properties tcPr = cell._tc.get_or_add_tcPr() # Create borders element tcBorders = tcPr.find(qn('w:tcBorders')) if tcBorders is None: tcBorders = parse_xml(f'') tcPr.append(tcBorders) # Define border settings border_settings = { 'top': top, 'bottom': bottom, 'left': left, 'right': right } for border_name, should_show in border_settings.items(): border_element = tcBorders.find(qn(f'w:{border_name}')) if border_element is not None: tcBorders.remove(border_element) if should_show: # Create visible border border_xml = f'' border_element = parse_xml(border_xml) tcBorders.append(border_element) # If should_show is False, don't add any border element (let table-level borders show through) def continue_two_column_layout(doc): """Continue with the existing two-column layout for answer tables""" # Add a column break to start fresh in the columns add_column_break(doc) return doc def add_column_break(doc): """Add a column break to move to the next column""" para = doc.add_paragraph() run = para.runs[0] if para.runs else para.add_run() # Create column break element br = OxmlElement('w:br') br.set(qn('w:type'), 'column') run._element.append(br) def add_page_break(doc): """Add a page break to the document""" doc.add_page_break() def create_course_title(doc, course_number, course_title, theme_color=None, theme_hex=None, question_count=None): """Create a course title section with rounded frame (unfilled) matching module style Automatically wraps to two lines and doubles height if text is too long""" if theme_hex is None: theme_hex = THEME_COLOR_HEX if theme_color is None: theme_color = RGBColor.from_string(theme_hex) # Add minimal space before course title course_para = doc.add_paragraph() course_para.alignment = WD_ALIGN_PARAGRAPH.CENTER # Remove all spacing before and after course_para.paragraph_format.space_before = Pt(0) course_para.paragraph_format.space_after = Pt(0) course_para.paragraph_format.keep_with_next = True course_para.paragraph_format.keep_together = True # Format the text circled_num = get_circled_number(question_count) full_text = f"{course_number}. {course_title} {circled_num}" text_length = len(full_text) # ========== CUSTOMIZE COURSE TITLE APPEARANCE HERE ========== MAX_CHARS_SINGLE_LINE = 40 # Threshold for wrapping to two lines SINGLE_LINE_HEIGHT = 31 # Frame height for single line DOUBLE_LINE_HEIGHT = 55 # Frame height for two lines (almost double) COURSE_ROUNDNESS = 50 # Corner roundness % COURSE_FONT_SIZE = 26 # Font size in half-points (26=13pt) COURSE_TEXT_COLOR = theme_hex COURSE_STROKE_COLOR = theme_hex COURSE_STROKE_WEIGHT = "2pt" MAX_WIDTH_PT = 280 # Maximum width in points for the frame # ============================================================ # Determine if we need two lines needs_two_lines = text_length > MAX_CHARS_SINGLE_LINE # Common XML properties to reduce repetition xml_size_color = f'' if needs_two_lines: # Split text intelligently words = course_title.split() mid_point = len(words) // 2 # Try to split at middle, but prefer breaking after shorter first line # (We calculate lengths including the number to match your width logic) prefix_len = len(f"{course_number}. ") first_part_title = " ".join(words[:mid_point]) while (prefix_len + len(first_part_title)) > MAX_CHARS_SINGLE_LINE and mid_point > 1: mid_point -= 1 first_part_title = " ".join(words[:mid_point]) # Define the two parts of the TITLE only title_part_1 = " ".join(words[:mid_point]) title_part_2 = " ".join(words[mid_point:]) # Escape texts for XML esc_num = html.escape(f"{course_number}. ") esc_title_1 = html.escape(title_part_1) # Add a trailing space to title part 2 to separate it from the circle esc_title_2 = html.escape(title_part_2 + " ") esc_circle = html.escape(f"{circled_num}") # Calculate width based on the longest visual line # Line 1: Number + Title Part 1 # Line 2: Title Part 2 + Circle len_line_1 = len(f"{course_number}. {title_part_1}") len_line_2 = len(f"{title_part_2} {circled_num}") max_line_length = max(len_line_1, len_line_2) estimated_width = min((max_line_length * 8) + 20, MAX_WIDTH_PT) frame_height = DOUBLE_LINE_HEIGHT # Two-line XML with 5 separate runs to handle fonts and line break text_content = f''' {xml_size_color} {esc_num} {xml_size_color} {esc_title_1} {xml_size_color} {esc_title_2} {xml_size_color} {esc_circle} ''' else: # Single line estimated_width = min((text_length * 9) + 20, MAX_WIDTH_PT) frame_height = SINGLE_LINE_HEIGHT # Escape texts esc_num = html.escape(f"{course_number}. ") esc_title = html.escape(f"{course_title} ") esc_circle = html.escape(f"{circled_num}") # Single-line XML with 3 separate runs for the fonts text_content = f''' {xml_size_color} {esc_num} {xml_size_color} {esc_title} {xml_size_color} {esc_circle} ''' # Create rounded rectangle shape (UNFILLED with stroke) shape_xml = f''' {text_content} ''' shape_element = parse_xml(shape_xml) course_para._p.append(shape_element) return course_para def highlight_words_in_text(paragraph, text, highlight_words, theme_color, font_name='Inter Display Medium', font_size=10.5, bold=False): """ Add text to paragraph with specific words/substrings highlighted in theme color. Highlights literal text matches (including special characters like parentheses, backslashes). Args: paragraph: The paragraph to add text to text: The full text to add highlight_words: List of literal strings to highlight theme_color: RGBColor object for highlighting font_name: Font to use font_size: Font size in points bold: Whether text should be bold """ if not highlight_words or not text: # No highlighting needed, just add normal text run = paragraph.add_run(text) run.font.name = font_name run.font.size = Pt(font_size) if bold: run.font.bold = True return # Create pattern for matching (escape each string to treat as literal text) import re # Escape each word/phrase to match it literally, then join with OR escaped_words = [re.escape(word) for word in highlight_words] pattern = '(' + '|'.join(escaped_words) + ')' # Split text by highlighted words/substrings parts = re.split(pattern, text, flags=re.IGNORECASE) for i, part in enumerate(parts): if not part: continue run = paragraph.add_run(part) run.font.name = font_name run.font.size = Pt(font_size) if bold: run.font.bold = True # Check if this part should be highlighted (odd indices after split are matches) if i % 2 == 1: run.font.color.rgb = theme_color def format_question_block(doc, question_num, question_text, choices, correct_answers, source, comment=None, choice_commentaire=None, photo_q=None, photo_c=None, theme_color=None, theme_hex=None, show_comments=True, highlight_words=None): if theme_color is None: theme_color = THEME_COLOR if theme_hex is None: theme_hex = THEME_COLOR_HEX if highlight_words is None: highlight_words = [] """Format a single question block with reduced spacing and keep together formatting""" if 'TinySpace' not in doc.styles: tiny_style = doc.styles.add_style('TinySpace', WD_STYLE_TYPE.PARAGRAPH) tiny_style.font.name = 'SF Pro' tiny_style.font.size = Pt(5) tiny_style.paragraph_format.line_spacing = Pt(5) tiny_style.paragraph_format.space_before = Pt(0) tiny_style.paragraph_format.space_after = Pt(0) # Question title with reduced spacing and keep-together formatting question_para = doc.add_paragraph() question_para.paragraph_format.space_before = Pt(1) question_para.paragraph_format.space_after = Pt(0) question_para.paragraph_format.keep_with_next = True question_para.paragraph_format.keep_together = True question_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY # Question number in Axiforma Black num_run = question_para.add_run(f"{question_num}. ") num_run.font.name = 'Inter ExtraBold' num_run.font.size = Pt(10) num_run.font.bold = True num_run.font.color.rgb = theme_color # Add question text with highlighting (REMOVE THE DUPLICATE!) highlight_words_in_text(question_para, question_text, highlight_words, theme_color, font_name='Inter ExtraBold', font_size=10) # Display ALL choices for this question with minimal spacing # Filter out choices that are only X's filtered_choices = [(letter, text) for letter, text in choices if not is_only_x_string(text)] # Display filtered choices for this question with minimal spacing choice_paragraphs = [] for i, (choice_letter, choice_text) in enumerate(filtered_choices): choice_para = doc.add_paragraph() choice_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY choice_para.paragraph_format.space_before = Pt(1) choice_para.paragraph_format.space_after = Pt(1) choice_para.paragraph_format.keep_together = True # Keep all choices together, and keep the last choice with the source if i < len(choices) - 1: choice_para.paragraph_format.keep_with_next = True else: # Last choice should stay with what comes next (Photo C or source) choice_para.paragraph_format.keep_with_next = True # Ensure each choice ends with a dot if not str(choice_text).strip().endswith('.'): choice_text = str(choice_text).strip() + '.' # Choice letter (e.g., "A-") letter_run = choice_para.add_run(f"{choice_letter}- ") letter_run.font.name = 'Inter ExtraBold' letter_run.font.size = Pt(10) # Choice text text_run = choice_para.add_run(choice_text) text_run.font.name = 'Inter Display SemiBold' text_run.font.size = Pt(10) # Choice text with highlighting (REMOVE THE DUPLICATE AND FIX TYPO!) # highlight_words_in_text(choice_para, choice_text, highlight_words, theme_color, # font_name='Inter Display Medium', font_size=10.5) # ADD Photo C HERE (right after choices, before source) if photo_c: photo_c_clean = str(photo_c).strip() if photo_c_clean and photo_c_clean.lower() not in ['nan', 'none', '']: if os.path.exists(photo_c_clean): try: print(f"DEBUG: Adding Photo C from: {photo_c_clean}") photo_para = doc.add_paragraph() photo_para.alignment = WD_ALIGN_PARAGRAPH.CENTER photo_para.paragraph_format.space_before = Pt(2) photo_para.paragraph_format.space_after = Pt(2) photo_para.paragraph_format.keep_with_next = True # Keep with source run = photo_para.add_run() run.add_picture(photo_c_clean, width=Inches(2.5)) print(f"DEBUG: Successfully added Photo C") except Exception as e: print(f"ERROR: Could not add Photo C: {e}") # Add error message in document error_para = doc.add_paragraph() error_para.alignment = WD_ALIGN_PARAGRAPH.CENTER error_run = error_para.add_run(f"[Photo C error: {str(e)[:50]}]") error_run.font.size = Pt(7) error_run.font.italic = True else: print(f"WARNING: Photo C path does not exist: {photo_c_clean}") # PROF.PY: Source and Answer line (using table for side-by-side layout) # Create a single-row, 2-column table info_table = doc.add_table(rows=1, cols=2) info_table.alignment = WD_TABLE_ALIGNMENT.LEFT info_table.allow_autofit = False # Disable autofit for manual control # Remove all borders tblPr = info_table._tbl.tblPr if tblPr is None: tblPr = OxmlElement('w:tblPr') info_table._tbl.insert(0, tblPr) # Set borders to none tblBorders = parse_xml(f''' ''') tblPr.append(tblBorders) # Set column widths left_cell = info_table.rows[0].cells[0] right_cell = info_table.rows[0].cells[1] left_cell.width = Inches(1.5) # Left cell for "Réponse: ABC" right_cell.width = Inches(4.5) # Right cell for source # Set spacing left_cell._element.get_or_add_tcPr() right_cell._element.get_or_add_tcPr() # LEFT cell - Answer left_para = left_cell.paragraphs[0] left_para.alignment = WD_ALIGN_PARAGRAPH.LEFT left_para.paragraph_format.space_before = Pt(2) left_para.paragraph_format.space_after = Pt(2) # Prepare answer text - convert list to space-separated string if isinstance(correct_answers, list): if len(correct_answers) == 0: answer_text = "/" else: answer_text = " ".join(correct_answers) else: answer_text = str(correct_answers) if correct_answers else "/" answer_label_run = left_para.add_run("Réponse:") answer_label_run.font.name = 'Inter ExtraBold' answer_label_run.font.size = Pt(8) answer_label_run.font.bold = True answer_label_run.font.underline = True answer_value_run = left_para.add_run(f' {answer_text}') answer_value_run.font.name = 'Inter ExtraBold' answer_value_run.font.size = Pt(8) answer_value_run.font.color.rgb = theme_color # RIGHT cell - Source right_para = right_cell.paragraphs[0] right_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT right_para.paragraph_format.space_before = Pt(2) right_para.paragraph_format.space_after = Pt(2) source_label_run = right_para.add_run("Source:") source_label_run.font.name = 'Inter ExtraBold' source_label_run.font.size = Pt(8) source_label_run.font.bold = True source_label_run.font.underline = True source_value_run = right_para.add_run(f" {source}") source_value_run.font.name = 'Inter ExtraBold' source_value_run.font.size = Pt(8) source_value_run.font.color.rgb = theme_color # Keep with comment if exists if comment and str(comment).strip() and str(comment).lower() != 'nan': right_para.paragraph_format.keep_with_next = True # Only show comments if show_comments is True if show_comments and (comment or choice_commentaire or photo_q): add_choice_commentaire_section( doc, choice_commentaire, photo_q, theme_color, theme_hex, general_comment=comment, question_num=question_num, highlight_words=highlight_words ) else: # Only add empty space if there's no comment box empty_para = doc.add_paragraph(' ', style='TinySpace') empty_para.paragraph_format.space_before = Pt(0) empty_para.paragraph_format.space_after = Pt(0) empty_para.paragraph_format.line_spacing = Pt(7) empty_run = empty_para.add_run(' ') empty_run.font.size = Pt(7) def add_page_numbers(doc, theme_hex=None): """Add page numbers to the footer of all pages (keeps existing module headers), starting from page 1 after TOC.""" if theme_hex is None: theme_hex = THEME_COLOR_HEX def create_footer_content(footer_elem, theme_hex): """Helper function to create footer content with page number and TOC link""" # Add an empty line above the page number empty_para = footer_elem.paragraphs[0] empty_para.paragraph_format.space_before = Pt(0) empty_para.paragraph_format.space_after = Pt(0) empty_para.paragraph_format.line_spacing = 1.0 # Add the page number paragraph paragraph = footer_elem.add_paragraph() paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER # Set vertical alignment to center paragraph.paragraph_format.space_before = Pt(0) paragraph.paragraph_format.space_after = Pt(0) # Add page number in center run = paragraph.add_run() # Create the PAGE field fldChar1 = OxmlElement('w:fldChar') fldChar1.set(qn('w:fldCharType'), 'begin') instrText = OxmlElement('w:instrText') instrText.set(qn('xml:space'), 'preserve') instrText.text = "PAGE" fldChar2 = OxmlElement('w:fldChar') fldChar2.set(qn('w:fldCharType'), 'end') run._r.append(fldChar1) run._r.append(instrText) run._r.append(fldChar2) run.font.name = 'Montserrat' run.font.size = Pt(14) run.font.bold = True run.font.color.rgb = RGBColor.from_string(theme_hex) # ===== ADD TOC LINK IN TEXT BOX (BOTTOM RIGHT) ===== # Create TOC link text box - absolutely positioned, does not affect page number centering toc_textbox_xml = f''' ↗️ SOM ''' toc_textbox_element = parse_xml(toc_textbox_xml) paragraph._p.append(toc_textbox_element) for section_idx, section in enumerate(doc.sections): # ===== HEADER (keep existing text like module name) ===== header = section.header header.is_linked_to_previous = False section.header_distance = Cm(0.3) # If header is empty, add a blank paragraph if not header.paragraphs: header.add_paragraph() # ===== FOOTER FOR ODD/DEFAULT PAGES (page numbers + TOC link) ===== footer = section.footer footer.is_linked_to_previous = False section.footer_distance = Cm(0.4) # Distance from bottom of page to footer # Clear existing text in footer if footer.paragraphs: footer.paragraphs[0].clear() else: footer.add_paragraph() # Skip page numbers for the first section (TOC) if section_idx == 0: continue # For the second section (first content page), restart numbering at 1 if section_idx == 1: sectPr = section._sectPr pgNumType = sectPr.find(qn('w:pgNumType')) if pgNumType is None: pgNumType = OxmlElement('w:pgNumType') sectPr.append(pgNumType) pgNumType.set(qn('w:start'), '1') # Start at page 1 # Create footer content for odd/default pages create_footer_content(footer, theme_hex) # ===== CREATE EVEN PAGE FOOTER ===== try: # Check if even_page_footer property exists if hasattr(section, 'even_page_footer'): footer_even = section.even_page_footer footer_even.is_linked_to_previous = False if not footer_even.paragraphs: footer_even.add_paragraph() else: footer_even.paragraphs[0].clear() create_footer_content(footer_even, theme_hex) print("✓ Created even page footer using built-in property") else: # Manual method - create even footer via XML from docx.opc.packuri import PackURI from docx.opc.part import XmlPart # Build even footer XML with same structure as odd footer even_ftr_xml = f''' PAGE ↗️ SOM ''' # Create part partname = PackURI(f'/word/footer_even_{id(section)}.xml') element = parse_xml(even_ftr_xml) content_type = 'application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml' package = section.part.package even_part = XmlPart(partname, content_type, element, package) # Create relationship rId = section.part.relate_to(even_part, 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer') # Add footer reference sectPr = section._sectPr # Remove any existing even footer references for ref in list(sectPr.findall(qn('w:footerReference'))): if ref.get(qn('w:type')) == 'even': sectPr.remove(ref) ftr_ref = OxmlElement('w:footerReference') ftr_ref.set(qn('w:type'), 'even') ftr_ref.set(qn('r:id'), rId) sectPr.append(ftr_ref) print("✓ Created even page footer via manual part creation") except Exception as e: print(f"Warning: Could not create even page footer: {e}") import traceback traceback.print_exc() def add_toc_bookmark(doc, toc_title_para): """Add a bookmark to the TOC title paragraph""" bookmark_start = OxmlElement('w:bookmarkStart') bookmark_start.set(qn('w:id'), '0') bookmark_start.set(qn('w:name'), 'TOC_BOOKMARK') toc_title_para._p.insert(0, bookmark_start) bookmark_end = OxmlElement('w:bookmarkEnd') bookmark_end.set(qn('w:id'), '0') toc_title_para._p.append(bookmark_end) def set_module_header(doc, module_name): """Update the top-left header text with the current module name.""" for section in doc.sections: header = section.header header.is_linked_to_previous = False if not header.paragraphs: header.add_paragraph() header.paragraphs[0].clear() para = header.paragraphs[0] para.alignment = WD_ALIGN_PARAGRAPH.LEFT run = para.add_run(f"{module_name.upper()}") run.font.name = 'Montserrat' run.font.size = Pt(10) run.font.bold = True run.font.color.rgb = RGBColor(0, 0, 0) def set_zero_spacing(paragraph): """Force paragraph spacing to 0 before and after.""" paragraph.paragraph_format.space_before = Pt(0) paragraph.paragraph_format.space_after = Pt(0) def is_valid_cours_number(cours_value): """Check if cours value is valid (numeric and not 'S2')""" if pd.isna(cours_value): return False cours_str = str(cours_value).strip().upper() # Skip S2 courses and other specific invalid values if cours_str in ['S2', 'NAN', '']: return False # Try to convert to numeric - if it works and is positive, it's valid try: numeric_value = float(cours_str) # Check if it's a positive number (courses should be positive integers) return numeric_value > 0 and numeric_value == int(numeric_value) except (ValueError, TypeError, OverflowError): return False def check_if_course_has_e_choices(course_questions): """Check if any question in the course has an E choice""" for q_data in course_questions: for choice in q_data['choices']: if choice['letter'].upper() == 'E': return True return False def create_comment_boxes_section(doc, questions_by_course, cours_titles, module_name, theme_color=None, theme_hex=None): """Create comment boxes for all questions that have comments, organized by course This appears after the answer tables""" if theme_color is None: theme_color = THEME_COLOR if theme_hex is None: theme_hex = THEME_COLOR_HEX # Check if there are any comments at all has_any_comments = False for cours_num, course_questions in questions_by_course.items(): for q_data in course_questions: if (q_data.get('comment') or q_data.get('choice_commentaire') or q_data.get('photo_q')): has_any_comments = True break if has_any_comments: break if not has_any_comments: return # Add title for comments section add_column_break(doc) # Start in new column title_para = doc.add_paragraph() title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER title_para.paragraph_format.space_before = Pt(12) title_para.paragraph_format.space_after = Pt(8) # Calculate width based on text length comment_text = "COMMENTAIRES" text_length = len(comment_text) estimated_width = (text_length * 12) + 60 # Create rounded rectangle shape for COMMENTAIRES shape_xml = f''' {comment_text} ''' shape_element = parse_xml(shape_xml) title_para._p.append(shape_element) # Track overall question number overall_question_number = 1 # Process each course for cours_num in sorted(questions_by_course.keys()): course_questions = questions_by_course[cours_num] course_title = cours_titles.get(cours_num, f"COURSE {cours_num}") # Check if this course has any comments course_has_comments = False for q_data in course_questions: if (q_data.get('comment') or q_data.get('choice_commentaire') or q_data.get('photo_q')): course_has_comments = True break if not course_has_comments: overall_question_number += len(course_questions) continue # Add course title course_title_para = doc.add_paragraph() course_title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER course_title_para.paragraph_format.space_before = Pt(8) course_title_para.paragraph_format.space_after = Pt(4) course_title_run = course_title_para.add_run(f"{cours_num}. {course_title}") course_title_run.font.name = 'Montserrat' course_title_run.font.size = Pt(13) course_title_run.font.bold = True course_title_run.font.color.rgb = theme_color # Add comment boxes for questions in this course for q_data in course_questions: question_num = overall_question_number comment = q_data.get('comment') choice_commentaire = q_data.get('choice_commentaire') photo_q = q_data.get('photo_q') # Only add if there are comments or photo if comment or choice_commentaire or photo_q: add_choice_commentaire_section(doc, choice_commentaire, photo_q, theme_color, theme_hex, general_comment=comment, question_num=question_num) overall_question_number += 1 def create_answer_tables(doc, questions_by_course, cours_titles, module_name, bookmark_id, theme_hex=None): """Create multiple choice answer tables organized by course in two-column layout Each course table is split in half with two tables side by side Comment boxes appear directly after each course's answer table""" if theme_hex is None: theme_hex = THEME_COLOR_HEX theme_color = RGBColor.from_string(theme_hex) # Continue with two-column layout for answer tables continue_two_column_layout(doc) # Add title for answer section with rounded frame title_para = doc.add_paragraph() title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER title_para.paragraph_format.space_before = Pt(12) title_para.paragraph_format.space_after = Pt(8) # Calculate width based on text length response_text = "RÉPONSES" text_length = len(response_text) estimated_width = (text_length * 12) + 60 # Create rounded rectangle shape for RÉPONSES shape_xml = f''' {response_text} ''' shape_element = parse_xml(shape_xml) title_para._p.append(shape_element) # Add bookmark to the responses section with module name bm_responses_name = sanitize_bookmark_name(f"RESPONSES_{module_name}") add_bookmark_to_paragraph(title_para, bm_responses_name, bookmark_id) # Create the TOC entry information toc_entry = {'level': 'responses', 'text': f"RÉPONSES - {module_name}", 'bm': bm_responses_name} bookmark_id += 1 # Process each course overall_question_number = 1 for cours_num in sorted(questions_by_course.keys()): course_questions = questions_by_course[cours_num] course_title = cours_titles.get(cours_num, f"COURSE {cours_num}") # Add course title with keep_with_next course_title_para = doc.add_paragraph() course_title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER course_title_para.paragraph_format.space_before = Pt(8) course_title_para.paragraph_format.space_after = Pt(4) course_title_para.paragraph_format.keep_with_next = True course_title_para.paragraph_format.keep_together = True course_title_para.paragraph_format.page_break_before = False # Add widow/orphan control pPr = course_title_para._element.get_or_add_pPr() widowControl = OxmlElement('w:widowControl') widowControl.set(qn('w:val'), '1') pPr.append(widowControl) num_questions = len(course_questions) circled_num = get_circled_number(num_questions) if num_questions == 0: continue # 1. The Course Number (e.g., "101.") run_num = course_title_para.add_run(f"{cours_num}. ") run_num.font.name = 'Inter ExtraBold' run_num.font.size = Pt(13) run_num.font.bold = True run_num.font.color.rgb = theme_color # 2. The Course Title (e.g., "Introduction to Python") run_name = course_title_para.add_run(f"{course_title} ") run_name.font.name = 'Montserrat' run_name.font.size = Pt(13) run_name.font.bold = True run_name.font.color.rgb = theme_color # 3. The Circled Number (e.g., "①") run_circle = course_title_para.add_run(f"{circled_num}") run_circle.font.name = 'MS UI ghotic' run_circle.font.size = Pt(13) # Making the circle smaller run_circle.font.bold = True run_circle.font.color.rgb = theme_color # Check if this course has E choices has_e_choices = check_if_course_has_e_choices(course_questions) # Determine number of columns and headers if has_e_choices: num_cols = 6 headers = ['', 'A', 'B', 'C', 'D', 'E'] choice_letters = ['A', 'B', 'C', 'D', 'E'] else: num_cols = 5 headers = ['', 'A', 'B', 'C', 'D'] choice_letters = ['A', 'B', 'C', 'D'] # Split questions in half mid_point = (num_questions + 1) // 2 first_half = course_questions[:mid_point] second_half = course_questions[mid_point:] # Create container table container_table = doc.add_table(rows=1, cols=2) container_table.alignment = WD_TABLE_ALIGNMENT.CENTER container_table.allow_autofit = False # Set table properties to prevent splitting tblPr = container_table._tbl.tblPr if tblPr is None: tblPr = OxmlElement('w:tblPr') container_table._tbl.insert(0, tblPr) cantSplit = OxmlElement('w:cantSplit') tblPr.append(cantSplit) for row in container_table.rows: for cell in row.cells: tcPr = cell._tc.get_or_add_tcPr() for para in cell.paragraphs: para.paragraph_format.keep_together = True para.paragraph_format.keep_with_next = True # Set container borders to none tblBorders = parse_xml(f''' ''') tblPr.append(tblBorders) # Create tables left_cell = container_table.rows[0].cells[0] create_half_answer_table(left_cell, first_half, num_cols, headers, choice_letters, 1, has_e_choices) right_cell = container_table.rows[0].cells[1] create_half_answer_table(right_cell, second_half, num_cols, headers, choice_letters, mid_point + 1, has_e_choices) # Add spacing after the container table spacing_para = doc.add_paragraph() spacing_para.paragraph_format.space_after = Pt(12) spacing_para.paragraph_format.keep_together = True # Update overall counter AFTER processing all questions in this course overall_question_number += num_questions # Return both bookmark_id and toc_entry return bookmark_id, toc_entry def create_half_answer_table(cell, questions, num_cols, headers, choice_letters, start_q_num, has_e_choices): """Create one half of an answer table inside a cell""" if len(questions) == 0: return num_questions = len(questions) # Fixed Q column width to match the exact measurements from the document q_col_width = Inches(0.75) # Fixed width for Q column to fit all numbers # Create table inside the cell table = cell.add_table(rows=num_questions + 1, cols=num_cols) table.alignment = WD_TABLE_ALIGNMENT.CENTER table.style = None table.allow_autofit = False # CRITICAL: Apply cantSplit to inner table as well tblPr = table._tbl.tblPr if tblPr is None: tblPr = OxmlElement('w:tblPr') table._tbl.insert(0, tblPr) # Prevent table from splitting across pages cantSplit = OxmlElement('w:cantSplit') tblPr.append(cantSplit) tbl = table._tbl tblRows = tbl.xpath(".//w:tr") if tblRows: first_row = tblRows[0] trPr = first_row.get_or_add_trPr() tblHeader = OxmlElement('w:tblHeader') trPr.append(tblHeader) # CRITICAL: Make header row not splittable cantSplit_row = OxmlElement('w:cantSplit') trPr.append(cantSplit_row) # Add table-level border tblBorders = parse_xml(f''' ''') tblPr.append(tblBorders) # CRITICAL: Apply keep-together to all rows for row_idx, row in enumerate(table.rows): # Get or create row properties trPr = row._tr.get_or_add_trPr() # Add cantSplit to each row to prevent it from breaking cantSplit_row = OxmlElement('w:cantSplit') trPr.append(cantSplit_row) for cell_item in row.cells: for paragraph in cell_item.paragraphs: paragraph.paragraph_format.keep_together = True # Keep all rows together by keeping each with next if row_idx < len(table.rows) - 1: paragraph.paragraph_format.keep_with_next = True else: paragraph.paragraph_format.keep_with_next = False # Set exact column widths matching the document measurements choice_col_width = Inches(0.1) # Equal width for all choice columns (A, B, C, D, E) for row in table.rows: for col_idx, cell_item in enumerate(row.cells): if col_idx == 0: cell_item.width = q_col_width else: cell_item.width = choice_col_width # Header row header_cells = table.rows[0].cells for i, header in enumerate(headers): header_cells[i].text = header paragraph = header_cells[i].paragraphs[0] set_zero_spacing(paragraph) paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER run = paragraph.runs[0] if paragraph.runs else paragraph.add_run(header) run.font.name = 'Inter SemiBold' run.font.size = Pt(11) header_cells[i].vertical_alignment = WD_ALIGN_VERTICAL.CENTER # Borders if i == 0: set_cell_borders(header_cells[i], top=True, bottom=True, left=True, right=False) elif i == len(headers) - 1: set_cell_borders(header_cells[i], top=True, bottom=True, left=False, right=True) else: set_cell_borders(header_cells[i], top=True, bottom=True, left=False, right=False) # Gray shading shading_elm = OxmlElement('w:shd') shading_elm.set(qn('w:val'), 'clear') shading_elm.set(qn('w:color'), 'auto') shading_elm.set(qn('w:fill'), 'D9D9D9') header_cells[i]._tc.get_or_add_tcPr().append(shading_elm) # Fill data rows for row_idx, q_data in enumerate(questions, 1): row_cells = table.rows[row_idx].cells is_last_row = (row_idx == num_questions) # Question number q_num = start_q_num + row_idx - 1 paragraph = row_cells[0].paragraphs[0] paragraph.clear() set_zero_spacing(paragraph) paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER run = paragraph.add_run(f"Q{q_num}") run.font.name = 'Inter ExtraBold' run.font.size = Pt(7.5) run.font.bold = True row_cells[0].vertical_alignment = WD_ALIGN_VERTICAL.CENTER set_cell_borders(row_cells[0], top=False, bottom=is_last_row, left=True, right=False) # Get correct answers and available choices correct_answers = [choice['letter'] for choice in q_data['choices'] if choice['is_correct']] available_choices = [choice['letter'].upper() for choice in q_data['choices']] has_no_answers = len(correct_answers) == 0 # Fill choice columns for i, letter in enumerate(choice_letters, 1): if letter not in available_choices: row_cells[i].text = '' elif has_no_answers: row_cells[i].text = '▨' elif letter in correct_answers: row_cells[i].text = '☒' else: row_cells[i].text = '☐' paragraph = row_cells[i].paragraphs[0] set_zero_spacing(paragraph) paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER if row_cells[i].text: run = paragraph.runs[0] if paragraph.runs else paragraph.add_run(row_cells[i].text) run.font.name = 'Calibri' run.font.size = Pt(11) run.font.bold = True row_cells[i].vertical_alignment = WD_ALIGN_VERTICAL.CENTER # Borders if i == len(choice_letters): set_cell_borders(row_cells[i], top=False, bottom=is_last_row, left=False, right=True) else: set_cell_borders(row_cells[i], top=False, bottom=is_last_row, left=False, right=False) def create_empty_course_table(doc, course_questions, course_num, overall_start_num): """Create an empty answer table for all questions of one course with dynamic E column Split in half with two tables side by side, matching create_answer_tables layout""" num_questions = len(course_questions) if num_questions == 0: return overall_start_num # Check if this course has E choices has_e_choices = check_if_course_has_e_choices(course_questions) # Determine number of columns and headers if has_e_choices: num_cols = 6 # Q, A, B, C, D, E headers = ['', 'A', 'B', 'C', 'D', 'E'] choice_letters = ['A', 'B', 'C', 'D', 'E'] else: num_cols = 5 # Q, A, B, C, D headers = ['', 'A', 'B', 'C', 'D'] choice_letters = ['A', 'B', 'C', 'D'] # Split questions in half mid_point = (num_questions + 1) // 2 # Round up for first half first_half = course_questions[:mid_point] second_half = course_questions[mid_point:] print( f"DEBUG: Empty table for Course {course_num} - Total questions: {num_questions}, Split: {len(first_half)} + {len(second_half)}") # Create a container table with 1 row and 2 columns to hold both tables side by side container_table = doc.add_table(rows=1, cols=2) container_table.alignment = WD_TABLE_ALIGNMENT.CENTER container_table.allow_autofit = False # Set table properties to prevent splitting tblPr = container_table._tbl.tblPr if tblPr is None: tblPr = OxmlElement('w:tblPr') container_table._tbl.insert(0, tblPr) # Add cantSplit property to prevent table from breaking across pages cantSplit = OxmlElement('w:cantSplit') tblPr.append(cantSplit) # Apply to all cells in the container to reinforce keep-together for row in container_table.rows: for cell in row.cells: tcPr = cell._tc.get_or_add_tcPr() for para in cell.paragraphs: para.paragraph_format.keep_together = True para.paragraph_format.keep_with_next = True # Set container borders to none tblBorders = parse_xml(f''' ''') tblPr.append(tblBorders) # Create left table (first half) left_cell = container_table.rows[0].cells[0] create_half_empty_table(left_cell, first_half, num_cols, headers, choice_letters, overall_start_num, has_e_choices) # Create right table (second half) right_cell = container_table.rows[0].cells[1] start_q_num_right = overall_start_num + len(first_half) create_half_empty_table(right_cell, second_half, num_cols, headers, choice_letters, start_q_num_right, has_e_choices) # Add spacing after the container table spacing_para = doc.add_paragraph() spacing_para.paragraph_format.space_after = Pt(12) spacing_para.paragraph_format.keep_together = True return overall_start_num + num_questions def create_half_empty_table(cell, questions, num_cols, headers, choice_letters, start_q_num, has_e_choices): """Create one half of an empty answer table inside a cell""" if len(questions) == 0: return num_questions = len(questions) # Fixed Q column width to match answer tables q_col_width = Inches(0.75) # Fixed width for Q column # Create table inside the cell table = cell.add_table(rows=num_questions + 1, cols=num_cols) table.alignment = WD_TABLE_ALIGNMENT.CENTER table.style = None table.allow_autofit = False # CRITICAL: Apply cantSplit to inner table as well tblPr = table._tbl.tblPr if tblPr is None: tblPr = OxmlElement('w:tblPr') table._tbl.insert(0, tblPr) # Prevent table from splitting across pages cantSplit = OxmlElement('w:cantSplit') tblPr.append(cantSplit) # Mark first row as header row tbl = table._tbl tblRows = tbl.xpath(".//w:tr") if tblRows: first_row = tblRows[0] trPr = first_row.get_or_add_trPr() tblHeader = OxmlElement('w:tblHeader') trPr.append(tblHeader) # Make header row not splittable cantSplit_row = OxmlElement('w:cantSplit') trPr.append(cantSplit_row) # Add table-level border tblBorders = parse_xml(f''' ''') tblPr.append(tblBorders) # CRITICAL: Apply keep-together to all rows for row_idx, row in enumerate(table.rows): # Get or create row properties trPr = row._tr.get_or_add_trPr() # Add cantSplit to each row to prevent it from breaking cantSplit_row = OxmlElement('w:cantSplit') trPr.append(cantSplit_row) for cell_item in row.cells: for paragraph in cell_item.paragraphs: paragraph.paragraph_format.keep_together = True # Keep all rows together by keeping each with next if row_idx < len(table.rows) - 1: paragraph.paragraph_format.keep_with_next = True else: paragraph.paragraph_format.keep_with_next = False # Set exact column widths matching the answer table measurements choice_col_width = Inches(0.1) # Equal width for all choice columns for row in table.rows: for col_idx, cell_item in enumerate(row.cells): if col_idx == 0: cell_item.width = q_col_width else: cell_item.width = choice_col_width # Header row header_cells = table.rows[0].cells for i, header in enumerate(headers): header_cells[i].text = header paragraph = header_cells[i].paragraphs[0] set_zero_spacing(paragraph) paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER run = paragraph.runs[0] if paragraph.runs else paragraph.add_run(header) run.font.name = 'Inter SemiBold' run.font.size = Pt(11) header_cells[i].vertical_alignment = WD_ALIGN_VERTICAL.CENTER # Borders if i == 0: set_cell_borders(header_cells[i], top=True, bottom=True, left=True, right=False) elif i == len(headers) - 1: set_cell_borders(header_cells[i], top=True, bottom=True, left=False, right=True) else: set_cell_borders(header_cells[i], top=True, bottom=True, left=False, right=False) # Gray shading shading_elm = OxmlElement('w:shd') shading_elm.set(qn('w:val'), 'clear') shading_elm.set(qn('w:color'), 'auto') shading_elm.set(qn('w:fill'), 'D9D9D9') header_cells[i]._tc.get_or_add_tcPr().append(shading_elm) # Fill data rows with empty checkboxes for row_idx, q_data in enumerate(questions, 1): row_cells = table.rows[row_idx].cells is_last_row = (row_idx == num_questions) # Question number q_num = start_q_num + row_idx - 1 paragraph = row_cells[0].paragraphs[0] paragraph.clear() set_zero_spacing(paragraph) paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER run = paragraph.add_run(f"Q{q_num}") run.font.name = 'Inter ExtraBold' run.font.size = Pt(7.5) run.font.bold = True row_cells[0].vertical_alignment = WD_ALIGN_VERTICAL.CENTER set_cell_borders(row_cells[0], top=False, bottom=is_last_row, left=True, right=False) # Get available choices for this specific question available_choices = [choice['letter'].upper() for choice in q_data['choices']] # Fill choice columns with empty checkboxes for i, letter in enumerate(choice_letters, 1): if letter not in available_choices: # Choice doesn't exist - leave empty row_cells[i].text = '' else: # Choice exists - show empty checkbox row_cells[i].text = '☐' paragraph = row_cells[i].paragraphs[0] set_zero_spacing(paragraph) paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER if row_cells[i].text: run = paragraph.runs[0] if paragraph.runs else paragraph.add_run(row_cells[i].text) run.font.name = 'Calibri' run.font.size = Pt(11) run.font.bold = True row_cells[i].vertical_alignment = WD_ALIGN_VERTICAL.CENTER # Borders if i == len(choice_letters): set_cell_borders(row_cells[i], top=False, bottom=is_last_row, left=False, right=True) else: set_cell_borders(row_cells[i], top=False, bottom=is_last_row, left=False, right=False) def sanitize_bookmark_name(text): """Create a safe bookmark name (letters, numbers, underscores).""" name = re.sub(r'[^A-Za-z0-9_]', '_', str(text)) # Word has bookmark name length limits — keep it short return name[:40] def add_bookmark_to_paragraph(paragraph, bookmark_name, bm_id): """Wrap the paragraph with a Word bookmark (start & end).""" # bookmarkStart: should be before the paragraph text bookmark_start = OxmlElement('w:bookmarkStart') bookmark_start.set(qn('w:id'), str(bm_id)) bookmark_start.set(qn('w:name'), bookmark_name) paragraph._p.insert(0, bookmark_start) # bookmarkEnd: appended after paragraph content bookmark_end = OxmlElement('w:bookmarkEnd') bookmark_end.set(qn('w:id'), str(bm_id)) paragraph._p.append(bookmark_end) def add_pagenumber_field_in_paragraph(paragraph, bookmark_name, right_inch=Inches(6.5)): """ Insert a PAGEREF field pointing to bookmark_name. This function also adds a right tab stop with dotted leader and a tab character so the page number appears at the right edge with dot leaders. """ # add a right aligned tab stop with dots try: paragraph.paragraph_format.tab_stops.add_tab_stop(right_inch, WD_TAB_ALIGNMENT.RIGHT, WD_TAB_LEADER.DOTS) except Exception: # If the tab_stop API differs, ignore and still try to insert the field pass # Add a tab character so the PAGEREF sits at the right tab stop tab_run = paragraph.add_run('\t') # Create field: begin -> instrText -> end fldChar1 = OxmlElement('w:fldChar'); fldChar1.set(qn('w:fldCharType'), 'begin') instrText = OxmlElement('w:instrText'); instrText.set(qn('xml:space'), 'preserve') instrText.text = f"PAGEREF {bookmark_name} \\h" fldChar2 = OxmlElement('w:fldChar'); fldChar2.set(qn('w:fldCharType'), 'end') tab_run._r.append(fldChar1) tab_run._r.append(instrText) tab_run._r.append(fldChar2) def estimate_content_length(questions_by_course, cours_titles): """Estimate relative content length for each question to better balance columns""" question_lengths = [] total_estimated_lines = 0 for cours_num in sorted(questions_by_course.keys()): course_questions = questions_by_course[cours_num] course_title = cours_titles.get(cours_num, f"COURSE {cours_num}") # Add course title weight (approximately 2-3 lines) course_weight = 3 total_estimated_lines += course_weight for q_data in course_questions: # Estimate lines for this question question_lines = 2 # Question line + spacing question_lines += len(q_data['choices']) # Choice lines question_lines += 2 # Source/answer line + spacing if q_data.get('comment') and str(q_data['comment']).strip() and str(q_data['comment']).lower() != 'nan': question_lines += 2 # Comment lines question_lengths.append({ 'cours': cours_num, 'question': q_data, 'estimated_lines': question_lines }) total_estimated_lines += question_lines return question_lengths, total_estimated_lines def read_course_titles_from_module_sheet(excel_file_path, module_name): """Read course titles from a module-specific sheet (case-insensitive)""" cours_titles = {} print(f" DEBUG: Looking for sheet matching module '{module_name}'") # Get all sheet names from the Excel file xls = pd.ExcelFile(excel_file_path) sheet_names = xls.sheet_names # Find matching sheet (case-insensitive) target_sheet = None module_name_lower = str(module_name).strip().lower() print(f" DEBUG: Module name (lowercase): '{module_name_lower}'") print(f" DEBUG: Available sheets: {sheet_names}") for sheet in sheet_names: sheet_lower = sheet.strip().lower() print(f" DEBUG: Comparing '{module_name_lower}' with '{sheet_lower}'") if sheet_lower == module_name_lower: target_sheet = sheet print(f" DEBUG: MATCH FOUND! Using sheet '{target_sheet}'") break if target_sheet is None: print(f" DEBUG: No sheet found matching module '{module_name}'") return cours_titles # Read the matching sheet cours_df = pd.read_excel(excel_file_path, sheet_name=target_sheet) print(f" DEBUG: Sheet '{target_sheet}' has {len(cours_df)} rows") print(f" DEBUG: Sheet columns: {list(cours_df.columns)}") if not cours_df.empty and 'cours' in cours_df.columns and 'titre' in cours_df.columns: for idx, row in cours_df.iterrows(): print(f" DEBUG: Row {idx}: cours={row['cours']}, titre={row.get('titre', 'N/A')}") if pd.notna(row['cours']) and pd.notna(row['titre']): # Only store valid numeric courses if is_valid_cours_number(row['cours']): cours_num = int(float(str(row['cours']).strip())) cours_titles[cours_num] = row['titre'] print(f" DEBUG: Added cours {cours_num}: {row['titre']}") else: print(f" DEBUG: Skipped invalid cours: {row['cours']}") print(f" DEBUG: Final count: {len(cours_titles)} course titles from sheet '{target_sheet}'") else: print(f" DEBUG: Sheet '{target_sheet}' doesn't have expected structure") print(f" DEBUG: Has 'cours' column: {'cours' in cours_df.columns}") print(f" DEBUG: Has 'titre' column: {'titre' in cours_df.columns}") return cours_titles def enable_odd_even_headers(doc): """Enable different odd and even page headers/footers for the entire document""" try: # Access the document settings settings = doc.settings settings_element = settings.element # Add evenAndOddHeaders element if it doesn't exist even_odd = settings_element.find(qn('w:evenAndOddHeaders')) if even_odd is None: even_odd = OxmlElement('w:evenAndOddHeaders') # Insert at the beginning of settings settings_element.insert(0, even_odd) print("✓ Enabled odd/even page headers in document settings") else: print("✓ Odd/even page headers already enabled") except Exception as e: print(f"Warning: Could not enable odd/even headers: {e}") # Try alternative method - modify the XML directly try: doc_element = doc.element body = doc_element.body # Find or create sectPr sectPr = body.sectPr if sectPr is not None: print("✓ Document structure ready for odd/even headers") except Exception as e2: print(f"Warning: Alternative method also failed: {e2}") def create_flexible_header(section, module_name, sheet_name, display_name=None, left_margin_inches=0, right_margin_inches=0, theme_hex=None): """Create flexible header text boxes that switch positions on odd/even pages""" if theme_hex is None: theme_hex = THEME_COLOR_HEX section.header_distance = Cm(0.6) module_name_str = str(module_name).upper() # Use display_name if provided, otherwise use sheet_name if display_name: sheet_name_str = str(display_name).upper() else: sheet_name_str = str(sheet_name).upper() module_name_str = html.escape(module_name_str) sheet_name_str = html.escape(sheet_name_str) # Calculate approximate widths based on text length module_width = max(len(module_name_str) * 10 + 60, 100) sheet_width = max(len(sheet_name_str) * 10 + 60, 100) def create_header_content(paragraph, left_text, left_width, right_text, right_width): """Helper to create header content with two text boxes""" paragraph.clear() left_xml = f''' {left_text} ''' right_xml = f''' {right_text} ''' paragraph._p.append(parse_xml(left_xml)) paragraph._p.append(parse_xml(right_xml)) # ========== CREATE DEFAULT/ODD PAGES HEADER (Sheet Left, Module Right) ========== header_odd = section.header header_odd.is_linked_to_previous = False if not header_odd.paragraphs: header_odd.add_paragraph() create_header_content(header_odd.paragraphs[0], sheet_name_str, sheet_width, module_name_str, module_width) # ========== CREATE EVEN PAGES HEADER (Module Left, Sheet Right) ========== try: # Check if even_page_header property exists if hasattr(section, 'even_page_header'): header_even = section.even_page_header header_even.is_linked_to_previous = False if not header_even.paragraphs: header_even.add_paragraph() create_header_content(header_even.paragraphs[0], module_name_str, module_width, sheet_name_str, sheet_width) print("✓ Created even page header using built-in property") else: # Manual method from docx.opc.packuri import PackURI from docx.opc.part import XmlPart # Build even header XML even_hdr_xml = f''' {module_name_str} {sheet_name_str} ''' # Create part partname = PackURI(f'/word/header_even_{id(section)}.xml') element = parse_xml(even_hdr_xml) content_type = 'application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml' package = section.part.package even_part = XmlPart(partname, content_type, element, package) # Create relationship rId = section.part.relate_to(even_part, 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/header') # Add header reference sectPr = section._sectPr for ref in list(sectPr.findall(qn('w:headerReference'))): if ref.get(qn('w:type')) == 'even': sectPr.remove(ref) hdr_ref = OxmlElement('w:headerReference') hdr_ref.set(qn('w:type'), 'even') hdr_ref.set(qn('r:id'), rId) sectPr.append(hdr_ref) print("✓ Created even page header via manual part creation") except Exception as e: print(f"Warning: Could not create even header: {e}") import traceback traceback.print_exc() def extract_display_name_from_excel(excel_file_path): """Extract display name from Excel file - checks multiple locations""" try: xls = pd.ExcelFile(excel_file_path) first_sheet_name = xls.sheet_names[0] df = pd.read_excel(excel_file_path, sheet_name=first_sheet_name, nrows=5) # Strategy 1: Look for a cell with "Name:", "Display Name:", etc. for col in df.columns: for idx, val in df[col].items(): if pd.notna(val): val_str = str(val).strip().lower() if any(keyword in val_str for keyword in ['name:', 'nom:', 'display name:', 'titre:']): # Get the value from next cell or same row try: if ':' in str(val): return str(val).split(':', 1)[1].strip() elif idx + 1 < len(df): next_val = df[col].iloc[idx + 1] if pd.notna(next_val): return str(next_val).strip() except: pass # Strategy 2: Check for a dedicated "Info" or "Metadata" sheet for sheet_name in xls.sheet_names: if any(keyword in sheet_name.lower() for keyword in ['info', 'metadata', 'details', 'nom']): info_df = pd.read_excel(excel_file_path, sheet_name=sheet_name, nrows=10) for col in info_df.columns: for idx, val in info_df[col].items(): if pd.notna(val) and 'name' in str(val).lower(): if idx + 1 < len(info_df): next_val = info_df[col].iloc[idx + 1] if pd.notna(next_val): return str(next_val).strip() # Strategy 3: Check first cell of first sheet if not df.empty and pd.notna(df.iloc[0, 0]): first_cell = str(df.iloc[0, 0]).strip() if len(first_cell) < 50 and not any(char.isdigit() for char in first_cell[:10]): return first_cell # Fallback: Use filename without extension return os.path.splitext(os.path.basename(excel_file_path))[0] except Exception as e: print(f"Error extracting display name: {e}") # Ultimate fallback return os.path.splitext(os.path.basename(excel_file_path))[0] def add_colored_column_separator(section, theme_hex=None): """Add a custom colored vertical line between columns to both odd and even headers""" if theme_hex is None: theme_hex = THEME_COLOR_HEX def add_line_to_header(header_elem, line_id="columnSeparator"): """Helper function to add the separator line to a header""" # Find or create the first paragraph in header if not header_elem.paragraphs: header_elem.add_paragraph() header_para = header_elem.paragraphs[0] # Create a vertical line using VML shape # The line starts AFTER the header and goes to the bottom line_xml = f''' ''' line_element = parse_xml(line_xml) header_para._p.append(line_element) # Add line to odd/default header header = section.header add_line_to_header(header, "columnSeparatorOdd") # Add line to even header try: # Check if even_page_header property exists if hasattr(section, 'even_page_header'): header_even = section.even_page_header add_line_to_header(header_even, "columnSeparatorEven") print("✓ Added column separator to even page header using built-in property") else: # Manual method - we need to add the line to the already-created even header # Find the even header part sectPr = section._sectPr even_header_refs = [ref for ref in sectPr.findall(qn('w:headerReference')) if ref.get(qn('w:type')) == 'even'] if even_header_refs: # Get the relationship ID rId = even_header_refs[0].get(qn('r:id')) # Get the header part even_header_part = section.part.related_parts[rId] # Find the first paragraph in the even header even_header_element = even_header_part.element paras = even_header_element.findall(qn('w:p')) if paras: # Add the line to the first paragraph line_xml_content = f''' ''' line_element = parse_xml(line_xml_content) paras[0].append(line_element) print("✓ Added column separator to even page header via manual part access") else: print("⚠ No even header reference found - skipping even page separator line") except Exception as e: print(f"Warning: Could not add separator line to even page header: {e}") import traceback traceback.print_exc() def add_choice_commentaire_section(doc, choice_commentaire, photo_q_path, theme_color=None, theme_hex=None, general_comment=None, question_num=None, highlight_words=None): """Add a framed section with general comment, choice commentaires and optional photo Q Split into 2/3 for comments and 1/3 for photo (or full width if no photo) WITH DASHED BORDER AND SHADED BACKGROUND""" if highlight_words is None: highlight_words = [] if theme_color is None: theme_color = THEME_COLOR if theme_hex is None: theme_hex = THEME_COLOR_HEX # Only add if there are comments or photo if not choice_commentaire and not photo_q_path and not general_comment: return print( f"DEBUG: add_choice_commentaire_section called with {len(choice_commentaire) if choice_commentaire else 0} comments") # Check if photo exists and is valid has_photo = False if photo_q_path: # Clean the path photo_q_path_clean = str(photo_q_path).strip() print(f"DEBUG: Checking photo path: '{photo_q_path_clean}'") if photo_q_path_clean and photo_q_path_clean.lower() not in ['nan', 'none', '']: # Check file existence if os.path.exists(photo_q_path_clean): has_photo = True print(f"DEBUG: ✓ Photo Q exists: {photo_q_path_clean}") # Check if it's a valid image file valid_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff'] file_ext = os.path.splitext(photo_q_path_clean)[1].lower() if file_ext not in valid_extensions: print(f"WARNING: File extension '{file_ext}' might not be supported. Valid: {valid_extensions}") else: print(f"DEBUG: ✗ Photo Q does NOT exist at: {photo_q_path_clean}") print(f"DEBUG: Current working directory: {os.getcwd()}") print(f"DEBUG: Absolute path would be: {os.path.abspath(photo_q_path_clean)}") # Create a table with 1 row and 2 columns (or 1 if no photo) if has_photo: table = doc.add_table(rows=1, cols=2) table.alignment = WD_TABLE_ALIGNMENT.LEFT table.allow_autofit = False # Set column widths: 2/3 for text, 1/3 for photo left_cell = table.rows[0].cells[0] right_cell = table.rows[0].cells[1] # Set explicit widths left_cell.width = Inches(3.5) # 2/3 of available width right_cell.width = Inches(1.75) # 1/3 of available width # Set vertical alignment to top for both cells left_cell.vertical_alignment = WD_ALIGN_VERTICAL.TOP right_cell.vertical_alignment = WD_ALIGN_VERTICAL.TOP else: table = doc.add_table(rows=1, cols=1) table.alignment = WD_TABLE_ALIGNMENT.LEFT left_cell = table.rows[0].cells[0] left_cell.width = Inches(5.25) # Full width # Add DASHED border to the table with theme color tblPr = table._tbl.tblPr if tblPr is None: tblPr = OxmlElement('w:tblPr') table._tbl.insert(0, tblPr) # Use theme_hex (the input color) for borders border_color = theme_hex # Border size: 1.5pt = 12 eighths of a point (1.5 * 8 = 12) tblBorders = parse_xml(f''' ''') tblPr.append(tblBorders) # Add padding to cells for cell in table.rows[0].cells: tcPr = cell._tc.get_or_add_tcPr() tcMar = OxmlElement('w:tcMar') for margin in ['top', 'left', 'bottom', 'right']: mar = OxmlElement(f'w:{margin}') mar.set(qn('w:w'), '80') # 80 twips = ~0.06 inches padding mar.set(qn('w:type'), 'dxa') tcMar.append(mar) tcPr.append(tcMar) # Add light gray shading to left cell left_tcPr = left_cell._tc.get_or_add_tcPr() shading_elm = OxmlElement('w:shd') shading_elm.set(qn('w:val'), 'clear') shading_elm.set(qn('w:color'), 'auto') shading_elm.set(qn('w:fill'), 'F2F2F2') # Light gray (20% black) left_tcPr.append(shading_elm) # If there's a photo, also add shading to the right cell if has_photo: right_tcPr = right_cell._tc.get_or_add_tcPr() shading_elm_right = OxmlElement('w:shd') shading_elm_right.set(qn('w:val'), 'clear') shading_elm_right.set(qn('w:color'), 'auto') shading_elm_right.set(qn('w:fill'), 'F2F2F2') # Same light gray right_tcPr.append(shading_elm_right) # Clear the default empty paragraph first if left_cell.paragraphs: left_cell.paragraphs[0].clear() comment_index = 0 # ADD GENERAL COMMENT FIRST if it exists if question_num and general_comment and str(general_comment).strip() and str(general_comment).lower() != 'nan': # Use the first paragraph for the general comment if comment_index == 0 and left_cell.paragraphs: question_num_para = left_cell.paragraphs[0] else: question_num_para = left_cell.add_paragraph() question_num_para.paragraph_format.space_before = Pt(1) question_num_para.paragraph_format.space_after = Pt(1) question_num_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY # Add the general comment text text_run = question_num_para.add_run(f" {str(general_comment)}") text_run.font.name = 'Inter SemiBold' text_run.font.size = Pt(8) comment_index += 1 # Add choice commentaires if choice_commentaire: # Filter out comments that are only X's filtered_commentaire = {letter: text for letter, text in choice_commentaire.items() if not is_only_x_string(text)} print(f"DEBUG: Adding {len(filtered_commentaire)} choice comments") for choice_letter in sorted(filtered_commentaire.keys()): comment_text = filtered_commentaire[choice_letter] print(f"DEBUG: Adding comment {choice_letter}: {comment_text[:50]}...") # Use the first paragraph if no general comment, otherwise add new if comment_index == 0 and left_cell.paragraphs: comment_para = left_cell.paragraphs[0] else: comment_para = left_cell.add_paragraph() comment_para.paragraph_format.space_before = Pt(1) comment_para.paragraph_format.space_after = Pt(0) comment_para.paragraph_format.line_spacing = 1.0 comment_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY # Choice letter in bold with theme color letter_run = comment_para.add_run(f"{choice_letter}- ") letter_run.font.name = 'Inter ExtraBold' letter_run.font.size = Pt(8) letter_run.font.bold = True letter_run.font.color.rgb = theme_color # Comment text text_run = comment_para.add_run(comment_text) text_run.font.name = 'Inter Display SemiBold' text_run.font.size = Pt(8) # highlight_words_in_text(comment_para, comment_text, highlight_words, theme_color, font_name='Inter Display SemiBold', font_size=8) comment_index += 1 # If no comments at all but has photo, add placeholder text if comment_index == 0: print("DEBUG: No comments found, adding placeholder") placeholder_para = left_cell.paragraphs[0] if left_cell.paragraphs else left_cell.add_paragraph() placeholder_para.alignment = WD_ALIGN_PARAGRAPH.CENTER placeholder_run = placeholder_para.add_run("[See image]") placeholder_run.font.name = 'Inter Display' placeholder_run.font.size = Pt(9) placeholder_run.font.italic = True # Add photo to right cell if exists if has_photo: try: print(f"DEBUG: Attempting to add photo: {photo_q_path_clean}") # Clear the default empty paragraph and reuse it if right_cell.paragraphs: photo_para = right_cell.paragraphs[0] photo_para.clear() else: photo_para = right_cell.add_paragraph() photo_para.alignment = WD_ALIGN_PARAGRAPH.CENTER photo_para.paragraph_format.space_before = Pt(0) photo_para.paragraph_format.space_after = Pt(0) run = photo_para.add_run() # Try different image sizes try: run.add_picture(photo_q_path_clean, width=Inches(1.5)) print(f"DEBUG: ✓ Successfully added Photo Q at 1.5 inches width") except Exception as e1: print(f"DEBUG: Failed at 1.5 inches, trying height-based: {e1}") run.add_picture(photo_q_path_clean, height=Inches(2.0)) print(f"DEBUG: ✓ Successfully added Photo Q at 2.0 inches height") except Exception as e: # If photo fails to load, add error text print(f"ERROR: Failed to add Photo Q: {type(e).__name__}: {str(e)}") error_para = right_cell.add_paragraph() error_para.alignment = WD_ALIGN_PARAGRAPH.CENTER error_run = error_para.add_run(f"[Photo error: {type(e).__name__}]") error_run.font.size = Pt(7) error_run.font.italic = True error_run.font.color.rgb = RGBColor(255, 0, 0) # Add spacing after the table empty_para = doc.add_paragraph(' ', style='TinySpace') empty_para.paragraph_format.space_before = Pt(0) empty_para.paragraph_format.space_after = Pt(0) empty_para.paragraph_format.line_spacing = Pt(7) empty_run = empty_para.add_run(' ') empty_run.font.size = Pt(7) def extract_embedded_images_info(excel_file_path): """ Inform user about embedded images in Excel. Excel formulas like =DISPIMG() cannot be extracted programmatically with pandas. """ print("\n" + "!" * 60) print("IMPORTANT: EMBEDDED IMAGES DETECTED") print("!" * 60) print("Your Excel file contains embedded images using =DISPIMG() formulas.") print("These images are stored INSIDE the Excel file and cannot be accessed") print("as file paths.") print() print("TO FIX THIS:") print("1. Open your Excel file") print("2. Save the images as separate files (right-click > Save as Picture)") print("3. Update the 'Photo Q' and 'Photo C' columns with the file paths") print(" Example: 'images/question1.png' instead of '=DISPIMG(...)'") print() print("Alternative: Use OneDrive/SharePoint links or export images first") print("!" * 60 + "\n") def process_excel_to_word(excel_file_path, output_word_path, image_folder, display_name=None, use_two_columns=True, add_separator_line=True, balance_method="dynamic", theme_hex=None, highlight_words=None, show_comments=True): """Main function to process Excel and create a Word document with TOC on the first page Args: show_comments (bool): If True, display comment boxes. If False, hide all comments. Default is True. """ if highlight_words is None: highlight_words = [] if theme_hex is None: theme_hex = THEME_COLOR_HEX theme_color = RGBColor.from_string(theme_hex) # Prepare image folder (extract if ZIP) actual_image_folder, is_temp, temp_dir_obj = prepare_image_folder(image_folder) # Map images from the prepared folder question_photos = map_images_from_excel(excel_file_path, actual_image_folder) # Read the Excel file xls = pd.ExcelFile(excel_file_path) first_sheet_name = xls.sheet_names[0] # Get the first sheet name questions_df = pd.read_excel(excel_file_path, sheet_name=first_sheet_name) # Extract display name if not provided if display_name is None: display_name = extract_display_name_from_excel(excel_file_path) print(f"Extracted display name: {display_name}") # Get unique modules from Questions sheet (case-insensitive) module_col = None for col in questions_df.columns: if col.lower().strip() == 'module': module_col = col break if module_col: xls_temp = pd.ExcelFile(excel_file_path) all_sheets = xls_temp.sheet_names modules_in_questions = questions_df[module_col].dropna().unique() # Create a mapping from lowercase module name to actual sheet name module_to_sheet = {} for module in modules_in_questions: module_lower = str(module).strip().lower() for sheet in all_sheets: if sheet.strip().lower() == module_lower: module_to_sheet[module] = sheet break # Normalize all module names in the dataframe questions_df[module_col] = questions_df[module_col].apply( lambda x: module_to_sheet.get(x, x) if pd.notna(x) else x ) # Get unique modules in sheet order modules = [] seen = set() for sheet in all_sheets: sheet_lower = sheet.strip().lower() for module in modules_in_questions: if str(module).strip().lower() == sheet_lower and sheet not in seen: modules.append(sheet) seen.add(sheet) break else: modules = [] # Read course titles from module-specific sheets modules_data = {} xls = pd.ExcelFile(excel_file_path) for module in modules: try: cours_titles_for_module = read_course_titles_from_module_sheet(excel_file_path, module) modules_data[module] = cours_titles_for_module except Exception as e: print(f"DEBUG: Error reading module '{module}': {e}") # Clean column names questions_df.columns = questions_df.columns.str.strip() # Check if photo columns exist has_photo_q_col = 'Photo Q' in questions_df.columns has_photo_c_col = 'Photo C' in questions_df.columns if not has_photo_q_col and not has_photo_c_col: print("ℹ️ No photo columns found in Excel - images will be skipped") elif not has_photo_q_col: print("ℹ️ 'Photo Q' column not found - question images will be skipped") elif not has_photo_c_col: print("ℹ️ 'Photo C' column not found - choice images will be skipped") # Create Word document doc = Document() enable_odd_even_headers(doc) core_props = doc.core_properties core_props.author = "Natural Killer" core_props.title = "Manhattan Project" core_props.subject = "QCM" core_props.comments = "Created By NK" core_props.last_modified_by = "NK" core_props.generator = "Microsoft Word" set_page_size(doc.sections[0], PAPER_SIZES['A4_WIDE'][0], PAPER_SIZES['A4'][1]) # ======================================== # ADD THREE EMPTY PAGES AT THE BEGINNING # ======================================== for i in range(3): doc.add_paragraph() # Add empty paragraph if i < 2: # Add page breaks for first 2 pages (3rd page leads to TOC) doc.add_page_break() # TOC helpers toc_entries = [] bookmark_id = 1 # Set page margins for section in doc.sections: section.top_margin = Inches(0.5) section.bottom_margin = Inches(0.5) section.left_margin = Cm(1.1) section.right_margin = Cm(1.1) # ======================================== # CREATE TOC SECTION FIRST (TWO COLUMNS - SPLIT PAGE) # ======================================== toc_section = doc.sections[0] sectPr = toc_section._sectPr cols = sectPr.find(qn('w:cols')) if cols is None: cols = OxmlElement('w:cols') sectPr.append(cols) cols.set(qn('w:num'), '2') cols.set(qn('w:space'), '432') # 0.3 inch spacing between columns # Add TOC title toc_title = doc.add_paragraph() toc_title.alignment = WD_ALIGN_PARAGRAPH.CENTER toc_title.paragraph_format.space_after = Pt(12) toc_title_run = toc_title.add_run("Sommaire") toc_title_run.font.name = 'Montserrat' toc_title_run.font.size = Pt(16) toc_title_run.font.bold = True toc_title_run.font.color.rgb = theme_color # Add bookmark to TOC title add_toc_bookmark(doc, toc_title) # Remember position to insert TOC entries later toc_insert_index = len(doc.paragraphs) # ======================================== # START NEW SECTION FOR CONTENT (TWO COLUMNS) # ======================================== doc.add_section(WD_SECTION.NEW_PAGE) # Process questions processed_questions = [] current_question = None current_choices = [] skipped_s2_questions = 0 for idx, row in questions_df.iterrows(): numero = row['Numero'] if pd.notna(numero): if current_question is not None and current_choices and is_valid_cours_number(current_cours): processed_questions.append({ 'numero': current_question, 'question_text': current_question_text, 'source': current_source, 'comment': current_comment, 'cours': int(float(str(current_cours).strip())), 'module': current_module, 'choices': current_choices.copy(), 'choice_commentaire': current_choice_commentaire, 'photo_q': question_photos.get(current_question, {}).get('photo_q', None), # LINKED! 'photo_c': question_photos.get(current_question, {}).get('photo_c', None) # LINKED! }) elif current_question is not None and not is_valid_cours_number(current_cours): skipped_s2_questions += 1 current_question = numero current_question_text = str(row['Question']).strip() current_source = str(row['Source']).strip() if pd.notna(row['Source']) else "" current_comment = str(row['Comment']).strip() if pd.notna(row['Comment']) and str( row['Comment']).lower() != 'nan' else None current_cours = row['Cours'] if pd.notna(row['Cours']) else 1 current_module = row[module_col] if module_col and pd.notna(row[module_col]) else None current_choices = [] current_choice_commentaire = {} # NEW: Initialize per question # Initialize photo storage for this question if current_question not in question_photos: question_photos[current_question] = {'photo_q': None, 'photo_c': None} current_choice_commentaire = {} # CHECK FOR PHOTOS ON THIS ROW - Store DIRECTLY in question_photos dict if has_photo_q_col and pd.notna(row.get('Photo Q', None)): photo_q_raw = str(row['Photo Q']).strip() if has_photo_c_col and pd.notna(row.get('Photo C', None)): photo_c_raw = str(row['Photo C']).strip() # Process each CHOICE row - CHECK FOR PHOTOS ON EVERY ROW! if is_valid_cours_number(current_cours): choice_letter = str(row['Order']).strip().upper() choice_text = str(row['ChoiceText']).strip() ct_value = str(row['CT']).strip().upper() if pd.notna(row['CT']) else "" is_correct = ct_value == 'X' # Read choice commentaire for THIS specific choice if pd.notna(row.get('Choice commentaire', None)): choice_comment = str(row['Choice commentaire']).strip() if choice_comment and choice_comment.lower() not in ['nan', 'none', '']: current_choice_commentaire[choice_letter] = choice_comment # CHECK FOR PHOTOS ON THIS ROW (could be any choice row!) # CRITICAL FIX: Store directly in question_photos, not in temporary variables if has_photo_q_col and pd.notna(row.get('Photo Q', None)): photo_q_raw = str(row['Photo Q']).strip() if has_photo_c_col and pd.notna(row.get('Photo C', None)): photo_c_raw = str(row['Photo C']).strip() if choice_text and choice_text.lower() != 'nan' and choice_text != '': current_choices.append({ 'letter': choice_letter, 'text': choice_text, 'is_correct': is_correct }) if current_question is not None and current_choices and is_valid_cours_number(current_cours): processed_questions.append({ 'numero': current_question, 'question_text': current_question_text, 'source': current_source, 'comment': current_comment, 'cours': int(float(str(current_cours).strip())), 'module': current_module, 'choices': current_choices.copy(), 'choice_commentaire': current_choice_commentaire, 'photo_q': question_photos.get(current_question, {}).get('photo_q', None), # LINKED! 'photo_c': question_photos.get(current_question, {}).get('photo_c', None) # LINKED! }) elif current_question is not None and not is_valid_cours_number(current_cours): skipped_s2_questions += 1 # Group questions by module and course questions_by_module = {} for q_data in processed_questions: module_name = q_data['module'] cours_num = q_data['cours'] if module_name not in questions_by_module: questions_by_module[module_name] = {} if cours_num not in questions_by_module[module_name]: questions_by_module[module_name][cours_num] = [] questions_by_module[module_name][cours_num].append(q_data) # Check for E choices total_e_choices = 0 for module_name, questions_by_course in questions_by_module.items(): for cours_num, course_questions in questions_by_course.items(): course_e_count = sum(1 for q_data in course_questions for choice in q_data['choices'] if choice['letter'].upper() == 'E') if course_e_count > 0: total_e_choices += course_e_count # Column balancing column_break_after_question = 0 if use_two_columns and balance_method == "dynamic": total_estimated_lines = 0 all_question_lengths = [] for module_name in modules: if module_name not in questions_by_module: continue questions_by_course = questions_by_module[module_name] cours_titles = modules_data.get(module_name, {}) total_estimated_lines += 5 question_lengths, module_lines = estimate_content_length(questions_by_course, cours_titles) total_estimated_lines += module_lines all_question_lengths.extend(question_lengths) target_lines_first_column = total_estimated_lines * 0.52 cumulative_lines = 0 global_question_counter = 0 for module_name in modules: if module_name not in questions_by_module: continue cumulative_lines += 5 questions_by_course = questions_by_module[module_name] for cours_num in sorted(questions_by_course.keys()): cumulative_lines += 3 course_questions = questions_by_course[cours_num] for q_data in course_questions: global_question_counter += 1 for q_length in all_question_lengths: if q_length['question'] == q_data: cumulative_lines += q_length['estimated_lines'] break if cumulative_lines >= target_lines_first_column and column_break_after_question == 0: column_break_after_question = global_question_counter break if column_break_after_question > 0: break if column_break_after_question > 0: break # Format questions grouped by module overall_question_count = 1 global_question_counter = 0 column_break_added = False for module_index, module_name in enumerate(modules): if module_name not in questions_by_module: continue if module_index == 0: section = doc.sections[-1] else: section = doc.add_section(WD_SECTION.NEW_PAGE) if use_two_columns: sectPr = section._sectPr cols = sectPr.find(qn('w:cols')) if cols is None: cols = OxmlElement('w:cols') sectPr.append(cols) cols.set(qn('w:num'), '2') cols.set(qn('w:space'), '432') cols.set(qn('w:equalWidth'), '1') if use_two_columns: sectPr = section._sectPr cols = sectPr.find(qn('w:cols')) if cols is None: cols = OxmlElement('w:cols') sectPr.append(cols) cols.set(qn('w:num'), '2') cols.set(qn('w:space'), '432') cols.set(qn('w:equalWidth'), '1') create_flexible_header(section, module_name, first_sheet_name, display_name, theme_hex=theme_hex) # ADD THE COLORED SEPARATOR if add_separator_line: add_colored_column_separator(section, theme_hex) # ========== CUSTOMIZE MODULE TITLE APPEARANCE HERE ========== MODULE_HEIGHT = 31 # Frame height in points MODULE_ROUNDNESS = 50 # Corner roundness % (0=square, 50=pill) MODULE_FONT_SIZE = 35 # Font size in half-points (28=14pt, 24=12pt, 32=16pt) MODULE_BG_COLOR = theme_hex # Purple background color MODULE_TEXT_COLOR = "FFFFFF" # White text color MODULE_PADDING = 60 # Extra width padding # ============================================================ # Add module title as rounded shape shape_para = doc.add_paragraph() shape_para.alignment = WD_ALIGN_PARAGRAPH.CENTER shape_para.paragraph_format.space_before = Pt(12) shape_para.paragraph_format.space_after = Pt(8) # Calculate width based on text length text_length = len(module_name.upper()) estimated_width = (text_length * 12) + MODULE_PADDING module_name_escaped = html.escape(module_name.upper()) # Create rounded rectangle shape shape_xml = f''' {module_name_escaped} ''' shape_element = parse_xml(shape_xml) shape_para._p.append(shape_element) # Add bookmark bm_name = sanitize_bookmark_name(f"MOD_{module_name}") add_bookmark_to_paragraph(shape_para, bm_name, bookmark_id) toc_entries.append({'level': 'module', 'text': f"MODULE: {module_name}", 'bm': bm_name}) bookmark_id += 1 questions_by_course = questions_by_module[module_name] cours_titles = modules_data.get(module_name, {}) for natural_num, cours_num in enumerate(sorted(questions_by_course.keys()), start=1): course_questions = questions_by_course[cours_num] course_question_count = 1 course_title = cours_titles.get(cours_num, f"COURSE {cours_num}") num_questions = len(course_questions) course_para = create_course_title(doc, natural_num, course_title, theme_color, theme_hex=theme_hex, question_count=num_questions) bm_course_name = sanitize_bookmark_name(f"COURSE_{module_name}_{cours_num}") add_bookmark_to_paragraph(course_para, bm_course_name, bookmark_id) toc_entries.append({'level': 'course', 'text': f"{natural_num}. {course_title}", 'bm': bm_course_name}) bookmark_id += 1 for q_data in course_questions: global_question_counter += 1 if (use_two_columns and balance_method == "dynamic" and not column_break_added and global_question_counter == column_break_after_question): add_column_break(doc) column_break_added = True choices = [(choice['letter'], choice['text']) for choice in q_data['choices']] choices.sort(key=lambda x: x[0]) correct_answers = [choice['letter'] for choice in q_data['choices'] if choice['is_correct']] correct_answers_str = ''.join(sorted(correct_answers)) if choices: format_question_block( doc, course_question_count, q_data['question_text'], choices, correct_answers_str, q_data['source'], q_data['comment'], q_data.get('choice_commentaire', {}), # NEW q_data.get('photo_q', None), # NEW q_data.get('photo_c', None), # NEW theme_color, theme_hex, show_comments, # Pass the show_comments parameter highlight_words ) course_question_count += 1 overall_question_count += 1 # PROF.PY: NO EMPTY TABLES - create_empty_course_table(doc, course_questions, cours_num, 1) # PROF.PY: NO ANSWER TABLES - bookmark_id, responses_toc_entry = create_answer_tables(doc, questions_by_course, cours_titles, module_name, bookmark_id, theme_hex) # PROF.PY: NO ANSWER TABLES - toc_entries.append(responses_toc_entry) # ======================================== # INSERT TOC ENTRIES IN THE FIRST SECTION # ======================================== # We need to insert TOC entries in the FIRST section, before the section break # Get the body element body = doc._element.body # Find where to insert - right after toc_title, before the section break toc_title_element = toc_title._element insert_index = list(body).index(toc_title_element) + 1 # In the TOC generation section, update the formatting code: # Generate the TOC entries and insert them at the correct position # Mark last course entries for each module (for spacing) for i, entry in enumerate(toc_entries): entry['is_last_course_in_module'] = False if entry['level'] == 'course': # Check if next entry is a module or responses (or if this is the last entry) if i + 1 >= len(toc_entries) or toc_entries[i + 1]['level'] in ['module', 'responses']: entry['is_last_course_in_module'] = True for entry in toc_entries: # Create a new paragraph element new_p = body.makeelement(qn('w:p'), nsmap=body.nsmap) # Set paragraph properties pPr = new_p.makeelement(qn('w:pPr'), nsmap=new_p.nsmap) # Alignment - LEFT (for two-column layout) jc = pPr.makeelement(qn('w:jc'), nsmap=pPr.nsmap) jc.set(qn('w:val'), 'left') pPr.append(jc) # Set spacing spacing = pPr.makeelement(qn('w:spacing'), nsmap=pPr.nsmap) # Add spacing before module entries to separate module blocks if entry['level'] == 'module': spacing.set(qn('w:before'), '180') # 9pt spacing before module entries else: spacing.set(qn('w:before'), '0') spacing.set(qn('w:after'), '0') pPr.append(spacing) # Add tab stops with dotted leader tabs = pPr.makeelement(qn('w:tabs'), nsmap=pPr.nsmap) tab = tabs.makeelement(qn('w:tab'), nsmap=tabs.nsmap) tab.set(qn('w:val'), 'right') tab.set(qn('w:leader'), 'dot') # This adds the dots! tab.set(qn('w:pos'), '5040') # 3.5 inches in twentieths of a point (adjusted for two-column layout) tabs.append(tab) pPr.append(tabs) # Indent course entries and responses entries if entry['level'] == 'course': ind = pPr.makeelement(qn('w:ind'), nsmap=pPr.nsmap) ind.set(qn('w:left'), '360') # 0.25 inches pPr.append(ind) elif entry['level'] == 'responses': ind = pPr.makeelement(qn('w:ind'), nsmap=pPr.nsmap) ind.set(qn('w:left'), '360') # 0.25 inches - same as course pPr.append(ind) new_p.append(pPr) # Add text run with font formatting r = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap) # Add run properties (font) rPr = r.makeelement(qn('w:rPr'), nsmap=r.nsmap) # Font family rFonts = rPr.makeelement(qn('w:rFonts'), nsmap=rPr.nsmap) rFonts.set(qn('w:ascii'), 'Montserrat') rFonts.set(qn('w:hAnsi'), 'Montserrat') rPr.append(rFonts) # Font size and styling based on level sz = rPr.makeelement(qn('w:sz'), nsmap=rPr.nsmap) if entry['level'] == 'module': sz.set(qn('w:val'), '22') # 11pt # Bold for module b = rPr.makeelement(qn('w:b'), nsmap=rPr.nsmap) rPr.append(b) # Color for module color = rPr.makeelement(qn('w:color'), nsmap=rPr.nsmap) color.set(qn('w:val'), theme_hex) rPr.append(color) elif entry['level'] == 'responses': sz.set(qn('w:val'), '20') # 10pt # Bold and italic for responses b = rPr.makeelement(qn('w:b'), nsmap=rPr.nsmap) rPr.append(b) i = rPr.makeelement(qn('w:i'), nsmap=rPr.nsmap) rPr.append(i) # Purple color for responses to match the box color = rPr.makeelement(qn('w:color'), nsmap=rPr.nsmap) color.set(qn('w:val'), theme_hex) rPr.append(color) else: # course level sz.set(qn('w:val'), '20') # 10pt rPr.append(sz) r.append(rPr) # Add text t = r.makeelement(qn('w:t'), nsmap=r.nsmap) t.set(qn('xml:space'), 'preserve') t.text = entry['text'] r.append(t) new_p.append(r) # Add tab run (this triggers the dotted leader) r_tab = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap) tab_char = r_tab.makeelement(qn('w:tab'), nsmap=r_tab.nsmap) r_tab.append(tab_char) new_p.append(r_tab) # Add PAGEREF field runs with theme color and Montserrat font formatting r_field_begin = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap) # Add formatting to field begin rPr_field = r_field_begin.makeelement(qn('w:rPr'), nsmap=r_field_begin.nsmap) # Add Montserrat font rFonts_field = rPr_field.makeelement(qn('w:rFonts'), nsmap=rPr_field.nsmap) rFonts_field.set(qn('w:ascii'), 'Montserrat') rFonts_field.set(qn('w:hAnsi'), 'Montserrat') rPr_field.append(rFonts_field) # Add bold b_field = rPr_field.makeelement(qn('w:b'), nsmap=rPr_field.nsmap) rPr_field.append(b_field) color_field = rPr_field.makeelement(qn('w:color'), nsmap=rPr_field.nsmap) color_field.set(qn('w:val'), theme_hex) rPr_field.append(color_field) r_field_begin.append(rPr_field) fldChar1 = r_field_begin.makeelement(qn('w:fldChar'), nsmap=r_field_begin.nsmap) fldChar1.set(qn('w:fldCharType'), 'begin') r_field_begin.append(fldChar1) new_p.append(r_field_begin) r_instr = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap) # Add formatting to instruction text rPr_instr = r_instr.makeelement(qn('w:rPr'), nsmap=r_instr.nsmap) # Add Montserrat font rFonts_instr = rPr_instr.makeelement(qn('w:rFonts'), nsmap=rPr_instr.nsmap) rFonts_instr.set(qn('w:ascii'), 'Montserrat') rFonts_instr.set(qn('w:hAnsi'), 'Montserrat') rPr_instr.append(rFonts_instr) # Add bold b_instr = rPr_instr.makeelement(qn('w:b'), nsmap=rPr_instr.nsmap) rPr_instr.append(b_instr) color_instr = rPr_instr.makeelement(qn('w:color'), nsmap=rPr_instr.nsmap) color_instr.set(qn('w:val'), theme_hex) rPr_instr.append(color_instr) r_instr.append(rPr_instr) instrText = r_instr.makeelement(qn('w:instrText'), nsmap=r_instr.nsmap) instrText.set(qn('xml:space'), 'preserve') instrText.text = f"PAGEREF {entry['bm']} \\h" r_instr.append(instrText) new_p.append(r_instr) r_field_end = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap) # Add formatting to field end rPr_end = r_field_end.makeelement(qn('w:rPr'), nsmap=r_field_end.nsmap) # Add Montserrat font rFonts_end = rPr_end.makeelement(qn('w:rFonts'), nsmap=rPr_end.nsmap) rFonts_end.set(qn('w:ascii'), 'Montserrat') rFonts_end.set(qn('w:hAnsi'), 'Montserrat') rPr_end.append(rFonts_end) # Add bold b_end = rPr_end.makeelement(qn('w:b'), nsmap=rPr_end.nsmap) rPr_end.append(b_end) color_end = rPr_end.makeelement(qn('w:color'), nsmap=rPr_end.nsmap) color_end.set(qn('w:val'), theme_hex) rPr_end.append(color_end) r_field_end.append(rPr_end) fldChar2 = r_field_end.makeelement(qn('w:fldChar'), nsmap=r_field_end.nsmap) fldChar2.set(qn('w:fldCharType'), 'end') r_field_end.append(fldChar2) new_p.append(r_field_end) # Insert the paragraph at the correct position body.insert(insert_index, new_p) insert_index += 1 # Increment for next insertion # Add page numbers add_page_numbers(doc, theme_hex) # Call it before generating the document: verify_photo_associations(question_photos) # Save document doc.save(output_word_path) print(f"\n🎉 SUCCESS: Document saved as: {output_word_path}") print(f"📊 Total questions processed: {overall_question_count - 1}") print(f"🚫 Total S2/invalid questions skipped: {skipped_s2_questions}") if total_e_choices > 0: print(f"✨ Dynamic E columns added for courses with 5-choice questions") # Clean up temporary folder if it was created if is_temp and temp_dir_obj is not None: print(f"\n🧹 Cleaning up temporary folder...") try: temp_dir_obj.cleanup() print(f" ✓ Temporary files removed") except Exception as e: print(f" ⚠️ Could not clean up: {e}") def debug_excel_structure(excel_file_path): """Debug function to analyze Excel structure""" print("=== DEBUGGING EXCEL STRUCTURE ===") xls = pd.ExcelFile(excel_file_path) first_sheet_name = xls.sheet_names[0] # Get the first sheet name questions_df = pd.read_excel(excel_file_path, sheet_name=first_sheet_name) print(f"Total rows: {len(questions_df)}") print(f"Columns: {list(questions_df.columns)}") # Check unique values in key columns if 'Numero' in questions_df.columns: try: print(f"Unique Numero values: {sorted(questions_df['Numero'].dropna().unique())}") except Exception as e: print(f"Unique Numero values: {list(questions_df['Numero'].dropna().unique())} (couldn't sort: {e})") if 'Order' in questions_df.columns: try: unique_orders = sorted(questions_df['Order'].dropna().unique()) print(f"Unique Order values: {unique_orders}") # Check specifically for E choices e_count = sum(1 for order in questions_df['Order'].dropna() if str(order).strip().upper() == 'E') print(f"Total E choices found: {e_count}") except Exception as e: print(f"Unique Order values: {list(questions_df['Order'].dropna().unique())} (couldn't sort: {e})") if 'Cours' in questions_df.columns: unique_cours = questions_df['Cours'].dropna().unique() # Convert all to strings first for display, then separate by validity unique_cours_str = [str(c) for c in unique_cours] print(f"Unique Cours values: {unique_cours_str}") # Check which cours values are valid vs invalid valid_cours = [] invalid_cours = [] for c in unique_cours: if is_valid_cours_number(c): valid_cours.append(c) else: invalid_cours.append(str(c)) # Sort valid ones (numeric) and invalid ones (as strings) separately try: valid_cours_sorted = sorted([float(c) for c in valid_cours]) print(f"Valid cours values: {valid_cours_sorted}") except Exception: print(f"Valid cours values: {valid_cours}") try: invalid_cours_sorted = sorted(invalid_cours) print(f"Invalid/S2 cours values: {invalid_cours_sorted}") except Exception: print(f"Invalid/S2 cours values: {invalid_cours}") # Check module column and corresponding sheets if 'module' in questions_df.columns: unique_modules = questions_df['module'].dropna().unique() print(f"\nUnique Module values: {list(unique_modules)}") # Check if sheets exist for each module xls = pd.ExcelFile(excel_file_path) sheet_names = xls.sheet_names sheet_names_lower = [s.lower() for s in sheet_names] print("\nModule sheet availability:") for module in unique_modules: module_lower = str(module).strip().lower() if module_lower in sheet_names_lower: actual_sheet = sheet_names[sheet_names_lower.index(module_lower)] print(f" ✓ Module '{module}' -> Sheet '{actual_sheet}' found") # Try to read and show course info from this sheet try: module_df = pd.read_excel(excel_file_path, sheet_name=actual_sheet) if 'cours' in module_df.columns and 'titre' in module_df.columns: print(f" Courses in this module:") for _, row in module_df.iterrows(): if pd.notna(row['cours']): print(f" - {row['cours']}: {row.get('titre', 'N/A')}") except Exception as e: print(f" Error reading sheet: {e}") else: print(f" ✗ Module '{module}' -> No matching sheet found") # Check Cours sheet try: cours_df = pd.read_excel(excel_file_path, sheet_name='Cours') print(f"\nCours sheet - Total rows: {len(cours_df)}") print(f"Cours sheet columns: {list(cours_df.columns)}") if not cours_df.empty: print("Course titles:") for _, row in cours_df.iterrows(): cours_val = row.get('cours', 'N/A') is_valid = is_valid_cours_number(cours_val) status = "✓" if is_valid else "✗ (SKIPPED)" print(f" Course {cours_val}: {row.get('titre', 'N/A')} {status}") except Exception as e: print(f"Error reading Cours sheet: {e}") def test_excel_photo_columns(excel_file_path): """Test function to check what's actually in your Excel file""" print("\n" + "=" * 60) print("TESTING EXCEL PHOTO AND COMMENT COLUMNS") print("=" * 60) xls = pd.ExcelFile(excel_file_path) first_sheet = xls.sheet_names[0] df = pd.read_excel(excel_file_path, sheet_name=first_sheet, nrows=10) print(f"\nColumns in sheet '{first_sheet}':") for col in df.columns: print(f" - {col}") has_embedded_images = False # Check for Choice commentaire if 'Choice commentaire' in df.columns: print("\n✓ Found 'Choice commentaire' column") print("NOTE: Each row has ONE comment for ONE choice (A, B, C, D, or E)") for idx, val in enumerate(df['Choice commentaire'].head()): if pd.notna(val): order = df['Order'].iloc[idx] if 'Order' in df.columns else '?' print(f" Row {idx} (Choice {order}): {repr(str(val)[:100])}") else: print("\n✗ 'Choice commentaire' column NOT found") # Check for Photo Q if 'Photo Q' in df.columns: print("\n✓ Found 'Photo Q' column") for idx, val in enumerate(df['Photo Q'].head()): if pd.notna(val): val_str = str(val).strip() if val_str.startswith('=DISPIMG'): print(f" Row {idx}: EMBEDDED IMAGE (formula: {val_str[:50]}...)") has_embedded_images = True else: exists = os.path.exists(val_str) print(f" Row {idx}: '{val_str}' - Exists: {exists}") else: print("\n✗ 'Photo Q' column NOT found") # Check for Photo C if 'Photo C' in df.columns: print("\n✓ Found 'Photo C' column") for idx, val in enumerate(df['Photo C'].head()): if pd.notna(val): val_str = str(val).strip() if val_str.startswith('=DISPIMG'): print(f" Row {idx}: EMBEDDED IMAGE (formula: {val_str[:50]}...)") has_embedded_images = True else: exists = os.path.exists(val_str) print(f" Row {idx}: '{val_str}' - Exists: {exists}") else: print("\n✗ 'Photo C' column NOT found") print("=" * 60 + "\n") if has_embedded_images: extract_embedded_images_info(excel_file_path) def verify_photo_associations(question_photos): """Debug function to verify all photo-question associations""" print("\n" + "=" * 60) print("PHOTO-QUESTION ASSOCIATIONS") print("=" * 60) for q_num in sorted(question_photos.keys()): photos = question_photos[q_num] photo_q = photos.get('photo_q') photo_c = photos.get('photo_c') if photo_q or photo_c: print(f"\nQuestion {q_num}:") if photo_q: exists = "✓" if os.path.exists(photo_q) else "✗" print(f" Photo Q: {exists} {photo_q}") if photo_c: exists = "✓" if os.path.exists(photo_c) else "✗" print(f" Photo C: {exists} {photo_c}") print("=" * 60 + "\n")