# Manhattan-Prof / prof.py
# TiH0's picture
# Update prof.py
# 91d9dad verified
import re
import os
import html
import pandas as pd
from docx import Document
from docx.shared import Pt, Cm, Inches, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_TAB_ALIGNMENT, WD_TAB_LEADER
from docx.enum.table import WD_ALIGN_VERTICAL, WD_TABLE_ALIGNMENT
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.section import WD_SECTION
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.oxml.shared import OxmlElement, qn
import zipfile
from collections import defaultdict
import tempfile
# Default accent colour, as a hex string without the leading '#'.
THEME_COLOR_HEX = "5FFFDF" # Hex version for XML elements
# The same colour as an RGBColor, built from the hex string above.
THEME_COLOR = RGBColor.from_string(THEME_COLOR_HEX)
# Common paper sizes, keyed by name; each value is (width, height) in inches.
PAPER_SIZES = {
'LETTER': (8.5, 11), # US Letter
'A4': (8.27, 11.69), # A4
'A4_WIDE': (8.77, 11.69), # Same height as A4, 0.5 in wider
'A3': (11.69, 16.54), # A3
'A5': (5.83, 8.27), # A5
'LEGAL': (8.5, 14), # US Legal
'TABLOID': (11, 17), # Tabloid
'LEDGER': (17, 11), # Ledger (Tabloid dimensions swapped)
}
def get_circled_number(num):
    """Return the circled Unicode glyph for ``num``.

    Glyphs exist for 1 through 50. Any other value is returned wrapped in
    parentheses, e.g. ``"(51)"``.
    """
    # The 50 glyphs in order; position i (0-based) holds the glyph for i + 1.
    glyphs = (
        "①②③④⑤⑥⑦⑧⑨⑩"
        "⑪⑫⑬⑭⑮⑯⑰⑱⑲⑳"
        "㉑㉒㉓㉔㉕㉖㉗㉘㉙㉚"
        "㉛㉜㉝㉞㉟㊱㊲㊳㊴㊵"
        "㊶㊷㊸㊹㊺㊻㊼㊽㊾㊿"
    )
    lookup = dict(enumerate(glyphs, start=1))
    # Parenthesised fallback for anything without a glyph.
    return lookup.get(num, f"({num})")
def prepare_image_folder(path):
    """Resolve ``path`` to a folder of images, extracting it first if it is a ZIP.

    Args:
        path: A folder path, a path to a ``.zip`` file, or None/blank.

    Returns:
        A 3-tuple ``(folder, is_temp, temp_dir_obj)``:

        * ``folder`` - the directory to read images from, or None when no
          usable folder is available (images are then skipped).
        * ``is_temp`` - True only when ``folder`` is a temporary directory
          created here by extracting a ZIP.
        * ``temp_dir_obj`` - the ``tempfile.TemporaryDirectory`` owning that
          directory (the caller must call ``cleanup()`` on it), else None.
    """
    # Handle None or empty path
    if path is None or str(path).strip() == '':
        print("ℹ️ No image folder provided - images will be skipped")
        return None, False, None
    path = str(path).strip()

    # ZIP archive: extract into a temporary directory
    if path.lower().endswith('.zip') and os.path.isfile(path):
        print(f"📦 Detected ZIP file: {os.path.basename(path)}")
        print(" Extracting to temporary folder...")
        temp_dir = None
        try:
            temp_dir = tempfile.TemporaryDirectory()
            with zipfile.ZipFile(path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir.name)
            # Count extracted files (for the log message only)
            all_files = [
                os.path.join(root, name)
                for root, _dirs, names in os.walk(temp_dir.name)
                for name in names
            ]
            image_files = [
                f for f in all_files
                if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'))
            ]
            print(f" ✓ Extracted {len(all_files)} files ({len(image_files)} images)")
            print(f" Using folder: {temp_dir.name}")
            return temp_dir.name, True, temp_dir
        except Exception as e:
            # Best effort: a broken archive must not abort document generation.
            print(f" ✗ Error extracting ZIP: {e}")
            # Remove the partially extracted directory now instead of leaving
            # it to the garbage collector.
            if temp_dir is not None:
                try:
                    temp_dir.cleanup()
                except OSError as cleanup_error:
                    print(f" ⚠️ Could not clean up: {cleanup_error}")
            return None, False, None

    # Regular folder: use it as-is
    if os.path.isdir(path):
        print(f"📁 Using folder: {path}")
        return path, False, None

    print(f"⚠️ WARNING: Path is neither a folder nor a ZIP file: {path}")
    print("ℹ️ Images will be skipped")
    return None, False, None
def map_images_from_excel(excel_path, image_folder):
    """Map images to questions from the 'Photo Q' / 'Photo C' Excel columns.

    The first sheet of the workbook is read. A non-empty 'Numero' cell starts
    a new question; every following row belongs to that question until the
    next 'Numero'. For each question the first photo value that resolves to a
    file (via ``find_image_in_folder``) wins.

    Args:
        excel_path: Path of the Excel workbook.
        image_folder: Folder to search for images, or None.

    Returns:
        ``{question_number: {'photo_q': path_or_None, 'photo_c': path_or_None}}``.
        An empty dict when ``image_folder`` is None or missing, when it cannot
        be listed, or when neither photo column exists.
    """
    # If no image folder, return empty dict immediately
    if image_folder is None:
        print("ℹ️ No image folder available - skipping image mapping")
        return {}

    # sheet_name=0 reads the first sheet in a single pass; pandas opens and
    # closes the workbook itself, so no ExcelFile handle is left open.
    df = pd.read_excel(excel_path, sheet_name=0)

    # (Excel column, result key) pairs for the photo columns actually present
    photo_columns = [
        (column, key)
        for column, key in (('Photo Q', 'photo_q'), ('Photo C', 'photo_c'))
        if column in df.columns
    ]
    if not photo_columns:
        print("ℹ️ No 'Photo Q' or 'Photo C' columns found in Excel")
        return {}

    print("\n=== MAPPING IMAGES FROM FOLDER ===")
    print(f"Image folder: {image_folder}")
    print(f"Folder exists: {os.path.exists(image_folder)}")
    if not os.path.exists(image_folder):
        print("ERROR: Folder does not exist!")
        return {}
    try:
        images_in_folder = [
            f for f in os.listdir(image_folder)
            if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'))
        ]
    except OSError as e:
        print(f"Error reading folder: {e}")
        return {}
    print(f"Images found in folder: {len(images_in_folder)}")

    # Dictionary to store question -> image mappings
    question_images = defaultdict(lambda: {'photo_q': None, 'photo_c': None})
    current_question = None
    for _, row in df.iterrows():
        # Detect new question
        numero = row.get('Numero')
        if pd.notna(numero):
            current_question = numero
        if current_question is None:
            continue

        for column, key in photo_columns:
            raw_value = row[column]
            if pd.isna(raw_value):
                continue
            value = str(raw_value).strip()
            # Skip blanks, textual nulls and unevaluated formulas
            if not value or value.lower() in ('nan', 'none') or value.startswith('='):
                continue
            slots = question_images[current_question]
            # Only set if not already set (first occurrence wins)
            if slots[key]:
                continue
            image_path = find_image_in_folder(value, image_folder)
            if image_path:
                slots[key] = image_path
                print(f"Q{current_question}: {column} -> {os.path.basename(image_path)}")

    print(f"\n✓ Mapped images to {len(question_images)} questions")
    return dict(question_images)
def find_image_in_folder(filename, image_folder):
    """Find an image file in ``image_folder`` or any of its subfolders.

    Within each directory the lookup order is: exact name, case-insensitive
    name, then - only when ``filename`` has no extension - the name combined
    with each common image extension (exact, then case-insensitive). The top
    folder is searched before its subfolders.

    Args:
        filename: Image name, with or without extension. An absolute path to
            an existing file is returned unchanged.
        image_folder: Root folder to search, or None.

    Returns:
        The path of the first matching file, or None when ``image_folder`` is
        None, ``filename`` is blank/'nan'/'none', or nothing matches.
        Directories are never returned.
    """
    if image_folder is None:
        return None
    if not filename or str(filename).strip().lower() in ['nan', 'none', '']:
        return None
    filename = str(filename).strip()

    # If the filename is already an absolute path to a file, use it directly
    if os.path.isabs(filename) and os.path.isfile(filename):
        return filename

    # Common image extensions to try when none was given
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp')
    name_without_ext, original_ext = os.path.splitext(filename)

    # Names to look for, in priority order
    candidates = [filename]
    if not original_ext:
        candidates.extend(name_without_ext + ext for ext in image_extensions)

    def search_in_dir(search_dir, file_names):
        """Return the first candidate found among ``file_names`` of ``search_dir``."""
        # Lower-cased name -> real name; the first listed file wins on clashes
        by_lower = {}
        for name in file_names:
            by_lower.setdefault(name.lower(), name)
        for candidate in candidates:
            exact_path = os.path.join(search_dir, candidate)
            if os.path.isfile(exact_path):
                return exact_path
            real_name = by_lower.get(candidate.lower())
            if real_name is not None:
                return os.path.join(search_dir, real_name)
        return None

    # os.walk yields the top folder first, then every subfolder, so a single
    # walk covers both the "main folder" and the "subfolder" searches.
    try:
        is_top = True
        for root, _dirs, files in os.walk(image_folder):
            result = search_in_dir(root, files)
            if result:
                relative = os.path.relpath(result, image_folder)
                if is_top:
                    print(f" ✓ Found: {relative}")
                else:
                    print(f" ✓ Found in subfolder: {relative}")
                return result
            is_top = False
    except OSError as e:
        print(f" ✗ Error searching subfolders: {e}")

    print(f" ✗ Not found: {filename}")
    return None
def process_excel_to_word(excel_file_path, output_word_path, image_folder=None, display_name=None, use_two_columns=True,
                          add_separator_line=True, balance_method="dynamic", theme_hex=None):
    """Main function to process Excel and create a Word document with TOC on the first page.

    Only the image-preparation prologue and the temp-folder cleanup are
    visible in this block; the document-building steps are elided (see the
    placeholder comments below).

    Args:
        excel_file_path: Path of the source Excel workbook.
        output_word_path: Destination path of the Word document.
        image_folder: Folder or ZIP archive with the images, or None.
        display_name, use_two_columns, add_separator_line, balance_method:
            Not read by the visible part of this function.
        theme_hex: Accent colour as a hex string; defaults to THEME_COLOR_HEX.
    """
    if theme_hex is None:
        theme_hex = THEME_COLOR_HEX
    theme_color = RGBColor.from_string(theme_hex)
    # Prepare image folder (extract if ZIP) - gracefully handle None
    actual_image_folder, is_temp, temp_dir_obj = prepare_image_folder(image_folder)
    try:
        # Map images from the prepared folder (returns empty dict if None)
        question_photos = map_images_from_excel(excel_file_path, actual_image_folder)
        # ... rest of the function remains the same ...
        # The code will now handle missing images gracefully since question_photos will be empty
    finally:
        # Runs even when processing fails, so an extracted ZIP never leaves
        # its temporary folder behind.
        if is_temp and temp_dir_obj is not None:
            print("\n🧹 Cleaning up temporary folder...")
            try:
                temp_dir_obj.cleanup()
                print(" ✓ Temporary files removed")
            except Exception as e:
                print(f" ⚠️ Could not clean up: {e}")
def preview_image_mapping(question_images):
    """Print a per-question summary of the mapped images.

    For each question (in sorted key order) the 'photo_q' and 'photo_c'
    entries are listed by base name, with a check mark when the file exists
    on disk and a cross when it does not; unset entries print as "(none)".
    """
    rule = "=" * 60
    print("\n" + rule)
    print("IMAGE MAPPING PREVIEW")
    print(rule)
    for number in sorted(question_images.keys()):
        entry = question_images[number]
        print(f"\nQuestion {number}:")
        for key, label in (('photo_q', 'Photo Q'), ('photo_c', 'Photo C')):
            location = entry[key]
            if not location:
                print(f" {label}: (none)")
                continue
            marker = "✓" if os.path.exists(location) else "✗"
            print(f" {label}: {marker} {os.path.basename(location)}")
    print(rule + "\n")
def is_only_x_string(text):
    """Return True when ``text`` consists solely of the letter X (any case).

    Surrounding whitespace is ignored. Falsy values, NaN and blank strings
    give False.
    """
    if not text or pd.isna(text):
        return False
    stripped = str(text).strip()
    # A non-empty string whose characters are all drawn from {'x', 'X'}.
    return bool(stripped) and set(stripped) <= {'x', 'X'}
def set_page_size(section, width_inches, height_inches):
    """Set a custom page size on ``section``.

    Writes the ``w:w`` / ``w:h`` attributes of the section's ``w:pgSz``
    element, creating that element when it is missing. Sizes are given in
    inches and stored as whole twips (1 inch = 1440 twips, truncated).
    """
    section_props = section._sectPr
    page_size = section_props.find(qn('w:pgSz'))
    if page_size is None:
        page_size = OxmlElement('w:pgSz')
        section_props.insert(0, page_size)
    # Convert both dimensions before touching the element.
    twips = [int(inches * 1440) for inches in (width_inches, height_inches)]
    for attribute, value in zip(('w:w', 'w:h'), twips):
        page_size.set(qn(attribute), str(value))
def set_two_column_layout(doc, add_separator_line=True, balance_columns=True):
    """Switch the document's first section to a two-column layout.

    Args:
        doc: The document whose first section is modified.
        add_separator_line: When True, draw a vertical line between the
            columns (``w:sep`` on the ``w:cols`` element).
        balance_columns: When True, mark the columns as equal width
            (``w:equalWidth``).

    Returns:
        The same ``doc``.
    """
    sectPr = doc.sections[0]._sectPr
    # Create the cols element if the section does not have one yet
    cols = sectPr.find(qn('w:cols'))
    if cols is None:
        cols = OxmlElement('w:cols')
        sectPr.append(cols)
    cols.set(qn('w:num'), '2')
    # Space between columns: 432 twentieths of a point = 0.3 inch
    cols.set(qn('w:space'), '432')
    if balance_columns:
        cols.set(qn('w:equalWidth'), '1')  # Equal width columns
    # Honour the separator request; previously the flag was accepted but
    # never written to the document.
    if add_separator_line:
        cols.set(qn('w:sep'), '1')
    return doc
def set_cell_borders(cell, top=False, bottom=False, left=False, right=False):
    """Set specific borders for a table cell.

    Any existing top/left/bottom/right border on the cell is removed first.
    A thin single black border is then written for each side whose flag is
    True; sides left False get no cell-level border, so table-level borders
    show through.
    """
    # Get (or create) the cell properties and its borders container
    tcPr = cell._tc.get_or_add_tcPr()
    tcBorders = tcPr.find(qn('w:tcBorders'))
    if tcBorders is None:
        tcBorders = parse_xml(f'<w:tcBorders {nsdecls("w")}></w:tcBorders>')
        tcPr.append(tcBorders)

    # Sides in the order the WordprocessingML schema expects them inside
    # w:tcBorders: top, left, bottom, right.
    border_settings = (('top', top), ('left', left), ('bottom', bottom), ('right', right))

    # Drop whatever was there before for these four sides
    for border_name, _ in border_settings:
        existing = tcBorders.find(qn(f'w:{border_name}'))
        if existing is not None:
            tcBorders.remove(existing)

    # Re-insert the requested sides at the front, keeping schema order and
    # staying ahead of any other border children already present.
    position = 0
    for border_name, should_show in border_settings:
        if not should_show:
            continue
        border_xml = f'<w:{border_name} {nsdecls("w")} w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        tcBorders.insert(position, parse_xml(border_xml))
        position += 1
def continue_two_column_layout(doc):
    """Keep the current two-column layout and start the next column.

    Inserts a column break via ``add_column_break`` and hands ``doc`` back.
    """
    add_column_break(doc)
    return doc
def add_column_break(doc):
    """Append a new paragraph holding a column break to ``doc``."""
    paragraph = doc.add_paragraph()
    target_run = paragraph.runs[0] if paragraph.runs else paragraph.add_run()
    # <w:br w:type="column"/> moves the following content to the next column
    column_break = OxmlElement('w:br')
    column_break.set(qn('w:type'), 'column')
    target_run._element.append(column_break)
def add_page_break(doc):
    """Append a page break to ``doc`` (delegates to ``doc.add_page_break()``).

    Returns None.
    """
    doc.add_page_break()
def create_course_title(doc, course_number, course_title, theme_color=None, theme_hex=None, question_count=None):
    """Add a course title inside an unfilled rounded frame.

    The title reads "<number>. <title> <circled question count>". When the
    text exceeds MAX_CHARS_SINGLE_LINE characters it is split over two lines
    and the frame height is increased accordingly.

    Args:
        doc: Document to append the title paragraph to.
        course_number: Number shown before the title.
        course_title: Title text (coerced to ``str``).
        theme_color: Unused by the frame markup; resolved from ``theme_hex``
            when None.
        theme_hex: Hex colour for text and stroke; defaults to
            THEME_COLOR_HEX.
        question_count: Number shown as a circled glyph after the title.
            When None, no count is shown.

    Returns:
        The paragraph that holds the frame.
    """
    if theme_hex is None:
        theme_hex = THEME_COLOR_HEX
    if theme_color is None:
        theme_color = RGBColor.from_string(theme_hex)

    # Centered host paragraph with no spacing, kept with what follows
    course_para = doc.add_paragraph()
    course_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    course_para.paragraph_format.space_before = Pt(0)
    course_para.paragraph_format.space_after = Pt(0)
    course_para.paragraph_format.keep_with_next = True
    course_para.paragraph_format.keep_together = True

    # ========== CUSTOMIZE COURSE TITLE APPEARANCE HERE ==========
    MAX_CHARS_SINGLE_LINE = 40  # Threshold for wrapping to two lines
    SINGLE_LINE_HEIGHT = 31  # Frame height for single line
    DOUBLE_LINE_HEIGHT = 55  # Frame height for two lines (almost double)
    COURSE_ROUNDNESS = 50  # Corner roundness %
    COURSE_FONT_SIZE = 26  # Font size in half-points (26=13pt)
    COURSE_TEXT_COLOR = theme_hex
    COURSE_STROKE_COLOR = theme_hex
    COURSE_STROKE_WEIGHT = "2pt"
    MAX_WIDTH_PT = 280  # Maximum width in points for the frame
    NUMBER_FONT = "Inter ExtraBold"
    TITLE_FONT = "Montserrat"
    CIRCLE_FONT = "MS Gothic"
    # ============================================================

    # Without a count there is nothing to circle; previously a literal
    # "(None)" ended up in the title.
    has_count = question_count is not None
    circled_num = get_circled_number(question_count) if has_count else ""
    course_title = str(course_title)
    number_prefix = f"{course_number}. "

    full_text = f"{number_prefix}{course_title}"
    if has_count:
        full_text = f"{full_text} {circled_num}"
    text_length = len(full_text)

    # Size and colour properties shared by every run
    xml_size_color = f'<w:sz w:val="{COURSE_FONT_SIZE}"/><w:color w:val="{COURSE_TEXT_COLOR}"/>'

    if text_length > MAX_CHARS_SINGLE_LINE:
        # Split the title near the middle, moving words to the second line
        # while the first line (number included) is still too long.
        words = course_title.split()
        mid_point = len(words) // 2
        title_part_1 = " ".join(words[:mid_point])
        while (len(number_prefix) + len(title_part_1)) > MAX_CHARS_SINGLE_LINE and mid_point > 1:
            mid_point -= 1
            title_part_1 = " ".join(words[:mid_point])
        title_part_2 = " ".join(words[mid_point:])

        # Width follows the longer visual line:
        # line 1 = number + first part, line 2 = second part + circle
        second_line = f"{title_part_2} {circled_num}" if has_count else title_part_2
        max_line_length = max(len(number_prefix + title_part_1), len(second_line))
        estimated_width = min((max_line_length * 8) + 20, MAX_WIDTH_PT)
        frame_height = DOUBLE_LINE_HEIGHT

        # The trailing space separates the title from the circle
        last_title_text = title_part_2 + " " if has_count else title_part_2
        runs = [
            _course_title_run_xml(NUMBER_FONT, number_prefix, xml_size_color),
            _course_title_run_xml(TITLE_FONT, title_part_1, xml_size_color),
            '<w:r><w:br/></w:r>',
            _course_title_run_xml(TITLE_FONT, last_title_text, xml_size_color),
        ]
    else:
        # Single line
        estimated_width = min((text_length * 9) + 20, MAX_WIDTH_PT)
        frame_height = SINGLE_LINE_HEIGHT
        title_text = f"{course_title} " if has_count else course_title
        runs = [
            _course_title_run_xml(NUMBER_FONT, number_prefix, xml_size_color),
            _course_title_run_xml(TITLE_FONT, title_text, xml_size_color),
        ]

    if has_count:
        runs.append(_course_title_run_xml(CIRCLE_FONT, circled_num, xml_size_color, preserve_space=False))
    text_content = "".join(runs)

    # Rounded rectangle shape (UNFILLED, with stroke) holding the title runs
    shape_xml = f'''
    <w:r xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
         xmlns:v="urn:schemas-microsoft-com:vml">
      <w:pict>
        <v:roundrect style="width:{estimated_width}pt;height:{frame_height}pt"
                     arcsize="{COURSE_ROUNDNESS}%"
                     filled="f"
                     strokecolor="#{COURSE_STROKE_COLOR}"
                     strokeweight="{COURSE_STROKE_WEIGHT}">
          <v:textbox inset="0pt,3pt,0pt,3pt" style="v-text-anchor:middle">
            <w:txbxContent>
              <w:p>
                <w:pPr>
                  <w:jc w:val="center"/>
                  <w:spacing w:before="0" w:after="0"/>
                </w:pPr>{text_content}
              </w:p>
            </w:txbxContent>
          </v:textbox>
        </v:roundrect>
      </w:pict>
    </w:r>
    '''
    shape_element = parse_xml(shape_xml)
    course_para._p.append(shape_element)
    return course_para


def _course_title_run_xml(font_name, text, props_xml, preserve_space=True):
    """Return the XML of one bold title run in ``font_name`` with XML-escaped ``text``."""
    space_attr = ' xml:space="preserve"' if preserve_space else ''
    return (
        '<w:r>'
        '<w:rPr>'
        f'<w:rFonts w:ascii="{font_name}" w:hAnsi="{font_name}"/>'
        '<w:b/>'
        f'{props_xml}'
        '</w:rPr>'
        f'<w:t{space_attr}>{html.escape(text)}</w:t>'
        '</w:r>'
    )
def highlight_words_in_text(paragraph, text, highlight_words, theme_color, font_name='Inter Display Medium',
                            font_size=10.5, bold=False):
    """Add ``text`` to ``paragraph``, colouring selected substrings.

    Matching is literal (special characters such as parentheses or
    backslashes have no regex meaning) and case-insensitive. Longer entries
    are tried before shorter ones, so a phrase is not shadowed by one of its
    own prefixes. Empty and non-string entries are ignored.

    Args:
        paragraph: The paragraph to add text to.
        text: The full text to add.
        highlight_words: Iterable of literal strings to highlight (or None).
        theme_color: RGBColor applied to the highlighted runs.
        font_name: Font to use for every run.
        font_size: Font size in points.
        bold: Whether every run should be bold.
    """
    def add_styled_run(content, highlighted=False):
        run = paragraph.add_run(content)
        run.font.name = font_name
        run.font.size = Pt(font_size)
        if bold:
            run.font.bold = True
        if highlighted:
            run.font.color.rgb = theme_color

    # Keep usable entries only: an empty string would make the pattern match
    # at every position and split the text into one run per character.
    literals = []
    for word in highlight_words or ():
        if isinstance(word, str) and word and word not in literals:
            literals.append(word)

    if not literals or not text:
        # No highlighting needed, just add normal text
        add_styled_run(text)
        return

    # Regex alternation takes the first alternative that matches, so list the
    # longest literals first.
    literals.sort(key=len, reverse=True)
    pattern = '(' + '|'.join(re.escape(word) for word in literals) + ')'

    # With one capturing group, re.split alternates plain text (even
    # indices) and matched text (odd indices).
    for index, part in enumerate(re.split(pattern, text, flags=re.IGNORECASE)):
        if part:
            add_styled_run(part, highlighted=index % 2 == 1)
def format_question_block(doc, question_num, question_text, choices, correct_answers, source, comment=None,
                          choice_commentaire=None, photo_q=None, photo_c=None, theme_color=None, theme_hex=None,
                          show_comments=True,
                          highlight_words=None):
    """Format a single question block with reduced spacing and keep together formatting.

    Appends to ``doc``, in order: the numbered question, its answer choices
    (choices made only of X's are dropped), the optional 'Photo C' image, a
    borderless one-row table with the answer on the left and the source on
    the right, and finally either the comment section or a thin spacer.

    Args:
        doc: Document to append to.
        question_num: Number printed before the question text.
        question_text: Question text; ``highlight_words`` are coloured in it.
        choices: Iterable of ``(letter, text)`` pairs.
        correct_answers: List of answer letters, or a single value; an empty
            list or falsy value is shown as "/".
        source: Text shown after "Source:".
        comment, choice_commentaire, photo_q: Passed on to
            ``add_choice_commentaire_section`` when comments are shown.
        photo_c: Path of the image shown after the choices, if any.
        theme_color: Accent RGBColor; defaults to THEME_COLOR.
        theme_hex: Accent colour as hex; defaults to THEME_COLOR_HEX.
        show_comments: When False the comment section is never added.
        highlight_words: Literal strings to highlight; defaults to none.
    """
    if theme_color is None:
        theme_color = THEME_COLOR
    if theme_hex is None:
        theme_hex = THEME_COLOR_HEX
    if highlight_words is None:
        highlight_words = []

    # Style used for the thin spacer paragraph at the end of a block
    if 'TinySpace' not in doc.styles:
        tiny_style = doc.styles.add_style('TinySpace', WD_STYLE_TYPE.PARAGRAPH)
        tiny_style.font.name = 'SF Pro'
        tiny_style.font.size = Pt(5)
        tiny_style.paragraph_format.line_spacing = Pt(5)
        tiny_style.paragraph_format.space_before = Pt(0)
        tiny_style.paragraph_format.space_after = Pt(0)

    # Question title with reduced spacing and keep-together formatting
    question_para = doc.add_paragraph()
    question_para.paragraph_format.space_before = Pt(1)
    question_para.paragraph_format.space_after = Pt(0)
    question_para.paragraph_format.keep_with_next = True
    question_para.paragraph_format.keep_together = True
    question_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    # Question number in the theme colour
    num_run = question_para.add_run(f"{question_num}. ")
    num_run.font.name = 'Inter ExtraBold'
    num_run.font.size = Pt(10)
    num_run.font.bold = True
    num_run.font.color.rgb = theme_color

    # Question text, with highlighted words
    highlight_words_in_text(question_para, question_text, highlight_words, theme_color,
                            font_name='Inter ExtraBold', font_size=10)

    # Choices, skipping those that are only X's
    filtered_choices = [(letter, text) for letter, text in choices if not is_only_x_string(text)]
    for choice_letter, choice_text in filtered_choices:
        choice_para = doc.add_paragraph()
        choice_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
        choice_para.paragraph_format.space_before = Pt(1)
        choice_para.paragraph_format.space_after = Pt(1)
        choice_para.paragraph_format.keep_together = True
        # Every choice stays with the next paragraph: the following choice,
        # or - for the last one - Photo C / the answer-source line.
        choice_para.paragraph_format.keep_with_next = True

        # Ensure each choice ends with a dot
        choice_text = str(choice_text).strip()
        if not choice_text.endswith('.'):
            choice_text += '.'

        # Choice letter (e.g., "A-")
        letter_run = choice_para.add_run(f"{choice_letter}- ")
        letter_run.font.name = 'Inter ExtraBold'
        letter_run.font.size = Pt(10)
        # Choice text (highlighting is not applied to choices)
        text_run = choice_para.add_run(choice_text)
        text_run.font.name = 'Inter Display SemiBold'
        text_run.font.size = Pt(10)

    # Photo C goes right after the choices, before the answer/source line
    photo_c_clean = str(photo_c).strip() if photo_c else ''
    if photo_c_clean and photo_c_clean.lower() not in ['nan', 'none', '']:
        if os.path.exists(photo_c_clean):
            try:
                print(f"DEBUG: Adding Photo C from: {photo_c_clean}")
                photo_para = doc.add_paragraph()
                photo_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
                photo_para.paragraph_format.space_before = Pt(2)
                photo_para.paragraph_format.space_after = Pt(2)
                photo_para.paragraph_format.keep_with_next = True  # Keep with source
                run = photo_para.add_run()
                run.add_picture(photo_c_clean, width=Inches(2.5))
                print("DEBUG: Successfully added Photo C")
            except Exception as e:
                print(f"ERROR: Could not add Photo C: {e}")
                # Leave a visible note in the document instead of failing
                error_para = doc.add_paragraph()
                error_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
                error_run = error_para.add_run(f"[Photo C error: {str(e)[:50]}]")
                error_run.font.size = Pt(7)
                error_run.font.italic = True
        else:
            print(f"WARNING: Photo C path does not exist: {photo_c_clean}")

    # Answer and source side by side: a single-row, 2-column table
    info_table = doc.add_table(rows=1, cols=2)
    info_table.alignment = WD_TABLE_ALIGNMENT.LEFT
    info_table.allow_autofit = False  # Disable autofit for manual control

    # Remove all borders
    tblPr = info_table._tbl.tblPr
    if tblPr is None:
        tblPr = OxmlElement('w:tblPr')
        info_table._tbl.insert(0, tblPr)
    tblBorders = parse_xml(f'''
    <w:tblBorders {nsdecls("w")}>
        <w:top w:val="none"/>
        <w:left w:val="none"/>
        <w:bottom w:val="none"/>
        <w:right w:val="none"/>
        <w:insideH w:val="none"/>
        <w:insideV w:val="none"/>
    </w:tblBorders>
    ''')
    tblPr.append(tblBorders)

    # Column widths
    left_cell = info_table.rows[0].cells[0]
    right_cell = info_table.rows[0].cells[1]
    left_cell.width = Inches(1.5)  # Left cell for "Réponse: ABC"
    right_cell.width = Inches(4.5)  # Right cell for source
    # Make sure both cells carry a properties element
    left_cell._element.get_or_add_tcPr()
    right_cell._element.get_or_add_tcPr()

    # LEFT cell - Answer
    left_para = left_cell.paragraphs[0]
    left_para.alignment = WD_ALIGN_PARAGRAPH.LEFT
    left_para.paragraph_format.space_before = Pt(2)
    left_para.paragraph_format.space_after = Pt(2)

    # Answer text: a list becomes a space-separated string, nothing becomes "/"
    if isinstance(correct_answers, list):
        answer_text = " ".join(str(answer) for answer in correct_answers) if correct_answers else "/"
    else:
        answer_text = str(correct_answers) if correct_answers else "/"

    answer_label_run = left_para.add_run("Réponse:")
    answer_label_run.font.name = 'Inter ExtraBold'
    answer_label_run.font.size = Pt(8)
    answer_label_run.font.bold = True
    answer_label_run.font.underline = True
    answer_value_run = left_para.add_run(f' {answer_text}')
    answer_value_run.font.name = 'Inter ExtraBold'
    answer_value_run.font.size = Pt(8)
    answer_value_run.font.color.rgb = theme_color

    # RIGHT cell - Source
    right_para = right_cell.paragraphs[0]
    right_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    right_para.paragraph_format.space_before = Pt(2)
    right_para.paragraph_format.space_after = Pt(2)
    source_label_run = right_para.add_run("Source:")
    source_label_run.font.name = 'Inter ExtraBold'
    source_label_run.font.size = Pt(8)
    source_label_run.font.bold = True
    source_label_run.font.underline = True
    source_value_run = right_para.add_run(f" {source}")
    source_value_run.font.name = 'Inter ExtraBold'
    source_value_run.font.size = Pt(8)
    source_value_run.font.color.rgb = theme_color

    # Keep with comment if exists
    if comment and str(comment).strip() and str(comment).lower() != 'nan':
        right_para.paragraph_format.keep_with_next = True

    # Only show comments if show_comments is True
    if show_comments and (comment or choice_commentaire or photo_q):
        add_choice_commentaire_section(
            doc,
            choice_commentaire,
            photo_q,
            theme_color,
            theme_hex,
            general_comment=comment,
            question_num=question_num,
            highlight_words=highlight_words
        )
    else:
        # Only add empty space if there's no comment box
        empty_para = doc.add_paragraph(' ', style='TinySpace')
        empty_para.paragraph_format.space_before = Pt(0)
        empty_para.paragraph_format.space_after = Pt(0)
        empty_para.paragraph_format.line_spacing = Pt(7)
        empty_run = empty_para.add_run(' ')
        empty_run.font.size = Pt(7)
def add_page_numbers(doc, theme_hex=None):
    """Add centered page numbers and a link back to the TOC to every footer.

    Headers are unlinked from the previous section but their content is kept.
    The first section (the TOC) gets an emptied footer and no page number;
    numbering restarts at 1 in the second section. An even-page footer with
    the same content is created as well; a failure there is reported and
    does not stop processing.
    """
    theme_hex = THEME_COLOR_HEX if theme_hex is None else theme_hex

    def _populate_footer(footer_elem, theme_hex):
        """Write the spacer line, the PAGE field and the TOC link into a footer."""
        # The existing first paragraph acts as an empty line above the number
        spacer_format = footer_elem.paragraphs[0].paragraph_format
        spacer_format.space_before = Pt(0)
        spacer_format.space_after = Pt(0)
        spacer_format.line_spacing = 1.0

        # Centered paragraph that carries the page number
        number_para = footer_elem.add_paragraph()
        number_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        number_para.paragraph_format.space_before = Pt(0)
        number_para.paragraph_format.space_after = Pt(0)
        number_run = number_para.add_run()

        # PAGE field: begin marker, instruction text, end marker
        field_begin = OxmlElement('w:fldChar')
        field_begin.set(qn('w:fldCharType'), 'begin')
        field_code = OxmlElement('w:instrText')
        field_code.set(qn('xml:space'), 'preserve')
        field_code.text = "PAGE"
        field_end = OxmlElement('w:fldChar')
        field_end.set(qn('w:fldCharType'), 'end')
        for node in (field_begin, field_code, field_end):
            number_run._r.append(node)

        number_font = number_run.font
        number_font.name = 'Montserrat'
        number_font.size = Pt(14)
        number_font.bold = True
        number_font.color.rgb = RGBColor.from_string(theme_hex)

        # TOC link in an absolutely positioned text box (right side), so it
        # does not disturb the centering of the page number
        toc_textbox_xml = f'''
<w:r xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:w10="urn:schemas-microsoft-com:office:word">
<w:pict>
<v:shape style="position:absolute;margin-left:0in;margin-top:0;width:60pt;height:20pt;z-index:1;mso-position-horizontal:right;mso-position-horizontal-relative:margin;mso-position-vertical-relative:line" fillcolor="#FFFFFF" filled="f" stroked="f">
<v:textbox inset="5pt,0pt,5pt,0pt" style="mso-fit-shape-to-text:t">
<w:txbxContent>
<w:p>
<w:pPr>
<w:jc w:val="right"/>
<w:spacing w:before="0" w:after="0"/>
</w:pPr>
<w:hyperlink w:anchor="TOC_BOOKMARK">
<w:r>
<w:rPr>
<w:rFonts w:ascii="Aptos" w:hAnsi="Aptos"/>
<w:sz w:val="28"/>
<w:color w:val="{theme_hex}"/>
</w:rPr>
<w:t>↗️</w:t>
</w:r>
<w:r>
<w:rPr>
<w:rFonts w:ascii="Montserrat" w:hAnsi="Montserrat"/>
<w:b/>
<w:sz w:val="18"/>
<w:color w:val="{theme_hex}"/>
<w:u w:val="single"/>
</w:rPr>
<w:t> SOM</w:t>
</w:r>
</w:hyperlink>
</w:p>
</w:txbxContent>
</v:textbox>
</v:shape>
</w:pict>
</w:r>
'''
        number_para._p.append(parse_xml(toc_textbox_xml))

    def _attach_manual_even_footer(section):
        """Build the even-page footer as a raw package part and reference it."""
        from docx.opc.packuri import PackURI
        from docx.opc.part import XmlPart
        # Same structure as the odd/default footer, written as raw XML
        even_ftr_xml = f'''<w:ftr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<w:p>
<w:pPr><w:spacing w:before="0" w:after="0"/></w:pPr>
</w:p>
<w:p>
<w:pPr>
<w:jc w:val="center"/>
<w:spacing w:before="0" w:after="0"/>
</w:pPr>
<w:r>
<w:fldChar w:fldCharType="begin"/>
</w:r>
<w:r>
<w:instrText xml:space="preserve">PAGE</w:instrText>
</w:r>
<w:r>
<w:fldChar w:fldCharType="end"/>
</w:r>
<w:r>
<w:rPr>
<w:rFonts w:ascii="Montserrat" w:hAnsi="Montserrat"/>
<w:b/>
<w:sz w:val="28"/>
<w:color w:val="{theme_hex}"/>
</w:rPr>
</w:r>
<w:r>
<w:pict>
<v:shape style="position:absolute;margin-left:0in;margin-top:0;width:60pt;height:20pt;z-index:1;mso-position-horizontal:right;mso-position-horizontal-relative:margin;mso-position-vertical-relative:line" fillcolor="#FFFFFF" filled="f" stroked="f">
<v:textbox inset="5pt,0pt,5pt,0pt" style="mso-fit-shape-to-text:t">
<w:txbxContent>
<w:p>
<w:pPr>
<w:jc w:val="right"/>
<w:spacing w:before="0" w:after="0"/>
</w:pPr>
<w:hyperlink w:anchor="TOC_BOOKMARK">
<w:r>
<w:rPr>
<w:rFonts w:ascii="Aptos" w:hAnsi="Aptos"/>
<w:sz w:val="28"/>
<w:color w:val="{theme_hex}"/>
</w:rPr>
<w:t>↗️</w:t>
</w:r>
<w:r>
<w:rPr>
<w:rFonts w:ascii="Montserrat" w:hAnsi="Montserrat"/>
<w:b/>
<w:sz w:val="18"/>
<w:color w:val="{theme_hex}"/>
<w:u w:val="single"/>
</w:rPr>
<w:t> SOM</w:t>
</w:r>
</w:hyperlink>
</w:p>
</w:txbxContent>
</v:textbox>
</v:shape>
</w:pict>
</w:r>
</w:p>
</w:ftr>'''
        # Create the footer part
        partname = PackURI(f'/word/footer_even_{id(section)}.xml')
        element = parse_xml(even_ftr_xml)
        content_type = 'application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml'
        package = section.part.package
        even_part = XmlPart(partname, content_type, element, package)
        # Relate it to the document part
        rel_id = section.part.relate_to(even_part,
                                        'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer')
        # Replace any existing even-footer reference with the new one
        section_props = section._sectPr
        for reference in list(section_props.findall(qn('w:footerReference'))):
            if reference.get(qn('w:type')) == 'even':
                section_props.remove(reference)
        new_reference = OxmlElement('w:footerReference')
        new_reference.set(qn('w:type'), 'even')
        new_reference.set(qn('r:id'), rel_id)
        section_props.append(new_reference)

    for section_idx, section in enumerate(doc.sections):
        # ===== HEADER (keep existing text like module name) =====
        header = section.header
        header.is_linked_to_previous = False
        section.header_distance = Cm(0.3)
        if not header.paragraphs:
            header.add_paragraph()

        # ===== FOOTER FOR ODD/DEFAULT PAGES (page numbers + TOC link) =====
        footer = section.footer
        footer.is_linked_to_previous = False
        section.footer_distance = Cm(0.4)  # Distance from bottom of page to footer
        # Start from an empty first paragraph
        if not footer.paragraphs:
            footer.add_paragraph()
        else:
            footer.paragraphs[0].clear()

        # The first section (TOC) carries no page number
        if section_idx == 0:
            continue

        # The second section is the first content page: restart numbering at 1
        if section_idx == 1:
            section_props = section._sectPr
            numbering = section_props.find(qn('w:pgNumType'))
            if numbering is None:
                numbering = OxmlElement('w:pgNumType')
                section_props.append(numbering)
            numbering.set(qn('w:start'), '1')

        _populate_footer(footer, theme_hex)

        # ===== CREATE EVEN PAGE FOOTER =====
        try:
            if not hasattr(section, 'even_page_footer'):
                # No built-in support: create the part by hand
                _attach_manual_even_footer(section)
                print("✓ Created even page footer via manual part creation")
            else:
                footer_even = section.even_page_footer
                footer_even.is_linked_to_previous = False
                if footer_even.paragraphs:
                    footer_even.paragraphs[0].clear()
                else:
                    footer_even.add_paragraph()
                _populate_footer(footer_even, theme_hex)
                print("✓ Created even page footer using built-in property")
        except Exception as e:
            print(f"Warning: Could not create even page footer: {e}")
            import traceback
            traceback.print_exc()
def add_toc_bookmark(doc, toc_title_para):
    """Wrap the TOC title paragraph in the ``TOC_BOOKMARK`` bookmark.

    Hyperlinks whose ``w:anchor`` is ``TOC_BOOKMARK`` resolve to this paragraph.

    Args:
        doc: The document being built. Not used; kept so existing callers
            keep working.
        toc_title_para: python-docx paragraph that holds the TOC title.
    """
    p = toc_title_para._p

    # NOTE(review): the bookmark id is fixed at 0 - confirm no other bookmark
    # in the document is created with id 0.
    bookmark_start = OxmlElement('w:bookmarkStart')
    bookmark_start.set(qn('w:id'), '0')
    bookmark_start.set(qn('w:name'), 'TOC_BOOKMARK')

    # w:pPr has to remain the first child of w:p, so the bookmark start is
    # placed right after it when the paragraph already carries properties.
    pPr = p.find(qn('w:pPr'))
    if pPr is not None:
        pPr.addnext(bookmark_start)
    else:
        p.insert(0, bookmark_start)

    bookmark_end = OxmlElement('w:bookmarkEnd')
    bookmark_end.set(qn('w:id'), '0')
    p.append(bookmark_end)
def set_module_header(doc, module_name):
    """Write the upper-cased module name into the header of every section.

    Each section's header is unlinked from the previous section, its first
    paragraph is cleared (one is created when the header has none) and a
    single left-aligned, bold, black 10 pt Montserrat run is added.

    Args:
        doc: python-docx document whose section headers are updated.
        module_name: Module name to display. Converted with ``str()`` first,
            so non-string values (e.g. numbers read from a spreadsheet) work.
    """
    header_text = str(module_name).upper()

    for section in doc.sections:
        header = section.header
        header.is_linked_to_previous = False

        if not header.paragraphs:
            header.add_paragraph()

        para = header.paragraphs[0]
        para.clear()
        para.alignment = WD_ALIGN_PARAGRAPH.LEFT

        run = para.add_run(header_text)
        run.font.name = 'Montserrat'
        run.font.size = Pt(10)
        run.font.bold = True
        run.font.color.rgb = RGBColor(0, 0, 0)
def set_zero_spacing(paragraph):
    """Set the paragraph's space-before and space-after to 0 pt."""
    fmt = paragraph.paragraph_format
    fmt.space_before = fmt.space_after = Pt(0)
def is_valid_cours_number(cours_value):
    """Tell whether ``cours_value`` denotes a usable course number.

    A value is accepted when, after ``str()`` / strip / upper-casing, it
    parses as a strictly positive whole number. Missing values, ``'S2'``,
    ``'NAN'`` and the empty string are rejected.

    Returns:
        bool: True for a valid course number, False otherwise.
    """
    if pd.isna(cours_value):
        return False

    normalized = str(cours_value).strip().upper()
    if normalized in ('S2', 'NAN', ''):
        return False

    try:
        as_float = float(normalized)
    except (ValueError, TypeError, OverflowError):
        return False

    # Infinity is not an integer, so it is rejected here as well.
    return as_float > 0 and as_float.is_integer()
def check_if_course_has_e_choices(course_questions):
    """Return True when at least one question offers a choice lettered 'E'.

    The letter comparison is case-insensitive. Every question dict must have
    a ``'choices'`` list whose items have a ``'letter'`` string.
    """
    return any(
        choice['letter'].upper() == 'E'
        for q_data in course_questions
        for choice in q_data['choices']
    )
def create_comment_boxes_section(doc, questions_by_course, cours_titles, module_name, theme_color=None, theme_hex=None):
    """Append the "COMMENTAIRES" section: one comment box per commented question.

    Nothing is added when no question carries a ``'comment'``,
    ``'choice_commentaire'`` or ``'photo_q'`` value. Otherwise a column break
    and a rounded title shape are added, followed - for each course (sorted
    by key) that has at least one such question - by a course title and the
    comment boxes. Question numbers run across all courses, counting
    questions without comments too.

    Args:
        doc: python-docx document to append to.
        questions_by_course: Mapping of course number -> list of question dicts.
        cours_titles: Mapping of course number -> title; missing entries fall
            back to ``"COURSE <n>"``.
        module_name: Not used by this function.
        theme_color: Color for course titles; defaults to ``THEME_COLOR``.
        theme_hex: Hex color (no ``#``) for the title shape; defaults to
            ``THEME_COLOR_HEX``.
    """
    if theme_color is None:
        theme_color = THEME_COLOR
    if theme_hex is None:
        theme_hex = THEME_COLOR_HEX

    def needs_box(q_data):
        # A question gets a box when it has a comment, per-choice comments or a photo.
        return bool(q_data.get('comment') or q_data.get('choice_commentaire') or q_data.get('photo_q'))

    # Bail out before touching the document when there is nothing to show.
    if not any(needs_box(q_data)
               for course_questions in questions_by_course.values()
               for q_data in course_questions):
        return

    # Section title, started in a new column.
    add_column_break(doc)
    title_para = doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_para.paragraph_format.space_before = Pt(12)
    title_para.paragraph_format.space_after = Pt(8)

    # Shape width is estimated from the title length.
    comment_text = "COMMENTAIRES"
    estimated_width = (len(comment_text) * 12) + 60

    # Rounded rectangle holding the section title.
    shape_xml = f'''
<w:r xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:v="urn:schemas-microsoft-com:vml">
<w:pict>
<v:roundrect style="width:{estimated_width}pt;height:31pt"
arcsize="50%" fillcolor="#{theme_hex}" stroked="f">
<v:textbox inset="10pt,0pt,10pt,0pt" style="v-text-anchor:middle">
<w:txbxContent>
<w:p>
<w:pPr>
<w:jc w:val="center"/>
<w:spacing w:before="0" w:after="0"/>
</w:pPr>
<w:r>
<w:rPr>
<w:rFonts w:ascii="Montserrat" w:hAnsi="Montserrat"/>
<w:b/>
<w:sz w:val="35"/>
<w:color w:val="FFFFFF"/>
</w:rPr>
<w:t>{comment_text}</w:t>
</w:r>
</w:p>
</w:txbxContent>
</v:textbox>
</v:roundrect>
</w:pict>
</w:r>
'''
    title_para._p.append(parse_xml(shape_xml))

    # Number of the first question of the course currently being processed.
    running_number = 1

    for cours_num in sorted(questions_by_course.keys()):
        course_questions = questions_by_course[cours_num]
        course_title = cours_titles.get(cours_num, f"COURSE {cours_num}")

        if not any(needs_box(q_data) for q_data in course_questions):
            # Still advance the numbering past this course's questions.
            running_number += len(course_questions)
            continue

        # Course heading.
        heading = doc.add_paragraph()
        heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
        heading.paragraph_format.space_before = Pt(8)
        heading.paragraph_format.space_after = Pt(4)
        heading_run = heading.add_run(f"{cours_num}. {course_title}")
        heading_run.font.name = 'Montserrat'
        heading_run.font.size = Pt(13)
        heading_run.font.bold = True
        heading_run.font.color.rgb = theme_color

        # One box per question that has something to show.
        for offset, q_data in enumerate(course_questions):
            comment = q_data.get('comment')
            choice_commentaire = q_data.get('choice_commentaire')
            photo_q = q_data.get('photo_q')
            if not (comment or choice_commentaire or photo_q):
                continue
            add_choice_commentaire_section(doc, choice_commentaire, photo_q, theme_color, theme_hex,
                                           general_comment=comment, question_num=running_number + offset)

        running_number += len(course_questions)
def create_answer_tables(doc, questions_by_course, cours_titles, module_name, bookmark_id, theme_hex=None):
    """Append the "RÉPONSES" section: one side-by-side answer grid per course.

    A rounded title shape is added and bookmarked. Then, for every course
    that has questions (in sorted key order), a centered title paragraph and
    a borderless two-cell container table are appended; each container cell
    receives one half of the course's answer grid. Courses without questions
    are skipped entirely.

    Args:
        doc: python-docx document to append to.
        questions_by_course: Mapping of course number -> list of question
            dicts (each with a ``'choices'`` list).
        cours_titles: Mapping of course number -> title; missing entries fall
            back to ``"COURSE <n>"``.
        module_name: Used for the bookmark name and the TOC entry text.
        bookmark_id: Id given to the section bookmark.
        theme_hex: Hex color (no ``#``) for the title shape and course
            titles; defaults to ``THEME_COLOR_HEX``.

    Returns:
        tuple: ``(bookmark_id + 1, toc_entry)`` where ``toc_entry`` is a dict
        with the keys ``'level'``, ``'text'`` and ``'bm'``.
    """
    if theme_hex is None:
        theme_hex = THEME_COLOR_HEX
    theme_color = RGBColor.from_string(theme_hex)

    # Continue with two-column layout for answer tables
    continue_two_column_layout(doc)

    # Section title paragraph (holds the rounded shape below).
    title_para = doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_para.paragraph_format.space_before = Pt(12)
    title_para.paragraph_format.space_after = Pt(8)

    # Shape width is estimated from the title length.
    response_text = "RÉPONSES"
    estimated_width = (len(response_text) * 12) + 60

    # Rounded rectangle holding the section title.
    shape_xml = f'''
<w:r xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:v="urn:schemas-microsoft-com:vml">
<w:pict>
<v:roundrect style="width:{estimated_width}pt;height:31pt"
arcsize="50%" fillcolor="#{theme_hex}" stroked="f">
<v:textbox inset="10pt,0pt,10pt,0pt" style="v-text-anchor:middle">
<w:txbxContent>
<w:p>
<w:pPr>
<w:jc w:val="center"/>
<w:spacing w:before="0" w:after="0"/>
</w:pPr>
<w:r>
<w:rPr>
<w:rFonts w:ascii="Montserrat" w:hAnsi="Montserrat"/>
<w:b/>
<w:sz w:val="35"/>
<w:color w:val="FFFFFF"/>
</w:rPr>
<w:t>{response_text}</w:t>
</w:r>
</w:p>
</w:txbxContent>
</v:textbox>
</v:roundrect>
</w:pict>
</w:r>
'''
    title_para._p.append(parse_xml(shape_xml))

    # Bookmark the section title so the TOC can point at it.
    bm_responses_name = sanitize_bookmark_name(f"RESPONSES_{module_name}")
    add_bookmark_to_paragraph(title_para, bm_responses_name, bookmark_id)
    toc_entry = {'level': 'responses', 'text': f"RÉPONSES - {module_name}", 'bm': bm_responses_name}
    bookmark_id += 1

    for cours_num in sorted(questions_by_course.keys()):
        course_questions = questions_by_course[cours_num]
        num_questions = len(course_questions)

        # Skip empty courses BEFORE creating anything, so no blank title
        # paragraph is left behind in the document.
        if num_questions == 0:
            continue

        course_title = cours_titles.get(cours_num, f"COURSE {cours_num}")

        # Course title paragraph, kept on the same page as the table below it.
        course_title_para = doc.add_paragraph()
        course_title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        title_format = course_title_para.paragraph_format
        title_format.space_before = Pt(8)
        title_format.space_after = Pt(4)
        title_format.keep_with_next = True
        title_format.keep_together = True
        title_format.page_break_before = False

        # Add widow/orphan control
        pPr = course_title_para._element.get_or_add_pPr()
        widowControl = OxmlElement('w:widowControl')
        widowControl.set(qn('w:val'), '1')
        pPr.append(widowControl)

        # Title = course number + course title + circled question count.
        title_parts = (
            (f"{cours_num}. ", 'Inter ExtraBold'),
            (f"{course_title} ", 'Montserrat'),
            (f"{get_circled_number(num_questions)}", 'MS UI Gothic'),
        )
        for text, font_name in title_parts:
            run = course_title_para.add_run(text)
            run.font.name = font_name
            run.font.size = Pt(13)
            run.font.bold = True
            run.font.color.rgb = theme_color

        # An E column is only shown when some question of the course has an E choice.
        has_e_choices = check_if_course_has_e_choices(course_questions)
        if has_e_choices:
            choice_letters = ['A', 'B', 'C', 'D', 'E']
        else:
            choice_letters = ['A', 'B', 'C', 'D']
        headers = [''] + choice_letters
        num_cols = len(headers)

        # First half takes the extra question when the count is odd.
        mid_point = (num_questions + 1) // 2
        first_half = course_questions[:mid_point]
        second_half = course_questions[mid_point:]

        # Borderless 1x2 container that holds the two half tables side by side.
        container_table = doc.add_table(rows=1, cols=2)
        container_table.alignment = WD_TABLE_ALIGNMENT.CENTER
        container_table.allow_autofit = False

        # Set table properties to prevent splitting
        tblPr = container_table._tbl.tblPr
        if tblPr is None:
            tblPr = OxmlElement('w:tblPr')
            container_table._tbl.insert(0, tblPr)
        tblPr.append(OxmlElement('w:cantSplit'))

        for row in container_table.rows:
            for cell in row.cells:
                cell._tc.get_or_add_tcPr()
                for para in cell.paragraphs:
                    para.paragraph_format.keep_together = True
                    para.paragraph_format.keep_with_next = True

        # Set container borders to none
        tblBorders = parse_xml(f'''
<w:tblBorders {nsdecls("w")}>
<w:top w:val="none"/>
<w:left w:val="none"/>
<w:bottom w:val="none"/>
<w:right w:val="none"/>
<w:insideH w:val="none"/>
<w:insideV w:val="none"/>
</w:tblBorders>
''')
        tblPr.append(tblBorders)

        # Question numbers restart at 1 for every course.
        # NOTE(review): create_comment_boxes_section numbers questions across
        # all courses instead - confirm which numbering is intended here.
        left_cell, right_cell = container_table.rows[0].cells[0], container_table.rows[0].cells[1]
        create_half_answer_table(left_cell, first_half, num_cols, headers, choice_letters, 1, has_e_choices)
        create_half_answer_table(right_cell, second_half, num_cols, headers, choice_letters, mid_point + 1,
                                 has_e_choices)

        # Add spacing after the container table
        spacing_para = doc.add_paragraph()
        spacing_para.paragraph_format.space_after = Pt(12)
        spacing_para.paragraph_format.keep_together = True

    # Return both bookmark_id and toc_entry
    return bookmark_id, toc_entry
def create_half_answer_table(cell, questions, num_cols, headers, choice_letters, start_q_num, has_e_choices):
    """Build one half of a course's answer grid inside ``cell``.

    The table has a shaded header row followed by one row per question. For
    each column letter a question row shows:

    * nothing when the question has no such choice,
    * ``▨`` when the question has no correct choice at all,
    * ``☒`` when the choice is correct,
    * ``☐`` otherwise.

    Choice letters are compared case-insensitively.

    Args:
        cell: Table cell that receives the nested table.
        questions: Question dicts; each needs a ``'choices'`` list whose items
            have ``'letter'`` and ``'is_correct'``.
        num_cols: Number of table columns (question column + choice columns).
        headers: Header texts, one per column.
        choice_letters: Upper-case letters of the choice columns, in order.
        start_q_num: Number shown for the first question row.
        has_e_choices: Not used by this function.

    Returns:
        None. Returns immediately, creating nothing, when ``questions`` is empty.
    """
    if len(questions) == 0:
        return
    num_questions = len(questions)

    q_col_width = Inches(0.75)      # question-number column
    choice_col_width = Inches(0.1)  # every choice column

    # Create table inside the cell
    table = cell.add_table(rows=num_questions + 1, cols=num_cols)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    table.style = None
    table.allow_autofit = False

    # Apply cantSplit to the inner table as well.
    tblPr = table._tbl.tblPr
    if tblPr is None:
        tblPr = OxmlElement('w:tblPr')
        table._tbl.insert(0, tblPr)
    tblPr.append(OxmlElement('w:cantSplit'))

    rows = list(table.rows)

    # Mark the first row as a header row. Its cantSplit is added by the loop
    # below together with every other row, so it is emitted only once.
    rows[0]._tr.get_or_add_trPr().append(OxmlElement('w:tblHeader'))

    # Add table-level border
    tblBorders = parse_xml(f'''
<w:tblBorders {nsdecls("w")}>
<w:bottom w:val="single" w:sz="4" w:space="0" w:color="000000"/>
</w:tblBorders>
''')
    tblPr.append(tblBorders)

    # Keep rows unsplittable and chained to the next row; set column widths.
    last_row_idx = len(rows) - 1
    for row_idx, row in enumerate(rows):
        row._tr.get_or_add_trPr().append(OxmlElement('w:cantSplit'))
        keep_with_next = row_idx < last_row_idx
        for col_idx, cell_item in enumerate(row.cells):
            for paragraph in cell_item.paragraphs:
                paragraph.paragraph_format.keep_together = True
                paragraph.paragraph_format.keep_with_next = keep_with_next
            cell_item.width = q_col_width if col_idx == 0 else choice_col_width

    # Header row
    header_cells = rows[0].cells
    last_header_idx = len(headers) - 1
    for i, header in enumerate(headers):
        header_cell = header_cells[i]
        header_cell.text = header
        paragraph = header_cell.paragraphs[0]
        set_zero_spacing(paragraph)
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        run = paragraph.runs[0] if paragraph.runs else paragraph.add_run(header)
        run.font.name = 'Inter SemiBold'
        run.font.size = Pt(11)
        header_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER

        # Outer frame only: left edge on the first cell, right edge on the last.
        if i == 0:
            set_cell_borders(header_cell, top=True, bottom=True, left=True, right=False)
        elif i == last_header_idx:
            set_cell_borders(header_cell, top=True, bottom=True, left=False, right=True)
        else:
            set_cell_borders(header_cell, top=True, bottom=True, left=False, right=False)

        # Gray shading
        shading_elm = OxmlElement('w:shd')
        shading_elm.set(qn('w:val'), 'clear')
        shading_elm.set(qn('w:color'), 'auto')
        shading_elm.set(qn('w:fill'), 'D9D9D9')
        header_cell._tc.get_or_add_tcPr().append(shading_elm)

    # Fill data rows
    last_choice_col = len(choice_letters)
    for row_idx, q_data in enumerate(questions, 1):
        row_cells = rows[row_idx].cells
        is_last_row = (row_idx == num_questions)

        # Question number
        q_num = start_q_num + row_idx - 1
        paragraph = row_cells[0].paragraphs[0]
        paragraph.clear()
        set_zero_spacing(paragraph)
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        run = paragraph.add_run(f"Q{q_num}")
        run.font.name = 'Inter ExtraBold'
        run.font.size = Pt(7.5)
        run.font.bold = True
        row_cells[0].vertical_alignment = WD_ALIGN_VERTICAL.CENTER
        set_cell_borders(row_cells[0], top=False, bottom=is_last_row, left=True, right=False)

        # Both sets are upper-cased so they match the upper-case column
        # letters regardless of how the choice letters were stored.
        available_choices = {choice['letter'].upper() for choice in q_data['choices']}
        correct_answers = {choice['letter'].upper() for choice in q_data['choices'] if choice['is_correct']}

        # Fill choice columns
        for i, letter in enumerate(choice_letters, 1):
            if letter not in available_choices:
                mark = ''
            elif not correct_answers:
                mark = '▨'
            elif letter in correct_answers:
                mark = '☒'
            else:
                mark = '☐'

            choice_cell = row_cells[i]
            choice_cell.text = mark
            paragraph = choice_cell.paragraphs[0]
            set_zero_spacing(paragraph)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
            if mark:
                run = paragraph.runs[0] if paragraph.runs else paragraph.add_run(mark)
                run.font.name = 'Calibri'
                run.font.size = Pt(11)
                run.font.bold = True
            choice_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER

            # Right edge only on the last choice column.
            set_cell_borders(choice_cell, top=False, bottom=is_last_row, left=False,
                             right=(i == last_choice_col))
def create_empty_course_table(doc, course_questions, course_num, overall_start_num):
    """Append a blank (unticked) answer grid for one course.

    The course's questions are split into two halves that are rendered side
    by side inside a borderless two-cell container table. An E column is
    included only when some question of the course has an E choice.

    Args:
        doc: python-docx document to append to.
        course_questions: Question dicts of the course.
        course_num: Course number, used only in the debug print.
        overall_start_num: Number shown for the course's first question.

    Returns:
        int: The number to use for the first question of the next course
        (``overall_start_num`` unchanged when the course has no questions).
    """
    num_questions = len(course_questions)
    if not num_questions:
        return overall_start_num

    has_e_choices = check_if_course_has_e_choices(course_questions)

    # Columns: question number, then A-D (plus E when needed).
    choice_letters = ['A', 'B', 'C', 'D', 'E'] if has_e_choices else ['A', 'B', 'C', 'D']
    headers = [''] + choice_letters
    num_cols = len(headers)

    # The first half takes the extra question when the count is odd.
    mid_point = (num_questions + 1) // 2
    first_half, second_half = course_questions[:mid_point], course_questions[mid_point:]
    print(
        f"DEBUG: Empty table for Course {course_num} - Total questions: {num_questions}, Split: {len(first_half)} + {len(second_half)}")

    # 1x2 container holding both half tables.
    container_table = doc.add_table(rows=1, cols=2)
    container_table.alignment = WD_TABLE_ALIGNMENT.CENTER
    container_table.allow_autofit = False

    container_tbl = container_table._tbl
    tblPr = container_tbl.tblPr
    if tblPr is None:
        tblPr = OxmlElement('w:tblPr')
        container_tbl.insert(0, tblPr)

    # Ask Word not to break the container across pages.
    tblPr.append(OxmlElement('w:cantSplit'))

    # Reinforce keep-together on every paragraph of the container cells.
    for container_row in container_table.rows:
        for container_cell in container_row.cells:
            container_cell._tc.get_or_add_tcPr()
            for para in container_cell.paragraphs:
                fmt = para.paragraph_format
                fmt.keep_together = True
                fmt.keep_with_next = True

    # The container itself is invisible: no borders at all.
    tblPr.append(parse_xml(f'''
<w:tblBorders {nsdecls("w")}>
<w:top w:val="none"/>
<w:left w:val="none"/>
<w:bottom w:val="none"/>
<w:right w:val="none"/>
<w:insideH w:val="none"/>
<w:insideV w:val="none"/>
</w:tblBorders>
'''))

    # Left half continues the running numbering; right half follows it.
    only_row = container_table.rows[0]
    create_half_empty_table(only_row.cells[0], first_half, num_cols, headers, choice_letters,
                            overall_start_num, has_e_choices)
    start_q_num_right = overall_start_num + len(first_half)
    create_half_empty_table(only_row.cells[1], second_half, num_cols, headers, choice_letters,
                            start_q_num_right, has_e_choices)

    # Blank paragraph providing the gap after the container.
    spacing_para = doc.add_paragraph()
    spacing_para.paragraph_format.space_after = Pt(12)
    spacing_para.paragraph_format.keep_together = True

    return overall_start_num + num_questions
def create_half_empty_table(cell, questions, num_cols, headers, choice_letters, start_q_num, has_e_choices):
    """Build one half of a blank answer grid inside ``cell``.

    The table has a shaded header row followed by one row per question. Each
    question row shows ``☐`` under every choice letter the question actually
    has and leaves the other choice cells empty.

    Args:
        cell: Table cell that receives the nested table.
        questions: Question dicts; each needs a ``'choices'`` list whose items
            have a ``'letter'``.
        num_cols: Number of table columns (question column + choice columns).
        headers: Header texts, one per column.
        choice_letters: Upper-case letters of the choice columns, in order.
        start_q_num: Number shown for the first question row.
        has_e_choices: Not used by this function.

    Returns:
        None. Returns immediately, creating nothing, when ``questions`` is empty.
    """
    num_questions = len(questions)
    if num_questions == 0:
        return

    # Question-number column width (same value the answer tables use).
    q_col_width = Inches(0.75)

    table = cell.add_table(rows=num_questions + 1, cols=num_cols)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    table.style = None
    table.allow_autofit = False

    inner_tbl = table._tbl
    tblPr = inner_tbl.tblPr
    if tblPr is None:
        tblPr = OxmlElement('w:tblPr')
        inner_tbl.insert(0, tblPr)

    # Ask Word not to split the nested table across pages.
    tblPr.append(OxmlElement('w:cantSplit'))

    # First row: header row that may not be split.
    all_tr = inner_tbl.xpath(".//w:tr")
    if all_tr:
        header_trPr = all_tr[0].get_or_add_trPr()
        header_trPr.append(OxmlElement('w:tblHeader'))
        header_trPr.append(OxmlElement('w:cantSplit'))

    # Table-level bottom border.
    tblPr.append(parse_xml(f'''
<w:tblBorders {nsdecls("w")}>
<w:bottom w:val="single" w:sz="4" w:space="0" w:color="000000"/>
</w:tblBorders>
'''))

    # Every row is unsplittable and its paragraphs are chained to the next
    # row; only the last row releases the chain.
    final_row_idx = len(table.rows) - 1
    for row_idx, row in enumerate(table.rows):
        row._tr.get_or_add_trPr().append(OxmlElement('w:cantSplit'))
        chain_to_next = row_idx < final_row_idx
        for cell_item in row.cells:
            for paragraph in cell_item.paragraphs:
                paragraph.paragraph_format.keep_together = True
                paragraph.paragraph_format.keep_with_next = chain_to_next

    # Column widths: wide question column, narrow equal choice columns.
    choice_col_width = Inches(0.1)
    for row in table.rows:
        for col_idx, cell_item in enumerate(row.cells):
            cell_item.width = q_col_width if col_idx == 0 else choice_col_width

    # ---- Header row ----
    header_cells = table.rows[0].cells
    for i, header in enumerate(headers):
        header_cell = header_cells[i]
        header_cell.text = header
        paragraph = header_cell.paragraphs[0]
        set_zero_spacing(paragraph)
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        run = paragraph.runs[0] if paragraph.runs else paragraph.add_run(header)
        run.font.name = 'Inter SemiBold'
        run.font.size = Pt(11)
        header_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER

        # Outer frame only: left edge on the first cell, right edge on the last.
        if i == 0:
            set_cell_borders(header_cell, top=True, bottom=True, left=True, right=False)
        elif i == len(headers) - 1:
            set_cell_borders(header_cell, top=True, bottom=True, left=False, right=True)
        else:
            set_cell_borders(header_cell, top=True, bottom=True, left=False, right=False)

        # Light gray fill.
        shading_elm = OxmlElement('w:shd')
        for attr, value in (('w:val', 'clear'), ('w:color', 'auto'), ('w:fill', 'D9D9D9')):
            shading_elm.set(qn(attr), value)
        header_cell._tc.get_or_add_tcPr().append(shading_elm)

    # ---- Question rows ----
    for row_idx, q_data in enumerate(questions, 1):
        row_cells = table.rows[row_idx].cells
        is_last_row = (row_idx == num_questions)

        # Question label in the first column.
        number_cell = row_cells[0]
        paragraph = number_cell.paragraphs[0]
        paragraph.clear()
        set_zero_spacing(paragraph)
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        q_num = start_q_num + row_idx - 1
        run = paragraph.add_run(f"Q{q_num}")
        run.font.name = 'Inter ExtraBold'
        run.font.size = Pt(7.5)
        run.font.bold = True
        number_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
        set_cell_borders(number_cell, top=False, bottom=is_last_row, left=True, right=False)

        # Letters this particular question offers.
        available_choices = [choice['letter'].upper() for choice in q_data['choices']]

        for i, letter in enumerate(choice_letters, 1):
            choice_cell = row_cells[i]
            # Empty box for an existing choice, blank cell otherwise.
            choice_cell.text = '☐' if letter in available_choices else ''
            paragraph = choice_cell.paragraphs[0]
            set_zero_spacing(paragraph)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
            if choice_cell.text:
                run = paragraph.runs[0] if paragraph.runs else paragraph.add_run(choice_cell.text)
                run.font.name = 'Calibri'
                run.font.size = Pt(11)
                run.font.bold = True
            choice_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER

            # Right edge only on the last choice column.
            if i == len(choice_letters):
                set_cell_borders(choice_cell, top=False, bottom=is_last_row, left=False, right=True)
            else:
                set_cell_borders(choice_cell, top=False, bottom=is_last_row, left=False, right=False)
def sanitize_bookmark_name(text):
    """Turn ``text`` into a bookmark-safe name.

    Every character other than an ASCII letter, digit or underscore becomes
    ``_`` and the result is cut to at most 40 characters.
    """
    # With re.ASCII, \W is exactly "not [A-Za-z0-9_]".
    cleaned = re.sub(r'\W', '_', str(text), flags=re.ASCII)
    # Keep it short: bookmark names have a length limit.
    return cleaned[:40]
def add_bookmark_to_paragraph(paragraph, bookmark_name, bm_id):
    """Wrap the content of ``paragraph`` in a Word bookmark.

    Args:
        paragraph: python-docx paragraph to bookmark.
        bookmark_name: Name of the bookmark (the target of PAGEREF fields and
            hyperlink anchors).
        bm_id: Bookmark id; written as ``str(bm_id)`` on both the start and
            the end marker so they pair up.
    """
    p = paragraph._p

    bookmark_start = OxmlElement('w:bookmarkStart')
    bookmark_start.set(qn('w:id'), str(bm_id))
    bookmark_start.set(qn('w:name'), bookmark_name)

    # The start marker goes before the paragraph content, but w:pPr has to
    # remain the first child of w:p - so insert right after it when present.
    pPr = p.find(qn('w:pPr'))
    if pPr is not None:
        pPr.addnext(bookmark_start)
    else:
        p.insert(0, bookmark_start)

    # The end marker closes the bookmark after all paragraph content.
    bookmark_end = OxmlElement('w:bookmarkEnd')
    bookmark_end.set(qn('w:id'), str(bm_id))
    p.append(bookmark_end)
def add_pagenumber_field_in_paragraph(paragraph, bookmark_name, right_inch=Inches(6.5)):
    """
    Append a ``PAGEREF`` field for ``bookmark_name`` to ``paragraph``.

    A right-aligned tab stop with a dotted leader is added at ``right_inch``
    and a tab character precedes the field, so the page number sits at the
    right edge behind a row of dots.
    """
    # Best effort: if the tab-stop call fails, still insert the field.
    try:
        paragraph.paragraph_format.tab_stops.add_tab_stop(right_inch, WD_TAB_ALIGNMENT.RIGHT, WD_TAB_LEADER.DOTS)
    except Exception:
        pass

    # The run starts with the tab that jumps to the right tab stop.
    field_run = paragraph.add_run('\t')._r

    # Field = begin marker, instruction text, end marker.
    begin = OxmlElement('w:fldChar')
    begin.set(qn('w:fldCharType'), 'begin')

    instruction = OxmlElement('w:instrText')
    instruction.set(qn('xml:space'), 'preserve')
    instruction.text = f"PAGEREF {bookmark_name} \\h"

    end = OxmlElement('w:fldChar')
    end.set(qn('w:fldCharType'), 'end')

    for node in (begin, instruction, end):
        field_run.append(node)
def estimate_content_length(questions_by_course, cours_titles):
    """Estimate how many lines each question occupies, to help balance columns.

    Per course (in sorted key order) 3 lines are counted for the title. Per
    question: 2 lines for the question text and spacing, 1 line per choice,
    2 lines for the source/answer line and spacing, plus 2 more lines when
    the question has a non-blank comment other than ``'nan'``.

    Args:
        questions_by_course: Mapping of course number -> list of question
            dicts (each with a ``'choices'`` list and optionally ``'comment'``).
        cours_titles: Not used by the estimate; kept for interface
            compatibility.

    Returns:
        tuple: ``(question_lengths, total_estimated_lines)`` where
        ``question_lengths`` is a list of dicts with the keys ``'cours'``,
        ``'question'`` and ``'estimated_lines'``.
    """
    course_title_lines = 3  # every course title counts the same
    question_lengths = []
    total_estimated_lines = 0

    for cours_num in sorted(questions_by_course.keys()):
        total_estimated_lines += course_title_lines

        for q_data in questions_by_course[cours_num]:
            # question line + spacing, one line per choice, source/answer line + spacing
            question_lines = 2 + len(q_data['choices']) + 2

            comment = q_data.get('comment')
            if comment and str(comment).strip() and str(comment).lower() != 'nan':
                question_lines += 2  # comment lines

            question_lengths.append({
                'cours': cours_num,
                'question': q_data,
                'estimated_lines': question_lines,
            })
            total_estimated_lines += question_lines

    return question_lengths, total_estimated_lines
def read_course_titles_from_module_sheet(excel_file_path, module_name):
    """Read course titles from the workbook sheet named after the module.

    The sheet is matched case-insensitively (ignoring surrounding
    whitespace) against ``module_name``. Rows are used only when the sheet
    has both a ``cours`` and a ``titre`` column, neither cell is missing and
    ``cours`` passes ``is_valid_cours_number``.

    Args:
        excel_file_path: Path of the Excel workbook.
        module_name: Module whose sheet should be read.

    Returns:
        dict: Course number (int) -> title. Empty when no sheet matches or
        the sheet lacks the expected columns.
    """
    cours_titles = {}
    print(f" DEBUG: Looking for sheet matching module '{module_name}'")

    # Open the workbook once and close it deterministically; the sheet is
    # parsed from the same open handle rather than re-opening the file.
    with pd.ExcelFile(excel_file_path) as xls:
        sheet_names = xls.sheet_names

        # Find matching sheet (case-insensitive)
        target_sheet = None
        module_name_lower = str(module_name).strip().lower()
        print(f" DEBUG: Module name (lowercase): '{module_name_lower}'")
        print(f" DEBUG: Available sheets: {sheet_names}")
        for sheet in sheet_names:
            sheet_lower = sheet.strip().lower()
            print(f" DEBUG: Comparing '{module_name_lower}' with '{sheet_lower}'")
            if sheet_lower == module_name_lower:
                target_sheet = sheet
                print(f" DEBUG: MATCH FOUND! Using sheet '{target_sheet}'")
                break

        if target_sheet is None:
            print(f" DEBUG: No sheet found matching module '{module_name}'")
            return cours_titles

        # Read the matching sheet
        cours_df = pd.read_excel(xls, sheet_name=target_sheet)

    print(f" DEBUG: Sheet '{target_sheet}' has {len(cours_df)} rows")
    print(f" DEBUG: Sheet columns: {list(cours_df.columns)}")

    if not cours_df.empty and 'cours' in cours_df.columns and 'titre' in cours_df.columns:
        for idx, row in cours_df.iterrows():
            print(f" DEBUG: Row {idx}: cours={row['cours']}, titre={row.get('titre', 'N/A')}")
            if pd.notna(row['cours']) and pd.notna(row['titre']):
                # Only store valid numeric courses
                if is_valid_cours_number(row['cours']):
                    cours_num = int(float(str(row['cours']).strip()))
                    cours_titles[cours_num] = row['titre']
                    print(f" DEBUG: Added cours {cours_num}: {row['titre']}")
                else:
                    print(f" DEBUG: Skipped invalid cours: {row['cours']}")
        print(f" DEBUG: Final count: {len(cours_titles)} course titles from sheet '{target_sheet}'")
    else:
        print(f" DEBUG: Sheet '{target_sheet}' doesn't have expected structure")
        print(f" DEBUG: Has 'cours' column: {'cours' in cours_df.columns}")
        print(f" DEBUG: Has 'titre' column: {'titre' in cours_df.columns}")

    return cours_titles
def enable_odd_even_headers(doc):
    """Enable different odd and even page headers/footers for the whole document.

    This is best effort: any failure is reported with a warning print and
    never raised to the caller.

    Args:
        doc: python-docx document whose settings are updated.
    """
    try:
        settings = doc.settings

        # Prefer python-docx's own setting when this version provides it: it
        # places w:evenAndOddHeaders at its proper position inside w:settings
        # and also turns on an element that is present but switched off.
        if hasattr(type(settings), 'odd_and_even_pages_header_footer'):
            if settings.odd_and_even_pages_header_footer:
                print("✓ Odd/even page headers already enabled")
            else:
                settings.odd_and_even_pages_header_footer = True
                print("✓ Enabled odd/even page headers in document settings")
            return

        # Older python-docx: add the element by hand when it is missing.
        settings_element = settings.element
        even_odd = settings_element.find(qn('w:evenAndOddHeaders'))
        if even_odd is None:
            even_odd = OxmlElement('w:evenAndOddHeaders')
            # Insert at the beginning of settings
            settings_element.insert(0, even_odd)
            print("✓ Enabled odd/even page headers in document settings")
        else:
            print("✓ Odd/even page headers already enabled")
    except Exception as e:
        # No success message here: nothing was enabled.
        print(f"Warning: Could not enable odd/even headers: {e}")
def create_flexible_header(section, module_name, sheet_name, display_name=None, left_margin_inches=0,
                           right_margin_inches=0, theme_hex=None):
    """Create header text boxes whose left/right positions swap on odd/even pages.

    The default (odd-page) header shows the sheet label on the left and the
    module name on the right; the even-page header shows them the other way
    round. Each label lives in a floating VML text box.

    Args:
        section: python-docx section whose headers are rewritten.
        module_name: Module name; upper-cased for display.
        sheet_name: Label used when ``display_name`` is falsy; upper-cased.
        display_name: Optional label that replaces ``sheet_name`` when truthy.
        left_margin_inches: ``margin-left`` (in inches) of the left-anchored box.
        right_margin_inches: ``margin-left`` (in inches) of the right-anchored box.
        theme_hex: Text colour as a hex string without '#'; defaults to
            THEME_COLOR_HEX.

    Returns:
        None. Errors raised while building the even-page header are printed
        with a traceback and swallowed; errors in the odd-page header propagate.
    """
    if theme_hex is None:
        theme_hex = THEME_COLOR_HEX
    section.header_distance = Cm(0.6)

    module_label = str(module_name).upper()
    # Use display_name if provided, otherwise use sheet_name
    sheet_label = str(display_name if display_name else sheet_name).upper()

    # Approximate box widths from the *visible* text. Measuring after escaping
    # would count entities such as "&amp;" as several characters.
    module_width = max(len(module_label) * 10 + 60, 100)
    sheet_width = max(len(sheet_label) * 10 + 60, 100)

    # Escape only for embedding into the XML snippets below.
    module_name_str = html.escape(module_label)
    sheet_name_str = html.escape(sheet_label)

    def build_textbox_run(text, width, side):
        """Return the XML of a run holding one text box anchored to ``side`` ('left'/'right')."""
        margin = left_margin_inches if side == 'left' else right_margin_inches
        return f'''<w:r xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
     xmlns:v="urn:schemas-microsoft-com:vml"
     xmlns:w10="urn:schemas-microsoft-com:office:word">
    <w:pict>
        <v:shape style="position:absolute;margin-left:{margin}in;margin-top:0;width:{width}pt;height:25pt;z-index:1;mso-position-horizontal:{side};mso-position-horizontal-relative:margin;mso-position-vertical-relative:line" fillcolor="#FFFFFF" filled="f" stroked="f">
            <v:textbox inset="5pt,0pt,5pt,0pt" style="mso-fit-shape-to-text:t">
                <w:txbxContent>
                    <w:p>
                        <w:pPr>
                            <w:jc w:val="{side}"/>
                            <w:spacing w:before="0" w:after="0"/>
                        </w:pPr>
                        <w:r>
                            <w:rPr>
                                <w:rFonts w:ascii="Montserrat" w:hAnsi="Montserrat"/>
                                <w:b/>
                                <w:sz w:val="26"/>
                                <w:color w:val="{theme_hex}"/>
                            </w:rPr>
                            <w:t>{text}</w:t>
                        </w:r>
                    </w:p>
                </w:txbxContent>
            </v:textbox>
        </v:shape>
    </w:pict>
</w:r>'''

    def create_header_content(paragraph, left_text, left_width, right_text, right_width):
        """Replace the paragraph's content with a left and a right text box."""
        paragraph.clear()
        paragraph._p.append(parse_xml(build_textbox_run(left_text, left_width, 'left')))
        paragraph._p.append(parse_xml(build_textbox_run(right_text, right_width, 'right')))

    # ========== CREATE DEFAULT/ODD PAGES HEADER (Sheet Left, Module Right) ==========
    header_odd = section.header
    header_odd.is_linked_to_previous = False
    if not header_odd.paragraphs:
        header_odd.add_paragraph()
    create_header_content(header_odd.paragraphs[0], sheet_name_str, sheet_width, module_name_str, module_width)

    # ========== CREATE EVEN PAGES HEADER (Module Left, Sheet Right) ==========
    try:
        if hasattr(section, 'even_page_header'):
            header_even = section.even_page_header
            header_even.is_linked_to_previous = False
            if not header_even.paragraphs:
                header_even.add_paragraph()
            create_header_content(header_even.paragraphs[0], module_name_str, module_width,
                                  sheet_name_str, sheet_width)
            print("✓ Created even page header using built-in property")
        else:
            # Manual method: build a header part by hand and reference it
            # from the section properties as the 'even' header.
            from docx.opc.packuri import PackURI
            from docx.opc.part import XmlPart

            even_hdr_xml = (
                '<w:hdr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" '
                'xmlns:v="urn:schemas-microsoft-com:vml" '
                'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">'
                '<w:p><w:pPr><w:spacing w:before="0" w:after="0"/></w:pPr>'
                f'{build_textbox_run(module_name_str, module_width, "left")}'
                f'{build_textbox_run(sheet_name_str, sheet_width, "right")}'
                '</w:p></w:hdr>'
            )

            # Create part (id(section) keeps the part name distinct per section object)
            partname = PackURI(f'/word/header_even_{id(section)}.xml')
            element = parse_xml(even_hdr_xml)
            content_type = 'application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml'
            package = section.part.package
            even_part = XmlPart(partname, content_type, element, package)

            # Create relationship
            rId = section.part.relate_to(
                even_part,
                'http://schemas.openxmlformats.org/officeDocument/2006/relationships/header')

            # Replace any existing 'even' header reference with the new one
            sectPr = section._sectPr
            for ref in list(sectPr.findall(qn('w:headerReference'))):
                if ref.get(qn('w:type')) == 'even':
                    sectPr.remove(ref)
            hdr_ref = OxmlElement('w:headerReference')
            hdr_ref.set(qn('w:type'), 'even')
            hdr_ref.set(qn('r:id'), rId)
            sectPr.append(hdr_ref)
            print("✓ Created even page header via manual part creation")
    except Exception as e:
        print(f"Warning: Could not create even header: {e}")
        import traceback
        traceback.print_exc()
def extract_display_name_from_excel(excel_file_path):
    """Guess a human-readable display name for a workbook.

    Strategies, tried in order (the first hit wins):
      1. In the first 5 data rows of the first sheet, a cell containing one of
         the labels 'name:', 'nom:', 'display name:' or 'titre:'. The text after
         the colon is used; if that is empty, the cell one row below is used.
      2. In a sheet whose name contains 'info', 'metadata', 'details' or 'nom',
         a cell (first 10 data rows) containing 'name'; the cell one row below
         is used.
      3. The first data cell of the first sheet, if it is shorter than 50
         characters and has no digit among its first 10 characters.
      4. The file name without its extension.

    Args:
        excel_file_path: Path of the Excel workbook.

    Returns:
        str: The display name. If reading the workbook fails, the error is
        printed and the file name without extension is returned.
    """
    label_keywords = ('name:', 'nom:', 'display name:', 'titre:')
    info_sheet_keywords = ('info', 'metadata', 'details', 'nom')

    try:
        # The context manager closes the workbook handle on every exit path;
        # the sheets are read through the same handle instead of reopening.
        with pd.ExcelFile(excel_file_path) as xls:
            first_sheet_name = xls.sheet_names[0]
            df = pd.read_excel(xls, sheet_name=first_sheet_name, nrows=5)

            # Strategy 1: Look for a cell with "Name:", "Display Name:", etc.
            for col in df.columns:
                column = df[col]
                for pos, val in enumerate(column):
                    if pd.isna(val):
                        continue
                    text = str(val).strip()
                    if not any(keyword in text.lower() for keyword in label_keywords):
                        continue
                    # Every keyword contains ':', so the split always has two parts.
                    inline_value = text.split(':', 1)[1].strip()
                    if inline_value:
                        return inline_value
                    # Label only ("Name:"): take the value from the row below.
                    if pos + 1 < len(column):
                        next_val = column.iloc[pos + 1]
                        if pd.notna(next_val):
                            return str(next_val).strip()

            # Strategy 2: Check for a dedicated "Info" or "Metadata" sheet
            for sheet_name in xls.sheet_names:
                if not any(keyword in sheet_name.lower() for keyword in info_sheet_keywords):
                    continue
                info_df = pd.read_excel(xls, sheet_name=sheet_name, nrows=10)
                for col in info_df.columns:
                    column = info_df[col]
                    for pos, val in enumerate(column):
                        if pd.notna(val) and 'name' in str(val).lower() and pos + 1 < len(column):
                            next_val = column.iloc[pos + 1]
                            if pd.notna(next_val):
                                return str(next_val).strip()

            # Strategy 3: Check first cell of first sheet
            if not df.empty and pd.notna(df.iloc[0, 0]):
                first_cell = str(df.iloc[0, 0]).strip()
                if len(first_cell) < 50 and not any(char.isdigit() for char in first_cell[:10]):
                    return first_cell

        # Fallback: Use filename without extension
        return os.path.splitext(os.path.basename(excel_file_path))[0]
    except Exception as e:
        print(f"Error extracting display name: {e}")
        # Ultimate fallback
        return os.path.splitext(os.path.basename(excel_file_path))[0]
def add_colored_column_separator(section, theme_hex=None):
    """Draw a coloured vertical rule between the columns via the page headers.

    A VML line (1.5pt stroke, horizontally centred relative to the margins,
    running from 0.49in to 11.05in measured from the page top) is appended to
    the first paragraph of the section's default/odd header and, when it can
    be reached, of its even-page header.

    Args:
        section: python-docx section whose headers receive the line.
        theme_hex: Stroke colour as hex without '#'; defaults to THEME_COLOR_HEX.

    Returns:
        None. Problems with the even-page header are printed (with traceback)
        and swallowed.
    """
    stroke_hex = THEME_COLOR_HEX if theme_hex is None else theme_hex

    def make_line_run(line_id):
        """Parse and return a run containing the separator line with the given shape id."""
        return parse_xml(f'''<w:r xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
     xmlns:v="urn:schemas-microsoft-com:vml"
     xmlns:o="urn:schemas-microsoft-com:office:office">
    <w:pict>
        <v:line id="{line_id}"
                style="position:absolute;left:0;text-align:left;z-index:-1;
                       mso-position-horizontal:center;
                       mso-position-horizontal-relative:margin;
                       mso-position-vertical-relative:page"
                from="0,0.49in" to="0,11.05in"
                strokecolor="#{stroke_hex}"
                strokeweight="1.5pt">
            <o:lock v:ext="edit" aspectratio="f"/>
        </v:line>
    </w:pict>
</w:r>''')

    def add_line_to_header(header_elem, line_id="columnSeparator"):
        """Append the separator to the header's first paragraph, creating one if needed."""
        if not header_elem.paragraphs:
            header_elem.add_paragraph()
        header_elem.paragraphs[0]._p.append(make_line_run(line_id))

    # Default/odd header always gets the line.
    add_line_to_header(section.header, "columnSeparatorOdd")

    # Even header: use the built-in property when python-docx offers it,
    # otherwise look up the manually created header part via its reference.
    try:
        if hasattr(section, 'even_page_header'):
            add_line_to_header(section.even_page_header, "columnSeparatorEven")
            print("✓ Added column separator to even page header using built-in property")
            return

        even_refs = [
            ref for ref in section._sectPr.findall(qn('w:headerReference'))
            if ref.get(qn('w:type')) == 'even'
        ]
        if not even_refs:
            print("⚠ No even header reference found - skipping even page separator line")
            return

        rel_id = even_refs[0].get(qn('r:id'))
        even_part = section.part.related_parts[rel_id]
        paragraphs = even_part.element.findall(qn('w:p'))
        if paragraphs:
            paragraphs[0].append(make_line_run("columnSeparatorEven"))
            print("✓ Added column separator to even page header via manual part access")
    except Exception as e:
        print(f"Warning: Could not add separator line to even page header: {e}")
        import traceback
        traceback.print_exc()
def add_choice_commentaire_section(doc, choice_commentaire, photo_q_path, theme_color=None, theme_hex=None,
                                   general_comment=None, question_num=None, highlight_words=None):
    """Append a dashed-border, shaded box holding a question's comments and optional photo.

    Layout is a one-row table: with a usable photo it has two cells (3.5in for
    text, 1.75in for the photo); otherwise a single 5.25in text cell. A small
    spacer paragraph is added after the table.

    Args:
        doc: python-docx document to append to. The spacer paragraph requests
            the 'TinySpace' style (presumably defined elsewhere in this file).
        choice_commentaire: Mapping of choice letter -> comment text. Entries
            for which is_only_x_string() is true are dropped.
        photo_q_path: Path of the question photo; used only if the file exists.
        theme_color: Colour of the choice letters; defaults to THEME_COLOR.
        theme_hex: Border colour as hex without '#'; defaults to THEME_COLOR_HEX.
        general_comment: General comment; rendered only when ``question_num``
            is truthy and the text is non-blank and not the string 'nan'.
        question_num: Gates the general comment (see above); not printed itself.
        highlight_words: Accepted for interface compatibility; currently unused.

    Returns:
        None. Nothing is added to ``doc`` when there is neither renderable
        text nor an existing photo file.
    """
    # Only add if there are comments or photo
    if not choice_commentaire and not photo_q_path and not general_comment:
        return
    print(
        f"DEBUG: add_choice_commentaire_section called with {len(choice_commentaire) if choice_commentaire else 0} comments")

    # Check if photo exists and is valid
    has_photo = False
    photo_q_path_clean = ''
    if photo_q_path:
        photo_q_path_clean = str(photo_q_path).strip()
        print(f"DEBUG: Checking photo path: '{photo_q_path_clean}'")
        if photo_q_path_clean and photo_q_path_clean.lower() not in ['nan', 'none', '']:
            if os.path.exists(photo_q_path_clean):
                has_photo = True
                print(f"DEBUG: ✓ Photo Q exists: {photo_q_path_clean}")
                # Warn (only) about extensions that may not be supported
                valid_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']
                file_ext = os.path.splitext(photo_q_path_clean)[1].lower()
                if file_ext not in valid_extensions:
                    print(f"WARNING: File extension '{file_ext}' might not be supported. Valid: {valid_extensions}")
            else:
                print(f"DEBUG: ✗ Photo Q does NOT exist at: {photo_q_path_clean}")
                print(f"DEBUG: Current working directory: {os.getcwd()}")
                print(f"DEBUG: Absolute path would be: {os.path.abspath(photo_q_path_clean)}")

    # Decide up front what text will actually be rendered.
    show_general = bool(
        question_num and general_comment and str(general_comment).strip()
        and str(general_comment).lower() != 'nan'
    )
    # Filter out comments that are only X's
    filtered_commentaire = {}
    if choice_commentaire:
        filtered_commentaire = {letter: text for letter, text in choice_commentaire.items()
                                if not is_only_x_string(text)}

    # Without a usable photo and without text the box would only contain the
    # "[See image]" placeholder pointing at an image that is not there.
    if not has_photo and not show_general and not filtered_commentaire:
        print("DEBUG: Nothing to render (no usable photo, no comments) - skipping section")
        return

    if theme_color is None:
        theme_color = THEME_COLOR
    if theme_hex is None:
        theme_hex = THEME_COLOR_HEX

    # Create a table with 1 row and 2 columns (or 1 if no photo)
    right_cell = None
    if has_photo:
        table = doc.add_table(rows=1, cols=2)
        table.alignment = WD_TABLE_ALIGNMENT.LEFT
        table.allow_autofit = False
        left_cell = table.rows[0].cells[0]
        right_cell = table.rows[0].cells[1]
        left_cell.width = Inches(3.5)  # 2/3 of available width
        right_cell.width = Inches(1.75)  # 1/3 of available width
        left_cell.vertical_alignment = WD_ALIGN_VERTICAL.TOP
        right_cell.vertical_alignment = WD_ALIGN_VERTICAL.TOP
    else:
        table = doc.add_table(rows=1, cols=1)
        table.alignment = WD_TABLE_ALIGNMENT.LEFT
        left_cell = table.rows[0].cells[0]
        left_cell.width = Inches(5.25)  # Full width

    # Add DASHED border to the table with theme color
    tblPr = table._tbl.tblPr
    if tblPr is None:
        tblPr = OxmlElement('w:tblPr')
        table._tbl.insert(0, tblPr)
    border_color = theme_hex
    # Border size: 1.5pt = 12 eighths of a point (1.5 * 8 = 12)
    tblBorders = parse_xml(f'''
        <w:tblBorders {nsdecls("w")}>
            <w:top w:val="dashed" w:sz="12" w:space="0" w:color="{border_color}"/>
            <w:left w:val="dashed" w:sz="12" w:space="0" w:color="{border_color}"/>
            <w:bottom w:val="dashed" w:sz="12" w:space="0" w:color="{border_color}"/>
            <w:right w:val="dashed" w:sz="12" w:space="0" w:color="{border_color}"/>
            <w:insideH w:val="dashed" w:sz="12" w:space="0" w:color="{border_color}"/>
            <w:insideV w:val="dashed" w:sz="12" w:space="0" w:color="{border_color}"/>
        </w:tblBorders>
    ''')
    tblPr.append(tblBorders)

    # Per cell: padding first, then light gray shading (same element order as before)
    for cell in table.rows[0].cells:
        tcPr = cell._tc.get_or_add_tcPr()
        tcMar = OxmlElement('w:tcMar')
        for margin in ['top', 'left', 'bottom', 'right']:
            mar = OxmlElement(f'w:{margin}')
            mar.set(qn('w:w'), '80')  # 80 twips = ~0.06 inches padding
            mar.set(qn('w:type'), 'dxa')
            tcMar.append(mar)
        tcPr.append(tcMar)

        shading_elm = OxmlElement('w:shd')
        shading_elm.set(qn('w:val'), 'clear')
        shading_elm.set(qn('w:color'), 'auto')
        shading_elm.set(qn('w:fill'), 'F2F2F2')  # Light gray
        tcPr.append(shading_elm)

    # Clear the default empty paragraph first; it is reused for the first entry
    if left_cell.paragraphs:
        left_cell.paragraphs[0].clear()
    comment_index = 0

    def next_text_paragraph():
        """Return the cell's first paragraph for the first entry, a new one afterwards."""
        if comment_index == 0 and left_cell.paragraphs:
            return left_cell.paragraphs[0]
        return left_cell.add_paragraph()

    # ADD GENERAL COMMENT FIRST if it exists
    if show_general:
        general_para = next_text_paragraph()
        general_para.paragraph_format.space_before = Pt(1)
        general_para.paragraph_format.space_after = Pt(1)
        general_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
        text_run = general_para.add_run(f" {str(general_comment)}")
        text_run.font.name = 'Inter SemiBold'
        text_run.font.size = Pt(8)
        comment_index += 1

    # Add choice commentaires
    if choice_commentaire:
        print(f"DEBUG: Adding {len(filtered_commentaire)} choice comments")
        for choice_letter in sorted(filtered_commentaire.keys()):
            comment_text = filtered_commentaire[choice_letter]
            print(f"DEBUG: Adding comment {choice_letter}: {comment_text[:50]}...")
            comment_para = next_text_paragraph()
            comment_para.paragraph_format.space_before = Pt(1)
            comment_para.paragraph_format.space_after = Pt(0)
            comment_para.paragraph_format.line_spacing = 1.0
            comment_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
            # Choice letter in bold with theme color
            letter_run = comment_para.add_run(f"{choice_letter}- ")
            letter_run.font.name = 'Inter ExtraBold'
            letter_run.font.size = Pt(8)
            letter_run.font.bold = True
            letter_run.font.color.rgb = theme_color
            # Comment text
            text_run = comment_para.add_run(comment_text)
            text_run.font.name = 'Inter Display SemiBold'
            text_run.font.size = Pt(8)
            comment_index += 1

    # No text at all: because of the early return above this implies a photo
    # is present, so the placeholder really does refer to an image.
    if comment_index == 0:
        print("DEBUG: No comments found, adding placeholder")
        placeholder_para = left_cell.paragraphs[0] if left_cell.paragraphs else left_cell.add_paragraph()
        placeholder_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        placeholder_run = placeholder_para.add_run("[See image]")
        placeholder_run.font.name = 'Inter Display'
        placeholder_run.font.size = Pt(9)
        placeholder_run.font.italic = True

    # Add photo to right cell if exists
    if has_photo:
        try:
            print(f"DEBUG: Attempting to add photo: {photo_q_path_clean}")
            # Clear the default empty paragraph and reuse it
            if right_cell.paragraphs:
                photo_para = right_cell.paragraphs[0]
                photo_para.clear()
            else:
                photo_para = right_cell.add_paragraph()
            photo_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            photo_para.paragraph_format.space_before = Pt(0)
            photo_para.paragraph_format.space_after = Pt(0)
            run = photo_para.add_run()
            # Try width-based sizing first, then height-based
            try:
                run.add_picture(photo_q_path_clean, width=Inches(1.5))
                print(f"DEBUG: ✓ Successfully added Photo Q at 1.5 inches width")
            except Exception as e1:
                print(f"DEBUG: Failed at 1.5 inches, trying height-based: {e1}")
                run.add_picture(photo_q_path_clean, height=Inches(2.0))
                print(f"DEBUG: ✓ Successfully added Photo Q at 2.0 inches height")
        except Exception as e:
            # If photo fails to load, add error text
            print(f"ERROR: Failed to add Photo Q: {type(e).__name__}: {str(e)}")
            error_para = right_cell.add_paragraph()
            error_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            error_run = error_para.add_run(f"[Photo error: {type(e).__name__}]")
            error_run.font.size = Pt(7)
            error_run.font.italic = True
            error_run.font.color.rgb = RGBColor(255, 0, 0)

    # Add spacing after the table
    empty_para = doc.add_paragraph(' ', style='TinySpace')
    empty_para.paragraph_format.space_before = Pt(0)
    empty_para.paragraph_format.space_after = Pt(0)
    empty_para.paragraph_format.line_spacing = Pt(7)
    empty_run = empty_para.add_run(' ')
    empty_run.font.size = Pt(7)
def extract_embedded_images_info(excel_file_path):
    """Print a fixed notice about images embedded through =DISPIMG() formulas.

    The notice states that such images live inside the workbook and cannot be
    used as file paths, and lists the manual steps to export them.

    Args:
        excel_file_path: Accepted for the caller's convenience; not read here.

    Returns:
        None. The only effect is text written to stdout.
    """
    banner = "!" * 60
    notice_lines = [
        "",  # leading blank line before the first banner
        banner,
        "IMPORTANT: EMBEDDED IMAGES DETECTED",
        banner,
        "Your Excel file contains embedded images using =DISPIMG() formulas.",
        "These images are stored INSIDE the Excel file and cannot be accessed",
        "as file paths.",
        "",
        "TO FIX THIS:",
        "1. Open your Excel file",
        "2. Save the images as separate files (right-click > Save as Picture)",
        "3. Update the 'Photo Q' and 'Photo C' columns with the file paths",
        " Example: 'images/question1.png' instead of '=DISPIMG(...)'",
        "",
        "Alternative: Use OneDrive/SharePoint links or export images first",
        banner,
        "",  # trailing blank line after the last banner
    ]
    print("\n".join(notice_lines))
def process_excel_to_word(excel_file_path, output_word_path, image_folder, display_name=None, use_two_columns=True,
add_separator_line=True, balance_method="dynamic", theme_hex=None, highlight_words=None,
show_comments=True):
"""Main function to process Excel and create a Word document with TOC on the first page
Args:
show_comments (bool): If True, display comment boxes. If False, hide all comments. Default is True.
"""
if highlight_words is None:
highlight_words = []
if theme_hex is None:
theme_hex = THEME_COLOR_HEX
theme_color = RGBColor.from_string(theme_hex)
# Prepare image folder (extract if ZIP)
actual_image_folder, is_temp, temp_dir_obj = prepare_image_folder(image_folder)
# Map images from the prepared folder
question_photos = map_images_from_excel(excel_file_path, actual_image_folder)
# Read the Excel file
xls = pd.ExcelFile(excel_file_path)
first_sheet_name = xls.sheet_names[0] # Get the first sheet name
questions_df = pd.read_excel(excel_file_path, sheet_name=first_sheet_name)
# Extract display name if not provided
if display_name is None:
display_name = extract_display_name_from_excel(excel_file_path)
print(f"Extracted display name: {display_name}")
# Get unique modules from Questions sheet (case-insensitive)
module_col = None
for col in questions_df.columns:
if col.lower().strip() == 'module':
module_col = col
break
if module_col:
xls_temp = pd.ExcelFile(excel_file_path)
all_sheets = xls_temp.sheet_names
modules_in_questions = questions_df[module_col].dropna().unique()
# Create a mapping from lowercase module name to actual sheet name
module_to_sheet = {}
for module in modules_in_questions:
module_lower = str(module).strip().lower()
for sheet in all_sheets:
if sheet.strip().lower() == module_lower:
module_to_sheet[module] = sheet
break
# Normalize all module names in the dataframe
questions_df[module_col] = questions_df[module_col].apply(
lambda x: module_to_sheet.get(x, x) if pd.notna(x) else x
)
# Get unique modules in sheet order
modules = []
seen = set()
for sheet in all_sheets:
sheet_lower = sheet.strip().lower()
for module in modules_in_questions:
if str(module).strip().lower() == sheet_lower and sheet not in seen:
modules.append(sheet)
seen.add(sheet)
break
else:
modules = []
# Read course titles from module-specific sheets
modules_data = {}
xls = pd.ExcelFile(excel_file_path)
for module in modules:
try:
cours_titles_for_module = read_course_titles_from_module_sheet(excel_file_path, module)
modules_data[module] = cours_titles_for_module
except Exception as e:
print(f"DEBUG: Error reading module '{module}': {e}")
# Clean column names
questions_df.columns = questions_df.columns.str.strip()
# Check if photo columns exist
has_photo_q_col = 'Photo Q' in questions_df.columns
has_photo_c_col = 'Photo C' in questions_df.columns
if not has_photo_q_col and not has_photo_c_col:
print("ℹ️ No photo columns found in Excel - images will be skipped")
elif not has_photo_q_col:
print("ℹ️ 'Photo Q' column not found - question images will be skipped")
elif not has_photo_c_col:
print("ℹ️ 'Photo C' column not found - choice images will be skipped")
# Create Word document
doc = Document()
enable_odd_even_headers(doc)
core_props = doc.core_properties
core_props.author = "Natural Killer"
core_props.title = "Manhattan Project"
core_props.subject = "QCM"
core_props.comments = "Created By NK"
core_props.last_modified_by = "NK"
core_props.generator = "Microsoft Word"
set_page_size(doc.sections[0], PAPER_SIZES['A4_WIDE'][0], PAPER_SIZES['A4'][1])
# ========================================
# ADD THREE EMPTY PAGES AT THE BEGINNING
# ========================================
for i in range(3):
doc.add_paragraph() # Add empty paragraph
if i < 2: # Add page breaks for first 2 pages (3rd page leads to TOC)
doc.add_page_break()
# TOC helpers
toc_entries = []
bookmark_id = 1
# Set page margins
for section in doc.sections:
section.top_margin = Inches(0.5)
section.bottom_margin = Inches(0.5)
section.left_margin = Cm(1.1)
section.right_margin = Cm(1.1)
# ========================================
# CREATE TOC SECTION FIRST (TWO COLUMNS - SPLIT PAGE)
# ========================================
toc_section = doc.sections[0]
sectPr = toc_section._sectPr
cols = sectPr.find(qn('w:cols'))
if cols is None:
cols = OxmlElement('w:cols')
sectPr.append(cols)
cols.set(qn('w:num'), '2')
cols.set(qn('w:space'), '432') # 0.3 inch spacing between columns
# Add TOC title
toc_title = doc.add_paragraph()
toc_title.alignment = WD_ALIGN_PARAGRAPH.CENTER
toc_title.paragraph_format.space_after = Pt(12)
toc_title_run = toc_title.add_run("Sommaire")
toc_title_run.font.name = 'Montserrat'
toc_title_run.font.size = Pt(16)
toc_title_run.font.bold = True
toc_title_run.font.color.rgb = theme_color
# Add bookmark to TOC title
add_toc_bookmark(doc, toc_title)
# Remember position to insert TOC entries later
toc_insert_index = len(doc.paragraphs)
# ========================================
# START NEW SECTION FOR CONTENT (TWO COLUMNS)
# ========================================
doc.add_section(WD_SECTION.NEW_PAGE)
# Process questions
processed_questions = []
current_question = None
current_choices = []
skipped_s2_questions = 0
for idx, row in questions_df.iterrows():
numero = row['Numero']
if pd.notna(numero):
if current_question is not None and current_choices and is_valid_cours_number(current_cours):
processed_questions.append({
'numero': current_question,
'question_text': current_question_text,
'source': current_source,
'comment': current_comment,
'cours': int(float(str(current_cours).strip())),
'module': current_module,
'choices': current_choices.copy(),
'choice_commentaire': current_choice_commentaire,
'photo_q': question_photos.get(current_question, {}).get('photo_q', None), # LINKED!
'photo_c': question_photos.get(current_question, {}).get('photo_c', None) # LINKED!
})
elif current_question is not None and not is_valid_cours_number(current_cours):
skipped_s2_questions += 1
current_question = numero
current_question_text = str(row['Question']).strip()
current_source = str(row['Source']).strip() if pd.notna(row['Source']) else ""
current_comment = str(row['Comment']).strip() if pd.notna(row['Comment']) and str(
row['Comment']).lower() != 'nan' else None
current_cours = row['Cours'] if pd.notna(row['Cours']) else 1
current_module = row[module_col] if module_col and pd.notna(row[module_col]) else None
current_choices = []
current_choice_commentaire = {} # NEW: Initialize per question
# Initialize photo storage for this question
if current_question not in question_photos:
question_photos[current_question] = {'photo_q': None, 'photo_c': None}
current_choice_commentaire = {}
# CHECK FOR PHOTOS ON THIS ROW - Store DIRECTLY in question_photos dict
if has_photo_q_col and pd.notna(row.get('Photo Q', None)):
photo_q_raw = str(row['Photo Q']).strip()
if has_photo_c_col and pd.notna(row.get('Photo C', None)):
photo_c_raw = str(row['Photo C']).strip()
# Process each CHOICE row - CHECK FOR PHOTOS ON EVERY ROW!
if is_valid_cours_number(current_cours):
choice_letter = str(row['Order']).strip().upper()
choice_text = str(row['ChoiceText']).strip()
ct_value = str(row['CT']).strip().upper() if pd.notna(row['CT']) else ""
is_correct = ct_value == 'X'
# Read choice commentaire for THIS specific choice
if pd.notna(row.get('Choice commentaire', None)):
choice_comment = str(row['Choice commentaire']).strip()
if choice_comment and choice_comment.lower() not in ['nan', 'none', '']:
current_choice_commentaire[choice_letter] = choice_comment
# CHECK FOR PHOTOS ON THIS ROW (could be any choice row!)
# CRITICAL FIX: Store directly in question_photos, not in temporary variables
if has_photo_q_col and pd.notna(row.get('Photo Q', None)):
photo_q_raw = str(row['Photo Q']).strip()
if has_photo_c_col and pd.notna(row.get('Photo C', None)):
photo_c_raw = str(row['Photo C']).strip()
if choice_text and choice_text.lower() != 'nan' and choice_text != '':
current_choices.append({
'letter': choice_letter,
'text': choice_text,
'is_correct': is_correct
})
if current_question is not None and current_choices and is_valid_cours_number(current_cours):
processed_questions.append({
'numero': current_question,
'question_text': current_question_text,
'source': current_source,
'comment': current_comment,
'cours': int(float(str(current_cours).strip())),
'module': current_module,
'choices': current_choices.copy(),
'choice_commentaire': current_choice_commentaire,
'photo_q': question_photos.get(current_question, {}).get('photo_q', None), # LINKED!
'photo_c': question_photos.get(current_question, {}).get('photo_c', None) # LINKED!
})
elif current_question is not None and not is_valid_cours_number(current_cours):
skipped_s2_questions += 1
# Group questions by module and course
questions_by_module = {}
for q_data in processed_questions:
module_name = q_data['module']
cours_num = q_data['cours']
if module_name not in questions_by_module:
questions_by_module[module_name] = {}
if cours_num not in questions_by_module[module_name]:
questions_by_module[module_name][cours_num] = []
questions_by_module[module_name][cours_num].append(q_data)
# Check for E choices
total_e_choices = 0
for module_name, questions_by_course in questions_by_module.items():
for cours_num, course_questions in questions_by_course.items():
course_e_count = sum(1 for q_data in course_questions
for choice in q_data['choices']
if choice['letter'].upper() == 'E')
if course_e_count > 0:
total_e_choices += course_e_count
# Column balancing
column_break_after_question = 0
if use_two_columns and balance_method == "dynamic":
total_estimated_lines = 0
all_question_lengths = []
for module_name in modules:
if module_name not in questions_by_module:
continue
questions_by_course = questions_by_module[module_name]
cours_titles = modules_data.get(module_name, {})
total_estimated_lines += 5
question_lengths, module_lines = estimate_content_length(questions_by_course, cours_titles)
total_estimated_lines += module_lines
all_question_lengths.extend(question_lengths)
target_lines_first_column = total_estimated_lines * 0.52
cumulative_lines = 0
global_question_counter = 0
for module_name in modules:
if module_name not in questions_by_module:
continue
cumulative_lines += 5
questions_by_course = questions_by_module[module_name]
for cours_num in sorted(questions_by_course.keys()):
cumulative_lines += 3
course_questions = questions_by_course[cours_num]
for q_data in course_questions:
global_question_counter += 1
for q_length in all_question_lengths:
if q_length['question'] == q_data:
cumulative_lines += q_length['estimated_lines']
break
if cumulative_lines >= target_lines_first_column and column_break_after_question == 0:
column_break_after_question = global_question_counter
break
if column_break_after_question > 0:
break
if column_break_after_question > 0:
break
# Format questions grouped by module
overall_question_count = 1
global_question_counter = 0
column_break_added = False
for module_index, module_name in enumerate(modules):
if module_name not in questions_by_module:
continue
if module_index == 0:
section = doc.sections[-1]
else:
section = doc.add_section(WD_SECTION.NEW_PAGE)
if use_two_columns:
sectPr = section._sectPr
cols = sectPr.find(qn('w:cols'))
if cols is None:
cols = OxmlElement('w:cols')
sectPr.append(cols)
cols.set(qn('w:num'), '2')
cols.set(qn('w:space'), '432')
cols.set(qn('w:equalWidth'), '1')
if use_two_columns:
sectPr = section._sectPr
cols = sectPr.find(qn('w:cols'))
if cols is None:
cols = OxmlElement('w:cols')
sectPr.append(cols)
cols.set(qn('w:num'), '2')
cols.set(qn('w:space'), '432')
cols.set(qn('w:equalWidth'), '1')
create_flexible_header(section, module_name, first_sheet_name, display_name, theme_hex=theme_hex)
# ADD THE COLORED SEPARATOR
if add_separator_line:
add_colored_column_separator(section, theme_hex)
# ========== CUSTOMIZE MODULE TITLE APPEARANCE HERE ==========
MODULE_HEIGHT = 31 # Frame height in points
MODULE_ROUNDNESS = 50 # Corner roundness % (0=square, 50=pill)
MODULE_FONT_SIZE = 35 # Font size in half-points (28=14pt, 24=12pt, 32=16pt)
MODULE_BG_COLOR = theme_hex # Purple background color
MODULE_TEXT_COLOR = "FFFFFF" # White text color
MODULE_PADDING = 60 # Extra width padding
# ============================================================
# Add module title as rounded shape
shape_para = doc.add_paragraph()
shape_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
shape_para.paragraph_format.space_before = Pt(12)
shape_para.paragraph_format.space_after = Pt(8)
# Calculate width based on text length
text_length = len(module_name.upper())
estimated_width = (text_length * 12) + MODULE_PADDING
module_name_escaped = html.escape(module_name.upper())
# Create rounded rectangle shape
shape_xml = f'''
<w:r xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:v="urn:schemas-microsoft-com:vml">
<w:pict>
<v:roundrect style="width:{estimated_width}pt;height:{MODULE_HEIGHT}pt"
arcsize="{MODULE_ROUNDNESS}%" fillcolor="#{MODULE_BG_COLOR}" stroked="f">
<v:textbox inset="10pt,0pt,10pt,0pt" style="v-text-anchor:middle">
<w:txbxContent>
<w:p>
<w:pPr>
<w:jc w:val="center"/>
<w:spacing w:before="0" w:after="0"/>
</w:pPr>
<w:r>
<w:rPr>
<w:rFonts w:ascii="Montserrat" w:hAnsi="Montserrat"/>
<w:b/>
<w:sz w:val="{MODULE_FONT_SIZE}"/>
<w:color w:val="{MODULE_TEXT_COLOR}"/>
</w:rPr>
<w:t>{module_name_escaped}</w:t>
</w:r>
</w:p>
</w:txbxContent>
</v:textbox>
</v:roundrect>
</w:pict>
</w:r>
'''
shape_element = parse_xml(shape_xml)
shape_para._p.append(shape_element)
# Add bookmark
bm_name = sanitize_bookmark_name(f"MOD_{module_name}")
add_bookmark_to_paragraph(shape_para, bm_name, bookmark_id)
toc_entries.append({'level': 'module', 'text': f"MODULE: {module_name}", 'bm': bm_name})
bookmark_id += 1
questions_by_course = questions_by_module[module_name]
cours_titles = modules_data.get(module_name, {})
for natural_num, cours_num in enumerate(sorted(questions_by_course.keys()), start=1):
course_questions = questions_by_course[cours_num]
course_question_count = 1
course_title = cours_titles.get(cours_num, f"COURSE {cours_num}")
num_questions = len(course_questions)
course_para = create_course_title(doc, natural_num, course_title, theme_color, theme_hex=theme_hex,
question_count=num_questions)
bm_course_name = sanitize_bookmark_name(f"COURSE_{module_name}_{cours_num}")
add_bookmark_to_paragraph(course_para, bm_course_name, bookmark_id)
toc_entries.append({'level': 'course', 'text': f"{natural_num}. {course_title}", 'bm': bm_course_name})
bookmark_id += 1
for q_data in course_questions:
global_question_counter += 1
if (use_two_columns and balance_method == "dynamic" and
not column_break_added and global_question_counter == column_break_after_question):
add_column_break(doc)
column_break_added = True
choices = [(choice['letter'], choice['text']) for choice in q_data['choices']]
choices.sort(key=lambda x: x[0])
correct_answers = [choice['letter'] for choice in q_data['choices'] if choice['is_correct']]
correct_answers_str = ''.join(sorted(correct_answers))
if choices:
format_question_block(
doc,
course_question_count,
q_data['question_text'],
choices,
correct_answers_str,
q_data['source'],
q_data['comment'],
q_data.get('choice_commentaire', {}), # NEW
q_data.get('photo_q', None), # NEW
q_data.get('photo_c', None), # NEW
theme_color,
theme_hex,
show_comments, # Pass the show_comments parameter
highlight_words
)
course_question_count += 1
overall_question_count += 1
# PROF.PY: NO EMPTY TABLES - create_empty_course_table(doc, course_questions, cours_num, 1)
# PROF.PY: NO ANSWER TABLES - bookmark_id, responses_toc_entry = create_answer_tables(doc, questions_by_course, cours_titles, module_name, bookmark_id, theme_hex)
# PROF.PY: NO ANSWER TABLES - toc_entries.append(responses_toc_entry)
# ========================================
# INSERT TOC ENTRIES IN THE FIRST SECTION
# ========================================
# We need to insert TOC entries in the FIRST section, before the section break
# Get the body element
body = doc._element.body
# Find where to insert - right after toc_title, before the section break
toc_title_element = toc_title._element
insert_index = list(body).index(toc_title_element) + 1
# In the TOC generation section, update the formatting code:
# Generate the TOC entries and insert them at the correct position
# Mark last course entries for each module (for spacing)
for i, entry in enumerate(toc_entries):
entry['is_last_course_in_module'] = False
if entry['level'] == 'course':
# Check if next entry is a module or responses (or if this is the last entry)
if i + 1 >= len(toc_entries) or toc_entries[i + 1]['level'] in ['module', 'responses']:
entry['is_last_course_in_module'] = True
for entry in toc_entries:
# Create a new paragraph element
new_p = body.makeelement(qn('w:p'), nsmap=body.nsmap)
# Set paragraph properties
pPr = new_p.makeelement(qn('w:pPr'), nsmap=new_p.nsmap)
# Alignment - LEFT (for two-column layout)
jc = pPr.makeelement(qn('w:jc'), nsmap=pPr.nsmap)
jc.set(qn('w:val'), 'left')
pPr.append(jc)
# Set spacing
spacing = pPr.makeelement(qn('w:spacing'), nsmap=pPr.nsmap)
# Add spacing before module entries to separate module blocks
if entry['level'] == 'module':
spacing.set(qn('w:before'), '180') # 9pt spacing before module entries
else:
spacing.set(qn('w:before'), '0')
spacing.set(qn('w:after'), '0')
pPr.append(spacing)
# Add tab stops with dotted leader
tabs = pPr.makeelement(qn('w:tabs'), nsmap=pPr.nsmap)
tab = tabs.makeelement(qn('w:tab'), nsmap=tabs.nsmap)
tab.set(qn('w:val'), 'right')
tab.set(qn('w:leader'), 'dot') # This adds the dots!
tab.set(qn('w:pos'), '5040') # 3.5 inches in twentieths of a point (adjusted for two-column layout)
tabs.append(tab)
pPr.append(tabs)
# Indent course entries and responses entries
if entry['level'] == 'course':
ind = pPr.makeelement(qn('w:ind'), nsmap=pPr.nsmap)
ind.set(qn('w:left'), '360') # 0.25 inches
pPr.append(ind)
elif entry['level'] == 'responses':
ind = pPr.makeelement(qn('w:ind'), nsmap=pPr.nsmap)
ind.set(qn('w:left'), '360') # 0.25 inches - same as course
pPr.append(ind)
new_p.append(pPr)
# Add text run with font formatting
r = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap)
# Add run properties (font)
rPr = r.makeelement(qn('w:rPr'), nsmap=r.nsmap)
# Font family
rFonts = rPr.makeelement(qn('w:rFonts'), nsmap=rPr.nsmap)
rFonts.set(qn('w:ascii'), 'Montserrat')
rFonts.set(qn('w:hAnsi'), 'Montserrat')
rPr.append(rFonts)
# Font size and styling based on level
sz = rPr.makeelement(qn('w:sz'), nsmap=rPr.nsmap)
if entry['level'] == 'module':
sz.set(qn('w:val'), '22') # 11pt
# Bold for module
b = rPr.makeelement(qn('w:b'), nsmap=rPr.nsmap)
rPr.append(b)
# Color for module
color = rPr.makeelement(qn('w:color'), nsmap=rPr.nsmap)
color.set(qn('w:val'), theme_hex)
rPr.append(color)
elif entry['level'] == 'responses':
sz.set(qn('w:val'), '20') # 10pt
# Bold and italic for responses
b = rPr.makeelement(qn('w:b'), nsmap=rPr.nsmap)
rPr.append(b)
i = rPr.makeelement(qn('w:i'), nsmap=rPr.nsmap)
rPr.append(i)
# Purple color for responses to match the box
color = rPr.makeelement(qn('w:color'), nsmap=rPr.nsmap)
color.set(qn('w:val'), theme_hex)
rPr.append(color)
else: # course level
sz.set(qn('w:val'), '20') # 10pt
rPr.append(sz)
r.append(rPr)
# Add text
t = r.makeelement(qn('w:t'), nsmap=r.nsmap)
t.set(qn('xml:space'), 'preserve')
t.text = entry['text']
r.append(t)
new_p.append(r)
# Add tab run (this triggers the dotted leader)
r_tab = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap)
tab_char = r_tab.makeelement(qn('w:tab'), nsmap=r_tab.nsmap)
r_tab.append(tab_char)
new_p.append(r_tab)
# Add PAGEREF field runs with theme color and Montserrat font formatting
r_field_begin = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap)
# Add formatting to field begin
rPr_field = r_field_begin.makeelement(qn('w:rPr'), nsmap=r_field_begin.nsmap)
# Add Montserrat font
rFonts_field = rPr_field.makeelement(qn('w:rFonts'), nsmap=rPr_field.nsmap)
rFonts_field.set(qn('w:ascii'), 'Montserrat')
rFonts_field.set(qn('w:hAnsi'), 'Montserrat')
rPr_field.append(rFonts_field)
# Add bold
b_field = rPr_field.makeelement(qn('w:b'), nsmap=rPr_field.nsmap)
rPr_field.append(b_field)
color_field = rPr_field.makeelement(qn('w:color'), nsmap=rPr_field.nsmap)
color_field.set(qn('w:val'), theme_hex)
rPr_field.append(color_field)
r_field_begin.append(rPr_field)
fldChar1 = r_field_begin.makeelement(qn('w:fldChar'), nsmap=r_field_begin.nsmap)
fldChar1.set(qn('w:fldCharType'), 'begin')
r_field_begin.append(fldChar1)
new_p.append(r_field_begin)
r_instr = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap)
# Add formatting to instruction text
rPr_instr = r_instr.makeelement(qn('w:rPr'), nsmap=r_instr.nsmap)
# Add Montserrat font
rFonts_instr = rPr_instr.makeelement(qn('w:rFonts'), nsmap=rPr_instr.nsmap)
rFonts_instr.set(qn('w:ascii'), 'Montserrat')
rFonts_instr.set(qn('w:hAnsi'), 'Montserrat')
rPr_instr.append(rFonts_instr)
# Add bold
b_instr = rPr_instr.makeelement(qn('w:b'), nsmap=rPr_instr.nsmap)
rPr_instr.append(b_instr)
color_instr = rPr_instr.makeelement(qn('w:color'), nsmap=rPr_instr.nsmap)
color_instr.set(qn('w:val'), theme_hex)
rPr_instr.append(color_instr)
r_instr.append(rPr_instr)
instrText = r_instr.makeelement(qn('w:instrText'), nsmap=r_instr.nsmap)
instrText.set(qn('xml:space'), 'preserve')
instrText.text = f"PAGEREF {entry['bm']} \\h"
r_instr.append(instrText)
new_p.append(r_instr)
r_field_end = new_p.makeelement(qn('w:r'), nsmap=new_p.nsmap)
# Add formatting to field end
rPr_end = r_field_end.makeelement(qn('w:rPr'), nsmap=r_field_end.nsmap)
# Add Montserrat font
rFonts_end = rPr_end.makeelement(qn('w:rFonts'), nsmap=rPr_end.nsmap)
rFonts_end.set(qn('w:ascii'), 'Montserrat')
rFonts_end.set(qn('w:hAnsi'), 'Montserrat')
rPr_end.append(rFonts_end)
# Add bold
b_end = rPr_end.makeelement(qn('w:b'), nsmap=rPr_end.nsmap)
rPr_end.append(b_end)
color_end = rPr_end.makeelement(qn('w:color'), nsmap=rPr_end.nsmap)
color_end.set(qn('w:val'), theme_hex)
rPr_end.append(color_end)
r_field_end.append(rPr_end)
fldChar2 = r_field_end.makeelement(qn('w:fldChar'), nsmap=r_field_end.nsmap)
fldChar2.set(qn('w:fldCharType'), 'end')
r_field_end.append(fldChar2)
new_p.append(r_field_end)
# Insert the paragraph at the correct position
body.insert(insert_index, new_p)
insert_index += 1 # Increment for next insertion
# Add page numbers
add_page_numbers(doc, theme_hex)
# Call it before generating the document:
verify_photo_associations(question_photos)
# Save document
doc.save(output_word_path)
print(f"\n🎉 SUCCESS: Document saved as: {output_word_path}")
print(f"📊 Total questions processed: {overall_question_count - 1}")
print(f"🚫 Total S2/invalid questions skipped: {skipped_s2_questions}")
if total_e_choices > 0:
print(f"✨ Dynamic E columns added for courses with 5-choice questions")
# Clean up temporary folder if it was created
if is_temp and temp_dir_obj is not None:
print(f"\n🧹 Cleaning up temporary folder...")
try:
temp_dir_obj.cleanup()
print(f" ✓ Temporary files removed")
except Exception as e:
print(f" ⚠️ Could not clean up: {e}")
def _debug_report_cours_values(questions_df):
    """Print the unique 'Cours' values, split into valid and invalid/S2 ones."""
    unique_cours = questions_df['Cours'].dropna().unique()
    # Stringify for display only; the raw values are classified below.
    unique_cours_str = [str(c) for c in unique_cours]
    print(f"Unique Cours values: {unique_cours_str}")

    valid_cours = []
    invalid_cours = []
    for c in unique_cours:
        if is_valid_cours_number(c):
            valid_cours.append(c)
        else:
            invalid_cours.append(str(c))

    # Valid values are sorted numerically; fall back to the raw list if a
    # value accepted by is_valid_cours_number cannot be converted to float.
    try:
        valid_cours_sorted = sorted([float(c) for c in valid_cours])
        print(f"Valid cours values: {valid_cours_sorted}")
    except Exception:
        print(f"Valid cours values: {valid_cours}")

    # Invalid values were stringified above, so sorting them cannot fail.
    print(f"Invalid/S2 cours values: {sorted(invalid_cours)}")


def _debug_report_module_sheets(xls, questions_df):
    """Print, for each value of the 'module' column, whether a same-named sheet exists."""
    unique_modules = questions_df['module'].dropna().unique()
    print(f"\nUnique Module values: {list(unique_modules)}")

    # Case-insensitive lookup: lowercase sheet name -> actual sheet name.
    # setdefault keeps the first sheet when two names differ only by case.
    sheet_by_lower_name = {}
    for sheet_name in xls.sheet_names:
        sheet_by_lower_name.setdefault(sheet_name.lower(), sheet_name)

    print("\nModule sheet availability:")
    for module in unique_modules:
        actual_sheet = sheet_by_lower_name.get(str(module).strip().lower())
        if actual_sheet is None:
            print(f" ✗ Module '{module}' -> No matching sheet found")
            continue

        print(f" ✓ Module '{module}' -> Sheet '{actual_sheet}' found")
        # Best effort: a broken module sheet must not stop the whole report.
        try:
            module_df = pd.read_excel(xls, sheet_name=actual_sheet)
            if 'cours' in module_df.columns and 'titre' in module_df.columns:
                print(" Courses in this module:")
                for _, row in module_df.iterrows():
                    if pd.notna(row['cours']):
                        cours_val = row['cours']
                        titre = row.get('titre', 'N/A')
                        print(f" - {cours_val}: {titre}")
        except Exception as e:
            print(f" Error reading sheet: {e}")


def _debug_report_cours_sheet(xls):
    """Print the content of the 'Cours' sheet, flagging rows with an invalid course number."""
    try:
        cours_df = pd.read_excel(xls, sheet_name='Cours')
        print(f"\nCours sheet - Total rows: {len(cours_df)}")
        print(f"Cours sheet columns: {list(cours_df.columns)}")
        if not cours_df.empty:
            print("Course titles:")
            for _, row in cours_df.iterrows():
                cours_val = row.get('cours', 'N/A')
                titre = row.get('titre', 'N/A')
                status = "✓" if is_valid_cours_number(cours_val) else "✗ (SKIPPED)"
                print(f" Course {cours_val}: {titre} {status}")
    except Exception as e:
        # Also reached when the workbook has no 'Cours' sheet.
        print(f"Error reading Cours sheet: {e}")


def debug_excel_structure(excel_file_path):
    """Debug function to analyze Excel structure.

    Prints a summary of the first sheet of the workbook (row count, columns,
    unique 'Numero' / 'Order' / 'Cours' values), checks that every value of
    the 'module' column has a matching sheet, and dumps the 'Cours' sheet.

    Args:
        excel_file_path: Path of the Excel workbook to inspect.

    Returns:
        None. Everything is reported through ``print``.

    Raises:
        Whatever ``pd.ExcelFile`` / ``pd.read_excel`` raise when the workbook
        or its first sheet cannot be read; later per-sheet errors are
        reported and swallowed.
    """
    print("=== DEBUGGING EXCEL STRUCTURE ===")

    # Open the workbook once and reuse it for every read; the context manager
    # guarantees the file handle is released even if a read fails.
    with pd.ExcelFile(excel_file_path) as xls:
        first_sheet_name = xls.sheet_names[0]  # Get the first sheet name
        questions_df = pd.read_excel(xls, sheet_name=first_sheet_name)
        print(f"Total rows: {len(questions_df)}")
        print(f"Columns: {list(questions_df.columns)}")

        # Check unique values in key columns
        if 'Numero' in questions_df.columns:
            numero_values = questions_df['Numero'].dropna().unique()
            try:
                print(f"Unique Numero values: {sorted(numero_values)}")
            except Exception as e:
                # Mixed-type columns cannot be ordered; show them unsorted.
                print(f"Unique Numero values: {list(numero_values)} (couldn't sort: {e})")

        if 'Order' in questions_df.columns:
            order_values = questions_df['Order'].dropna()
            try:
                unique_orders = sorted(order_values.unique())
                print(f"Unique Order values: {unique_orders}")
                # Check specifically for E choices
                e_count = sum(1 for order in order_values if str(order).strip().upper() == 'E')
                print(f"Total E choices found: {e_count}")
            except Exception as e:
                print(f"Unique Order values: {list(order_values.unique())} (couldn't sort: {e})")

        if 'Cours' in questions_df.columns:
            _debug_report_cours_values(questions_df)

        # Check module column and corresponding sheets
        if 'module' in questions_df.columns:
            _debug_report_module_sheets(xls, questions_df)

        # Check Cours sheet
        _debug_report_cours_sheet(xls)
def _report_photo_column(df, column):
    """Print the first entries of photo column *column*.

    Returns True when at least one inspected cell holds an embedded-image
    formula (a value starting with ``=DISPIMG``), False otherwise or when
    the column is absent.
    """
    if column not in df.columns:
        print(f"\n✗ '{column}' column NOT found")
        return False

    print(f"\n✓ Found '{column}' column")
    found_embedded = False
    for idx, val in enumerate(df[column].head()):
        if not pd.notna(val):
            continue
        val_str = str(val).strip()
        if val_str.startswith('=DISPIMG'):
            print(f" Row {idx}: EMBEDDED IMAGE (formula: {val_str[:50]}...)")
            found_embedded = True
        else:
            # Anything else is treated as a file path and checked on disk.
            exists = os.path.exists(val_str)
            print(f" Row {idx}: '{val_str}' - Exists: {exists}")
    return found_embedded


def test_excel_photo_columns(excel_file_path):
    """Test function to check what's actually in your Excel file.

    Reads the first 10 rows of the workbook's first sheet and prints its
    columns plus a preview of the 'Choice commentaire', 'Photo Q' and
    'Photo C' columns. When an embedded image formula is found,
    ``extract_embedded_images_info`` is called on the workbook.

    Args:
        excel_file_path: Path of the Excel workbook to inspect.

    Returns:
        None. Everything is reported through ``print``.
    """
    print("\n" + "=" * 60)
    print("TESTING EXCEL PHOTO AND COMMENT COLUMNS")
    print("=" * 60)

    # Open the workbook once and release the handle as soon as the preview
    # rows are loaded.
    with pd.ExcelFile(excel_file_path) as xls:
        first_sheet = xls.sheet_names[0]
        df = pd.read_excel(xls, sheet_name=first_sheet, nrows=10)

    print(f"\nColumns in sheet '{first_sheet}':")
    for col in df.columns:
        print(f" - {col}")

    # Check for Choice commentaire
    if 'Choice commentaire' in df.columns:
        print("\n✓ Found 'Choice commentaire' column")
        print("NOTE: Each row has ONE comment for ONE choice (A, B, C, D, or E)")
        has_order = 'Order' in df.columns
        for idx, val in enumerate(df['Choice commentaire'].head()):
            if pd.notna(val):
                order = df['Order'].iloc[idx] if has_order else '?'
                preview = repr(str(val)[:100])
                print(f" Row {idx} (Choice {order}): {preview}")
    else:
        print("\n✗ 'Choice commentaire' column NOT found")

    # Check for Photo Q and Photo C. Both columns are always reported, so the
    # helper is called unconditionally rather than short-circuited with any().
    has_embedded_images = False
    for photo_column in ('Photo Q', 'Photo C'):
        if _report_photo_column(df, photo_column):
            has_embedded_images = True

    print("=" * 60 + "\n")
    if has_embedded_images:
        extract_embedded_images_info(excel_file_path)


# The name matches pytest's default "test_*" pattern although this is a manual
# diagnostic helper needing a real workbook path; opt out of test collection.
test_excel_photo_columns.__test__ = False
def _photo_exists_marker(path):
    """Return "✓" when *path* exists on disk, "✗" otherwise.

    Values that are not usable as a path (for example a float NaN) count as
    missing instead of raising.
    """
    try:
        return "✓" if os.path.exists(path) else "✗"
    except TypeError:
        return "✗"


def verify_photo_associations(question_photos):
    """Debug function to verify all photo-question associations.

    Prints, for every question that has a question photo and/or a correction
    photo, the stored path together with a marker telling whether the file
    exists on disk. Questions without any photo are omitted.

    Args:
        question_photos: Mapping of question number to a dict that may hold
            the keys ``'photo_q'`` and ``'photo_c'``.

    Returns:
        None. Everything is reported through ``print``.
    """
    print("\n" + "=" * 60)
    print("PHOTO-QUESTION ASSOCIATIONS")
    print("=" * 60)

    # This is a diagnostic printout: keys of mixed types (e.g. int and str)
    # must not abort the caller, so fall back to ordering by string form.
    try:
        ordered_numbers = sorted(question_photos.keys())
    except TypeError:
        ordered_numbers = sorted(question_photos.keys(), key=str)

    for q_num in ordered_numbers:
        photos = question_photos[q_num]
        photo_q = photos.get('photo_q')
        photo_c = photos.get('photo_c')
        if not (photo_q or photo_c):
            continue

        print(f"\nQuestion {q_num}:")
        if photo_q:
            exists = _photo_exists_marker(photo_q)
            print(f" Photo Q: {exists} {photo_q}")
        if photo_c:
            exists = _photo_exists_marker(photo_c)
            print(f" Photo C: {exists} {photo_c}")

    print("=" * 60 + "\n")