""" Word Document Generator - Generate .docx files with formatting """ import io from typing import Dict, List, Optional from datetime import datetime import logging logger = logging.getLogger(__name__) class WordGenerator: """ Generate Word documents (.docx) with styles, formatting, and professional layouts. """ def __init__(self): """Initialize Word generator.""" self.font_name = "Calibri" self.font_size = 11 self.line_spacing = 1.5 def generate_word_doc( self, title: str, content: Dict[str, str], author: str = "AI Academic Suite", include_toc: bool = True, include_citations: bool = False, citations: List[str] = None, ) -> bytes: """ Generate Word document. Args: title: Document title content: Dictionary of section titles and content author: Document author include_toc: Include table of contents include_citations: Include bibliography citations: List of citations Returns: Word document bytes """ try: from docx import Document from docx.shared import Pt, Inches, RGBColor from docx.enum.text import WD_ALIGN_PARAGRAPH doc = Document() # Add title title_paragraph = doc.add_paragraph(title) title_paragraph.style = "Heading 1" title_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER # Add author and date metadata = doc.add_paragraph(f"By {author}") metadata.alignment = WD_ALIGN_PARAGRAPH.CENTER metadata = doc.add_paragraph(datetime.now().strftime("%B %d, %Y")) metadata.alignment = WD_ALIGN_PARAGRAPH.CENTER doc.add_paragraph() # Add table of contents if include_toc: toc_paragraph = doc.add_paragraph("Table of Contents") toc_paragraph.style = "Heading 2" for i, section in enumerate(content.keys(), 1): doc.add_paragraph(f"{i}. {section}", style="List Number") doc.add_page_break() # Add sections for section_title, section_content in content.items(): section_para = doc.add_paragraph(section_title) section_para.style = "Heading 2" # Split content into paragraphs for para_text in section_content.split("\n\n"): if para_text.strip(): p = doc.add_paragraph(para_text) p.paragraph_format.line_spacing = self.line_spacing # Add bibliography if include_citations and citations: doc.add_page_break() ref_para = doc.add_paragraph("References") ref_para.style = "Heading 2" for citation in citations: doc.add_paragraph(citation, style="List Bullet") # Save to bytes doc_buffer = io.BytesIO() doc.save(doc_buffer) doc_buffer.seek(0) return doc_buffer.getvalue() except ImportError: logger.warning("python-docx not available") return self._generate_word_fallback(title, content) def _generate_word_fallback(self, title: str, content: Dict[str, str]) -> bytes: """Fallback Word document generation.""" try: # Create a minimal DOCX-like structure import zipfile from xml.etree import ElementTree as ET docx_content = { "[Content_Types].xml": '' '' '' '' '' "", "word/document.xml": f'' f'' f"{title}" f"{''.join(f'{sec}: {cnt[:100]}' for sec, cnt in content.items())}" f"", } # Create DOCX file docx_buffer = io.BytesIO() with zipfile.ZipFile(docx_buffer, "w") as docx: for filename, content_str in docx_content.items(): docx.writestr(filename, content_str) docx_buffer.seek(0) return docx_buffer.getvalue() except: return b"Word generation failed" def add_styles(self, doc_bytes: bytes) -> bytes: """ Add professional styles to Word document. Args: doc_bytes: Word document bytes Returns: Styled document bytes """ try: from docx import Document import io doc = Document(io.BytesIO(doc_bytes)) # Modify existing styles styles = doc.styles # Update Normal style if "Normal" in styles: style = styles["Normal"] style.font.size = Pt(12) style.font.name = "Calibri" # Update Heading styles for i in range(1, 6): heading_name = f"Heading {i}" if heading_name in styles: style = styles[heading_name] style.font.size = Pt(14 + (5 - i) * 2) style.font.bold = True # Save modified document output_buffer = io.BytesIO() doc.save(output_buffer) output_buffer.seek(0) return output_buffer.getvalue() except: return doc_bytes # Return original if styling fails def extract_text_from_docx(self, doc_bytes: bytes) -> str: """ Extract text from Word document. Args: doc_bytes: Word document bytes Returns: Extracted text """ try: from docx import Document import io doc = Document(io.BytesIO(doc_bytes)) text = "" for paragraph in doc.paragraphs: text += paragraph.text + "\n" for table in doc.tables: for row in table.rows: for cell in row.cells: text += cell.text + "\t" text += "\n" return text except: return "Document text extraction failed"