File size: 7,073 Bytes
342973b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
"""
Word Document Generator - Generate .docx files with formatting
"""

import io
from typing import Dict, List, Optional
from datetime import datetime
import logging

logger = logging.getLogger(__name__)


class WordGenerator:
    """
    Generate Word documents (.docx) with styles, formatting, and professional layouts.

    Uses python-docx when it can be imported; otherwise falls back to a
    hand-built minimal OOXML package (see ``_generate_word_fallback``).
    """

    def __init__(self):
        """Initialize Word generator with default font and spacing settings."""
        self.font_name = "Calibri"
        self.font_size = 11
        # Applied to every body paragraph written by generate_word_doc.
        self.line_spacing = 1.5

    def generate_word_doc(
        self,
        title: str,
        content: Dict[str, str],
        author: str = "AI Academic Suite",
        include_toc: bool = True,
        include_citations: bool = False,
        citations: Optional[List[str]] = None,
    ) -> bytes:
        """
        Generate Word document.

        Args:
            title: Document title
            content: Dictionary of section titles and content; each content
                string is split into paragraphs on blank lines
            author: Document author
            include_toc: Include table of contents
            include_citations: Include bibliography
            citations: List of citations (only used when include_citations is True)

        Returns:
            Word document bytes. If python-docx cannot be imported, the bytes
            of the reduced fallback document are returned instead.
        """
        # Only a missing python-docx should trigger the fallback, so the try
        # body is limited to the imports themselves.
        try:
            from docx import Document
            from docx.enum.text import WD_ALIGN_PARAGRAPH
        except ImportError:
            logger.warning("python-docx not available")
            return self._generate_word_fallback(title, content)

        doc = Document()

        # Add title
        title_paragraph = doc.add_paragraph(title)
        title_paragraph.style = "Heading 1"
        title_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # Add author and date
        byline = doc.add_paragraph(f"By {author}")
        byline.alignment = WD_ALIGN_PARAGRAPH.CENTER
        dateline = doc.add_paragraph(datetime.now().strftime("%B %d, %Y"))
        dateline.alignment = WD_ALIGN_PARAGRAPH.CENTER
        doc.add_paragraph()

        # Add table of contents
        if include_toc:
            toc_paragraph = doc.add_paragraph("Table of Contents")
            toc_paragraph.style = "Heading 2"
            # "List Number" already numbers its paragraphs, so the entry text
            # carries no manual "1. " prefix (that produced "1. 1. Title").
            for section in content:
                doc.add_paragraph(section, style="List Number")
            doc.add_page_break()

        # Add sections
        for section_title, section_content in content.items():
            section_para = doc.add_paragraph(section_title)
            section_para.style = "Heading 2"

            # Split content into paragraphs; a None section is treated as empty.
            for para_text in (section_content or "").split("\n\n"):
                if para_text.strip():
                    p = doc.add_paragraph(para_text)
                    p.paragraph_format.line_spacing = self.line_spacing

        # Add bibliography
        if include_citations and citations:
            doc.add_page_break()
            ref_para = doc.add_paragraph("References")
            ref_para.style = "Heading 2"

            for citation in citations:
                doc.add_paragraph(citation, style="List Bullet")

        # Save to bytes
        doc_buffer = io.BytesIO()
        doc.save(doc_buffer)
        return doc_buffer.getvalue()

    def _generate_word_fallback(self, title: str, content: Dict[str, str]) -> bytes:
        """
        Fallback Word document generation without python-docx.

        Builds a minimal OOXML package containing one paragraph for the title
        and one paragraph per section in the form ``"<section>: <content>"``,
        where the content is truncated to its first 100 characters.

        Args:
            title: Document title
            content: Dictionary of section titles and content

        Returns:
            Bytes of the zip package, or ``b"Word generation failed"`` if the
            package could not be built.
        """
        import zipfile
        from xml.sax.saxutils import escape

        try:
            # Truncate first, then escape, so an entity such as "&amp;" is
            # never cut in half. Escaping keeps document.xml well-formed when
            # the text contains "&", "<" or ">".
            paragraph_texts = [escape(f"{title}")]
            paragraph_texts.extend(
                escape(f"{sec}: {(cnt or '')[:100]}") for sec, cnt in content.items()
            )
            body = "".join(
                f"<w:p><w:r><w:t>{text}</w:t></w:r></w:p>" for text in paragraph_texts
            )

            docx_content = {
                "[Content_Types].xml": '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
                '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
                '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
                '<Default Extension="xml" ContentType="application/xml"/>'
                '<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
                "</Types>",
                # Package-level relationship that points readers at the main
                # document part.
                "_rels/.rels": '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
                '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
                '<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>'
                "</Relationships>",
                "word/document.xml": '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
                '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
                f"<w:body>{body}</w:body></w:document>",
            }

            # Create DOCX file
            docx_buffer = io.BytesIO()
            with zipfile.ZipFile(docx_buffer, "w") as docx:
                for filename, content_str in docx_content.items():
                    docx.writestr(filename, content_str)

            return docx_buffer.getvalue()

        except Exception:
            # Best-effort path: keep the sentinel return value callers may
            # already check for, but record why generation failed.
            logger.exception("Fallback Word generation failed")
            return b"Word generation failed"

    def add_styles(self, doc_bytes: bytes) -> bytes:
        """
        Add professional styles to Word document.

        Sets the "Normal" style to 12pt Calibri and makes "Heading 1" through
        "Heading 5" bold with sizes from 22pt (Heading 1) down to 14pt
        (Heading 5), for whichever of those styles exist in the document.

        Args:
            doc_bytes: Word document bytes

        Returns:
            Styled document bytes, or the original bytes unchanged if styling
            fails (including when python-docx is not installed).
        """
        try:
            from docx import Document
            # Pt must be imported here: it is not available at module level,
            # and without it every call failed and returned the input as-is.
            from docx.shared import Pt

            doc = Document(io.BytesIO(doc_bytes))

            # Modify existing styles
            styles = doc.styles

            # Update Normal style
            if "Normal" in styles:
                style = styles["Normal"]
                style.font.size = Pt(12)
                style.font.name = "Calibri"

            # Update Heading styles
            for i in range(1, 6):
                heading_name = f"Heading {i}"
                if heading_name in styles:
                    style = styles[heading_name]
                    style.font.size = Pt(14 + (5 - i) * 2)
                    style.font.bold = True

            # Save modified document
            output_buffer = io.BytesIO()
            doc.save(output_buffer)
            return output_buffer.getvalue()

        except Exception:
            # Styling is best-effort: log the cause and hand back the input.
            logger.exception("Styling Word document failed; returning original bytes")
            return doc_bytes

    def extract_text_from_docx(self, doc_bytes: bytes) -> str:
        """
        Extract text from Word document.

        Paragraph text comes first, one paragraph per line. Table text follows:
        each cell's text is followed by a tab and each row ends with a newline.

        Args:
            doc_bytes: Word document bytes

        Returns:
            Extracted text, or the string "Document text extraction failed"
            if the document could not be read.
        """
        try:
            from docx import Document

            doc = Document(io.BytesIO(doc_bytes))

            # Collect pieces and join once instead of repeated string +=.
            parts = [paragraph.text + "\n" for paragraph in doc.paragraphs]

            for table in doc.tables:
                for row in table.rows:
                    parts.extend(cell.text + "\t" for cell in row.cells)
                    parts.append("\n")

            return "".join(parts)

        except Exception:
            logger.exception("Document text extraction failed")
            return "Document text extraction failed"