Spaces:
Paused
Paused
File size: 7,073 Bytes
342973b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
"""
Word Document Generator - Generate .docx files with formatting
"""
import io
from typing import Dict, List, Optional
from datetime import datetime
import logging
logger = logging.getLogger(__name__)
class WordGenerator:
    """
    Generate Word documents (.docx) with styles, formatting, and professional layouts.

    The rich path relies on the third-party ``python-docx`` package, imported
    lazily inside each method. When it is missing, ``generate_word_doc`` falls
    back to a minimal hand-built DOCX package made with the standard library.
    """

    def __init__(self):
        """Initialize Word generator."""
        # Only ``line_spacing`` is read by the methods of this class; the font
        # attributes are stored but not applied anywhere in this class.
        self.font_name = "Calibri"
        self.font_size = 11
        self.line_spacing = 1.5

    def generate_word_doc(
        self,
        title: str,
        content: Dict[str, str],
        author: str = "AI Academic Suite",
        include_toc: bool = True,
        include_citations: bool = False,
        citations: Optional[List[str]] = None,
    ) -> bytes:
        """
        Generate Word document.

        Args:
            title: Document title
            content: Dictionary of section titles and content. Section content
                is split into paragraphs on blank lines ("\\n\\n").
            author: Document author
            include_toc: Include table of contents
            include_citations: Include bibliography
            citations: List of citations; the bibliography is only written when
                ``include_citations`` is true and this list is non-empty.

        Returns:
            Word document bytes. If python-docx cannot be imported, the bytes
            come from ``_generate_word_fallback`` instead.
        """
        # Keep the try body limited to the imports so that only a missing
        # dependency (not an unrelated error further down) triggers the fallback.
        try:
            from docx import Document
            from docx.enum.text import WD_ALIGN_PARAGRAPH
        except ImportError:
            logger.warning("python-docx not available")
            return self._generate_word_fallback(title, content)

        doc = Document()

        # Add title
        title_paragraph = doc.add_paragraph(title, style="Heading 1")
        title_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # Add author and date
        byline = doc.add_paragraph(f"By {author}")
        byline.alignment = WD_ALIGN_PARAGRAPH.CENTER
        date_line = doc.add_paragraph(datetime.now().strftime("%B %d, %Y"))
        date_line.alignment = WD_ALIGN_PARAGRAPH.CENTER
        doc.add_paragraph()

        # Add table of contents
        if include_toc:
            doc.add_paragraph("Table of Contents", style="Heading 2")
            for section in content:
                # "List Number" already numbers each entry, so no manual
                # "1. " prefix is added (it would render as "1. 1. Title").
                doc.add_paragraph(section, style="List Number")
            doc.add_page_break()

        # Add sections
        for section_title, section_content in content.items():
            doc.add_paragraph(section_title, style="Heading 2")
            # Split content into paragraphs, skipping whitespace-only chunks
            for para_text in section_content.split("\n\n"):
                if para_text.strip():
                    paragraph = doc.add_paragraph(para_text)
                    paragraph.paragraph_format.line_spacing = self.line_spacing

        # Add bibliography
        if include_citations and citations:
            doc.add_page_break()
            doc.add_paragraph("References", style="Heading 2")
            for citation in citations:
                doc.add_paragraph(citation, style="List Bullet")

        # Save to bytes
        doc_buffer = io.BytesIO()
        doc.save(doc_buffer)
        return doc_buffer.getvalue()

    def _generate_word_fallback(self, title: str, content: Dict[str, str]) -> bytes:
        """
        Fallback Word document generation without python-docx.

        Builds a minimal DOCX package (content types, package relationships and
        the main document part) containing the title followed by one paragraph
        per section in the form "<section>: <first 100 characters of content>".

        Args:
            title: Document title
            content: Dictionary of section titles and content

        Returns:
            DOCX package bytes, or ``b"Word generation failed"`` if building
            the package raised an exception.
        """
        try:
            import zipfile
            from xml.sax.saxutils import escape

            def paragraph_xml(text) -> str:
                # Escape &, < and > so arbitrary text cannot break the XML.
                return f"<w:p><w:r><w:t>{escape(str(text))}</w:t></w:r></w:p>"

            # Truncate BEFORE escaping so an entity such as "&amp;" is never
            # cut in half by the 100-character limit.
            section_paragraphs = "".join(
                paragraph_xml(f"{sec}: {cnt[:100]}") for sec, cnt in content.items()
            )

            docx_content = {
                "[Content_Types].xml": (
                    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
                    '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
                    '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
                    '<Default Extension="xml" ContentType="application/xml"/>'
                    '<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
                    "</Types>"
                ),
                # Package-level relationship that tells consumers which part is
                # the main document; without it the package is not a valid DOCX.
                "_rels/.rels": (
                    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
                    '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
                    '<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>'
                    "</Relationships>"
                ),
                "word/document.xml": (
                    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
                    '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
                    f"<w:body>{paragraph_xml(title)}"
                    f"{section_paragraphs}"
                    "</w:body></w:document>"
                ),
            }

            # Create DOCX file
            docx_buffer = io.BytesIO()
            with zipfile.ZipFile(docx_buffer, "w") as docx:
                for filename, content_str in docx_content.items():
                    docx.writestr(filename, content_str)
            return docx_buffer.getvalue()
        except Exception:
            # Best-effort fallback: keep the sentinel return value callers may
            # rely on, but record why generation failed.
            logger.exception("Fallback Word generation failed")
            return b"Word generation failed"

    def add_styles(self, doc_bytes: bytes) -> bytes:
        """
        Add professional styles to Word document.

        Sets the "Normal" style to 12 pt Calibri and makes "Heading 1" through
        "Heading 5" bold with sizes from 22 pt down to 14 pt in 2 pt steps.

        Args:
            doc_bytes: Word document bytes

        Returns:
            Styled document bytes, or the original ``doc_bytes`` unchanged if
            styling fails (including when python-docx is not installed).
        """
        try:
            from docx import Document
            from docx.shared import Pt  # required for the Pt(...) sizes below

            doc = Document(io.BytesIO(doc_bytes))

            # Modify existing styles
            styles = doc.styles

            # Update Normal style
            if "Normal" in styles:
                normal_style = styles["Normal"]
                normal_style.font.size = Pt(12)
                normal_style.font.name = "Calibri"

            # Update Heading styles
            for level in range(1, 6):
                heading_name = f"Heading {level}"
                if heading_name in styles:
                    heading_style = styles[heading_name]
                    heading_style.font.size = Pt(14 + (5 - level) * 2)
                    heading_style.font.bold = True

            # Save modified document
            output_buffer = io.BytesIO()
            doc.save(output_buffer)
            return output_buffer.getvalue()
        except Exception:
            # Styling is best-effort: log the cause and hand back the input.
            logger.exception("Failed to apply styles to Word document")
            return doc_bytes

    def extract_text_from_docx(self, doc_bytes: bytes) -> str:
        """
        Extract text from Word document.

        Body paragraphs come first, one per line. Table text follows: each
        cell's text is followed by a tab and each row is ended by a newline.

        Args:
            doc_bytes: Word document bytes

        Returns:
            Extracted text, or the string "Document text extraction failed" if
            the document could not be read.
        """
        try:
            from docx import Document

            doc = Document(io.BytesIO(doc_bytes))

            # Collect pieces and join once instead of repeated string +=.
            pieces = [paragraph.text + "\n" for paragraph in doc.paragraphs]
            for table in doc.tables:
                for row in table.rows:
                    pieces.extend(cell.text + "\t" for cell in row.cells)
                    pieces.append("\n")
            return "".join(pieces)
        except Exception:
            logger.exception("Failed to extract text from Word document")
            return "Document text extraction failed"
|