Upload app.py
Browse files
app.py
CHANGED
|
@@ -15,6 +15,14 @@ def set_outline_level(paragraph, level: int = 0):
|
|
| 15 |
outline.set(qn('w:val'), str(level))
|
| 16 |
pPr.append(outline)
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def format_docx(file, chapter_keywords):
|
| 19 |
"""
|
| 20 |
處理上傳的 Word 文件
|
|
@@ -70,18 +78,12 @@ def format_docx(file, chapter_keywords):
|
|
| 70 |
heading.paragraph_format.line_spacing = 1.0
|
| 71 |
heading.paragraph_format.left_indent = Cm(0)
|
| 72 |
heading.paragraph_format.first_line_indent = Cm(0)
|
| 73 |
-
set_outline_level(heading, 0)
|
| 74 |
for run in heading.runs:
|
| 75 |
run.font.name = '新細明體'
|
| 76 |
run.font.size = Pt(16)
|
| 77 |
elif content_type == 'paragraph':
|
| 78 |
-
|
| 79 |
-
lines = []
|
| 80 |
-
for line in text.splitlines():
|
| 81 |
-
stripped = line.strip()
|
| 82 |
-
if stripped:
|
| 83 |
-
lines.append(stripped)
|
| 84 |
-
clean_text = ' '.join(lines)
|
| 85 |
para = doc.add_paragraph(clean_text)
|
| 86 |
para.paragraph_format.space_before = Cm(0)
|
| 87 |
para.paragraph_format.space_after = Cm(0)
|
|
|
|
| 15 |
outline.set(qn('w:val'), str(level))
|
| 16 |
pPr.append(outline)
|
| 17 |
|
| 18 |
+
def normalize_paragraph(text):
    """Collapse a multi-line paragraph into a single line of text.

    Every line-break character is replaced with a space, runs of two or
    more whitespace characters are squeezed into one space, and leading
    and trailing whitespace is stripped.

    Args:
        text: Raw paragraph text. ``None`` and the empty string are
            accepted and yield an empty result.

    Returns:
        The normalized single-line string.
    """
    if not text:
        return ''
    # Remove every line break, not only CR/LF: this is the same set of
    # boundaries str.splitlines() recognizes (vertical tab, form feed,
    # the file/group/record separators, NEL, and the Unicode line and
    # paragraph separators). A lone one of these would otherwise survive,
    # because the whitespace squeeze below only touches runs of 2 or more.
    text = re.sub(r'[\r\n\v\f\x1c\x1d\x1e\x85\u2028\u2029]+', ' ', text)
    # Squeeze redundant whitespace left behind by the replacement above.
    text = re.sub(r'\s{2,}', ' ', text)
    return text.strip()
|
| 25 |
+
|
| 26 |
def format_docx(file, chapter_keywords):
|
| 27 |
"""
|
| 28 |
處理上傳的 Word 文件
|
|
|
|
| 78 |
heading.paragraph_format.line_spacing = 1.0
|
| 79 |
heading.paragraph_format.left_indent = Cm(0)
|
| 80 |
heading.paragraph_format.first_line_indent = Cm(0)
|
| 81 |
+
set_outline_level(heading, 0)
|
| 82 |
for run in heading.runs:
|
| 83 |
run.font.name = '新細明體'
|
| 84 |
run.font.size = Pt(16)
|
| 85 |
elif content_type == 'paragraph':
|
| 86 |
+
clean_text = normalize_paragraph(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
para = doc.add_paragraph(clean_text)
|
| 88 |
para.paragraph_format.space_before = Cm(0)
|
| 89 |
para.paragraph_format.space_after = Cm(0)
|