Upload app.py
Browse files
app.py
CHANGED
|
@@ -2,9 +2,19 @@ import gradio as gr
|
|
| 2 |
import re
|
| 3 |
from docx import Document
|
| 4 |
from docx.shared import Cm, Pt
|
|
|
|
|
|
|
|
|
|
| 5 |
import tempfile
|
| 6 |
import os
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
def format_docx(file, chapter_keywords):
|
| 9 |
"""
|
| 10 |
處理上傳的 Word 文件
|
|
@@ -19,7 +29,6 @@ def format_docx(file, chapter_keywords):
|
|
| 19 |
doc = Document(file.name)
|
| 20 |
|
| 21 |
# 確保有 Heading 1 樣式
|
| 22 |
-
from docx.enum.style import WD_STYLE_TYPE
|
| 23 |
styles = doc.styles
|
| 24 |
if 'Heading 1' not in styles:
|
| 25 |
heading_style = styles.add_style('Heading 1', WD_STYLE_TYPE.PARAGRAPH)
|
|
@@ -61,11 +70,17 @@ def format_docx(file, chapter_keywords):
|
|
| 61 |
heading.paragraph_format.line_spacing = 1.0
|
| 62 |
heading.paragraph_format.left_indent = Cm(0)
|
| 63 |
heading.paragraph_format.first_line_indent = Cm(0)
|
|
|
|
| 64 |
for run in heading.runs:
|
| 65 |
run.font.name = '新細明體'
|
| 66 |
run.font.size = Pt(16)
|
| 67 |
elif content_type == 'paragraph':
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
clean_text = ' '.join(lines)
|
| 70 |
para = doc.add_paragraph(clean_text)
|
| 71 |
para.paragraph_format.space_before = Cm(0)
|
|
@@ -118,38 +133,6 @@ def create_interface():
|
|
| 118 |
interactive=False
|
| 119 |
)
|
| 120 |
|
| 121 |
-
gr.HTML("""<div style="margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
|
| 122 |
-
<h3>🛠️ 處理功能說明</h3>
|
| 123 |
-
<ul>
|
| 124 |
-
<li><strong>樣式調整:</strong>將所有段落設定為無間距樣式</li>
|
| 125 |
-
<li><strong>刪除多餘換行:</strong>自動合併連續的空行,避免過多空白</li>
|
| 126 |
-
<li><strong>首行縮排:</strong>每個段落首行縮排 2 個字元位置</li>
|
| 127 |
-
<li><strong>章節識別:</strong>自動識別章節標題(如「第一章」、「第二節」等)</li>
|
| 128 |
-
<li><strong>標題格式:</strong>章節標題設定為粗體標題 1 樣式(無縮排)</li>
|
| 129 |
-
<li><strong>分頁設定:</strong>每個章節前自動分頁</li>
|
| 130 |
-
</ul>
|
| 131 |
-
</div>""")
|
| 132 |
-
|
| 133 |
-
gr.HTML("""<div style="margin-top: 20px; padding: 20px; background-color: #e8f4f8; border-radius: 10px;">
|
| 134 |
-
<h3>💡 使用範例</h3>
|
| 135 |
-
<p><strong>章節關鍵字設定:</strong></p>
|
| 136 |
-
<ul>
|
| 137 |
-
<li>小說:<code>章,節</code></li>
|
| 138 |
-
<li>漫畫:<code>話,回</code></li>
|
| 139 |
-
<li>論文:<code>章,節,段</code></li>
|
| 140 |
-
<li>劇本:<code>幕,場</code></li>
|
| 141 |
-
</ul>
|
| 142 |
-
<p><strong>支援的章節格式:</strong></p>
|
| 143 |
-
<ul>
|
| 144 |
-
<li>第一章、第二章、第三章...</li>
|
| 145 |
-
<li>第1話、第2話、第3話...</li>
|
| 146 |
-
<li>第一節、第二節、第三節...</li>
|
| 147 |
-
<li>第一幕、第二幕、第三幕...</li>
|
| 148 |
-
<li>第I章、第II章、第III章...</li>
|
| 149 |
-
<li>第二十三章、第三十四節...</li>
|
| 150 |
-
</ul>
|
| 151 |
-
</div>""")
|
| 152 |
-
|
| 153 |
process_btn.click(
|
| 154 |
fn=format_docx,
|
| 155 |
inputs=[file_input, chapter_input],
|
|
|
|
| 2 |
import re
|
| 3 |
from docx import Document
|
| 4 |
from docx.shared import Cm, Pt
|
| 5 |
+
from docx.enum.style import WD_STYLE_TYPE
|
| 6 |
+
from docx.oxml import OxmlElement
|
| 7 |
+
from docx.oxml.ns import qn
|
| 8 |
import tempfile
|
| 9 |
import os
|
| 10 |
|
| 11 |
+
def set_outline_level(paragraph, level: int = 0):
    """Set the outline level of *paragraph* by writing ``w:outlineLvl``.

    Args:
        paragraph: A python-docx paragraph; its underlying ``w:p`` element
            is modified in place.
        level: Zero-based value written to ``w:val`` (0 corresponds to
            outline "level 1").
    """
    pPr = paragraph._p.get_or_add_pPr()

    # Drop any previous setting first so repeated calls do not leave
    # duplicate <w:outlineLvl> children behind.
    for existing in pPr.findall(qn('w:outlineLvl')):
        pPr.remove(existing)

    outline = OxmlElement('w:outlineLvl')
    outline.set(qn('w:val'), str(level))

    # <w:pPr> children are schema-ordered; a plain append() would place the
    # element after w:rPr / w:sectPr / w:pPrChange when those are present.
    pPr.insert_element_before(
        outline, 'w:divId', 'w:cnfStyle', 'w:rPr', 'w:sectPr', 'w:pPrChange'
    )
|
| 17 |
+
|
| 18 |
def format_docx(file, chapter_keywords):
|
| 19 |
"""
|
| 20 |
處理上傳的 Word 文件
|
|
|
|
| 29 |
doc = Document(file.name)
|
| 30 |
|
| 31 |
# 確保有 Heading 1 樣式
|
|
|
|
| 32 |
styles = doc.styles
|
| 33 |
if 'Heading 1' not in styles:
|
| 34 |
heading_style = styles.add_style('Heading 1', WD_STYLE_TYPE.PARAGRAPH)
|
|
|
|
| 70 |
heading.paragraph_format.line_spacing = 1.0
|
| 71 |
heading.paragraph_format.left_indent = Cm(0)
|
| 72 |
heading.paragraph_format.first_line_indent = Cm(0)
|
| 73 |
+
set_outline_level(heading, 0) # 強制指定為層級 1
|
| 74 |
for run in heading.runs:
|
| 75 |
run.font.name = '新細明體'
|
| 76 |
run.font.size = Pt(16)
|
| 77 |
elif content_type == 'paragraph':
|
| 78 |
+
# 處理多行段落:分行再合併為單一段落
|
| 79 |
+
lines = []
|
| 80 |
+
for line in text.splitlines():
|
| 81 |
+
stripped = line.strip()
|
| 82 |
+
if stripped:
|
| 83 |
+
lines.append(stripped)
|
| 84 |
clean_text = ' '.join(lines)
|
| 85 |
para = doc.add_paragraph(clean_text)
|
| 86 |
para.paragraph_format.space_before = Cm(0)
|
|
|
|
| 133 |
interactive=False
|
| 134 |
)
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
process_btn.click(
|
| 137 |
fn=format_docx,
|
| 138 |
inputs=[file_input, chapter_input],
|