Upload 3 files
Browse files- app.py +168 -0
- readme.md +48 -0
- requirements.txt +3 -0
app.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import re
|
| 3 |
+
from docx import Document
|
| 4 |
+
from docx.shared import Cm
|
| 5 |
+
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
| 6 |
+
from docx.oxml.ns import qn
|
| 7 |
+
from docx.oxml import OxmlElement
|
| 8 |
+
import tempfile
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
def _parse_chapter_keywords(chapter_keywords):
    """Split the user-supplied keyword string into a list of non-empty keywords.

    ASCII commas, fullwidth commas and the ideographic comma are all accepted
    as separators; surrounding whitespace and empty entries are dropped.
    """
    pieces = re.split(r'[,,、]', chapter_keywords)
    return [piece.strip() for piece in pieces if piece.strip()]


def _build_chapter_pattern(keywords):
    """Compile one regex matching "第<number><keyword>" for any given keyword.

    The number may be written with digits (``\\d``) or with common Chinese
    numeral characters, so both "第1章" and "第十二章" are recognised.
    Keywords are escaped so regex metacharacters in user input stay literal.
    """
    numerals = r'[\d零〇一二兩两三四五六七八九十百千]+'
    alternatives = '|'.join(re.escape(keyword) for keyword in keywords)
    return re.compile(rf'第\s*{numerals}\s*(?:{alternatives})')


def format_docx(file, chapter_keywords):
    """Reformat an uploaded Word document and mark chapter headings.

    Every paragraph gets zero spacing before/after and single line spacing
    (applied on the paragraph's style), a 2 cm left indent and no first-line
    indent. Paragraphs whose text contains "第<number><keyword>" are switched
    to the 'Heading 1' style, made bold, given a page break before them and
    have their left indent reset to 0.

    Args:
        file: The uploaded file as handed over by the Gradio File input. Its
            ``name`` attribute is used as the path when present; otherwise the
            value itself is treated as a path.
        chapter_keywords: Chapter keywords separated by commas, e.g. "章,節,話".

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is the path
        of the saved .docx, or ``None`` when validation or processing failed.
    """
    if file is None:
        return None, "請上傳一個 Word 文件"

    keywords = _parse_chapter_keywords(chapter_keywords or "")
    if not keywords:
        # Also covers input made only of separators/whitespace, which would
        # otherwise produce a pattern matching any "第<number>" text.
        return None, "請輸入章節分段方式(例如:章,節,話)"

    try:
        # Accept either a plain path or an object exposing the path as .name.
        source_path = getattr(file, "name", file)
        doc = Document(source_path)

        # Compiled once, reused for every paragraph.
        chapter_pattern = _build_chapter_pattern(keywords)

        for paragraph in doc.paragraphs:
            # 1. Remove spacing on the paragraph's (shared) style.
            style_format = paragraph.style.paragraph_format
            style_format.space_before = Cm(0)
            style_format.space_after = Cm(0)
            style_format.line_spacing = 1.0

            # 2. Indent body text by 2 cm, with no extra first-line indent.
            paragraph.paragraph_format.left_indent = Cm(2)
            paragraph.paragraph_format.first_line_indent = Cm(0)

            # 3. Chapter headings: Heading 1, bold, new page, no indent.
            if chapter_pattern.search(paragraph.text):
                paragraph.style = doc.styles['Heading 1']

                for run in paragraph.runs:
                    run.font.bold = True

                # 4. Start each chapter on a new page.
                paragraph.paragraph_format.page_break_before = True

                # Headings are not indented.
                paragraph.paragraph_format.left_indent = Cm(0)

        # Reserve a unique file name atomically (mktemp is deprecated and
        # race-prone); close the handle before python-docx writes to the path.
        with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as tmp:
            output_path = tmp.name
        doc.save(output_path)

        return output_path, f"✅ 處理完成!找到章節關鍵字:{', '.join(keywords)}"

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"❌ 處理失敗:{str(e)}"
|
| 72 |
+
|
| 73 |
+
def create_interface():
    """Assemble the Gradio Blocks UI for the formatter and return it.

    The interface is only constructed here; launching is left to the caller.
    The process button is wired to ``format_docx`` with the uploaded file and
    the keyword text as inputs, and the download file and status text as
    outputs.
    """
    # Static HTML fragments, kept apart from the layout code below.
    banner_html = """
        <div style="text-align: center; margin-bottom: 20px;">
            <h1>📄 Word 文件格式化工具</h1>
            <p>自動格式化您的 Word 文件,設定章節樣式和分頁</p>
        </div>
        """

    features_html = """
        <div style="margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
            <h3>🛠️ 處理功能說明</h3>
            <ul>
                <li><strong>樣式調整:</strong>將所有段落設定為無間距樣式</li>
                <li><strong>縮排設定:</strong>所有段落左縮排 2 公分</li>
                <li><strong>章節識別:</strong>自動識別章節標題(如「第一章」、「第二節」等)</li>
                <li><strong>標題格式:</strong>章節標題設定為粗體標題 1 樣式</li>
                <li><strong>分頁設定:</strong>每個章節前自動分頁</li>
            </ul>
        </div>
        """

    examples_html = """
        <div style="margin-top: 20px; padding: 20px; background-color: #e8f4f8; border-radius: 10px;">
            <h3>💡 使用範例</h3>
            <p><strong>章節關鍵字設定:</strong></p>
            <ul>
                <li>小說:<code>章,節</code></li>
                <li>漫畫:<code>話,回</code></li>
                <li>論文:<code>章,節,段</code></li>
            </ul>
            <p><strong>支援的章節格式:</strong></p>
            <ul>
                <li>第一章、第二章、第三章...</li>
                <li>第1話、第2話、第3話...</li>
                <li>第一節、第二節、第三節...</li>
            </ul>
        </div>
        """

    with gr.Blocks(title="Word 文件格式化工具", theme=gr.themes.Soft()) as demo:
        gr.HTML(banner_html)

        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column(scale=1):
                upload_box = gr.File(
                    label="上傳 Word 文件 (.docx)",
                    file_types=[".docx"],
                    file_count="single",
                )
                keyword_box = gr.Textbox(
                    label="章節分段方式",
                    placeholder="章,節,話",
                    info="請輸入章節關鍵字,用逗號分隔(例如:章,節,話)",
                    value="章,節,話",
                )
                run_button = gr.Button("🔄 開始處理", variant="primary", size="lg")

            # Right column: read-only status text and the resulting file.
            with gr.Column(scale=1):
                status_box = gr.Textbox(
                    label="處理狀態",
                    interactive=False,
                    lines=3,
                )
                result_file = gr.File(
                    label="下載處理後的文件",
                    interactive=False,
                )

        # Informational panels below the form.
        gr.HTML(features_html)
        gr.HTML(examples_html)

        # format_docx returns (path, status), matching the outputs order.
        run_button.click(
            fn=format_docx,
            inputs=[upload_box, keyword_box],
            outputs=[result_file, status_box],
        )

    return demo
|
| 160 |
+
|
| 161 |
+
# Build the interface and start the server when run as a script.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": True,
    }
    demo = create_interface()
    demo.launch(**launch_options)
|
readme.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 📄 Word 文件格式化工具
|
| 2 |
+
|
| 3 |
+
這是一個基於 Gradio 的 Word 文件格式化工具,可以自動處理 .docx 文件的格式設定。
|
| 4 |
+
|
| 5 |
+
## 🚀 功能特色
|
| 6 |
+
|
| 7 |
+
- **自動格式化**:一鍵處理整個文件格式
|
| 8 |
+
- **章節識別**:智能識別章節標題(支援自定義關鍵字)
|
| 9 |
+
- **樣式統一**:設定統一的段落樣式和縮排
|
| 10 |
+
- **自動分頁**:章節前自動分頁
|
| 11 |
+
- **簡單易用**:友好的網頁介面
|
| 12 |
+
|
| 13 |
+
## 📋 處理功能
|
| 14 |
+
|
| 15 |
+
1. **樣式調整**:將所有段落設定為無間距樣式
|
| 16 |
+
2. **縮排設定**:所有內文段落左縮排 2 公分(章節標題不縮排)
|
| 17 |
+
3. **章節識別**:使用正規表示式自動識別章節標題
|
| 18 |
+
4. **標題格式**:章節標題設定為粗體標題 1 樣式
|
| 19 |
+
5. **分頁設定**:每個章節前自動分頁
|
| 20 |
+
|
| 21 |
+
## 🛠️ 使用方法
|
| 22 |
+
|
| 23 |
+
1. 上傳 .docx 文件
|
| 24 |
+
2. 輸入章節關鍵字(如:章,節,話)
|
| 25 |
+
3. 點擊「開始處理」
|
| 26 |
+
4. 下載處理後的文件
|
| 27 |
+
|
| 28 |
+
## 📖 章節格式支援
|
| 29 |
+
|
| 30 |
+
- 第一章、第二章、第三章...
|
| 31 |
+
- 第1話、第2話、第3話...
|
| 32 |
+
- 第一節、第二節、第三節...
|
| 33 |
+
|
| 34 |
+
## 💡 使用範例
|
| 35 |
+
|
| 36 |
+
- **小說**:`章,節`
|
| 37 |
+
- **漫畫**:`話,回`
|
| 38 |
+
- **論文**:`章,節,段`
|
| 39 |
+
|
| 40 |
+
## 🔧 技術實現
|
| 41 |
+
|
| 42 |
+
- **Gradio**:網頁介面框架
|
| 43 |
+
- **python-docx**:Word 文件處理
|
| 44 |
+
- **正規表示式**:章節標題識別
|
| 45 |
+
|
| 46 |
+
## 📄 授權
|
| 47 |
+
|
| 48 |
+
本專案採用 MIT 授權協議。
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=5.0.0
|
| 2 |
+
python-docx>=1.0.0
|
| 3 |
+
lxml>=4.9.0
|