import os
import re
import tempfile

import gradio as gr
from docx import Document
from docx.enum.style import WD_STYLE_TYPE
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt

# Typeface applied to every rebuilt heading and body run.
BODY_FONT_NAME = '新細明體'

# Children of <w:pPr> that the WordprocessingML schema places after
# <w:outlineLvl>; the outline element must be inserted before any of them.
_OUTLINE_LVL_SUCCESSORS = (
    'w:divId',
    'w:cnfStyle',
    'w:rPr',
    'w:sectPr',
    'w:pPrChange',
)


def set_outline_level(paragraph, level: int = 0):
    """Set the outline (navigation pane / TOC) level of a paragraph.

    Any ``<w:outlineLvl>`` already present on the paragraph is replaced, so
    calling this more than once never leaves duplicate elements behind.

    Args:
        paragraph: python-docx paragraph whose ``<w:pPr>`` is modified.
        level: Outline level written to ``w:val`` (0 is the top level).
    """
    pPr = paragraph._p.get_or_add_pPr()

    # Drop stale entries first; a repeated element makes the XML invalid.
    for stale in pPr.findall(qn('w:outlineLvl')):
        pPr.remove(stale)

    outline = OxmlElement('w:outlineLvl')
    outline.set(qn('w:val'), str(level))

    # Keep schema order: insert ahead of the first successor that exists,
    # otherwise the element can simply go last.
    for tag in _OUTLINE_LVL_SUCCESSORS:
        successor = pPr.find(qn(tag))
        if successor is not None:
            successor.addprevious(outline)
            break
    else:
        pPr.append(outline)


def normalize_paragraph(text):
    """Collapse in-paragraph line breaks and repeated whitespace.

    Runs of CR/LF become a single space, runs of two or more whitespace
    characters become a single space, and the result is stripped.

    Args:
        text: Raw paragraph text.

    Returns:
        The cleaned single-line text.
    """
    text = re.sub(r'[\r\n]+', ' ', text)
    text = re.sub(r'\s{2,}', ' ', text)
    return text.strip()


def process_paragraphs_with_cleanup(doc, combined_pattern):
    """Classify the document's paragraphs and collapse runs of empty ones.

    Consecutive empty paragraphs are reduced to a single ``'empty'`` entry
    (the Word "^p^p -> ^p" replacement).

    Args:
        doc: python-docx document whose ``paragraphs`` are read.
        combined_pattern: Regex (string or compiled) identifying headings;
            a paragraph is a heading when the pattern is found anywhere in
            its stripped text.

    Returns:
        A list of ``(kind, text)`` tuples in document order, where ``kind``
        is ``'heading'``, ``'paragraph'`` or ``'empty'`` and ``text`` is the
        stripped paragraph text (``''`` for empty entries).
    """
    # Compile once instead of resolving the pattern for every paragraph.
    heading_re = re.compile(combined_pattern)

    content_list = []
    prev_empty = False
    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            # Keep an empty paragraph only when the previous one was not empty.
            if not prev_empty:
                content_list.append(('empty', ''))
            prev_empty = True
            continue

        kind = 'heading' if heading_re.search(text) else 'paragraph'
        content_list.append((kind, text))
        prev_empty = False
    return content_list


def _apply_block_format(paragraph, space_after, first_line_indent):
    """Apply the spacing and indent settings shared by headings and body text."""
    fmt = paragraph.paragraph_format
    fmt.space_before = Cm(0)
    fmt.space_after = space_after
    fmt.line_spacing = 1.0
    fmt.left_indent = Cm(0)
    fmt.first_line_indent = first_line_indent


def _apply_run_font(run, size=None):
    """Set the body typeface (and optionally the size) on a run."""
    run.font.name = BODY_FONT_NAME
    # In python-docx, font.name fills only the Latin font slots; Word picks
    # the typeface for CJK characters from w:eastAsia, which is set here.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), BODY_FONT_NAME)
    if size is not None:
        run.font.size = size


def rebuild_document(doc, content_list):
    """Append formatted paragraphs to ``doc`` from a classified content list.

    Headings use the ``'Heading 1'`` style (which must exist in ``doc``),
    start on a new page, and get outline level 0. Body paragraphs are
    normalized with ``normalize_paragraph`` and get a 0.7 cm first-line
    indent. ``'empty'`` entries become empty paragraphs.

    Args:
        doc: python-docx document that receives the new paragraphs.
        content_list: ``(kind, text)`` tuples as produced by
            ``process_paragraphs_with_cleanup``.
    """
    for kind, text in content_list:
        if kind == 'heading':
            heading = doc.add_paragraph(text, style='Heading 1')
            heading.paragraph_format.page_break_before = True
            _apply_block_format(
                heading, space_after=Cm(0.3), first_line_indent=Cm(0)
            )
            set_outline_level(heading, 0)
            for run in heading.runs:
                _apply_run_font(run, size=Pt(16))
        elif kind == 'paragraph':
            para = doc.add_paragraph(normalize_paragraph(text))
            _apply_block_format(
                para, space_after=Cm(0), first_line_indent=Cm(0.7)
            )
            for run in para.runs:
                _apply_run_font(run)
        elif kind == 'empty':
            # Only reached when the caller chose to keep empty paragraphs.
            doc.add_paragraph('')


def _parse_chapter_keywords(chapter_keywords):
    """Split the user's keyword string into a list of non-empty keywords.

    Both the ASCII comma and the full-width comma are accepted as separators;
    surrounding whitespace is stripped and blank entries are dropped.
    """
    if not chapter_keywords:
        return []
    pieces = re.split(r'[,,]', chapter_keywords)
    return [piece.strip() for piece in pieces if piece.strip()]


def _build_chapter_pattern(keywords):
    """Build the heading regex "第 <numeral> <keyword>" for the given keywords."""
    # Keywords are user input: escape them so they match literally and cannot
    # inject regex syntax. The raw f-string keeps ``\s`` a regex escape.
    return '|'.join(
        rf'第\s*[0-9一二三四五六七八九十百千萬壹貳參肆伍陸柒捌玖拾佰仟萬IVXLCDMivxlcdm]+\s*{re.escape(keyword)}'
        for keyword in keywords
    )


def format_docx(file, chapter_keywords, remove_empty_paragraphs=True):
    """Reformat an uploaded .docx file and return the path of the result.

    Paragraphs matching "第<numeral><keyword>" become 'Heading 1' chapter
    headings; all other text is rebuilt as body paragraphs. Every paragraph
    of the source document is removed and re-created.

    Args:
        file: Uploaded file, either an object with a ``name`` attribute
            holding the path or the path itself; ``None`` if nothing was
            uploaded.
        chapter_keywords: Comma-separated chapter keywords, e.g. "章,節,話".
        remove_empty_paragraphs: When true, drop all empty paragraphs;
            otherwise keep one empty paragraph per run of empty ones.

    Returns:
        ``(output_path, status_message)``. ``output_path`` is ``None`` when
        the input is invalid or processing fails; the message then explains
        why.
    """
    if file is None:
        return None, "請上傳一個 Word 文件"

    # Validate before touching the document: a blank keyword would produce a
    # pattern that matches any "第<numeral>" text.
    keywords = _parse_chapter_keywords(chapter_keywords)
    if not keywords:
        return None, "請輸入章節分段方式(例如:章,節,話)"

    try:
        # Accept both file-like upload objects and plain path strings.
        doc = Document(getattr(file, 'name', file))

        # Make sure the 'Heading 1' style exists.
        styles = doc.styles
        if 'Heading 1' not in styles:
            heading_style = styles.add_style('Heading 1', WD_STYLE_TYPE.PARAGRAPH)
            heading_style.base_style = styles['Normal']
            heading_style.font.bold = True
            heading_style.font.size = Pt(16)

        combined_pattern = _build_chapter_pattern(keywords)

        # Classify paragraphs and collapse runs of empty ones.
        content_list = process_paragraphs_with_cleanup(doc, combined_pattern)

        # Optionally drop the remaining empty paragraphs as well.
        if remove_empty_paragraphs:
            content_list = [item for item in content_list if item[0] != 'empty']

        # Remove the original paragraphs before rebuilding.
        for p in doc.paragraphs:
            p._element.getparent().remove(p._element)

        rebuild_document(doc, content_list)

        # Reserve the output file atomically (tempfile.mktemp is deprecated
        # and race-prone); close it first so saving by path works everywhere.
        with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as tmp:
            output_path = tmp.name
        doc.save(output_path)

        empty_status = "已移除所有空段落" if remove_empty_paragraphs else "保留單個空段落"
        return output_path, f"✅ 處理完成!找到章節關鍵字:{', '.join(keywords)},{empty_status}"

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"❌ 處理失敗:{e}"


def create_interface():
    """Build the Gradio Blocks UI and wire the button to ``format_docx``.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="Word 文件格式化工具", theme=gr.themes.Soft()) as demo:
        gr.HTML(
            "\n\n📄 Word 文件格式化工具\n\n"
            "自動格式化您的 Word 文件,設定章節樣式和分頁\n\n"
        )

        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="上傳 Word 文件 (.docx)",
                    file_types=[".docx"],
                    file_count="single",
                )
                chapter_input = gr.Textbox(
                    label="章節分段方式",
                    placeholder="章,節,話",
                    value="章,節,話",
                )
                remove_empty_checkbox = gr.Checkbox(
                    label="移除空段落",
                    value=True,
                    info="勾選時會移除所有空段落,取消勾選時會保留單個空段落(^p^p -> ^p)",
                )
                process_btn = gr.Button("🔄 開始處理", variant="primary", size="lg")

            with gr.Column(scale=1):
                status_output = gr.Textbox(label="處理狀態", interactive=False, lines=3)
                download_output = gr.File(label="下載處理後的文件", interactive=False)

        # format_docx returns (path, message), matching the outputs order.
        process_btn.click(
            fn=format_docx,
            inputs=[file_input, chapter_input, remove_empty_checkbox],
            outputs=[download_output, status_output],
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    # Binds to all interfaces on port 7860 and requests a Gradio share link.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)