dseditor committed on
Commit
98b9000
·
verified ·
1 Parent(s): d867e16

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -7
app.py CHANGED
@@ -3,6 +3,7 @@ import re
3
  from docx import Document
4
  from docx.shared import Cm, Pt
5
  from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
 
6
  from docx.oxml.ns import qn
7
  from docx.oxml import OxmlElement
8
  import tempfile
@@ -36,8 +37,32 @@ def format_docx(file, chapter_keywords):
36
  # 合併所有模式
37
  combined_pattern = '|'.join(patterns)
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # 處理每個段落
40
  for paragraph in doc.paragraphs:
 
 
 
 
41
  # 1. 將樣式改為無間距
42
  paragraph.style.paragraph_format.space_before = Cm(0)
43
  paragraph.style.paragraph_format.space_after = Cm(0)
@@ -49,6 +74,9 @@ def format_docx(file, chapter_keywords):
49
 
50
  # 3. 檢查是否為章節標題
51
  if re.search(combined_pattern, paragraph.text):
 
 
 
52
  # 嘗試設定標題 1 樣式(處理不同語言版本的樣式名稱)
53
  try:
54
  # 英文版本
@@ -62,16 +90,21 @@ def format_docx(file, chapter_keywords):
62
  # 其他可能的名稱
63
  paragraph.style = doc.styles['Heading1']
64
  except KeyError:
65
- # 如果都找不到,手動設定樣式
66
- paragraph.style = doc.styles['Normal']
67
- # 手動設定標題樣式
68
- for run in paragraph.runs:
69
- run.font.bold = True
70
- run.font.size = Pt(16) # 16pt
 
 
 
 
71
 
72
- # 確保粗體設定
73
  for run in paragraph.runs:
74
  run.font.bold = True
 
75
 
76
  # 4. 在章節前分頁
77
  paragraph.paragraph_format.page_break_before = True
@@ -141,6 +174,7 @@ def create_interface():
141
  <h3>🛠️ 處理功能說明</h3>
142
  <ul>
143
  <li><strong>樣式調整:</strong>將所有段落設定為無間距樣式</li>
 
144
  <li><strong>首行縮排:</strong>每個段落首行縮排 2 個字元位置</li>
145
  <li><strong>章節識別:</strong>自動識別章節標題(如「第一章」、「第二節」等)</li>
146
  <li><strong>標題格式:</strong>章節標題設定為粗體標題 1 樣式(無縮排)</li>
 
3
  from docx import Document
4
  from docx.shared import Cm, Pt
5
  from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
6
+ from docx.enum.style import WD_STYLE_TYPE
7
  from docx.oxml.ns import qn
8
  from docx.oxml import OxmlElement
9
  import tempfile
 
37
  # 合併所有模式
38
  combined_pattern = '|'.join(patterns)
39
 
40
+ # 先處理多餘的換行:標記要保留的段落
41
+ paragraphs_to_keep = []
42
+ previous_empty = False
43
+
44
+ for paragraph in doc.paragraphs:
45
+ current_empty = len(paragraph.text.strip()) == 0
46
+
47
+ # 如果當前段落不是空的,或者是第一個空段落,則保留
48
+ if not current_empty or not previous_empty:
49
+ paragraphs_to_keep.append(paragraph)
50
+
51
+ previous_empty = current_empty
52
+
53
+ # 刪除不需要的段落(從後往前刪除避免索引問題)
54
+ all_paragraphs = list(doc.paragraphs)
55
+ for paragraph in reversed(all_paragraphs):
56
+ if paragraph not in paragraphs_to_keep:
57
+ p = paragraph._element
58
+ p.getparent().remove(p)
59
+
60
  # 處理每個段落
61
  for paragraph in doc.paragraphs:
62
+ # 跳過空段落
63
+ if len(paragraph.text.strip()) == 0:
64
+ continue
65
+
66
  # 1. 將樣式改為無間距
67
  paragraph.style.paragraph_format.space_before = Cm(0)
68
  paragraph.style.paragraph_format.space_after = Cm(0)
 
74
 
75
  # 3. 檢查是否為章節標題
76
  if re.search(combined_pattern, paragraph.text):
77
+ # 保存原始文字
78
+ original_text = paragraph.text
79
+
80
  # 嘗試設定標題 1 樣式(處理不同語言版本的樣式名稱)
81
  try:
82
  # 英文版本
 
90
  # 其他可能的名稱
91
  paragraph.style = doc.styles['Heading1']
92
  except KeyError:
93
+ # 如果都找不到,創建自定義標題樣式
94
+ from docx.enum.style import WD_STYLE_TYPE
95
+ try:
96
+ heading_style = doc.styles.add_style('CustomHeading1', WD_STYLE_TYPE.PARAGRAPH)
97
+ heading_style.font.bold = True
98
+ heading_style.font.size = Pt(16)
99
+ paragraph.style = heading_style
100
+ except:
101
+ # 最後手段:直接設定格式
102
+ paragraph.style = doc.styles['Normal']
103
 
104
+ # 確保粗體設定和字體大小
105
  for run in paragraph.runs:
106
  run.font.bold = True
107
+ run.font.size = Pt(16)
108
 
109
  # 4. 在章節前分頁
110
  paragraph.paragraph_format.page_break_before = True
 
174
  <h3>🛠️ 處理功能說明</h3>
175
  <ul>
176
  <li><strong>樣式調整:</strong>將所有段落設定為無間距樣式</li>
177
+ <li><strong>刪除多餘換行:</strong>自動合併連續的空行,避免過多空白</li>
178
  <li><strong>首行縮排:</strong>每個段落首行縮排 2 個字元位置</li>
179
  <li><strong>章節識別:</strong>自動識別章節標題(如「第一章」、「第二節」等)</li>
180
  <li><strong>標題格式:</strong>章節標題設定為粗體標題 1 樣式(無縮排)</li>