Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -117,9 +117,39 @@ excluding the title, author name, source information, chapter number, annotation
|
|
| 117 |
return resp.choices[0].message.content.strip()
|
| 118 |
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
|
| 125 |
# ======================================================
|
|
|
|
| 117 |
return resp.choices[0].message.content.strip()
|
| 118 |
|
| 119 |
|
| 120 |
+
import re
|
| 121 |
+
|
| 122 |
+
def _split_oversized(sentence, max_words):
    """Break one sentence into (text, word_count) units of at most max_words.

    A sentence that already fits (or a non-positive ``max_words``, for which
    no chunk size makes sense) is returned unchanged as a single unit.
    """
    words = sentence.split()
    if max_words < 1 or len(words) <= max_words:
        return [(sentence, len(words))]
    chunks = (
        words[start:start + max_words]
        for start in range(0, len(words), max_words)
    )
    return [(" ".join(chunk), len(chunk)) for chunk in chunks]


def split_pages(text, max_words=300):
    """Split text into pages on sentence boundaries.

    Sentences are packed greedily: a new page is started whenever adding the
    next sentence would push the current page past ``max_words``.

    - Pages are never broken in the middle of a sentence, except when a
      single sentence is itself longer than ``max_words``; such a sentence is
      broken at word boundaries (whitespace inside it is collapsed to single
      spaces) so that the word limit is still honoured.
    - No page exceeds ``max_words`` whitespace-separated words (for
      ``max_words >= 1``).

    Args:
        text: The text to paginate.
        max_words: Maximum number of whitespace-separated words per page.

    Returns:
        A list of page strings; sentences within a page are joined by a
        single space. Falls back to ``[text]`` if no page was produced.
    """
    # Split into sentences: ., ? or ! followed by spaces or newlines.
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())

    pages = []
    current_page = []
    current_word_count = 0

    for sentence in sentences:
        for unit, unit_len in _split_oversized(sentence, max_words):
            # Adding this unit would exceed max_words -> start a new page.
            if current_page and current_word_count + unit_len > max_words:
                pages.append(" ".join(current_page))
                current_page = []
                current_word_count = 0
            current_page.append(unit)
            current_word_count += unit_len

    # Flush the final page.
    if current_page:
        pages.append(" ".join(current_page))

    return pages or [text]
|
| 153 |
|
| 154 |
|
| 155 |
# ======================================================
|