Spaces:
Sleeping
Sleeping
Update first.py
Browse files
first.py
CHANGED
|
@@ -72,8 +72,8 @@ def extract_text_from_url(url, output_file):
|
|
| 72 |
p_tags = soup.find_all("p")
|
| 73 |
output_text = ""
|
| 74 |
for p in p_tags:
|
| 75 |
-
if len(output_text) + len(p.get_text()) >
|
| 76 |
-
break #
|
| 77 |
output_text += p.get_text()
|
| 78 |
output_text = output_text.replace("\n", "")
|
| 79 |
output_text = output_text.replace('\xa0', ' ')
|
|
|
|
| 72 |
p_tags = soup.find_all("p")
|
| 73 |
output_text = ""
|
| 74 |
for p in p_tags:
|
| 75 |
+
if len(output_text) + len(p.get_text()) > 7500:
|
| 76 |
+
break # stop the loop once adding this paragraph would push the text past 7500 characters
|
| 77 |
output_text += p.get_text()
|
| 78 |
output_text = output_text.replace("\n", "")
|
| 79 |
output_text = output_text.replace('\xa0', ' ')
|