Yasu777 committed on
Commit
3cfefb9
·
verified ·
1 Parent(s): 621abfa

Update first.py

Browse files
Files changed (1) hide show
  1. first.py +2 -2
first.py CHANGED
@@ -72,8 +72,8 @@ def extract_text_from_url(url, output_file):
72
  p_tags = soup.find_all("p")
73
  output_text = ""
74
  for p in p_tags:
75
- if len(output_text) + len(p.get_text()) > 5000:
76
- break # 5000文字を超えたらループを終了
77
  output_text += p.get_text()
78
  output_text = output_text.replace("\n", "")
79
  output_text = output_text.replace('\xa0', ' ')
 
72
  p_tags = soup.find_all("p")
73
  output_text = ""
74
  for p in p_tags:
75
+ if len(output_text) + len(p.get_text()) > 7500:
76
+ break # 7500文字を超えたらループを終了
77
  output_text += p.get_text()
78
  output_text = output_text.replace("\n", "")
79
  output_text = output_text.replace('\xa0', ' ')