tregu0458 committed on
Commit
6fabfc5
·
verified ·
1 Parent(s): e82f9ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -44,7 +44,8 @@ def extract_text(url: str, language: str = "ja", length: int = 150000,use_jina:b
44
  response = requests.get("https://r.jina.ai/"+ url)
45
  text_content = response.text
46
  else:
47
- text_content = str(fetch_and_convert_to_markdown(requests.get(url,timeout = 10)))
 
48
 
49
  if len(text_content) < length:
50
  return {"text_content": text_content}
@@ -58,9 +59,10 @@ def extract_text(url: str, language: str = "ja", length: int = 150000,use_jina:b
58
  return {"message": error_msg}
59
 
60
  @app.post("/httpx_bs", tags=["Text Extraction and beautiful soup"], dependencies=[Depends(validate_token)])
61
- def extract_text(url: str):
62
  try:
63
- text_content = str(fetch_and_convert_to_markdown(httpx.get(url)))
 
64
 
65
  if len(text_content) < length:
66
  return {"text_content": text_content}
@@ -73,11 +75,11 @@ def extract_text(url: str):
73
  error_msg = str(e)
74
  return {"message": error_msg}
75
 
76
- def fetch_and_convert_to_markdown(response):
77
- if response.status_code != 200:
78
- return f"エラー: ステータスコード {response.status_code}"
79
 
80
- soup = BeautifulSoup(response.text, 'html.parser')
81
  markdown = ""
82
 
83
  # タイトル
 
44
  response = requests.get("https://r.jina.ai/"+ url)
45
  text_content = response.text
46
  else:
47
+ response = requests.get(url,timeout = 10)
48
+ text_content = str(convert_to_markdown(response.text,url))
49
 
50
  if len(text_content) < length:
51
  return {"text_content": text_content}
 
59
  return {"message": error_msg}
60
 
61
  @app.post("/httpx_bs", tags=["Text Extraction and beautiful soup"], dependencies=[Depends(validate_token)])
62
+ def extract_text(url: str, length: int = 150000):
63
  try:
64
+ response = httpx.get(url)
65
+ text_content = str(convert_to_markdown(response,url))
66
 
67
  if len(text_content) < length:
68
  return {"text_content": text_content}
 
75
  error_msg = str(e)
76
  return {"message": error_msg}
77
 
78
+ def convert_to_markdown(response_text,url):
79
+ # if response.status_code != 200:
80
+ # return f"エラー: ステータスコード {response.status_code}"
81
 
82
+ soup = BeautifulSoup(response_text, 'html.parser')
83
  markdown = ""
84
 
85
  # タイトル