tregu0458 committed on
Commit
d0a97bd
·
verified ·
1 Parent(s): 94c7fa7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -17,7 +17,7 @@ async def validate_token(token: str = Depends(oauth2_scheme)):
17
  raise HTTPException(status_code=401, detail="Invalid API Key")
18
 
19
  @app.post("/extract_text", tags=["Text Extraction"], dependencies=[Depends(validate_token)])
20
- def extract_text(url: str, language: str = "ja", length: int = 150000):
21
  try:
22
  if "youtube.com" in url or "youtu.be" in url:
23
  # YouTubeの場合
@@ -38,7 +38,11 @@ def extract_text(url: str, language: str = "ja", length: int = 150000):
38
  # loader = WebBaseLoader(url)
39
  # docs = loader.load()
40
  # text_content = docs[0].page_content
41
- text_content = str(fetch_and_convert_to_markdown(url))
 
 
 
 
42
 
43
  if len(text_content) < length:
44
  return {"text_content": text_content}
@@ -54,9 +58,9 @@ def extract_text(url: str, language: str = "ja", length: int = 150000):
54
  def fetch_and_convert_to_markdown(url):
55
  response = requests.get(url,timeout = 10)
56
  if response.status_code != 200:
57
- response = requests.get("https://r.jina.ai/"+ url)
58
- return response.text
59
- # return f"エラー: ステータスコード {response.status_code}"
60
 
61
  soup = BeautifulSoup(response.text, 'html.parser')
62
  markdown = ""
 
17
  raise HTTPException(status_code=401, detail="Invalid API Key")
18
 
19
  @app.post("/extract_text", tags=["Text Extraction"], dependencies=[Depends(validate_token)])
20
+ def extract_text(url: str, language: str = "ja", length: int = 150000,jina:bool = true):
21
  try:
22
  if "youtube.com" in url or "youtu.be" in url:
23
  # YouTubeの場合
 
38
  # loader = WebBaseLoader(url)
39
  # docs = loader.load()
40
  # text_content = docs[0].page_content
41
+ if jina:
42
+ response = requests.get("https://r.jina.ai/"+ url)
43
+ return response.text
44
+ else:
45
+ text_content = str(fetch_and_convert_to_markdown(url))
46
 
47
  if len(text_content) < length:
48
  return {"text_content": text_content}
 
58
  def fetch_and_convert_to_markdown(url):
59
  response = requests.get(url,timeout = 10)
60
  if response.status_code != 200:
61
+ # response = requests.get("https://r.jina.ai/"+ url)
62
+ # return response.text
63
+ return f"エラー: ステータスコード {response.status_code}"
64
 
65
  soup = BeautifulSoup(response.text, 'html.parser')
66
  markdown = ""