tregu0458 committed on
Commit
1fd5f04
·
verified ·
1 Parent(s): 96a0453

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import requests
3
  from fastapi import FastAPI, HTTPException
4
  from langchain_community.document_loaders import YoutubeLoader, UnstructuredPDFLoader, WebBaseLoader
 
5
 
6
  app = FastAPI()
7
 
@@ -19,14 +20,15 @@ def extract_text(url: str, language: str = "ja", length: int = 150000):
19
  text_content = str(docs)
20
  elif url.endswith(".pdf"):
21
  # PDFの場合
22
- response = requests.get(url)
23
- pdf_file_path = os.path.join("/app/downloads", os.path.basename(url))
24
- with open(pdf_file_path, "wb") as pdf_file:
25
- pdf_file.write(response.content)
26
  # loader = UnstructuredPDFLoader(pdf_file_path)
27
- # docs = loader.load()
28
- # text_content = docs[0].page_content
29
- text_content = pdf_file_path
 
30
  else:
31
  # それ以外の場合
32
  loader = WebBaseLoader(url)
 
2
  import requests
3
  from fastapi import FastAPI, HTTPException
4
  from langchain_community.document_loaders import YoutubeLoader, UnstructuredPDFLoader, WebBaseLoader
5
+ from langchain_community.document_loaders import OnlinePDFLoader
6
 
7
  app = FastAPI()
8
 
 
20
  text_content = str(docs)
21
  elif url.endswith(".pdf"):
22
  # PDFの場合
23
+ # response = requests.get(url)
24
+ # pdf_file_path = os.path.join("/app/downloads", os.path.basename(url))
25
+ # with open(pdf_file_path, "wb") as pdf_file:
26
+ # pdf_file.write(response.content)
27
  # loader = UnstructuredPDFLoader(pdf_file_path)
28
+ loader = OnlinePDFLoader(url)
29
+ docs = loader.load()
30
+ text_content = docs[0].page_content
31
+ # text_content = pdf_file_path
32
  else:
33
  # それ以外の場合
34
  loader = WebBaseLoader(url)