tregu0458 committed on
Commit
09e6384
·
verified ·
1 Parent(s): 187a6e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -6
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import tempfile
3
  import requests
4
  from fastapi import FastAPI, HTTPException
5
  from langchain.document_loaders import YoutubeLoader, UnstructuredPDFLoader, WebBaseLoader
@@ -21,13 +20,12 @@ def extract_text(url: str, language: str = "ja", length: int = 150000):
21
  elif url.endswith(".pdf"):
22
  # PDFの場合
23
  response = requests.get(url)
24
- with tempfile.NamedTemporaryFile(delete=False) as temp_file:
25
- temp_file.write(response.content)
26
- temp_file_path = temp_file.name
27
- loader = UnstructuredPDFLoader(temp_file_path)
28
  docs = loader.load()
29
  text_content = docs[0].page_content
30
- os.unlink(temp_file_path) # 一時ファイルを削除
31
  else:
32
  # それ以外の場合
33
  loader = WebBaseLoader(url)
 
1
  import os
 
2
  import requests
3
  from fastapi import FastAPI, HTTPException
4
  from langchain.document_loaders import YoutubeLoader, UnstructuredPDFLoader, WebBaseLoader
 
20
  elif url.endswith(".pdf"):
21
  # PDFの場合
22
  response = requests.get(url)
23
+ pdf_file_path = os.path.join("/app/downloads", os.path.basename(url))
24
+ with open(pdf_file_path, "wb") as pdf_file:
25
+ pdf_file.write(response.content)
26
+ loader = UnstructuredPDFLoader(pdf_file_path)
27
  docs = loader.load()
28
  text_content = docs[0].page_content
 
29
  else:
30
  # それ以外の場合
31
  loader = WebBaseLoader(url)