tregu0458 committed on
Commit
2ee51ff
·
verified ·
1 Parent(s): 8f15a68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  from fastapi import FastAPI, HTTPException
2
  from langchain.document_loaders import YoutubeLoader, UnstructuredPDFLoader, WebBaseLoader
3
 
@@ -17,9 +20,14 @@ def extract_text(url: str, language: str = "ja", length: int = 150000):
17
  text_content = str(docs)
18
  elif url.endswith(".pdf"):
19
  # PDFの場合
20
- loader = UnstructuredPDFLoader(url)
 
 
 
 
21
  docs = loader.load()
22
  text_content = docs[0].page_content
 
23
  else:
24
  # それ以外の場合
25
  loader = WebBaseLoader(url)
 
1
+ import os
2
+ import tempfile
3
+ import requests
4
  from fastapi import FastAPI, HTTPException
5
  from langchain.document_loaders import YoutubeLoader, UnstructuredPDFLoader, WebBaseLoader
6
 
 
20
  text_content = str(docs)
21
  elif url.endswith(".pdf"):
22
  # PDFの場合
23
+ response = requests.get(url)
24
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
25
+ temp_file.write(response.content)
26
+ temp_file_path = temp_file.name
27
+ loader = UnstructuredPDFLoader(temp_file_path)
28
  docs = loader.load()
29
  text_content = docs[0].page_content
30
+ os.unlink(temp_file_path) # 一時ファイルを削除
31
  else:
32
  # それ以外の場合
33
  loader = WebBaseLoader(url)