URL2Text2

Build error

tregu0458 committed on May 12, 2024

Commit

2ee51ff

verified ·

1 Parent(s): 8f15a68

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,3 +1,6 @@
 from fastapi import FastAPI, HTTPException
 from langchain.document_loaders import YoutubeLoader, UnstructuredPDFLoader, WebBaseLoader
@@ -17,9 +20,14 @@ def extract_text(url: str, language: str = "ja", length: int = 150000):
             text_content = str(docs)
         elif url.endswith(".pdf"):
             # PDFの場合
-            loader = UnstructuredPDFLoader(url)
             docs = loader.load()
             text_content = docs[0].page_content
         else:
             # それ以外の場合
             loader = WebBaseLoader(url)

+import os
+import tempfile
+import requests
 from fastapi import FastAPI, HTTPException
 from langchain.document_loaders import YoutubeLoader, UnstructuredPDFLoader, WebBaseLoader
             text_content = str(docs)
         elif url.endswith(".pdf"):
             # PDFの場合
+            response = requests.get(url)
+            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                temp_file.write(response.content)
+                temp_file_path = temp_file.name
+            loader = UnstructuredPDFLoader(temp_file_path)
             docs = loader.load()
             text_content = docs[0].page_content
+            os.unlink(temp_file_path)  # 一時ファイルを削除
         else:
             # それ以外の場合
             loader = WebBaseLoader(url)