tregu0458 committed on
Commit
7a0c4e5
·
verified ·
1 Parent(s): 451bc87

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from langchain.document_loaders import YoutubeLoader, UnstructuredPDFLoader, WebBaseLoader
3
+
4
+ app = FastAPI()
5
+
6
def _truncate_middle(text: str, length: int) -> str:
    """Return *text* cut down to roughly *length* characters by dropping its middle.

    Text that already fits (``len(text) <= length``) is returned unchanged.
    Otherwise the first ``length // 2`` and the last ``length // 2`` characters
    are concatenated, so for an odd *length* the result is one character
    shorter than *length*. A negative *length* is treated as 0.
    """
    length = max(length, 0)
    if len(text) <= length:
        return text
    half = length // 2
    # Slice the tail from an explicit start index: ``text[-half:]`` would
    # return the whole string when ``half`` is 0.
    return text[:half] + text[len(text) - half:]


@app.post("/extract_text", tags=["Text Extraction"])
def extract_text(url: str, language: str = "ja", length: int = 150000):
    """Load the resource at *url* and return its text, trimmed to *length*.

    The loader is chosen from the URL string:

    * contains ``youtube.com`` or ``youtu.be`` -> ``YoutubeLoader`` with the
      requested transcript *language*; the stringified document list
      (``str(docs)``) is returned.
    * ends with ``.pdf`` -> ``UnstructuredPDFLoader``; the first document's
      ``page_content`` is returned.
    * anything else -> ``WebBaseLoader``; the first document's
      ``page_content`` is returned.

    Args:
        url: Address of the resource to extract text from.
        language: Transcript language passed to the YouTube loader.
        length: Maximum size of the returned text. Longer text keeps its
            first and last ``length // 2`` characters.

    Returns:
        ``{"text_content": <text>}`` on success, or ``{"message": <error>}``
        when any step raises.
    """
    try:
        if "youtube.com" in url or "youtu.be" in url:
            # YouTube video
            loader = YoutubeLoader.from_youtube_url(
                youtube_url=url,
                add_video_info=True,
                language=[language],
            )
            docs = loader.load()
            text_content = str(docs)
        else:
            if url.endswith(".pdf"):
                # PDF document
                # NOTE(review): the URL string is handed to
                # UnstructuredPDFLoader as-is -- confirm it accepts remote
                # URLs and not only local file paths.
                loader = UnstructuredPDFLoader(url)
            else:
                # Any other URL is treated as a regular web page.
                loader = WebBaseLoader(url)
            docs = loader.load()
            if not docs:
                # Report an explicit reason instead of the opaque
                # "list index out of range" from docs[0].
                raise ValueError(f"No content could be extracted from {url}")
            text_content = docs[0].page_content

        return {"text_content": _truncate_middle(text_content, length)}
    except Exception as e:
        # Top-level boundary of the endpoint: every failure is reported in
        # the response body (with the default success status) so existing
        # clients that read "message" keep working.
        # NOTE(review): HTTPException is imported by the file but unused --
        # consider raising it here once clients can handle error statuses.
        return {"message": str(e)}