Spaces:
Sleeping
Sleeping
Update summary_extractor.py
Browse files
- summary_extractor.py +3 -3
summary_extractor.py
CHANGED
|
@@ -8,8 +8,8 @@ from langchain.chains.mapreduce import MapReduceChain
|
|
| 8 |
from langchain.text_splitter import CharacterTextSplitter
|
| 9 |
from langchain.chains.summarize import load_summarize_chain
|
| 10 |
from langchain.prompts import PromptTemplate
|
| 11 |
-
from langchain_community.document_loaders import UnstructuredFileLoader
|
| 12 |
-
|
| 13 |
|
| 14 |
class Extractor:
|
| 15 |
|
|
@@ -35,7 +35,7 @@ class Extractor:
|
|
| 35 |
List[str]: List of text content from each page.
|
| 36 |
"""
|
| 37 |
try:
|
| 38 |
-
loader =
|
| 39 |
pages = loader.load_and_split()
|
| 40 |
return pages
|
| 41 |
|
|
|
|
| 8 |
from langchain.text_splitter import CharacterTextSplitter
|
| 9 |
from langchain.chains.summarize import load_summarize_chain
|
| 10 |
from langchain.prompts import PromptTemplate
|
| 11 |
+
# from langchain_community.document_loaders import UnstructuredFileLoader
|
| 12 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 13 |
|
| 14 |
class Extractor:
|
| 15 |
|
|
|
|
| 35 |
List[str]: List of text content from each page.
|
| 36 |
"""
|
| 37 |
try:
|
| 38 |
+
loader = PyPDFLoader(pdf_file_path.name)
|
| 39 |
pages = loader.load_and_split()
|
| 40 |
return pages
|
| 41 |
|