Recursive character text splitter
Browse files
- app.py +4 -3
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -12,6 +12,7 @@ from aimakerspace.vectordatabase import VectorDatabase
|
|
| 12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
| 13 |
import chainlit as cl
|
| 14 |
from PyPDF2 import PdfReader
|
|
|
|
| 15 |
|
| 16 |
system_template = """\
|
| 17 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
@@ -48,7 +49,7 @@ class RetrievalAugmentedQAPipeline:
|
|
| 48 |
|
| 49 |
return {"response": generate_response(), "context": context_list}
|
| 50 |
|
| 51 |
-
text_splitter =
|
| 52 |
|
| 53 |
|
| 54 |
def process_text_file(file: AskFileResponse):
|
|
@@ -69,8 +70,8 @@ def process_text_file(file: AskFileResponse):
|
|
| 69 |
documents = pdf_loader.load()
|
| 70 |
else:
|
| 71 |
raise ValueError("Provide a .txt or .pdf file")
|
| 72 |
-
texts = text_splitter.split_texts(documents)
|
| 73 |
-
|
| 74 |
return texts
|
| 75 |
|
| 76 |
|
|
|
|
| 12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
| 13 |
import chainlit as cl
|
| 14 |
from PyPDF2 import PdfReader
|
| 15 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 16 |
|
| 17 |
system_template = """\
|
| 18 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
|
|
| 49 |
|
| 50 |
return {"response": generate_response(), "context": context_list}
|
| 51 |
|
| 52 |
+
text_splitter = RecursiveCharacterTextSplitter()
|
| 53 |
|
| 54 |
|
| 55 |
def process_text_file(file: AskFileResponse):
|
|
|
|
| 70 |
documents = pdf_loader.load()
|
| 71 |
else:
|
| 72 |
raise ValueError("Provide a .txt or .pdf file")
|
| 73 |
+
# texts = text_splitter.split_texts(documents)
|
| 74 |
+
texts = [x.page_content for x in text_splitter.transform_documents(documents)]
|
| 75 |
return texts
|
| 76 |
|
| 77 |
|
requirements.txt
CHANGED
|
@@ -3,4 +3,5 @@ chainlit==0.7.700
|
|
| 3 |
openai
|
| 4 |
PyPDF2
|
| 5 |
langchain-community
|
| 6 |
-
pypdf
|
|
|
|
|
|
| 3 |
openai
|
| 4 |
PyPDF2
|
| 5 |
langchain-community
|
| 6 |
+
pypdf
|
| 7 |
+
langchain-text-splitters
|