Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,7 +15,6 @@ from aimakerspace.vectordatabase import VectorDatabase
|
|
| 15 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
| 16 |
import chainlit as cl
|
| 17 |
from langchain_community.document_loaders import PyPDFLoader
|
| 18 |
-
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 19 |
|
| 20 |
system_template = """\
|
| 21 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
@@ -59,10 +58,7 @@ class RetrievalAugmentedQAPipeline:
|
|
| 59 |
|
| 60 |
|
| 61 |
text_splitter = CharacterTextSplitter()
|
| 62 |
-
|
| 63 |
-
chunk_size=1000,
|
| 64 |
-
chunk_overlap=200,
|
| 65 |
-
)
|
| 66 |
|
| 67 |
def process_text_file(file: AskFileResponse):
|
| 68 |
import tempfile
|
|
@@ -94,7 +90,7 @@ def process_pdf_file(file: AskFileResponse):
|
|
| 94 |
|
| 95 |
pdf_loader = PyPDFLoader(temp_file_path)
|
| 96 |
documents = pdf_loader.load()
|
| 97 |
-
texts =
|
| 98 |
return texts
|
| 99 |
|
| 100 |
|
|
@@ -124,6 +120,7 @@ async def on_chat_start():
|
|
| 124 |
else:
|
| 125 |
texts = process_text_file(file)
|
| 126 |
|
|
|
|
| 127 |
|
| 128 |
# Create a dict vector store
|
| 129 |
vector_db = VectorDatabase()
|
|
@@ -153,4 +150,4 @@ async def main(message):
|
|
| 153 |
async for stream_resp in result["response"]:
|
| 154 |
await msg.stream_token(stream_resp)
|
| 155 |
|
| 156 |
-
await msg.send()
|
|
|
|
| 15 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
| 16 |
import chainlit as cl
|
| 17 |
from langchain_community.document_loaders import PyPDFLoader
|
|
|
|
| 18 |
|
| 19 |
system_template = """\
|
| 20 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
text_splitter = CharacterTextSplitter()
|
| 61 |
+
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
def process_text_file(file: AskFileResponse):
|
| 64 |
import tempfile
|
|
|
|
| 90 |
|
| 91 |
pdf_loader = PyPDFLoader(temp_file_path)
|
| 92 |
documents = pdf_loader.load()
|
| 93 |
+
texts = text_splitter.split_texts(documents)
|
| 94 |
return texts
|
| 95 |
|
| 96 |
|
|
|
|
| 120 |
else:
|
| 121 |
texts = process_text_file(file)
|
| 122 |
|
| 123 |
+
print(f"Processing {len(texts)} text chunks")
|
| 124 |
|
| 125 |
# Create a dict vector store
|
| 126 |
vector_db = VectorDatabase()
|
|
|
|
| 150 |
async for stream_resp in result["response"]:
|
| 151 |
await msg.stream_token(stream_resp)
|
| 152 |
|
| 153 |
+
await msg.send()
|