Spaces:
Runtime error
Runtime error
add pdf capability
Browse files- app.py +18 -7
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -10,7 +10,11 @@ from aimakerspace.openai_utils.prompts import (
|
|
| 10 |
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
| 11 |
from aimakerspace.vectordatabase import VectorDatabase
|
| 12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
|
|
|
|
|
|
| 13 |
import chainlit as cl
|
|
|
|
|
|
|
| 14 |
|
| 15 |
system_template = """\
|
| 16 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
@@ -47,24 +51,31 @@ class RetrievalAugmentedQAPipeline:
|
|
| 47 |
|
| 48 |
return {"response": generate_response(), "context": context_list}
|
| 49 |
|
| 50 |
-
text_splitter = CharacterTextSplitter()
|
| 51 |
-
|
| 52 |
|
| 53 |
def process_text_file(file: AskFileResponse):
|
| 54 |
-
|
| 55 |
|
| 56 |
-
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=
|
| 57 |
temp_file_path = temp_file.name
|
| 58 |
|
| 59 |
with open(temp_file_path, "wb") as f:
|
| 60 |
f.write(file.content)
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
return texts
|
| 66 |
|
| 67 |
|
|
|
|
| 68 |
@cl.on_chat_start
|
| 69 |
async def on_chat_start():
|
| 70 |
files = None
|
|
|
|
| 10 |
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
| 11 |
from aimakerspace.vectordatabase import VectorDatabase
|
| 12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
| 13 |
+
from langchain_experimental.text_splitter import SemanticChunker
|
| 14 |
+
from langchain_openai.embeddings import OpenAIEmbeddings
|
| 15 |
import chainlit as cl
|
| 16 |
+
import tempfile
|
| 17 |
+
from langchain_community.document_loaders.pdf import PyPDFLoader
|
| 18 |
|
| 19 |
system_template = """\
|
| 20 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
|
|
| 51 |
|
| 52 |
return {"response": generate_response(), "context": context_list}
|
| 53 |
|
| 54 |
+
# text_splitter = CharacterTextSplitter()
# Semantic splitter: breaks documents at embedding-similarity breakpoints
# instead of fixed character counts; "standard_deviation" selects how the
# breakpoint threshold is computed. Requires OPENAI_API_KEY at import time
# since OpenAIEmbeddings() is constructed here at module level.
text_splitter = SemanticChunker(OpenAIEmbeddings(), breakpoint_threshold_type="standard_deviation")
|
| 56 |
|
| 57 |
def process_text_file(file: AskFileResponse):
    """Load an uploaded .txt or .pdf file and split it into text chunks.

    The uploaded bytes are written to a temporary file so the loaders
    (which expect a filesystem path) can read them; the temporary file is
    always removed afterwards.

    Args:
        file: Chainlit upload response exposing `.name`, `.type`, and
            `.content` (raw bytes).

    Returns:
        list[str]: chunk texts produced by the module-level `text_splitter`.

    Raises:
        ValueError: if the MIME type is neither text/plain nor application/pdf.
    """
    import os

    # Single binary-mode open: the original wrote a text-mode temp file,
    # closed it, then reopened the path in "wb" — one open is enough.
    # Keep the original filename as the suffix so the loader sees a
    # sensible extension (e.g. ".pdf").
    with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=file.name) as temp_file:
        temp_file.write(file.content)
        temp_file_path = temp_file.name

    try:
        if file.type == 'text/plain':
            text_loader = TextFileLoader(temp_file_path)
            documents = text_loader.load_documents()
        elif file.type == 'application/pdf':
            pdf_loader = PyPDFLoader(temp_file_path)
            documents = pdf_loader.load()
        else:
            raise ValueError("Provide a .txt or .pdf file")
    finally:
        # NamedTemporaryFile(delete=False) leaks one file per upload unless
        # we remove it explicitly.
        os.unlink(temp_file_path)

    texts = [x.page_content for x in text_splitter.transform_documents(documents)]
    return texts
|
| 76 |
|
| 77 |
|
| 78 |
+
|
| 79 |
@cl.on_chat_start
|
| 80 |
async def on_chat_start():
|
| 81 |
files = None
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
| 1 |
numpy
|
| 2 |
chainlit==0.7.700
|
| 3 |
-
openai
|
|
|
|
|
|
|
|
|
numpy
chainlit==0.7.700
openai
langchain_experimental
langchain_openai
langchain_community
pypdf
|