The RAG is a rag
Browse files
app.py
CHANGED
|
@@ -101,12 +101,16 @@ def process_pdf_file(file: AskFileResponse):
|
|
| 101 |
f.write(file.content)
|
| 102 |
|
| 103 |
doc = pymupdf.open(temp_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
texts = []
|
| 105 |
for page in doc:
|
| 106 |
texts.append(page.get_text())
|
| 107 |
-
|
| 108 |
-
os.remove(temp_file_path)
|
| 109 |
-
return texts
|
| 110 |
|
| 111 |
@cl.on_chat_start
|
| 112 |
async def on_chat_start():
|
|
@@ -115,7 +119,7 @@ async def on_chat_start():
|
|
| 115 |
# Wait for the user to upload a file
|
| 116 |
while files == None:
|
| 117 |
files = await cl.AskFileMessage(
|
| 118 |
-
content="Please upload a Text or PDF file to begin!",
|
| 119 |
accept=["text/plain", "application/pdf"],
|
| 120 |
max_size_mb=2,
|
| 121 |
timeout=180,
|
|
@@ -134,7 +138,7 @@ async def on_chat_start():
|
|
| 134 |
elif file.type == "application/pdf":
|
| 135 |
texts = process_pdf_file(file)
|
| 136 |
else:
|
| 137 |
-
msg.content = "Unsupported file type."
|
| 138 |
await msg.update()
|
| 139 |
return
|
| 140 |
|
|
|
|
| 101 |
f.write(file.content)
|
| 102 |
|
| 103 |
doc = pymupdf.open(temp_file_path)
|
| 104 |
+
documents = doc.load_documents()
|
| 105 |
+
texts = text_splitter.split_texts(documents)
|
| 106 |
+
return texts
|
| 107 |
+
"""
|
| 108 |
texts = []
|
| 109 |
for page in doc:
|
| 110 |
texts.append(page.get_text())
|
| 111 |
+
"""
|
| 112 |
+
# os.remove(temp_file_path)  -- temporarily disabled while checking whether this is better (TODO: confirm, then restore the temp-file cleanup)
|
| 113 |
+
#return texts
|
| 114 |
|
| 115 |
@cl.on_chat_start
|
| 116 |
async def on_chat_start():
|
|
|
|
| 119 |
# Wait for the user to upload a file
|
| 120 |
while files == None:
|
| 121 |
files = await cl.AskFileMessage(
|
| 122 |
+
content="Please upload a Text or PDF file <2MB to begin!",
|
| 123 |
accept=["text/plain", "application/pdf"],
|
| 124 |
max_size_mb=2,
|
| 125 |
timeout=180,
|
|
|
|
| 138 |
elif file.type == "application/pdf":
|
| 139 |
texts = process_pdf_file(file)
|
| 140 |
else:
|
| 141 |
+
msg.content = "Unsupported file type. Please use .txt and .pdf files only"
|
| 142 |
await msg.update()
|
| 143 |
return
|
| 144 |
|