Update app.py
Browse files
app.py
CHANGED
|
@@ -120,15 +120,44 @@ def main():
|
|
| 120 |
for file in docs:
|
| 121 |
print('file - type : ', file.type)
|
| 122 |
if file.type == 'text/plain':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
# file is .txt
|
| 124 |
doc_list.extend(get_text_file(file))
|
| 125 |
elif file.type in ['application/octet-stream', 'application/pdf']:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
# file is .pdf
|
| 127 |
doc_list.extend(get_pdf_text(file))
|
| 128 |
elif file.type == 'text/csv':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
# file is .csv
|
| 130 |
doc_list.extend(get_csv_file(file))
|
| 131 |
elif file.type == 'application/json':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
# file is .json
|
| 133 |
doc_list.extend(get_json_file(file))
|
| 134 |
|
|
|
|
| 120 |
for file in docs:
|
| 121 |
print('file - type : ', file.type)
|
| 122 |
if file.type == 'text/plain':
|
| 123 |
+
def get_text_file(text_file):
    """Load an uploaded plain-text file into documents via ``TextLoader``.

    The upload's bytes are copied into a temporary file so that the loader,
    which is given a filesystem path, can read them.

    Args:
        text_file: Object exposing ``getvalue()`` that returns the file's
            raw bytes (presumably a Streamlit upload -- confirm against
            the caller).

    Returns:
        Whatever ``TextLoader.load()`` returns; the caller passes it to
        ``list.extend``.
    """
    with NamedTemporaryFile() as temp_file:
        # Fix: read from the ``text_file`` argument. The previous code
        # referenced ``text_chunks``, which is not defined in this function.
        temp_file.write(text_file.getvalue())
        # Push buffered bytes to disk before the path is handed to the loader.
        temp_file.flush()
        text_loader = TextLoader(temp_file.name)
        # Load while the temporary file still exists; it is removed on exit.
        documents = text_loader.load()
    return documents
|
| 130 |
# file is .txt
|
| 131 |
doc_list.extend(get_text_file(file))
|
| 132 |
elif file.type in ['application/octet-stream', 'application/pdf']:
|
| 133 |
+
def get_pdf_text(pdf_docs):
    """Load an uploaded PDF into documents via ``PyPDFLoader``.

    Args:
        pdf_docs: Object exposing ``getvalue()`` that returns the PDF's raw
            bytes (presumably a single Streamlit upload -- confirm against
            the caller).

    Returns:
        The result of ``PyPDFLoader.load()`` for the temporary copy.
    """
    with NamedTemporaryFile() as scratch:
        raw_bytes = pdf_docs.getvalue()
        scratch.write(raw_bytes)
        # Rewind before handing the file's path to the loader.
        scratch.seek(0)
        # Load inside the ``with`` so the temporary file still exists.
        return PyPDFLoader(scratch.name).load()
|
| 140 |
# file is .pdf
|
| 141 |
doc_list.extend(get_pdf_text(file))
|
| 142 |
elif file.type == 'text/csv':
|
| 143 |
+
def get_csv_file(csv_file):
    """Load an uploaded CSV file into documents via ``CSVLoader``.

    Args:
        csv_file: Object exposing ``getvalue()`` that returns the CSV's raw
            bytes (presumably a Streamlit upload -- confirm against the
            caller).

    Returns:
        The result of ``CSVLoader.load()`` for the temporary copy.
    """
    with NamedTemporaryFile() as scratch:
        raw_bytes = csv_file.getvalue()
        scratch.write(raw_bytes)
        # Rewind before handing the file's path to the loader.
        scratch.seek(0)
        # Load inside the ``with`` so the temporary file still exists.
        return CSVLoader(scratch.name).load()
|
| 150 |
+
|
| 151 |
# file is .csv
|
| 152 |
doc_list.extend(get_csv_file(file))
|
| 153 |
elif file.type == 'application/json':
|
| 154 |
+
def get_json_file(json_file):
    """Load an uploaded JSON file into documents via ``JSONLoader``.

    Args:
        json_file: Object exposing ``getvalue()`` that returns the JSON
            file's raw bytes (presumably a Streamlit upload -- confirm
            against the caller).

    Returns:
        The result of ``JSONLoader.load()`` for the temporary copy.
    """
    with NamedTemporaryFile() as scratch:
        raw_bytes = json_file.getvalue()
        scratch.write(raw_bytes)
        # Rewind before handing the file's path to the loader.
        scratch.seek(0)
        # NOTE(review): if this is LangChain's JSONLoader, its constructor
        # may also require a ``jq_schema`` argument -- confirm this call works.
        # Load inside the ``with`` so the temporary file still exists.
        return JSONLoader(scratch.name).load()
|
| 161 |
# file is .json
|
| 162 |
doc_list.extend(get_json_file(file))
|
| 163 |
|