Update app.py
Browse files
app.py
CHANGED
|
@@ -29,14 +29,22 @@ def get_pdf_text(pdf_docs):
|
|
| 29 |
# 아래 텍스트 추출 함수를 작성
|
| 30 |
|
| 31 |
def get_text_file(docs):
|
| 32 |
-
|
| 33 |
-
|
| 34 |
|
| 35 |
def get_csv_file(docs):
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
def get_json_file(docs):
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
|
@@ -117,35 +125,34 @@ def main():
|
|
| 117 |
|
| 118 |
st.subheader("Your documents")
|
| 119 |
docs = st.file_uploader(
|
| 120 |
-
"Upload your
|
| 121 |
if st.button("Process"):
|
| 122 |
with st.spinner("Processing"):
|
| 123 |
-
#
|
| 124 |
doc_list = []
|
| 125 |
|
| 126 |
for file in docs:
|
| 127 |
if file.type == 'text/plain':
|
| 128 |
-
#
|
| 129 |
doc_list.extend(get_text_file(file))
|
| 130 |
-
elif file.type in ['application/octet-stream', 'application/pdf']:
|
| 131 |
-
# file is .pdf
|
| 132 |
-
doc_list.extend(get_pdf_text(file))
|
| 133 |
elif file.type == 'text/csv':
|
| 134 |
-
#
|
| 135 |
doc_list.extend(get_csv_file(file))
|
| 136 |
elif file.type == 'application/json':
|
| 137 |
-
#
|
| 138 |
doc_list.extend(get_json_file(file))
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
-
#
|
| 141 |
text_chunks = get_text_chunks(doc_list)
|
| 142 |
|
| 143 |
-
#
|
| 144 |
vectorstore = get_vectorstore(text_chunks)
|
| 145 |
|
| 146 |
-
#
|
| 147 |
-
st.session_state.conversation = get_conversation_chain(
|
| 148 |
-
vectorstore)
|
| 149 |
|
| 150 |
|
| 151 |
if __name__ == '__main__':
|
|
|
|
| 29 |
# 아래 텍스트 추출 함수를 작성
|
| 30 |
|
| 31 |
def get_text_file(docs):
|
| 32 |
+
# 텍스트 파일 (.txt)에서 텍스트를 추출하는 함수
|
| 33 |
+
return [docs.getvalue().decode('utf-8')]
|
| 34 |
|
| 35 |
def get_csv_file(docs):
|
| 36 |
+
# CSV 파일 (.csv)에서 텍스트를 추출하는 함수
|
| 37 |
+
csv_loader = CSVLoader(docs)
|
| 38 |
+
csv_data = csv_loader.load()
|
| 39 |
+
# CSV 파일의 각 행을 문자열로 변환하여 반환
|
| 40 |
+
return [' '.join(map(str, row)) for row in csv_data]
|
| 41 |
|
| 42 |
def get_json_file(docs):
|
| 43 |
+
# JSON 파일 (.json)에서 텍스트를 추출하는 함수
|
| 44 |
+
json_loader = JSONLoader(docs)
|
| 45 |
+
json_data = json_loader.load()
|
| 46 |
+
# JSON 파일의 각 항목을 문자열로 변환하여 반환
|
| 47 |
+
return [json.dumps(item) for item in json_data]
|
| 48 |
|
| 49 |
|
| 50 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
|
|
|
| 125 |
|
| 126 |
st.subheader("Your documents")
|
| 127 |
docs = st.file_uploader(
|
| 128 |
+
"Upload your documents here and click on 'Process'", accept_multiple_files=True)
|
| 129 |
if st.button("Process"):
|
| 130 |
with st.spinner("Processing"):
|
| 131 |
+
# 문서에서 추출한 텍스트를 담을 리스트
|
| 132 |
doc_list = []
|
| 133 |
|
| 134 |
for file in docs:
|
| 135 |
if file.type == 'text/plain':
|
| 136 |
+
# .txt 파일의 경우
|
| 137 |
doc_list.extend(get_text_file(file))
|
|
|
|
|
|
|
|
|
|
| 138 |
elif file.type == 'text/csv':
|
| 139 |
+
# .csv 파일의 경우
|
| 140 |
doc_list.extend(get_csv_file(file))
|
| 141 |
elif file.type == 'application/json':
|
| 142 |
+
# .json 파일의 경우
|
| 143 |
doc_list.extend(get_json_file(file))
|
| 144 |
+
elif file.type in ['application/octet-stream', 'application/pdf']:
|
| 145 |
+
# .pdf 파일의 경우
|
| 146 |
+
doc_list.extend(get_pdf_text(file))
|
| 147 |
|
| 148 |
+
# 텍스트 청크로 나누기
|
| 149 |
text_chunks = get_text_chunks(doc_list)
|
| 150 |
|
| 151 |
+
# 벡터 스토어 생성
|
| 152 |
vectorstore = get_vectorstore(text_chunks)
|
| 153 |
|
| 154 |
+
# 대화 체인 생성
|
| 155 |
+
st.session_state.conversation = get_conversation_chain(vectorstore)
|
|
|
|
| 156 |
|
| 157 |
|
| 158 |
if __name__ == '__main__':
|