Rename app.py to Update app.py
#12
by
nahyun0423
- opened
- app.py → Update app.py +24 -3
app.py → Update app.py
RENAMED
|
@@ -25,13 +25,22 @@ def get_pdf_text(pdf_docs):
|
|
| 25 |
# 과제
|
| 26 |
# 아래 텍스트 추출 함수를 작성
|
| 27 |
def get_text_file(docs):
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
def get_csv_file(docs):
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
def get_json_file(docs):
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
|
@@ -42,6 +51,18 @@ def get_text_chunks(documents):
|
|
| 42 |
length_function=len # 텍스트의 길이를 측정하는 함수를 지정합니다.
|
| 43 |
)
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
documents = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다.
|
| 46 |
return documents # 나눈 청크를 반환합니다.
|
| 47 |
|
|
|
|
| 25 |
# 과제
|
| 26 |
# 아래 텍스트 추출 함수를 작성
|
| 27 |
def get_text_file(docs):
|
| 28 |
+
text_list = []
|
| 29 |
+
for file in docs:
|
| 30 |
+
text_list.append(file.getvalue().decode('utf-8'))
|
| 31 |
+
return text_list
|
| 32 |
|
| 33 |
def get_csv_file(docs):
|
| 34 |
+
csv_list = []
|
| 35 |
+
for file in docs:
|
| 36 |
+
csv_list.append(file.getvalue().decode('utf-8'))
|
| 37 |
+
return csv_list
|
| 38 |
|
| 39 |
def get_json_file(docs):
|
| 40 |
+
json_list = []
|
| 41 |
+
for file in docs:
|
| 42 |
+
json_list.append(file.getvalue().decode('utf-8'))
|
| 43 |
+
return json_list
|
| 44 |
|
| 45 |
|
| 46 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
|
|
|
| 51 |
length_function=len # 텍스트의 길이를 측정하는 함수를 지정합니다.
|
| 52 |
)
|
| 53 |
|
| 54 |
+
text_documents = []
|
| 55 |
+
|
| 56 |
+
for doc in documents:
|
| 57 |
+
if doc['type'] == 'pdf':
|
| 58 |
+
text_documents.extend(get_pdf_text(doc['content']))
|
| 59 |
+
elif doc['type'] == 'text':
|
| 60 |
+
text_documents.extend(get_text_file(doc['content']))
|
| 61 |
+
elif doc['type'] == 'csv':
|
| 62 |
+
text_documents.extend(get_csv_file(doc['content']))
|
| 63 |
+
elif doc['type'] == 'json':
|
| 64 |
+
text_documents.extend(get_json_file(doc['content']))
|
| 65 |
+
|
| 66 |
documents = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다.
|
| 67 |
return documents # 나눈 청크를 반환합니다.
|
| 68 |
|