Spaces:

tony346
/

Basic_RAG_AI_Chatbot_with_Llama2

Sleeping

Rename app.py to Update app.py

#12

by nahyun0423 - opened Nov 26, 2023

←

Files changed (1) hide show

app.py → Update app.py RENAMED Viewed

@@ -25,13 +25,22 @@ def get_pdf_text(pdf_docs):
 # 과제
 # 아래 텍스트 추출 함수를 작성
 def get_text_file(docs):
-    pass
 def get_csv_file(docs):
-    pass
 def get_json_file(docs):
-    pass
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
@@ -42,6 +51,18 @@ def get_text_chunks(documents):
         length_function=len  # 텍스트의 길이를 측정하는 함수를 지정합니다.
     )
     documents = text_splitter.split_documents(documents)  # 문서들을 청크로 나눕니다.
     return documents  # 나눈 청크를 반환합니다.

 # 과제
 # 아래 텍스트 추출 함수를 작성
 def get_text_file(docs):
+    text_list = []
+    for file in docs:
+        text_list.append(file.getvalue().decode('utf-8'))
+    return text_list
 def get_csv_file(docs):
+    csv_list = []
+    for file in docs:
+        csv_list.append(file.getvalue().decode('utf-8'))
+    return csv_list
 def get_json_file(docs):
+    json_list = []
+    for file in docs:
+        json_list.append(file.getvalue().decode('utf-8'))
+    return json_list
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
         length_function=len  # 텍스트의 길이를 측정하는 함수를 지정합니다.
     )
+    text_documents = []
+    for doc in documents:
+        if doc['type'] == 'pdf':
+            text_documents.extend(get_pdf_text(doc['content']))
+        elif doc['type'] == 'text':
+            text_documents.extend(get_text_file(doc['content']))
+        elif doc['type'] == 'csv':
+            text_documents.extend(get_csv_file(doc['content']))
+        elif doc['type'] == 'json':
+            text_documents.extend(get_json_file(doc['content']))
     documents = text_splitter.split_documents(documents)  # 문서들을 청크로 나눕니다.
     return documents  # 나눈 청크를 반환합니다.