Basic_RAG_AI_Chatbot_with_chatGPT

Sleeping

App Files Files Community

PCFISH committed on Nov 27, 2023

Commit

f5f9605

1 Parent(s): 780971d

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -17

app.py CHANGED Viewed

@@ -29,14 +29,22 @@ def get_pdf_text(pdf_docs):
 # 아래 텍스트 추출 함수를 작성
 def get_text_file(docs):
-    pass
 def get_csv_file(docs):
-    pass
 def get_json_file(docs):
-    pass
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
@@ -117,35 +125,34 @@ def main():
         st.subheader("Your documents")
         docs = st.file_uploader(
-            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
         if st.button("Process"):
             with st.spinner("Processing"):
-                # get pdf text
                 doc_list = []
                 for file in docs:
                     if file.type == 'text/plain':
-                        # file is .txt
                         doc_list.extend(get_text_file(file))
-                    elif file.type in ['application/octet-stream', 'application/pdf']:
-                        # file is .pdf
-                        doc_list.extend(get_pdf_text(file))
                     elif file.type == 'text/csv':
-                        # file is .csv
                         doc_list.extend(get_csv_file(file))
                     elif file.type == 'application/json':
-                        # file is .json
                         doc_list.extend(get_json_file(file))
-                # get the text chunks
                 text_chunks = get_text_chunks(doc_list)
-                # create vector store
                 vectorstore = get_vectorstore(text_chunks)
-                # create conversation chain
-                st.session_state.conversation = get_conversation_chain(
-                    vectorstore)
 if __name__ == '__main__':

 # 아래 텍스트 추출 함수를 작성
 def get_text_file(docs):
+    # 텍스트 파일 (.txt)에서 텍스트를 추출하는 함수
+    return [docs.getvalue().decode('utf-8')]
 def get_csv_file(docs):
+    # CSV 파일 (.csv)에서 텍스트를 추출하는 함수
+    csv_loader = CSVLoader(docs)
+    csv_data = csv_loader.load()
+    # CSV 파일의 각 행을 문자열로 변환하여 반환
+    return [' '.join(map(str, row)) for row in csv_data]
 def get_json_file(docs):
+    # JSON 파일 (.json)에서 텍스트를 추출하는 함수
+    json_loader = JSONLoader(docs)
+    json_data = json_loader.load()
+    # JSON 파일의 각 항목을 문자열로 변환하여 반환
+    return [json.dumps(item) for item in json_data]
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
         st.subheader("Your documents")
         docs = st.file_uploader(
+            "Upload your documents here and click on 'Process'", accept_multiple_files=True)
         if st.button("Process"):
             with st.spinner("Processing"):
+                # 문서에서 추출한 텍스트를 담을 리스트
                 doc_list = []
                 for file in docs:
                     if file.type == 'text/plain':
+                        # .txt 파일의 경우
                         doc_list.extend(get_text_file(file))
                     elif file.type == 'text/csv':
+                        # .csv 파일의 경우
                         doc_list.extend(get_csv_file(file))
                     elif file.type == 'application/json':
+                        # .json 파일의 경우
                         doc_list.extend(get_json_file(file))
+                    elif file.type in ['application/octet-stream', 'application/pdf']:
+                        # .pdf 파일의 경우
+                        doc_list.extend(get_pdf_text(file))
+                # 텍스트 청크로 나누기
                 text_chunks = get_text_chunks(doc_list)
+                # 벡터 스토어 생성
                 vectorstore = get_vectorstore(text_chunks)
+                # 대화 체인 생성
+                st.session_state.conversation = get_conversation_chain(vectorstore)
 if __name__ == '__main__':