Basic_RAG_AI_Chatbot_with_chatGPT

Sleeping

PCFISH committed on Nov 27, 2023

Commit

fb78073

1 Parent(s): ab69028

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -57,13 +57,33 @@ def get_json_file(docs):
 # Processes the documents and splits them into text chunks.
 def get_text_chunks(documents):
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000, # Specifies the size of each chunk.
-        chunk_overlap=200, # Specifies the overlap between chunks.
-        length_function=len # Specifies the function used to measure text length.
     )
-    documents = text_splitter.split_documents(documents) # Split the documents into chunks.
-    return documents # Return the resulting chunks.
 # Creates a vector store from the text chunks.

 # Normalizes the incoming documents to plain text and splits them into chunks.
 def get_text_chunks(documents, chunk_size=1000, chunk_overlap=200):
     """Split documents into overlapping text chunks.

     Each item of ``documents`` may be a plain string, an object exposing a
     ``page_content`` attribute, or a list of strings (joined with single
     spaces before splitting).

     Args:
         documents: Iterable of items in any of the supported shapes above.
         chunk_size: Chunk size handed to the splitter; measured with ``len``.
         chunk_overlap: Overlap between consecutive chunks handed to the
             splitter; measured with ``len``.

     Returns:
         The chunks produced by ``text_splitter.create_documents`` for the
         collected texts (an empty result for empty input).

     Raises:
         ValueError: If an item has none of the supported shapes.
     """
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=chunk_size,
         chunk_overlap=chunk_overlap,
         length_function=len,
     )
     texts = []
     metadatas = []
     for doc in documents:
         if isinstance(doc, str):
             text = doc
         elif hasattr(doc, "page_content"):
             text = doc.page_content
         elif isinstance(doc, list) and all(isinstance(item, str) for item in doc):
             text = " ".join(doc)
         else:
             raise ValueError(f"Unsupported document type: {type(doc)}")
         texts.append(text)
         # Carry source metadata through when the item has any; plain strings
         # and lists have none, so they get an empty dict.
         metadatas.append(getattr(doc, "metadata", None) or {})
     # split_documents() reads .page_content/.metadata from every item and so
     # fails on the plain strings collected above; create_documents() takes
     # raw texts (plus per-text metadata) and does the splitting itself.
     return text_splitter.create_documents(texts, metadatas=metadatas)
 # Creates a vector store from the text chunks.