PCFISH committed on
Commit
fb78073
·
1 Parent(s): ab69028

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -5
app.py CHANGED
@@ -57,13 +57,33 @@ def get_json_file(docs):
57
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
58
  def get_text_chunks(documents):
59
  text_splitter = RecursiveCharacterTextSplitter(
60
- chunk_size=1000, # 청크의 크기를 지정합니다.
61
- chunk_overlap=200, # 청크 사이의 중복을 지정합니다.
62
- length_function=len # 텍스트의 길이를 측정하는 함수를 지정합니다.
63
  )
64
 
65
- documents = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다
66
- return documents # 나눈 청크를 반환합니다.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.
 
57
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
58
  def get_text_chunks(documents):
59
  text_splitter = RecursiveCharacterTextSplitter(
60
+ chunk_size=1000,
61
+ chunk_overlap=200,
62
+ length_function=len
63
  )
64
 
65
+ text_chunks = []
66
+
67
+ for doc in documents:
68
+ if isinstance(doc, str):
69
+ # If the document is a string, treat it as plain text
70
+ text_chunks.append(doc)
71
+ elif hasattr(doc, 'page_content'):
72
+ # If the document has a 'page_content' attribute, use it
73
+ text_chunks.append(doc.page_content)
74
+ else:
75
+ # Handle other types of documents as needed
76
+ # For example, if it's a list of strings, concatenate them
77
+ if isinstance(doc, list) and all(isinstance(item, str) for item in doc):
78
+ text_chunks.append(' '.join(doc))
79
+ else:
80
+ # Handle other cases based on the actual structure of your documents
81
+ raise ValueError(f"Unsupported document type: {type(doc)}")
82
+
83
+ # Split the text chunks
84
+ text_chunks = text_splitter.split_documents(text_chunks)
85
+
86
+ return text_chunks
87
 
88
 
89
  # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.