han-byeol commited on
Commit
40e8746
ยท
1 Parent(s): 9731345

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -76,13 +76,14 @@ def get_json_file(json_docs):
76
 
77
  # ๋ฌธ์„œ๋“ค์„ ์ฒ˜๋ฆฌํ•˜์—ฌ ํ…์ŠคํŠธ ์ฒญํฌ๋กœ ๋‚˜๋ˆ„๋Š” ํ•จ์ˆ˜์ž…๋‹ˆ๋‹ค.
78
  def get_text_chunks(documents):
79
- texts = []
80
- for doc in documents:
81
- try:
82
- texts.append(doc.page_content)
83
- except AttributeError:
84
- texts.append(doc) # ์ฒ˜๋ฆฌ ๋ถˆ๊ฐ€๋Šฅํ•œ ๊ฒฝ์šฐ ํ…์ŠคํŠธ ์ž์ฒด๋ฅผ ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.
85
- return texts
 
86
 
87
 
88
 
 
76
 
77
  # ๋ฌธ์„œ๋“ค์„ ์ฒ˜๋ฆฌํ•˜์—ฌ ํ…์ŠคํŠธ ์ฒญํฌ๋กœ ๋‚˜๋ˆ„๋Š” ํ•จ์ˆ˜์ž…๋‹ˆ๋‹ค.
78
  def get_text_chunks(documents):
79
+ text_splitter = RecursiveCharacterTextSplitter(
80
+ chunk_size=1000, # ์ฒญํฌ์˜ ํฌ๊ธฐ๋ฅผ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
81
+ chunk_overlap=200, # ์ฒญํฌ ์‚ฌ์ด์˜ ์ค‘๋ณต์„ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
82
+ length_function=len # ํ…์ŠคํŠธ์˜ ๊ธธ์ด๋ฅผ ์ธก์ •ํ•˜๋Š” ํ•จ์ˆ˜๋ฅผ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
83
+ )
84
+
85
+ documents = text_splitter.split_documents(documents) # ๋ฌธ์„œ๋“ค์„ ์ฒญํฌ๋กœ ๋‚˜๋ˆ•๋‹ˆ๋‹ค
86
+ return documents # ๋‚˜๋ˆˆ ์ฒญํฌ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
87
 
88
 
89