hyonee committed on
Commit
62f60fd
·
1 Parent(s): 0a87e57
Files changed (1) hide show
  1. app.py +37 -9
app.py CHANGED
@@ -30,19 +30,47 @@ def get_pdf_text(pdf_docs):
30
  # 아래 텍스트 추출 함수를 작성
31
 
32
  def get_text_file(docs):
33
- text_loader = TextLoader(docs) # TextLoaderλ₯Ό μ‚¬μš©ν•΄ ν…μŠ€νŠΈ νŒŒμΌμ„ λ‘œλ“œν•©λ‹ˆλ‹€.
34
- text_doc = text_loader.load() # ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•©λ‹ˆλ‹€.
35
- return text_doc
 
 
 
 
 
 
36
 
37
  def get_csv_file(docs):
38
- csv_loader = CSVLoader(docs) # CSVLoaderλ₯Ό μ‚¬μš©ν•΄ CSV νŒŒμΌμ„ λ‘œλ“œν•©λ‹ˆλ‹€.
39
- csv_doc = csv_loader.load() # ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•©λ‹ˆλ‹€.
40
- return csv_doc
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def get_json_file(docs):
43
- json_loader = JSONLoader(docs) # JSONLoaderλ₯Ό μ‚¬μš©ν•΄ JSON νŒŒμΌμ„ λ‘œλ“œν•©λ‹ˆλ‹€.
44
- json_doc = json_loader.load() # ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•©λ‹ˆλ‹€.
45
- return json_doc
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  # λ¬Έμ„œλ“€μ„ μ²˜λ¦¬ν•˜μ—¬ ν…μŠ€νŠΈ 청크둜 λ‚˜λˆ„λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.
 
30
  # 아래 텍스트 추출 함수를 작성
31
 
32
  def get_text_file(docs):
33
+ def get_text_file(docs):
34
+ temp_dir = tempfile.TemporaryDirectory() # μž„μ‹œ 디렉토리λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
35
+ temp_filepath = os.path.join(temp_dir.name, "text_file.txt") # μž„μ‹œ 파일 경둜λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
36
+ with open(temp_filepath, "w", encoding="utf-8") as f: # μž„μ‹œ νŒŒμΌμ„ ν…μŠ€νŠΈ μ“°κΈ° λͺ¨λ“œλ‘œ μ—½λ‹ˆλ‹€.
37
+ f.write(docs) # ν…μŠ€νŠΈ λ¬Έμ„œμ˜ λ‚΄μš©μ„ μž„μ‹œ νŒŒμΌμ— μ”λ‹ˆλ‹€.
38
+ with open(temp_filepath, "r", encoding="utf-8") as f: # μž„μ‹œ νŒŒμΌμ„ ν…μŠ€νŠΈ 읽기 λͺ¨λ“œλ‘œ μ—½λ‹ˆλ‹€.
39
+ text_content = f.read() # ν…μŠ€νŠΈ 파일의 λ‚΄μš©μ„ μ½μŠ΅λ‹ˆλ‹€.
40
+ return text_content # μΆ”μΆœν•œ ν…μŠ€νŠΈλ₯Ό λ°˜ν™˜ν•©λ‹ˆλ‹€.
41
+
42
 
43
  def get_csv_file(docs):
44
+ temp_dir = tempfile.TemporaryDirectory() # μž„μ‹œ 디렉토리λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
45
+ temp_filepath = os.path.join(temp_dir.name, "csv_file.csv") # μž„μ‹œ 파일 경둜λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
46
+
47
+ # ν…μŠ€νŠΈ 데이터λ₯Ό CSV νŒŒμΌμ— μ“°κΈ°
48
+ with open(temp_filepath, "w", newline="", encoding="utf-8") as csv_file:
49
+ csv_writer = csv.writer(csv_file)
50
+ for line in docs:
51
+ # μ—¬κΈ°μ„œ line은 CSV 파일의 ν•œ 행을 λ‚˜νƒ€λƒ…λ‹ˆλ‹€.
52
+ csv_writer.writerow(line.split(',')) # μ˜ˆμ‹œ: μ‰Όν‘œλ‘œ κ΅¬λΆ„λœ 데이터λ₯Ό μ‚¬μš©
53
+
54
+ # CSV νŒŒμΌμ—μ„œ 데이터 읽기
55
+ with open(temp_filepath, "r", encoding="utf-8") as csv_file:
56
+ csv_reader = csv.reader(csv_file)
57
+ csv_data = [row for row in csv_reader]
58
+
59
+ return csv_data # μ½μ–΄μ˜¨ CSV 데이터λ₯Ό λ°˜ν™˜ν•©λ‹ˆλ‹€.
60
 
61
  def get_json_file(docs):
62
+ temp_dir = tempfile.TemporaryDirectory() # μž„μ‹œ 디렉토리λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
63
+ temp_filepath = os.path.join(temp_dir.name, "json_file.json") # μž„μ‹œ 파일 경둜λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
64
+
65
+ # ν…μŠ€νŠΈ 데이터λ₯Ό JSON νŒŒμΌμ— μ“°κΈ°
66
+ with open(temp_filepath, "w", encoding="utf-8") as json_file:
67
+ json.dump(docs, json_file, indent=2) # 데이터λ₯Ό JSON νŒŒμΌμ— μ“°κΈ°
68
+
69
+ # JSON νŒŒμΌμ—μ„œ 데이터 읽기
70
+ with open(temp_filepath, "r", encoding="utf-8") as json_file:
71
+ json_data = json.load(json_file)
72
+
73
+ return json_data # μ½μ–΄μ˜¨ JSON 데이터λ₯Ό λ°˜ν™˜ν•©λ‹ˆλ‹€.
74
 
75
 
76
  # λ¬Έμ„œλ“€μ„ μ²˜λ¦¬ν•˜μ—¬ ν…μŠ€νŠΈ 청크둜 λ‚˜λˆ„λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.