SayknowLab committed on
Commit
0ff739c
·
verified ·
1 Parent(s): a69dc25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py CHANGED
@@ -82,6 +82,45 @@ def ask_sayknow(query):
82
  else:
83
  answer = raw_response.strip()
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  # 문장 끝 처리
86
  if answer and answer[-1] not in ".!?":
87
  answer += "."
 
82
  else:
83
  answer = raw_response.strip()
84
 
85
+ # 2. '답변:' 키워드를 기준으로 진짜 답변 부분 추출
86
+ if "답변:" in extracted_answer:
87
+ answer = extracted_answer.split("답변:", 1)[1].strip() # 첫 번째 "답변:" 이후만
88
+ else:
89
+ # 만약 "답변:" 태그가 없으면, 프롬프트의 지시사항 중복 등을 제거 시도
90
+ persona_end_marker = "๋‹ตํ•ด.\n" # persona_guide์˜ ํŠน์ • ๋ ๋ถ€๋ถ„์„ ํ‘œ์‹œ
91
+ if persona_end_marker in extracted_answer:
92
+ try:
93
+ answer = extracted_answer[extracted_answer.rindex(persona_end_marker) + len(persona_end_marker):].strip()
94
+ except ValueError:
95
+ answer = extracted_answer # 안되면 그냥 전체 사용
96
+ else:
97
+ answer = extracted_answer # 그것도 없으면 그냥 전체 사용
98
+
99
+ # 그래도 답변이 비어있으면 오류 메시지를 대체
100
+ if not answer:
101
+ answer = "죄송합니다. 질문에 대한 답변을 찾을 수 없거나 내용이 명확하지 않습니다."
102
+
103
+
104
+ # 1. 의미 없는 수식/영문/특수문자/반복문자 등 필터링 (기존과 동일)
105
+ # ์ด ๋ถ€๋ถ„์„ ๋จผ์ € ํ•œ๋ฒˆ ์ ์šฉํ•ด์„œ answer๊ฐ€ ์—‰๋šฑํ•œ ๋ฌธ์ž์—ด์ด ๋˜๋Š” ๊ฑธ ๋ฐฉ์ง€
106
+ answer = re.sub(r"[^가-힣0-9 .,!?~\n]", "", answer)
107
+ answer = re.sub(r"([.,!?~])\1{2,}", r"\1", answer)
108
+ answer = re.sub(r"[a-zA-Z]+", "", answer)
109
+ answer = re.sub(r"[=^*/\\]+", "", answer)
110
+ answer = re.sub(r"\s+", " ", answer).strip()
111
+
112
+ # 2. 80자 이내로 자르기 (한글 기준) (기존과 동일)
113
+ def truncate_korean(text, max_len=80):
114
+ count = 0
115
+ result = ""
116
+ for ch in text:
117
+ result += ch
118
+ count += 1
119
+ if count >= max_len:
120
+ break
121
+ return result
122
+ answer = truncate_korean(answer, 80)
123
+
124
  # 문장 끝 처리
125
  if answer and answer[-1] not in ".!?":
126
  answer += "."