edit postprocessing logic
Browse files- postprocessing.py +11 -6
postprocessing.py
CHANGED
|
@@ -1,6 +1,11 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def extract_relevant_text(text: str) -> str:
    """Extract the text that follows ``[/INST]`` up to the next closing tag.

    The result is the span between the first ``[/INST]`` marker and the
    first subsequent ``</`` (for example the start of ``</s>``), with
    surrounding whitespace removed.

    Args:
        text: The text to search.

    Returns:
        The extracted, whitespace-trimmed text, or a fixed Korean message
        ("there is no matching text") when ``text`` has no ``[/INST]``
        marker followed by ``</``.
    """
    # Imported locally because no module-level ``import re`` is visible in
    # this file; without it every call would fail with NameError.
    import re

    # Capture only what precedes "</" so the delimiter never reaches the
    # result. Non-greedy, so matching stops at the first closing tag.
    pattern = r"\[/INST\](.*?)</"
    # DOTALL lets the captured span run across newlines.
    match = re.search(pattern, text, re.DOTALL)
    if match is None:
        return "매칭되는 텍스트가 없습니다."
    # Strip after the delimiter is gone so whitespace in front of the
    # closing tag is trimmed too.
    return match.group(1).strip()
|