Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,31 +2,126 @@ import streamlit as st
|
|
| 2 |
|
| 3 |
st.set_page_config(page_title="HAERAE Open Research Questions", layout="wide")
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
example_question = """
|
| 32 |
ํ๊ตญ์ ๋ณด์ ์ ๋ฌธ๊ฐ๊ฐ ๊ณ ๋ํ๋ ๋ฐ์ดํฐ ๋ณดํธ ์์คํ
์ ๊ฐ๋ฐํ๊ณ ์์ต๋๋ค. ์ด ์์คํ
์ 3์ฐจ์ ๊ธฐํํ์ ์ ๊ธ ๋ฉ์ปค๋์ฆ์ ์ฌ์ฉํ๋๋ฐ, ์ ๊ธ ์ฅ์น๋ ์๋ฟ ๋ชจ์์ผ๋ก ๋์ด ์๊ณ , ๋ฐ๋ฉด์ ๋ฐ์ง๋ฆ์ 6cm, ๋์ด๋ 8cm์
๋๋ค. ์ด ์๋ฟ ๋ชจ์์ ์ ๊ธ ์ฅ์น์๋ ์ํต ๋ชจ์์ ์ด์ ๊ฐ ๋ฑ ๋ง๊ฒ ๋ค์ด๊ฐ๊ฒ ์ค๊ณ๋์ด ์์ต๋๋ค.
|
|
@@ -50,38 +145,17 @@ example_question = """
|
|
| 50 |
|
| 51 |
st.code(example_question, language="markdown")
|
| 52 |
|
| 53 |
-
st.header("
|
| 54 |
-
|
| 55 |
-
st.write(""
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
""
|
| 64 |
-
|
| 65 |
-
st.
|
| 66 |
-
|
| 67 |
-
st.write("""
|
| 68 |
-
- Contribute to advancing Korean language model research
|
| 69 |
-
- Gain access to a large, high-quality dataset of math instructions
|
| 70 |
-
- Collaborate with HAERAE researchers
|
| 71 |
-
- Receive recognition in the field of NLP and math education
|
| 72 |
-
- Potential for co-authorship on related publications
|
| 73 |
-
""")
|
| 74 |
-
|
| 75 |
-
st.header("Contact Us")
|
| 76 |
-
|
| 77 |
-
st.write("""
|
| 78 |
-
For more information or to submit your results, please contact us at:
|
| 79 |
-
[Your contact information or a link to a submission form]
|
| 80 |
-
""")
|
| 81 |
-
|
| 82 |
-
st.sidebar.title("About HAERAE")
|
| 83 |
-
st.sidebar.info("""
|
| 84 |
-
HAERAE is a non-profit research lab dedicated to advancing the field of
|
| 85 |
-
Korean language model interpretability and evaluation. Our work focuses on
|
| 86 |
-
creating insightful benchmarks and tools to push the boundaries of NLP research.
|
| 87 |
-
""")
|
|
|
|
| 2 |
|
| 3 |
st.set_page_config(page_title="HAERAE Open Research Questions", layout="wide")
|
| 4 |
|
| 5 |
+
# Language selection
|
| 6 |
+
lang = st.radio("Language / 언어", ["English", "한국어"])
|
| 7 |
+
|
| 8 |
+
# Content in both languages
|
| 9 |
+
content = {
|
| 10 |
+
"English": {
|
| 11 |
+
"title": "HAERAE Open Research Questions",
|
| 12 |
+
"intro": """
|
| 13 |
+
HAERAE is a non-profit research lab focused on the interpretability and evaluation of Korean language models.
|
| 14 |
+
Our mission is to advance the field with insightful benchmarks and tools. Below is an overview of our projects.
|
| 15 |
+
|
| 16 |
+
We've been doing most of our projects internally, but for those that have been unsolvable,
|
| 17 |
+
we are planning to open them to get help from the open-source community.
|
| 18 |
+
""",
|
| 19 |
+
"challenge_title": "HAERAE-Math Challenge",
|
| 20 |
+
"challenge_desc": """
|
| 21 |
+
Today we are introducing our first challenge: HAERAE-Math. We've created high-quality instructions on math
|
| 22 |
+
but don't have an idea on how to generate high-quality answers for them. We are looking for solutions that
|
| 23 |
+
use open-source models with openly available licenses.
|
| 24 |
+
|
| 25 |
+
We have created a total of 20,000 instructions already and are generating more. We've opened up a preview
|
| 26 |
+
of 50 of them in this link: [HAERAE-Math Samples](https://huggingface.co/datasets/HAERAE-HUB/HAERAE-Math-samples)
|
| 27 |
+
|
| 28 |
+
For those who generate answers for the 50 and share the methodology/results with us, we'll share the
|
| 29 |
+
remaining instructions and credit for the resulting dataset.
|
| 30 |
+
""",
|
| 31 |
+
"example_title": "Example Question",
|
| 32 |
+
"how_to_title": "How to Participate",
|
| 33 |
+
"how_to": """
|
| 34 |
+
1. Access the 50 sample questions from the provided Hugging Face dataset link.
|
| 35 |
+
2. Generate high-quality answers for these questions using open-source models.
|
| 36 |
+
3. Document your methodology and results.
|
| 37 |
+
4. Share your findings with us through [contact information or submission form].
|
| 38 |
+
5. If your approach is promising, we'll provide access to the full dataset of 20,000 instructions.
|
| 39 |
+
6. Collaborate with us to refine and improve the answer generation process.
|
| 40 |
+
7. Receive credit as a contributor to the final HAERAE-Math dataset.
|
| 41 |
+
""",
|
| 42 |
+
"why_title": "Why Participate?",
|
| 43 |
+
"why": """
|
| 44 |
+
- Contribute to advancing Korean language model research
|
| 45 |
+
- Gain access to a large, high-quality dataset of math instructions
|
| 46 |
+
- Collaborate with HAERAE researchers
|
| 47 |
+
- Receive recognition in the field of NLP and math education
|
| 48 |
+
- Potential for co-authorship on related publications
|
| 49 |
+
""",
|
| 50 |
+
"contact_title": "Contact Us",
|
| 51 |
+
"contact": """
|
| 52 |
+
For more information or to submit your results, please contact us at:
|
| 53 |
+
[spthsrbwls123@yonsei.ac.kr](mailto:spthsrbwls123@yonsei.ac.kr)
|
| 54 |
+
""",
|
| 55 |
+
"sidebar_title": "About HAERAE",
|
| 56 |
+
"sidebar_content": """
|
| 57 |
+
HAERAE is a non-profit research lab dedicated to advancing the field of
|
| 58 |
+
Korean language model interpretability and evaluation. Our work focuses on
|
| 59 |
+
creating insightful benchmarks and tools to push the boundaries of NLP research.
|
| 60 |
+
"""
|
| 61 |
+
},
|
| 62 |
+
"한국어": {
|
| 63 |
+
"title": "HAERAE 공개 연구 질문",
|
| 64 |
+
"intro": """
|
| 65 |
+
HAERAE๋ ํ๊ตญ์ด ์ธ์ด ๋ชจ๋ธ์ ํด์ ๊ฐ๋ฅ์ฑ๊ณผ ํ๊ฐ์ ์ค์ ์ ๋ ๋น์๋ฆฌ ์ฐ๊ตฌ์์
๋๋ค.
|
| 66 |
+
우리의 미션은 통찰력 있는 벤치마크와 도구를 통해 이 분야를 발전시키는 것입니다. 다음은 우리 프로젝트의 개요입니다.
|
| 67 |
+
|
| 68 |
+
대부분의 프로젝트를 내부적으로 수행해 왔지만, 해결하기 어려운 문제들에 대해서는
|
| 69 |
+
오픈 소스 커뮤니티의 도움을 받고자 공개할 계획입니다.
|
| 70 |
+
""",
|
| 71 |
+
"challenge_title": "HAERAE-Math 챌린지",
|
| 72 |
+
"challenge_desc": """
|
| 73 |
+
์ค๋ ์ฐ๋ฆฌ๋ ์ฒซ ๋ฒ์งธ ์ฑ๋ฆฐ์ง์ธ HAERAE-Math๋ฅผ ์๊ฐํฉ๋๋ค. ์ฐ๋ฆฌ๋ ์ํ์ ๊ดํ ๊ณ ํ์ง ์ง์๋ฌธ์ ๋ง๋ค์์ง๋ง
|
| 74 |
+
이에 대한 고품질 답변을 생성하는 방법에 대한 아이디어가 없습니다. 우리는 공개적으로 사용 가능한 라이선스를 가진
|
| 75 |
+
오픈 소스 모델을 사용하는 솔루션을 찾고 있습니다.
|
| 76 |
+
|
| 77 |
+
우리는 이미 총 20,000개의 지시문을 만들었고 더 많이 생성하고 있습니다. 우리는 이 중 50개의 미리보기를
|
| 78 |
+
다음 링크에서 공개했습니다: [HAERAE-Math 샘플](https://huggingface.co/datasets/HAERAE-HUB/HAERAE-Math-samples)
|
| 79 |
+
|
| 80 |
+
50개에 대한 답변을 생성하고 방법론/결과를 우리와 공유하는 분들에게는 나머지 지시문을 공유하고
|
| 81 |
+
최종 데이터셋에 대한 크레딧을 공유할 것입니다.
|
| 82 |
+
""",
|
| 83 |
+
"example_title": "예시 질문",
|
| 84 |
+
"how_to_title": "참여 방법",
|
| 85 |
+
"how_to": """
|
| 86 |
+
1. 제공된 Hugging Face 데이터셋 링크에서 50개의 샘플 질문에 접근합니다.
|
| 87 |
+
2. 오픈 소스 모델을 사용하여 이 질문들에 대한 고품질 답변을 생성합니다.
|
| 88 |
+
3. 방법론과 결과를 문서화합니다.
|
| 89 |
+
4. [연락처 정보 또는 제출 양식]을 통해 귀하의 결과를 우리와 공유합니다.
|
| 90 |
+
5. 귀하의 접근 방식이 유망하다면, 20,000개의 전체 지시문 데이터셋에 대한 접근 권한을 제공할 것입니다.
|
| 91 |
+
6. 답변 생성 과정을 개선하고 발전시키기 위해 우리와 협력합니다.
|
| 92 |
+
7. 최종 HAERAE-Math 데이터셋의 기여자로 인정받습니다.
|
| 93 |
+
""",
|
| 94 |
+
"why_title": "왜 참여해야 하나요?",
|
| 95 |
+
"why": """
|
| 96 |
+
- 한국어 언어 모델 연구 발전에 기여
|
| 97 |
+
- 대규모의 고품질 수학 지시문 데이터셋에 접근
|
| 98 |
+
- HAERAE 연구원들과 협력
|
| 99 |
+
- NLP 및 수학 교육 분야에서 인정받을 기회
|
| 100 |
+
- 관련 출판물의 공동 저자가 될 가능성
|
| 101 |
+
""",
|
| 102 |
+
"contact_title": "연락처",
|
| 103 |
+
"contact": """
|
| 104 |
+
더 많은 정보를 원하시거나 결과를 제출하려면 다음 연락처로 문의해 주세요:
|
| 105 |
+
[spthsrbwls123@yonsei.ac.kr](mailto:spthsrbwls123@yonsei.ac.kr)
|
| 106 |
+
""",
|
| 107 |
+
"sidebar_title": "HAERAE 소개",
|
| 108 |
+
"sidebar_content": """
|
| 109 |
+
HAERAE๋ ํ๊ตญ์ด ์ธ์ด ๋ชจ๋ธ์ ํด์ ๊ฐ๋ฅ์ฑ๊ณผ ํ๊ฐ ๋ถ์ผ๋ฅผ ๋ฐ์ ์ํค๋ ๋ฐ ์ ๋
ํ๋ ๋น์๋ฆฌ ์ฐ๊ตฌ์์
๋๋ค.
|
| 110 |
+
우리의 연구는 NLP 연구의 경계를 넓히기 위한 통찰력 있는 벤치마크와 도구를 만드는 데 중점을 둡니다.
|
| 111 |
+
"""
|
| 112 |
+
}
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
# Main content
|
| 116 |
+
st.title(content[lang]["title"])
|
| 117 |
+
|
| 118 |
+
st.write(content[lang]["intro"])
|
| 119 |
+
|
| 120 |
+
st.header(content[lang]["challenge_title"])
|
| 121 |
+
|
| 122 |
+
st.write(content[lang]["challenge_desc"])
|
| 123 |
+
|
| 124 |
+
st.subheader(content[lang]["example_title"])
|
| 125 |
|
| 126 |
example_question = """
|
| 127 |
ํ๊ตญ์ ๋ณด์ ์ ๋ฌธ๊ฐ๊ฐ ๊ณ ๋ํ๋ ๋ฐ์ดํฐ ๋ณดํธ ์์คํ
์ ๊ฐ๋ฐํ๊ณ ์์ต๋๋ค. ์ด ์์คํ
์ 3์ฐจ์ ๊ธฐํํ์ ์ ๊ธ ๋ฉ์ปค๋์ฆ์ ์ฌ์ฉํ๋๋ฐ, ์ ๊ธ ์ฅ์น๋ ์๋ฟ ๋ชจ์์ผ๋ก ๋์ด ์๊ณ , ๋ฐ๋ฉด์ ๋ฐ์ง๋ฆ์ 6cm, ๋์ด๋ 8cm์
๋๋ค. ์ด ์๋ฟ ๋ชจ์์ ์ ๊ธ ์ฅ์น์๋ ์ํต ๋ชจ์์ ์ด์ ๊ฐ ๋ฑ ๋ง๊ฒ ๋ค์ด๊ฐ๊ฒ ์ค๊ณ๋์ด ์์ต๋๋ค.
|
|
|
|
| 145 |
|
| 146 |
st.code(example_question, language="markdown")
|
| 147 |
|
| 148 |
+
st.header(content[lang]["how_to_title"])
|
| 149 |
+
|
| 150 |
+
st.write(content[lang]["how_to"])
|
| 151 |
+
|
| 152 |
+
st.header(content[lang]["why_title"])
|
| 153 |
+
|
| 154 |
+
st.write(content[lang]["why"])
|
| 155 |
+
|
| 156 |
+
st.header(content[lang]["contact_title"])
|
| 157 |
+
|
| 158 |
+
st.write(content[lang]["contact"])
|
| 159 |
+
|
| 160 |
+
st.sidebar.title(content[lang]["sidebar_title"])
|
| 161 |
+
st.sidebar.info(content[lang]["sidebar_content"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|