Update exp2-main.py
Browse files- exp2-main.py +53 -14
exp2-main.py
CHANGED
|
@@ -208,26 +208,65 @@ def generate_rating_prompt(section_title: str) -> str:
|
|
| 208 |
def instructions_1():
|
| 209 |
st.title("Experiment 2: LLM Scene Abstraction Evaluation")
|
| 210 |
st.header("π Instructions (1/2)")
|
| 211 |
-
st.write(f"""
|
| 212 |
-
Welcome to Experiment 2! Here’s how it works:
|
| 213 |
|
| 214 |
-
- You will read a sentence that contains a specific <span style='color:{HIGHLIGHT_COLOR}; font-weight:500;'>**keyword**</span>.
|
| 215 |
-
- You will then see <span style='color:{HIGHLIGHT_COLOR}; font-weight:500;'>**scene-level information about the keyword** in the given situation</span>, generated by a large language model (LLM).
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
<br>
|
| 222 |
-
Your task is to **evaluate each section** based on how well it reflects the information conveyed in the original sentence.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
<br>
|
|
|
|
| 230 |
If you have questions or feedback, please feel free to let us know via email.
|
|
|
|
| 231 |
<br><br>
|
| 232 |
|
| 233 |
""", unsafe_allow_html=True)
|
|
|
|
| 208 |
def instructions_1():
|
| 209 |
st.title("Experiment 2: LLM Scene Abstraction Evaluation")
|
| 210 |
st.header("π Instructions (1/2)")
|
| 211 |
+
# st.write(f"""
|
| 212 |
+
# Welcome to Experiment 2! Here’s how it works:
|
| 213 |
|
| 214 |
+
# - You will read a sentence that contains a specific <span style='color:{HIGHLIGHT_COLOR}; font-weight:500;'>**keyword**</span>.
|
| 215 |
+
# - You will then see <span style='color:{HIGHLIGHT_COLOR}; font-weight:500;'>**scene-level information about the keyword** in the given situation</span>, generated by a large language model (LLM).
|
| 216 |
|
| 217 |
+
# - The information is organized into three sections:
|
| 218 |
+
# 1. **Engaged Events** — What is happening to the keyword in this situation?
|
| 219 |
+
# 2. **Generalizable Properties** — What context-relevant properties of the keyword are revealed through this situation?
|
| 220 |
+
# 3. **Evoked Emotions** — What emotions are associated with the keyword in this scene, and why?
|
| 221 |
+
# <br>
|
| 222 |
+
# Your task is to **evaluate each section** based on how well it reflects the information conveyed in the original sentence.
|
| 223 |
+
|
| 224 |
+
# - For each section, please rate the following dimensions on a 1–5 scale:
|
| 225 |
+
# - **Accuracy** — How accurate is it? Is the content factually consistent with the sentence?
|
| 226 |
+
# - **Completeness** — How complete and rich is it? Does it fully capture the relevant aspects of the keyword?
|
| 227 |
+
# - **Interpretability** — How interpretable is it? Is it easy to understand?
|
| 228 |
|
| 229 |
+
# <br>
|
| 230 |
+
# If you have questions or feedback, please feel free to let us know via email.
|
| 231 |
+
# <br><br>
|
| 232 |
+
|
| 233 |
+
# """, unsafe_allow_html=True)
|
| 234 |
+
st.write(f"""
|
| 235 |
+
<p style='font-size:18px;'>
|
| 236 |
+
Welcome to Experiment 2! Here’s how it works:
|
| 237 |
+
</p>
|
| 238 |
+
|
| 239 |
+
<ul style='font-size:18px;'>
|
| 240 |
+
<li>You will read a sentence that contains a specific <span style='color:{HIGHLIGHT_COLOR}; font-weight:500;'><b>keyword</b></span>.</li>
|
| 241 |
+
<li>You will then see <span style='color:{HIGHLIGHT_COLOR}; font-weight:500;'><b>scene-level information about the keyword</b> in the given situation</span>, generated by a large language model (LLM).</li>
|
| 242 |
+
</ul>
|
| 243 |
+
|
| 244 |
+
<p style='font-size:18px;'>
|
| 245 |
+
The information is organized into three sections:
|
| 246 |
+
</p>
|
| 247 |
+
<ul style='font-size:18px;'>
|
| 248 |
+
<li><b>Engaged Events</b> — What is happening to the keyword in this situation?</li>
|
| 249 |
+
<li><b>Generalizable Properties</b> — What context-relevant properties of the keyword are revealed through this situation?</li>
|
| 250 |
+
<li><b>Evoked Emotions</b> — What emotions are associated with the keyword in this scene, and why?</li>
|
| 251 |
+
</ul>
|
| 252 |
+
<br>
|
| 253 |
+
<p style='font-size:18px;'>
|
| 254 |
+
Your task is to <b>evaluate each section</b> based on how well it reflects the information conveyed in the original sentence.
|
| 255 |
+
</p>
|
| 256 |
+
|
| 257 |
+
<p style='font-size:18px;'>
|
| 258 |
+
For each section, please rate the following dimensions on a 1–5 scale:
|
| 259 |
+
</p>
|
| 260 |
+
<ul style='font-size:18px;'>
|
| 261 |
+
<li><b>Accuracy</b> — How accurate is it? Is the content factually consistent with the sentence?</li>
|
| 262 |
+
<li><b>Completeness</b> — How complete and rich is it? Does it fully capture the relevant aspects of the keyword?</li>
|
| 263 |
+
<li><b>Interpretability</b> — How interpretable is it? Is it easy to understand?</li>
|
| 264 |
+
</ul>
|
| 265 |
|
| 266 |
<br>
|
| 267 |
+
<p style='font-size:18px;'>
|
| 268 |
If you have questions or feedback, please feel free to let us know via email.
|
| 269 |
+
</p>
|
| 270 |
<br><br>
|
| 271 |
|
| 272 |
""", unsafe_allow_html=True)
|