import streamlit as st
import json
import re
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from openai import OpenAI
from dotenv import load_dotenv
import os

# Load environment variables (API_KEY, GENERATOR_BASE_URL, MODEL_NAME) from a .env file.
load_dotenv()
import subprocess

# OpenAI-compatible chat client; base_url lets this point at any
# OpenAI-compatible endpoint, not just api.openai.com.
client = OpenAI(
    api_key=os.getenv("API_KEY"),
    base_url=os.getenv("GENERATOR_BASE_URL")
)
def ensure_spacy_model():
    """Ensure the required spaCy model is installed, downloading it if missing.

    Raises:
        subprocess.CalledProcessError: if the download command fails (check=True).
    """
    import sys  # local import: keeps the file's top-level import block untouched

    model_name = "en_core_web_sm"
    try:
        spacy.load(model_name)
    except OSError:
        # spacy.load raises OSError when the model package is not installed.
        # Use the running interpreter (sys.executable) rather than whatever
        # "python" resolves to on PATH, so the model is installed into the
        # active environment.
        print(f"Downloading spaCy model: {model_name}...")
        subprocess.run([sys.executable, "-m", "spacy", "download", model_name], check=True)
# Process-wide cache of loaded spaCy pipelines, keyed by model name.
# Loading a model from disk is expensive; do it at most once per process.
_NLP_CACHE = {}


def _get_nlp(model_name="en_core_web_sm"):
    """Return a cached spaCy pipeline, installing the model on first use."""
    if model_name not in _NLP_CACHE:
        ensure_spacy_model()  # Ensure the model is downloaded
        _NLP_CACHE[model_name] = spacy.load(model_name)
    return _NLP_CACHE[model_name]


def extract_key_components(rubric_text):
    """Dynamically extract key terms from rubric using NLP.

    Combines three signals into one deduplicated, lowercase term list:
    named entities, noun chunks, and the 15 highest-scoring TF-IDF
    uni-/bi-grams of the rubric text.

    Args:
        rubric_text: Free-text rubric (criteria + misconceptions).

    Returns:
        list[str]: unique key terms (order not guaranteed — built from sets).
    """
    doc = _get_nlp()(rubric_text)
    entities = {ent.text.lower() for ent in doc.ents}
    noun_chunks = {chunk.text.lower() for chunk in doc.noun_chunks}
    # TF-IDF over the single rubric document to surface salient n-grams.
    vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words='english')
    tfidf_matrix = vectorizer.fit_transform([rubric_text])
    feature_names = vectorizer.get_feature_names_out()
    scores = tfidf_matrix.toarray()[0]
    ranked = sorted(zip(feature_names, scores), key=lambda pair: pair[1], reverse=True)
    top_terms = {term for term, _ in ranked[:15]}
    return list(entities | noun_chunks | top_terms)
def evaluate_student_answer(student_answer, rubric):
    """Score a student answer against a rubric using NLP features plus an LLM judge.

    Args:
        student_answer: Raw answer text.
        rubric: Dict that must contain 'criteria_for_correct_answer' and
            'common_misconceptions' (KeyError otherwise, matching the
            existing caller contract).

    Returns:
        dict: parsed LLM JSON expected to hold 'score' (0-10), 'breakdown'
        (accuracy/relevance/completeness), and 'feedback'; a zeroed fallback
        dict is returned if the LLM call or JSON parsing fails.
    """
    def preprocess(text):
        # Lowercase and strip punctuation so substring keyword matching is
        # insensitive to case and formatting.
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)
        return text.strip()

    cleaned_answer = preprocess(student_answer)
    features = {}
    rubric_text = f"{rubric['criteria_for_correct_answer']} {rubric['common_misconceptions']}"
    key_terms = extract_key_components(rubric_text)
    # Fix: guard against an empty key-term list, which previously raised
    # ZeroDivisionError for rubrics that yield no extractable terms.
    if key_terms:
        features['keyword_coverage'] = sum(1 for term in key_terms if term in cleaned_answer) / len(key_terms)
    else:
        features['keyword_coverage'] = 0.0
    # Fix: ensure the model is installed before loading here; previously this
    # relied on extract_key_components having already triggered the download.
    ensure_spacy_model()
    doc = spacy.load("en_core_web_sm")(student_answer)
    features['key_entities_present'] = len([ent for ent in doc.ents if ent.text.lower() in key_terms])
    prompt = f"""Evaluate this answer against the rubric. Consider:
- Keyword matches: {features.get('keyword_coverage', 0)*100:.1f}%
- Key entities found: {features.get('key_entities_present', 0)}
Rubric Criteria:
{rubric['criteria_for_correct_answer']}
Common Misconceptions:
{rubric['common_misconceptions']}
Student Answer:
{student_answer}
Return JSON with score (0-10), breakdown - (accuracy, relevance and completeness), and feedback strictly."""
    try:
        response = client.chat.completions.create(
            model=os.getenv("MODEL_NAME"),
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            # Ask the endpoint to emit strict JSON so json.loads below succeeds.
            response_format={"type": "json_object"}
        )
        llm_output = response.choices[0].message.content
        return json.loads(llm_output)
    except Exception as e:
        # Service boundary: degrade gracefully so the UI can still render.
        print(f"API Error: {str(e)}")
        return {
            "score": 0,
            "breakdown": {"accuracy": 0, "relevance": 0, "completeness": 0},
            "feedback": "Evaluation service unavailable"
        }
# -------------------- Streamlit UI --------------------
# NOTE(review): "π" and other emoji throughout look mojibaked by a bad
# encoding round-trip — confirm the intended glyphs against the original file.
st.set_page_config(page_title="Answer Evaluation System", layout="wide", page_icon="π")

# Tighten Streamlit's default container padding and collapse the app header.
st.markdown("""
<style>
.stMainBlockContainer {
padding: 20px 50px;
}
.stAppHeader {
position: relative;
height: 0;
}
</style>
""", unsafe_allow_html=True)

# Theming for input areas and the score/metric/feedback cards rendered below.
st.markdown("""
<style>
.main {padding: 2rem 3rem;}
.header {color: #2b3b52; border-bottom: 2px solid #eee;}
.stTextArea textarea {border: 1px solid #e1e4e8 !important;}
.score-container {background: #f8f9fa; border-radius: 10px; padding: 25px; margin: 20px 0;}
.feedback-box {background: #fffbe6; border-left: 4px solid #ffd700; border-radius: 5px; padding: 20px; margin: 25px 0;}
.metric-box {background: white; border-radius: 8px; padding: 20px; margin: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);}
.metric-box h1 {color: #2b3b52; margin: 5px 0;}
.stButton button {transition: all 0.3s ease;}
.stButton button:hover {transform: translateY(-2px);}
</style>
""", unsafe_allow_html=True)
# Pre-baked demo scenarios for the example selector in main(). Each entry
# carries a full rubric (the four fields the UI collects) plus a model answer.
# The placeholder first entry maps to empty inputs.
EXAMPLES = {
    "Select an example...": {"rubric": {}, "answer": ""},
    "LDA Analysis": {
        "rubric": {
            "key_concept_assessed": "Understanding of Linear Discriminant Analysis (LDA) as a supervised dimensionality reduction technique and its application in pattern recognition",
            "criteria_for_correct_answer": (
                "A complete answer should:\n"
                "1. Differentiate LDA from PCA in terms of supervision and objective\n"
                "2. Explain the mathematical goal of maximizing between-class variance while minimizing within-class variance\n"
                "3. Describe the assumption of normal distribution and equal class covariance matrices\n"
                "4. Provide real-world applications in fields like bioinformatics or facial recognition"
            ),
            "common_misconceptions": (
                "1. Confusing LDA with Latent Dirichlet Allocation (same acronym)\n"
                "2. Believing LDA is primarily a classification algorithm rather than dimensionality reduction\n"
                "3. Assuming LDA requires no normality assumptions\n"
                "4. Thinking LDA and PCA are interchangeable for unsupervised problems"
            ),
            "cognitive_skill_tested": (
                "Analysis: Requires breaking down LDA's mathematical framework\n"
                "Evaluation: Comparing/contrasting with similar techniques like PCA\n"
                "Application: Demonstrating understanding through practical use cases"
            )
        },
        "answer": (
            "Linear Discriminant Analysis (LDA) is a supervised dimensionality reduction technique that maximizes class separability by:\n\n"
            "1. Calculating between-class and within-class scatter matrices\n"
            "2. Finding linear combinations of features that maximize Fisher's ratio: (between-class variance)/(within-class variance)\n"
            "3. Assuming multivariate normal distributions with equal covariance across classes\n\n"
            "Key applications include:\n"
            "- Preprocessing for classification tasks in speech recognition\n"
            "- Gene expression analysis in bioinformatics\n"
            "- Feature extraction in computer vision systems\n\n"
            "Unlike PCA which maximizes variance without class information, LDA explicitly uses class labels to find discriminative directions."
        )
    },
    # NOTE(review): "COβ"/"CHβ" etc. below look like mojibaked subscripts
    # (CO2, CH4) — confirm against the original file; left byte-identical here.
    "Climate Change Basics": {
        "rubric": {
            "key_concept_assessed": "Understanding of anthropogenic climate change mechanisms and evidence-based reasoning",
            "criteria_for_correct_answer": (
                "An exemplary response must:\n"
                "1. Identify main greenhouse gases (COβ, CHβ, NβO) and their sources\n"
                "2. Explain the enhanced greenhouse effect using radiative forcing\n"
                "3. Distinguish between natural climate variability and anthropogenic forcing\n"
                "4. Reference IPCC assessment reports and paleoclimate evidence"
            ),
            "common_misconceptions": (
                "1. Equating ozone depletion with climate change\n"
                "2. Attributing current warming solely to solar cycles\n"
                "3. Confusing weather variability with long-term climate trends\n"
                "4. Overemphasizing natural COβ sources while ignoring anthropogenic contributions"
            ),
            "cognitive_skill_tested": (
                "Comprehension: Interpreting climate proxies and modern observations\n"
                "Evaluation: Assessing credibility of different evidence types\n"
                "Synthesis: Integrating physical, chemical, and biological data"
            )
        },
        "answer": (
            "Modern climate change is primarily driven by human activities through:\n\n"
            "1. Fossil fuel combustion (75% of COβ emissions)\n"
            "2. Agricultural practices (40% of CHβ from livestock and rice paddies)\n"
            "3. Deforestation reducing carbon sinks (12-17% of anthropogenic emissions)\n\n"
            "Key evidence includes:\n"
            "- 50% increase in atmospheric COβ since 1750 (415 ppm vs 280 ppm pre-industrial)\n"
            "- Isotopic fingerprint showing fossil fuel origin of COβ increase\n"
            "- Stratospheric cooling/tropospheric warming pattern characteristic of greenhouse forcing\n"
            "- Observed sea level rise (3.7 mm/yr) matching model predictions\n\n"
            "Natural factors like solar irradiance and volcanic activity cannot explain the current warming trend (IPCC AR6)."
        )
    },
    "Market Equilibrium": {
        "rubric": {
            "key_concept_assessed": "Understanding of price mechanism and market adjustment processes",
            "criteria_for_correct_answer": (
                "A strong answer should:\n"
                "1. Define equilibrium price/quantity using supply-demand curves\n"
                "2. Analyze effects of price floors/ceilings with real examples\n"
                "3. Explain elasticity's role in tax incidence\n"
                "4. Distinguish between short-run and long-run adjustments"
            ),
            "common_misconceptions": (
                "1. Believing equilibrium implies no transactions\n"
                "2. Assuming price controls benefit all consumers/producers\n"
                "3. Confusing movement along curves with shift of curves\n"
                "4. Thinking elasticity is constant across price ranges"
            ),
            "cognitive_skill_tested": (
                "Application: Using graphical models to predict market outcomes\n"
                "Evaluation: Assessing welfare impacts of policy interventions\n"
                "Synthesis: Connecting abstract models to real-world markets"
            )
        },
        "answer": (
            "Market equilibrium occurs when:\n\n"
            "Qd(P) = Qs(P)\n\n"
            "Key concepts:\n"
            "1. Price ceiling (e.g., rent control) creates shortages when below equilibrium\n"
            "2. Price floor (e.g., minimum wage) creates surpluses when above equilibrium\n"
            "3. Tax incidence depends on relative elasticity - inelastic side bears more burden\n\n"
            "Adjustment process:\n"
            "- Short-run: Inventory changes and queuing\n"
            "- Long-run: Entry/exit of firms and technological adaptation\n\n"
            "Example: Gasoline taxes largely borne by consumers due to inelastic demand."
        )
    }
}
def main():
    """Render the evaluation page: example loader, rubric/answer inputs,
    and the LLM-backed results display."""
    # Session State Initialization — survives Streamlit reruns.
    if 'rubric' not in st.session_state:
        st.session_state.rubric = {}
    if 'answer' not in st.session_state:
        st.session_state.answer = ""
    # Back Button
    if st.button("β Back to Dashboard", key="back_btn"):
        st.switch_page("app.py")
    # Page Header
    st.markdown("<h1 class='header'>π Automated Answer Evaluation System</h1>", unsafe_allow_html=True)
    # Example Selector
    selected_example = st.selectbox("Load example scenario:", options=list(EXAMPLES.keys()))
    # Handle Example Selection.
    # NOTE(review): this runs on every rerun, so while an example is selected it
    # re-populates session state immediately after "Clear All" resets it —
    # confirm that is the intended UX.
    if selected_example == "Select an example...":
        st.session_state.rubric = {}
        st.session_state.answer = ""
    else:
        example = EXAMPLES[selected_example]
        st.session_state.rubric = example["rubric"]
        st.session_state.answer = example["answer"]
    # Rubric Input Section — four free-text fields, pre-filled from session state.
    with st.expander("π― Rubric Input", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            key_concept = st.text_area(
                "Key Concept Assessed",
                value=st.session_state.rubric.get("key_concept_assessed", ""),
                placeholder="What key concept is being assessed?",
                height=150
            )
            criteria = st.text_area(
                "Criteria for Correct Answer",
                value=st.session_state.rubric.get("criteria_for_correct_answer", ""),
                placeholder="What defines a correct answer?",
                height=150
            )
        with col2:
            misconceptions = st.text_area(
                "Common Misconceptions",
                value=st.session_state.rubric.get("common_misconceptions", ""),
                placeholder="What common errors should be watched for?",
                height=150
            )
            cognitive_skill = st.text_area(
                "Cognitive Skill Tested",
                value=st.session_state.rubric.get("cognitive_skill_tested", ""),
                placeholder="Which cognitive skills are being tested?",
                height=150
            )
    # Student Answer Section
    student_answer = st.text_area(
        "π Student Answer",
        value=st.session_state.answer,
        placeholder="Paste the student's answer here...",
        height=300
    )
    # Action Buttons (third column is spacer only)
    col1, col2, col3 = st.columns([1,1,2])
    with col1:
        if st.button("π§Ή Clear All", use_container_width=True):
            st.session_state.rubric = {}
            st.session_state.answer = ""
            st.rerun()
    with col2:
        evaluate_btn = st.button("π Evaluate Answer", use_container_width=True)
    # Evaluation Logic
    if evaluate_btn:
        # All four rubric fields plus the answer are required before calling the LLM.
        if not all([key_concept, criteria, misconceptions, cognitive_skill]) or not student_answer:
            st.warning("β Please complete all rubric fields and provide a student answer!")
            return
        rubric = {
            "key_concept_assessed": key_concept,
            "criteria_for_correct_answer": criteria,
            "common_misconceptions": misconceptions,
            "cognitive_skill_tested": cognitive_skill
        }
        with st.spinner("π Analyzing answer..."):
            try:
                result = evaluate_student_answer(student_answer, rubric)
                # Results Display — .get() everywhere because the LLM's JSON
                # shape is not guaranteed.
                st.markdown("---")
                st.markdown("<h2 style='color: #2b3b52'>Evaluation Results</h2>", unsafe_allow_html=True)
                # Score Container (HTML kept flush-left: markdown treats
                # indented lines as code blocks).
                with st.container():
                    st.markdown(f"""
<div class='score-container'>
<h2>Overall Score: {result.get('score', 0)}/10</h2>
</div>
""", unsafe_allow_html=True)
                # Metrics
                cols = st.columns(3)
                metrics = result.get('breakdown', {})
                with cols[0]:
                    st.markdown(f"""
<div class='metric-box'>
<h4>π Accuracy</h4>
<h1>{metrics.get('accuracy', 0)}</h1>
</div>
""", unsafe_allow_html=True)
                with cols[1]:
                    # NOTE(review): the "β" prefix split across lines below looks
                    # like a mojibaked emoji — confirm the intended glyph.
                    st.markdown(f"""
<div class='metric-box'>
<h4>β
Completeness</h4>
<h1>{metrics.get('completeness', 0)}</h1>
</div>
""", unsafe_allow_html=True)
                with cols[2]:
                    st.markdown(f"""
<div class='metric-box'>
<h4>π― Relevance</h4>
<h1>{metrics.get('relevance', 0)}</h1>
</div>
""", unsafe_allow_html=True)
                # Feedback
                st.markdown(f"""
<div class='feedback-box'>
<h4>π Detailed Feedback</h4>
<p>{result.get('feedback', 'No feedback available')}</p>
</div>
""", unsafe_allow_html=True)
                # Raw JSON
                with st.expander("View Raw JSON Output"):
                    st.json(result)
            except Exception as e:
                # Boundary handler: surface any rendering/evaluation failure in the UI.
                st.error(f"π¨ Evaluation Error: {str(e)}")


if __name__ == "__main__":
    main()