File size: 16,513 Bytes
078d100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
import json
import os
import re
import subprocess
import sys

import spacy
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer

load_dotenv()


# OpenAI-compatible client; API key and endpoint come from the environment
# (loaded via load_dotenv() above). Both may be None if the .env is missing —
# failures then surface at request time inside evaluate_student_answer.
client = OpenAI(
    api_key=os.getenv("API_KEY"),
    base_url=os.getenv("GENERATOR_BASE_URL")
)

def ensure_spacy_model(model_name: str = "en_core_web_sm") -> None:
    """Ensure the given spaCy model is installed, downloading it if missing.

    Args:
        model_name: Name of the spaCy pipeline package to check/install.
            Defaults to "en_core_web_sm" (the model used throughout this app).

    Raises:
        subprocess.CalledProcessError: If the download command fails.
    """
    try:
        spacy.load(model_name)
    except OSError:
        # OSError is what spacy.load raises when the model package is absent.
        print(f"Downloading spaCy model: {model_name}...")
        # Use the running interpreter (sys.executable) rather than whatever
        # "python" happens to be on PATH, so the model is installed into the
        # same environment this app is running in (important inside venvs).
        subprocess.run([sys.executable, "-m", "spacy", "download", model_name], check=True)

def extract_key_components(rubric_text):
    """Dynamically extract key terms from rubric text using NLP.

    Combines spaCy named entities, noun chunks, and the top TF-IDF
    uni-/bigrams into one deduplicated list of lowercase terms.

    Args:
        rubric_text: Free-form rubric text to mine for key terms.

    Returns:
        list[str]: Deduplicated lowercase key terms (order unspecified).
    """
    ensure_spacy_model()  # Ensure the model is downloaded
    # Cache the loaded pipeline on the function object: spacy.load() is
    # expensive and this function runs on every evaluation request.
    nlp = getattr(extract_key_components, "_nlp", None)
    if nlp is None:
        nlp = spacy.load("en_core_web_sm")
        extract_key_components._nlp = nlp
    doc = nlp(rubric_text)

    entities = {ent.text.lower() for ent in doc.ents}
    noun_chunks = {chunk.text.lower() for chunk in doc.noun_chunks}

    # TfidfVectorizer raises ValueError when the text is empty or consists
    # entirely of English stop words; treat that as "no TF-IDF terms".
    try:
        vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words='english')
        tfidf_matrix = vectorizer.fit_transform([rubric_text])
        feature_names = vectorizer.get_feature_names_out()
        tfidf_scores = zip(feature_names, tfidf_matrix.toarray()[0])
        top_terms = [term for term, _ in sorted(tfidf_scores, key=lambda x: x[1], reverse=True)[:15]]
    except ValueError:
        top_terms = []

    return list(entities | noun_chunks | set(top_terms))

def evaluate_student_answer(student_answer, rubric):
    """Score a student answer against a rubric via NLP features plus an LLM.

    Args:
        student_answer: Raw answer text from the student.
        rubric: Dict with at least 'criteria_for_correct_answer' and
            'common_misconceptions' keys (both strings).

    Returns:
        dict: Parsed LLM JSON with 'score' (0-10), 'breakdown'
        (accuracy/relevance/completeness) and 'feedback'; a zeroed
        fallback dict when the API call or JSON parsing fails.
    """
    def preprocess(text):
        # Lowercase and strip punctuation so key-term matching is lenient.
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)
        return text.strip()

    cleaned_answer = preprocess(student_answer)
    features = {}

    rubric_text = f"{rubric['criteria_for_correct_answer']} {rubric['common_misconceptions']}"
    key_terms = extract_key_components(rubric_text)
    # Guard against an empty term list (e.g. trivial rubric text) to avoid
    # ZeroDivisionError when computing coverage.
    if key_terms:
        features['keyword_coverage'] = sum(1 for term in key_terms if term in cleaned_answer) / len(key_terms)
    else:
        features['keyword_coverage'] = 0.0

    # Count named entities in the answer that also appear among the rubric's
    # key terms — a rough relevance signal fed into the prompt below.
    doc = spacy.load("en_core_web_sm")(student_answer)
    features['key_entities_present'] = len([ent for ent in doc.ents if ent.text.lower() in key_terms])

    prompt = f"""Evaluate this answer against the rubric. Consider:
    - Keyword matches: {features.get('keyword_coverage', 0)*100:.1f}%
    - Key entities found: {features.get('key_entities_present', 0)}

    Rubric Criteria:
    {rubric['criteria_for_correct_answer']}

    Common Misconceptions:
    {rubric['common_misconceptions']}

    Student Answer:
    {student_answer}

    Return JSON with score (0-10), breakdown - (accuracy, relevance and completeness), and feedback strictly."""

    try:
        response = client.chat.completions.create(
            model=os.getenv("MODEL_NAME"),
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,  # low temperature for stable, reproducible grading
            response_format={"type": "json_object"}
        )
        llm_output = response.choices[0].message.content
        return json.loads(llm_output)
    except Exception as e:
        # Best-effort fallback: the UI still renders a result card instead of
        # crashing when the API is down or returns malformed JSON.
        print(f"API Error: {str(e)}")
        return {
            "score": 0,
            "breakdown": {"accuracy": 0, "relevance": 0, "completeness": 0},
            "feedback": "Evaluation service unavailable"
        }

# -------------------- Streamlit UI --------------------
st.set_page_config(page_title="Answer Evaluation System", layout="wide", page_icon="πŸ“˜")


st.markdown("""
    <style>
            .stMainBlockContainer {
            padding: 20px 50px;
        }
        .stAppHeader {
            position: relative;
            height: 0;
            }
    </style>
""", unsafe_allow_html=True)


st.markdown("""
    <style>
    .main {padding: 2rem 3rem;}
    .header {color: #2b3b52; border-bottom: 2px solid #eee;}
    .stTextArea textarea {border: 1px solid #e1e4e8 !important;}
    .score-container {background: #f8f9fa; border-radius: 10px; padding: 25px; margin: 20px 0;}
    .feedback-box {background: #fffbe6; border-left: 4px solid #ffd700; border-radius: 5px; padding: 20px; margin: 25px 0;}
    .metric-box {background: white; border-radius: 8px; padding: 20px; margin: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);}
    .metric-box h1 {color: #2b3b52; margin: 5px 0;}
    .stButton button {transition: all 0.3s ease;}
    .stButton button:hover {transform: translateY(-2px);}
    </style>
""", unsafe_allow_html=True)

EXAMPLES = {
    "Select an example...": {"rubric": {}, "answer": ""},
    
    "LDA Analysis": {
        "rubric": {
            "key_concept_assessed": "Understanding of Linear Discriminant Analysis (LDA) as a supervised dimensionality reduction technique and its application in pattern recognition",
            "criteria_for_correct_answer": (
                "A complete answer should:\n"
                "1. Differentiate LDA from PCA in terms of supervision and objective\n"
                "2. Explain the mathematical goal of maximizing between-class variance while minimizing within-class variance\n"
                "3. Describe the assumption of normal distribution and equal class covariance matrices\n"
                "4. Provide real-world applications in fields like bioinformatics or facial recognition"
            ),
            "common_misconceptions": (
                "1. Confusing LDA with Latent Dirichlet Allocation (same acronym)\n"
                "2. Believing LDA is primarily a classification algorithm rather than dimensionality reduction\n"
                "3. Assuming LDA requires no normality assumptions\n"
                "4. Thinking LDA and PCA are interchangeable for unsupervised problems"
            ),
            "cognitive_skill_tested": (
                "Analysis: Requires breaking down LDA's mathematical framework\n"
                "Evaluation: Comparing/contrasting with similar techniques like PCA\n"
                "Application: Demonstrating understanding through practical use cases"
            )
        },
        "answer": (
            "Linear Discriminant Analysis (LDA) is a supervised dimensionality reduction technique that maximizes class separability by:\n\n"
            "1. Calculating between-class and within-class scatter matrices\n"
            "2. Finding linear combinations of features that maximize Fisher's ratio: (between-class variance)/(within-class variance)\n"
            "3. Assuming multivariate normal distributions with equal covariance across classes\n\n"
            "Key applications include:\n"
            "- Preprocessing for classification tasks in speech recognition\n"
            "- Gene expression analysis in bioinformatics\n"
            "- Feature extraction in computer vision systems\n\n"
            "Unlike PCA which maximizes variance without class information, LDA explicitly uses class labels to find discriminative directions."
        )
    },
    
    "Climate Change Basics": {
        "rubric": {
            "key_concept_assessed": "Understanding of anthropogenic climate change mechanisms and evidence-based reasoning",
            "criteria_for_correct_answer": (
                "An exemplary response must:\n"
                "1. Identify main greenhouse gases (COβ‚‚, CHβ‚„, Nβ‚‚O) and their sources\n"
                "2. Explain the enhanced greenhouse effect using radiative forcing\n"
                "3. Distinguish between natural climate variability and anthropogenic forcing\n"
                "4. Reference IPCC assessment reports and paleoclimate evidence"
            ),
            "common_misconceptions": (
                "1. Equating ozone depletion with climate change\n"
                "2. Attributing current warming solely to solar cycles\n"
                "3. Confusing weather variability with long-term climate trends\n"
                "4. Overemphasizing natural COβ‚‚ sources while ignoring anthropogenic contributions"
            ),
            "cognitive_skill_tested": (
                "Comprehension: Interpreting climate proxies and modern observations\n"
                "Evaluation: Assessing credibility of different evidence types\n"
                "Synthesis: Integrating physical, chemical, and biological data"
            )
        },
        "answer": (
            "Modern climate change is primarily driven by human activities through:\n\n"
            "1. Fossil fuel combustion (75% of COβ‚‚ emissions)\n"
            "2. Agricultural practices (40% of CHβ‚„ from livestock and rice paddies)\n"
            "3. Deforestation reducing carbon sinks (12-17% of anthropogenic emissions)\n\n"
            "Key evidence includes:\n"
            "- 50% increase in atmospheric COβ‚‚ since 1750 (415 ppm vs 280 ppm pre-industrial)\n"
            "- Isotopic fingerprint showing fossil fuel origin of COβ‚‚ increase\n"
            "- Stratospheric cooling/tropospheric warming pattern characteristic of greenhouse forcing\n"
            "- Observed sea level rise (3.7 mm/yr) matching model predictions\n\n"
            "Natural factors like solar irradiance and volcanic activity cannot explain the current warming trend (IPCC AR6)."
        )
    },

    "Market Equilibrium": {
        "rubric": {
            "key_concept_assessed": "Understanding of price mechanism and market adjustment processes",
            "criteria_for_correct_answer": (
                "A strong answer should:\n"
                "1. Define equilibrium price/quantity using supply-demand curves\n"
                "2. Analyze effects of price floors/ceilings with real examples\n"
                "3. Explain elasticity's role in tax incidence\n"
                "4. Distinguish between short-run and long-run adjustments"
            ),
            "common_misconceptions": (
                "1. Believing equilibrium implies no transactions\n"
                "2. Assuming price controls benefit all consumers/producers\n"
                "3. Confusing movement along curves with shift of curves\n"
                "4. Thinking elasticity is constant across price ranges"
            ),
            "cognitive_skill_tested": (
                "Application: Using graphical models to predict market outcomes\n"
                "Evaluation: Assessing welfare impacts of policy interventions\n"
                "Synthesis: Connecting abstract models to real-world markets"
            )
        },
        "answer": (
            "Market equilibrium occurs when:\n\n"
            "Qd(P) = Qs(P)\n\n"
            "Key concepts:\n"
            "1. Price ceiling (e.g., rent control) creates shortages when below equilibrium\n"
            "2. Price floor (e.g., minimum wage) creates surpluses when above equilibrium\n"
            "3. Tax incidence depends on relative elasticity - inelastic side bears more burden\n\n"
            "Adjustment process:\n"
            "- Short-run: Inventory changes and queuing\n"
            "- Long-run: Entry/exit of firms and technological adaptation\n\n"
            "Example: Gasoline taxes largely borne by consumers due to inelastic demand."
        )
    }
}

def main():
    # Session State Initialization
    if 'rubric' not in st.session_state:
        st.session_state.rubric = {}
    if 'answer' not in st.session_state:
        st.session_state.answer = ""

    # Back Button
    if st.button("← Back to Dashboard", key="back_btn"):
        st.switch_page("app.py")

    # Page Header
    st.markdown("<h1 class='header'>πŸ“š Automated Answer Evaluation System</h1>", unsafe_allow_html=True)

    # Example Selector
    selected_example = st.selectbox("Load example scenario:", options=list(EXAMPLES.keys()))

    # Handle Example Selection
    if selected_example == "Select an example...":
        st.session_state.rubric = {}
        st.session_state.answer = ""
    else:
        example = EXAMPLES[selected_example]
        st.session_state.rubric = example["rubric"]
        st.session_state.answer = example["answer"]

    # Rubric Input Section
    with st.expander("🎯 Rubric Input", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            key_concept = st.text_area(
                "Key Concept Assessed", 
                value=st.session_state.rubric.get("key_concept_assessed", ""),
                placeholder="What key concept is being assessed?",
                height=150
            )
            criteria = st.text_area(
                "Criteria for Correct Answer", 
                value=st.session_state.rubric.get("criteria_for_correct_answer", ""),
                placeholder="What defines a correct answer?",
                height=150
            )
        with col2:
            misconceptions = st.text_area(
                "Common Misconceptions", 
                value=st.session_state.rubric.get("common_misconceptions", ""),
                placeholder="What common errors should be watched for?",
                height=150
            )
            cognitive_skill = st.text_area(
                "Cognitive Skill Tested", 
                value=st.session_state.rubric.get("cognitive_skill_tested", ""),
                placeholder="Which cognitive skills are being tested?",
                height=150
            )

    # Student Answer Section
    student_answer = st.text_area(
        "πŸ“ Student Answer", 
        value=st.session_state.answer,
        placeholder="Paste the student's answer here...",
        height=300
    )

    # Action Buttons
    col1, col2, col3 = st.columns([1,1,2])
    with col1:
        if st.button("🧹 Clear All", use_container_width=True):
            st.session_state.rubric = {}
            st.session_state.answer = ""
            st.rerun()
    with col2:
        evaluate_btn = st.button("πŸ” Evaluate Answer", use_container_width=True)

    # Evaluation Logic
    if evaluate_btn:
        if not all([key_concept, criteria, misconceptions, cognitive_skill]) or not student_answer:
            st.warning("❗ Please complete all rubric fields and provide a student answer!")
            return

        rubric = {
            "key_concept_assessed": key_concept,
            "criteria_for_correct_answer": criteria,
            "common_misconceptions": misconceptions,
            "cognitive_skill_tested": cognitive_skill
        }

        with st.spinner("πŸ” Analyzing answer..."):
            try:
                result = evaluate_student_answer(student_answer, rubric)
                
                # Results Display
                st.markdown("---")
                st.markdown("<h2 style='color: #2b3b52'>Evaluation Results</h2>", unsafe_allow_html=True)
                
                # Score Container
                with st.container():
                    st.markdown(f"""
                        <div class='score-container'>
                            <h2>Overall Score: {result.get('score', 0)}/10</h2>
                        </div>
                    """, unsafe_allow_html=True)
                
                # Metrics
                cols = st.columns(3)
                metrics = result.get('breakdown', {})
                with cols[0]:
                    st.markdown(f"""
                        <div class='metric-box'>
                            <h4>πŸ“ Accuracy</h4>
                            <h1>{metrics.get('accuracy', 0)}</h1>
                        </div>
                    """, unsafe_allow_html=True)
                with cols[1]:
                    st.markdown(f"""
                        <div class='metric-box'>
                            <h4>βœ… Completeness</h4>
                            <h1>{metrics.get('completeness', 0)}</h1>
                        </div>
                    """, unsafe_allow_html=True)
                with cols[2]:
                    st.markdown(f"""
                        <div class='metric-box'>
                            <h4>🎯 Relevance</h4>
                            <h1>{metrics.get('relevance', 0)}</h1>
                        </div>
                    """, unsafe_allow_html=True)
                
                # Feedback
                st.markdown(f"""
                    <div class='feedback-box'>
                        <h4>πŸ“Œ Detailed Feedback</h4>
                        <p>{result.get('feedback', 'No feedback available')}</p>
                    </div>
                """, unsafe_allow_html=True)
                
                # Raw JSON
                with st.expander("View Raw JSON Output"):
                    st.json(result)

            except Exception as e:
                st.error(f"🚨 Evaluation Error: {str(e)}")

if __name__ == "__main__":
    main()