Spaces:
Running
Running
# --- Dependencies and OpenAI client setup -----------------------------------
# Standard library
import json
import os
import re
import subprocess

# Third-party
import spacy
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer

# Pull API_KEY / GENERATOR_BASE_URL / MODEL_NAME from a local .env file so no
# secrets live in source control.
load_dotenv()

# Shared chat-completions client; credentials and endpoint come from the
# environment loaded above.
client = OpenAI(
    api_key=os.getenv("API_KEY"),
    base_url=os.getenv("GENERATOR_BASE_URL"),
)
def ensure_spacy_model(model_name="en_core_web_sm"):
    """Ensure the required spaCy model is installed.

    Attempts to load *model_name*; if spaCy raises ``OSError`` (model not
    present), downloads it via ``spacy download`` in a subprocess.

    Args:
        model_name: spaCy pipeline package to check/install. Defaults to
            ``en_core_web_sm`` to preserve the original behavior.

    Raises:
        subprocess.CalledProcessError: If the download command fails
            (``check=True``).
    """
    import sys  # local: only needed for the download fallback

    try:
        spacy.load(model_name)
    except OSError:
        # Use sys.executable instead of a bare "python" so the model is
        # installed into the interpreter/venv actually running this app.
        print(f"Downloading spaCy model: {model_name}...")
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", model_name],
            check=True,
        )
def extract_key_components(rubric_text):
    """Dynamically extract key terms from rubric using NLP.

    Combines three signals over the rubric text:
      * named entities (spaCy),
      * noun chunks (spaCy),
      * the 15 highest-scoring TF-IDF uni-/bi-grams.

    Args:
        rubric_text: Free-text rubric (criteria + misconceptions).

    Returns:
        list[str]: Deduplicated, lower-cased key terms (unordered).
    """
    # Cache the spaCy pipeline on the function object: loading the model is
    # expensive and this runs on every evaluation.
    nlp = getattr(extract_key_components, "_nlp", None)
    if nlp is None:
        ensure_spacy_model()  # download on first use if missing
        nlp = spacy.load("en_core_web_sm")
        extract_key_components._nlp = nlp

    doc = nlp(rubric_text)
    entities = {ent.text.lower() for ent in doc.ents}
    noun_chunks = {chunk.text.lower() for chunk in doc.noun_chunks}

    # TF-IDF over the single rubric "document"; keep the top 15 terms.
    vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words='english')
    tfidf_matrix = vectorizer.fit_transform([rubric_text])
    feature_names = vectorizer.get_feature_names_out()
    tfidf_scores = zip(feature_names, tfidf_matrix.toarray()[0])
    top_terms = [
        term
        for term, score in sorted(tfidf_scores, key=lambda x: x[1], reverse=True)[:15]
    ]

    return list(entities | noun_chunks | set(top_terms))
def evaluate_student_answer(student_answer, rubric):
    """Score a student answer against a rubric via heuristics plus an LLM.

    Computes lightweight lexical features (keyword coverage, named entities
    that match rubric key terms) and feeds them, with the rubric and the raw
    answer, to a chat-completion model that must return JSON.

    Args:
        student_answer: Raw answer text from the student.
        rubric: Dict with at least ``criteria_for_correct_answer`` and
            ``common_misconceptions`` keys.

    Returns:
        dict: Parsed LLM output with ``score`` (0-10), ``breakdown``
        (accuracy/relevance/completeness) and ``feedback``; a zeroed
        fallback dict if the API call or JSON parsing fails.
    """

    def preprocess(text):
        """Lower-case and strip punctuation for substring keyword matching."""
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)
        return text.strip()

    cleaned_answer = preprocess(student_answer)

    features = {}
    rubric_text = f"{rubric['criteria_for_correct_answer']} {rubric['common_misconceptions']}"
    key_terms = extract_key_components(rubric_text)

    # Guard against an empty key-term list (e.g. a trivial rubric) which
    # previously raised ZeroDivisionError.
    if key_terms:
        features['keyword_coverage'] = (
            sum(1 for term in key_terms if term in cleaned_answer) / len(key_terms)
        )
    else:
        features['keyword_coverage'] = 0.0

    nlp = spacy.load("en_core_web_sm")
    doc = nlp(student_answer)
    features['key_entities_present'] = len(
        [ent for ent in doc.ents if ent.text.lower() in key_terms]
    )

    prompt = f"""Evaluate this answer against the rubric. Consider:
- Keyword matches: {features.get('keyword_coverage', 0)*100:.1f}%
- Key entities found: {features.get('key_entities_present', 0)}
Rubric Criteria:
{rubric['criteria_for_correct_answer']}
Common Misconceptions:
{rubric['common_misconceptions']}
Student Answer:
{student_answer}
Return JSON with score (0-10), breakdown - (accuracy, relevance and completeness), and feedback strictly."""

    try:
        response = client.chat.completions.create(
            model=os.getenv("MODEL_NAME"),
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,  # near-deterministic scoring
            response_format={"type": "json_object"}
        )
        llm_output = response.choices[0].message.content
        return json.loads(llm_output)
    except Exception as e:
        # Broad catch is deliberate: any API/parse failure degrades to a
        # zeroed evaluation instead of crashing the UI.
        print(f"API Error: {str(e)}")
        return {
            "score": 0,
            "breakdown": {"accuracy": 0, "relevance": 0, "completeness": 0},
            "feedback": "Evaluation service unavailable"
        }
# -------------------- Streamlit UI --------------------
st.set_page_config(page_title="Answer Evaluation System", layout="wide", page_icon="π")

# Layout overrides: tighten main-container padding and collapse the default
# Streamlit header bar.
_LAYOUT_CSS = """
<style>
.stMainBlockContainer {
    padding: 20px 50px;
}
.stAppHeader {
    position: relative;
    height: 0;
}
</style>
"""

# Component theming: headings, text areas, score/feedback/metric cards and
# button hover effects used by the results view below.
_THEME_CSS = """
<style>
.main {padding: 2rem 3rem;}
.header {color: #2b3b52; border-bottom: 2px solid #eee;}
.stTextArea textarea {border: 1px solid #e1e4e8 !important;}
.score-container {background: #f8f9fa; border-radius: 10px; padding: 25px; margin: 20px 0;}
.feedback-box {background: #fffbe6; border-left: 4px solid #ffd700; border-radius: 5px; padding: 20px; margin: 25px 0;}
.metric-box {background: white; border-radius: 8px; padding: 20px; margin: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);}
.metric-box h1 {color: #2b3b52; margin: 5px 0;}
.stButton button {transition: all 0.3s ease;}
.stButton button:hover {transform: translateY(-2px);}
</style>
"""

st.markdown(_LAYOUT_CSS, unsafe_allow_html=True)
st.markdown(_THEME_CSS, unsafe_allow_html=True)
# Canned demo scenarios for the selectbox. The placeholder entry maps to an
# empty rubric/answer so selecting it clears the form. Chemical formulas use
# proper Unicode subscripts (CO₂, CH₄, N₂O) — repaired from mis-encoded text.
EXAMPLES = {
    "Select an example...": {"rubric": {}, "answer": ""},
    "LDA Analysis": {
        "rubric": {
            "key_concept_assessed": "Understanding of Linear Discriminant Analysis (LDA) as a supervised dimensionality reduction technique and its application in pattern recognition",
            "criteria_for_correct_answer": (
                "A complete answer should:\n"
                "1. Differentiate LDA from PCA in terms of supervision and objective\n"
                "2. Explain the mathematical goal of maximizing between-class variance while minimizing within-class variance\n"
                "3. Describe the assumption of normal distribution and equal class covariance matrices\n"
                "4. Provide real-world applications in fields like bioinformatics or facial recognition"
            ),
            "common_misconceptions": (
                "1. Confusing LDA with Latent Dirichlet Allocation (same acronym)\n"
                "2. Believing LDA is primarily a classification algorithm rather than dimensionality reduction\n"
                "3. Assuming LDA requires no normality assumptions\n"
                "4. Thinking LDA and PCA are interchangeable for unsupervised problems"
            ),
            "cognitive_skill_tested": (
                "Analysis: Requires breaking down LDA's mathematical framework\n"
                "Evaluation: Comparing/contrasting with similar techniques like PCA\n"
                "Application: Demonstrating understanding through practical use cases"
            )
        },
        "answer": (
            "Linear Discriminant Analysis (LDA) is a supervised dimensionality reduction technique that maximizes class separability by:\n\n"
            "1. Calculating between-class and within-class scatter matrices\n"
            "2. Finding linear combinations of features that maximize Fisher's ratio: (between-class variance)/(within-class variance)\n"
            "3. Assuming multivariate normal distributions with equal covariance across classes\n\n"
            "Key applications include:\n"
            "- Preprocessing for classification tasks in speech recognition\n"
            "- Gene expression analysis in bioinformatics\n"
            "- Feature extraction in computer vision systems\n\n"
            "Unlike PCA which maximizes variance without class information, LDA explicitly uses class labels to find discriminative directions."
        )
    },
    "Climate Change Basics": {
        "rubric": {
            "key_concept_assessed": "Understanding of anthropogenic climate change mechanisms and evidence-based reasoning",
            "criteria_for_correct_answer": (
                "An exemplary response must:\n"
                "1. Identify main greenhouse gases (CO₂, CH₄, N₂O) and their sources\n"
                "2. Explain the enhanced greenhouse effect using radiative forcing\n"
                "3. Distinguish between natural climate variability and anthropogenic forcing\n"
                "4. Reference IPCC assessment reports and paleoclimate evidence"
            ),
            "common_misconceptions": (
                "1. Equating ozone depletion with climate change\n"
                "2. Attributing current warming solely to solar cycles\n"
                "3. Confusing weather variability with long-term climate trends\n"
                "4. Overemphasizing natural CO₂ sources while ignoring anthropogenic contributions"
            ),
            "cognitive_skill_tested": (
                "Comprehension: Interpreting climate proxies and modern observations\n"
                "Evaluation: Assessing credibility of different evidence types\n"
                "Synthesis: Integrating physical, chemical, and biological data"
            )
        },
        "answer": (
            "Modern climate change is primarily driven by human activities through:\n\n"
            "1. Fossil fuel combustion (75% of CO₂ emissions)\n"
            "2. Agricultural practices (40% of CH₄ from livestock and rice paddies)\n"
            "3. Deforestation reducing carbon sinks (12-17% of anthropogenic emissions)\n\n"
            "Key evidence includes:\n"
            "- 50% increase in atmospheric CO₂ since 1750 (415 ppm vs 280 ppm pre-industrial)\n"
            "- Isotopic fingerprint showing fossil fuel origin of CO₂ increase\n"
            "- Stratospheric cooling/tropospheric warming pattern characteristic of greenhouse forcing\n"
            "- Observed sea level rise (3.7 mm/yr) matching model predictions\n\n"
            "Natural factors like solar irradiance and volcanic activity cannot explain the current warming trend (IPCC AR6)."
        )
    },
    "Market Equilibrium": {
        "rubric": {
            "key_concept_assessed": "Understanding of price mechanism and market adjustment processes",
            "criteria_for_correct_answer": (
                "A strong answer should:\n"
                "1. Define equilibrium price/quantity using supply-demand curves\n"
                "2. Analyze effects of price floors/ceilings with real examples\n"
                "3. Explain elasticity's role in tax incidence\n"
                "4. Distinguish between short-run and long-run adjustments"
            ),
            "common_misconceptions": (
                "1. Believing equilibrium implies no transactions\n"
                "2. Assuming price controls benefit all consumers/producers\n"
                "3. Confusing movement along curves with shift of curves\n"
                "4. Thinking elasticity is constant across price ranges"
            ),
            "cognitive_skill_tested": (
                "Application: Using graphical models to predict market outcomes\n"
                "Evaluation: Assessing welfare impacts of policy interventions\n"
                "Synthesis: Connecting abstract models to real-world markets"
            )
        },
        "answer": (
            "Market equilibrium occurs when:\n\n"
            "Qd(P) = Qs(P)\n\n"
            "Key concepts:\n"
            "1. Price ceiling (e.g., rent control) creates shortages when below equilibrium\n"
            "2. Price floor (e.g., minimum wage) creates surpluses when above equilibrium\n"
            "3. Tax incidence depends on relative elasticity - inelastic side bears more burden\n\n"
            "Adjustment process:\n"
            "- Short-run: Inventory changes and queuing\n"
            "- Long-run: Entry/exit of firms and technological adaptation\n\n"
            "Example: Gasoline taxes largely borne by consumers due to inelastic demand."
        )
    }
}
def main():
    """Render the evaluation page and drive the scoring workflow."""
    # Make sure the rubric/answer slots exist before any widget reads them.
    for state_key, default in (("rubric", {}), ("answer", "")):
        if state_key not in st.session_state:
            st.session_state[state_key] = default

    # Navigation back to the landing page.
    if st.button("β Back to Dashboard", key="back_btn"):
        st.switch_page("app.py")

    st.markdown("<h1 class='header'>π Automated Answer Evaluation System</h1>", unsafe_allow_html=True)

    # Example picker. The placeholder entry maps to an empty rubric/answer,
    # so a single lookup covers both "nothing selected" and a real example.
    selected_example = st.selectbox("Load example scenario:", options=list(EXAMPLES.keys()))
    chosen = EXAMPLES[selected_example]
    st.session_state.rubric = chosen["rubric"]
    st.session_state.answer = chosen["answer"]

    # --- Rubric input ----------------------------------------------------
    with st.expander("π― Rubric Input", expanded=True):
        left, right = st.columns(2)
        with left:
            key_concept = st.text_area(
                "Key Concept Assessed",
                value=st.session_state.rubric.get("key_concept_assessed", ""),
                placeholder="What key concept is being assessed?",
                height=150
            )
            criteria = st.text_area(
                "Criteria for Correct Answer",
                value=st.session_state.rubric.get("criteria_for_correct_answer", ""),
                placeholder="What defines a correct answer?",
                height=150
            )
        with right:
            misconceptions = st.text_area(
                "Common Misconceptions",
                value=st.session_state.rubric.get("common_misconceptions", ""),
                placeholder="What common errors should be watched for?",
                height=150
            )
            cognitive_skill = st.text_area(
                "Cognitive Skill Tested",
                value=st.session_state.rubric.get("cognitive_skill_tested", ""),
                placeholder="Which cognitive skills are being tested?",
                height=150
            )

    # --- Student answer --------------------------------------------------
    student_answer = st.text_area(
        "π Student Answer",
        value=st.session_state.answer,
        placeholder="Paste the student's answer here...",
        height=300
    )

    # --- Action buttons --------------------------------------------------
    clear_col, eval_col, _spacer = st.columns([1, 1, 2])
    with clear_col:
        if st.button("π§Ή Clear All", use_container_width=True):
            st.session_state.rubric = {}
            st.session_state.answer = ""
            st.rerun()
    with eval_col:
        evaluate_btn = st.button("π Evaluate Answer", use_container_width=True)

    if not evaluate_btn:
        return

    # Require every rubric field plus the answer before calling the LLM.
    if not all([key_concept, criteria, misconceptions, cognitive_skill]) or not student_answer:
        st.warning("β Please complete all rubric fields and provide a student answer!")
        return

    rubric = {
        "key_concept_assessed": key_concept,
        "criteria_for_correct_answer": criteria,
        "common_misconceptions": misconceptions,
        "cognitive_skill_tested": cognitive_skill
    }

    with st.spinner("π Analyzing answer..."):
        try:
            result = evaluate_student_answer(student_answer, rubric)

            st.markdown("---")
            st.markdown("<h2 style='color: #2b3b52'>Evaluation Results</h2>", unsafe_allow_html=True)

            # Overall score card.
            with st.container():
                st.markdown(f"""
<div class='score-container'>
    <h2>Overall Score: {result.get('score', 0)}/10</h2>
</div>
""", unsafe_allow_html=True)

            # One metric card per breakdown dimension, in fixed order.
            metric_cols = st.columns(3)
            breakdown = result.get('breakdown', {})
            card_specs = (
                ("π Accuracy", "accuracy"),
                ("β Completeness", "completeness"),
                ("π― Relevance", "relevance"),
            )
            for column, (label, metric_key) in zip(metric_cols, card_specs):
                with column:
                    st.markdown(f"""
<div class='metric-box'>
    <h4>{label}</h4>
    <h1>{breakdown.get(metric_key, 0)}</h1>
</div>
""", unsafe_allow_html=True)

            # Narrative feedback from the LLM.
            st.markdown(f"""
<div class='feedback-box'>
    <h4>π Detailed Feedback</h4>
    <p>{result.get('feedback', 'No feedback available')}</p>
</div>
""", unsafe_allow_html=True)

            with st.expander("View Raw JSON Output"):
                st.json(result)
        except Exception as e:
            st.error(f"π¨ Evaluation Error: {str(e)}")


if __name__ == "__main__":
    main()