File size: 16,513 Bytes
078d100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
import json
import os
import re
import subprocess
import sys

import spacy
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer

load_dotenv()


# OpenAI-compatible client; API key and endpoint come from the environment
# (loaded via load_dotenv() above). Both may be None if the .env is missing —
# failures then surface at request time inside evaluate_student_answer.
client = OpenAI(
    api_key=os.getenv("API_KEY"),
    base_url=os.getenv("GENERATOR_BASE_URL")
)

def ensure_spacy_model(model_name: str = "en_core_web_sm") -> None:
    """Ensure the given spaCy model is installed, downloading it if missing.

    Args:
        model_name: Name of the spaCy pipeline package to check/install.
            Defaults to "en_core_web_sm" (the model used throughout this app).

    Raises:
        subprocess.CalledProcessError: If the download command fails.
    """
    try:
        spacy.load(model_name)
    except OSError:
        # OSError is what spacy.load raises when the model package is absent.
        print(f"Downloading spaCy model: {model_name}...")
        # Use the running interpreter (sys.executable) rather than whatever
        # "python" happens to be on PATH, so the model is installed into the
        # same environment this app is running in (important inside venvs).
        subprocess.run([sys.executable, "-m", "spacy", "download", model_name], check=True)

def extract_key_components(rubric_text):
    """Dynamically extract key terms from rubric text using NLP.

    Combines spaCy named entities, noun chunks, and the top TF-IDF
    uni-/bigrams into one deduplicated list of lowercase terms.

    Args:
        rubric_text: Free-form rubric text to mine for key terms.

    Returns:
        list[str]: Deduplicated lowercase key terms (order unspecified).
    """
    ensure_spacy_model()  # Ensure the model is downloaded
    # Cache the loaded pipeline on the function object: spacy.load() is
    # expensive and this function runs on every evaluation request.
    nlp = getattr(extract_key_components, "_nlp", None)
    if nlp is None:
        nlp = spacy.load("en_core_web_sm")
        extract_key_components._nlp = nlp
    doc = nlp(rubric_text)

    entities = {ent.text.lower() for ent in doc.ents}
    noun_chunks = {chunk.text.lower() for chunk in doc.noun_chunks}

    # TfidfVectorizer raises ValueError when the text is empty or consists
    # entirely of English stop words; treat that as "no TF-IDF terms".
    try:
        vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words='english')
        tfidf_matrix = vectorizer.fit_transform([rubric_text])
        feature_names = vectorizer.get_feature_names_out()
        tfidf_scores = zip(feature_names, tfidf_matrix.toarray()[0])
        top_terms = [term for term, _ in sorted(tfidf_scores, key=lambda x: x[1], reverse=True)[:15]]
    except ValueError:
        top_terms = []

    return list(entities | noun_chunks | set(top_terms))

def evaluate_student_answer(student_answer, rubric):
    """Score a student answer against a rubric via NLP features plus an LLM.

    Args:
        student_answer: Raw answer text from the student.
        rubric: Dict with at least 'criteria_for_correct_answer' and
            'common_misconceptions' keys (both strings).

    Returns:
        dict: Parsed LLM JSON with 'score' (0-10), 'breakdown'
        (accuracy/relevance/completeness) and 'feedback'; a zeroed
        fallback dict when the API call or JSON parsing fails.
    """
    def preprocess(text):
        # Lowercase and strip punctuation so key-term matching is lenient.
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)
        return text.strip()

    cleaned_answer = preprocess(student_answer)
    features = {}

    rubric_text = f"{rubric['criteria_for_correct_answer']} {rubric['common_misconceptions']}"
    key_terms = extract_key_components(rubric_text)
    # Guard against an empty term list (e.g. trivial rubric text) to avoid
    # ZeroDivisionError when computing coverage.
    if key_terms:
        features['keyword_coverage'] = sum(1 for term in key_terms if term in cleaned_answer) / len(key_terms)
    else:
        features['keyword_coverage'] = 0.0

    # Count named entities in the answer that also appear among the rubric's
    # key terms — a rough relevance signal fed into the prompt below.
    doc = spacy.load("en_core_web_sm")(student_answer)
    features['key_entities_present'] = len([ent for ent in doc.ents if ent.text.lower() in key_terms])

    prompt = f"""Evaluate this answer against the rubric. Consider:
    - Keyword matches: {features.get('keyword_coverage', 0)*100:.1f}%
    - Key entities found: {features.get('key_entities_present', 0)}

    Rubric Criteria:
    {rubric['criteria_for_correct_answer']}

    Common Misconceptions:
    {rubric['common_misconceptions']}

    Student Answer:
    {student_answer}

    Return JSON with score (0-10), breakdown - (accuracy, relevance and completeness), and feedback strictly."""

    try:
        response = client.chat.completions.create(
            model=os.getenv("MODEL_NAME"),
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,  # low temperature for stable, reproducible grading
            response_format={"type": "json_object"}
        )
        llm_output = response.choices[0].message.content
        return json.loads(llm_output)
    except Exception as e:
        # Best-effort fallback: the UI still renders a result card instead of
        # crashing when the API is down or returns malformed JSON.
        print(f"API Error: {str(e)}")
        return {
            "score": 0,
            "breakdown": {"accuracy": 0, "relevance": 0, "completeness": 0},
            "feedback": "Evaluation service unavailable"
        }

# -------------------- Streamlit UI --------------------
st.set_page_config(page_title="Answer Evaluation System", layout="wide", page_icon="πŸ“˜")


st.markdown("""
    <style>
            .stMainBlockContainer {
            padding: 20px 50px;
        }
        .stAppHeader {
            position: relative;
            height: 0;
            }
    </style>
""", unsafe_allow_html=True)


st.markdown("""
    <style>
    .main {padding: 2rem 3rem;}
    .header {color: #2b3b52; border-bottom: 2px solid #eee;}
    .stTextArea textarea {border: 1px solid #e1e4e8 !important;}
    .score-container {background: #f8f9fa; border-radius: 10px; padding: 25px; margin: 20px 0;}
    .feedback-box {background: #fffbe6; border-left: 4px solid #ffd700; border-radius: 5px; padding: 20px; margin: 25px 0;}
    .metric-box {background: white; border-radius: 8px; padding: 20px; margin: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);}
    .metric-box h1 {color: #2b3b52; margin: 5px 0;}
    .stButton button {transition: all 0.3s ease;}
    .stButton button:hover {transform: translateY(-2px);}
    </style>
""", unsafe_allow_html=True)

EXAMPLES = {
    "Select an example...": {"rubric": {}, "answer": ""},
    
    "LDA Analysis": {
        "rubric": {
            "key_concept_assessed": "Understanding of Linear Discriminant Analysis (LDA) as a supervised dimensionality reduction technique and its application in pattern recognition",
            "criteria_for_correct_answer": (
                "A complete answer should:\n"
                "1. Differentiate LDA from PCA in terms of supervision and objective\n"
                "2. Explain the mathematical goal of maximizing between-class variance while minimizing within-class variance\n"
                "3. Describe the assumption of normal distribution and equal class covariance matrices\n"
                "4. Provide real-world applications in fields like bioinformatics or facial recognition"
            ),
            "common_misconceptions": (
                "1. Confusing LDA with Latent Dirichlet Allocation (same acronym)\n"
                "2. Believing LDA is primarily a classification algorithm rather than dimensionality reduction\n"
                "3. Assuming LDA requires no normality assumptions\n"
                "4. Thinking LDA and PCA are interchangeable for unsupervised problems"
            ),
            "cognitive_skill_tested": (
                "Analysis: Requires breaking down LDA's mathematical framework\n"
                "Evaluation: Comparing/contrasting with similar techniques like PCA\n"
                "Application: Demonstrating understanding through practical use cases"
            )
        },
        "answer": (
            "Linear Discriminant Analysis (LDA) is a supervised dimensionality reduction technique that maximizes class separability by:\n\n"
            "1. Calculating between-class and within-class scatter matrices\n"
            "2. Finding linear combinations of features that maximize Fisher's ratio: (between-class variance)/(within-class variance)\n"
            "3. Assuming multivariate normal distributions with equal covariance across classes\n\n"
            "Key applications include:\n"
            "- Preprocessing for classification tasks in speech recognition\n"
            "- Gene expression analysis in bioinformatics\n"
            "- Feature extraction in computer vision systems\n\n"
            "Unlike PCA which maximizes variance without class information, LDA explicitly uses class labels to find discriminative directions."
        )
    },
    
    "Climate Change Basics": {
        "rubric": {
            "key_concept_assessed": "Understanding of anthropogenic climate change mechanisms and evidence-based reasoning",
            "criteria_for_correct_answer": (
                "An exemplary response must:\n"
                "1. Identify main greenhouse gases (COβ‚‚, CHβ‚„, Nβ‚‚O) and their sources\n"
                "2. Explain the enhanced greenhouse effect using radiative forcing\n"
                "3. Distinguish between natural climate variability and anthropogenic forcing\n"
                "4. Reference IPCC assessment reports and paleoclimate evidence"
            ),
            "common_misconceptions": (
                "1. Equating ozone depletion with climate change\n"
                "2. Attributing current warming solely to solar cycles\n"
                "3. Confusing weather variability with long-term climate trends\n"
                "4. Overemphasizing natural COβ‚‚ sources while ignoring anthropogenic contributions"
            ),
            "cognitive_skill_tested": (
                "Comprehension: Interpreting climate proxies and modern observations\n"
                "Evaluation: Assessing credibility of different evidence types\n"
                "Synthesis: Integrating physical, chemical, and biological data"
            )
        },
        "answer": (
            "Modern climate change is primarily driven by human activities through:\n\n"
            "1. Fossil fuel combustion (75% of COβ‚‚ emissions)\n"
            "2. Agricultural practices (40% of CHβ‚„ from livestock and rice paddies)\n"
            "3. Deforestation reducing carbon sinks (12-17% of anthropogenic emissions)\n\n"
            "Key evidence includes:\n"
            "- 50% increase in atmospheric COβ‚‚ since 1750 (415 ppm vs 280 ppm pre-industrial)\n"
            "- Isotopic fingerprint showing fossil fuel origin of COβ‚‚ increase\n"
            "- Stratospheric cooling/tropospheric warming pattern characteristic of greenhouse forcing\n"
            "- Observed sea level rise (3.7 mm/yr) matching model predictions\n\n"
            "Natural factors like solar irradiance and volcanic activity cannot explain the current warming trend (IPCC AR6)."
        )
    },

    "Market Equilibrium": {
        "rubric": {
            "key_concept_assessed": "Understanding of price mechanism and market adjustment processes",
            "criteria_for_correct_answer": (
                "A strong answer should:\n"
                "1. Define equilibrium price/quantity using supply-demand curves\n"
                "2. Analyze effects of price floors/ceilings with real examples\n"
                "3. Explain elasticity's role in tax incidence\n"
                "4. Distinguish between short-run and long-run adjustments"
            ),
            "common_misconceptions": (
                "1. Believing equilibrium implies no transactions\n"
                "2. Assuming price controls benefit all consumers/producers\n"
                "3. Confusing movement along curves with shift of curves\n"
                "4. Thinking elasticity is constant across price ranges"
            ),
            "cognitive_skill_tested": (
                "Application: Using graphical models to predict market outcomes\n"
                "Evaluation: Assessing welfare impacts of policy interventions\n"
                "Synthesis: Connecting abstract models to real-world markets"
            )
        },
        "answer": (
            "Market equilibrium occurs when:\n\n"
            "Qd(P) = Qs(P)\n\n"
            "Key concepts:\n"
            "1. Price ceiling (e.g., rent control) creates shortages when below equilibrium\n"
            "2. Price floor (e.g., minimum wage) creates surpluses when above equilibrium\n"
            "3. Tax incidence depends on relative elasticity - inelastic side bears more burden\n\n"
            "Adjustment process:\n"
            "- Short-run: Inventory changes and queuing\n"
            "- Long-run: Entry/exit of firms and technological adaptation\n\n"
            "Example: Gasoline taxes largely borne by consumers due to inelastic demand."
        )
    }
}

def main():
    # Session State Initialization
    if 'rubric' not in st.session_state:
        st.session_state.rubric = {}
    if 'answer' not in st.session_state:
        st.session_state.answer = ""

    # Back Button
    if st.button("← Back to Dashboard", key="back_btn"):
        st.switch_page("app.py")

    # Page Header
    st.markdown("<h1 class='header'>πŸ“š Automated Answer Evaluation System</h1>", unsafe_allow_html=True)

    # Example Selector
    selected_example = st.selectbox("Load example scenario:", options=list(EXAMPLES.keys()))

    # Handle Example Selection
    if selected_example == "Select an example...":
        st.session_state.rubric = {}
        st.session_state.answer = ""
    else:
        example = EXAMPLES[selected_example]
        st.session_state.rubric = example["rubric"]
        st.session_state.answer = example["answer"]

    # Rubric Input Section
    with st.expander("🎯 Rubric Input", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            key_concept = st.text_area(
                "Key Concept Assessed", 
                value=st.session_state.rubric.get("key_concept_assessed", ""),
                placeholder="What key concept is being assessed?",
                height=150
            )
            criteria = st.text_area(
                "Criteria for Correct Answer", 
                value=st.session_state.rubric.get("criteria_for_correct_answer", ""),
                placeholder="What defines a correct answer?",
                height=150
            )
        with col2:
            misconceptions = st.text_area(
                "Common Misconceptions", 
                value=st.session_state.rubric.get("common_misconceptions", ""),
                placeholder="What common errors should be watched for?",
                height=150
            )
            cognitive_skill = st.text_area(
                "Cognitive Skill Tested", 
                value=st.session_state.rubric.get("cognitive_skill_tested", ""),
                placeholder="Which cognitive skills are being tested?",
                height=150
            )

    # Student Answer Section
    student_answer = st.text_area(
        "πŸ“ Student Answer", 
        value=st.session_state.answer,
        placeholder="Paste the student's answer here...",
        height=300
    )

    # Action Buttons
    col1, col2, col3 = st.columns([1,1,2])
    with col1:
        if st.button("🧹 Clear All", use_container_width=True):
            st.session_state.rubric = {}
            st.session_state.answer = ""
            st.rerun()
    with col2:
        evaluate_btn = st.button("πŸ” Evaluate Answer", use_container_width=True)

    # Evaluation Logic
    if evaluate_btn:
        if not all([key_concept, criteria, misconceptions, cognitive_skill]) or not student_answer:
            st.warning("❗ Please complete all rubric fields and provide a student answer!")
            return

        rubric = {
            "key_concept_assessed": key_concept,
            "criteria_for_correct_answer": criteria,
            "common_misconceptions": misconceptions,
            "cognitive_skill_tested": cognitive_skill
        }

        with st.spinner("πŸ” Analyzing answer..."):
            try:
                result = evaluate_student_answer(student_answer, rubric)
                
                # Results Display
                st.markdown("---")
                st.markdown("<h2 style='color: #2b3b52'>Evaluation Results</h2>", unsafe_allow_html=True)
                
                # Score Container
                with st.container():
                    st.markdown(f"""
                        <div class='score-container'>
                            <h2>Overall Score: {result.get('score', 0)}/10</h2>
                        </div>
                    """, unsafe_allow_html=True)
                
                # Metrics
                cols = st.columns(3)
                metrics = result.get('breakdown', {})
                with cols[0]:
                    st.markdown(f"""
                        <div class='metric-box'>
                            <h4>πŸ“ Accuracy</h4>
                            <h1>{metrics.get('accuracy', 0)}</h1>
                        </div>
                    """, unsafe_allow_html=True)
                with cols[1]:
                    st.markdown(f"""
                        <div class='metric-box'>
                            <h4>βœ… Completeness</h4>
                            <h1>{metrics.get('completeness', 0)}</h1>
                        </div>
                    """, unsafe_allow_html=True)
                with cols[2]:
                    st.markdown(f"""
                        <div class='metric-box'>
                            <h4>🎯 Relevance</h4>
                            <h1>{metrics.get('relevance', 0)}</h1>
                        </div>
                    """, unsafe_allow_html=True)
                
                # Feedback
                st.markdown(f"""
                    <div class='feedback-box'>
                        <h4>πŸ“Œ Detailed Feedback</h4>
                        <p>{result.get('feedback', 'No feedback available')}</p>
                    </div>
                """, unsafe_allow_html=True)
                
                # Raw JSON
                with st.expander("View Raw JSON Output"):
                    st.json(result)

            except Exception as e:
                st.error(f"🚨 Evaluation Error: {str(e)}")

if __name__ == "__main__":
    main()