import hmac
import logging
import math
import os
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import spacy
import streamlit as st
import textdescriptives as td
| |
|
| | |
# Configure the browser tab (title and icon) and select the wide page layout.
# NOTE(review): the icon glyph looks mis-encoded (probably an emoji originally) — confirm.
st.set_page_config(
    page_title="Text Grade Level Assignment",
    page_icon="π",
    layout="wide",
)
| |
|
def check_password():
    """Gate the app behind a shared password.

    Renders a password input until the correct password has been entered in
    the current session.

    Returns:
        bool: ``True`` once the correct password has been entered, ``False``
        otherwise (the password widget is rendered in that case).
    """
    # NOTE(review): the password is hard-coded in source; consider moving it
    # to ``st.secrets`` or an environment variable.
    expected_password = "gradelevel"

    def password_entered():
        """Check the submitted password and record the outcome in session state."""
        entered = st.session_state["password"]
        # Constant-time comparison so response timing does not reveal how much
        # of the password matched. Bytes are compared because compare_digest
        # rejects non-ASCII str arguments.
        if hmac.compare_digest(
            entered.encode("utf-8"), expected_password.encode("utf-8")
        ):
            st.session_state["password_correct"] = True
            # Do not keep the plaintext password in session state.
            del st.session_state["password"]
        else:
            st.session_state["password_correct"] = False

    if st.session_state.get("password_correct", False):
        return True

    st.text_input(
        "Password", type="password", on_change=password_entered, key="password"
    )
    # The flag only exists after an attempt, so the error is not shown on the
    # first visit.
    if "password_correct" in st.session_state:
        st.error("😕 Password incorrect")
    return False
| |
|
# Halt this script run unless the visitor has entered the password.
_authenticated = check_password()
if not _authenticated:
    st.stop()

# Page heading followed by a one-line description of the app.
# NOTE(review): the leading glyph in the title looks mis-encoded — confirm.
st.title("π Text Grade Level Assignment")
st.markdown(
    "Assign the grade level complexity of your text using quantitative metrics."
)
| |
|
| | |
@st.cache_resource
def load_spacy_model():
    """Load the spaCy pipeline used to compute the text metrics.

    Loads ``en_core_web_sm`` and appends the ``textdescriptives/all``
    component. The result is cached through ``st.cache_resource``.

    Returns:
        The spaCy pipeline, or ``None`` if loading failed (the error is shown
        in the app in that case).
    """
    # No "downloading" notice is emitted here: this function only loads an
    # already-installed model and never downloads one, so such a message
    # would be misleading on every load.
    try:
        pipeline = spacy.load("en_core_web_sm")
        pipeline.add_pipe("textdescriptives/all")
    except Exception as e:
        st.error(f"Error loading Spacy model: {e}")
        return None
    return pipeline


nlp = load_spacy_model()
| |
|
| | |
# Grade band labels mapped to their ordinal position (0 through 6).
GRADE_BAND_ORDER = {
    band: position
    for position, band in enumerate(
        ("K-1", "2-3", "4-5", "6-8", "9-10", "11-CCR", "CCR+")
    )
}

# Inverse lookup: ordinal position -> grade band label.
REVERSE_MAPPING = dict(zip(GRADE_BAND_ORDER.values(), GRADE_BAND_ORDER.keys()))
| |
|
def get_grade_level(predicted_order):
    """Convert the model's numeric prediction into a grade band label.

    The prediction is rounded to the nearest integer and clamped to the range
    of positions present in ``REVERSE_MAPPING``.

    Args:
        predicted_order: Numeric grade-band position predicted by the model.

    Returns:
        str: The matching grade band label, or ``"Unknown"`` when the
        prediction is NaN or infinite.
    """
    # round() raises ValueError on NaN and OverflowError on infinity, so
    # non-finite predictions are mapped to the fallback label up front.
    if not math.isfinite(predicted_order):
        return "Unknown"

    # Derive the clamp bounds from the mapping so they cannot drift apart
    # from the set of defined grade bands.
    lowest, highest = min(REVERSE_MAPPING), max(REVERSE_MAPPING)
    position = max(lowest, min(highest, round(predicted_order)))
    return REVERSE_MAPPING.get(position, "Unknown")
| |
|
| | |
# Pickled regression model, expected in the "models" directory two levels
# above this file.
MODEL_PATH = Path(__file__).parent.parent.joinpath(
    "models", "grade_level_quant_regression_model.pkl"
)


@st.cache_resource
def load_regression_model():
    """Load the regression model stored at ``MODEL_PATH``.

    Returns:
        The object deserialized by ``joblib.load``, or ``None`` when the file
        does not exist or cannot be loaded (a load failure is reported in the
        app).
    """
    if os.path.exists(MODEL_PATH):
        try:
            loaded = joblib.load(MODEL_PATH)
        except Exception as e:
            st.error(f"Error loading model file: {e}")
            loaded = None
        return loaded
    return None


model = load_regression_model()
| |
|
def clean_value(val, default=0.0):
    """Substitute ``default`` for a missing value.

    Args:
        val: The value to check.
        default: Replacement used when ``val`` is ``None`` or NaN.

    Returns:
        ``default`` if ``val`` is ``None`` or NaN, otherwise ``val`` unchanged.
    """
    # The None test comes first so math.isnan() is never called on None.
    is_missing = val is None or math.isnan(val)
    return default if is_missing else val
| |
|
def analyze_text(text, nlp_model, regression_model):
    """Compute readability metrics for a text and predict its grade band.

    Args:
        text: The text to analyze.
        nlp_model: Callable applied to ``text``; its result is passed to
            ``td.extract_dict`` to obtain the descriptive statistics.
        regression_model: Object whose ``predict`` method receives a one-row
            DataFrame of the metrics.

    Returns:
        tuple: ``(grade_band, metrics)``.
        ``(None, None)`` when ``text`` is empty, whitespace-only, or not a
        string; ``("Error", {})`` when analysis or prediction fails.
    """
    if not isinstance(text, str) or not text.strip():
        return None, None

    # Model feature name -> key in the textdescriptives output. The insertion
    # order is the column order handed to the regression model.
    feature_sources = {
        "FK_score": "flesch_kincaid_grade",
        "Gunning_fog": "gunning_fog",
        "Smog": "smog",
        "Lix": "lix",
        "Rix": "rix",
        "complexity_score_entropy": "entropy",
        "Sentence_Length": "sentence_length_mean",
    }

    try:
        doc = nlp_model(text)
        doc_stats = td.extract_dict(doc)[0]

        # Replace missing values *before* rounding: round(None, 2) raises
        # TypeError, which would fail the whole analysis instead of falling
        # back to the default.
        metrics = {
            feature: round(clean_value(doc_stats[source]), 2)
            for feature, source in feature_sources.items()
        }

        features = pd.DataFrame([metrics], columns=list(feature_sources))
        raw_prediction = regression_model.predict(features)[0]
        return get_grade_level(raw_prediction), metrics
    except Exception:
        # Deliberately best-effort so one bad text cannot abort a batch, but
        # record the traceback so failures can be diagnosed.
        logging.getLogger(__name__).exception("Text analysis failed")
        return "Error", {}
| |
|
| | |
# Sidebar: CSV upload used by the batch-processing section.
st.sidebar.title("Upload your csv file for batch processing")
st.sidebar.markdown("*!!! The CSV file must contain a column named **text**.*")
uploaded_file = st.sidebar.file_uploader("Upload CSV", type=["csv"])
| | |
| | |
| | |
| | |
| |
|
# Batch mode: only available when a CSV was uploaded and both the regression
# model and the spaCy pipeline loaded successfully.
if uploaded_file is not None and model is not None and nlp is not None:
    st.divider()
    st.header("Batch Processing Results")
    try:
        df = pd.read_csv(uploaded_file)
        if "text" not in df.columns:
            st.error("The CSV file must contain a column named 'text'.")
        elif df.empty:
            # Nothing to grade; say so instead of failing further down.
            st.warning("The CSV file contains no rows to process.")
        elif st.button("Process CSV"):
            progress_bar = st.progress(0, text="Processing rows...")
            total_rows = len(df)
            results = []

            # enumerate() yields a 1-based row position that does not depend
            # on the DataFrame's index labels, keeping the fraction in (0, 1].
            for position, (_, row) in enumerate(df.iterrows(), start=1):
                raw_text = row["text"]
                # A missing cell is read as NaN; str(NaN) would be graded as
                # the literal text "nan", so treat it as empty input instead.
                text = "" if pd.isna(raw_text) else str(raw_text)
                grade, metrics = analyze_text(text, nlp, model)

                row_result = row.to_dict()
                row_result["predicted_grade_level"] = grade if grade else "N/A"
                # Flatten the metrics straight into the row; rows without
                # metrics leave those cells empty rather than creating a
                # stray placeholder column.
                row_result.update(metrics or {})
                results.append(row_result)

                progress_bar.progress(
                    position / total_rows,
                    text=f"Processing row {position}/{total_rows}",
                )

            progress_bar.empty()

            final_df = pd.DataFrame(results)

            st.subheader("Preview (First 5 Rows)")
            st.dataframe(final_df.head(5))

            csv = final_df.to_csv(index=False).encode('utf-8')
            st.download_button(
                label="Download results as CSV",
                data=csv,
                file_name='grade_level_predictions.csv',
                mime='text/csv',
            )
    except Exception as e:
        st.error(f"Error processing CSV: {e}")
| |
|
| |
|
| | |
| |
|
if model is None:
    # Without the regression model nothing can be predicted; tell the user
    # which path was checked and where the file is expected.
    st.warning(f"⚠️ Model file not found at `{MODEL_PATH}`.")
    st.info("Please place your `grade_level_quant_regression_model.pkl` file in the `models` directory at the root of your project.")
else:
    # Single-text mode: grade one pasted text on demand.
    st.subheader("Single Text Analysis")
    text_input = st.text_area("Enter text to analyze:", height=200, placeholder="Paste your text here...")

    if st.button("Grade Level Prediction", type="primary"):
        if not text_input.strip():
            st.warning("Please enter some text first.")
        elif nlp is None:
            st.error("Text processing model (Spacy) is not available.")
        else:
            with st.spinner("Analyzing text complexity..."):
                grade_band, metrics = analyze_text(text_input, nlp, model)

            # analyze_text() signals failure with the sentinel string "Error".
            if grade_band == "Error":
                st.error("An error occurred during analysis. Please check your input text.")
            elif grade_band:
                st.success(f"### Assigned Grade band based on Quant Metrics: **{grade_band}**")

                with st.expander("View Detailed Metrics"):
                    st.json(metrics)
| |
|