yimingwang123
/

enhanced-readability-random-forest

+import gradio as gr
+import joblib
+import pandas as pd
+import numpy as np
+import textstat
+import os
# Load the trained model once at import time. On any failure the app keeps
# running with ``model = None`` so the UI can still start and report the
# problem instead of crashing.
# NOTE: joblib files are pickle-based; only load a model file you trust.
try:
    model = joblib.load("enhanced_readability_random_forest.pkl")
except Exception as load_error:
    print(f"❌ Error loading model: {load_error}")
    model = None
else:
    print("✅ Enhanced model loaded successfully")
def predict_readability(text):
    """Estimate a reading grade level for ``text`` and return a text report.

    This is the simplified demo path: it does not run the loaded model's
    predictor. It averages three textstat formulas (Flesch-Kincaid grade,
    Coleman-Liau index, Automated Readability Index) and clamps the mean to
    the 1-12 range. The loaded model is consulted only for the feature count
    quoted in the closing note.

    Args:
        text: The passage to analyze. ``None``, empty, or whitespace-only
            input produces a prompt asking for text.

    Returns:
        A Markdown-flavoured report string, or a short message when the
        input is blank, the model is not loaded, or the analysis raised.
    """
    # Guard against None as well as blank strings so the handler never
    # fails on ``None.strip()``.
    if not text or not text.strip():
        return "Please enter some text to analyze."
    if model is None:
        return "Model not available. Please check the model file."
    try:
        # textstat exposes the Flesch-Kincaid grade as one function,
        # ``flesch_kincaid_grade``; there is no ``flesch_kincaid()`` object
        # with a ``.grade`` method.
        flesch_kincaid = textstat.flesch_kincaid_grade(text)
        coleman_liau = textstat.coleman_liau_index(text)
        ari = textstat.automated_readability_index(text)
        # Use a simplified prediction (in production, would use model.predict_text(text))
        estimated_grade = np.mean([flesch_kincaid, coleman_liau, ari])
        estimated_grade = max(1, min(12, estimated_grade))  # Clamp to 1-12 range
        # The feature count is only a footnote: fall back to 'many' rather
        # than failing the whole analysis if the loaded object carries no
        # ``model_info`` mapping.
        model_info = getattr(model, "model_info", None) or {}
        n_features = model_info.get('n_features_total', 'many')
        result = f"""
📊 **Readability Analysis Results**
**Predicted Grade Level**: {estimated_grade:.1f}
**Individual Metrics**:
- Flesch-Kincaid Grade: {flesch_kincaid:.1f}
- Coleman-Liau Index: {coleman_liau:.1f}
- Automated Readability Index: {ari:.1f}
**Text Statistics**:
- Characters: {len(text)}
- Words: {len(text.split())}
- Sentences: {textstat.sentence_count(text)}
*Note: This is a simplified demo. The full enhanced model uses {n_features} linguistic features for more accurate predictions.*
"""
        return result
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising into
        # the Gradio callback.
        return f"Error analyzing text: {e}"
def analyze_sample_texts(sample_choice):
    """Run the readability analysis on one of the built-in sample passages.

    Args:
        sample_choice: Label of the sample passage to analyze. A label that
            is not one of the built-in samples is analyzed as an empty
            string.

    Returns:
        Whatever ``predict_readability`` returns for the selected passage.
    """
    passages_by_label = {
        "Elementary (Grade 2-3)": "The cat sat on the mat. It was a big, soft mat. The cat was happy.",
        "Middle Elementary (Grade 4-5)": "Scientists have discovered that dolphins are very intelligent animals. They can learn tricks and communicate with each other using special sounds.",
        "Middle School (Grade 6-8)": "The industrial revolution fundamentally transformed society by introducing mechanized production methods, which significantly increased manufacturing efficiency while simultaneously creating new social and economic challenges.",
        "High School (Grade 9-12)": "Contemporary neuroscientific research utilizing advanced neuroimaging techniques has revealed intricate neural networks that facilitate complex cognitive processes, thereby elucidating the neurobiological foundations underlying human consciousness and decision-making mechanisms.",
    }
    if sample_choice in passages_by_label:
        passage = passages_by_label[sample_choice]
    else:
        passage = ""
    return predict_readability(passage)
# Build the Gradio UI: one tab for free-text analysis, one for the canned
# sample passages, and one with a static description of the model.
with gr.Blocks(theme=gr.themes.Soft(), title="📚 Enhanced Readability Assessment") as iface:
    gr.Markdown("# 📚 Enhanced Text Readability Assessment")
    gr.Markdown("Predict the reading grade level of English text using an Enhanced Random Forest model with comprehensive linguistic features.")

    # --- Tab 1: analyze text typed or pasted by the user ---
    with gr.Tab("Text Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Text to Analyze",
                    placeholder="Enter your text here for readability analysis...",
                    lines=8,
                )
                analyze_btn = gr.Button("🔍 Analyze Readability", variant="primary")
            with gr.Column():
                output = gr.Textbox(
                    label="Analysis Results",
                    lines=15,
                    interactive=False,
                )
        analyze_btn.click(fn=predict_readability, inputs=text_input, outputs=output)

    # --- Tab 2: analyze one of the predefined sample passages ---
    with gr.Tab("Sample Texts"):
        gr.Markdown("### Try these sample texts to see how readability varies by grade level:")
        # These labels are passed to analyze_sample_texts as the selection.
        sample_dropdown = gr.Dropdown(
            label="Select Sample Text",
            choices=[
                "Elementary (Grade 2-3)",
                "Middle Elementary (Grade 4-5)",
                "Middle School (Grade 6-8)",
                "High School (Grade 9-12)",
            ],
            value="Elementary (Grade 2-3)",
        )
        sample_btn = gr.Button("🎯 Analyze Sample", variant="secondary")
        sample_output = gr.Textbox(
            label="Sample Analysis Results",
            lines=12,
            interactive=False,
        )
        sample_btn.click(fn=analyze_sample_texts, inputs=sample_dropdown, outputs=sample_output)

    # --- Tab 3: static model description ---
    # The metadata fields are read from model.model_info once, while the UI
    # is being built; each falls back to 'N/A' when no model was loaded.
    with gr.Tab("Model Info"):
        gr.Markdown(f"""
        ### 🌲 Enhanced Random Forest Model Details
        **Model Type**: Enhanced Random Forest Regressor
        **Features**: {model.model_info.get('n_features_total', 'N/A') if model else 'N/A'} comprehensive linguistic features
        **Performance**: CV MAE = {model.model_info.get('performance', {}).get('cv_mae', 'N/A') if model else 'N/A'}
        **Training Date**: {model.model_info.get('trained_date', 'N/A') if model else 'N/A'}
        **Enhanced Features Include**:
        - Traditional readability metrics (Flesch-Kincaid, Coleman-Liau, etc.)
        - Age of Acquisition (AoA) based complexity measures
        - Syntactic complexity and parsing depth
        - Lexical diversity and vocabulary richness
        - Morphological feature analysis
        - Semantic complexity indicators
        - Corpus-specific features
        **Key Improvements**:
        - Automated feature selection for optimal performance
        - Robust scaling to handle outliers
        - Enhanced generalization across text types
        - Multi-dataset validation
        """)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()