File size: 6,035 Bytes

4060abf


import gradio as gr
import joblib
import pandas as pd
import numpy as np
import textstat
import os

# Load the enhanced model once at import time. Any failure is reported on
# stdout and leaves ``model`` set to None instead of aborting the import.
# NOTE(review): joblib.load unpickles the file, so it must only ever be
# pointed at a trusted model artifact.
try:
    model = joblib.load("enhanced_readability_random_forest.pkl")
except Exception as load_error:
    print(f"❌ Error loading model: {load_error}")
    model = None
else:
    print("✅ Enhanced model loaded successfully")

def predict_readability(text):
    """Estimate a reading grade level for ``text`` and format a report.

    This is a simplified demo path: it does not call the loaded model for the
    prediction itself. The grade is the mean of three textstat formulas
    (Flesch-Kincaid grade, Coleman-Liau index, Automated Readability Index),
    clamped to the 1-12 range.

    Args:
        text: The text to analyze. ``None``, empty, or whitespace-only input
            is rejected with a prompt message.

    Returns:
        str: A formatted analysis report, or a human-readable message when
        the input is empty, the model failed to load, or analysis raised.
    """
    # Guard against None as well as empty/whitespace-only input so the
    # callback never raises on ``None.strip()``.
    if not text or not text.strip():
        return "Please enter some text to analyze."

    if model is None:
        return "Model not available. Please check the model file."

    try:
        # This is a simplified demo - the actual model would need
        # the full feature computation pipeline

        # Basic readability metrics for demo.
        # textstat exposes the Flesch-Kincaid grade as
        # ``flesch_kincaid_grade(text)``; there is no ``flesch_kincaid()``.
        flesch_kincaid = textstat.flesch_kincaid_grade(text)
        coleman_liau = textstat.coleman_liau_index(text)
        ari = textstat.automated_readability_index(text)

        # Use a simplified prediction (in production, would use model.predict_text(text))
        estimated_grade = np.mean([flesch_kincaid, coleman_liau, ari])
        estimated_grade = max(1, min(12, estimated_grade))  # Clamp to 1-12 range

        # The loaded object may not carry a ``model_info`` mapping; fall back
        # to an empty dict so the note still renders with the 'many' default.
        model_info = getattr(model, "model_info", None) or {}
        n_features = model_info.get('n_features_total', 'many')

        result = f"""
📊 **Readability Analysis Results**

**Predicted Grade Level**: {estimated_grade:.1f}

**Individual Metrics**:
- Flesch-Kincaid Grade: {flesch_kincaid:.1f}
- Coleman-Liau Index: {coleman_liau:.1f}
- Automated Readability Index: {ari:.1f}

**Text Statistics**:
- Characters: {len(text)}
- Words: {len(text.split())}
- Sentences: {textstat.sentence_count(text)}

*Note: This is a simplified demo. The full enhanced model uses {n_features} linguistic features for more accurate predictions.*
"""
        return result

    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the
        # Gradio callback.
        return f"Error analyzing text: {str(e)}"

def analyze_sample_texts(sample_choice):
    """Run the readability analysis on one of the built-in sample passages.

    Args:
        sample_choice: Label of the sample to analyze. An unknown label is
            analyzed as an empty string.

    Returns:
        Whatever ``predict_readability`` returns for the selected passage.
    """
    sample_texts = {
        "Elementary (Grade 2-3)": "The cat sat on the mat. It was a big, soft mat. The cat was happy.",
        "Middle Elementary (Grade 4-5)": "Scientists have discovered that dolphins are very intelligent animals. They can learn tricks and communicate with each other using special sounds.",
        "Middle School (Grade 6-8)": "The industrial revolution fundamentally transformed society by introducing mechanized production methods, which significantly increased manufacturing efficiency while simultaneously creating new social and economic challenges.",
        "High School (Grade 9-12)": "Contemporary neuroscientific research utilizing advanced neuroimaging techniques has revealed intricate neural networks that facilitate complex cognitive processes, thereby elucidating the neurobiological foundations underlying human consciousness and decision-making mechanisms.",
    }

    # Unknown labels fall through to the empty string.
    if sample_choice in sample_texts:
        passage = sample_texts[sample_choice]
    else:
        passage = ""
    return predict_readability(passage)

# Create Gradio interface with enhanced features

# Resolve the model metadata once, defensively: the loaded object may be None
# (load failed) or may not expose a ``model_info`` mapping. Reading
# ``model.model_info`` directly would raise at import time and prevent the app
# from starting, so every field falls back to 'N/A' instead.
_model_info = (getattr(model, "model_info", None) or {}) if model is not None else {}
_n_features = _model_info.get('n_features_total', 'N/A')
_cv_mae = (_model_info.get('performance') or {}).get('cv_mae', 'N/A')
_trained_date = _model_info.get('trained_date', 'N/A')

with gr.Blocks(title="📚 Enhanced Readability Assessment", theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 📚 Enhanced Text Readability Assessment")
    gr.Markdown("Predict the reading grade level of English text using an Enhanced Random Forest model with comprehensive linguistic features.")

    # Tab 1: free-text input analyzed by predict_readability.
    with gr.Tab("Text Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    lines=8,
                    placeholder="Enter your text here for readability analysis...",
                    label="Text to Analyze"
                )
                analyze_btn = gr.Button("🔍 Analyze Readability", variant="primary")

            with gr.Column():
                output = gr.Textbox(
                    lines=15,
                    label="Analysis Results",
                    interactive=False
                )

        analyze_btn.click(predict_readability, inputs=text_input, outputs=output)

    # Tab 2: predefined passages; the choices must match the keys used in
    # analyze_sample_texts.
    with gr.Tab("Sample Texts"):
        gr.Markdown("### Try these sample texts to see how readability varies by grade level:")

        sample_dropdown = gr.Dropdown(
            choices=[
                "Elementary (Grade 2-3)",
                "Middle Elementary (Grade 4-5)",
                "Middle School (Grade 6-8)",
                "High School (Grade 9-12)"
            ],
            label="Select Sample Text",
            value="Elementary (Grade 2-3)"
        )

        sample_btn = gr.Button("🎯 Analyze Sample", variant="secondary")
        sample_output = gr.Textbox(
            lines=12,
            label="Sample Analysis Results",
            interactive=False
        )

        sample_btn.click(analyze_sample_texts, inputs=sample_dropdown, outputs=sample_output)

    # Tab 3: static description, filled in from the metadata resolved above.
    with gr.Tab("Model Info"):
        gr.Markdown(f"""
        ### 🌲 Enhanced Random Forest Model Details
        
        **Model Type**: Enhanced Random Forest Regressor
        **Features**: {_n_features} comprehensive linguistic features
        **Performance**: CV MAE = {_cv_mae}
        **Training Date**: {_trained_date}
        
        **Enhanced Features Include**:
        - Traditional readability metrics (Flesch-Kincaid, Coleman-Liau, etc.)
        - Age of Acquisition (AoA) based complexity measures
        - Syntactic complexity and parsing depth
        - Lexical diversity and vocabulary richness
        - Morphological feature analysis
        - Semantic complexity indicators
        - Corpus-specific features
        
        **Key Improvements**:
        - Automated feature selection for optimal performance
        - Robust scaling to handle outliers
        - Enhanced generalization across text types
        - Multi-dataset validation
        """)

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()