|
|
|
|
|
import gradio as gr |
|
|
import joblib |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import textstat |
|
|
import os |
|
|
|
|
|
|
|
|
# Load the trained model once, at import time. A failure is reported on stdout
# but is deliberately non-fatal: ``model`` is set to None so the module still
# imports and callers can check for the missing model.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code -- the .pkl must come from a trusted source.
try:
    model = joblib.load("enhanced_readability_random_forest.pkl")
    print("✅ Enhanced model loaded successfully")
except Exception as e:  # top-level boundary: any load failure disables the model
    print(f"❌ Error loading model: {e}")
    model = None
|
|
|
|
|
def predict_readability(text):
    """Estimate the reading grade level of ``text`` and format a report.

    The estimate is the mean of three textstat formulas (Flesch-Kincaid grade,
    Coleman-Liau index, Automated Readability Index), clamped to the range
    1-12. The loaded model is only consulted for the feature count quoted in
    the closing note; it does not compute the prediction here.

    Args:
        text: The text to analyze. ``None``, empty, or whitespace-only input
            is treated as "nothing entered".

    Returns:
        A Markdown-formatted report string, or a short human-readable message
        when there is no input, the model is unavailable, or analysis fails.
    """
    # Guard against None as well as blank input so the UI never sees a raw
    # AttributeError from ``None.strip()``.
    if not text or not text.strip():
        return "Please enter some text to analyze."

    if model is None:
        return "Model not available. Please check the model file."

    try:
        # textstat exposes each formula as a plain function; there is no
        # ``flesch_kincaid()`` object with a ``.grade`` method.
        flesch_kincaid = textstat.flesch_kincaid_grade(text)
        coleman_liau = textstat.coleman_liau_index(text)
        ari = textstat.automated_readability_index(text)

        # Average the three indices, then clamp to the supported grade range.
        estimated_grade = np.mean([flesch_kincaid, coleman_liau, ari])
        estimated_grade = max(1, min(12, estimated_grade))

        # The loaded object may not carry a ``model_info`` mapping; fall back
        # to the generic wording instead of failing the whole analysis.
        model_info = getattr(model, "model_info", None) or {}
        n_features = model_info.get('n_features_total', 'many')

        # Leading and trailing empty entries keep the report wrapped in
        # newlines, matching the original triple-quoted layout.
        report_lines = [
            "",
            "📊 **Readability Analysis Results**",
            "",
            f"**Predicted Grade Level**: {estimated_grade:.1f}",
            "",
            "**Individual Metrics**:",
            f"- Flesch-Kincaid Grade: {flesch_kincaid:.1f}",
            f"- Coleman-Liau Index: {coleman_liau:.1f}",
            f"- Automated Readability Index: {ari:.1f}",
            "",
            "**Text Statistics**:",
            f"- Characters: {len(text)}",
            f"- Words: {len(text.split())}",
            f"- Sentences: {textstat.sentence_count(text)}",
            "",
            "*Note: This is a simplified demo. The full enhanced model uses "
            f"{n_features} linguistic features for more accurate predictions.*",
            "",
        ]
        return "\n".join(report_lines)

    except Exception as e:
        # UI boundary: surface the problem as text rather than crashing the app.
        return f"Error analyzing text: {str(e)}"
|
|
|
|
|
def analyze_sample_texts(sample_choice):
    """Run the readability analysis on one of the built-in sample passages.

    Args:
        sample_choice: Label of the sample passage to analyze.

    Returns:
        Whatever ``predict_readability`` returns for the selected passage. An
        unrecognized label is analyzed as the empty string.
    """
    passages = {
        "Elementary (Grade 2-3)": "The cat sat on the mat. It was a big, soft mat. The cat was happy.",
        "Middle Elementary (Grade 4-5)": "Scientists have discovered that dolphins are very intelligent animals. They can learn tricks and communicate with each other using special sounds.",
        "Middle School (Grade 6-8)": "The industrial revolution fundamentally transformed society by introducing mechanized production methods, which significantly increased manufacturing efficiency while simultaneously creating new social and economic challenges.",
        "High School (Grade 9-12)": "Contemporary neuroscientific research utilizing advanced neuroimaging techniques has revealed intricate neural networks that facilitate complex cognitive processes, thereby elucidating the neurobiological foundations underlying human consciousness and decision-making mechanisms.",
    }

    # Unknown labels fall back to an empty passage.
    try:
        passage = passages[sample_choice]
    except KeyError:
        passage = ""

    return predict_readability(passage)
|
|
|
|
|
|
|
|
# Metadata shown on the "Model Info" tab. Resolved defensively so the app still
# starts when the model failed to load or carries no ``model_info`` mapping.
_model_info = getattr(model, "model_info", None) or {}
_model_performance = _model_info.get('performance') or {}

_MODEL_INFO_MARKDOWN = f"""
### 🌲 Enhanced Random Forest Model Details

**Model Type**: Enhanced Random Forest Regressor
**Features**: {_model_info.get('n_features_total', 'N/A')} comprehensive linguistic features
**Performance**: CV MAE = {_model_performance.get('cv_mae', 'N/A')}
**Training Date**: {_model_info.get('trained_date', 'N/A')}

**Enhanced Features Include**:
- Traditional readability metrics (Flesch-Kincaid, Coleman-Liau, etc.)
- Age of Acquisition (AoA) based complexity measures
- Syntactic complexity and parsing depth
- Lexical diversity and vocabulary richness
- Morphological feature analysis
- Semantic complexity indicators
- Corpus-specific features

**Key Improvements**:
- Automated feature selection for optimal performance
- Robust scaling to handle outliers
- Enhanced generalization across text types
- Multi-dataset validation
"""

# Three-tab interface: free-text analysis, canned samples, and model details.
with gr.Blocks(title="📚 Enhanced Readability Assessment", theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 📚 Enhanced Text Readability Assessment")
    gr.Markdown("Predict the reading grade level of English text using an Enhanced Random Forest model with comprehensive linguistic features.")

    with gr.Tab("Text Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    lines=8,
                    placeholder="Enter your text here for readability analysis...",
                    label="Text to Analyze",
                )
                analyze_btn = gr.Button("🔍 Analyze Readability", variant="primary")

            with gr.Column():
                output = gr.Textbox(
                    lines=15,
                    label="Analysis Results",
                    interactive=False,
                )

        analyze_btn.click(predict_readability, inputs=text_input, outputs=output)

    with gr.Tab("Sample Texts"):
        gr.Markdown("### Try these sample texts to see how readability varies by grade level:")

        # These labels must match the keys looked up by analyze_sample_texts.
        sample_dropdown = gr.Dropdown(
            choices=[
                "Elementary (Grade 2-3)",
                "Middle Elementary (Grade 4-5)",
                "Middle School (Grade 6-8)",
                "High School (Grade 9-12)",
            ],
            label="Select Sample Text",
            value="Elementary (Grade 2-3)",
        )

        sample_btn = gr.Button("🎯 Analyze Sample", variant="secondary")
        sample_output = gr.Textbox(
            lines=12,
            label="Sample Analysis Results",
            interactive=False,
        )

        sample_btn.click(analyze_sample_texts, inputs=sample_dropdown, outputs=sample_output)

    with gr.Tab("Model Info"):
        gr.Markdown(_MODEL_INFO_MARKDOWN)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|
|
|