# yimingwang123's picture
# Upload app.py with huggingface_hub
# 4060abf verified
import gradio as gr
import joblib
import pandas as pd
import numpy as np
import textstat
import os
# Load the enhanced model once at import time.
# The app is deliberately best-effort here: if loading fails for any reason,
# `model` is set to None and the failure is printed instead of raised.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code; only ship a model file that comes from a trusted source.
try:
    model = joblib.load("enhanced_readability_random_forest.pkl")
    print("✅ Enhanced model loaded successfully")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    model = None
def predict_readability(text):
    """Estimate a reading grade level for ``text`` and format a report.

    The estimate is the mean of three textstat metrics (Flesch-Kincaid grade,
    Coleman-Liau index, Automated Readability Index), clamped to the 1-12
    range. The loaded model is not used for the prediction itself; only its
    ``model_info`` metadata (when present) is read for the closing note.

    Args:
        text: The passage to analyse. ``None``, empty and whitespace-only
            values are treated as "no input".

    Returns:
        A Markdown-formatted report string, or a short human-readable message
        when there is no input, the model is unavailable, or analysis fails.
    """
    # Guard against None as well as blank strings so a cleared input
    # component cannot raise AttributeError on .strip().
    if text is None or not text.strip():
        return "Please enter some text to analyze."
    if model is None:
        return "Model not available. Please check the model file."
    try:
        # This is a simplified demo - the actual model would need
        # the full feature computation pipeline.
        # Basic readability metrics for demo.
        # textstat exposes this metric as flesch_kincaid_grade(text).
        flesch_kincaid = textstat.flesch_kincaid_grade(text)
        coleman_liau = textstat.coleman_liau_index(text)
        ari = textstat.automated_readability_index(text)

        # Use a simplified prediction (in production, would use model.predict_text(text))
        estimated_grade = np.mean([flesch_kincaid, coleman_liau, ari])
        estimated_grade = max(1, min(12, estimated_grade))  # Clamp to 1-12 range

        # Tolerate models that carry no model_info attribute (or an empty one).
        model_info = getattr(model, "model_info", None) or {}
        n_features = model_info.get('n_features_total', 'many')

        result = f"""
📊 **Readability Analysis Results**
**Predicted Grade Level**: {estimated_grade:.1f}
**Individual Metrics**:
- Flesch-Kincaid Grade: {flesch_kincaid:.1f}
- Coleman-Liau Index: {coleman_liau:.1f}
- Automated Readability Index: {ari:.1f}
**Text Statistics**:
- Characters: {len(text)}
- Words: {len(text.split())}
- Sentences: {textstat.sentence_count(text)}
*Note: This is a simplified demo. The full enhanced model uses {n_features} linguistic features for more accurate predictions.*
"""
        return result
    except Exception as e:
        # UI boundary: report the failure in the output box rather than crash.
        return f"Error analyzing text: {str(e)}"
def analyze_sample_texts(sample_choice):
    """Run the readability analysis on one of the built-in sample passages.

    ``sample_choice`` is the label of a predefined passage. An unrecognised
    label is analysed as an empty string, so the caller receives whatever
    ``predict_readability`` returns for empty input.
    """
    elementary = "The cat sat on the mat. It was a big, soft mat. The cat was happy."
    middle_elementary = "Scientists have discovered that dolphins are very intelligent animals. They can learn tricks and communicate with each other using special sounds."
    middle_school = "The industrial revolution fundamentally transformed society by introducing mechanized production methods, which significantly increased manufacturing efficiency while simultaneously creating new social and economic challenges."
    high_school = "Contemporary neuroscientific research utilizing advanced neuroimaging techniques has revealed intricate neural networks that facilitate complex cognitive processes, thereby elucidating the neurobiological foundations underlying human consciousness and decision-making mechanisms."

    passages_by_label = {
        "Elementary (Grade 2-3)": elementary,
        "Middle Elementary (Grade 4-5)": middle_elementary,
        "Middle School (Grade 6-8)": middle_school,
        "High School (Grade 9-12)": high_school,
    }

    try:
        passage = passages_by_label[sample_choice]
    except KeyError:
        # Unknown label: fall back to empty text.
        passage = ""
    return predict_readability(passage)
# Create Gradio interface with enhanced features

# Read the model metadata once and defensively: `model` may be None (load
# failed) or may not expose a `model_info` attribute at all. Either case
# falls back to 'N/A' instead of raising while the module is being imported.
_model_info = getattr(model, "model_info", None) or {}
_n_features = _model_info.get('n_features_total', 'N/A')
_cv_mae = _model_info.get('performance', {}).get('cv_mae', 'N/A')
_trained_date = _model_info.get('trained_date', 'N/A')

with gr.Blocks(title="📚 Enhanced Readability Assessment", theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 📚 Enhanced Text Readability Assessment")
    gr.Markdown("Predict the reading grade level of English text using an Enhanced Random Forest model with comprehensive linguistic features.")

    # Tab 1: free-text analysis.
    with gr.Tab("Text Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    lines=8,
                    placeholder="Enter your text here for readability analysis...",
                    label="Text to Analyze"
                )
                analyze_btn = gr.Button("🔍 Analyze Readability", variant="primary")
            with gr.Column():
                output = gr.Textbox(
                    lines=15,
                    label="Analysis Results",
                    interactive=False
                )
        analyze_btn.click(predict_readability, inputs=text_input, outputs=output)

    # Tab 2: predefined passages; labels must match those known to
    # analyze_sample_texts.
    with gr.Tab("Sample Texts"):
        gr.Markdown("### Try these sample texts to see how readability varies by grade level:")
        sample_dropdown = gr.Dropdown(
            choices=[
                "Elementary (Grade 2-3)",
                "Middle Elementary (Grade 4-5)",
                "Middle School (Grade 6-8)",
                "High School (Grade 9-12)"
            ],
            label="Select Sample Text",
            value="Elementary (Grade 2-3)"
        )
        sample_btn = gr.Button("🎯 Analyze Sample", variant="secondary")
        sample_output = gr.Textbox(
            lines=12,
            label="Sample Analysis Results",
            interactive=False
        )
        sample_btn.click(analyze_sample_texts, inputs=sample_dropdown, outputs=sample_output)

    # Tab 3: static description of the model, filled in from its metadata.
    with gr.Tab("Model Info"):
        gr.Markdown(f"""
### 🌲 Enhanced Random Forest Model Details
**Model Type**: Enhanced Random Forest Regressor
**Features**: {_n_features} comprehensive linguistic features
**Performance**: CV MAE = {_cv_mae}
**Training Date**: {_trained_date}
**Enhanced Features Include**:
- Traditional readability metrics (Flesch-Kincaid, Coleman-Liau, etc.)
- Age of Acquisition (AoA) based complexity measures
- Syntactic complexity and parsing depth
- Lexical diversity and vocabulary richness
- Morphological feature analysis
- Semantic complexity indicators
- Corpus-specific features
**Key Improvements**:
- Automated feature selection for optimal performance
- Robust scaling to handle outliers
- Enhanced generalization across text types
- Multi-dataset validation
""")

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()