yimingwang123 committed on
Commit
4060abf
·
verified ·
1 Parent(s): 1154fa3

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +144 -0
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import joblib
4
+ import pandas as pd
5
+ import numpy as np
6
+ import textstat
7
+ import os
8
+
9
# Load the enhanced model once at import time. If loading fails for any
# reason the app still starts: `model` is set to None and the prediction
# function reports that the model is unavailable.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only deploy a .pkl that comes from a trusted source.
try:
    model = joblib.load("enhanced_readability_random_forest.pkl")
except Exception as e:
    # Top-level boundary: report the failure and fall back to "no model".
    print(f"❌ Error loading model: {e}")
    model = None
else:
    # Kept out of the try body so only the load itself is guarded.
    print("✅ Enhanced model loaded successfully")
16
+
17
def predict_readability(text):
    """Estimate the reading grade level of ``text`` and format a report.

    The estimate is the mean of three textstat formulas (Flesch-Kincaid
    grade, Coleman-Liau index, Automated Readability Index), clamped to the
    1-12 range. The loaded ``model`` is only consulted for the feature count
    quoted in the closing note; it is not used to make the prediction.

    Args:
        text: Raw text to analyze. ``None`` and whitespace-only input are
            treated as empty.

    Returns:
        A Markdown-formatted report string, or a short message when the
        input is empty, the model is unavailable, or the analysis fails.
    """
    # Guard against None as well as blank input; calling .strip() on None
    # would raise instead of producing the friendly prompt.
    if not text or not text.strip():
        return "Please enter some text to analyze."

    if model is None:
        return "Model not available. Please check the model file."

    try:
        # This is a simplified demo - the actual model would need
        # the full feature computation pipeline.

        # Basic readability metrics for demo. textstat exposes the
        # Flesch-Kincaid grade as flesch_kincaid_grade(text).
        flesch_kincaid = textstat.flesch_kincaid_grade(text)
        coleman_liau = textstat.coleman_liau_index(text)
        ari = textstat.automated_readability_index(text)

        # Use a simplified prediction (in production, would use
        # model.predict_text(text)).
        estimated_grade = np.mean([flesch_kincaid, coleman_liau, ari])
        estimated_grade = max(1, min(12, estimated_grade))  # Clamp to 1-12 range

        # The loaded object may not carry a model_info mapping; fall back to
        # an empty one so the note below still renders.
        model_info = getattr(model, "model_info", None) or {}
        n_features = model_info.get('n_features_total', 'many')

        # Build the report line by line so its whitespace does not depend on
        # how this function happens to be indented.
        report_lines = [
            "",
            "📊 **Readability Analysis Results**",
            "",
            f"**Predicted Grade Level**: {estimated_grade:.1f}",
            "",
            "**Individual Metrics**:",
            f"- Flesch-Kincaid Grade: {flesch_kincaid:.1f}",
            f"- Coleman-Liau Index: {coleman_liau:.1f}",
            f"- Automated Readability Index: {ari:.1f}",
            "",
            "**Text Statistics**:",
            f"- Characters: {len(text)}",
            f"- Words: {len(text.split())}",
            f"- Sentences: {textstat.sentence_count(text)}",
            "",
            f"*Note: This is a simplified demo. The full enhanced model uses {n_features} linguistic features for more accurate predictions.*",
            "",
        ]
        return "\n".join(report_lines)

    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"Error analyzing text: {str(e)}"
59
+
60
def analyze_sample_texts(sample_choice):
    """Run the readability analysis on one of the built-in sample passages.

    ``sample_choice`` is the label of the passage to analyze. A label with no
    matching passage is analyzed as an empty string.
    """
    passages_by_label = {
        "Elementary (Grade 2-3)": "The cat sat on the mat. It was a big, soft mat. The cat was happy.",
        "Middle Elementary (Grade 4-5)": "Scientists have discovered that dolphins are very intelligent animals. They can learn tricks and communicate with each other using special sounds.",
        "Middle School (Grade 6-8)": "The industrial revolution fundamentally transformed society by introducing mechanized production methods, which significantly increased manufacturing efficiency while simultaneously creating new social and economic challenges.",
        "High School (Grade 9-12)": "Contemporary neuroscientific research utilizing advanced neuroimaging techniques has revealed intricate neural networks that facilitate complex cognitive processes, thereby elucidating the neurobiological foundations underlying human consciousness and decision-making mechanisms.",
    }

    # Unknown labels fall back to empty text.
    if sample_choice in passages_by_label:
        chosen_passage = passages_by_label[sample_choice]
    else:
        chosen_passage = ""
    return predict_readability(chosen_passage)
70
+
71
# Create Gradio interface with enhanced features.

# Metadata shown on the "Model Info" tab. Read once and defensively: the
# model may have failed to load (None) or may not carry a model_info mapping,
# and an AttributeError here would stop the app from starting at all.
_model_info = getattr(model, "model_info", None) or {}

# Markdown for the "Model Info" tab, assembled line by line so the rendered
# text does not depend on source indentation.
_model_info_markdown = "\n".join([
    "### 🌲 Enhanced Random Forest Model Details",
    "",
    "**Model Type**: Enhanced Random Forest Regressor",
    f"**Features**: {_model_info.get('n_features_total', 'N/A')} comprehensive linguistic features",
    f"**Performance**: CV MAE = {_model_info.get('performance', {}).get('cv_mae', 'N/A')}",
    f"**Training Date**: {_model_info.get('trained_date', 'N/A')}",
    "",
    "**Enhanced Features Include**:",
    "- Traditional readability metrics (Flesch-Kincaid, Coleman-Liau, etc.)",
    "- Age of Acquisition (AoA) based complexity measures",
    "- Syntactic complexity and parsing depth",
    "- Lexical diversity and vocabulary richness",
    "- Morphological feature analysis",
    "- Semantic complexity indicators",
    "- Corpus-specific features",
    "",
    "**Key Improvements**:",
    "- Automated feature selection for optimal performance",
    "- Robust scaling to handle outliers",
    "- Enhanced generalization across text types",
    "- Multi-dataset validation",
])

with gr.Blocks(title="📚 Enhanced Readability Assessment", theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 📚 Enhanced Text Readability Assessment")
    gr.Markdown("Predict the reading grade level of English text using an Enhanced Random Forest model with comprehensive linguistic features.")

    # Tab 1: free-text analysis.
    with gr.Tab("Text Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    lines=8,
                    placeholder="Enter your text here for readability analysis...",
                    label="Text to Analyze",
                )
                analyze_btn = gr.Button("🔍 Analyze Readability", variant="primary")

            with gr.Column():
                # NOTE(review): predict_readability returns Markdown markup
                # ("**...**"), which a Textbox shows literally.
                output = gr.Textbox(
                    lines=15,
                    label="Analysis Results",
                    interactive=False,
                )

        analyze_btn.click(predict_readability, inputs=text_input, outputs=output)

    # Tab 2: predefined passages. The choices must match the labels that
    # analyze_sample_texts knows about.
    with gr.Tab("Sample Texts"):
        gr.Markdown("### Try these sample texts to see how readability varies by grade level:")

        sample_dropdown = gr.Dropdown(
            choices=[
                "Elementary (Grade 2-3)",
                "Middle Elementary (Grade 4-5)",
                "Middle School (Grade 6-8)",
                "High School (Grade 9-12)",
            ],
            label="Select Sample Text",
            value="Elementary (Grade 2-3)",
        )

        sample_btn = gr.Button("🎯 Analyze Sample", variant="secondary")
        sample_output = gr.Textbox(
            lines=12,
            label="Sample Analysis Results",
            interactive=False,
        )

        sample_btn.click(analyze_sample_texts, inputs=sample_dropdown, outputs=sample_output)

    # Tab 3: static description of the model.
    with gr.Tab("Model Info"):
        gr.Markdown(_model_info_markdown)

if __name__ == "__main__":
    iface.launch()