Upload app.py with huggingface_hub

4060abf verified 6 months ago

6.04 kB


	import gradio as gr
	import joblib
	import pandas as pd
	import numpy as np
	import textstat
	import os

	# Load the enhanced model once at import time. `model` is left as None when
	# the file cannot be loaded so the rest of the app can still start and report
	# the problem to the user instead of crashing.
	# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
	# code -- only ship a model artifact that comes from a trusted source.
	try:
	    model = joblib.load("enhanced_readability_random_forest.pkl")
	except Exception as e:  # startup boundary: report the failure and continue
	    print(f"❌ Error loading model: {e}")
	    model = None
	else:
	    # Kept outside the try body so that a failure while printing cannot be
	    # mistaken for a load failure and throw away a correctly loaded model.
	    print("✅ Enhanced model loaded successfully")

	def predict_readability(text):
	    """Estimate a reading grade level for ``text`` and format a text report.

	    The estimate is the mean of three textstat formulas (Flesch-Kincaid grade,
	    Coleman-Liau index, Automated Readability Index), clamped to the 1-12
	    range. The loaded ``model`` is not used for the prediction itself: it only
	    has to be present, and its ``model_info`` (when available) supplies the
	    feature count quoted in the closing note.

	    Args:
	        text: Text to analyze. ``None`` is treated like an empty string.

	    Returns:
	        A multi-line report string, or a short message when the input is
	        blank, the model is unavailable, or the analysis raises.
	    """
	    if text is None or not text.strip():
	        return "Please enter some text to analyze."

	    if model is None:
	        return "Model not available. Please check the model file."

	    try:
	        # This is a simplified demo - the actual model would need
	        # the full feature computation pipeline.

	        # Basic readability metrics for demo. textstat exposes the
	        # Flesch-Kincaid grade as a single function, flesch_kincaid_grade().
	        flesch_kincaid = textstat.flesch_kincaid_grade(text)
	        coleman_liau = textstat.coleman_liau_index(text)
	        ari = textstat.automated_readability_index(text)

	        # Use a simplified prediction (in production, would use model.predict_text(text))
	        estimated_grade = np.mean([flesch_kincaid, coleman_liau, ari])
	        estimated_grade = max(1, min(12, estimated_grade))  # Clamp to 1-12 range

	        # Tolerate a model object that carries no model_info metadata.
	        model_info = getattr(model, "model_info", None) or {}
	        n_features = model_info.get('n_features_total', 'many')

	        result = f"""
	📊 Readability Analysis Results

	Predicted Grade Level: {estimated_grade:.1f}

	Individual Metrics:
	- Flesch-Kincaid Grade: {flesch_kincaid:.1f}
	- Coleman-Liau Index: {coleman_liau:.1f}
	- Automated Readability Index: {ari:.1f}

	Text Statistics:
	- Characters: {len(text)}
	- Words: {len(text.split())}
	- Sentences: {textstat.sentence_count(text)}

	Note: This is a simplified demo. The full enhanced model uses {n_features} linguistic features for more accurate predictions.
	"""
	        return result

	    except Exception as e:
	        # UI boundary: surface the failure as text instead of raising into Gradio.
	        return f"Error analyzing text: {str(e)}"

	def analyze_sample_texts(sample_choice):
	    """Run the readability analysis on one of the built-in sample passages.

	    ``sample_choice`` is the label of the passage to analyze. A label that is
	    not one of the built-in samples is analyzed as an empty string.
	    """
	    passages_by_label = {
	        "Elementary (Grade 2-3)": (
	            "The cat sat on the mat. "
	            "It was a big, soft mat. "
	            "The cat was happy."
	        ),
	        "Middle Elementary (Grade 4-5)": (
	            "Scientists have discovered that dolphins are very intelligent animals. "
	            "They can learn tricks and communicate with each other using special sounds."
	        ),
	        "Middle School (Grade 6-8)": (
	            "The industrial revolution fundamentally transformed society by "
	            "introducing mechanized production methods, which significantly "
	            "increased manufacturing efficiency while simultaneously creating "
	            "new social and economic challenges."
	        ),
	        "High School (Grade 9-12)": (
	            "Contemporary neuroscientific research utilizing advanced "
	            "neuroimaging techniques has revealed intricate neural networks "
	            "that facilitate complex cognitive processes, thereby elucidating "
	            "the neurobiological foundations underlying human consciousness "
	            "and decision-making mechanisms."
	        ),
	    }

	    # Fall back to an empty passage for labels that are not built in.
	    passage = ""
	    if sample_choice in passages_by_label:
	        passage = passages_by_label[sample_choice]
	    return predict_readability(passage)

	# Model metadata shown on the "Model Info" tab. Resolved once, and tolerant of
	# both a missing model (None) and a model object without a model_info
	# attribute, so building the UI never fails on metadata lookup.
	_model_info = getattr(model, "model_info", None) or {}

	# Create Gradio interface with enhanced features: three tabs (free-text
	# analysis, predefined samples, static model information).
	with gr.Blocks(title="📚 Enhanced Readability Assessment", theme=gr.themes.Soft()) as iface:
	    gr.Markdown("# 📚 Enhanced Text Readability Assessment")
	    gr.Markdown("Predict the reading grade level of English text using an Enhanced Random Forest model with comprehensive linguistic features.")

	    with gr.Tab("Text Analysis"):
	        with gr.Row():
	            # Left column: input text and the trigger button.
	            with gr.Column():
	                text_input = gr.Textbox(
	                    lines=8,
	                    placeholder="Enter your text here for readability analysis...",
	                    label="Text to Analyze"
	                )
	                analyze_btn = gr.Button("🔍 Analyze Readability", variant="primary")

	            # Right column: read-only report.
	            with gr.Column():
	                output = gr.Textbox(
	                    lines=15,
	                    label="Analysis Results",
	                    interactive=False
	                )

	        analyze_btn.click(predict_readability, inputs=text_input, outputs=output)

	    with gr.Tab("Sample Texts"):
	        gr.Markdown("### Try these sample texts to see how readability varies by grade level:")

	        # These labels are the keys analyze_sample_texts() looks up.
	        sample_dropdown = gr.Dropdown(
	            choices=[
	                "Elementary (Grade 2-3)",
	                "Middle Elementary (Grade 4-5)",
	                "Middle School (Grade 6-8)",
	                "High School (Grade 9-12)"
	            ],
	            label="Select Sample Text",
	            value="Elementary (Grade 2-3)"
	        )

	        sample_btn = gr.Button("🎯 Analyze Sample", variant="secondary")
	        sample_output = gr.Textbox(
	            lines=12,
	            label="Sample Analysis Results",
	            interactive=False
	        )

	        sample_btn.click(analyze_sample_texts, inputs=sample_dropdown, outputs=sample_output)

	    with gr.Tab("Model Info"):
	        gr.Markdown(f"""
	### 🌲 Enhanced Random Forest Model Details

	Model Type: Enhanced Random Forest Regressor
	Features: {_model_info.get('n_features_total', 'N/A')} comprehensive linguistic features
	Performance: CV MAE = {_model_info.get('performance', {}).get('cv_mae', 'N/A')}
	Training Date: {_model_info.get('trained_date', 'N/A')}

	Enhanced Features Include:
	- Traditional readability metrics (Flesch-Kincaid, Coleman-Liau, etc.)
	- Age of Acquisition (AoA) based complexity measures
	- Syntactic complexity and parsing depth
	- Lexical diversity and vocabulary richness
	- Morphological feature analysis
	- Semantic complexity indicators
	- Corpus-specific features

	Key Improvements:
	- Automated feature selection for optimal performance
	- Robust scaling to handle outliers
	- Enhanced generalization across text types
	- Multi-dataset validation
	""")

	# Start the web server only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()