Spaces:

Jandayl
/

Alalay

Sleeping

App Files Files Community

Alalay / app.py

Jandayl

re-ran save_webapp_components for consistency

69fcbd3 about 2 months ago

raw

history blame contribute delete

7.15 kB

	from flask import Flask, request, jsonify
	from flask_cors import CORS
	import joblib
	import pandas as pd
	import numpy as np
	from feature_extractor_web import extract_features_web
	import logging
	import os

	# Module-level logger for this service.
	logger = logging.getLogger(__name__)

	# Serialized model artifacts are looked up in a "models" folder next to this file.
	BASE_DIR = os.path.dirname(os.path.abspath(__file__))
	MODELS_DIR = os.path.join(BASE_DIR, "models")

	# Origins allowed by CORS: a comma-separated list taken from the ALLOWED_ORIGINS
	# environment variable, falling back to the defaults below. Each entry is
	# trimmed, stripped of trailing slashes, and blank entries are dropped.
	default_origins = "https://alalayfe.vercel.app,https://www.alalayfe.vercel.app,http://localhost:3000"
	ALLOWED_ORIGINS = [
	    entry.strip().rstrip("/")
	    for entry in os.getenv('ALLOWED_ORIGINS', default_origins).split(',')
	    if entry.strip()
	]

	app = Flask(__name__)
	CORS(app, origins=ALLOWED_ORIGINS)

	@app.route('/')
	def home():
	    """Return a small JSON description of the service and its endpoints."""
	    available_endpoints = ['/health', '/api/predict', '/api/predict/batch']
	    service_info = {
	        'service': 'Alalay Readability API',
	        'status': 'running',
	        'endpoints': available_endpoints,
	    }
	    return jsonify(service_info), 200

	# Load all the saved components.
	# NOTE(security): joblib.load unpickles its input, which can execute arbitrary
	# code. Only artifacts shipped with this app should ever live in MODELS_DIR.
	print("Loading model components...")
	try:
	    model = joblib.load(os.path.join(MODELS_DIR, "readability_model.pkl"))
	    label_encoder = joblib.load(os.path.join(MODELS_DIR, "label_encoder.pkl"))
	    grade_mapping = joblib.load(os.path.join(MODELS_DIR, "grade_mapping.pkl"))
	    thresholds = joblib.load(os.path.join(MODELS_DIR, "thresholds.pkl"))
	    feature_info = joblib.load(os.path.join(MODELS_DIR, "feature_info.pkl"))
	    print("All components loaded successfully!")
	    print(f" Model type: {type(model.named_steps['classifier']).__name__}")
	    print(f" Classes: {label_encoder.classes_}")
	except Exception as e:
	    # Record the full traceback so "Check server logs" is actionable.
	    logger.exception("Error loading models: %s", e)
	    # Reset every component, not just the model: a partial load must not leave
	    # some names undefined (NameError in handlers) or mismatched with the rest.
	    model = None
	    label_encoder = None
	    grade_mapping = {}
	    thresholds = {}
	    feature_info = {}


	def build_features_df(features: dict) -> pd.DataFrame:
	    """Assemble a single-row DataFrame for the model from extracted features.

	    Column order comes from ``feature_info['all_features']``; when that key is
	    absent, the keys of ``features`` are used instead. A column missing from
	    ``features`` is filled with ``'Other'`` if it is listed in
	    ``feature_info['categorical_cols']`` and with ``0.0`` otherwise.
	    """
	    ordered_cols = feature_info.get('all_features', list(features.keys()))
	    categorical = set(feature_info.get('categorical_cols', []))

	    def default_for(name):
	        # Placeholder used when the extractor did not produce this column.
	        return 'Other' if name in categorical else 0.0

	    record = {
	        name: features[name] if name in features else default_for(name)
	        for name in ordered_cols
	    }
	    return pd.DataFrame([record], columns=ordered_cols)


	def pick_class_with_thresholds(probabilities: np.ndarray) -> int:
	    """Choose a class index from per-class probabilities.

	    A class qualifies when its probability is at least its entry in
	    ``thresholds`` (0.5 when the class has no entry). The qualifying class
	    with the highest probability wins, the earliest index on ties. If no
	    class qualifies, the plain argmax index is returned.
	    """
	    class_names = label_encoder.classes_
	    argmax_idx = int(np.argmax(probabilities))

	    passing = []
	    for idx, name in enumerate(class_names):
	        if probabilities[idx] >= thresholds.get(name, 0.5):
	            passing.append(idx)

	    if passing:
	        return max(passing, key=lambda idx: probabilities[idx])
	    return argmax_idx

	# MongoDB connection (optional for now)
	# try:
	# MONGO_URI = os.getenv('MONGO_URI', 'mongodb://localhost:27017/')
	# client = MongoClient(MONGO_URI)
	# db = client['readability_db']
	# # Test connection
	# client.admin.command('ping')
	# print("MongoDB connected")
	# except:
	# print("MongoDB not available - continuing without database")
	# db = None

	@app.route('/health', methods=['GET'])
	@app.route('/api/health', methods=['GET'])
	def health():
	    """Report service health; always answers 200, 'degraded' without a model."""
	    if model is None:
	        payload = {'status': 'degraded', 'model': None, 'classes': []}
	    else:
	        payload = {
	            'status': 'healthy',
	            'model': type(model.named_steps['classifier']).__name__,
	            'classes': label_encoder.classes_.tolist(),
	        }
	    return jsonify(payload), 200

	@app.route('/api/predict', methods=['POST'])
	def predict():
	    """Predict the readability class of a single text.

	    Expects a JSON object body with a string ``text`` field that is at least
	    10 characters long after surrounding whitespace is stripped.

	    Returns:
	        A ``(response, status)`` pair. 200 carries the (truncated) text, the
	        predicted class, its grade level, per-class confidences and the
	        extracted features. 400 is returned for a missing or malformed body or
	        text, 503 when the model is not loaded, and 500 when feature
	        extraction fails or an unexpected error occurs.
	    """
	    if model is None:
	        return jsonify({'error': 'Model not loaded. Check server logs.'}), 503
	    try:
	        # silent=True returns None for a missing/unparseable JSON body instead of
	        # raising, so client mistakes become a 400 rather than a generic 500.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'error': 'Request body must be a JSON object'}), 400

	        text = data.get('text', '')
	        if not isinstance(text, str):
	            return jsonify({'error': 'Text must be a string'}), 400
	        text = text.strip()

	        if not text:
	            return jsonify({'error': 'No text provided'}), 400

	        if len(text) < 10:
	            return jsonify({'error': 'Text must be at least 10 characters'}), 400

	        # Extract features
	        features = extract_features_web(text)
	        if not features:
	            return jsonify({'error': 'Feature extraction failed. Check server logs.'}), 500

	        # Convert to DataFrame using training-time feature order.
	        features_df = build_features_df(features)

	        # Get prediction
	        probabilities = model.predict_proba(features_df)[0]

	        # Apply the per-class thresholds (argmax fallback) to pick the class.
	        final_prediction = pick_class_with_thresholds(probabilities)

	        predicted_class = label_encoder.classes_[final_prediction]
	        grade_level = grade_mapping.get(predicted_class, predicted_class)

	        # numpy integer/floating scalars are not plain int/float, so convert them
	        # explicitly; otherwise JSON encoding of the features can fail.
	        numeric_types = (int, float, np.integer, np.floating)

	        # Prepare response
	        response = {
	            'success': True,
	            # Echo back at most the first 200 characters of the input.
	            'text': text[:200] + '...' if len(text) > 200 else text,
	            'prediction': {
	                'predicted_class': predicted_class,
	                'grade_level': grade_level,
	                'confidences': {
	                    class_name: float(probabilities[i])
	                    for i, class_name in enumerate(label_encoder.classes_)
	                }
	            },
	            'features': {
	                k: float(v) if isinstance(v, numeric_types) else v
	                for k, v in features.items()
	            }
	        }

	        # # Store in MongoDB if available
	        # if db:
	        #     db.texts.insert_one({
	        #         'text': text,
	        #         'prediction': response['prediction'],
	        #         'features': features,
	        #         'timestamp': datetime.utcnow()
	        #     })

	        return jsonify(response), 200

	    except Exception as e:
	        # Keep the existing error payload for clients, but record the traceback.
	        logger.exception("Prediction failed")
	        return jsonify({'error': str(e)}), 500

	@app.route('/api/predict/batch', methods=['POST'])
	def batch_predict():
	    """Predict the readability class for each text in a batch.

	    Expects a JSON object body with a non-empty ``texts`` list of strings.

	    Returns:
	        A ``(response, status)`` pair. 200 carries ``count`` and a ``results``
	        list with, per text, the (truncated) text, predicted class, grade and
	        per-class confidences. 400 is returned for a missing or malformed
	        body or ``texts``, 503 when the model is not loaded, and 500 when
	        feature extraction fails or an unexpected error occurs.
	    """
	    # Same guard as the single-text endpoint: without it a failed model load
	    # surfaces as an opaque 500 instead of a 503.
	    if model is None:
	        return jsonify({'error': 'Model not loaded. Check server logs.'}), 503
	    try:
	        # silent=True returns None for a missing/unparseable JSON body instead of
	        # raising, so client mistakes become a 400 rather than a generic 500.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'error': 'Request body must be a JSON object'}), 400

	        texts = data.get('texts', [])

	        if not texts:
	            return jsonify({'error': 'No texts provided'}), 400

	        # A bare string would otherwise be iterated character by character.
	        if not isinstance(texts, list):
	            return jsonify({'error': 'Texts must be a list of strings'}), 400
	        if not all(isinstance(text, str) for text in texts):
	            return jsonify({'error': 'Each text must be a string'}), 400

	        results = []
	        for text in texts:
	            features = extract_features_web(text)
	            if not features:
	                return jsonify({'error': 'Feature extraction failed. Check server logs.'}), 500

	            features_df = build_features_df(features)
	            probabilities = model.predict_proba(features_df)[0]
	            prediction = pick_class_with_thresholds(probabilities)

	            predicted_class = label_encoder.classes_[prediction]

	            results.append({
	                # Echo back at most the first 100 characters of each input.
	                'text': text[:100] + '...' if len(text) > 100 else text,
	                'prediction': {
	                    'class': predicted_class,
	                    'grade': grade_mapping.get(predicted_class, predicted_class),
	                    'confidences': {
	                        class_name: float(probabilities[i])
	                        for i, class_name in enumerate(label_encoder.classes_)
	                    }
	                }
	            })

	        return jsonify({
	            'success': True,
	            'count': len(results),
	            'results': results
	        }), 200

	    except Exception as e:
	        # Keep the existing error payload for clients, but record the traceback.
	        logger.exception("Batch prediction failed")
	        return jsonify({'error': str(e)}), 500

	if __name__ == '__main__':
	    # Listen on every interface; the port comes from PORT and defaults to 7860,
	    # the port Hugging Face Spaces uses.
	    app.run(host='0.0.0.0', port=int(os.getenv('PORT', 7860)))