# Alalay / app.py
# Jandayl's picture
# re-ran save_webapp_components for consistency
# 69fcbd3
from flask import Flask, request, jsonify
from flask_cors import CORS
import joblib
import pandas as pd
import numpy as np
from feature_extractor_web import extract_features_web
import logging
import os
# Filesystem layout: serialized model components live in ./models next to this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODELS_DIR = os.path.join(BASE_DIR, "models")

app = Flask(__name__)

# CORS allow-list: a comma-separated ALLOWED_ORIGINS environment variable
# overrides the defaults below. Entries are trimmed, stripped of trailing
# slashes, and blank entries are dropped.
default_origins = (
    "https://alalayfe.vercel.app,"
    "https://www.alalayfe.vercel.app,"
    "http://localhost:3000"
)
ALLOWED_ORIGINS = [
    entry.strip().rstrip("/")
    for entry in os.getenv('ALLOWED_ORIGINS', default_origins).split(',')
    if entry.strip()
]
CORS(app, origins=ALLOWED_ORIGINS)

logger = logging.getLogger(__name__)
@app.route('/')
def home():
    """Return a small JSON description of the service and its endpoints."""
    service_info = {
        'service': 'Alalay Readability API',
        'status': 'running',
        'endpoints': ['/health', '/api/predict', '/api/predict/batch'],
    }
    return jsonify(service_info), 200
# Load all the saved components.
#
# Every component is bound to None up front so the names always exist at
# module level, even when loading fails part-way through. Code that reaches
# them without a `model is None` guard then sees None, not a NameError.
model = None
label_encoder = None
grade_mapping = None
thresholds = None
feature_info = None

print("Loading model components...")
try:
    # NOTE(security): joblib.load unpickles arbitrary objects. These files must
    # only ever be trusted artifacts shipped with the application.
    model = joblib.load(os.path.join(MODELS_DIR, "readability_model.pkl"))
    label_encoder = joblib.load(os.path.join(MODELS_DIR, "label_encoder.pkl"))
    grade_mapping = joblib.load(os.path.join(MODELS_DIR, "grade_mapping.pkl"))
    thresholds = joblib.load(os.path.join(MODELS_DIR, "thresholds.pkl"))
    feature_info = joblib.load(os.path.join(MODELS_DIR, "feature_info.pkl"))
    print("All components loaded successfully!")
    print(f" Model type: {type(model.named_steps['classifier']).__name__}")
    print(f" Classes: {label_encoder.classes_}")
except Exception as e:
    print(f"Error loading models: {e}")
    # Keep the full traceback in the server logs; the endpoints tell clients
    # to "Check server logs" when the model is unavailable.
    logger.exception("Failed to load model components from %s", MODELS_DIR)
    # A partially loaded set of components is unusable, so reset all of them.
    # `model is None` remains the single "not loaded" signal for the endpoints.
    model = None
    label_encoder = None
    grade_mapping = None
    thresholds = None
    feature_info = None
def build_features_df(features: dict) -> pd.DataFrame:
    """Assemble a single-row DataFrame of model inputs from extracted features.

    Columns follow ``feature_info['all_features']`` (falling back to the keys
    of ``features`` when that entry is absent). A column missing from
    ``features`` is filled with ``'Other'`` if it is listed in
    ``feature_info['categorical_cols']`` and with ``0.0`` otherwise.

    Args:
        features: Mapping of feature name to extracted value.

    Returns:
        A one-row DataFrame whose columns are in the order described above.
    """
    ordered_columns = feature_info.get('all_features', list(features.keys()))
    categorical = set(feature_info.get('categorical_cols', []))

    def _default_for(column):
        # Placeholder used for a column the extractor did not produce.
        return 'Other' if column in categorical else 0.0

    record = {
        column: features[column] if column in features else _default_for(column)
        for column in ordered_columns
    }
    return pd.DataFrame([record], columns=ordered_columns)
def pick_class_with_thresholds(probabilities: np.ndarray) -> int:
    """Choose a class index from per-class probabilities and thresholds.

    A class is eligible when its probability is at least its entry in
    ``thresholds`` (0.5 when the class has no entry). The eligible class with
    the highest probability wins, and the earliest index wins a tie. When no
    class is eligible, the plain argmax index is returned.

    Args:
        probabilities: Per-class probabilities, indexed in the same order as
            ``label_encoder.classes_``.

    Returns:
        Index of the selected class.
    """
    # Computed up front so it is ready as the fallback answer.
    fallback_idx = int(np.argmax(probabilities))

    winner = None
    for idx, label in enumerate(label_encoder.classes_):
        score = probabilities[idx]
        if score >= thresholds.get(label, 0.5):
            # Strict '>' keeps the earliest class when scores tie.
            if winner is None or score > probabilities[winner]:
                winner = idx

    return fallback_idx if winner is None else winner
# MongoDB connection (optional for now)
# try:
# MONGO_URI = os.getenv('MONGO_URI', 'mongodb://localhost:27017/')
# client = MongoClient(MONGO_URI)
# db = client['readability_db']
# # Test connection
# client.admin.command('ping')
# print("MongoDB connected")
# except:
# print("MongoDB not available - continuing without database")
# db = None
@app.route('/health', methods=['GET'])
@app.route('/api/health', methods=['GET'])
def health():
    """Report whether the model is loaded, plus its classifier type and classes.

    Always answers with HTTP 200. The ``status`` field is ``'healthy'`` when
    the model is loaded and ``'degraded'`` when it is not.
    """
    if model is None:
        payload = {'status': 'degraded', 'model': None, 'classes': []}
    else:
        payload = {
            'status': 'healthy',
            'model': type(model.named_steps['classifier']).__name__,
            'classes': label_encoder.classes_.tolist(),
        }
    return jsonify(payload), 200
@app.route('/api/predict', methods=['POST'])
def predict():
    """Predict the readability class of a single text.

    Expects a JSON object body with a string ``text`` field that is at least
    10 characters long after surrounding whitespace is stripped.

    Returns:
        A ``(response, status)`` pair:

        * 200 - truncated text, predicted class, grade level, per-class
          confidences and the extracted features.
        * 400 - the body is not a JSON object, or ``text`` is missing, not a
          string, or too short.
        * 500 - feature extraction or model inference failed.
        * 503 - the model components were not loaded at startup.
    """
    if model is None:
        return jsonify({'error': 'Model not loaded. Check server logs.'}), 503

    # silent=True returns None for a missing, malformed or non-JSON body.
    # That lets client mistakes be answered with 400 here, not turned into a
    # 500 by the generic handler below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    text = data.get('text', '')
    if not isinstance(text, str):
        return jsonify({'error': 'Field "text" must be a string'}), 400
    text = text.strip()
    if not text:
        return jsonify({'error': 'No text provided'}), 400
    if len(text) < 10:
        return jsonify({'error': 'Text must be at least 10 characters'}), 400

    try:
        # Extract features
        features = extract_features_web(text)
        if not features:
            logger.error("Feature extraction returned no features")
            return jsonify({'error': 'Feature extraction failed. Check server logs.'}), 500

        # Convert to DataFrame using training-time feature order.
        features_df = build_features_df(features)

        # Get prediction
        probabilities = model.predict_proba(features_df)[0]

        # Apply threshold tuning with deterministic tie handling.
        final_prediction = pick_class_with_thresholds(probabilities)
        classes = label_encoder.classes_
        predicted_class = classes[final_prediction]
        grade_level = grade_mapping.get(predicted_class, predicted_class)

        # NumPy integer/bool scalars are not subclasses of the Python builtins,
        # so they are listed explicitly to reach jsonify as plain floats.
        numeric_types = (int, float, np.integer, np.floating, np.bool_)

        # Prepare response
        response = {
            'success': True,
            # Echo at most the first 200 characters of the input.
            'text': text[:200] + '...' if len(text) > 200 else text,
            'prediction': {
                'predicted_class': predicted_class,
                'grade_level': grade_level,
                'confidences': {
                    class_name: float(probabilities[i])
                    for i, class_name in enumerate(classes)
                }
            },
            'features': {
                k: float(v) if isinstance(v, numeric_types) else v
                for k, v in features.items()
            }
        }

        # # Store in MongoDB if available
        # if db:
        #     db.texts.insert_one({
        #         'text': text,
        #         'prediction': response['prediction'],
        #         'features': features,
        #         'timestamp': datetime.utcnow()
        #     })

        return jsonify(response), 200
    except Exception as e:
        # Top-level boundary: record the traceback, then report the failure.
        logger.exception("Prediction failed")
        return jsonify({'error': str(e)}), 500
@app.route('/api/predict/batch', methods=['POST'])
def batch_predict():
    """Predict the readability class of several texts in one request.

    Expects a JSON object body with a non-empty ``texts`` list of strings.

    Returns:
        A ``(response, status)`` pair:

        * 200 - ``results`` holds one entry per input text, in order, with
          the truncated text, predicted class, grade and per-class
          confidences.
        * 400 - the body is not a JSON object, or ``texts`` is missing, empty
          or not a list of strings.
        * 500 - feature extraction or model inference failed for any text.
        * 503 - the model components were not loaded at startup.
    """
    # Same guard as the single-text endpoint: without it a missing model
    # surfaces as an opaque 500.
    if model is None:
        return jsonify({'error': 'Model not loaded. Check server logs.'}), 503

    # silent=True returns None for a missing, malformed or non-JSON body, so
    # client mistakes can be answered with 400 here.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    texts = data.get('texts', [])
    if not texts:
        return jsonify({'error': 'No texts provided'}), 400
    # A bare string would otherwise be iterated character by character.
    if not isinstance(texts, list) or not all(isinstance(t, str) for t in texts):
        return jsonify({'error': 'Field "texts" must be a list of strings'}), 400

    try:
        classes = label_encoder.classes_
        results = []
        for text in texts:
            features = extract_features_web(text)
            if not features:
                logger.error("Feature extraction returned no features for a batch item")
                return jsonify({'error': 'Feature extraction failed. Check server logs.'}), 500

            features_df = build_features_df(features)
            probabilities = model.predict_proba(features_df)[0]
            prediction = pick_class_with_thresholds(probabilities)
            predicted_class = classes[prediction]

            results.append({
                # Echo at most the first 100 characters of each input.
                'text': text[:100] + '...' if len(text) > 100 else text,
                'prediction': {
                    'class': predicted_class,
                    'grade': grade_mapping.get(predicted_class, predicted_class),
                    'confidences': {
                        class_name: float(probabilities[i])
                        for i, class_name in enumerate(classes)
                    }
                }
            })

        return jsonify({
            'success': True,
            'count': len(results),
            'results': results
        }), 200
    except Exception as e:
        # Top-level boundary: record the traceback, then report the failure.
        logger.exception("Batch prediction failed")
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Listen on all interfaces. The port comes from the PORT environment
    # variable and defaults to 7860 (the port Hugging Face Spaces uses).
    app.run(host='0.0.0.0', port=int(os.getenv('PORT', 7860)))