# NOTE(review): "Spaces: Sleeping" is a Hugging Face Space status banner that
# leaked into this file during scraping; it is not part of the program.
| from flask import Flask, request, jsonify | |
| import numpy as np | |
| import tensorflow as tf | |
| from PIL import Image | |
| import io | |
| import base64 | |
| import re | |
| import joblib | |
| import os | |
app = Flask(__name__)

# Ensure the "images" directory exists; /predict saves each submitted digit there.
IMAGE_DIR = "images"
if not os.path.exists(IMAGE_DIR):
    os.makedirs(IMAGE_DIR)

# Load all models once at startup. Absolute paths (anchored at the working
# directory) are used so this works on Hugging Face Spaces, where the process
# is launched from the Space root.
MODEL_DIR = os.path.join(os.getcwd(), "models")
models = {
    "cnn": tf.keras.models.load_model(os.path.join(MODEL_DIR, "mnist_cnn_model.h5")),
    "svm": joblib.load(os.path.join(MODEL_DIR, "mnist_svm.pkl")),
    "logistic": joblib.load(os.path.join(MODEL_DIR, "mnist_logistic_regression.pkl")),
    "random_forest": joblib.load(os.path.join(MODEL_DIR, "mnist_random_forest.pkl"))
}
# Pre-rendered test-set classification reports, served verbatim by the
# /get_classification_report endpoint so metrics need not be recomputed.
# NOTE(review): column alignment inside these strings was reconstructed after
# table-extraction mangling - verify against the original training output.
classification_reports = {
    "cnn": """
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       980
           1       1.00      1.00      1.00      1135
           2       0.99      0.99      0.99      1032
           3       0.99      1.00      0.99      1010
           4       1.00      0.99      0.99       982
           5       0.98      0.99      0.99       892
           6       1.00      0.98      0.99       958
           7       0.99      0.99      0.99      1028
           8       1.00      0.99      0.99       974
           9       0.99      0.99      0.99      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000
""",
    "svm": """
              precision    recall  f1-score   support

           0     0.9874    0.9896    0.9885      1343
           1     0.9882    0.9925    0.9903      1600
           2     0.9706    0.9819    0.9762      1380
           3     0.9783    0.9749    0.9766      1433
           4     0.9777    0.9822    0.9800      1295
           5     0.9827    0.9796    0.9811      1273
           6     0.9858    0.9921    0.9889      1396
           7     0.9768    0.9807    0.9788      1503
           8     0.9813    0.9683    0.9748      1357
           9     0.9807    0.9669    0.9738      1420

    accuracy                         0.9810     14000
   macro avg     0.9809    0.9809    0.9809     14000
weighted avg     0.9810    0.9810    0.9810     14000
""",
    "random_forest": """
              precision    recall  f1-score   support

           0     0.9844    0.9866    0.9855      1343
           1     0.9831    0.9831    0.9831      1600
           2     0.9522    0.9674    0.9597      1380
           3     0.9579    0.9532    0.9556      1433
           4     0.9617    0.9699    0.9658      1295
           5     0.9707    0.9631    0.9669      1273
           6     0.9800    0.9828    0.9814      1396
           7     0.9668    0.9681    0.9674      1503
           8     0.9599    0.9528    0.9564      1357
           9     0.9566    0.9465    0.9515      1420

    accuracy                         0.9675     14000
   macro avg     0.9673    0.9674    0.9673     14000
weighted avg     0.9675    0.9675    0.9675     14000
""",
    "logistic": """
              precision    recall  f1-score   support

           0     0.9636    0.9650    0.9643      1343
           1     0.9433    0.9675    0.9553      1600
           2     0.9113    0.8935    0.9023      1380
           3     0.9021    0.8939    0.8980      1433
           4     0.9225    0.9290    0.9257      1295
           5     0.8846    0.8790    0.8818      1273
           6     0.9420    0.9534    0.9477      1396
           7     0.9273    0.9421    0.9347      1503
           8     0.8973    0.8696    0.8832      1357
           9     0.9019    0.9000    0.9010      1420

    accuracy                         0.9204     14000
   macro avg     0.9196    0.9193    0.9194     14000
weighted avg     0.9201    0.9204    0.9202     14000
"""
}
def preprocess_image(image, model_type):
    """Turn a PIL image into model-ready input.

    The image is resized to 28x28, converted to grayscale, and scaled into
    [0, 1]. The CNN receives a (1, 28, 28, 1) tensor; every other model
    receives a flattened (1, 784) row vector.
    """
    gray = image.resize((28, 28)).convert('L')
    pixels = np.array(gray) / 255.0
    if model_type == "cnn":
        # Add leading batch and trailing channel axes for the conv net.
        return pixels[np.newaxis, :, :, np.newaxis]
    # Classic sklearn-style models take one flat feature row per sample.
    return pixels.reshape(1, -1)
@app.route('/')
def home():
    """Service index: names the loaded models and the available endpoints.

    Fix: the function had no @app.route decorator, so Flask never registered
    it and the root URL returned 404 (the file's placeholder comments show
    the route decorators were dropped).
    """
    return jsonify({
        "message": "MNIST Classifier API",
        "available_models": list(models.keys()),
        "endpoints": {
            "/predict": "POST - Send image and model_type",
            "/get_classification_report": "POST - Get model metrics"
        }
    })
@app.route('/get_classification_report', methods=['POST'])
def get_classification_report():
    """Return the stored test-set classification report for one model.

    Expects JSON: {"model_type": "cnn" | "svm" | "logistic" | "random_forest"}.

    Fixes: the route decorator was missing, so the endpoint was never
    registered; and a missing/keyless JSON body raised an unhandled
    KeyError (HTTP 500) instead of the normal error payload.
    """
    model_type = (request.json or {}).get('model_type')
    if model_type in classification_reports:
        return jsonify({
            'report': classification_reports[model_type]
        })
    return jsonify({'error': 'Model not found'})
def _cnn_scores(model, processed_image):
    """CNN path: argmax digit plus the raw softmax probability vector."""
    prediction = model.predict(processed_image)
    return np.argmax(prediction), prediction[0].tolist(), "probability"


def _svm_scores(model, processed_image):
    """SVM path: decision-function distances as per-class confidence scores.

    Falls back to simulated scores when decision_function is unavailable or
    raises.
    """
    predicted_digit = model.predict(processed_image)[0]
    if hasattr(model, "decision_function") and callable(getattr(model, "decision_function")):
        try:
            decision_scores = model.decision_function(processed_image)
            row = np.ravel(decision_scores)
            if row.shape[0] == 10:
                # One-vs-rest: already one score per class.
                # (The original tested len(shape) == 2, but sklearn's
                # one-vs-one decision_function is also 2-D with 45 columns,
                # so OvO output was misread as per-class scores.)
                confidence_scores = row.tolist()
            else:
                # One-vs-one: pairwise scores in (0v1, 0v2, ..., 8v9) order.
                # Tally a win for whichever class each comparison favors.
                # (The original summed all positive scores for every i, so
                # each class received the identical count.)
                confidence_scores = [0] * 10
                pair = 0
                for a in range(10):
                    for b in range(a + 1, 10):
                        if row[pair] > 0:
                            confidence_scores[a] += 1
                        else:
                            confidence_scores[b] += 1
                        pair += 1
            # Shift so the minimum is zero; keeps visualization bars non-negative.
            min_score = min(confidence_scores)
            if min_score < 0:
                confidence_scores = [score - min_score for score in confidence_scores]
            return predicted_digit, confidence_scores, "decision_distance"
        except (AttributeError, NotImplementedError) as e:
            print(f"Error getting decision function: {e}")
    return predicted_digit, create_simulated_scores(int(predicted_digit)), "simulated"


def _proba_scores(model, processed_image):
    """Random forest / logistic regression path: predict_proba when available,
    otherwise simulated scores."""
    predicted_digit = model.predict(processed_image)[0]
    if hasattr(model, "predict_proba") and callable(getattr(model, "predict_proba")):
        try:
            return predicted_digit, model.predict_proba(processed_image)[0].tolist(), "probability"
        except (AttributeError, NotImplementedError):
            pass
    return predicted_digit, create_simulated_scores(int(predicted_digit)), "simulated"


@app.route('/predict', methods=['POST'])
def predict():
    """Classify a base64-encoded digit image with the requested model.

    Expects JSON: {"image": "data:image/png;base64,...", "model_type": "cnn"
    | "svm" | "logistic" | "random_forest"}. Returns the predicted digit,
    per-class confidence scores, and how the scores were produced
    ("probability", "decision_distance", or "simulated").

    Fix: the route decorator was missing, so the endpoint was never
    registered; SVM score extraction bugs are fixed in _svm_scores.
    """
    if request.method == 'POST':
        data = request.json['image']
        model_type = request.json['model_type']

        # Strip the data-URL prefix and decode the raw PNG bytes.
        img_data = re.sub('^data:image/png;base64,', '', data)
        img = Image.open(io.BytesIO(base64.b64decode(img_data)))

        # Keep a copy of the last submitted digit for debugging/inspection.
        image_path = os.path.join(IMAGE_DIR, "digit.png")
        img.save(image_path)

        processed_image = preprocess_image(img, model_type)
        if model_type in models:
            model = models[model_type]
            if model_type == "cnn":
                predicted_digit, confidence_scores, score_type = _cnn_scores(model, processed_image)
            elif model_type == "svm":
                predicted_digit, confidence_scores, score_type = _svm_scores(model, processed_image)
            else:
                predicted_digit, confidence_scores, score_type = _proba_scores(model, processed_image)
            return jsonify({
                'digit': int(predicted_digit),
                'confidence_scores': confidence_scores,
                'score_type': score_type
            })
    return jsonify({'error': 'Model not found'})
def create_simulated_scores(predicted_digit):
    """Fabricate a confidence vector summing to 1.0.

    Every digit gets a small uniform base probability; all remaining mass is
    piled onto the predicted digit so it is clearly the winner.
    """
    base = 0.01
    scores = [base] * 10
    # Whatever probability the ten base values leave unclaimed belongs to
    # the predicted class, keeping the total at 1.0.
    leftover = 1.0 - sum(scores)
    scores[predicted_digit] = scores[predicted_digit] + leftover
    return scores
if __name__ == '__main__':
    # Hugging Face Spaces routes external traffic to port 7860; bind all
    # interfaces so the container's reverse proxy can reach the app.
    app.run(host='0.0.0.0', port=7860)