Spaces:

axvg
/

alsp2

Sleeping

alsp2 / main.py

Alex Vega

init

f1acbf1 9 months ago

4.43 kB

	import pickle
	import numpy as np
	import io
	import math
	import cv2
	from skimage.feature import hog
	from fastapi import FastAPI, File, UploadFile, HTTPException
	from PIL import Image

	# Application object that the route decorators in this module attach to.
	# The title, description and version are passed straight to FastAPI.
	app = FastAPI(
	    title="Enhanced Peruvian Sign Language (LSP) Recognition API",
	    description=(
	        "Upload an image of a Peruvian Sign Language alphabet sign to predict "
	        "the corresponding letter using an enhanced Self-Organizing Map (SOM) "
	        "with HOG features."
	    ),
	    version="2.0.0",
	)

	MODEL_FILENAME = 'lsp_som_model_enhanced.pkl'

	# Module-level model assets. Every name is bound up front so that a missing
	# model file leaves the module in one consistent "unavailable" state
	# (everything None) instead of defining only `som`.
	som = None
	label_map = None
	CLASSES = None
	IMG_SIZE = None
	HOG_PARAMS = None

	try:
	    # NOTE(security): pickle.load can execute arbitrary code while
	    # deserializing, so the model file must come from a trusted source.
	    with open(MODEL_FILENAME, 'rb') as f:
	        model_data = pickle.load(f)
	except FileNotFoundError:
	    # The module still loads; the asset names above stay None.
	    print(f"❌ FATAL ERROR: Model file '{MODEL_FILENAME}' not found.")
	else:
	    # Unpack the pickled dictionary. A missing key raises KeyError and is
	    # deliberately not caught here (same as before this change).
	    som = model_data['som']
	    label_map = model_data['label_map']
	    CLASSES = model_data['classes']
	    IMG_SIZE = model_data['img_size']
	    HOG_PARAMS = model_data['feature_extraction_params']  # HOG settings stored with the model
	    print(f"✅ Model '{MODEL_FILENAME}' and assets loaded successfully.")
	    print(f" - Classes: {CLASSES}")
	    print(f" - Expected Image Size for Processing: {IMG_SIZE}x{IMG_SIZE}")
	    print(f" - Feature Extractor: {HOG_PARAMS['type']}")

	def preprocess_and_extract_features_from_bytes(image_bytes: bytes):
	    """Turn an encoded image into a HOG feature vector.

	    The bytes are decoded with OpenCV, converted to YCrCb and thresholded
	    with a fixed channel range (the mask the original code calls the skin
	    mask). The bounding box of the largest external contour of that mask is
	    cropped from the RGB image, resized to IMG_SIZE x IMG_SIZE, converted to
	    grayscale and passed to ``hog`` with the settings stored in HOG_PARAMS.

	    Args:
	        image_bytes: Raw contents of the uploaded image file.

	    Returns:
	        Whatever ``skimage.feature.hog`` returns for the grayscale crop.

	    Raises:
	        HTTPException: With status 400 whenever any step raises, including
	            the ValueError cases for undecodable input and an empty
	            contour list. The original error text is embedded in ``detail``.
	    """
	    try:
	        raw_buffer = np.frombuffer(image_bytes, np.uint8)
	        decoded_bgr = cv2.imdecode(raw_buffer, cv2.IMREAD_COLOR)
	        if decoded_bgr is None:
	            raise ValueError("Could not decode image bytes. The file may be corrupt or not an image.")
	        rgb_image = cv2.cvtColor(decoded_bgr, cv2.COLOR_BGR2RGB)

	        # Fixed per-channel lower/upper bounds applied to the YCrCb image.
	        lower_bound = np.array([0, 135, 85])
	        upper_bound = np.array([255, 180, 135])
	        mask = cv2.inRange(
	            cv2.cvtColor(rgb_image, cv2.COLOR_RGB2YCrCb),
	            lower_bound,
	            upper_bound,
	        )

	        found_contours, _hierarchy = cv2.findContours(
	            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
	        )
	        if not found_contours:
	            raise ValueError("No contours found in the image. The hand sign may not be clear enough.")

	        # Crop the RGB image to the bounding box of the biggest contour.
	        biggest = max(found_contours, key=cv2.contourArea)
	        left, top, width, height = cv2.boundingRect(biggest)
	        hand_crop = rgb_image[top:top + height, left:left + width]

	        hand_square = cv2.resize(hand_crop, (IMG_SIZE, IMG_SIZE))
	        hand_gray = cv2.cvtColor(hand_square, cv2.COLOR_RGB2GRAY)

	        # Forward only the HOG settings the extractor call uses.
	        hog_keys = (
	            'orientations',
	            'pixels_per_cell',
	            'cells_per_block',
	            'transform_sqrt',
	            'block_norm',
	        )
	        hog_kwargs = {key: HOG_PARAMS[key] for key in hog_keys}
	        return hog(hand_gray, **hog_kwargs)

	    except Exception as exc:
	        raise HTTPException(status_code=400, detail=f"Image processing failed. Error: {exc}")


	@app.get("/", tags=["Status"])
	def read_root():
	    """Return a fixed status payload for GET requests to the root path."""
	    payload = {
	        "status": "ok",
	        "message": "Welcome to the Enhanced LSP Recognition API!",
	    }
	    return payload

	def _nearest_mapped_label(winner_neuron, label_map):
	    """Return the label of the mapped position closest to ``winner_neuron``.

	    Squared Euclidean distance is compared, which orders positions exactly
	    like the true distance without needing a square root. On a tie the
	    first position in ``label_map`` iteration order wins. Returns -1 when
	    ``label_map`` is empty.
	    """
	    if not label_map:
	        return -1
	    win_x, win_y = winner_neuron[0], winner_neuron[1]
	    closest_pos = min(
	        label_map,
	        key=lambda pos: (win_x - pos[0]) ** 2 + (win_y - pos[1]) ** 2,
	    )
	    return label_map[closest_pos]


	@app.post("/predict", tags=["Prediction"])
	async def predict_sign(file: UploadFile = File(..., description="An image file of a Peruvian Sign Language sign.")):
	    """Predict the letter shown in an uploaded sign image.

	    The upload is read, converted to a feature vector, and handed to
	    ``som.winner``. If the winning position is a key of ``label_map`` its
	    label is used directly; otherwise the label of the nearest mapped
	    position is used as a best guess.

	    Args:
	        file: The uploaded image file.

	    Returns:
	        A dict with the upload's filename, the predicted letter (or
	        "Unknown"), a description of how the prediction was obtained, and
	        the winning position as a list of ints.

	    Raises:
	        HTTPException: 503 when no model is loaded; 400 is raised by the
	            preprocessing step when the image cannot be processed.
	    """
	    # Explicit None check: the loader sets `som` to None when the model
	    # file is missing.
	    if som is None:
	        raise HTTPException(status_code=503, detail="Model is not available. API cannot process requests.")

	    image_bytes = await file.read()
	    feature_vector = preprocess_and_extract_features_from_bytes(image_bytes)

	    winner_neuron = som.winner(feature_vector)
	    predicted_index = label_map.get(winner_neuron, -1)

	    # No label stored for the winning position: fall back to the nearest
	    # position that does have one.
	    is_best_guess = predicted_index == -1
	    if is_best_guess:
	        predicted_index = _nearest_mapped_label(winner_neuron, label_map)

	    if predicted_index != -1:
	        predicted_letter = CLASSES[predicted_index]
	        prediction_type = "Nearest Neighbor Guess" if is_best_guess else "Direct Match"
	    else:
	        # Only reachable when label_map has no entries at all.
	        predicted_letter = "Unknown"
	        prediction_type = "Critical Error: No mapped neurons found on the entire map."

	    response = {
	        "filename": file.filename,
	        "predicted_letter": predicted_letter,
	        "prediction_type": prediction_type,
	        # Plain ints so the coordinates serialize as JSON numbers.
	        "winner_neuron_on_map": [int(coord) for coord in winner_neuron],
	    }
	    print(f"[LOG] Prediction successful. Response: {response}")
	    return response