| import pickle |
| import numpy as np |
| import io |
| import math |
| import cv2 |
| from skimage.feature import hog |
| from fastapi import FastAPI, File, UploadFile, HTTPException |
| from PIL import Image |
|
|
# Application metadata handed to the FastAPI constructor.
_API_METADATA = {
    "title": "Enhanced Peruvian Sign Language (LSP) Recognition API",
    "description": (
        "Upload an image of a Peruvian Sign Language alphabet sign to predict "
        "the corresponding letter using an enhanced Self-Organizing Map (SOM) "
        "with HOG features."
    ),
    "version": "2.0.0",
}

# The ASGI application object that the route decorators below attach to.
app = FastAPI(**_API_METADATA)
|
|
# File name of the pickled model bundle; opened relative to the working directory.
MODEL_FILENAME = 'lsp_som_model_enhanced.pkl'

# Model assets. They stay ``None`` when loading fails so that every name is
# always bound, instead of only ``som`` being defined on the failure path.
som = None
label_map = None
CLASSES = None
IMG_SIZE = None
HOG_PARAMS = None

try:
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # MODEL_FILENAME must only ever point at a trusted, locally produced file.
    with open(MODEL_FILENAME, 'rb') as f:
        model_data = pickle.load(f)
    # Read every required entry before binding any global, so a bundle that
    # is missing a key cannot leave the module half-initialised.
    _loaded_assets = (
        model_data['som'],
        model_data['label_map'],
        model_data['classes'],
        model_data['img_size'],
        model_data['feature_extraction_params'],
    )
    _extractor_type = _loaded_assets[4]['type']
except FileNotFoundError:
    print(f"❌ FATAL ERROR: Model file '{MODEL_FILENAME}' not found.")
except (KeyError, TypeError, EOFError, pickle.UnpicklingError) as exc:
    # A truncated or malformed bundle is treated like a missing one: the
    # service starts, but ``som`` stays ``None``.
    print(f"❌ FATAL ERROR: Model file '{MODEL_FILENAME}' is invalid: {exc!r}")
else:
    som, label_map, CLASSES, IMG_SIZE, HOG_PARAMS = _loaded_assets
    print(f"✅ Model '{MODEL_FILENAME}' and assets loaded successfully.")
    print(f" - Classes: {CLASSES}")
    print(f" - Expected Image Size for Processing: {IMG_SIZE}x{IMG_SIZE}")
    print(f" - Feature Extractor: {_extractor_type}")
|
def preprocess_and_extract_features_from_bytes(image_bytes: bytes) -> np.ndarray:
    """Decode an uploaded image, isolate the hand and return its HOG descriptor.

    Steps: decode the bytes with OpenCV, build a skin mask in YCrCb space,
    crop the bounding box of the largest mask contour, resize the crop to
    ``IMG_SIZE`` x ``IMG_SIZE``, convert it to grayscale and compute HOG
    features using the settings stored in ``HOG_PARAMS``.

    Args:
        image_bytes: Raw contents of the uploaded image file.

    Returns:
        The feature vector produced by ``skimage.feature.hog``.

    Raises:
        HTTPException: Status 400 whenever any step fails (empty upload,
            undecodable data, no skin-coloured region, or an error raised by
            OpenCV / scikit-image). The underlying exception is chained as
            ``__cause__``.
    """
    try:
        # Reject empty uploads up front with a readable message instead of
        # relying on whatever the decoder does with a zero-length buffer.
        if not image_bytes:
            raise ValueError("Uploaded file is empty.")

        nparr = np.frombuffer(image_bytes, np.uint8)
        img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img_bgr is None:
            raise ValueError("Could not decode image bytes. The file may be corrupt or not an image.")
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Skin segmentation. Channel order is (Y, Cr, Cb): any Y is accepted,
        # Cr must lie in [135, 180] and Cb in [85, 135].
        ycrcb = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2YCrCb)
        lower_bound = np.array([0, 135, 85])
        upper_bound = np.array([255, 180, 135])
        skin_mask = cv2.inRange(ycrcb, lower_bound, upper_bound)

        contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            raise ValueError("No contours found in the image. The hand sign may not be clear enough.")

        # The largest masked region is taken to be the hand.
        largest_contour = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest_contour)
        cropped_hand = img_rgb[y:y + h, x:x + w]

        resized_hand = cv2.resize(cropped_hand, (IMG_SIZE, IMG_SIZE))
        gray_hand = cv2.cvtColor(resized_hand, cv2.COLOR_RGB2GRAY)

        hog_features = hog(
            gray_hand,
            orientations=HOG_PARAMS['orientations'],
            pixels_per_cell=HOG_PARAMS['pixels_per_cell'],
            cells_per_block=HOG_PARAMS['cells_per_block'],
            transform_sqrt=HOG_PARAMS['transform_sqrt'],
            block_norm=HOG_PARAMS['block_norm'],
        )

        return hog_features

    except Exception as e:
        # Every failure in the pipeline is reported to the client as a 400;
        # chaining keeps the original traceback available for debugging.
        raise HTTPException(status_code=400, detail=f"Image processing failed. Error: {e}") from e
|
|
|
|
@app.get("/", tags=["Status"])
def read_root():
    """Return a static status payload with a welcome message."""
    payload = {
        "status": "ok",
        "message": "Welcome to the Enhanced LSP Recognition API!",
    }
    return payload
|
|
def _nearest_mapped_label(winner_neuron, label_map):
    """Return the label of the mapped neuron closest to *winner_neuron*.

    Args:
        winner_neuron: Two-element grid position of the winning neuron.
        label_map: Mapping from grid position to class index.

    Returns:
        The class index stored for the nearest mapped position, or ``-1``
        when ``label_map`` is empty.
    """
    if not label_map:
        return -1
    win_row, win_col = winner_neuron[0], winner_neuron[1]
    # Squared distance ranks candidates exactly like Euclidean distance but
    # avoids the square root. ``min`` keeps the first of several equally
    # close entries, matching a strict "<" scan in iteration order.
    _, nearest_label = min(
        label_map.items(),
        key=lambda item: (win_row - item[0][0]) ** 2 + (win_col - item[0][1]) ** 2,
    )
    return nearest_label


@app.post("/predict", tags=["Prediction"])
async def predict_sign(file: UploadFile = File(..., description="An image file of a Peruvian Sign Language sign.")):
    """Predict the sign-language letter shown in an uploaded image.

    The upload is turned into a feature vector, the SOM's winning neuron for
    that vector is looked up in ``label_map``, and if that neuron has no
    label the label of the nearest mapped neuron is used instead.

    Args:
        file: The uploaded image.

    Returns:
        A dict with the keys ``filename``, ``predicted_letter``,
        ``prediction_type`` and ``winner_neuron_on_map``.

    Raises:
        HTTPException: Status 503 when no model is loaded; status 400 is
            propagated from the preprocessing step for unusable images.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    # Identity check: only a missing model disables the endpoint, regardless
    # of how the model object evaluates in a boolean context.
    if som is None:
        raise HTTPException(status_code=503, detail="Model is not available. API cannot process requests.")

    image_bytes = await file.read()

    # Decoding, contour search and HOG extraction are CPU-bound; running them
    # in the worker pool keeps the event loop free for other requests.
    feature_vector = await run_in_threadpool(
        preprocess_and_extract_features_from_bytes, image_bytes
    )

    # Kept on the event loop: nothing here establishes that ``som.winner`` is
    # safe to call from several threads at once.
    winner_neuron = som.winner(feature_vector)
    predicted_index = label_map.get(winner_neuron, -1)

    # -1 marks "winning neuron carries no label"; fall back to the closest
    # neuron that does.
    is_best_guess = predicted_index == -1
    if is_best_guess:
        predicted_index = _nearest_mapped_label(winner_neuron, label_map)

    if predicted_index != -1:
        predicted_letter = CLASSES[predicted_index]
        prediction_type = "Nearest Neighbor Guess" if is_best_guess else "Direct Match"
    else:
        predicted_letter = "Unknown"
        prediction_type = "Critical Error: No mapped neurons found on the entire map."

    response = {
        "filename": file.filename,
        "predicted_letter": predicted_letter,
        "prediction_type": prediction_type,
        # Plain ints so the coordinates serialise as JSON numbers.
        "winner_neuron_on_map": [int(coord) for coord in winner_neuron],
    }
    print(f"[LOG] Prediction successful. Response: {response}")
    return response