"""Gradio app that predicts soil properties from an uploaded soil image."""

import gradio as gr
import numpy as np
import pandas as pd
from catboost import CatBoostRegressor
from sklearn.decomposition import PCA

# Load the three CatBoost models stored next to this script
# (./model_0.cbm, ./model_1.cbm, ./model_2.cbm).
modelos_cargados = []
for i in range(3):
    model = CatBoostRegressor()
    model.load_model(f'./model_{i}.cbm')
    modelos_cargados.append(model)

# Feature columns: 'array_area' followed by ten groups of 150 columns each,
# one group per statistic prefix, suffixed 1..150. The order of the prefixes
# determines the column order and must not change.
_STAT_PREFIXES = (
    'mean', 'std', 'med', 'q1', 'q3', 'max', 'range', 'D1', 'D10', 'IQR',
)
array_cols = ['array_area'] + [
    f'{prefix}_{i}' for prefix in _STAT_PREFIXES for i in range(1, 151)
]

# Total expected columns: 1 + 10 * 150 = 1501.
expected_columns_count = len(array_cols)

# Module-level PCA with default settings (n_components=None).
pca = PCA()


def get_pca_dataset(datos_df):
    """Fit the module-level PCA on ``datos_df`` and return the projection.

    Args:
        datos_df: DataFrame with one row per sample.

    Returns:
        The PCA-transformed array when ``datos_df`` has more than one row;
        otherwise ``datos_df`` itself, unchanged.
    """
    # A single sample (or none) cannot be used to fit PCA: pass it through.
    if datos_df.shape[0] <= 1:
        return datos_df

    # NOTE(review): this refits the shared ``pca`` on the data being
    # predicted, so the number of output components depends on the batch
    # size, and the return type differs from the single-row branch
    # (ndarray vs DataFrame). Presumably the models expect a fixed feature
    # layout - confirm whether a PCA fitted at training time should be
    # loaded here instead.
    pca.fit(datos_df)
    return pca.transform(datos_df)


def process_soil_image(image):
    """Extract a property vector from the uploaded soil image.

    Args:
        image: Image data convertible with ``np.array``.

    Returns:
        A 1-D array of 1500 values. The extraction is currently simulated
        with ``np.random.rand`` and does not depend on the pixel content.

    Raises:
        ValueError: If the converted image is not a 3-dimensional array.
    """
    arr = np.array(image)
    if arr.ndim != 3:
        raise ValueError("Expected a 3-dimensional array (height, width, channels).")

    # Simulated property extraction (replace with actual logic).
    properties = np.random.rand(1500)
    return properties


def predecir_desde_imagen(image, modelos, array_cols):
    """Predict soil properties from the uploaded image.

    Args:
        image: Image data accepted by ``process_soil_image``.
        modelos: Iterable of fitted models exposing ``predict``.
        array_cols: Full list of feature column names; the first entry
            ('array_area') is skipped because it is not part of the
            extracted properties.

    Returns:
        Element-wise median of the models' predictions.

    Raises:
        ValueError: If the image is not 3-dimensional or the number of
            extracted properties does not match ``len(array_cols) - 1``.
    """
    properties = process_soil_image(image)
    print(f"Extracted properties shape: {properties.shape}")  # Debug statement

    # Validate against the columns actually used to build the DataFrame
    # (the ``array_cols`` argument) rather than a module-level global, so the
    # check and the column list can never disagree.
    expected_properties = len(array_cols) - 1
    if len(properties) != expected_properties:
        raise ValueError(
            f"Expected {expected_properties} properties, but got {len(properties)}."
        )

    datos_df = pd.DataFrame([properties], columns=array_cols[1:])
    print(f"DataFrame shape: {datos_df.shape}")  # Debug statement

    # With a single row this returns ``datos_df`` unchanged.
    pca_datos = get_pca_dataset(datos_df)

    # One row of predictions per model, then the median across models.
    predicciones = [modelo.predict(pca_datos) for modelo in modelos]
    predicciones = np.array(predicciones).reshape(len(modelos), -1)
    mediana_predicciones = np.median(predicciones, axis=0)
    return mediana_predicciones


def predecir_desde_imagen_interface(image):
    """Gradio callback: run the prediction and label the four outputs.

    Args:
        image: Image provided by the ``gr.Image`` input component.

    Returns:
        Dict mapping each soil property label to its predicted value.
    """
    predicciones = predecir_desde_imagen(image, modelos_cargados, array_cols)
    # Assumes the median vector holds at least four values, in the order
    # P, K, Mg, pH - TODO confirm against the trained models.
    return {
        'Fósforo (P)': float(predicciones[0]),
        'Potasio (K)': float(predicciones[1]),
        'Magnesio (Mg)': float(predicciones[2]),
        'pH': float(predicciones[3]),
    }


# Create Gradio interface
demo = gr.Interface(
    fn=predecir_desde_imagen_interface,
    inputs=gr.Image(label="Upload Soil Image"),
    outputs=gr.JSON(label="Predictions"),
)

# Launch the Gradio application.
# No share=True since it's not supported in Hugging Face Spaces.
demo.launch()