# Soil_Prediction / app.py
# amirkhanbloch's picture
# Update app.py
# 10f9430 verified
import gradio as gr
import numpy as np
import pandas as pd
from catboost import CatBoostRegressor
from sklearn.decomposition import PCA
# Load the CatBoost models model_0.cbm .. model_2.cbm from the working directory.
def _load_catboost_model(path):
    """Return a CatBoostRegressor restored from the model file at *path*."""
    regressor = CatBoostRegressor()
    regressor.load_model(path)
    return regressor


modelos_cargados = [_load_catboost_model(f'./model_{i}.cbm') for i in range(3)]
# Column layout: 'array_area' first, then every statistic below expanded over
# the indices 1..150, one statistic after another in the listed order.
_STAT_NAMES = ('mean', 'std', 'med', 'q1', 'q3', 'max', 'range', 'D1', 'D10', 'IQR')
array_cols = ['array_area'] + [
    f'{stat}_{idx}' for stat in _STAT_NAMES for idx in range(1, 151)
]
# Total expected columns: 1 + 10 * 150 = 1501
expected_columns_count = len(array_cols)
# Module-level PCA instance shared (and refitted) by get_pca_dataset.
pca = PCA()  # default construction; n_components is left unspecified


def get_pca_dataset(datos_df):
    """Fit the shared PCA on *datos_df* and return its projection.

    A frame with at most one row is handed back untouched, because PCA is
    only fitted when more than one sample is available.

    Args:
        datos_df: Tabular data exposing ``shape``; rows are samples.

    Returns:
        The PCA-transformed data when ``datos_df`` has more than one row,
        otherwise ``datos_df`` itself (same object, not transformed).

    NOTE(review): the PCA is refitted on whatever data is passed in, which
    mutates the module-level ``pca`` on every multi-row call. Presumably the
    models expect a projection fitted on training data -- confirm.
    """
    sample_count = datos_df.shape[0]
    if sample_count <= 1:
        # Not enough samples to fit: return the original data as-is.
        return datos_df
    pca.fit(datos_df)
    return pca.transform(datos_df)
def process_soil_image(image):
    """Turn an uploaded soil image into a 1500-element property vector.

    Args:
        image: Anything ``np.array`` converts into a 3-dimensional array
            (height, width, channels).

    Returns:
        A 1-D numpy array of 1500 values. The values are currently random
        placeholders and do not depend on the pixel content.

    Raises:
        ValueError: If the converted image is not 3-dimensional.
    """
    pixels = np.array(image)
    if pixels.ndim == 3:
        # Placeholder extraction: simulated values until real logic is added.
        return np.random.rand(1500)
    raise ValueError("Expected a 3-dimensional array (height, width, channels).")
def predecir_desde_imagen(image, modelos, array_cols):
    """Predict soil properties from the uploaded image.

    Args:
        image: Image forwarded to ``process_soil_image``.
        modelos: Non-empty sequence of models exposing ``predict``.
        array_cols: Column names. The first entry ('array_area') is skipped
            because it is not part of the extracted properties; the remaining
            names label the feature columns.

    Returns:
        1-D numpy array holding, per output position, the median of the
        predictions produced by all models.

    Raises:
        ValueError: If ``modelos`` is empty, or if the number of extracted
            properties differs from the number of feature columns.
    """
    if len(modelos) == 0:
        # Fail early with a clear message instead of a reshape error later on.
        raise ValueError("At least one model is required to make a prediction.")

    # Validate against the columns actually passed in, not a module global, so
    # the check always agrees with the DataFrame built below.
    feature_cols = list(array_cols[1:])

    # Process the soil image to extract properties
    properties = process_soil_image(image)
    print(f"Extracted properties shape: {properties.shape}")  # Debug statement

    if len(properties) != len(feature_cols):
        raise ValueError(f"Expected {len(feature_cols)} properties, but got {len(properties)}.")

    # Single-row frame: one sample, one column per extracted property.
    datos_df = pd.DataFrame([properties], columns=feature_cols)
    print(f"DataFrame shape: {datos_df.shape}")  # Debug statement

    pca_datos = get_pca_dataset(datos_df)

    # One row per model; flatten whatever each model returns into that row.
    predicciones = np.array(
        [modelo.predict(pca_datos) for modelo in modelos]
    ).reshape(len(modelos), -1)

    # Median across models for every output position.
    return np.median(predicciones, axis=0)
def predecir_desde_imagen_interface(image):
    """Gradio callback: run the prediction and label the results.

    Args:
        image: The uploaded image as delivered by the Gradio input component.

    Returns:
        Dict mapping each soil property label to its predicted value as a
        plain ``float``.

    NOTE(review): assumes the prediction vector holds at least four entries
    ordered P, K, Mg, pH -- confirm against how the models were trained.
    """
    etiquetas = ('Fósforo (P)', 'Potasio (K)', 'Magnesio (Mg)', 'pH')
    predicciones = predecir_desde_imagen(image, modelos_cargados, array_cols)
    # Index explicitly so a too-short prediction vector still raises IndexError.
    return {
        etiqueta: float(predicciones[posicion])
        for posicion, etiqueta in enumerate(etiquetas)
    }
# Build the Gradio interface: one image upload in, one JSON payload out.
image_input = gr.Image(label="Upload Soil Image")
json_output = gr.JSON(label="Predictions")
demo = gr.Interface(
    fn=predecir_desde_imagen_interface,
    inputs=image_input,
    outputs=json_output,
)

# Start the app. share=True is omitted because it is not supported in
# Hugging Face Spaces.
demo.launch()