"""Agrotourism service planner.

Loads the ``ombhojane/ckv5`` dataset, tunes a random-forest classifier with a
grid search, and serves the best model through a Gradio form.
"""

from datasets import load_dataset
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from category_encoders import OneHotEncoder
import gradio as gr

# Column names shared by the training pipeline and the prediction path.
NUMERIC_COLUMNS = ['Land Size (acres)', 'Budget (INR)']
CATEGORICAL_COLUMNS = ['Biodiversity', 'Existing Infrastructure']
TARGET_COLUMN = 'Service'

# Load the dataset
dataset = load_dataset("ombhojane/ckv5")
df = pd.DataFrame(dataset['train'])

# Preprocessing
# NOTE(review): the encoder and scaler are fitted on the full dataset before
# the train/test split, so X_test is not a strictly held-out set. Revisit this
# if the test split is ever used for evaluation.
encoder = OneHotEncoder(cols=CATEGORICAL_COLUMNS, use_cat_names=True)
scaler = StandardScaler()
df_encoded = encoder.fit_transform(df)
df_encoded[NUMERIC_COLUMNS] = scaler.fit_transform(df_encoded[NUMERIC_COLUMNS])

# Splitting features and target
X = df_encoded.drop(TARGET_COLUMN, axis=1)
y = df_encoded[TARGET_COLUMN]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model Training
model = RandomForestClassifier()
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_


def predict_service(land_size, biodiversity, budget, infrastructure):
    """Predict the agrotourism service for a single farm description.

    Args:
        land_size: Land size in acres (unscaled, as entered by the user).
        biodiversity: One of the ``Biodiversity`` categories.
        budget: Budget in INR (unscaled, as entered by the user).
        infrastructure: One of the ``Existing Infrastructure`` categories.

    Returns:
        The label predicted by ``best_model`` for the described farm.

    Raises:
        ValueError: If any argument is ``None`` or a numeric argument cannot
            be converted to ``float``.
    """
    provided = {
        'Land Size (acres)': land_size,
        'Biodiversity': biodiversity,
        'Budget (INR)': budget,
        'Existing Infrastructure': infrastructure,
    }
    missing = [label for label, value in provided.items() if value is None]
    if missing:
        raise ValueError("missing value for " + ", ".join(missing))

    # Start from an all-zero numeric row laid out exactly like the training
    # features, so the column order matches what the model was fitted on.
    row = dict.fromkeys(X_train.columns, 0.0)
    row['Land Size (acres)'] = float(land_size)
    row['Budget (INR)'] = float(budget)

    # With use_cat_names=True the encoder names one-hot columns
    # "<feature>_<category>". Look the column up by its exact name instead of
    # substring matching, which would also switch on columns of categories
    # that merely contain the selected value.
    selections = zip(CATEGORICAL_COLUMNS, (biodiversity, infrastructure))
    for feature, category in selections:
        column = f"{feature}_{category}"
        # A category unseen during training keeps an all-zero encoding.
        if column in row:
            row[column] = 1.0

    input_df = pd.DataFrame([row], columns=X_train.columns)
    # Apply the same scaling the training features received.
    input_df[NUMERIC_COLUMNS] = scaler.transform(input_df[NUMERIC_COLUMNS])
    return best_model.predict(input_df)[0]


def gradio_interface(land_size, biodiversity, budget, infrastructure):
    """Gradio callback: format the prediction as a user-facing sentence.

    Returns a readable message instead of raising when an input is missing or
    invalid, so the UI does not surface a bare error.
    """
    try:
        prediction = predict_service(
            land_size, biodiversity, budget, infrastructure
        )
    except ValueError as err:
        return f"Could not make a prediction: {err}"
    return f"The predicted service is: {prediction}"


iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Number(label="Land Size (acres)"),
        gr.Dropdown(
            label="Biodiversity",
            choices=df['Biodiversity'].unique().tolist(),
        ),
        gr.Number(label="Budget (INR)"),
        gr.Dropdown(
            label="Existing Infrastructure",
            choices=df['Existing Infrastructure'].unique().tolist(),
        ),
    ],
    outputs=gr.Text(label="Predicted Service"),
    title="Agrotourism Service Planner",
    description="Please give land size, available budget, and existing infrastructure to find best suitable agrotourism service on your farm!",
)

iface.launch()