Spaces:
Sleeping
Sleeping
| from datasets import load_dataset | |
| import pandas as pd | |
| from sklearn.model_selection import train_test_split, GridSearchCV | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.metrics import classification_report, accuracy_score | |
| from category_encoders import OneHotEncoder | |
| import gradio as gr | |
# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
dataset = load_dataset("ombhojane/ckv5")
df = pd.DataFrame(dataset['train'])

# ---------------------------------------------------------------------------
# Preprocessing
# ---------------------------------------------------------------------------
# `encoder` and `scaler` stay at module level because predict_service()
# reuses the fitted objects at inference time.
numeric_columns = ['Land Size (acres)', 'Budget (INR)']
encoder = OneHotEncoder(cols=['Biodiversity', 'Existing Infrastructure'], use_cat_names=True)
scaler = StandardScaler()

df_encoded = encoder.fit_transform(df)
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split below, so held-out rows contribute to the scaling
# statistics. This should matter little for a tree-based model, but move the
# fit after the split if a scale-sensitive estimator is ever used.
df_encoded[numeric_columns] = scaler.fit_transform(df_encoded[numeric_columns])

# ---------------------------------------------------------------------------
# Features / target split
# ---------------------------------------------------------------------------
X = df_encoded.drop('Service', axis=1)
y = df_encoded['Service']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ---------------------------------------------------------------------------
# Model training
# ---------------------------------------------------------------------------
# Seed the forest (same seed as the split) so that restarting the app yields
# the same model instead of a different one on every launch.
model = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}
# 3 * 4 * 3 = 36 candidates x 5 folds = 180 fits at import time; n_jobs=-1
# spreads them over all available cores.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

# Report how the selected model does on the rows held out by the split, which
# were otherwise never looked at.
print(f"Best parameters: {grid_search.best_params_}")
print(f"Held-out accuracy: {accuracy_score(y_test, best_model.predict(X_test)):.3f}")
def predict_service(land_size, biodiversity, budget, infrastructure):
    """Predict the agrotourism service for a single farm description.

    Builds a one-row feature frame laid out exactly like ``X_train``, applies
    the already-fitted ``scaler`` to the two numeric columns and asks
    ``best_model`` for a prediction.

    Args:
        land_size: Land size in acres, unscaled.
        biodiversity: Value of the ``Biodiversity`` column to encode.
        budget: Budget in INR, unscaled.
        infrastructure: Value of the ``Existing Infrastructure`` column to
            encode.

    Returns:
        The first (only) element of ``best_model.predict`` for the row.

    Raises:
        ValueError: If ``land_size`` or ``budget`` is ``None`` (for example
            when the numeric field was left blank in the UI).
    """
    if land_size is None or budget is None:
        raise ValueError("Land size and budget are required to make a prediction.")

    numeric_cols = ['Land Size (acres)', 'Budget (INR)']

    # Start from an all-zero float row so every column has a numeric dtype.
    row = dict.fromkeys(X_train.columns, 0.0)
    row['Land Size (acres)'] = float(land_size)
    row['Budget (INR)'] = float(budget)

    # The encoder was created with use_cat_names=True, so each dummy column is
    # named "<column>_<category>". Match that name exactly: substring matching
    # lets one field's value hit (and reset) the other field's columns when
    # both share a category name. A category that has no dummy column leaves
    # its group at all zeros.
    for source_col, category in (
        ('Biodiversity', biodiversity),
        ('Existing Infrastructure', infrastructure),
    ):
        dummy_col = f"{source_col}_{category}"
        if dummy_col in row:
            row[dummy_col] = 1.0

    input_df = pd.DataFrame([row], columns=X_train.columns)
    # Reuse the training-time scaling; never refit on a single row.
    input_df[numeric_cols] = scaler.transform(input_df[numeric_cols])

    prediction = best_model.predict(input_df)
    return prediction[0]
def gradio_interface(land_size, biodiversity, budget, infrastructure):
    """Gradio callback: run the prediction and wrap it in a display sentence.

    The four arguments are forwarded unchanged, in the same order, to
    ``predict_service``; the result is interpolated into the message string
    that is returned.
    """
    service = predict_service(land_size, biodiversity, budget, infrastructure)
    message = f"The predicted service is: {service}"
    return message
# Input widgets, in the positional order gradio_interface() receives them.
# Dropdown choices are the distinct values found in the loaded dataset.
service_inputs = [
    gr.Number(label="Land Size (acres)"),
    gr.Dropdown(label="Biodiversity", choices=df['Biodiversity'].unique().tolist()),
    gr.Number(label="Budget (INR)"),
    gr.Dropdown(label="Existing Infrastructure", choices=df['Existing Infrastructure'].unique().tolist()),
]
service_output = gr.Text(label="Predicted Service")

# Assemble the web UI around the prediction callback.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=service_inputs,
    outputs=service_output,
    title="Agrotourism Service Planner",
    description="Please give land size, available budget, and existing infrastructure to find best suitable agrotourism service on your farm!",
)

# Start serving the interface.
iface.launch()