Spaces:
Sleeping
Sleeping
File size: 2,971 Bytes
95c8853 2100805 8ca9b79 2100805 fb277e2 8ca9b79 494d85e 8ca9b79 70fd1e6 8ca9b79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
from datasets import load_dataset
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from category_encoders import OneHotEncoder
import gradio as gr
# Load the dataset
dataset = load_dataset("ombhojane/ckv5")
df = pd.DataFrame(dataset['train'])

# Preprocessing: one-hot encode the two categorical features. With
# use_cat_names=True the generated columns are named "<feature>_<label>".
NUMERIC_COLUMNS = ['Land Size (acres)', 'Budget (INR)']
encoder = OneHotEncoder(cols=['Biodiversity', 'Existing Infrastructure'], use_cat_names=True)
scaler = StandardScaler()
df_encoded = encoder.fit_transform(df)

# Splitting features and target
X = df_encoded.drop('Service', axis=1)
y = df_encoded['Service']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, so that statistics of the
# held-out rows never leak into preprocessing; the test split is transformed
# with the training statistics. Copies avoid writing into views of X.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[NUMERIC_COLUMNS] = scaler.fit_transform(X_train[NUMERIC_COLUMNS])
X_test[NUMERIC_COLUMNS] = scaler.transform(X_test[NUMERIC_COLUMNS])

# Model Training: exhaustive grid search with 5-fold cross-validation.
# The forest is seeded (same seed as the split) so that repeated runs select
# and fit the same model.
model = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best estimator found by the search; used for all predictions below.
best_model = grid_search.best_estimator_
def predict_service(land_size, biodiversity, budget, infrastructure):
    """Predict the 'Service' label for a single farm description.

    Builds a one-row feature frame laid out exactly like ``X_train``, scales
    the two numeric features with the fitted ``scaler``, switches on the
    one-hot indicator for each categorical label, and returns the prediction
    of ``best_model``.

    Args:
        land_size: Land size in acres (number).
        biodiversity: Label of the 'Biodiversity' category.
        budget: Budget in INR (number).
        infrastructure: Label of the 'Existing Infrastructure' category.

    Returns:
        The first (and only) element of ``best_model.predict`` for the row.

    Raises:
        ValueError: If any of the four inputs is ``None`` (e.g. a form field
            was left empty).
    """
    provided = {
        'Land Size (acres)': land_size,
        'Biodiversity': biodiversity,
        'Budget (INR)': budget,
        'Existing Infrastructure': infrastructure,
    }
    missing = [label for label, value in provided.items() if value is None]
    if missing:
        raise ValueError(f"Missing value for: {', '.join(missing)}")

    # All-zero numeric row with the same columns, in the same order, as the
    # training features.
    row = pd.DataFrame(0.0, index=[0], columns=X_train.columns)

    # Scale the numeric inputs with the statistics learned during training.
    # The frame uses the same column names and order the scaler was fit on.
    numeric_columns = ['Land Size (acres)', 'Budget (INR)']
    raw_numeric = pd.DataFrame([[land_size, budget]], columns=numeric_columns)
    row[numeric_columns] = scaler.transform(raw_numeric)

    # Switch on exactly one indicator per categorical feature. The column is
    # addressed by its full "<feature>_<label>" name rather than by substring
    # search, so a label shared by both features (or contained in another
    # label) cannot set or clear the wrong indicator. A label with no matching
    # training column leaves every indicator of that feature at zero.
    categorical_inputs = (
        ('Biodiversity', biodiversity),
        ('Existing Infrastructure', infrastructure),
    )
    for feature, label in categorical_inputs:
        indicator = f"{feature}_{label}"
        if indicator in row.columns:
            row.loc[0, indicator] = 1.0

    return best_model.predict(row)[0]
def gradio_interface(land_size, biodiversity, budget, infrastructure):
    """Gradio callback: run the prediction and wrap it in a display sentence.

    The four arguments are forwarded unchanged, in the same order, to
    ``predict_service``; the returned label is embedded in the message shown
    to the user.
    """
    service = predict_service(land_size, biodiversity, budget, infrastructure)
    return f"The predicted service is: {service}"
# Web form: two numeric fields and two dropdowns whose choices are the
# distinct category labels present in the loaded dataset. The input order
# matches the parameter order of gradio_interface.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Number(label="Land Size (acres)"),
        gr.Dropdown(label="Biodiversity", choices=df['Biodiversity'].unique().tolist()),
        gr.Number(label="Budget (INR)"),
        gr.Dropdown(label="Existing Infrastructure", choices=df['Existing Infrastructure'].unique().tolist()),
    ],
    outputs=gr.Text(label="Predicted Service"),
    title="Agrotourism Service Planner",
    description="Please give land size, available budget, and existing infrastructure to find best suitable agrotourism service on your farm!",
)

# Start the Gradio server.
iface.launch()