# predictservice / app.py
# ombhojane's picture
# Update app.py
# 8ca9b79 verified
from datasets import load_dataset
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from category_encoders import OneHotEncoder
import gradio as gr
# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
# Fetch the dataset and materialise its 'train' split as a pandas DataFrame.
dataset = load_dataset("ombhojane/ckv5")
df = pd.DataFrame(dataset['train'])

# ---------------------------------------------------------------------------
# Preprocessing
# ---------------------------------------------------------------------------
categorical_columns = ['Biodiversity', 'Existing Infrastructure']
numeric_columns = ['Land Size (acres)', 'Budget (INR)']

# One-hot encode the categorical columns; use_cat_names=True puts the
# category value into each generated column name.
encoder = OneHotEncoder(cols=categorical_columns, use_cat_names=True)
scaler = StandardScaler()

df_encoded = encoder.fit_transform(df)
# NOTE(review): the scaler is fitted on every row before the train/test split
# below, so the held-out rows contribute to the scaling statistics. The test
# split is not evaluated anywhere in the visible code.
df_encoded[numeric_columns] = scaler.fit_transform(df_encoded[numeric_columns])

# ---------------------------------------------------------------------------
# Features / target and train/test split
# ---------------------------------------------------------------------------
# 'Service' is the prediction target; every other column is a feature.
X = df_encoded.drop(columns='Service')
y = df_encoded['Service']

# Hold out 20% of the rows, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Model Training
# Exhaustive grid search over a small random-forest hyperparameter grid
# (3 x 4 x 3 = 36 candidates), each scored by 5-fold cross-validated accuracy
# on the training split.
#
# The forest is seeded with the same value as train_test_split so that the
# selected hyperparameters and the fitted model -- and therefore the
# predictions served by the app -- are reproducible across restarts instead of
# changing with every run.
model = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Estimator refitted on the whole training split with the best parameters.
best_model = grid_search.best_estimator_
def predict_service(land_size, biodiversity, budget, infrastructure):
    """Predict the service for a single farm description.

    Builds a one-row feature frame with the same columns as ``X_train``,
    scales the two numeric features with the already-fitted ``scaler`` and
    returns the label predicted by ``best_model``.

    Args:
        land_size: Land size in acres (unscaled).
        biodiversity: Biodiversity category value.
        budget: Budget in INR (unscaled).
        infrastructure: Existing-infrastructure category value.

    Returns:
        The first (and only) element of ``best_model.predict`` for the row.
    """
    numeric_columns = ['Land Size (acres)', 'Budget (INR)']

    # Start from an all-zero numeric row laid out exactly like the training
    # features, then fill in the raw numeric inputs.
    input_df = pd.DataFrame(0.0, index=[0], columns=X_train.columns)
    input_df['Land Size (acres)'] = land_size
    input_df['Budget (INR)'] = budget

    # Switch on one indicator per categorical feature, addressed by its exact
    # encoded column name. Substring tests are deliberately avoided: when both
    # features carry the same value (or one value is a substring of another
    # category) they would turn the wrong indicator on, or reset the right
    # one back to 0. A category with no matching column leaves every
    # indicator of that feature at 0.
    wanted_indicators = (
        f'Biodiversity_{biodiversity}',
        f'Existing Infrastructure_{infrastructure}',
    )
    for indicator in wanted_indicators:
        if indicator in input_df.columns:
            input_df[indicator] = 1

    # Apply the scaling that was fitted on the training data.
    input_df[numeric_columns] = scaler.transform(input_df[numeric_columns])

    prediction = best_model.predict(input_df)
    return prediction[0]
def gradio_interface(land_size, biodiversity, budget, infrastructure):
    """Gradio callback: turn the four form inputs into a result message.

    Args:
        land_size: Value of the "Land Size (acres)" field.
        biodiversity: Selected "Biodiversity" option.
        budget: Value of the "Budget (INR)" field.
        infrastructure: Selected "Existing Infrastructure" option.

    Returns:
        A sentence naming the predicted service, or a prompt asking for the
        missing inputs when any field was left empty.
    """
    # A blank Number field or an unselected Dropdown can arrive as None;
    # answer with a readable prompt instead of failing inside the model code.
    form_values = (land_size, biodiversity, budget, infrastructure)
    if any(value is None for value in form_values):
        return (
            "Please provide land size, biodiversity, budget, "
            "and existing infrastructure."
        )

    prediction = predict_service(land_size, biodiversity, budget, infrastructure)
    return f"The predicted service is: {prediction}"
# Dropdown options are the distinct category values present in the dataset.
biodiversity_choices = df['Biodiversity'].unique().tolist()
infrastructure_choices = df['Existing Infrastructure'].unique().tolist()

# Component order matches the positional parameters of gradio_interface:
# land size, biodiversity, budget, infrastructure.
input_components = [
    gr.Number(label="Land Size (acres)"),
    gr.Dropdown(label="Biodiversity", choices=biodiversity_choices),
    gr.Number(label="Budget (INR)"),
    gr.Dropdown(label="Existing Infrastructure", choices=infrastructure_choices),
]

iface = gr.Interface(
    fn=gradio_interface,
    inputs=input_components,
    outputs=gr.Text(label="Predicted Service"),
    title="Agrotourism Service Planner",
    description="Please give land size, available budget, and existing infrastructure to find best suitable agrotourism service on your farm!",
)

# Start serving the interface.
iface.launch()