Spaces:

ombhojane
/

predictservice

Sleeping

App Files Files Community

predictservice / app.py

ombhojane

Update app.py

8ca9b79 verified almost 2 years ago

raw

history blame contribute delete

2.97 kB


	from datasets import load_dataset
	import pandas as pd
	from sklearn.model_selection import train_test_split, GridSearchCV
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.preprocessing import StandardScaler
	from sklearn.metrics import classification_report, accuracy_score
	from category_encoders import OneHotEncoder
	import gradio as gr

	# Load the dataset
	# Only the "train" split of ombhojane/ckv5 is used; it is materialised as a
	# pandas DataFrame so the encoders and scikit-learn can work on it directly.
	dataset = load_dataset("ombhojane/ckv5")
	df = pd.DataFrame(dataset['train'])

	# Preprocessing
	# The two categorical columns are one-hot encoded. With use_cat_names=True the
	# generated columns are named "<column>_<category>", which predict_service
	# relies on when it builds a single-row input.
	encoder = OneHotEncoder(cols=['Biodiversity', 'Existing Infrastructure'], use_cat_names=True)
	scaler = StandardScaler()

	df_encoded = encoder.fit_transform(df)
	# NOTE(review): the scaler is fitted on the full frame, i.e. before the
	# train/test split below. Kept as-is so the fitted `scaler` object that
	# predict_service reuses is unchanged.
	df_encoded[['Land Size (acres)', 'Budget (INR)']] = scaler.fit_transform(df_encoded[['Land Size (acres)', 'Budget (INR)']])

	# Splitting features and target
	X = df_encoded.drop('Service', axis=1)
	y = df_encoded['Service']
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

	# Model Training
	# A fixed random_state makes the forest (and therefore the model selected by
	# the grid search) reproducible across restarts of the app.
	model = RandomForestClassifier(random_state=42)
	param_grid = {
	    'n_estimators': [100, 200, 300],
	    'max_depth': [None, 10, 20, 30],
	    'min_samples_split': [2, 5, 10],
	}

	# 36 candidates x 5 folds are fitted at start-up; n_jobs=-1 spreads the fits
	# over all available cores without changing the selected model.
	grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
	grid_search.fit(X_train, y_train)

	best_model = grid_search.best_estimator_

	# Evaluation
	# Report how the selected model performs on the held-out 20% split, which
	# was previously created but never looked at.
	test_predictions = best_model.predict(X_test)
	print(f"Best parameters: {grid_search.best_params_}")
	print(f"Held-out accuracy: {accuracy_score(y_test, test_predictions):.3f}")
	print(classification_report(y_test, test_predictions, zero_division=0))

	def predict_service(land_size, biodiversity, budget, infrastructure):
	    """Predict the service for a single farm description.

	    A one-row feature frame with the same columns as ``X_train`` is built:
	    every column starts at 0, the two numeric columns receive the given
	    values (scaled with the already-fitted ``scaler``), and the one-hot
	    indicator matching each selected category is set to 1.

	    Args:
	        land_size: Land size in acres (numeric).
	        biodiversity: Selected category of the ``Biodiversity`` column.
	        budget: Budget in INR (numeric).
	        infrastructure: Selected category of the ``Existing Infrastructure``
	            column.

	    Returns:
	        The first (only) element of ``best_model.predict`` for that row.

	    Raises:
	        ValueError: If any input is missing, or a numeric input cannot be
	            converted to ``float``.
	    """
	    numeric_cols = ['Land Size (acres)', 'Budget (INR)']

	    # Gradio passes None for fields the user left empty; fail with a clear
	    # message instead of an opaque TypeError further down.
	    supplied = {
	        'Land Size (acres)': land_size,
	        'Biodiversity': biodiversity,
	        'Budget (INR)': budget,
	        'Existing Infrastructure': infrastructure,
	    }
	    missing = [label for label, value in supplied.items() if value is None]
	    if missing:
	        raise ValueError("Missing input value(s): " + ", ".join(missing))

	    try:
	        numeric_values = {
	            'Land Size (acres)': float(land_size),
	            'Budget (INR)': float(budget),
	        }
	    except (TypeError, ValueError) as err:
	        raise ValueError("Land size and budget must be numeric") from err

	    # Start from an all-zero row so every feature the model was trained on is
	    # present, then fill in what the caller supplied.
	    row = dict.fromkeys(X_train.columns, 0)
	    row.update(numeric_values)

	    # The encoder was created with use_cat_names=True, so indicator columns
	    # are named "<column>_<category>". Match that name exactly: substring
	    # matching would also hit categories that share a prefix (or unrelated
	    # columns whose name happens to contain the category text).
	    selections = (
	        ('Biodiversity', biodiversity),
	        ('Existing Infrastructure', infrastructure),
	    )
	    for column, category in selections:
	        indicator = f"{column}_{category}"
	        # A category without a trained indicator leaves all of its
	        # indicators at 0, exactly as before.
	        if indicator in row:
	            row[indicator] = 1

	    input_df = pd.DataFrame([row], columns=X_train.columns)
	    input_df[numeric_cols] = scaler.transform(input_df[numeric_cols])

	    prediction = best_model.predict(input_df)
	    return prediction[0]

	def gradio_interface(land_size, biodiversity, budget, infrastructure):
	    """Run ``predict_service`` on the form values and wrap the result in a sentence.

	    Takes the same four arguments as ``predict_service`` and returns the
	    human-readable string shown to the user.
	    """
	    prediction = predict_service(
	        land_size=land_size,
	        biodiversity=biodiversity,
	        budget=budget,
	        infrastructure=infrastructure,
	    )
	    return f"The predicted service is: {prediction}"


	# Gradio UI: two numeric fields and two dropdowns whose choices are the
	# distinct categories found in the loaded dataset. The inputs are passed to
	# gradio_interface positionally, so their order must match its signature
	# (land size, biodiversity, budget, infrastructure).
	biodiversity_choices = df['Biodiversity'].unique().tolist()
	infrastructure_choices = df['Existing Infrastructure'].unique().tolist()

	iface = gr.Interface(
	    fn=gradio_interface,
	    inputs=[
	        gr.Number(label="Land Size (acres)"),
	        gr.Dropdown(label="Biodiversity", choices=biodiversity_choices),
	        gr.Number(label="Budget (INR)"),
	        gr.Dropdown(label="Existing Infrastructure", choices=infrastructure_choices),
	    ],
	    outputs=gr.Text(label="Predicted Service"),
	    title="Agrotourism Service Planner",
	    # The description now names all four inputs the form asks for
	    # (biodiversity was previously omitted).
	    description="Please give land size, biodiversity, available budget, and existing infrastructure to find best suitable agrotourism service on your farm!",
	)

	iface.launch()