Prajwal3009 committed on
Commit
aaa0633
·
verified ·
1 Parent(s): 74130e1

Upload 11 files

Browse files
Retrain_Crop_Recommendation.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
6
+ from sklearn.model_selection import train_test_split, GridSearchCV,RandomizedSearchCV
7
+ from sklearn.preprocessing import OrdinalEncoder,LabelEncoder,OneHotEncoder
8
+ from sklearn.ensemble import RandomForestClassifier
9
+ import pickle
10
def crop_reco():
    """Retrain the crop-recommendation RandomForest model.

    Reads 'Crop_recommendation.csv', label-encodes the target crop names,
    runs a randomized hyper-parameter search over a RandomForestClassifier,
    and pickles the best estimator to 'crop_recommendation.pickle'.
    """
    df = pd.read_csv('Crop_recommendation.csv')

    # Encode the crop names into integer class labels.
    # (LabelEncoder is already imported at module level; the original
    # re-imported it redundantly inside the function.)
    le = LabelEncoder()
    df['label'] = le.fit_transform(df['label'])

    class_labels = le.classes_  # original string names, kept for reference

    x = df.drop('label', axis=1)
    y = df['label']

    # NOTE(review): no random_state is set, so each retrain uses a
    # different split — confirm this is intentional.
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.10, shuffle=True)

    rf = RandomForestClassifier()
    param_grid = {
        'n_estimators': np.arange(50, 200),
        'criterion': ['gini', 'entropy'],
        'max_depth': np.arange(2, 25),
        'min_samples_split': np.arange(2, 25),
        'min_samples_leaf': np.arange(2, 25),
    }

    # Randomized (not exhaustive) search — the grid above is far too large
    # for GridSearchCV.
    rscv_model = RandomizedSearchCV(rf, param_grid, cv=5)
    rscv_model.fit(X_train, y_train)

    best_rf_model = rscv_model.best_estimator_
    # Context manager closes the file even on error; the original passed
    # an unclosed open() handle into pickle.dump (descriptor leak).
    with open("crop_recommendation.pickle", "wb") as fh:
        pickle.dump(best_rf_model, fh)
38
+
Soli_to_recommandation_model_Raghuu.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1afa2e0ebbb7a999fd6eeceaf9e9be1342913ba45f777e95ff069e49a0ef84df
3
+ size 133
Soli_to_recommandation_model_Simha.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d40e3831a83c7bc49ba167e1e410157ea591413cf13cf86e2631d54fd957826c
3
+ size 39907328
Weather_app.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import streamlit as st
3
+ import time
4
+ import math
5
+ import pickle as pk
6
+ import seaborn as sns
7
+ import matplotlib.pyplot as plt
8
+ import pandas as pd
9
+ import numpy as np
10
+ import streamlit as st
11
+ import plotly.express as px
12
+ import pandas as pd
13
+
14
def get_weather_details(city_name):
    """Fetch the current weather for *city_name* from OpenWeatherMap.

    Returns a dict with keys city, temperature (Kelvin, as the API
    returns it), description, humidity and wind_speed, or None on any
    failure (the error is shown in the Streamlit UI).
    """
    base_url = "https://api.openweathermap.org/data/2.5/weather"
    params = {
        'q': city_name,
        # SECURITY(review): API key hard-coded in source — move it to an
        # environment variable or Streamlit secrets.
        'appid': "d73ec4f18aca81c32b1836a8ac2506e0"
    }

    try:
        # timeout added: without it a stalled API call hangs the app forever.
        response = requests.get(base_url, params=params, timeout=10)
        data = response.json()

        # Check if the request was successful
        if response.status_code == 200:
            # Extract weather details
            weather_details = {
                'city': city_name,
                'temperature': data['main']['temp'],
                'description': data['weather'][0]['description'],
                'humidity': data['main']['humidity'],
                'wind_speed': data['wind']['speed']
            }
            return weather_details
        else:
            # .get() avoids a secondary KeyError when the error payload
            # lacks a 'message' field.
            st.write("Error {}: {}".format(
                response.status_code, data.get('message', 'unknown error')))
            return None
    except Exception as e:
        st.write("An error occurred:", e)
        return None
42
+
43
+ # -------------------------------------------------------------------------------------------Api---------------------------------------------------------
44
+ # Replace with your actual values
45
def run_weather_app():
    """Render the current-weather panel for a selected Karnataka district."""
    st.title("City Weather Overview")
    try:
        global city_name  # read by other parts of the app after selection
        city_name = st.selectbox(
            'Enter City',
            ("Bagalkot", "Ballari", "Belagavi", "Bidar", "Chikkaballapur",
             "Chikkamagaluru", "Chitradurga", "Davanagere", "Dharwad",
             "Gadag", "Hassan", "Haveri", "Kalaburagi", "Kodagu", "Kolar",
             "Koppal", "Mandya", "Mysuru", "Raichur", "Ramanagara",
             "Shivamogga", "Tumakuru", "Udupi", "Uttara Kannada",
             "Vijayapura", "Yadgir"),
            key="unique_key_2")

        if city_name:
            if st.button('Show'):
                # Cosmetic progress animation before the fetch.
                progress = st.progress(0)
                for i in range(100):
                    time.sleep(0.005)
                    progress.progress(i + 1)

                weather_data = get_weather_details(city_name)
                if weather_data:
                    column1, column2 = st.columns(2)
                    column1.metric("City", weather_data['city'].capitalize())
                    # API returns Kelvin; convert to Celsius for display.
                    column1.metric(
                        "Temperature",
                        value=f"{ round(weather_data['temperature'] - 273.15, 2) } °C",
                        delta=weather_data['description'])
                    column1.metric("Humidity", value=f"{weather_data['humidity']} %")
                    # m/s -> km/h conversion, rounded up.
                    column1.metric(
                        "Wind Speed",
                        value=f"{math.ceil(weather_data['wind_speed']*3.6)} km/hr")
                else:
                    st.error("Invalid City Name")

    except Exception as e:
        # BUG FIX: st.error's second positional argument is `icon`; the
        # original passed the exception object there. Format one message.
        st.error(f"An error occurred: {e}")

    return None
77
###-------------------------------------------------------------------Current weather------------------------------------------------------------
# Loaded once at module import; run_forecast() below calls .forecast(steps=...)
# on it, so presumably a fitted time-series model (e.g. statsmodels) — confirm.
# NOTE(review): the open() handle is never closed, and pickle.load must only
# ever be pointed at trusted local files.
forecast_model = pk.load(open('rain_forecast_model.pkl','rb'))
+
80
def run_forecast():
    """Render the monthly rainfall forecast bar chart.

    Side effect: stores the forecast series in the module-global
    ``forecast_data`` so the other tabs can reuse it.
    """
    global forecast_data

    months_ahead = st.number_input(
        'Enter the number of months to forecast',
        min_value=1, max_value=48, value=1, step=1)

    # Forecast horizon: a fixed extra year plus the requested months.
    forecast_data = forecast_model.forecast(steps=12 + months_ahead)

    st.title('Monthly Rainfall Overview')

    frame = pd.DataFrame({
        'Month': forecast_data.index.strftime('%Y-%m'),
        'Values': np.round(forecast_data.values, 2),
    })

    # Bar chart with per-bar labels and a continuous colour scale.
    chart = px.bar(
        frame,
        x='Month',
        y='Values',
        text='Values',
        color='Values',
        labels={'Values': 'Precipitation'},
        title='Monthly Forecast Data',
        template='plotly',
        color_continuous_scale='viridis',
    )
    chart.update_traces(texttemplate='%{text:.2f}', textposition='outside')

    st.plotly_chart(chart)
99
+
100
+ #-----------------------------------------------------------------------------------------------------------------------------
101
+
102
def india_precipitation():
    """Render an animated India choropleth of the forecast precipitation."""
    st.title('India Monthly Precipitation')

    frame = pd.DataFrame({
        'Month': forecast_data.index.strftime('%Y-%m'),
        'Precipitation': np.round(forecast_data.values, 2),
    })

    # One country repeated per row; the animation frame steps month-by-month.
    chart = px.choropleth(
        frame,
        locations=["India"] * len(frame),
        locationmode="country names",
        color='Precipitation',
        hover_name='Month',
        animation_frame='Month',
        range_color=[forecast_data.min(), forecast_data.max()],
        color_continuous_scale='Viridis',
    )

    st.plotly_chart(chart)
117
+
118
+
119
+ #--------------------------------------------------------------------------------------------------------------------------
120
+
121
def forecast_data_for_tab():
    """Show the forecast as a table with a CSV download button."""
    table = pd.DataFrame({
        'Months': forecast_data.index.strftime('%Y-%m'),
        'Precipitation': forecast_data.values,
    })
    st.table(table)

    # Offer the same data as a CSV download.
    st.download_button(
        label="Download Forecast Data as CSV",
        data=table.to_csv(index=False).encode('utf-8'),
        file_name='forecast_data.csv',
        mime='text/csv',
    )
134
+
135
+ #----------------------------------------------------------------------------------------------------------------
136
+
137
def weather_forecast_app():
    """Top-level page: current weather plus the rainfall forecast tabs."""
    # The original wrapped both sections in `if True:` guards — dead
    # conditionals (presumably leftovers of a removed gate); removed with
    # no behavior change.
    run_weather_app()

    tab1, tab2, tab3 = st.tabs(['Forecast Barplot', 'Forecast data', 'Map'])
    with tab1:
        run_forecast()
    with tab2:
        forecast_data_for_tab()
    with tab3:
        india_precipitation()
149
+
150
+
retrain_Crop_yield.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import warnings
7
+ import pickle
8
+ warnings.filterwarnings('ignore')
9
+
10
def Crop_yel():
    """Retrain the crop-yield ExtraTrees model and pickle it.

    Reads 'crop_yield.csv', label-encodes the categorical columns, fits
    an ExtraTreesRegressor on an 80/20 split, and writes the fitted model
    to 'crop_yield_model.pkl'.
    """
    data = pd.read_csv('crop_yield.csv')

    # Label-encode the categorical columns in place.
    # NOTE(review): one encoder is re-fit per column, so the per-column
    # mappings are not recoverable afterwards — acceptable for training.
    from sklearn.preprocessing import LabelEncoder
    encoder = LabelEncoder()
    for col in ['Crop', 'Season', 'State']:
        data[col] = encoder.fit_transform(data[col])

    # Last column is the target; everything else is a feature.
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=42, test_size=0.2)

    from sklearn.ensemble import ExtraTreesRegressor

    model = ExtraTreesRegressor(
        n_estimators=200,
        criterion='squared_error',
        max_depth=20,
        min_samples_split=2,
        min_samples_leaf=1,
        max_features=5,
        bootstrap=True,
        random_state=42,
    )

    model.fit(X_train, y_train)

    # Context manager closes the file even on error; the original passed
    # an unclosed open() handle into pickle.dump (descriptor leak).
    with open('crop_yield_model.pkl', 'wb') as fh:
        pickle.dump(model, fh)
42
+
43
+
retrain_gross_premium.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def grop():
    """Retrain the gross-premium LinearRegression model and pickle it.

    Reads 'insurance.csv', normalizes the text columns (lowercase, strip
    non-alphanumerics), label-encodes them, fits a LinearRegression on an
    80/20 split, prints train/test R2 scores, and writes the model to
    'crop_grosspremimum_Jp.pkl'.
    """
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score
    import pandas as pd
    import pickle as pk
    import re  # the original imported re twice

    data = pd.read_csv('insurance.csv')
    data.dropna(inplace=True)

    # Normalize every object column so category strings compare cleanly.
    obj_columns = list(data.select_dtypes("object").columns)
    for col in obj_columns:
        data[col] = data[col].astype("str")
        data[col] = data[col].apply(
            lambda x: re.sub(r'[^a-zA-Z0-9]', '', x.lower())).astype("str")

    # Capture the raw (normalized) category strings BEFORE encoding so we
    # can build string -> label lookup tables afterwards.
    season_catogory = list(data.season.values)
    scheme_catogory = list(data.scheme.values)
    state_catogory = list(data.state_name.values)
    district_catogory = list(data.district_name.values)

    columns = ['season', 'scheme', 'state_name', 'district_name']
    encoder = LabelEncoder()
    for col in columns:
        data[col] = encoder.fit_transform(data[col])

    # Lookup tables: normalized category string -> encoded label.
    season_category_label_dict = dict(zip(season_catogory, list(data.season.values)))
    scheme_category_label_dict = dict(zip(scheme_catogory, list(data.scheme.values)))
    state_category_label_dict = dict(zip(state_catogory, list(data.state_name.values)))
    district_category_label_dict = dict(zip(district_catogory, list(data.district_name.values)))

    # Positional split: last column is the target, as in the original
    # (its earlier X = data.drop('gross_premium', ...) was dead code,
    # overwritten here).
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Report fit quality on both splits; similar scores suggest the model
    # is not overfitting.
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    print(f'R2 Score: {round(r2*100, 2)}')
    y_pred = model.predict(X_train)
    r2 = r2_score(y_train, y_pred)
    print(f'R2 Score: {round(r2*100, 2)}')

    # Context manager closes the file; the original leaked the handle.
    with open('crop_grosspremimum_Jp.pkl', 'wb') as fh:
        pk.dump(model, fh)

    def encoding(input_data):
        # NOTE(review): this nested helper is defined but never called or
        # returned — dead code kept for parity with the original; consider
        # removing it or returning it to the caller.
        def _norm(s):
            # One replace suffices — the original chained four identical
            # .replace(" ", "") calls.
            return s.lower().replace(" ", "")

        input_data[0] = season_category_label_dict[_norm(input_data[0])]
        input_data[1] = scheme_category_label_dict[_norm(input_data[1])]
        input_data[2] = state_category_label_dict[_norm(input_data[2])]
        input_data[3] = district_category_label_dict[_norm(input_data[3])]
        return input_data
retrain_save_soil.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import pandas as pd
6
+ import pickle
7
+ from sklearn.model_selection import train_test_split, GridSearchCV
8
+ from sklearn.preprocessing import StandardScaler
9
+ from sklearn.ensemble import RandomForestClassifier
10
+ from sklearn.metrics import classification_report, accuracy_score
11
def soil_model():
    """Retrain the soil -> plant RandomForest recommender and pickle it.

    Reads 'Cr3.csv', normalizes the text columns, label-encodes the
    'Plant' target, grid-searches RandomForest hyper-parameters on a
    scaled 80/20 split, refits a model with the best parameters on the
    full dataset, and writes it to
    'Soli_to_recommandation_model_Raghuu.pkl'.
    """
    data = pd.read_csv("Cr3.csv")

    import re

    # Normalize object columns: lowercase, strip non-alphanumerics.
    obj_columns = data.select_dtypes("object")
    for col in obj_columns:
        data[col] = data[col].apply(
            lambda x: re.sub(r'[^a-zA-Z0-9]', '', x.lower())).astype("str")

    from sklearn.preprocessing import LabelEncoder
    le = LabelEncoder()
    data["Plant"] = le.fit_transform(data["Plant"])

    X = data.drop('Plant', axis=1)
    y = data['Plant']

    # Split the data into training and testing sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Standardize features for the hyper-parameter search.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Hyper-parameter search for RandomForestClassifier.
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }
    rf_classifier = RandomForestClassifier(random_state=42)
    grid_search = GridSearchCV(rf_classifier, param_grid, cv=5,
                               scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train_scaled, y_train)

    best_params = grid_search.best_params_

    # NOTE(review): the search ran on SCALED training data but the final
    # model is fit on the RAW full dataset — the tuned parameters may not
    # transfer, and inference code must NOT scale its inputs. Kept as-is
    # to preserve the deployed behavior; confirm this is intentional.
    final_rf_classifier = RandomForestClassifier(**best_params, random_state=42)
    final_rf_classifier.fit(X, y)

    # Context manager closes the file; the original leaked the handle.
    with open('Soli_to_recommandation_model_Raghuu.pkl', 'wb') as fh:
        pickle.dump(final_rf_classifier, fh)
67
+
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
retrain_sum_insured.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def sumin():
    """Retrain the sum-insured ExtraTrees model and pickle it.

    Reads 'insurance(R).csv', normalizes and label-encodes the categorical
    columns, fits an ExtraTreesRegressor on an 80/20 split, prints the
    train/test R2 scores, and pickles the model to
    'crop_insurance_sum_Raghu.pkl'.
    """
    import pandas as pd
    import numpy as np
    import pickle
    import re

    data = pd.read_csv("insurance(R).csv")

    # Normalize object columns: lowercase, strip non-alphanumerics.
    obj_columns = data.select_dtypes("object")
    for col in obj_columns:
        data[col] = data[col].apply(
            lambda x: re.sub(r'[^a-zA-Z0-9]', '', x.lower())).astype("str")

    # Raw (normalized) category strings, captured BEFORE encoding so we
    # can build string -> label lookup tables.
    season_catogory = list(data.season.values)
    scheme_catogory = list(data.scheme.values)
    state_catogory = list(data.state_name.values)
    district_catogory = list(data.district_name.values)

    from sklearn.preprocessing import LabelEncoder
    encoder = LabelEncoder()
    for col in ['season', 'scheme', 'state_name', 'district_name']:
        data[col] = encoder.fit_transform(data[col])

    # Lookup tables: normalized category string -> encoded label.
    season_category_label_dict = dict(zip(season_catogory, list(data.season.values)))
    scheme_category_label_dict = dict(zip(scheme_catogory, list(data.scheme.values)))
    state_category_label_dict = dict(zip(state_catogory, list(data.state_name.values)))
    district_category_label_dict = dict(zip(district_catogory, list(data.district_name.values)))

    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import ExtraTreesRegressor
    from sklearn.metrics import r2_score

    X = data.drop("sum_insured", axis=1)
    y = data["sum_insured"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1000, test_size=0.2)

    model = ExtraTreesRegressor(
        n_estimators=200,
        criterion='squared_error',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        max_features=5,
        random_state=1000,
    )
    model.fit(X_train, y_train)

    # Report fit quality on both splits; similar scores suggest the model
    # is not overfitting.
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    print(f'R2 Score: {round(r2*100, 2)}')
    y_pred = model.predict(X_train)
    r2 = r2_score(y_train, y_pred)
    print(f'R2 Score: {round(r2*100, 2)}')

    def encoding(input_data):
        # NOTE(review): dead code — defined but never called or returned;
        # kept for parity with the original.
        def _norm(s):
            # One replace suffices — the original chained four identical
            # .replace(" ", "") calls.
            return s.lower().replace(" ", "")

        input_data[0] = season_category_label_dict[_norm(input_data[0])]
        input_data[1] = scheme_category_label_dict[_norm(input_data[1])]
        input_data[2] = state_category_label_dict[_norm(input_data[2])]
        input_data[3] = district_category_label_dict[_norm(input_data[3])]
        return input_data

    # Context manager closes the file; the original leaked the handle.
    with open('crop_insurance_sum_Raghu.pkl', 'wb') as fh:
        pickle.dump(model, fh)
setup.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Streamlit deployment bootstrap: writes credentials and server config.
mkdir -p ~/.streamlit/

# BUG FIX: bash's builtin echo does not expand \n without -e, so the
# original wrote a single line containing literal "\n\" sequences —
# invalid TOML. printf emits each argument on its own line portably.
printf '%s\n' \
    '[general]' \
    'email = "your-email@domain.com"' \
    > ~/.streamlit/credentials.toml

printf '%s\n' \
    '[server]' \
    'headless = true' \
    'enableCORS=false' \
    "port = $PORT" \
    > ~/.streamlit/config.toml
sum_insurance.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Module-level data preparation for the sum-insured model: loads the
# insurance data, normalizes the categorical text columns, and builds
# string -> encoded-label lookup tables used by encoding() below.
# (No-op exploratory `data.head()` calls from the original removed.)

import pandas as pd
import numpy as np

data = pd.read_csv("insurance(R).csv")
data_new = data.copy(deep=True)  # untouched copy of the raw data

import re

# Normalize every object column: lowercase and strip non-alphanumerics
# so category strings compare cleanly.
obj_columns = data.select_dtypes("object")
for col in obj_columns:
    data[col] = data[col].apply(
        lambda x: re.sub(r'[^a-zA-Z0-9]', '', x.lower())).astype("str")

# Raw (normalized) category strings, captured BEFORE label-encoding.
season_catogory = list(data.season.values)
scheme_catogory = list(data.scheme.values)
state_catogory = list(data.state_name.values)
district_catogory = list(data.district_name.values)

columns = ['season', 'scheme', 'state_name', 'district_name']
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
for col in columns:
    data[col] = encoder.fit_transform(data[col])

# Encoded labels, aligned row-for-row with the raw strings above.
season_label = list(data.season.values)
scheme_label = list(data.scheme.values)
state_label = list(data.state_name.values)
district_label = list(data.district_name.values)

# Lookup tables: normalized category string -> encoded label.
season_category_label_dict = dict(zip(season_catogory, season_label))
scheme_category_label_dict = dict(zip(scheme_catogory, scheme_label))
state_category_label_dict = dict(zip(state_catogory, state_label))
district_category_label_dict = dict(zip(district_catogory, district_label))

# NOTE(review): several of these are unused in this file — kept because
# other code may rely on this module's namespace; verify before pruning.
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler, FunctionTransformer
from sklearn.model_selection import train_test_split

X = data.drop("sum_insured", axis=1)
y = data["sum_insured"]
72
+
73
+
74
def encoding(input_data):
    """Map the four leading category strings of *input_data* in place to
    their encoded labels and return the mutated list.

    Expects input_data[0..3] = season, scheme, state name, district name.
    Raises KeyError for a category string unseen at encoding time.
    """
    def _norm(s):
        # One replace suffices — the original chained four identical
        # .replace(" ", "") calls, which is a no-op after the first.
        return s.lower().replace(" ", "")

    input_data[0] = season_category_label_dict[_norm(input_data[0])]
    input_data[1] = scheme_category_label_dict[_norm(input_data[1])]
    input_data[2] = state_category_label_dict[_norm(input_data[2])]
    input_data[3] = district_category_label_dict[_norm(input_data[3])]
    return input_data
80
+
81
+
82
+
uploaded_image.jpg ADDED