File size: 3,478 Bytes
547d645
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer


# st.cache has been deprecated since Streamlit 1.18 in favour of st.cache_data.
# Prefer the new decorator and only fall back to the legacy one on older
# installations that do not provide it.
_cache_data = getattr(st, "cache_data", None) or st.cache


@_cache_data
def load_data():
    """Load the dataset from ``income_evaluation.csv``.

    The path is relative, so the file is looked up in the current working
    directory. The result is cached by Streamlit so the CSV is not re-read
    on every script rerun.

    Returns:
        pandas.DataFrame: the CSV contents as parsed by ``pd.read_csv``.
    """
    return pd.read_csv('income_evaluation.csv')

df = load_data()

# Features are every column except ' fnlwgt' and the target ' income'.
# The column names are spelled with a leading space throughout this script
# (presumably mirroring the CSV header - confirm against the data file).
x = df.drop([' fnlwgt', ' income'], axis=1)
y = df[' income']


def _split_feature_columns(frame):
    """Return ``(numeric_columns, categorical_columns)`` of *frame* by dtype."""
    numeric_columns = frame.select_dtypes(include=['int64']).columns
    categorical_columns = frame.select_dtypes(include=['object']).columns
    return numeric_columns, categorical_columns


numeric_features, categorical_features = _split_feature_columns(x)


def _build_pipeline(numeric_columns, categorical_columns):
    """Return an unfitted preprocessing + gradient-boosting pipeline."""
    column_preprocessor = ColumnTransformer(
        transformers=[
            # One-hot encode the categorical columns.
            ('cat', OneHotEncoder(), categorical_columns),
            # Standardise the numeric columns.
            ('num', StandardScaler(), numeric_columns),
        ])
    return Pipeline(steps=[
        ('preprocessor', column_preprocessor),
        # Classify with gradient boosting.
        ('classifier', GradientBoostingClassifier()),
    ])


# Streamlit re-executes this whole script on every widget interaction.
# Without caching, the model would be retrained each time the user touches
# the form, so the fitted pipeline is kept in Streamlit's resource cache.
# Older Streamlit versions without st.cache_resource fall back to the legacy
# decorator (allow_output_mutation avoids re-hashing the fitted model).
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is None:
    _cache_resource = st.cache(allow_output_mutation=True)


@_cache_resource
def _fit_pipeline(features, target):
    """Build the pipeline and fit it on *features* / *target*.

    Args:
        features: DataFrame of input columns.
        target: Series of labels aligned with *features*.

    Returns:
        The fitted ``Pipeline``.
    """
    numeric_columns, categorical_columns = _split_feature_columns(features)
    model = _build_pipeline(numeric_columns, categorical_columns)
    model.fit(features, target)
    return model


# Train on the full dataset (no hold-out split), reusing the cached model
# on reruns.
pipeline = _fit_pipeline(x, y)

# Kept as a module-level name for backward compatibility; it is the fitted
# preprocessing step of the pipeline.
preprocessor = pipeline.named_steps['preprocessor']

# Application title.
st.title("Gradient Boosting ile Gelir Sınıflandırma Uygulaması")


def _ask_number(label):
    """Render a numeric input with a lower bound of 0 and return its value."""
    return st.number_input(label, min_value=0)


def _ask_category(label, column):
    """Render a select box offering the distinct values of ``df[column]``."""
    return st.selectbox(label, df[column].unique())


# Input widgets, created in the order in which they appear on the page.
age = _ask_number("Yaş")
workclass = _ask_category("Çalışma Sınıfı", ' workclass')
education = _ask_category("Eğitim", ' education')
education_num = _ask_number("Eğitim seviyesi")
marital_status = _ask_category("Medeni durumu", ' marital-status')
occupation = _ask_category('Pozisyonu', ' occupation')
relationship = _ask_category('ilişki durumu', ' relationship')
race = _ask_category('Milliyeti', ' race')
sex = _ask_category('Cinsiyet', ' sex')
capital_gain = _ask_number("Sermaye kazancı")
capital_loss = _ask_number("Sermaye Kaybı")
hours_per_week = _ask_number("Haftalık Çalışma Saati")
native_country = _ask_category('Doğum Yeri', ' native-country')

# Map each model input column to the value entered in the form.
_form_values = {
    'age': age,
    ' workclass': workclass,
    ' education': education,
    ' education-num': education_num,
    ' marital-status': marital_status,
    ' occupation': occupation,
    ' relationship': relationship,
    ' race': race,
    ' sex': sex,
    ' capital-gain': capital_gain,
    ' capital-loss': capital_loss,
    ' hours-per-week': hours_per_week,
    ' native-country': native_country,
}

# Single-row frame holding the form values, one column per model input.
new_data = pd.DataFrame(
    {column: [value] for column, value in _form_values.items()}
)

# Run the model only when the user presses the button.
if st.button('Predict'):
    predictions = pipeline.predict(new_data)
    st.write("Income Prediction:", predictions)