Spaces:

harshiv
/

predictt

Build error

App Files Community

harshiv committed on Apr 15, 2023

Commit

3b812c7

1 Parent(s): 07f473b

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -54

app.py CHANGED Viewed

@@ -1,85 +1,74 @@
 import pandas as pd
-import sklearn
-from sklearn.compose import ColumnTransformer
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.impute import SimpleImputer
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import LabelEncoder, StandardScaler
-from streamlit import *
-import joblib
 # Load the CSV data
 data = pd.read_csv('dataset.csv')
-# Split the data into features and labels
 X = data.drop('PlacedOrNot', axis=1)
 y = data['PlacedOrNot']
-# Encode categorical features
-categorical_features = ['HistoryOfBacklogs']
-for feature in categorical_features:
-    encoder = LabelEncoder()
-    X[feature] = encoder.fit_transform(X[feature])
 # Split the data into training and testing sets
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-# Create the pipeline
-numerical_features = ['Internships', 'CGPA']
-numerical_transformer = StandardScaler()
-categorical_features = [ 'HistoryOfBacklogs']
-categorical_transformer = SimpleImputer(strategy='most_frequent')
 preprocessor = ColumnTransformer(
     transformers=[
-        ('num', numerical_transformer, numerical_features),
-        ('cat', categorical_transformer, categorical_features)
     ])
 pipeline = Pipeline([
     ('preprocessor', preprocessor),
     ('classifier', RandomForestClassifier(random_state=42))
 ])
-# Train the model
 pipeline.fit(X_train, y_train)
-# Evaluate the model
-accuracy = pipeline.score(X_test, y_test)
 print('Accuracy:', accuracy)
 joblib.dump(pipeline, 'student_placement_model.joblib')
 # Define Streamlit API
-def predict_placement(internships, cgpa, history_of_backlogs, stream):
-    # Load the trained pipeline
-    pipeline = joblib.load('student_placement_model.joblib')
-    # Prepare input data
-    input_data = pd.DataFrame({'internships': [internships],
-                                'cgpa': [cgpa],
-                                'history_of_backlogs': [history_of_backlogs],
-                                'stream': [stream]})
-    # Make prediction
-    prediction = pipeline.predict(input_data)
-    return prediction[0]
-# Define Streamlit web app
-def streamlit_app():
-    title('Student Placement Prediction')
-    internships = number_input('Number of internships:', min_value=0, max_value=10, step=1)
-    cgpa = number_input('CGPA:', min_value=0.0, max_value=10.0, step=0.1)
-    history_of_backlogs = number_input('Number of history of backlogs:', min_value=0, max_value=10, step=1)
-    stream = selectbox('Stream:', options=['Science', 'Commerce', 'Arts'])
-    prediction = predict_placement(internships, cgpa, history_of_backlogs, stream)
-    if prediction == 1:
-        result = 'Placed'
-    else:
-        result = 'Not Placed'
-    button('Predict Placement')
-    write(f'Result: {result}')
-if __name__ == '__main__':
-    streamlit_app()

 import pandas as pd
+import numpy as np
 from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.pipeline import Pipeline
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.metrics import accuracy_score
+import streamlit as st
 # Load the CSV data
 data = pd.read_csv('dataset.csv')
+# Split the data into features and target variable
 X = data.drop('PlacedOrNot', axis=1)
 y = data['PlacedOrNot']
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 # Split the data into training and testing sets
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 preprocessor = ColumnTransformer(
     transformers=[
+        ('num', StandardScaler(), ['internships', 'cgpa', 'history_of_backlogs']),
+        ('cat', OneHotEncoder(), ['gender', 'stream'])
     ])
+# Create the pipeline with Random Forest classifier
 pipeline = Pipeline([
     ('preprocessor', preprocessor),
     ('classifier', RandomForestClassifier(random_state=42))
 ])
+# Fit the pipeline to the training data
 pipeline.fit(X_train, y_train)
+# Make predictions on the test data
+y_pred = pipeline.predict(X_test)
+# Calculate accuracy of the model
+accuracy = accuracy_score(y_test, y_pred)
 print('Accuracy:', accuracy)
 joblib.dump(pipeline, 'student_placement_model.joblib')
 # Define Streamlit API
+# Streamlit API for serving the model
+st.title('Student Job Placement Prediction')
+# Input form for user to enter features
+st.markdown('Please enter the following information:')
+internships = st.number_input('Number of Internships', min_value=0, max_value=10)
+cgpa = st.number_input('CGPA', min_value=0.0, max_value=10.0)
+history_of_backlogs = st.number_input('History of Backlogs', min_value=0, max_value=10)
+gender = st.selectbox('Gender', ('Male', 'Female'))
+stream = st.selectbox('Stream', ('Engineering', 'Science', 'Commerce'))
+submit = st.button('Submit')
+# Make prediction on user input when 'Submit' button is clicked
+if submit:
+    # Create a dataframe with user input
+    user_data = pd.DataFrame([[internships, cgpa, history_of_backlogs, gender, stream]],
+                             columns=['internships', 'cgpa', 'history_of_backlogs', 'gender', 'stream'])
+    # Make prediction using the pipeline
+    prediction = pipeline.predict(user_data)
+    # Display prediction
+    if prediction[0] == 1:
+        st.success('Congratulations! The student is likely to be placed.')
+    else:
+        st.warning('Sorry, the student is unlikely to be placed.')