Senasu committed on
Commit
49edc41
·
verified ·
1 Parent(s): 0416d5e

Upload 4 files

Browse files
Files changed (4) hide show
  1. Startups.csv +51 -0
  2. app.py +76 -0
  3. best_regression_model.pkl +3 -0
  4. requirements.txt +3 -0
Startups.csv ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ R&D Spend,Administration,Marketing Spend,State,Profit
2
+ 165349.2,136897.8,471784.1,New York,192261.83
3
+ 162597.7,151377.59,443898.53,California,191792.06
4
+ 153441.51,101145.55,407934.54,Florida,191050.39
5
+ 144372.41,118671.85,383199.62,New York,182901.99
6
+ 142107.34,91391.77,366168.42,Florida,166187.94
7
+ 131876.9,99814.71,362861.36,New York,156991.12
8
+ 134615.46,147198.87,127716.82,California,156122.51
9
+ 130298.13,145530.06,323876.68,Florida,155752.6
10
+ 120542.52,148718.95,311613.29,New York,152211.77
11
+ 123334.88,108679.17,304981.62,California,149759.96
12
+ 101913.08,110594.11,229160.95,Florida,146121.95
13
+ 100671.96,91790.61,249744.55,California,144259.4
14
+ 93863.75,127320.38,249839.44,Florida,141585.52
15
+ 91992.39,135495.07,252664.93,California,134307.35
16
+ 119943.24,156547.42,256512.92,Florida,132602.65
17
+ 114523.61,122616.84,261776.23,New York,129917.04
18
+ 78013.11,121597.55,264346.06,California,126992.93
19
+ 94657.16,145077.58,282574.31,New York,125370.37
20
+ 91749.16,114175.79,294919.57,Florida,124266.9
21
+ 86419.7,153514.11,0,New York,122776.86
22
+ 76253.86,113867.3,298664.47,California,118474.03
23
+ 78389.47,153773.43,299737.29,New York,111313.02
24
+ 73994.56,122782.75,303319.26,Florida,110352.25
25
+ 67532.53,105751.03,304768.73,Florida,108733.99
26
+ 77044.01,99281.34,140574.81,New York,108552.04
27
+ 64664.71,139553.16,137962.62,California,107404.34
28
+ 75328.87,144135.98,134050.07,Florida,105733.54
29
+ 72107.6,127864.55,353183.81,New York,105008.31
30
+ 66051.52,182645.56,118148.2,Florida,103282.38
31
+ 65605.48,153032.06,107138.38,New York,101004.64
32
+ 61994.48,115641.28,91131.24,Florida,99937.59
33
+ 61136.38,152701.92,88218.23,New York,97483.56
34
+ 63408.86,129219.61,46085.25,California,97427.84
35
+ 55493.95,103057.49,214634.81,Florida,96778.92
36
+ 46426.07,157693.92,210797.67,California,96712.8
37
+ 46014.02,85047.44,205517.64,New York,96479.51
38
+ 28663.76,127056.21,201126.82,Florida,90708.19
39
+ 44069.95,51283.14,197029.42,California,89949.14
40
+ 20229.59,65947.93,185265.1,New York,81229.06
41
+ 38558.51,82982.09,174999.3,California,81005.76
42
+ 28754.33,118546.05,172795.67,California,78239.91
43
+ 27892.92,84710.77,164470.71,Florida,77798.83
44
+ 23640.93,96189.63,148001.11,California,71498.49
45
+ 15505.73,127382.3,35534.17,New York,69758.98
46
+ 22177.74,154806.14,28334.72,California,65200.33
47
+ 1000.23,124153.04,1903.93,New York,64926.08
48
+ 1315.46,115816.21,297114.46,Florida,49490.75
49
+ 0,135426.92,0,California,42559.73
50
+ 542.05,51743.15,0,New York,35673.41
51
+ 0,116983.8,45173.06,California,14681.4
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.pipeline import Pipeline
3
+ from sklearn.compose import ColumnTransformer
4
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
5
+ import streamlit as st
6
+ import joblib
7
+
8
# Load the serialized regressor and the dataset used to fit the pipeline below.
# NOTE(review): joblib.load unpickles arbitrary objects -- only ship a
# 'best_regression_model.pkl' that comes from a trusted source.
model = joblib.load('best_regression_model.pkl')
df = pd.read_csv('Startups.csv')

# Preprocessor: standardize the three numeric spend columns and one-hot
# encode the categorical 'State' column.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['R&D Spend', 'Administration', 'Marketing Spend']),
        ('cat', OneHotEncoder(), ['State']),
    ]
)

# Pipeline: preprocessing followed by the loaded regressor.
# NOTE(review): fit() runs at import time and presumably re-estimates the
# loaded model's parameters from Startups.csv, discarding whatever was stored
# in the pickle -- confirm this is intended.
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('regressor', model)])

# The target is passed as a 1-D Series (df['Profit']) rather than a one-column
# DataFrame. A column-vector target makes some scikit-learn estimators emit a
# DataConversionWarning and makes others return 2-D predictions, so that
# `pipeline.predict(...)[0]` would be a 1-element array instead of a scalar.
pipeline.fit(
    df[['R&D Spend', 'Administration', 'Marketing Spend', 'State']],
    df['Profit'],
)
23
+
24
+ # Prediction function
25
def price_pred(rd, administration, marketing, state):
    """Return the pipeline's prediction for a single company.

    The four arguments are placed into a one-row DataFrame whose column names
    match the columns the module-level ``pipeline`` was fitted on.

    Args:
        rd: Value for the 'R&D Spend' column.
        administration: Value for the 'Administration' column.
        marketing: Value for the 'Marketing Spend' column.
        state: Value for the 'State' column.

    Returns:
        The first element of ``pipeline.predict`` for that single row.
    """
    features = {
        'R&D Spend': [rd],
        'Administration': [administration],
        'Marketing Spend': [marketing],
        'State': [state],
    }
    single_row = pd.DataFrame(features)
    return pipeline.predict(single_row)[0]
34
+
35
+ # Main function for Streamlit app layout
36
def main():
    """Lay out the Streamlit page.

    Renders the title and introduction, collects the state and the three
    spend amounts from sidebar widgets, echoes the entered values, and shows
    the predicted profit once the 'Predict Profit' button is pressed.
    """
    st.set_page_config(page_title="Profit Prediction", page_icon="💰", layout="wide")

    # Header: title plus introductory text.
    st.title("💼 Profit Prediction Tool")
    intro_text = """
    Welcome to the **Profit Prediction Tool**! This tool uses machine learning to predict the profit of a company
    based on different financial parameters. Please input the values for **R&D Spend**, **Administration Spend**,
    and **Marketing Spend** along with the **State** to get the predicted profit.
    """
    st.markdown(intro_text)

    # Sidebar: all user inputs live here.
    sidebar = st.sidebar
    sidebar.header("Enter Your Company Details")
    state = sidebar.selectbox('Select your State', df['State'].unique())

    def spend_input(label, column):
        # Integer input from 0 up to the truncated maximum of the dataset column.
        upper_bound = int(df[column].max())
        return sidebar.number_input(label, 0, upper_bound, step=1000)

    rd = spend_input('R&D Spend Amount ($)', 'R&D Spend')
    administration = spend_input('Administration Spend Amount ($)', 'Administration')
    marketing = spend_input('Marketing Spend Amount ($)', 'Marketing Spend')

    # Echo the inputs back so the user can confirm them.
    st.markdown("### You entered:")
    st.write(f"**State**: {state}")
    st.write(f"**R&D Spend**: ${rd:,.2f}")
    st.write(f"**Administration Spend**: ${administration:,.2f}")
    st.write(f"**Marketing Spend**: ${marketing:,.2f}")

    # Run the prediction only when the sidebar button is pressed.
    if sidebar.button('Predict Profit'):
        profit = float(price_pred(rd, administration, marketing, state))
        st.markdown(f"### The Predicted Profit is: **${profit:,.2f}**")

    # Static footer describing the model.
    about_text = """
    ---
    ### About the Model
    The model is built using a **regression algorithm** that takes into account the R&D spend, administration expenses,
    and marketing spend for a company to predict its profit. The model was trained using real-world company data.
    """
    st.markdown(about_text)


if __name__ == '__main__':
    main()
best_regression_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d1a197dc6c1c7a0c03998e4f615650f5f33a647c681655f86cdbbf2744f563a
3
+ size 178493
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ scikit-learn
2
+ streamlit
3
+ joblib