Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
- Startups.csv +51 -0
- app.py +76 -0
- best_regression_model.pkl +3 -0
- requirements.txt +3 -0
Startups.csv
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
R&D Spend,Administration,Marketing Spend,State,Profit
|
| 2 |
+
165349.2,136897.8,471784.1,New York,192261.83
|
| 3 |
+
162597.7,151377.59,443898.53,California,191792.06
|
| 4 |
+
153441.51,101145.55,407934.54,Florida,191050.39
|
| 5 |
+
144372.41,118671.85,383199.62,New York,182901.99
|
| 6 |
+
142107.34,91391.77,366168.42,Florida,166187.94
|
| 7 |
+
131876.9,99814.71,362861.36,New York,156991.12
|
| 8 |
+
134615.46,147198.87,127716.82,California,156122.51
|
| 9 |
+
130298.13,145530.06,323876.68,Florida,155752.6
|
| 10 |
+
120542.52,148718.95,311613.29,New York,152211.77
|
| 11 |
+
123334.88,108679.17,304981.62,California,149759.96
|
| 12 |
+
101913.08,110594.11,229160.95,Florida,146121.95
|
| 13 |
+
100671.96,91790.61,249744.55,California,144259.4
|
| 14 |
+
93863.75,127320.38,249839.44,Florida,141585.52
|
| 15 |
+
91992.39,135495.07,252664.93,California,134307.35
|
| 16 |
+
119943.24,156547.42,256512.92,Florida,132602.65
|
| 17 |
+
114523.61,122616.84,261776.23,New York,129917.04
|
| 18 |
+
78013.11,121597.55,264346.06,California,126992.93
|
| 19 |
+
94657.16,145077.58,282574.31,New York,125370.37
|
| 20 |
+
91749.16,114175.79,294919.57,Florida,124266.9
|
| 21 |
+
86419.7,153514.11,0,New York,122776.86
|
| 22 |
+
76253.86,113867.3,298664.47,California,118474.03
|
| 23 |
+
78389.47,153773.43,299737.29,New York,111313.02
|
| 24 |
+
73994.56,122782.75,303319.26,Florida,110352.25
|
| 25 |
+
67532.53,105751.03,304768.73,Florida,108733.99
|
| 26 |
+
77044.01,99281.34,140574.81,New York,108552.04
|
| 27 |
+
64664.71,139553.16,137962.62,California,107404.34
|
| 28 |
+
75328.87,144135.98,134050.07,Florida,105733.54
|
| 29 |
+
72107.6,127864.55,353183.81,New York,105008.31
|
| 30 |
+
66051.52,182645.56,118148.2,Florida,103282.38
|
| 31 |
+
65605.48,153032.06,107138.38,New York,101004.64
|
| 32 |
+
61994.48,115641.28,91131.24,Florida,99937.59
|
| 33 |
+
61136.38,152701.92,88218.23,New York,97483.56
|
| 34 |
+
63408.86,129219.61,46085.25,California,97427.84
|
| 35 |
+
55493.95,103057.49,214634.81,Florida,96778.92
|
| 36 |
+
46426.07,157693.92,210797.67,California,96712.8
|
| 37 |
+
46014.02,85047.44,205517.64,New York,96479.51
|
| 38 |
+
28663.76,127056.21,201126.82,Florida,90708.19
|
| 39 |
+
44069.95,51283.14,197029.42,California,89949.14
|
| 40 |
+
20229.59,65947.93,185265.1,New York,81229.06
|
| 41 |
+
38558.51,82982.09,174999.3,California,81005.76
|
| 42 |
+
28754.33,118546.05,172795.67,California,78239.91
|
| 43 |
+
27892.92,84710.77,164470.71,Florida,77798.83
|
| 44 |
+
23640.93,96189.63,148001.11,California,71498.49
|
| 45 |
+
15505.73,127382.3,35534.17,New York,69758.98
|
| 46 |
+
22177.74,154806.14,28334.72,California,65200.33
|
| 47 |
+
1000.23,124153.04,1903.93,New York,64926.08
|
| 48 |
+
1315.46,115816.21,297114.46,Florida,49490.75
|
| 49 |
+
0,135426.92,0,California,42559.73
|
| 50 |
+
542.05,51743.15,0,New York,35673.41
|
| 51 |
+
0,116983.8,45173.06,California,14681.4
|
app.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from sklearn.pipeline import Pipeline
|
| 3 |
+
from sklearn.compose import ColumnTransformer
|
| 4 |
+
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
| 5 |
+
import streamlit as st
|
| 6 |
+
import joblib
|
| 7 |
+
|
| 8 |
+
# Load the serialized regressor and the dataset that ship alongside this app.
# NOTE: joblib.load unpickles arbitrary Python objects -- only ever point this
# at a model file you trust (here: the one committed with the app).
model = joblib.load('best_regression_model.pkl')
df = pd.read_csv('Startups.csv')

# Preprocessor: standardize the numeric spend columns and one-hot encode the
# categorical 'State' column.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['R&D Spend', 'Administration', 'Marketing Spend']),
        ('cat', OneHotEncoder(), ['State'])
    ]
)

# Pipeline: preprocessing followed by the regressor.
# NOTE: Pipeline.fit calls fit on every step, including the final estimator,
# so the loaded model is (re-)fitted on the full Startups.csv here.
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('regressor', model)])
pipeline.fit(
    df[['R&D Spend', 'Administration', 'Marketing Spend', 'State']],
    # Pass the target as a 1-D Series: a one-column DataFrame makes some
    # regressors emit column-vector warnings and return 2-D predictions.
    df['Profit'],
)
|
| 23 |
+
|
| 24 |
+
# Prediction function
|
| 25 |
+
def price_pred(rd, administration, marketing, state):
    """Predict the profit for a single company.

    Args:
        rd: R&D spend amount.
        administration: Administration spend amount.
        marketing: Marketing spend amount.
        state: State name, fed to the pipeline's 'State' column.

    Returns:
        The predicted profit as a plain Python float.
    """
    # The pipeline selects columns by name, so the keys must match the
    # column names it was fitted with.
    input_data = pd.DataFrame({
        'R&D Spend': [rd],
        'Administration': [administration],
        'Marketing Spend': [marketing],
        'State': [state]
    })
    prediction = pipeline.predict(input_data)
    # Depending on how the regressor was fitted, predict() yields shape (1,)
    # or (1, 1). Flatten first so a scalar is always returned instead of a
    # one-element array (float() on such arrays is deprecated in NumPy).
    return float(prediction.ravel()[0])
|
| 34 |
+
|
| 35 |
+
# Main function for Streamlit app layout
|
| 36 |
+
def main():
    """Render the Streamlit page.

    Lays out the header, collects the state and the three spend amounts in
    the sidebar, echoes the entered values, and shows the predicted profit
    once the sidebar button is pressed.
    """
    st.set_page_config(page_title="Profit Prediction", page_icon="💰", layout="wide")

    # Page heading and introduction
    st.title("💼 Profit Prediction Tool")
    intro_text = """
Welcome to the **Profit Prediction Tool**! This tool uses machine learning to predict the profit of a company
based on different financial parameters. Please input the values for **R&D Spend**, **Administration Spend**,
and **Marketing Spend** along with the **State** to get the predicted profit.
"""
    st.markdown(intro_text)

    # Sidebar inputs
    panel = st.sidebar
    panel.header("Enter Your Company Details")
    chosen_state = panel.selectbox('Select your State', df['State'].unique())

    def spend_input(label, column):
        # Integer input from 0 up to the (truncated) dataset maximum of the column.
        return panel.number_input(label, 0, int(df[column].max()), step=1000)

    rd_spend = spend_input('R&D Spend Amount ($)', 'R&D Spend')
    admin_spend = spend_input('Administration Spend Amount ($)', 'Administration')
    marketing_spend = spend_input('Marketing Spend Amount ($)', 'Marketing Spend')

    # Echo the inputs back so the user can confirm them
    st.markdown("### You entered:")
    st.write(f"**State**: {chosen_state}")
    entered_amounts = (
        ("R&D Spend", rd_spend),
        ("Administration Spend", admin_spend),
        ("Marketing Spend", marketing_spend),
    )
    for caption, amount in entered_amounts:
        st.write(f"**{caption}**: ${amount:,.2f}")

    # Run the prediction only when the sidebar button is pressed
    if panel.button('Predict Profit'):
        predicted = float(price_pred(rd_spend, admin_spend, marketing_spend, chosen_state))
        st.markdown(f"### The Predicted Profit is: **${predicted:,.2f}**")

    # Footer describing the model
    about_text = """
---
### About the Model
The model is built using a **regression algorithm** that takes into account the R&D spend, administration expenses,
and marketing spend for a company to predict its profit. The model was trained using real-world company data.
"""
    st.markdown(about_text)


# Build the page when this file is executed as a script.
if __name__ == "__main__":
    main()
|
best_regression_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d1a197dc6c1c7a0c03998e4f615650f5f33a647c681655f86cdbbf2744f563a
|
| 3 |
+
size 178493
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scikit-learn
|
| 2 |
+
streamlit
|
| 3 |
+
joblib
|