Spaces:
Sleeping
Sleeping
ibrahim yıldız committed on
Upload 2 files
Browse files- app.py +62 -0
- requirements.txt +4 -0
app.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.model_selection import train_test_split
|
| 4 |
+
from sklearn.linear_model import LinearRegression
|
| 5 |
+
import streamlit as st
|
| 6 |
+
import plotly.figure_factory as ff
|
| 7 |
+
|
| 8 |
+
# Page header: app title followed by a banner image loaded from a URL.
banner_url = 'https://assets2.ignimgs.com/2013/11/12/the-hobbit-bilbo-gold-1280jpg-e9649d_160w.jpg?width=1280'
st.title('A Simple Profit Prediction 💵')
st.image(banner_url)

# Data section: read the startups CSV from its URL and show it as a table,
# followed by a one-sentence description of the columns.
dataset_url = "https://raw.githubusercontent.com/amankharwal/Website-data/master/Startups.csv"
dataset_blurb = (
    'This data is about the R&D spend, Administration cost, Marketing Spend, '
    'State of operation, and the historical profit generated by 50 startups.'
)
st.write('### Data ')
df = pd.read_csv(dataset_url)
st.dataframe(df)
st.write(dataset_blurb)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Bar chart of summed Profit per State.
st.write("### Total Profits by State")
# as_index=False keeps 'State' as a regular column, which st.bar_chart
# needs in order to use it as the x axis.
df_graph = df.groupby('State', as_index=False)['Profit'].sum()
st.bar_chart(df_graph, x="State", y="Profit")
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# Correlation heatmap over the numeric columns of the dataset.
# include='number' selects every numeric dtype, so a column parsed as e.g.
# int32 or float32 is not silently left out of the correlation matrix.
numeric_columns = df.select_dtypes(include='number')
corr_matrix = numeric_columns.corr()
axis_labels = numeric_columns.columns.tolist()

# Each cell is annotated with its correlation rounded to two decimals.
fig = ff.create_annotated_heatmap(
    z=corr_matrix.values,
    x=axis_labels,
    y=axis_labels,
    annotation_text=corr_matrix.round(2).values,
)
st.write("### Correlation Heatmap")
st.plotly_chart(fig)

st.write('There is a clear correlation between R&D and Profit. Also, administration sucks.')
|
| 33 |
+
|
| 34 |
+
# Model: linear regression of Profit on the three spend columns.
st.write('### Model Training')
feature_columns = ["R&D Spend", "Administration", "Marketing Spend"]
x = df[feature_columns].to_numpy()
# Profit is kept as a single-column 2-D array, so model.predict() returns
# an array of shape (n_samples, 1) rather than a flat vector.
y = df["Profit"].to_numpy().reshape(-1, 1)

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split (and therefore the reported score) repeatable across reruns.
# train_test_split and LinearRegression come from the imports at the top of
# the script.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(xtrain, ytrain)

# Report the measured R² on the held-out rows instead of a fixed figure, so
# the text stays correct if the data or the split changes.
r2 = model.score(xtest, ytest)
st.write(
    "After the train/test split, I fitted a LinearRegression model. "
    f"Its R² score on the held-out test set is {r2:.3f} "
    "(1.0 would be a perfect fit)."
)
|
| 47 |
+
|
| 48 |
+
# Interactive prediction: the user enters the three spend figures and the
# fitted model's profit estimate is shown.
st.write('### Using It')
st.write('Change the expenditures to see the difference in profit.')

# One numeric input per model feature, each defaulting to 100000.
feature1 = st.number_input('R&D Spend', value=100000)
feature2 = st.number_input('Administration Spend', value=100000)
feature3 = st.number_input('Marketing Spend', value=100000)

# A single-row 2-D array: one sample with three features.
features = np.array([[feature1, feature2, feature3]])

# predict() can return a nested array of shape (1, 1) when the model was
# fitted on a column-shaped target; flatten it so a plain number is shown
# instead of an array such as "[123456.78]".
prediction = model.predict(features)
predicted_profit = float(np.ravel(prediction)[0])
st.write(f'Predicted Profit: {predicted_profit:,.2f}')
|
| 60 |
+
|
| 61 |
+
# Closing section: a heading plus a note on the limits of the model.
closing_note = (
    'This model abides the expenditure/profit relation of the 50 startups in this dataset. '
    'If you are one of them, this app will be useful. '
    'Otherwise, to make a more general model, we simply need more data.'
)
st.write('### Conclusion')
st.write(closing_note)
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas
|
| 2 |
+
numpy
|
| 3 |
+
scikit-learn
|
| 4 |
+
plotly
|