ibrahim yıldız committed on
Commit
40ba88a
·
verified ·
1 Parent(s): f1f9fac

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +62 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.linear_model import LinearRegression
5
+ import streamlit as st
6
+ import plotly.figure_factory as ff
7
+
8
# Streamlit app: shows the startups dataset, two exploratory charts, trains a
# linear regression on the three spend columns and lets the user predict
# profit interactively. The whole script re-runs top to bottom on every
# widget interaction.
st.title('A Simple Profit Prediction 💵')
st.image('https://assets2.ignimgs.com/2013/11/12/the-hobbit-bilbo-gold-1280jpg-e9649d_160w.jpg?width=1280')

st.write('### Data ')
df = pd.read_csv("https://raw.githubusercontent.com/amankharwal/Website-data/master/Startups.csv")
st.dataframe(df)
st.write('This data is about the R&D spend, Administration cost, Marketing Spend, State of operation, and the historical profit generated by 50 startups.')

# Total profit per state, rendered as a bar chart.
df_graph = df.groupby('State')['Profit'].sum().reset_index()
st.write("### Total Profits by State")
st.bar_chart(df_graph, x="State", y="Profit")

# Pairwise correlations between the numeric columns, annotated with the
# coefficients rounded to two decimals.
numeric_columns = df.select_dtypes(include=['float64', 'int64'])
corr_matrix = numeric_columns.corr()
axis_labels = numeric_columns.columns.tolist()
fig = ff.create_annotated_heatmap(
    z=corr_matrix.values,
    x=axis_labels,
    y=axis_labels,
    annotation_text=corr_matrix.round(2).values,
)
st.write("### Correlation Heatmap")
st.plotly_chart(fig)

st.write('There is a clear correlation between R&D and Profit.Also, administration sucks.')

# Model: ordinary least squares on the three spend columns.
st.write('### Model Training')
x = df[["R&D Spend", "Administration", "Marketing Spend"]].to_numpy()
# Keep the target 1-D so model.predict() returns one scalar per input row
# (a (-1, 1) target makes predict() return a 2-D array instead).
y = df["Profit"].to_numpy()
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(xtrain, ytrain)
# Report the measured R² instead of a hard-coded percentage; R² is the share
# of variance explained on the held-out split, not a classification accuracy.
r2 = model.score(xtest, ytest)
st.write(f"After the train/test split, I fitted a LinearRegression model. Its R² score on the test set is {r2:.3f} (1.0 would be a perfect fit).")

st.write('### Using It')
st.write('Change the expenditures to see the difference in profit.')
# Input boxes for the three features, in the same order as the training
# columns. Spend cannot be negative, so the inputs are bounded at zero.
feature1 = st.number_input('R&D Spend', min_value=0, value=100000)
feature2 = st.number_input('Administration Spend', min_value=0, value=100000)
feature3 = st.number_input('Marketing Spend', min_value=0, value=100000)

# A single-row 2-D array, as scikit-learn expects (n_samples, n_features).
features = np.array([[feature1, feature2, feature3]])

prediction = model.predict(features)
# prediction has shape (1,); format the scalar rather than printing an array.
st.write(f'Predicted Profit: {float(prediction[0]):,.2f}')

st.write('### Conclusion')
st.write('This model abides the expenditure/profit relation of the 50 startups in this dataset. If you are one of them, this app will be useful. Otherwise, to make a more general model, we simply need more data.')
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ plotly