mjsp commited on
Commit
b1ea0f9
·
verified ·
1 Parent(s): 54ff576

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.preprocessing import LabelEncoder
8
+ from sklearn.ensemble import RandomForestRegressor
9
+ from sklearn.metrics import r2_score, mean_absolute_error
10
+ from transformers import pipeline
11
+ import streamlit as st
12
+
13
+ # Step 1: Data Collection
14
def load_data(file_path):
    """Read a CSV dataset from a path or file-like object.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts (path string or
            an uploaded file buffer from Streamlit).

    Returns:
        pandas.DataFrame: The parsed dataset.
    """
    return pd.read_csv(file_path)
17
+
18
+ # Step 2: Data Cleaning
19
def clean_data(data):
    """Drop all rows containing missing values.

    The original implementation used ``dropna(inplace=True)``, which
    silently mutated the caller's DataFrame. Returning a fresh frame is
    side-effect free; every caller already reassigns the result
    (``data = clean_data(data)``), so this is backward compatible.

    Args:
        data (pandas.DataFrame): Raw dataset, possibly with NaNs.

    Returns:
        pandas.DataFrame: A new frame with NaN-bearing rows removed.
    """
    return data.dropna()
22
+
23
+ # Step 3: Exploratory Data Analysis (EDA)
24
def perform_eda(data):
    """Render basic exploratory statistics for *data* in the Streamlit app.

    Bug fixed: ``DataFrame.info()`` prints to stdout and returns ``None``,
    so the original ``st.write(data.info())`` displayed ``None`` in the UI.
    The info text is now captured into a buffer and shown with ``st.text``.

    Args:
        data (pandas.DataFrame): The cleaned dataset to summarize.
    """
    import io  # local import: only needed here to capture info() output

    st.write(data.describe())
    buffer = io.StringIO()
    data.info(buf=buffer)  # info() writes into the buffer, returns None
    st.text(buffer.getvalue())
27
+
28
+ # Step 4: Data Visualization
29
def visualize_data(data):
    """Show a price histogram and a per-brand price boxplot in Streamlit.

    Bug fixed: ``plt.show()`` opens a desktop window (or does nothing on a
    headless server) and never renders inside a Streamlit page. Each plot is
    now drawn on an explicit figure and handed to ``st.pyplot``.

    Args:
        data (pandas.DataFrame): Dataset with at least 'price' and 'brand'
            columns (as used by the plots below).
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(data['price'], kde=True, ax=ax)
    ax.set_title('Price Distribution')
    st.pyplot(fig)
    plt.close(fig)  # Streamlit has captured the figure; free the memory

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(x='brand', y='price', data=data, ax=ax)
    ax.set_title('Price by Brand')
    st.pyplot(fig)
    plt.close(fig)
39
+
40
+ # Step 5: Feature Engineering
41
def encode_features(data):
    """Integer-encode the known categorical columns of *data*.

    Each column gets its own ``LabelEncoder`` fit, so codes are assigned
    per column (alphabetical order of that column's unique values) and are
    not comparable across columns. Columns are overwritten in place and the
    same frame is returned for chaining.

    Args:
        data (pandas.DataFrame): Dataset containing the categorical columns
            listed below.

    Returns:
        pandas.DataFrame: The same frame with encoded columns.
    """
    for column in ('brand', 'processor', 'Ram_type', 'ROM_type', 'GPU', 'OS'):
        data[column] = LabelEncoder().fit_transform(data[column])
    return data
47
+
48
+ # Step 6: Machine Learning Modeling
49
def build_model(data):
    """Train a random-forest price regressor and report hold-out metrics.

    Splits *data* 80/20, fits a 100-tree ``RandomForestRegressor`` with a
    fixed seed for reproducibility, and writes R² and MAE on the 20% test
    split to the Streamlit page.

    Args:
        data (pandas.DataFrame): Fully encoded dataset containing a numeric
            'price' target column.

    Returns:
        RandomForestRegressor: The fitted model.
    """
    features = data.drop(['price'], axis=1)
    target = data['price']
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    regressor = RandomForestRegressor(n_estimators=100, random_state=42)
    regressor.fit(X_train, y_train)
    test_predictions = regressor.predict(X_test)

    # Hold-out evaluation shown directly in the app
    st.write(f'R² Score: {r2_score(y_test, test_predictions)}')
    st.write(f'Mean Absolute Error: {mean_absolute_error(y_test, test_predictions)}')

    return regressor
63
+
64
+ # Step 7: NLP Analysis using Hugging Face (if any text data)
65
def analyze_text(feedback_data):
    """Label each feedback entry with a sentiment via a Hugging Face pipeline.

    Args:
        feedback_data (pandas.DataFrame): Frame with a text 'feedback'
            column.

    Returns:
        pandas.DataFrame: The same frame with a new 'sentiment' column
        (e.g. 'POSITIVE'/'NEGATIVE' labels from the default model).
    """
    classifier = pipeline('sentiment-analysis')

    def sentiment_label(text):
        # The pipeline returns a list with one result dict per input string.
        return classifier(text)[0]['label']

    feedback_data['sentiment'] = feedback_data['feedback'].apply(sentiment_label)
    return feedback_data
69
+
70
+ # Step 8: User Interaction with Streamlit
71
def main():
    """Streamlit entry point: upload a CSV, explore it, train, and predict.

    Pipeline: load -> clean -> EDA -> visualize -> encode -> train -> predict.

    Bug fixed: the Predict branch called ``plt.show()``, which does not
    render inside a Streamlit app; the histogram is now drawn on an explicit
    figure and passed to ``st.pyplot``.
    """
    st.title("Laptop Price Predictor")
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    if uploaded_file is None:
        return  # nothing to do until the user uploads a dataset

    data = load_data(uploaded_file)
    data = clean_data(data)

    st.subheader("Exploratory Data Analysis")
    perform_eda(data)

    st.subheader("Data Visualization")
    visualize_data(data)

    st.subheader("Feature Engineering")
    data = encode_features(data)

    st.subheader("Machine Learning Model")
    model = build_model(data)

    st.subheader("Make Predictions")
    if st.button('Predict'):
        # NOTE(review): this predicts on the same rows the model was trained
        # on (minus the target), so the histogram reflects in-sample fit —
        # confirm this is intended rather than a hold-out prediction.
        predictions = model.predict(data.drop(['price'], axis=1))
        st.write(predictions)
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(predictions, kde=True, ax=ax)
        ax.set_title('Predicted Price Distribution')
        st.pyplot(fig)
        plt.close(fig)  # free the figure after Streamlit captures it

    # NLP Analysis (if applicable)
    # st.subheader("NLP Analysis")
    # feedback_data = load_feedback_data()  # assumes a loader for text data
    # feedback_data = analyze_text(feedback_data)
    # st.write(feedback_data)


if __name__ == "__main__":
    main()