| | |
import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import pipeline
| |
|
| | |
def load_data(file_path):
    """Read a CSV source into a pandas DataFrame.

    Args:
        file_path: Anything ``pd.read_csv`` accepts; ``main`` passes the
            object returned by Streamlit's file uploader.

    Returns:
        The parsed DataFrame.
    """
    return pd.read_csv(file_path)
| |
|
| | |
def clean_data(data):
    """Drop every row that contains a missing value.

    The frame is modified in place and the same object is returned, so the
    caller's DataFrame is changed as well.

    Args:
        data: DataFrame to clean.

    Returns:
        The same DataFrame object, with incomplete rows removed.
    """
    # Defaults spelled out: remove rows (axis=0) having any missing cell.
    data.dropna(axis=0, how="any", inplace=True)
    return data
| |
|
| | |
def perform_eda(data):
    """Show summary statistics and the column overview in the Streamlit page.

    Args:
        data: DataFrame to summarise.
    """
    st.write(data.describe())

    # DataFrame.info() prints to stdout and returns None, so passing its
    # result to st.write would only display "None". Capture the text in a
    # buffer and render that instead.
    info_buffer = io.StringIO()
    data.info(buf=info_buffer)
    st.text(info_buffer.getvalue())
| |
|
| | |
def visualize_data(data):
    """Render the price histogram and the price-by-brand box plot.

    Args:
        data: DataFrame with at least the 'price' and 'brand' columns.
    """
    # plt.show() does not place a figure on a Streamlit page; each figure is
    # built explicitly and handed to st.pyplot, then closed so figures do not
    # pile up across calls.
    hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
    sns.histplot(data['price'], kde=True, ax=hist_ax)
    hist_ax.set_title('Price Distribution')
    st.pyplot(hist_fig)
    plt.close(hist_fig)

    box_fig, box_ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(x='brand', y='price', data=data, ax=box_ax)
    box_ax.set_title('Price by Brand')
    st.pyplot(box_fig)
    plt.close(box_fig)
| |
|
| | |
def encode_features(data):
    """Label-encode the categorical laptop attributes.

    Each listed column is overwritten with the output of
    ``LabelEncoder.fit_transform``. The frame is modified in place and the
    same object is returned.

    Args:
        data: DataFrame containing the columns 'brand', 'processor',
            'Ram_type', 'ROM_type', 'GPU' and 'OS'.

    Returns:
        The same DataFrame with those columns replaced by their encodings.
    """
    for column in ('brand', 'processor', 'Ram_type', 'ROM_type', 'GPU', 'OS'):
        # Every column is fitted independently of the others.
        data[column] = LabelEncoder().fit_transform(data[column])
    return data
| |
|
| | |
def build_model(data):
    """Train a random-forest regressor on 'price' and report its scores.

    20% of the rows are held out (random_state=42); R² and mean absolute
    error on that hold-out set are written to the Streamlit page.

    Args:
        data: DataFrame with a 'price' column; every other column is used
            as a feature.

    Returns:
        The fitted RandomForestRegressor.
    """
    target = data['price']
    features = data.drop(columns=['price'])
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(X_train, y_train)
    held_out_predictions = forest.predict(X_test)

    r2 = r2_score(y_test, held_out_predictions)
    st.write(f'R² Score: {r2}')
    mae = mean_absolute_error(y_test, held_out_predictions)
    st.write(f'Mean Absolute Error: {mae}')

    return forest
| |
|
| | |
def analyze_text(feedback_data):
    """Attach a sentiment label to every feedback entry.

    A 'sentiment-analysis' pipeline is created on each call and applied to
    the 'feedback' column one value at a time. The label of the first result
    is stored in a new 'sentiment' column. The frame is modified in place
    and the same object is returned.

    Args:
        feedback_data: DataFrame with a 'feedback' column.

    Returns:
        The same DataFrame with the added 'sentiment' column.
    """
    classifier = pipeline('sentiment-analysis')

    def label_for(text):
        # Keep only the label of the first result returned for this text.
        return classifier(text)[0]['label']

    feedback_data['sentiment'] = feedback_data['feedback'].apply(label_for)
    return feedback_data
| |
|
| | |
def main():
    """Run the Streamlit laptop-price app.

    Once a CSV is uploaded, the data is loaded and cleaned, then the EDA,
    visualisation, feature-encoding and model-training steps run in order.
    Pressing 'Predict' shows the model's predictions for the uploaded rows
    together with their distribution.
    """
    st.title("Laptop Price Predictor")
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    # Nothing to do until the user has supplied a file.
    if uploaded_file is None:
        return

    data = load_data(uploaded_file)
    data = clean_data(data)

    st.subheader("Exploratory Data Analysis")
    perform_eda(data)

    st.subheader("Data Visualization")
    visualize_data(data)

    st.subheader("Feature Engineering")
    data = encode_features(data)

    st.subheader("Machine Learning Model")
    model = build_model(data)

    st.subheader("Make Predictions")
    if st.button('Predict'):
        predictions = model.predict(data.drop(['price'], axis=1))
        st.write(predictions)

        # plt.show() does not place a figure on a Streamlit page; hand the
        # figure to st.pyplot and close it afterwards.
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(predictions, kde=True, ax=ax)
        ax.set_title('Predicted Price Distribution')
        st.pyplot(fig)
        plt.close(fig)
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
| |
|