"""Streamlit app: explore a laptop dataset and predict prices with a random forest.

Pipeline: load CSV -> clean -> EDA -> visualize -> label-encode categoricals ->
train/evaluate a RandomForestRegressor -> optional sentiment analysis on feedback text.
"""

import io

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
from transformers import pipeline
import streamlit as st


# Step 1: Data Collection
def load_data(file_path):
    """Read a CSV file (path or file-like object) into a DataFrame."""
    return pd.read_csv(file_path)


# Step 2: Data Cleaning
def clean_data(data):
    """Return a copy of *data* with rows containing any NaN removed.

    Avoids ``inplace=True`` so the caller's original frame is not mutated.
    """
    return data.dropna()


# Step 3: Exploratory Data Analysis (EDA)
def perform_eda(data):
    """Render summary statistics and schema info in the Streamlit page.

    ``DataFrame.info()`` prints to stdout and returns None, so its output
    must be captured into a buffer to be displayable.
    """
    st.write(data.describe())
    buffer = io.StringIO()
    data.info(buf=buffer)
    st.text(buffer.getvalue())


# Step 4: Data Visualization
def visualize_data(data):
    """Render price-distribution and price-by-brand charts in Streamlit.

    ``plt.show()`` does not work in a Streamlit app; each figure is passed
    explicitly to ``st.pyplot``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(data['price'], kde=True, ax=ax)
    ax.set_title('Price Distribution')
    st.pyplot(fig)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(x='brand', y='price', data=data, ax=ax)
    ax.set_title('Price by Brand')
    st.pyplot(fig)


# Step 5: Feature Engineering
def encode_features(data):
    """Label-encode the known categorical columns, in place on *data*.

    Only columns actually present are encoded (a missing column would
    otherwise raise KeyError), and a fresh encoder is used per column so
    each column's categories map independently.
    """
    categorical_columns = ['brand', 'processor', 'Ram_type', 'ROM_type', 'GPU', 'OS']
    for col in categorical_columns:
        if col in data.columns:
            data[col] = LabelEncoder().fit_transform(data[col])
    return data


# Step 6: Machine Learning Modeling
def build_model(data):
    """Train a RandomForestRegressor on all columns except 'price'.

    Reports R^2 and MAE on a 20% hold-out split to the Streamlit page and
    returns the fitted model.
    """
    X = data.drop(['price'], axis=1)
    y = data['price']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Model Evaluation on the hold-out set
    y_pred = model.predict(X_test)
    st.write(f'R² Score: {r2_score(y_test, y_pred)}')
    st.write(f'Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}')

    return model


# Step 7: NLP Analysis using Hugging Face (if any text data)
def analyze_text(feedback_data):
    """Attach a 'sentiment' label column derived from the 'feedback' column."""
    sentiment_analysis = pipeline('sentiment-analysis')
    feedback_data['sentiment'] = feedback_data['feedback'].apply(
        lambda x: sentiment_analysis(x)[0]['label']
    )
    return feedback_data


# Step 8: User Interaction with Streamlit
def main():
    st.title("Laptop Price Predictor")

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is not None:
        data = load_data(uploaded_file)
        data = clean_data(data)

        st.subheader("Exploratory Data Analysis")
        perform_eda(data)

        st.subheader("Data Visualization")
        visualize_data(data)

        st.subheader("Feature Engineering")
        data = encode_features(data)

        st.subheader("Machine Learning Model")
        model = build_model(data)

        st.subheader("Make Predictions")
        if st.button('Predict'):
            # NOTE(review): predicts on the same data the model was (mostly)
            # trained on — an optimistic demo, not a generalization estimate.
            predictions = model.predict(data.drop(['price'], axis=1))
            st.write(predictions)

            fig, ax = plt.subplots(figsize=(10, 6))
            sns.histplot(predictions, kde=True, ax=ax)
            ax.set_title('Predicted Price Distribution')
            st.pyplot(fig)

        # NLP Analysis (if applicable)
        # st.subheader("NLP Analysis")
        # feedback_data = load_feedback_data()  # Assuming a function to load text data
        # feedback_data = analyze_text(feedback_data)
        # st.write(feedback_data)


if __name__ == "__main__":
    main()