# app.py — Streamlit laptop price analysis and prediction app
# (originally published as "analysis/app.py" by mjsp, commit b1ea0f9)
# Import necessary libraries
# Standard library
import io

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import pipeline
# Step 1: Data Collection
def load_data(file_path):
    """Read a CSV file (path or file-like object) into a DataFrame."""
    return pd.read_csv(file_path)
# Step 2: Data Cleaning
def clean_data(data):
    """Return a copy of *data* with rows containing missing values removed.

    BUG FIX: the previous version used dropna(inplace=True), silently
    mutating the caller's DataFrame. Callers already reassign the return
    value, so returning a cleaned copy is backward compatible and leaves
    the input untouched.
    """
    return data.dropna()
# Step 3: Exploratory Data Analysis (EDA)
def perform_eda(data):
    """Render summary statistics and schema information in the Streamlit app."""
    # describe() returns a DataFrame, so st.write renders it directly.
    st.write(data.describe())
    # BUG FIX: DataFrame.info() prints to stdout and returns None, so the
    # old st.write(data.info()) displayed nothing. Capture the text via a
    # buffer and render it with st.text instead.
    buffer = io.StringIO()
    data.info(buf=buffer)
    st.text(buffer.getvalue())
# Step 4: Data Visualization
def visualize_data(data):
    """Plot the price distribution and price-by-brand charts in the app.

    BUG FIX: plt.show() does not render inside Streamlit (it targets a
    local GUI backend); each figure is passed to st.pyplot instead.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(data['price'], kde=True, ax=ax)
    ax.set_title('Price Distribution')
    st.pyplot(fig)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(x='brand', y='price', data=data, ax=ax)
    ax.set_title('Price by Brand')
    st.pyplot(fig)
# Step 5: Feature Engineering
def encode_features(data):
    """Label-encode the known categorical columns, returning a new DataFrame.

    BUG FIXES over the previous version:
    - A single shared LabelEncoder was re-fit on every column, so no
      per-column mapping survived for use at prediction time; each column
      is now encoded independently.
    - The input DataFrame was mutated in place; a copy is returned instead
      (callers already reassign the return value).
    - Missing columns raised KeyError; they are now skipped.

    Codes are assigned in sorted order of each column's unique values —
    the same mapping sklearn's LabelEncoder produces.
    """
    data = data.copy()
    categorical_columns = ['brand', 'processor', 'Ram_type', 'ROM_type', 'GPU', 'OS']
    for col in categorical_columns:
        if col in data.columns:
            # astype('category') sorts the categories, so .cat.codes
            # reproduces LabelEncoder's sorted-unique integer mapping.
            data[col] = data[col].astype('category').cat.codes
    return data
# Step 6: Machine Learning Modeling
def build_model(data):
    """Train a random-forest price model, report hold-out metrics, return it."""
    features = data.drop(['price'], axis=1)
    target = data['price']
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    regressor = RandomForestRegressor(n_estimators=100, random_state=42)
    regressor.fit(X_train, y_train)

    # Evaluate on the 20% hold-out split and surface the metrics in the UI.
    predicted = regressor.predict(X_test)
    st.write(f'R² Score: {r2_score(y_test, predicted)}')
    st.write(f'Mean Absolute Error: {mean_absolute_error(y_test, predicted)}')
    return regressor
# Step 7: NLP Analysis using Hugging Face (if any text data)
def analyze_text(feedback_data):
    """Attach a 'sentiment' label column derived from the 'feedback' column."""
    classifier = pipeline('sentiment-analysis')

    def label_of(text):
        # The pipeline returns a list of result dicts; keep the top label.
        return classifier(text)[0]['label']

    feedback_data['sentiment'] = feedback_data['feedback'].apply(label_of)
    return feedback_data
# Step 8: User Interaction with Streamlit
def main():
    """Streamlit entry point: upload a CSV, explore it, train, and predict."""
    st.title("Laptop Price Predictor")
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is not None:
        data = load_data(uploaded_file)
        data = clean_data(data)

        st.subheader("Exploratory Data Analysis")
        perform_eda(data)

        st.subheader("Data Visualization")
        visualize_data(data)

        st.subheader("Feature Engineering")
        data = encode_features(data)

        st.subheader("Machine Learning Model")
        model = build_model(data)

        st.subheader("Make Predictions")
        if st.button('Predict'):
            # NOTE(review): this predicts over every uploaded row, including
            # the rows the model was trained on — confirm that is intended.
            predictions = model.predict(data.drop(['price'], axis=1))
            st.write(predictions)
            # BUG FIX: plt.show() does not render inside Streamlit; pass
            # the figure to st.pyplot instead.
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.histplot(predictions, kde=True, ax=ax)
            ax.set_title('Predicted Price Distribution')
            st.pyplot(fig)

# NLP Analysis (if applicable)
# st.subheader("NLP Analysis")
# feedback_data = load_feedback_data() # Assuming a function to load text data
# feedback_data = analyze_text(feedback_data)
# st.write(feedback_data)
if __name__ == "__main__":
    main()