|
|
import pandas as pd
|
|
|
import numpy as np
|
|
|
from sklearn.cluster import KMeans
|
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
from textblob import TextBlob
|
|
|
from sklearn.linear_model import LinearRegression
|
|
|
|
|
|
def perform_customer_segmentation(sales_df, n_clusters=4):
    """Cluster customers by their RFM (Recency, Frequency, Monetary) profile.

    One row per ``CustomerID`` is built from ``sales_df`` with three features:

    * ``Recency``   - days between the newest ``Date`` in the whole frame and
      the customer's own newest ``Date``.
    * ``Frequency`` - number of non-null ``TransactionID`` values.
    * ``Monetary``  - sum of ``TotalPrice``.

    The three features are standardised and fed to K-Means; the resulting
    label is stored in a ``Cluster`` column.

    Args:
        sales_df: DataFrame providing ``CustomerID``, ``Date``,
            ``TransactionID`` and ``TotalPrice`` columns. ``Date`` is assumed
            to be datetime-like, since the day difference is read via
            ``.days``.
        n_clusters: Number of clusters requested from K-Means.

    Returns:
        DataFrame indexed by ``CustomerID`` with the columns ``Recency``,
        ``Frequency``, ``Monetary`` and ``Cluster``.
    """
    # The newest date in the data acts as "today" for the recency measure.
    snapshot_date = sales_df['Date'].max()

    def days_since_last_purchase(dates):
        return (snapshot_date - dates.max()).days

    aggregations = {
        'Date': days_since_last_purchase,
        'TransactionID': 'count',
        'TotalPrice': 'sum',
    }
    feature_names = {
        'Date': 'Recency',
        'TransactionID': 'Frequency',
        'TotalPrice': 'Monetary',
    }

    per_customer = sales_df.groupby('CustomerID').agg(aggregations)
    rfm = per_customer.rename(columns=feature_names)

    # Standardise before clustering so that no single feature dominates the
    # distance computation purely because of its scale.
    standardized = StandardScaler().fit_transform(rfm)

    # Fixed random_state keeps the cluster assignment reproducible.
    clusterer = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    rfm['Cluster'] = clusterer.fit_predict(standardized)

    return rfm
|
|
|
|
|
|
def forecast_sales(sales_df, days_to_forecast=30):
    """Forecast daily sales totals with a straight-line trend.

    Transactions are summed per calendar day (days without transactions
    contribute 0), a ``LinearRegression`` is fitted on a running day index,
    and the fitted line is extrapolated past the last observed day.

    Args:
        sales_df: DataFrame with a datetime-like ``Date`` column and a
            numeric ``TotalPrice`` column.
        days_to_forecast: Number of days to predict after the last observed
            date. Zero or a negative value yields an empty forecast.

    Returns:
        A ``(daily_sales, forecast_df)`` tuple. ``daily_sales`` has the
        columns ``Date``, ``TotalPrice`` and ``DayIndex``; ``forecast_df``
        has the columns ``Date`` and ``Predicted_Sales``.

    Raises:
        ValueError: If ``sales_df`` yields no daily rows to fit on.
    """
    daily_sales = (
        sales_df.set_index('Date')
        .resample('D')['TotalPrice']
        .sum()
        .reset_index()
    )
    daily_sales['DayIndex'] = np.arange(len(daily_sales))

    if daily_sales.empty:
        # A regression cannot be fitted on zero samples; fail with a clear
        # message instead of an error from deep inside the estimator.
        raise ValueError("sales_df contains no rows to build a forecast from")

    if days_to_forecast <= 0:
        # Nothing to predict. The estimator rejects zero-row inputs, so hand
        # back an empty, correctly typed forecast frame instead of failing.
        empty_forecast = pd.DataFrame({
            'Date': pd.Series(dtype='datetime64[ns]'),
            'Predicted_Sales': pd.Series(dtype='float64'),
        })
        return daily_sales, empty_forecast

    model = LinearRegression()
    model.fit(daily_sales[['DayIndex']], daily_sales['TotalPrice'])

    # DayIndex runs 0..len-1, so the first unseen index is len(daily_sales).
    first_future_idx = len(daily_sales)
    future_df = pd.DataFrame({
        'DayIndex': np.arange(first_future_idx, first_future_idx + days_to_forecast),
    })
    # Predict from a DataFrame so the feature name matches the one seen in fit.
    predictions = model.predict(future_df)

    last_date = daily_sales['Date'].max()
    future_dates = last_date + pd.to_timedelta(
        np.arange(1, days_to_forecast + 1), unit='D'
    )

    forecast_df = pd.DataFrame({
        'Date': future_dates,
        'Predicted_Sales': predictions,
    })

    return daily_sales, forecast_df
|
|
|
|
|
|
def analyze_sentiment(reviews_df):
    """Score every review with TextBlob and bucket the score into a label.

    Two columns are written onto ``reviews_df`` itself (the frame is modified
    in place and the same object is returned):

    * ``Sentiment_Score`` - TextBlob polarity of ``ReviewText``.
    * ``Sentiment_Label`` - ``'Positive'`` for scores above 0.1,
      ``'Negative'`` for scores below -0.1, ``'Neutral'`` otherwise.

    Args:
        reviews_df: DataFrame with a ``ReviewText`` column.

    Returns:
        The same ``reviews_df`` object, with the two sentiment columns added.
    """
    def polarity_of(raw_text):
        # str() lets non-string cells be handed to TextBlob as text.
        blob = TextBlob(str(raw_text))
        return blob.sentiment.polarity

    def to_label(polarity):
        if polarity > 0.1:
            label = 'Positive'
        elif polarity < -0.1:
            label = 'Negative'
        else:
            label = 'Neutral'
        return label

    review_texts = reviews_df['ReviewText']
    reviews_df['Sentiment_Score'] = review_texts.apply(polarity_of)

    scores = reviews_df['Sentiment_Score']
    reviews_df['Sentiment_Label'] = scores.apply(to_label)

    return reviews_df
|
|
|
|