# EdwardSamuel13's picture
# Upload 14 files
# 8f69dec verified
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from textblob import TextBlob
from sklearn.linear_model import LinearRegression
def perform_customer_segmentation(sales_df, n_clusters=4):
    """
    Segment customers with K-Means on RFM (Recency, Frequency, Monetary) features.

    Parameters
    ----------
    sales_df : pandas.DataFrame
        Transaction rows with 'CustomerID', 'Date', 'TransactionID' and
        'TotalPrice' columns. 'Date' must be datetime-like, because recency
        is computed by subtracting dates and reading ``.days``.
    n_clusters : int, default 4
        Requested number of clusters. Capped at the number of customers,
        since K-Means cannot fit more clusters than it has samples.

    Returns
    -------
    pandas.DataFrame
        Indexed by CustomerID with columns:
        'Recency' (days between the customer's last purchase and the latest
        date in ``sales_df``), 'Frequency' (count of non-null TransactionID
        values), 'Monetary' (sum of TotalPrice) and 'Cluster' (raw K-Means
        label; no business meaning is attached to the IDs).
        An empty frame with these columns is returned when there are no
        customers to segment.
    """
    rfm_columns = ['Recency', 'Frequency', 'Monetary']

    # Nothing to cluster: the scaler and K-Means both reject zero samples, and
    # groupby drops rows whose CustomerID is missing, so bail out early.
    if sales_df.empty or sales_df['CustomerID'].isna().all():
        no_customers = pd.DataFrame(columns=[*rfm_columns, 'Cluster'])
        no_customers.index.name = 'CustomerID'
        return no_customers

    # Recency is measured against the most recent date present in the data,
    # not against "today", so results are reproducible for a given dataset.
    current_date = sales_df['Date'].max()
    rfm = sales_df.groupby('CustomerID').agg({
        'Date': lambda x: (current_date - x.max()).days,  # Recency
        'TransactionID': 'count',                         # Frequency
        'TotalPrice': 'sum',                              # Monetary
    }).rename(columns={
        'Date': 'Recency',
        'TransactionID': 'Frequency',
        'TotalPrice': 'Monetary',
    })

    # Put the three metrics on a common scale so no single one dominates
    # the Euclidean distances used by K-Means.
    scaler = StandardScaler()
    rfm_scaled = scaler.fit_transform(rfm[rfm_columns])

    # K-Means raises when asked for more clusters than samples; cap the
    # request so small (e.g. heavily filtered) datasets still get segmented.
    effective_clusters = min(n_clusters, len(rfm))
    kmeans = KMeans(n_clusters=effective_clusters, random_state=42, n_init=10)
    rfm['Cluster'] = kmeans.fit_predict(rfm_scaled)

    # Cluster IDs are returned as-is; mapping them to named segments is left
    # to the caller.
    return rfm
def forecast_sales(sales_df, days_to_forecast=30):
    """
    Forecast daily sales by extrapolating a linear trend over daily totals.

    Parameters
    ----------
    sales_df : pandas.DataFrame
        Transaction rows with a datetime-like 'Date' column (required by
        ``resample``) and a numeric 'TotalPrice' column.
    days_to_forecast : int, default 30
        Number of days to predict after the last observed date. Values
        <= 0 produce an empty forecast.

    Returns
    -------
    (daily_sales, forecast_df) : tuple of pandas.DataFrame
        ``daily_sales`` has columns 'Date', 'TotalPrice' (sum per calendar
        day; days without transactions inside the observed range total 0)
        and 'DayIndex' (0-based day counter used as the regression feature).
        ``forecast_df`` has columns 'Date' and 'Predicted_Sales'.
        Both frames are empty (same columns) when ``sales_df`` is empty.
    """
    def _no_forecast():
        # Empty result that keeps the column layout callers expect.
        return pd.DataFrame({
            'Date': pd.Series(dtype='datetime64[ns]'),
            'Predicted_Sales': pd.Series(dtype='float64'),
        })

    # No history: resampling/fitting would raise, so return empty frames.
    if sales_df.empty:
        no_history = pd.DataFrame({
            'Date': pd.Series(dtype='datetime64[ns]'),
            'TotalPrice': pd.Series(dtype='float64'),
            'DayIndex': pd.Series(dtype='int64'),
        })
        return no_history, _no_forecast()

    daily_sales = (
        sales_df.set_index('Date')
        .resample('D')['TotalPrice']
        .sum()
        .reset_index()
    )
    daily_sales['DayIndex'] = np.arange(len(daily_sales))

    # Nothing requested: predicting on a zero-row matrix raises, so stop here.
    if days_to_forecast <= 0:
        return daily_sales, _no_forecast()

    model = LinearRegression()
    model.fit(daily_sales[['DayIndex']], daily_sales['TotalPrice'])

    # Future feature rows continue the day counter right after the history.
    # A DataFrame with the same column name is used so the feature names
    # match those seen during fit.
    first_future_idx = len(daily_sales)
    future_df = pd.DataFrame({
        'DayIndex': np.arange(first_future_idx, first_future_idx + days_to_forecast),
    })
    predictions = model.predict(future_df)

    last_date = daily_sales['Date'].max()
    future_dates = pd.date_range(
        start=last_date + pd.Timedelta(days=1),
        periods=days_to_forecast,
        freq='D',
    )

    forecast_df = pd.DataFrame({
        'Date': future_dates,
        'Predicted_Sales': predictions,
    })
    return daily_sales, forecast_df
def analyze_sentiment(reviews_df):
    """
    Score each review with TextBlob polarity and attach a sentiment label.

    Two columns are written onto ``reviews_df`` itself (the frame is
    modified in place and also returned):

    * 'Sentiment_Score' - TextBlob polarity of the 'ReviewText' value.
    * 'Sentiment_Label' - 'Positive' for scores above 0.1, 'Negative' for
      scores below -0.1, otherwise 'Neutral'.
    """
    def _polarity_of(raw_review):
        # str() lets non-string cells (numbers, missing values) pass through
        # TextBlob instead of raising.
        blob = TextBlob(str(raw_review))
        return blob.sentiment.polarity

    def _bucket(polarity):
        # Scores inside [-0.1, 0.1] are treated as neutral.
        if polarity > 0.1:
            label = 'Positive'
        elif polarity < -0.1:
            label = 'Negative'
        else:
            label = 'Neutral'
        return label

    review_texts = reviews_df['ReviewText']
    reviews_df['Sentiment_Score'] = review_texts.apply(_polarity_of)

    stored_scores = reviews_df['Sentiment_Score']
    reviews_df['Sentiment_Label'] = stored_scores.apply(_bucket)

    return reviews_df