# pcsales / OLD /__dbmlsystem.py
# omgy's picture
# Upload 52 files
# 97000ce verified
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')
# Set page configuration (browser tab title/icon, wide layout, sidebar open).
# The icon was mojibake in the source; its bytes decode to the cow emoji.
st.set_page_config(
    page_title="Calcium Supplement Sales Automation",
    page_icon="🐄",
    layout="wide",
    initial_sidebar_state="expanded",
)

# App title, followed by a horizontal rule separating it from the page body.
st.title("🐄 Calcium Supplement Sales Automation Dashboard")
st.markdown("---")
# Your exact ML functions
def enhanced_analyze_sales_data(data1, data2):
    """
    Build the per-village analysis table and the ML-based recommendations.

    Args:
        data1: Village-level frame. This function reads 'Village', 'Date',
            'Sabhasad', 'Contact_In_Group' and 'Total_L'; the recommendation
            step additionally reads 'Mantri_Name', 'Mantri_Mobile', 'Taluka'
            and 'District'.
        data2: Sales frame with 'Village', 'Date' and 'Total_L'.

    Returns:
        Tuple ``(recommendations, analysis_df)``: the recommendation table and
        the enriched per-village frame it was derived from.

    The input frames are not modified; all derived columns are added to copies.
    """
    # Work on copies: the caller keeps these frames (e.g. in session state)
    # and should not see the derived columns appear on them.
    data1 = data1.copy()
    data2 = data2.copy()

    data1['Date'] = pd.to_datetime(data1['Date'])
    data2['Date'] = pd.to_datetime(data2['Date'])

    # Calculate basic metrics. Division by zero yields inf/NaN, which is
    # normalised to 0 so downstream models receive finite values.
    data1['Conversion_Rate'] = (data1['Contact_In_Group'] / data1['Sabhasad'] * 100).round(2)
    data1['Conversion_Rate'] = data1['Conversion_Rate'].replace([np.inf, -np.inf], 0).fillna(0)
    data1['Untapped_Potential'] = data1['Sabhasad'] - data1['Contact_In_Group']
    data1['Sales_Per_Contact'] = (data1['Total_L'] / data1['Contact_In_Group']).round(2)
    data1['Sales_Per_Contact'] = data1['Sales_Per_Contact'].replace([np.inf, -np.inf], 0).fillna(0)

    # Analyze recent sales: total litres, number of sale rows and latest date per village
    recent_sales = data2.groupby('Village').agg({
        'Total_L': ['sum', 'count'],
        'Date': 'max'
    }).reset_index()
    recent_sales.columns = ['Village', 'Recent_Sales_L', 'Recent_Customers', 'Last_Sale_Date']
    recent_sales['Days_Since_Last_Sale'] = (datetime.now() - recent_sales['Last_Sale_Date']).dt.days

    # Merge data; villages with no rows in data2 get zero sales and a
    # sentinel of 999 days since the last sale.
    analysis_df = data1.merge(recent_sales, on='Village', how='left')
    analysis_df['Recent_Sales_L'] = analysis_df['Recent_Sales_L'].fillna(0)
    analysis_df['Recent_Customers'] = analysis_df['Recent_Customers'].fillna(0)
    analysis_df['Days_Since_Last_Sale'] = analysis_df['Days_Since_Last_Sale'].fillna(999)

    # ML Component 1: Village Clustering for Segmentation
    analysis_df = apply_village_clustering(analysis_df)
    # ML Component 2: Predict Sales Potential
    analysis_df = predict_sales_potential(analysis_df)
    # ML Component 3: Action Recommendation Classifier
    analysis_df = predict_recommended_actions(analysis_df)

    # Generate recommendations based on ML predictions
    recommendations = generate_ml_recommendations(analysis_df)
    return recommendations, analysis_df
def apply_village_clustering(analysis_df):
    """
    Use K-Means clustering to segment villages into groups.

    Adds two columns to ``analysis_df`` (in place) and returns it:
    'Cluster' (integer K-Means label) and 'Segment' (display name mapped
    from the label).

    Up to four clusters are used. With fewer than four villages the cluster
    count is reduced to the number of villages, and an empty frame simply
    gets empty 'Cluster'/'Segment' columns, so small uploads no longer make
    K-Means raise.
    """
    max_clusters = 4

    # NOTE(review): K-Means label numbers are arbitrary, so these names are
    # not tied to the actual characteristics of each cluster — confirm
    # whether the names should be derived from cluster centroids instead.
    cluster_names = {
        0: 'High Potential - Low Engagement',
        1: 'Steady Performers',
        2: 'Underperforming',
        3: 'New/Developing'
    }

    # Prepare features for clustering
    cluster_features = analysis_df[[
        'Conversion_Rate', 'Untapped_Potential', 'Sales_Per_Contact',
        'Recent_Sales_L', 'Days_Since_Last_Sale'
    ]].fillna(0)

    n_villages = len(cluster_features)
    if n_villages == 0:
        # Nothing to fit; keep the output schema consistent.
        clusters = np.array([], dtype=int)
    else:
        # Standardize features so no single column dominates the distance metric
        scaled_features = StandardScaler().fit_transform(cluster_features)
        # K-Means cannot create more clusters than there are samples
        kmeans = KMeans(
            n_clusters=min(max_clusters, n_villages), random_state=42, n_init=10
        )
        clusters = kmeans.fit_predict(scaled_features)

    # Add clusters and their display names to the dataframe
    analysis_df['Cluster'] = clusters
    analysis_df['Segment'] = analysis_df['Cluster'].map(cluster_names)
    return analysis_df
def predict_sales_potential(analysis_df):
    """
    Predict sales potential for each village using Random Forest.

    Adds two columns to ``analysis_df`` (in place) and returns it:
    'Predicted_Sales' and 'Sales_Gap' (predicted minus current sales).

    With more than 10 rows a RandomForestRegressor is trained on an 80%
    split and used to predict every row. Otherwise the current sales are
    used as the prediction and the gap is 0.

    Missing 'Total_L' values are treated as 0 for the target, the fallback
    prediction and the gap alike, so NaN does not leak into the outputs.
    The 'Total_L' column itself is left untouched.
    """
    # Prepare features for prediction
    prediction_features = analysis_df[[
        'Sabhasad', 'Contact_In_Group', 'Conversion_Rate',
        'Untapped_Potential', 'Recent_Sales_L', 'Days_Since_Last_Sale'
    ]].fillna(0)
    # Target variable: Total_L (current sales), with missing values as 0
    target = analysis_df['Total_L'].fillna(0)

    # Only train if we have enough data
    if len(prediction_features) > 10:
        # Hold out 20% of the rows; only the training part is used here
        X_train, _, y_train, _ = train_test_split(
            prediction_features, target, test_size=0.2, random_state=42
        )
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        # Predict for every village, including those held out of training
        analysis_df['Predicted_Sales'] = model.predict(prediction_features)
        analysis_df['Sales_Gap'] = analysis_df['Predicted_Sales'] - target
    else:
        # Fallback if not enough data
        analysis_df['Predicted_Sales'] = target
        analysis_df['Sales_Gap'] = 0
    return analysis_df
def predict_recommended_actions(analysis_df):
    """
    Use ML to predict the best action for each village.

    Adds three columns to ``analysis_df`` (in place) and returns it:
    'Action_Label' (rule-based label, first matching rule wins),
    'ML_Recommended_Action' and 'Action_Confidence'.

    With more than 10 rows and at least two distinct labels, a
    RandomForestClassifier is trained on an 80% split of the rule labels;
    the confidence is the highest predicted class probability. Otherwise
    the rule label is used directly with confidence 1.0.

    The split is stratified only when that is valid (every label occurs at
    least twice and both parts can hold one row per label); otherwise a
    plain random split is used instead of raising.
    """
    # Define actions based on rules (for training data); earlier rules win
    rule_conditions = [
        (analysis_df['Conversion_Rate'] < 20).to_numpy(),
        (analysis_df['Untapped_Potential'] > 30).to_numpy(),
        (analysis_df['Days_Since_Last_Sale'] > 30).to_numpy(),
        (analysis_df['Sales_Per_Contact'] > 10).to_numpy(),
    ]
    rule_actions = [
        'Send Marketing Team',
        'Call Mantri for Follow-up',
        'Check on Mantri',
        'Provide More Stock',
    ]
    analysis_df['Action_Label'] = np.select(
        rule_conditions, rule_actions, default='Regular Follow-up'
    )

    # Prepare features for classification
    classification_features = analysis_df[[
        'Conversion_Rate', 'Untapped_Potential', 'Sales_Per_Contact',
        'Recent_Sales_L', 'Days_Since_Last_Sale', 'Sales_Gap'
    ]].fillna(0)
    # Target variable: Action_Label
    target = analysis_df['Action_Label']
    class_counts = target.value_counts()

    # Only train if we have enough data
    if len(classification_features) > 10 and len(class_counts) > 1:
        test_fraction = 0.2
        n_test = int(np.ceil(test_fraction * len(target)))
        n_train = len(target) - n_test
        n_classes = len(class_counts)
        # A stratified split needs >= 2 rows per class and room for every
        # class in both the train and the test part.
        can_stratify = (
            class_counts.min() >= 2
            and n_test >= n_classes
            and n_train >= n_classes
        )
        X_train, _, y_train, _ = train_test_split(
            classification_features,
            target,
            test_size=test_fraction,
            random_state=42,
            stratify=target if can_stratify else None,
        )

        # Train classifier
        clf = RandomForestClassifier(n_estimators=100, random_state=42)
        clf.fit(X_train, y_train)

        # Predict for every village and keep the winning class probability
        analysis_df['ML_Recommended_Action'] = clf.predict(classification_features)
        prediction_proba = clf.predict_proba(classification_features)
        analysis_df['Action_Confidence'] = np.max(prediction_proba, axis=1)
    else:
        # Fallback to rule-based if not enough data
        analysis_df['ML_Recommended_Action'] = analysis_df['Action_Label']
        analysis_df['Action_Confidence'] = 1.0
    return analysis_df
def generate_ml_recommendations(analysis_df):
    """
    Turn the per-village ML output into a recommendation table.

    Each row of ``analysis_df`` produces one record with the village's
    location and mantri contact, the recommended action, a human-readable
    reason, a priority ('High', 'Medium' or 'Low'), the confidence, the
    segment and the sales gap (0 when the 'Sales_Gap' column is absent).
    """
    # Recommended action -> (opening clause of the reason, priority)
    action_profiles = {
        'Send Marketing Team': ("ML predicts marketing team needed", 'High'),
        'Call Mantri for Follow-up': ("ML predicts mantri follow-up needed", 'High'),
        'Check on Mantri': ("ML suggests checking on mantri", 'Medium'),
        'Provide More Stock': ("ML predicts stock increase needed", 'Medium'),
    }
    # Any other action is treated as a regular follow-up
    default_profile = ("ML recommends regular follow-up", 'Low')

    def build_record(village_row):
        action = village_row['ML_Recommended_Action']
        confidence = village_row['Action_Confidence']
        segment = village_row['Segment']
        opening, priority = action_profiles.get(action, default_profile)
        return {
            'Village': village_row['Village'],
            'Taluka': village_row['Taluka'],
            'District': village_row['District'],
            'Mantri': village_row['Mantri_Name'],
            'Mobile': village_row['Mantri_Mobile'],
            'Action': action,
            'Reason': f"{opening} (Confidence: {confidence:.2f}). Segment: {segment}",
            'Priority': priority,
            'Confidence': confidence,
            'Segment': segment,
            'Sales_Gap': village_row.get('Sales_Gap', 0),
        }

    records = [build_record(village_row) for _, village_row in analysis_df.iterrows()]
    return pd.DataFrame(records)
def generate_ml_mantri_messages(recommendations):
    """
    Compose one personalised message per recommendation row.

    Every message has the same frame — a greeting, an action-specific body
    containing the segment and confidence lines, and a sign-off. Only the
    'Provide More Stock' message also reports the sales gap. Returns a
    DataFrame with the mantri, mobile, village, action, message text,
    priority and confidence for each row.
    """
    sign_off = "Dhanyavaad,\nCalcium Supplement Team\n"
    outbox = []

    for _, rec in recommendations.iterrows():
        action = rec['Action']
        village = rec['Village']
        greeting = f"\nNamaste {rec['Mantri']} Ji!\n"
        # Segment/confidence lines shared by every message variant
        insight = f"- Segment: {rec['Segment']}\n- Confidence: {rec['Confidence']*100:.1f}%\n"

        if action == 'Send Marketing Team':
            body_parts = [
                f"Our AI system has identified that your village {village} has high potential for growth.\n",
                "We're sending our marketing team to conduct demo sessions and help you reach more customers.\n",
                "Based on our analysis:\n",
                insight,
                "Please prepare for their visit and notify potential customers.\n",
            ]
        elif action == 'Call Mantri for Follow-up':
            body_parts = [
                f"Our AI analysis shows significant untapped potential in {village}.\n",
                "We recommend focusing on follow-up with these customers:\n",
                insight,
                "A special commission offer is available for your next 10 customers.\n",
            ]
        elif action == 'Check on Mantri':
            body_parts = [
                f"Our system shows reduced activity in {village}.\n",
                "Is everything alright? Do you need any support from our team?\n",
                insight,
                "Please let us know how we can help.\n",
            ]
        elif action == 'Provide More Stock':
            body_parts = [
                f"Great news! Our AI predicts increased demand in {village}.\n",
                "Would you like us to send additional stock?\n",
                insight,
                # The sales gap is only read for this action
                f"- Predicted Sales Gap: {rec['Sales_Gap']:.1f}L\n",
                "Please confirm your additional requirements.\n",
            ]
        else:
            body_parts = [
                f"Our system shows steady performance in {village}.\n",
                "Keep up the good work!\n",
                insight,
                "As always, let us know if you need any support.\n",
            ]

        outbox.append({
            'Mantri': rec['Mantri'],
            'Mobile': rec['Mobile'],
            'Village': village,
            'Action': action,
            'Message': greeting + "".join(body_parts) + sign_off,
            'Priority': rec['Priority'],
            'Confidence': rec['Confidence'],
        })

    return pd.DataFrame(outbox)
# Visualization functions
def plot_village_performance(analysis_df):
    """Return a bubble chart of conversion rate vs. untapped potential.

    Each village is one bubble, sized by 'Total_L' and coloured by 'Segment'.
    """
    axis_labels = {
        'Conversion_Rate': 'Conversion Rate (%)',
        'Untapped_Potential': 'Untapped Potential',
    }
    chart = px.scatter(
        analysis_df,
        x='Conversion_Rate',
        y='Untapped_Potential',
        size='Total_L',
        color='Segment',
        hover_name='Village',
        title='Village Performance Analysis',
        labels=axis_labels,
    )
    chart.update_layout(height=500)
    return chart
def plot_sales_trends(analysis_df):
    """Return a bar chart of total sales per village, coloured by segment."""
    axis_labels = {'Total_L': 'Total Sales (L)', 'Village': 'Village'}
    chart = px.bar(
        analysis_df,
        x='Village',
        y='Total_L',
        color='Segment',
        title='Total Sales by Village',
        labels=axis_labels,
    )
    # Tilt the village names so long labels stay readable
    chart.update_layout(height=400, xaxis_tickangle=-45)
    return chart
def plot_priority_matrix(recommendations):
    """Return a treemap of villages grouped by action priority.

    Priorities are mapped to numeric weights (High=3, Medium=2, Low=1) that
    drive both tile size and colour. The weights are added to a copy, so the
    caller's ``recommendations`` frame (which is also displayed and exported
    as CSV) does not gain a 'Priority_Value' column.
    """
    priority_order = {'High': 3, 'Medium': 2, 'Low': 1}
    plot_data = recommendations.copy()
    plot_data['Priority_Value'] = plot_data['Priority'].map(priority_order)
    fig = px.treemap(
        plot_data,
        path=['Priority', 'Village'],
        values='Priority_Value',
        color='Priority_Value',
        color_continuous_scale='RdYlGn_r',
        title='Action Priority Matrix',
    )
    fig.update_layout(height=500)
    return fig
def display_key_metrics(analysis_df):
    """Render four headline metrics side by side.

    Shows the village count, mean conversion rate, summed untapped
    potential and summed total sales.
    """
    headline_metrics = [
        ("Total Villages", len(analysis_df)),
        ("Avg Conversion Rate", f"{analysis_df['Conversion_Rate'].mean():.1f}%"),
        ("Total Untapped Potential", f"{analysis_df['Untapped_Potential'].sum()}"),
        ("Total Sales (L)", f"{analysis_df['Total_L'].sum()}"),
    ]
    # One column per metric, in the order listed above
    for column, (label, value) in zip(st.columns(4), headline_metrics):
        with column:
            st.metric(label, value)
# Initialize session state: every slot starts as None and keeps its value
# across reruns once it has been set.
for _state_key in ('data1', 'data2', 'analysis_df', 'recommendations', 'ml_messages'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
def _read_uploaded_table(uploaded_file):
    """Read an uploaded file into a DataFrame: CSV by extension, otherwise Excel."""
    # Compare case-insensitively so e.g. "DATA.CSV" is not parsed as Excel
    if uploaded_file.name.lower().endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return pd.read_excel(uploaded_file)


# Sidebar: file uploads and the button that runs the full analysis
with st.sidebar:
    st.header("Data Input")

    # File uploaders. Their widget keys must differ from the 'data1'/'data2'
    # session-state slots that this script assigns: Streamlit does not allow
    # a file_uploader's value to be set through st.session_state.
    st.subheader("Upload Village Data (Data1)")
    uploaded_data1 = st.file_uploader(
        "CSV or Excel file", type=["csv", "xlsx"], key="data1_upload"
    )
    st.subheader("Upload Sales Data (Data2)")
    uploaded_data2 = st.file_uploader(
        "CSV or Excel file", type=["csv", "xlsx"], key="data2_upload"
    )

    if st.button("Load Data and Run ML Analysis"):
        if uploaded_data1 and uploaded_data2:
            # Top-level UI boundary: any failure is reported to the user
            # instead of crashing the app.
            try:
                # Load data
                data1 = _read_uploaded_table(uploaded_data1)
                data2 = _read_uploaded_table(uploaded_data2)

                # Store in session state
                st.session_state.data1 = data1
                st.session_state.data2 = data2

                # Run ML analysis
                with st.spinner("Running ML analysis..."):
                    recommendations, analysis_df = enhanced_analyze_sales_data(data1, data2)
                    st.session_state.analysis_df = analysis_df
                    st.session_state.recommendations = recommendations
                    ml_messages = generate_ml_mantri_messages(recommendations)
                    st.session_state.ml_messages = ml_messages
                st.success("ML analysis completed successfully!")
            except Exception as e:
                st.error(f"Error processing data: {str(e)}")
        else:
            st.error("Please upload both files to proceed")
def _format_number(value, spec, suffix=""):
    """Format a numeric ``value`` with ``spec`` plus ``suffix``; 'N/A' if missing or non-numeric."""
    if isinstance(value, (int, float, np.integer, np.floating)) and not pd.isna(value):
        return f"{value:{spec}}{suffix}"
    return "N/A"


# Main content: shown only once an analysis has been run in this session
if st.session_state.analysis_df is not None and st.session_state.recommendations is not None:
    analysis_df = st.session_state.analysis_df
    recommendations = st.session_state.recommendations

    # Display dashboard
    tab1, tab2, tab3, tab4 = st.tabs(["Dashboard", "Village Analysis", "Actions & Messages", "Team Dispatch"])

    with tab1:
        st.header("ML-Powered Performance Dashboard")
        display_key_metrics(analysis_df)
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(plot_village_performance(analysis_df), use_container_width=True)
        with col2:
            st.plotly_chart(plot_priority_matrix(recommendations), use_container_width=True)
        st.plotly_chart(plot_sales_trends(analysis_df), use_container_width=True)

    with tab2:
        st.header("Village Analysis with ML Segmentation")
        selected_village = st.selectbox("Select Village", analysis_df['Village'].unique())
        # If a village name occurs more than once, the first matching row is shown
        village_data = analysis_df[analysis_df['Village'] == selected_village].iloc[0]
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Village Details")
            st.write(f"**Village:** {village_data['Village']}")
            st.write(f"**Taluka:** {village_data['Taluka']}")
            st.write(f"**District:** {village_data['District']}")
            st.write(f"**Mantri:** {village_data['Mantri_Name']}")
            st.write(f"**Mantri Mobile:** {village_data['Mantri_Mobile']}")
            st.write(f"**Segment:** {village_data.get('Segment', 'N/A')}")
            st.write(f"**ML Recommended Action:** {village_data.get('ML_Recommended_Action', 'N/A')}")
            # Optional numeric fields go through _format_number so a missing
            # value renders as 'N/A' instead of failing on the float format.
            st.write(f"**Action Confidence:** {_format_number(village_data.get('Action_Confidence'), '.2f')}")
        with col2:
            st.subheader("Performance Metrics")
            st.write(f"**Sabhasad:** {village_data['Sabhasad']}")
            st.write(f"**Contacted:** {village_data['Contact_In_Group']}")
            st.write(f"**Conversion Rate:** {village_data['Conversion_Rate']}%")
            st.write(f"**Untapped Potential:** {village_data['Untapped_Potential']}")
            st.write(f"**Total Sales:** {village_data['Total_L']}L")
            st.write(f"**Sales per Contact:** {village_data['Sales_Per_Contact']}L")
            st.write(f"**Predicted Sales:** {_format_number(village_data.get('Predicted_Sales'), '.1f', 'L')}")
            st.write(f"**Sales Gap:** {_format_number(village_data.get('Sales_Gap'), '.1f', 'L')}")

    with tab3:
        st.header("ML-Based Actions & Messages")
        st.subheader("ML-Generated Recommendations")
        st.dataframe(recommendations)

        # Download recommendations
        csv_data = recommendations.to_csv(index=False)
        st.download_button(
            label="Download Recommendations as CSV",
            data=csv_data,
            file_name="ml_sales_recommendations.csv",
            mime="text/csv"
        )

        st.subheader("Generate ML-Powered Messages")
        selected_mantri = st.selectbox("Select Mantri", recommendations['Mantri'].unique())
        mantri_data = recommendations[recommendations['Mantri'] == selected_mantri].iloc[0]
        message_df = st.session_state.ml_messages[
            st.session_state.ml_messages['Mantri'] == selected_mantri]
        if not message_df.empty:
            message = message_df.iloc[0]['Message']
            st.text_area("ML-Generated Message", message, height=300)
            if st.button("Send Message"):
                # No message is actually dispatched here; only a confirmation is shown
                st.success(f"Message sent to {mantri_data['Mantri']} at {mantri_data['Mobile']}")

        st.subheader("Bulk Message Sender")
        if st.button("Generate All ML Messages"):
            st.session_state.all_messages = st.session_state.ml_messages
        if 'all_messages' in st.session_state:
            all_messages = st.session_state.all_messages
            st.dataframe(all_messages[['Mantri', 'Village', 'Action', 'Priority', 'Confidence']])
            if st.button("Send All ML Messages"):
                progress_bar = st.progress(0)
                total_messages = len(all_messages)
                # Count positions rather than index labels so the fraction
                # stays within 0..1 whatever the frame's index is.
                for sent_count, _ in enumerate(all_messages.iterrows(), start=1):
                    # Simulate sending message
                    progress_bar.progress(sent_count / total_messages)
                st.success("All ML-powered messages sent successfully!")

    with tab4:
        st.header("Marketing Team Dispatch with ML Insights")
        st.subheader("Villages Needing Team Visit (ML Identified)")
        high_priority = recommendations[recommendations['Action'] == 'Send Marketing Team']
        if not high_priority.empty:
            for row_id, row in high_priority.iterrows():
                with st.expander(f"{row['Village']} - {row['Mantri']} (Confidence: {row['Confidence']:.2f})"):
                    st.write(f"**Reason:** {row['Reason']}")
                    st.write(f"**Segment:** {row['Segment']}")
                    st.write(f"**Sales Gap:** {row['Sales_Gap']:.1f}L")
                    # Include the row index in widget keys: village names may
                    # repeat, and Streamlit requires unique widget keys.
                    widget_suffix = f"{row_id}_{row['Village']}"
                    dispatch_date = st.date_input("Dispatch Date", key=f"date_{widget_suffix}")
                    team_size = st.slider("Team Size", 1, 5, 2, key=f"size_{widget_suffix}")
                    if st.button("Schedule Dispatch", key=f"dispatch_{widget_suffix}"):
                        st.success(f"Team dispatch scheduled for {row['Village']} on {dispatch_date}")
        else:
            st.info("No villages currently require immediate team dispatch based on ML analysis.")

        st.subheader("ML Performance Insights")
        st.write("Based on our machine learning analysis, here are key insights:")

        # Show segment distribution
        segment_counts = analysis_df['Segment'].value_counts()
        fig = px.pie(values=segment_counts.values, names=segment_counts.index,
                     title="Village Segment Distribution")
        st.plotly_chart(fig, use_container_width=True)

        # Show confidence distribution
        fig = px.histogram(recommendations, x='Confidence',
                           title='Confidence Distribution of ML Recommendations')
        st.plotly_chart(fig, use_container_width=True)
else:
    st.info("Please upload your data files using the sidebar and click 'Load Data and Run ML Analysis' to get started.")
# Footer: a horizontal rule followed by the product line
_FOOTER_TEXT = "**ML-Powered Calcium Supplement Sales Automation System** | For internal use only"
st.markdown("---")
st.markdown(_FOOTER_TEXT)