Spaces:

nadish1210
/

Data_Analysis_Dashboard

Sleeping

App Files Files Community

nadish1210 committed on May 1

Commit

bd4b1bd

verified ·

1 Parent(s): 65d92f3

Update backend.py

Browse files

Files changed (1) hide show

backend.py +83 -321

backend.py CHANGED Viewed

@@ -1,360 +1,122 @@
 import pandas as pd
 import plotly.express as px
 from datetime import datetime
-import kaleido  # Helps prevent some write_image issues
 import os
-# ====================== SALES INSIGHTS FUNCTION ======================
-def sales_insights(file_path: str, analysis_type: str = "region") -> tuple:
-    """
-    Main function to analyze sales data and generate charts.
-    Args:
-        file_path (str): Path to the uploaded CSV or Excel file.
-        analysis_type (str): Type of analysis to perform.
-                             Options: "region", "month", "product", "profit", "top5_profit", "low5_sales".
-    Returns:
-        tuple: (plotly.graph_objects.Figure, chart_image_path, summary_excel_path)
-    Raises:
-        ValueError: If the file cannot be read or required columns are missing.
-    """
     try:
-        # Read the uploaded file
         if file_path.endswith('.csv'):
             df = pd.read_csv(file_path)
         else:
             df = pd.read_excel(file_path)
-    except Exception as e:
-        raise ValueError(f"Error reading file: {str(e)}")
-    # Check required columns
-    required_columns = ["Region", "Sales", "Product", "Profit", "Date"]
-    missing_cols = [col for col in required_columns if col not in df.columns]
-    if missing_cols:
-        raise ValueError(f"Missing columns in file: {missing_cols}")
-    # Ensure numeric columns are actually numeric
-    for col in ["Sales", "Profit"]:
-        df[col] = pd.to_numeric(df[col], errors='coerce')
-    # Drop rows where critical numeric data is missing
-    df = df.dropna(subset=["Sales", "Profit"])
-    # ====================== ANALYSIS LOGIC ======================
-    if analysis_type == "region":
-        summary = df.groupby("Region")["Sales"].sum().reset_index()
-        fig = px.bar(
-            summary,
-            x="Region",
-            y="Sales",
-            title="Sales by Region",
-            text="Sales",
-            color="Region"
-        )
-    elif analysis_type == "month":
-        df["Date"] = pd.to_datetime(df["Date"], errors='coerce')
-        df = df.dropna(subset=["Date"])
-        # Create a proper monthly period for sorting, but use string for display
-        df["Month_Period"] = df["Date"].dt.to_period("M")
-        df["Month_Name"] = df["Date"].dt.strftime("%b %Y")
-        summary = df.groupby(["Month_Period", "Month_Name"])["Sales"].sum().reset_index()
-        # Sort chronologically by the Period object, then drop it
-        summary = summary.sort_values("Month_Period")
-        fig = px.line(
-            summary,
-            x="Month_Name",
-            y="Sales",
-            title="Monthly Sales Trend",
-            markers=True
-        )
-    elif analysis_type == "product":
-        summary = df.groupby("Product")["Sales"].sum().reset_index()
-        fig = px.bar(
-            summary,
-            x="Product",
-            y="Sales",
-            title="Sales by Product",
-            text="Sales",
-            color="Product"
-        )
-    elif analysis_type == "profit":
-        summary = df.groupby("Product")["Profit"].sum().reset_index()
-        fig = px.bar(
-            summary,
-            x="Product",
-            y="Profit",
-            title="Profit by Product",
-            text="Profit",
-            color="Product"
-        )
-    elif analysis_type == "top5_profit":
-        summary = (
-            df.groupby("Product")["Profit"]
-            .sum()
-            .reset_index()
-            .sort_values("Profit", ascending=False)
-            .head(5)
-        )
-        fig = px.bar(
-            summary,
-            x="Product",
-            y="Profit",
-            title="Top 5 Products by Profit",
-            text="Profit",
-            color="Product"
-        )
-    elif analysis_type == "low5_sales":
-        summary = (
-            df.groupby("Product")["Sales"]
-            .sum()
-            .reset_index()
-            .sort_values("Sales", ascending=True)
-            .head(5)
-        )
-        fig = px.bar(
-            summary,
-            x="Product",
-            y="Sales",
-            title="Bottom 5 Products by Sales",
-            text="Sales",
-            color="Product"
-        )
-    else:
-        # Default fallback
-        summary = df.groupby("Product")["Sales"].sum().reset_index()
-        fig = px.bar(
-            summary,
-            x="Product",
-            y="Sales",
-            title="Sales by Product",
-            color="Product"
-        )
-    # Improve chart appearance
-    fig.update_layout(
-        xaxis_tickangle=-45,
-        height=600,
-        title_x=0.5,
-        template="plotly_white"
-    )
-    fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
-import pandas as pd
-import plotly.express as px
-from datetime import datetime
-import os
-# ====================== SALES INSIGHTS FUNCTION ======================
-def sales_insights(file_path: str, analysis_type: str = "region") -> tuple:
-    """
-    Main function to analyze sales data and generate charts.
-    Args:
-        file_path (str): Path to the uploaded CSV or Excel file.
-        analysis_type (str): Type of analysis: "region", "month", "product",
-                            "profit", "top5_profit", "low5_sales"
-    Returns:
-        tuple: (plotly Figure, chart_image_path, summary_excel_path)
-    """
-    try:
-        # Read the file
-        if file_path.endswith('.csv'):
-            df = pd.read_csv(file_path)
         else:
-            df = pd.read_excel(file_path)
-    except Exception as e:
-        raise ValueError(f"Error reading file: {str(e)}")
-    # Check required columns
-    required_columns = ["Region", "Sales", "Product", "Profit", "Date"]
-    missing_cols = [col for col in required_columns if col not in df.columns]
-    if missing_cols:
-        raise ValueError(f"Missing required columns: {missing_cols}")
-    # Convert numeric columns safely
-    for col in ["Sales", "Profit"]:
-        df[col] = pd.to_numeric(df[col], errors='coerce')
-    # Drop rows with missing critical numeric data
-    df = df.dropna(subset=["Sales", "Profit"]).copy()
-    # ====================== ANALYSIS LOGIC ======================
-    if analysis_type == "region":
-        summary = df.groupby("Region", as_index=False)["Sales"].sum()
-        fig = px.bar(
-            summary,
-            x="Region",
-            y="Sales",
-            title="Sales by Region",
-            text="Sales",
-            color="Region"
-        )
-    elif analysis_type == "month":
-        df["Date"] = pd.to_datetime(df["Date"], errors='coerce')
-        df = df.dropna(subset=["Date"]).copy()
-        df["Month_Period"] = df["Date"].dt.to_period("M")
-        df["Month_Name"] = df["Date"].dt.strftime("%b %Y")
-        summary = df.groupby(["Month_Period", "Month_Name"], as_index=False)["Sales"].sum()
-        summary = summary.sort_values("Month_Period")
-        fig = px.line(
-            summary,
-            x="Month_Name",
-            y="Sales",
-            title="Monthly Sales Trend",
-            markers=True,
-            line_shape="linear"
-        )
-    elif analysis_type == "product":
-        summary = df.groupby("Product", as_index=False)["Sales"].sum()
-        fig = px.bar(
-            summary, x="Product", y="Sales", title="Sales by Product",
-            text="Sales", color="Product"
-        )
-    elif analysis_type == "profit":
-        summary = df.groupby("Product", as_index=False)["Profit"].sum()
-        fig = px.bar(
-            summary, x="Product", y="Profit", title="Profit by Product",
-            text="Profit", color="Product"
-        )
-    elif analysis_type == "top5_profit":
-        summary = (
-            df.groupby("Product", as_index=False)["Profit"]
-            .sum()
-            .sort_values("Profit", ascending=False)
-            .head(5)
-        )
-        fig = px.bar(
-            summary, x="Product", y="Profit", title="Top 5 Products by Profit",
-            text="Profit", color="Product"
-        )
-    elif analysis_type == "low5_sales":
-        summary = (
-            df.groupby("Product", as_index=False)["Sales"]
-            .sum()
-            .sort_values("Sales", ascending=True)
-            .head(5)
-        )
-        fig = px.bar(
-            summary, x="Product", y="Sales", title="Bottom 5 Products by Sales",
-            text="Sales", color="Product"
-        )
-    else:
-        # Default fallback
-        summary = df.groupby("Product", as_index=False)["Sales"].sum()
-        fig = px.bar(
-            summary, x="Product", y="Sales", title="Sales by Product",
-            text="Sales", color="Product"
-        )
-    # Improve layout
-    fig.update_layout(
-        xaxis_tickangle=-45,
-        height=600,
-        title_x=0.5,
-        template="plotly_white",
-        margin=dict(l=40, r=40, t=60, b=100)
-    )
-    fig.update_traces(
-        texttemplate='%{text:.2s}',
-        textposition='outside',
-        marker_line_color='white',
-        marker_line_width=1
-    )
-    # ====================== SAVE OUTPUTS ======================
-    output_chart_path = "output_chart.png"
-    output_data_path = "output_data.xlsx"
-    # Save chart with better error handling for HF Spaces
-    try:
-        fig.write_image(output_chart_path, width=1200, height=700, scale=2, engine="kaleido")
-    except Exception as e:
-        print(f"Warning: High-res image save failed: {e}")
         try:
-            # Fallback without scale
-            fig.write_image(output_chart_path, width=1200, height=700)
-        except Exception as e2:
-            print(f"Error saving chart: {e2}")
-            # Ultimate fallback - save as static image
-            fig.write_image(output_chart_path, width=1000, height=600)
-    # Save summary data
-    try:
-        summary.to_excel(output_data_path, index=False)
     except Exception as e:
-        print(f"Error saving summary Excel: {e}")
-    return fig, output_chart_path, output_data_path
-# ====================== FEEDBACK FUNCTION ======================
 def save_feedback(name: str, comment: str, stars: int) -> str:
-    """
-    Save user feedback to feedback.xlsx with better robustness.
-    """
     feedback_file = "feedback.xlsx"
-    # Validate and sanitize inputs
     try:
-        stars = int(stars)
-        stars = max(1, min(5, stars))
-    except (ValueError, TypeError):
         stars = 3
     new_entry = {
         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-        "Name": str(name).strip()[:100] if name else "Anonymous",   # Limit length
-        "Comment": str(comment).strip()[:1000] if comment else "",  # Limit length
         "Stars": stars
     }
-    try:
-        if os.path.exists(feedback_file):
-            df = pd.read_excel(feedback_file)
-            # Ensure all expected columns exist
-            expected_cols = ["Timestamp", "Name", "Comment", "Stars"]
-            for col in expected_cols:
-                if col not in df.columns:
-                    df[col] = None
-            df = df[expected_cols]
-        else:
-            df = pd.DataFrame(columns=["Timestamp", "Name", "Comment", "Stars"])
-    except Exception as e:
-        print(f"Feedback file read error: {e}")
-        df = pd.DataFrame(columns=["Timestamp", "Name", "Comment", "Stars"])
-    # Append new feedback
-    new_df = pd.DataFrame([new_entry])
-    df = pd.concat([df, new_df], ignore_index=True)
-    # Save with error handling
-    try:
-        df.to_excel(feedback_file, index=False)
-        return "✅ Thank you! Your feedback has been saved successfully."
-    except Exception as e:
-        print(f"Error writing feedback: {e}")
-        return f"❌ Error saving feedback: {str(e)}"

 import pandas as pd
 import plotly.express as px
 from datetime import datetime
 import os
+import time
+from PIL import Image
+import numpy as np
+# Early import for kaleido
+import kaleido
+# ====================== SALES INSIGHTS ======================
def sales_insights(file_path: str, analysis_type: str = "region"):
    """Analyze a sales file and produce a chart, a PNG and an Excel summary.

    Args:
        file_path: Path to the input data. Paths ending in ``.csv`` (any
            letter case) are read with ``pd.read_csv``; every other path is
            handed to ``pd.read_excel``.
        analysis_type: One of ``"region"``, ``"month"``, ``"product"``,
            ``"profit"``, ``"top5_profit"`` or ``"low5_sales"``. Any other
            value falls back to the ``"product"`` analysis.

    Returns:
        A ``(fig, chart_path, data_path)`` tuple: the Plotly figure, the path
        of the exported PNG (``"output_chart.png"``) and the path of the
        summary workbook (``"output_data.xlsx"``).

    Raises:
        ValueError: If reading, validating, analysing or saving fails. The
            message is ``"Analysis failed: <original message>"`` and the
            original exception is attached as ``__cause__``.
    """
    chart_path = "output_chart.png"
    data_path = "output_data.xlsx"

    try:
        df = _load_sales_frame(file_path)

        if analysis_type == "month":
            summary = _monthly_sales(df)
            fig = px.line(
                summary,
                x="Month_Name",
                y="Sales",
                title="Monthly Sales Trend",
                markers=True,
            )
        else:
            # Unknown analysis types use the per-product sales chart.
            group_col, value_col, title, pick = _BAR_ANALYSES.get(
                analysis_type, _BAR_ANALYSES["product"]
            )
            summary = df.groupby(group_col, as_index=False)[value_col].sum()
            if pick == "top":
                summary = summary.nlargest(5, value_col)
            elif pick == "bottom":
                summary = summary.nsmallest(5, value_col)
            fig = px.bar(
                summary,
                x=group_col,
                y=value_col,
                title=title,
                color=group_col,
                text=value_col,
            )

        fig.update_layout(
            xaxis_tickangle=-45,
            height=600,
            title_x=0.5,
            template="plotly_white",
        )
        fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')

        _export_chart(fig, chart_path)
        summary.to_excel(data_path, index=False)
        return fig, chart_path, data_path
    except Exception as e:
        # Callers only need to handle ValueError; chain the cause so the
        # original traceback is still available for debugging.
        raise ValueError(f"Analysis failed: {str(e)}") from e


# analysis_type -> (group column, value column, chart title, row selection).
# Row selection is None (all groups), "top" (5 largest) or "bottom" (5 smallest).
_BAR_ANALYSES = {
    "region": ("Region", "Sales", "Sales by Region", None),
    "product": ("Product", "Sales", "Sales by Product", None),
    "profit": ("Product", "Profit", "Profit by Product", None),
    "top5_profit": ("Product", "Profit", "Top 5 Products by Profit", "top"),
    "low5_sales": ("Product", "Sales", "Bottom 5 Products by Sales", "bottom"),
}


def _load_sales_frame(file_path):
    """Read the input file, check required columns and coerce the numeric ones."""
    # Compare the extension case-insensitively so "DATA.CSV" is not sent to
    # the Excel reader.
    if file_path.lower().endswith('.csv'):
        df = pd.read_csv(file_path)
    else:
        df = pd.read_excel(file_path)

    required = ["Region", "Sales", "Product", "Profit", "Date"]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    for col in ["Sales", "Profit"]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    # Rows whose Sales or Profit could not be parsed are excluded.
    return df.dropna(subset=["Sales", "Profit"]).copy()


def _monthly_sales(df):
    """Return total Sales per month as (Month_Name, Sales) in calendar order."""
    dated = df.copy()
    dated["Date"] = pd.to_datetime(dated["Date"], errors='coerce')
    dated = dated.dropna(subset=["Date"]).copy()
    # Grouping on the "%b %Y" label alone sorts months alphabetically
    # (Apr, Aug, Dec, ...); carry a monthly period alongside it so the rows
    # can be ordered chronologically before the helper column is dropped.
    dated["Month_Period"] = dated["Date"].dt.to_period("M")
    dated["Month_Name"] = dated["Date"].dt.strftime("%b %Y")
    totals = (
        dated.groupby(["Month_Period", "Month_Name"], as_index=False)["Sales"]
        .sum()
        .sort_values("Month_Period")
    )
    return totals[["Month_Name", "Sales"]].reset_index(drop=True)


def _export_chart(fig, chart_path):
    """Write the figure to a PNG, falling back to a blank placeholder image."""
    # First try with a scale factor, then without it.
    for extra in ({"scale": 1.2}, {}):
        try:
            fig.write_image(chart_path, width=1100, height=650, **extra)
            return
        except Exception as err:
            print(f"Warning: chart export failed: {err}")

    # Best effort: leave a placeholder at chart_path so the returned path
    # still points at an image file.
    try:
        blank = Image.new('RGB', (1100, 650), color='#f0f0f0')
        blank.save(chart_path)
        print("Warning: Used blank image as fallback")
    except Exception as err:
        print(f"Warning: could not write fallback image: {err}")
+# Feedback function (already safe from previous version)
 def save_feedback(name: str, comment: str, stars: int) -> str:
     feedback_file = "feedback.xlsx"
     try:
+        stars = max(1, min(5, int(stars)))
+    except:
         stars = 3
     new_entry = {
         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        "Name": str(name).strip()[:100] if name else "Anonymous",
+        "Comment": str(comment).strip()[:800] if comment else "",
         "Stars": stars
     }
+    for _ in range(3):
+        try:
+            if os.path.exists(feedback_file):
+                df = pd.read_excel(feedback_file)
+            else:
+                df = pd.DataFrame(columns=["Timestamp", "Name", "Comment", "Stars"])
+            df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)
+            df.to_excel(feedback_file, index=False)
+            return "✅ Thank you! Your feedback has been saved successfully."
+        except:
+            time.sleep(0.4)
+    return "❌ Could not save feedback. Please try again later."