Spaces:

SHAILJA1
/

ETL

Sleeping

App Files Files Community

SHAILJA1 committed on Sep 16, 2025

Commit

2de3793

verified ·

1 Parent(s): 43b5326

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -76

app.py CHANGED Viewed

@@ -1,89 +1,50 @@
-# etl_superstore_dashboard.py
 import pandas as pd
 import gradio as gr
-# -------------------------------
-# Step 1: ETL Functions
-# -------------------------------
-def calculate_total_sales(df, quantity_col, sales_col, total_col="TotalSales"):
-    """Calculate Total Sales as Sales * Quantity"""
-    if quantity_col in df.columns and sales_col in df.columns:
-        df[total_col] = df[quantity_col] * df[sales_col]
-    return df
-def filter_orders(df, quantity_col=None, sales_col=None, min_qty=2, min_sales=0):
-    """Filter orders by Quantity or Sales"""
-    if quantity_col and quantity_col in df.columns:
-        df = df[df[quantity_col] >= min_qty]
-    if sales_col and sales_col in df.columns:
-        df = df[df[sales_col] >= min_sales]
-    return df
-def uppercase_customer(df, customer_col):
-    """Uppercase Customer Names"""
-    if customer_col in df.columns:
-        df[customer_col] = df[customer_col].astype(str).str.upper()
-    return df
-def apply_discount(df, total_col, discount_col, discounted_col="DiscountedSales"):
-    """Apply Discount on Total Sales"""
-    if total_col in df.columns and discount_col in df.columns:
-        df[discounted_col] = df[total_col] * (1 - df[discount_col])
-    return df
-# -------------------------------
-# Step 2: Main ETL Function
-# -------------------------------
-def superstore_etl(file, quantity_col, sales_col, customer_col, discount_col,
-                   task_selection, min_qty, min_sales):
-    df = pd.read_csv(file.name)
-    if "Calculate Total Sales" in task_selection:
-        df = calculate_total_sales(df, quantity_col, sales_col)
-    if "Filter Orders" in task_selection:
-        df = filter_orders(df, quantity_col, sales_col, min_qty, min_sales)
-    if "Uppercase Customer Names" in task_selection:
-        df = uppercase_customer(df, customer_col)
-    if "Apply Discounted Sales" in task_selection:
-        df = apply_discount(df, "TotalSales", discount_col)
-    return df
-# -------------------------------
-# Step 3: Gradio Interface
-# -------------------------------
-tasks = ["Calculate Total Sales", "Filter Orders",
-         "Uppercase Customer Names", "Apply Discounted Sales"]
 iface = gr.Interface(
-    fn=superstore_etl,
-    inputs=[
-        gr.File(label="Upload Global Superstore CSV"),
-        gr.Textbox(label="Quantity Column Name", value="Quantity"),
-        gr.Textbox(label="Sales Column Name", value="Sales"),
-        gr.Textbox(label="Customer Column Name", value="Customer Name"),
-        gr.Textbox(label="Discount Column Name", value="Discount"),
-        gr.CheckboxGroup(choices=tasks, label="Select ETL Tasks"),
-        gr.Number(label="Minimum Quantity (for filtering)", value=2),
-        gr.Number(label="Minimum Sales (for filtering)", value=0)
-    ],
-    outputs=gr.DataFrame(label="Transformed Data"),
-    title="Global Superstore ETL Dashboard",
-    description="""
-    Upload your Global Superstore dataset and define column names:
-    - Quantity Column → for TotalSales calculation and filtering
-    - Sales Column → for TotalSales calculation and filtering
-    - Customer Column → for uppercase transformation
-    - Discount Column → for discounted sales calculation
-    Select ETL tasks and see results instantly.
-    """
 )
-iface.launch()

+# outlier_detection_gradio.py
 import pandas as pd
 import gradio as gr
+# Dummy dataset
+data = {
+    "Order ID": [101, 102, 103, 104, 105, 106],
+    "Customer Name": ["Alice", "Bob", "Charlie", "David", "Eva", "Frank"],
+    "Product": ["Laptop", "Mouse", "Keyboard", "Monitor", "Printer", "Tablet"],
+    "Quantity": [1, 2, 1, 3, 2, 50],  # Notice 50 is an outlier
+    "Price": [1000, 20, 50, 300, 150, 200]
+}
+df = pd.DataFrame(data)
+# Outlier detection function
+def detect_outliers(column_name):
+    """Flag IQR outliers in one column of the module-level ``df``.
+
+    Args:
+        column_name: Name of the column to analyse. Surrounding whitespace
+            is ignored because the value comes from a free-text box.
+
+    Returns:
+        A copy of ``df`` with an extra ``"Outlier"`` column holding "Yes"
+        for values outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` and "No"
+        otherwise, or a one-row DataFrame with an ``"Error"`` column when
+        the column is missing or not numeric.
+    """
+    if isinstance(column_name, str):
+        column_name = column_name.strip()
+    if column_name not in df.columns:
+        return pd.DataFrame({"Error": [f"Column '{column_name}' does not exist."]})
+    col_data = df[column_name]
+    # Quantiles are undefined for text columns such as "Customer Name";
+    # report that to the user instead of letting quantile() raise.
+    if not pd.api.types.is_numeric_dtype(col_data) or pd.api.types.is_bool_dtype(col_data):
+        return pd.DataFrame({"Error": [f"Column '{column_name}' is not numeric."]})
+    # Using IQR method
+    q1 = col_data.quantile(0.25)
+    q3 = col_data.quantile(0.75)
+    iqr = q3 - q1
+    lower_bound = q1 - 1.5 * iqr
+    upper_bound = q3 + 1.5 * iqr
+    # Flag outliers with a vectorised comparison; NaN compares False on both
+    # sides and is therefore labelled "No".
+    is_outlier = (col_data < lower_bound) | (col_data > upper_bound)
+    df_outliers = df.copy()
+    df_outliers["Outlier"] = is_outlier.map({True: "Yes", False: "No"})
+    return df_outliers
+# Gradio Interface
 iface = gr.Interface(
+    fn=detect_outliers,
+    inputs=gr.Textbox(label="Column Name for Outlier Detection", placeholder="e.g., Quantity"),
+    outputs=gr.Dataframe(label="Outlier Detection Result"),
+    title="Outlier Detection Demo",
+    description="Detects outliers in a specified column using the IQR method."
 )
+if __name__ == "__main__":
+    iface.launch()