SHAILJA1 committed on
Commit
2de3793
·
verified ·
1 Parent(s): 43b5326

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -76
app.py CHANGED
@@ -1,89 +1,50 @@
1
- # etl_superstore_dashboard.py
2
 
3
  import pandas as pd
4
  import gradio as gr
5
 
6
- # -------------------------------
7
- # Step 1: ETL Functions
8
- # -------------------------------
9
-
10
- def calculate_total_sales(df, quantity_col, sales_col, total_col="TotalSales"):
11
- """Calculate Total Sales as Sales * Quantity"""
12
- if quantity_col in df.columns and sales_col in df.columns:
13
- df[total_col] = df[quantity_col] * df[sales_col]
14
- return df
15
-
16
- def filter_orders(df, quantity_col=None, sales_col=None, min_qty=2, min_sales=0):
17
- """Filter orders by Quantity or Sales"""
18
- if quantity_col and quantity_col in df.columns:
19
- df = df[df[quantity_col] >= min_qty]
20
- if sales_col and sales_col in df.columns:
21
- df = df[df[sales_col] >= min_sales]
22
- return df
23
-
24
- def uppercase_customer(df, customer_col):
25
- """Uppercase Customer Names"""
26
- if customer_col in df.columns:
27
- df[customer_col] = df[customer_col].astype(str).str.upper()
28
- return df
29
-
30
- def apply_discount(df, total_col, discount_col, discounted_col="DiscountedSales"):
31
- """Apply Discount on Total Sales"""
32
- if total_col in df.columns and discount_col in df.columns:
33
- df[discounted_col] = df[total_col] * (1 - df[discount_col])
34
- return df
35
-
36
- # -------------------------------
37
- # Step 2: Main ETL Function
38
- # -------------------------------
39
-
40
- def superstore_etl(file, quantity_col, sales_col, customer_col, discount_col,
41
- task_selection, min_qty, min_sales):
42
- df = pd.read_csv(file.name)
43
 
44
- if "Calculate Total Sales" in task_selection:
45
- df = calculate_total_sales(df, quantity_col, sales_col)
46
 
47
- if "Filter Orders" in task_selection:
48
- df = filter_orders(df, quantity_col, sales_col, min_qty, min_sales)
 
 
49
 
50
- if "Uppercase Customer Names" in task_selection:
51
- df = uppercase_customer(df, customer_col)
52
 
53
- if "Apply Discounted Sales" in task_selection:
54
- df = apply_discount(df, "TotalSales", discount_col)
 
 
 
55
 
56
- return df
57
-
58
- # -------------------------------
59
- # Step 3: Gradio Interface
60
- # -------------------------------
61
-
62
- tasks = ["Calculate Total Sales", "Filter Orders",
63
- "Uppercase Customer Names", "Apply Discounted Sales"]
64
 
 
65
  iface = gr.Interface(
66
- fn=superstore_etl,
67
- inputs=[
68
- gr.File(label="Upload Global Superstore CSV"),
69
- gr.Textbox(label="Quantity Column Name", value="Quantity"),
70
- gr.Textbox(label="Sales Column Name", value="Sales"),
71
- gr.Textbox(label="Customer Column Name", value="Customer Name"),
72
- gr.Textbox(label="Discount Column Name", value="Discount"),
73
- gr.CheckboxGroup(choices=tasks, label="Select ETL Tasks"),
74
- gr.Number(label="Minimum Quantity (for filtering)", value=2),
75
- gr.Number(label="Minimum Sales (for filtering)", value=0)
76
- ],
77
- outputs=gr.DataFrame(label="Transformed Data"),
78
- title="Global Superstore ETL Dashboard",
79
- description="""
80
- Upload your Global Superstore dataset and define column names:
81
- - Quantity Column → for TotalSales calculation and filtering
82
- - Sales Column → for TotalSales calculation and filtering
83
- - Customer Column → for uppercase transformation
84
- - Discount Column → for discounted sales calculation
85
- Select ETL tasks and see results instantly.
86
- """
87
  )
88
 
89
- iface.launch()
 
 
1
+ # outlier_detection_gradio.py
2
 
3
  import pandas as pd
4
  import gradio as gr
5
 
6
+ # Dummy dataset
7
+ data = {
8
+ "Order ID": [101, 102, 103, 104, 105, 106],
9
+ "Customer Name": ["Alice", "Bob", "Charlie", "David", "Eva", "Frank"],
10
+ "Product": ["Laptop", "Mouse", "Keyboard", "Monitor", "Printer", "Tablet"],
11
+ "Quantity": [1, 2, 1, 3, 2, 50], # Notice 50 is an outlier
12
+ "Price": [1000, 20, 50, 300, 150, 200]
13
+ }
14
+
15
+ df = pd.DataFrame(data)
16
+
17
+ # Outlier detection function
18
+ def detect_outliers(column_name):
19
+ if column_name not in df.columns:
20
+ return pd.DataFrame({"Error": [f"Column '{column_name}' does not exist."]})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ col_data = df[column_name]
 
23
 
24
+ # Using IQR method
25
+ Q1 = col_data.quantile(0.25)
26
+ Q3 = col_data.quantile(0.75)
27
+ IQR = Q3 - Q1
28
 
29
+ lower_bound = Q1 - 1.5 * IQR
30
+ upper_bound = Q3 + 1.5 * IQR
31
 
32
+ # Flag outliers
33
+ df_outliers = df.copy()
34
+ df_outliers["Outlier"] = df_outliers[column_name].apply(
35
+ lambda x: "Yes" if x < lower_bound or x > upper_bound else "No"
36
+ )
37
 
38
+ return df_outliers
 
 
 
 
 
 
 
39
 
40
+ # Gradio Interface
41
  iface = gr.Interface(
42
+ fn=detect_outliers,
43
+ inputs=gr.Textbox(label="Column Name for Outlier Detection", placeholder="e.g., Quantity"),
44
+ outputs=gr.Dataframe(label="Outlier Detection Result"),
45
+ title="Outlier Detection Demo",
46
+ description="Detects outliers in a specified column using the IQR method."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  )
48
 
49
+ if __name__ == "__main__":
50
+ iface.launch()