File size: 4,455 Bytes
b780c60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
import pandas as pd
import numpy as np
import time

# Mock user database for authentication (in production, this should be more secure)
# Maps username -> plaintext password. NOTE(review): plaintext storage is only
# acceptable for this demo; real deployments need salted password hashing.
users_db = {"admin": "password"}

# In-memory pipelines and logs
# data_pipelines holds one DataFrame per completed stage, keyed by stage name
# ("data_ingestion", "data_transformation"). logs accumulates timestamped
# event strings appended by log_event(). Both reset on process restart.
data_pipelines = {}
logs = []

def log_event(event):
    """Record *event* in the in-memory log, prefixed with a local timestamp."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    logs.append("[" + stamp + "] " + event)

# User Authentication function
def authenticate(username, password):
    """Validate *username*/*password* against the in-memory user table.

    Returns a single status message (str) for the login Textbox. The
    previous version returned a ``(bool, str)`` tuple, but the click
    handler wires only one output component, so Gradio displayed the
    tuple's repr instead of the message — return just the string.
    Failed attempts are now logged as well, so the monitor view shows
    rejected logins, not only successes.
    """
    # NOTE(review): plaintext comparison matches the mock users_db; real
    # code must compare against salted password hashes.
    if username in users_db and users_db[username] == password:
        log_event(f"User {username} logged in.")
        return f"Welcome, {username}!"
    log_event(f"Failed login attempt for user {username}.")
    return "Invalid username or password."

# Data ingestion function
def data_ingestion(data_file):
    """Load an uploaded CSV into the pipeline state and preview it.

    Parameters:
        data_file: the gr.File upload value (path-like or file-like object
            accepted by pandas), or None when nothing was uploaded.

    Returns the first rows (``DataFrame.head()``) on success, or an error
    string that the UI shows in place of the preview.
    """
    # Guard: clicking "Submit Data" with no upload previously reached
    # pd.read_csv(None) and surfaced an opaque pandas error.
    if data_file is None:
        log_event("Data ingestion failed: no file uploaded.")
        return "Error: no file uploaded."
    try:
        df = pd.read_csv(data_file)  # Handle file uploads better in Hugging Face Spaces
        data_pipelines["data_ingestion"] = df
        log_event("Data ingestion completed successfully.")
        return df.head()
    except Exception as e:
        # Broad catch is deliberate: any parse failure is logged and
        # reported to the UI instead of crashing the app.
        log_event(f"Data ingestion failed: {str(e)}")
        return f"Error: {str(e)}"

# Data transformation function (basic cleaning)
def data_transformation(clean_duplicates=True, fill_missing=None):
    """Clean the ingested DataFrame and store it as the transformed stage.

    Parameters:
        clean_duplicates: drop duplicate rows when truthy.
        fill_missing: value used to fill NaNs; ``None`` or ``""`` means
            no filling.

    Returns the first rows of the transformed frame, or a status string
    if ingestion has not run yet.
    """
    if "data_ingestion" not in data_pipelines:
        return "No data ingested yet."

    # Work on a copy so the ingested frame stays pristine for re-runs.
    df = data_pipelines["data_ingestion"].copy()

    if clean_duplicates:
        df = df.drop_duplicates()
    # The UI Textbox delivers its value verbatim, so an empty box arrives
    # as "" — never None. Previously the `is not None` check made an empty
    # box fill every NaN with the empty string, contradicting the label
    # "leave empty for no filling". Treat "" and None both as "no fill".
    if fill_missing is not None and fill_missing != "":
        df = df.fillna(fill_missing)

    data_pipelines["data_transformation"] = df
    log_event("Data transformation completed successfully.")

    return df.head()

# Data orchestration: sequential task execution
def run_pipeline():
    """Report whether both pipeline stages have produced results."""
    required_stages = ("data_ingestion", "data_transformation")
    if all(stage in data_pipelines for stage in required_stages):
        log_event("Data pipeline executed successfully.")
        return "Pipeline executed successfully."
    log_event("Pipeline execution failed.")
    return "Pipeline execution failed. Please ensure all steps are completed."

# Data monitoring and logs
def monitor_pipeline():
    """Return the accumulated log entries as one newline-separated string.

    The monitor output component is a gr.Textbox; returning the raw list
    made it display the Python list repr, so join the entries instead.
    An empty log yields "" (an empty textbox).
    """
    return "\n".join(logs)

# Data visualization (displaying the current state of data)
def visualize_data():
    """Preview the transformed data, or a notice if no stage output exists."""
    transformed = data_pipelines.get("data_transformation")
    if transformed is None:
        return "No transformed data available."
    return transformed.head()

# Gradio interface setup
def main():
    """Build and launch the Gradio UI for the demo data pipeline.

    Each gr.Row below renders in declaration order, so the layout is:
    login, ingestion, transformation, orchestration, monitoring,
    visualization. All handlers operate on the module-level
    ``data_pipelines``/``logs`` state, so the app is single-tenant:
    every visitor shares the same pipeline data.
    """
    # Launch Gradio Interface
    with gr.Blocks() as demo:
        # Authentication — NOTE(review): login is cosmetic; no handler below
        # actually checks that the user authenticated first.
        with gr.Row():
            username_input = gr.Textbox(label="Username")
            password_input = gr.Textbox(label="Password", type="password")
            auth_button = gr.Button("Log In")
            auth_output = gr.Textbox(label="Login Message")
            auth_button.click(authenticate, inputs=[username_input, password_input], outputs=auth_output)
        
        # Ingestion section: upload a CSV, then submit it to populate the
        # "data_ingestion" pipeline stage.
        with gr.Row():
            ingest_button = gr.File(label="Upload CSV Data")
            ingest_submit_button = gr.Button("Submit Data")
            ingest_output = gr.Dataframe(label="Ingested Data")
            ingest_submit_button.click(data_ingestion, inputs=ingest_button, outputs=ingest_output)

        # Transformation section: optional de-duplication and NaN filling
        # applied to the ingested frame.
        with gr.Row():
            clean_duplicates = gr.Checkbox(label="Remove Duplicates", value=True)
            fill_missing = gr.Textbox(label="Fill Missing Values (leave empty for no filling)")
            transform_button = gr.Button("Transform Data")
            transform_output = gr.Dataframe(label="Transformed Data")
            transform_button.click(data_transformation, inputs=[clean_duplicates, fill_missing], outputs=transform_output)

        # Orchestration section: checks that both stages have run.
        with gr.Row():
            pipeline_button = gr.Button("Run Pipeline")
            pipeline_output = gr.Textbox(label="Pipeline Status")
            pipeline_button.click(run_pipeline, outputs=pipeline_output)

        # Monitoring section: dumps the in-memory event log.
        with gr.Row():
            monitor_button = gr.Button("Monitor Pipeline")
            monitor_output = gr.Textbox(label="Logs")
            monitor_button.click(monitor_pipeline, outputs=monitor_output)

        # Visualization section: previews the transformed DataFrame.
        with gr.Row():
            visualize_button = gr.Button("Visualize Data")
            visualize_output = gr.Dataframe(label="Data Preview")
            visualize_button.click(visualize_data, outputs=visualize_output)

    demo.launch()

# Launch the app
if __name__ == "__main__":
    main()