File size: 4,455 Bytes
b780c60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
import pandas as pd
import numpy as np
import time

# Mock user database for authentication (in production, this should be more secure)
# Maps username -> plaintext password. NOTE(review): plaintext storage is only
# acceptable for this demo; real deployments need salted password hashing.
users_db = {"admin": "password"}

# In-memory pipelines and logs
# data_pipelines holds one DataFrame per completed stage, keyed by stage name
# ("data_ingestion", "data_transformation"). logs accumulates timestamped
# event strings appended by log_event(). Both reset on process restart.
data_pipelines = {}
logs = []

def log_event(event):
    """Record *event* in the in-memory log, prefixed with a local timestamp."""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    logs.append("[" + stamp + "] " + event)

# User Authentication function
def authenticate(username, password):
    """Validate *username*/*password* against the in-memory user table.

    Returns a single status message (str) for the login Textbox. The
    previous version returned a ``(bool, str)`` tuple, but the click
    handler wires only one output component, so Gradio displayed the
    tuple's repr instead of the message — return just the string.
    Failed attempts are now logged as well, so the monitor view shows
    rejected logins, not only successes.
    """
    # NOTE(review): plaintext comparison matches the mock users_db; real
    # code must compare against salted password hashes.
    if username in users_db and users_db[username] == password:
        log_event(f"User {username} logged in.")
        return f"Welcome, {username}!"
    log_event(f"Failed login attempt for user {username}.")
    return "Invalid username or password."

# Data ingestion function
def data_ingestion(data_file):
    """Load an uploaded CSV into the pipeline state and preview it.

    Parameters:
        data_file: the gr.File upload value (path-like or file-like object
            accepted by pandas), or None when nothing was uploaded.

    Returns the first rows (``DataFrame.head()``) on success, or an error
    string that the UI shows in place of the preview.
    """
    # Guard: clicking "Submit Data" with no upload previously reached
    # pd.read_csv(None) and surfaced an opaque pandas error.
    if data_file is None:
        log_event("Data ingestion failed: no file uploaded.")
        return "Error: no file uploaded."
    try:
        df = pd.read_csv(data_file)  # Handle file uploads better in Hugging Face Spaces
        data_pipelines["data_ingestion"] = df
        log_event("Data ingestion completed successfully.")
        return df.head()
    except Exception as e:
        # Broad catch is deliberate: any parse failure is logged and
        # reported to the UI instead of crashing the app.
        log_event(f"Data ingestion failed: {str(e)}")
        return f"Error: {str(e)}"

# Data transformation function (basic cleaning)
def data_transformation(clean_duplicates=True, fill_missing=None):
    """Clean the ingested DataFrame and store it as the transformed stage.

    Parameters:
        clean_duplicates: drop duplicate rows when truthy.
        fill_missing: value used to fill NaNs; ``None`` or ``""`` means
            no filling.

    Returns the first rows of the transformed frame, or a status string
    if ingestion has not run yet.
    """
    if "data_ingestion" not in data_pipelines:
        return "No data ingested yet."

    # Work on a copy so the ingested frame stays pristine for re-runs.
    df = data_pipelines["data_ingestion"].copy()

    if clean_duplicates:
        df = df.drop_duplicates()
    # The UI Textbox delivers its value verbatim, so an empty box arrives
    # as "" — never None. Previously the `is not None` check made an empty
    # box fill every NaN with the empty string, contradicting the label
    # "leave empty for no filling". Treat "" and None both as "no fill".
    if fill_missing is not None and fill_missing != "":
        df = df.fillna(fill_missing)

    data_pipelines["data_transformation"] = df
    log_event("Data transformation completed successfully.")

    return df.head()

# Data orchestration: sequential task execution
def run_pipeline():
    """Report whether both pipeline stages have produced results."""
    required_stages = ("data_ingestion", "data_transformation")
    if all(stage in data_pipelines for stage in required_stages):
        log_event("Data pipeline executed successfully.")
        return "Pipeline executed successfully."
    log_event("Pipeline execution failed.")
    return "Pipeline execution failed. Please ensure all steps are completed."

# Data monitoring and logs
def monitor_pipeline():
    """Return the accumulated log entries as one newline-separated string.

    The monitor output component is a gr.Textbox; returning the raw list
    made it display the Python list repr, so join the entries instead.
    An empty log yields "" (an empty textbox).
    """
    return "\n".join(logs)

# Data visualization (displaying the current state of data)
def visualize_data():
    """Preview the transformed data, or a notice if no stage output exists."""
    transformed = data_pipelines.get("data_transformation")
    if transformed is None:
        return "No transformed data available."
    return transformed.head()

# Gradio interface setup
def main():
    """Build and launch the Gradio UI for the demo data pipeline.

    Each gr.Row below renders in declaration order, so the layout is:
    login, ingestion, transformation, orchestration, monitoring,
    visualization. All handlers operate on the module-level
    ``data_pipelines``/``logs`` state, so the app is single-tenant:
    every visitor shares the same pipeline data.
    """
    # Launch Gradio Interface
    with gr.Blocks() as demo:
        # Authentication — NOTE(review): login is cosmetic; no handler below
        # actually checks that the user authenticated first.
        with gr.Row():
            username_input = gr.Textbox(label="Username")
            password_input = gr.Textbox(label="Password", type="password")
            auth_button = gr.Button("Log In")
            auth_output = gr.Textbox(label="Login Message")
            auth_button.click(authenticate, inputs=[username_input, password_input], outputs=auth_output)
        
        # Ingestion section: upload a CSV, then submit it to populate the
        # "data_ingestion" pipeline stage.
        with gr.Row():
            ingest_button = gr.File(label="Upload CSV Data")
            ingest_submit_button = gr.Button("Submit Data")
            ingest_output = gr.Dataframe(label="Ingested Data")
            ingest_submit_button.click(data_ingestion, inputs=ingest_button, outputs=ingest_output)

        # Transformation section: optional de-duplication and NaN filling
        # applied to the ingested frame.
        with gr.Row():
            clean_duplicates = gr.Checkbox(label="Remove Duplicates", value=True)
            fill_missing = gr.Textbox(label="Fill Missing Values (leave empty for no filling)")
            transform_button = gr.Button("Transform Data")
            transform_output = gr.Dataframe(label="Transformed Data")
            transform_button.click(data_transformation, inputs=[clean_duplicates, fill_missing], outputs=transform_output)

        # Orchestration section: checks that both stages have run.
        with gr.Row():
            pipeline_button = gr.Button("Run Pipeline")
            pipeline_output = gr.Textbox(label="Pipeline Status")
            pipeline_button.click(run_pipeline, outputs=pipeline_output)

        # Monitoring section: dumps the in-memory event log.
        with gr.Row():
            monitor_button = gr.Button("Monitor Pipeline")
            monitor_output = gr.Textbox(label="Logs")
            monitor_button.click(monitor_pipeline, outputs=monitor_output)

        # Visualization section: previews the transformed DataFrame.
        with gr.Row():
            visualize_button = gr.Button("Visualize Data")
            visualize_output = gr.Dataframe(label="Data Preview")
            visualize_button.click(visualize_data, outputs=visualize_output)

    demo.launch()

# Launch the app
if __name__ == "__main__":
    main()