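"""Gradio app for accelerometer-based movement classification.

Wires the pipeline_classes transformers into scikit-learn pipelines
(combine raw data with self-reports, extract features, train a model,
and classify new data) behind a simple tabbed UI.
"""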
import gradio as gr
from pipeline_classes import CreateCombinedDataFrame, ScaleXYZData, ExtractFeatures, TrainModel, ClassifyMovementData, LowPassFilter, PCAHandler
from sklearn.pipeline import Pipeline
from _config import config
import pandas as pd
import json

# Define pipelines
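# Merge raw accelerometer data with self-report labels over a shared time window.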
combining_dataframes_pipeline = Pipeline([
#('import_data', ImportData(use_accel=True, use_reports=True, use_combined=False, use_features=False)),
('create_combined_dataframe', CreateCombinedDataFrame(time_window=config["time_window"], label_columns=config["label_columns"])),
])
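
# Filter, scale, and window combined data into per-window feature vectors.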
feature_extraction_pipeline = Pipeline([
#('import_data', ImportData(use_accel=False, use_reports=False, use_combined=True, use_features=False)),
('low_pass_filter', LowPassFilter(cutoff_frequency=config["cutoff_frequency"], sampling_rate=config["data_frequency"], order=config["order"])),
('scale_xyz_data', ScaleXYZData(scaler_type=config["scaler_type"])),
('extract_features', ExtractFeatures(window_length=config["window_length"],
window_step_size=config["window_step_size"],
data_frequency=config["data_frequency"],
selected_domains=config["selected_domains"],
include_magnitude=config["include_magnitude"],
label_columns=config["label_columns"])),
])
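
# Optionally reduce the feature set with PCA, then fit the configured classifier.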
training_model_pipeline = Pipeline([
#('import_data', ImportData(use_accel=False, use_reports=False, use_combined=False, use_features=True)),
('pca_handler', PCAHandler(apply_pca=config["apply_pca"], variance=config["pca_variance"])),
('train_model', TrainModel(config=config)),
])
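
# Preprocess unlabeled accelerometer data and classify it with a trained model.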
analyzing_data_pipeline = Pipeline([
#('import_data', ImportData(use_accel=True, use_reports=False, use_combined=False, use_features=False)),
('low_pass_filter', LowPassFilter(cutoff_frequency=config["cutoff_frequency"], sampling_rate=config["data_frequency"], order=config["order"])),
('scale_xyz_data', ScaleXYZData(scaler_type=config["scaler_type"])),
    ('extract_features', ExtractFeatures(window_length=config["window_length"],
                                         window_step_size=config["window_step_size"],
                                         data_frequency=config["data_frequency"],
                                         selected_domains=config["selected_domains"],
                                         include_magnitude=config["include_magnitude"])),
('classify_movement_data', ClassifyMovementData()),
])
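
# End-to-end pipeline covering all steps above; defined for completeness but
# not currently selectable from the "Execute Pipeline" tab.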
complete_training_model_pipeline = Pipeline([
#('import_data', ImportData(use_accel=True, use_reports=True, use_combined=False, use_features=False)),
('create_combined_dataframe', CreateCombinedDataFrame(time_window=config["time_window"], label_columns=config["label_columns"])),
('low_pass_filter', LowPassFilter(cutoff_frequency=config["cutoff_frequency"], sampling_rate=config["data_frequency"], order=config["order"])),
('scale_xyz_data', ScaleXYZData(scaler_type=config["scaler_type"])),
('extract_features', ExtractFeatures(window_length=config["window_length"],
window_step_size=config["window_step_size"],
data_frequency=config["data_frequency"],
selected_domains=config["selected_domains"],
include_magnitude=config["include_magnitude"],
label_columns=config["label_columns"])),
('pca_handler', PCAHandler(apply_pca=config["apply_pca"], variance=config["pca_variance"])),
('train_model', TrainModel(config=config)),
])

def execute_pipeline(pipeline_name, accel_file, report_file, combined_file, features_file, model_file):
    """Run the selected pipeline on the uploaded files and return the output file paths."""
    try:
        # Load each data file only if a path was provided
        accel_data = pd.read_csv(accel_file) if accel_file else None
        report_data = pd.read_csv(report_file) if report_file else None
        combined_data = pd.read_csv(combined_file) if combined_file else None
        features_data = pd.read_csv(features_file) if features_file else None
        # model_file is accepted from the UI but not read directly in this function
        output_file = None
        secondary_output_file = None
# Validate inputs for the selected pipeline
if pipeline_name == "Combine DataFrames":
if accel_data is None or report_data is None:
return "Error: Both accelerometer and self-report data files are required for this pipeline.", None
            # The combining pipeline expects a (report_data, accel_data) tuple
            X = report_data, accel_data
            result = combining_dataframes_pipeline.fit_transform(X)
            output_file = "combine_dataframes_output.csv"
            secondary_output_file = None
            result.to_csv(output_file, index=False)
elif pipeline_name == "Extract Features":
if combined_data is None:
return "Error: Combined data file is required for this pipeline.", None
result = feature_extraction_pipeline.fit_transform(combined_data)
output_file = "extract_features_output.csv"
secondary_output_file = None
result.to_csv(output_file, index=False)
elif pipeline_name == "Train Model":
if features_data is None:
return "Error: Features data file is required for this pipeline.", None
training_model_pipeline.fit(features_data)
output_file, secondary_output_file = training_model_pipeline.named_steps['train_model'].get_output_files()
elif pipeline_name == "Analyze Data":
if accel_data is None:
return "Error: Accelerometer data file is required for this pipeline.", None
result = analyzing_data_pipeline.fit_transform(accel_data)
output_file = "analyze_data_output.csv"
secondary_output_file = None
result.to_csv(output_file, index=False)
else:
return "Invalid pipeline selected.", None
return output_file, secondary_output_file
    except Exception as e:
        print(f"Error occurred: {e}")
        # Surface the error message in place of the output file path
        return str(e), None

# Function to update the configuration
def update_config(label_columns, target_label, time_window, window_length, window_step_size, data_frequency, selected_domains, include_magnitude, cutoff_frequency, order, scaler_type, apply_pca, pca_variance, classifier):
    """Update the shared config dict from the UI fields and persist it to _config.json."""
    config.update({
"label_columns": label_columns.split(","),
"target_label": target_label,
"time_window": time_window,
"window_length": window_length,
"window_step_size": window_step_size,
"data_frequency": data_frequency,
"selected_domains": selected_domains.split(",") if selected_domains else None,
"include_magnitude": include_magnitude,
"cutoff_frequency": cutoff_frequency,
"order": order,
"scaler_type": scaler_type,
"apply_pca": apply_pca,
"pca_variance": pca_variance,
"classifier": classifier
})
with open('_config.json', 'w') as f:
json.dump(config, f, indent=4)
return "Configuration updated successfully!"

# Gradio Blocks Interface
with gr.Blocks() as demo:
with gr.Tab("Import Data"):
accel_file = gr.File(label="Upload Accelerometer Data")
report_file = gr.File(label="Upload Self-Report Data")
combined_file = gr.File(label="Upload Combined Data")
features_file = gr.File(label="Upload Features Data")
model_file = gr.File(label="Upload Trained Model")
with gr.Tab("Update Configuration"):
label_columns = gr.Textbox(label="Label Columns (comma-separated)", value="valence,arousal")
target_label = gr.Textbox(label="Target Label", value="arousal")
time_window = gr.Number(label="Time Window (minutes)", value=2)
window_length = gr.Number(label="Window Length (seconds)", value=60)
window_step_size = gr.Number(label="Window Step Size (seconds)", value=20)
data_frequency = gr.Number(label="Data Frequency (Hz)", value=25)
selected_domains = gr.Textbox(label="Selected Domains (comma-separated)", value="")
include_magnitude = gr.Checkbox(label="Include Magnitude", value=True)
cutoff_frequency = gr.Number(label="Cutoff Frequency", value=10)
order = gr.Number(label="Order", value=4)
scaler_type = gr.Dropdown(label="Scaler Type", choices=["standard", "minmax"], value="standard")
apply_pca = gr.Checkbox(label="Apply PCA", value=False)
pca_variance = gr.Number(label="PCA Variance", value=0.95)
classifier = gr.Dropdown(label="Classifier", choices=["xgboost", "svm", "randomforest"], value="xgboost")
update_button = gr.Button("Update Configuration")
update_output = gr.Textbox(label="Update Output")
update_button.click(update_config, inputs=[label_columns, target_label, time_window, window_length, window_step_size, data_frequency, selected_domains, include_magnitude, cutoff_frequency, order, scaler_type, apply_pca, pca_variance, classifier], outputs=update_output)
with gr.Tab("Execute Pipeline"):
pipeline_name = gr.Dropdown(label="Select Pipeline", choices=["Combine DataFrames", "Extract Features", "Train Model", "Analyze Data"])
execute_button = gr.Button("Execute Pipeline")
output_file = gr.File(label="Download Output (CSV or Model File)")
secondary_output_file = gr.File(label="Download Metadata (JSON)")
execute_button.click(
execute_pipeline,
inputs=[pipeline_name, accel_file, report_file, combined_file, features_file, model_file],
outputs=[output_file, secondary_output_file]
)
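
# Pass share=True to demo.launch() if a temporary public link is needed.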
demo.launch()