"""Run a regression or clustering analysis on a JSON payload read from stdin.

The script reads one JSON document from standard input, hands it to
``perform_analysis`` and prints the resulting JSON envelope to standard
output.
"""

import json
import sys

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
# Not referenced below; kept so the module's import surface is unchanged.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeClassifier


def _fail(message):
    """Build the failure envelope shared by every error path."""
    return {"success": False, "error": message}


def _run_regression(df, params):
    """Fit a linear regression on ``params['features']`` -> ``params['target']``.

    Returns a dict with ``r_squared``, ``coefficients``, ``intercept`` and the
    in-sample ``predictions``.

    Raises:
        ValueError: if ``features`` or ``target`` is missing or empty.
    """
    features = params.get("features", [])
    target = params.get("target")
    if not features or not target:
        raise ValueError("Missing features or target for regression")

    X = df[features].values
    y = df[target].values

    # Simple linear fit for now; can be extended to polynomial features.
    model = LinearRegression()
    model.fit(X, y)

    return {
        "r_squared": float(model.score(X, y)),
        "coefficients": model.coef_.tolist(),
        # intercept_ is a NumPy scalar for a single target and an array for
        # several; tolist() yields a JSON-serialisable value in both cases.
        "intercept": np.asarray(model.intercept_).tolist(),
        "predictions": model.predict(X).tolist(),
    }


def _run_clustering(df, params):
    """Cluster the rows of ``df`` on ``params['features']`` with k-means.

    ``params['k']`` selects the number of clusters (default 3). Returns a dict
    with the per-row ``clusters`` labels and the cluster ``centers``.

    Raises:
        ValueError: if ``features`` is missing or empty, or ``k`` is not
            convertible to an integer.
    """
    features = params.get("features", [])
    k = int(params.get("k", 3))
    if not features:
        raise ValueError("Missing features for clustering")

    X = df[features].values
    kmeans = KMeans(n_clusters=k, n_init=10)
    kmeans.fit(X)

    return {
        "clusters": kmeans.labels_.tolist(),
        "centers": kmeans.cluster_centers_.tolist(),
    }


# Maps the payload's "type" value to the helper that performs that analysis.
_ANALYSES = {
    "regression": _run_regression,
    "clustering": _run_clustering,
}


def perform_analysis(data_payload):
    """Run the analysis described by ``data_payload`` and return a result dict.

    Expects ``data_payload`` to be a dict::

        {
            "data": [{...}, {...}],   # list of records
            "type": "regression" | "clustering",   # default "regression"
            "params": {
                "features": ["col1", "col2"],
                "target": "target_col",   # regression only
                "k": 3                    # clustering only
            }
        }

    Returns:
        ``{"success": True, "data": {...}}`` on success, otherwise
        ``{"success": False, "error": "<message>"}``. This function does not
        raise: validation problems, an unsupported ``type`` (for example
        ``"classification"``, which has no implementation here) and any
        exception raised while building the frame or fitting the model are
        all reported through the failure form.
    """
    try:
        if not isinstance(data_payload, dict) or "data" not in data_payload:
            return _fail("Missing 'data' in payload")

        df = pd.DataFrame(data_payload["data"])
        analysis_type = data_payload.get("type", "regression")
        # An explicit "params": null is treated the same as an absent key.
        params = data_payload.get("params") or {}

        runner = _ANALYSES.get(analysis_type)
        if runner is None:
            # Previously an unknown type produced an empty "success" result.
            return _fail(f"Unsupported analysis type: {analysis_type}")

        return {"success": True, "data": runner(df, params)}
    except Exception as e:
        # Boundary handler: the caller consumes JSON, so every failure is
        # converted into the error envelope instead of a traceback.
        return _fail(str(e))


def main():
    """Read a JSON payload from stdin and print the analysis result as JSON.

    Exits with status 1 when stdin is empty. Any other failure (for example
    malformed JSON) is printed as a ``System Error`` envelope.
    """
    try:
        input_str = sys.stdin.read()
        if not input_str:
            print(json.dumps({"success": False, "error": "No input data received"}))
            sys.exit(1)

        payload = json.loads(input_str)
        output = perform_analysis(payload)
        print(json.dumps(output))
    except Exception as e:
        print(json.dumps({"success": False, "error": f"System Error: {str(e)}"}))


if __name__ == "__main__":
    main()