File size: 2,857 Bytes
34367da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import sys
import json
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans

def _regression_result(df, params):
    """Fit a linear regression on the requested columns and summarise it.

    Raises ValueError when ``features`` or ``target`` is missing from params.
    """
    features = params.get('features', [])
    target = params.get('target')

    if not features or not target:
        raise ValueError("Missing features or target for regression")

    # A single column name would select a 1-D Series; scikit-learn needs 2-D X.
    if isinstance(features, str):
        features = [features]

    X = df[features].values
    y = df[target].values

    # Simple Linear for now, can extend to Poly
    model = LinearRegression()
    model.fit(X, y)

    return {
        # R^2 is computed on the same rows the model was fitted on.
        "r_squared": float(model.score(X, y)),
        "coefficients": model.coef_.tolist(),
        # intercept_ is a NumPy scalar (or array for a multi-column target);
        # convert it to plain Python values so json.dumps always accepts it.
        "intercept": np.asarray(model.intercept_).tolist(),
        "predictions": model.predict(X).tolist(),
    }


def _clustering_result(df, params):
    """Run k-means on the requested columns and return labels and centers.

    Raises ValueError when ``features`` is missing from params.
    """
    features = params.get('features', [])
    k = int(params.get('k', 3))

    if not features:
        raise ValueError("Missing features for clustering")

    # A single column name would select a 1-D Series; scikit-learn needs 2-D X.
    if isinstance(features, str):
        features = [features]

    X = df[features].values

    # random_state is optional; leaving it unset (None) keeps the original
    # non-deterministic initialisation.
    kmeans = KMeans(
        n_clusters=k,
        n_init=10,
        random_state=params.get('random_state'),
    )
    kmeans.fit(X)

    return {
        "clusters": kmeans.labels_.tolist(),
        "centers": kmeans.cluster_centers_.tolist(),
    }


def perform_analysis(data_payload: dict) -> dict:
    """Run the analysis described by ``data_payload`` and return a result envelope.

    Expected payload shape::

        {
            "data": [{...}, {...}],              # list of records
            "type": "regression" | "clustering", # defaults to "regression"
            "params": {
                "features": ["col1", "col2"],
                "target": "target_col",          # regression only
                "k": 3,                          # clustering only, default 3
                "random_state": 0                # clustering only, optional
            }
        }

    Returns:
        ``{"success": True, "data": {...}}`` on success, or
        ``{"success": False, "error": "<message>"}`` on any failure. Every
        failure uses the same envelope, including validation errors and
        unsupported analysis types (e.g. "classification", which this
        function does not implement).

    This function does not raise: all exceptions are converted into the
    failure envelope.
    """
    handlers = {
        'regression': _regression_result,
        'clustering': _clustering_result,
    }

    try:
        records = data_payload.get('data')
        if not records:
            return {"success": False, "error": "Missing or empty 'data' in payload"}

        analysis_type = data_payload.get('type', 'regression')
        handler = handlers.get(analysis_type)
        if handler is None:
            # Previously an unknown type silently produced an empty success.
            return {
                "success": False,
                "error": f"Unsupported analysis type: {analysis_type!r}",
            }

        # Tolerate an explicit null for "params".
        params = data_payload.get('params') or {}
        df = pd.DataFrame(records)

        return {"success": True, "data": handler(df, params)}

    except Exception as e:
        # Boundary handler: the caller always receives a JSON-friendly dict.
        return {"success": False, "error": str(e)}

if __name__ == "__main__":
    # Script entry point: read one JSON payload from stdin, run the analysis,
    # and write exactly one JSON document to stdout.
    try:
        raw_text = sys.stdin.read()
        if raw_text:
            request = json.loads(raw_text)
            response = perform_analysis(request)
            print(json.dumps(response))
        else:
            # Empty stdin is the only case that exits with status 1.
            no_input = {"success": False, "error": "No input data received"}
            print(json.dumps(no_input))
            sys.exit(1)
    except Exception as exc:
        # Parse or serialisation failures are reported as JSON; this path
        # does not set a non-zero exit status.
        failure = {"success": False, "error": f"System Error: {exc}"}
        print(json.dumps(failure))