File size: 2,485 Bytes
565a379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import pandas as pd
import numpy as np
import logging
from typing import Dict, Any, List, Optional

# Module-level logger, named after this module via __name__; DataProcessor
# uses it to report failures that it converts into {"error": ...} results.
logger = logging.getLogger(__name__)

class DataProcessor:
    """
    Handles data processing and statistical analysis.

    Uses NumPy for descriptive statistics and Pandas for time-series
    handling. The class keeps no state. Both public methods report failures
    by returning a dictionary with an ``"error"`` key instead of raising.
    """

    def __init__(self):
        """Create a processor; there is no configuration or state to set up."""
        pass

    def calculate_stats(self, data: List[float]) -> Dict[str, Any]:
        """
        Calculate basic statistical metrics for a sequence of numbers.

        Args:
            data: Numeric values. Any array-like that ``np.asarray`` accepts
                works (list, tuple, NumPy array, Pandas Series).

        Returns:
            ``{}`` when ``data`` is ``None`` or empty. Otherwise a dict with
            the float entries ``mean``, ``median``, ``std_dev``, ``min``,
            ``max`` and ``variance``. ``std_dev`` and ``variance`` use the
            NumPy defaults, i.e. the population formulas (``ddof=0``).
            If the computation fails, ``{"error": <message>}`` is returned
            and the exception is logged.
        """
        if data is None:
            return {}

        try:
            arr = np.asarray(data)
            # Check emptiness via ``size`` rather than truthiness: ``not arr``
            # raises ValueError for multi-element arrays and Series.
            if arr.size == 0:
                return {}

            return {
                "mean": float(np.mean(arr)),
                "median": float(np.median(arr)),
                "std_dev": float(np.std(arr)),
                "min": float(np.min(arr)),
                "max": float(np.max(arr)),
                "variance": float(np.var(arr)),
            }
        except Exception as e:
            logger.exception("Error calculating stats: %s", e)
            return {"error": str(e)}

    def analyze_timeseries(
        self,
        dates: List[Any],
        values: List[float],
        *,
        window: int = 7,
        trend_threshold: float = 0.05,
    ) -> Dict[str, Any]:
        """
        Analyze time series data using Pandas.

        The observations are ordered by date (stable sort, so ties keep the
        caller's order) before any metric is computed, which makes
        ``latest_value`` and the trend independent of the input order.

        Args:
            dates: One label per observation. If the first element is a
                string, the whole column is parsed with ``pd.to_datetime``;
                otherwise the labels are used as given.
            values: Observed values, same length as ``dates``.
            window: Number of most recent observations (rows, not calendar
                days) averaged for ``rolling_mean_last``. Passed to
                ``Series.rolling`` with ``min_periods=1``.
            trend_threshold: Absolute slope (value change per observation)
                above which the series is labelled ``"increasing"`` or
                ``"decreasing"`` instead of ``"stable"``.

        Returns:
            A dict with ``trend``, ``slope``, ``latest_value`` and
            ``rolling_mean_last``. ``{"error": "Invalid input data"}`` is
            returned for missing, empty or length-mismatched inputs, and
            ``{"error": <message>}`` if the analysis itself fails.
        """
        if dates is None or values is None:
            return {"error": "Invalid input data"}

        try:
            # Materialise as plain lists so that arrays and Series are
            # accepted and Pandas does not try to align on a foreign index.
            date_list = list(dates)
            value_list = list(values)
        except TypeError:
            return {"error": "Invalid input data"}

        if not date_list or len(date_list) != len(value_list):
            return {"error": "Invalid input data"}

        try:
            frame = pd.DataFrame({"date": date_list, "value": value_list})
            # parse dates if strings
            if isinstance(date_list[0], str):
                frame["date"] = pd.to_datetime(frame["date"])

            frame = frame.set_index("date")

            try:
                # mergesort is stable: duplicate dates keep their input order.
                frame = frame.sort_index(kind="mergesort")
            except TypeError:
                # Labels that cannot be compared with each other cannot be
                # ordered; fall back to the caller's order instead of failing.
                logger.debug("Time series index is not sortable; keeping input order")

            series = frame["value"]
            rolling_mean = series.rolling(window=window, min_periods=1).mean()

            # Trend: least-squares line over the observation position
            # (0, 1, 2, ...), not over the actual time distance.
            y = series.to_numpy()
            if len(y) < 2:
                # A line cannot be fitted through a single point; treat a
                # one-observation series as flat rather than failing.
                slope = 0.0
            else:
                slope, _intercept = np.polyfit(np.arange(len(y)), y, 1)

            if slope > trend_threshold:
                trend = "increasing"
            elif slope < -trend_threshold:
                trend = "decreasing"
            else:
                trend = "stable"

            return {
                "trend": trend,
                "slope": float(slope),
                "latest_value": float(series.iloc[-1]),
                "rolling_mean_last": float(rolling_mean.iloc[-1]),
            }

        except Exception as e:
            logger.exception("Error analyzing timeseries: %s", e)
            return {"error": str(e)}