logfiles_analysis / dataset_content.py
Jasper Siebelink
OC_SVM support
06b052d
import numpy as np
import json
def get_data_from_json(path: str = 'cattle_log.json') -> np.ndarray:
    """Load log entries from a JSON file into a numeric feature matrix.

    The JSON document must be an object with a top-level ``'logs'`` list.
    Each entry contributes one row ``[total_today, heart_rate, weight]`` read
    from:

    * ``entry['distanceTraveled']['totalToday']`` -- a string; trailing ``m``
      characters are stripped before conversion to ``float``.
    * ``entry['healthData']['heartRate']`` -- converted with ``int``.
    * ``entry['healthData']['weight']`` -- a string; trailing ``k``/``g``
      characters are stripped before conversion to ``float``.

    Args:
        path: Location of the JSON log file. Defaults to ``'cattle_log.json'``
            (resolved against the current working directory), which is the
            file the function always read before this parameter existed.

    Returns:
        A float array of shape ``(n_entries, 3)``. When the ``'logs'`` list is
        empty the shape is ``(0, 3)`` so that column slicing such as
        ``X[:, 0]`` still works.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If ``'logs'`` or one of the expected entry keys is missing.
        ValueError: If a field cannot be converted to a number.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(path, 'r', encoding='utf-8') as file:
        log_content = json.load(file)['logs']

    rows = []
    for log_entry in log_content:
        # NOTE: rstrip() removes a *set* of trailing characters, not a suffix;
        # that is sufficient for values shaped like "1200m" / "450.5kg".
        total_today = float(log_entry['distanceTraveled']['totalToday'].rstrip('m'))
        health_data = log_entry['healthData']
        heart_rate = int(health_data['heartRate'])
        weight = float(health_data['weight'].rstrip('kg'))
        rows.append([total_today, heart_rate, weight])

    # reshape(-1, 3) keeps the result two-dimensional even with zero entries.
    return np.array(rows, dtype=float).reshape(-1, 3)
def generate_random_data(num_dimensions, rng) -> np.ndarray:
    """Build a synthetic dataset of two tight clusters plus uniform outliers.

    A single draw of 95 standard-normal points (scaled by 0.3) is shifted by
    +2 and by -2 to form two mirrored clusters of 95 points each. Five extra
    points are then drawn uniformly from ``[-4, 4)`` in every dimension and
    appended as anomalies.

    Args:
        num_dimensions: Number of columns (features) per point.
        rng: Random source exposing ``randn`` and ``uniform`` (for example a
            ``numpy.random.RandomState``). ``randn`` is called first, then
            ``uniform``.

    Returns:
        Array of shape ``(195, num_dimensions)``: rows 0-94 are the cluster
        around +2, rows 95-189 the cluster around -2, rows 190-194 the
        outliers.
    """
    gaussian_blob = 0.3 * rng.randn(95, num_dimensions)
    upper_cluster = gaussian_blob + 2
    lower_cluster = gaussian_blob - 2
    anomalies = rng.uniform(low=-4, high=4, size=(5, num_dimensions))
    # Stack row-wise: both clusters first, anomalies last.
    return np.vstack((upper_cluster, lower_cluster, anomalies))