# Source code for tempor.datasources.mivdp.utils.outlier_removal

"""Outlier removal utilities.

Based on:
https://github.com/healthylaife/MIMIC-IV-Data-Pipeline
``utils/outlier_removal.py``
"""

import numpy as np


def compute_outlier_imputation(arr, cut_off, left_thresh, impute):
    """Clip or blank out values of ``arr`` lying outside two percentile bounds.

    The bounds are computed from ``arr`` itself: the lower bound is its
    ``left_thresh``-th percentile and the upper bound is its ``cut_off``-th
    percentile. NaN entries are ignored when computing the bounds and are left
    untouched in the result.

    Args:
        arr: Numeric array-like that supports boolean-mask assignment (e.g. a
            ``numpy.ndarray`` or a ``pandas.Series``). Modified in place.
        cut_off: Upper percentile, as accepted by ``numpy.percentile``
            (0 to 100).
        left_thresh: Lower percentile, as accepted by ``numpy.percentile``
            (0 to 100).
        impute: If truthy, values outside the bounds are replaced by the bound
            they violate; otherwise they are replaced by ``numpy.nan``.

    Returns:
        The same ``arr`` object, after in-place modification. Empty or
        all-NaN input is returned unchanged.

    Note:
        With ``impute`` falsy, a plain integer ``numpy.ndarray`` cannot hold
        the NaN markers; pass floating-point data in that case.
    """
    observed = np.asarray(arr)
    if observed.dtype.kind == "f":
        # A single NaN makes np.percentile return NaN for both bounds, after
        # which every comparison below is False and nothing gets handled.
        # Derive the bounds from the real observations only.
        observed = observed[~np.isnan(observed)]
    if observed.size == 0:
        # Empty or all-NaN input: there is nothing to derive bounds from.
        return arr

    lower_bound = np.percentile(observed, left_thresh)
    upper_bound = np.percentile(observed, cut_off)

    low_fill, high_fill = (lower_bound, upper_bound) if impute else (np.nan, np.nan)
    # Keep the lower-then-upper order: the second mask is evaluated on the
    # array as already modified by the first assignment.
    arr[arr < lower_bound] = low_fill
    arr[arr > upper_bound] = high_fill
    return arr
def outlier_imputation(data, id_attribute, value_attribute, cut_off, left_thresh, impute):
    """Apply per-group percentile outlier handling to one column of ``data``.

    Rows are grouped by ``id_attribute``. Within each group, the values of
    ``value_attribute`` are passed through ``compute_outlier_imputation`` and
    the result is written back to ``data``. Rows whose ``value_attribute`` is
    NaN afterwards are dropped from the returned frame.

    Args:
        data: ``pandas.DataFrame`` holding both columns. Its
            ``value_attribute`` column is overwritten in place with the
            processed values.
        id_attribute: Name of the column to group by.
        value_attribute: Name of the column whose values are processed.
        cut_off: Upper percentile, forwarded to ``compute_outlier_imputation``.
        left_thresh: Lower percentile, forwarded to
            ``compute_outlier_imputation``.
        impute: Forwarded to ``compute_outlier_imputation``; selects between
            replacing outliers with the percentile bound (truthy) and with NaN
            (falsy).

    Returns:
        The result of ``data.dropna(subset=[value_attribute])`` after all
        groups have been processed.
    """
    grouped = data.groupby([id_attribute])[value_attribute]
    for _, group_values in grouped:
        # Hand the helper a copy: it modifies its argument in place, and the
        # group is a slice of ``data`` that should not be mutated directly.
        cleaned = compute_outlier_imputation(group_values.copy(), cut_off, left_thresh, impute)
        # Write back with one label-based .loc call. The previous chained
        # ``data[col].iloc[index] = ...`` could assign into a temporary, and
        # it treated index labels as integer positions, which is only correct
        # for a default RangeIndex.
        data.loc[cleaned.index, value_attribute] = cleaned
    return data.dropna(subset=[value_attribute])