# Convert bpm to peaks as needed (see read_bpm_file below)
# Do all package imports
import neurokit2 as nk
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import scipy.io
from pathlib import Path
from scipy.stats import variation
from scipy import stats
from hmmlearn import hmm
import re
filepath_data=Path.cwd()/"data/heartrate"
filepath_sleep_labels=Path.cwd()/"data/sleep_labels"
bpm_files = [f for f in sorted(filepath_data.iterdir()) #create a list of relevant files in directory
if f.suffix == '.txt']
sleep_label_files = [f for f in sorted(filepath_sleep_labels.iterdir()) #create a list of relevant files in directory
if f.suffix == '.txt']
# Read one file at a time using the above list [bpm_files], clean, convert to RRI
# Proceed with the steps below: HRV compute, save
def read_bpm_file(bpm_files, i, sampling_rate):
    # Import raw HR file in bpm
    bpm_raw = pd.read_csv(bpm_files[i])
    # Convert bpm to RRI (ms), then RRI to peak indices
    rri_raw = 60000 / bpm_raw.iloc[:, 1]
    peaks = nk.intervals_to_peaks(rri_raw)
    # Artifact removal using simple neurokit thresholds
    #clean_peaks = nk.signal_fixpeaks(peaks=peaks, interval_min=0.4, interval_max=1.5, method="neurokit") #allow 40-150 bpm
    # Convert now to clean RRI
    #rri = peaks_to_rri(clean_peaks, sampling_rate, interpolate=False)
    return peaks
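As a quick sanity check of the conversion above, a minimal sketch on synthetic values (hypothetical bpm numbers, not from the dataset): at 60 bpm one beat takes 60000/60 = 1000 ms.
# Hypothetical demo values: 60 bpm -> 1000 ms RR intervals
bpm_demo = pd.Series([60.0, 62.0, 61.0, 59.0, 60.0])
rri_demo = 60000 / bpm_demo                   # RR intervals in ms
peaks_demo = nk.intervals_to_peaks(rri_demo)  # cumulative sample indices of the R-peaks
print(rri_demo.round(1).tolist(), peaks_demo[:3])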
Compute N3 NREM duration as an example; the script can easily be adapted to study
sleep-state architecture systematically. Label convention:
wake = 0, N1 = 1, N2 = 2, N3 = 3, REM = 5
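For readability, the label codes above can be kept in a small mapping (note that this dataset's convention skips 4):
SLEEP_STAGE = {0: "wake", 1: "N1", 2: "N2", 3: "N3", 5: "REM"}  # convenience lookup for the codes listed above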
#get sleep stats
def duration_sleep(sleep_label_data, SubjectID):
    # Duration of the sleep label file in hours, from the last value in the "Time, s" column
    sleep_duration = sleep_label_data.iloc[-1, 0] / 3600
    # Select only N3 (3rd NREM stage, deepest sleep)
    N3_nrem = sleep_label_data.loc[sleep_label_data['Sleep label'] == 3]
    # Duration of N3 NREM in minutes = number of rows with this label * 30 s per epoch / 60
    duration_N3_deep_sleep = len(sleep_label_data[sleep_label_data['Sleep label'] == 3.0]) * 30 / 60
    # Repeat for the other sleep states (N1/N2 are lighter NREM stages, not deep sleep)
    N2_NREM = sleep_label_data.loc[sleep_label_data['Sleep label'] == 2]
    duration_N2_sleep = len(sleep_label_data[sleep_label_data['Sleep label'] == 2.0]) * 30 / 60
    N1_NREM = sleep_label_data.loc[sleep_label_data['Sleep label'] == 1]
    duration_N1_sleep = len(sleep_label_data[sleep_label_data['Sleep label'] == 1.0]) * 30 / 60
    REM = sleep_label_data.loc[sleep_label_data['Sleep label'] == 5]
    duration_REM_sleep = len(sleep_label_data[sleep_label_data['Sleep label'] == 5.0]) * 30 / 60
    awake = sleep_label_data.loc[sleep_label_data['Sleep label'] == 0]
    duration_awake = len(sleep_label_data[sleep_label_data['Sleep label'] == 0]) * 30 / 60
    # Concatenate SubjectID and the sleep-state duration metrics; saved later as one cohort sleep_df > Excel
    sleep_statistics = [SubjectID, sleep_duration, duration_N3_deep_sleep, duration_N2_sleep, duration_N1_sleep, duration_REM_sleep, duration_awake]
    return sleep_statistics
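For reference, the same per-stage durations can be computed more compactly with value_counts(); a sketch under the same assumptions (a 'Sleep label' column and one label per 30-s epoch):
def duration_sleep_compact(sleep_label_data, SubjectID, epoch_s=30):
    # duration per stage (min) = label count * epoch length (s) / 60
    counts = sleep_label_data['Sleep label'].value_counts()
    minutes = {stage: counts.get(stage, 0) * epoch_s / 60 for stage in (3, 2, 1, 5, 0)}
    sleep_duration = sleep_label_data.iloc[-1, 0] / 3600  # hours
    return [SubjectID, sleep_duration, minutes[3], minutes[2], minutes[1], minutes[5], minutes[0]]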
# deal with last column which is string and needs to be skipped
def skip_last_column(lst):
    # Unpack the list of lists
    def Extract(lst):
        return [item[0] for item in lst]
    # Check the first element of the first sublist (enough to decide whether to skip the column for numpy operations)
    element_to_check = Extract(lst)[0]
    return isinstance(element_to_check, str)  # Boolean: True if the column holds strings
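Quick behavior check with hypothetical sublists: the helper returns True only when the first element of the first sublist is a string.
print(skip_last_column([["S01"], ["S02"]]))  # True  -> string column, skip it
print(skip_last_column([[0.42], [0.37]]))    # False -> numeric column, keep it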
def compute_basic_stats(ts_data, SubjectID):
    # Compute mean and coefficient of variation
    # "ts_data" holds the list of values of one HRV metric per subject
    HRV_mean = np.mean(ts_data.values.tolist())
    HRV_coeff_variation = variation(ts_data.values.tolist())
    # An equivalent pure-numpy alternative to variation():
    # cv = lambda x: np.std(x) / np.mean(x)
    # First quartile (Q1)
    Q1 = np.percentile(ts_data, 25, interpolation='midpoint')
    # Third quartile (Q3)
    Q3 = np.percentile(ts_data, 75, interpolation='midpoint')
    # Interquartile range (IQR)
    IQR = Q3 - Q1
    midhinge = (Q3 + Q1) / 2
    quartile_coefficient_dispersion = (IQR / 2) / midhinge
    # Entropy estimate (disabled for now):
    # HRV_ts_entropy = nk.entropy_sample(ts_data)
    # yielded "could not broadcast input array from shape (7,1) into shape (7)"; the following syntax
    # fixes that and is more elegant in that it estimates the optimal delay:
    # optimal_complexity_parameters = nk.complexity_delay(ts_data.to_numpy(), delay_max=6, method='fraser1986', show=False)
    # HRV_ts_entropy = nk.entropy_fuzzy(ts_data.to_numpy(), delay=optimal_complexity_parameters)
    # still yielding a length error
    # HRV_ts_entropy = nk.entropy_shannon(ts_data)
    basic_stats = [SubjectID, HRV_mean, HRV_coeff_variation[0], quartile_coefficient_dispersion]  # , HRV_ts_entropy
    return basic_stats
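A worked check of the quartile coefficient of dispersion on toy numbers: for x = [1, 2, 3, 4, 5], Q1 = 2 and Q3 = 4 with midpoint interpolation, so IQR = 2, midhinge = 3 and QCD = (2/2)/3 ≈ 0.33.
x = np.array([1, 2, 3, 4, 5])
q1, q3 = np.percentile(x, [25, 75], interpolation='midpoint')
print(((q3 - q1) / 2) / ((q3 + q1) / 2))  # 0.333...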
#HMM model
def do_hmm(ts_data):
    #ts_data = numpy.array(data)
    # Fit a 2-state Gaussian HMM and decode the most likely state sequence
    gm = hmm.GaussianHMM(n_components=2)
    gm.fit(ts_data.reshape(-1, 1))
    hmm_states = gm.predict(ts_data.reshape(-1, 1))
    #hmm_states = [states.tolist()]
    print(hmm_states)
    return hmm_states  # next, append each subject's states to an Uber states list to spot patterns
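A toy demonstration of do_hmm on synthetic data (hypothetical values): two well-separated Gaussian regimes should come back as two contiguous blocks of states.
rng = np.random.default_rng(0)
toy = np.concatenate([rng.normal(50, 2, 100), rng.normal(70, 2, 100)])  # two synthetic regimes
toy_states = do_hmm(toy)  # expect ~100 labels of one state followed by ~100 of the other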
#hrv_nonlinear_edited
from warnings import warn
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
from neurokit2.complexity import (complexity_lempelziv, entropy_approximate,
entropy_fuzzy, entropy_multiscale, entropy_sample,
entropy_shannon, fractal_correlation, fractal_dfa,
fractal_higuchi, fractal_katz)
from neurokit2.misc import NeuroKitWarning, find_consecutive
from neurokit2.signal import signal_zerocrossings
from neurokit2.hrv.hrv_utils import _hrv_get_rri, _hrv_sanitize_input
def hrv_nonlinear_edited(peaks, sampling_rate=1000, show=False, **kwargs):
    # Sanitize input
    peaks = _hrv_sanitize_input(peaks)
    if isinstance(peaks, tuple):  # Detect actual sampling rate
        peaks, sampling_rate = peaks[0], peaks[1]
    # Compute R-R intervals (also referred to as NN) in milliseconds
    rri, _ = _hrv_get_rri(peaks, sampling_rate=sampling_rate, interpolate=False)
    # Initialize empty container for results
    out = {}
    # Poincaré features (SD1, SD2, etc.)
    out = _hrv_nonlinear_poincare(rri, out)
    # Heart Rate Fragmentation
    out = _hrv_nonlinear_fragmentation(rri, out)
    # Heart Rate Asymmetry
    out = _hrv_nonlinear_poincare_hra(rri, out)
    # DFA
    out = _hrv_dfa(peaks, rri, out, **kwargs)
    # Complexity
    tolerance = 0.2 * np.std(rri, ddof=1)  # could be replaced with the individually computed tolerance value
    out["ApEn"], _ = entropy_approximate(rri, delay=1, dimension=2, tolerance=tolerance)
    out["SampEn"], _ = entropy_sample(rri, delay=1, dimension=2, tolerance=tolerance)
    out["ShanEn"], _ = entropy_shannon(rri)
    #out["FuzzyEn"], _ = entropy_fuzzy(rri, delay=1, dimension=2, tolerance=tolerance)
    out["MSEn"], _ = entropy_multiscale(rri, delay=1, dimension=2, method="MSEn")
    out["CMSEn"], _ = entropy_multiscale(rri, dimension=2, tolerance=tolerance, method="CMSEn")
    out["RCMSEn"], _ = entropy_multiscale(rri, dimension=2, tolerance=tolerance, method="RCMSEn")
    out["CD"], _ = fractal_correlation(rri, delay=1, dimension=2, **kwargs)
    out["HFD"], _ = fractal_higuchi(rri, k_max=10, **kwargs)
    out["KFD"], _ = fractal_katz(rri)
    out["LZC"], _ = complexity_lempelziv(rri, **kwargs)
    if show:
        _hrv_nonlinear_show(rri, out)
    out = pd.DataFrame.from_dict(out, orient="index").T.add_prefix("HRV_")
    return out
# =============================================================================
# Get SD1 and SD2
# =============================================================================
def _hrv_nonlinear_poincare(rri, out):
    """Compute SD1 and SD2.

    - Do existing measures of Poincare plot geometry reflect nonlinear features of heart rate
      variability? - Brennan (2001)
    """
    # HRV and hrvanalysis
    rri_n = rri[:-1]
    rri_plus = rri[1:]
    x1 = (rri_n - rri_plus) / np.sqrt(2)  # Eq. 7
    x2 = (rri_n + rri_plus) / np.sqrt(2)
    sd1 = np.std(x1, ddof=1)
    sd2 = np.std(x2, ddof=1)
    out["SD1"] = sd1
    out["SD2"] = sd2
    # SD1 / SD2
    out["SD1SD2"] = sd1 / sd2
    # Area of ellipse described by SD1 and SD2
    out["S"] = np.pi * out["SD1"] * out["SD2"]
    # CSI / CVI
    T = 4 * out["SD1"]
    L = 4 * out["SD2"]
    out["CSI"] = L / T
    out["CVI"] = np.log10(L * T)
    out["CSI_Modified"] = L ** 2 / T
    return out
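A worked check on a synthetic RRI series (hypothetical values, ms): a perfectly alternating series has maximal beat-to-beat variation, so SD1 is large while SD2 collapses toward zero (the ratio-based indices degenerate in this extreme case).
rri_toy = np.array([800.0, 900.0] * 50)
poincare_toy = _hrv_nonlinear_poincare(rri_toy, {})
print(poincare_toy["SD1"], poincare_toy["SD2"])  # SD1 ~ 100/sqrt(2) ≈ 71, SD2 ≈ 0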
def _hrv_nonlinear_poincare_hra(rri, out):
    """Heart Rate Asymmetry Indices.

    - Asymmetry of Poincaré plot (or termed as heart rate asymmetry, HRA) - Yan (2017)
    - Asymmetric properties of long-term and total heart rate variability - Piskorski (2011)
    """
    N = len(rri) - 1
    x = rri[:-1]  # rri_n, x-axis
    y = rri[1:]  # rri_plus, y-axis
    diff = y - x
    decelerate_indices = np.where(diff > 0)[0]  # set of points above LI where y > x
    accelerate_indices = np.where(diff < 0)[0]  # set of points below LI where y < x
    nochange_indices = np.where(diff == 0)[0]
    # Distances to centroid line l2
    centroid_x = np.mean(x)
    centroid_y = np.mean(y)
    dist_l2_all = abs((x - centroid_x) + (y - centroid_y)) / np.sqrt(2)
    # Distances to LI
    dist_all = abs(y - x) / np.sqrt(2)
    # Calculate the angles
    theta_all = abs(np.arctan(1) - np.arctan(y / x))  # phase angle of LI - phase angle of i-th point
    # Calculate the radius
    r = np.sqrt(x ** 2 + y ** 2)
    # Sector areas
    S_all = 1 / 2 * theta_all * r ** 2
    # Guzik's Index (GI)
    den_GI = np.sum(dist_all)
    num_GI = np.sum(dist_all[decelerate_indices])
    out["GI"] = (num_GI / den_GI) * 100
    # Slope Index (SI)
    den_SI = np.sum(theta_all)
    num_SI = np.sum(theta_all[decelerate_indices])
    out["SI"] = (num_SI / den_SI) * 100
    # Area Index (AI)
    den_AI = np.sum(S_all)
    num_AI = np.sum(S_all[decelerate_indices])
    out["AI"] = (num_AI / den_AI) * 100
    # Porta's Index (PI)
    m = N - len(nochange_indices)  # all points except those on LI
    b = len(accelerate_indices)  # number of points below LI
    out["PI"] = (b / m) * 100
    # Short-term asymmetry (SD1)
    sd1d = np.sqrt(np.sum(dist_all[decelerate_indices] ** 2) / (N - 1))
    sd1a = np.sqrt(np.sum(dist_all[accelerate_indices] ** 2) / (N - 1))
    sd1I = np.sqrt(sd1d ** 2 + sd1a ** 2)
    out["C1d"] = (sd1d / sd1I) ** 2
    out["C1a"] = (sd1a / sd1I) ** 2
    out["SD1d"] = sd1d  # SD1 deceleration
    out["SD1a"] = sd1a  # SD1 acceleration
    # out["SD1I"] = sd1I  # SD1 based on LI, whereas SD1 is based on centroid line l1
    # Long-term asymmetry (SD2)
    longterm_dec = np.sum(dist_l2_all[decelerate_indices] ** 2) / (N - 1)
    longterm_acc = np.sum(dist_l2_all[accelerate_indices] ** 2) / (N - 1)
    longterm_nodiff = np.sum(dist_l2_all[nochange_indices] ** 2) / (N - 1)
    sd2d = np.sqrt(longterm_dec + 0.5 * longterm_nodiff)
    sd2a = np.sqrt(longterm_acc + 0.5 * longterm_nodiff)
    sd2I = np.sqrt(sd2d ** 2 + sd2a ** 2)
    out["C2d"] = (sd2d / sd2I) ** 2
    out["C2a"] = (sd2a / sd2I) ** 2
    out["SD2d"] = sd2d  # SD2 deceleration
    out["SD2a"] = sd2a  # SD2 acceleration
    # out["SD2I"] = sd2I  # identical with SD2
    # Total asymmetry (SDNN)
    sdnnd = np.sqrt(0.5 * (sd1d ** 2 + sd2d ** 2))  # SDNN deceleration
    sdnna = np.sqrt(0.5 * (sd1a ** 2 + sd2a ** 2))  # SDNN acceleration
    sdnn = np.sqrt(sdnnd ** 2 + sdnna ** 2)  # should be similar to sdnn in hrv_time
    out["Cd"] = (sdnnd / sdnn) ** 2
    out["Ca"] = (sdnna / sdnn) ** 2
    out["SDNNd"] = sdnnd
    out["SDNNa"] = sdnna
    return out
def _hrv_nonlinear_fragmentation(rri, out):
    """Heart Rate Fragmentation Indices - Costa (2017)

    The more fragmented a time series is, the higher the PIP, IALS, PSS, and PAS indices will be.
    """
    diff_rri = np.diff(rri)
    zerocrossings = signal_zerocrossings(diff_rri)
    # Percentage of inflection points (PIP)
    out["PIP"] = len(zerocrossings) / len(rri)
    # Inverse of the average length of the acceleration/deceleration segments (IALS)
    accelerations = np.where(diff_rri > 0)[0]
    decelerations = np.where(diff_rri < 0)[0]
    consecutive = find_consecutive(accelerations) + find_consecutive(decelerations)
    lengths = [len(i) for i in consecutive]
    out["IALS"] = 1 / np.average(lengths)
    # Percentage of short segments (PSS) - the complement of the percentage of NN intervals in
    # acceleration and deceleration segments with three or more NN intervals
    out["PSS"] = np.sum(np.asarray(lengths) < 3) / len(lengths)
    # Percentage of NN intervals in alternation segments (PAS). An alternation segment is a sequence
    # of at least four NN intervals, for which heart rate acceleration changes sign every beat. Note
    # that PAS quantifies the amount of a particular sub-type of fragmentation (alternation): a time
    # series may be highly fragmented yet have a small amount of alternation, but all time series
    # with a large amount of alternation are highly fragmented.
    alternations = find_consecutive(zerocrossings)
    lengths = [len(i) for i in alternations]
    out["PAS"] = np.sum(np.asarray(lengths) >= 4) / len(lengths)
    return out
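Same idea for fragmentation (synthetic RRI, hypothetical values): a strictly alternating series flips sign at every beat, so nearly every point is an inflection point and PIP approaches 1.
frag_toy = _hrv_nonlinear_fragmentation(np.array([800.0, 850.0] * 20), {})
print(frag_toy["PIP"], frag_toy["IALS"], frag_toy["PAS"])  # PIP near 1, IALS = 1, PAS = 1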
# =============================================================================
# DFA
# =============================================================================
def _hrv_dfa(peaks, rri, out, n_windows="default", **kwargs):
    # Use dict.get() to fall back to the default DFA windows
    dfa_windows = kwargs.get("dfa_windows", [(4, 11), (12, None)])
    # Determine max beats
    if dfa_windows[1][1] is None:
        max_beats = len(peaks) / 10
    else:
        max_beats = dfa_windows[1][1]
    # No. of windows to compute for short and long term
    if n_windows == "default":
        n_windows_short = int(dfa_windows[0][1] - dfa_windows[0][0] + 1)
        n_windows_long = int(max_beats - dfa_windows[1][0] + 1)
    elif isinstance(n_windows, list):
        n_windows_short = n_windows[0]
        n_windows_long = n_windows[1]
    # Compute DFA alpha1
    short_window = np.linspace(dfa_windows[0][0], dfa_windows[0][1], n_windows_short).astype(int)
    # For monofractal
    out["DFA_alpha1"], _ = fractal_dfa(rri, multifractal=False, scale=short_window, **kwargs)
    # For multifractal
    mdfa_alpha1, _ = fractal_dfa(
        rri, multifractal=True, q=np.arange(-5, 6), scale=short_window, **kwargs
    )
    for k in mdfa_alpha1.columns:
        out["MFDFA_alpha1_" + k] = mdfa_alpha1[k].values[0]
    # Compute DFA alpha2
    # Sanitize max_beats
    if max_beats < dfa_windows[1][0] + 1:
        warn(
            "DFA_alpha2 related indices will not be calculated. "
            "The maximum duration of the windows provided for the long-term correlation is smaller "
            "than the minimum duration of windows. Refer to the `scale` argument in `nk.fractal_dfa()` "
            "for more information.",
            category=NeuroKitWarning,
        )
        return out
    else:
        long_window = np.linspace(dfa_windows[1][0], int(max_beats), n_windows_long).astype(int)
        # For monofractal
        out["DFA_alpha2"], _ = fractal_dfa(rri, multifractal=False, scale=long_window, **kwargs)
        # For multifractal
        mdfa_alpha2, _ = fractal_dfa(
            rri, multifractal=True, q=np.arange(-5, 6), scale=long_window, **kwargs
        )
        for k in mdfa_alpha2.columns:
            out["MFDFA_alpha2_" + k] = mdfa_alpha2[k].values[0]
    return out
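For intuition about the window construction: with the default dfa_windows = [(4, 11), (12, None)], n_windows_short = 8, so the short-term scales are simply the integers 4 through 11.
print(np.linspace(4, 11, 8).astype(int))  # [ 4  5  6  7  8  9 10 11]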
# =============================================================================
# Plot
# =============================================================================
def _hrv_nonlinear_show(rri, out, ax=None, ax_marg_x=None, ax_marg_y=None):
    mean_heart_period = np.mean(rri)
    sd1 = out["SD1"]
    sd2 = out["SD2"]
    if isinstance(sd1, pd.Series):
        sd1 = float(sd1)
    if isinstance(sd2, pd.Series):
        sd2 = float(sd2)
    # Poincare values
    ax1 = rri[:-1]
    ax2 = rri[1:]
    # Set grid boundaries
    ax1_lim = (max(ax1) - min(ax1)) / 10
    ax2_lim = (max(ax2) - min(ax2)) / 10
    ax1_min = min(ax1) - ax1_lim
    ax1_max = max(ax1) + ax1_lim
    ax2_min = min(ax2) - ax2_lim
    ax2_max = max(ax2) + ax2_lim
    # Prepare figure
    if ax is None and ax_marg_x is None and ax_marg_y is None:
        gs = matplotlib.gridspec.GridSpec(4, 4)
        fig = plt.figure(figsize=(8, 8))
        ax_marg_x = plt.subplot(gs[0, 0:3])
        ax_marg_y = plt.subplot(gs[1:4, 3])
        ax = plt.subplot(gs[1:4, 0:3])
        gs.update(wspace=0.025, hspace=0.05)  # Reduce spaces
        plt.suptitle("Poincaré Plot")
    else:
        fig = None
    # Create meshgrid
    xx, yy = np.mgrid[ax1_min:ax1_max:100j, ax2_min:ax2_max:100j]
    # Fit Gaussian Kernel
    positions = np.vstack([xx.ravel(), yy.ravel()])
    values = np.vstack([ax1, ax2])
    kernel = scipy.stats.gaussian_kde(values)
    f = np.reshape(kernel(positions).T, xx.shape)
    cmap = matplotlib.cm.get_cmap("Blues", 10)
    ax.contourf(xx, yy, f, cmap=cmap)
    ax.imshow(np.rot90(f), extent=[ax1_min, ax1_max, ax2_min, ax2_max], aspect="auto")
    # Marginal densities
    ax_marg_x.hist(
        ax1, bins=int(len(ax1) / 10), density=True, alpha=1, color="#ccdff0", edgecolor="none"
    )
    ax_marg_y.hist(
        ax2,
        bins=int(len(ax2) / 10),
        density=True,
        alpha=1,
        color="#ccdff0",
        edgecolor="none",
        orientation="horizontal",
        zorder=1,
    )
    kde1 = scipy.stats.gaussian_kde(ax1)
    x1_plot = np.linspace(ax1_min, ax1_max, len(ax1))
    x1_dens = kde1.evaluate(x1_plot)
    ax_marg_x.fill(x1_plot, x1_dens, facecolor="none", edgecolor="#1b6aaf", alpha=0.8, linewidth=2)
    kde2 = scipy.stats.gaussian_kde(ax2)
    x2_plot = np.linspace(ax2_min, ax2_max, len(ax2))
    x2_dens = kde2.evaluate(x2_plot)
    ax_marg_y.fill_betweenx(
        x2_plot, x2_dens, facecolor="none", edgecolor="#1b6aaf", linewidth=2, alpha=0.8, zorder=2
    )
    # Turn off marginal axes labels
    ax_marg_x.axis("off")
    ax_marg_y.axis("off")
    # Plot ellipse
    angle = 45
    width = 2 * sd2 + 1
    height = 2 * sd1 + 1
    xy = (mean_heart_period, mean_heart_period)
    ellipse = matplotlib.patches.Ellipse(
        xy=xy, width=width, height=height, angle=angle, linewidth=2, fill=False
    )
    ellipse.set_alpha(0.5)
    ellipse.set_facecolor("#2196F3")
    ax.add_patch(ellipse)
    # Plot points only outside ellipse
    cos_angle = np.cos(np.radians(180.0 - angle))
    sin_angle = np.sin(np.radians(180.0 - angle))
    xc = ax1 - xy[0]
    yc = ax2 - xy[1]
    xct = xc * cos_angle - yc * sin_angle
    yct = xc * sin_angle + yc * cos_angle
    rad_cc = (xct ** 2 / (width / 2.0) ** 2) + (yct ** 2 / (height / 2.0) ** 2)
    points = np.where(rad_cc > 1)[0]
    ax.plot(ax1[points], ax2[points], "o", color="k", alpha=0.5, markersize=4)
    # SD1 and SD2 arrows
    sd1_arrow = ax.arrow(
        mean_heart_period,
        mean_heart_period,
        float(-sd1 * np.sqrt(2) / 2),
        float(sd1 * np.sqrt(2) / 2),
        linewidth=3,
        ec="#E91E63",
        fc="#E91E63",
        label="SD1",
    )
    sd2_arrow = ax.arrow(
        mean_heart_period,
        mean_heart_period,
        float(sd2 * np.sqrt(2) / 2),
        float(sd2 * np.sqrt(2) / 2),
        linewidth=3,
        ec="#FF9800",
        fc="#FF9800",
        label="SD2",
    )
    ax.set_xlabel(r"$RR_{n} (ms)$")
    ax.set_ylabel(r"$RR_{n+1} (ms)$")
    ax.legend(handles=[sd1_arrow, sd2_arrow], fontsize=12, loc="best")
    return fig
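With all helpers in place, a minimal usage sketch of the edited function (assuming the data files listed above are present; recording index 0 is arbitrary):
peaks0 = read_bpm_file(bpm_files, 0, sampling_rate=1000)
hrv_nl0 = hrv_nonlinear_edited(peaks0, sampling_rate=1000, show=False)
print(hrv_nl0[["HRV_SD1", "HRV_SD2", "HRV_SampEn"]])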
# UDF compute_HRV
# This UDF computes all [regular and extra non-linear] HRV metrics segment-wise for a HR recording
def compute_HRV(peaks, SubjectID, sampling_rate, divider):
    # Divide into segments of X min; the last segment may be shorter -> discard it during statistical analysis of the HRV metrics
    segment = np.array_split(peaks, divider)
    # Create the dataframe structure to which the extra metrics are appended row-wise below
    hrv_extra_columns = ['optimal time delay', 'optimal complexity dim', 'optimal complexity tolerance', 'segment duration, s', 'SubjectID']
    hrv_extra_complexity_df = pd.DataFrame(columns=hrv_extra_columns)
    for i in range(len(segment)):
        #hrv_time
        hrv_time_domain_segment = nk.hrv_time(segment[i], sampling_rate=sampling_rate, show=False)
        #hrv_frequency
        hrv_frequency_domain_segment = nk.hrv_frequency(segment[i], sampling_rate=sampling_rate, show=False)
        #hrv_rqa
        hrv_rqa_domain_segment = nk.hrv_rqa(segment[i], sampling_rate=sampling_rate, show=False)
        # Optimal complexity parameters (set to defaults here for speed of compute)
        #optimal_complexity_delay_value, df_optimal_complexity_delay = nk.complexity_delay(segment[i], delay_max=500, method='rosenstein1994', show=False)
        optimal_complexity_delay_value = 1
        #complexity_dimension
        #optimal_complexity_dimension_value, df_optimal_complexity_dimension = nk.complexity_dimension(segment[i], delay=optimal_complexity_delay_value, dimension_max=20, method='afnn', show=False)
        optimal_complexity_dimension_value = 2
        #complexity_tolerance
        #optimal_complexity_tolerance_value, df_optimal_complexity_tolerance = nk.complexity_tolerance(segment[i], delay=optimal_complexity_delay_value, dimension=optimal_complexity_dimension_value, method='maxApEn', show=False)
        optimal_complexity_tolerance_value = 0.02
        #hrv_complexity
        df_hrv_complexity_segment, hrv_segment = nk.complexity(segment[i], delay=optimal_complexity_delay_value, which=["fast", "medium"])
        # Call the internally edited hrv_nonlinear UDF (fuzzy entropy removed) with segment-specific complexity parameters (delay, dimension, tolerance)
        hrv_nonlinear_domain_segment = hrv_nonlinear_edited(segment[i], sampling_rate=sampling_rate, show=False)
        segment_duration = np.sum(segment[i]) / 1000  # segment duration in seconds
        # Join all individual output floats for this segment, including its duration in seconds
        hrv_complexity_parameters_etc = [optimal_complexity_delay_value, optimal_complexity_dimension_value, optimal_complexity_tolerance_value, segment_duration, SubjectID]
        hrv_extra_complexity_df.loc[len(hrv_extra_complexity_df)] = hrv_complexity_parameters_etc
    # Concatenate the dataframes horizontally; this scales, allowing addition of other dataframes from bivariate computations
    hrv_final_df = pd.concat([hrv_time_domain_segment, hrv_frequency_domain_segment, hrv_rqa_domain_segment, df_hrv_complexity_segment, hrv_nonlinear_domain_segment, hrv_extra_complexity_df], axis=1)
    return hrv_final_df  # per subject, with SubjectID carried along on the right side
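Note that np.array_split tolerates uneven splits, which is why the last segment can be shorter than the others; a quick illustration:
print([len(s) for s in np.array_split(np.arange(10), 3)])  # [4, 3, 3]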
# UDF compute_HRV_whole
# This UDF computes all [regular and extra non-linear] HRV metrics for a _whole_ HR recording (divider=1)
def compute_HRV_whole(peaks, SubjectID, sampling_rate, divider):
    # Divide into segments; with divider=1 the "segment" is the entire recording
    segment = np.array_split(peaks, divider)
    # Create the dataframe structure to which the extra metrics are appended row-wise below
    hrv_extra_columns = ['optimal time delay', 'optimal complexity dim', 'optimal complexity tolerance', 'segment duration, s', 'SubjectID']
    hrv_extra_complexity_df = pd.DataFrame(columns=hrv_extra_columns)
    for i in range(len(segment)):
        #hrv_time
        hrv_time_domain_segment = nk.hrv_time(segment[i], sampling_rate=sampling_rate, show=False)
        #hrv_frequency
        hrv_frequency_domain_segment = nk.hrv_frequency(segment[i], sampling_rate=sampling_rate, show=False)
        #hrv_rqa
        hrv_rqa_domain_segment = nk.hrv_rqa(segment[i], sampling_rate=sampling_rate, show=False)
        # Optimal complexity parameters (set to defaults here for speed of compute)
        #optimal_complexity_delay_value, df_optimal_complexity_delay = nk.complexity_delay(segment[i], delay_max=500, method='rosenstein1994', show=False)
        optimal_complexity_delay_value = 1
        #complexity_dimension
        #optimal_complexity_dimension_value, df_optimal_complexity_dimension = nk.complexity_dimension(segment[i], delay=optimal_complexity_delay_value, dimension_max=20, method='afnn', show=False)
        optimal_complexity_dimension_value = 2
        #complexity_tolerance
        #optimal_complexity_tolerance_value, df_optimal_complexity_tolerance = nk.complexity_tolerance(segment[i], delay=optimal_complexity_delay_value, dimension=optimal_complexity_dimension_value, method='maxApEn', show=False)
        optimal_complexity_tolerance_value = 0.02
        #hrv_complexity
        df_hrv_complexity_segment, hrv_segment = nk.complexity(segment[i], delay=optimal_complexity_delay_value, which=["fast", "medium"])
        # Call the internally edited hrv_nonlinear UDF (fuzzy entropy removed) with segment-specific complexity parameters (delay, dimension, tolerance)
        hrv_nonlinear_domain_segment = hrv_nonlinear_edited(segment[i], sampling_rate=sampling_rate, show=False)
        segment_duration = np.sum(segment[i]) / 1000  # segment duration in seconds (needed to match the 5 columns above)
        # Join all individual output floats for this segment
        hrv_complexity_parameters_etc = [optimal_complexity_delay_value, optimal_complexity_dimension_value, optimal_complexity_tolerance_value, segment_duration, SubjectID]
        hrv_extra_complexity_df.loc[len(hrv_extra_complexity_df)] = hrv_complexity_parameters_etc
    # Concatenate the dataframes horizontally; this scales, allowing addition of other dataframes from bivariate computations
    hrv_final_df = pd.concat([hrv_time_domain_segment, hrv_frequency_domain_segment, hrv_rqa_domain_segment, df_hrv_complexity_segment, hrv_nonlinear_domain_segment, hrv_extra_complexity_df], axis=1)
    return hrv_final_df  # per subject, with SubjectID carried along on the right side
Compute higher-order summaries of the segment-wise HRV metrics; this includes basic statistical and HMM estimates
def compute_higher_HRV(hrv_final_df):
    # "hrv_final_df" is the dataframe holding the HRV metric values segment-wise for one subject
    # Compute basic stats
    higher_order_HRV_basic_stats = []
    for i in range(hrv_final_df.shape[1]):
        hrv_metric = hrv_final_df.iloc[:, [i]].values
        # String skip logic to pass over the SubjectID column
        if not skip_last_column(hrv_metric):
            # NB: SubjectID comes from the enclosing scope (set in the processing loop below)
            HRV_results_temp1 = compute_basic_stats(hrv_final_df.iloc[:, [i]].astype(np.float64), SubjectID)
            higher_order_HRV_basic_stats.append(HRV_results_temp1)
    HRV_basic_stats = pd.DataFrame(higher_order_HRV_basic_stats, columns=['SubjectID', 'HRV_mean', 'HRV_coeff_variation', 'quartile_coefficient_dispersion'])
    hrv_columns = hrv_final_df.columns[:-1]  # exclude the last column, which holds SubjectID
    HRV_basic_stats.index = [hrv_columns]
    HRV_basic_stats_final = HRV_basic_stats.T  # transpose
    # Estimate the HMM state sequence for a given segmented HRV metric, then compute basic_stats on this estimate.
    # Hypothesis: stable tracings will have tight distributions of HMM values and resemble entropy estimates;
    # this should hold statistically for stressed (tighter distributions) versus control subjects.
    higher_order_HRV_basic_stats_on_HMM = []
    for i in range(hrv_final_df.shape[1]):
        hrv_metric = hrv_final_df.iloc[:, [i]].values
        # Some HRV metrics contain NaNs and "do_hmm" crashes on those; skip if any NaN is present,
        # and likewise skip the string-valued SubjectID column
        has_nan = any(pd.isna(hrv_metric))
        is_string = skip_last_column(hrv_metric)
        if has_nan or is_string:
            continue
        HRV_results_hmm_temp2 = do_hmm(hrv_metric)
        HRV_results_stats_hmm_temp = compute_basic_stats(pd.DataFrame(HRV_results_hmm_temp2), SubjectID)
        higher_order_HRV_basic_stats_on_HMM.append(HRV_results_stats_hmm_temp)
    HRV_basic_stats_on_HMM = pd.DataFrame(higher_order_HRV_basic_stats_on_HMM, columns=['SubjectID', 'HRV_HMM_mean', 'HRV_HMM_coeff_variation', 'HMM_quartile_coefficient_dispersion'])
    HRV_basic_stats_on_HMM.index = [hrv_columns]
    HRV_basic_stats_on_HMM_final = HRV_basic_stats_on_HMM.T  # transpose
    higher_hrv_final_df = pd.concat([HRV_basic_stats_final, HRV_basic_stats_on_HMM_final], axis=1)
    #higher_hrv_final_df = HRV_basic_stats_final  # keep this fallback for when the data do not allow HMM analysis
    return higher_hrv_final_df  # includes SubjectID
#one could change the df saved by splitting into four separate DFs: just for mean values, just for CV, etc. That will make later corr easier.
#import matplotlib.dates as md
#based on https://neuropsychology.github.io/NeuroKit/functions/complexity.html?highlight=rolling%20complexity
#plot computed complexity over time, overlaid with HR during sleep; save the figure for each SubjectID
#from matplotlib_scalebar.scalebar import ScaleBar  # not needed; AnchoredSizeBar below is used instead
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import matplotlib.font_manager as fm
def vizualize_hrv_sleep_states(sleep_label_data, hr_df, rolling_sampen, SubjectID):
    fontprops = fm.FontProperties(size=18)
    plt.rcParams["figure.autolayout"] = True
    fig, ax = plt.subplots(3, 1, figsize=(15, 8))
    df1 = sleep_label_data
    df2 = hr_df
    df3 = rolling_sampen.to_frame(name="SampEn")
    ax[0].plot(df2.iloc[:, 1])
    ax[0].set_title("HR")
    ax[0].get_xaxis().set_visible(False)
    ax[0].get_yaxis().set_visible(True)
    ax[0].spines['top'].set_visible(False)
    ax[0].spines['right'].set_visible(False)
    ax[0].spines['bottom'].set_visible(False)
    ax[0].spines['left'].set_visible(True)
    ax[1].plot(df3.iloc[:, 0])
    ax[1].set_title("SampEn")
    ax[1].get_xaxis().set_visible(False)
    ax[1].get_yaxis().set_visible(True)
    ax[1].spines['top'].set_visible(False)
    ax[1].spines['right'].set_visible(False)
    ax[1].spines['bottom'].set_visible(False)
    ax[1].spines['left'].set_visible(True)
    ax[2].plot(df1)
    ax[2].set_title("Sleep state architecture")
    ax[2].get_xaxis().set_visible(True)
    ax[2].set(xlabel='Time, s')
    ax[2].get_yaxis().set_visible(True)
    ax[2].spines['top'].set_visible(False)
    ax[2].spines['right'].set_visible(False)
    ax[2].spines['bottom'].set_visible(True)
    ax[2].spines['left'].set_visible(True)
    # Scale bar: 3600 s of the x-axis = 60 min
    scalebar = AnchoredSizeBar(ax[2].transData,
                               3600, '60 min', 'lower right',
                               pad=0.1, borderpad=0.5, sep=5,  # optimize as needed, e.g., bbox_to_anchor=Bbox.from_bounds(0, 0, 1, 1)
                               frameon=False,
                               fontproperties=fontprops)
    ax[2].add_artist(scalebar)
    plt.savefig(f"analysis/{SubjectID}plot.png", dpi=300)
    plt.show()
    print(f"Subject {SubjectID}:heart rate, Sample Entropy and Sleep state architecture. File saved as {SubjectID}plot.png")
# Generalized execution of the HRV compute for any univariate peaks or RRI input
# Using raw heart rate data here, but one could switch to neurokit's (or another) artifact-correction engine if desired
# Initialize data structures
Uber_HRV = []
i = 0
sampling_rate = 1000  # be careful to update as needed
#sliding_window_duration = 20  # in minutes, adjust as needed; 5 minutes is the lower physiological limit; the smaller the value, the longer the total compute over the entire recording; the larger the value, the cruder the dynamic HRV estimate
# Compute & save into lists of lists
while i <= len(bpm_files) - 1:
    # Read the raw bpm file, artifact-correct it, convert to RRIs and return the result
    peaks = read_bpm_file(bpm_files, i, sampling_rate)
    # Determine the number of sliding windows [= divider] to compute HRV on
    #duration_peaks = peaks[-1]  # duration in samples
    #divider = duration_peaks / 60 / sliding_window_duration  # sliding_window_duration (min)
    # Get the subject ID from the filename; adjust as per the naming convention in a dataset
    ID = bpm_files[i].stem
    # Extract the numeric part of the string
    temp = re.findall(r'\d+', ID)
    SubjectID = list(map(int, temp))[0]
    # Compute all HRV metrics
    HRV_final = compute_HRV_whole(peaks, SubjectID, sampling_rate, 1)
    # Update the UBER df
    Uber_HRV.append(HRV_final)
    # Higher HRV and sleep stats are executed separately below for ease of troubleshooting; technically, all three steps could run in this one loop
    i += 1
print('Computation of time segment-wise HRV metrics completed.')
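The regex-based ID extraction simply takes the first run of digits in the filename stem; for a hypothetical stem "46343_heartrate":
print(re.findall(r'\d+', "46343_heartrate"))  # ['46343'] -> SubjectID 46343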
Computation of time segment-wise HRV metrics completed.
# Troubleshooting: compute & save all HRV metrics for a single HR recording
Uber_HRV = []
sampling_rate = 1000  # be careful to update as needed
peaks = read_bpm_file(bpm_files, 0, sampling_rate)
# Determine the number of sliding windows [= divider] to compute HRV on
duration_peaks = peaks[-1]  # duration in samples
#divider = duration_peaks / 60 / sliding_window_duration  # sliding_window_duration (min)
# Get the subject ID from the filename (index 0 to match the recording read above); adjust as per the naming convention in a dataset
ID = bpm_files[0].stem
# Extract the numeric part of the string
temp = re.findall(r'\d+', ID)
SubjectID = list(map(int, temp))[0]
# Compute all HRV metrics over the entire recording, rather than a sliding window
HRV_final = compute_HRV(peaks, SubjectID, sampling_rate, 1)
# Update the UBER df
Uber_HRV.append(HRV_final)
print('Computation of time segment-wise HRV metrics completed.')
# save HRV and sleep results
Uber_HRV_df=pd.concat(Uber_HRV)
Uber_HRV_df.to_excel("analysis/Sample_HRV_metrics.xlsx")
Computation of time segment-wise HRV metrics completed.
Uber_HRV
[Output truncated: a list of single-row DataFrames (1 rows x 129 columns each), one per subject, spanning HRV_MeanNN, HRV_SDNN, HRV_SDANN1, ... through HRV_LZC, the optimal complexity parameters, segment duration, s, and SubjectID]
# save HRV and sleep results
Uber_HRV_df=pd.concat(Uber_HRV)
Uber_HRV_df.to_excel("analysis/HRV_metrics.xlsx")
# Compute higher HRV & save into lists
# This is for the case when HRV is computed in sliding windows, giving more than ~7 data points per subject;
# doing so increases compute time and was skipped in this demo run
Uber_higher_HRV = []
i = 0
while i <= len(bpm_files) - 1:
    # Compute higher-order HRV metrics per subject (Uber_HRV[i] holds that subject's segment-wise metrics)
    HRV_higher_final = compute_higher_HRV(Uber_HRV[i])
    # Update the UBER_higher_df
    Uber_higher_HRV.append(HRV_higher_final)
    i += 1
print('Computation of higher HRV metrics completed.')
Uber_higher_HRV_df=pd.concat(Uber_higher_HRV)
Uber_higher_HRV_df.to_excel("analysis/higher_HRV_metrics.xlsx")
# Compute sleep stats & save into a dataframe
Uber_sleep = []
i = 0
sleep_statistics_labels = ['SubjectID', 'sleep_duration, h', 'duration N3 NREM, min', 'duration N2 NREM, min', 'duration N1 NREM, min', 'duration REM, min', 'duration awake, min']
while i <= len(bpm_files) - 1:  # assuming equal numbers of sleep state and HR files
    # Read the sleep state file
    sleep_label_data = pd.read_csv(sleep_label_files[i], sep=" ", header=None,
                                   names=["Time, s", "Sleep label"])
    # Get the subject ID from the filename; adjust as per the naming convention in a dataset
    ID = bpm_files[i].stem
    # Extract the numeric part of the string
    temp = re.findall(r'\d+', ID)
    SubjectID = list(map(int, temp))[0]
    # Compute sleep label stats
    sleep_stats = duration_sleep(sleep_label_data, SubjectID)
    # Update the UBER_sleep_df
    Uber_sleep.append(sleep_stats)
    i += 1
print('Computation of sleep metrics completed.')
# Save it all into a dataframe
Uber_sleep_df = pd.DataFrame(Uber_sleep, columns=sleep_statistics_labels)
Computation of sleep metrics completed.
#Uber_sleep_df=pd.concat(Uber_sleep)
Uber_sleep_df.to_excel("analysis/sleep.xlsx")
# Create a function wrapper that returns only the SampEn value (nk.entropy_sample returns a (value, info) tuple)
sampen = lambda x: nk.entropy_sample(x)[0]
# Compute RMSSD (ms)
def compute_rmssd(bpm_file):
    rri = 60000 / bpm_file  # convert bpm to RR intervals (ms)
    diff_rri = np.diff(rri)
    rmssd = np.sqrt(np.nanmean(diff_rri ** 2))
    return rmssd
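A worked check of compute_rmssd on hypothetical bpm values: a constant 60 bpm gives RRI = 1000 ms everywhere, so all successive differences are 0 and RMSSD = 0.
print(compute_rmssd(pd.Series([60.0, 60.0, 60.0, 60.0])))  # 0.0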
i = 0
Uber_SampEn_sleep = []
Uber_SampEn = []
HRV_comp_sleep_statistics_labels = ['SubjectID', 'sleep_duration, h', 'duration_deep_sleep, min', 'mean SampEn', 'CV_sampEn', 'mean RMSSD', 'CV RMSSD']
while i <= len(bpm_files) - 1:
    # Import raw HR file in bpm
    bpm_raw = pd.read_csv(bpm_files[i])
    # Read the sleep state file as a Series indexed by time so that the actual time axis, not the raw row index, is used for plotting
    sleep_label_data = pd.read_csv(sleep_label_files[i], sep=" ", header=None, index_col=0,
                                   names=["Time, s", "Sleep label"]).squeeze("columns")
    # Get the subject ID from the filename; adjust as per the naming convention in a dataset
    ID = bpm_files[i].stem
    # Extract the numeric part of the string
    temp = re.findall(r'\d+', ID)
    SubjectID = list(map(int, temp))[0]
    # Use the sampen function wrapper in a rolling window; adjust the window as needed
    rolling_sampen = pd.Series(bpm_raw.iloc[:, 1].values).rolling(500, min_periods=300, center=True).apply(sampen)
    rmssd = pd.Series(bpm_raw.iloc[:, 1].values).rolling(500, min_periods=300, center=True).apply(compute_rmssd)
    Uber_SampEn_rmssd_temp = [SubjectID, rolling_sampen, rmssd]
    Uber_SampEn.append(Uber_SampEn_rmssd_temp)  # save raw SampEn & RMSSD values for future use
    # Compute sleep label stats
    # Duration of the sleep label file in hours, from the last value of the "Time, s" column (read here as the index)
    sleep_duration = sleep_label_data.index[-1] / 3600
    # Select only N3 deep sleep (3rd stage, deepest sleep)
    N3_nrem = sleep_label_data.loc[sleep_label_data == 3]
    # Duration of N3 NREM in minutes = number of rows with this label * 30 s per epoch / 60
    duration_deep_sleep = len(sleep_label_data[sleep_label_data == 3.0]) * 30 / 60
    # Concatenate SubjectID, sleep duration and N3 NREM duration plus the SampEn/RMSSD summaries
    HRV_comp_sleep_statistics = [SubjectID, sleep_duration, duration_deep_sleep, np.mean(rolling_sampen), np.std(rolling_sampen) / np.mean(rolling_sampen), np.mean(rmssd), np.std(rmssd) / np.mean(rmssd)]
    # Update the UBER_HRV_comp_sleep list
    Uber_SampEn_sleep.append(HRV_comp_sleep_statistics)
    # Plot the SampEn metric of HRV together with the sleep architecture; save the plot
    vizualize_hrv_sleep_states(sleep_label_data, bpm_raw, rolling_sampen, SubjectID)
    i += 1
# Save it all into dataframes
Uber_SampEn_sleep_df = pd.DataFrame(Uber_SampEn_sleep, columns=HRV_comp_sleep_statistics_labels)
Uber_SampEn_df = pd.DataFrame(Uber_SampEn, columns=['Subject ID', 'SampEn', 'RMSSD'])
print('Computation and visualization completed. Plots saved')
Subject 1066528:heart rate, Sample Entropy and Sleep state architecture. File saved as 1066528plot.png
Subject 1360686:heart rate, Sample Entropy and Sleep state architecture. File saved as 1360686plot.png
Subject 1449548:heart rate, Sample Entropy and Sleep state architecture. File saved as 1449548plot.png
Subject 1455390:heart rate, Sample Entropy and Sleep state architecture. File saved as 1455390plot.png
Subject 1818471:heart rate, Sample Entropy and Sleep state architecture. File saved as 1818471plot.png
Subject 2598705:heart rate, Sample Entropy and Sleep state architecture. File saved as 2598705plot.png
Subject 2638030:heart rate, Sample Entropy and Sleep state architecture. File saved as 2638030plot.png
Subject 3509524:heart rate, Sample Entropy and Sleep state architecture. File saved as 3509524plot.png
Subject 3997827:heart rate, Sample Entropy and Sleep state architecture. File saved as 3997827plot.png
Subject 4018081:heart rate, Sample Entropy and Sleep state architecture. File saved as 4018081plot.png
Subject 4314139:heart rate, Sample Entropy and Sleep state architecture. File saved as 4314139plot.png
Subject 4426783:heart rate, Sample Entropy and Sleep state architecture. File saved as 4426783plot.png
Subject 46343:heart rate, Sample Entropy and Sleep state architecture. File saved as 46343plot.png
Subject 5132496:heart rate, Sample Entropy and Sleep state architecture. File saved as 5132496plot.png
Subject 5383425:heart rate, Sample Entropy and Sleep state architecture. File saved as 5383425plot.png
Subject 5498603:heart rate, Sample Entropy and Sleep state architecture. File saved as 5498603plot.png
Subject 5797046:heart rate, Sample Entropy and Sleep state architecture. File saved as 5797046plot.png
Subject 6220552:heart rate, Sample Entropy and Sleep state architecture. File saved as 6220552plot.png
Subject 759667:heart rate, Sample Entropy and Sleep state architecture. File saved as 759667plot.png
Subject 7749105:heart rate, Sample Entropy and Sleep state architecture. File saved as 7749105plot.png
Subject 781756:heart rate, Sample Entropy and Sleep state architecture. File saved as 781756plot.png
Subject 8000685:heart rate, Sample Entropy and Sleep state architecture. File saved as 8000685plot.png
Subject 8173033:heart rate, Sample Entropy and Sleep state architecture. File saved as 8173033plot.png
Subject 8258170:heart rate, Sample Entropy and Sleep state architecture. File saved as 8258170plot.png
Subject 844359:heart rate, Sample Entropy and Sleep state architecture. File saved as 844359plot.png
Subject 8530312:heart rate, Sample Entropy and Sleep state architecture. File saved as 8530312plot.png
Subject 8686948:heart rate, Sample Entropy and Sleep state architecture. File saved as 8686948plot.png
Subject 8692923:heart rate, Sample Entropy and Sleep state architecture. File saved as 8692923plot.png
Subject 9106476:heart rate, Sample Entropy and Sleep state architecture. File saved as 9106476plot.png
Subject 9618981:heart rate, Sample Entropy and Sleep state architecture. File saved as 9618981plot.png
Subject 9961348:heart rate, Sample Entropy and Sleep state architecture. File saved as 9961348plot.png
Computation and visualization completed. Plots saved
Uber_SampEn_sleep_df.to_excel("analysis/sleep_SampEn.xlsx")
Uber_SampEn_df.to_excel("analysis/RAW_SampEn.xlsx")
#We can now access any row to plot it or perform further analyses
nk.signal_plot(Uber_SampEn_df.iloc[0,1])
#simple column-specific correlations
correlation=Uber_SampEn_sleep_df.iloc[:,2].corr(Uber_SampEn_sleep_df.iloc[:,3],method='spearman')
#save the corr matrix and viz it for p<0.05
correlation
0.26658601066324045
x=Uber_SampEn_sleep_df.iloc[:,2] #deep sleep durations
y=Uber_SampEn_sleep_df.iloc[:,3] #mean SampEn
r, p = scipy.stats.spearmanr(x, y)
print('r value is',r,'. The p value is',p)
r value is 0.26658601066324045 . The p value is 0.14714845196176837
x=Uber_SampEn_sleep_df.iloc[:,2] #deep sleep durations
y=Uber_SampEn_sleep_df.iloc[:,4] #CV SampEn
r, p = scipy.stats.spearmanr(x, y)
print('r value is',r,'. The p value is',p)
r value is -0.1425690692427466 . The p value is 0.44421700243122
x=Uber_SampEn_sleep_df.iloc[:,2] #deep sleep durations
y=Uber_SampEn_sleep_df.iloc[:,5] #mean RMSSD
r, p = scipy.stats.spearmanr(x, y)
print('r value is',r,'. The p value is',p)
r value is -0.18330308902638848 . The p value is 0.32362091516465774
x=Uber_SampEn_sleep_df.iloc[:,2] #deep sleep durations
y=Uber_SampEn_sleep_df.iloc[:,6] #CV RMSSD
r, p = scipy.stats.spearmanr(x, y)
print('r value is',r,'. The p value is',p)
#note the moderate and significant correlation between deep sleep duration and CV RMSSD; let's plot it
r value is 0.5515224955854483 . The p value is 0.0012997589378533477
x=Uber_SampEn_sleep_df.iloc[:,2] #deep sleep durations
y=Uber_SampEn_sleep_df.iloc[:,6] #CV RMSSD
slope, intercept, r, p, stderr = scipy.stats.linregress(x,y)
line = f'Regression line: y={intercept:.2f}+{slope:.2f}x, r={r:.2f}, p={p:.4f}'
fig, ax = plt.subplots(figsize=(15,15))
ax.plot(x,y, linewidth=0, marker='s', label='Data points')
ax.plot(x, intercept + slope * x, label=line)
ax.set_xlabel('N3 NREM duration')
ax.set_ylabel('RMSSD variability')
ax.legend(facecolor='white')
plt.show()
#now plot all correlations together as a table and a heatmap
df1 = Uber_SampEn_sleep_df.iloc[:,[1,2]]
df2 = Uber_SampEn_sleep_df.iloc[:,[3,4,5,6]]
# Initialize the result matrices for the pairwise Spearman tests (rows: sleep metrics, columns: HRV summaries)
coeffmat = np.zeros((df1.shape[1], df2.shape[1]))
pvalmat = np.zeros((df1.shape[1], df2.shape[1]))
for i in range(df1.shape[1]):
    for j in range(df2.shape[1]):
        corrtest = scipy.stats.spearmanr(df1[df1.columns[i]], df2[df2.columns[j]])
        coeffmat[i, j] = corrtest[0]
        pvalmat[i, j] = corrtest[1]
# if you wish to save the r, p values
dfcoeff = pd.DataFrame(coeffmat, columns=df2.columns, index=df1.columns)
dfpvals = pd.DataFrame(pvalmat, columns=df2.columns, index=df1.columns)
print(dfcoeff,'Spearman R')
print(dfpvals,'P values')
                          mean SampEn  CV_sampEn  mean RMSSD  CV RMSSD
sleep_duration, h           -0.351755   0.393304   -0.233965 -0.405405
duration_deep_sleep, min     0.266586  -0.142569   -0.183303  0.551522  Spearman R
                          mean SampEn  CV_sampEn  mean RMSSD  CV RMSSD
sleep_duration, h            0.052315   0.028604    0.205226  0.023661
duration_deep_sleep, min     0.147148   0.444217    0.323621  0.001300  P values
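The double loop can also be avoided: scipy.stats.spearmanr accepts a 2-D input and returns the full correlation and p-value matrices at once. A sketch under that assumption:
rho, pval = scipy.stats.spearmanr(pd.concat([df1, df2], axis=1))
n1 = df1.shape[1]
coeffmat = rho[:n1, n1:]   # the df1-rows vs df2-columns block
pvalmat = pval[:n1, n1:]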
import seaborn as sns # the heatmaps below rely on seaborn; import it here if not done earlier
plt.rcParams["figure.figsize"] = (16, 12)
sns.heatmap(Uber_SampEn_sleep_df.iloc[:,[1,2,3,4,5,6]].corr(method='spearman'), annot = True)
plt.show()
Now compute and visualize the correlations between the full set of sleep state statistics and all HRV metrics
HRV_labels=np.arange(7,131).tolist()
HRV_labels
[7, 8, 9, 10, ..., 128, 129, 130]
corr_final_df=pd.merge(Uber_sleep_df,Uber_HRV_df, on='SubjectID')
corr_final_df
 | SubjectID | sleep_duration, h | duration N3 NREM, min | duration N2 NREM, min | duration N1 NREM, min | duration REM, min | duration awake, min | HRV_MeanNN | HRV_SDNN | HRV_SDANN1 | ... | HRV_CMSEn | HRV_RCMSEn | HRV_CD | HRV_HFD | HRV_KFD | HRV_LZC | optimal time delay | optimal complexity dim | optimal complexity tolerance | segment duration, s |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1066528 | 7.925000 | 31.0 | 149.5 | 48.5 | 154.5 | 92.5 | 990.477672 | 111.212974 | 79.259575 | ... | 1.390423 | 1.665799 | 0.816266 | 1.574882 | 3.197034 | 0.133318 | 1.0 | 2.0 | 0.02 | 16457777.0 |
1 | 1360686 | 8.058333 | 81.5 | 213.5 | 16.5 | 104.0 | 58.5 | 851.185042 | 84.906506 | 73.026406 | ... | 1.219804 | 1.770300 | 0.838080 | 1.619404 | 2.745771 | 0.315213 | 1.0 | 2.0 | 0.02 | 5929355.0 |
2 | 1449548 | 8.033333 | 36.0 | 258.5 | 17.5 | 112.0 | 52.5 | 882.875592 | 100.589756 | 82.800734 | ... | 1.195113 | 1.510093 | 0.782240 | 1.594091 | 2.904292 | 0.340332 | 1.0 | 2.0 | 0.02 | 11368789.0 |
3 | 1455390 | 8.241667 | 0.0 | 317.0 | 28.5 | 90.0 | 42.0 | 1054.965796 | 185.657067 | 146.911134 | ... | 1.107796 | 1.206041 | 0.630214 | 1.514381 | 2.533032 | 0.169400 | 1.0 | 2.0 | 0.02 | 7309858.0 |
4 | 1818471 | 8.225000 | 71.0 | 267.5 | 25.5 | 110.0 | 5.0 | 772.043267 | 71.045526 | 50.323203 | ... | 1.248357 | 1.434233 | 0.672873 | 1.685596 | 2.486309 | 0.381418 | 1.0 | 2.0 | 0.02 | 5549447.0 |
5 | 2598705 | 8.183333 | 76.0 | 241.0 | 24.5 | 125.5 | 10.0 | 1123.663485 | 210.392864 | 151.892328 | ... | 1.030782 | 1.028174 | 0.645431 | 1.519440 | 2.750310 | 0.215820 | 1.0 | 2.0 | 0.02 | 7770133.0 |
6 | 2638030 | 8.066667 | 63.0 | 245.5 | 21.0 | 83.0 | 61.5 | 989.357153 | 117.845833 | 101.088264 | ... | 1.093307 | 1.270768 | 0.849605 | 1.619019 | 2.677976 | 0.198619 | 1.0 | 2.0 | 0.02 | 6936383.0 |
7 | 3509524 | 3.466667 | 22.5 | 124.0 | 11.5 | 24.0 | 26.5 | 689.135580 | 61.793905 | 44.366173 | ... | 1.176231 | 1.570866 | 0.817562 | 1.764318 | 3.004612 | 0.503706 | 1.0 | 2.0 | 0.02 | 2638011.0 |
8 | 3997827 | 8.133333 | 43.5 | 259.0 | 17.5 | 146.5 | 12.5 | 953.874293 | 279.871593 | 205.280481 | ... | 0.841985 | 0.783512 | 0.678104 | 1.556013 | 2.912620 | 0.143664 | 1.0 | 2.0 | 0.02 | 7762629.0 |
9 | 4018081 | 4.241667 | 88.5 | 96.5 | 7.0 | 17.5 | 45.5 | 925.250531 | 103.027556 | 74.716771 | ... | 1.050653 | 1.202954 | 0.754606 | 1.724510 | 2.794870 | 0.428979 | 1.0 | 2.0 | 0.02 | 4790022.0 |
10 | 4314139 | 8.075000 | 48.0 | 262.0 | 37.0 | 81.5 | 52.0 | 812.137906 | 243.948980 | 205.627019 | ... | 0.977951 | 1.252278 | 0.911035 | 1.622505 | 2.621222 | 0.155822 | 1.0 | 2.0 | 0.02 | 8880728.0 |
11 | 4426783 | 8.158333 | 27.5 | 245.5 | 30.5 | 151.5 | 35.0 | 840.734534 | 195.264079 | 184.211171 | ... | 1.020806 | 1.386965 | 0.909561 | 1.515551 | 2.697182 | 0.159442 | 1.0 | 2.0 | 0.02 | 13725832.0 |
12 | 46343 | 4.716667 | 78.0 | 85.0 | 14.5 | 57.0 | 42.5 | 744.655526 | 81.285646 | 53.684736 | ... | 1.193617 | 1.357594 | 0.721650 | 1.713507 | 2.554488 | 0.429578 | 1.0 | 2.0 | 0.02 | 3631685.0 |
13 | 5132496 | 3.866667 | 76.0 | 110.5 | 29.5 | 0.0 | 16.5 | 700.159282 | 78.423382 | 54.767274 | ... | 1.110815 | 1.734100 | 0.894931 | 1.699154 | 2.952257 | 0.419274 | 1.0 | 2.0 | 0.02 | 3002283.0 |
14 | 5383425 | 8.141667 | 17.0 | 195.5 | 26.0 | 134.5 | 20.0 | 827.489001 | 74.814794 | 61.128573 | ... | 1.132688 | 1.468860 | 0.857831 | 1.620588 | 2.457936 | 0.376606 | 1.0 | 2.0 | 0.02 | 4777094.0 |
15 | 5498603 | 6.316667 | 78.5 | 169.5 | 21.5 | 49.0 | 53.5 | 769.093084 | 192.762123 | 153.086557 | ... | 0.804111 | 0.873453 | 0.803696 | 1.560849 | 2.752075 | 0.195564 | 1.0 | 2.0 | 0.02 | 6171972.0 |
16 | 5797046 | 8.008333 | 64.5 | 265.5 | 26.0 | 74.0 | 39.5 | 892.721285 | 165.841252 | 141.887038 | ... | 1.168396 | 1.597462 | 0.947361 | 1.536202 | 2.754740 | 0.224877 | 1.0 | 2.0 | 0.02 | 11005468.0 |
17 | 6220552 | 8.175000 | 90.5 | 265.5 | 24.5 | 70.5 | 26.5 | 907.135546 | 100.608139 | 83.689062 | ... | 1.158604 | 1.402023 | 0.819625 | 1.695082 | 2.757141 | 0.298670 | 1.0 | 2.0 | 0.02 | 6451548.0 |
18 | 759667 | 3.950000 | 69.5 | 69.0 | 9.5 | 80.5 | 9.0 | 862.059941 | 136.305105 | 109.459239 | ... | 1.166807 | 1.443043 | 0.878548 | 1.648225 | 3.023992 | 0.284982 | 1.0 | 2.0 | 0.02 | 3768064.0 |
19 | 7749105 | 7.991667 | 66.5 | 182.0 | 12.5 | 109.0 | 102.5 | 781.812027 | 100.764634 | 66.895893 | ... | 1.026490 | 1.423006 | 0.639578 | 1.735607 | 2.162916 | 0.405608 | 1.0 | 2.0 | 0.02 | 1859149.0 |
20 | 781756 | 8.166667 | 11.5 | 230.5 | 89.5 | 117.5 | 41.5 | 1200.382591 | 109.349032 | 71.836207 | ... | 1.403252 | 1.854479 | 0.504641 | 1.512877 | 2.733221 | 0.142828 | 1.0 | 2.0 | 0.02 | 21623692.0 |
21 | 8000685 | 8.033333 | 33.0 | 265.5 | 35.0 | 128.5 | 16.5 | 860.249716 | 96.106072 | 72.468905 | ... | 1.306279 | 1.422328 | 0.802485 | 1.594476 | 2.841287 | 0.361473 | 1.0 | 2.0 | 0.02 | 6052717.0 |
22 | 8173033 | 8.200000 | 96.5 | 220.5 | 43.0 | 103.0 | 14.5 | 901.185517 | 71.950470 | 54.193673 | ... | 1.236331 | 1.557344 | 0.489026 | 1.586249 | 1.864909 | 0.302373 | 1.0 | 2.0 | 0.02 | 5426038.0 |
23 | 8258170 | 8.091667 | 24.5 | 200.0 | 24.0 | 148.5 | 47.5 | 1055.696661 | 153.974535 | 125.243634 | ... | 1.224861 | 1.523533 | 0.688557 | 1.541540 | 2.202857 | 0.253614 | 1.0 | 2.0 | 0.02 | 6734289.0 |
24 | 844359 | 7.900000 | 57.0 | 208.0 | 37.5 | 99.5 | 46.5 | 876.190077 | 94.150212 | 68.196304 | ... | 1.225812 | 1.724430 | 0.763419 | 1.670909 | 2.720202 | 0.347186 | 1.0 | 2.0 | 0.02 | 6269140.0 |
25 | 8530312 | 8.041667 | 51.5 | 215.5 | 36.0 | 131.5 | 39.0 | 946.361251 | 101.934684 | 71.295579 | ... | 1.238153 | 1.374842 | 0.824619 | 1.644514 | 3.105217 | 0.391448 | 1.0 | 2.0 | 0.02 | 6745663.0 |
26 | 8686948 | 8.083333 | 85.5 | 254.0 | 34.0 | 79.5 | 24.5 | 829.525195 | 69.399255 | 43.377864 | ... | 1.209739 | 1.325647 | 0.517991 | 1.718306 | 2.450265 | 0.480079 | 1.0 | 2.0 | 0.02 | 5531274.0 |
27 | 8692923 | 8.033333 | 71.0 | 219.5 | 19.0 | 103.5 | 55.0 | 892.367865 | 78.877005 | 59.587705 | ... | 1.222379 | 1.357057 | 0.684355 | 1.645079 | 2.719778 | 0.359260 | 1.0 | 2.0 | 0.02 | 6753440.0 |
28 | 9106476 | 8.116667 | 79.5 | 283.0 | 30.5 | 44.0 | 43.5 | 795.983313 | 79.102079 | 56.174330 | ... | 1.286367 | 1.641477 | 0.533373 | 1.720380 | 2.593989 | 0.484602 | 1.0 | 2.0 | 0.02 | 6391746.0 |
29 | 9618981 | 7.866667 | 12.5 | 174.0 | 82.5 | 115.0 | 47.0 | 952.897872 | 106.361517 | 77.033374 | ... | 1.194281 | 1.255477 | 0.542212 | 1.636785 | 2.423849 | 0.525562 | 1.0 | 2.0 | 0.02 | 6046137.0 |
30 | 9961348 | 5.991667 | 13.0 | 184.5 | 30.0 | 97.0 | 35.5 | 1078.844172 | 165.858665 | 135.212181 | ... | 1.271898 | 1.555774 | 0.761533 | 1.516119 | 2.879406 | 0.096058 | 1.0 | 2.0 | 0.02 | 17252876.0 |
31 rows × 135 columns
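Note that HRV_labels hard-codes column positions 7-130, which breaks if the merged frame ever changes shape; selecting the HRV columns by name prefix is a more robust alternative (a sketch; it should match the positional range used below):
HRV_cols = [c for c in corr_final_df.columns if c.startswith('HRV_')]  # all HRV metric columns
df2_by_name = corr_final_df[HRV_cols]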
#now plot all correlations together as a table and a heatmap
df1 = corr_final_df.iloc[:,[1,2,3,4,5,6]]
df2 = corr_final_df.iloc[:,HRV_labels]
coeffmat = np.zeros((df1.shape[1], df2.shape[1]))
pvalmat = np.zeros((df1.shape[1], df2.shape[1]))
for i in range(df1.shape[1]):
    for j in range(df2.shape[1]):
        corrtest = scipy.stats.spearmanr(df1[df1.columns[i]], df2[df2.columns[j]])
        coeffmat[i,j] = corrtest[0]
        pvalmat[i,j] = corrtest[1]
# if you wish to save the r, p values
dfcoeff = pd.DataFrame(coeffmat, columns=df2.columns, index=df1.columns)
dfpvals = pd.DataFrame(pvalmat, columns=df2.columns, index=df1.columns)
#print(dfcoeff,'Spearman R')
#print(dfpvals,'P values')
dfpvals
 | HRV_MeanNN | HRV_SDNN | HRV_SDANN1 | HRV_SDNNI1 | HRV_SDANN2 | HRV_SDNNI2 | HRV_SDANN5 | HRV_SDNNI5 | HRV_RMSSD | HRV_SDSD | ... | HRV_ApEn | HRV_SampEn | HRV_ShanEn | HRV_MSEn | HRV_CMSEn | HRV_RCMSEn | HRV_CD | HRV_HFD | HRV_KFD | HRV_LZC |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sleep_duration, h | 0.083045 | 0.602875 | 0.456588 | 0.354149 | 0.436993 | 0.497140 | 0.358744 | 0.605901 | 0.031953 | 0.031953 | ... | 0.062226 | 0.075072 | 0.041973 | 0.491630 | 0.837827 | 0.459235 | 0.066650 | 0.021467 | 0.009816 | 0.091206 |
duration N3 NREM, min | 0.057465 | 0.068652 | 0.111474 | 0.201329 | 0.055170 | 0.138755 | 0.059836 | 0.153926 | 0.981103 | 0.981103 | ... | 0.372249 | 0.478760 | 0.371660 | 0.063115 | 0.493780 | 0.632721 | 0.739950 | 0.005643 | 0.714005 | 0.078251 |
duration N2 NREM, min | 0.393059 | 0.554588 | 0.287441 | 0.428600 | 0.245367 | 0.751318 | 0.192598 | 0.772709 | 0.055242 | 0.055242 | ... | 0.040165 | 0.058166 | 0.607380 | 0.241308 | 0.670844 | 0.499170 | 0.410302 | 0.114234 | 0.244009 | 0.236851 |
duration N1 NREM, min | 0.161857 | 0.895464 | 0.875046 | 0.920222 | 0.882694 | 0.839512 | 0.850481 | 0.743980 | 0.947625 | 0.947625 | ... | 0.702693 | 0.793453 | 0.019186 | 0.840354 | 0.008475 | 0.202427 | 0.355868 | 0.073175 | 0.385246 | 0.333912 |
duration REM, min | 0.010843 | 0.156322 | 0.192061 | 0.398184 | 0.121716 | 0.429474 | 0.111033 | 0.410540 | 0.624334 | 0.624334 | ... | 0.207026 | 0.191294 | 0.074814 | 0.806031 | 0.227912 | 0.979390 | 0.589487 | 0.001943 | 0.688429 | 0.049311 |
duration awake, min | 0.739950 | 0.428742 | 0.471339 | 0.751391 | 0.467986 | 0.645881 | 0.537362 | 0.416704 | 0.900599 | 0.900599 | ... | 0.958785 | 0.990551 | 0.982821 | 0.865740 | 0.915964 | 0.880168 | 0.816863 | 0.901452 | 0.442916 | 0.695547 |
6 rows × 124 columns
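A caveat before thresholding: with 6 x 124 = 744 tests, roughly 37 would be expected to fall below p<0.05 by chance alone. If statsmodels is installed, a Benjamini-Hochberg FDR correction is one option; a sketch (dfpvals_adj is a hypothetical name):
from statsmodels.stats.multitest import multipletests
flat_p = dfpvals.to_numpy().ravel()
reject, p_adj, _, _ = multipletests(flat_p, alpha=0.05, method='fdr_bh')  # BH step-up FDR
dfpvals_adj = pd.DataFrame(p_adj.reshape(dfpvals.shape), columns=dfpvals.columns, index=dfpvals.index)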
#now subset R values on p values less than 0.05
signif_R = dfcoeff[dfpvals < 0.05]
signif_R
 | HRV_MeanNN | HRV_SDNN | HRV_SDANN1 | HRV_SDNNI1 | HRV_SDANN2 | HRV_SDNNI2 | HRV_SDANN5 | HRV_SDNNI5 | HRV_RMSSD | HRV_SDSD | ... | HRV_ApEn | HRV_SampEn | HRV_ShanEn | HRV_MSEn | HRV_CMSEn | HRV_RCMSEn | HRV_CD | HRV_HFD | HRV_KFD | HRV_LZC |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sleep_duration, h | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -0.386043 | -0.386043 | ... | NaN | NaN | -0.367487 | NaN | NaN | NaN | NaN | -0.411456 | -0.456636 | NaN |
duration N3 NREM, min | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.485380 | NaN | NaN |
duration N2 NREM, min | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | -0.37055 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
duration N1 NREM, min | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | -0.418314 | NaN | 0.464502 | NaN | NaN | NaN | NaN | NaN |
duration REM, min | 0.45121 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -0.534677 | NaN | -0.356048 |
duration awake, min | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
6 rows × 124 columns
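Optionally, metrics with no significant hit can be dropped before plotting to keep the heatmap readable (a sketch):
signif_R_compact = signif_R.dropna(axis=1, how='all')  # keep only columns with at least one significant R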
font = {'family': 'DejaVu Sans',
'color': 'darkred',
'weight': 'normal',
'size': 16,
}
plt.rcParams["figure.figsize"] = (16, 12)
sns.heatmap(signif_R, annot = False)
plt.xlabel('HRV metrics', fontdict=font)
plt.ylabel('Sleep features', fontdict=font)
plt.yticks(rotation = 0)
plt.show()
#use plotly to display only significant correlations interactively; zoom in for details
import plotly.express as px
fig = px.imshow(signif_R,labels=dict(x="HRV metrics", y="Sleep features", color="Spearman R"),title='Significant correlations (p<0.05)',
text_auto=True, aspect="auto")
fig.show()
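To keep the interactive figure alongside the static plots, plotly can export it to a standalone HTML file (the path below is hypothetical):
fig.write_html("analysis/signif_correlations.html")  # self-contained interactive HTML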
# simple way to show all R values in a heatmap
plt.rcParams["figure.figsize"] = (16, 12)
sns.heatmap(corr_final_df.corr(method='spearman'), annot = True)
plt.show()
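One caveat: SubjectID is numeric, so it enters this correlation matrix as if it were a variable; excluding it first avoids a meaningless row and column (a sketch; annot is turned off because a matrix this large cannot be annotated legibly):
sns.heatmap(corr_final_df.drop(columns=['SubjectID']).corr(method='spearman'), annot=False)
plt.show()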
# use this if computing "higher HRV"
#scan the entire space of HRV metrics for those that correlate with sleep state stats across the cohort
# select the HRV_mean value (row) for each HRV metric (column) for all subjects:
# note that here SubjectID is matched, so no join is needed and the frames can be fed in directly; otherwise join the dfs on SubjectID first
mean_HRV_subsetDataFrame = Uber_higher_HRV_df[Uber_higher_HRV_df[0] == 'HRV_mean']
results = []
# the first HRV metric sits in the second column (index 1); column 0 holds the row label
for i in range(1, len(Uber_HRV_df.columns)):
    r, p = scipy.stats.spearmanr(Uber_sleep_df.iloc[:, 2], mean_HRV_subsetDataFrame.iloc[:, i])
    results.append((Uber_HRV_df.columns[i], r, p))
#save the correlation values and visualize those with p<0.05
# Future work based on this: predict sleep states from HRV
# Correlate sleep and HRV metrics over time (e.g., via the cross-correlation function, CCF)
# cf. https://pandas.pydata.org/docs/reference/api/pandas.Series.cov.html#pandas.Series.cov
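As a starting point for the time-domain idea, lagged Spearman correlations between two per-epoch series could be computed as below; a sketch assuming two equal-length pandas Series aligned on the same epoch index (lagged_spearman is a hypothetical helper):
def lagged_spearman(a, b, max_lag=10):
    # shift b by each lag; pandas aligns on the index and drops NaN pairs automatically
    return {lag: a.corr(b.shift(lag), method='spearman')
            for lag in range(-max_lag, max_lag + 1)}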