#!/usr/bin/env python import os, sys import copy import numpy as np # =========================================================================== def calc_interval_stats_perc(A, samp_rate=None, all_perc = (10, 25, 40, 50, 60, 75, 90), verb=0 ): """Calculate percentile-based statistics of the intervals of 1D array A (which is assumed to be sorted already). A sampling rate can be input to provide output units; if none is entered, then units are those of A (which are typically unitless when A represents peaks or troughs). Parameters ---------- A : list 1D list (likely of int values if representing peaks or troughs); assumed to be sorted already samp_rate : float physical value of sampling rate associated with A all_perc : set/np.ndarray 1D Python array of percentile values in range [0, 100], for which values of the distribution of intervals within A will be calculated Returns ------- stats_arr : np.ndarray an array of stats about the intervals of A, calculated from percentile values in all_perc """ # make sure A and all_perc have values N = len(A) Nperc = len(all_perc) if not(N) or not(Nperc) : return () # make interval set intervals = [j-i for i, j in zip(A[:-1], A[1:])] # calculate percentiles stats_arr = np.percentile(intervals, q=all_perc) # scale values, if applicable if samp_rate : stats_arr*= samp_rate return stats_arr def calc_interval_stats_mmms(A, samp_rate=None, verb=0 ): """Calculate statistics (min, max, mean and stdev) of the intervals of 1D array A (which is assumed to be sorted already). A sampling rate can be input to provide output units; if none is entered, then units are those of A (which are typically unitless when A represents peaks or troughs). Parameters ---------- A : list 1D list (likely of int values if representing peaks or troughs); assumed to be sorted already samp_rate : float physical value of sampling rate associated with A Returns ------- minval : float minimum value in A maxval : float maximum value in A meanval : float mean (average) value of collection A stdval : float standard deviation value of collection A """ # make sure A and all_perc have values N = len(A) if not(N) : return () # make interval set intervals = [j-i for i, j in zip(A[:-1], A[1:])] # calculate values minval = np.min(intervals) maxval = np.max(intervals) meanval = np.mean(intervals) stdval = np.std(intervals) # scale values, if applicable if samp_rate : minval *= samp_rate maxval *= samp_rate meanval*= samp_rate stdval *= samp_rate return minval, maxval, meanval, stdval # ---------------------------------------------------------------------------- def make_str_bandpass(label, lab_title, lab_short, prefix='', odir='', ext='svg'): """Construct an output filename for the plot of the bandpassing, which is the first part of peak finding, as well as a title for the plot. Parameters ---------- label : str label for type of time series: 'card', 'resp', etc. lab_title : str longer descriptor of the idx-th step, for the title lab_short : str short descriptor of the idx-th step, for the filename prefix : str optional basename for full output odir : str optional path for output directory ext : str file extension type (prob stick with vector graphics) all_subscript : bool bc one title has a subscript, make sure *all* do, so vertical space taken by title is constant (and figs don't jump); this adds a whitespace subscript at the end of the title, if no '$' is already present in the title. Returns ------- fname : str name of file to be written title : str string for the title at the top of the figure """ # make the filename fname = '{}_{}.{}'.format(label, lab_short, ext) if prefix : fname = prefix + '_' + fname if odir : fname = odir.rstrip('/') + '/' + fname # make the title string title = 'Intermediate step for {} data: {}'.format(label, lab_title) return fname, title # --------------------------------------------------------------------------- def interleave_count(A, B, verb=0): """A and B are sorted lists of numbers. For each [i]th element of A, count how many elements in B fall between pairs A[i] and A[i+1]. This number is stored in an array C, which has len(A)-1 elements. NB: A can have no repeating elements, and for the major use case here---where A and B each store indices of peaks and troughs, respectively---neither will. This essentially provides information of how interleaved A and B are, which is useful if (for example) they respectively represent indices of peaks and troughs, for which there should theoretically be perfect interleaving. But this function exists precisely because we know that won't always be the case from estimations... For perfectly interleaved lists, each C[i] == 1. If a trough were missing between a pair of peaks, then C[i] == 0. Having "too many" troughs between a pair of peaks would be reflected by having C[i] > 1. Endpoint dangling, for A[-1], are also returned, but in a separate tuple. One could measure interleaving with A for peaks and B for troughs, or vice versa. NB: we assume (and verify that) A and B share no elements. Parameters ---------- A : list a 1D list of indices B : list a 1D list of indices Returns ------- C : list a 1D list of length len(A)-1, recording how many of the values stored in B fall between A[i] and A[i+1]. D : list a 1D list of length 2, recording how many of the values stored in B fall below A[0] and above A[-1], respectively. """ # ensure no overlap of A and B, and nonrepetition of each if check_list_olap_disorder(A, B) : sys.exit(3) NB = len(B) NA = len(A) C = np.zeros(NA-1, dtype=int) # between-pair counts D = np.zeros(2, dtype=int) # endpoint-dangling counts # precursor: fill in the first dangling endpoint. # NB: we have the start variable like this, to keep using below start = 0 j = start while j < NB : if B[j] > A[0] : # nothing to count, so stop and move along to next A[i] start = j break elif B[j] < A[0] : # in the dangling range: count it D[0]+= 1 # implied 'else': too small to be counted, just go to next ele j+= 1 # start the main loop, filling in C (use value of start from above for i in range(NA-1): j = start while j < NB : if B[j] > A[i+1] : # nothing to count, so stop and move along to next A[i] start = j break elif A[i] < B[j] : # in the between-pair range: count it C[i]+= 1 # implied 'else': too small to be counted, just go to next ele j+= 1 # postcursor: fill in the last dangling endpoint. # NB: all remaining elements of B should be dangling, but count # pedantically, anyways j = start while j < NB : if B[j] > A[-1] : # in the dangling range: count it D[1]+= 1 # implied 'else': too small to be counted, just go to next ele j+= 1 return C, D def check_list_olap_disorder(A, B): """A and B are lists, each of which are supposed to have no repetition, and which are not supposed to overlap. This function verifies that. It is particularly useful before checking for interleaving. Return 0 if things are fine (no overlap between A and B; no repetition in either A or B), or nonzero for failure. """ olap = set(A).intersection(set(B)) if len(olap): print("** ERROR: A and B cannot share any elements, but they do!") print(" " + str(list(olap))) return -1 if len(A) != len(set(A)) : print("** ERROR: List A cannot have repeated elements, but it does!") return 1 if len(B) != len(set(B)) : print("** ERROR: List B cannot have repeated elements, but it does!") return 1 return 0