Source code for blscint.bounds

import numpy as np
import matplotlib.pyplot as plt

from scipy import optimize
import scipy.stats
from astropy.stats import sigma_clip

import setigen as stg

from . import frame_processing


[docs]def plot_bounds(frame, l, r, use_db=False, cb=True, lw=2): """ Plot frame data with overlaid bounding boxes. Parameters ---------- frame : stg.Frame Analysis frame l : int Left bound r : int Right bound use_db : bool, optional Option to convert intensities to dB cb : bool, optional Option to display colorbar lw : int, optional Line width in matplotlib """ frame.plot(use_db=use_db, cb=cb) plt.axvline(l, ls='--', c='w', lw=lw) plt.axvline(r, ls='--', c='w', lw=lw)
[docs]def polyfit_bounds(spec, deg=1, snr_threshold=10): """ Bounding box set by a polynomial fit to the background. Edges are set by where the fit intersects the data on either side of the central peak. If the signal is broad compared to the spectrum or frame, the background fit will highly intersect or even model out the actual signal, hiding any peaks. Degrees to try: 1, 7. Parameters ---------- spec : ndarray Intensity spectra deg : int, optional Degree of polynomial fit snr_threshold : int, float, optional Threshold for peak detection, in units of noise standard deviations Returns ------- l : int Left bound r : int Right bound metadata : dict Dictionary with metadata related to peak-finding. Contains polynomial fit object, number of peaks, and detected peak information. """ y = sigma_clip(spec) x = np.arange(len(spec)) coeffs = np.polyfit(x[~y.mask], y[~y.mask], deg) poly = np.poly1d(coeffs) # Estimate noise std (with background fit subtraction) std = np.std(y[~y.mask] - poly(x[~y.mask])) # Get peaks above SNR threshold peaks = scipy.signal.find_peaks(spec - poly(x), prominence=snr_threshold * std) if len(peaks[0]) == 0: raise ValueError('No peaks found! Signal may be absent, too dim, or too wide') # Find highest peak i = np.argmax(peaks[1]['prominences']) peak_i = peaks[0][i] cutoffs = np.where(spec - poly(x) <= 0)[0] i = np.digitize(peak_i, cutoffs) - 1 # print(i, i+1, len(cutoffs), len(spec), cutoffs[-1]) l, r = cutoffs[i] + 1, cutoffs[i + 1] metadata = { 'poly': poly, 'num_peaks': len(peaks[0]), 'peaks': peaks } return l, r, metadata
[docs]def threshold_bounds(spec, half_width=3): """ Create bounds based on integrated intensity on either side of the central peak. Threshold is set by ideal Gaussian profile, in units of standard deviations (sigma). Parameters ---------- spec : ndarray Intensity spectra half_width : float, optional Assuming a Gaussian signal profile, the half_width determines where to set the bounds, in units of sigma from the peak. Returns ------- l : int Left bound r : int Right bound metadata : dict Dictionary with metadata. Contains noise mean and spectra maximum, which are used to normalize spec to the spectra maximum. """ noise_spec = sigma_clip(spec, masked=True) norm_spec = (spec - np.mean(noise_spec)) / (np.max(spec) - np.mean(noise_spec)) threshold = stg.func_utils.gaussian(half_width, 0, 1) cutoffs = np.where(norm_spec < threshold)[0] peak = np.argmax(norm_spec) i = np.digitize(peak, cutoffs) - 1 # print(i, i+1, len(cutoffs), len(spec), cutoffs[-1]) l, r = cutoffs[i] + 1, cutoffs[i + 1] metadata = { 'noise_mean': np.mean(noise_spec), 'spec_max': np.max(spec) } return l, r, metadata
[docs]def threshold_baseline_bounds(spec, p=0.01): """ Create bounds based on integrated intensity on either side of the central peak, as a fraction of the peak integrated intensity. Uses a 1D fit to the noise baseline. Parameters ---------- spec : ndarray Intensity spectra p : float, optional Fraction of peak, used to set left and right bounds Returns ------- l : int Left bound r : int Right bound metadata : dict Dictionary with metadata. Contains noise mean and spectra maximum, which are used to normalize spec to the spectra maximum. """ noise_spec = sigma_clip(spec, masked=True) x = np.arange(len(spec)) coeffs = np.polyfit(x[~noise_spec.mask], noise_spec[~noise_spec.mask], 1) poly = np.poly1d(coeffs) spec = spec - poly(x) norm_spec = (spec ) / (np.max(spec) ) cutoffs = np.where(norm_spec < p)[0] peak = np.argmax(norm_spec) i = np.digitize(peak, cutoffs) - 1 # print(i, i+1, len(cutoffs), len(spec), cutoffs[-1]) l, r = cutoffs[i] + 1, cutoffs[i + 1] metadata = { 'noise_mean': np.mean(noise_spec), 'spec_max': np.max(spec) } return l, r, metadata
[docs]def snr_bounds(spec, snr=5): """ Create bounds based on intensity attentuation on either side of the central peak, as an SNR threshold above the noise background. Uses a 1D fit to the noise baseline. Parameters ---------- spec : ndarray Intensity spectra snr : float, optional SNR threshold to set left and right bounds Returns ------- l : int Left bound r : int Right bound metadata : dict Dictionary with metadata. Contains noise mean and spectra maximum, which are used to normalize spec to the spectra maximum. """ peak = np.argmax(spec) y = sigma_clip(spec) x = np.arange(len(spec)) coeffs = np.polyfit(x[~y.mask], y[~y.mask], 1) poly = np.poly1d(coeffs) # Estimate noise std (with background fit subtraction) std = np.std(y[~y.mask] - poly(x[~y.mask])) # std = np.std(y) _, _, [l], [r] = scipy.signal.peak_widths(spec, [peak], 1 - (snr * std) / np.max(spec)) metadata = { 'std': std, 'spec_max': np.max(spec) } l = int(np.floor(l)) + 1 r = int(np.ceil(r)) return l, r, metadata
[docs]def gaussian_bounds(spec, half_width=3, peak_guess=None): """ Create bounds based on a Gaussian fit to the central peak. Parameters ---------- spec : ndarray Intensity spectra half_width : float, optional Assuming a Gaussian signal profile, the half_width determines where to set the bounds, in units of sigma from the peak. peak_guess : int, optional Guess for peak index. Should normally be the center of the spectrum, but can have strange side effects. Returns ------- l : int Left bound r : int Right bound metadata : dict Dictionary with metadata related to peak-finding. Contains Gaussian fit information. """ gaussian_func = lambda x, A, x0, sigma, y: A * stg.func_utils.gaussian(x, x0, sigma) + y if peak_guess is not None: peak_guess = [1, peak_guess, 1, 1] popt, _ = optimize.curve_fit(gaussian_func, np.arange(len(spec)), spec, p0=peak_guess) peak = int(popt[1]) sigma = abs(popt[2]) offset = int(sigma * half_width) metadata = { 'popt': popt } return peak - offset, peak + offset + 1, metadata
[docs]def clipped_bounds(spec, min_empty_bins=2): """ Run sigma clip on spectrum, and find central peak. This method reduces the spectrum to a mask based on sigma_clip, so it can be unreliable if the signal is broad compared to the size of the spectrum or frame. Parameters ---------- spec : ndarray Intensity spectra min_empty_bins : int, optional Minimum number of adjacent "empty", or non-clipped, bins required to mark either bound with respect to the central peak. Returns ------- l : int Left bound r : int Right bound metadata : dict Dictionary with metadata related to peak-finding. Contains detected peak information. """ mask_spec = sigma_clip(spec).mask.astype(int) peaks = scipy.signal.find_peaks(mask_spec, prominence=1) if len(peaks[0]) == 0: raise ValueError('No peaks found! Signal may be absent, too dim, or too wide') # Get the highest peak that's *closest* to the center of the frame center_bin = len(spec) // 2 prominences = peaks[1]['prominences'] max_idx = np.where(prominences == np.max(prominences))[0] peak_i = peaks[0][max_idx[np.argmin(np.abs(peaks[0][max_idx] - center_bin))]] # Convolve with ones, to find where sequences are adjacent zeros convolved = np.convolve(mask_spec, np.ones(min_empty_bins).astype(int), 'valid') c_mask = (convolved != 0).astype(int) diffs = np.diff(c_mask) # Find which range of bins the peak lies in l_idx = np.where(diffs > 0)[0] r_idx = np.where(diffs < 0)[0] # Max index with value under peak index, and min index with value over # Adjust left edge to make up for zero'd bins from the convolution, # since we only care about the signal l = l_idx[l_idx + min_empty_bins <= peak_i][-1] + min_empty_bins r = r_idx[r_idx >= peak_i][0] + 1 metadata = { 'num_peaks': len(peaks[0]), 'peaks': peaks } return l, r, metadata
[docs]def clipped_2D_bounds(frame, min_empty_bins=2, min_clipped=1, peak_prominence=4): """ Run sigma clip on 2D data array, and find central peak above a certain number of clipped values along the time axis, per frequency bin. This will return an IndexError if the signal passes outside of the frame (i.e. for very wide signals). Note that this function accepts a frame instead of a 1D numpy array spectrum. Parameters ---------- frame : setigen.Frame Frame of intensity data min_empty_bins : int, optional Minimum number of adjacent "empty", or non-clipped, bins required to mark either bound with respect to the central peak. min_clipped : int, optional Minimum number of pixels clipped by sigma_clip in the time direction in order for a frequency bin to be considered part of the signal. peak_prominence : int, optional Prominence in units of clipped pixels, for use in peak finding. Returns ------- l : int Left bound r : int Right bound metadata : dict Dictionary with metadata related to peak-finding. Contains detected peak information. """ n_frame = frame_proc.t_norm_frame(frame) clipped_data = sigma_clip(n_frame.data) mask_spec = np.sum(clipped_data.mask, axis=0) peaks = scipy.signal.find_peaks(mask_spec, prominence=peak_prominence) # idx = np.argmin(np.abs(peaks[0] - 128)) # Find highest peak # i = np.argmax(peaks[1]['prominences']) # peak_i = peaks[0][i] # Get the highest peak that's *closest* to the center of the frame center_bin = frame.fchans // 2 prominences = peaks[1]['prominences'] max_idx = np.where(prominences == np.max(prominences))[0] peak_i = peaks[0][max_idx[np.argmin(np.abs(peaks[0][max_idx] - center_bin))]] # I find that np.find_peaks doesn't do a good job for bounding boxes # l = peaks[1]['left_bases'][idx] # r = peaks[1]['right_bases'][idx] # Change mask to thresholding by number of pixels along time axis mask_spec = mask_spec >= min_clipped # print(peak_i, mask_spec) # Convolve with ones, to find where sequences are adjacent zeros convolved = np.convolve(mask_spec, np.ones(min_empty_bins).astype(int), 'valid') c_mask = (convolved != 0).astype(int) # print(c_mask) diffs = np.diff(c_mask) # print(diffs) # Find which range of bins the peak lies in l_idx = np.where(diffs > 0)[0] r_idx = np.where(diffs < 0)[0] # print(l_idx, r_idx) # Max index with value under peak index, and min index with value over # Adjust left edge to make up for zero'd bins from the convolution, # since we only care about the signal l = l_idx[l_idx + min_empty_bins <= peak_i][-1] + min_empty_bins r = r_idx[r_idx >= peak_i][0] + 1 metadata = { 'num_peaks': len(peaks[0]), 'peaks': peaks } return l, r, metadata
[docs]def boxcar_bounds(spec, window_sizes=None): """ Bounding box set by boxcar filter. Simplified version of matched filtering. Default window sizes: powers of 2. Parameters ---------- spec : ndarray Intensity spectra window_sizes : list, array-like, optional List of boxcar window sizes to try. If None, automatically uses powers of 2 as window sizes. Returns ------- l : int Left bound r : int Right bound metadata : dict Dictionary with metadata related to peak-finding. Contains peak SNR corresponding to the "best" boxcar filter. """ if window_sizes is None: p = int(np.log2(len(spec))) window_sizes = [2**x for x in range(p)] max_found = {'l': 0, 'r': len(spec), 'snr': 0} for window in window_sizes: corr = np.correlate(spec, np.ones(window), mode='valid') / window clipped_corr = sigma_clip(corr) m, s = np.mean(clipped_corr), np.std(clipped_corr) peak_snr = (np.max(corr) - m) / s peak_idx = np.argmax(corr) l = peak_idx r = peak_idx + window if peak_snr > max_found['snr']: max_found['l'] = l max_found['r'] = r max_found['snr'] = peak_snr metadata = { 'peak_snr': max_found['snr'] } return max_found['l'], max_found['r'], metadata