Source code for analysis.load

"""
Load data (from .rhd, .txt, .wav, etc)
"""
import numpy as np


def read_not_mat(notmat, unit="ms"):
    """
    Read from .not.mat files generated from uisonganal.m

    Parameters
    ----------
    notmat : path
        Name of the .not.mat file (``pathlib.Path`` or ``str``)
    unit : {'ms', 'second'}
        Timescale of the returned values. The file content is treated as
        milliseconds; 'second' divides every time value by 1e3. Any other
        value leaves the stored values untouched.

    Returns
    -------
    onsets : np.ndarray
        time stamp for syllable onset (in ``unit``)
    offsets : np.ndarray
        time stamp for syllable offset (in ``unit``)
    intervals : np.ndarray
        temporal interval between syllables (i.e. syllable gaps) (in ``unit``)
    durations : np.ndarray
        durations of each syllable (in ``unit``)
    syllables : str
        song syllables
    contexts : str or None
        social context ('U' for undirected and 'D' for directed), taken from
        the first letter of the last '_'-separated token of the file name;
        None if the file was not tagged with Undir or Dir
    """
    from pathlib import Path

    import scipy.io

    # Parse the file once instead of once per field
    mat = scipy.io.loadmat(notmat)

    # Work on float copies: the in-place unit conversion used previously
    # raised a casting error when the file stored integer timestamps
    onsets = mat["onsets"].transpose()[0].astype(np.float64)  # syllable onset timestamp
    offsets = mat["offsets"].transpose()[0].astype(np.float64)  # syllable offset timestamp
    intervals = onsets[1:] - offsets[:-1]  # syllable gap durations (interval)
    durations = offsets - onsets  # duration of each syllable
    syllables = mat["syllables"][0]  # Load the syllable info

    # Extract 'U' (undirected) or 'D' (directed) from the file name.
    # Slicing with [:1] tolerates an empty tag (e.g. a stem ending in '_').
    tag = Path(notmat).name.split(".")[0].split("_")[-1]
    contexts = tag[:1].upper()
    if contexts not in ("U", "D"):  # if the file was not tagged with Undir or Dir
        contexts = None

    # units are in ms by default, but convert to second with the argument
    if unit == "second":
        onsets = onsets / 1e3
        offsets = offsets / 1e3
        intervals = intervals / 1e3
        durations = durations / 1e3

    return onsets, offsets, intervals, durations, syllables, contexts
def read_spk_txt(spk_txt_file, *unit_nb, time_unit="second"):
    """
    Read the output .txt from the Offline Sorter.

    column header of the input .txt -> ['Channel', 'Unit', 'Timestamp']
    disregard the first column since it is always 1
    every column after the timestamp stores waveform samples

    Parameters
    ----------
    spk_txt_file : str
        Name of the spk txt file (tab-delimited, one header row)
    unit_nb : int
        Number(s) of the sorted unit(s) to keep. If not specified (default),
        it will read data from all recorded units.
    time_unit : {'second', 'ms'}
        Timestamps are returned as stored (seconds) unless 'ms' is given,
        in which case they are multiplied by 1e3.

    Returns
    -------
    spk_ts : np.ndarray
        Spike timestamps
    spk_waveform : np.ndarray
        Spike waveform (spk id x waveform)
    nb_spk : int
        Number of spikes
    """
    # skip header; ndmin=2 keeps a file holding a single spike as a
    # (1, n_columns) table so the column indexing below still works
    spk_info = np.loadtxt(spk_txt_file, delimiter="\t", skiprows=1, ndmin=2)

    # Select only the requested unit(s) (there could be multiple isolated units in the same file)
    if unit_nb:  # if the unit number is specified
        # unit_nb is a tuple (varargs); np.isin handles one or many units,
        # whereas comparing the column to the tuple only works for exactly one
        spk_info = spk_info[np.isin(spk_info[:, 1], unit_nb), :]

    spk_ts = spk_info[:, 2]  # analysis time stamps
    spk_waveform = spk_info[:, 3:]  # analysis waveform
    nb_spk = spk_waveform.shape[0]  # total number of spikes

    # units are in second by default, but convert to millisecond with the argument
    if time_unit == "ms":
        spk_ts = spk_ts * 1e3

    return spk_ts, spk_waveform, nb_spk
def read_rhd(filename):
    """
    Read an Intan Technologies RHD2000 data file generated by the evaluation board GUI.

    Thin wrapper: the file name is passed straight to ``read_rhd`` from
    ``utils.intan.load_intan_rhd_format`` and that function's result is
    returned unchanged (a dictionary, for future extensibility).
    """
    # Imported lazily so the Intan reader is only loaded when actually needed
    from ..utils.intan.load_intan_rhd_format import read_rhd as _load_rhd

    return _load_rhd(filename)
def load_song(data_path, format="wav") -> dict:
    """
    Obtain event info & serialized timestamps for song & neural analysis

    Search all files in the sub-directory and read from the associated
    .not.mat files to add the info into a single dictionary. Files are laid
    out back to back on one continuous time axis (in ms).

    Parameters
    ----------
    data_path : path
        Either a folder named 'Songs' or a parent folder that is searched
        recursively for 'Songs' folders
    format : str
        file extension (e.g., '.wav'), passed on to ``list_files``

    Returns
    -------
    song_info : dict
        One entry per audio file under the keys 'files', 'file_start',
        'file_end', 'onsets', 'offsets', 'durations', 'syllables' and
        'contexts'. The onset/offset/duration/syllable entries are the output
        of ``demarcate_bout``.
    """
    from scipy.io import wavfile

    from ..analysis.functions import demarcate_bout
    from ..utils.functions import list_files

    # Find the folder(s) that have song data (not calls)
    if data_path.stem != "Songs":
        song_dir = list(data_path.rglob("Songs"))
    else:
        song_dir = [data_path]

    # List all audio files in the dir
    audio_files = []
    for data_dir in song_dir:
        audio_files += list_files(data_dir, format)

    # Only the last timestamp of the previous file is needed to serialize the
    # next one; keeping every sample of every file (as an ever-growing array)
    # costs memory and quadratic copying for no benefit
    last_timestamp = None

    # Store values in these lists
    file_list = []
    file_start_list = []
    file_end_list = []
    onset_list = []
    offset_list = []
    duration_list = []
    syllable_list = []
    context_list = []

    # Loop through audio files
    for file in audio_files:

        # Load audio files
        print("Loading... \n" + file.stem)
        sample_rate, data = wavfile.read(file)  # note that the timestamp is in second
        length = data.shape[0] / sample_rate
        timestamp = np.linspace(0.0, length, data.shape[0]) * 1e3  # start from t = 0 in ms

        # Load the .not.mat file
        notmat_file = file.with_suffix(".wav.not.mat")
        onsets, offsets, intervals, durations, syllables, contexts = read_not_mat(
            notmat_file, unit="ms"
        )

        # Shift this file so that it starts right after the previous one.
        # NOTE(review): timestamps are in ms but (1 / sample_rate) is in
        # seconds -- looks like a unit mismatch; kept as-is so serialized
        # times stay consistent with existing results. Confirm intent.
        if last_timestamp is not None:
            timestamp += last_timestamp + (1 / sample_rate)
        last_timestamp = timestamp[-1]

        # File information (name, start & end timestamp of each file)
        file_list.append(file.stem)
        file_start_list.append(timestamp[0])  # in ms
        file_end_list.append(timestamp[-1])  # in ms

        # Move syllable events onto the serialized time axis. Not done in
        # place: an in-place float add fails on integer onset arrays.
        onsets = onsets + timestamp[0]
        offsets = offsets + timestamp[0]

        # Demarcate song bouts
        onset_list.append(demarcate_bout(onsets, intervals))
        offset_list.append(demarcate_bout(offsets, intervals))
        duration_list.append(demarcate_bout(durations, intervals))
        syllable_list.append(demarcate_bout(syllables, intervals))
        context_list.append(contexts)

    # Organize event-related info into a single dictionary object
    song_info = {
        "files": file_list,
        "file_start": file_start_list,
        "file_end": file_end_list,
        "onsets": onset_list,
        "offsets": offset_list,
        "durations": duration_list,
        "syllables": syllable_list,
        "contexts": context_list,
    }
    return song_info
def load_audio(data_path, format="wav") -> dict:
    """
    Load and concatenate all audio files (e.g., .wav) in the input dir (path)

    The result is also saved as 'AudioData.npy' inside ``data_path``.

    Parameters
    ----------
    data_path : path
        Folder holding the audio files; also where 'AudioData.npy' is written
    format : str
        file extension (e.g., '.wav'), passed on to ``list_files``

    Returns
    -------
    audio_info : dict
        'files' : list of file names, in loading order
        'timestamp' : np.ndarray, one time stamp (in ms) per concatenated sample
        'data' : np.ndarray (float64), flattened samples of all files
        'sample_rate' : sample rate reported for the last file that was read

    Raises
    ------
    FileNotFoundError
        If no audio file with the requested extension is found.
    """
    from scipy.io import wavfile

    from ..utils.functions import list_files

    # List all audio files in the dir
    files = list_files(data_path, format)

    # Store values in these lists
    file_list = []
    chunks = []  # per-file samples, concatenated once after the loop
    sample_rate = None

    # Loop through audio files
    for file in files:

        # Load data file
        print("Loading... " + file.stem)
        sample_rate, data = wavfile.read(file)  # note that the timestamp is in second

        # Flatten so multi-channel data is appended as one long vector
        chunks.append(np.ravel(data))

        # Store results
        file_list.append(file.name)

    if sample_rate is None:
        # Without this guard the code below fails with an obscure
        # "referenced before assignment" error
        raise FileNotFoundError(
            f"No '{format}' audio files found in {data_path}"
        )

    # Single concatenation instead of re-copying the whole array per file
    data_concat = np.concatenate(chunks).astype(np.float64)

    # Create timestamps (in ms). Built from integer sample indices so that the
    # length always matches the data; arange with a float step can be off by one.
    timestamp_concat = np.arange(data_concat.shape[0]) / sample_rate * 1e3

    # Organize data into a dictionary
    audio_info = {
        "files": file_list,
        "timestamp": timestamp_concat,
        "data": data_concat,
        "sample_rate": sample_rate,
    }
    file_name = data_path / "AudioData.npy"
    np.save(file_name, audio_info)

    return audio_info