"""
Load data (from .rhd, .txt, .wav, etc)
"""
import numpy as np
def read_not_mat(notmat, unit="ms"):
    """
    Read syllable annotations from a .not.mat file generated from uisonganal.m

    Parameters
    ----------
    notmat : path
        Name of the .not.mat file (``str`` or ``pathlib.Path``)
    unit : {'ms', 'second'}
        Timescale of the returned values. The stored values are treated as
        milliseconds; 'second' divides them by 1e3. Any other value leaves
        them untouched.

    Returns
    -------
    onsets : np.ndarray
        time stamp for syllable onset (in `unit`)
    offsets : np.ndarray
        time stamp for syllable offset (in `unit`)
    intervals : np.ndarray
        temporal interval between syllables (i.e. syllable gaps) (in `unit`)
    durations : np.ndarray
        durations of each syllable (in `unit`)
    syllables : str
        song syllables (empty string when the file holds no labels)
    contexts : str or None
        social context ('U' for undirected and 'D' for directed), taken from
        the first letter of the last '_'-separated token of the file name;
        None when the file name carries no such tag
    """
    from pathlib import Path

    import scipy.io

    # Parse the file once instead of once per variable
    mat = scipy.io.loadmat(notmat)

    def _first_column(key):
        # Cast to float so integer-typed files can be rescaled / shifted later
        values = np.asarray(mat[key], dtype=np.float64)
        if values.size == 0:  # file without any annotated syllable
            return np.empty(0, dtype=np.float64)
        return values.transpose()[0]

    onsets = _first_column("onsets")  # syllable onset timestamp
    offsets = _first_column("offsets")  # syllable offset timestamp
    intervals = onsets[1:] - offsets[:-1]  # syllable gap durations (interval)
    durations = offsets - onsets  # duration of each syllable

    labels = mat["syllables"]
    syllables = labels[0] if labels.size else ""  # syllable info

    # Extract 'U' (undirected) or 'D' (directed) from the file name;
    # slicing with [:1] tolerates an empty trailing token
    tag = Path(notmat).name.split(".")[0].split("_")[-1]
    contexts = tag[:1].upper()
    if contexts not in ("U", "D"):  # file was not tagged with Undir or Dir
        contexts = None

    # Units are in ms by default, but convert to second with the argument.
    # Out-of-place division: in-place `/=` fails on integer arrays.
    if unit == "second":
        onsets = onsets / 1e3
        offsets = offsets / 1e3
        intervals = intervals / 1e3
        durations = durations / 1e3

    return onsets, offsets, intervals, durations, syllables, contexts
def read_spk_txt(spk_txt_file, *unit_nb, time_unit="second"):
    """
    Read the output .txt from the Offline Sorter.

    column header of the input .txt -> ['Channel', 'Unit', 'Timestamp']
    The first column is disregarded; the second column is the unit number,
    the third the timestamp and every following column stores the waveform.

    Parameters
    ----------
    spk_txt_file : str
        Name of the spk txt file (tab-delimited, one header row)
    unit_nb : int
        Number(s) of the sorted unit(s) to keep. If not specified (default),
        it will read data from all recorded units.
    time_unit : {'second', 'ms'}
        Timescale of the returned timestamps. Timestamps are returned as
        stored by default; 'ms' multiplies them by 1e3.

    Returns
    -------
    spk_ts : np.ndarray
        Spike timestamps
    spk_waveform : np.ndarray
        Spike waveform (spk id x waveform)
    nb_spk : int
        Number of spikes
    """
    # ndmin=2 keeps a file holding a single spike two-dimensional
    spk_info = np.loadtxt(spk_txt_file, delimiter="\t", skiprows=1, ndmin=2)

    if spk_info.size == 0:  # header only, no spike recorded
        return np.empty(0), np.empty((0, 0)), 0

    # Select only the requested unit(s) (there could be multiple isolated
    # units in the same file). `unit_nb` is a tuple, so test membership
    # rather than equality to support any number of units.
    if unit_nb:
        spk_info = spk_info[np.isin(spk_info[:, 1], unit_nb)]

    spk_ts = spk_info[:, 2]  # analysis time stamps
    spk_waveform = spk_info[:, 3:]  # analysis waveform
    nb_spk = spk_waveform.shape[0]  # total number of spikes

    # units are in second by default, but convert to millisecond with the argument
    if time_unit == "ms":
        spk_ts = spk_ts * 1e3

    return spk_ts, spk_waveform, nb_spk
def read_rhd(filename):
    """
    Load an Intan Technologies RHD2000 data file (evaluation board GUI output).

    Thin wrapper that forwards `filename` to the package's Intan loader and
    hands back its result unchanged (a dictionary, kept for future
    extensibility).
    """
    from ..utils.intan.load_intan_rhd_format import read_rhd as _intan_reader

    return _intan_reader(filename)
def load_song(data_path, format="wav") -> dict:
    """
    Obtain event info & serialized timestamps for song & neural analysis

    Search all audio files in the 'Songs' sub-directories and read from the
    associated .not.mat files to gather the info into a single dictionary.
    Files are laid end to end on one serialized time axis (in ms): the first
    file starts at 0 and each following file starts one sample period after
    the end of the previous one.

    Parameters
    ----------
    data_path : path
        Directory to search (``pathlib.Path``). If it is itself named
        'Songs' it is used directly, otherwise every 'Songs' folder below it
        is used.
    format : str
        file extension passed to ``list_files`` (e.g., 'wav')

    Returns
    -------
    song_info : dict
        One entry per audio file under each key: 'files' (file stem),
        'file_start' / 'file_end' (serialized time, ms), 'onsets',
        'offsets', 'durations', 'syllables' (output of ``demarcate_bout``)
        and 'contexts'.
    """
    from scipy.io import wavfile

    from ..analysis.functions import demarcate_bout
    from ..utils.functions import list_files

    # Find the folder(s) that hold song data (not calls)
    if data_path.stem == "Songs":
        song_dirs = [data_path]
    else:
        song_dirs = list(data_path.rglob("Songs"))

    # List all audio files in those folders
    audio_files = []
    for song_dir in song_dirs:
        audio_files += list_files(song_dir, format)

    # Store values in these lists
    file_list = []
    file_start_list = []
    file_end_list = []
    onset_list = []
    offset_list = []
    duration_list = []
    syllable_list = []
    context_list = []

    # End of the previous file on the serialized axis (ms); None before the first file
    previous_end = None

    # Loop through audio files
    for file in audio_files:
        print("Loading... " + file.stem)
        sample_rate, data = wavfile.read(file)

        # Only the first and last timestamps of a file are ever used, so the
        # per-sample timestamp array is not materialised.
        length_ms = data.shape[0] / sample_rate * 1e3  # file length in ms

        if previous_end is None:
            file_start = 0.0  # start from t = 0 in ms
        else:
            # Leave one sample period between files. The axis is in ms, so
            # the period must be expressed in ms as well (1e3 / sample_rate).
            file_start = previous_end + 1e3 / sample_rate
        file_end = file_start + length_ms
        previous_end = file_end

        # Load the .not.mat file
        notmat_file = file.with_suffix(".wav.not.mat")
        onsets, offsets, intervals, durations, syllables, contexts = read_not_mat(
            notmat_file, unit="ms"
        )

        # File information (name, start & end timestamp of each file)
        file_list.append(file.stem)
        file_start_list.append(np.float64(file_start))  # in ms
        file_end_list.append(np.float64(file_end))  # in ms

        # Shift syllable times onto the serialized axis (out of place so that
        # integer-typed arrays are handled too)
        onsets = onsets + file_start
        offsets = offsets + file_start

        # Demarcate song bouts
        onset_list.append(demarcate_bout(onsets, intervals))
        offset_list.append(demarcate_bout(offsets, intervals))
        duration_list.append(demarcate_bout(durations, intervals))
        syllable_list.append(demarcate_bout(syllables, intervals))
        context_list.append(contexts)

    # Organize event-related info into a single dictionary object
    song_info = {
        "files": file_list,
        "file_start": file_start_list,
        "file_end": file_end_list,
        "onsets": onset_list,
        "offsets": offset_list,
        "durations": duration_list,
        "syllables": syllable_list,
        "contexts": context_list,
    }
    return song_info
def load_audio(data_path, format="wav") -> dict:
    """
    Load and concatenate all audio files (e.g., .wav) in the input dir (path)

    The concatenated result is also saved as 'AudioData.npy' inside
    `data_path`.

    Parameters
    ----------
    data_path : path
        Directory holding the audio files (``pathlib.Path``)
    format : str
        file extension passed to ``list_files`` (e.g., 'wav')

    Returns
    -------
    audio_info : dict
        'files' : names of the loaded files, in loading order
        'timestamp' : one timestamp per concatenated sample (in ms)
        'data' : concatenated samples as a flat float64 array
        'sample_rate' : sample rate of the last file read

    Raises
    ------
    FileNotFoundError
        If no audio file is found in `data_path`.
    """
    from scipy.io import wavfile

    from ..utils.functions import list_files

    # List all audio files in the dir
    files = list(list_files(data_path, format))
    if not files:
        raise FileNotFoundError(f"No '{format}' files found in {data_path}")

    file_list = []
    chunks = []
    sample_rate = None

    # Loop through audio files
    for file in files:
        print("Loading... " + file.stem)
        sample_rate, data = wavfile.read(file)
        # Flatten each file, as the previous np.append-based code did
        chunks.append(np.ravel(data))
        file_list.append(file.name)

    # Concatenate once instead of re-copying the whole buffer per file
    data_concat = np.concatenate(chunks).astype(np.float64, copy=False)

    # Create timestamps (ms). An integer arange divided by the sample rate
    # always yields exactly one timestamp per sample; arange with a float
    # step can be off by one element.
    # NOTE(review): the sample rate of the last file is applied to all files.
    timestamp_concat = np.arange(data_concat.shape[0]) / sample_rate * 1e3

    # Organize data into a dictionary
    audio_info = {
        "files": file_list,
        "timestamp": timestamp_concat,
        "data": data_concat,
        "sample_rate": sample_rate,
    }

    # Stored as a pickled object array (np.load needs allow_pickle=True)
    file_name = data_path / "AudioData.npy"
    np.save(file_name, audio_info)
    return audio_info