%% SCRIPT DESCRIPTION % This script features the main preprocessing and cleaning of the data before the ICA process begin. Data is flagged for high standard deveiavtion % by channel to remove comically bad channels. The data is then rereferenced to the average site, and high/low pass filtered. % Channel neighbor correlations are then calculated to find odd channels and bridged channels. The outermost channel is also flagged to reduce the rank of the data. % After another rereference channel correlations are calculated by time epoch and flagged. The file is saved containing the, marks structure, then saved again % with all of the mentioned flags puregd from the data in order to prime it for ICA. % %% From Config key_strswap Description %----------------------------------------------------------------------------------------------------------------------------- % in_path = [in_path] Relative path to input data files assuming cd = work_path % montage_info = [montage_info] Montage information to fit channel locations on a standard surface. % This can be a transformations matrix (e.g. [-.14,-22,-48,-.07,0,-1.57,1080,1260,1240]) % ... or a a file containing a transformation matrix (e.g. [batch_dfn]_transmat.mat). % ... or a BIDS channel location tsv file (assuming that during BIDS init the channels were coregistered to the standard surface). % staging_script = [staging_script] Script file path/name that creates marks based on events in the raw data file. % recur_sec = [recur_sec] Recurrence (sec) for artifact detection epoching (e.g. .5, Default 1) % limit_sec = [limit_sec] Limits (sec) for artifact detection epoching (e.g. [-.5 0], Default [0 recur_sec]) % sd_t_meth = [sd_t_meth] Method used for flagging epochs (e.g. 'q' (quantiles), or 'na' (default)) % sd_t_vals = [sd_t_vals] Percentage trim for confidence intervals during epoch standard deviation criteria (e.g. [.3 .7]) % sd_t_o = [sd_t_o] z threshold for flagging epochs during standard deviation criteria (e.g. 6) % sd_t_f_meth = [sd_t_f_meth] Fixed method used for flagging epochs (should be 'fixed') % sd_t_f_vals = [sd_t_f_vals] Percentage trim for confidence intervals during epoch standard deviation criteria (e.g. [.3 .7], leave empty for 'fixed') % sd_t_f_o = [sd_t_f_o] z threshold for flagging epochs during fixed standard deviation criteria (e.g. .2) % sd_t_pad = [sd_t_pad] Number of windows to pad onto each side of the ch_sd time flag % sd_ch_meth = [sd_ch_meth] Method used for flagging channels (e.g. 'q' (quantiles), or 'na' (default)) % sd_ch_vals = [sd_ch_vals] Percentage trim for confidence intervals during channel standard deviation criteria (e.g. [.3 .7]) % sd_ch_o = [sd_ch_o] z threshold for flagging channels during standard deviation criteria (e.g. 6) % sd_ch_f_meth = [sd_ch_f_meth] Fixed method used for flagging channels (should be 'fixed') % sd_ch_f_vals = [sd_ch_f_vals] Percentage trim for confidence intervals during channel standard deviation criteria (e.g. [.3 .7], leave empty for 'fixed') % sd_ch_f_o = [sd_ch_f_o] z threshold for flagging channel during fixed standard deviation criteria (e.g. 6) % ref_loc_file = [ref_loc_file] Name of file containing the reference locations (including the relative path) % low_bound_hz = [low_bound_hz] Lower bound of the filter bass-band % high_bound_hz = [high_bound_hz] Upper bound of the filter bass-band % save_f_res = [save_f_res] 1 if you want to save the filter residuals (Default: 1) % n_nbr_ch = [n_nbr_ch] Number of channels to use in nearest neighbour r calculation (for channel criteria) % r_ch_meth = [r_ch_meth] Method used for flagging low r channels (e.g. 'q' (quantiles), or 'na' (default)) % r_ch_vals = [r_ch_vals] Percentage trim for confidence intervals during low r channel standard deviation criteria (e.g. [.3 .7]) % r_ch_o = [r_ch_o] z threshold for flagging channel during neighbour r criteria (e.g. 3) % r_ch_f_meth = [r_ch_f_meth] Fixed method used for flagging low r channels (should be 'fixed') % r_ch_f_vals = [r_ch_f_vals] Percentage trim for confidence intervals during low r channel standard deviation criteria (e.g. [.3 .7], leave empty for 'fixed') % r_ch_f_o = [r_ch_f_o] z threshold for flagging channel during fixed neighbour r criteria (e.g. 3) % bridge_trim = [bridge_trim] Percentage trim for z calculation of bridged channels (e.g. 40 = 20% top and bottom) % bridge_z = [bridge_z] z threshold for flagging channel during bridging criteria (e.g. 8) % n_nbr_t = [n_nbr_t] Number of channels to use in nearest neighbour r calculation (for epoch criteria) % r_t_meth = [r_t_meth] Method used for flagging low r epochs (e.g. 'q' (quantiles), or 'na' (default)) % r_t_vals = [r_t_vals] Percentage trim for confidence intervals during low r epoch standard deviation criteria (e.g. [.3 .7]) % r_t_o = [r_t_o] z threshold for flagging epochs during neighbour r criteria (e.g. 3) % r_t_f_meth = [r_t_f_meth] Fixed method used for flagging low r epochs (should be 'fixed') % r_t_f_vals = [r_t_f_vals] Percentage trim for confidence intervals during low r epoch standard deviation criteria (e.g. [.3 .7], leave empty for 'fixed') % r_t_f_o = [r_t_f_o] z threshold for flagging epochs during fixed neighbour r criteria (e.g. 3) % min_gap_ms = [min_gap_ms] Minimum time (ms) to allow between periods marked for rejection % out_path = [out_path] Relative path to output data files assuming cd = work_path % amica_param_file = [amica_param_file] template amicadefs.param file to modify % amica_threads_s02 = [amica_threads_s02] number of threads to use for running s02 amica script (Default: 8) %% CHECK FOR OUTPUT PATH AND CRETAE IF NECESSARY if ~exist('[out_path]/[batch_dfn,/,-1]','dir'); disp('Making directory [out_path]/[batch_dfn,/,-1]'); mkdir [out_path]/[batch_dfn,/,-1] end %% LOAD DATASET %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %Check if it is a .set file logging_log('NOTICE', 'Starting scalpart'); [path name ext] = fileparts('[batch_dfn]'); if ~strcmp(ext,'.set') logging_log('ERROR', sprintf('Wrong file format: %s should be .set',ext)); exit(1); end %Load the .set file (this will need to be BIDS data file format compliant) logging_log('INFO', 'Loading set file: [batch_dfn]...'); EEG = pop_loadset('filepath','[batch_dfp]','filename','[batch_dfn]'); EEG = eeg_checkset( EEG ); if ~isfield(EEG,'marks'); if isempty(EEG.icaweights) EEG.marks=marks_init(size(EEG.data)); else EEG.marks=marks_init(size(EEG.data),min(size(EEG.icaweights))); end end EEG.marks = marks_add_label(EEG.marks,'time_info', ... {'init_ind',[0,0,1],[1:EEG.pnts]}); %execute the staging script if specified. if exist('[staging_script]'); [ssp,ssn,sse]=fileparts('[staging_script]'); addpath(ssp); eval(ssn); end %add channel coregistration procedure here. %warp locations to standard head surface: if ~isempty('[montage_info]'); if isempty(str2num('[montage_info]')); %if it is a BIDS channel tsv, load the tsv,sd_t_f_vals %else read the file that is assumed to be a transformation matrix. else EEG = warp_locs( EEG,'[ref_loc_file]', ... 'transform',[[montage_info]], ... 'manual','off'); end end % Apply average re-reference %chan_inds = marks_label2index(EEG.marks.chan_info,{'manual'},'indexes','invert','on'); %chan_m = mean(EEG.data(chan_inds,:),1); %chan_m_mat = repmat(chan_m,size(EEG.data,1),1); %EEG.data = EEG.data - chan_m_mat; %clear chan_m_mat; % Window the continuous data logging_log('INFO', 'Windowing the continous data...'); EEG = marks_continuous2epochs(EEG,'recurrence',[[recur_sec]],'limits',[[limit_sec]]); logging_log('INFO', 'LOADED DATASET...'); % rereference to selected channels.. chan_inds = marks_label2index(EEG.marks.chan_info,{'manual'},'indexes','invert','on'); epoch_inds = marks_label2index(EEG.marks.time_info,{'manual'},'indexes','invert','on'); [EEG,trim_ch_sd]=chan_variance(EEG,'data_field','data', ... 'chan_inds',chan_inds, ... 'epoch_inds',epoch_inds, ... 'plot_figs','off'); chan_dist=zeros(size(trim_ch_sd)); for i=1:size(trim_ch_sd,2); chan_dist(:,i)=(trim_ch_sd(:,i)-median(trim_ch_sd(:,i)))/diff(quantile(trim_ch_sd(:,i),[.3,.7])); end mean_chan_dist=mean(chan_dist,2); m=median(mean_chan_dist); q=quantile(mean_chan_dist,[.3,.7]); refchans=find(mean_chan_distve_trimmean(msr,[bridge_trim],1)+ve_trimstd(msr,[bridge_trim],1)*[bridge_z]); logging_log('INFO', 'IDENTIFIED BRIDGED CHANNELS...'); % Edit the channel flag info structure logging_log('INFO', 'Updating chflaginfo structure...'); lnkflags = zeros(EEG.nbchan,1); lnkflags(chan_inds(flag_b_chan_inds))=1; EEG.marks = marks_add_label(EEG.marks,'chan_info', ... {'bridge',[.7,1,.7],[.2,1,.2],-1,lnkflags}); logging_log('INFO', 'EDITED CHANFLAGINFO STRUCT...'); % Combine low_rand bridge chan_info flags into 'manual'... EEG = pop_marks_merge_labels(EEG,'chan_info',{'low_r','bridge'},'target_label','manual'); logging_log('INFO', 'COMBINED MARKS STRUCTURE INTO MANUAL FLAGS...'); %% FLAG RANK CHAN %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Flags the channel that is the least unique (the best channel to remove prior % to ICA in order to account for the rereference rank deficiency. logging_log('INFO', 'Updating chflaginfo structure...'); chan_inds = marks_label2index(EEG.marks.chan_info,{'manual'},'indexes','invert','on'); [r_max,rank_ind] = max(data_r_ch(chan_inds)); rankflags = zeros(EEG.nbchan,1); rankflags(chan_inds(rank_ind))=1; EEG.marks = marks_add_label(EEG.marks,'chan_info', ... {'rank',[.1,.1,.24],[.1,.1,.24],-1,rankflags}); logging_log('INFO', 'EDITED CHANFLAGINFO STRUCT...'); %% REREFERENCE TO INTERPOLATED AVERAGE SITE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Rereference the data to an average interpolated site containined 19 channels logging_log('INFO', 'Rereferencing to averaged interpolated site...'); chan_inds = marks_label2index(EEG.marks.chan_info,{'manual'},'indexes','invert','on'); EEG = interp_ref(EEG,'[ref_loc_file]','chans',chan_inds); EEG = eeg_checkset(EEG); logging_log('INFO', 'REREFERENCED TO INTERPOLATED AVERAGE SITE...'); %% CALCULATE NEAREST NEIGHBOUR R VALUES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Similarly to the neighbor r calculation done between channels this section looks % at the correlation, but between all channels and for epochs of time. Time segmenents % are flagged for removal. logging_log('INFO', 'Calculating nearest neighbour r array for window criteria...'); chan_inds = marks_label2index(EEG.marks.chan_info,{'manual'},'indexes','invert','on'); epoch_inds = marks_label2index(EEG.marks.time_info,{'manual'},'indexes','invert','on'); [EEG,data_r_t,~,~,~] = chan_neighbour_r(EEG, ... [n_nbr_t],'max', ... 'chan_inds',chan_inds, ... 'epoch_inds',epoch_inds, ... 'plotfigs','off'); logging_log('INFO', 'CALCULATED NEAREST NEIGHBOUR R VALUES...'); % Create the window criteria vector for flagging low_r time_info... logging_log('INFO', 'Assessing epoch r distributions criteria...') [~,flag_r_t_inds]=marks_array2flags(data_r_t, ... 'flag_dim','col', ... 'init_method','[r_t_meth]', ... 'init_vals',[r_t_vals], ... 'init_dir','neg', ... 'init_crit',[r_t_o], ... 'flag_method','[r_t_f_meth]', ... 'flag_vals',[ [r_t_f_vals] ], ... 'flag_crit',[r_t_f_o], ... 'plot_figs','off'); logging_log('INFO', 'CREATED EPOCH CRITERIA VECTOR...'); % Edit the time flag info structure logging_log('INFO', 'Updating latflaginfo structure...'); lowr_epoch_flags = zeros(size(EEG.data(1,:,:))); lowr_epoch_flags(1,:,epoch_inds(flag_r_t_inds))=1; EEG.marks = marks_add_label(EEG.marks,'time_info', ... {'low_r',[0,1,0],lowr_epoch_flags}); clear lowr_epoch_flags; logging_log('INFO', 'TIME TO: UPDATE REJECTION STRUCTURE...'); % Combine low_r time_info flags into 'manual'... EEG = pop_marks_merge_labels(EEG,'time_info',{'low_r'},'target_label','manual'); logging_log('INFO', 'COMBINED MARKS STRUCTURE INTO MANUAL FLAGS...'); % Concatenate epoched data back to continuous data logging_log('INFO', 'Concatenating windowed data...'); EEG = marks_epochs2continuous(EEG); EEG = eeg_checkset(EEG,'eventconsistency'); logging_log('INFO', 'CONCATENATED THE WINDOWED DATA INTO CONTINUOUS DATA...'); EEG=pop_marks_flag_gap(EEG,{'manual'},[min_gap_ms],'mark_gap',[.8,.8,.8],'offsets',[0 0],'ref_point','both'); % Combine mark_gap time_info flags into 'manual'... EEG = pop_marks_merge_labels(EEG,'time_info',{'mark_gap'},'target_label','manual'); logging_log('INFO', 'COMBINED MARKS STRUCTURE INTO MANUAL FLAGS...'); %%%----------------------------------------------------------------------- %% SAVE sa.set FILE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% logging_log('INFO', 'Saving file: [batch_dfn,_,-1]_sa.set...'); if ~exist('[out_path]/[batch_dfn,/,-1]','dir'); mkdir('[out_path]/[batch_dfn,/,-1]'); end EEG = pop_saveset( EEG, 'filename','[out_path]/[batch_dfn,_,-1]_sa.set'); logging_log('INFO', 'SAVED sa FILE...'); %%%----------------------------------------------------------------------- %% PREPARE FILE FOR AMICA %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Purges the flagged data to make sure it is primed for ICA and creates the amica paramaeter file. % Removed flagged channels and time segments sprintf('%s','Purging flagged channels...\n'); EEG = pop_marks_select_data(EEG,'channel marks',[],'labels',{'manual','rank'},'remove','on'); EEG = pop_marks_select_data(EEG,'time marks',[],'labels',{'manual'},'remove','on'); EEG = eeg_checkset(EEG); logging_log('INFO', 'TIME TO: PURGE DATA...'); % Make sure EEG.data is doubles for use in amica EEG.data = double(EEG.data); % Save the SA purge file logging_log('INFO',sprintf('%s','Saving file: [out_path]/[batch_dfn,_,-1]_sa_purge.set...')); EEG = pop_saveset( EEG, 'filename','[out_path]/[batch_dfn,_,-1]_sa_purge.set'); % Save diagnostic arrays try OCTAVE_VERSION; save('-mat7-binary','[out_path]/[batch_dfn,_,-1]_data_sd_t.mat','data_sd_t'); save('-mat7-binary','[out_path]/[batch_dfn,_,-1]_data_sd_ch.mat','data_sd_ch'); save('-mat7-binary','[out_path]/[batch_dfn,_,-1]_data_r_ch.mat','data_r_ch'); save('-mat7-binary','[out_path]/[batch_dfn,_,-1]_data_r_t.mat','data_r_t'); catch save('[out_path]/[batch_dfn,_,-1]_data_sd_t.mat','data_sd_t'); save('[out_path]/[batch_dfn,_,-1]_data_sd_ch.mat','data_sd_ch'); save('[out_path]/[batch_dfn,_,-1]_data_r_ch.mat','data_r_ch'); save('[out_path]/[batch_dfn,_,-1]_data_r_t.mat','data_r_t'); end % Save the AMICA parameter file swapstr = file_strswap('[amica_param_file]', ... '[repstr_fdt_fname]','[out_path]/[batch_dfn,_,-1]_sa_purge.fdt', ... '[repstr_outpath]','[out_path]/[batch_dfn,_,-1]_amicaout_init', ... '[repstr_nbchan]', num2str(EEG.nbchan), ... '[repstr_pnts]', sprintf('%12.0f',EEG.pnts), ... '[repstr_tpp]', '[amica_threads_s02]', ... '[repstr_pca]', num2str(EEG.nbchan)); fidparam = fopen('[out_path]/[batch_dfn,_,-1]_init.param','w'); fprintf(fidparam,'%s',swapstr); fclose(fidparam); % CREATE THE AMICA OUTPUT FOLDER %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Needed for MPI jobs on cluster only in order to prevent a race effect bug. [~] = rmdir('[out_path]/[batch_dfn,_,-1]_amicaout_init','s'); mkdir [out_path]/[batch_dfn,_,-1]_amicaout_init; logging_log('INFO', 'Created Amica Output Folder'); logging_log('INFO', 'Scalpart complete'); logging_log('INFO', 'Scheduler: [scheduler]'); print_chan_sample(EEG);