Commit 26fa1d2f authored by Brad Kennedy
Browse files

sbatch support, more robust config parsing, algorithmic setting of memory and timelimits

parent 0199ba09
......@@ -101,78 +101,7 @@ for i=1:length(batchconfig);
'Type', PropertyType('cellstr', 'column'), ...
'Category', ['Level ',num2str(i),' - ',batchconfig(i).file_name], ...
'DisplayName', ['qsub_options'], ...
'Description', ['(-r|-W)' char(9) 'timelimit (THIS IS REQUIRED FOR EVERY JOB)' char(10) ...]) ...
char(9) 'provide a runtime limit (elapsed, wallclock time, not summed' char(10) ...
char(9) 'across cpus) specified in any of the following forms:' char(10) ...
char(9) '15' char(9) '(assumed to be minutes)' char(10) ...
char(9) '15m' char(9) '(same)' char(10) ...
char(9) '.25h' char(9) '(same)' char(10) ...
char(9) '2.5h' char(9) '(2 hours 30 minutes)' char(10) ...
char(9) '3.5d' char(9) '(3 days 12 hours)' char(10) ...
char(9) '84:0' char(9) '(same, in LSF''s hours:minutes format)' char(10) ...
char(10) ...
'-i ifile' char(9) 'job reads inputs from ''ifile'' (no default)' char(10) ...
'-o ofile' char(9) 'job output to ''ofile'' (REQUIRED FOR EVERY JOB)' char(10) ...
'-e efile' char(9) 'job errors go to ''efile'' (default: same as -o)' char(10) ...
'' char(10) ...
'-t|--test' char(9) '''test'' mode: short but immediate (preemptive)' char(10) ...
'-q queue' char(9) 'queue name (serial, threaded, mpi; default serial)' char(10) ...
'-f flag' char(9) 'specify certain flags to modify behavior. flags include:' char(10) ...
char(9) 'mpi, interactive, test, mail, permitcoredump' char(10) ...
char(10) ...
'-n ncpus' char(9) 'require n cpus or cores (default 1)' char(10) ...
'-N nnodes' char(9) 'require n nodes (does not imply exclusive use)' char(10) ...
char(10) ...
'--ppn=ppn' char(9) 'start ppn proceses per node' char(10) ...
'--tpp=tpp' char(9) 'permit tpp threads per process (OMP_NUM_THREADS)' char(10) ...
'--gpp=gpp' char(9) 'allocate gpp gpus per process' char(10) ...
char(10) ...
'--mpp=' char(10) ...
'--memperproc=' char(10) char(9) 'amount of memory required by each process. may be specified' char(10) ...
char(9) 'like 64M or 2.5G (M=2^20, G=2^30). for an MPI job, this is ' char(10) ...
char(9) 'the per-rank size. for threaded jobs, it''s the process size,' char(10) ...
char(9) '(that is, not per-thread.)' char(10) ...
char(10) ...
'--nodes=clu[1-4]' char(10) ...
char(9) 'require a specific set of nodes. eg wha[1-4] or' char(10) ...
char(9) 'req666.' char(10) ...
char(10) ...
'--pack' char(9) 'require a minimal number of nodes, so processes occupy' char(10) ...
char(9) 'all cpus per node.' char(10) ...
char(10) ...
'--mail-start' char(9) 'notify when the job starts.' char(10) ...
'--mail-end' char(9) 'notify when the job ends (either normally or not).' char(10) ...
'--mail-abort' char(9) 'notify when the job ends abnormally.' char(10) ...
'-m|--mail' char(9) '(compatibility - same as mail-end)' char(10) ...
char(9) 'this email only goes to your account''s email address.' char(10) ...
char(10) ...
'-w|--waitfor=jobid[,jobid...]]' char(10) ...
char(9) 'wait for a list of jobs to complete' char(10) ...
char(10) ...
'-j|--jobname' char(9) 'provides a name for the job.' char(10) ...
char(10) ...
'--project' char(9) 'specify a project (group) for accounting purposes.' char(10) ...
char(9) 'defaults to the user''s group. may also be given via' char(10) ...
char(9) 'SQ_PROJECT environment variable.' char(10) ...
char(10) ...
'--idfile=fname' char(10) char(9) 'write the jobid into a file named ''fname''.' char(10) ...
char(10) ...
'--nompirun' char(9) 'don''t automatically invoke mpirun for mpi jobs.' char(10) ...
char(9) 'note that you should probably look at mpirun parameters' char(10) ...
char(9) 'sqsub uses, so that you get layout and binding right.' char(10) ...
char(10) ...
'-f flag' char(9) 'specify certain flags to modify behavior.' char(10) ...
char(9) 'Universal flags include: mpi, threaded, test, mail' char(10) ...
char(9) 'on some clusters, other flags have added meaning, such ' char(10) ...
char(9) 'xeon/opteron on Hound, and dual/quad on Goblin and ' char(10) ...
char(9) 'selecting sub-clusters on Kraken (bal/bru/dol/meg/tig/wha/nar)' char(10) ...
char(10) ...
'-h or --help' char(9) 'show brief usage message' char(10) ...
'--man' char(9) 'show man page' char(10) ...
char(10) ...
'-v|--verbose' char(9) 'verbose mode: shows debugging-type details' char(10) ...
'-d|--debug' char(9) 'debug mode: don''t actually submit, but show the command' char(10) ...
]) ...
'Description', 'TODO') ...
PropertyGridField(['qsub[',num2str(i),'].memory'], batchconfig(i).memory, ...
'Type', PropertyType('char', 'row'), ...
'Category', ['Level ',num2str(i),' - ',batchconfig(i).file_name], ...
......
......@@ -5,10 +5,13 @@ function out = config_parse(in)
scan = scan(~cellfun(@isempty, strtrim(scan)));
% Get first non space
indent_vals = cellfun(@max, strfind(scan, ' '), 'UniformOutput', false);
% Adjust indexes
indent_vals = cellfun(@sub_one_or_zero, indent_vals, 'UniformOutput', false);
indent_vals = cellfun(@max, strfind(scan, sprintf('\t')), 'UniformOutput', false);
indent_vals_space = cellfun(@max, strfind(scan, sprintf(' ')), 'UniformOutput', false);
for i=1:numel(indent_vals)
indent_vals{i} = ~isempty(indent_vals{i}) || ~isempty(indent_vals_space{i});
end
scan = strtrim(scan);
......@@ -32,11 +35,5 @@ function out = config_parse(in)
out = t;
end
function x = sub_one_or_zero(x)
% Normalise an empty value to 0; a non-empty x is returned unchanged.
% Note: despite the name, nothing is subtracted from x here.
if isempty(x)
x = 0;
end
end
function out = config_parse_multilevel(in)
% CONFIG_PARSE_MULTILEVEL Build a tree from indentation-structured text.
%   out = config_parse_multilevel(in) splits the char array IN on newlines,
%   discards blank lines and adds every remaining line (trimmed) to a tree
%   created by tree_new(). Each line becomes a child of the closest
%   preceding line whose indentation is strictly smaller; a line with no
%   such predecessor is attached to node 1.
%
%   The indentation of a line is the number of leading space/tab
%   characters, so both space- and tab-indented files are supported. For
%   files indented with one tab per level this matches the previous
%   behaviour, without being confused by tabs that appear later in a line.
%
%   Empty or all-blank input returns the tree exactly as tree_new() made it.
    scan = regexp(in, '[\n]+', 'split');
    scan = scan(~cellfun(@isempty, strtrim(scan)));

    t = tree_new();
    if isempty(scan)
        % Nothing to add; avoid indexing scan{1} on empty input.
        out = t;
        return
    end

    % Width of the leading whitespace run of every line (0 when none).
    % Must be measured before the lines are trimmed.
    indent_vals = cellfun( ...
        @(l) numel(regexp(l, '^[ \t]*', 'match', 'once')), scan);
    scan = strtrim(scan);

    % The first line always hangs off node 1.
    [t, first_node] = t.add(t, 1, scan{1});

    % Stack of open ancestors. parents_nodes carries one extra leading
    % entry (node 1) so parents_nodes(end) stays valid even when every
    % line has been popped from parents_lines.
    parents_nodes = [1, first_node];
    parents_lines = 1;

    for i = 2:numel(scan)
        % Pop every ancestor that is indented at least as deep as line i;
        % what remains on top is the parent of line i.
        while ~isempty(parents_lines) ...
                && indent_vals(parents_lines(end)) >= indent_vals(i)
            parents_lines = parents_lines(1:end-1);
            parents_nodes = parents_nodes(1:end-1);
        end
        [t, node] = t.add(t, parents_nodes(end), scan{i});
        parents_nodes(end+1) = node; %#ok<AGROW>
        parents_lines(end+1) = i; %#ok<AGROW>
    end
    out = t;
end
function x = sub_one_or_zero(x)
% Map an empty input to 0; any non-empty input is handed back untouched.
    if ~isempty(x)
        return
    end
    x = 0;
end
......@@ -32,7 +32,7 @@
%write to the Free Software Foundation, Inc., 59 Temple Place,
%Suite 330, Boston, MA 02111-1307 USA
function batch_config=init_batch_config
function batch_config=init_batch_config()
batch_config.file_name='';
batch_config.exec_func='ef_current_base';
......
......@@ -43,53 +43,37 @@
function batch_config=text2struct_bc(fname)
batch_config.file_name='';
batch_config = init_batch_config();
batch_config.exec_func='';
batch_config.replace_string={''};
batch_config.order=[];
batch_config.session_init='';
batch_config.job_name='';
batch_config.mfile_name='';
batch_config.job_init='';
batch_config.m_init='';
batch_config.qsub_options={''};
batch_config.memory='';
batch_config.time_limit='';
batch_config.mpi='';
batch_config.num_processors='';
batch_config.software='';
batch_config.program_options={''};
keywords=fieldnames(batch_config);
fileID = fopen(fname);
C = textscan(fileID,'%s', 'delimiter', '\n');
C = fread(fileID, '*char')';
t = config_parse(C);
fclose(fileID);
cell_str={C{1}{:}}';
for i=1:length(keywords);
try
key_ind(i)=find(strcmp(keywords{i},cell_str));
catch
key_ind(i)=0;
for i=1:length(keywords)
% Child elements
key_val = t.get_children_contents_of_match(t, ...
@(x) strcmp(x, keywords{i}), 1);
if isempty(key_val)
continue
end
end
key_ind_sort=sort(key_ind);
key_ind_sort(length(key_ind_sort)+1)=length(cell_str)+1;
for i=1:length(keywords);
field_ind=i;
if key_ind(field_ind)+1<=key_ind_sort(find(key_ind_sort==key_ind(field_ind))+1)-1;
key_val=cell_str(key_ind(field_ind)+1:key_ind_sort(find(key_ind_sort==key_ind(field_ind))+1)-1);
if ischar(eval(['batch_config.',keywords{field_ind}]));
batch_config=setfield(batch_config,keywords{i},key_val{:});
end
if iscell(eval(['batch_config.',keywords{field_ind}]));
batch_config=setfield(batch_config,keywords{i},key_val);
end
if isnumeric(eval(['batch_config.',keywords{field_ind}]));
batch_config=setfield(batch_config,keywords{i},str2num(key_val{:}));
% key_val is always a cell array
if iscell(batch_config.(keywords{i}))
batch_config.(keywords{i}) = key_val';
elseif ischar(batch_config.(keywords{i}))
if numel(key_val) ~= 1
error('key_val must be one sized for %s which is a char', ...
keywords{i})
end
batch_config.(keywords{i}) = key_val{1};
elseif isnumeric(batch_config.(keywords{i}))
batch_config.(keywords{i}) = str2num(key_val{:});
end
end
......@@ -22,9 +22,9 @@ function dimensions = ef_get_eegdims(dfpath,dfname)
if isempty(allDims)
allDims = struct();
end
if ~isfield(allDims,dfname)
[~,name,~] = fileparts(dfname); % remove extension so filename can be used as a field (no '.' allowed)
if ~isfield(allDims,name)
EEG_temp = pop_loadset('filename',dfname,'filepath',dfpath,'loadmode','info');
[~,name,~] = fileparts(dfname); % remove extension so filename can be used as a field (no '.' allowed)
allDims.(name) = struct('channels',EEG_temp.nbchan,'samples',EEG_temp.pnts);
end
dimensions = allDims.(name);
......
......@@ -99,8 +99,9 @@ for bfni = 1:length(job_struct(length(job_struct)).batch_dfn)
'histfname', job_struct(end).batch_hfn, ...
'jobid', jobo(job_struct(end).ordernum).id{bfni}, ...
'execstr', job_struct(end).exec_str{bfni}, ...
'execpath', ...
strrep(fullfile(job_struct(end).context_config.log,job_struct(end).m_path),'\','/'));
'execpath', strrep(fullfile(job_struct(end).context_config.log, ...
job_struct(end).m_path),'\','/'), ...
'datapath',job_struct(length(job_struct)).batch_dfp);
end
%% WRITE [SUBSTRINIT,QSUBSTR] TO A *.SUB TEXT FILE IN THE LOG PATH...
......@@ -163,11 +164,58 @@ if ~isempty(g.jobid);
qsubstr_tmp=sprintf('%s --dependency=afterok:%s',qsubstr_tmp, g.jobid);
end
%sbatch_options...
if ~isempty(batch_config.qsub_options);
if ~isempty(batch_config.qsub_options)
for i = 1:length(batch_config.qsub_options)
qsubstr_tmp = [qsubstr_tmp ' ' batch_config.qsub_options{i}];
end
end
dimensions = ef_get_eegdims(g.datapath,g.datafname);
% These variables are eval'd so they will show up as a warning
c = dimensions.channels; %#ok<NASGU> % channels
s = dimensions.samples; %#ok<NASGU> % samples
% memory_allocation...
% When batch_config.memory is set, append a "--mem=<amount><unit>" option
% to the submit string.
if ~isempty(batch_config.memory)
% The last character of the field is passed through verbatim as the unit.
byte_size = batch_config.memory(end);
% Everything before the unit is eval'd, so it may be an expression (the
% surrounding script defines c and s for this purpose).
% NOTE(review): eval executes whatever text the config file supplies;
% confirm that config files are trusted input.
memory_alloc = ['--mem=' num2str(eval(batch_config.memory(1:end-1))) byte_size];
qsubstr_tmp=sprintf('%s %s',qsubstr_tmp,memory_alloc);
end
% time_limit...
% Convert batch_config.time_limit, written as "<expression><unit>" with the
% unit being one of s, m or h (case-insensitive), into an HH:MM:SS string
% and append it to the submit string as "--time=HH:MM:SS".
if ~isempty(batch_config.time_limit)
    % The last character is the unit; everything before it is the amount.
    time_var = lower(batch_config.time_limit(end));
    % The amount is eval'd so it may be an expression (the surrounding
    % script defines c and s for this purpose).
    % NOTE(review): eval executes whatever text the config file supplies;
    % confirm that config files are trusted input.
    time_span = eval(batch_config.time_limit(1:end-1));

    % Make everything into seconds
    switch time_var
        case 's'
            % already in seconds
        case 'm'
            time_span = time_span * 60;
        case 'h'
            % one hour is 3600 seconds (was 360, which broke fractional hours)
            time_span = time_span * 3600;
        otherwise
            error('End of time_limit field needs to be one of {s, m, h}');
    end

    % Drop fractional seconds, then split into hours / minutes / seconds.
    time_span = fix(time_span);
    hours = fix(time_span / 3600);
    time_span = mod(time_span, 3600);
    minutes = fix(time_span / 60);
    time_span = mod(time_span, 60);
    seconds = fix(time_span);

    time_str = sprintf('%02d:%02d:%02d', hours, minutes, seconds);
    qsubstr_tmp=sprintf('%s --time=%s',qsubstr_tmp,time_str);
end
% num_processors
% When batch_config.num_processors is set, append "--ntasks=<value>" to the
% submit string. The value is concatenated as text, not formatted as a
% number.
% NOTE(review): this assumes the field holds a char such as '4' — confirm
% against the config loader.
if ~isempty(batch_config.num_processors)
num_proc = ['--ntasks=' batch_config.num_processors];
qsubstr_tmp=sprintf('%s %s', qsubstr_tmp,num_proc);
end
%program_options...
program_options='';
if ~isempty(batch_config.program_options);
......@@ -177,12 +225,20 @@ if ~isempty(batch_config.program_options);
end
end
%software...
% TODO(brad) make this not hardcoded
wrappername = 'analysis/support/dependencies/eeglab_asr_amica/plugins/batch_context/batch/exec_func/octave_exit_wrapper.m';
if strcmp(batch_config.mpi, 'true')
g.execstr = sprintf('srun %s', g.execstr);
end
switch batch_config.software
case 'none'
% TODO(brad) unsure what this is checking, find out why we have these
% magic numbers
while g.execstr(end)==10 || g.execstr(end)==13;
% Note: these are stripping the end?
while g.execstr(end)==10 || g.execstr(end)==13
g.execstr=g.execstr(1:end-1);
end
% TODO(brad) this is a hack
......
......@@ -156,12 +156,12 @@ qsubstr_tmp=sprintf('%s %s %s',qsubstr_tmp,'-j', ...
qsubstr_tmp=sprintf('%s %s %s',qsubstr_tmp,'-o', ...
[g.execpath,'/',job_nameStr,'.log']);
%jobid wait...
% jobid wait...
if ~isempty(g.jobid);
qsubstr_tmp=sprintf('%s %s %s',qsubstr_tmp,'-w', ...
g.jobid);
end
%qsub_options...
% qsub_options...
if ~isempty(batch_config.qsub_options);
for i=1:length(batch_config.qsub_options);
qsubstr_tmp=sprintf('%s %s',qsubstr_tmp,batch_config.qsub_options{i});
......@@ -169,46 +169,41 @@ if ~isempty(batch_config.qsub_options);
end
dimensions = ef_get_eegdims(g.datapath,g.datafname);
c = dimensions.channels; % channels
s = dimensions.samples; % samples
% These variables are eval'd so they will show up as a warning
c = dimensions.channels; %#ok<NASGU> % channels
s = dimensions.samples; %#ok<NASGU> % samples
%memory_allocation...
memory_alloc='';
% memory_allocation...
if ~isempty(batch_config.memory);
byte_size = batch_config.memory(end);
memory_alloc = ['--mpp ' num2str(eval(batch_config.memory(1:end-1))) byte_size];
else
memory_alloc = '--mpp 1G'; % default: 1G
qsubstr_tmp=sprintf('%s %s',qsubstr_tmp,memory_alloc);
end
qsubstr_tmp=sprintf('%s %s',qsubstr_tmp,memory_alloc);
%time_limit...
time_limit='';
% time_limit...
if ~isempty(batch_config.time_limit);
time_var = batch_config.time_limit(end);
time_limit = ['-r ' num2str(eval(batch_config.time_limit(1:end-1))) time_var];
else
time_limit = '-r 1h'; % default: 1h
qsubstr_tmp=sprintf('%s %s',qsubstr_tmp,time_limit);
end
qsubstr_tmp=sprintf('%s %s',qsubstr_tmp,time_limit);
%mpi
mpi_tag ='';
% mpi
if strcmp(batch_config.mpi, 'true');
mpi_tag = '--ppn 1 -q mpi';
mpi_tag = '-q mpi';
qsubstr_tmp=sprintf('%s %s',qsubstr_tmp,mpi_tag);
end
qsubstr_tmp=sprintf('%s %s',qsubstr_tmp,mpi_tag);
%num_processors
num_proc='';
% num_processors
if ~isempty(batch_config.num_processors);
num_proc = ['-n' batch_config.num_processors];
else
num_proc = '-n 1';
qsubstr_tmp=sprintf('%s %s', qsubstr_tmp,num_proc);
end
qsubstr_tmp=sprintf('%s %s', qsubstr_tmp,num_proc);
%program_options...
% program_options...
program_options='';
if ~isempty(batch_config.program_options);
for i=1:length(batch_config.program_options);
......
......@@ -70,7 +70,8 @@ function [element] = tree_get(tree, elementid)
end
tree_exists(tree, elementid);
if iscell(elementid)
element = {[tree.elements([elementid{:}]){:}].element};
element = cellfun(@(x) x.element, ...
tree.elements(cell2mat(elementid)), 'UniformOutput', false);
return
end
element = tree.elements{elementid}.element;
......@@ -82,13 +83,13 @@ function [tree, node] = tree_add(tree, parent, element)
end
tree_exists(tree, parent);
new_node = new_node();
nnode = new_node();
% end+1
node = numel(tree.elements)+1;
tree.elements{parent}.children{end+1} = node;
new_node.element = element;
tree.elements{node} = new_node;
tree.numelements += 1;
nnode.element = element;
tree.elements{node} = nnode;
tree.numelements = tree.numelements + 1;
end
function [tree] = tree_remove(tree, element)
......@@ -106,6 +107,10 @@ function [elements] = tree_get_children_contents_of_match(tree, ...
comparator, parent)
potchildren = tree.get_children(tree, parent);
potchildren = potchildren(arrayfun(comparator, tree.get(tree, potchildren)));
if numel(potchildren) == 0
elements = {};
return;
end
if numel(potchildren) ~= 1
error('potential children not unique');
end
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment