function [newruns,currentforbiddenarray] = SELC(initialdesign,forbiddenarray,strength,order,level_true,direction,number_of_offspring,seed)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %
% SELC: A Matlab Function for the Sequential Elimination of Level Combinations %
% Algorithm %
% %
% Inputs: %
% initialdesign : matrix of information that contains the initial %
% design and the corresponding responses. %
% Levels of each factor must be positive %
% integers. %
% forbiddenarray : matrix of information that contains the forbidden %
% array design points. %
% Levels of each factor must be positive integers. %
% If no forbiddenarray exists, then specify %
% forbiddenarray = [] %
% strength : Strength of the forbidden array %
% order : Order of the forbidden array %
% level_true : Number of levels of each factor. Must have the form: %
% level_true = [1,2,3,...] %
% direction : direction of desired response %
% 1 = maximum is desired %
% 0 = minimum is desired %
% number_of_offspring : Number of desired offspring to generate %
% seed : Seed value for random operations. Default value %
% is clock time %
% %
% Output: %
% newruns : A matrix of design points that contains the new %
% offspring %
% currentforbiddenarray : A matrix that contains the design points of the %
% forbidden array plus the design points specified %
% by strength %
% %
% load -ascii initialdesign.txt %
% load -ascii forbiddenarray.txt %
% strength = 2 %
% order = 2 %
% level_true = [5,34,241] %
% direction = 1 %
% number_of_offspring = 10 %
% seed=0 %
% [newruns,currentforbiddenarray] = SELC(initialdesign,forbiddenarray, ... %
% strength,order,level_true, ... %
% direction,number_of_offspring,seed) %
% References: %
% Mandal, A., Wu, C.F.J., and Johnson, K. (2006). SELC: Sequential elimination %
% of level combinations by means of modified genetic algorithms. %
% Technometrics, 48(2), 273-283. %
% %
% Mandal, A., Johnson, K., Wu, C.F.J., and Bornemeier, D. (2007). Identifying %
% promising compounds in drug discovery: Genetic algorithms and some new %
% statistical techniques. Journal of Chemical Information and Modeling, 47(3), %
% 981-988. (DOI: 10.1021/ci600556v) Highlighted in Drug Discovery News %
% Online: http://www.drugdiscoverynews.com/index.php?newsarticle=1318 %
% %
% Written by Kjell Johnson, Abhyuday Mandal, and Tan Ding %
% March 2008 %
% Version 1.0 %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[proceed] = feasibility(initialdesign,forbiddenarray,strength,order,level_true,direction);
if proceed == 1
rand('seed',seed)
m = size(initialdesign,2);
num_main_effects = m-1;
y = initialdesign(:,m);
x = initialdesign(:,1:num_main_effects);
%Center the response
yc = y-mean(y);
%Identify the significant factors
sigfactor = evalsig(yc,x);
% Create the forbidden array based on the Initial design
yy = unique(sort(y));
index = 0.*[1:size(y,1)]';
ijk = 1;
while sum(index)0);
currentforbidden = x(index,:);
currentforbiddenarray = [forbiddenarray;currentforbidden];
% Scale y and enforce direction of optimal response
if direction == 1
y = (y - min(y)) / (max(y) - min(y));
else
y = (max(y) - y) / (max(y) - min(y));
end
newruns = [];
while size(newruns,1) < number_of_offspring
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Reproduction %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Choose 2 "good candidates" to reproduce.
population_size = 2;
candidates = choose_with_prob(y,population_size);
p1 = x(candidates(1),:);
p2 = x(candidates(2),:);
% Now we let these two parents, namely p1 and p2, reproduce
% and create new generation p1new and p2new
% First do the crossover
[p1new p2new] = crossover(p1,p2);
% Now do mutation
p1new = mutation(x,y,sigfactor,p1new,num_main_effects,level_true);
p2new = mutation(x,y,sigfactor,p2new,num_main_effects,level_true);
% Check whether these two level combanations are allowed or not
% if indicator1 is equal to 1, then p1new is allowed
% if indicator2 is equal to 1, then p2new is allowed
indicator1 = check(currentforbiddenarray,p1new,order);
indicator2 = check(currentforbiddenarray,p2new,order);
% Check whether these two level combanations are new or not
% if indicator3 is equal to 1, then p1new is new
% if indicator4 is equal to 1, then p1new is new
if indicator1 == 1
indicator3 = check(x,p1new,num_main_effects);
if (indicator3 == 1)
% If the run is not forbidden and is a new one, save it
newruns = [newruns; p1new];
end
end
if indicator2 == 1
indicator4 = check(x,p2new,num_main_effects);
if (indicator4 == 1)
% If the run is not forbidden and is a new one, save it
newruns = [newruns; p2new];
end
end
end
newruns = newruns(1:number_of_offspring,:);
else
newruns = [];
currentforbiddenarray = forbiddenarray;
disp('WARNING: FUNCTION HAS STOPPED BECAUSE OF ERRORS')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [check] = check(oldmatrix,newrow,order)
% This function compares the elements of the rows of a matrix (oldmatrix)
% with that of a new row (newrow). If the 'newrow' and any row of
% 'oldmatrix' have at least 'order'-many elements common (at the same
% locations), then this function will return 0, 1 otherwise.
%
% example : oldmatrix = [ 1 1 0 1 ;
% 2 0 1 1];
% newrow = [ 1 2 0 2];
% order = 2;
%
% then check = 0
n = size(oldmatrix,1);
check = 1;
for i = 1:n
if ( sum(newrow == oldmatrix(i,:)) >= order )
check = 0;
break;
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [candidates] = choose_with_prob(x,number)
% This function chooses 'number'-many _distinct_ elements of x with probability
% proportional to the value of x
% candidates = indices
x = x(:);
x = x.*(x > 0);
prob = x/sum(x);
cumulative_prob = cumsum(prob);
candidates = [];
% change the state
% rand('state',sum(10^10*clock));
while (size(candidates) < number)
u = rand(1);
temp = (u >= cumulative_prob) .* (1:length(prob))' ;
candidates = [candidates max(temp)+1];
candidates = unique(candidates);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [p1new, p2new] = crossover(p1,p2)
% Function to perform crossover
% change the state
%rand('state',sum(10^10*clock));
% first choose the crossover location
crossover_location = ceil(rand(1)*length(p1));
% now do the crossover
p1new = [ p1(1:crossover_location-1) p2(crossover_location:length(p1))];
p2new = [ p2(1:crossover_location-1) p1(crossover_location:length(p1))];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [sigfactor] = evalsig(y,x);
X = modelmatrix(x);
X = [ones(size(X,1),1) X];
if(rank(X) < size(X,2))
disp('More runs required in initial design');
sigfactor = zeros(1,size(X,2)-1);
return;
end
[B,BINT,R,RINT,STATS] = regress(y,X,0.05);
sigfactor = [];
for i = 2:size(BINT,1)
sigfactor(i-1) = (BINT(i,1) > 0) | (BINT(i,2) < 0);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function z = identify(i,sigfactor,num_main_effects)
% Given a mutation location, this function identifies the parent factors.
% For example , if there are 4 factors, A, B, C, D and if A and AD are important, then
% i = 1 will return A and D (D will come as AD interaction is significant)
% i = 2 will return B
% i = 3 will return C
% i = 4 will return A and D (A will come as AD interaction is significant)
%
% i = effect number
% z = main effect/quadratic effect/parents
value = i; % this is the indicator of presence of significant 2fi.
if ( i <= num_main_effects)
% then it is a main effect
index = i;
%check whether it has any imp two factor interaction
value = is_there_imp_2fi(index,sigfactor,num_main_effects);
elseif ( i <= 2*num_main_effects)
% then it is a quadratic effect
% index is the corresponding main effect
index = i-num_main_effects;
value = is_there_imp_2fi(i,sigfactor,num_main_effects);
end
if (value == 0)
z = index;
else
% it is a linear by linear interaction effect
% must first decide which effects are parents
% the interaction is the int_num^th interaction from Yates' order
%
% count is now the number of the first parent
% count1 to be computed is the number of the second parent
%For this part, all credit goes to Derek Bingham
i = value;
int_num=i-2*num_main_effects;
count=0;
count1=int_num;
num_ints=0;
while count1>0
count=count+1;
count1=count1-(num_main_effects-count);
end
count1=0;
for j=1:(count-1)
count1=count1+(num_main_effects-j);
end
count1=int_num-count1;
count1=count+count1;
z = [count count1];
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [interaction_number] = is_there_imp_2fi(k,sigfactor,num_main_effects)
% if interactiton_number = 0 then no interaction is imp. Otherwise this is
% the number corresponding to _one_of_the_ important interactions
% The nonzero entries of the (k-1)th column and (k)th row of the matrix
% facmat corresponds to the 2fi's of factor k
% example : Let there be 4 factors A, B, C and D and AC interaction is
% significant. Then for k = 1 and 3 (i.e. for A and C), it will return 10
% (i.e. AC) and for k = 2 and 4, it will return 0 (i.e. No important
% interactions)
%
sigfactor = sigfactor(:);
facmat = zeros(num_main_effects-1,num_main_effects-1);
count = 1;
for ( i = 1:num_main_effects-1)
for (j = i:num_main_effects-1)
facmat(i,j) = count;
count = count + 1;
end
end
if (k == 1)
values = facmat(1,:);
elseif (k==num_main_effects)
values = facmat(:,k-1);
else
values = [facmat(:,k-1)' facmat(k,:)]';
end
values = values(values>0) + 2*num_main_effects;
values = values(:);
interaction_number = max(values.*sigfactor(values));
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [X] = modelmatrix(D)
% given model matrix D, this function generates the design matrix X
% NOTE : We do _NOT_ have the intercept term here.
% This function creates the complete model matrix (main effects, quadratic effects and 2fi's (linear by linear only)).
% We use the linear-quadratic coding system
k = length(D(1,:));
X = [];
% First the main effects - standardized to have norm 1
for i = 1:k
X = [X D(:,i)/norm(D(:,i),2)];
end
% Then the quadratic effects
X = [X X.^2];
% Then the interaction effects
l = 2*k + 1;
for i = 1:k
for j = (i+1):k
X(:,l)=X(:,i).*X(:,j);
l = l + 1;
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [p1] = mutation(x,y,sigfactor,p1,num_main_effects,level_true)
% Function to perform mutation
% change the state
%rand('state',sum(10^10*clock));
% first choose the mutation location
mutation_location = ceil(rand(1)*length(p1));
% Check whether this location is important or not
if sigfactor(mutation_location) == 1
i = identify(mutation_location,sigfactor,num_main_effects);
% If this is only for main effect or quadratic effect, then the mutation
% will be on that effect only. Otherwise, it will be an interaction
% effect, and the mutation will be joint on both of the parent factors.
if length(i)==1
% this is only for main effect
p1 = one_factor_mutation(p1,x,y,i,level_true);
else
% this is for quadratic effect
p1 = two_factor_mutation(p1,x,y,i(1),i(2),level_true);
end
else
% if the factor is not imp, then just do the mutation
p1(mutation_location) = ceil(rand(1)*level_true(mutation_location));
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [p1] = one_factor_mutation(p1,x,y,i,level_true)
% This function calculates mutation for factor i with weight scheme given
% by the vector prob.
% first calculate the probability
prob1 = prob_main_effect(x,y,i,level_true);
% then do the actual mutation
cumulative_prob1 = cumsum(prob1);
u = rand(1);
temp = (u >= cumulative_prob1) .* (1:length(prob1))' ;
p1(i) = max(temp)+1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [prob] = prob_2fi(x,y,i,j,level_true)
% Here we calculate the mutation probability of the ith and jth factor for different levels
yprob = zeros(level_true(i),level_true(j));
% Now we calculate the mean of the y-values corresponding to each observed levels.
for ik = 1:level_true(i)
for jk = 1:level_true(j)
if size(y((x(:,i)==ik) & (x(:,j)==jk)),1) > 0
yprob(ik,jk) = mean(y((x(:,i)==ik) & (x(:,j)==jk)));
end
end
end
yprob = yprob.*(yprob>0);
yprob = yprob/sum(sum(yprob));
% uniform prob vector
uniform_prob = 1/(level_true(i)*level_true(j)) * ones(level_true(i),level_true(j));
alpha = sum(sum(yprob > 0))/(level_true(i)*level_true(j));
prob = alpha.*yprob + (1-alpha).*uniform_prob ;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [prob] = prob_main_effect(x,y,i,level_true)
% Here we calculate the mutation probability of the ith factor for
% different levels
% First we identify the possible and observed levels. Note that in the initial array we
% do not have all levels of each factors.
num_level_true = level_true(i);
level_observed = unique(x(:,i));
num_level_observed = size(level_observed,1);
% Now we calculate the mean of the y-values corresponding to each observed
% levels. First we create a column of zeros for corresponding to the possible
% levels of the factor.
ymean = zeros(num_level_true,1);
for j = 1:num_level_observed
ymean(level_observed(j)) = mean(y(x(:,i)==level_observed(j)));
end
% Now we consider only positive values of this mean vector
ymean = ymean .* (ymean > 0);
ymean = ymean/sum(ymean);
% uniform prob vector
uniform_prob = 1/num_level_true * ones(num_level_true,1);
% Here we calculate the final probability
alpha = (num_level_observed / num_level_true) ;
prob = alpha*ymean + (1-alpha)*uniform_prob;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function p1 = two_factor_mutation(p1,x,y,i,j,level_true)
% This function calculates mutation for factors i and j with weight scheme given
% by the vector prob. First calculate the probability:
prob = prob_2fi(x,y,i,j,level_true);
cumulative_prob = cumsum(prob(:));
u = rand(1);
temp = (u < cumulative_prob) .* ([1:length(prob(:))]') ;
index = min(temp(temp>0));
number_of_rows = level_true(i);
number_of_col = level_true(j);
row = index - (ceil(index/number_of_rows)-1)*number_of_rows;
col = ceil(index/number_of_rows);
p1(i) = row;
p1(j) = col;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [proceed] = feasibility(initialdesign,forbiddenarray,strength,order,level_true,direction)
%
% This function checks the input arguments for the feasibility
%
[a,b] = size(initialdesign);
[c,d] = size(forbiddenarray);
[e,f] = size(level_true);
proceed = 1;
if (b ~= (d+1))
disp('Number of factors in the forbidden array do no match the number of factors in the initial design');
proceed = 0;
elseif (f ~= d)
disp('Number of factors in level_true must be the same as the number of levels in the forbidden array')
proceed = 0;
else
m1 = max(initialdesign(:,1:b-1));
m2 = max(forbiddenarray);
if (sum(level_true >= max(m1,m2)) ~= d)
disp('There exist levels in initialdesign or forbiddenarray which are greater than the levels specified in level_true');
proceed = 0;
end
end
if (strength > (a-2))
disp('Strength must be less than or equal to');
disp(num2str(a-2))
proceed = 0;
end
if (order < 1)
disp('Order must be at least 1');
proceed = 0;
end
if (order > b-1)
disp('Order cannot be greater than');
disp(num2str(b-1));
proceed = 0;
end
if ((direction ~= 1) & (direction~=0))
disp('Direction must be 0 or 1');
proceed = 0;
end