#################################################################################### # # # SELC: An R Function for the Sequential Elimination of Level Combinations # # Algorithm # # # # Inputs: # # initialdesign : matrix of information that contains the initial # # design and the corresponding responses. # # Levels of each factor must be positive integers. # # forbiddenarray : matrix of information that contains the forbidden # # array design points. # # Levels of each factor must be positive integers. # # If no forbiddenarray exists, then specify # # forbiddenarray=cbind(0,0,...,0) # # strength : Strength of the forbidden array # # order : Order of the forbidden array # # level_true : Number of levels of each factor. Must have the form: # # level_true = c(1,2,3,...) # # direction : direction of desired response # # 1 = maximum is desired # # 0 = minimum is desired # # number_of_offspring : Number of desired offspring to generate # # seed : Seed value for random operations. Default value # # is clock time # # # # Output: # # An object is output from the function that contains 2 items: # # newruns : A matrix of design points that contains the new # # offspring # # currentforbiddenarray : A matrix that contains the design points of the # # forbidden array plus the design points specified # # by strength # # # # Example code to call SELC function: # # > initialdesign = as.matrix(read.table("initialdesign.txt")) # # > forbiddenarray = as.matrix(read.table("forbiddenarray.txt")) # # > source("SELC.R") # # > args(SELC) # # > next_gen = SELC(initialdesign,forbiddenarray,2,2,c(5,34,241),1,5) # # # # References: # # Mandal, A., Wu, C.F.J., and Johnson, K. (2006). SELC: Sequential elimination # # of level combinations by means of modified genetic algorithms. # # Technometrics, 48(2), 273-283. # # # # Mandal, A., Johnson, K., Wu, C.F.J., and Bornemeier, D. (2007). Identifying # # promising compounds in drug discovery: Genetic algorithms and some new # # statistical techniques. Journal of Chemical Information and Modeling, 47(3), # # 981-988. (DOI: 10.1021/ci600556v) Highlighted in Drug Discovery News # # Online: http://www.drugdiscoverynews.com/index.php?newsarticle=1318 # # # # Written by Kjell Johnson, Abhyuday Mandal and Tan Ding # # March 2008 # # Version 1.0 # #################################################################################### #################################################################################### # Functions needed for SELC # #################################################################################### # Given model matrix D, modelmatrix generates the design matrix X # Design matrix contains main effects, quadratic effects and 2fi's (linear by linear only). # Notes : The model matrix does not contain an intercept # Linear/quadratic system is used modelmatrix=function(D) { k = ncol(D) l = nrow(D) X = c() # First the main effects - standardized to have norm 1 p=2 for (i in 1:k) { X = cbind(X,D[,i]/(sum(abs(D[,i])^p)^(1/p))) } # Then the quadratic effects X = cbind(X,X^2) # Then the interaction effects c = 1 #Counter f = factorial(k)/(2*factorial(k-2)) Y = matrix(0,l,f) for (i in 1:(k-1)) { for (j in (i+1):k) { Y[,c]=X[,i]*X[,j] c=c+1 } } X=cbind(X,Y) return(X) } # Function to evaluate the statistical significance of each factor in the model evalsig=function(x,y) { x = modelmatrix(x) if (qr(x)$rank < ncol(x)) { print("More runs required in initial design") sigfactor=matrix(0,1,(ncol(x)-1)) }else { g = lm(y ~ x) sg = summary(g) sgcoef = sg$coef sgcoef = as.matrix(sgcoef) pvalues = sgcoef[,4] n = length(pvalues) sigfactor=matrix(0,1,n) for (i in 1:n) { if (pvalues[i]<0.05) sigfactor[i] = 1 } } return(sigfactor) } # Function to choose the elements of x with weighted probability choose_with_prob=function(x,number) { k = ncol(x) l = nrow(x) x = matrix(x,k*l,1) x = x*(x > 0) prob = x/sum(x) cumulative_prob = cumsum(prob) candidates = c() while (NROW(candidates) < number) { u = runif(1) temp = (u >= cumulative_prob)* matrix(1:nrow(prob)) candidates = rbind(candidates, max(temp)+1) candidates = unique(candidates) } return(candidates) } # Function to perform crossover crossover=function(p1,p2) { crossover_location = floor(runif(1)*length(p1)+1) p1new = c( p1[1:crossover_location-1], p2[crossover_location:NROW(p1)]) p2new = c( p2[1:crossover_location-1], p1[crossover_location:NROW(p1)]) newoff = rbind(p1new, p2new) return(newoff) } # Function to evaluate the significance of the interaction terms # if interactiton_number defines the number of significant interactions # The nonzero entries of the (k-1)th column and (k)th row of the matrix # facmat corresponds to the 2fi's of factor k is_there_imp_2fi=function(k,sigfactor,num_main_effects) { n=length(sigfactor) sigfactor = matrix(sigfactor,n,1) facmat = matrix(0,num_main_effects-1,num_main_effects-1) count = 1 for ( i in 1:(num_main_effects-1)) { for ( j in i:(num_main_effects-1)) { facmat[i,j] = count count = count + 1 } } if (k == 1) { values = facmat[1,] } else if (k==num_main_effects) { values = facmat[,k-1] } else { values = c(facmat[,k-1], facmat[k,]) } values = values[values>0] + 2*num_main_effects r=length(values) values = matrix(values,r,1) interaction_number = max(values*sigfactor[values]) return(interaction_number) } # Function to identify the parent factors identify=function(i,sigfactor,num_main_effects) { # i = effect number # z = main effect/quadratic effect/parents value = i # this is the indicator of presence of significant 2fi. if ( i <= num_main_effects) { index = i value = is_there_imp_2fi(index,sigfactor,num_main_effects) } else if ( i <= 2*num_main_effects) { index = i-num_main_effects value = is_there_imp_2fi(i,sigfactor,num_main_effects) } if (value == 0) { z=index } else { i = value int_num=i-2*num_main_effects count=0 count1=int_num num_ints=0 while (count1>0) { count=count+1 count1=count1-(num_main_effects-count) } count1=0 for (j in 1:(count-1) ) { count1=count1+(num_main_effects-j) } count1=int_num-count1 count1=count+count1 z = cbind(count, count1) } return(z) } # Function to calculate the mutation probability ith factor prob_main_effect=function(x,y,i,level_true) { num_level_true = level_true[i] level_observed = sort(unique(x[,i])) num_level_observed =length(level_observed) ymean = c(1:num_level_true*0) for (j in 1:num_level_observed) { ymean[level_observed[j]] = mean(y[x[,i]==level_observed[j]]) } ymean = ymean * (ymean > 0) ymean = ymean/sum(ymean) uniform_prob = 1/num_level_true * c(1:num_level_true*0+1) alpha = (num_level_observed / num_level_true) prob = alpha*ymean + (1-alpha)*uniform_prob return(prob) } # Function to calculate mutation for factor i with weight scheme given by the vector prob one_factor_mutation=function(p1,x,y,i,level_true) { prob1 = prob_main_effect(x,y,i,level_true) cumulative_prob1 = cumsum(prob1) u = runif(1) temp = (u >= cumulative_prob1) * (1:length(prob1)) p1[i] = max(temp) + 1 return(p1) } # Function to calculate the mutation probability for 2fis prob_2fi=function(x,y,i,j,level_true) { yprob = matrix(0,level_true[i],level_true[j]) for (ik in 1:level_true[i]) { for (jk in 1:level_true[j]) { if ((length(y[x[,i]==ik & x[,j]==jk])) > 0) { yprob[ik,jk] = mean(y[x[,i]==ik & x[,j]==jk]) } } } yprob = yprob*(yprob>0) yprob = yprob/sum(sum(yprob)) uniform_prob = 1/(level_true[i]*level_true[j]) * matrix(1,level_true[i],level_true[j]) alpha = sum(sum(yprob > 0))/(level_true[i]*level_true[j]) prob = alpha*yprob + (1-alpha)*uniform_prob return(prob) } # Function to calcuate the mutation probability for 2fis with weight scheme given by the vector prob. two_factor_mutation=function(p1,x,y,i,j,level_true) { prob = prob_2fi(x,y,i,j,level_true) cumulative_prob = cumsum(prob) u = runif(1) temp = (u < cumulative_prob) * (1:length(prob)) index = min(temp[temp>0]) number_of_rows = level_true[i] number_of_col = level_true[j] row = index - (ceiling(index/number_of_rows)-1)*number_of_rows; col = ceiling(index/number_of_rows); p1[i] = row p1[j] = col return(p1) } # Function to perform mutation mutation=function(x,y,sigfactor,p1,num_main_effects,level_true) { mutation_location = floor(runif(1)*length(p1)+1) if (sigfactor[mutation_location] == 1) { i = identify(mutation_location,sigfactor,num_main_effects) if (length(i)==1) { p1 = one_factor_mutation(p1,x,y,i,level_true) } else { p1 = two_factor_mutation(p1,x,y,i[1],i[2],level_true) } } else { p1[mutation_location] = floor(runif(1)*level_true[mutation_location]+1) } return (p1) } # Function to check element of one matrix with another check = function(oldmatrix,newrow,order) { oldmatrix=as.matrix(oldmatrix) newrow=as.matrix(newrow) n = NROW(oldmatrix) check = 1 for (i in 1:n) { if ( sum(newrow == oldmatrix[i,]) >= order ) check = 0 break } return(check) } # Function to check the input arguments for feasibility feasibility = function(initialdesign,forbiddenarray,strength,order,level_true,direction) { proceed = 1; a = nrow(initialdesign) b = ncol(initialdesign) c = nrow(forbiddenarray) d = ncol(forbiddenarray) f = ncol(t(as.matrix(level_true))) if (b != (d+1)) { print("Number of factors in the forbidden array do not match the number of factors in the initial design") proceed = 0 }else if (f != d) { print("Number of factors in level_true must be the same as the number of levels in the forbidden array") proceed = 0 }else { m1=apply(initialdesign[,1:b-1],2,max) m2=apply(forbiddenarray,2,max) m3=apply(rbind(m1,m2),2,max) if (sum(level_true >= m3) != d) { print("There exits levels in initialdesign or forbiddenarray that are greater than the levels specified in level_true") proceed = 0 } } if (strength > (a-2)) { print("Strength must be less than or equal to:") print(a-2) proceed = 0 } if (order < 1) { print("Order must be at least 1") proceed = 0 } if (order > (b-1)) { print("Order cannot be greater than:") print(b-1) proceed = 0 } if (direction!=0 & direction!=1) { print("Direction must be 0 or 1") proceed = 0 } return(proceed) } #################################################################################### # # # Main function to implement SELC # # # #################################################################################### SELC=function(initialdesign, forbiddenarray, strength, order, level_true, direction, number_of_offspring, seed_num=-1) { proceed = feasibility(initialdesign,forbiddenarray,strength,order,level_true,direction) if (proceed == 1) { if (seed_num>0) { set.seed(seed_num) } m = ncol(initialdesign) num_main_effects = m-1 x = as.matrix(initialdesign[,-m]) y = as.matrix(initialdesign[,m]) if (direction==1) { y = (y - min(y))/(max(y) - min(y)); }else if (direction==0) { y = (max(y) - y)/(max(y) - min(y)); }else { print("Check the direction") return } yc = y-mean(y) if (length(level_true)!=num_main_effects) { print("Check the number of factors") newrums=matrix() new=matrix() return } for (i in 1:num_main_effects) { if (length(unique(x[,i]))>level_true[i]) { print("Check the number of factors") newruns=matrix() newforbiddenarray=matrix() return } } #identify significant factors sigfactor = evalsig(x,yc) newruns = c() # Add new runs to the forbidden array based on the initial design yy = unique(sort(y)) index = matrix(0*1:length(y)) ijk = 1 while ( sum(index) < strength) { index = index + (y==yy[ijk]) ijk = ijk+1 } index = index*matrix(1*1:length(y)) index = index[index>0] newforbiddenarray = x[index,] currentforbiddenarray = rbind(forbiddenarray,newforbiddenarray) #Reproduction while (length(newruns) < (m-1)*number_of_offspring) { population_size = 2 candidates = choose_with_prob(y,population_size) p1 = x[candidates[1],] p2 = x[candidates[2],] # Crossover newoff = crossover(p1,p2) p1new = newoff[1,] p2new = newoff[2,] # Mutation p1new = mutation(x,y,sigfactor,p1new,num_main_effects,level_true) p2new = mutation(x,y,sigfactor,p2new,num_main_effects,level_true) # Check on eligibility indicator11 = check(newforbiddenarray,p1new,order) indicator12 = check(forbiddenarray,p1new,num_main_effects) indicator21 = check(newforbiddenarray,p2new,order) indicator22 = check(forbiddenarray,p2new,num_main_effects) if ((indicator11+indicator12)==2) {indicator1=1} else {indicator1=0} if ((indicator21+indicator22)==2) {indicator2=1} else {indicator2=0} if (indicator1 == 1) { indicator3 = check(x,p1new,num_main_effects) if (indicator3 == 1) { newruns = rbind(newruns, p1new) } } if (indicator2 == 1) { indicator4 = check(x,p2new,num_main_effects) if (indicator4 == 1) { newruns = rbind(newruns, p2new) } } } newruns = newruns[1:number_of_offspring,] return(list(newruns=newruns,currentforbiddenarray=currentforbiddenarray)) }else { print("WARNING: FUNCTION STOPPED BECAUSE OF ERRORS") } }