/*    Local Influence Diagnostics for the orange tree dataset                                           */
/*    Copyright (C) 2025 Jhessica Leticia Kirch                                                         */
/*                                                                                                      */
/*    This program is free software: you can redistribute it and/or modify                              */ 
/*    it under the terms of the GNU General Public License as published by                              */ 
/*    the Free Software Foundation, either version 3 of the License, or                                 */ 
/*    (at your option) any later version.                                                               */ 
/*                                                                                                      */
/*    This program is distributed in the hope that it will be useful,                                   */
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of                                    */
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                                     */
/*    GNU General Public License for more details.                                                      */
/*                                                                                                      */
/*    You should have received a copy of the GNU General Public License                                 */
/*    along with this program.  If not, see <https://www.gnu.org/licenses/>.                            */
/*                                                                                                      */                                            


/* Dataset available in Draper, N. R. and Smith, H. (1998), Applied Regression Analysis (3rd ed.), Wiley */

/* Read the in-stream records into OrangeTree using list input.         */
/* Each record holds three numeric fields: the tree id (Tree), the      */
/* measurement time (Days) and the response (y), which the plots below  */
/* label as the tree circumference.                                     */
data OrangeTree;
	input Tree Days y;
	datalines;
1  118   30
1  484   58
1  664   87
1 1004  115
1 1231  120
1 1372  142
1 1582  145
2  118   33
2  484   69
2  664  111
2 1004  156
2 1231  172
2 1372  203
2 1582  203
3  118   30
3  484   51
3  664   75
3 1004  108
3 1231  115
3 1372  139
3 1582  140
4  118   32
4  484   62
4  664  112
4 1004  167
4 1231  179
4 1372  209
4 1582  214
5  118   30
5  484   49
5  664   81
5 1004  125
5 1231  142
5 1372  174
5 1582  177
;
run;

/* Numeric format that maps the tree id (1-5) to a display label        */
/* ("Tree 1" ... "Tree 5"). It is applied with PUT() in the next step.  */
proc format;
	value Subject
		1 = "Tree 1"
		2 = "Tree 2"
		3 = "Tree 3"
		4 = "Tree 4"
		5 = "Tree 5";
run; /* explicit step boundary instead of relying on the next DATA statement */

/* Rewrite OrangeTree in place, adding the character variable Subject   */
/* that carries the formatted tree label ("Tree 1", ..., "Tree 5").     */
/* Subject is later used as the SUBJECT= variable and as panel/x label. */
data OrangeTree;
	set OrangeTree;
	Subject = put(Tree, Subject.);
run;

/* Nonlinear mixed model for y:                                          */
/*   mean = (beta1 + b) / (1 + exp(-(Days - beta2) / beta3))             */
/* with residual variance sigma2 and a random effect b ~ N(0, d) per     */
/* Subject. Outputs consumed later in this program:                      */
/*   gradi - data set named by SUBGRADIENT= (subject-level gradients)    */
/*   h     - the Hessian ODS table (requested with HESS)                 */
/*   pred  - predictions of predmean written by the PREDICT statement    */
proc nlmixed data = OrangeTree hess subgradient = gradi;
	/* starting values for the optimizer */
	parms beta1 = 190
	      beta2 = 700
	      beta3 = 350
	      d = 1000
	      sigma2 = 60;

	/* numerator and denominator of the logistic growth curve */
	num = beta1 + b;
	den = 1 + exp(-(Days - beta2) / beta3);
	predmean = num / den;

	model y ~ normal(num/den, sigma2);
	random b ~ normal(0, d) subject = Subject;

	predict predmean out = pred;
	ods output Hessian = h;
run;

/* Figure 1: one panel per tree, observed values (filled circles) with  */
/* the predicted curve from the pred data set overlaid as a line.       */
ods html style = journal;
proc sgpanel data = pred noautolegend;
	panelby Subject / columns=3 onepanel novarname sort=data;
	colaxis label = "Days";
	rowaxis label = "Tree circumference";
	/* observed data first, fitted line drawn on top */
	scatter x = Days y = y /
		markerattrs = (symbol = circlefilled color = black size = 10);
	series x = Days y = pred /
		lineattrs = (thickness = 2 color = black);
run;

/* Local influence measures.                                             */
/* Reads the Hessian table (h) and the subject-level gradients (gradi)   */
/* produced by PROC NLMIXED and writes the data set Local with, for each */
/* row of gradi:                                                         */
/*   Ci        = 2 * |diag(Delta` * inv(L) * Delta)|                     */
/*   Ci_fixed  = same quantity with inv(L) minus the inverse of the      */
/*               random-parameter block of L                             */
/*   Ci_random = same quantity with inv(L) minus the inverse of the      */
/*               fixed-parameter block of L                              */
/* Each Cut_off_* column is twice the mean of its measure, repeated on   */
/* every row.                                                            */
/* NOTE(review): assumes the first f parameter columns are the fixed     */
/* effects (beta1-beta3) and the last r are d and sigma2 - confirm       */
/* against the PARMS order.                                              */
proc iml;
	f = 3; *number of fixed parameters;
	r = 2; *number of random parameters;
	p = f + r; *number total of parameters;

	/* Hessian: drop the first numeric column (presumably the row index */
	/* of the ODS table - verify) and flip the sign.                     */
	use h;
	read all var _NUM_ into L;
	close h;
	L = -L[, 2:ncol(L)];

	/* Gradients: numeric columns become Delta (transposed, sign        */
	/* flipped); character columns become the labels S. The data set    */
	/* is opened once for both reads.                                   */
	use gradi;
	read all var _NUM_ into Delta;
	read all var _CHAR_ into S;
	close gradi;
	Delta = -Delta`;

	/* inv(L) is needed by all three measures, so compute it once */
	Linv = inv(L);

	Ci = vecdiag(2 * abs(Delta` * Linv * Delta));
	Cut_off_Ci = repeat(2 * (sum(Ci) / nrow(Ci)), nrow(Ci), 1);

	/* zero matrix holding only the inverse of the random-parameter block */
	Lfixed = repeat(0, p, p);
	Lfixed[(f + 1):p, (f + 1):p] = inv(L[(f + 1):p, (f + 1):p]);
	Ci_fixed = vecdiag(2 * abs(Delta` * (Linv - Lfixed) * Delta));
	Cut_off_Ci_fixed = repeat(2 * (sum(Ci_fixed) / nrow(Ci_fixed)), nrow(Ci_fixed), 1);

	/* zero matrix holding only the inverse of the fixed-parameter block */
	Lrandom = repeat(0, p, p);
	Lrandom[1:f, 1:f] = inv(L[1:f, 1:f]);
	Ci_random = vecdiag(2 * abs(Delta` * (Linv - Lrandom) * Delta));
	Cut_off_Ci_random = repeat(2 * (sum(Ci_random) / nrow(Ci_random)), nrow(Ci_random), 1);

	create Local var {"S" "Ci" "Cut_off_Ci" "Ci_fixed" "Cut_off_Ci_fixed" "Ci_random" "Cut_off_Ci_random"};
	append;
	close Local;
quit; /* in IML, RUN executes a module; QUIT is what ends the procedure */

/* List the influence measures and their cut-offs stored in Local. */
proc print data=Local;
run;

/* Figure 2a: Ci for each tree label (S), with a dashed horizontal      */
/* reference line at the cut-off.                                       */
/* The cut-off is taken from the Cut_off_Ci column of Local (constant   */
/* across rows) instead of a hard-coded number, so the line follows the */
/* fitted model if the data or starting values change.                  */
ods escapechar="^";
proc sgplot data = Local noautolegend;
	scatter x = S y = Ci / markerattrs = (size = 10 color = black symbol = CircleFilled);
	refline Cut_off_Ci / axis = y lineattrs = (thickness = 3 color = black pattern = dash);
	/* VALUES= is specified once; the original repeated it */
	yaxis values = (0 to 5) label = "C^{unicode '1D62'x}" labelattrs = (size = 14) valueattrs = (size = 14);
	xaxis label = "Trees" labelattrs = (size = 14) valueattrs = (size = 14);
run;
  
/* Figure 2b: Ci_random against Ci_fixed, with dashed reference lines   */
/* at the two cut-offs.                                                 */
/* Both cut-offs are taken from Local (Cut_off_Ci_fixed on the x axis,  */
/* Cut_off_Ci_random on the y axis) instead of hard-coded numbers, so   */
/* they stay consistent with the values computed in PROC IML.           */
proc sgplot data = Local noautolegend;
	scatter x = Ci_fixed y = Ci_random / markerattrs = (size = 10 color = black symbol = CircleFilled);
	refline Cut_off_Ci_fixed / axis = x lineattrs = (thickness = 3 color = black pattern = dash);
	refline Cut_off_Ci_random / axis = y lineattrs = (thickness = 3 color = black pattern = dash);
	xaxis values = (0 to 2.5 by .5) label = "C^{unicode '1D62'x} on fixed effects parameters" labelattrs = (size = 14) valueattrs = (size = 14);
	yaxis values = (0 to 0.3 by .1) label = "C^{unicode '1D62'x} on variance components" labelattrs = (size = 14) valueattrs = (size = 14);
run;

