%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%                   Main body of the GPU program 
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%---------------- Take the dimension of the matrix -----------------------%
%%% Sizes of the four dimensions of mX. Below they are stored as the time
%%% dimension (cT), the M parameter (iM), the L parameter (iL) and the
%%% state space dimension (iKL).
[cT, iM, iL, iKL] = size(mX);
iSelectSeries = 1:iL;               %%------ Select the iL series -------%%
%%% Keep only the last cT rows of vY, restricted to the selected series.
vY = vY(end - cT + 1:end, iSelectSeries);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                       Parameter setting                                 %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Setting.cT = cT;                                    %%% Time dimension
Setting.cN = cN;                                    %%% Number of particles
Setting.iDimOmega = iKL;                            %%% State space dimension
Setting.dLambda = dLambda;                          %%% Lambda parameter
Setting.iL = iL;                                    %%% L parameter
Setting.iM = iM;                                    %%% M parameter
Setting.iDraws = iDraws;                            %%% Draws from the predictive
Setting.iEstimate = dVar;                           %%% Estimate or not the Lambda and Sigma
Setting.dKappa = dKappa;                            %%% Resampling parameter
%%% iTau = -1 is the value stored when learning is switched off.
if iLearning == 0                                   %%% Learning or not
    Setting.iTau = -1;
else
    Setting.iTau = iTau;
end
%%-------------------- Setting the Sigma and Lambda matrix --------------%%
%%% vSigma: 1 x iL row holding log(mDataRand(i, 1)) for every series i.
%%% Sigma : (iL * iDimOmega) x 1 column in which log(mDataRand(i, 2)) is
%%%         repeated iDimOmega times for series i, series stacked in order.
%%% Both are built in one shot instead of growing them inside a loop.
vSigmaTemp = log(mDataRand(1:Setting.iL, 1)).';
mSigmaTemp = kron(log(mDataRand(1:Setting.iL, 2)), ones(Setting.iDimOmega, 1));
Setting.Sigma = mSigmaTemp;
Setting.vSigma = vSigmaTemp;
%%% ------------ Create the matrix that will save the results ----------%%%
%%% vVector holds the first simulation index of every block of iDraws
%%% simulations; consecutive entries delimit one block.
vVector = 1:Setting.iDraws:Setting.iM + 1;
%%% The weights are stored for every simulation of every block, so the third
%%% dimension is sized for all blocks up front (at least one block wide)
%%% rather than being enlarged each time a new block is written.
iBlocks = max(numel(vVector) - 1, 1);
mWeightsCum = zeros(Setting.cT, Setting.iDimOmega, iBlocks * Setting.iDraws, Setting.iL);
mYPred = zeros(Setting.cT, 3, Setting.iL, Setting.iM);
TrueweightsCumRange = zeros(Setting.cT, Setting.iDimOmega, 4, Setting.iL);
%%%%%%%-------- Run the particle filter for all the simulations ----- %%%%%
%%% The simulations are processed in blocks of Setting.iDraws; vVector holds
%%% the first simulation index of each block. iCounter is the number of
%%% simulations handled by the blocks already completed.
iCounter = 0;
try           %%%--------- Catch possible error on GPU memory ---------%%%
tic;
for i=1:size(vVector, 2) - 1
    disp(char('We are at the iteration: ', num2str(i))); 
    %%% Host-side inputs for the initialisation. Every simulation of the
    %%% block gets its own cT rows; its regressors sit in its own group of
    %%% iDimOmega columns of mXInit and its observations in its own column
    %%% of mY, all other entries stay zero.
    mXInit = zeros(Setting.cT * Setting.iDraws, Setting.iDimOmega * Setting.iDraws, Setting.iL);
    mY = zeros(Setting.cT * Setting.iDraws, Setting.iDraws, Setting.iL);
    iTemp = 0; iSeries = 0; iIter = 1;
    for g = (vVector(i):vVector(i + 1) - 1)
        %%% ---------------- Construct the mX and mError -------------- %%%
        for j = 1:Setting.iL
             mXInit(iTemp + 1:iTemp + Setting.cT, iSeries + 1:iSeries + Setting.iDimOmega, j) = reshape(mX(:, g, j, :), Setting.cT, Setting.iDimOmega); 
             mY(iTemp + 1:iTemp + Setting.cT, iIter, j) = vY(:, j);
        end
        iTemp = iTemp + Setting.cT;
        iSeries = iSeries + Setting.iDimOmega; 
        iIter = iIter + 1;
    end
    %%%%%%%%%%%%%%%%%%%%% Load on the GPU %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    mXGPU = gpuArray(mXInit);
    mYGPU = gpuArray(mY);
    %%%%%%%%%%%%%% Initialization of the elements %%%%%%%%%%%%%%%%%%%%%%%%%
    [S0_GPU] = initPSGPU(Setting, mYGPU, mXGPU);
    %%%%%%%%%%%%%%%% Clean the memory before Particle Filter %%%%%%%%%%%%%%
    clear mYGPU mXGPU;
    %%%%%%%%%%%%%%%%%%%%%%% Initialize the states %%%%%%%%%%%%%%%%%%%%%%%%%
    [SSta0_GPU] = initStaGPU(Setting);
    %%%%%%%%%%%%%%%% Preparing dataset for PF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%% mXTot stacks the transposed (iDimOmega x cT) regressors of each
    %%% simulation of the block; mError holds the squared differences between
    %%% the observations and those regressors, one group of iDimOmega columns
    %%% per simulation.
    mXTot = zeros(Setting.iDimOmega * Setting.iDraws, Setting.cT, Setting.iL);
    mError = zeros(Setting.cT, Setting.iDimOmega * Setting.iDraws, Setting.iL);
    z = 1;
    for k = 1:Setting.iDraws
        for kk = 1:Setting.iL
            mXTot(z:z + Setting.iDimOmega - 1, :, kk) = reshape(mX(:, k + iCounter, kk, :), Setting.cT, Setting.iDimOmega)';  
            vYTemp = Kronbsxfun(ones(1, Setting.iDimOmega), vY(:, kk));    %%%%Reshape the observations to create the learning errors
            mError(:, z:z + Setting.iDimOmega - 1, kk) =  (vYTemp - reshape(mX(:, k + iCounter, kk, :), Setting.cT, Setting.iDimOmega)).^2; %%%  Construct the errors 
        end
        z = z + Setting.iDimOmega;
    end
    %%%%%%%%%%%% Create the structure SettingGPU for later use %%%%%%%%%%%%
    SettingGPU = struct('cN', gpuArray(Setting.cN),'iDimOmega', gpuArray(Setting.iDimOmega),...
      'Sigma', gpuArray(Setting.Sigma),'vSigma',gpuArray(Setting.vSigma), ...
      'cT', gpuArray(Setting.cT), 'iDraws', gpuArray(Setting.iDraws), ...
      'iTau', gpuArray(Setting.iTau), 'dLambda', gpuArray(Setting.dLambda),...
      'iL', gpuArray(Setting.iL), 'iM', gpuArray(Setting.iM), 'iEstimate', ...
      gpuArray(Setting.iEstimate), 'dKappa', gpuArray(Setting.dKappa));
    %%%%%%%%%%%%%%%%%%%%% Run the Particle filter on the GPU %%%%%%%%%%%%%%
    [~, SSta] = PFGPU(SettingGPU, S0_GPU, SSta0_GPU, gpuArray(vY), gpuArray(mXTot), gpuArray(mError));
    %%%%%%%%%%%%%%%%%% Go back to the CPU and do managing %%%%%%%%%%%%%%%%%
    mXFilt = gather(SSta.mXFilt);
    mLambda = gather(SSta.mLambdaFilt);
    mYCum = gather(reshape(SSta.mYPred, Setting.cT * Setting.iDraws,  Setting.cN, []));
    mSigma = gather(SSta.mSigmaFilt);
    clear SSta S S0_GPU SSta0_GPU;     %% Cancel old variables to free memory in the GPU
    %%%%%%%%%% Managing the mean and quantile of the predictive %%%%%%%%%%% 
    %%% For each simulation and series: column 1 is the median across the
    %%% second dimension of mYCum, columns 2 and 3 are the 2.5% and 97.5%
    %%% quantiles. The slice is extracted once and the quantiles are taken
    %%% explicitly along dimension 2, so a single-column slice is handled too.
    for g = 1:Setting.iDraws
        vRows = (g - 1) * Setting.cT + 1:(g * Setting.cT);
        for kk=1:Setting.iL
             mDraw = mYCum(vRows, :, kk);
             mYPred(:, 1, kk, g + iCounter) = median(mDraw, 2);
             mYPred(:, 2:3, kk, g + iCounter) = quantile(mDraw, [0.025, 0.975], 2);
        end
    end
    iCounter = iCounter + Setting.iDraws; 
    %%%%%%%%%%%%%%%%%%%% Managing the Weights %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%% logmul (project helper) is applied to the row of iDimOmega filtered
    %%% states of each simulation, series and time point; the result is stored
    %%% at the global simulation index j + vVector(i) - 1.
    for ga = 1:Setting.cT    
        z = 1;
        for j = 1:Setting.iDraws
           for k = 1:Setting.iL
               mWeightsCum(ga, :, j + vVector(i) - 1, k) = logmul(squeeze(mXFilt(z:z +  Setting.iDimOmega - 1, ga, k))');
           end      
           z = z + Setting.iDimOmega;
        end 
    end 
end
toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%% Final Statistics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Across the third dimension of mWeightsCum, for every time point, state
%%% and series: slot 1 = 2.5% quantile, slot 2 = 97.5% quantile,
%%% slot 3 = median, slot 4 = mean.
for kk = 1:Setting.iDimOmega
    for tt=1:Setting.cT
        %%%%%%%%%%%%%%%%%%%%%%% Loop over the Series %%%%%%%%%%%%%%%%%%%%%%
        for i = 1:Setting.iL
            vDraws = reshape(mWeightsCum(tt, kk, :, i), [], 1);
            vBand = quantile(vDraws, [0.025, 0.975]);
            TrueweightsCumRange(tt, kk, 1, i) = vBand(1);
            TrueweightsCumRange(tt, kk, 2, i) = vBand(2);
            TrueweightsCumRange(tt, kk, 3, i) = median(vDraws);
            TrueweightsCumRange(tt, kk, 4, i) = mean(vDraws); 
        end 
    end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                           Figures 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% mSigma and mLambda passed here are the ones gathered in the last block.
if (iPlot==1)
   FiguresGPU(TrueweightsCumRange, mSigma, mLambda, mYPred, Setting, iSave);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                       Saving the data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% NOTE: the working directory is changed here and is not restored; after
%%% a save the session is left inside OutputGPU.
cd('..');
if (iSave == 1)
    cd('OutputGPU');
    save('TrueweightsCumRange.mat', 'TrueweightsCumRange');
    save('mYPred.mat', 'mYPred');  
end
catch ME
   %%% Any error raised above ends up here, not only GPU out-of-memory
   %%% conditions, so the original error text is printed with the hint.
   disp('-----------------------------------------------------------------');
   disp(char('The dimension of the particles', num2str(Setting.cN), 'and the dimension of the blocks', num2str(iDraws)));
   disp(char('are too big for the RAM available on the GPU card.')); 
   disp(char('Please try to decrease the dimension of the particles or the blocks or both.'));
   disp(char('Please also check if the loaded dataset fulfil the required format, see Toolboxdescription.'));
   disp(char('The error reported by MATLAB was:', ME.message));
   disp('-----------------------------------------------------------------');
   %%% 'break' is only valid inside a for/while loop; 'return' stops the
   %%% program here.
   return;
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%