%% funcToRearrange.m by: R Green, M Bebbington, S Cronin, and G Jones % This file contains the functions required to run the MatlabCode.m file which % implements the stochastic local search procedure of Green et al % (2013) "Automated statistical matching of multiple tephra records" %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function [AICold,AICbest,MatchMatrixBest] = MatlabFunctions(MatchNo,AICold, AICbest,MatchMatrixBest,AnnealingConst) global nCores Data AllMatches PriorMatches InconsistentGeochemMatch AcceptedWorse ... minPoint maxPoint NewTempMatchMatrix Prior PriorLocation MatchMatrixOld %% locate which row in the MatchMatrix the match is... % for the first event in the selected 2 way match locatePoint1=[]; n1 =2; while isempty(locatePoint1) n1 = n1+2; locatePoint1 = find(ismember(MatchMatrixOld(:,n1:n1+1),AllMatches(MatchNo,1:2),'rows'),1); end % for the second event in the selected 2 way match locatePoint2=[]; n2 =2; while isempty(locatePoint2) n2 = n2+2; locatePoint2 = find(ismember(MatchMatrixOld(:,n2:n2+1),AllMatches(MatchNo,3:4),'rows'),1); end minPoint = min([locatePoint1 locatePoint2]); % locate which rowNo is the smallest maxPoint = max([locatePoint1 locatePoint2]); % locate which rowNo is the largest % locate Data is a list of all observations between locatePoint1 and 2 in the form of a 2 column matrix [SourceNo ErupNo] % this specifies the chunk of observations which we will look to rearrange locateData = reshape(nonzeros(reshape(MatchMatrixOld(minPoint:maxPoint,4:end)',2,[])')',[],2); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% TempLocateData{1} = locateData; x=1; % initialize the number of segments to divide the selected chunk into n = size(locateData,1); while n > 12 % only segments with less than 12 tephras are manageable x = x+1; % if > 12 then need to add one to the number of segments GroupSize = floor(minPoint:(maxPoint-minPoint)/x:maxPoint); TempLocateData= cell(1,numel(GroupSize)-1); %initialize the temporary locate data TempLocateData{1} = reshape(nonzeros(reshape(MatchMatrixOld(minPoint:GroupSize(2),4:end)',2,[])')',[],2); n = size(TempLocateData{1},1); for i = 2:numel(GroupSize)-1 TempLocateData{i} = reshape(nonzeros(reshape(MatchMatrixOld(GroupSize(i)+1:GroupSize(i+1),4:end)',2,[])')',[],2); n = max([n size(TempLocateData{i},1)]); end end for nChunks = 1:numel(TempLocateData) % for each of the manageable sized segements locateData = TempLocateData{nChunks}; % locate the MIN row number where the segment starts in the MatchMatrix minPoint=[]; nMin =2; while isempty(minPoint) nMin= nMin+2; minPoint = find(ismember(MatchMatrixOld(:,nMin:nMin+1),locateData(1,:),'rows'),1); end % locate the MAX row number where the segment ends in the MatchMatrix maxPoint=[]; nMax =2; while isempty(maxPoint) nMax= nMax+2; maxPoint = find(ismember(MatchMatrixOld(:,nMax:nMax+1),locateData(end,:),'rows'),1); end % %%--%% marker horizons / prior matches Prior = find(ismember(locateData,reshape(nonzeros(PriorMatches'),2,[])','rows')); %locate if any points in the locateData list are part of the prior matches if ~isempty(Prior) % if there are some terms in the locateData list that are members of a 'prior match' locateData(Prior,:)=[]; % remove those points from the locateData list PriorLocation=intersect(find(ismember(MatchMatrixOld(:,4:end),PriorMatches,'rows')),minPoint:maxPoint); %locate where in the MatchMatrixOld the PriorMatches lie end if ~isempty(locateData)&&(size(locateData,1)>1) % if there are still atleast 2 events to consider rearranging % create the temporary match matrix of points to rearrange of the form [MeanAge, SdevAge,Source,ErupNo] NewTempMatchMatrix=zeros(length(locateData),nCores*2+2); for i=1:length(locateData) LabelEvent = Data(ismember(Data(:,1:2),locateData(i,:),'rows'),[3 4 1 2]); NewTempMatchMatrix(i,[1 2 LabelEvent(3)*2+1 LabelEvent(3)*2+2]) = LabelEvent; end % store if any of the events to consider rearranging have inconsistent geochem matches with other events % store which row in the InconsistentGeochemMatch these lie rowBadGeoMatch = cell(1,size(NewTempMatchMatrix,1)); %initialize for n = 1:size(NewTempMatchMatrix,1) % for each row of the temporary match matrix rowBadGeoMatch{n} = [find(ismember(InconsistentGeochemMatch(:,1:2),nonzeros(NewTempMatchMatrix(n,3:end))','rows'));... % check the first event in the InconsistentGeochemMatch find(ismember(InconsistentGeochemMatch(:,3:4),nonzeros(NewTempMatchMatrix(n,3:end))','rows'))]; % check the second event in the InconsistentGeochemMatch end %% Compile a list of 2way, 3way, 4way etc. rearrangements of events in the temporary match matrix EndPoint = numel(unique(nonzeros(NewTempMatchMatrix(:,3:2:end)))); % events to consider rearranging are from how many unique locations? Starting= cell(1,EndPoint); %initialize for i=1:EndPoint %run through my function "funcUnique.m" which will remove inconsistent geochem matches and matches consisting from members from the same source Starting{i} = funcUnique(nchoosek(1:size(locateData,1),i), rowBadGeoMatch); end emptyCells = cellfun(@isempty,Starting); % remove any empty cells from our Starting cell array Starting(emptyCells) = []; EndPoint = size(Starting{end},2); %max possible match will be equal to the number of unique locations in the temp match matrix - or less % 'Starting' will be a cell array of all possible combinations size 1 by EndPoint % where EndPoint is the length of the largest allowable match. % i.e if the largest allowable match is a 4way match and there are 4 separate events to match we would % have: % Starting{1} Starting{2} Starting{3} Starting{4} % 1 1 2 1 2 3 1 2 3 4 % 2 1 3 1 2 4 % 3 1 4 1 3 4 % 4 2 3 2 3 4 % 2 4 % 3 4 % % where for example 2 3 shows that the we could consider matching the 2nd % and 3rd events in the TempMatchMatrix %% Create a list 'Mat' of all possible rearrangements to consider for this chunk i.e. 2 way, 3 way, 4way, ..., 2*2 way matches, 2*3 way matches, 2-2-3 match etc... v = repmat([0 2:EndPoint],1,floor(size(locateData,1)/2)); if v==0 Mat = zeros(0,floor(size(locateData,1)/2)); else Mat = nchoosek(v,floor(size(locateData,1)/2)); end Mat = unique(Mat(sum(Mat,2)<=size(locateData,1),:),'rows'); %remove ones for which the sum is > than the total number of ob's to match Mat = Mat(~ismember(Mat,zeros(1,size(Mat,2)),'rows'),:); % remove any zero rows. for k = 2:size(Mat,2) % for each column in Mat Mat = Mat(Mat(:,k)>=Mat(:,k-1),:); %rows must increase. i.e. 2-2-3 allowed but 2-3-2 not. end Mat = [[zeros(1,size(Mat,2)-1) 1];Mat]; % include a single zero row to allow for all events to be kept separate / not joined. % mat is a vector showing all the possibilities. % i.e. for size of locate data == 5 with length(unique(locateData))==5 % Mat would show: % 0 2 i.e. one 2 way match and all other events kept separate % 0 3 i.e. one 3 way match and all other events kept separate % 0 4 i.e. one 4 way match '' % 0 5 i.e. one 5 way match '' % 2 2 i.e. two 2 way matches '' % 2 3 i.e. one 2 way match and a 3 way match. '' BestCombination=[]; %initialize best combination for this chunk as empty BestAICCombination = Inf; % initialise best AIC as something really large. % first consider the null match where all terms are kept separate - run through the rearranegement function "funcMatch.m" [BestAICCombination,BestCombination] = funcMatch(BestAICCombination,BestCombination,nonzeros(Mat(1,:))); %% for all other rows of Mat we need to loop over all possible combinations % e.g in the above example where length of locate data == 5 % for row 5 of 'Mat' [2 2] we could have the arrangements: % 1 2 - 3 4 i.e. 2x2way matches where the 1st and 2nd ob's are joined, so are the 3rd and 4th ob's in TempMatchMatrix % 1 2 - 3 5 % 1 2 - 4 5 % 1 3 - 2 4 % 1 3 - 2 5 % 1 3 - 4 5 % 1 4 - 2 3 % 1 4 - 2 4 % 1 4 - 2 5 % 1 5 - 2 3 % 1 5 - 2 4 % 1 5 - 3 4 % 2 3 - 4 5 % 2 4 - 3 5 % 2 5 - 3 4 % we run a function called "functionRemoveDuplicates.m" so that 1 2 - 3 4 is considered % the same as 3 4 - 1 2 to avoid considering the same arrangement % more than one. for i=2:size(Mat,1) % for all other rows of Mat (the matrix showing the possible combinations) NonZeroMat = nonzeros(Mat(i,:)); % remove the zeros % find StartingNum - a cell array that shows the number of events to match % i.e for row 5 of 'Mat' [2 2] we would have StartingNum = [2] [2] - indicating 2 x 2 way matches % and starting would be of the form StartWith = [Starting{2}] [Starting{2}] % where each cell of StartWith shows all possible arrangements of 2 way matches. eg %1 2 join the 1st and 2nd events in the TempMatchMatrix %1 3 join the 1st and 3rd events in the TempMatchMatrix %1 4 join the 1st and 4th events in the TempMatchMatrix %... %3 4 join the 3rd and 4th events in the TempMatchMatrix StartingNum=cell(1,numel(NonZeroMat));%initalize. StartWith=cell(1,numel(NonZeroMat));%initalize. for n = 1:numel(NonZeroMat) StartingNum{n} = NonZeroMat(n); StartWith{n} = Starting{StartingNum{n}}; end %% remove the duplicates. N = [StartingNum{:}]; % Transform StartingNum into a vector not a cell array Len = size(locateData,1); % let Len be the size of the chunk to rearrange Uni = unique(N); % Uni gives the unique terms in StartingNum nUni = numel(Uni); % nUni gives the number of unique terms in StartingNum % for each unique match size run through the function functionRemoveDuplicates % to create a cell array 'Output'. Keeping with the above example if N = [2 2] and StartWith = [Starting{2}] [Starting{2}] where % Starting{2} = [1 2; 1 3; 1 4; 2 3; 2 4; 3 4] then will will get Output = [1 2 3 4; 1 3 2 4; 1 4 2 3] where the first row 1 2 3 4 % would indicate that we have 2x2 way matches - the first of which joins the 1st and 2nd terms in TempMatchMatrix and the % second of which joins the 3rd and 4th. Output = cell(1,nUni); for k = 1:nUni Index = find(N==Uni(k)); if numel(Index)==1 Output{k}= StartWith{Index}; else Output{k} = functionRemoveDuplicates(StartWith{Index}); end end Output = Output(~cellfun('isempty',Output)); nOutput = numel(Output); % now create a matrix "MachesCombined" - a list of all possible rearrangements if nOutput==nUni % if the number of cells in Output is still the same as the number of unique terms in StartingNum if nOutput==1 % and if there is only 1 cell in Output MatchesCombined = Output{:}; % then MatchesCombined = Output else for k = 1:nOutput-1 % for each cell in "Output" - except the last one if size(Output{k},2)+size(Output{k+1},2)== Len MatchesCombined = []; for j = 1:size(Output{k},1) SetDiff = setdiff(1:Len,Output{k}(j,:)); if any(ismember(Output{k+1},SetDiff,'rows')) MatchesCombined =[MatchesCombined;Output{k}(j,:), SetDiff]; end end % for j else % if sum ~=Len MatchesCombined = []; for j = 1:size(Output{k},1) Temp = Output{k+1}; Temp = Temp(~any(ismember(Temp,Output{k}(j,:))')',:); MatchesCombined = [MatchesCombined;repmat(Output{k}(j,:),size(Temp,1),1) Temp]; end %loop over j end % if ==Len Output{k+1} = MatchesCombined; end % loop over k end % if numel(Output)==1 % run the rearrangement to calculate the BestAIC MatchToConsider=cell(1,size(StartingNum,2));%initalize. for k=1:size(MatchesCombined,1) MatchToConsider{1} = MatchesCombined(k,1:StartingNum{1}); for j = 2:size(StartingNum,2) MatchToConsider{j} = MatchesCombined(k,(sum([StartingNum{1:(j-1)}])+1):sum([StartingNum{1:j}])); end if ~isempty(MatchToConsider) [BestAICCombination,BestCombination] = funcMatch(BestAICCombination,BestCombination,MatchToConsider{:}); end end end % if numelOutput==nUni end % for i=1:size(Mat) % after looping over all possible combinations for that chunk we % are left with the BestAIC if the rearrangement is accepted and % BestCombination shows what the rearrangement would look like. %% Update the new match matrix if ~isempty(BestCombination) % replace the selected chunk in the old match matrix with what the new 'best' rearrangement for that chunk is MatchMatrixNew = [MatchMatrixOld(1:minPoint-1,:) ; BestCombination ; MatchMatrixOld(maxPoint+1:end,:)]; AICnew = sum(MatchMatrixNew(:,1))+2*size(MatchMatrixNew,1); % compute the new AIC UpdateProbability = exp(-AnnealingConst*(AICnew-AICold)); % calculate the probability of accepting this new match RandomProbability = rand(1); % draw a random number. if AICnew < AICbest % if the new AIC is better than the best AIC then update the stored best combination and AIC AICbest = AICnew; MatchMatrixBest = MatchMatrixNew; end if AICnew > AICold % if the new AIC is worse than the old AIC if UpdateProbability > RandomProbability % then update the stored old combination and AIC with some probability = Update Probability AcceptedWorse = 1; % set the annealing schedule stopping condition = 1 so that we continue to update the annealing constant until we no longer accept a worse match AICold = AICnew; MatchMatrixOld = MatchMatrixNew; end else % if the new AIC is better than the old AIC then update the stored old combination and AIC AICold = AICnew; MatchMatrixOld = MatchMatrixNew; end % if AICnew < AICold end % cannot rearrange without violating constraints. end % if there are still atleast 2 events to consider rearranging end % for nChunks end % function end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function [OutMatrix] = funcUnique(InMatrix,rowBadGeoMatch) % in matrix is the match to consider reducing by removing matches between pairs from the same source % and pairs with inconsistent geochem. global NewTempMatchMatrix AllOk=zeros(size(InMatrix,1),1); %intialize as zeros for num = 1:size(InMatrix,1) % if there are no matches between pairs from the same source then CheckUnique=1 CheckUnique = (length(unique(nonzeros(NewTempMatchMatrix(InMatrix(num,:),3:2:end))))==numel(InMatrix(num,:))); % CheckGeo will equal 1 if geochem is consistent CombinedGeo = cell2mat(rowBadGeoMatch(InMatrix(num,:))'); if length(unique(CombinedGeo)) ~= length(CombinedGeo) CheckGeo = 0; else CheckGeo = 1; end AllOk(num) = CheckUnique*CheckGeo; %will equal 1 if geochem ok and there are no pairs from the same source. end % looping over possibilities. OutMatrix = InMatrix(AllOk~=0,:); % remove any rows from the list that have incompatible geochem or would be matching 2 members from the same source end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function [BestAICCombinationOut BestCombinationOut] = funcMatch(BestAICCombinationIn,BestCombinationIn,varargin) % the varargin are the matches to consider i.e. FirstMatch, SecondMatch,ThirdMatch etc. % check stratigraphy of events is not violated % if ok calculate chi stat global MatchMatrixOld NewTempMatchMatrix Prior PriorLocation minPoint maxPoint nCores %%%%%% initialise the output terms equal to the inputs BestAICCombinationOut = BestAICCombinationIn; BestCombinationOut = BestCombinationIn; % for each Match to consider in the selected chunk - create the match and calculate the ChiStat % NewCombination{i} will be of the form [ChiStat Age Stdev Source1 Erup1 Source2 Erup2, .....] NewCombination = cell(1,nargin-2); %preallocation for i=1:nargin-2 %Calculate the ChiStats and set up the NewCombination. NewCombination{i} = FunctionChiStat(NewTempMatchMatrix([varargin{i}],:)); end % check if there are any events that are to be left by themselves (not joint to another event) MissingPoint = setdiff(1:size(NewTempMatchMatrix,1),[varargin{:}]); NewCombinationMissingPoint = [zeros(numel(MissingPoint),1) NewTempMatchMatrix([MissingPoint],:)]; % create the Combination matrix - each row of this matrix shows events matched (or left by themselves if no matches were to be made) % of the form [ChiStat Age Stdev Source1 Erup1 Source2 Erup2, .....] Combination = [cell2mat(NewCombination');NewCombinationMissingPoint]; % if there were terms in the selected chunk that are from the list of PriorMatches % then this PriorMatch / marker horizon needs to be added back in our rearrangement of the chunk if ~isempty(Prior) Combination = [MatchMatrixOld(PriorLocation,:); Combination]; end Combination = sortrows(Combination,[2 3]); % sort MatchMatrix according to mean then stdev % sort the rows in the MatchMatrix so that each core has ascending erupNo's for i = 5:2:size(Combination,2) Indx = find(Combination(:,i)~=0); Combination(Indx,:) = sortrows(Combination(Indx,:),i); end if ~isequal(Combination(:,4:end),MatchMatrixOld(minPoint:maxPoint,4:end)) % if the new rearrangement is not the status quo / the same as the current arrangement TrialBestAIC = [MatchMatrixOld(1:minPoint-1,:) ; Combination ; MatchMatrixOld(maxPoint+1:end,:)]; % then add replace the new rearrangement with the current arrangement AICCombination = sum(TrialBestAIC(:,1))+ 2*size(TrialBestAIC,1); % and calculate the AIC % check that the stratigraphy is not violated i.e. BadStratig = checkStratig(Combination); % will = 1 if bad / stratigraphy is violated, 0 if good. if (BadStratig==0) % if the stratigraphy is not violated if (AICCombination <= BestAICCombinationIn) % and if the AIC using the new arrangement is better than AIC of the arrangement that was fed in % then make and update to the BestCombination. BestCombinationOut = Combination; BestAICCombinationOut = AICCombination; end end end end % function end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% calculate chi statistic function. function CombinationOut = FunctionChiStat(ConsiderMatches) % input vector is of form (Source1, Erup1, Age1, Sdev1, ..., etc) % this function calculates the combined mean age Ap, and new sdev of this combined age Ap and the chistat = sum((Ai-Ap)^2/Ei^2); % the output vector is of the form [ChiSq Ap Sdev Source1, Erup1, Source2 Erup2,...] global nCores CombinationOut = zeros(1,nCores*2+3); if size(ConsiderMatches,1)==1 % if tephra does not match to another tephra CombinationOut=[0 ConsiderMatches]; % then chi stat is = 0. else Means = ConsiderMatches(:,1); %otherwise calculate the mean ages and stdevs of all tephras in a match Sdevs = ConsiderMatches(:,2); x = Means./Sdevs.^2; NumAp = sum(x(~isnan(x))); % this allows special case where Sdevs are NaN DenomAp=sum(1./Sdevs(Sdevs>0).^2); Ap = NumAp./DenomAp; % calculate the pooled mean SdevAp = sqrt(1./DenomAp); % and pooled standard deviation % calculate chistat. =sum( (Ai-Ap)^2 / Ei^2) ChiStat = sum(((Means(Means>0) - Ap).^2)./Sdevs(Sdevs>0).^2); % create the Combination matrix for the selected segement, this is of % the form [ChiStat Ap SdevAp Source1 ErupNo1, .... SourceN ErupNoN] for each row of the segment CombinationOut(1:3) = [ChiStat Ap SdevAp]; Vals = nonzeros(ConsiderMatches(:,3:2:end)); for i = 1:numel(Vals) CombinationOut(Vals(i)*2+2:Vals(i)*2+3) = nonzeros(ConsiderMatches(:,Vals(i)*2+1:Vals(i)*2+2))'; end end end % function end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% check stratigraphy function function BadStratig = checkStratig(Combination) BadStratig = 0; % initialise the stratigraphy as good. for Order = 5:2:size(Combination,2) % for each core in the record if ~issorted(nonzeros(Combination(:,Order))) % if the eruption numbers are not in order BadStratig=1; % then the stratigraphy is violated return % If you find 1 source with its stratigraphy violated then break out of the function - no need to check the rest end end end % function end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function Output = functionRemoveDuplicates(varargin) %ensures for example that both 12-34 and 34-12 matches are not considered % i.e 12-34-56 is considered to be the same as 12-56-34 and 34-56-12, % 34-12-56, 56-34-12, and 56-12-34. This function is created to speed up % run time. % the input is a cell array StartWith{} for example % StartWith = [Starting{2}] [Starting{2}] where % Starting{2} = [1 2; 1 3; 1 4; 2 3; 2 4; 3 4] then this function would create MatchesCombined = [1 2 3 4; 1 3 2 4; 1 4 2 3] where the first row 1 2 3 4 % would indicate that we have 2x2 way matches - the first of which joins the 1st and 2nd terms in TempMatchMatrix and the % second of which joins the 3rd and 4th. Output=[]; for j = 1:nargin-1 if j>1 OutputOld = Output; else OutputOld = varargin{1}; end Output = []; for i = 1:size(OutputOld,1) NoPoints = max(max(varargin{end})); SetDiff = setdiff(1:NoPoints,OutputOld(i,:)); if ~isempty(SetDiff) if SetDiff==0 NewSet = zeros(0,size(varargin{j},2)); else if size(varargin{j},2)>size(SetDiff,2) NewSet=zeros(0,size(varargin{j},2)); else NewSet = nchoosek(SetDiff,size(varargin{j},2)); end end % this is to stop matches not allowed due to matching with self and matching with inconsistent geo NewSet = NewSet(ismember(NewSet,varargin{j},'rows'),:); NewSet = NewSet(OutputOld(i,j*2-1)