/****************************************************************************/ /****************************************************************************/ /****************************************************************************/ /*** STRATIFICATION PROGRAM FOR THE "NEYMANN" ALLOCATION RULE ***/ /*** ***/ /*** UNDER THE LOG-LINEAR MODEL TO CHARACTERIZE THE RELATIONSHIP ***/ /*** BETWEEN THE STRATIFICATION VARIABLE X AND DE THE SURVEY ***/ /*** VARIABLE Y. ***/ /*** ***/ /*** ***/ /*** Input variables ***/ /*** ***/ /*** data = data set used ***/ /*** x = vector of stratification variable ***/ /*** L = number of strata, default = 3 (L=1 is not allowed) ***/ /*** C = target coefficient of variation for the precision of the ***/ /*** stratified mean. Default value is 10% ( C = 0.1 ) ***/ /*** beta = coefficient of log(x) in the regression of log(y) ***/ /*** on log(x), default = 1 ***/ /*** sig2 = residual variance in the regression of log(y) on ***/ /*** log(x), default = 0 ***/ /*** bori = initial value of the strata boundaries to start the ***/ /*** algorithm. Default initial strata have all the same ***/ /*** size. To change bori: bori = ({5,10,15}) ***/ /*** (bori have to be a collumn vector) ***/ /*** print = variable that allow you to control the printing of ***/ /*** the indices of the data elements in each stratum, ***/ /*** default = 1. Printing is done only if print = 1. ***/ /*** ***/ /*** ***/ /*** Program output: ***/ /*** ***/ /*** nn = vector of the sample size for each """"iteration"""" ***/ /*** b_h = strata boundaries for each stratum ***/ /*** Mean = vector of the mean of X in each stratum ***/ /*** Variance = vector of the variance of X dans in each stratum ***/ /*** N_h = vector of the population size in each stratum ***/ /*** n_h = vector of the sample size in each stratum ***/ /*** f_h = vector of the sampling fraction ***/ /*** n = total sample size ***/ /*** Indices of the data elements in ascending order of the ***/ /*** stratification variable and group by stratum (if print=1) ***/ /*** Function's call ***/ /*** ***/ /*** ***/ /*** Program create: ***/ /*** ***/ /*** _out_ = a new data set including the stratification variable ***/ /*** and the number of the unit's stratum ***/ /*** ***/ /*** ***/ /*** WARNING : Missing values are not allowed, they have to be ***/ /*** extract of the data set. Data elements also have ***/ /*** to be over or equal to -1. ***/ /*** ***/ /*** ***/ /*** EXAMPLES: ***/ /*** ***/ /*** data listing; ***/ /*** infile "c:\documents\data.txt"; ***/ /*** input id $ rev84 rev89; ***/ /*** run; ***/ /*** ***/ /*** %include "c:\macro\strates_neymann.sas"; ***/ /*** %strates_power(data=listing, x=rev84); ***/ /*** ***/ /*** To change one or more input arguments: ***/ /*** %strates_power(data=listing, x=rev84, L=5, beta=1.1); ***/ /*** ***/ /****************************************************************************/ /****************************************************************************/ /****************************************************************************/ /*** Programmed by Nathalie Vandal (according to: A genera- ***/ /*** lization of Lavallée and Hidiroglou stratification ***/ /*** algorithm by Louis-Paul Rivest), under supervision of ***/ /*** Gaétan Daigle (août 2001) ***/ /****************************************************************************/ /****************************************************************************/ /****************************************************************************/ %macro strates_neymann(data=_last_,x=,L=3,C=0.1,beta=1,sig2=0,bori=.,print=1); proc iml; use &data; read all var{&x} into x; /***** Fonction quantile *****/ start quantile(x,probs); order = 1 +(nrow(x) - 1)#probs; forder = floor(order); z = x; z[rank(x),]=x; quantil = (1 - (order - forder))#z[forder,] + (order - forder)#z[forder + 1,]; return(quantil); finish quantile; start main; /* Computation of some constants */ N = nrow(x); cons = exp(&sig2)-1; /* Sorting of the data elements in ascending order followed by indices */ id = t(1:N); idx = id||x; idy1 = idx; idy1[rank(x),] = idx##β /* tri des observations */ id = idy1[,1]; y1 = idy1[,2]; /* Initial born computation */ if (&bori = .) then bori = quantile(y1##(1/&beta),t((1:&L-1)/&L)); else bori=&bori; bor = -1//bori##&beta//max(y1)+1; diff = 1; /* Creation if the initial strata */ strats = j(N,&L,.); tai_strat = repeat(0,&L,1); do i = 1 to &L by 1; t = t(loc((y1>= bor[i]) & (y1 0.1); /* Computing of the weigth, the variance and the quantities phi and psi for each stratums */ w_s = tai_strat/N; phi_s = t(strats[+,]/N); psi_s = t(strats[##,]/N); var_s = (N#psi_s-((n#phi_s)##2)/tai_strat ) / (tai_strat-1); /* Computing of the quantities KK, A et FF used in the partial derivatives */ KK= sqrt(w_s#(cons+1)#psi_s-phi_s##2); A= sum(KK[1:(&L-1)]); FF= (&C#y1[:])##2+sum(((1+cons)#psi_s)[1:&L-1,])/N-sum((phi_s##2/w_s)[1:&L-1,])/N; /* Computing of the partial derivatives */ derw= A#(1+cons)#psi_s/(FF#KK) - (A##2)#(phi_s##2)/(N#(FF##2)#w_s##2); derphi= -2#A#phi_s/(FF#KK) + 2#(A##2)#phi_s/(w_s#N#FF##2); derpsi= A#(1+cons)#w_s/(FF#KK) - (A##2)#(1+cons)/(N#FF##2); if (&L>2) then do; alpha1= (derpsi[1:&L-1,] - derpsi[2:&L,])[1:&L-2,]; beta1= (derphi[1:&L-1,] - derphi[2:&L,])[1:&L-2,]; gama1= (derw[1:&L-1,] - derw[2:&L,])[1:&L-2,]; end; alpha2= derpsi[&L-1,]; beta2= derphi[&L-1,]; gama2= derw[&L-1,]-N; /* Updating of the strata boundaries with the Sethi's algorithm */ if (&L>2) then do; if (any((beta1##2 - (4#alpha1#gama1))<0)) then do; bpp1 = j(&L-2,1,.); j = loc((beta1##2 - (4#alpha1#gama1))<0); g = nrow(bpp1)-nrow(j); if (g>0) then do; z = loc((beta1##2 - (4#alpha1#gama1))>=0); bpp1[z] = (-beta1[z] + sqrt(beta1[z]##2 - (4#alpha1[z]#gama1[z])))/(2#alpha1[z]); end; bpp1[loc(bpp1 = . )] = (-gama1[loc(bpp1 = . )]/beta1[loc(bpp1 = . )]); end; else bpp1 = (-beta1 + sqrt(beta1##2 - (4#alpha1#gama1)))/(2#alpha1); end; if ((beta2##2 - (4#alpha2#gama2))>=0) then bpp2 = (-beta2 + sqrt(beta2##2 - (4#alpha2#gama2)))/(2#alpha2); else bpp2 = bor[&L]; if bpp2 > y1[N-1] then bpp2 = y1[N-1]; if (&L=2) then born = bpp2; else born = bpp1//bpp2; diff = born - bor[2:&L]; /* Computation of the current sample size n and of the strata boundaries */ nn[ii] = N#w_s[&L] + A##2/FF; bor = -1//born//max(y1)+1; /* Computation of the new strata */ strats = j(N,&L,.); tai_strat = repeat(0,&L,1); do i = 1 to &L by 1; if any((y1>= bor[i]) & (y1= bor[i]) & (y1