/* ------------------------------------------------------------------ */
/* Course walkthrough: DATA step basics, sorting, merging, summary     */
/* statistics, plotting, and an introduction to PROC IML.              */
/* All permanent data sets are read from / written to the LIB library. */
/* ------------------------------------------------------------------ */
libname lib "C:\Users\anton.bekkerman\Desktop\SAS" ;

/* ---------------- Subsetting rows and columns ---------------- */
data lib.dt2 ;
    set lib.dt1 ;
    /* Drop observations from 1999 or earlier. A numeric missing year
       also satisfies year <= 1999 in SAS, so those rows are dropped too. */
    if year <= 1999 then delete ;
    /* Keep only certain variables */
    keep chickens acres ;
run ;

/* ---------------- Creating new variables ---------------- */
data lib.dt1 ;
    set lib.dt1 ;
    /* Create dummy variable for indicating year */
    if year < 2000 then early = 1 ;
    else early = 0 ;
    /* Create constant */
    constant = 5 ;
    /* Index corresponding to observation number */
    index = _n_ ;
run ;

/* Sort in ascending order by chickens */
proc sort data=lib.dt1 ;
    by chickens ;
run ;

/* Create index based on # of chickens (overwrites the index created above
   with the observation number in the sorted order) */
data lib.dt1 ;
    set lib.dt1 ;
    index = _n_ ;
run ;

/* ---------------- Merging data sets by using an index ---------------- */
/* (1) Sort both data sets by the index on which merging occurs */
proc sort data=lib.planted ;
    by fips ;
run ;

proc sort data=lib.yield ;
    by fips ;
run ;

/* (2) Merge using data step */
/* NOTE(review): if both tables hold more than one row per fips (e.g. one
   per year), a merge on fips alone pairs rows positionally within each
   fips group -- confirm this is intended. lib.merged is rebuilt by the
   fips/year merge in the Day 2 section that follows. */
data lib.merged ;
    merge lib.planted lib.yield ;
    by fips ;
run ;

/* ============================ Day 2 ============================ */

/* Merge planted and yield data by fips and by year */
proc sort data=lib.planted ;
    by fips year ;
run ;

proc sort data=lib.yield ;
    by fips year ;
run ;

data lib.merged ;
    merge lib.planted lib.yield ;
    by fips year ;
run ;

/* ---------------- Summary statistics ---------------- */
/* Summary (default statistics for every numeric variable) */
proc means data=lib.merged ;
run ;

/* Summary stats by year (BY processing needs the data sorted by year) */
proc sort data=lib.merged ;
    by year ;
run ;

proc means data=lib.merged ;
    by year ;
    var planted harvested yield ;
run ;

/* Summary stats, overall -- mean, std, median, observations, missing values */
/* The OUTPUT statement stores the means in lib.summ with auto-generated names */
proc means data=lib.merged mean std median n nmiss ;
    var planted harvested yield ;
    output out=lib.summ mean= / autoname ;
run ;

/* Distributional properties */
proc freq data=lib.merged ;
    table yield ;
run ;

/* Summary of the distributional properties */
proc univariate data=lib.merged ;
    /* Only look at years between 2001 and 2005 */
    where 2001 <= year <= 2005 ;
    var yield ;
run ;

/* ---------------- Graphing ---------------- */
/* Harvested and planted over time for a single fips code */
proc sgplot data=lib.merged ;
    where fips = 30001 ;
    xaxis type=discrete ;
    series x=year y=harvested / lineattrs=(color=green thickness=4) ;
    series x=year y=planted ;
    scatter x=year y=harvested ;
run ;

/* Histogram of yield with a kernel density overlay */
proc sgplot data=lib.merged ;
    histogram yield ;
    density yield / type=kernel ;
run ;

/* ---------------- Matrix language ---------------- */
proc iml ;

/* Row vector (elements separated by spaces) */
a = {1 2 3 4} ;
print a ;

/* Column vector (rows separated by commas) */
b = {1, 2, 3, 4} ;
print b ;

/* 3x2 matrix */
ab = {1 2, 3 4, 5 6} ;
print ab ;

/* In SAS, matrix[row #, col#] */
ab[2,2] = 8 ;
print ab ;

/* An empty row subscript selects every row: set all of column 2 to 3 */
ab[,2] = 3 ;
print ab ;

/* Function j() -- j(# rows, # cols, value to populate with) */
thebestvectorever = j(50,2,1) ;
print thebestvectorever ;

/* Fill a 4x1 vector with its own row number */
m = j(4,1,.) ;
do i = 1 to nrow(m) ;
    m[i,] = i ;
end ;
print m ;

/* (1) Create a 4x4 matrix
   (2) Run a two-level loop that populates this matrix with the index number */
a = j(4,4,.) ;
do i = 1 to nrow(a) ;
    do j = 1 to ncol(a) ;
        /* i walks the rows and j the columns, so the subscript is [i,j] */
        a[i,j] = i*j ;
        /* NOTE(review): PRINT sits inside the inner loop, so the matrix is
           printed after every assignment (16 times). Move it below the
           loops if only the final matrix is wanted. */
        print a ;
    end ;
end ;

/* Single draw from uniform(); the value is not used below, but the call is
   kept so the random-number stream seen by the later draws is unchanged */
r = uniform(12345) ;

/* Fill a 4x4 matrix with uniform draws, one cell at a time */
rm = j(4,4,.) ;
do i = 1 to nrow(rm) ;
    do j = 1 to ncol(rm) ;
        rm[i,j] = uniform(12345) ;
    end ;
end ;
print rm ;

/* Repeat above, but ensure that no value is above 0.5 */
rm = j(4,4,.) ;
do i = 1 to nrow(rm) ;
    do j = 1 to ncol(rm) ;
        /* Redraw this cell until the value is acceptable; an explicit
           DO UNTIL avoids changing the loop counter inside the loop */
        do until (rm[i,j] <= 0.5) ;
            rm[i,j] = uniform(12345) ;
        end ;
    end ;
end ;
print rm ;

/* RANDGEN fills an already-allocated matrix with random draws */
k = j(2,1,.) ;
call randgen(k, "Uniform") ;
g = mean(k) ;

/* Generating means from uniform distribution */
/* Initiate holding matrix */
hold = j(100,1,.) ;
do i = 1 to nrow(hold) ;
    /* Mean of three uniform draws, stored in row i */
    t = j(3,1,.) ;
    call randgen(t, "Uniform") ;
    t_mean = mean(t) ;
    hold[i,] = t_mean ;
end ;
print hold ;

/* Mean of the 100 sample means */
mean_mean = mean(hold) ;
print mean_mean ;

/* Concatenating vectors/matrix */
one = j(3,1,1) ;
gg = {3 1, 2 1, 3 2} ;
jj = {5, 6, 7} ;

/* Horizontal concatenation */
h_conc = one || gg ;
print one gg ;
print h_conc ;

/* Vertical concatenation */
v_conc = one // jj ;
print v_conc ;