/* I. Specifying a library name */ libname myLib "C:\Users\anton.bekkerman\Documents\"; /* II. Importing data */ /* Method 1: Use the SAS wizard to import Excel, CSV, Access, DBA, etc. data files */ /* Method 2: Use code to import Excel, CSV, Access, etc. data files */ /* Example 1: Importing Excel files */ PROC IMPORT OUT= MYLIB.DT1 DATAFILE= "C:\Users\anton.bekkerman\Documents\Classes\ECNS561\SAS Lab\dt1.xls" DBMS=XLS REPLACE; GETNAMES=YES; GUESSINGROWS=50; RUN; /* Example 2: Importing CSV files */ PROC IMPORT OUT= myLib.dt1 DATAFILE= "C:\Users\anton.bekkerman\Documents\Classes\ECNS561\SAS Lab\dt1.csv" DBMS=CSV REPLACE; GETNAMES=YES; DATAROW=2; GUESSINGROWS=50; RUN; /* III. Creating a data set by entering values */ data myLib.dt1; input record year farm $ acres chickens $2.; label record="Farm Number" year="Year" farm="Farm Name" acres="Planted Acres" chickens="Number of chickens" ; datalines; 2 1999 HappyFam 554 44 1 1999 MmDonuts 334 32 3 1999 NoRecord . . 5 1999 TinyCow 1234 56 8 1999 Wheater 442 12 14 1999 Deere 994 72 1 2000 MmDonuts 324 41 2 2000 HappyFam 604 35 3 2000 NoRecord . . 5 2000 TinyCow 1532 76 8 2000 Wheater 204 9 14 2000 Deere 763 63 ; run; /* IV. Basic data manipulation */ /* a. Sorting data */ /* 1. Sort data by a variable in ascending order (lowest is first) */ /* Example: Sort farms such that those with lowest acreage are first */ proc sort data=myLib.dt1; by acres; run; /* 2. Sort data by a variable in descending order (highest is first) */ /* Example: Sort farms such that those with highest acreage are first */ proc sort data=myLib.dt1; by descending acres; run; /* 3. Sort data by more than one variables. That is, sort within subgroups */ /* Example: Sort farms such that those with lowest acreage in each year are first */ proc sort data=myLib.dt1; by year acres; run; /* b. Removing/keeping certain observations */ /* 1. Remove specific observations */ /* Example: Remove farms that are named "Wheater" */ data myLib.dt2; set myLib.dt1; if farm="Wheater" then delete; run; /* 2. Remove observations according to a criteria */ /* Example: Remove all farms that have acreage above or equal to 1,000 */ data myLib.dt2; set myLib.dt1; if acres >= 1000 then delete; run; /* 3. Keep observations according to a criteria */ /* Example: Keep only farms that have acreage between 300 and 900 acres */ data myLib.dt2; set myLib.dt1; if 300 < acres <= 900; run; /* 4. Remove missing observations */ /* Example: Remove farms that have a missing observation for acres */ data myLib.dt2; set myLib.dt1; if acres = . then delete; run; /* c. Manipulating variables (columns) */ /* 1. Keep only certain variables from an original data set */ /* Example: Keep only the year and farm name variables */ data myLib.dt2; set myLib.dt1(keep=year farm); run; /* 2. Delete certain variables from the original data set */ /* Example: Drop the farm number and acreage variables */ data myLib.dt2; set myLib.dt1(drop=record acres); run; /* 3. Rename variable names */ /* Example: Rename the farm number variable from "record" to "number" */ data myLib.dt2; set myLib.dt1(rename=(record=number)); run; /* 4. Convert a character variable into a numeric */ /* Example: Convert the character variable describing the number of chickens on a farm into a numeric format */ data myLib.dt2; set myLib.dt1; chickN = input(chickens, 8.); drop chickens; label chickN="Number of chickens"; rename chickN=chickens; run; /* V. Advanced data manipulation */ /* First, let's create four data sets for us to use */ data myLib.dta1; input record year farm $ acres chickens $2.; label record="Farm Number" year="Year" farm="Farm Name" acres="Planted Acres" chickens="Number of chickens" ; datalines; 2 1999 HappyFam 554 44 1 1999 MmDonuts 334 32 3 1999 NoRecord . . 5 1999 TinyCow 1234 56 8 1999 Wheater 442 12 14 1999 Deere 994 72 ; run; data myLib.dta2; input record year farm $ acres chickens $2.; label record="Farm Number" year="Year" farm="Farm Name" acres="Planted Acres" chickens="Number of chickens" ; datalines; 2 2000 HappyFam 554 35 1 2000 MmDonuts 324 41 3 2000 NoRecord . . 5 2000 TinyCow 1532 76 8 2000 Wheater 442 9 14 2000 Deere 763 63 ; run; data myLib.dta3; input record year farm $; label record="Farm Number" year="Year" farm="Farm Name" ; datalines; 2 1999 HappyFam 1 1999 MmDonuts 3 1999 NoRecord 5 1999 TinyCow 8 1999 Wheater 14 1999 Deere 1 2000 MmDonuts 2 2000 HappyFam 3 2000 NoRecord 5 2000 TinyCow 8 2000 Wheater 14 2000 Deere ; run; data myLib.dta4; input farm $ acres chickens $2.; label farm="Farm Name" acres="Planted Acres" chickens="Number of chickens" ; datalines; HappyFam 554 44 MmDonuts 334 32 NoRecord . . TinyCow 1234 56 Wheater 442 12 Deere 994 72 MmDonuts 324 41 HappyFam 604 35 NoRecord . . TinyCow 1532 76 Wheater 204 9 Deere 763 63 ; run; /* a. Setting one data set into another */ /* Example: Set the dataset "dta2" onto "dta1" */ data myLib.dta5; set myLib.dta1 myLib.dta2; run; /* b. Merging data sets together */ /* Example: Merge data sets "dta3" and "dta4" together by farm name */ proc sort data=myLib.dta3; by farm; run; proc sort dadta=myLib.dta4; by farm; run; data myLib.dta5; merge myLib.dta3 myLib.dta4; by farm; run; /* c. Append data sets */ /* Example: Append data set "dta2" to "dta1" */ proc append base=myLib.dta1 data=myLib.dta2 force; run; /* d. Exporting a SAS data set */ /* Method 1: Use the SAS wizard to export a data set */ /* Method 2: Code the export of a data set */ PROC EXPORT DATA= WORK.WEIGHT_IML OUTFILE= "C:\Users\anton.bekkerman\Documents\Classes\ECNS561\SAS Lab\dt1_exp.csv" DBMS=CSV REPLACE; PUTNAMES=YES; RUN; /* VI. Exploring and summarizing the data */ /* a. Producing basic summary statistics about the data set */ /* Example 1: Default summary statistics about all variables in a data set */ proc means data=myLib.dt1; run; /* Example 2: Default summary statistics about only specific variables */ proc means data=myLib.dt1; var acres; run; /* Example 3: Selected summary statistics */ proc means data=myLib.dt1 n nmiss mean median std; run; /* b. Frequency statistics */ /* 1. Basic frequency and additional statistics */ proc univariate data=myLib.dt1; run; /* 2. Descriptive frequency statistics */ proc freq data=myLib.dt1; tables farm acres; run; /* c. Visualizing the data -- graphics */ /* 1. Producing a basic scatter plot of the data */ /* Example: Seeing acreage as a function of farm number */ ods graphics on; proc sgplot data=myLib.dt1; scatter x=record y=acres; run; ods graphics off; /* 2. Producing a series plot (i.e. connecting the dots) */ /* Example: Seeing the number of holidays in Spain taken by US residents */ ods graphics on; proc sgplot data=sashelp.tourism; series x=year y=vsp; run; ods graphics off; /* 3. Producing a bar graph */ /* Example: Bar chart of acres on farms in 1999 */ ods graphics on; proc sgplot data=myLib.dt1; where year=1999; vbar farm / response=acres; run; ods graphics off; /* Example: Bar chart of comparing acres in 1999 and 2000*/ data myLib.dt1_graph; set myLib.dt1; if year = 1999 then acres99 = acres; if year = 2000 then acres00 = acres; label acres99 = "Acres in 1999" acres00 = "Acres in 2000"; run; ods graphics on; proc sgplot data=myLib.dt1_graph; yaxis label = "Acreage"; vbar farm / response=acres99; vbar farm / response=acres00 barwidth=0.5 transparency=0.2; run; ods graphics off; /* 4. Producing a histogram and density curve */ /* Example: Distribution of acreage */ ods graphics on; proc sgplot data=myLib.dt1; histogram acres; density acres / type=normal; density acres / type=kernel; run; ods graphics off; /* VII. Introduction to SAS matrix language */ /* Entering into IML (interactive matrix language) mode */ proc iml; /* Set option to display row and column numbers */ reset autoname; /*Construct a vector; columns separated by space, rows separated by "," */ v1 = {1 2 3 4}; print v1; /*Construct a matrix; columns separated by space, rows separated by "," */ m1 = {1 2, 3 4}; print m1; /* Change a value in a vector or a matrix */ /* Example: Change the value of the second column in the vector v1 */ v1[,2] = 5; print v1; /* Example: Change the value of the second row, first column in matrix m1 */ m1[2,1] = 5; print m1; /* Construct an identity matrix; I(num columns) */ i1 = i(2); print i1; /* Construct a matrix of all ones; J(rows, cols, value) */ ones1 = j(2,2,1); print ones1; /* Transpose a vector or matrix; t(vector name) or (vector name)` */ v1t = t(v1); m1t = m1`; print v1t, m1t; /* Inverse of square matrix; inv(matrix name) */ m1inv = inv(m1); print m1inv; /* Adding or subtracting a scalar */ v2 = v1 - 1; m2 = m1 - 1; print v2, m2; /* Multiplying/dividing by scalars */ v3 = v1 # 2; m3 = m1 # 2; print v3, m3; /* Raising to a power */ v4 = v1 ## 2; m4 = m1 ## (0.5); print v4, m4; /* Vector / matrix addition/substraction (must be the same dimensions) */ v5 = v1 + v1; m5 = m1 + m1; print v5, m5; /* Vector/matrix multiplication (must be appopriate dimensions) */ v6 = v1 * v1`; m6 = m1 * m1`; print v6, m6; /* Determine the rank of a matrix */ m1rank=round(trace(ginv(m1)*m1)); print m1rank; /* Read a SAS data set into a matrix */ /* 1. Specify which data set you wish to import into IML */ use sashelp.bweight; /* 2. Specify that you want to read all of the variables into the matrix */ read all into bweight; print bweight; /* 3. Read only specific variables and only a range of observations into a matrix */ /* Example: Read only the first 100 observations of the variables "weight" "black" and "married" */ read point (1:100) var {weight black married} into bweight; print bweight; /* Summary statistics */ summary var {weight black married} stat{mean std min max} opt{save}; print weight, black, married; /* Export matrices into SAS data sets */ cols = {"weight" "black" "married"}; create weight_iml from bweight[colname=cols] ; append from bweight; /* Exit from IML */ quit;