#First we can use R as a calculator with scalars or matrices 1 + 1 #Addition (other operators include -, *, /, and ^) log(6) #Some functions are also available including exp(), abs(), sin(), cos(),etc...) x <- c(1.8, pi, 4, 88, 13) # building a vector with 5 elements length(x) # check the size of the vector log(x) # functions can be applied to each element in the vector A <- matrix(1:6, nrow=2) #Generates a 2x3 matrix t(A) # Transpose of A A1 <- A[1:2,c(1,3)] #create a square matrix with first and last column of A solve(A1) # inverse of A1 det(A1) #determinant of A1 eigen(A1) #Eigenvalue and Eigenvectors of A1 A1%*%solve(A1) #A1 times its inverse should be equal to the identity matrix #Notice the use of %*% when multiplying matrices #* will multiply element by element together diag(2) #Creates a 2x2 identity matrix #Simulating data is very easy using R set.seed(123) #Important to replicate simulation results sim <- rnorm(10) #Simulates 10 draws from a standard normal distribution #There are many distributions in R, see 'R-Intro' #r before the distribution is a random draw, d is the density #p is the CDF, and q is the quantile function ?rnorm #A ? before any function will take you to the Help section sim2 <- rnorm(100,5,2) #100 draws from a normal distribution with a mean of 5 and sd=2 mean(sim2) #Mean of the simulation, should be close to 5 sd(sim2) #Standard deviation of simulation, should be close to 2 summary(sim2) #offers more summary information about the summary (includes quantiles, min, max, and median) hist(sim2) #Plots a histogram of the simulation, should look kind of like a normal hist(sim2, freq=FALSE) #density is shown instead of frequency lines(density(sim2), col=4) #Uses kernel smoothing to draw a line representing the density function sim3 <- runif(100,0,5) #100 draws from a uniform distribution with a min=0, max=5. #Now lets work with some data. Luckily, a bunch of data sets are loaded as a package in R #To retreive the AER datasets, Go to Packages ->Install Packages -> Pick a US Mirror # -> AER (doubleclick). Then type "require(AER)" in R Console data("CPS1988", package="AER"); write.table(CPS1988, file="K:/AAEC6311/R/CPS1988.txt") data <- read.table(file="K:/AAEC6311/R/CPS1988.txt") summary(data) #summary statistics for all variables summary(data$wage) #summary statistics for one variable. "$" is used to identify a variable within larger set summary(data$region) #summary on categorical variables head(data) #Allows you to view headers and first 6 observations tab <- table(data$region) barplot(tab) #Bar chart pie(tab) #Pie chart xtabs(~ethnicity + region, data=data) plot(ethnicity ~ region, data=data) #Now some regressions plot(log(wage) ~ education, data=data) returns <- lm(log(wage) ~ education, data=data) abline(returns) R_ols <- lm(log(wage) ~ education + experience + I(experience^2) + ethnicity , data=data) summary(R_ols) lwage <- log(data$wage); education <- data$education; experience <- data$experience; experience_sq <- data$experience^2; ethnicity <- as.numeric(data$ethnicity) - 1; #creates dummy variable ols <- function(x,y){ beta <- solve(t(x)%*%x)%*%(t(x)%*%y) ; e <- y - x%*%beta; SSE <- sum(e^2); nobs <- length(y); k <- length(beta) sigma_sq <- SSE/(nobs-k); cov_mat <- sigma_sq*solve(t(x)%*%x) se <- sqrt(diag(cov_mat)) return(data.frame(beta=beta, se=se)) } x <- cbind(matrix(1,length(lwage),1), education, experience, experience_sq, ethnicity) results <- ols(x,lwage) #Maximum Likelihood n <- 200; #Defines the number of observations x1 <- matrix(1,n,1); x2 <- rnorm(n,5); X <- cbind(x1, x2); #nx2 vector for X b <- rbind(2,4); #true b values epsilon <- rnorm(n,0,2); #randomly distributed residuals y <- X%*%b + epsilon; #calculates the dependent variable from sample #function to compute optimized values based on normal MLE normMLE <- function(par) { n <- nrow(X); k <- ncol(X); beta <- par[1:k]; sig_sq <- par[(k+1)]; LL <- (n/2)*log(2*pi*sig_sq) + sum((1/(2*sig_sq))*((y-X%*%beta)^2)) return(LL) } par0 <- rbind(b,2); #Starting values for the function opt <- optim(par0, normMLE, hessian=TRUE, control=list(maxit=10000)); #Runs the above function opt_se <- sqrt(diag(solve(opt$hessian)))