#First we can use R as a calculator with scalars or matrices

1 + 1  #Addition (other operators include -, *, /, and ^)

log(6) #Some functions are also available including exp(), abs(), sin(), cos(),etc...)

x <- c(1.8, pi, 4, 88, 13) # building a vector with 5 elements

length(x) # check the size of the vector

log(x)  # functions can be applied to each element in the vector

A <- matrix(1:6, nrow=2)  #Generates a 2x3 matrix

t(A) # Transpose of A

A1 <- A[1:2,c(1,3)] #create a square matrix with first and last column of A

solve(A1) # inverse of A1

det(A1) #determinant of A1

eigen(A1) #Eigenvalue and Eigenvectors of A1

A1%*%solve(A1) #A1 times its inverse should be equal to the identity matrix

		   #Notice the use of %*% when multiplying matrices

		   #* will multiply element by element together

diag(2)  #Creates a 2x2 identity matrix


#Simulating data is very easy using R

set.seed(123)  #Important to replicate simulation results

sim <- rnorm(10)  #Simulates 10 draws from a standard normal distribution

			#There are many distributions in R, see 'R-Intro'

			#r before the distribution is a random draw, d is the density

			#p is the CDF, and q is the quantile function

?rnorm  #A ? before any function will take you to the Help section

sim2 <- rnorm(100,5,2)  #100 draws from a normal distribution with a mean of 5 and sd=2

mean(sim2)  #Mean of the simulation, should be close to 5

sd(sim2)    #Standard deviation of simulation, should be close to 2

summary(sim2)  #offers more summary information about the summary (includes quantiles, min, max, and median)

hist(sim2)  #Plots a histogram of the simulation, should look kind of like a normal

hist(sim2, freq=FALSE) #density is shown instead of frequency

lines(density(sim2), col=4) #Uses kernel smoothing to draw a line representing the density function


sim3 <- runif(100,0,5)  #100 draws from a uniform distribution with a min=0, max=5.


#Now lets work with some data.  Luckily, a bunch of data sets are loaded as a package in R

#To retreive the AER datasets, Go to Packages ->Install Packages -> Pick a US Mirror

# -> AER (doubleclick). Then type "require(AER)" in R Console

data("CPS1988", package="AER");

write.table(CPS1988, file="K:/AAEC6311/R/CPS1988.txt")

data <- read.table(file="K:/AAEC6311/R/CPS1988.txt")


summary(data)  #summary statistics for all variables

summary(data$wage) #summary statistics for one variable.  "$" is used to identify a variable within larger set

summary(data$region) #summary on categorical variables

head(data) #Allows you to view headers and first 6 observations

tab <- table(data$region)

barplot(tab) #Bar chart

pie(tab)     #Pie chart

xtabs(~ethnicity + region, data=data)

plot(ethnicity ~ region, data=data)


#Now some regressions

plot(log(wage) ~ education, data=data)

returns <- lm(log(wage) ~ education, data=data)

abline(returns)


R_ols <- lm(log(wage) ~ education + experience + I(experience^2) + ethnicity , data=data)

summary(R_ols)


lwage <- log(data$wage); education <- data$education; experience <- data$experience; experience_sq <- data$experience^2;

ethnicity <- as.numeric(data$ethnicity) - 1;  #creates dummy variable


ols <- function(x,y){

	beta <- solve(t(x)%*%x)%*%(t(x)%*%y) ;

	e <- y - x%*%beta;

	SSE <- sum(e^2);

	nobs <- length(y); k <- length(beta)

	sigma_sq <- SSE/(nobs-k);

	cov_mat <- sigma_sq*solve(t(x)%*%x)

	se <- sqrt(diag(cov_mat))

	return(data.frame(beta=beta, se=se))	

}


x <- cbind(matrix(1,length(lwage),1), education, experience, experience_sq, ethnicity)

results <- ols(x,lwage)


#Maximum Likelihood

n <- 200;  #Defines the number of observations

x1 <- matrix(1,n,1); x2 <- rnorm(n,5); X <- cbind(x1, x2);  #nx2 vector for X

b <- rbind(2,4);  #true b values

epsilon <- rnorm(n,0,2);  #randomly distributed residuals

y <- X%*%b + epsilon;   #calculates the dependent variable from sample


#function to compute optimized values based on normal MLE

normMLE <- function(par) {

	n <- nrow(X); k <- ncol(X);

	beta <- par[1:k]; sig_sq <- par[(k+1)];

	LL <- (n/2)*log(2*pi*sig_sq) + sum((1/(2*sig_sq))*((y-X%*%beta)^2))

	return(LL)

}


par0 <- rbind(b,2);  #Starting values for the function

opt <- optim(par0, normMLE, hessian=TRUE, control=list(maxit=10000));  #Runs the above function

opt_se <- sqrt(diag(solve(opt$hessian)))