---
title: "Lab 02 in R and R Markdown"
author: "WILD 502 - Jay Rotella"
output: html_document
---

### Bring in the dataset and manipulate it slightly

The code immediately below imports the data, manipulates the data, and adds some labels to some of the variables. Here, we'll also use the *AICcmodavg* package in the model-selection work.

```{r, message=FALSE}
library(RMark) # for converting the input file
library(AICcmodavg) # for model-selection
library(ggplot2) # for plotting
library(visreg) # an alternative plotting package

# bring in the data file
fawns <- convert.inp("http://www.montana.edu/rotella/documents/502/lab02-fawns.inp",
                     covariates = c("area", "sex", "mass", "length"))

# add "length.sq" variable for use in quadratic models
fawns$length.sq <- (fawns$length)^2
head(fawns)

# convert area & sex variables to factors so that we can treat them as 
# grouping variables in the analysis to ease working with output later
fawns$area <- factor(fawns$area, 
                     levels = c(0, 1), 
                     labels = c("control", "treatment"))

fawns$sex  <- factor(fawns$sex,
                     levels = c(0, 1),
                     labels = c("female", "male"))

# Store versions of fate for use in final plotting
# first a factor version that has nice labels
fawns$fate <- factor(as.numeric(fawns$ch), 
                     levels = c(11, 10),
                     labels = c("died", "lived"))
# second, a numeric version that's 0 = dead, 1 = lived
fawns$Fate <- 2 - as.numeric(fawns$fate)

summary(fawns)

# we know from other work on this dataset that there is 1 record with 
#  a very high value for mass, which we want to drop
fawns2 = fawns[fawns$mass < 70, ]
summary(fawns2)
```

The dataset now contains information on `r nrow(fawns2)` fawns. Because we worked on the summarizing and evaluating the covariate information [in an earlier exercise](http://www.montana.edu/rotella/documents/502/FawnsDataSummary.pdf), we won't do that work here and will get right to the analyses.

### Analyze the data

Here, we run the same set of models that were run in lab in MARK. The code below shows how to do that in *glm* in R. 

```{r}
S.dot <- glm(fate ~ 1, data = fawns2, family = binomial)
S.area <- glm(fate ~ area, data = fawns2, family = binomial)
S.mass <- glm(fate ~ mass, data = fawns2, family = binomial)
S.length <- glm(fate ~ length, data = fawns2, family = binomial)
S.length.sq <- glm(fate ~ length + length.sq, data = fawns2, family = binomial)
S.sex <- glm(fate ~ sex, data = fawns2, family = binomial)
S.area.length <- glm(fate ~ area + length, data = fawns2, family = binomial)
S.length.mass <- glm(fate ~ length + mass, data = fawns2, family = binomial)
# sex-specific intercepts & common slope
S.sex.length <- glm(fate ~ sex + length, data = fawns2, family = binomial)
# 1 intercept & sex-specific slopes
S.sex.by.length <- glm(fate ~ sex:length, data = fawns2, family = binomial)
# sex-specific intercepts & slopes
S.sex.x.length <- glm(fate ~ sex * length, data = fawns2, family = binomial)

##set up named list
Cand.mods <- list("S.dot" = S.dot, 
                  "S.area" = S.area, 
                  "S.mass" = S.mass, 
                  "S.length" = S.length,
                  "S.length.sq" = S.length.sq, 
                  "S.sex" = S.sex,
                  "S.area.length" = S.area.length,
                  "S.length.mass" = S.length.mass, 
                  "S.sex.length" = S.sex.length,
                  "S.sex.by.length" = S.sex.by.length,
                  "S.sex.x.length" = S.sex.x.length)

##compute table
AICc.table <- aictab(cand.set = Cand.mods, second.ord = TRUE)
AICc.table
```

### Plot predictions from top model

```{r}
# make prediction data frame
dat.pr = expand.grid(sex = c("male", "female"),
                     length = seq(min(fawns2$length), max(fawns2$length), by = 1))
head(dat.pr)
tail(dat.pr)

# make predictions on log-odds scale
pred = predict(S.sex.x.length, newdata = dat.pr,
               type = c("link"), se.fit = TRUE)

# calculate predicted values and 95% CIS on real parameter scale
dat.pr$fit = plogis(pred$fit)
dat.pr$lci.fit = plogis(pred$fit - 1.96 * pred$se.fit)
dat.pr$uci.fit = plogis(pred$fit + 1.96 * pred$se.fit)

#  print table of covariate values and associated predicted values
head(dat.pr)
tail(dat.pr)
```

```{r, fig.width=8}
# make a plot
p <- ggplot(dat.pr, aes(x = length, y = fit, group = sex)) +
  geom_line(size = 1.5, aes(colour = sex)) +
  geom_ribbon(aes(ymin = lci.fit, ymax = uci.fit), alpha = 0.2) +
  scale_colour_brewer(palette = "Set1") +
  theme(legend.position = c(0.25, 0.75),
  legend.justification = c(1, 0)) +
  xlab("Body length (cm)") +
  ylab("Estimated Survival Rate") + 
  geom_jitter(data = fawns2, aes(y = Fate, color = sex),
              height = 0.025)
  # print the plot
p
```

You can also use the 'visreg' package to get a plot quite simply. With this method, you don't have to go to the trouble of making a prediction data.frame. This can be good (it's fast) and bad (you might not be as sure of what happened!). I do like turning the 'rug' on: this shows you the observed values of the covariate. Here you see where you have mass observations for each sex of fawn and you see what the fates for each sex. The first plot presents estimated log-odds and the second plot presents estimated survival rates.

```{r, fig.width=10}
visreg(S.sex.x.length, "length", by = "sex", rug = 2)
visreg(S.sex.x.length, "length", by = "sex", scale = "response", rug = 2)
```