###############################
#                             #
#        LECTURE THREE        #
#                             #
###############################

##################### PART 1 - EXERCISES ##########################


# Exercise 1
# Do Problem 2.32 (p. 58), questions (a) and (b)
# Try both the unpaired and the paired t-test


######################### PART 2 ##################################


# Read the etch data set into a data frame (the first row holds column names)
etch <- read.table("etch.txt", header = TRUE)
# A first look at the data frame "etch"
etch  # Printing everything is only sensible for small data sets!
class(etch)
names(etch)  # names() lists the components of an object you do not know yet
y <- etch$y  # shorter handles for the two columns
power <- etch$power
class(y)
class(power)
# power is stored as numbers, but it really is a categorical variable.
# A numeric vector can be turned into a factor:
as.factor(power)
power <- as.factor(power)  # keep the factor version from here on
# Going back from a factor to numbers takes a little care:
as.numeric(power)  # No - this gives the internal level codes :-(
as.vector(power)  # Closer - the level labels, but as text strings
as.numeric(as.vector(power))  # This works!
# Always check that the data types are what you think they are,
# otherwise R may do something other than what you expect!

# Converting back to numbers can be useful elsewhere, but for now we need
# power to be a factor.
# We do some calculations and plots from the book
tapply(y, power, sum)   # The first output line holds the group labels
tapply(y, power, mean)  # Check these with Table 3.1, p.67
boxplot(y ~ power)  # Now we have Figure 3.2(a) (p.68)
# Write TRUE rather than T: T is an ordinary variable that can be reassigned,
# whereas TRUE is a reserved word.
stripchart(y ~ power, xlim = c(0.5, 4.5), xlab = "Power", ylab = "Etch rate",
  method = "stack", vertical = TRUE, pch = 16)  # Figure 3.2(b)

# And now the ANOVA
# The command "aov" returns an object we can work with.
# power must be a factor here. Using the raw column, as in
# aov(etch$y ~ etch$power), would treat power as a numeric covariate;
# the equivalent call on the data frame is
# aov(y ~ as.factor(power), data = etch).
obj <- aov(y ~ power)
summary(obj)  # Notice the three-starred significance of power
obj
names(obj)
# Normality of residuals
res <- obj$residuals  # or resid(obj)
qqnorm(res); qqline(res, col = "red")
# Residual plots
yhat <- fitted(obj)  # or obj$fitted.values
plot(yhat, res, xlab = "Fitted values", ylab = "Residuals", pch = 16)
abline(h = 0, col = "green")
plot(power, res)  # power is a factor, so this draws one boxplot per level
# Argument names are spelled out in full (vertical = TRUE, not vert = T) so
# the call relies neither on partial matching nor on the reassignable T.
stripchart(res ~ power, vertical = TRUE, method = "stack")
abline(h = 0, col = "cyan")
# Bartlett's test (are the variances equal across the power levels?)
bartlett.test(y, power)
# Plot of the object (a sequence of diagnostic plots).
plot(obj)
# Cleaning up: remove only the objects created in this part, so that
# anything else in the workspace is left untouched.
rm(etch, y, power, obj, res, yhat)

####################### EXERCISES ############################


# Exercise 2
# Do Problem 3.20 (p.133), questions (a), (b) and (c)
# You can use the dataset "pr0320.txt"
# Hint for making the factor: You can either make a vector of numbers,
# and convert it to a factor afterwards, or you can use the function gl()
# which will make a factor directly (?gl for help on gl()).


########################## PART 3 ############################


# We try out the Kruskal-Wallis test on the etch data
etch <- read.table("etch.txt", header = TRUE)

# The etch data show no indication of deviation from normality.
# In my opinion, there is no reason to use non-parametric methods
# in this case. Just for the exercise we shall do it anyhow.
# The second argument is the grouping variable; kruskal.test() accepts a
# plain vector there, so the numeric power column can be passed directly.
kruskal.test(etch$y, etch$power)
# This test also confirms that there is a difference between the groups

# Cleaning up: remove only the object created in this part, so that
# anything else in the workspace is left untouched.
rm(etch)

########################## EXERCISES ###########################


# Exercise 3
# Load the data from exercise 3.20 (p. 133) again. Using a QQ-plot,
# check whether the residuals resulting from the ANOVA are normally
# distributed. Are the assumptions fulfilled for the Kruskal-Wallis
# test? Regardless of the results, try using the Kruskal-Wallis test
# on the data. Which do you trust more: the ANOVA or the Kruskal-Wallis test?
