#############################
#                           #
#        LECTURE SIX        #
#                           #
#############################


############################ PART 1 ###############################


# We look at the rocket propellant again (Ex 4.4, p. 167)
rocket <- read.table("rocket.txt", header = TRUE)

# The data contains a variable assem (test assembly) which we ignored
# the last time
rocket

# The design is a Graeco-Latin square: laid out as a 5 x 5 table, every
# formulation/assembly pair should appear exactly once
matrix(paste(rocket$form, rocket$assem), 5)

# We will use fixed factors. batch and oper are coded as numbers and must be
# converted. form and assem are converted as well: read.table only returns
# text columns as factors in R < 4.0, and as.factor() is a no-op on columns
# that already are factors.
rocket$form <- as.factor(rocket$form)
rocket$batch <- as.factor(rocket$batch)
rocket$oper <- as.factor(rocket$oper)
rocket$assem <- as.factor(rocket$assem)

# We make the ANOVA in the usual way
obj <- aov(y ~ form + batch + oper + assem, data = rocket)
summary(obj)
# Again the formulation is significant, while it was necessary to
# include the operator - the other two variables are not significant

# the agricolae-package also has Graeco-Latin designs
library(agricolae)
des <- design.graeco(LETTERS[1:5], letters[1:5])
des
# Current versions of agricolae return a list (parameters, sketch, book)
# where the field book is the data frame holding the design; old versions
# returned that data frame directly. Indexing the list with des[, 4] would
# fail, so we pick out the field book in a way that works for both.
book <- if (is.data.frame(des)) des else des$book
# Columns 4 and 5 of the field book hold the two treatment factors
t(matrix(paste(book[, 4], book[, 5]), 5))

# cleaning up
rm(list = ls())


########################### EXERCISES ###############################


# Exercise 1
# Consider the dataset from exercise 4.36 on television assembly times
# (you may read the text of 4.22 and 4.36 to understand the problem).
# (a) Check that this is indeed a Graeco-Latin square design.
# (b) Make an analysis of variance using fixed factors.
# (c) Assume now the variable workplace had not been recorded -
# is this new design a Latin square?
# (d) Make a new analysis of variance for this reduced dataset.
# (e) Use Tukey's test on the variable "method".


############################## PART 2 ###############################


# Balanced incomplete block design (BIBD): the catalyst dataset
# (Ex 4.5, p. 170).
# Reaction times are measured in a chemical experiment with various catalysts.
# The block factor is various batches of material.
catalyst <- read.table("catalyst.txt")
catalyst$cat <- as.factor(catalyst$cat)
catalyst$block <- as.factor(catalyst$block)

# Laying the catalysts out with three rows lets us check that
# the design is indeed a BIBD
matrix(catalyst$cat, nrow = 3)

# Unfortunately the GAD package will not allow non-orthogonal designs
# (not all combinations of levels in the two factors occur in BIBD),
# so if we wish to consider the block as a variance component, we use
# the standard way in R with the Error function
fit_random_block <- aov(y ~ cat + Error(block), data = catalyst)
summary(fit_random_block)
# The choice of catalyst is significant, while we don't get a p-value
# for the block - this is typically fine for a BIBD where the block
# is a nuisance variable.

# Table 4.25 at page 172 contains both p-values, though.
# Can we get these?
# Let's consider both factors fixed and fit the model twice, changing
# only the order in which the two terms enter.
fit_block_first <- aov(y ~ block + cat, data = catalyst)
anova(fit_block_first)
fit_cat_first <- aov(y ~ cat + block, data = catalyst)
anova(fit_cat_first)
# Oops! We have to be careful here. The results are different.
# Only the former conforms with Table 4.24.
# This problem occurs when factors are not orthogonal.
# Order is important: only the last F-test in the table is reliable.

# Catalyst is significant; you cannot ignore its effect.
# (Block is even more significant, but this is not our main interest.)

# cleaning up
rm(list = ls())


############################ EXERCISE ################################


# Exercise 2
# Solve Problem 4.42 (p.181).
# Data in "pr0442.txt". When you read in the data the command
# read.table is probably the best. 


############################### PART 3  ############################


# Regression by hand with the polymer dataset in chapter 10
# (Ex 10.1, p. 454).
# Polymer viscosity is modelled as a function of temperature; at first we
# ignore that the catalyst feed rate is also given in the data.
polymer <- read.table("polymer.txt")
polymer

# Scatter plot of viscosity against temperature
plot(y ~ temp, data = polymer)
# It seems that a straight line would model the data well - we need to
# find the slope and intercept of this line.

# The design matrix: a column of ones (intercept) and the temperatures
X <- cbind(1, polymer$temp)
X
# (X'X)^{-1} is needed both for betahat and for the matrix H,
# so we compute it once
XtX_inv <- solve(t(X) %*% X)
# betahat = (X'X)^{-1} X'y - this contains the intercept and slope
# of the straight line that fits the data best
beta_hat <- XtX_inv %*% t(X) %*% polymer$y
beta_hat
# We add the fitted line to the plot
abline(a = beta_hat[1], b = beta_hat[2])

# The matrix H = X (X'X)^{-1} X' and y-hat = H y
H <- X %*% XtX_inv %*% t(X)
y_hat <- H %*% polymer$y
# We plot y-hat, the predicted values according to the model
points(polymer$temp, y_hat, pch = 19)

# We can also make a model with both temperature and catalyst feed
# rate - a multiple regression.
# Design matrix: intercept, temperature and catalyst feed rate
X_multi <- cbind(1, polymer$temp, polymer$cat)
X_multi
# betahat now contains the intercept and one slope each for temp and cat
beta_hat_multi <- solve(t(X_multi) %*% X_multi) %*% t(X_multi) %*% polymer$y
beta_hat_multi

# Of course there are functions implemented in R for regression
# models. We'll use those the next time, but today we will do things
# by hand just for the exercise.

# clean-up
rm(list = ls())


########################### EXERCISES #############################


# Exercise 3
# (a) Load the data from problem 10.1 (pr1001.txt).
# (b) Plot the data
# (c) Create a design matrix for a straight line
# (d) Estimate the slope and intercept for the best-fitting straight line,
# and plot this
# (e) Repeat (c) and (d) with a second order polynomial (the command
# curve(a*x^2+b*x+c,xmin,xmax,add=TRUE) is useful for plotting this)
# (f) Do you think the second order polynomial fits the data better than the
# straight line?

