###############################
#                             #
#        LECTURE SEVEN        #
#                             #
###############################


######################### PART 1 ###############################

## Example 10.1, p.393
# We model the viscosity of a polymer as a function of
# reaction temperature and catalyst feed rate.
# In Lecture 6 we used matrix methods on this dataset.
# Now we let R's built-in regression tools do the work.
polymer <- read.table("polymer.txt", header = TRUE)
polymer
# This is what you see in Table 10.2 (p. 455).

# Fit the multiple linear regression with lm, the command for
# regression in R.
reg <- lm(y ~ temp + cat, data = polymer)
# Compute the summary once and reuse it below.
reg_summary <- summary(reg)
reg_summary
# In the summary we can read beta-hat, the p-value of the t-test for
# removing each single variable, the coefficients of determination,
# the F-test for removing all variables at once, and more.

# We can also extract beta-hat using the coef-command:
coef(reg)
# Or the coefficients of determination (R^2 and adjusted R^2):
reg_summary$r.squared
reg_summary$adj.r.squared

# The p-values above were for the t-test, but we can also use
# the F-test - here we try to remove the variable cat by comparing
# the reduced model with the full one:
reg2 <- lm(y ~ temp, data = polymer)
anova(reg2, reg)
# Notice that the p-value is the same as in the t-test for cat.

# Clean up only the objects created in this part; rm(list = ls())
# would also delete everything else in the workspace.
rm(polymer, reg, reg_summary, reg2)


############################ EXERCISE ###########################


# Exercise 1:
# Problem 10.7, p.475, supplemented with:
# (d) Check the determination coefficients


############################## PART 2 #############################


# We refit the model
polymer <- read.table("polymer.txt", header = TRUE)
reg <- lm(y ~ temp + cat, data = polymer)
# Now we know what all the values in the table of coefficients mean
coef(summary(reg))
# Note that the column Std. Error is hat(sigma)_E*sqrt(C_ii)

# To demonstrate the intervals for more than one set of values for the
# explanatory variables, we take temp=90, cat=10 and temp=100, cat=9.
# Before we begin, we compute the point estimates by hand:
beta_hat <- coef(reg)
beta_hat
# (The results are printed under the name "(Intercept)" because the
# arithmetic keeps the name of the first element used.)
beta_hat[1] + beta_hat[2] * 90 + beta_hat[3] * 10
beta_hat[1] + beta_hat[2] * 100 + beta_hat[3] * 9
# We make a simple new data frame with one row per set of values:
new_obs <- data.frame(temp = c(90, 100), cat = c(10, 9))
new_obs
# The default confidence level for "predict" is 95%.
# The mean-response interval:
predict(reg, newdata = new_obs, interval = "confidence")
# The prediction interval:
predict(reg, newdata = new_obs, interval = "prediction")

# Residual diagnostics
res <- resid(reg)
# QQ-plot with reference line
qqnorm(res)
qqline(res)
# Residuals vs fitted values (dashed line marks zero)
plot(res ~ fitted(reg))
abline(h = 0, lty = 2)
# Residuals vs each explanatory variable
plot(res ~ polymer$temp)
abline(h = 0, lty = 2)
plot(res ~ polymer$cat)
abline(h = 0, lty = 2)
# Clean up only the objects created in this part; rm(list = ls())
# would also delete everything else in the workspace.
rm(polymer, reg, beta_hat, new_obs, res)


######################## EXERCISES ###############################

# Exercise 2
# Continue with the data from Problem 10.6, and do the following:
# (d) Find the 95% confidence interval for the estimated coefficient
# for the engine speed (rpm) (use the formula from the slides)
# (e) Find y-hat for rpm = 200, oct = 95, comp = 100, and calculate
# both the confidence interval and prediction interval.
# (f) Make residual plots.

# Exercise 3
# A regression plot using simulation.
# Run the following R code, and think about what happens at
# every step.
# Draw 100 x-values uniformly between 50 and 150
x <- runif(n = 100, min = 50, max = 150)
# True model: slope 2 through the origin, plus normal noise (sd 50)
y <- 2 * x + rnorm(n = 100, mean = 0, sd = 50)
# Scatter plot - notice the different scales on the axes
plot(x, y)
# Theoretical regression line (red)
abline(a = 0, b = 2, col = "red")
# Empirical regression line (blue), estimated from the simulated data
reg <- lm(y ~ x)
abline(reg, col = "blue")
# Residuals and fitted values from the estimated model
res <- residuals(reg)
yhat <- fitted.values(reg)
# Residuals vs fitted values
plot(res ~ yhat)
abline(h = 0, lty = 2)
# Residuals vs the explanatory variable
plot(res ~ x)
abline(h = 0, lty = 2)
# Index plot: residuals in observation order
plot(res)
abline(h = 0, lty = 2)
# OOPS. The useless one: residuals vs the observed response!
plot(res ~ y)
abline(h = 0, lty = 2)
# Now try to change some of the code, such as the model,
# and see what happens.


