#############################
#                           #
#        LECTURE TWO        #
#                           #
#############################

############################ PART 1 ################################


# We use the data from exercise 2.22 (p. 61) - 16 repair times of electronic
# equipment measured in hours
times = scan("pr0222.txt")
# We will test the hypothesis that the mean repair time is 225
# H0: mu = 225
# H1: mu != 225
# Let's find the average repair time
mean(times) # This looks bigger than 225 - but is the difference significant?
# t.test() lets us test this formally in an easy way
t.test(times, mu = 225) # Nope, this is not significant - p=0.2899
# Note that the confidence interval is also given
# Could also compare the test statistic t0=1.097 with the acceptance region
n = length(times)
# 2.5% and 97.5% quantiles of the t-distribution with n-1 degrees of freedom
qt(c(0.025, 0.975), n - 1) # t0 is within these limits
# Cleaning up: remove only the objects created in this part, so that
# unrelated objects in the workspace are left untouched
rm(times, n)

########################## EXERCISES ###############################


# Do exercise 2.20. Think about whether you should use a one-sided or a
# two-sided hypothesis. The help page, ?t.test, shows how to do a
# one-sided test (see the 'alternative' argument).


############################ PART 2 ################################


# The first dataset in Montgomery, Chapter 2
# The scan() command reads a file and produces a vector.
# The read.table() command reads a file and produces a table (a data frame).
cement = read.table("mortar2.txt", header = TRUE)

# We check the data
cement  # Data p.26 in Montgomery, one line per item
class(cement)  # R is object-oriented
plot(cement)

# I'll show how the formulae on slides (or p. 38 in the book) are used
# directly
y = cement$y
y1 = y[1:10]  # Subsetting by position
y2 = y[cement$mortar == "U"]  # Alternative method. Notice ==, not =
# There are several ways to display the result of an assignment
m1 = mean(y1); m1  # Check with the book, p. 38
m2 = mean(y2); m2
v1 = var(y1); v1
v2 = var(y2); v2
s1 = sd(y1); s1
s2 = sd(y2); s2
n1 = length(y1); n1
n2 = length(y2); n2
# Degrees of freedom of the pooled t-test, computed from the sample sizes
# rather than hard-coded
dof = n1 + n2 - 2; dof
# The pooled variance is found
v.pool = ((n1 - 1)*v1 + (n2 - 1)*v2)/dof
s.pool = sqrt(v.pool); s.pool  # Check with p.39 in the book
t0 = (m1 - m2)/(s.pool*sqrt(1/n1 + 1/n2)); t0
# Use of built-in statistical tables, qt = quantile in t-distribution
qt(c(0.025, 0.975), dof)  # Gives the 2.5% and 97.5% quantiles
# The two-sided p-value; one-starred significance.
# Using -abs(t0) makes the formula valid whatever the sign of t0
# (2*pt(t0, dof) would exceed 1 for a positive t0).
2*pt(-abs(t0), dof)
# A precondition for this test is equal variances. Let's test that!
# The statistic is called F0, not F: assigning to F would mask the use of
# F as an alias for FALSE (FALSE itself would still work).
F0 = v1/v2; F0
qf(c(0.025, 0.975), n1 - 1, n2 - 1)  # Gives the quantiles, no significance
# The two-sided p-value, high, i.e. no significance.
# Twice the smaller tail probability, so it is valid for F0 < 1 as well.
2*min(pf(F0, n1 - 1, n2 - 1), 1 - pf(F0, n1 - 1, n2 - 1))

# Of course, R has a simpler method
t.test(y1, y2)  # Simpler but uses the Welch test (i.e. unequal variances)
var.test(y1, y2)  # No indication of different variances
# The pooled (equal variances) test; compare with t0 and the p-value above.
# Always write TRUE/FALSE in full: T and F are ordinary variables that can
# be reassigned.
t.test(y1, y2, var.equal = TRUE)

# Normal probability plot.
# Axes reversed compared to a part of Figure 2.11, p.42
# If you prefer the axes as in the book, use the option datax=TRUE
qqnorm(y1)
qqline(y1, col = "green")

# Tidying up: remove only the objects created in this part, so that
# unrelated objects in the workspace are left untouched
rm(cement, y, y1, y2, m1, m2, v1, v2, s1, s2, n1, n2,
   dof, v.pool, s.pool, t0, F0)

######################### EXERCISES ###########################

# Do Problem 2.25 (p. 61)
# You can find the data in the file "pr0225.txt"   

