Contingency tables

The ASTA team

Contingency tables

A contingency table

popKids <- read.delim("https://asta.math.aau.dk/datasets?file=PopularKids.txt")
library(mosaic)
tab <- tally(~Urban.Rural + Goals, data = popKids, margins = TRUE)
tab
##            Goals
## Urban.Rural Grades Popular Sports Total
##    Rural        57      50     42   149
##    Suburban     87      42     22   151
##    Urban       103      49     26   178
##    Total       247     141     90   478

A conditional distribution

tab <- tally(~Urban.Rural + Goals, data = popKids)
addmargins(round(100 * prop.table(tab, 1)),margin = 1:2)
##            Goals
## Urban.Rural Grades Popular Sports Sum
##    Rural        38      34     28 100
##    Suburban     58      28     15 101
##    Urban        58      28     15 101
##    Sum         154      90     58 302

Independence

Independence

##            Goals
## Urban.Rural Grades Popular Sports
##    Rural       500     300    200
##    Suburban    500     300    200
##    Urban       500     300    200

The Chi-squared test for independence

n <- margin.table(tab)
pctGoals <- round(100 * margin.table(tab, 2)/n, 1)
pctGoals
## Goals
##  Grades Popular  Sports 
##    51.7    29.5    18.8
##            Goals
## Urban.Rural Grades        Popular       Sports        Sum         
##    Rural     77.0 (51.7%)  44.0 (29.5%)  28.1 (18.8%) 149.0 (100%)
##    Suburban  78.0 (51.7%)  44.5 (29.5%)  28.4 (18.8%) 151.0 (100%)
##    Urban     92.0 (51.7%)  52.5 (29.5%)  33.5 (18.8%) 178.0 (100%)
##    Sum      247.0 (51.7%) 141.0 (29.5%)  90.0 (18.8%) 478.0 (100%)

Calculation of expected table

pctexptab
##            Goals
## Urban.Rural Grades        Popular       Sports        Sum         
##    Rural     77.0 (51.7%)  44.0 (29.5%)  28.1 (18.8%) 149.0 (100%)
##    Suburban  78.0 (51.7%)  44.5 (29.5%)  28.4 (18.8%) 151.0 (100%)
##    Urban     92.0 (51.7%)  52.5 (29.5%)  33.5 (18.8%) 178.0 (100%)
##    Sum      247.0 (51.7%) 141.0 (29.5%)  90.0 (18.8%) 478.0 (100%)

Chi-squared (\(\chi^2\)) test statistic

tab
##            Goals
## Urban.Rural Grades Popular Sports
##    Rural        57      50     42
##    Suburban     87      42     22
##    Urban       103      49     26
##            Goals
## Urban.Rural Grades Popular Sports Sum  
##    Rural     77.0   44.0    28.1  149.0
##    Suburban  78.0   44.5    28.4  151.0
##    Urban     92.0   52.5    33.5  178.0
##    Sum      247.0  141.0    90.0  478.0

\(\chi^2\)-test template.

1 - pdist("chisq", 18.8, df = 4)

## [1] 0.0008603303

The function chisq.test.

tab <- tally(~ Urban.Rural + Goals, data = popKids)
testStat <- chisq.test(tab, correct = FALSE)
testStat
## 
##  Pearson's Chi-squared test
## 
## data:  tab
## X-squared = 18.828, df = 4, p-value = 0.0008497
testStat$expected
##            Goals
## Urban.Rural   Grades  Popular   Sports
##    Rural    76.99372 43.95188 28.05439
##    Suburban 78.02720 44.54184 28.43096
##    Urban    91.97908 52.50628 33.51464
data <- c(57, 87, 103, 50, 42, 49, 42, 22, 26)
tab <- matrix(data, nrow = 3, ncol = 3)
row.names(tab) <- c("Rural", "Suburban", "Urban")
colnames(tab) <- c("Grades", "Popular", "Sports")
tab
##          Grades Popular Sports
## Rural        57      50     42
## Suburban     87      42     22
## Urban       103      49     26
chisq.test(tab)
## 
##  Pearson's Chi-squared test
## 
## data:  tab
## X-squared = 18.828, df = 4, p-value = 0.0008497

The \(\chi^2\)-distribution

The \(\chi^2\)-distribution

Agresti - Summary

Summary

Standardized residuals

Residual analysis

Residual analysis in R

tab <- tally(~ Urban.Rural + Goals, data = popKids)
testStat <- chisq.test(tab, correct = FALSE)
testStat$stdres
##            Goals
## Urban.Rural     Grades    Popular     Sports
##    Rural    -3.9508449  1.3096235  3.5225004
##    Suburban  1.7666608 -0.5484075 -1.6185210
##    Urban     2.0865780 -0.7274327 -1.8186224

Models for table data in R

Example

HairEyeColor <- read.delim("https://asta.math.aau.dk/datasets?file=HairEyeColor.txt")
head(HairEyeColor)
##    Hair   Eye  Sex Freq
## 1 Black Brown Male   32
## 2 Brown Brown Male   53
## 3   Red Brown Male   10
## 4 Blond Brown Male    3
## 5 Black  Blue Male   11
## 6 Brown  Blue Male   50
HairEye <- aggregate(Freq ~ Eye + Hair, FUN = sum, data = HairEyeColor)
HairEye
##      Eye  Hair Freq
## 1   Blue Black   20
## 2  Brown Black   68
## 3  Green Black    5
## 4  Hazel Black   15
## 5   Blue Blond   94
## 6  Brown Blond    7
## 7  Green Blond   16
## 8  Hazel Blond   10
## 9   Blue Brown   84
## 10 Brown Brown  119
## 11 Green Brown   29
## 12 Hazel Brown   54
## 13  Blue   Red   17
## 14 Brown   Red   26
## 15 Green   Red   14
## 16 Hazel   Red   14

Model specification

Model specification in R

model <- glm(Freq ~ Hair + Eye, family = poisson, data = HairEye)
summary(model)
## 
## Call:
## glm(formula = Freq ~ Hair + Eye, family = poisson, data = HairEye)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -7.326  -2.065  -0.212   1.235   6.172  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.66926    0.11055  33.191  < 2e-16 ***
## HairBlond    0.16206    0.13089   1.238  0.21569    
## HairBrown    0.97386    0.11294   8.623  < 2e-16 ***
## HairRed     -0.41945    0.15279  -2.745  0.00604 ** 
## EyeBrown     0.02299    0.09590   0.240  0.81054    
## EyeGreen    -1.21175    0.14239  -8.510  < 2e-16 ***
## EyeHazel    -0.83804    0.12411  -6.752 1.46e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 453.31  on 15  degrees of freedom
## Residual deviance: 146.44  on  9  degrees of freedom
## AIC: 241.04
## 
## Number of Fisher Scoring iterations: 5
1 - pdist("chisq", 146.44, df = 9)

## [1] 0

Expected values and standardized residuals

HairEye$fitted <- fitted(model)
HairEye$resid <- rstudent(model)
HairEye
##      Eye  Hair Freq fitted  resid
## 1   Blue Black   20  39.22 -4.492
## 2  Brown Black   68  40.14  5.856
## 3  Green Black    5  11.68 -2.508
## 4  Hazel Black   15  16.97 -0.583
## 5   Blue Blond   94  46.12  9.368
## 6  Brown Blond    7  47.20 -9.423
## 7  Green Blond   16  13.73  0.719
## 8  Hazel Blond   10  19.95 -2.936
## 9   Blue Brown   84 103.87 -3.437
## 10 Brown Brown  119 106.28  2.151
## 11 Green Brown   29  30.92 -0.511
## 12 Hazel Brown   54  44.93  2.023
## 13  Blue   Red   17  25.79 -2.399
## 14 Brown   Red   26  26.39 -0.101
## 15 Green   Red   14   7.68  2.368
## 16 Hazel   Red   14  11.15  0.961