7 How to create contingency tables

We can use table(), addmargins(), prop.table() and as.data.frame.matrix() to create the contingency tables that we want. See this example:

# NOTE: run this example in a fresh R session instead of calling
# rm(list = ls()). That call only deletes the objects in the current
# environment (including any the reader wanted to keep); it does not unload
# packages or reset options, so it does not give a clean starting state.

# load packages
library(dplyr)

# simulate 20 observations of two categorical variables:
# x1 takes the lower-case levels a-e, x2 the upper-case levels A-E
# (the draw is random, so the values differ from run to run)
fk_data <- data.frame(
  x1 = sample(c("a", "b", "c", "d", "e"), size = 20, replace = TRUE),
  x2 = sample(c("A", "B", "C", "D", "E"), size = 20, replace = TRUE)
)

# inspect the simulated data
print(fk_data)

##    x1 x2
## 1   a  E
## 2   e  D
## 3   d  B
## 4   e  B
## 5   c  B
## 6   e  E
## 7   b  B
## 8   c  C
## 9   a  E
## 10  b  B
## 11  e  B
## 12  c  C
## 13  b  A
## 14  b  D
## 15  d  D
## 16  b  C
## 17  d  A
## 18  c  E
## 19  b  A
## 20  a  A

# cross-tabulate x1 (rows) against x2 (columns)
my_table_0 <- table(fk_data[["x1"]], fk_data[["x2"]])
print(my_table_0)

##    
##     A B C D E
##   a 1 0 0 0 2
##   b 2 2 1 1 0
##   c 0 1 2 0 1
##   d 1 1 0 1 0
##   e 0 2 0 1 1

# append a "Sum" row and a "Sum" column holding the marginal totals
my_table_01 <- addmargins(A = my_table_0)
print(my_table_01)

##      
##        A  B  C  D  E Sum
##   a    1  0  0  0  2   3
##   b    2  2  1  1  0   6
##   c    0  1  2  0  1   4
##   d    1  1  0  1  0   3
##   e    0  2  0  1  1   4
##   Sum  4  6  3  3  4  20

# turn the two-way table into a data frame:
# one row per level of x1, one column per level of x2
my_table_1 <- as.data.frame.matrix(x = my_table_0)
# inspect the result
print(my_table_1)

##   A B C D E
## a 1 0 0 0 2
## b 2 2 1 1 0
## c 0 1 2 0 1
## d 1 1 0 1 0
## e 0 2 0 1 1

# row-wise proportions (margin = 1): each cell is divided by its row total,
# then the result is converted to a data frame
my_table_2 <- as.data.frame.matrix(prop.table(my_table_0, margin = 1))
# inspect the result, printed with two significant digits
print(my_table_2, digits = 2)

##      A    B    C    D    E
## a 0.33 0.00 0.00 0.00 0.67
## b 0.33 0.33 0.17 0.17 0.00
## c 0.00 0.25 0.50 0.00 0.25
## d 0.33 0.33 0.00 0.33 0.00
## e 0.00 0.50 0.00 0.25 0.25

# column-wise proportions (margin = 2): each cell is divided by its column
# total, then the result is converted to a data frame
my_table_3 <- as.data.frame.matrix(prop.table(my_table_0, margin = 2))
# inspect the result, printed with two significant digits
print(my_table_3, digits = 2)

##      A    B    C    D    E
## a 0.25 0.00 0.00 0.00 0.50
## b 0.50 0.33 0.33 0.33 0.00
## c 0.00 0.17 0.67 0.00 0.25
## d 0.25 0.17 0.00 0.33 0.00
## e 0.00 0.33 0.00 0.33 0.25

Remark: If the data contain NAs, table() drops them by default (unless its useNA argument is set). If we want to include the NAs in the table, we can use dplyr::tally() together with tidyr::spread(); the following example shows how to do this. For more details about dplyr::tally(), see the next chapter, How to tally.

# NOTE: run this example in a fresh R session instead of calling
# rm(list = ls()). That call only deletes the objects in the current
# environment (including any the reader wanted to keep); it does not unload
# packages or reset options, so it does not give a clean starting state.

# load packages
library(dplyr)
library(tidyr) # for spread()

# build a small data set in which both variables contain missing values
fk_data <- data.frame(
  category_1 = rep(c("A", "B", "C", NA), times = c(3, 1, 2, 2)),
  category_2 = rep(c("a", "b", NA, "c"), times = c(2, 2, 3, 1))
)

# show the table produced by table(): observations with an NA are left out
print(table(fk_data[["category_1"]], fk_data[["category_2"]]))

##    
##     a b c
##   A 2 1 0
##   B 0 1 0
##   C 0 0 0

# create a contingency table that keeps the NA categories, using
# dplyr::tally() and tidyr::spread()
# (tidyr documents spread() as superseded by pivot_wider(); spread() is kept
# here because it is the function this example sets out to demonstrate)
a_table <-
  fk_data %>%
  # count the rows for every observed (category_1, category_2) pair,
  # treating NA as a category of its own
  group_by(category_1, category_2) %>%
  tally() %>%
  # tally() leaves the result grouped by category_1; drop that grouping so
  # a_table is a plain, ungrouped table
  ungroup() %>%
  # one column per category_2 value; pairs that never occur get a count of 0
  # (without `fill` they would show up as NA, which is easy to confuse with
  # the NA categories themselves)
  spread(key = category_2, value = n, fill = 0L)
print.data.frame(a_table)

##   category_1 a b c <NA>
## 1          A 2 1 0    0
## 2          B 0 1 0    0
## 3          C 0 0 0    2
## 4       <NA> 0 0 1    1