7 How to create contingency tables
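We can use table(), addmargins(), prop.table() and as.data.frame.matrix() to create the contingency tables that we want: table() counts the combinations of two variables, addmargins() adds row and column totals, prop.table() turns counts into proportions, and as.data.frame.matrix() converts the result into a data frame that keeps the two-way layout. See this example: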
# Run this example in a fresh R session rather than calling rm(list = ls()):
# rm() only deletes user-created objects (attached packages and options are
# left untouched) and would silently wipe whatever is in the reader's workspace.
# load packages
library(dplyr) # provides the %>% pipe used further down
# simulate 20 observations of two categorical variables
# (sample() is random, so a rerun will generally not reproduce the output below)
fk_data <- data.frame(
  x1 = sample(letters[1:5], size = 20, replace = TRUE),
  x2 = sample(LETTERS[1:5], size = 20, replace = TRUE)
)
# inspect the simulated data
print(fk_data)
##    x1 x2
## 1   a  E
## 2   e  D
## 3   d  B
## 4   e  B
## 5   c  B
## 6   e  E
## 7   b  B
## 8   c  C
## 9   a  E
## 10  b  B
## 11  e  B
## 12  c  C
## 13  b  A
## 14  b  D
## 15  d  D
## 16  b  C
## 17  d  A
## 18  c  E
## 19  b  A
## 20  a  A
# cross-tabulate the counts: x1 defines the rows, x2 the columns
my_table_0 <- table(fk_data[["x1"]], fk_data[["x2"]])
print(my_table_0)
##
##     A B C D E
##   a 1 0 0 0 2
##   b 2 2 1 1 0
##   c 0 1 2 0 1
##   d 1 1 0 1 0
##   e 0 2 0 1 1
# append the row and column totals (labelled "Sum") to the table of counts
my_table_01 <- addmargins(A = my_table_0)
print(my_table_01)
##
##       A B C D E Sum
##   a   1 0 0 0 2   3
##   b   2 2 1 1 0   6
##   c   0 1 2 0 1   4
##   d   1 1 0 1 0   3
##   e   0 2 0 1 1   4
##   Sum 4 6 3 3 4  20
# turn the table of counts into a data frame that keeps the two-way layout
my_table_1 <- as.data.frame.matrix(x = my_table_0)
# inspect the converted table
print(my_table_1)
##   A B C D E
## a 1 0 0 0 2
## b 2 2 1 1 0
## c 0 1 2 0 1
## d 1 1 0 1 0
## e 0 2 0 1 1
# row proportions: margin = 1 divides each count by its row total,
# so every row sums to 1; the result is then converted to a data frame
my_table_2 <- my_table_0 %>%
  prop.table(margin = 1) %>%
  as.data.frame.matrix()
# inspect the proportions, rounded for display only
print(my_table_2, digits = 2)
##      A    B    C    D    E
## a 0.33 0.00 0.00 0.00 0.67
## b 0.33 0.33 0.17 0.17 0.00
## c 0.00 0.25 0.50 0.00 0.25
## d 0.33 0.33 0.00 0.33 0.00
## e 0.00 0.50 0.00 0.25 0.25
# column proportions: margin = 2 divides each count by its column total,
# so every column sums to 1; the result is then converted to a data frame
my_table_3 <- my_table_0 %>%
  prop.table(margin = 2) %>%
  as.data.frame.matrix()
# inspect the proportions, rounded for display only
print(my_table_3, digits = 2)
##      A    B    C    D    E
## a 0.25 0.00 0.00 0.00 0.50
## b 0.50 0.33 0.33 0.33 0.00
## c 0.00 0.17 0.67 0.00 0.25
## d 0.25 0.17 0.00 0.33 0.00
## e 0.00 0.33 0.00 0.33 0.25
Remark: If there are NAs, the table() function ignores them by default. One way to include them is the useNA argument, e.g. table(x, y, useNA = "ifany"); alternatively, we can use dplyr::tally() plus tidyr::spread(), as the following example shows. For more details about dplyr::tally(), see the next chapter, How to tally.
# Run this example in a fresh R session rather than calling rm(list = ls()):
# rm() only deletes user-created objects (attached packages and options are
# left untouched) and would silently wipe whatever is in the reader's workspace.
# load packages
library(dplyr)
library(tidyr) # for spread()
# build a small data set in which both variables contain missing values
fk_data <- data.frame(
  category_1 = c("A", "A", "A", "B", "C", "C", NA, NA),
  category_2 = c("a", "a", "b", "b", NA, NA, NA, "c")
)
# show the table created by table(): rows with an NA in either variable
# are not counted (only 4 of the 8 observations appear)
print(table(fk_data[["category_1"]], fk_data[["category_2"]]))
##
##     a b c
##   A 2 1 0
##   B 0 1 0
##   C 0 0 0
# count every (category_1, category_2) combination with dplyr::tally(), which
# keeps NA as a group of its own, then use tidyr::spread() to give each
# category_2 value its own column; combinations that never occur show up as NA
a_table <- fk_data %>%
  group_by(category_1, category_2) %>%
  tally() %>%
  spread(category_2, n)
print.data.frame(a_table)
##   category_1  a  b  c <NA>
## 1          A  2  1 NA   NA
## 2          B NA  1 NA   NA
## 3          C NA NA NA    2
## 4       <NA> NA NA  1    1