Skip to contents

dta_recode_common_labels() recodes specified columns in a data frame to apply consistent labels and values. It supports numeric or factor conversion and handles predefined label categories.


  labels = c("No", "Yes"),
  values = NULL,
  is_reverse = FALSE,
  is_ordered = NULL,
  as_numeric = FALSE,
  is_force_sequential = FALSE



A data frame containing the columns to be recoded.


A tidy selection of columns to recode.


A vector of labels to assign to the recoded values. Alternatively, a single character string (e.g., "yn1", "l1", etc.) for predefined label categories.


A numeric vector of values corresponding to the labels. If NULL, defaults to sequential integers starting from 1.


Logical, whether to reverse the order of values. Default is FALSE.


Logical, whether the recoded variables should be ordered factors. Can be a single value or a vector corresponding to the selected columns. Defaults to NULL (treated as FALSE for all columns).


Logical, whether to return the recoded columns as numeric values rather than factors. Default is FALSE.


Logical, whether to force sequential values, that is, values that start at 1 and increase by 1. Default is FALSE.


A modified tibble with recoded columns.


The function allows flexible recoding by specifying custom or predefined labels. If labels is a predefined category (e.g., yesno1 for Yes/No), it is expanded automatically using the dta_categories() function, which returns the following.

Yes/No Categories


c("No", "Yes")


c("No", "Yes", "Don't know")


c("No", "Yes", "Prefer not to say")


c("No", "Yes", "Don't know", "Prefer not to say")

Likert Scale Categories


c("Strong disagree", "Disagree", "Neutral", "Agree", "Strongly agree")


c("Strong disagree", "Disagree", "Undecided", "Agree", "Strongly agree")


c("Strong dissatisfied", "Dissatisfied", "Neutral", "Satisfied", "Strongly satisfied")


c("Strong dissatisfied", "Dissatisfied", "Undecided", "Satisfied", "Strongly satisfied")


c("Very ineffective", "Ineffective", "Neutral", "Effective", "Very effective")


c("Very ineffective", "Ineffective", "Undecided", "Effective", "Very effective")

These categories can be used as quick references for consistent label recoding.


dat <- data_sample[1:15, 16:21]

# Apply the values 1 / 2 to the labels No / Yes

result <- dta_recode_common_labels(
  dat, .columns = python:spss, labels = c("No", "Yes")
glimpse(result) # look at data type and values columns
#> Rows: 15
#> Columns: 6
#> $ r      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"…
#> $ python <fct> No, Yes, Yes, Yes, No, Yes, Yes, No, No, No, Yes, Yes, No, Yes,…
#> $ sas    <fct> No, No, No, No, No, No, No, No, Yes, No, No, No, No, No, No
#> $ stata  <fct> No, No, Yes, Yes, Yes, No, Yes, No, Yes, No, Yes, Yes, Yes, No,…
#> $ spss   <fct> No, No, Yes, No, Yes, No, No, Yes, No, Yes, No, Yes, No, Yes, No
#> $ excel  <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", "N…

# Add `as_numeric = TRUE` to return numeric
# values instead of factor

result2 <- dta_recode_common_labels(
  .columns = python:spss,
  labels = c("No", "Yes"),
  as_numeric = TRUE
glimpse(result2) # look at data type and values columns
#> Rows: 15
#> Columns: 6
#> $ r      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"…
#> $ python <int> 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1
#> $ sas    <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1
#> $ stata  <int> 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2
#> $ spss   <int> 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1
#> $ excel  <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", "N…

# Use predefined label category `yesno1` which will
# assign 1 = No and 2 = Yes

result3 <- dta_recode_common_labels(
  .columns = python:spss,
  labels = "yesno1",
  as_numeric = TRUE
glimpse(result3) # look at data type and values columns
#> Rows: 15
#> Columns: 6
#> $ r      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"…
#> $ python <int> 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1
#> $ sas    <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1
#> $ stata  <int> 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2
#> $ spss   <int> 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1
#> $ excel  <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", "N…

# ======================================================

#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <chr> "Disagree", "Neutral", "Strongly Disagree", "Neu…
#> $ battery_life          <chr> "Strongly Disagree", "Strongly Agree", "Neutral"…
#> $ camera_quality        <chr> "Strongly Agree", "Strongly Disagree", "Strongly…
#> $ value_for_money       <chr> "Agree", "Strongly Agree", "Strongly Disagree", …
#> $ design_and_appearance <chr> "Strongly Agree", "Strongly Agree", "Disagree", …

# Create the categories

mrq_options <- c(
  "Strongly Disagree",
  "Strongly Agree"

# Recode the columns `ease_of_use` to `design_and_appearance`
# as follows: 
  # "Strongly Disagree" = 1,
  # "Disagree" = 2,
  # "Neutral" = 3,
  # "Agree" = 4,
  # "Strongly agree" = 5

result4 <- dta_recode_common_labels(
  dat = data_phone,
  .columns = ease_of_use:design_and_appearance,
  labels = mrq_options,
  is_ordered = TRUE
glimpse(result4) # look at data type and values columns
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <ord> Disagree, Neutral, Strongly Disagree, Neutral, D…
#> $ battery_life          <ord> Strongly Disagree, Strongly Agree, Neutral, Agre…
#> $ camera_quality        <ord> Strongly Agree, Strongly Disagree, Strongly Agre…
#> $ value_for_money       <ord> Agree, Strongly Agree, Strongly Disagree, Neutra…
#> $ design_and_appearance <ord> Strongly Agree, Strongly Agree, Disagree, Strong…

# To reverse the codes, that is,
  # "Strongly Disagree" = 5,
  # "Disagree" = 4,
  # "Neutral" = 3,
  # "Agree" = 2,
  # "Strongly agree" = 1
# and return numeric values, use the following syntax

result5 <- dta_recode_common_labels(
  dat = data_phone,
  .columns = ease_of_use:design_and_appearance,
  labels = mrq_options,
  is_reverse = TRUE,
  is_ordered = TRUE,
  as_numeric = TRUE
glimpse(result5) # look at data type and values columns
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <int> 4, 3, 5, 3, 4, 2, 3, 4, 5, 3, 3, 1, 4, 4, 3
#> $ battery_life          <int> 5, 1, 3, 2, 4, 2, 4, 1, 2, 4, 2, 4, 4, 5, 3
#> $ camera_quality        <int> 1, 5, 1, 2, 2, 2, 2, 1, 4, 5, 5, 2, 1, 3, 3
#> $ value_for_money       <int> 2, 1, 5, 3, 4, 4, 1, 2, 1, 3, 1, 5, 5, 5, 3
#> $ design_and_appearance <int> 1, 1, 4, 5, 3, 1, 5, 1, 2, 2, 3, 4, 5, 2, 1

result6 <- dta_recode_common_labels(
  dat = data_phone,
  .columns = ease_of_use:design_and_appearance,
  labels = mrq_options,
  values = LETTERS[1:5],
  is_ordered = TRUE,
  is_reverse = FALSE
glimpse(result6) # look at data type and values columns
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <chr> "B", "C", "A", "C", "B", "D", "C", "B", "A", "C"…
#> $ battery_life          <chr> "A", "E", "C", "D", "B", "D", "B", "E", "D", "B"…
#> $ camera_quality        <chr> "E", "A", "E", "D", "D", "D", "D", "E", "B", "A"…
#> $ value_for_money       <chr> "D", "E", "A", "C", "B", "B", "E", "D", "E", "C"…
#> $ design_and_appearance <chr> "E", "E", "B", "A", "C", "E", "A", "E", "D", "D"…