Recode columns using common labels

dta_recode_common_labels() recodes specified columns in a data frame to apply consistent labels and values. It supports numeric or factor conversion and handles predefined label categories.

Usage

dta_recode_common_labels(
  dat,
  .columns,
  labels = c("No", "Yes"),
  values = NULL,
  is_reverse = FALSE,
  is_ordered = NULL,
  as_numeric = FALSE,
  is_force_sequential = FALSE
)

Arguments

dat: A data frame containing the columns to be recoded.
.columns: A tidy selection of columns to recode.
labels: A vector of labels to assign to the recoded values. Alternatively, a single character string naming a predefined label category (e.g., "yesno1", "likert1"; see Details).
values: A vector of values (typically numeric) corresponding to the labels, one value per label. If NULL, defaults to sequential integers starting from 1.
is_reverse: Logical, whether to reverse the order of values. Default is FALSE.
is_ordered: Logical, whether the recoded variables should be ordered factors. Can be a single value or a vector corresponding to the selected columns. Defaults to NULL (treated as FALSE for all columns).
as_numeric: Logical, whether to return the recoded columns as numeric values rather than factors. Default is FALSE.
is_force_sequential: Logical, whether to force the values to be sequential, that is, to start at 1 and increase by 1. Default is FALSE.

Value

A modified tibble with recoded columns.

Details

The function allows flexible recoding by specifying custom or predefined labels. If labels is the name of a predefined category (e.g., yesno1 for Yes/No), it is expanded automatically using the dta_categories() function, which returns the following.

Yes/No Categories

yesno1: c("No", "Yes")
yesno2: c("No", "Yes", "Don't know")
yesno3: c("No", "Yes", "Prefer not to say")
yesno4: c("No", "Yes", "Don't know", "Prefer not to say")

Likert Scale Categories

likert1: c("Strong disagree", "Disagree", "Neutral", "Agree", "Strongly agree")
likert2: c("Strong disagree", "Disagree", "Undecided", "Agree", "Strongly agree")
likert3: c("Strong dissatisfied", "Dissatisfied", "Neutral", "Satisfied", "Strongly satisfied")
likert4: c("Strong dissatisfied", "Dissatisfied", "Undecided", "Satisfied", "Strongly satisfied")
likert5: c("Very ineffective", "Ineffective", "Neutral", "Effective", "Very effective")
likert6: c("Very ineffective", "Ineffective", "Undecided", "Effective", "Very effective")

These categories can be used as quick references for consistent label recoding.

Examples

library(dplyr)
data("data_sample")
dat <- data_sample[1:15, 16:21]

# Apply the values 1 / 2 to the labels No / Yes

result <- dta_recode_common_labels(
  dat, .columns = python:spss, labels = c("No", "Yes")
)
glimpse(result) # look at data type and values columns
#> Rows: 15
#> Columns: 6
#> $ r      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"…
#> $ python <fct> No, Yes, Yes, Yes, No, Yes, Yes, No, No, No, Yes, Yes, No, Yes,…
#> $ sas    <fct> No, No, No, No, No, No, No, No, Yes, No, No, No, No, No, No
#> $ stata  <fct> No, No, Yes, Yes, Yes, No, Yes, No, Yes, No, Yes, Yes, Yes, No,…
#> $ spss   <fct> No, No, Yes, No, Yes, No, No, Yes, No, Yes, No, Yes, No, Yes, No
#> $ excel  <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", "N…

# Add `as_numeric = TRUE` to return numeric
# values instead of factor

result2 <- dta_recode_common_labels(
  dat,
  .columns = python:spss,
  labels = c("No", "Yes"),
  as_numeric = TRUE
)
glimpse(result2) # look at data type and values columns
#> Rows: 15
#> Columns: 6
#> $ r      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"…
#> $ python <int> 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1
#> $ sas    <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1
#> $ stata  <int> 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2
#> $ spss   <int> 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1
#> $ excel  <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", "N…

# Use predefined label category `yesno1` which will
# assign 1 = No and 2 = Yes

result3 <- dta_recode_common_labels(
  dat,
  .columns = python:spss,
  labels = "yesno1",
  as_numeric = TRUE
)
glimpse(result3) # look at data type and values columns
#> Rows: 15
#> Columns: 6
#> $ r      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"…
#> $ python <int> 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1
#> $ sas    <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1
#> $ stata  <int> 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2
#> $ spss   <int> 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1
#> $ excel  <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", "N…

# ======================================================

data("data_phone")
glimpse(data_phone)
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <chr> "Disagree", "Neutral", "Strongly Disagree", "Neu…
#> $ battery_life          <chr> "Strongly Disagree", "Strongly Agree", "Neutral"…
#> $ camera_quality        <chr> "Strongly Agree", "Strongly Disagree", "Strongly…
#> $ value_for_money       <chr> "Agree", "Strongly Agree", "Strongly Disagree", …
#> $ design_and_appearance <chr> "Strongly Agree", "Strongly Agree", "Disagree", …

# Create the categories

mrq_options <- c(
  "Strongly Disagree",
  "Disagree",
  "Neutral",
  "Agree",
  "Strongly Agree"
)

# Recode the columns `ease_of_use` to `design_and_appearance`
# as follows:
  # "Strongly Disagree" = 1,
  # "Disagree" = 2,
  # "Neutral" = 3,
  # "Agree" = 4,
  # "Strongly Agree" = 5

result4 <- dta_recode_common_labels(
  dat = data_phone,
  .columns = ease_of_use:design_and_appearance,
  labels = mrq_options,
  is_ordered = TRUE
)
glimpse(result4) # look at data type and values columns
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <ord> Disagree, Neutral, Strongly Disagree, Neutral, D…
#> $ battery_life          <ord> Strongly Disagree, Strongly Agree, Neutral, Agre…
#> $ camera_quality        <ord> Strongly Agree, Strongly Disagree, Strongly Agre…
#> $ value_for_money       <ord> Agree, Strongly Agree, Strongly Disagree, Neutra…
#> $ design_and_appearance <ord> Strongly Agree, Strongly Agree, Disagree, Strong…

# To reverse the codes, that is,
  # "Strongly Disagree" = 5,
  # "Disagree" = 4,
  # "Neutral" = 3,
  # "Agree" = 2,
  # "Strongly Agree" = 1
# and return numeric values, use the following syntax

result5 <- dta_recode_common_labels(
  dat = data_phone,
  .columns = ease_of_use:design_and_appearance,
  labels = mrq_options,
  is_reverse = TRUE,
  is_ordered = TRUE,
  as_numeric = TRUE
)
glimpse(result5) # look at data type and values columns
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <int> 4, 3, 5, 3, 4, 2, 3, 4, 5, 3, 3, 1, 4, 4, 3
#> $ battery_life          <int> 5, 1, 3, 2, 4, 2, 4, 1, 2, 4, 2, 4, 4, 5, 3
#> $ camera_quality        <int> 1, 5, 1, 2, 2, 2, 2, 1, 4, 5, 5, 2, 1, 3, 3
#> $ value_for_money       <int> 2, 1, 5, 3, 4, 4, 1, 2, 1, 3, 1, 5, 5, 5, 3
#> $ design_and_appearance <int> 1, 1, 4, 5, 3, 1, 5, 1, 2, 2, 3, 4, 5, 2, 1

# Supply custom `values` (here the letters A to E) instead of
# the default sequential integers

result6 <- dta_recode_common_labels(
  dat = data_phone,
  .columns = ease_of_use:design_and_appearance,
  labels = mrq_options,
  values = LETTERS[1:5],
  is_ordered = TRUE,
  is_reverse = FALSE
)
glimpse(result6) # look at data type and values columns
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <chr> "B", "C", "A", "C", "B", "D", "C", "B", "A", "C"…
#> $ battery_life          <chr> "A", "E", "C", "D", "B", "D", "B", "E", "D", "B"…
#> $ camera_quality        <chr> "E", "A", "E", "D", "D", "D", "D", "E", "B", "A"…
#> $ value_for_money       <chr> "D", "E", "A", "C", "B", "B", "E", "D", "E", "C"…
#> $ design_and_appearance <chr> "E", "E", "B", "A", "C", "E", "A", "E", "D", "D"…