Skip to contents

dta_recode_common_labels() recodes specified columns in a data frame to apply consistent labels and values. It supports numeric or factor conversion and handles predefined label categories.

Usage

dta_recode_common_labels(
  dat,
  .columns,
  labels = c("No", "Yes"),
  values = NULL,
  is_reverse = FALSE,
  is_ordered = NULL,
  as_numeric = FALSE,
  is_force_sequential = FALSE
)

Arguments

dat

A data frame containing the columns to be recoded.

.columns

A tidy selection of columns to recode.

labels

A vector of labels to assign to the recoded values. Alternatively, a single character string naming a predefined label category (e.g., "yesno1", "likert1"; see Details).

values

A vector of values (typically numeric) corresponding to the labels, one value per label. If NULL, defaults to sequential integers starting from 1.

is_reverse

Logical, whether to reverse the order of values. Default is FALSE.

is_ordered

Logical, whether the recoded variables should be ordered factors. Can be a single value or a vector corresponding to the selected columns. Defaults to NULL (treated as FALSE for all columns).

as_numeric

Logical, whether to return the recoded columns as numeric values rather than factors. Default is FALSE.

is_force_sequential

Logical, whether to force sequential values, that is, values that start at 1 and increase by 1. Default is FALSE.

Value

A modified tibble with recoded columns.

Details

The function allows flexible recoding by specifying custom or predefined labels. If labels is a predefined category (e.g., yesno1 for Yes/No), it is expanded automatically using the dta_categories() function, which returns the following.

Yes/No Categories

yesno1

c("No", "Yes")

yesno2

c("No", "Yes", "Don't know")

yesno3

c("No", "Yes", "Prefer not to say")

yesno4

c("No", "Yes", "Don't know", "Prefer not to say")

Likert Scale Categories

likert1

c("Strong disagree", "Disagree", "Neutral", "Agree", "Strongly agree")

likert2

c("Strong disagree", "Disagree", "Undecided", "Agree", "Strongly agree")

likert3

c("Strong dissatisfied", "Dissatisfied", "Neutral", "Satisfied", "Strongly satisfied")

likert4

c("Strong dissatisfied", "Dissatisfied", "Undecided", "Satisfied", "Strongly satisfied")

likert5

c("Very ineffective", "Ineffective", "Neutral", "Effective", "Very effective")

likert6

c("Very ineffective", "Ineffective", "Undecided", "Effective", "Very effective")

These categories can be used as quick references for consistent label recoding.

Examples

library(dplyr)
data("data_sample")
dat <- data_sample[1:15, 16:21]

# Apply the values 1 / 2 to the labels No / Yes

result <- dta_recode_common_labels(
  dat, .columns = python:spss, labels = c("No", "Yes")
)
glimpse(result) # look at data type and values columns
#> Rows: 15
#> Columns: 6
#> $ r      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"…
#> $ python <fct> No, Yes, Yes, Yes, No, Yes, Yes, No, No, No, Yes, Yes, No, Yes,…
#> $ sas    <fct> No, No, No, No, No, No, No, No, Yes, No, No, No, No, No, No
#> $ stata  <fct> No, No, Yes, Yes, Yes, No, Yes, No, Yes, No, Yes, Yes, Yes, No,…
#> $ spss   <fct> No, No, Yes, No, Yes, No, No, Yes, No, Yes, No, Yes, No, Yes, No
#> $ excel  <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", "N…

# Add `as_numeric = TRUE` to return numeric
# values instead of factor

result2 <- dta_recode_common_labels(
  dat,
  .columns = python:spss,
  labels = c("No", "Yes"),
  as_numeric = TRUE
)
glimpse(result2) # look at data type and values columns
#> Rows: 15
#> Columns: 6
#> $ r      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"…
#> $ python <int> 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1
#> $ sas    <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1
#> $ stata  <int> 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2
#> $ spss   <int> 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1
#> $ excel  <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", "N…

# Use predefined label category `yesno1` which will
# assign 1 = No and 2 = Yes

result3 <- dta_recode_common_labels(
  dat,
  .columns = python:spss,
  labels = "yesno1",
  as_numeric = TRUE
)
glimpse(result3) # look at data type and values columns
#> Rows: 15
#> Columns: 6
#> $ r      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"…
#> $ python <int> 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1
#> $ sas    <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1
#> $ stata  <int> 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2
#> $ spss   <int> 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1
#> $ excel  <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", "N…

# ======================================================

data("data_phone")
glimpse(data_phone)
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <chr> "Disagree", "Neutral", "Strongly Disagree", "Neu…
#> $ battery_life          <chr> "Strongly Disagree", "Strongly Agree", "Neutral"…
#> $ camera_quality        <chr> "Strongly Agree", "Strongly Disagree", "Strongly…
#> $ value_for_money       <chr> "Agree", "Strongly Agree", "Strongly Disagree", …
#> $ design_and_appearance <chr> "Strongly Agree", "Strongly Agree", "Disagree", …

# Create the categories

mrq_options <- c(
  "Strongly Disagree",
  "Disagree",
  "Neutral",
  "Agree",
  "Strongly Agree"
)

# Recode the columns `ease_of_use` to `design_and_appearance`
# as follows: 
  # "Strongly Disagree" = 1,
  # "Disagree" = 2,
  # "Neutral" = 3,
  # "Agree" = 4,
  # "Strongly Agree" = 5

result4 <- dta_recode_common_labels(
  dat = data_phone,
  .columns = ease_of_use:design_and_appearance,
  labels = mrq_options,
  is_ordered = TRUE
)
glimpse(result4) # look at data type and values columns
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <ord> Disagree, Neutral, Strongly Disagree, Neutral, D…
#> $ battery_life          <ord> Strongly Disagree, Strongly Agree, Neutral, Agre…
#> $ camera_quality        <ord> Strongly Agree, Strongly Disagree, Strongly Agre…
#> $ value_for_money       <ord> Agree, Strongly Agree, Strongly Disagree, Neutra…
#> $ design_and_appearance <ord> Strongly Agree, Strongly Agree, Disagree, Strong…

# To reverse the codes, that is,
  # "Strongly Disagree" = 5,
  # "Disagree" = 4,
  # "Neutral" = 3,
  # "Agree" = 2,
  # "Strongly Agree" = 1
# and return numeric values, use the following syntax

result5 <- dta_recode_common_labels(
  dat = data_phone,
  .columns = ease_of_use:design_and_appearance,
  labels = mrq_options,
  is_reverse = TRUE,
  is_ordered = TRUE,
  as_numeric = TRUE
)
glimpse(result5) # look at data type and values columns
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <int> 4, 3, 5, 3, 4, 2, 3, 4, 5, 3, 3, 1, 4, 4, 3
#> $ battery_life          <int> 5, 1, 3, 2, 4, 2, 4, 1, 2, 4, 2, 4, 4, 5, 3
#> $ camera_quality        <int> 1, 5, 1, 2, 2, 2, 2, 1, 4, 5, 5, 2, 1, 3, 3
#> $ value_for_money       <int> 2, 1, 5, 3, 4, 4, 1, 2, 1, 3, 1, 5, 5, 5, 3
#> $ design_and_appearance <int> 1, 1, 4, 5, 3, 1, 5, 1, 2, 2, 3, 4, 5, 2, 1

result6 <- dta_recode_common_labels(
  dat = data_phone,
  .columns = ease_of_use:design_and_appearance,
  labels = mrq_options,
  values = LETTERS[1:5],
  is_ordered = TRUE,
  is_reverse = FALSE
)
glimpse(result6) # look at data type and values columns
#> Rows: 15
#> Columns: 7
#> $ id                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#> $ phone_type            <chr> "OnePlus", "Sumsung", "OnePlus", "Google", "Sums…
#> $ ease_of_use           <chr> "B", "C", "A", "C", "B", "D", "C", "B", "A", "C"…
#> $ battery_life          <chr> "A", "E", "C", "D", "B", "D", "B", "E", "D", "B"…
#> $ camera_quality        <chr> "E", "A", "E", "D", "D", "D", "D", "E", "B", "A"…
#> $ value_for_money       <chr> "D", "E", "A", "C", "B", "B", "E", "D", "E", "C"…
#> $ design_and_appearance <chr> "E", "E", "B", "A", "C", "E", "A", "E", "D", "D"…