Skip to contents

`dta_recode()` recodes variables in a data frame `dat` using a dictionary `dict`. The dictionary maps the original values of each variable to their new values and labels.


  sheet = 1,
  min_categories = 2,
  max_categories = 25,
  as_numeric = FALSE,
  is_force_sequential = FALSE



A data frame or tibble containing the variables to be recoded.


A data frame or tibble serving as the dictionary, specifying variable names, values, and labels.


The name or index of the worksheet that contains the data for the dictionary.


Minimum number of categories for a variable to be recoded. Defaults to 2.


Maximum number of categories for a variable to be recoded. Defaults to 25.


Logical. If TRUE, the recoded variables are returned as numeric. Defaults to FALSE.


Logical. If TRUE, the values are forced to be sequential, that is, they start at 1 and increase by 1. Defaults to FALSE.


A tibble with recoded variables. If warnings are generated, they are saved to a CSV file and displayed.


#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>     filter, lag
#> The following objects are masked from 'package:base':
#>     intersect, setdiff, setequal, union
glimpse(data_sample) # look at the data type column
#> Rows: 2,500
#> Columns: 21
#> $ id             <chr> "STM/7539", "STM/7993", "STM/7387", "STM/5598", "STM/59…
#> $ region         <chr> "Central", "Central", "South", "West", "North East", "N…
#> $ age            <dbl> 56, 46, 45, 37, 45, 51, 56, 37, 50, 38, 48, 41, 24, 34,…
#> $ age_group      <chr> "50-59", "40-49", "40-49", "30-39", "40-49", "50-59", "…
#> $ height         <dbl> 1.70, 1.57, 1.47, 1.67, 1.69, 1.90, 1.85, 1.64, 1.61, 1…
#> $ weight         <dbl> 73, 53, 85, 77, 53, 75, 69, 53, 56, 89, 73, 86, 76, 81,…
#> $ blood_group    <chr> "AB", "B", "AB", "AB", "A", "A", "AB", "B", "A", "AB", …
#> $ marital_status <chr> "Married", "Married", "Married", "Single", "Single", "M…
#> $ education      <chr> "Bachelors", "Bachelors", "Bachelors", "Bachelors", "Ba…
#> $ employed       <chr> "Yes", "No", "No", "Yes", "Yes", "No", "Yes", "No", "Ye…
#> $ ses            <chr> "Middle", "Middle", "High", "Middle", "Low", "Middle", …
#> $ language       <chr> "Mandarin", "French", "Arabic", "English", "Arabic", "M…
#> $ phone          <chr> "OnePlus", "OnePlus", "Samsung", "OnePlus", "OnePlus", …
#> $ transport      <chr> "Bicycle", "Train", "Car", "Bus", "Bus", "Bus", "Bus", …
#> $ gadgets_owned  <chr> "Smart TV, Tablet, Desktop Computer, Digital Camera, Sm…
#> $ r              <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes…
#> $ python         <chr> "No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "No", "N…
#> $ sas            <chr> "No", "No", "No", "No", "No", "No", "No", "No", "Yes", …
#> $ stata          <chr> "No", "No", "Yes", "Yes", "Yes", "No", "Yes", "No", "Ye…
#> $ spss           <chr> "No", "No", "Yes", "No", "Yes", "No", "No", "Yes", "No"…
#> $ excel          <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "No", …

names values labels is_ordered
region 1 Central 0

2 North East

3 South

4 West
age_group 1 20-29 1

2 30-39

3 40-49

4 50-59

5 60-69

6 70+
blood_group 1 A 0

2 B

3 AB

4 O
marital_status 1 Single 0

2 Married

3 Other
education 1 Bachelors 1

2 Masters

3 Doctorate
employed 0 No 0

1 Yes
ses 1 Low 1

2 Middle

3 High
language 1 English 0

2 French

3 Spanish

4 Arabic

5 Mandarin

6 Other
phone 0 None 0

1 Samsung

2 Apple

3 Xiaomi

4 OnePlus

5 Google

6 Other
transport 1 Walking 0

2 Bicycle

3 Car

4 Bus

5 Train
r 0 No 0

1 Yes
python 0 No 0

1 Yes
sas 0 No 0

1 Yes
stata 0 No 0

1 Yes
spss 0 No 0

1 Yes
excel 0 No 0

1 Yes
# The default nature of `dta_recode()` is to drop the
# labels if the values are not sequential or do not
# start at 1. To maintain these labels, set
# `is_force_sequential` to `TRUE`. Note that this will
# reset the given values to sequential.
result2 <- dta_recode(
  dat = data_sample,
  dict = dict_recode,
  is_force_sequential = TRUE
)
glimpse(result2)
#> Rows: 2,500
#> Columns: 21
#> $ id             <chr> "STM/7539", "STM/7993", "STM/7387", "STM/5598", "STM/59…
#> $ region         <fct> Central, Central, South, West, North East, North East, …
#> $ age            <dbl> 56, 46, 45, 37, 45, 51, 56, 37, 50, 38, 48, 41, 24, 34,…
#> $ age_group      <ord> 50-59, 40-49, 40-49, 30-39, 40-49, 50-59, 50-59, 30-39,…
#> $ height         <dbl> 1.70, 1.57, 1.47, 1.67, 1.69, 1.90, 1.85, 1.64, 1.61, 1…
#> $ weight         <dbl> 73, 53, 85, 77, 53, 75, 69, 53, 56, 89, 73, 86, 76, 81,…
#> $ blood_group    <fct> AB, B, AB, AB, A, A, AB, B, A, AB, AB, A, B, AB, A, B, …
#> $ marital_status <fct> Married, Married, Married, Single, Single, Married, Sin…
#> $ education      <ord> Bachelors, Bachelors, Bachelors, Bachelors, Bachelors, …
#> $ employed       <fct> Yes, No, No, Yes, Yes, No, Yes, No, Yes, Yes, Yes, No, …
#> $ ses            <ord> Middle, Middle, High, Middle, Low, Middle, Low, Low, Mi…
#> $ language       <fct> Mandarin, French, Arabic, English, Arabic, Mandarin, En…
#> $ phone          <fct> OnePlus, OnePlus, Samsung, OnePlus, OnePlus, Samsung, O…
#> $ transport      <fct> Bicycle, Train, Car, Bus, Bus, Bus, Bus, Train, Bicycle…
#> $ gadgets_owned  <chr> "Smart TV, Tablet, Desktop Computer, Digital Camera, Sm…
#> $ r              <fct> No, No, No, No, No, Yes, Yes, Yes, Yes, Yes, Yes, No, N…
#> $ python         <fct> No, Yes, Yes, Yes, No, Yes, Yes, No, No, No, Yes, Yes, …
#> $ sas            <fct> No, No, No, No, No, No, No, No, Yes, No, No, No, No, No…
#> $ stata          <fct> No, No, Yes, Yes, Yes, No, Yes, No, Yes, No, Yes, Yes, …
#> $ spss           <fct> No, No, Yes, No, Yes, No, No, Yes, No, Yes, No, Yes, No…
#> $ excel          <fct> Yes, No, No, No, No, No, No, No, No, No, No, No, Yes, N…

# Return numeric codes
result3 <- dta_recode(
  dat = data_sample,
  dict = dict_recode,
  as_numeric = TRUE
)
glimpse(result3) # look at the data type column and values
#> Rows: 2,500
#> Columns: 21
#> $ id             <chr> "STM/7539", "STM/7993", "STM/7387", "STM/5598", "STM/59…
#> $ region         <dbl> 1, 1, 3, 4, 2, 2, 1, 2, 1, 3, 2, 2, 3, 2, 2, 4, 3, 3, 3…
#> $ age            <dbl> 56, 46, 45, 37, 45, 51, 56, 37, 50, 38, 48, 41, 24, 34,…
#> $ age_group      <dbl> 4, 3, 3, 2, 3, 4, 4, 2, 4, 2, 3, 3, 1, 2, 3, 5, 5, 2, 1…
#> $ height         <dbl> 1.70, 1.57, 1.47, 1.67, 1.69, 1.90, 1.85, 1.64, 1.61, 1…
#> $ weight         <dbl> 73, 53, 85, 77, 53, 75, 69, 53, 56, 89, 73, 86, 76, 81,…
#> $ blood_group    <dbl> 3, 2, 3, 3, 1, 1, 3, 2, 1, 3, 3, 1, 2, 3, 1, 2, 3, 3, 3…
#> $ marital_status <dbl> 2, 2, 2, 1, 1, 2, 1, 1, 3, 2, 2, 1, 2, 1, 1, 1, 2, 2, 2…
#> $ education      <dbl> 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 3, 2, 1…
#> $ employed       <dbl> 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2…
#> $ ses            <dbl> 2, 2, 3, 2, 1, 2, 1, 1, 2, 1, 2, 1, 3, 3, 1, 2, 1, 1, 1…
#> $ language       <dbl> 5, 2, 4, 1, 4, 5, 1, 5, 5, 3, 4, 1, 3, 4, 4, 3, 3, 4, 3…
#> $ phone          <dbl> 5, 5, 2, 5, 5, 2, 5, 5, 6, 6, 4, 4, 2, 4, 4, 4, 4, 3, 7…
#> $ transport      <dbl> 2, 5, 3, 4, 4, 4, 4, 5, 2, 1, 2, 3, 2, 1, 5, 3, 4, 3, 3…
#> $ gadgets_owned  <chr> "Smart TV, Tablet, Desktop Computer, Digital Camera, Sm…
#> $ r              <dbl> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2…
#> $ python         <dbl> 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 1…
#> $ sas            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1…
#> $ stata          <dbl> 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1…
#> $ spss           <dbl> 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1…
#> $ excel          <dbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1…