1 Data Wrangling
1.1 How to do data wrangling
We will use tidyverse
package to work with data.
1.1.1 Load data and package
head (iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
1.1.2 Select certain rows
<- filter(iris, Species == 'setosa')
setosa setosa
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
filter the first 5 rows
%>%
iris slice(1:5)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
1.1.3 Select certain columns
select(iris, Sepal.Length, Species)
## Sepal.Length Species
## 1 5.1 setosa
## 2 4.9 setosa
## 3 4.7 setosa
## 4 4.6 setosa
## 5 5.0 setosa
## 6 5.4 setosa
## 7 4.6 setosa
## 8 5.0 setosa
## 9 4.4 setosa
## 10 4.9 setosa
## 11 5.4 setosa
## 12 4.8 setosa
## 13 4.8 setosa
## 14 4.3 setosa
## 15 5.8 setosa
## 16 5.7 setosa
## 17 5.4 setosa
## 18 5.1 setosa
## 19 5.7 setosa
## 20 5.1 setosa
## 21 5.4 setosa
## 22 5.1 setosa
## 23 4.6 setosa
## 24 5.1 setosa
## 25 4.8 setosa
## 26 5.0 setosa
## 27 5.0 setosa
## 28 5.2 setosa
## 29 5.2 setosa
## 30 4.7 setosa
## 31 4.8 setosa
## 32 5.4 setosa
## 33 5.2 setosa
## 34 5.5 setosa
## 35 4.9 setosa
## 36 5.0 setosa
## 37 5.5 setosa
## 38 4.9 setosa
## 39 4.4 setosa
## 40 5.1 setosa
## 41 5.0 setosa
## 42 4.5 setosa
## 43 4.4 setosa
## 44 5.0 setosa
## 45 5.1 setosa
## 46 4.8 setosa
## 47 5.1 setosa
## 48 4.6 setosa
## 49 5.3 setosa
## 50 5.0 setosa
## 51 7.0 versicolor
## 52 6.4 versicolor
## 53 6.9 versicolor
## 54 5.5 versicolor
## 55 6.5 versicolor
## 56 5.7 versicolor
## 57 6.3 versicolor
## 58 4.9 versicolor
## 59 6.6 versicolor
## 60 5.2 versicolor
## 61 5.0 versicolor
## 62 5.9 versicolor
## 63 6.0 versicolor
## 64 6.1 versicolor
## 65 5.6 versicolor
## 66 6.7 versicolor
## 67 5.6 versicolor
## 68 5.8 versicolor
## 69 6.2 versicolor
## 70 5.6 versicolor
## 71 5.9 versicolor
## 72 6.1 versicolor
## 73 6.3 versicolor
## 74 6.1 versicolor
## 75 6.4 versicolor
## 76 6.6 versicolor
## 77 6.8 versicolor
## 78 6.7 versicolor
## 79 6.0 versicolor
## 80 5.7 versicolor
## 81 5.5 versicolor
## 82 5.5 versicolor
## 83 5.8 versicolor
## 84 6.0 versicolor
## 85 5.4 versicolor
## 86 6.0 versicolor
## 87 6.7 versicolor
## 88 6.3 versicolor
## 89 5.6 versicolor
## 90 5.5 versicolor
## 91 5.5 versicolor
## 92 6.1 versicolor
## 93 5.8 versicolor
## 94 5.0 versicolor
## 95 5.6 versicolor
## 96 5.7 versicolor
## 97 5.7 versicolor
## 98 6.2 versicolor
## 99 5.1 versicolor
## 100 5.7 versicolor
## 101 6.3 virginica
## 102 5.8 virginica
## 103 7.1 virginica
## 104 6.3 virginica
## 105 6.5 virginica
## 106 7.6 virginica
## 107 4.9 virginica
## 108 7.3 virginica
## 109 6.7 virginica
## 110 7.2 virginica
## 111 6.5 virginica
## 112 6.4 virginica
## 113 6.8 virginica
## 114 5.7 virginica
## 115 5.8 virginica
## 116 6.4 virginica
## 117 6.5 virginica
## 118 7.7 virginica
## 119 7.7 virginica
## 120 6.0 virginica
## 121 6.9 virginica
## 122 5.6 virginica
## 123 7.7 virginica
## 124 6.3 virginica
## 125 6.7 virginica
## 126 7.2 virginica
## 127 6.2 virginica
## 128 6.1 virginica
## 129 6.4 virginica
## 130 7.2 virginica
## 131 7.4 virginica
## 132 7.9 virginica
## 133 6.4 virginica
## 134 6.3 virginica
## 135 6.1 virginica
## 136 7.7 virginica
## 137 6.3 virginica
## 138 6.4 virginica
## 139 6.0 virginica
## 140 6.9 virginica
## 141 6.7 virginica
## 142 6.9 virginica
## 143 5.8 virginica
## 144 6.8 virginica
## 145 6.7 virginica
## 146 6.7 virginica
## 147 6.3 virginica
## 148 6.5 virginica
## 149 6.2 virginica
## 150 5.9 virginica
select(iris, -Sepal.Length, -Species)
## Sepal.Width Petal.Length Petal.Width
## 1 3.5 1.4 0.2
## 2 3.0 1.4 0.2
## 3 3.2 1.3 0.2
## 4 3.1 1.5 0.2
## 5 3.6 1.4 0.2
## 6 3.9 1.7 0.4
## 7 3.4 1.4 0.3
## 8 3.4 1.5 0.2
## 9 2.9 1.4 0.2
## 10 3.1 1.5 0.1
## 11 3.7 1.5 0.2
## 12 3.4 1.6 0.2
## 13 3.0 1.4 0.1
## 14 3.0 1.1 0.1
## 15 4.0 1.2 0.2
## 16 4.4 1.5 0.4
## 17 3.9 1.3 0.4
## 18 3.5 1.4 0.3
## 19 3.8 1.7 0.3
## 20 3.8 1.5 0.3
## 21 3.4 1.7 0.2
## 22 3.7 1.5 0.4
## 23 3.6 1.0 0.2
## 24 3.3 1.7 0.5
## 25 3.4 1.9 0.2
## 26 3.0 1.6 0.2
## 27 3.4 1.6 0.4
## 28 3.5 1.5 0.2
## 29 3.4 1.4 0.2
## 30 3.2 1.6 0.2
## 31 3.1 1.6 0.2
## 32 3.4 1.5 0.4
## 33 4.1 1.5 0.1
## 34 4.2 1.4 0.2
## 35 3.1 1.5 0.2
## 36 3.2 1.2 0.2
## 37 3.5 1.3 0.2
## 38 3.6 1.4 0.1
## 39 3.0 1.3 0.2
## 40 3.4 1.5 0.2
## 41 3.5 1.3 0.3
## 42 2.3 1.3 0.3
## 43 3.2 1.3 0.2
## 44 3.5 1.6 0.6
## 45 3.8 1.9 0.4
## 46 3.0 1.4 0.3
## 47 3.8 1.6 0.2
## 48 3.2 1.4 0.2
## 49 3.7 1.5 0.2
## 50 3.3 1.4 0.2
## 51 3.2 4.7 1.4
## 52 3.2 4.5 1.5
## 53 3.1 4.9 1.5
## 54 2.3 4.0 1.3
## 55 2.8 4.6 1.5
## 56 2.8 4.5 1.3
## 57 3.3 4.7 1.6
## 58 2.4 3.3 1.0
## 59 2.9 4.6 1.3
## 60 2.7 3.9 1.4
## 61 2.0 3.5 1.0
## 62 3.0 4.2 1.5
## 63 2.2 4.0 1.0
## 64 2.9 4.7 1.4
## 65 2.9 3.6 1.3
## 66 3.1 4.4 1.4
## 67 3.0 4.5 1.5
## 68 2.7 4.1 1.0
## 69 2.2 4.5 1.5
## 70 2.5 3.9 1.1
## 71 3.2 4.8 1.8
## 72 2.8 4.0 1.3
## 73 2.5 4.9 1.5
## 74 2.8 4.7 1.2
## 75 2.9 4.3 1.3
## 76 3.0 4.4 1.4
## 77 2.8 4.8 1.4
## 78 3.0 5.0 1.7
## 79 2.9 4.5 1.5
## 80 2.6 3.5 1.0
## 81 2.4 3.8 1.1
## 82 2.4 3.7 1.0
## 83 2.7 3.9 1.2
## 84 2.7 5.1 1.6
## 85 3.0 4.5 1.5
## 86 3.4 4.5 1.6
## 87 3.1 4.7 1.5
## 88 2.3 4.4 1.3
## 89 3.0 4.1 1.3
## 90 2.5 4.0 1.3
## 91 2.6 4.4 1.2
## 92 3.0 4.6 1.4
## 93 2.6 4.0 1.2
## 94 2.3 3.3 1.0
## 95 2.7 4.2 1.3
## 96 3.0 4.2 1.2
## 97 2.9 4.2 1.3
## 98 2.9 4.3 1.3
## 99 2.5 3.0 1.1
## 100 2.8 4.1 1.3
## 101 3.3 6.0 2.5
## 102 2.7 5.1 1.9
## 103 3.0 5.9 2.1
## 104 2.9 5.6 1.8
## 105 3.0 5.8 2.2
## 106 3.0 6.6 2.1
## 107 2.5 4.5 1.7
## 108 2.9 6.3 1.8
## 109 2.5 5.8 1.8
## 110 3.6 6.1 2.5
## 111 3.2 5.1 2.0
## 112 2.7 5.3 1.9
## 113 3.0 5.5 2.1
## 114 2.5 5.0 2.0
## 115 2.8 5.1 2.4
## 116 3.2 5.3 2.3
## 117 3.0 5.5 1.8
## 118 3.8 6.7 2.2
## 119 2.6 6.9 2.3
## 120 2.2 5.0 1.5
## 121 3.2 5.7 2.3
## 122 2.8 4.9 2.0
## 123 2.8 6.7 2.0
## 124 2.7 4.9 1.8
## 125 3.3 5.7 2.1
## 126 3.2 6.0 1.8
## 127 2.8 4.8 1.8
## 128 3.0 4.9 1.8
## 129 2.8 5.6 2.1
## 130 3.0 5.8 1.6
## 131 2.8 6.1 1.9
## 132 3.8 6.4 2.0
## 133 2.8 5.6 2.2
## 134 2.8 5.1 1.5
## 135 2.6 5.6 1.4
## 136 3.0 6.1 2.3
## 137 3.4 5.6 2.4
## 138 3.1 5.5 1.8
## 139 3.0 4.8 1.8
## 140 3.1 5.4 2.1
## 141 3.1 5.6 2.4
## 142 3.1 5.1 2.3
## 143 2.7 5.1 1.9
## 144 3.2 5.9 2.3
## 145 3.3 5.7 2.5
## 146 3.0 5.2 2.3
## 147 2.5 5.0 1.9
## 148 3.0 5.2 2.0
## 149 3.4 5.4 2.3
## 150 3.0 5.1 1.8
reorder the variables
%>% select(Species, everything()) %>%
iris print()
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 setosa 5.1 3.5 1.4 0.2
## 2 setosa 4.9 3.0 1.4 0.2
## 3 setosa 4.7 3.2 1.3 0.2
## 4 setosa 4.6 3.1 1.5 0.2
## 5 setosa 5.0 3.6 1.4 0.2
## 6 setosa 5.4 3.9 1.7 0.4
## 7 setosa 4.6 3.4 1.4 0.3
## 8 setosa 5.0 3.4 1.5 0.2
## 9 setosa 4.4 2.9 1.4 0.2
## 10 setosa 4.9 3.1 1.5 0.1
## 11 setosa 5.4 3.7 1.5 0.2
## 12 setosa 4.8 3.4 1.6 0.2
## 13 setosa 4.8 3.0 1.4 0.1
## 14 setosa 4.3 3.0 1.1 0.1
## 15 setosa 5.8 4.0 1.2 0.2
## 16 setosa 5.7 4.4 1.5 0.4
## 17 setosa 5.4 3.9 1.3 0.4
## 18 setosa 5.1 3.5 1.4 0.3
## 19 setosa 5.7 3.8 1.7 0.3
## 20 setosa 5.1 3.8 1.5 0.3
## 21 setosa 5.4 3.4 1.7 0.2
## 22 setosa 5.1 3.7 1.5 0.4
## 23 setosa 4.6 3.6 1.0 0.2
## 24 setosa 5.1 3.3 1.7 0.5
## 25 setosa 4.8 3.4 1.9 0.2
## 26 setosa 5.0 3.0 1.6 0.2
## 27 setosa 5.0 3.4 1.6 0.4
## 28 setosa 5.2 3.5 1.5 0.2
## 29 setosa 5.2 3.4 1.4 0.2
## 30 setosa 4.7 3.2 1.6 0.2
## 31 setosa 4.8 3.1 1.6 0.2
## 32 setosa 5.4 3.4 1.5 0.4
## 33 setosa 5.2 4.1 1.5 0.1
## 34 setosa 5.5 4.2 1.4 0.2
## 35 setosa 4.9 3.1 1.5 0.2
## 36 setosa 5.0 3.2 1.2 0.2
## 37 setosa 5.5 3.5 1.3 0.2
## 38 setosa 4.9 3.6 1.4 0.1
## 39 setosa 4.4 3.0 1.3 0.2
## 40 setosa 5.1 3.4 1.5 0.2
## 41 setosa 5.0 3.5 1.3 0.3
## 42 setosa 4.5 2.3 1.3 0.3
## 43 setosa 4.4 3.2 1.3 0.2
## 44 setosa 5.0 3.5 1.6 0.6
## 45 setosa 5.1 3.8 1.9 0.4
## 46 setosa 4.8 3.0 1.4 0.3
## 47 setosa 5.1 3.8 1.6 0.2
## 48 setosa 4.6 3.2 1.4 0.2
## 49 setosa 5.3 3.7 1.5 0.2
## 50 setosa 5.0 3.3 1.4 0.2
## 51 versicolor 7.0 3.2 4.7 1.4
## 52 versicolor 6.4 3.2 4.5 1.5
## 53 versicolor 6.9 3.1 4.9 1.5
## 54 versicolor 5.5 2.3 4.0 1.3
## 55 versicolor 6.5 2.8 4.6 1.5
## 56 versicolor 5.7 2.8 4.5 1.3
## 57 versicolor 6.3 3.3 4.7 1.6
## 58 versicolor 4.9 2.4 3.3 1.0
## 59 versicolor 6.6 2.9 4.6 1.3
## 60 versicolor 5.2 2.7 3.9 1.4
## 61 versicolor 5.0 2.0 3.5 1.0
## 62 versicolor 5.9 3.0 4.2 1.5
## 63 versicolor 6.0 2.2 4.0 1.0
## 64 versicolor 6.1 2.9 4.7 1.4
## 65 versicolor 5.6 2.9 3.6 1.3
## 66 versicolor 6.7 3.1 4.4 1.4
## 67 versicolor 5.6 3.0 4.5 1.5
## 68 versicolor 5.8 2.7 4.1 1.0
## 69 versicolor 6.2 2.2 4.5 1.5
## 70 versicolor 5.6 2.5 3.9 1.1
## 71 versicolor 5.9 3.2 4.8 1.8
## 72 versicolor 6.1 2.8 4.0 1.3
## 73 versicolor 6.3 2.5 4.9 1.5
## 74 versicolor 6.1 2.8 4.7 1.2
## 75 versicolor 6.4 2.9 4.3 1.3
## 76 versicolor 6.6 3.0 4.4 1.4
## 77 versicolor 6.8 2.8 4.8 1.4
## 78 versicolor 6.7 3.0 5.0 1.7
## 79 versicolor 6.0 2.9 4.5 1.5
## 80 versicolor 5.7 2.6 3.5 1.0
## 81 versicolor 5.5 2.4 3.8 1.1
## 82 versicolor 5.5 2.4 3.7 1.0
## 83 versicolor 5.8 2.7 3.9 1.2
## 84 versicolor 6.0 2.7 5.1 1.6
## 85 versicolor 5.4 3.0 4.5 1.5
## 86 versicolor 6.0 3.4 4.5 1.6
## 87 versicolor 6.7 3.1 4.7 1.5
## 88 versicolor 6.3 2.3 4.4 1.3
## 89 versicolor 5.6 3.0 4.1 1.3
## 90 versicolor 5.5 2.5 4.0 1.3
## 91 versicolor 5.5 2.6 4.4 1.2
## 92 versicolor 6.1 3.0 4.6 1.4
## 93 versicolor 5.8 2.6 4.0 1.2
## 94 versicolor 5.0 2.3 3.3 1.0
## 95 versicolor 5.6 2.7 4.2 1.3
## 96 versicolor 5.7 3.0 4.2 1.2
## 97 versicolor 5.7 2.9 4.2 1.3
## 98 versicolor 6.2 2.9 4.3 1.3
## 99 versicolor 5.1 2.5 3.0 1.1
## 100 versicolor 5.7 2.8 4.1 1.3
## 101 virginica 6.3 3.3 6.0 2.5
## 102 virginica 5.8 2.7 5.1 1.9
## 103 virginica 7.1 3.0 5.9 2.1
## 104 virginica 6.3 2.9 5.6 1.8
## 105 virginica 6.5 3.0 5.8 2.2
## 106 virginica 7.6 3.0 6.6 2.1
## 107 virginica 4.9 2.5 4.5 1.7
## 108 virginica 7.3 2.9 6.3 1.8
## 109 virginica 6.7 2.5 5.8 1.8
## 110 virginica 7.2 3.6 6.1 2.5
## 111 virginica 6.5 3.2 5.1 2.0
## 112 virginica 6.4 2.7 5.3 1.9
## 113 virginica 6.8 3.0 5.5 2.1
## 114 virginica 5.7 2.5 5.0 2.0
## 115 virginica 5.8 2.8 5.1 2.4
## 116 virginica 6.4 3.2 5.3 2.3
## 117 virginica 6.5 3.0 5.5 1.8
## 118 virginica 7.7 3.8 6.7 2.2
## 119 virginica 7.7 2.6 6.9 2.3
## 120 virginica 6.0 2.2 5.0 1.5
## 121 virginica 6.9 3.2 5.7 2.3
## 122 virginica 5.6 2.8 4.9 2.0
## 123 virginica 7.7 2.8 6.7 2.0
## 124 virginica 6.3 2.7 4.9 1.8
## 125 virginica 6.7 3.3 5.7 2.1
## 126 virginica 7.2 3.2 6.0 1.8
## 127 virginica 6.2 2.8 4.8 1.8
## 128 virginica 6.1 3.0 4.9 1.8
## 129 virginica 6.4 2.8 5.6 2.1
## 130 virginica 7.2 3.0 5.8 1.6
## 131 virginica 7.4 2.8 6.1 1.9
## 132 virginica 7.9 3.8 6.4 2.0
## 133 virginica 6.4 2.8 5.6 2.2
## 134 virginica 6.3 2.8 5.1 1.5
## 135 virginica 6.1 2.6 5.6 1.4
## 136 virginica 7.7 3.0 6.1 2.3
## 137 virginica 6.3 3.4 5.6 2.4
## 138 virginica 6.4 3.1 5.5 1.8
## 139 virginica 6.0 3.0 4.8 1.8
## 140 virginica 6.9 3.1 5.4 2.1
## 141 virginica 6.7 3.1 5.6 2.4
## 142 virginica 6.9 3.1 5.1 2.3
## 143 virginica 5.8 2.7 5.1 1.9
## 144 virginica 6.8 3.2 5.9 2.3
## 145 virginica 6.7 3.3 5.7 2.5
## 146 virginica 6.7 3.0 5.2 2.3
## 147 virginica 6.3 2.5 5.0 1.9
## 148 virginica 6.5 3.0 5.2 2.0
## 149 virginica 6.2 3.4 5.4 2.3
## 150 virginica 5.9 3.0 5.1 1.8
# using the direction of vector
%>% select(Species:Sepal.Length ) %>%
iris print()
## Species Petal.Width Petal.Length Sepal.Width Sepal.Length
## 1 setosa 0.2 1.4 3.5 5.1
## 2 setosa 0.2 1.4 3.0 4.9
## 3 setosa 0.2 1.3 3.2 4.7
## 4 setosa 0.2 1.5 3.1 4.6
## 5 setosa 0.2 1.4 3.6 5.0
## 6 setosa 0.4 1.7 3.9 5.4
## 7 setosa 0.3 1.4 3.4 4.6
## 8 setosa 0.2 1.5 3.4 5.0
## 9 setosa 0.2 1.4 2.9 4.4
## 10 setosa 0.1 1.5 3.1 4.9
## 11 setosa 0.2 1.5 3.7 5.4
## 12 setosa 0.2 1.6 3.4 4.8
## 13 setosa 0.1 1.4 3.0 4.8
## 14 setosa 0.1 1.1 3.0 4.3
## 15 setosa 0.2 1.2 4.0 5.8
## 16 setosa 0.4 1.5 4.4 5.7
## 17 setosa 0.4 1.3 3.9 5.4
## 18 setosa 0.3 1.4 3.5 5.1
## 19 setosa 0.3 1.7 3.8 5.7
## 20 setosa 0.3 1.5 3.8 5.1
## 21 setosa 0.2 1.7 3.4 5.4
## 22 setosa 0.4 1.5 3.7 5.1
## 23 setosa 0.2 1.0 3.6 4.6
## 24 setosa 0.5 1.7 3.3 5.1
## 25 setosa 0.2 1.9 3.4 4.8
## 26 setosa 0.2 1.6 3.0 5.0
## 27 setosa 0.4 1.6 3.4 5.0
## 28 setosa 0.2 1.5 3.5 5.2
## 29 setosa 0.2 1.4 3.4 5.2
## 30 setosa 0.2 1.6 3.2 4.7
## 31 setosa 0.2 1.6 3.1 4.8
## 32 setosa 0.4 1.5 3.4 5.4
## 33 setosa 0.1 1.5 4.1 5.2
## 34 setosa 0.2 1.4 4.2 5.5
## 35 setosa 0.2 1.5 3.1 4.9
## 36 setosa 0.2 1.2 3.2 5.0
## 37 setosa 0.2 1.3 3.5 5.5
## 38 setosa 0.1 1.4 3.6 4.9
## 39 setosa 0.2 1.3 3.0 4.4
## 40 setosa 0.2 1.5 3.4 5.1
## 41 setosa 0.3 1.3 3.5 5.0
## 42 setosa 0.3 1.3 2.3 4.5
## 43 setosa 0.2 1.3 3.2 4.4
## 44 setosa 0.6 1.6 3.5 5.0
## 45 setosa 0.4 1.9 3.8 5.1
## 46 setosa 0.3 1.4 3.0 4.8
## 47 setosa 0.2 1.6 3.8 5.1
## 48 setosa 0.2 1.4 3.2 4.6
## 49 setosa 0.2 1.5 3.7 5.3
## 50 setosa 0.2 1.4 3.3 5.0
## 51 versicolor 1.4 4.7 3.2 7.0
## 52 versicolor 1.5 4.5 3.2 6.4
## 53 versicolor 1.5 4.9 3.1 6.9
## 54 versicolor 1.3 4.0 2.3 5.5
## 55 versicolor 1.5 4.6 2.8 6.5
## 56 versicolor 1.3 4.5 2.8 5.7
## 57 versicolor 1.6 4.7 3.3 6.3
## 58 versicolor 1.0 3.3 2.4 4.9
## 59 versicolor 1.3 4.6 2.9 6.6
## 60 versicolor 1.4 3.9 2.7 5.2
## 61 versicolor 1.0 3.5 2.0 5.0
## 62 versicolor 1.5 4.2 3.0 5.9
## 63 versicolor 1.0 4.0 2.2 6.0
## 64 versicolor 1.4 4.7 2.9 6.1
## 65 versicolor 1.3 3.6 2.9 5.6
## 66 versicolor 1.4 4.4 3.1 6.7
## 67 versicolor 1.5 4.5 3.0 5.6
## 68 versicolor 1.0 4.1 2.7 5.8
## 69 versicolor 1.5 4.5 2.2 6.2
## 70 versicolor 1.1 3.9 2.5 5.6
## 71 versicolor 1.8 4.8 3.2 5.9
## 72 versicolor 1.3 4.0 2.8 6.1
## 73 versicolor 1.5 4.9 2.5 6.3
## 74 versicolor 1.2 4.7 2.8 6.1
## 75 versicolor 1.3 4.3 2.9 6.4
## 76 versicolor 1.4 4.4 3.0 6.6
## 77 versicolor 1.4 4.8 2.8 6.8
## 78 versicolor 1.7 5.0 3.0 6.7
## 79 versicolor 1.5 4.5 2.9 6.0
## 80 versicolor 1.0 3.5 2.6 5.7
## 81 versicolor 1.1 3.8 2.4 5.5
## 82 versicolor 1.0 3.7 2.4 5.5
## 83 versicolor 1.2 3.9 2.7 5.8
## 84 versicolor 1.6 5.1 2.7 6.0
## 85 versicolor 1.5 4.5 3.0 5.4
## 86 versicolor 1.6 4.5 3.4 6.0
## 87 versicolor 1.5 4.7 3.1 6.7
## 88 versicolor 1.3 4.4 2.3 6.3
## 89 versicolor 1.3 4.1 3.0 5.6
## 90 versicolor 1.3 4.0 2.5 5.5
## 91 versicolor 1.2 4.4 2.6 5.5
## 92 versicolor 1.4 4.6 3.0 6.1
## 93 versicolor 1.2 4.0 2.6 5.8
## 94 versicolor 1.0 3.3 2.3 5.0
## 95 versicolor 1.3 4.2 2.7 5.6
## 96 versicolor 1.2 4.2 3.0 5.7
## 97 versicolor 1.3 4.2 2.9 5.7
## 98 versicolor 1.3 4.3 2.9 6.2
## 99 versicolor 1.1 3.0 2.5 5.1
## 100 versicolor 1.3 4.1 2.8 5.7
## 101 virginica 2.5 6.0 3.3 6.3
## 102 virginica 1.9 5.1 2.7 5.8
## 103 virginica 2.1 5.9 3.0 7.1
## 104 virginica 1.8 5.6 2.9 6.3
## 105 virginica 2.2 5.8 3.0 6.5
## 106 virginica 2.1 6.6 3.0 7.6
## 107 virginica 1.7 4.5 2.5 4.9
## 108 virginica 1.8 6.3 2.9 7.3
## 109 virginica 1.8 5.8 2.5 6.7
## 110 virginica 2.5 6.1 3.6 7.2
## 111 virginica 2.0 5.1 3.2 6.5
## 112 virginica 1.9 5.3 2.7 6.4
## 113 virginica 2.1 5.5 3.0 6.8
## 114 virginica 2.0 5.0 2.5 5.7
## 115 virginica 2.4 5.1 2.8 5.8
## 116 virginica 2.3 5.3 3.2 6.4
## 117 virginica 1.8 5.5 3.0 6.5
## 118 virginica 2.2 6.7 3.8 7.7
## 119 virginica 2.3 6.9 2.6 7.7
## 120 virginica 1.5 5.0 2.2 6.0
## 121 virginica 2.3 5.7 3.2 6.9
## 122 virginica 2.0 4.9 2.8 5.6
## 123 virginica 2.0 6.7 2.8 7.7
## 124 virginica 1.8 4.9 2.7 6.3
## 125 virginica 2.1 5.7 3.3 6.7
## 126 virginica 1.8 6.0 3.2 7.2
## 127 virginica 1.8 4.8 2.8 6.2
## 128 virginica 1.8 4.9 3.0 6.1
## 129 virginica 2.1 5.6 2.8 6.4
## 130 virginica 1.6 5.8 3.0 7.2
## 131 virginica 1.9 6.1 2.8 7.4
## 132 virginica 2.0 6.4 3.8 7.9
## 133 virginica 2.2 5.6 2.8 6.4
## 134 virginica 1.5 5.1 2.8 6.3
## 135 virginica 1.4 5.6 2.6 6.1
## 136 virginica 2.3 6.1 3.0 7.7
## 137 virginica 2.4 5.6 3.4 6.3
## 138 virginica 1.8 5.5 3.1 6.4
## 139 virginica 1.8 4.8 3.0 6.0
## 140 virginica 2.1 5.4 3.1 6.9
## 141 virginica 2.4 5.6 3.1 6.7
## 142 virginica 2.3 5.1 3.1 6.9
## 143 virginica 1.9 5.1 2.7 5.8
## 144 virginica 2.3 5.9 3.2 6.8
## 145 virginica 2.5 5.7 3.3 6.7
## 146 virginica 2.3 5.2 3.0 6.7
## 147 virginica 1.9 5.0 2.5 6.3
## 148 virginica 2.0 5.2 3.0 6.5
## 149 virginica 2.3 5.4 3.4 6.2
## 150 virginica 1.8 5.1 3.0 5.9
1.1.4 Rename variables
rename(iris, Sepal_Width= Sepal.Width, Sepal_Length= Sepal.Length )
## Sepal_Length Sepal_Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
%>%
iris rename_with(tolower) %>% head()
## sepal.length sepal.width petal.length petal.width species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
1.1.5 Sorting in ascending or descending order
- put a minus in front of a variable for descending order
arrange(iris, Petal.Length, -Petal.Width)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 4.6 3.6 1.0 0.2 setosa
## 2 4.3 3.0 1.1 0.1 setosa
## 3 5.8 4.0 1.2 0.2 setosa
## 4 5.0 3.2 1.2 0.2 setosa
## 5 5.4 3.9 1.3 0.4 setosa
## 6 5.0 3.5 1.3 0.3 setosa
## 7 4.5 2.3 1.3 0.3 setosa
## 8 4.7 3.2 1.3 0.2 setosa
## 9 5.5 3.5 1.3 0.2 setosa
## 10 4.4 3.0 1.3 0.2 setosa
## 11 4.4 3.2 1.3 0.2 setosa
## 12 4.6 3.4 1.4 0.3 setosa
## 13 5.1 3.5 1.4 0.3 setosa
## 14 4.8 3.0 1.4 0.3 setosa
## 15 5.1 3.5 1.4 0.2 setosa
## 16 4.9 3.0 1.4 0.2 setosa
## 17 5.0 3.6 1.4 0.2 setosa
## 18 4.4 2.9 1.4 0.2 setosa
## 19 5.2 3.4 1.4 0.2 setosa
## 20 5.5 4.2 1.4 0.2 setosa
## 21 4.6 3.2 1.4 0.2 setosa
## 22 5.0 3.3 1.4 0.2 setosa
## 23 4.8 3.0 1.4 0.1 setosa
## 24 4.9 3.6 1.4 0.1 setosa
## 25 5.7 4.4 1.5 0.4 setosa
## 26 5.1 3.7 1.5 0.4 setosa
## 27 5.4 3.4 1.5 0.4 setosa
## 28 5.1 3.8 1.5 0.3 setosa
## 29 4.6 3.1 1.5 0.2 setosa
## 30 5.0 3.4 1.5 0.2 setosa
## 31 5.4 3.7 1.5 0.2 setosa
## 32 5.2 3.5 1.5 0.2 setosa
## 33 4.9 3.1 1.5 0.2 setosa
## 34 5.1 3.4 1.5 0.2 setosa
## 35 5.3 3.7 1.5 0.2 setosa
## 36 4.9 3.1 1.5 0.1 setosa
## 37 5.2 4.1 1.5 0.1 setosa
## 38 5.0 3.5 1.6 0.6 setosa
## 39 5.0 3.4 1.6 0.4 setosa
## 40 4.8 3.4 1.6 0.2 setosa
## 41 5.0 3.0 1.6 0.2 setosa
## 42 4.7 3.2 1.6 0.2 setosa
## 43 4.8 3.1 1.6 0.2 setosa
## 44 5.1 3.8 1.6 0.2 setosa
## 45 5.1 3.3 1.7 0.5 setosa
## 46 5.4 3.9 1.7 0.4 setosa
## 47 5.7 3.8 1.7 0.3 setosa
## 48 5.4 3.4 1.7 0.2 setosa
## 49 5.1 3.8 1.9 0.4 setosa
## 50 4.8 3.4 1.9 0.2 setosa
## 51 5.1 2.5 3.0 1.1 versicolor
## 52 4.9 2.4 3.3 1.0 versicolor
## 53 5.0 2.3 3.3 1.0 versicolor
## 54 5.0 2.0 3.5 1.0 versicolor
## 55 5.7 2.6 3.5 1.0 versicolor
## 56 5.6 2.9 3.6 1.3 versicolor
## 57 5.5 2.4 3.7 1.0 versicolor
## 58 5.5 2.4 3.8 1.1 versicolor
## 59 5.2 2.7 3.9 1.4 versicolor
## 60 5.8 2.7 3.9 1.2 versicolor
## 61 5.6 2.5 3.9 1.1 versicolor
## 62 5.5 2.3 4.0 1.3 versicolor
## 63 6.1 2.8 4.0 1.3 versicolor
## 64 5.5 2.5 4.0 1.3 versicolor
## 65 5.8 2.6 4.0 1.2 versicolor
## 66 6.0 2.2 4.0 1.0 versicolor
## 67 5.6 3.0 4.1 1.3 versicolor
## 68 5.7 2.8 4.1 1.3 versicolor
## 69 5.8 2.7 4.1 1.0 versicolor
## 70 5.9 3.0 4.2 1.5 versicolor
## 71 5.6 2.7 4.2 1.3 versicolor
## 72 5.7 2.9 4.2 1.3 versicolor
## 73 5.7 3.0 4.2 1.2 versicolor
## 74 6.4 2.9 4.3 1.3 versicolor
## 75 6.2 2.9 4.3 1.3 versicolor
## 76 6.7 3.1 4.4 1.4 versicolor
## 77 6.6 3.0 4.4 1.4 versicolor
## 78 6.3 2.3 4.4 1.3 versicolor
## 79 5.5 2.6 4.4 1.2 versicolor
## 80 4.9 2.5 4.5 1.7 virginica
## 81 6.0 3.4 4.5 1.6 versicolor
## 82 6.4 3.2 4.5 1.5 versicolor
## 83 5.6 3.0 4.5 1.5 versicolor
## 84 6.2 2.2 4.5 1.5 versicolor
## 85 6.0 2.9 4.5 1.5 versicolor
## 86 5.4 3.0 4.5 1.5 versicolor
## 87 5.7 2.8 4.5 1.3 versicolor
## 88 6.5 2.8 4.6 1.5 versicolor
## 89 6.1 3.0 4.6 1.4 versicolor
## 90 6.6 2.9 4.6 1.3 versicolor
## 91 6.3 3.3 4.7 1.6 versicolor
## 92 6.7 3.1 4.7 1.5 versicolor
## 93 7.0 3.2 4.7 1.4 versicolor
## 94 6.1 2.9 4.7 1.4 versicolor
## 95 6.1 2.8 4.7 1.2 versicolor
## 96 5.9 3.2 4.8 1.8 versicolor
## 97 6.2 2.8 4.8 1.8 virginica
## 98 6.0 3.0 4.8 1.8 virginica
## 99 6.8 2.8 4.8 1.4 versicolor
## 100 5.6 2.8 4.9 2.0 virginica
## 101 6.3 2.7 4.9 1.8 virginica
## 102 6.1 3.0 4.9 1.8 virginica
## 103 6.9 3.1 4.9 1.5 versicolor
## 104 6.3 2.5 4.9 1.5 versicolor
## 105 5.7 2.5 5.0 2.0 virginica
## 106 6.3 2.5 5.0 1.9 virginica
## 107 6.7 3.0 5.0 1.7 versicolor
## 108 6.0 2.2 5.0 1.5 virginica
## 109 5.8 2.8 5.1 2.4 virginica
## 110 6.9 3.1 5.1 2.3 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 5.8 2.7 5.1 1.9 virginica
## 113 5.8 2.7 5.1 1.9 virginica
## 114 5.9 3.0 5.1 1.8 virginica
## 115 6.0 2.7 5.1 1.6 versicolor
## 116 6.3 2.8 5.1 1.5 virginica
## 117 6.7 3.0 5.2 2.3 virginica
## 118 6.5 3.0 5.2 2.0 virginica
## 119 6.4 3.2 5.3 2.3 virginica
## 120 6.4 2.7 5.3 1.9 virginica
## 121 6.2 3.4 5.4 2.3 virginica
## 122 6.9 3.1 5.4 2.1 virginica
## 123 6.8 3.0 5.5 2.1 virginica
## 124 6.5 3.0 5.5 1.8 virginica
## 125 6.4 3.1 5.5 1.8 virginica
## 126 6.3 3.4 5.6 2.4 virginica
## 127 6.7 3.1 5.6 2.4 virginica
## 128 6.4 2.8 5.6 2.2 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 6.3 2.9 5.6 1.8 virginica
## 131 6.1 2.6 5.6 1.4 virginica
## 132 6.7 3.3 5.7 2.5 virginica
## 133 6.9 3.2 5.7 2.3 virginica
## 134 6.7 3.3 5.7 2.1 virginica
## 135 6.5 3.0 5.8 2.2 virginica
## 136 6.7 2.5 5.8 1.8 virginica
## 137 7.2 3.0 5.8 1.6 virginica
## 138 6.8 3.2 5.9 2.3 virginica
## 139 7.1 3.0 5.9 2.1 virginica
## 140 6.3 3.3 6.0 2.5 virginica
## 141 7.2 3.2 6.0 1.8 virginica
## 142 7.2 3.6 6.1 2.5 virginica
## 143 7.7 3.0 6.1 2.3 virginica
## 144 7.4 2.8 6.1 1.9 virginica
## 145 7.3 2.9 6.3 1.8 virginica
## 146 7.9 3.8 6.4 2.0 virginica
## 147 7.6 3.0 6.6 2.1 virginica
## 148 7.7 3.8 6.7 2.2 virginica
## 149 7.7 2.8 6.7 2.0 virginica
## 150 7.7 2.6 6.9 2.3 virginica
1.1.6 Transform variables
mutate(iris, newvar= Sepal.Width*10, Petal.Length=Petal.Length/100 )
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species newvar
## 1 5.1 3.5 0.014 0.2 setosa 35
## 2 4.9 3.0 0.014 0.2 setosa 30
## 3 4.7 3.2 0.013 0.2 setosa 32
## 4 4.6 3.1 0.015 0.2 setosa 31
## 5 5.0 3.6 0.014 0.2 setosa 36
## 6 5.4 3.9 0.017 0.4 setosa 39
## 7 4.6 3.4 0.014 0.3 setosa 34
## 8 5.0 3.4 0.015 0.2 setosa 34
## 9 4.4 2.9 0.014 0.2 setosa 29
## 10 4.9 3.1 0.015 0.1 setosa 31
## 11 5.4 3.7 0.015 0.2 setosa 37
## 12 4.8 3.4 0.016 0.2 setosa 34
## 13 4.8 3.0 0.014 0.1 setosa 30
## 14 4.3 3.0 0.011 0.1 setosa 30
## 15 5.8 4.0 0.012 0.2 setosa 40
## 16 5.7 4.4 0.015 0.4 setosa 44
## 17 5.4 3.9 0.013 0.4 setosa 39
## 18 5.1 3.5 0.014 0.3 setosa 35
## 19 5.7 3.8 0.017 0.3 setosa 38
## 20 5.1 3.8 0.015 0.3 setosa 38
## 21 5.4 3.4 0.017 0.2 setosa 34
## 22 5.1 3.7 0.015 0.4 setosa 37
## 23 4.6 3.6 0.010 0.2 setosa 36
## 24 5.1 3.3 0.017 0.5 setosa 33
## 25 4.8 3.4 0.019 0.2 setosa 34
## 26 5.0 3.0 0.016 0.2 setosa 30
## 27 5.0 3.4 0.016 0.4 setosa 34
## 28 5.2 3.5 0.015 0.2 setosa 35
## 29 5.2 3.4 0.014 0.2 setosa 34
## 30 4.7 3.2 0.016 0.2 setosa 32
## 31 4.8 3.1 0.016 0.2 setosa 31
## 32 5.4 3.4 0.015 0.4 setosa 34
## 33 5.2 4.1 0.015 0.1 setosa 41
## 34 5.5 4.2 0.014 0.2 setosa 42
## 35 4.9 3.1 0.015 0.2 setosa 31
## 36 5.0 3.2 0.012 0.2 setosa 32
## 37 5.5 3.5 0.013 0.2 setosa 35
## 38 4.9 3.6 0.014 0.1 setosa 36
## 39 4.4 3.0 0.013 0.2 setosa 30
## 40 5.1 3.4 0.015 0.2 setosa 34
## 41 5.0 3.5 0.013 0.3 setosa 35
## 42 4.5 2.3 0.013 0.3 setosa 23
## 43 4.4 3.2 0.013 0.2 setosa 32
## 44 5.0 3.5 0.016 0.6 setosa 35
## 45 5.1 3.8 0.019 0.4 setosa 38
## 46 4.8 3.0 0.014 0.3 setosa 30
## 47 5.1 3.8 0.016 0.2 setosa 38
## 48 4.6 3.2 0.014 0.2 setosa 32
## 49 5.3 3.7 0.015 0.2 setosa 37
## 50 5.0 3.3 0.014 0.2 setosa 33
## 51 7.0 3.2 0.047 1.4 versicolor 32
## 52 6.4 3.2 0.045 1.5 versicolor 32
## 53 6.9 3.1 0.049 1.5 versicolor 31
## 54 5.5 2.3 0.040 1.3 versicolor 23
## 55 6.5 2.8 0.046 1.5 versicolor 28
## 56 5.7 2.8 0.045 1.3 versicolor 28
## 57 6.3 3.3 0.047 1.6 versicolor 33
## 58 4.9 2.4 0.033 1.0 versicolor 24
## 59 6.6 2.9 0.046 1.3 versicolor 29
## 60 5.2 2.7 0.039 1.4 versicolor 27
## 61 5.0 2.0 0.035 1.0 versicolor 20
## 62 5.9 3.0 0.042 1.5 versicolor 30
## 63 6.0 2.2 0.040 1.0 versicolor 22
## 64 6.1 2.9 0.047 1.4 versicolor 29
## 65 5.6 2.9 0.036 1.3 versicolor 29
## 66 6.7 3.1 0.044 1.4 versicolor 31
## 67 5.6 3.0 0.045 1.5 versicolor 30
## 68 5.8 2.7 0.041 1.0 versicolor 27
## 69 6.2 2.2 0.045 1.5 versicolor 22
## 70 5.6 2.5 0.039 1.1 versicolor 25
## 71 5.9 3.2 0.048 1.8 versicolor 32
## 72 6.1 2.8 0.040 1.3 versicolor 28
## 73 6.3 2.5 0.049 1.5 versicolor 25
## 74 6.1 2.8 0.047 1.2 versicolor 28
## 75 6.4 2.9 0.043 1.3 versicolor 29
## 76 6.6 3.0 0.044 1.4 versicolor 30
## 77 6.8 2.8 0.048 1.4 versicolor 28
## 78 6.7 3.0 0.050 1.7 versicolor 30
## 79 6.0 2.9 0.045 1.5 versicolor 29
## 80 5.7 2.6 0.035 1.0 versicolor 26
## 81 5.5 2.4 0.038 1.1 versicolor 24
## 82 5.5 2.4 0.037 1.0 versicolor 24
## 83 5.8 2.7 0.039 1.2 versicolor 27
## 84 6.0 2.7 0.051 1.6 versicolor 27
## 85 5.4 3.0 0.045 1.5 versicolor 30
## 86 6.0 3.4 0.045 1.6 versicolor 34
## 87 6.7 3.1 0.047 1.5 versicolor 31
## 88 6.3 2.3 0.044 1.3 versicolor 23
## 89 5.6 3.0 0.041 1.3 versicolor 30
## 90 5.5 2.5 0.040 1.3 versicolor 25
## 91 5.5 2.6 0.044 1.2 versicolor 26
## 92 6.1 3.0 0.046 1.4 versicolor 30
## 93 5.8 2.6 0.040 1.2 versicolor 26
## 94 5.0 2.3 0.033 1.0 versicolor 23
## 95 5.6 2.7 0.042 1.3 versicolor 27
## 96 5.7 3.0 0.042 1.2 versicolor 30
## 97 5.7 2.9 0.042 1.3 versicolor 29
## 98 6.2 2.9 0.043 1.3 versicolor 29
## 99 5.1 2.5 0.030 1.1 versicolor 25
## 100 5.7 2.8 0.041 1.3 versicolor 28
## 101 6.3 3.3 0.060 2.5 virginica 33
## 102 5.8 2.7 0.051 1.9 virginica 27
## 103 7.1 3.0 0.059 2.1 virginica 30
## 104 6.3 2.9 0.056 1.8 virginica 29
## 105 6.5 3.0 0.058 2.2 virginica 30
## 106 7.6 3.0 0.066 2.1 virginica 30
## 107 4.9 2.5 0.045 1.7 virginica 25
## 108 7.3 2.9 0.063 1.8 virginica 29
## 109 6.7 2.5 0.058 1.8 virginica 25
## 110 7.2 3.6 0.061 2.5 virginica 36
## 111 6.5 3.2 0.051 2.0 virginica 32
## 112 6.4 2.7 0.053 1.9 virginica 27
## 113 6.8 3.0 0.055 2.1 virginica 30
## 114 5.7 2.5 0.050 2.0 virginica 25
## 115 5.8 2.8 0.051 2.4 virginica 28
## 116 6.4 3.2 0.053 2.3 virginica 32
## 117 6.5 3.0 0.055 1.8 virginica 30
## 118 7.7 3.8 0.067 2.2 virginica 38
## 119 7.7 2.6 0.069 2.3 virginica 26
## 120 6.0 2.2 0.050 1.5 virginica 22
## 121 6.9 3.2 0.057 2.3 virginica 32
## 122 5.6 2.8 0.049 2.0 virginica 28
## 123 7.7 2.8 0.067 2.0 virginica 28
## 124 6.3 2.7 0.049 1.8 virginica 27
## 125 6.7 3.3 0.057 2.1 virginica 33
## 126 7.2 3.2 0.060 1.8 virginica 32
## 127 6.2 2.8 0.048 1.8 virginica 28
## 128 6.1 3.0 0.049 1.8 virginica 30
## 129 6.4 2.8 0.056 2.1 virginica 28
## 130 7.2 3.0 0.058 1.6 virginica 30
## 131 7.4 2.8 0.061 1.9 virginica 28
## 132 7.9 3.8 0.064 2.0 virginica 38
## 133 6.4 2.8 0.056 2.2 virginica 28
## 134 6.3 2.8 0.051 1.5 virginica 28
## 135 6.1 2.6 0.056 1.4 virginica 26
## 136 7.7 3.0 0.061 2.3 virginica 30
## 137 6.3 3.4 0.056 2.4 virginica 34
## 138 6.4 3.1 0.055 1.8 virginica 31
## 139 6.0 3.0 0.048 1.8 virginica 30
## 140 6.9 3.1 0.054 2.1 virginica 31
## 141 6.7 3.1 0.056 2.4 virginica 31
## 142 6.9 3.1 0.051 2.3 virginica 31
## 143 5.8 2.7 0.051 1.9 virginica 27
## 144 6.8 3.2 0.059 2.3 virginica 32
## 145 6.7 3.3 0.057 2.5 virginica 33
## 146 6.7 3.0 0.052 2.3 virginica 30
## 147 6.3 2.5 0.050 1.9 virginica 25
## 148 6.5 3.0 0.052 2.0 virginica 30
## 149 6.2 3.4 0.054 2.3 virginica 34
## 150 5.9 3.0 0.051 1.8 virginica 30
# use mutate and ifelse to categorize the varible
rep, sample
set.seed(123)
<- tibble(
drug_trial # each id has three
id = rep(1:20, each = 3),
# all has 20
year = rep(0:2, times = 20),
# sample 20 and each repeat 3
age = sample(35:75, 20, TRUE) %>% rep(each = 3),
#
drug = sample(c("Placebo", "Active"), 20, TRUE) %>%
rep(each = 3),
#
se_headache = if_else(
== "Placebo",
drug # the possibility that outcome is 1
sample(0:1, 60, TRUE, c(.95,.05)),
sample(0:1, 60, TRUE, c(.10, .90))
)#
)head(drug_trial)
## # A tibble: 6 × 5
## id year age drug se_headache
## <int> <int> <int> <chr> <int>
## 1 1 0 65 Active 0
## 2 1 1 65 Active 1
## 3 1 2 65 Active 1
## 4 2 0 49 Active 1
## 5 2 1 49 Active 0
## 6 2 2 49 Active 1
#
%>%
drug_trial mutate(complete = c(1))
## # A tibble: 60 × 6
## id year age drug se_headache complete
## <int> <int> <int> <chr> <int> <dbl>
## 1 1 0 65 Active 0 1
## 2 1 1 65 Active 1 1
## 3 1 2 65 Active 1 1
## 4 2 0 49 Active 1 1
## 5 2 1 49 Active 0 1
## 6 2 2 49 Active 1 1
## 7 3 0 48 Placebo 0 1
## 8 3 1 48 Placebo 0 1
## 9 3 2 48 Placebo 0 1
## 10 4 0 37 Placebo 0 1
## # ℹ 50 more rows
factor is just for factor (labeling the value; while
renamelabeling the variable) and recode is for different types whose outcome depends on the right side of the equation sign. factor can change the reference group.
.
=drug_trial %>%
drug_trialmutate(mi_f = factor(se_headache , c(0, 1), c("No", "Yes")))%>%
mutate(mi_f_reverse = factor(se_headache , c(1, 0), c("yes", "no")))%>%
# change the reference group by using factor
mutate(mi = recode(se_headache , "0"=2, "1"=1))
str(drug_trial)
## tibble [60 × 8] (S3: tbl_df/tbl/data.frame)
## $ id : int [1:60] 1 1 1 2 2 2 3 3 3 4 ...
## $ year : int [1:60] 0 1 2 0 1 2 0 1 2 0 ...
## $ age : int [1:60] 65 65 65 49 49 49 48 48 48 37 ...
## $ drug : chr [1:60] "Active" "Active" "Active" "Active" ...
## $ se_headache : int [1:60] 0 1 1 1 0 1 0 0 0 0 ...
## $ mi_f : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 2 1 1 1 1 ...
## $ mi_f_reverse: Factor w/ 2 levels "yes","no": 2 1 1 1 2 1 2 2 2 2 ...
## $ mi : num [1:60] 2 1 1 1 2 1 2 2 2 2 ...
table (drug_trial$mi_f)
##
## No Yes
## 40 20
table (drug_trial$mi_f_reverse)
##
## yes no
## 20 40
%>%
drug_trial rowwise() %>%
mutate(any_se_year = sum(se_headache, year) > 0)
## # A tibble: 60 × 9
## # Rowwise:
## id year age drug se_headache mi_f mi_f_reverse mi any_se_year
## <int> <int> <int> <chr> <int> <fct> <fct> <dbl> <lgl>
## 1 1 0 65 Active 0 No no 2 FALSE
## 2 1 1 65 Active 1 Yes yes 1 TRUE
## 3 1 2 65 Active 1 Yes yes 1 TRUE
## 4 2 0 49 Active 1 Yes yes 1 TRUE
## 5 2 1 49 Active 0 No no 2 TRUE
## 6 2 2 49 Active 1 Yes yes 1 TRUE
## 7 3 0 48 Placebo 0 No no 2 FALSE
## 8 3 1 48 Placebo 0 No no 2 TRUE
## 9 3 2 48 Placebo 0 No no 2 TRUE
## 10 4 0 37 Placebo 0 No no 2 FALSE
## # ℹ 50 more rows
what is the difference of rowwise and group_by
%>% group_by(id) %>%
drug_trial mutate(any_se_year = sum(se_headache, year) )
## # A tibble: 60 × 9
## # Groups: id [20]
## id year age drug se_headache mi_f mi_f_reverse mi any_se_year
## <int> <int> <int> <chr> <int> <fct> <fct> <dbl> <int>
## 1 1 0 65 Active 0 No no 2 5
## 2 1 1 65 Active 1 Yes yes 1 5
## 3 1 2 65 Active 1 Yes yes 1 5
## 4 2 0 49 Active 1 Yes yes 1 5
## 5 2 1 49 Active 0 No no 2 5
## 6 2 2 49 Active 1 Yes yes 1 5
## 7 3 0 48 Placebo 0 No no 2 3
## 8 3 1 48 Placebo 0 No no 2 3
## 9 3 2 48 Placebo 0 No no 2 3
## 10 4 0 37 Placebo 0 No no 2 3
## # ℹ 50 more rows
%>%
drug_trial rowwise() %>%
mutate(any_se_year = sum(se_headache, year) )
## # A tibble: 60 × 9
## # Rowwise:
## id year age drug se_headache mi_f mi_f_reverse mi any_se_year
## <int> <int> <int> <chr> <int> <fct> <fct> <dbl> <int>
## 1 1 0 65 Active 0 No no 2 0
## 2 1 1 65 Active 1 Yes yes 1 2
## 3 1 2 65 Active 1 Yes yes 1 3
## 4 2 0 49 Active 1 Yes yes 1 1
## 5 2 1 49 Active 0 No no 2 1
## 6 2 2 49 Active 1 Yes yes 1 3
## 7 3 0 48 Placebo 0 No no 2 0
## 8 3 1 48 Placebo 0 No no 2 1
## 9 3 2 48 Placebo 0 No no 2 2
## 10 4 0 37 Placebo 0 No no 2 0
## # ℹ 50 more rows
starts_with and end_with
<- drug_trial %>%
drug_trial_sub select(id, year, starts_with("se")) %>%
print()
## # A tibble: 60 × 3
## id year se_headache
## <int> <int> <int>
## 1 1 0 0
## 2 1 1 1
## 3 1 2 1
## 4 2 0 1
## 5 2 1 0
## 6 2 2 1
## 7 3 0 0
## 8 3 1 0
## 9 3 2 0
## 10 4 0 0
## # ℹ 50 more rows
keep all combination levels even 0 times
%>%
drug_trial filter(year == 0) %>%
filter(age < 65) %>%
group_by(drug, se_headache, .drop = FALSE) %>%
summarise(n = n())
## `summarise()` has grouped output by 'drug'. You can override using the
## `.groups` argument.
## # A tibble: 3 × 3
## # Groups: drug [2]
## drug se_headache n
## <chr> <int> <int>
## 1 Active 1 5
## 2 Placebo 0 11
## 3 Placebo 1 1
group_by
%>%
drug_trial filter(!is.na(age)) %>%
group_by(drug) %>%
summarise(mean_age = mean(age))
## # A tibble: 2 × 2
## drug mean_age
## <chr> <dbl>
## 1 Active 54.7
## 2 Placebo 53.2
1.1.7 Working with pipes %>%
%>%
iris filter(Species=="setosa") %>%
mutate (newvar=Sepal.Width*10) %>%
select (-Sepal.Width, -Petal.Width) %>%
arrange(-Sepal.Length, newvar)
## Sepal.Length Petal.Length Species newvar
## 1 5.8 1.2 setosa 40
## 2 5.7 1.7 setosa 38
## 3 5.7 1.5 setosa 44
## 4 5.5 1.3 setosa 35
## 5 5.5 1.4 setosa 42
## 6 5.4 1.7 setosa 34
## 7 5.4 1.5 setosa 34
## 8 5.4 1.5 setosa 37
## 9 5.4 1.7 setosa 39
## 10 5.4 1.3 setosa 39
## 11 5.3 1.5 setosa 37
## 12 5.2 1.4 setosa 34
## 13 5.2 1.5 setosa 35
## 14 5.2 1.5 setosa 41
## 15 5.1 1.7 setosa 33
## 16 5.1 1.5 setosa 34
## 17 5.1 1.4 setosa 35
## 18 5.1 1.4 setosa 35
## 19 5.1 1.5 setosa 37
## 20 5.1 1.5 setosa 38
## 21 5.1 1.9 setosa 38
## 22 5.1 1.6 setosa 38
## 23 5.0 1.6 setosa 30
## 24 5.0 1.2 setosa 32
## 25 5.0 1.4 setosa 33
## 26 5.0 1.5 setosa 34
## 27 5.0 1.6 setosa 34
## 28 5.0 1.3 setosa 35
## 29 5.0 1.6 setosa 35
## 30 5.0 1.4 setosa 36
## 31 4.9 1.4 setosa 30
## 32 4.9 1.5 setosa 31
## 33 4.9 1.5 setosa 31
## 34 4.9 1.4 setosa 36
## 35 4.8 1.4 setosa 30
## 36 4.8 1.4 setosa 30
## 37 4.8 1.6 setosa 31
## 38 4.8 1.6 setosa 34
## 39 4.8 1.9 setosa 34
## 40 4.7 1.3 setosa 32
## 41 4.7 1.6 setosa 32
## 42 4.6 1.5 setosa 31
## 43 4.6 1.4 setosa 32
## 44 4.6 1.4 setosa 34
## 45 4.6 1.0 setosa 36
## 46 4.5 1.3 setosa 23
## 47 4.4 1.4 setosa 29
## 48 4.4 1.3 setosa 30
## 49 4.4 1.3 setosa 32
## 50 4.3 1.1 setosa 30
1.1.8 Pivot wider (long to wide)
- If no unique identifier row in each group doesn’t work
%>%
iris pivot_wider( names_from=Species, values_from= c(Sepal.Length))
## Warning: Values from `Sepal.Length` are not uniquely identified; output will contain
## list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
## {data} %>%
## dplyr::group_by(Sepal.Width, Petal.Length, Petal.Width, Species) %>%
## dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
## dplyr::filter(n > 1L)
## # A tibble: 143 × 6
## Sepal.Width Petal.Length Petal.Width setosa versicolor virginica
## <dbl> <dbl> <dbl> <list> <list> <list>
## 1 3.5 1.4 0.2 <dbl [1]> <NULL> <NULL>
## 2 3 1.4 0.2 <dbl [1]> <NULL> <NULL>
## 3 3.2 1.3 0.2 <dbl [2]> <NULL> <NULL>
## 4 3.1 1.5 0.2 <dbl [2]> <NULL> <NULL>
## 5 3.6 1.4 0.2 <dbl [1]> <NULL> <NULL>
## 6 3.9 1.7 0.4 <dbl [1]> <NULL> <NULL>
## 7 3.4 1.4 0.3 <dbl [1]> <NULL> <NULL>
## 8 3.4 1.5 0.2 <dbl [2]> <NULL> <NULL>
## 9 2.9 1.4 0.2 <dbl [1]> <NULL> <NULL>
## 10 3.1 1.5 0.1 <dbl [1]> <NULL> <NULL>
## # ℹ 133 more rows
- Create a unique identifier row for each name and then use pivot_wider
<- iris %>%
widedata # create groups then assign unique identifier row number in each group
group_by(Species) %>%
mutate(row = row_number()) %>%
pivot_wider(names_from=Species, values_from= c(Petal.Length,Sepal.Length,Petal.Width,Sepal.Width))
widedata
## # A tibble: 50 × 13
## row Petal.Length_setosa Petal.Length_versicolor Petal.Length_virginica
## <int> <dbl> <dbl> <dbl>
## 1 1 1.4 4.7 6
## 2 2 1.4 4.5 5.1
## 3 3 1.3 4.9 5.9
## 4 4 1.5 4 5.6
## 5 5 1.4 4.6 5.8
## 6 6 1.7 4.5 6.6
## 7 7 1.4 4.7 4.5
## 8 8 1.5 3.3 6.3
## 9 9 1.4 4.6 5.8
## 10 10 1.5 3.9 6.1
## # ℹ 40 more rows
## # ℹ 9 more variables: Sepal.Length_setosa <dbl>, Sepal.Length_versicolor <dbl>,
## # Sepal.Length_virginica <dbl>, Petal.Width_setosa <dbl>,
## # Petal.Width_versicolor <dbl>, Petal.Width_virginica <dbl>,
## # Sepal.Width_setosa <dbl>, Sepal.Width_versicolor <dbl>,
## # Sepal.Width_virginica <dbl>
%>%
iris group_by(Species) %>%
mutate(row = row_number()) %>%
pivot_wider( names_from=Species, values_from= c(Petal.Length, Petal.Width))
## # A tibble: 150 × 9
## Sepal.Length Sepal.Width row Petal.Length_setosa Petal.Length_versicolor
## <dbl> <dbl> <int> <dbl> <dbl>
## 1 5.1 3.5 1 1.4 NA
## 2 4.9 3 2 1.4 NA
## 3 4.7 3.2 3 1.3 NA
## 4 4.6 3.1 4 1.5 NA
## 5 5 3.6 5 1.4 NA
## 6 5.4 3.9 6 1.7 NA
## 7 4.6 3.4 7 1.4 NA
## 8 5 3.4 8 1.5 NA
## 9 4.4 2.9 9 1.4 NA
## 10 4.9 3.1 10 1.5 NA
## # ℹ 140 more rows
## # ℹ 4 more variables: Petal.Length_virginica <dbl>, Petal.Width_setosa <dbl>,
## # Petal.Width_versicolor <dbl>, Petal.Width_virginica <dbl>
1.1.9 Pivot longer (wide to long)
why long format? there are many analytic techniques that require our data to be in this format, like plotting.
= pivot_longer(widedata, -
longdata c( "row" , "Petal.Length_setosa" , "Petal.Length_versicolor",
"Petal.Length_virginica", "Sepal.Length_setosa" , "Sepal.Length_versicolor",
"Sepal.Length_virginica" , "Petal.Width_setosa" , "Petal.Width_versicolor" ,
"Petal.Width_virginica" ) , names_to="Sepal.Width", values_to="Sepal.Width.value")
longdata
## # A tibble: 150 × 12
## row Petal.Length_setosa Petal.Length_versicolor Petal.Length_virginica
## <int> <dbl> <dbl> <dbl>
## 1 1 1.4 4.7 6
## 2 1 1.4 4.7 6
## 3 1 1.4 4.7 6
## 4 2 1.4 4.5 5.1
## 5 2 1.4 4.5 5.1
## 6 2 1.4 4.5 5.1
## 7 3 1.3 4.9 5.9
## 8 3 1.3 4.9 5.9
## 9 3 1.3 4.9 5.9
## 10 4 1.5 4 5.6
## # ℹ 140 more rows
## # ℹ 8 more variables: Sepal.Length_setosa <dbl>, Sepal.Length_versicolor <dbl>,
## # Sepal.Length_virginica <dbl>, Petal.Width_setosa <dbl>,
## # Petal.Width_versicolor <dbl>, Petal.Width_virginica <dbl>,
## # Sepal.Width <chr>, Sepal.Width.value <dbl>
<- tibble(
babies id = 1001:1008,
sex = c("F", "F", "M", "F", "M", "M", "M", "F"),
weight_3 = c(9, 11, 17, 16, 11, 17, 16, 15),
weight_6 = c(13, 16, 20, 18, 15, 21, 17, 16),
weight_9 = c(16, 17, 23, 21, 16, 25, 19, 18),
weight_12 = c(17, 20, 24, 22, 18, 26, 21, 19)
%>%
) print()
## # A tibble: 8 × 6
## id sex weight_3 weight_6 weight_9 weight_12
## <int> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1001 F 9 13 16 17
## 2 1002 F 11 16 17 20
## 3 1003 M 17 20 23 24
## 4 1004 F 16 18 21 22
## 5 1005 M 11 15 16 18
## 6 1006 M 17 21 25 26
## 7 1007 M 16 17 19 21
## 8 1008 F 15 16 18 19
select which cols
%>%
babies pivot_longer(
cols = starts_with("weight"), #which cols
names_to = "months",
names_prefix = "weight_", #remove prefix
values_to = "weight"
)
## # A tibble: 32 × 4
## id sex months weight
## <int> <chr> <chr> <dbl>
## 1 1001 F 3 9
## 2 1001 F 6 13
## 3 1001 F 9 16
## 4 1001 F 12 17
## 5 1002 F 3 11
## 6 1002 F 6 16
## 7 1002 F 9 17
## 8 1002 F 12 20
## 9 1003 M 3 17
## 10 1003 M 6 20
## # ℹ 22 more rows
using names_prefix argument
“it is a regular expression used to remove matching text from the start of each variable name.”
%>%
babies pivot_longer(
cols = starts_with("weight"),
names_to = "months",
names_prefix = "\\w+_"
)
## # A tibble: 32 × 4
## id sex months value
## <int> <chr> <chr> <dbl>
## 1 1001 F 3 9
## 2 1001 F 6 13
## 3 1001 F 9 16
## 4 1001 F 12 17
## 5 1002 F 3 11
## 6 1002 F 6 16
## 7 1002 F 9 17
## 8 1002 F 12 20
## 9 1003 M 3 17
## 10 1003 M 6 20
## # ℹ 22 more rows
using default argument
%>%
babies pivot_longer(
cols = starts_with("weight")
)
## # A tibble: 32 × 4
## id sex name value
## <int> <chr> <chr> <dbl>
## 1 1001 F weight_3 9
## 2 1001 F weight_6 13
## 3 1001 F weight_9 16
## 4 1001 F weight_12 17
## 5 1002 F weight_3 11
## 6 1002 F weight_6 16
## 7 1002 F weight_9 17
## 8 1002 F weight_12 20
## 9 1003 M weight_3 17
## 10 1003 M weight_6 20
## # ℹ 22 more rows
set.seed(123)
<- tibble(
babies id = 1001:1008,
sex = c("F", "F", "M", "F", "M", "M", "M", "F"),
weight_3 = c(9, 11, 17, 16, 11, 17, 16, 15),
weight_6 = c(13, 16, 20, 18, 15, 21, 17, 16),
weight_9 = c(16, 17, 23, 21, 16, 25, 19, 18),
weight_12 = c(17, 20, 24, 22, 18, 26, 21, 19),
length_3 = c(17, 19, 23, 20, 18, 22, 21, 18),
length_6 = round(length_3 + rnorm(8, 2, 1)),
length_9 = round(length_6 + rnorm(8, 2, 1)),
length_12 = round(length_9 + rnorm(8, 2, 1)),
%>%
) print()
## # A tibble: 8 × 10
## id sex weight_3 weight_6 weight_9 weight_12 length_3 length_6 length_9
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1001 F 9 13 16 17 17 18 19
## 2 1002 F 11 16 17 20 19 21 23
## 3 1003 M 17 20 23 24 23 27 30
## 4 1004 F 16 18 21 22 20 22 24
## 5 1005 M 11 15 16 18 18 20 22
## 6 1006 M 17 21 25 26 22 26 28
## 7 1007 M 16 17 19 21 21 23 24
## 8 1008 F 15 16 18 19 18 19 23
## # ℹ 1 more variable: length_12 <dbl>
- pivoting multiple sets of cols
%>%
babies pivot_longer(
cols = c(-id, -sex),
names_to = c(".value", "months"),
names_sep = "_"
)
## # A tibble: 32 × 5
## id sex months weight length
## <int> <chr> <chr> <dbl> <dbl>
## 1 1001 F 3 9 17
## 2 1001 F 6 13 18
## 3 1001 F 9 16 19
## 4 1001 F 12 17 21
## 5 1002 F 3 11 19
## 6 1002 F 6 16 21
## 7 1002 F 9 17 23
## 8 1002 F 12 20 23
## 9 1003 M 3 17 23
## 10 1003 M 6 20 27
## # ℹ 22 more rows
%>%
babies pivot_longer(
cols = c(-id, -sex),
names_to = "months",
values_to = "weight"
%>%
) print()
## # A tibble: 64 × 4
## id sex months weight
## <int> <chr> <chr> <dbl>
## 1 1001 F weight_3 9
## 2 1001 F weight_6 13
## 3 1001 F weight_9 16
## 4 1001 F weight_12 17
## 5 1001 F length_3 17
## 6 1001 F length_6 18
## 7 1001 F length_9 19
## 8 1001 F length_12 21
## 9 1002 F weight_3 11
## 10 1002 F weight_6 16
## # ℹ 54 more rows
using names_sep and .value argument
%>%
babies pivot_longer(
cols = c(-id, -sex),
names_to = c("measure", "months"),
names_sep = "_"
)
## # A tibble: 64 × 5
## id sex measure months value
## <int> <chr> <chr> <chr> <dbl>
## 1 1001 F weight 3 9
## 2 1001 F weight 6 13
## 3 1001 F weight 9 16
## 4 1001 F weight 12 17
## 5 1001 F length 3 17
## 6 1001 F length 6 18
## 7 1001 F length 9 19
## 8 1001 F length 12 21
## 9 1002 F weight 3 11
## 10 1002 F weight 6 16
## # ℹ 54 more rows
.value tells pivot_longer() to create a new column for each unique character string that is in front of the underscore
%>%
babies pivot_longer(
cols = c(-id, -sex),
names_to = c( ".value","months" ),
names_sep = "_"
)
## # A tibble: 32 × 5
## id sex months weight length
## <int> <chr> <chr> <dbl> <dbl>
## 1 1001 F 3 9 17
## 2 1001 F 6 13 18
## 3 1001 F 9 16 19
## 4 1001 F 12 17 21
## 5 1002 F 3 11 19
## 6 1002 F 6 16 21
## 7 1002 F 9 17 23
## 8 1002 F 12 20 23
## 9 1003 M 3 17 23
## 10 1003 M 6 20 27
## # ℹ 22 more rows
%>%
babies pivot_longer(
cols = c(-id, -sex),
names_to = c( "months", ".value"),
names_sep = "_"
)
## # A tibble: 16 × 7
## id sex months `3` `6` `9` `12`
## <int> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1001 F weight 9 13 16 17
## 2 1001 F length 17 18 19 21
## 3 1002 F weight 11 16 17 20
## 4 1002 F length 19 21 23 23
## 5 1003 M weight 17 20 23 24
## 6 1003 M length 23 27 30 33
## 7 1004 F weight 16 18 21 22
## 8 1004 F length 20 22 24 26
## 9 1005 M weight 11 15 16 18
## 10 1005 M length 18 20 22 23
## 11 1006 M weight 17 21 25 26
## 12 1006 M length 22 26 28 30
## 13 1007 M weight 16 17 19 21
## 14 1007 M length 21 23 24 25
## 15 1008 F weight 15 16 18 19
## 16 1008 F length 18 19 23 24
- Pivot wider again (long to wide)
pivoting to multiple sets of cols
%>%
babies pivot_longer(
cols = c(-id, -sex),
names_to = c( ".value","months" ),
names_sep = "_"
%>%
) pivot_wider( names_from=months, values_from= c(weight, length ))
## # A tibble: 8 × 10
## id sex weight_3 weight_6 weight_9 weight_12 length_3 length_6 length_9
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1001 F 9 13 16 17 17 18 19
## 2 1002 F 11 16 17 20 19 21 23
## 3 1003 M 17 20 23 24 23 27 30
## 4 1004 F 16 18 21 22 20 22 24
## 5 1005 M 11 15 16 18 18 20 22
## 6 1006 M 17 21 25 26 22 26 28
## 7 1007 M 16 17 19 21 21 23 24
## 8 1008 F 15 16 18 19 18 19 23
## # ℹ 1 more variable: length_12 <dbl>
pivot_wider(longdata, names_from=Sepal.Width, values_from= c(Sepal.Width.value))
## # A tibble: 50 × 13
## row Petal.Length_setosa Petal.Length_versicolor Petal.Length_virginica
## <int> <dbl> <dbl> <dbl>
## 1 1 1.4 4.7 6
## 2 2 1.4 4.5 5.1
## 3 3 1.3 4.9 5.9
## 4 4 1.5 4 5.6
## 5 5 1.4 4.6 5.8
## 6 6 1.7 4.5 6.6
## 7 7 1.4 4.7 4.5
## 8 8 1.5 3.3 6.3
## 9 9 1.4 4.6 5.8
## 10 10 1.5 3.9 6.1
## # ℹ 40 more rows
## # ℹ 9 more variables: Sepal.Length_setosa <dbl>, Sepal.Length_versicolor <dbl>,
## # Sepal.Length_virginica <dbl>, Petal.Width_setosa <dbl>,
## # Petal.Width_versicolor <dbl>, Petal.Width_virginica <dbl>,
## # Sepal.Width_setosa <dbl>, Sepal.Width_versicolor <dbl>,
## # Sepal.Width_virginica <dbl>
1.1.10 Separate columns
separate(iris, Species, into = c("integer","decimal","third"), sep="o")
## Warning: Expected 3 pieces. Missing pieces filled with `NA` in 100 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Sepal.Length Sepal.Width Petal.Length Petal.Width integer decimal third
## 1 5.1 3.5 1.4 0.2 set sa <NA>
## 2 4.9 3.0 1.4 0.2 set sa <NA>
## 3 4.7 3.2 1.3 0.2 set sa <NA>
## 4 4.6 3.1 1.5 0.2 set sa <NA>
## 5 5.0 3.6 1.4 0.2 set sa <NA>
## 6 5.4 3.9 1.7 0.4 set sa <NA>
## 7 4.6 3.4 1.4 0.3 set sa <NA>
## 8 5.0 3.4 1.5 0.2 set sa <NA>
## 9 4.4 2.9 1.4 0.2 set sa <NA>
## 10 4.9 3.1 1.5 0.1 set sa <NA>
## 11 5.4 3.7 1.5 0.2 set sa <NA>
## 12 4.8 3.4 1.6 0.2 set sa <NA>
## 13 4.8 3.0 1.4 0.1 set sa <NA>
## 14 4.3 3.0 1.1 0.1 set sa <NA>
## 15 5.8 4.0 1.2 0.2 set sa <NA>
## 16 5.7 4.4 1.5 0.4 set sa <NA>
## 17 5.4 3.9 1.3 0.4 set sa <NA>
## 18 5.1 3.5 1.4 0.3 set sa <NA>
## 19 5.7 3.8 1.7 0.3 set sa <NA>
## 20 5.1 3.8 1.5 0.3 set sa <NA>
## 21 5.4 3.4 1.7 0.2 set sa <NA>
## 22 5.1 3.7 1.5 0.4 set sa <NA>
## 23 4.6 3.6 1.0 0.2 set sa <NA>
## 24 5.1 3.3 1.7 0.5 set sa <NA>
## 25 4.8 3.4 1.9 0.2 set sa <NA>
## 26 5.0 3.0 1.6 0.2 set sa <NA>
## 27 5.0 3.4 1.6 0.4 set sa <NA>
## 28 5.2 3.5 1.5 0.2 set sa <NA>
## 29 5.2 3.4 1.4 0.2 set sa <NA>
## 30 4.7 3.2 1.6 0.2 set sa <NA>
## 31 4.8 3.1 1.6 0.2 set sa <NA>
## 32 5.4 3.4 1.5 0.4 set sa <NA>
## 33 5.2 4.1 1.5 0.1 set sa <NA>
## 34 5.5 4.2 1.4 0.2 set sa <NA>
## 35 4.9 3.1 1.5 0.2 set sa <NA>
## 36 5.0 3.2 1.2 0.2 set sa <NA>
## 37 5.5 3.5 1.3 0.2 set sa <NA>
## 38 4.9 3.6 1.4 0.1 set sa <NA>
## 39 4.4 3.0 1.3 0.2 set sa <NA>
## 40 5.1 3.4 1.5 0.2 set sa <NA>
## 41 5.0 3.5 1.3 0.3 set sa <NA>
## 42 4.5 2.3 1.3 0.3 set sa <NA>
## 43 4.4 3.2 1.3 0.2 set sa <NA>
## 44 5.0 3.5 1.6 0.6 set sa <NA>
## 45 5.1 3.8 1.9 0.4 set sa <NA>
## 46 4.8 3.0 1.4 0.3 set sa <NA>
## 47 5.1 3.8 1.6 0.2 set sa <NA>
## 48 4.6 3.2 1.4 0.2 set sa <NA>
## 49 5.3 3.7 1.5 0.2 set sa <NA>
## 50 5.0 3.3 1.4 0.2 set sa <NA>
## 51 7.0 3.2 4.7 1.4 versic l r
## 52 6.4 3.2 4.5 1.5 versic l r
## 53 6.9 3.1 4.9 1.5 versic l r
## 54 5.5 2.3 4.0 1.3 versic l r
## 55 6.5 2.8 4.6 1.5 versic l r
## 56 5.7 2.8 4.5 1.3 versic l r
## 57 6.3 3.3 4.7 1.6 versic l r
## 58 4.9 2.4 3.3 1.0 versic l r
## 59 6.6 2.9 4.6 1.3 versic l r
## 60 5.2 2.7 3.9 1.4 versic l r
## 61 5.0 2.0 3.5 1.0 versic l r
## 62 5.9 3.0 4.2 1.5 versic l r
## 63 6.0 2.2 4.0 1.0 versic l r
## 64 6.1 2.9 4.7 1.4 versic l r
## 65 5.6 2.9 3.6 1.3 versic l r
## 66 6.7 3.1 4.4 1.4 versic l r
## 67 5.6 3.0 4.5 1.5 versic l r
## 68 5.8 2.7 4.1 1.0 versic l r
## 69 6.2 2.2 4.5 1.5 versic l r
## 70 5.6 2.5 3.9 1.1 versic l r
## 71 5.9 3.2 4.8 1.8 versic l r
## 72 6.1 2.8 4.0 1.3 versic l r
## 73 6.3 2.5 4.9 1.5 versic l r
## 74 6.1 2.8 4.7 1.2 versic l r
## 75 6.4 2.9 4.3 1.3 versic l r
## 76 6.6 3.0 4.4 1.4 versic l r
## 77 6.8 2.8 4.8 1.4 versic l r
## 78 6.7 3.0 5.0 1.7 versic l r
## 79 6.0 2.9 4.5 1.5 versic l r
## 80 5.7 2.6 3.5 1.0 versic l r
## 81 5.5 2.4 3.8 1.1 versic l r
## 82 5.5 2.4 3.7 1.0 versic l r
## 83 5.8 2.7 3.9 1.2 versic l r
## 84 6.0 2.7 5.1 1.6 versic l r
## 85 5.4 3.0 4.5 1.5 versic l r
## 86 6.0 3.4 4.5 1.6 versic l r
## 87 6.7 3.1 4.7 1.5 versic l r
## 88 6.3 2.3 4.4 1.3 versic l r
## 89 5.6 3.0 4.1 1.3 versic l r
## 90 5.5 2.5 4.0 1.3 versic l r
## 91 5.5 2.6 4.4 1.2 versic l r
## 92 6.1 3.0 4.6 1.4 versic l r
## 93 5.8 2.6 4.0 1.2 versic l r
## 94 5.0 2.3 3.3 1.0 versic l r
## 95 5.6 2.7 4.2 1.3 versic l r
## 96 5.7 3.0 4.2 1.2 versic l r
## 97 5.7 2.9 4.2 1.3 versic l r
## 98 6.2 2.9 4.3 1.3 versic l r
## 99 5.1 2.5 3.0 1.1 versic l r
## 100 5.7 2.8 4.1 1.3 versic l r
## 101 6.3 3.3 6.0 2.5 virginica <NA> <NA>
## 102 5.8 2.7 5.1 1.9 virginica <NA> <NA>
## 103 7.1 3.0 5.9 2.1 virginica <NA> <NA>
## 104 6.3 2.9 5.6 1.8 virginica <NA> <NA>
## 105 6.5 3.0 5.8 2.2 virginica <NA> <NA>
## 106 7.6 3.0 6.6 2.1 virginica <NA> <NA>
## 107 4.9 2.5 4.5 1.7 virginica <NA> <NA>
## 108 7.3 2.9 6.3 1.8 virginica <NA> <NA>
## 109 6.7 2.5 5.8 1.8 virginica <NA> <NA>
## 110 7.2 3.6 6.1 2.5 virginica <NA> <NA>
## 111 6.5 3.2 5.1 2.0 virginica <NA> <NA>
## 112 6.4 2.7 5.3 1.9 virginica <NA> <NA>
## 113 6.8 3.0 5.5 2.1 virginica <NA> <NA>
## 114 5.7 2.5 5.0 2.0 virginica <NA> <NA>
## 115 5.8 2.8 5.1 2.4 virginica <NA> <NA>
## 116 6.4 3.2 5.3 2.3 virginica <NA> <NA>
## 117 6.5 3.0 5.5 1.8 virginica <NA> <NA>
## 118 7.7 3.8 6.7 2.2 virginica <NA> <NA>
## 119 7.7 2.6 6.9 2.3 virginica <NA> <NA>
## 120 6.0 2.2 5.0 1.5 virginica <NA> <NA>
## 121 6.9 3.2 5.7 2.3 virginica <NA> <NA>
## 122 5.6 2.8 4.9 2.0 virginica <NA> <NA>
## 123 7.7 2.8 6.7 2.0 virginica <NA> <NA>
## 124 6.3 2.7 4.9 1.8 virginica <NA> <NA>
## 125 6.7 3.3 5.7 2.1 virginica <NA> <NA>
## 126 7.2 3.2 6.0 1.8 virginica <NA> <NA>
## 127 6.2 2.8 4.8 1.8 virginica <NA> <NA>
## 128 6.1 3.0 4.9 1.8 virginica <NA> <NA>
## 129 6.4 2.8 5.6 2.1 virginica <NA> <NA>
## 130 7.2 3.0 5.8 1.6 virginica <NA> <NA>
## 131 7.4 2.8 6.1 1.9 virginica <NA> <NA>
## 132 7.9 3.8 6.4 2.0 virginica <NA> <NA>
## 133 6.4 2.8 5.6 2.2 virginica <NA> <NA>
## 134 6.3 2.8 5.1 1.5 virginica <NA> <NA>
## 135 6.1 2.6 5.6 1.4 virginica <NA> <NA>
## 136 7.7 3.0 6.1 2.3 virginica <NA> <NA>
## 137 6.3 3.4 5.6 2.4 virginica <NA> <NA>
## 138 6.4 3.1 5.5 1.8 virginica <NA> <NA>
## 139 6.0 3.0 4.8 1.8 virginica <NA> <NA>
## 140 6.9 3.1 5.4 2.1 virginica <NA> <NA>
## 141 6.7 3.1 5.6 2.4 virginica <NA> <NA>
## 142 6.9 3.1 5.1 2.3 virginica <NA> <NA>
## 143 5.8 2.7 5.1 1.9 virginica <NA> <NA>
## 144 6.8 3.2 5.9 2.3 virginica <NA> <NA>
## 145 6.7 3.3 5.7 2.5 virginica <NA> <NA>
## 146 6.7 3.0 5.2 2.3 virginica <NA> <NA>
## 147 6.3 2.5 5.0 1.9 virginica <NA> <NA>
## 148 6.5 3.0 5.2 2.0 virginica <NA> <NA>
## 149 6.2 3.4 5.4 2.3 virginica <NA> <NA>
## 150 5.9 3.0 5.1 1.8 virginica <NA> <NA>
1.1.11 Recode/relabel data
mutate(iris, Species2 = recode(Species, "setosa"="seto", "versicolor"="versi"))
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species Species2
## 1 5.1 3.5 1.4 0.2 setosa seto
## 2 4.9 3.0 1.4 0.2 setosa seto
## 3 4.7 3.2 1.3 0.2 setosa seto
## 4 4.6 3.1 1.5 0.2 setosa seto
## 5 5.0 3.6 1.4 0.2 setosa seto
## 6 5.4 3.9 1.7 0.4 setosa seto
## 7 4.6 3.4 1.4 0.3 setosa seto
## 8 5.0 3.4 1.5 0.2 setosa seto
## 9 4.4 2.9 1.4 0.2 setosa seto
## 10 4.9 3.1 1.5 0.1 setosa seto
## 11 5.4 3.7 1.5 0.2 setosa seto
## 12 4.8 3.4 1.6 0.2 setosa seto
## 13 4.8 3.0 1.4 0.1 setosa seto
## 14 4.3 3.0 1.1 0.1 setosa seto
## 15 5.8 4.0 1.2 0.2 setosa seto
## 16 5.7 4.4 1.5 0.4 setosa seto
## 17 5.4 3.9 1.3 0.4 setosa seto
## 18 5.1 3.5 1.4 0.3 setosa seto
## 19 5.7 3.8 1.7 0.3 setosa seto
## 20 5.1 3.8 1.5 0.3 setosa seto
## 21 5.4 3.4 1.7 0.2 setosa seto
## 22 5.1 3.7 1.5 0.4 setosa seto
## 23 4.6 3.6 1.0 0.2 setosa seto
## 24 5.1 3.3 1.7 0.5 setosa seto
## 25 4.8 3.4 1.9 0.2 setosa seto
## 26 5.0 3.0 1.6 0.2 setosa seto
## 27 5.0 3.4 1.6 0.4 setosa seto
## 28 5.2 3.5 1.5 0.2 setosa seto
## 29 5.2 3.4 1.4 0.2 setosa seto
## 30 4.7 3.2 1.6 0.2 setosa seto
## 31 4.8 3.1 1.6 0.2 setosa seto
## 32 5.4 3.4 1.5 0.4 setosa seto
## 33 5.2 4.1 1.5 0.1 setosa seto
## 34 5.5 4.2 1.4 0.2 setosa seto
## 35 4.9 3.1 1.5 0.2 setosa seto
## 36 5.0 3.2 1.2 0.2 setosa seto
## 37 5.5 3.5 1.3 0.2 setosa seto
## 38 4.9 3.6 1.4 0.1 setosa seto
## 39 4.4 3.0 1.3 0.2 setosa seto
## 40 5.1 3.4 1.5 0.2 setosa seto
## 41 5.0 3.5 1.3 0.3 setosa seto
## 42 4.5 2.3 1.3 0.3 setosa seto
## 43 4.4 3.2 1.3 0.2 setosa seto
## 44 5.0 3.5 1.6 0.6 setosa seto
## 45 5.1 3.8 1.9 0.4 setosa seto
## 46 4.8 3.0 1.4 0.3 setosa seto
## 47 5.1 3.8 1.6 0.2 setosa seto
## 48 4.6 3.2 1.4 0.2 setosa seto
## 49 5.3 3.7 1.5 0.2 setosa seto
## 50 5.0 3.3 1.4 0.2 setosa seto
## 51 7.0 3.2 4.7 1.4 versicolor versi
## 52 6.4 3.2 4.5 1.5 versicolor versi
## 53 6.9 3.1 4.9 1.5 versicolor versi
## 54 5.5 2.3 4.0 1.3 versicolor versi
## 55 6.5 2.8 4.6 1.5 versicolor versi
## 56 5.7 2.8 4.5 1.3 versicolor versi
## 57 6.3 3.3 4.7 1.6 versicolor versi
## 58 4.9 2.4 3.3 1.0 versicolor versi
## 59 6.6 2.9 4.6 1.3 versicolor versi
## 60 5.2 2.7 3.9 1.4 versicolor versi
## 61 5.0 2.0 3.5 1.0 versicolor versi
## 62 5.9 3.0 4.2 1.5 versicolor versi
## 63 6.0 2.2 4.0 1.0 versicolor versi
## 64 6.1 2.9 4.7 1.4 versicolor versi
## 65 5.6 2.9 3.6 1.3 versicolor versi
## 66 6.7 3.1 4.4 1.4 versicolor versi
## 67 5.6 3.0 4.5 1.5 versicolor versi
## 68 5.8 2.7 4.1 1.0 versicolor versi
## 69 6.2 2.2 4.5 1.5 versicolor versi
## 70 5.6 2.5 3.9 1.1 versicolor versi
## 71 5.9 3.2 4.8 1.8 versicolor versi
## 72 6.1 2.8 4.0 1.3 versicolor versi
## 73 6.3 2.5 4.9 1.5 versicolor versi
## 74 6.1 2.8 4.7 1.2 versicolor versi
## 75 6.4 2.9 4.3 1.3 versicolor versi
## 76 6.6 3.0 4.4 1.4 versicolor versi
## 77 6.8 2.8 4.8 1.4 versicolor versi
## 78 6.7 3.0 5.0 1.7 versicolor versi
## 79 6.0 2.9 4.5 1.5 versicolor versi
## 80 5.7 2.6 3.5 1.0 versicolor versi
## 81 5.5 2.4 3.8 1.1 versicolor versi
## 82 5.5 2.4 3.7 1.0 versicolor versi
## 83 5.8 2.7 3.9 1.2 versicolor versi
## 84 6.0 2.7 5.1 1.6 versicolor versi
## 85 5.4 3.0 4.5 1.5 versicolor versi
## 86 6.0 3.4 4.5 1.6 versicolor versi
## 87 6.7 3.1 4.7 1.5 versicolor versi
## 88 6.3 2.3 4.4 1.3 versicolor versi
## 89 5.6 3.0 4.1 1.3 versicolor versi
## 90 5.5 2.5 4.0 1.3 versicolor versi
## 91 5.5 2.6 4.4 1.2 versicolor versi
## 92 6.1 3.0 4.6 1.4 versicolor versi
## 93 5.8 2.6 4.0 1.2 versicolor versi
## 94 5.0 2.3 3.3 1.0 versicolor versi
## 95 5.6 2.7 4.2 1.3 versicolor versi
## 96 5.7 3.0 4.2 1.2 versicolor versi
## 97 5.7 2.9 4.2 1.3 versicolor versi
## 98 6.2 2.9 4.3 1.3 versicolor versi
## 99 5.1 2.5 3.0 1.1 versicolor versi
## 100 5.7 2.8 4.1 1.3 versicolor versi
## 101 6.3 3.3 6.0 2.5 virginica virginica
## 102 5.8 2.7 5.1 1.9 virginica virginica
## 103 7.1 3.0 5.9 2.1 virginica virginica
## 104 6.3 2.9 5.6 1.8 virginica virginica
## 105 6.5 3.0 5.8 2.2 virginica virginica
## 106 7.6 3.0 6.6 2.1 virginica virginica
## 107 4.9 2.5 4.5 1.7 virginica virginica
## 108 7.3 2.9 6.3 1.8 virginica virginica
## 109 6.7 2.5 5.8 1.8 virginica virginica
## 110 7.2 3.6 6.1 2.5 virginica virginica
## 111 6.5 3.2 5.1 2.0 virginica virginica
## 112 6.4 2.7 5.3 1.9 virginica virginica
## 113 6.8 3.0 5.5 2.1 virginica virginica
## 114 5.7 2.5 5.0 2.0 virginica virginica
## 115 5.8 2.8 5.1 2.4 virginica virginica
## 116 6.4 3.2 5.3 2.3 virginica virginica
## 117 6.5 3.0 5.5 1.8 virginica virginica
## 118 7.7 3.8 6.7 2.2 virginica virginica
## 119 7.7 2.6 6.9 2.3 virginica virginica
## 120 6.0 2.2 5.0 1.5 virginica virginica
## 121 6.9 3.2 5.7 2.3 virginica virginica
## 122 5.6 2.8 4.9 2.0 virginica virginica
## 123 7.7 2.8 6.7 2.0 virginica virginica
## 124 6.3 2.7 4.9 1.8 virginica virginica
## 125 6.7 3.3 5.7 2.1 virginica virginica
## 126 7.2 3.2 6.0 1.8 virginica virginica
## 127 6.2 2.8 4.8 1.8 virginica virginica
## 128 6.1 3.0 4.9 1.8 virginica virginica
## 129 6.4 2.8 5.6 2.1 virginica virginica
## 130 7.2 3.0 5.8 1.6 virginica virginica
## 131 7.4 2.8 6.1 1.9 virginica virginica
## 132 7.9 3.8 6.4 2.0 virginica virginica
## 133 6.4 2.8 5.6 2.2 virginica virginica
## 134 6.3 2.8 5.1 1.5 virginica virginica
## 135 6.1 2.6 5.6 1.4 virginica virginica
## 136 7.7 3.0 6.1 2.3 virginica virginica
## 137 6.3 3.4 5.6 2.4 virginica virginica
## 138 6.4 3.1 5.5 1.8 virginica virginica
## 139 6.0 3.0 4.8 1.8 virginica virginica
## 140 6.9 3.1 5.4 2.1 virginica virginica
## 141 6.7 3.1 5.6 2.4 virginica virginica
## 142 6.9 3.1 5.1 2.3 virginica virginica
## 143 5.8 2.7 5.1 1.9 virginica virginica
## 144 6.8 3.2 5.9 2.3 virginica virginica
## 145 6.7 3.3 5.7 2.5 virginica virginica
## 146 6.7 3.0 5.2 2.3 virginica virginica
## 147 6.3 2.5 5.0 1.9 virginica virginica
## 148 6.5 3.0 5.2 2.0 virginica virginica
## 149 6.2 3.4 5.4 2.3 virginica virginica
## 150 5.9 3.0 5.1 1.8 virginica virginica
1.1.12 deduplication
1.1.12.1 Complete duplicate row
<- tribble(
df ~id, ~day, ~x,
1, 1, 1,
1, 2, 11,
2, 1, 12,
2, 2, 13,
2, 2, 14,
3, 1, 12,
3, 1, 12,
3, 2, 13,
4, 1, 13,
5, 1, 10,
5, 2, 11,
5, 1, 10
)
%>%
df distinct()
## # A tibble: 10 × 3
## id day x
## <dbl> <dbl> <dbl>
## 1 1 1 1
## 2 1 2 11
## 3 2 1 12
## 4 2 2 13
## 5 2 2 14
## 6 3 1 12
## 7 3 2 13
## 8 4 1 13
## 9 5 1 10
## 10 5 2 11
mark duplication
%>%
df mutate(dup = duplicated(df))
## # A tibble: 12 × 4
## id day x dup
## <dbl> <dbl> <dbl> <lgl>
## 1 1 1 1 FALSE
## 2 1 2 11 FALSE
## 3 2 1 12 FALSE
## 4 2 2 13 FALSE
## 5 2 2 14 FALSE
## 6 3 1 12 FALSE
## 7 3 1 12 TRUE
## 8 3 2 13 FALSE
## 9 4 1 13 FALSE
## 10 5 1 10 FALSE
## 11 5 2 11 FALSE
## 12 5 1 10 TRUE
%>%
df group_by_all() %>%
mutate(
n_row = row_number(),
dup = n_row > 1
)
## # A tibble: 12 × 5
## # Groups: id, day, x [10]
## id day x n_row dup
## <dbl> <dbl> <dbl> <int> <lgl>
## 1 1 1 1 1 FALSE
## 2 1 2 11 1 FALSE
## 3 2 1 12 1 FALSE
## 4 2 2 13 1 FALSE
## 5 2 2 14 1 FALSE
## 6 3 1 12 1 FALSE
## 7 3 1 12 2 TRUE
## 8 3 2 13 1 FALSE
## 9 4 1 13 1 FALSE
## 10 5 1 10 1 FALSE
## 11 5 2 11 1 FALSE
## 12 5 1 10 2 TRUE
1.1.12.2 Partial duplicate rows
%>%
df distinct(id, day, .keep_all = TRUE)
## # A tibble: 9 × 3
## id day x
## <dbl> <dbl> <dbl>
## 1 1 1 1
## 2 1 2 11
## 3 2 1 12
## 4 2 2 13
## 5 3 1 12
## 6 3 2 13
## 7 4 1 13
## 8 5 1 10
## 9 5 2 11
%>%
df group_by(id, day) %>%
mutate(
count = row_number(), # Counts rows by group
dup = count > 1 # TRUE if there is more than one row per group
)
## # A tibble: 12 × 5
## # Groups: id, day [9]
## id day x count dup
## <dbl> <dbl> <dbl> <int> <lgl>
## 1 1 1 1 1 FALSE
## 2 1 2 11 1 FALSE
## 3 2 1 12 1 FALSE
## 4 2 2 13 1 FALSE
## 5 2 2 14 2 TRUE
## 6 3 1 12 1 FALSE
## 7 3 1 12 2 TRUE
## 8 3 2 13 1 FALSE
## 9 4 1 13 1 FALSE
## 10 5 1 10 1 FALSE
## 11 5 2 11 1 FALSE
## 12 5 1 10 2 TRUE
select duplicates
%>%
df mutate(dup = duplicated(.) | duplicated(., fromLast = TRUE)) %>%
filter(dup==T)
## # A tibble: 4 × 4
## id day x dup
## <dbl> <dbl> <dbl> <lgl>
## 1 3 1 12 TRUE
## 2 3 1 12 TRUE
## 3 5 1 10 TRUE
## 4 5 1 10 TRUE
1.1.13 Combine data sets
- prepare data sets
<- data.frame(ID = 1:4,
data1 X1 = c("a1", "a2","a3", "a4"),
stringsAsFactors = FALSE)
<- data.frame(ID = 2:5,
data2 X2 = c("b1", "b2","b3", "b4"),
stringsAsFactors = FALSE)
- inner join
inner_join(data1, data2, by = "ID")
## ID X1 X2
## 1 2 a2 b1
## 2 3 a3 b2
## 3 4 a4 b3
- left join
there is not ID 1 in data2, thence, x2 is missing.
left_join(data1, data2, by = "ID")
## ID X1 X2
## 1 1 a1 <NA>
## 2 2 a2 b1
## 3 3 a3 b2
## 4 4 a4 b3
differing key col names
left_join(data1, data2, by = c("ID"="ID"))
## ID X1 X2
## 1 1 a1 <NA>
## 2 2 a2 b1
## 3 3 a3 b2
## 4 4 a4 b3
One-to-many relationship
has the same statement
left_join(data1, data2, by = c("ID" ))
## ID X1 X2
## 1 1 a1 <NA>
## 2 2 a2 b1
## 3 3 a3 b2
## 4 4 a4 b3
multiple data frames
%>%
data1 left_join(data2, by = "ID") %>%
left_join(data2, by = "ID")
## ID X1 X2.x X2.y
## 1 1 a1 <NA> <NA>
## 2 2 a2 b1 b1
## 3 3 a3 b2 b2
## 4 4 a4 b3 b3
multiple key values
# demographics %>%
# left_join(ultra, by = c("id", "visit"))
- right join
right_join(data1, data2, by = "ID")
## ID X1 X2
## 1 2 a2 b1
## 2 3 a3 b2
## 3 4 a4 b3
## 4 5 <NA> b4
- full join
full_join(data1, data2, by = "ID")
## ID X1 X2
## 1 1 a1 <NA>
## 2 2 a2 b1
## 3 3 a3 b2
## 4 4 a4 b3
## 5 5 <NA> b4
- keep cases of left data table without in right data table
anti_join(data1, data2, by = "ID")
## ID X1
## 1 1 a1
- keep cases of left data table in right data table
semi_join(data1, data2, by = "ID")
## ID X1
## 1 2 a2
## 2 3 a3
## 3 4 a4
- multiple full join
full_join(data1, data2, by = "ID") %>%
full_join(., data2, by = "ID")
## ID X1 X2.x X2.y
## 1 1 a1 <NA> <NA>
## 2 2 a2 b1 b1
## 3 3 a3 b2 b2
## 4 4 a4 b3 b3
## 5 5 <NA> b4 b4
- append two data tables by using join and merge
<- data.frame(col1 = c(6:8),
data_frame1 col2 = letters[1:3],
col3 = c(1,4,NA))
<- data.frame(col1 = c(5:6),
data_frame2 col5 = letters[7:8])
<- merge(data_frame1, data_frame2,
data_frame_merge by = 'col1', all = TRUE)
print (data_frame_merge)
## col1 col2 col3 col5
## 1 5 <NA> NA g
## 2 6 a 1 h
## 3 7 b 4 <NA>
## 4 8 c NA <NA>
full_join(data_frame1,data_frame2, by=c("col1"),)
## col1 col2 col3 col5
## 1 6 a 1 h
## 2 7 b 4 <NA>
## 3 8 c NA <NA>
## 4 5 <NA> NA g
- adding rows using
bind_ function
rbind doesn’t work
bind_ function conbines the data frames
based on column names; having our columns in a different order isn’t a problem.
<- data.frame(col1 = LETTERS[1:6],
df1 col2a = c(5:10),
col3a = TRUE)
<- data.frame(col1 = LETTERS[4:8],
df2 col2b= c(4:8),
col3b = FALSE)
# rbind(df1,df2)
%>%
df1 bind_rows(df2)
## col1 col2a col3a col2b col3b
## 1 A 5 TRUE NA NA
## 2 B 6 TRUE NA NA
## 3 C 7 TRUE NA NA
## 4 D 8 TRUE NA NA
## 5 E 9 TRUE NA NA
## 6 F 10 TRUE NA NA
## 7 D NA NA 4 FALSE
## 8 E NA NA 5 FALSE
## 9 F NA NA 6 FALSE
## 10 G NA NA 7 FALSE
## 11 H NA NA 8 FALSE
rename the differing cols
%>%
df1 bind_rows(df2 %>%
rename(col2a=col2b,
col3a=col3b) )
## col1 col2a col3a
## 1 A 5 TRUE
## 2 B 6 TRUE
## 3 C 7 TRUE
## 4 D 8 TRUE
## 5 E 9 TRUE
## 6 F 10 TRUE
## 7 D 4 FALSE
## 8 E 5 FALSE
## 9 F 6 FALSE
## 10 G 7 FALSE
## 11 H 8 FALSE
combining more than 2 data frames
%>%
df1 bind_rows(df2,df2)
## col1 col2a col3a col2b col3b
## 1 A 5 TRUE NA NA
## 2 B 6 TRUE NA NA
## 3 C 7 TRUE NA NA
## 4 D 8 TRUE NA NA
## 5 E 9 TRUE NA NA
## 6 F 10 TRUE NA NA
## 7 D NA NA 4 FALSE
## 8 E NA NA 5 FALSE
## 9 F NA NA 6 FALSE
## 10 G NA NA 7 FALSE
## 11 H NA NA 8 FALSE
## 12 D NA NA 4 FALSE
## 13 E NA NA 5 FALSE
## 14 F NA NA 6 FALSE
## 15 G NA NA 7 FALSE
## 16 H NA NA 8 FALSE
1.1.14 Working with character strings
library(stringr)
library(readxl)
- Look at the values
<- read_excel("C:\\Users\\hed2\\Downloads\\mybook2\\mybook2/excel.xlsx")
ehr %>%
ehr arrange(sex) %>%
pull(sex)
## [1] "Female" "Female" "Male" "Male"
- Coerce to lowercase
%>%
ehr arrange(sex) %>%
pull(sex) %>%
str_to_lower()
## [1] "female" "female" "male" "male"
- Coerce to upper case
%>%
ehr arrange(sex) %>%
pull(sex) %>%
str_to_upper()
## [1] "FEMALE" "FEMALE" "MALE" "MALE"
- Title case
%>%
ehr arrange(sex) %>%
pull(sex) %>%
str_to_title()
## [1] "Female" "Female" "Male" "Male"
- Sentence case
%>%
ehr arrange(sex) %>%
pull(sex) %>%
str_to_sentence()
## [1] "Female" "Female" "Male" "Male"
- Trim white space from the beginning and end
str_trim("Ryan Edwards ")
## [1] "Ryan Edwards"
- Remove the comma
str_replace(
string = "weston fox,",
pattern = ",",
replacement = ""
)
## [1] "weston fox"
%>%
ehr mutate(sex = str_replace(sex, ",", ""))
## # A tibble: 4 × 4
## ID sex ht_in wgt_lbs
## <chr> <chr> <dbl> <dbl>
## 1 001 Male 71 190
## 2 002 Male 69 176
## 3 003 Female 64 130
## 4 004 Female 65 154
- Separate values into component parts
regular express, ^\\w+ look for one or more consecutive word characters at the start of the character string and extract them.
str_extract("zariah hernandez", "^\\w+")
## [1] "zariah"
str_extract("zariah hernandez", "\\w+$")
## [1] "hernandez"
::str_match("mean(weight_3)", "(\\()(\\w+)(\\_)(\\d+)(\\))") stringr
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] "(weight_3)" "(" "weight" "_" "3" ")"
- Detect a special string to category
index a special string
%>%
ehr mutate(
man = str_detect(sex, "F"),
woman = str_detect(sex, "M"),
all = str_detect(sex, "ale")
%>%
) mutate(man_dummy = as.numeric(man))
## # A tibble: 4 × 8
## ID sex ht_in wgt_lbs man woman all man_dummy
## <chr> <chr> <dbl> <dbl> <lgl> <lgl> <lgl> <dbl>
## 1 001 Male 71 190 FALSE TRUE TRUE 0
## 2 002 Male 69 176 FALSE TRUE TRUE 0
## 3 003 Female 64 130 TRUE FALSE TRUE 1
## 4 004 Female 65 154 TRUE FALSE TRUE 1
%>%
ehr mutate(
dummy = ifelse(str_detect(sex, "F"),0,1)
)
## # A tibble: 4 × 5
## ID sex ht_in wgt_lbs dummy
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 001 Male 71 190 1
## 2 002 Male 69 176 1
## 3 003 Female 64 130 0
## 4 004 Female 65 154 0
1.1.15 Conditional operations
- Testing multiple conditions simultaneously
<- tibble(
blood_pressure id = 1:10,
sysbp = c(152, 120, 119, 123, 135, 83, 191, 147, 209, 166),
diasbp = c(78, 60, 88, 76, 85, 54, 116, 95, 100, 106)
%>%
) print()
## # A tibble: 10 × 3
## id sysbp diasbp
## <int> <dbl> <dbl>
## 1 1 152 78
## 2 2 120 60
## 3 3 119 88
## 4 4 123 76
## 5 5 135 85
## 6 6 83 54
## 7 7 191 116
## 8 8 147 95
## 9 9 209 100
## 10 10 166 106
%>%
blood_pressure mutate(bp = if_else(sysbp < 120 & diasbp < 80, "Normal", "Not Normal"))
## # A tibble: 10 × 4
## id sysbp diasbp bp
## <int> <dbl> <dbl> <chr>
## 1 1 152 78 Not Normal
## 2 2 120 60 Not Normal
## 3 3 119 88 Not Normal
## 4 4 123 76 Not Normal
## 5 5 135 85 Not Normal
## 6 6 83 54 Normal
## 7 7 191 116 Not Normal
## 8 8 147 95 Not Normal
## 9 9 209 100 Not Normal
## 10 10 166 106 Not Normal
- Using case_when function
%>%
blood_pressure mutate(
bp = case_when(
< 120 & diasbp < 80 ~ "Normal",
sysbp >= 120 & sysbp < 130 & diasbp < 80 ~ "Elevated",
sysbp >= 130 & sysbp < 140 | diasbp >= 80 & diasbp < 90 ~ "Hypertension Stage 1",
sysbp >= 140 | diasbp >= 90 ~ "Hypertension Stage 2"
sysbp
) )
## # A tibble: 10 × 4
## id sysbp diasbp bp
## <int> <dbl> <dbl> <chr>
## 1 1 152 78 Hypertension Stage 2
## 2 2 120 60 Elevated
## 3 3 119 88 Hypertension Stage 1
## 4 4 123 76 Elevated
## 5 5 135 85 Hypertension Stage 1
## 6 6 83 54 Normal
## 7 7 191 116 Hypertension Stage 2
## 8 8 147 95 Hypertension Stage 2
## 9 9 209 100 Hypertension Stage 2
## 10 10 166 106 Hypertension Stage 2
- Recoding variables
%>%
blood_pressure mutate(
bp= case_when(
< 120 & diasbp < 80 ~ 1,
sysbp >= 120 & sysbp < 130 & diasbp < 80 ~ 2,
sysbp >= 130 & sysbp < 140 | diasbp >= 80 & diasbp < 90 ~ 3,
sysbp >= 140 | diasbp >= 90 ~ 4
sysbp
)
,bp_f = factor( bp,
labels = c(
"normal", "elevated", "stage 1",
"stage 2")
) )
## # A tibble: 10 × 5
## id sysbp diasbp bp bp_f
## <int> <dbl> <dbl> <dbl> <fct>
## 1 1 152 78 4 stage 2
## 2 2 120 60 2 elevated
## 3 3 119 88 3 stage 1
## 4 4 123 76 2 elevated
## 5 5 135 85 3 stage 1
## 6 6 83 54 1 normal
## 7 7 191 116 4 stage 2
## 8 8 147 95 4 stage 2
## 9 9 209 100 4 stage 2
## 10 10 166 106 4 stage 2
- Recoding missing
using NA_real instead of NA.
<- tibble(
demographics race = c(1, 2, 1, 4, 7, 1, 2, 9, 1, 3),
hispanic = c(7, 0, 1, 0, 1, 0, 1, 9, 0, 1)
)
%>%
demographics mutate(
# Recode 7 and 9 to missing
race_recode = if_else(race == 7 | race == 9, NA_real_, race),
hispanic_recode = if_else(hispanic == 7 | hispanic == 9, NA_real_, hispanic)
)
## # A tibble: 10 × 4
## race hispanic race_recode hispanic_recode
## <dbl> <dbl> <dbl> <dbl>
## 1 1 7 1 NA
## 2 2 0 2 0
## 3 1 1 1 1
## 4 4 0 4 0
## 5 7 1 NA 1
## 6 1 0 1 0
## 7 2 1 2 1
## 8 9 9 NA NA
## 9 1 0 1 0
## 10 3 1 3 1
check whether or not
%>%
blood_pressure mutate(
match = sysbp == sysbp,
n_match = if_else(is.na(match), FALSE, match)
)
## # A tibble: 10 × 5
## id sysbp diasbp match n_match
## <int> <dbl> <dbl> <lgl> <lgl>
## 1 1 152 78 TRUE TRUE
## 2 2 120 60 TRUE TRUE
## 3 3 119 88 TRUE TRUE
## 4 4 123 76 TRUE TRUE
## 5 5 135 85 TRUE TRUE
## 6 6 83 54 TRUE TRUE
## 7 7 191 116 TRUE TRUE
## 8 8 147 95 TRUE TRUE
## 9 9 209 100 TRUE TRUE
## 10 10 166 106 TRUE TRUE
1.2 How to do aggregation/ summarization
1.2.1 Summarization after grouping
library(tidyverse)
%>%
iris group_by(Species) %>%
summarize(Support = mean(Sepal.Length)) %>% # average
arrange(-Support) # sort
## # A tibble: 3 × 2
## Species Support
## <fct> <dbl>
## 1 virginica 6.59
## 2 versicolor 5.94
## 3 setosa 5.01
%>%
iris group_by(Species) %>%
summarize(mean_s = mean(Sepal.Width),
meas_p = mean(Petal.Length),
diff = mean(Sepal.Width-Petal.Length)) %>%
arrange(-diff)
## # A tibble: 3 × 4
## Species mean_s meas_p diff
## <fct> <dbl> <dbl> <dbl>
## 1 setosa 3.43 1.46 1.97
## 2 versicolor 2.77 4.26 -1.49
## 3 virginica 2.97 5.55 -2.58
%>%
iris group_by(Species) %>%
summarize(n = n(),
meas_p = mean(Petal.Length),
sd = sd(Petal.Length))
## # A tibble: 3 × 4
## Species n meas_p sd
## <fct> <int> <dbl> <dbl>
## 1 setosa 50 1.46 0.174
## 2 versicolor 50 4.26 0.470
## 3 virginica 50 5.55 0.552
1.2.2 Summarization with upgroup
%>%
iris ungroup( ) %>%
summarize(n = n(),
meas_p = mean(Petal.Length),
sd = sd(Petal.Length))
## n meas_p sd
## 1 150 3.758 1.765298
1.2.3 Mutate new variables after grouping
%>%
iris group_by(Species) %>%
mutate(n = n(),
meas_p = mean(Petal.Length),
sd = sd(Petal.Length))
## # A tibble: 150 × 8
## # Groups: Species [3]
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species n meas_p sd
## <dbl> <dbl> <dbl> <dbl> <fct> <int> <dbl> <dbl>
## 1 5.1 3.5 1.4 0.2 setosa 50 1.46 0.174
## 2 4.9 3 1.4 0.2 setosa 50 1.46 0.174
## 3 4.7 3.2 1.3 0.2 setosa 50 1.46 0.174
## 4 4.6 3.1 1.5 0.2 setosa 50 1.46 0.174
## 5 5 3.6 1.4 0.2 setosa 50 1.46 0.174
## 6 5.4 3.9 1.7 0.4 setosa 50 1.46 0.174
## 7 4.6 3.4 1.4 0.3 setosa 50 1.46 0.174
## 8 5 3.4 1.5 0.2 setosa 50 1.46 0.174
## 9 4.4 2.9 1.4 0.2 setosa 50 1.46 0.174
## 10 4.9 3.1 1.5 0.1 setosa 50 1.46 0.174
## # ℹ 140 more rows
%>%
iris group_by(Species) %>%
mutate(n = n(),
meas_p = mean(Petal.Length, na.rm = T),
sd = sd(Petal.Length)) %>%
summarize (n_mean = paste ("sample size:",mean(n)),
meas_p = mean(Petal.Length),
sd = sd(Petal.Length))
## # A tibble: 3 × 4
## Species n_mean meas_p sd
## <fct> <chr> <dbl> <dbl>
## 1 setosa sample size: 50 1.46 0.174
## 2 versicolor sample size: 50 4.26 0.470
## 3 virginica sample size: 50 5.55 0.552
1.2.4 Recode and generate new variables, then value label
<- iris %>%
irisifelse mutate(Species2 = ifelse(Species == "setosa", NA, Species))
# relabel values
$Species2 <- factor(irisifelse$Species2,labels = c( "versi","virg"))
irisifelse irisifelse
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species Species2
## 1 5.1 3.5 1.4 0.2 setosa <NA>
## 2 4.9 3.0 1.4 0.2 setosa <NA>
## 3 4.7 3.2 1.3 0.2 setosa <NA>
## 4 4.6 3.1 1.5 0.2 setosa <NA>
## 5 5.0 3.6 1.4 0.2 setosa <NA>
## 6 5.4 3.9 1.7 0.4 setosa <NA>
## 7 4.6 3.4 1.4 0.3 setosa <NA>
## 8 5.0 3.4 1.5 0.2 setosa <NA>
## 9 4.4 2.9 1.4 0.2 setosa <NA>
## 10 4.9 3.1 1.5 0.1 setosa <NA>
## 11 5.4 3.7 1.5 0.2 setosa <NA>
## 12 4.8 3.4 1.6 0.2 setosa <NA>
## 13 4.8 3.0 1.4 0.1 setosa <NA>
## 14 4.3 3.0 1.1 0.1 setosa <NA>
## 15 5.8 4.0 1.2 0.2 setosa <NA>
## 16 5.7 4.4 1.5 0.4 setosa <NA>
## 17 5.4 3.9 1.3 0.4 setosa <NA>
## 18 5.1 3.5 1.4 0.3 setosa <NA>
## 19 5.7 3.8 1.7 0.3 setosa <NA>
## 20 5.1 3.8 1.5 0.3 setosa <NA>
## 21 5.4 3.4 1.7 0.2 setosa <NA>
## 22 5.1 3.7 1.5 0.4 setosa <NA>
## 23 4.6 3.6 1.0 0.2 setosa <NA>
## 24 5.1 3.3 1.7 0.5 setosa <NA>
## 25 4.8 3.4 1.9 0.2 setosa <NA>
## 26 5.0 3.0 1.6 0.2 setosa <NA>
## 27 5.0 3.4 1.6 0.4 setosa <NA>
## 28 5.2 3.5 1.5 0.2 setosa <NA>
## 29 5.2 3.4 1.4 0.2 setosa <NA>
## 30 4.7 3.2 1.6 0.2 setosa <NA>
## 31 4.8 3.1 1.6 0.2 setosa <NA>
## 32 5.4 3.4 1.5 0.4 setosa <NA>
## 33 5.2 4.1 1.5 0.1 setosa <NA>
## 34 5.5 4.2 1.4 0.2 setosa <NA>
## 35 4.9 3.1 1.5 0.2 setosa <NA>
## 36 5.0 3.2 1.2 0.2 setosa <NA>
## 37 5.5 3.5 1.3 0.2 setosa <NA>
## 38 4.9 3.6 1.4 0.1 setosa <NA>
## 39 4.4 3.0 1.3 0.2 setosa <NA>
## 40 5.1 3.4 1.5 0.2 setosa <NA>
## 41 5.0 3.5 1.3 0.3 setosa <NA>
## 42 4.5 2.3 1.3 0.3 setosa <NA>
## 43 4.4 3.2 1.3 0.2 setosa <NA>
## 44 5.0 3.5 1.6 0.6 setosa <NA>
## 45 5.1 3.8 1.9 0.4 setosa <NA>
## 46 4.8 3.0 1.4 0.3 setosa <NA>
## 47 5.1 3.8 1.6 0.2 setosa <NA>
## 48 4.6 3.2 1.4 0.2 setosa <NA>
## 49 5.3 3.7 1.5 0.2 setosa <NA>
## 50 5.0 3.3 1.4 0.2 setosa <NA>
## 51 7.0 3.2 4.7 1.4 versicolor versi
## 52 6.4 3.2 4.5 1.5 versicolor versi
## 53 6.9 3.1 4.9 1.5 versicolor versi
## 54 5.5 2.3 4.0 1.3 versicolor versi
## 55 6.5 2.8 4.6 1.5 versicolor versi
## 56 5.7 2.8 4.5 1.3 versicolor versi
## 57 6.3 3.3 4.7 1.6 versicolor versi
## 58 4.9 2.4 3.3 1.0 versicolor versi
## 59 6.6 2.9 4.6 1.3 versicolor versi
## 60 5.2 2.7 3.9 1.4 versicolor versi
## 61 5.0 2.0 3.5 1.0 versicolor versi
## 62 5.9 3.0 4.2 1.5 versicolor versi
## 63 6.0 2.2 4.0 1.0 versicolor versi
## 64 6.1 2.9 4.7 1.4 versicolor versi
## 65 5.6 2.9 3.6 1.3 versicolor versi
## 66 6.7 3.1 4.4 1.4 versicolor versi
## 67 5.6 3.0 4.5 1.5 versicolor versi
## 68 5.8 2.7 4.1 1.0 versicolor versi
## 69 6.2 2.2 4.5 1.5 versicolor versi
## 70 5.6 2.5 3.9 1.1 versicolor versi
## 71 5.9 3.2 4.8 1.8 versicolor versi
## 72 6.1 2.8 4.0 1.3 versicolor versi
## 73 6.3 2.5 4.9 1.5 versicolor versi
## 74 6.1 2.8 4.7 1.2 versicolor versi
## 75 6.4 2.9 4.3 1.3 versicolor versi
## 76 6.6 3.0 4.4 1.4 versicolor versi
## 77 6.8 2.8 4.8 1.4 versicolor versi
## 78 6.7 3.0 5.0 1.7 versicolor versi
## 79 6.0 2.9 4.5 1.5 versicolor versi
## 80 5.7 2.6 3.5 1.0 versicolor versi
## 81 5.5 2.4 3.8 1.1 versicolor versi
## 82 5.5 2.4 3.7 1.0 versicolor versi
## 83 5.8 2.7 3.9 1.2 versicolor versi
## 84 6.0 2.7 5.1 1.6 versicolor versi
## 85 5.4 3.0 4.5 1.5 versicolor versi
## 86 6.0 3.4 4.5 1.6 versicolor versi
## 87 6.7 3.1 4.7 1.5 versicolor versi
## 88 6.3 2.3 4.4 1.3 versicolor versi
## 89 5.6 3.0 4.1 1.3 versicolor versi
## 90 5.5 2.5 4.0 1.3 versicolor versi
## 91 5.5 2.6 4.4 1.2 versicolor versi
## 92 6.1 3.0 4.6 1.4 versicolor versi
## 93 5.8 2.6 4.0 1.2 versicolor versi
## 94 5.0 2.3 3.3 1.0 versicolor versi
## 95 5.6 2.7 4.2 1.3 versicolor versi
## 96 5.7 3.0 4.2 1.2 versicolor versi
## 97 5.7 2.9 4.2 1.3 versicolor versi
## 98 6.2 2.9 4.3 1.3 versicolor versi
## 99 5.1 2.5 3.0 1.1 versicolor versi
## 100 5.7 2.8 4.1 1.3 versicolor versi
## 101 6.3 3.3 6.0 2.5 virginica virg
## 102 5.8 2.7 5.1 1.9 virginica virg
## 103 7.1 3.0 5.9 2.1 virginica virg
## 104 6.3 2.9 5.6 1.8 virginica virg
## 105 6.5 3.0 5.8 2.2 virginica virg
## 106 7.6 3.0 6.6 2.1 virginica virg
## 107 4.9 2.5 4.5 1.7 virginica virg
## 108 7.3 2.9 6.3 1.8 virginica virg
## 109 6.7 2.5 5.8 1.8 virginica virg
## 110 7.2 3.6 6.1 2.5 virginica virg
## 111 6.5 3.2 5.1 2.0 virginica virg
## 112 6.4 2.7 5.3 1.9 virginica virg
## 113 6.8 3.0 5.5 2.1 virginica virg
## 114 5.7 2.5 5.0 2.0 virginica virg
## 115 5.8 2.8 5.1 2.4 virginica virg
## 116 6.4 3.2 5.3 2.3 virginica virg
## 117 6.5 3.0 5.5 1.8 virginica virg
## 118 7.7 3.8 6.7 2.2 virginica virg
## 119 7.7 2.6 6.9 2.3 virginica virg
## 120 6.0 2.2 5.0 1.5 virginica virg
## 121 6.9 3.2 5.7 2.3 virginica virg
## 122 5.6 2.8 4.9 2.0 virginica virg
## 123 7.7 2.8 6.7 2.0 virginica virg
## 124 6.3 2.7 4.9 1.8 virginica virg
## 125 6.7 3.3 5.7 2.1 virginica virg
## 126 7.2 3.2 6.0 1.8 virginica virg
## 127 6.2 2.8 4.8 1.8 virginica virg
## 128 6.1 3.0 4.9 1.8 virginica virg
## 129 6.4 2.8 5.6 2.1 virginica virg
## 130 7.2 3.0 5.8 1.6 virginica virg
## 131 7.4 2.8 6.1 1.9 virginica virg
## 132 7.9 3.8 6.4 2.0 virginica virg
## 133 6.4 2.8 5.6 2.2 virginica virg
## 134 6.3 2.8 5.1 1.5 virginica virg
## 135 6.1 2.6 5.6 1.4 virginica virg
## 136 7.7 3.0 6.1 2.3 virginica virg
## 137 6.3 3.4 5.6 2.4 virginica virg
## 138 6.4 3.1 5.5 1.8 virginica virg
## 139 6.0 3.0 4.8 1.8 virginica virg
## 140 6.9 3.1 5.4 2.1 virginica virg
## 141 6.7 3.1 5.6 2.4 virginica virg
## 142 6.9 3.1 5.1 2.3 virginica virg
## 143 5.8 2.7 5.1 1.9 virginica virg
## 144 6.8 3.2 5.9 2.3 virginica virg
## 145 6.7 3.3 5.7 2.5 virginica virg
## 146 6.7 3.0 5.2 2.3 virginica virg
## 147 6.3 2.5 5.0 1.9 virginica virg
## 148 6.5 3.0 5.2 2.0 virginica virg
## 149 6.2 3.4 5.4 2.3 virginica virg
## 150 5.9 3.0 5.1 1.8 virginica virg
str(irisifelse)
## 'data.frame': 150 obs. of 6 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Species2 : Factor w/ 2 levels "versi","virg": NA NA NA NA NA NA NA NA NA NA ...