Google Sheets

Reading data with the googlesheets4 package

First, set up Google credentials.

library(googlesheets4)
# Prompts a browser pop-up
gs4_auth()
# Once set up, you can automate this process by passing your email
gs4_auth(email = "avamariehoffman@gmail.com")

Reading data with the googlesheets4 package

You can also supply an authorization token directly, but make sure to add any files to your .gitignore!

library(googledrive)
drive_auth(email= "<email>",
           token = readRDS("google-sheets-token.rds")) # Saved in a file

Reading data with the googlesheets4 package

Read in using read_sheet()

sheet_url <- 
  "https://docs.google.com/spreadsheets/d/1U6Cf_qEOhiR9AZqTqS3mbMF3zt2db48ZP5v3rkrAEJY/edit#gid=780868077"
sheet_dat_1 <- read_sheet(sheet_url)
✔ Reading from "gapminder".
✔ Range 'Africa'.
head(sheet_dat_1)
# A tibble: 6 × 6
  country continent  year lifeExp      pop gdpPercap
  <chr>   <chr>     <dbl>   <dbl>    <dbl>     <dbl>
1 Algeria Africa     1952    43.1  9279525     2449.
2 Algeria Africa     1957    45.7 10270856     3014.
3 Algeria Africa     1962    48.3 11000948     2551.
4 Algeria Africa     1967    51.4 12760499     3247.
5 Algeria Africa     1972    54.5 14760787     4183.
6 Algeria Africa     1977    58.0 17152804     4910.

Reading data with the googlesheets4 package

Specify the sheet name if necessary:

sheet_dat_oceania <- read_sheet(sheet_url, sheet = "Oceania")
✔ Reading from "gapminder".
✔ Range ''Oceania''.
head(sheet_dat_oceania)
# A tibble: 6 × 6
  country   continent  year lifeExp      pop gdpPercap
  <chr>     <chr>     <dbl>   <dbl>    <dbl>     <dbl>
1 Australia Oceania    1952    69.1  8691212    10040.
2 Australia Oceania    1957    70.3  9712569    10950.
3 Australia Oceania    1962    70.9 10794968    12217.
4 Australia Oceania    1967    71.1 11872264    14526.
5 Australia Oceania    1972    71.9 13177000    16789.
6 Australia Oceania    1977    73.5 14074100    18334.

Pull in a subset of data: rows

read_sheet(sheet_url, sheet = "Oceania", range =  cell_rows(1:4))
✔ Reading from "gapminder".
✔ Range ''Oceania'!1:4'.
# A tibble: 3 × 6
  country   continent  year lifeExp      pop gdpPercap
  <chr>     <chr>     <dbl>   <dbl>    <dbl>     <dbl>
1 Australia Oceania    1952    69.1  8691212    10040.
2 Australia Oceania    1957    70.3  9712569    10950.
3 Australia Oceania    1962    70.9 10794968    12217.

Pull in a subset of data: columns

read_sheet(sheet_url, sheet = "Oceania", range =  cell_cols("A:B"))
✔ Reading from "gapminder".
✔ Range ''Oceania'!A:B'.
# A tibble: 24 × 2
   country   continent
   <chr>     <chr>    
 1 Australia Oceania  
 2 Australia Oceania  
 3 Australia Oceania  
 4 Australia Oceania  
 5 Australia Oceania  
 6 Australia Oceania  
 7 Australia Oceania  
 8 Australia Oceania  
 9 Australia Oceania  
10 Australia Oceania  
# ℹ 14 more rows

Reading data with the googlesheets4 package

List out the sheet names using sheet_names().

sheet_names(sheet_url)
[1] "Africa"   "Americas" "Asia"     "Europe"   "Oceania" 

Reading data with the googlesheets4 package

Iterate through the sheet names with purrr:

gapminder_sheets <- sheet_names(sheet_url)

data_list <- map(gapminder_sheets, ~ read_sheet(sheet_url, sheet = .x))
✔ Reading from "gapminder".
✔ Range ''Africa''.
✔ Reading from "gapminder".
✔ Range ''Americas''.
✔ Reading from "gapminder".
✔ Range ''Asia''.
✔ Reading from "gapminder".
✔ Range ''Europe''.
✔ Reading from "gapminder".
✔ Range ''Oceania''.

Reading data with the googlesheets4 package

Check out the list:

str(data_list)
List of 5
 $ : tibble [624 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:624] "Algeria" "Algeria" "Algeria" "Algeria" ...
  ..$ continent: chr [1:624] "Africa" "Africa" "Africa" "Africa" ...
  ..$ year     : num [1:624] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:624] 43.1 45.7 48.3 51.4 54.5 ...
  ..$ pop      : num [1:624] 9279525 10270856 11000948 12760499 14760787 ...
  ..$ gdpPercap: num [1:624] 2449 3014 2551 3247 4183 ...
 $ : tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:300] "Argentina" "Argentina" "Argentina" "Argentina" ...
  ..$ continent: chr [1:300] "Americas" "Americas" "Americas" "Americas" ...
  ..$ year     : num [1:300] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:300] 62.5 64.4 65.1 65.6 67.1 ...
  ..$ pop      : num [1:300] 17876956 19610538 21283783 22934225 24779799 ...
  ..$ gdpPercap: num [1:300] 5911 6857 7133 8053 9443 ...
 $ : tibble [396 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:396] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
  ..$ continent: chr [1:396] "Asia" "Asia" "Asia" "Asia" ...
  ..$ year     : num [1:396] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:396] 28.8 30.3 32 34 36.1 ...
  ..$ pop      : num [1:396] 8425333 9240934 10267083 11537966 13079460 ...
  ..$ gdpPercap: num [1:396] 779 821 853 836 740 ...
 $ : tibble [360 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:360] "Albania" "Albania" "Albania" "Albania" ...
  ..$ continent: chr [1:360] "Europe" "Europe" "Europe" "Europe" ...
  ..$ year     : num [1:360] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:360] 55.2 59.3 64.8 66.2 67.7 ...
  ..$ pop      : num [1:360] 1282697 1476505 1728137 1984060 2263554 ...
  ..$ gdpPercap: num [1:360] 1601 1942 2313 2760 3313 ...
 $ : tibble [24 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:24] "Australia" "Australia" "Australia" "Australia" ...
  ..$ continent: chr [1:24] "Oceania" "Oceania" "Oceania" "Oceania" ...
  ..$ year     : num [1:24] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:24] 69.1 70.3 70.9 71.1 71.9 ...
  ..$ pop      : num [1:24] 8691212 9712569 10794968 11872264 13177000 ...
  ..$ gdpPercap: num [1:24] 10040 10950 12217 14526 16789 ...

Reading data with the googlesheets4 package

Pull out sheets as needed:

data_list[[{sheet}]]
# OR
data_list %>% pluck({sheet})
# OR (if named)
data_list${sheet}

.

Writing data with the googlesheets4 package

sheet_dat_oceania <- data_list[[5]]

sheet_dat_oceania <- sheet_dat_oceania %>%
  mutate(lifeExp_days = lifeExp * 365)

sheet_out <- gs4_create("Oceania-days", 
                       sheets = list(Oceania_days = sheet_dat_oceania))
✔ Creating new Sheet: "Oceania-days".
# Opens a browser window
gs4_browse(sheet_out)

Append data with the googlesheets4 package

sheet_append(sheet_out, data = sheet_dat_oceania, sheet = "Oceania_days")
✔ Writing to "Oceania-days".
✔ Appending 24 rows to 'Oceania_days'.

JHU Tidyverse Book

Lab

JSON: JavaScript Object Notation
Lists of stuff

Why JSON matters

#install.packages("jsonlite")
library(jsonlite)
jsonData <-
  fromJSON("https://raw.githubusercontent.com/Biuni/PokemonGO-Pokedex/master/pokedex.json")
head(jsonData)
$pokemon
   id num               name                                              img           type height   weight
1   1 001          Bulbasaur http://www.serebii.net/pokemongo/pokemon/001.png  Grass, Poison 0.71 m   6.9 kg
2   2 002            Ivysaur http://www.serebii.net/pokemongo/pokemon/002.png  Grass, Poison 0.99 m  13.0 kg
3   3 003           Venusaur http://www.serebii.net/pokemongo/pokemon/003.png  Grass, Poison 2.01 m 100.0 kg
4   4 004         Charmander http://www.serebii.net/pokemongo/pokemon/004.png           Fire 0.61 m   8.5 kg
5   5 005         Charmeleon http://www.serebii.net/pokemongo/pokemon/005.png           Fire 1.09 m  19.0 kg
6   6 006          Charizard http://www.serebii.net/pokemongo/pokemon/006.png   Fire, Flying 1.70 m  90.5 kg
7   7 007           Squirtle http://www.serebii.net/pokemongo/pokemon/007.png          Water 0.51 m   9.0 kg
8   8 008          Wartortle http://www.serebii.net/pokemongo/pokemon/008.png          Water 0.99 m  22.5 kg
9   9 009          Blastoise http://www.serebii.net/pokemongo/pokemon/009.png          Water 1.60 m  85.5 kg
10 10 010           Caterpie http://www.serebii.net/pokemongo/pokemon/010.png            Bug 0.30 m   2.9 kg
11 11 011            Metapod http://www.serebii.net/pokemongo/pokemon/011.png            Bug 0.71 m   9.9 kg
12 12 012         Butterfree http://www.serebii.net/pokemongo/pokemon/012.png    Bug, Flying 1.09 m  32.0 kg
13 13 013             Weedle http://www.serebii.net/pokemongo/pokemon/013.png    Bug, Poison 0.30 m   3.2 kg
14 14 014             Kakuna http://www.serebii.net/pokemongo/pokemon/014.png    Bug, Poison 0.61 m  10.0 kg
15 15 015           Beedrill http://www.serebii.net/pokemongo/pokemon/015.png    Bug, Poison 0.99 m  29.5 kg
16 16 016             Pidgey http://www.serebii.net/pokemongo/pokemon/016.png Normal, Flying 0.30 m   1.8 kg
17 17 017          Pidgeotto http://www.serebii.net/pokemongo/pokemon/017.png Normal, Flying 1.09 m  30.0 kg
18 18 018            Pidgeot http://www.serebii.net/pokemongo/pokemon/018.png Normal, Flying 1.50 m  39.5 kg
19 19 019            Rattata http://www.serebii.net/pokemongo/pokemon/019.png         Normal 0.30 m   3.5 kg
20 20 020           Raticate http://www.serebii.net/pokemongo/pokemon/020.png         Normal 0.71 m  18.5 kg
21 21 021            Spearow http://www.serebii.net/pokemongo/pokemon/021.png Normal, Flying 0.30 m   2.0 kg
22 22 022             Fearow http://www.serebii.net/pokemongo/pokemon/022.png Normal, Flying 1.19 m  38.0 kg
23 23 023              Ekans http://www.serebii.net/pokemongo/pokemon/023.png         Poison 2.01 m   6.9 kg
24 24 024              Arbok http://www.serebii.net/pokemongo/pokemon/024.png         Poison 3.51 m  65.0 kg
25 25 025            Pikachu http://www.serebii.net/pokemongo/pokemon/025.png       Electric 0.41 m   6.0 kg
26 26 026             Raichu http://www.serebii.net/pokemongo/pokemon/026.png       Electric 0.79 m  30.0 kg
27 27 027          Sandshrew http://www.serebii.net/pokemongo/pokemon/027.png         Ground 0.61 m  12.0 kg
28 28 028          Sandslash http://www.serebii.net/pokemongo/pokemon/028.png         Ground 0.99 m  29.5 kg
29 29 029 Nidoran ♀ (Female) http://www.serebii.net/pokemongo/pokemon/029.png         Poison 0.41 m   7.0 kg
30 30 030           Nidorina http://www.serebii.net/pokemongo/pokemon/030.png         Poison 0.79 m  20.0 kg
31 31 031          Nidoqueen http://www.serebii.net/pokemongo/pokemon/031.png Poison, Ground 1.30 m  60.0 kg
32 32 032   Nidoran ♂ (Male) http://www.serebii.net/pokemongo/pokemon/032.png         Poison 0.51 m   9.0 kg
33 33 033           Nidorino http://www.serebii.net/pokemongo/pokemon/033.png         Poison 0.89 m  19.5 kg
34 34 034           Nidoking http://www.serebii.net/pokemongo/pokemon/034.png Poison, Ground 1.40 m  62.0 kg
35 35 035           Clefairy http://www.serebii.net/pokemongo/pokemon/035.png         Normal 0.61 m   7.5 kg
36 36 036           Clefable http://www.serebii.net/pokemongo/pokemon/036.png         Normal 1.30 m  40.0 kg
37 37 037             Vulpix http://www.serebii.net/pokemongo/pokemon/037.png           Fire 0.61 m   9.9 kg
38 38 038          Ninetales http://www.serebii.net/pokemongo/pokemon/038.png           Fire 1.09 m  19.9 kg
39 39 039         Jigglypuff http://www.serebii.net/pokemongo/pokemon/039.png         Normal 0.51 m   5.5 kg
40 40 040         Wigglytuff http://www.serebii.net/pokemongo/pokemon/040.png         Normal 0.99 m  12.0 kg
41 41 041              Zubat http://www.serebii.net/pokemongo/pokemon/041.png Poison, Flying 0.79 m   7.5 kg
42 42 042             Golbat http://www.serebii.net/pokemongo/pokemon/042.png Poison, Flying 1.60 m  55.0 kg
43 43 043             Oddish http://www.serebii.net/pokemongo/pokemon/043.png  Grass, Poison 0.51 m   5.4 kg
44 44 044              Gloom http://www.serebii.net/pokemongo/pokemon/044.png  Grass, Poison 0.79 m   8.6 kg
45 45 045          Vileplume http://www.serebii.net/pokemongo/pokemon/045.png  Grass, Poison 1.19 m  18.6 kg
46 46 046              Paras http://www.serebii.net/pokemongo/pokemon/046.png     Bug, Grass 0.30 m   5.4 kg
47 47 047           Parasect http://www.serebii.net/pokemongo/pokemon/047.png     Bug, Grass 0.99 m  29.5 kg
48 48 048            Venonat http://www.serebii.net/pokemongo/pokemon/048.png    Bug, Poison 0.99 m  30.0 kg
49 49 049           Venomoth http://www.serebii.net/pokemongo/pokemon/049.png    Bug, Poison 1.50 m  12.5 kg
50 50 050            Diglett http://www.serebii.net/pokemongo/pokemon/050.png         Ground 0.20 m   0.8 kg
51 51 051            Dugtrio http://www.serebii.net/pokemongo/pokemon/051.png         Ground 0.71 m  33.3 kg
52 52 052             Meowth http://www.serebii.net/pokemongo/pokemon/052.png         Normal 0.41 m   4.2 kg
53 53 053            Persian http://www.serebii.net/pokemongo/pokemon/053.png         Normal 0.99 m  32.0 kg
54 54 054            Psyduck http://www.serebii.net/pokemongo/pokemon/054.png          Water 0.79 m  19.6 kg
55 55 055            Golduck http://www.serebii.net/pokemongo/pokemon/055.png          Water 1.70 m  76.6 kg
56 56 056             Mankey http://www.serebii.net/pokemongo/pokemon/056.png       Fighting 0.51 m  28.0 kg
57 57 057           Primeape http://www.serebii.net/pokemongo/pokemon/057.png       Fighting 0.99 m  32.0 kg
58 58 058          Growlithe http://www.serebii.net/pokemongo/pokemon/058.png           Fire 0.71 m  19.0 kg
                      candy candy_count         egg spawn_chance avg_spawns spawn_time multipliers
1           Bulbasaur Candy          25        2 km       0.6900     69.000      20:00        1.58
2           Bulbasaur Candy         100 Not in Eggs       0.0420      4.200      07:00    1.2, 1.6
3           Bulbasaur Candy          NA Not in Eggs       0.0170      1.700      11:30        NULL
4          Charmander Candy          25        2 km       0.2530     25.300      08:45        1.65
5          Charmander Candy         100 Not in Eggs       0.0120      1.200      19:00        1.79
6          Charmander Candy          NA Not in Eggs       0.0031      0.310      13:34        NULL
7            Squirtle Candy          25        2 km       0.5800     58.000      04:25         2.1
8            Squirtle Candy         100 Not in Eggs       0.0340      3.400      07:02         1.4
9            Squirtle Candy          NA Not in Eggs       0.0067      0.670      00:06        NULL
10           Caterpie Candy          12        2 km       3.0320    303.200      16:35        1.05
11           Caterpie Candy          50 Not in Eggs       0.1870     18.700      02:11  3.55, 3.79
12           Caterpie Candy          NA Not in Eggs       0.0220      2.200      05:23        NULL
13             Weedle Candy          12        2 km       7.1200    712.000      02:21  1.01, 1.09
14             Weedle Candy          50 Not in Eggs       0.4400     44.000      02:30  3.01, 3.41
15             Weedle Candy          NA Not in Eggs       0.0510      5.100      04:50        NULL
16             Pidgey Candy          12        2 km      15.9800      1.598      01:34  1.71, 1.92
17             Pidgey Candy          50 Not in Eggs       1.0200    102.000      01:30        1.79
18             Pidgey Candy          NA Not in Eggs       0.1300     13.000      01:50        NULL
19            Rattata Candy          25        2 km      13.0500      1.305      01:55  2.55, 2.73
20            Rattata Candy          NA Not in Eggs       0.4100     41.000      01:56        NULL
21            Spearow Candy          50        2 km       4.7300    473.000      12:25  2.66, 2.68
22            Spearow Candy          NA Not in Eggs       0.1500     15.000      01:11        NULL
23              Ekans Candy          50        5 km       2.2700    227.000      12:20  2.21, 2.27
24              Ekans Candy          NA Not in Eggs       0.0720      7.200      01:50        NULL
25            Pikachu Candy          50        2 km       0.2100     21.000      04:00        2.34
26            Pikachu Candy          NA Not in Eggs       0.0076      0.760      23:58        NULL
27          Sandshrew Candy          50        5 km       1.1100    111.000      01:58        2.45
28          Sandshrew Candy          NA Not in Eggs       0.0370      3.700      12:34        NULL
29 Nidoran ♀ (Female) Candy          25        5 km       1.3800    138.000      01:51  1.63, 2.48
30 Nidoran ♀ (Female) Candy         100 Not in Eggs       0.0880      8.800      07:22  1.83, 2.48
31 Nidoran ♀ (Female) Candy          NA Not in Eggs       0.0120      1.200      12:35        NULL
32   Nidoran ♂ (Male) Candy          25        5 km       1.3100    131.000      01:12  1.64, 1.70
33   Nidoran ♂ (Male) Candy         100 Not in Eggs       0.0830      8.300      09:02        1.83
34   Nidoran ♂ (Male) Candy          NA Not in Eggs       0.0170      1.700      12:16        NULL
35           Clefairy Candy          50        2 km       0.9200     92.000      03:30  2.03, 2.14
36           Clefairy Candy          NA Not in Eggs       0.0120      1.200      03:29        NULL
37             Vulpix Candy          50        5 km       0.2200     22.000      13:43  2.74, 2.81
38             Vulpix Candy          NA Not in Eggs       0.0077      0.770      01:32        NULL
39         Jigglypuff Candy          50        2 km       0.3900     39.000      08:46        1.85
40         Jigglypuff Candy          NA Not in Eggs       0.0180      1.800      12:28        NULL
41              Zubat Candy          50        2 km       6.5200    652.000      12:28  2.60, 3.67
42              Zubat Candy          NA Not in Eggs       0.4200     42.000      02:15        NULL
43             Oddish Candy          25        5 km       1.0200    102.000      03:58         1.5
44             Oddish Candy         100 Not in Eggs       0.0640      6.400      11:33        1.49
45             Oddish Candy          NA Not in Eggs       0.0097      0.970      23:58        NULL
46              Paras Candy          50        5 km       2.3600    236.000      01:42        2.02
47              Paras Candy          NA Not in Eggs       0.0740      7.400      01:22        NULL
48            Venonat Candy          50        5 km       2.2800    228.000      02:31  1.86, 1.90
49            Venonat Candy          NA Not in Eggs       0.0720      7.200      23:40        NULL
50            Diglett Candy          50        5 km       0.4000     40.000      02:22        2.69
51                  Dugtrio          NA Not in Eggs       0.0140      1.400      12:37        NULL
52             Meowth Candy          50        5 km       0.8600     86.000      02:54        1.98
53             Meowth Candy          NA Not in Eggs       0.0220      2.200      02:44        NULL
54            Psyduck Candy          50        5 km       2.5400    254.000      03:41        2.27
55            Psyduck Candy          NA Not in Eggs       0.0870      8.700      23:06        NULL
56             Mankey Candy          50        5 km       0.9200     92.000      12:52  2.17, 2.28
57             Mankey Candy          NA Not in Eggs       0.0310      3.100      12:33        NULL
58          Growlithe Candy          50        5 km       0.9200     92.000      03:57  2.31, 2.36
                             weaknesses                  next_evolution                      prev_evolution
1            Fire, Ice, Flying, Psychic     002, 003, Ivysaur, Venusaur                                NULL
2            Fire, Ice, Flying, Psychic                   003, Venusaur                      001, Bulbasaur
3            Fire, Ice, Flying, Psychic                            NULL        001, 002, Bulbasaur, Ivysaur
4                   Water, Ground, Rock 005, 006, Charmeleon, Charizard                                NULL
5                   Water, Ground, Rock                  006, Charizard                     004, Charmander
6                 Water, Electric, Rock                            NULL    004, 005, Charmander, Charmeleon
7                       Electric, Grass  008, 009, Wartortle, Blastoise                                NULL
8                       Electric, Grass                  009, Blastoise                       007, Squirtle
9                       Electric, Grass                            NULL       007, 008, Squirtle, Wartortle
10                   Fire, Flying, Rock   011, 012, Metapod, Butterfree                                NULL
11                   Fire, Flying, Rock                 012, Butterfree                       010, Caterpie
12    Fire, Electric, Ice, Flying, Rock                            NULL         010, 011, Caterpie, Metapod
13          Fire, Flying, Psychic, Rock      014, 015, Kakuna, Beedrill                                NULL
14          Fire, Flying, Psychic, Rock                   015, Beedrill                         013, Weedle
15          Fire, Flying, Psychic, Rock                            NULL            013, 014, Weedle, Kakuna
16                       Electric, Rock    017, 018, Pidgeotto, Pidgeot                                NULL
17                       Electric, Rock                    018, Pidgeot                         016, Pidgey
18                       Electric, Rock                            NULL         016, 017, Pidgey, Pidgeotto
19                             Fighting                   020, Raticate                                NULL
20                             Fighting                            NULL                        019, Rattata
21                       Electric, Rock                     022, Fearow                                NULL
22                       Electric, Rock                            NULL                        021, Spearow
23                      Ground, Psychic                      024, Arbok                                NULL
24                      Ground, Psychic                            NULL                          023, Ekans
25                               Ground                     026, Raichu                                NULL
26                               Ground                            NULL                        025, Pikachu
27                    Water, Grass, Ice                  028, Sandslash                                NULL
28                    Water, Grass, Ice                            NULL                      027, Sandshrew
29                      Ground, Psychic   030, 031, Nidorina, Nidoqueen                                NULL
30                      Ground, Psychic                  031, Nidoqueen                029, Nidoran(Female)
31          Water, Ice, Ground, Psychic                            NULL 029, 030, Nidoran(Female), Nidorina
32                      Ground, Psychic    033, 034, Nidorino, Nidoking                                NULL
33                      Ground, Psychic                   034, Nidoking                  032, Nidoran(Male)
34          Water, Ice, Ground, Psychic                            NULL   032, 033, Nidoran(Male), Nidorino
35                             Fighting                   036, Clefable                                NULL
36                             Fighting                            NULL                       035, Clefairy
37                  Water, Ground, Rock                  038, Ninetales                                NULL
38                  Water, Ground, Rock                            NULL                         037, Vulpix
39                             Fighting                 040, Wigglytuff                                NULL
40                             Fighting                            NULL                     039, Jigglypuff
41         Electric, Ice, Psychic, Rock                     042, Golbat                                NULL
42         Electric, Ice, Psychic, Rock                            NULL                          041, Zubat
43           Fire, Ice, Flying, Psychic      044, 045, Gloom, Vileplume                                NULL
44           Fire, Ice, Flying, Psychic                  045, Vileplume                         043, Oddish
45           Fire, Ice, Flying, Psychic                            NULL             043, 044, Oddish, Gloom
46 Fire, Ice, Poison, Flying, Bug, Rock                   047, Parasect                                NULL
47 Fire, Ice, Poison, Flying, Bug, Rock                            NULL                          046, Paras
48          Fire, Flying, Psychic, Rock                   049, Venomoth                                NULL
49          Fire, Flying, Psychic, Rock                            NULL                        048, Venonat
50                    Water, Grass, Ice                    051, Dugtrio                                NULL
51                    Water, Grass, Ice                            NULL                        050, Diglett
52                             Fighting                    053, Persian                                NULL
53                             Fighting                            NULL                         052, Meowth
54                      Electric, Grass                    055, Golduck                                NULL
55                      Electric, Grass                            NULL                        054, Psyduck
56               Flying, Psychic, Fairy                   057, Primeape                                NULL
57               Flying, Psychic, Fairy                            NULL                         056, Mankey
58                  Water, Ground, Rock                   059, Arcanine                                NULL
 [ reached 'max' / getOption("max.print") -- omitted 93 rows ]

Data frame structure from JSON

dim(jsonData$pokemon)
[1] 151  17
class(jsonData$pokemon)
[1] "data.frame"
jsonData$pokemon %>% filter(type == "Fire") %>% select(!(img))
    id num       name type height   weight            candy candy_count         egg spawn_chance avg_spawns
1    4 004 Charmander Fire 0.61 m   8.5 kg Charmander Candy          25        2 km       0.2530      25.30
2    5 005 Charmeleon Fire 1.09 m  19.0 kg Charmander Candy         100 Not in Eggs       0.0120       1.20
3   37 037     Vulpix Fire 0.61 m   9.9 kg     Vulpix Candy          50        5 km       0.2200      22.00
4   38 038  Ninetales Fire 1.09 m  19.9 kg     Vulpix Candy          NA Not in Eggs       0.0077       0.77
5   58 058  Growlithe Fire 0.71 m  19.0 kg  Growlithe Candy          50        5 km       0.9200      92.00
6   59 059   Arcanine Fire 1.91 m 155.0 kg  Growlithe Candy          NA Not in Eggs       0.0170       1.70
7   77 077     Ponyta Fire 0.99 m  30.0 kg     Ponyta Candy          50        5 km       0.5100      51.00
8   78 078   Rapidash Fire 1.70 m  95.0 kg     Ponyta Candy          NA Not in Eggs       0.0110       1.10
9  126 126     Magmar Fire 1.30 m  44.5 kg             None          NA       10 km       0.1000      10.00
10 136 136    Flareon Fire 0.89 m  25.0 kg      Eevee Candy          NA Not in Eggs       0.0170       1.70
   spawn_time multipliers          weaknesses                  next_evolution  prev_evolution
1       08:45        1.65 Water, Ground, Rock 005, 006, Charmeleon, Charizard            NULL
2       19:00        1.79 Water, Ground, Rock                  006, Charizard 004, Charmander
3       13:43  2.74, 2.81 Water, Ground, Rock                  038, Ninetales            NULL
4       01:32        NULL Water, Ground, Rock                            NULL     037, Vulpix
5       03:57  2.31, 2.36 Water, Ground, Rock                   059, Arcanine            NULL
6       03:11        NULL Water, Ground, Rock                            NULL  058, Growlithe
7       02:50  1.48, 1.50 Water, Ground, Rock                   078, Rapidash            NULL
8       04:00        NULL Water, Ground, Rock                            NULL     077, Ponyta
9       20:36        NULL Water, Ground, Rock                            NULL            NULL
10      07:02        NULL Water, Ground, Rock                            NULL      133, Eevee

Going deeper..

class(jsonData$pokemon$type) # Can be lists
[1] "list"
jsonData$pokemon$type
[[1]]
[1] "Grass"  "Poison"

[[2]]
[1] "Grass"  "Poison"

[[3]]
[1] "Grass"  "Poison"

[[4]]
[1] "Fire"

[[5]]
[1] "Fire"

[[6]]
[1] "Fire"   "Flying"

[[7]]
[1] "Water"

[[8]]
[1] "Water"

[[9]]
[1] "Water"

[[10]]
[1] "Bug"

[[11]]
[1] "Bug"

[[12]]
[1] "Bug"    "Flying"

[[13]]
[1] "Bug"    "Poison"

[[14]]
[1] "Bug"    "Poison"

[[15]]
[1] "Bug"    "Poison"

[[16]]
[1] "Normal" "Flying"

[[17]]
[1] "Normal" "Flying"

[[18]]
[1] "Normal" "Flying"

[[19]]
[1] "Normal"

[[20]]
[1] "Normal"

[[21]]
[1] "Normal" "Flying"

[[22]]
[1] "Normal" "Flying"

[[23]]
[1] "Poison"

[[24]]
[1] "Poison"

[[25]]
[1] "Electric"

[[26]]
[1] "Electric"

[[27]]
[1] "Ground"

[[28]]
[1] "Ground"

[[29]]
[1] "Poison"

[[30]]
[1] "Poison"

[[31]]
[1] "Poison" "Ground"

[[32]]
[1] "Poison"

[[33]]
[1] "Poison"

[[34]]
[1] "Poison" "Ground"

[[35]]
[1] "Normal"

[[36]]
[1] "Normal"

[[37]]
[1] "Fire"

[[38]]
[1] "Fire"

[[39]]
[1] "Normal"

[[40]]
[1] "Normal"

[[41]]
[1] "Poison" "Flying"

[[42]]
[1] "Poison" "Flying"

[[43]]
[1] "Grass"  "Poison"

[[44]]
[1] "Grass"  "Poison"

[[45]]
[1] "Grass"  "Poison"

[[46]]
[1] "Bug"   "Grass"

[[47]]
[1] "Bug"   "Grass"

[[48]]
[1] "Bug"    "Poison"

[[49]]
[1] "Bug"    "Poison"

[[50]]
[1] "Ground"

[[51]]
[1] "Ground"

[[52]]
[1] "Normal"

[[53]]
[1] "Normal"

[[54]]
[1] "Water"

[[55]]
[1] "Water"

[[56]]
[1] "Fighting"

[[57]]
[1] "Fighting"

[[58]]
[1] "Fire"

[[59]]
[1] "Fire"

[[60]]
[1] "Water"

[[61]]
[1] "Water"

[[62]]
[1] "Water"    "Fighting"

[[63]]
[1] "Psychic"

[[64]]
[1] "Psychic"

[[65]]
[1] "Psychic"

[[66]]
[1] "Fighting"

[[67]]
[1] "Fighting"

[[68]]
[1] "Fighting"

[[69]]
[1] "Grass"  "Poison"

[[70]]
[1] "Grass"  "Poison"

[[71]]
[1] "Grass"  "Poison"

[[72]]
[1] "Water"  "Poison"

[[73]]
[1] "Water"  "Poison"

[[74]]
[1] "Rock"   "Ground"

[[75]]
[1] "Rock"   "Ground"

[[76]]
[1] "Rock"   "Ground"

[[77]]
[1] "Fire"

[[78]]
[1] "Fire"

[[79]]
[1] "Water"   "Psychic"

[[80]]
[1] "Water"   "Psychic"

[[81]]
[1] "Electric"

[[82]]
[1] "Electric"

[[83]]
[1] "Normal" "Flying"

[[84]]
[1] "Normal" "Flying"

[[85]]
[1] "Normal" "Flying"

[[86]]
[1] "Water"

[[87]]
[1] "Water" "Ice"  

[[88]]
[1] "Poison"

[[89]]
[1] "Poison"

[[90]]
[1] "Water"

[[91]]
[1] "Water" "Ice"  

[[92]]
[1] "Ghost"  "Poison"

[[93]]
[1] "Ghost"  "Poison"

[[94]]
[1] "Ghost"  "Poison"

[[95]]
[1] "Rock"   "Ground"

[[96]]
[1] "Psychic"

[[97]]
[1] "Psychic"

[[98]]
[1] "Water"

[[99]]
[1] "Water"

[[100]]
[1] "Electric"

[[101]]
[1] "Electric"

[[102]]
[1] "Grass"   "Psychic"

[[103]]
[1] "Grass"   "Psychic"

[[104]]
[1] "Ground"

[[105]]
[1] "Ground"

[[106]]
[1] "Fighting"

[[107]]
[1] "Fighting"

[[108]]
[1] "Normal"

[[109]]
[1] "Poison"

[[110]]
[1] "Poison"

[[111]]
[1] "Ground" "Rock"  

[[112]]
[1] "Ground" "Rock"  

[[113]]
[1] "Normal"

[[114]]
[1] "Grass"

[[115]]
[1] "Normal"

[[116]]
[1] "Water"

[[117]]
[1] "Water"

[[118]]
[1] "Water"

[[119]]
[1] "Water"

[[120]]
[1] "Water"

[[121]]
[1] "Water"   "Psychic"

[[122]]
[1] "Psychic"

[[123]]
[1] "Bug"    "Flying"

[[124]]
[1] "Ice"     "Psychic"

[[125]]
[1] "Electric"

[[126]]
[1] "Fire"

[[127]]
[1] "Bug"

[[128]]
[1] "Normal"

[[129]]
[1] "Water"

[[130]]
[1] "Water"  "Flying"

[[131]]
[1] "Water" "Ice"  

[[132]]
[1] "Normal"

[[133]]
[1] "Normal"

[[134]]
[1] "Water"

[[135]]
[1] "Electric"

[[136]]
[1] "Fire"

[[137]]
[1] "Normal"

[[138]]
[1] "Rock"  "Water"

[[139]]
[1] "Rock"  "Water"

[[140]]
[1] "Rock"  "Water"

[[141]]
[1] "Rock"  "Water"

[[142]]
[1] "Rock"   "Flying"

[[143]]
[1] "Normal"

[[144]]
[1] "Ice"    "Flying"

[[145]]
[1] "Electric" "Flying"  

[[146]]
[1] "Fire"   "Flying"

[[147]]
[1] "Dragon"

[[148]]
[1] "Dragon"

[[149]]
[1] "Dragon" "Flying"

[[150]]
[1] "Psychic"

[[151]]
[1] "Psychic"

Data frame structure from JSON

class(jsonData$pokemon$next_evolution[[1]]) # Or lists of data.frames!
[1] "data.frame"
jsonData$pokemon$next_evolution
[[1]]
  num     name
1 002  Ivysaur
2 003 Venusaur

[[2]]
  num     name
1 003 Venusaur

[[3]]
NULL

[[4]]
  num       name
1 005 Charmeleon
2 006  Charizard

[[5]]
  num      name
1 006 Charizard

[[6]]
NULL

[[7]]
  num      name
1 008 Wartortle
2 009 Blastoise

[[8]]
  num      name
1 009 Blastoise

[[9]]
NULL

[[10]]
  num       name
1 011    Metapod
2 012 Butterfree

[[11]]
  num       name
1 012 Butterfree

[[12]]
NULL

[[13]]
  num     name
1 014   Kakuna
2 015 Beedrill

[[14]]
  num     name
1 015 Beedrill

[[15]]
NULL

[[16]]
  num      name
1 017 Pidgeotto
2 018   Pidgeot

[[17]]
  num    name
1 018 Pidgeot

[[18]]
NULL

[[19]]
  num     name
1 020 Raticate

[[20]]
NULL

[[21]]
  num   name
1 022 Fearow

[[22]]
NULL

[[23]]
  num  name
1 024 Arbok

[[24]]
NULL

[[25]]
  num   name
1 026 Raichu

[[26]]
NULL

[[27]]
  num      name
1 028 Sandslash

[[28]]
NULL

[[29]]
  num      name
1 030  Nidorina
2 031 Nidoqueen

[[30]]
  num      name
1 031 Nidoqueen

[[31]]
NULL

[[32]]
  num     name
1 033 Nidorino
2 034 Nidoking

[[33]]
  num     name
1 034 Nidoking

[[34]]
NULL

[[35]]
  num     name
1 036 Clefable

[[36]]
NULL

[[37]]
  num      name
1 038 Ninetales

[[38]]
NULL

[[39]]
  num       name
1 040 Wigglytuff

[[40]]
NULL

[[41]]
  num   name
1 042 Golbat

[[42]]
NULL

[[43]]
  num      name
1 044     Gloom
2 045 Vileplume

[[44]]
  num      name
1 045 Vileplume

[[45]]
NULL

[[46]]
  num     name
1 047 Parasect

[[47]]
NULL

[[48]]
  num     name
1 049 Venomoth

[[49]]
NULL

[[50]]
  num    name
1 051 Dugtrio

[[51]]
NULL

[[52]]
  num    name
1 053 Persian

[[53]]
NULL

[[54]]
  num    name
1 055 Golduck

[[55]]
NULL

[[56]]
  num     name
1 057 Primeape

[[57]]
NULL

[[58]]
  num     name
1 059 Arcanine

[[59]]
NULL

[[60]]
  num      name
1 061 Poliwhirl
2 062 Poliwrath

[[61]]
  num      name
1 062 Poliwrath

[[62]]
NULL

[[63]]
  num     name
1 064  Kadabra
2 065 Alakazam

[[64]]
  num     name
1 065 Alakazam

[[65]]
NULL

[[66]]
  num    name
1 067 Machoke
2 068 Machamp

[[67]]
  num    name
1 068 Machamp

[[68]]
NULL

[[69]]
  num       name
1 070 Weepinbell
2 071 Victreebel

[[70]]
  num       name
1 071 Victreebel

[[71]]
NULL

[[72]]
  num       name
1 073 Tentacruel

[[73]]
NULL

[[74]]
  num     name
1 075 Graveler
2 076    Golem

[[75]]
  num  name
1 076 Golem

[[76]]
NULL

[[77]]
  num     name
1 078 Rapidash

[[78]]
NULL

[[79]]
  num    name
1 080 Slowbro

[[80]]
NULL

[[81]]
  num     name
1 082 Magneton

[[82]]
NULL

[[83]]
NULL

[[84]]
  num   name
1 085 Dodrio

[[85]]
NULL

[[86]]
  num    name
1 087 Dewgong

[[87]]
NULL

[[88]]
  num name
1 089  Muk

[[89]]
NULL

[[90]]
  num     name
1 091 Cloyster

[[91]]
NULL

[[92]]
  num    name
1 093 Haunter
2 094  Gengar

[[93]]
  num   name
1 094 Gengar

[[94]]
NULL

[[95]]
NULL

[[96]]
  num  name
1 097 Hypno

[[97]]
NULL

[[98]]
  num    name
1 099 Kingler

[[99]]
NULL

[[100]]
  num      name
1 101 Electrode

[[101]]
NULL

[[102]]
  num      name
1 103 Exeggutor

[[103]]
NULL

[[104]]
  num    name
1 105 Marowak

[[105]]
NULL

[[106]]
NULL

[[107]]
NULL

[[108]]
NULL

[[109]]
  num    name
1 110 Weezing

[[110]]
NULL

[[111]]
  num   name
1 112 Rhydon

[[112]]
NULL

[[113]]
NULL

[[114]]
NULL

[[115]]
NULL

[[116]]
  num   name
1 117 Seadra

[[117]]
NULL

[[118]]
  num    name
1 119 Seaking

[[119]]
NULL

[[120]]
  num    name
1 121 Starmie

[[121]]
NULL

[[122]]
NULL

[[123]]
NULL

[[124]]
NULL

[[125]]
NULL

[[126]]
NULL

[[127]]
NULL

[[128]]
NULL

[[129]]
  num     name
1 130 Gyarados

[[130]]
NULL

[[131]]
NULL

[[132]]
NULL

[[133]]
  num     name
1 134 Vaporeon
2 135  Jolteon
3 136  Flareon

[[134]]
NULL

[[135]]
NULL

[[136]]
NULL

[[137]]
NULL

[[138]]
  num    name
1 139 Omastar

[[139]]
NULL

[[140]]
  num     name
1 141 Kabutops

[[141]]
NULL

[[142]]
NULL

[[143]]
NULL

[[144]]
NULL

[[145]]
NULL

[[146]]
NULL

[[147]]
  num      name
1 148 Dragonair
2 149 Dragonite

[[148]]
  num      name
1 149 Dragonite

[[149]]
NULL

[[150]]
NULL

[[151]]
NULL

Lab

Extra Slides: Web Scraping and APIs

This is data

View the source

What the computer sees

Ways to see the source

Inspect element

Copy XPath

Use SelectorGadget

rvest package

recount_url <- "http://bowtie-bio.sourceforge.net/recount/"
# install.packages("rvest")
library(rvest)
htmlfile <- read_html(recount_url)

nds <- html_nodes(htmlfile, xpath = '//*[@id="recounttab"]/table')
dat <- html_table(nds)
dat <- as.data.frame(dat)
head(dat)
       X1                                         X2      X3                               X4
1   Study                                       PMID Species  Number of biological replicates
2 bodymap not published, but publicly available here   human                               19
3  cheung                                   20856902   human                               41
4    core                                   19056941   human                                2
5   gilad                                   20009012   human                                6
6    maqc                                   20167110   human 14 (technical)**  2 (biological)
                                X5              X6              X7              X8
1 Number of uniquely aligned reads   ExpressionSet     Count table Phenotype table
2                    2,197,622,796            link            link            link
3                      834,584,950            link            link            link
4                        8,670,342            link            link            link
5                       41,356,738            link            link            link
6                       71,970,164 original pooled original pooled original pooled
                                               X9
1                                           Notes
2 Illumina Human BodyMap 2.0 -- tissue comparison
3                                    HapMap - CEU
4                                lung fibroblasts
5                        liver; males and femlaes
6                              experiment: MAQC-2

Little cleanup

colnames(dat) <- as.character(dat[1,])
dat <- dat[-1,]
head(dat)
       Study                                       PMID Species  Number of biological replicates
2    bodymap not published, but publicly available here   human                               19
3     cheung                                   20856902   human                               41
4       core                                   19056941   human                                2
5      gilad                                   20009012   human                                6
6       maqc                                   20167110   human 14 (technical)**  2 (biological)
7 montgomery                                   20220756   human                               60
  Number of uniquely aligned reads   ExpressionSet     Count table Phenotype table
2                    2,197,622,796            link            link            link
3                      834,584,950            link            link            link
4                        8,670,342            link            link            link
5                       41,356,738            link            link            link
6                       71,970,164 original pooled original pooled original pooled
7                     *886,468,054            link            link            link
                                            Notes
2 Illumina Human BodyMap 2.0 -- tissue comparison
3                                    HapMap - CEU
4                                lung fibroblasts
5                        liver; males and femlaes
6                              experiment: MAQC-2
7                                    HapMap - CEU

Ethics and Web Scraping

Ethics and Web Scraping

Ethics and Web Scraping

Ethics and Web Scraping

APIs

Application Programming Interfaces

In biology too!

Step 0: Did someone do this already

Step 0: Did someone do this already

Step 1: DIY

https://github.com/ThatCopy/catAPI/wiki/Usage

#install.packages("httr")
library(httr)

# Requests a random cat fact
query_url <- "https://thatcopy.pw/catapi/rest/"

req <- GET(query_url)
content(req)
{html_document}
<html>
[1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n<title>Loading...</title>\n< ...
[2] <body><script type="text/javascript">window.location.replace('https://thatcopy.pw/catapi/rest/?ch=1&js=ey ...

Not all APIs are “open”

Not all APIs are “open”