Circulator Lanes Dataset: the data is from https://data.baltimorecity.gov/Transportation/Charm-City-Circulator-Ridership/wwvu-583r
Available on: https://sisbid.github.io/Data-Wrangling/data/Charm_City_Circulator_Ridership.csv
# Load the tidyverse (provides read_csv(), the %>% pipe, and the dplyr verbs used below).
library(tidyverse)
# Read the Charm City Circulator ridership CSV straight from its URL into `circ`.
circ <- read_csv("https://sisbid.github.io/Data-Wrangling/data/Charm_City_Circulator_Ridership.csv")
# Number of rows in the data set.
nrow(circ)
[1] 1146
# dim() reports both dimensions at once: rows first, then columns.
dim(circ)
[1] 1146 15
# Same row count as nrow(circ), written with the pipe.
circ %>%
nrow()
[1] 1146
What is the total (sum) number of boardings on the green bus (`greenBoardings` column)?
sum(circ$greenBoardings, na.rm = TRUE)
[1] 935564
# Pipe version: pull() extracts the column as a vector, then sum() ignores NAs.
circ %>% pull(greenBoardings) %>% sum(na.rm = TRUE)
[1] 935564
# With `wt`, count() sums the weight column instead of counting rows,
# so `n` here is the total of greenBoardings.
count(circ, wt = greenBoardings)
# A tibble: 1 × 1
n
<dbl>
1 935564
How many days are missing daily ridership (`daily` column)? Use `is.na()` and `sum()`.
# Extract the `daily` column as a plain vector. The name must be `daily`
# (not `.daily`), because the next line reads `daily`.
daily <- circ %>% pull(daily)
# is.na() marks each missing entry as TRUE; sum() counts the TRUEs.
sum(is.na(daily))
[1] 124
# Alternative: tabulate missing (TRUE) vs. non-missing (FALSE) values of `daily`
# by counting on the logical expression directly.
circ %>%
count(is.na(daily))
# A tibble: 2 × 2
`is.na(daily)` n
<lgl> <int>
1 FALSE 1022
2 TRUE 124
Group the data by day of the week (`day`). Next, find the mean daily ridership (`daily` column) and the sample size. (hint: use the `group_by` and `summarize` functions)
circ %>%
group_by(day) %>%
summarise(mean = mean(daily, na.rm = TRUE),
n = n())
# A tibble: 7 × 3
day mean n
<chr> <dbl> <int>
1 Friday 8961. 164
2 Monday 7340. 164
3 Saturday 6743. 163
4 Sunday 4531. 163
5 Thursday 7639. 164
6 Tuesday 7642. 164
7 Wednesday 7779. 164
Find the median of `orangeBoardings` (use `median()`).
circ %>%
summarise(median = median(orangeBoardings, na.rm = TRUE))
# A tibble: 1 × 1
median
<dbl>
1 3074
# OR: pull the column out as a vector and take its median directly
# (returns a bare number rather than a one-row tibble).
circ %>% pull(orangeBoardings) %>% median(na.rm = TRUE)
[1] 3074
Take the median of `orangeBoardings` (use `median()`), but this time stratify by day of the week.
circ %>%
group_by(day) %>%
summarise(median = median(orangeBoardings, na.rm = TRUE))
# A tibble: 7 × 2
day median
<chr> <dbl>
1 Friday 4014.
2 Monday 3336
3 Saturday 2963
4 Sunday 1900
5 Thursday 3485
6 Tuesday 3484
7 Wednesday 3576