library(readr)
  1. Download the UFO dataset as a text file from http://sisbid.github.io/Module1/data/ufo/ufo_data_complete.txt and read it in as a new R object. Confirm that it has the same number of rows as the CSV-based object.
# Read the tab-delimited UFO sightings file straight from the course site.
# `duration (seconds)` is pinned to double: when left to type guessing it was
# inferred as integer, and fractional durations such as ".5" were reported as
# parsing failures. All other columns are still guessed.
# NOTE: the recorded console output that follows was captured before this
# override, so it still shows the integer guess and the parsing failures.
ufo2 <- read_delim(
  "http://sisbid.github.io/Module1/data/ufo/ufo_data_complete.txt",
  delim = "\t",
  col_types = cols(`duration (seconds)` = col_double())
)
## Warning: Missing column names filled in: 'X12' [12]
## Parsed with column specification:
## cols(
##   datetime = col_character(),
##   city = col_character(),
##   state = col_character(),
##   country = col_character(),
##   shape = col_character(),
##   `duration (seconds)` = col_integer(),
##   `duration (hours/min)` = col_character(),
##   comments = col_character(),
##   `date posted` = col_character(),
##   latitude = col_character(),
##   longitude = col_double(),
##   X12 = col_integer()
## )
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 270 parsing failures.
## row # A tibble: 5 x 5 col     row col                expected               actual file              expected   <int> <chr>              <chr>                  <chr>  <chr>             actual 1  1750 duration (seconds) no trailing characters .5     'http://sisbid.g~ file 2  1804 duration (seconds) no trailing characters .5     'http://sisbid.g~ row 3  1811 duration (seconds) no trailing characters .1     'http://sisbid.g~ col 4  1840 duration (seconds) no trailing characters .5     'http://sisbid.g~ expected 5  2231 duration (seconds) no trailing characters .05    'http://sisbid.g~
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
  1. Rename columns to ensure they have no spaces or non-alphanumeric characters.
# Give the eleven named source columns clean, identifier-safe names.
# Only the first eleven positions are renamed: the import also produced a
# twelfth, auto-named filler column (reported as 'X12' in the import warning),
# so assigning an 11-element vector to all of colnames(ufo2) would leave that
# column with a missing name (or fail, depending on the tibble version).
clean_names <- c(
  "DateTime", "City", "State", "Country", "Shape",
  "Duration_sec", "Duration_hourMin", "Comments",
  "PostedDate", "Latitude", "Longitude"
)
colnames(ufo2)[seq_along(clean_names)] <- clean_names
  1. Use the lubridate package to change the class of the $datetime column to the date class.
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Replace the two character date columns with lubridate-parsed values:
# PostedDate is parsed as month-day-year, DateTime as month-day-year plus
# hours and minutes. Entries that do not match are counted in the warnings.
ufo2[["PostedDate"]] <- mdy(ufo2[["PostedDate"]])
## Warning: 194 failed to parse.
ufo2[["DateTime"]] <- mdy_hm(ufo2[["DateTime"]])
## Warning: 1262 failed to parse.
  1. Capitalize the states in the $state column
# Upper-case every state code in place, then preview the first few values.
ufo2[["State"]] <- toupper(ufo2[["State"]])
head(ufo2[["State"]])
## [1] "TX" "TX" NA   "TX" "HI" "TN"
  1. Coerce the longitude and latitude columns to the numeric class
# Force both coordinate columns to numeric. Entries that are not valid
# numbers become NA, which is what the coercion warning recorded for
# Latitude refers to.
ufo2[["Latitude"]] <- as.numeric(ufo2[["Latitude"]])
## Warning: NAs introduced by coercion
ufo2[["Longitude"]] <- as.numeric(ufo2[["Longitude"]])
  1. Write out this updated data.frame as a new CSV file
# Save the cleaned table as CSV in the working directory, omitting the
# row-name column.
write.csv(x = ufo2, file = "new_ufo_data.csv", row.names = FALSE)
  1. Download the Excel-formatted UFO Dataset: http://sisbid.github.io/Module1/data/ufo/ufo_data_complete.xlsx and import the dataset as a new R object. Confirm it has the same number of lines as the CSV- and Tab-delimited-based R objects
library(readxl)
# Import the Excel copy of the dataset from the local data directory.
# First pass: let readxl guess each column's type.
ufo_excel <- read_excel("../data/ufo/ufo_data_complete.xlsx")
# Second pass replaces the first result, reading every column as text.
# The argument is spelled out in full (`col_types`); the previous `col_type`
# only worked through R's partial argument matching.
ufo_excel <- read_excel("../data/ufo/ufo_data_complete.xlsx",
                        col_types = "text")