--- title: "Aircraft Accident Patterns Over Time" author: "John Maindonald, Statistics Research Associates" date: '`r format(Sys.Date(),"%d %B %Y")`' documentclass: article classoption: b5paper fontsize: 10pt output: prettydoc::html_pretty: theme: cayman highlight: github toc: true toc_depth: 1 number_sections: false link-citations: yes pkgdown: as_is: true vignette: > %\VignetteIndexEntry{Aircraft Accident Patterns Over Time} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} bibliography: mr.bib --- ```{r setup, cache=FALSE, echo=FALSE} library(knitr) options(replace.assign=FALSE,width=72) opts_chunk$set(fig.align='center', fig.width=5.5, fig.height=4.25, par=TRUE, tidy=FALSE, comment=NA) knit_hooks$set(par=function(before, options, envir){ if (before && options$fig.show!='none') par(mar=c(4,4,2.6,.1), cex.lab=.95,cex.axis=.9,mgp=c(2,.7,0),tcl=-.3) }, crop=hook_pdfcrop) oldopt <- options(digits=4) ``` # Prepare data -- Weekly and daily counts The interest is in broad patterns over time. Weekly counts give numbers that are easier to work with. We use the function `gamclass::eventCounts()` to create counts of accidents, i) by week, and ii) by day, in both cases from January 1, 2006: ```{r eventCounts} airAccs <- gamclass::airAccs fromDate <- as.Date("2006-01-01") dfDay06 <- gamclass::eventCounts(airAccs, dateCol="Date", from= fromDate, by="1 day", prefix="num") dfDay06$day <- julian(dfDay06$Date, origin=fromDate) dfWeek06 <- gamclass::eventCounts(airAccs, dateCol="Date", from=fromDate, by="1 week", prefix="num") dfWeek06$day <- julian(dfWeek06$Date, origin=fromDate) ``` # Fits, to the daily data, and to the weekly data Figure 1 shows the fits to the weekly data: ```{r airAccsCap} cap1 <- "Figure 1: Fitted number of events (aircraft accidents) per week versus time." ``` ```{r planeCrash, out.width='60%', fig.width=6, fig.height=4, fig.cap=cap1} library(grid) library(mgcv) year <- seq(from=fromDate, to=max(dfDay06$Date), by="1 year") atyear=julian(year, origin=fromDate) dfWeek06.gam <- gam(num~s(day, k=200), data=dfWeek06, family=quasipoisson) av <- mean(predict(dfWeek06.gam)) plot(dfWeek06.gam, xaxt="n", shift=av, trans=exp, rug=FALSE, xlab="", ylab="Estimated rate per week", fg='gray') axis(1, at=atyear, labels=format(year, "%Y")) grid(lw=2,nx=NA,ny=NULL) abline(v=atyear, lwd=2, lty=3, col='lightgray') mtext(side=3, line=0.75, "Figure 1: Events per week, vs date", cex=1.25, adj=0) ``` The fitted curve and confidence band that results from modeling and plotting events per day is very similar. Code to plot events per day is: ```{r cap2} cap2 <- "Figure 2: Fitted number of events (aircraft accidents) per day versus time." ``` ```{r eventsPerDay, fig.width=6, fig.height=4, fig.cap=cap2, eval=FALSE} dfDay06.gam <- , out.width='60%', fig.width=6, fig.height=4, fig.cap=cap2} gam(formula = num ~ s(day, k=200), family = quasipoisson, data = dfDay06) av <- mean(predict(dfDay06.gam)) plot(dfDay06.gam, xaxt="n", shift=av, trans=exp, rug=FALSE, xlab="", ylab="Estimated rate per day", fg='gray') axis(1, at=atyear, labels=format(year, "%Y")) grid(lw=2,nx=NA,ny=NULL) abline(v=atyear, lwd=2, lty=3, col='lightgray') mtext(side=3, line=0.75, "A: Events per day, vs date", cex=1.25, adj=0)}