install.packages("zoo")
install.packages("data.table")

library(zoo)
library(data.table)
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


# 41 page
# Load the monthly US unemployment rate and build two incomplete copies of it:
#   rand.unemp - 10% of all rows removed completely at random
#   bias.unemp - 20% of the rows with UNRATE above 8 removed
# library() is used instead of require() so a missing package stops the script
# right here instead of failing later with a less obvious error.
library(zoo)
library(data.table)

unemp <- fread("https://raw.githubusercontent.com/PracticalTimeSeriesAnalysis/BookRepo/master/Ch02/data/UNRATE.csv")
unemp[, DATE := as.Date(DATE)]
# Key on DATE so the table can be joined against date vectors with J().
setkey(unemp, DATE)

# Missing at random. floor() makes the truncation of the fractional sample
# size explicit.
rand.unemp.idx <- sample(seq_len(nrow(unemp)), floor(.1 * nrow(unemp)))
rand.unemp <- unemp[-rand.unemp.idx]

# Missing more often when unemployment is high.
high.unemp.idx <- which(unemp$UNRATE > 8)
num.to.select <- floor(.2 * length(high.unemp.idx))
# The size must be passed explicitly: when it is left out, sample() returns a
# permutation of every index, so all high-unemployment rows would be removed
# instead of the intended 20%.
high.unemp.idx <- sample(high.unemp.idx, num.to.select)
bias.unemp <- unemp[-high.unemp.idx]
# 43 page
# Put both incomplete series back on a regular monthly grid and impute the
# gaps by carrying the last observed value forward.
all.dates <- seq(from = unemp$DATE[1], to = tail(unemp$DATE, 1), by = "months")

# Join on the full date grid with roll = 0, so no value is rolled onto a
# neighbouring date.
rand.unemp <- rand.unemp[J(all.dates), roll = 0]
bias.unemp <- bias.unemp[J(all.dates), roll = 0]

# rpt records which rows of rand.unemp had no UNRATE before imputation.
# na.rm = FALSE keeps a leading NA in place so the column length is unchanged.
rand.unemp[, `:=`(
  rpt = is.na(UNRATE),
  impute.ff = na.locf(UNRATE, na.rm = FALSE)
)]
bias.unemp[, impute.ff := na.locf(UNRATE, na.rm = FALSE)]

# Compare the true series with the forward-filled one on rows 350-400 and
# mark the points that were imputed.
zoom.idx <- 350:400
unemp[zoom.idx, plot(DATE, UNRATE, col = 1, lwd = 2, type = "b")]
rand.unemp[zoom.idx, lines(DATE, impute.ff, col = 2, lwd = 2, lty = 2)]
rand.unemp[zoom.idx][rpt == TRUE, points(DATE, impute.ff, col = 2, pch = 6, cex = 2)]
NULL
NULL
NULL
# 47 page
# Moving-average imputation with a window of three values. Observed values
# are kept unchanged; an NA is replaced by the mean of the non-missing
# values in its window.

# Window is the current value (position 3) and the two values before it.
fill.from.past <- function(w) {
  if (is.na(w[3])) mean(w, na.rm = TRUE) else w[3]
}

# Window is the current value (position 2) with one neighbour on each side.
fill.from.neighbours <- function(w) {
  if (is.na(w[2])) mean(w, na.rm = TRUE) else w[2]
}

# The NA padding gives rollapply() one complete window per element of UNRATE.
rand.unemp[, impute.rm.nolookahead := rollapply(c(NA, NA, UNRATE),
                                                width = 3,
                                                FUN = fill.from.past)]
bias.unemp[, impute.rm.nolookahead := rollapply(c(NA, NA, UNRATE),
                                                width = 3,
                                                FUN = fill.from.past)]
rand.unemp[, complete.rm := rollapply(c(NA, UNRATE, NA),
                                      width = 3,
                                      FUN = fill.from.neighbours)]
# page 49
# Impute by linear interpolation (impute.li) and by spline interpolation
# (impute.sp).
# na.rm = FALSE is required for the assignment to be safe: by default these
# functions trim leading/trailing NAs that are still present after
# interpolation, which returns a vector shorter than UNRATE whenever the first
# or last month is one of the removed rows, and `:=` then fails on the length
# mismatch.
rand.unemp[, impute.li := na.approx(UNRATE, na.rm = FALSE)]
bias.unemp[, impute.li := na.approx(UNRATE, na.rm = FALSE)]

rand.unemp[, impute.sp := na.spline(UNRATE, na.rm = FALSE)]
bias.unemp[, impute.sp := na.spline(UNRATE, na.rm = FALSE)]

# Compare the true series with both interpolations on rows 90-120.
use.idx <- 90:120
unemp[use.idx, plot(DATE, UNRATE, col = 1, type = "b")]
rand.unemp[use.idx, lines(DATE, impute.li, col = 2, lwd = 2, lty = 2)]
rand.unemp[use.idx, lines(DATE, impute.sp, col = 3, lwd = 2, lty = 3)]
NULL
NULL
NULL
# 51 page
# Mean squared error of each imputation method against the true series for
# the randomly-missing data, shown as a one-row table with the columns
# ordered from lowest to highest error.
rand.mse <- rand.unemp[, lapply(.SD, function(x) mean((x - unemp$UNRATE)^2, na.rm = TRUE)),
                       .SDcols = c("impute.ff", "impute.rm.nolookahead", "impute.li", "impute.sp")]
# Reorder the columns explicitly. Calling sort() on the table itself goes
# through xtfrm() on a data frame, which current R versions warn about, and
# only reorders columns because base R indexes the table like a data.frame.
setcolorder(rand.mse, order(unlist(rand.mse)))
rand.mse
A data.table: 1 × 4
impute.liimpute.spimpute.rm.nolookaheadimpute.ff
<dbl><dbl><dbl><dbl>
0.0025055950.0038388790.0066943130.00686019
# Mean squared error of each imputation method against the true series for
# the data that is missing preferentially at high unemployment, shown as a
# one-row table with the columns ordered from lowest to highest error.
bias.mse <- bias.unemp[, lapply(.SD, function(x) mean((x - unemp$UNRATE)^2, na.rm = TRUE)),
                       .SDcols = c("impute.ff", "impute.rm.nolookahead", "impute.li", "impute.sp")]
# Reorder the columns explicitly. Calling sort() on the table itself goes
# through xtfrm() on a data frame, which current R versions warn about, and
# only reorders columns because base R indexes the table like a data.frame.
setcolorder(bias.mse, order(unlist(bias.mse)))
bias.mse
A data.table: 1 × 4
impute.rm.nolookaheadimpute.spimpute.liimpute.ff
<dbl><dbl><dbl><dbl>
0.0055721720.023234850.21635620.2318483
# 53 page
# Downsample by keeping every 12th row, starting with the first one.
yearly.idx <- seq(1, nrow(unemp), by = 12)
unemp[yearly.idx]
A data.table: 71 × 2
DATEUNRATE
<date><dbl>
1948-01-013.4
1949-01-014.3
1950-01-016.5
1951-01-013.7
1952-01-013.2
1953-01-012.9
1954-01-014.9
1955-01-014.9
1956-01-014.0
1957-01-014.2
1958-01-015.8
1959-01-016.0
1960-01-015.2
1961-01-016.6
1962-01-015.8
1963-01-015.7
1964-01-015.6
1965-01-014.9
1966-01-014.0
1967-01-013.9
1968-01-013.7
1969-01-013.4
1970-01-013.9
1971-01-015.9
1972-01-015.8
1973-01-014.9
1974-01-015.1
1975-01-018.1
1976-01-017.9
1977-01-017.5
1989-01-015.4
1990-01-015.4
1991-01-016.4
1992-01-017.3
1993-01-017.3
1994-01-016.6
1995-01-015.6
1996-01-015.6
1997-01-015.3
1998-01-014.6
1999-01-014.3
2000-01-014.0
2001-01-014.2
2002-01-015.7
2003-01-015.8
2004-01-015.7
2005-01-015.3
2006-01-014.7
2007-01-014.6
2008-01-015.0
2009-01-017.8
2010-01-019.8
2011-01-019.1
2012-01-018.3
2013-01-018.0
2014-01-016.6
2015-01-015.7
2016-01-014.9
2017-01-014.8
2018-01-014.1
# Downsample by aggregation: mean UNRATE per calendar year. The year is the
# "%Y" part of DATE; the result columns are named format and V1.
unemp[, .(V1 = mean(UNRATE)), by = .(format = format(DATE, "%Y"))]
A data.table: 71 × 2
formatV1
<chr><dbl>
19483.750000
19496.050000
19505.208333
19513.283333
19523.025000
19532.925000
19545.591667
19554.366667
19564.125000
19574.300000
19586.841667
19595.450000
19605.541667
19616.691667
19625.566667
19635.641667
19645.158333
19654.508333
19663.791667
19673.841667
19683.558333
19693.491667
19704.983333
19715.950000
19725.600000
19734.858333
19745.641667
19758.475000
19767.700000
19777.050000
19895.258333
19905.616667
19916.850000
19927.491667
19936.908333
19946.100000
19955.591667
19965.408333
19974.941667
19984.500000
19994.216667
20003.966667
20014.741667
20025.783333
20035.991667
20045.541667
20055.083333
20064.608333
20074.616667
20085.800000
20099.283333
20109.608333
20118.933333
20128.075000
20137.358333
20146.175000
20155.266667
20164.866667
20174.350000
20184.050000
# 54 page
# Upsample the monthly series to one row per day. The join has to run against
# a daily date grid: joining against the monthly all.dates matches every row
# exactly, so nothing is rolled and the result is just the monthly table.
daily.dates <- seq(from = unemp$DATE[1], to = tail(unemp$DATE, 1), by = "days")
# roll = 31 carries each monthly value forward for at most 31 days, which is
# enough to reach the next monthly observation.
daily.unemployment <- unemp[J(daily.dates), roll = 31]
daily.unemployment
A data.table: 844 × 2
DATEUNRATE
<date><dbl>
1948-01-013.4
1948-02-013.8
1948-03-014.0
1948-04-013.9
1948-05-013.5
1948-06-013.6
1948-07-013.6
1948-08-013.9
1948-09-013.8
1948-10-013.7
1948-11-013.8
1948-12-014.0
1949-01-014.3
1949-02-014.7
1949-03-015.0
1949-04-015.3
1949-05-016.1
1949-06-016.2
1949-07-016.7
1949-08-016.8
1949-09-016.6
1949-10-017.9
1949-11-016.4
1949-12-016.6
1950-01-016.5
1950-02-016.4
1950-03-016.3
1950-04-015.8
1950-05-015.5
1950-06-015.4
2015-11-015.0
2015-12-015.0
2016-01-014.9
2016-02-014.9
2016-03-015.0
2016-04-015.0
2016-05-014.7
2016-06-014.9
2016-07-014.9
2016-08-014.9
2016-09-015.0
2016-10-014.9
2016-11-014.6
2016-12-014.7
2017-01-014.8
2017-02-014.7
2017-03-014.5
2017-04-014.4
2017-05-014.3
2017-06-014.3
2017-07-014.3
2017-08-014.4
2017-09-014.2
2017-10-014.1
2017-11-014.1
2017-12-014.1
2018-01-014.1
2018-02-014.1
2018-03-014.1
2018-04-013.9
# 62 page
# Decompose the AirPassengers series with stl() using a periodic seasonal
# window, then plot the fitted decomposition.
air.stl <- stl(AirPassengers, s.window = "periodic")
plot(air.stl)