# 2장 시계열 데이터의 발견과 정리 - R
# 시계열 데이터의 발견과 정리 중 R을 사용하는 부분의 소스 코드입니다.
install.packages("zoo")
install.packages("data.table")
library(zoo)
library(data.table)
# Page 41: load the monthly unemployment series and derive two data sets with
# artificially removed observations.
# library() stops immediately when a package is missing; require() would only
# return FALSE and let the script fail later with a less helpful error.
library(zoo)
library(data.table)

# Download the UNRATE series, parse DATE as a Date and key the table on it so
# that later keyed joins on DATE work.
unemp <- fread("https://raw.githubusercontent.com/PracticalTimeSeriesAnalysis/BookRepo/master/Ch02/data/UNRATE.csv")
unemp[, DATE := as.Date(DATE)]
setkey(unemp, DATE)

# Data set 1: remove a random 10% of all rows.
# The sample size is floored explicitly instead of relying on implicit
# truncation of a fractional size.
rand.unemp.idx <- sample.int(nrow(unemp), floor(.1 * nrow(unemp)))
rand.unemp <- unemp[-rand.unemp.idx]

# Data set 2: remove 20% of the rows whose unemployment rate is above 8, so
# that data is more likely to be missing when unemployment is high.
high.unemp.idx <- which(unemp$UNRATE > 8)
num.to.select <- floor(.2 * length(high.unemp.idx))
# The size argument was previously left empty, which made sample() return a
# permutation of ALL high-unemployment rows, so every one of them was dropped.
# Sampling positions (rather than the values themselves) also avoids sample()
# treating a single index k as the range 1:k.
high.unemp.idx <- high.unemp.idx[sample.int(length(high.unemp.idx), num.to.select)]
bias.unemp <- unemp[-high.unemp.idx]
# Page 43: forward-fill imputation.

# Full monthly calendar from the first to the last observed date.
all.dates <- seq(unemp$DATE[1], unemp$DATE[nrow(unemp)], by = "months")

# Join both thinned tables onto the calendar. With roll = 0 no neighbouring
# value is rolled in, so months that were removed show up with UNRATE = NA.
rand.unemp <- rand.unemp[J(all.dates), roll = 0]
bias.unemp <- bias.unemp[J(all.dates), roll = 0]

# rpt marks the rows whose value is missing; impute.ff carries the last
# observed value forward (na.rm = FALSE keeps the column length unchanged).
rand.unemp[, `:=`(
  rpt = is.na(UNRATE),
  impute.ff = na.locf(UNRATE, na.rm = FALSE)
)]
bias.unemp[, impute.ff := na.locf(UNRATE, na.rm = FALSE)]

# Rows 350-400: original series, forward-filled series on top, and the
# imputed points highlighted.
unemp[350:400, plot(DATE, UNRATE, type = 'b', lwd = 2, col = 1)]
rand.unemp[350:400, lines(DATE, impute.ff, lty = 2, lwd = 2, col = 2)]
rand.unemp[350:400][(rpt), points(DATE, impute.ff, cex = 2, pch = 6, col = 2)]
# Page 47: moving-average imputation over windows of three values.

# Without lookahead: two leading NA pads make the window for row i cover rows
# i-2, i-1 and i. An observed value is kept as is; a missing one is replaced by
# the mean of the non-missing values in its window.
rand.unemp[, impute.rm.nolookahead := rollapply(
  c(NA, NA, UNRATE),
  width = 3,
  FUN = function(w) {
    current <- w[3]
    if (is.na(current)) mean(w, na.rm = TRUE) else current
  }
)]
bias.unemp[, impute.rm.nolookahead := rollapply(
  c(NA, NA, UNRATE),
  width = 3,
  FUN = function(w) {
    current <- w[3]
    if (is.na(current)) mean(w, na.rm = TRUE) else current
  }
)]

# With lookahead: one NA pad on each side centres the window on row i, so the
# following value also contributes to the mean.
rand.unemp[, complete.rm := rollapply(
  c(NA, UNRATE, NA),
  width = 3,
  FUN = function(w) {
    current <- w[2]
    if (is.na(current)) mean(w, na.rm = TRUE) else current
  }
)]
# Page 49: linear and spline interpolation of the missing values.
# na.rm = FALSE keeps leading/trailing NAs in place instead of dropping them.
# With the default (na.rm = TRUE) the result is shorter than the column
# whenever the first or last observation is missing, and the := assignment
# then fails on the length mismatch.
rand.unemp[, impute.li := na.approx(UNRATE, na.rm = FALSE)]
bias.unemp[, impute.li := na.approx(UNRATE, na.rm = FALSE)]
rand.unemp[, impute.sp := na.spline(UNRATE, na.rm = FALSE)]
bias.unemp[, impute.sp := na.spline(UNRATE, na.rm = FALSE)]

# Compare the original series with both interpolations on rows 90-120.
use.idx <- 90:120
unemp[use.idx, plot(DATE, UNRATE, col = 1, type = "b")]
rand.unemp[use.idx, lines(DATE, impute.li, col = 2, lwd = 2, lty = 2)]
rand.unemp[use.idx, lines(DATE, impute.sp, col = 3, lwd = 2, lty = 3)]
# Page 51: mean squared error of each imputation method against the original
# series, printed from smallest to largest error.
# The one-row result table is unlisted into a named numeric vector before
# sorting: sort() on a data.table has no well-defined "order the columns by
# value" meaning, whereas sort() on a named vector does exactly that.
# NOTE(review): x - unemp$UNRATE assumes the imputed tables have the same rows,
# in the same order, as unemp (true after the join on all.dates) - confirm.
sort(unlist(
  rand.unemp[, lapply(.SD, function(x) mean((x - unemp$UNRATE)^2, na.rm = TRUE)),
             .SDcols = c("impute.ff", "impute.rm.nolookahead", "impute.li", "impute.sp")]
))
sort(unlist(
  bias.unemp[, lapply(.SD, function(x) mean((x - unemp$UNRATE)^2, na.rm = TRUE)),
             .SDcols = c("impute.ff", "impute.rm.nolookahead", "impute.li", "impute.sp")]
))
# Page 53: downsampling.

# Keep every 12th row, starting with the first one.
unemp[seq(1, nrow(unemp), by = 12)]

# Average UNRATE within each calendar year (grouping on the year part of DATE).
unemp[, mean(UNRATE), by = format(DATE, "%Y")]
# Page 54: upsampling the monthly series to daily frequency.
# The join needs a daily calendar; joining on the monthly all.dates sequence
# only reproduces the monthly rows and leaves roll = 31 without any effect.
daily.dates <- seq(from = min(unemp$DATE), to = max(unemp$DATE), by = "day")

# roll = 31 carries each monthly observation forward for up to 31 days, so
# every day receives the value reported for its month.
daily.unemployment <- unemp[J(daily.dates), roll = 31]
daily.unemployment
# Page 62: decompose the built-in AirPassengers series with STL, using a
# periodic seasonal window, and plot the resulting components.
plot(
  stl(AirPassengers, s.window = "periodic")
)