Sunday, February 23, 2014

Unemployment revisited

Approximately a year ago I made a post graphing unemployment in Europe and other locations. I have always wanted to do this again, not because the R-code would be so interesting, but just because I wanted to see the plots. As time progressed I attempted not to do this in R, but in Julia. I could not get it good enough in Julia, so this is, alas, the R version.

Data

Data from Eurostat. Or, if you are lazy, Google une_rt_m, which is the name of the table. There is a bit of pre-processing of the data, mostly getting names of countries decent for plotting. The plots shown are unemployment and its first derivative, both smoothed.

Plots

Smoothed data

First derivative


Code

library(ggplot2)
library(KernSmooth)
library(plyr)
library(scales) # to access breaks/formatting functions

r1 <- read.csv("une_rt_m_1_Data.csv",na.strings=':')
levels(r1$GEO) <- sub(' countries)',')' ,levels(r1$GEO),fixed=TRUE)
levels(r1$GEO) <- sub('European Union','EU' ,levels(r1$GEO))
levels(r1$GEO)[levels(r1$GEO)=='Euro area (EA11-2000, EA12-2006, EA13-2007, EA15-2008, EA16-2010, EA17-2013, EA18)'] <- "EAll"
levels(r1$GEO)[levels(r1$GEO)=='United Kingdom'] <- 'UK'
levels(r1$GEO)[levels(r1$GEO)=='United States'] <- 'US'
levels(r1$GEO)[levels(r1$GEO)=='Germany (until 1990 former territory of the FRG)'] <- 'Germany'
levels(r1$GEO)
grep('12|13|15|16|17|25|27',x=levels(r1$GEO),value=TRUE)
r1 <- r1[!(r1$GEO %in% grep('12|13|15|16|17|25|27',x=levels(r1$GEO),value=TRUE)),]
r1$GEO <- factor(r1$GEO)
r1$Age <- factor(r1$AGE,levels=levels(r1$AGE)[c(2,1,3)])
r1$Date <- as.Date(paste(gsub('M','-',as.character(r1$TIME)),'-01',sep=''))

#
maxi <- aggregate(r1$Value,by=list(GEO=r1$GEO),FUN=max,na.rm=TRUE)
parts <- data.frame(
    low = maxi$GEO[maxi$x<quantile(maxi$x,1/3)]
    ,middle = maxi$GEO[maxi$x>quantile(maxi$x,1/3) & maxi$x<quantile(maxi$x,2/3)]
    ,high = maxi$GEO[maxi$x>quantile(maxi$x,2/3)]
)
#ggplot(r1[r1$GEO %in% low,],aes(x=Date,y=Value,colour=Age)) +
#        facet_wrap( ~ GEO, drop=TRUE) +
#        geom_line()  +
#        theme(legend.position = "bottom")
#        ylab('% Unemployment') + xlab('Year')


r1$class <- interaction(r1$GEO,r1$Age)
r3 <- r1[complete.cases(r1),]
r3$class <- factor(r3$class)
Perc <- ddply(.data=r3,.variables=.(class),
    function(piece,...) {
        lp <- locpoly(x=as.numeric(piece$Date),y=piece$Value,
            drv=0,bandwidth=90)
        sdf <- data.frame(Date=as.Date(lp$x,origin='1970-01-01'),
            sPerc=lp$y,Age=piece$Age[1],GEO=piece$GEO[1])}
    ,.inform=FALSE
)
for (i in c('low','middle','high')) {
    png(paste(i,'.png',sep=''))
    print(
        ggplot(Perc[Perc$GEO %in% parts[,i] ,],
                aes(x=Date,y=sPerc,colour=Age)) +
            facet_wrap( ~ GEO, drop=TRUE) +
            geom_line()  +
            theme(legend.position = "bottom")+
            ylab('% Unemployment') + xlab('Year') +
            scale_x_date(breaks = date_breaks("5 years"),
                labels = date_format("%y"))
    )
    dev.off()
}

dPerc <- ddply(.data=r3,.variables=.(class),
    function(piece,...) {
        lp <- locpoly(x=as.numeric(piece$Date),y=piece$Value,
            drv=1,bandwidth=365/2)
        sdf <- data.frame(Date=as.Date(lp$x,origin='1970-01-01'),
            dPerc=lp$y,Age=piece$Age[1],GEO=piece$GEO[1])}
    ,.inform=FALSE
)

for (i in c('low','middle','high')) {
    png(paste('d',i,'.png',sep=''))
    print(
        ggplot(dPerc[dPerc$GEO %in% parts[,i] ,],
                aes(x=Date,y=dPerc,colour=Age)) +
            facet_wrap( ~ GEO, drop=TRUE) +
            geom_line()  +
            theme(legend.position = "bottom")+
            ylab('Change in % Unemployment') + xlab('Year')+
            scale_x_date(breaks = date_breaks("5 years"),
                labels = date_format("%y"))
    )
    dev.off()
}

No comments:

Post a Comment