For my first experience with STAN I wanted to convert my last JAGS program into STAN. This was a bit more difficult than I expected. The JAGS program was

Data has been explained before. The only difference is that by now I am reading the data using read.xls from the gdata package. The new code is at the bottom of this post.

Initially I thought the conversion would be simple. I made some simplification and the code worked. However, it appeared my initial code was not very fast. Then I added the out of range data and that resulted in a problem seen before, out of range initialized data and hence no MCMC samples. After muddying along for a bit my second step was to make a simple regression model and get to know STAN a bit better. This knowledge set in a simple model. Extended it till the model had the features which I wanted except the out of range data. To add the out of range data I decided to initialize the more complex model using estimates from a more simple model. This worked well, the first code also serves as a kind of warm-up too. There are some warnings the way I set it up, but that is only in the first model, which is only run for a few cycles. Final running time was a disappointing close to two hours. This could probably be reduced a bit using less but longer chains.

####
Reading data

# http://www.lml.rivm.nl/data/gevalideerd/data/lmre_1992-2005.xls

# http://www.r-bloggers.com/read-excel-files-from-r/

library(gdata)

readsheet <- function(cursheet,fname) {

df = read.xls(fname,

sheet = cursheet ,

blank.lines.skip=FALSE,

colClasses = "character")

topline <- 8

test <- which(df[6,]=='C')+1

numin <- df[topline:nrow(df),test]

names(numin) <- make.names( gsub(' ','',df[6,test]))

for(i in 1:ncol(numin)) numin[,i] <- as.numeric(gsub(',','.',numin[,i]))

status = df[topline:nrow(df),test-1]

names(status) <- paste('C',make.names( gsub(' ','',df[6,test])),sep='')

numin$StartDate <- as.Date(substr(df[topline:nrow(df),1],1,10))

numin$EndDate <- as.Date(substr(df[topline:nrow(df),2],1,10))

numin <- cbind(numin,status)

numin <- numin[!is.na(numin$StartDate),]

tall <- reshape(numin,

varying=list(

make.names(gsub(' ','',df[6,test])),

paste('C',

make.names(gsub(' ','',df[6,test])),sep='')),

v.names=c('Amount','Status'),

timevar='Compound',

idvar=c('StartDate','EndDate'),

times=paste(df[6,test],'(',df[5,test],')',sep=''),

direction='long')

tall$Compound <- factor(gsub(' )',')',gsub(' +',' ',tall$Compound)))

row.names(tall)<- NULL

tall$Location <- cursheet

tall

}

readfile <- function(fname) {

sheets <- sheetNames(fname)[-1]

tt <- lapply(sheets,readsheet,fname=fname)

tt2 <- do.call(rbind,tt)

tt2$Location <- factor(tt2$Location)

tt2$fname <- fname

tt2

}

fnames <- dir(pattern='*.xls') #.\\. ?

rf <- lapply(fnames,readfile)

rf2 <- do.call(rbind,rf)

rf2$machine <- factor(ifelse(rf2$fname=="lmre_1992-2005.xls",

'Van Essen/ECN vanger',

' Eigenbrodt NSA 181 KHT'))

####
Select Fe data and prepare

Fe <- rf2[rf2$Compound==levels(rf2$Compound)[9],]

Fe$LAmount <- log10(Fe$Amount)

Fe$LAmount[Fe$Amount<0.4] <- NA

Fe2 <- Fe[!is.na(Fe$Status),]

Fe3 <- Fe2[!is.na(Fe2$LAmount),]

Fe4 <- Fe2[is.na(Fe2$LAmount),]

library(rstan)

datain <- list(

LAmount = Fe3$LAmount,

Machine=(Fe3$machine=='Van Essen/ECN vanger')+1,

Location=(1:nlevels(Fe3$Location))[Fe3$Location],

time=as.numeric(Fe3$StartDate),

nobs =nrow(Fe3),

nloc=nlevels(Fe3$Location),

MachineC=(Fe4$machine=='Van Essen/ECN vanger')+1,

LocationC=(1:nlevels(Fe4$Location))[Fe4$Location],

timeC=as.numeric(Fe4$StartDate),

nobsC =nrow(Fe4),

L=log10(0.399)

)

####
First model

parameters1 <- c('intercept','rate','FLoc_r',

'sigmaF','sfmach','sfloc','FMach_r' )

Fe_code1 <- '

data {

int<lower=0> nloc;

int<lower=0> nobs;

vector[nobs] LAmount;

int<lower=1,upper=2> Machine[nobs];

int<lower=1,upper=nloc> Location[nobs];

vector[nobs] time;

int<lower=0> nobsC;

real <upper=min(LAmount)> L;

int<lower=1,upper=2> MachineC[nobsC];

int<lower=1,upper=nloc> LocationC[nobsC];

vector[nobsC] timeC;

}

parameters {

real intercept;

real rate;

vector [nloc] FLoc_r;

vector<lower=0> [2] sigmaF;

real<lower=0> sfmach;

real<lower=0> sfloc;

vector[2] FMach_r;

}

transformed parameters {

vector[nobs] ExpLAmount;

vector<lower=0>[nobs] sigma;

real mean_FLoc;

real mean_FMach;

vector[nloc] FLoc;

vector[2] FMach;

vector[nobsC] ExpLAmountC;

vector<lower=0> [nobsC] sigmaC;

vector[nobsC] LAmountC;

mean_FLoc <- mean(FLoc_r);

FLoc <- (FLoc_r-mean_FLoc)*sfloc;

mean_FMach <- mean(FMach_r);

FMach <- (FMach_r-mean_FMach)*sfmach;

for (i in 1:nobs) {

ExpLAmount[i] <- intercept

+ .0001*rate*time[i]

+ FMach[Machine[i]] +

+ FLoc[Location[i]];

sigma[i] <- sigmaF[Machine[i]];

}

for (i in 1:nobsC) {

ExpLAmountC[i] <- intercept

+ .0001*rate*timeC[i]

+ FMach[MachineC[i]] +

+ FLoc[LocationC[i]];

sigmaC[i] <- sigmaF[MachineC[i]];

LAmountC[i] <- log10(.2);

}

}

model {

sfmach ~ cauchy(0,4);

sfloc ~ cauchy(0,4);

FMach_r ~ normal(0,1);

FLoc_r ~ normal(0,1);

sigmaF ~ cauchy(0,4);

LAmount ~ normal(ExpLAmount,sigma);

LAmountC ~ normal(ExpLAmountC,sigmaC);

rate ~ normal(0,1);

}

generated quantities {

real dailydec;

real yearlydec;

dailydec <- pow(10,rate*.0001);

yearlydec <- pow(dailydec,365);

}

'

fit1 <- stan(model_code = Fe_code1,

data = datain,

pars=parameters1,

warmup=50,

iter = 75,

chains = 4)

fit1

####
Second model

samples <- as.array(fit1)

lastsample <- dim(samples)[1]

nchain <- dim(samples)[2]

init2 <- lapply(1:nchain,function(i) {

fin <- samples[lastsample,i,]

list(

intercept=fin[1],

rate=fin[2],

FLoc_r=fin[3:(3+16)],

sigmaF=fin[20:21],

sfmach=fin[22],

sfloc=fin[23],

FMach_r=fin[24:25],

LAmountC=rep(log10(.2),datain$nobsC)

)

})

parameters2 <- c('intercept','yearlydec',

'sigmaF','FMach','sfmach','sfloc',

'FLoc' ,'rate' )

Fe_code2 <- '

data {

int<lower=0> nloc;

int<lower=0> nobs;

vector[nobs] LAmount;

int<lower=1,upper=2> Machine[nobs];

int<lower=1,upper=nloc> Location[nobs];

vector[nobs] time;

int<lower=0> nobsC;

real <upper=min(LAmount)> L;

int<lower=1,upper=2> MachineC[nobsC];

int<lower=1,upper=nloc> LocationC[nobsC];

vector[nobsC] timeC;

}

parameters {

real intercept;

real rate;

vector [nloc] FLoc_r;

vector<lower=0> [2] sigmaF;

real<lower=0> sfmach;

real<lower=0> sfloc;

vector[2] FMach_r;

vector<upper=L>[nobsC] LAmountC;

}

transformed parameters {

vector[nobs] ExpLAmount;

vector<lower=0>[nobs] sigma;

real mean_FLoc;

real mean_FMach;

vector[nloc] FLoc;

vector[2] FMach;

vector[nobsC] ExpLAmountC;

vector<lower=0> [nobsC] sigmaC;

mean_FLoc <- mean(FLoc_r);

FLoc <- (FLoc_r-mean_FLoc)*sfloc;

mean_FMach <- mean(FMach_r);

FMach <- (FMach_r-mean_FMach)*sfmach;

for (i in 1:nobs) {

ExpLAmount[i] <- intercept

+ .0001*rate*time[i]

+ FMach[Machine[i]] +

+ FLoc[Location[i]];

sigma[i] <- sigmaF[Machine[i]];

}

for (i in 1:nobsC) {

ExpLAmountC[i] <- intercept

+ .0001*rate*timeC[i]

+ FMach[MachineC[i]] +

+ FLoc[LocationC[i]];

sigmaC[i] <- sigmaF[MachineC[i]];

}

}

model {

sfmach ~ cauchy(0,4);

sfloc ~ cauchy(0,4);

FMach_r ~ normal(0,1);

FLoc_r ~ normal(0,1);

sigmaF ~ cauchy(0,4);

LAmount ~ normal(ExpLAmount,sigma);

LAmountC ~ normal(ExpLAmountC,sigmaC);

rate ~ normal(0,1);

}

generated quantities {

real dailydec;

real yearlydec;

dailydec <- pow(10,rate*.0001);

yearlydec <- pow(dailydec,365);

}

'

fit2 <- stan(model_code = Fe_code2,

data = datain,

pars=parameters2,

init=init2,

warmup=100,

iter = 500,

chains = 4)

fit2