functions {
// credit http://srmart.in/informative-priors-for-correlation-matrices-an-easy-approach/
vector lower_tri(matrix mat) {
int d = rows(mat);
int lower_tri_d = d * (d - 1) / 2;
vector[lower_tri_d] lower;
int count = 1;
for(r in 2:d) {
for(c in 1:(r - 1)) {
lower[count] = mat[r,c];
count += 1;
}
}
return(lower);
}
// credit http://srmart.in/informative-priors-for-correlation-matrices-an-easy-approach/
real lkj_corr_point_lower_tri_lpdf(matrix rho, vector point_mu_lower, vector point_scale_lower) {
real lpdf = lkj_corr_lpdf(rho | 1) + normal_lpdf(lower_tri(rho) | point_mu_lower, point_scale_lower);
return(lpdf);
}
real lkj_corr_cholesky_point_lower_tri_two_lpdf(matrix cor_L, real point_mu_lower, real point_scale_lower) {
real lpdf = lkj_corr_cholesky_lpdf(cor_L | 1);
int d = rows(cor_L);
matrix[d,d] cor = multiply_lower_tri_self_transpose(cor_L);
lpdf += normal_lpdf(cor[2,1] | point_mu_lower, point_scale_lower);
return(lpdf);
}
// pH and fi at a given time at column inlet
vector gra_state(real t, vector hplcparam) {
vector[2] sol;
real tg = hplcparam[1];
real td = hplcparam[2];
real fio = hplcparam[5];
real fik = hplcparam[6];
real pHo = hplcparam[8];
real alpha1 = hplcparam[9];
real alpha2 = hplcparam[10];
real fi;
fi = fio+(fik-fio)/tg*(t-td);
if (t<td)
fi = fio;
else if (t>tg+td)
fi = fik;
sol[1]=fi;
sol[2]=pHo+alpha1*fi+alpha2*fi^2;
return sol;
}
real funlogki(vector logkwx, vector S1, vector pKaw, vector alpha, real S2, vector apH,
int nDiss, vector chargesA, vector chargesB, vector fipH) {
real logki;
vector[3] logkix;
vector[2] pHmpKa;
real fi=fipH[1];
real pH=fipH[2];
logkix=logkwx-S1*fi/(1+S2*fi)+ chargesA*apH[1]*(pH-7) + chargesB*apH[2]*(pH-7);
pHmpKa=pH-(pKaw+alpha*fi);
if (nDiss==0) {
logki = logkix[1];
}
else if (nDiss==1){
logki=logkix[1] +
log1p_exp(log(10)*(pHmpKa[1]+logkix[2]-logkix[1]))/log(10)-
log1p_exp(log(10)*(pHmpKa[1]))/log(10);
}
else if (nDiss==2){
logki = logkix[1] +
log1p_exp(log(10)*(pHmpKa[1]+logkix[2]-logkix[1]) +
log1p_exp(log(10)*(pHmpKa[2]+logkix[3]-logkix[2])))/log(10)-
log1p_exp(log(10)*(pHmpKa[1]) +
log1p_exp(log(10)*(pHmpKa[2])))/log(10);
}
return logki;
}
vector areaandslope(real time1, real time2, real invki1, real invki2) {
vector[2] cki_b;
real bo;
real cki;
if (invki2>1.001*invki1) {
bo = (log(invki2)-log(invki1))/(time2-time1);
cki = (invki2-invki1)/bo;
}
else {
bo = 0.001/(time2-time1);
cki = (time2-time1)*(invki2+invki1)/2;
}
cki_b[1] = cki;
cki_b[2] = bo;
return cki_b;
}
real chromgratrapz(int steps,
vector logkwx, vector logkmx, vector pKaw, vector alpha,
real S2, vector apH, vector chargesA, vector chargesB,
int nDiss, vector hplcparam) {
real tg = hplcparam[1];
real td = hplcparam[2];
real to = hplcparam[3];
vector[1] sol;
real time1;
real time2;
vector[2] fipH1;
vector[2] fipH2;
real logki1;
real logki2;
real invki1;
real invki2;
vector[2] cki_b;
real cumki1;
real cumki2;
real bo;
real tr;
real dt;
dt = tg/steps;
time1 = 0;
time2 = td;
fipH1 = gra_state(time1, hplcparam);
// fipH2 = fipH1;
logki1 = funlogki(logkwx, logkmx, pKaw, alpha, S2, apH, nDiss, chargesA, chargesB, fipH1);
//logki2 = logki1;
invki1 = 1/to/10^logki1;
invki2 = invki1;
cumki1 = 0;
cumki2 = td*invki1; // cumulative area
bo = 0.001/td; // slope
for(x in 1:steps){
if (cumki2>=1) continue;
time1 = time2;
time2 += dt;
// fipH1 = fipH2;
fipH2 = gra_state(time2, hplcparam);
// logki1 = logki2;
logki2 = funlogki(logkwx, logkmx, pKaw, alpha, S2, apH, nDiss, chargesA, chargesB, fipH2);
invki1 = invki2;
invki2 = 1/to/10^logki2;
cki_b = areaandslope(time1, time2, invki1, invki2);
cumki1 = cumki2;
cumki2 += cki_b[1]; // cumulative area
bo = cki_b[2]; //slope
}
if (cumki2>=1) {
tr = time1+log1p((1-cumki1)*bo/invki1)/bo;
}
else if (cumki2<1) {
tr = time2+(1-cumki2)/invki2;
}
return tr;
}
real partial_sum(int[] ind, int start, int end, vector trObs,
int[] mod, int[] steps, int[] analyte,
vector[] hplcparam, vector[] chargesA, vector[] chargesB, int[] nDiss,
vector[] logkwx1, vector dlogkT1, vector[] S1mx1, vector[] S1ax1,
vector[] pKaw1, vector[] alpham1, vector[] alphaa1,
real S2m1, real S2a1,
vector apH1,
vector sigma1,
vector[] logkwx2, vector dlogkT2, vector[] S1mx2, vector[] S1ax2,
vector[] pKaw2, vector[] alpham2, vector[] alphaa2,
real S2m2, real S2a2,
vector apH2,
vector sigma2,
int[] column) {
real lp = 0;
real y_hat;
for(z in start:end){
if(column[z]==1){
if (mod[z]==1) {
y_hat = chromgratrapz(steps[z],
logkwx1[analyte[z],] + dlogkT1[analyte[z]]*hplcparam[z,11],
S1mx1[analyte[z],],
pKaw1[analyte[z],],
alpham1[analyte[z],],
S2m1,
apH1,
chargesA[analyte[z],],
chargesB[analyte[z],],
nDiss[analyte[z]],
hplcparam[z]);
}
if (mod[z]==2) {
y_hat = chromgratrapz(steps[z],
logkwx1[analyte[z],] + dlogkT1[analyte[z]]*hplcparam[z,11],
S1ax1[analyte[z],],
pKaw1[analyte[z],],
alphaa1[analyte[z],],
S2a1,
apH1,
chargesA[analyte[z],],
chargesB[analyte[z],],
nDiss[analyte[z]],
hplcparam[z]);
}
real trHat = hplcparam[z,3] + hplcparam[z,4] + y_hat;
lp = lp + student_t_lpdf(trObs[z] | 3, trHat, sigma1[analyte[z]]);
}
if(column[z]==2){
if (mod[z]==1) {
y_hat = chromgratrapz(steps[z],
logkwx2[analyte[z],] + dlogkT2[analyte[z]]*hplcparam[z,11],
S1mx2[analyte[z],],
pKaw2[analyte[z],],
alpham2[analyte[z],],
S2m2,
apH2,
chargesA[analyte[z],],
chargesB[analyte[z],],
nDiss[analyte[z]],
hplcparam[z]);
}
if (mod[z]==2) {
y_hat = chromgratrapz(steps[z],
logkwx2[analyte[z],] + dlogkT2[analyte[z]]*hplcparam[z,11],
S1ax2[analyte[z],],
pKaw2[analyte[z],],
alphaa2[analyte[z],],
S2a2,
apH2,
chargesA[analyte[z],],
chargesB[analyte[z],],
nDiss[analyte[z]],
hplcparam[z]);
}
real trHat = hplcparam[z,3] + hplcparam[z,4] + y_hat;
lp = lp + student_t_lpdf(trObs[z] | 3, trHat, sigma2[analyte[z]]);
}
}
return lp;
}
}
data{
int nAnalytes; // number of analytes
int nObs; // number of observations
int npH; // npH;
int analyte[nObs]; // analyte indexes
int<lower=1> steps[nObs]; // steps for gradient retention time aproimation
vector[11] hplcparam[nObs]; // [tg, td, to, te, fio, fik, mod, pHo, alpha1, alpha2, (temp-25)/10]
int<lower=0> mod[nObs]; // MeOH==1, ACN==2 (repeats hplcparam(:,7))
int<lower=0> column[nObs]; // MeOH==1, ACN==2 (repeats hplcparam(:,7))
vector[nAnalytes] logPobs;
int<lower=0,upper=2> maxR;
int<lower=0,upper=2> R[nAnalytes];
ordered[maxR] pKaslit[nAnalytes];
vector[maxR] pKasliterror[nAnalytes];
vector[maxR] groupsA[nAnalytes];
vector[maxR] groupsB[nAnalytes];
vector[maxR+1] chargesA[nAnalytes];
vector[maxR+1] chargesB[nAnalytes];
int<lower=0> K; // number of predictors (functional groups)
matrix[nAnalytes, K] nrfungroups; // predictor matrix (functional groups)
vector[nObs] trobs; // observed retention factors
}
transformed data {
int grainsize = 1;
int ind[nObs] = rep_array(1, nObs);
vector[3] point_mu_lower = [0.75,0.75,0.75]'; // mean priors for rho
vector[3] point_scale_lower = [0.125,0.125,0.125]'; // std priors for rho
}
parameters{
real logkwHat1; // typical value of logkw [N]
real S1mHat1; // typical value of S1m [N]
real S1aHat1; // typical value of S1a [N]
real dlogkwHat1[2]; // typical value of dlogkw [A,B]
real dSmHat1[2]; // typical value of dlogkm [A,B]
real dSaHat1[2]; // typical value of dlogka [A,B]
real<lower = 0> S2mHat1; // typical value of S2m
real<lower = 0> S2aHat1; // typical value of S2a
vector[3] beta1; // effects of logP
real dlogkTHat1; // typical dlogkT
vector[2] alphaAHat; // changes of pKa with org. mod for acids [MeOH, ACN]
vector[2] alphaBHat; // changes of pKa with org. mod for bases [MeOH, ACN]
vector<lower = 0.01>[3] omega1; // between analyte variabilities (neutral forms)
corr_matrix[3] rho11; // correlation matrix
vector<lower = 0.01>[3] kappa1; // between analyte variabilities (diss. forms)
vector<lower = 0.01>[2] tau; // between analyte variabilities for acids pKa
cholesky_factor_corr[2] L2; // cholesky
real<lower = 0.01, upper = 1> omegadlogkT1; // between analyte variability for temperature
// between buffer differences
vector[2] apH1; // pH effects
vector[K] pilogkw1; // regression coefficient for logkw
vector[K] piS1m1; // regression coefficient for S1m
vector[K] piS1a1; // regression coefficient for S1a
vector<lower = 0.01>[3] sdpi1; // between analyte variabilities for acids pKa
// residual variability
real<lower = 0.01> msigma1; // mean
real<lower = 0.01> ssigma1; // scale
real logkwHat2; // typical value of logkw [N]
real S1mHat2; // typical value of S1m [N]
real S1aHat2; // typical value of S1a [N]
real dlogkwHat2[2]; // typical value of dlogkw [A,B]
real dSmHat2[2]; // typical value of dlogkm [A,B]
real dSaHat2[2]; // typical value of dlogka [A,B]
real<lower = 0> S2mHat2; // typical value of S2m
real<lower = 0> S2aHat2; // typical value of S2a
vector[3] beta2; // effects of logP
real dlogkTHat2; // typical dlogkT
vector<lower = 0.01>[3] omega2; // between analyte variabilities (neutral forms)
corr_matrix[3] rho12; // correlation matrix
vector<lower = 0.01>[3] kappa2; // between analyte variabilities (diss. forms)
real<lower = 0.01, upper = 1> omegadlogkT2; // between analyte variability for temperature
// between buffer differences
vector[2] apH2; // pH effects
vector[K] pilogkw2; // regression coefficient for logkw
vector[K] piS1m2; // regression coefficient for S1m
vector[K] piS1a2; // regression coefficient for S1a
vector<lower = 0.01>[3] sdpi2; // between analyte variabilities for acids pKa
// residual variability
real<lower = 0.01> msigma2; // mean
real<lower = 0.01> ssigma2; // scale
// individual values of chromatographic parameters
vector[3] param1[nAnalytes];
vector[nAnalytes] dlogkT1;
matrix[nAnalytes,maxR+1] dlogkwA1;
matrix[nAnalytes,maxR+1] dlogkwB1;
matrix[nAnalytes,maxR+1] dSmA1;
matrix[nAnalytes,maxR+1] dSmB1;
matrix[nAnalytes,maxR+1] dSaA1;
matrix[nAnalytes,maxR+1] dSaB1;
vector[maxR] pKaw1[nAnalytes];
matrix[maxR,nAnalytes] etaStd11;
matrix[maxR,nAnalytes] etaStd21;
vector[3] param2[nAnalytes];
vector[nAnalytes] dlogkT2;
matrix[nAnalytes,maxR+1] dlogkwA2;
matrix[nAnalytes,maxR+1] dlogkwB2;
matrix[nAnalytes,maxR+1] dSmA2;
matrix[nAnalytes,maxR+1] dSmB2;
matrix[nAnalytes,maxR+1] dSaA2;
matrix[nAnalytes,maxR+1] dSaB2;
vector[maxR] pKaw2[nAnalytes];
matrix[maxR,nAnalytes] etaStd12;
matrix[maxR,nAnalytes] etaStd22;
// and residuals
vector<lower = 0.01, upper = 4>[nAnalytes] sigma1;
vector<lower = 0.01, upper = 4>[nAnalytes] sigma2;
}
transformed parameters{
vector[maxR+1] logkwx1[nAnalytes];
vector[maxR+1] S1mx1[nAnalytes];
vector[maxR+1] S1ax1[nAnalytes];
matrix[nAnalytes,maxR] alpha11; //MeOH or ACN
matrix[nAnalytes,maxR] alpha21; //MeOH or ACN
vector[maxR] alpham1[nAnalytes];
vector[maxR] alphaa1[nAnalytes];
vector[3] miu1[nAnalytes];
vector[maxR+1] logkwx2[nAnalytes];
vector[maxR+1] S1mx2[nAnalytes];
vector[maxR+1] S1ax2[nAnalytes];
matrix[nAnalytes,maxR] alpha12; //MeOH or ACN
matrix[nAnalytes,maxR] alpha22; //MeOH or ACN
vector[maxR] alpham2[nAnalytes];
vector[maxR] alphaa2[nAnalytes];
vector[3] miu2[nAnalytes];
cov_matrix[3] Omega1; // variance-covariance matrix
Omega1 = quad_form_diag(rho11, omega1); // diag_matrix(omega) * rho * diag_matrix(omega)
cov_matrix[3] Omega2; // variance-covariance matrix
Omega2 = quad_form_diag(rho12, omega2); // diag_matrix(omega) * rho * diag_matrix(omega)
// Matt's trick to use unit scale
alpha11 = diag_pre_multiply(tau, L2 * etaStd11)';
alpha21 = diag_pre_multiply(tau, L2 * etaStd21)';
alpha12 = diag_pre_multiply(tau, L2 * etaStd12)';
alpha22 = diag_pre_multiply(tau, L2 * etaStd22)';
for(i in 1:nAnalytes){
miu1[i,1] = logkwHat1 + beta1[1] * (logPobs[i]-2.2) + nrfungroups[i] * pilogkw1;
miu1[i,2] = S1mHat1 + beta1[2] * (logPobs[i]-2.2) + nrfungroups[i] * piS1m1;
miu1[i,3] = S1aHat1 + beta1[3] * (logPobs[i]-2.2) + nrfungroups[i] * piS1a1;
miu2[i,1] = logkwHat2 + beta2[1] * (logPobs[i]-2.2) + nrfungroups[i] * pilogkw2;
miu2[i,2] = S1mHat2 + beta2[2] * (logPobs[i]-2.2) + nrfungroups[i] * piS1m2;
miu2[i,3] = S1aHat2 + beta2[3] * (logPobs[i]-2.2) + nrfungroups[i] * piS1a2;
}
for(i in 1:nAnalytes){
for(r in 1:maxR+1){
logkwx1[i,r] = param1[i, 1] +
dlogkwA1[i,r]*chargesA[i,r] +
dlogkwB1[i,r]*chargesB[i,r];
S1mx1[i,r] = (param1[i, 2] +
dSmA1[i,r]*chargesA[i,r] +
dSmB1[i,r]*chargesB[i,r])*(1+(S2mHat1));
S1ax1[i,r] = (param1[i, 3] +
dSaA1[i,r]*chargesA[i,r] +
dSaB1[i,r]*chargesB[i,r])*(1+(S2aHat1));
logkwx2[i,r] = param2[i, 1] +
dlogkwA2[i,r]*chargesA[i,r] +
dlogkwB2[i,r]*chargesB[i,r];
S1mx2[i,r] = (param2[i, 2] +
dSmA2[i,r]*chargesA[i,r] +
dSmB2[i,r]*chargesB[i,r])*(1+(S2mHat2));
S1ax2[i,r] = (param2[i, 3] +
dSaA2[i,r]*chargesA[i,r] +
dSaB2[i,r]*chargesB[i,r])*(1+(S2aHat2));
}}
for(i in 1:nAnalytes){
alpham1[i,1] = (alphaAHat[1]+alpha11[i,1]) * groupsA[i,1] + (alphaBHat[1]+alpha11[i,1]) * groupsB[i,1];
alpham1[i,2] = (alphaAHat[1]+alpha21[i,1]) * groupsA[i,2] + (alphaBHat[1]+alpha21[i,1]) * groupsB[i,2];
alphaa1[i,1] = (alphaAHat[2]+alpha11[i,2]) * groupsA[i,1] + (alphaBHat[2]+alpha11[i,2]) * groupsB[i,1];
alphaa1[i,2] = (alphaAHat[2]+alpha21[i,2]) * groupsA[i,2] + (alphaBHat[2]+alpha21[i,2]) * groupsB[i,2];
alpham2[i,1] = (alphaAHat[1]+alpha12[i,1]) * groupsA[i,1] + (alphaBHat[1]+alpha12[i,1]) * groupsB[i,1];
alpham2[i,2] = (alphaAHat[1]+alpha22[i,1]) * groupsA[i,2] + (alphaBHat[1]+alpha22[i,1]) * groupsB[i,2];
alphaa2[i,1] = (alphaAHat[2]+alpha12[i,2]) * groupsA[i,1] + (alphaBHat[2]+alpha12[i,2]) * groupsB[i,1];
alphaa2[i,2] = (alphaAHat[2]+alpha22[i,2]) * groupsA[i,2] + (alphaBHat[2]+alpha22[i,2]) * groupsB[i,2];
}
}
model{
logkwHat1 ~ normal(2.2, 2);
S1mHat1 ~ normal(4, 1);
S1aHat1 ~ normal(5, 1);
dlogkwHat1 ~ normal(-1,0.125);
dSmHat1 ~ normal(0,0.5);
dSaHat1 ~ normal(0,0.5);
S2mHat1 ~ lognormal(-1.6,0.125);
S2aHat1 ~ lognormal(0.69,0.125);
alphaAHat ~ normal(2,0.25);
alphaBHat ~ normal(-1,0.25);
beta1[{1}] ~ normal(1,0.125);
beta1[{2,3}] ~ normal(0.5,0.5);
omega1 ~ normal(0,2);
rho11 ~ lkj_corr_point_lower_tri(point_mu_lower, point_scale_lower);
kappa1 ~ normal(0,0.5);
apH1 ~ normal(0,0.1);
pilogkw1 ~ normal(0,sdpi1[1]);
piS1m1 ~ normal(0,sdpi1[2]);
piS1a1 ~ normal(0,sdpi1[3]);
sdpi1 ~ normal(0,0.1);
logkwHat2 ~ normal(2.2, 2);
S1mHat2 ~ normal(4, 1);
S1aHat2 ~ normal(5, 1);
dlogkwHat2 ~ normal(-1,0.125);
dSmHat2 ~ normal(0,0.5);
dSaHat2 ~ normal(0,0.5);
S2mHat2 ~ lognormal(-1.6,0.125);
S2aHat2 ~ lognormal(0.69,0.125);
beta2[{1}] ~ normal(1,0.125);
beta2[{2,3}] ~ normal(0.5,0.5);
omega2 ~ normal(0,2);
rho12 ~ lkj_corr_point_lower_tri(point_mu_lower, point_scale_lower);
kappa2 ~ normal(0,0.5);
apH2 ~ normal(0,0.1);
pilogkw2 ~ normal(0,sdpi2[1]);
piS1m2 ~ normal(0,sdpi2[2]);
piS1a2 ~ normal(0,sdpi2[3]);
sdpi2 ~ normal(0,0.1);
tau ~ normal(0,0.5);
L2 ~ lkj_corr_cholesky_point_lower_tri_two(0.75, 0.125);
dlogkTHat1 ~ normal(-0.087,0.022);
omegadlogkT1 ~ normal(0,0.022);
dlogkTHat2 ~ normal(-0.087,0.022);
omegadlogkT2 ~ normal(0,0.022);
sigma1 ~ lognormal(log(msigma1),ssigma1);
sigma2 ~ lognormal(log(msigma2),ssigma2);
msigma1 ~ normal(0,1);
ssigma1 ~ normal(0,1);
msigma2 ~ normal(0,1);
ssigma2 ~ normal(0,1);
for(i in 1:nAnalytes){
param1[i] ~ multi_normal(miu1[i],Omega1);
param2[i] ~ multi_normal(miu2[i],Omega2);
}
to_vector(dlogkwA1) ~ normal(dlogkwHat1[1],kappa1[1]);
to_vector(dlogkwB1) ~ normal(dlogkwHat1[2],kappa1[1]);
to_vector(dSmA1) ~ normal(dSmHat1[1],kappa1[2]);
to_vector(dSmB1) ~ normal(dSmHat1[2],kappa1[2]);
to_vector(dSaA1) ~ normal(dSaHat1[1],kappa1[3]);
to_vector(dSaB1) ~ normal(dSaHat1[2],kappa1[3]);
to_vector(etaStd11) ~ std_normal();
to_vector(etaStd21) ~ std_normal();
dlogkT1 ~ normal(dlogkTHat1,omegadlogkT1);
to_vector(dlogkwA2) ~ normal(dlogkwHat2[1],kappa2[1]);
to_vector(dlogkwB2) ~ normal(dlogkwHat2[2],kappa2[1]);
to_vector(dSmA2) ~ normal(dSmHat2[1],kappa2[2]);
to_vector(dSmB2) ~ normal(dSmHat2[2],kappa2[2]);
to_vector(dSaA2) ~ normal(dSaHat2[1],kappa2[3]);
to_vector(dSaB2) ~ normal(dSaHat2[2],kappa2[3]);
to_vector(etaStd12) ~ std_normal();
to_vector(etaStd22) ~ std_normal();
dlogkT2 ~ normal(dlogkTHat2,omegadlogkT2);
for (i in 1:nAnalytes){
pKaw1[i] ~ normal(pKaslit[i],pKasliterror[i]);
pKaw2[i] ~ normal(pKaslit[i],pKasliterror[i]);
}
target += reduce_sum(partial_sum, ind, grainsize, trobs,
mod, steps, analyte, hplcparam, chargesA, chargesB, R,
logkwx1, dlogkT1, S1mx1, S1ax1,
pKaw1, alpham1, alphaa1,
S2mHat1, S2aHat1,
apH1, sigma1,
logkwx2, dlogkT2, S1mx2, S1ax2,
pKaw2, alpham2, alphaa2,
S2mHat2, S2aHat2,
apH2, sigma2,column);
}