functions {
// credit http://srmart.in/informative-priors-for-correlation-matrices-an-easy-approach/
vector lower_tri(matrix mat) {
int d = rows(mat);
int lower_tri_d = d * (d - 1) / 2;
vector[lower_tri_d] lower;
int count = 1;
for(r in 2:d) {
for(c in 1:(r - 1)) {
lower[count] = mat[r,c];
count += 1;
}
}
return(lower);
}
// credit http://srmart.in/informative-priors-for-correlation-matrices-an-easy-approach/
real lkj_corr_point_lower_tri_lpdf(matrix rho, vector point_mu_lower, vector point_scale_lower) {
real lpdf = lkj_corr_lpdf(rho | 1) + normal_lpdf(lower_tri(rho) | point_mu_lower, point_scale_lower);
return(lpdf);
}
real lkj_corr_cholesky_point_lower_tri_two_lpdf(matrix cor_L, real point_mu_lower, real point_scale_lower) {
real lpdf = lkj_corr_cholesky_lpdf(cor_L | 1);
int d = rows(cor_L);
matrix[d,d] cor = multiply_lower_tri_self_transpose(cor_L);
lpdf += normal_lpdf(cor[2,1] | point_mu_lower, point_scale_lower);
return(lpdf);
}
// pH and fi at a given time at column inlet
vector gra_state(real t, vector hplcparam) {
vector[2] sol;
real tg = hplcparam[1];
real td = hplcparam[2];
real fio = hplcparam[5];
real fik = hplcparam[6];
real pHo = hplcparam[8];
real alpha1 = hplcparam[9];
real alpha2 = hplcparam[10];
real fi;
fi = fio+(fik-fio)/tg*(t-td);
if (t<td)
fi = fio;
else if (t>tg+td)
fi = fik;
sol[1]=fi;
sol[2]=pHo+alpha1*fi+alpha2*fi^2;
return sol;
}
real funlogki(vector logkwx, vector S1, vector pKaw, vector alpha, real S2, vector apH,
int nDiss, vector chargesA, vector chargesB, vector fipH) {
real logki;
vector[3] logkix;
vector[2] pHmpKa;
real fi=fipH[1];
real pH=fipH[2];
logkix=logkwx-S1*fi/(1+S2*fi)+ chargesA*apH[1]*(pH-7) + chargesB*apH[2]*(pH-7);
pHmpKa=pH-(pKaw+alpha*fi);
if (nDiss==0) {
logki = logkix[1];
}
else if (nDiss==1){
logki=logkix[1] +
log1p_exp(log(10)*(pHmpKa[1]+logkix[2]-logkix[1]))/log(10)-
log1p_exp(log(10)*(pHmpKa[1]))/log(10);
}
else if (nDiss==2){
logki = logkix[1] +
log1p_exp(log(10)*(pHmpKa[1]+logkix[2]-logkix[1]) +
log1p_exp(log(10)*(pHmpKa[2]+logkix[3]-logkix[2])))/log(10)-
log1p_exp(log(10)*(pHmpKa[1]) +
log1p_exp(log(10)*(pHmpKa[2])))/log(10);
}
return logki;
}
vector areaandslope(real time1, real time2, real invki1, real invki2) {
vector[2] cki_b;
real bo;
real cki;
if (invki2>1.001*invki1) {
bo = (log(invki2)-log(invki1))/(time2-time1);
cki = (invki2-invki1)/bo;
}
else {
bo = 0.001/(time2-time1);
cki = (time2-time1)*(invki2+invki1)/2;
}
cki_b[1] = cki;
cki_b[2] = bo;
return cki_b;
}
real chromgratrapz(int steps,
vector logkwx, vector logkmx, vector pKaw, vector alpha,
real S2, vector apH, vector chargesA, vector chargesB,
int nDiss, vector hplcparam) {
real tg = hplcparam[1];
real td = hplcparam[2];
real to = hplcparam[3];
vector[1] sol;
real time1;
real time2;
vector[2] fipH1;
vector[2] fipH2;
real logki1;
real logki2;
real invki1;
real invki2;
vector[2] cki_b;
real cumki1;
real cumki2;
real bo;
real tr;
real dt;
dt = tg/steps;
time1 = 0;
time2 = td;
fipH1 = gra_state(time1, hplcparam);
// fipH2 = fipH1;
logki1 = funlogki(logkwx, logkmx, pKaw, alpha, S2, apH, nDiss, chargesA, chargesB, fipH1);
//logki2 = logki1;
invki1 = 1/to/10^logki1;
invki2 = invki1;
cumki1 = 0;
cumki2 = td*invki1; // cumulative area
bo = 0.001/td; // slope
for(x in 1:steps){
if (cumki2>=1) continue;
time1 = time2;
time2 += dt;
// fipH1 = fipH2;
fipH2 = gra_state(time2, hplcparam);
// logki1 = logki2;
logki2 = funlogki(logkwx, logkmx, pKaw, alpha, S2, apH, nDiss, chargesA, chargesB, fipH2);
invki1 = invki2;
invki2 = 1/to/10^logki2;
cki_b = areaandslope(time1, time2, invki1, invki2);
cumki1 = cumki2;
cumki2 += cki_b[1]; // cumulative area
bo = cki_b[2]; //slope
}
if (cumki2>=1) {
tr = time1+log1p((1-cumki1)*bo/invki1)/bo;
}
else if (cumki2<1) {
tr = time2+(1-cumki2)/invki2;
}
return tr;
}
real partial_sum(int[] ind, int start, int end, vector trObs,
int[] mod, int[] steps, int[] analyte, int[] pHid,
vector[] hplcparam, vector[] chargesA, vector[] chargesB, int[] nDiss,
vector[] logkwx, vector dlogkT, vector[] S1mx, vector[] S1ax,
vector[] pKaw, vector[] alpham, vector[] alphaa,
real S2m, real S2a,
vector apH,
vector sigma) {
real lp = 0;
real y_hat;
for(z in start:end){
if (mod[z]==1) {
y_hat = chromgratrapz(steps[z],
logkwx[analyte[z],] + dlogkT[analyte[z]]*hplcparam[z,11],
S1mx[analyte[z],],
pKaw[analyte[z],],
alpham[analyte[z],],
S2m,
apH,
chargesA[analyte[z],],
chargesB[analyte[z],],
nDiss[analyte[z]],
hplcparam[z]);
}
if (mod[z]==2) {
y_hat = chromgratrapz(steps[z],
logkwx[analyte[z],] + dlogkT[analyte[z]]*hplcparam[z,11],
S1ax[analyte[z],],
pKaw[analyte[z],],
alphaa[analyte[z],],
S2a,
apH,
chargesA[analyte[z],],
chargesB[analyte[z],],
nDiss[analyte[z]],
hplcparam[z]);
}
real trHat = hplcparam[z,3] + hplcparam[z,4] + y_hat;
lp = lp + student_t_lpdf(trObs[z] | 3, trHat, sigma[analyte[z]]);
}
return lp;
}
}
data{
int nAnalytes1; // number of analytes
int nObs1; // number of observations
int npH; // npH;
int analyte1[nObs1]; // analyte indexes
int pHid1[nObs1];
int<lower=1> steps1[nObs1]; // steps for gradient retention time aproimation
vector[11] hplcparam1[nObs1]; // [tg, td, to, te, fio, fik, mod, pHo, alpha1, alpha2, (temp-25)/10]
int<lower=0> mod1[nObs1]; // MeOH==1, ACN==2 (repeats hplcparam(:,7))
vector[nAnalytes1] logPobs1;
int<lower=0,upper=2> maxR;
int<lower=0,upper=2> R1[nAnalytes1];
ordered[maxR] pKaslit1[nAnalytes1];
vector[maxR] pKasliterror1[nAnalytes1];
vector[maxR] groupsA1[nAnalytes1];
vector[maxR] groupsB1[nAnalytes1];
vector[maxR+1] chargesA1[nAnalytes1];
vector[maxR+1] chargesB1[nAnalytes1];
int<lower=0> K; // number of predictors (functional groups)
matrix[nAnalytes1, K] nrfungroups1; // predictor matrix (functional groups)
vector[nObs1] trobs1; // observed retention factors
int nAnalytes2; // number of analytes
int nObs2; // number of observations
int analyte2[nObs2]; // analyte indexes
int pHid2[nObs2];
int<lower=1> steps2[nObs2]; // steps for gradient retention time aproimation
vector[11] hplcparam2[nObs2]; // [tg, td, to, te, fio, fik, mod, pHo, alpha1, alpha2, (temp-25)/10]
int<lower=0> mod2[nObs2]; // MeOH==1, ACN==2 (repeats hplcparam(:,7))
vector[nAnalytes2] logPobs2;
int<lower=0,upper=2> R2[nAnalytes2];
ordered[maxR] pKaslit2[nAnalytes2];
vector[maxR] pKasliterror2[nAnalytes2];
vector[maxR] groupsA2[nAnalytes2];
vector[maxR] groupsB2[nAnalytes2];
vector[maxR+1] chargesA2[nAnalytes2];
vector[maxR+1] chargesB2[nAnalytes2];
// number of predictors (functional groups)
matrix[nAnalytes2, K] nrfungroups2; // predictor matrix (functional groups)
vector[nObs2] trobs2; // observed retention factors
}
transformed data {
int grainsize = 1;
int ind1[nObs1] = rep_array(1, nObs1);
int ind2[nObs2] = rep_array(1, nObs2);
vector[3] point_mu_lower = [0.75,0.75,0.75]'; // mean priors for rho
vector[3] point_scale_lower = [0.125,0.125,0.125]'; // std priors for rho
}
parameters{
real logkwHat1; // typical value of logkw [N]
real S1mHat1; // typical value of S1m [N]
real S1aHat1; // typical value of S1a [N]
real dlogkwHat1[2]; // typical value of dlogkw [A,B]
real dSmHat1[2]; // typical value of dlogkm [A,B]
real dSaHat1[2]; // typical value of dlogka [A,B]
real<lower = 0> S2mHat1; // typical value of S2m
real<lower = 0> S2aHat1; // typical value of S2a
vector[3] beta1; // effects of logP
real dlogkTHat1; // typical dlogkT
vector[2] alphaAHat1; // changes of pKa with org. mod for acids [MeOH, ACN]
vector[2] alphaBHat1; // changes of pKa with org. mod for bases [MeOH, ACN]
vector<lower = 0.01>[3] omega1; // between analyte variabilities (neutral forms)
corr_matrix[3] rho11; // correlation matrix
vector<lower = 0.01>[3] kappa1; // between analyte variabilities (diss. forms)
vector<lower = 0.01>[2] tau; // between analyte variabilities for acids pKa
cholesky_factor_corr[2] L2; // cholesky
real<lower = 0.01, upper = 1> omegadlogkT1; // between analyte variability for temperature
// between buffer differences
vector[2] apH1; // pH effects
vector[K] pilogkw1; // regression coefficient for logkw
vector[K] piS1m1; // regression coefficient for S1m
vector[K] piS1a1; // regression coefficient for S1a
vector<lower = 0.01>[3] sdpi1; // between analyte variabilities for acids pKa
// residual variability
real<lower = 0.01> msigma1; // mean
real<lower = 0.01> ssigma1; // scale
real logkwHat2; // typical value of logkw [N]
real S1mHat2; // typical value of S1m [N]
real S1aHat2; // typical value of S1a [N]
real dlogkwHat2[2]; // typical value of dlogkw [A,B]
real dSmHat2[2]; // typical value of dlogkm [A,B]
real dSaHat2[2]; // typical value of dlogka [A,B]
real<lower = 0> S2mHat2; // typical value of S2m
real<lower = 0> S2aHat2; // typical value of S2a
vector[3] beta2; // effects of logP
real dlogkTHat2; // typical dlogkT
vector[2] alphaAHat2; // changes of pKa with org. mod for acids [MeOH, ACN]
vector[2] alphaBHat2; // changes of pKa with org. mod for bases [MeOH, ACN]
vector<lower = 0.01>[3] omega2; // between analyte variabilities (neutral forms)
corr_matrix[3] rho12; // correlation matrix
vector<lower = 0.01>[3] kappa2; // between analyte variabilities (diss. forms)
real<lower = 0.01, upper = 1> omegadlogkT2; // between analyte variability for temperature
// between buffer differences
vector[2] apH2; // pH effects
vector[K] pilogkw2; // regression coefficient for logkw
vector[K] piS1m2; // regression coefficient for S1m
vector[K] piS1a2; // regression coefficient for S1a
vector<lower = 0.01>[3] sdpi2; // between analyte variabilities for acids pKa
// residual variability
real<lower = 0.01> msigma2; // mean
real<lower = 0.01> ssigma2; // scale
// individual values of chromatographic parameters
vector[3] param1[nAnalytes1];
vector[nAnalytes1] dlogkT1;
matrix[nAnalytes1,maxR+1] dlogkwA1;
matrix[nAnalytes1,maxR+1] dlogkwB1;
matrix[nAnalytes1,maxR+1] dSmA1;
matrix[nAnalytes1,maxR+1] dSmB1;
matrix[nAnalytes1,maxR+1] dSaA1;
matrix[nAnalytes1,maxR+1] dSaB1;
vector[maxR] pKaw1[nAnalytes1];
matrix[maxR,nAnalytes1] etaStd11;
matrix[maxR,nAnalytes1] etaStd21;
vector[3] param2[nAnalytes2];
vector[nAnalytes2] dlogkT2;
matrix[nAnalytes2,maxR+1] dlogkwA2;
matrix[nAnalytes2,maxR+1] dlogkwB2;
matrix[nAnalytes2,maxR+1] dSmA2;
matrix[nAnalytes2,maxR+1] dSmB2;
matrix[nAnalytes2,maxR+1] dSaA2;
matrix[nAnalytes2,maxR+1] dSaB2;
vector[maxR] pKaw2[nAnalytes2];
matrix[maxR,nAnalytes2] etaStd12;
matrix[maxR,nAnalytes2] etaStd22;
// and residuals
vector<lower = 0.01, upper = 4>[nAnalytes1] sigma1;
vector<lower = 0.01, upper = 4>[nAnalytes2] sigma2;
}
transformed parameters{
vector[maxR+1] logkwx1[nAnalytes1];
vector[maxR+1] S1mx1[nAnalytes1];
vector[maxR+1] S1ax1[nAnalytes1];
matrix[nAnalytes1,maxR] alpha11; //MeOH or ACN
matrix[nAnalytes1,maxR] alpha21; //MeOH or ACN
vector[maxR] alpham1[nAnalytes1];
vector[maxR] alphaa1[nAnalytes1];
vector[3] miu1[nAnalytes1];
vector[maxR+1] logkwx2[nAnalytes2];
vector[maxR+1] S1mx2[nAnalytes2];
vector[maxR+1] S1ax2[nAnalytes2];
matrix[nAnalytes2,maxR] alpha12; //MeOH or ACN
matrix[nAnalytes2,maxR] alpha22; //MeOH or ACN
vector[maxR] alpham2[nAnalytes2];
vector[maxR] alphaa2[nAnalytes2];
vector[3] miu2[nAnalytes2];
cov_matrix[3] Omega1; // variance-covariance matrix
Omega1 = quad_form_diag(rho11, omega1); // diag_matrix(omega) * rho * diag_matrix(omega)
cov_matrix[3] Omega2; // variance-covariance matrix
Omega2 = quad_form_diag(rho12, omega2); // diag_matrix(omega) * rho * diag_matrix(omega)
// Matt's trick to use unit scale
alpha11 = diag_pre_multiply(tau, L2 * etaStd11)';
alpha21 = diag_pre_multiply(tau, L2 * etaStd21)';
alpha12 = diag_pre_multiply(tau, L2 * etaStd12)';
alpha22 = diag_pre_multiply(tau, L2 * etaStd22)';
for(i in 1:nAnalytes1){
miu1[i,1] = logkwHat1 + beta1[1] * (logPobs1[i]-2.2) + nrfungroups1[i] * pilogkw1;
miu1[i,2] = S1mHat1 + beta1[2] * (logPobs1[i]-2.2) + nrfungroups1[i] * piS1m1;
miu1[i,3] = S1aHat1 + beta1[3] * (logPobs1[i]-2.2) + nrfungroups1[i] * piS1a1;
}
for(i in 1:nAnalytes2){
miu2[i,1] = logkwHat2 + beta2[1] * (logPobs2[i]-2.2) + nrfungroups2[i] * pilogkw2;
miu2[i,2] = S1mHat2 + beta2[2] * (logPobs2[i]-2.2) + nrfungroups2[i] * piS1m2;
miu2[i,3] = S1aHat2 + beta2[3] * (logPobs2[i]-2.2) + nrfungroups2[i] * piS1a2;
}
for(i in 1:nAnalytes1){
for(r in 1:maxR+1){
logkwx1[i,r] = param1[i, 1] +
dlogkwA1[i,r]*chargesA1[i,r] +
dlogkwB1[i,r]*chargesB1[i,r];
S1mx1[i,r] = (param1[i, 2] +
dSmA1[i,r]*chargesA1[i,r] +
dSmB1[i,r]*chargesB1[i,r])*(1+(S2mHat1));
S1ax1[i,r] = (param1[i, 3] +
dSaA1[i,r]*chargesA1[i,r] +
dSaB1[i,r]*chargesB1[i,r])*(1+(S2aHat1));
}}
for(i in 1:nAnalytes2){
for(r in 1:maxR+1){
logkwx2[i,r] = param2[i, 1] +
dlogkwA2[i,r]*chargesA2[i,r] +
dlogkwB2[i,r]*chargesB2[i,r];
S1mx2[i,r] = (param2[i, 2] +
dSmA2[i,r]*chargesA2[i,r] +
dSmB2[i,r]*chargesB2[i,r])*(1+(S2mHat2));
S1ax2[i,r] = (param2[i, 3] +
dSaA2[i,r]*chargesA2[i,r] +
dSaB2[i,r]*chargesB2[i,r])*(1+(S2aHat2));
}}
for(i in 1:nAnalytes1){
alpham1[i,1] = (alphaAHat1[1]+alpha11[i,1]) * groupsA1[i,1] + (alphaBHat1[1]+alpha11[i,1]) * groupsB1[i,1];
alpham1[i,2] = (alphaAHat1[1]+alpha21[i,1]) * groupsA1[i,2] + (alphaBHat1[1]+alpha21[i,1]) * groupsB1[i,2];
alphaa1[i,1] = (alphaAHat1[2]+alpha11[i,2]) * groupsA1[i,1] + (alphaBHat1[2]+alpha11[i,2]) * groupsB1[i,1];
alphaa1[i,2] = (alphaAHat1[2]+alpha21[i,2]) * groupsA1[i,2] + (alphaBHat1[2]+alpha21[i,2]) * groupsB1[i,2];
}
for(i in 1:nAnalytes2){
alpham2[i,1] = (alphaAHat2[1]+alpha12[i,1]) * groupsA2[i,1] + (alphaBHat2[1]+alpha12[i,1]) * groupsB2[i,1];
alpham2[i,2] = (alphaAHat2[1]+alpha22[i,1]) * groupsA2[i,2] + (alphaBHat2[1]+alpha22[i,1]) * groupsB2[i,2];
alphaa2[i,1] = (alphaAHat2[2]+alpha12[i,2]) * groupsA2[i,1] + (alphaBHat2[2]+alpha12[i,2]) * groupsB2[i,1];
alphaa2[i,2] = (alphaAHat2[2]+alpha22[i,2]) * groupsA2[i,2] + (alphaBHat2[2]+alpha22[i,2]) * groupsB2[i,2];
}
}
model{
logkwHat1 ~ normal(2.2, 2);
S1mHat1 ~ normal(4, 1);
S1aHat1 ~ normal(5, 1);
dlogkwHat1 ~ normal(-1,0.125);
dSmHat1 ~ normal(0,0.5);
dSaHat1 ~ normal(0,0.5);
S2mHat1 ~ lognormal(-1.6,0.125);
S2aHat1 ~ lognormal(0.69,0.125);
alphaAHat1 ~ normal(2,0.25);
alphaBHat1 ~ normal(-1,0.25);
beta1[{1}] ~ normal(1,0.125);
beta1[{2,3}] ~ normal(0.5,0.5);
omega1 ~ normal(0,2);
rho11 ~ lkj_corr_point_lower_tri(point_mu_lower, point_scale_lower);
kappa1 ~ normal(0,0.5);
apH1 ~ normal(0,0.1);
pilogkw1 ~ normal(0,sdpi1[1]);
piS1m1 ~ normal(0,sdpi1[2]);
piS1a1 ~ normal(0,sdpi1[3]);
sdpi1 ~ normal(0,0.1);
logkwHat2 ~ normal(2.2, 2);
S1mHat2 ~ normal(4, 1);
S1aHat2 ~ normal(5, 1);
dlogkwHat2 ~ normal(-1,0.125);
dSmHat2 ~ normal(0,0.5);
dSaHat2 ~ normal(0,0.5);
S2mHat2 ~ lognormal(-1.6,0.125);
S2aHat2 ~ lognormal(0.69,0.125);
alphaAHat2 ~ normal(2,0.25);
alphaBHat2 ~ normal(-1,0.25);
beta2[{1}] ~ normal(1,0.125);
beta2[{2,3}] ~ normal(0.5,0.5);
omega2 ~ normal(0,2);
rho12 ~ lkj_corr_point_lower_tri(point_mu_lower, point_scale_lower);
kappa2 ~ normal(0,0.5);
apH2 ~ normal(0,0.1);
pilogkw2 ~ normal(0,sdpi2[1]);
piS1m2 ~ normal(0,sdpi2[2]);
piS1a2 ~ normal(0,sdpi2[3]);
sdpi2 ~ normal(0,0.1);
tau ~ normal(0,0.5);
L2 ~ lkj_corr_cholesky_point_lower_tri_two(0.75, 0.125);
dlogkTHat1 ~ normal(-0.087,0.022);
omegadlogkT1 ~ normal(0,0.022);
dlogkTHat2 ~ normal(-0.087,0.022);
omegadlogkT2 ~ normal(0,0.022);
sigma1 ~ lognormal(log(msigma1),ssigma1);
sigma2 ~ lognormal(log(msigma2),ssigma2);
msigma1 ~ normal(0,1);
ssigma1 ~ normal(0,1);
msigma2 ~ normal(0,1);
ssigma2 ~ normal(0,1);
for(i in 1:nAnalytes1){
param1[i] ~ multi_normal(miu1[i],Omega1);
}
for(i in 1:nAnalytes2){
param2[i] ~ multi_normal(miu2[i],Omega2);
}
to_vector(dlogkwA1) ~ normal(dlogkwHat1[1],kappa1[1]);
to_vector(dlogkwB1) ~ normal(dlogkwHat1[2],kappa1[1]);
to_vector(dSmA1) ~ normal(dSmHat1[1],kappa1[2]);
to_vector(dSmB1) ~ normal(dSmHat1[2],kappa1[2]);
to_vector(dSaA1) ~ normal(dSaHat1[1],kappa1[3]);
to_vector(dSaB1) ~ normal(dSaHat1[2],kappa1[3]);
to_vector(etaStd11) ~ std_normal();
to_vector(etaStd21) ~ std_normal();
dlogkT1 ~ normal(dlogkTHat1,omegadlogkT1);
to_vector(dlogkwA2) ~ normal(dlogkwHat2[1],kappa2[1]);
to_vector(dlogkwB2) ~ normal(dlogkwHat2[2],kappa2[1]);
to_vector(dSmA2) ~ normal(dSmHat2[1],kappa2[2]);
to_vector(dSmB2) ~ normal(dSmHat2[2],kappa2[2]);
to_vector(dSaA2) ~ normal(dSaHat2[1],kappa2[3]);
to_vector(dSaB2) ~ normal(dSaHat2[2],kappa2[3]);
to_vector(etaStd12) ~ std_normal();
to_vector(etaStd22) ~ std_normal();
dlogkT2 ~ normal(dlogkTHat2,omegadlogkT2);
for (i in 1:nAnalytes1) pKaw1[i] ~ normal(pKaslit1[i],pKasliterror1[i]);
for (i in 1:nAnalytes2) pKaw2[i] ~ normal(pKaslit2[i],pKasliterror2[i]);
target += reduce_sum(partial_sum, ind1, grainsize, trobs1,
mod1, steps1, analyte1, pHid1, hplcparam1, chargesA1, chargesB1, R1,
logkwx1, dlogkT1, S1mx1, S1ax1,
pKaw1, alpham1, alphaa1,
S2mHat1, S2aHat1,
apH1, sigma1);
target += reduce_sum(partial_sum, ind2, grainsize, trobs2,
mod2, steps2, analyte2, pHid2, hplcparam2, chargesA2, chargesB2, R2,
logkwx2, dlogkT2, S1mx2, S1ax2,
pKaw2, alpham2, alphaa2,
S2mHat2, S2aHat2,
apH2, sigma2);
}
generated quantities{
}
5 Stan code
Stan code for model: