# AltGroneau.txt # # This file defines some estimation functions that are alternatives to the one described in "Bayesian Mixture Modeling of Significant p Values" by Groneau, Duizer, Bakker and Wagenmakers (2017) in the Journal of Experimental Psychology. An online app for the Markov chain Monte Carlo method of Groneau et all. is available at https://qfgronau.shinyapps.io/bmmsp. The functions are # GroneauExact: Obtain the marginal posterior of phi with numerical integration # GroneauDiscrete: Approximate the posterior with a discrete distribution. # mleg: Maximum likelihood for the mixture model # freeg1 Unrestricted normal-gamma prior on mu and sigma with numerical integration. # freeg2 Like freeg1, but witha discrete approximation. ############################################################################### GroneauExact = function(pvals,logp=F,cc=T) # Calculates and plots the posterior of phi using numerical integration. # Also produces some summary measures of the posterior. # To avoid p-values that are numerically zero, provide natural logs. # with logp = T. { if(cc) # Display Creative Commons license { cat("\nGroneauExact 1.0 \n") cat("Copyright Jerry Brunner 2018 \n") cat("Licensed under a Creative Commons Attribution-ShareAlike 3.0 (or later) Unported License. \n") cat("For details, visit http://creativecommons.org/licenses/by-sa/3.0 \n\n") } # Check input if(logp==F) { if(max(pvals)>0.05) stop('All p-values must be less than 0.05') if(min(pvals)<0) stop("P-values can't be negative!") if(min(pvals)==0) stop("One or more p-value equals zero. Try inputting ln(p). -Inf is not allowed.") } if(logp==T) { if(max(exp(pvals))>0.05) stop('All p-values must be less than 0.05') } ############################################################################ # Define necessary functions internally so GroneauExact will be self-contained. ############################################################################ post = function(sigma,mu,phi,x) # Result is proportional to the posterior. # Make sure mu < 0, 0 < sigma < 1, 0 < phi < 1 { # Avoiding numerical problems for small sigma by using logs logcd = dnorm(x,mu,sigma, log=T) - pnorm(-1.645,mu,sigma,log.p=T) condens = exp(logcd) like = prod( phi*dnorm(x)/0.05 + (1-phi)*condens ) value = like * dnorm(mu) return(value) } # End function post vpost = Vectorize(post, vectorize.args='sigma') fun1 = function(mu,phi,x) # Integral of post over sigma { value = integrate(vpost,lower = 0, upper = 1, mu = mu, phi=phi, x=x)$value return(value) } # End function fun1 vfun1 = Vectorize(fun1, vectorize.args='mu') fun2 = function(phi,x) # Integral of fun1 over mu # fun2 is proportional to pi(phi|x) { value = integrate(vfun1,lower = -Inf, upper = 0, phi=phi, x=x)$value return(value) } # End of function fun2 vfun2 = Vectorize(fun2, vectorize.args='phi') fun3 = function(phi,x) # Integral is proportional to E(phi|x) { value = phi * vfun2(phi,x=x) return(value) } # End of function fun3 vfun3 = Vectorize(fun3, vectorize.args='phi') ##################### End function definitions ############################# datta = qnorm(pvals,log.p=logp) # Transform p-values konstant = integrate(vfun2, lower=0, upper=1, x=datta)$value # Normalizing constant Phi = seq(from=0, to=1, by=0.01) Density = Phi-Phi for(j in 1:length(Phi)) Density[j] = fun2(Phi[j],datta) Density = Density / konstant titlestring = expression(paste('Exact Posterior Density of ',phi)) plot(Phi,Density,type='l',main=titlestring,xlab=expression(phi)) Prob = Density/sum(Density) # Ephi = sum(Phi*Prob) # cat("\n Rough E(phi|data) =",Ephi,"\n\n") # This numerical integration can take along time # PosterExpect = integrate(vfun3, lower=0, upper=1, x=datta)$value / konstant # This usually differs in the 3d decimal place PosterExpect = sum(Phi*Prob) value = round(PosterExpect,3) names(value) = 'E(phi|data)' return(value) } # End of function GroneauExact ############################################################################### ############################################################################### ############################################################################### ############################################################################### ############################################################################### GroneauDiscrete = function(pvals,logp=F, sdmu=1, cc=T, marginals=T, sigvals = seq(from=0.01,to=1,by=0.01), muvals = seq(from=-6,to=0,by=0.1), phivals = seq(from=0,to=1,by=0.01) ) # Calculates the posterior using a discrete approximation. sdmu is the standard # deviation of the prior normal distribution on mu. Natural logs # are used to try to avoid underflow. The function plots the # approximate marginal posterior density of phi and displays a table of posterior # expected values. With the default value of marginals=T, it returns a list # of data frames containing the marginal posteriors of phi,sigma and mu. With # marginals=F, it returns a data frame with the entire posterior distribution. # To avoid p-values that are numerically zero, provide natural logs and # set logp = T. { if(cc) # Display Creative Commons license { cat("\nGroneauDiscrete 1.1 \n") cat("Copyright Jerry Brunner 2018 \n") cat("Licensed under a Creative Commons Attribution-ShareAlike 3.0 (or later) Unported License. \n") cat("For details, visit http://creativecommons.org/licenses/by-sa/3.0 \n\n") } # Check input if(logp==F) { if(max(pvals)>0.05) stop('All p-values must be less than 0.05') if(min(pvals)<0) stop("P-values can't be negative!") if(min(pvals)==0) stop("One or more p-value equals zero. Try inputting ln(p). -Inf is not allowed.") } if(logp==T) { if(max(exp(pvals))>0.05) stop('All p-values must be less than 0.05') } ############################################################################ # Define the loglike function internally so GroneauDiscrete will be self-contained. ############################################################################ loglike = function(sigma,mu,phi,x) # Result is proportional to the posterior. # Make sure mu < 0, 0 < sigma < 1, 0 < phi < 1 { # Avoiding numerical problems for small sigma by using logs logcd = dnorm(x,mu,sigma, log=T) - pnorm(-1.645,mu,sigma,log.p=T) condens = exp(logcd) value = sum(log( phi*dnorm(x)/0.05 + (1-phi)*condens )) return(value) } # End function loglike ############################################################################ datta = qnorm(pvals,log.p=logp) # Transform p-values nlines = length(sigvals)*length(muvals)*length(phivals); nlines SIGMA = MU = PHI = LL = numeric(nlines) j=0 for(s in sigvals) { for(m in muvals) { for(f in phivals) { j = j+1 SIGMA[j]=s; MU[j]=m; PHI[j]=f LL[j] = loglike(s,m,f,datta) } # Next f } # Next m } # Next s # phi=0 with small sigma can cause log likelihood = -Inf bad = (1:nlines)[LL==-Inf] LL[bad] = NA logpostpc = LL + dnorm(MU,mean=0,sd=sdmu,log=T) # Log of posterior, plus a constant adjusted = logpostpc - max(logpostpc,na.rm=T) # Now max(adjusted) = 0, and exp(0)=1 propost = exp(adjusted) # Proportional to posterior, except NA are really zeros propost[bad] = 0 POSTERIOR = propost/sum(propost) # Normalize, making them probabilities POSTERIOR = round(POSTERIOR,10) # Better for display and makes no practical difference. posterior = data.frame(cbind(SIGMA, MU, PHI, POSTERIOR)) phiframe = aggregate(POSTERIOR ~ PHI, data=posterior, FUN=sum) phipost = phiframe$POSTERIOR CUMPROB = cumsum(phipost) phiframe = cbind(phiframe,CUMPROB) muframe = aggregate(POSTERIOR ~ MU, data=posterior, FUN=sum) mupost = muframe$POSTERIOR CUMPROB = cumsum(mupost) muframe = cbind(muframe,CUMPROB) sigmaframe = aggregate(POSTERIOR ~ SIGMA, data=posterior, FUN=sum) sigmapost = sigmaframe$POSTERIOR CUMPROB = cumsum(sigmapost) sigmaframe = cbind(sigmaframe,CUMPROB) PostExpectation = numeric(3) names(PostExpectation) = c("phi","mu","sigma") Ephi = round(sum(phivals*phipost),3) Emu = round(sum(muvals*mupost),3) Esigma = round(sum(sigvals*sigmapost),3) PostExpectation[1] = Ephi; PostExpectation[2] = Emu PostExpectation[3] = Esigma cat("\nPosterior Expected values \n"); print(PostExpectation); cat("\n") # Plot posterior of phi phigap = phivals[2]-phivals[1] # Assuming equally spaced even if user input. phidens = phipost/phigap # Divide by gap so area of bar = probability dtitlephi = expression(paste('Posterior Density of ',phi)) plot(phivals,phidens,type='l' ,xlab = expression(phi), ylab='Density', main=dtitlephi) if(marginals) {value = list(phiframe,muframe,sigmaframe) names(value) = c("Phi","Mu","Sigma") } else value = posterior return(value) } # End of function GroneauDiscrete ############################################################################### ############################################################################### ############################################################################### ############################################################################### ############################################################################### mleg = function(pvalues, logp=F, startvals=c(0.5,0,1), cc=T, details=F) # Calculates the mle of (phi, mu, sigma) for the mixture model of # Groneau et al. (2017) { if(cc) # Display Creative Commons license { cat("\nmleg 1.0 \n") cat("Copyright Jerry Brunner 2018 \n") cat("Licensed under a Creative Commons Attribution-ShareAlike 3.0 (or later) Unported License. \n") cat("For details, visit http://creativecommons.org/licenses/by-sa/3.0 \n\n") } # Check input if(logp==F) { if(max(pvalues)>0.05) stop('All p-values must be less than 0.05') if(min(pvalues)<0) stop("P-values can't be negative!") if(min(pvalues)==0) stop("One or more p-value equals zero. Try inputting ln(p). -Inf is not allowed.") } if(logp==T) { if(max(exp(pvalues))>0.05) stop('All p-values must be less than 0.05') } ############################################################################ # Define the minus log likelihood function internally ############################################################################ mloglike = function(theta,x) # In phi, mu, sigma order { phi = theta[1]; mu = theta[2]; sigma = theta[3] # Avoiding numerical problems for small sigma by using logs logcd = dnorm(x,mu,sigma, log=T) - pnorm(-1.645,mu,sigma,log.p=T) condens = exp(logcd) value = -sum(log( phi*dnorm(x)/0.05 + (1-phi)*condens )) if(value==Inf) value = 100^100 return(value) } # End function mloglike zz = qnorm(pvalues,log.p=logp) # Transform p-values seek = optim(par=startvals, fn=mloglike, method="L-BFGS-B", lower = c(0,-Inf,0), upper = c(1,Inf,Inf), x=zz) if(details) print(seek) mle = seek$par; names(mle) = c("phi","mu","sigma") return(mle) } # End of function mleg ############################################################################### ############################################################################### ############################################################################### ############################################################################### ############################################################################### freeg1 = function(pvals, muparam = c(0,10), sigmaparam = c(6,1/2), logp=F, cc=T, phivals = seq(from=0, to=1, by=0.01) ) # This function is based on the groneau et al. model, but without the restrictions # on mu and sigma, and with a proper prior built in. Prior on mu is normal # (Expected value, Standard deviation) and independent prior on sigma # is gamma (shape=alpha, scale=beta). This function # calculates and plots the posterior of phi using numerical integration. # If some p-values are numerically zero, you can get around it by providing # natural logs of the p-values and using the logp = T option. { if(cc) # Display Creative Commons license { cat("\nfreeg1 1.0 \n") cat("Copyright Jerry Brunner 2018 \n") cat("Licensed under a Creative Commons Attribution-ShareAlike 3.0 (or later) Unported License. \n") cat("For details, visit http://creativecommons.org/licenses/by-sa/3.0 \n\n") } # Check input if(logp==F) { if(max(pvals)>0.05) stop('All p-values must be less than 0.05') if(min(pvals)<0) stop("P-values can't be negative!") if(min(pvals)==0) stop("One or more p-value equals zero. Try inputting ln(p). -Inf is not allowed.") } if(logp==T) { if(max(exp(pvals))>0.05) stop('All p-values must be less than 0.05') } ############################################################################ # Define necessary functions internally so GroneauExact will be self-contained. ############################################################################ post = function(sigma,mu,phi,x) # Result is proportional to the posterior. # Make sure mu < 0, 0 < sigma < 1, 0 < phi < 1 { # Avoiding numerical problems for small sigma by using logs logcd = dnorm(x,mu,sigma, log=T) - pnorm(-1.645,mu,sigma,log.p=T) condens = exp(logcd) like = prod( phi*dnorm(x)/0.05 + (1-phi)*condens ) # Multiply the likelihood by the prior, independent normal-gamma # Non-traditional because gamma is on the sd, not the precision. muprior = dnorm(mu,muparam[1],muparam[2]) sigmaprior = dgamma(sigma,shape=sigmaparam[1], scale=sigmaparam[2]) value = like * muprior * sigmaprior return(value) } # End function post vpost = Vectorize(post, vectorize.args='sigma') fun1 = function(mu,phi,x) # Integral of post over sigma { value = integrate(vpost,lower = 0, upper = Inf, mu = mu, phi=phi, x=x)$value return(value) } # End function fun1 vfun1 = Vectorize(fun1, vectorize.args='mu') fun2 = function(phi,x) # Integral of fun1 over mu # fun2 is proportional to pi(phi|x) { value = integrate(vfun1,lower = -Inf, upper = Inf, phi=phi, x=x)$value return(value) } # End of function fun2 vfun2 = Vectorize(fun2, vectorize.args='phi') fun3 = function(phi,x) # Integral is proportional to E(phi|x) { value = phi * vfun2(phi,x=x) return(value) } # End of function fun3 vfun3 = Vectorize(fun3, vectorize.args='phi') ##################### End function definitions ############################# datta = qnorm(pvals,log.p=logp) # Transform p-values konstant = integrate(vfun2, lower=0, upper=1, x=datta)$value # Normalizing constant Phi = phivals Density = Phi-Phi for(j in 1:length(Phi)) Density[j] = fun2(Phi[j],datta) Density = Density / konstant titlestring = expression(paste('Exact Posterior Density of ',phi)) plot(Phi,Density,type='l',main=titlestring,xlab=expression(phi)) Prob = Density/sum(Density) # Ephi = sum(Phi*Prob) # cat("\n Rough E(phi|data) =",Ephi,"\n\n") # This numerical integration can take along time # PosterExpect = integrate(vfun3, lower=0, upper=1, x=datta)$value / konstant # This usually differs in the 3d decimal place PosterExpect = sum(Phi*Prob) value = PosterExpect names(value) = 'E(phi|data)' return(value) } # End of function freeg1 ############################################################################### ############################################################################### ############################################################################### ############################################################################### ############################################################################### freeg2 = function(pvals, muparam = c(0,10), sigmaparam = c(6,1/2), logp=F, cc=T, sigvals = seq(from=0.1,to=9,by=0.1), muvals = -5:40, phivals = seq(from=0,to=1,by=0.01) ) # In this version of posterior for the Groneau et al. model, the # parameters mu and sigma are freed, with mu any real number and sigma # greater than zero. The prior is normal on mu, gamma on sigma, independent. # Gamma parameters are shape alpha and scale beta, so E(sigma) = alpha*beta. # Calculates the posterior using a discrete approximation, as in GroneauDiscrete. # Natural logs are used to try to avoid underflow. The function plots the # approximate marginal posterior density of phi, and returns a list # of data frames containing the marginal posteriors of phi, sigma and mu. # To avoid p-values that are numerically zero, provide natural logs and # use the logp = T option. { if(cc) # Display Creative Commons license { cat("\nfreeg2 1.0 \n") cat("Copyright Jerry Brunner 2018 \n") cat("Licensed under a Creative Commons Attribution-ShareAlike 3.0 (or later) Unported License. \n") cat("For details, visit http://creativecommons.org/licenses/by-sa/3.0 \n\n") } # Check input if(logp==F) { if(max(pvals)>0.05) stop('All p-values must be less than 0.05') if(min(pvals)<0) stop("P-values can't be negative!") if(min(pvals)==0) stop("One or more p-value equals zero. Try inputting ln(p). -Inf is not allowed.") pvals=log(pvals) # Always work with logs. } if(logp==T) { if(max(exp(pvals))>0.05) stop('All p-values must be less than 0.05') } ############################################################################ # Define the loglike function internally so GroneauDiscrete will be self-contained. ############################################################################ loglike = function(sigma,mu,phi,x) # Result is proportional to the posterior. { # Avoiding numerical problems for small sigma by using logs logcd = dnorm(x,mu,sigma, log=T) - pnorm(-1.645,mu,sigma,log.p=T) condens = exp(logcd) value = sum(log( phi*dnorm(x)/0.05 + (1-phi)*condens )) return(value) } # End function loglike ############################################################################ datta = qnorm(pvals,log.p=T) # Transform p-values nlines = length(sigvals)*length(muvals)*length(phivals); nlines SIGMA = MU = PHI = LL = numeric(nlines) j=0 for(s in sigvals) { for(m in muvals) { for(f in phivals) { j = j+1 SIGMA[j]=s; MU[j]=m; PHI[j]=f LL[j] = loglike(s,m,f,datta) } # Next f } # Next m } # Next s # phi=0 with small sigma can cause log likelihood = -Inf bad = (1:nlines)[LL==-Inf] LL[bad] = NA # logpostpc will be log of posterior, plus a constant logpostpc = LL + dnorm(MU,mean=muparam[1],sd=muparam[2],log=T) + dgamma(SIGMA, shape=sigmaparam[1], scale=sigmaparam[2]) adjusted = logpostpc - max(logpostpc,na.rm=T) # Now max(adjusted) = 0, exponentiate and get one propost = exp(adjusted) # Proportional to posterior, except NA are really zeros propost[bad] = 0 POSTERIOR = propost/sum(propost) # Normalize, making them probabilities POSTERIOR = round(POSTERIOR,10) # Better for display and makes no practical difference. posterior = data.frame(cbind(SIGMA, MU, PHI, POSTERIOR)) phiframe = aggregate(POSTERIOR ~ PHI, data=posterior, FUN=sum) phipost = phiframe$POSTERIOR CUMPROB = cumsum(phipost) phiframe = cbind(phiframe,CUMPROB) muframe = aggregate(POSTERIOR ~ MU, data=posterior, FUN=sum) mupost = muframe$POSTERIOR CUMPROB = cumsum(mupost) muframe = cbind(muframe,CUMPROB) sigmaframe = aggregate(POSTERIOR ~ SIGMA, data=posterior, FUN=sum) sigmapost = sigmaframe$POSTERIOR CUMPROB = cumsum(sigmapost) sigmaframe = cbind(sigmaframe,CUMPROB) PostExpectation = numeric(3) names(PostExpectation) = c("phi","mu","sigma") Ephi = round(sum(phivals*phipost),3) Emu = round(sum(muvals*mupost),3) Esigma = round(sum(sigvals*sigmapost),3) PostExpectation[1] = Ephi; PostExpectation[2] = Emu PostExpectation[3] = Esigma cat("\nPosterior Expected values \n"); print(PostExpectation); cat("\n") # Plot posterior of phi phigap = phivals[2]-phivals[1] # Assuming equally spaced even if user input. phidens = phipost/phigap # Divide by gap so area of bar = probability dtitlephi = expression(paste('Posterior Density of ',phi)) plot(phivals,phidens,type='l' ,xlab = expression(phi), ylab='Density', main=dtitlephi) value = list(phiframe,muframe,sigmaframe) names(value) = c("Phi","Mu","Sigma") return(value) } # End of function freeg2 ############################################################################### ############################################################################### ############################################################################### ############################################################################### ############################################################################### DiscreteUniform = function(pvals, logp=F, cc=T, sigvals = seq(from=0.1,to=9,by=0.1), muvals = -20:20, phivals = seq(from=0,to=1,by=0.01) ) # In this version of posterior for the Groneau et al. model, the # parameters mu and sigma are freed, with mu any real number and sigma # greater than zero. The prior is flat, which may be interpreted as an # improper prior, or a proper uniform prior on a wide range of values. The # default grid of parameter values is intended to help locate the posterior. # Finding the MLE (posterior mode) with mleg before running DiscreteUniform # is highly recommended. # Calculates the posterior using a discrete approximation, as in GroneauDiscrete. # Natural logs are used to try to avoid underflow. The function plots the # approximate marginal posterior density of phi, and returns a list # of data frames containing the marginal posteriors of phi, sigma and mu. # To avoid p-values that are numerically zero, provide natural logs and # use the logp = T option. { if(cc) # Display Creative Commons license { cat("\nfreeg2 1.0 \n") cat("Copyright Jerry Brunner 2018 \n") cat("Licensed under a Creative Commons Attribution-ShareAlike 3.0 (or later) Unported License. \n") cat("For details, visit http://creativecommons.org/licenses/by-sa/3.0 \n\n") } # Check input if(logp==F) { if(max(pvals)>0.05) stop('All p-values must be less than 0.05') if(min(pvals)<0) stop("P-values can't be negative!") if(min(pvals)==0) stop("One or more p-value equals zero. Try inputting ln(p). -Inf is not allowed.") pvals=log(pvals) # Always work with logs. } if(logp==T) { if(max(exp(pvals))>0.05) stop('All p-values must be less than 0.05') } ############################################################################ # Define the loglike function internally so GroneauDiscrete will be self-contained. ############################################################################ loglike = function(sigma,mu,phi,x) # Result is proportional to the posterior. { # Avoiding numerical problems for small sigma by using logs logcd = dnorm(x,mu,sigma, log=T) - pnorm(-1.645,mu,sigma,log.p=T) condens = exp(logcd) value = sum(log( phi*dnorm(x)/0.05 + (1-phi)*condens )) return(value) } # End function loglike ############################################################################ datta = qnorm(pvals,log.p=T) # Transform p-values nlines = length(sigvals)*length(muvals)*length(phivals); nlines SIGMA = MU = PHI = LL = numeric(nlines) j=0 for(s in sigvals) { for(m in muvals) { for(f in phivals) { j = j+1 SIGMA[j]=s; MU[j]=m; PHI[j]=f LL[j] = loglike(s,m,f,datta) } # Next f } # Next m } # Next s # phi=0 with small sigma can cause log likelihood = -Inf bad = (1:nlines)[LL==-Inf] LL[bad] = NA # logpostpc will be log of posterior, plus a constant logpostpc = LL # Plus zero -- flat prior adjusted = logpostpc - max(logpostpc,na.rm=T) # Now max(adjusted) = 0, exponentiate and get one propost = exp(adjusted) # Proportional to posterior, except NA are really zeros propost[bad] = 0 POSTERIOR = propost/sum(propost) # Normalize, making them probabilities POSTERIOR = round(POSTERIOR,10) # Better for display and makes no practical difference. posterior = data.frame(cbind(SIGMA, MU, PHI, POSTERIOR)) phiframe = aggregate(POSTERIOR ~ PHI, data=posterior, FUN=sum) phipost = phiframe$POSTERIOR CUMPROB = cumsum(phipost) phiframe = cbind(phiframe,CUMPROB) muframe = aggregate(POSTERIOR ~ MU, data=posterior, FUN=sum) mupost = muframe$POSTERIOR CUMPROB = cumsum(mupost) muframe = cbind(muframe,CUMPROB) sigmaframe = aggregate(POSTERIOR ~ SIGMA, data=posterior, FUN=sum) sigmapost = sigmaframe$POSTERIOR CUMPROB = cumsum(sigmapost) sigmaframe = cbind(sigmaframe,CUMPROB) PostExpectation = numeric(3) names(PostExpectation) = c("phi","mu","sigma") Ephi = round(sum(phivals*phipost),3) Emu = round(sum(muvals*mupost),3) Esigma = round(sum(sigvals*sigmapost),3) PostExpectation[1] = Ephi; PostExpectation[2] = Emu PostExpectation[3] = Esigma cat("\nPosterior Expected values \n"); print(PostExpectation); cat("\n") # Plot posterior of phi phigap = phivals[2]-phivals[1] # Assuming equally spaced even if user input. phidens = phipost/phigap # Divide by gap so area of bar = probability dtitlephi = expression(paste('Posterior Density of ',phi)) plot(phivals,phidens,type='l' ,xlab = expression(phi), ylab='Density', main=dtitlephi) value = list(phiframe,muframe,sigmaframe) names(value) = c("Phi","Mu","Sigma") return(value) } # End of function DiscreteUniform