\documentclass[pdf, colorBG,slideColor]{prosper}
\hypersetup{pdfpagemode=FullScreen}
\usepackage{amsmath, amsfonts, amsbsy, pstricks, pst-node, pst-text, pst-3d}
%\usepackage{pdftex}
\usepackage{graphicx}
\usepackage{rotate, bm}
\usepackage{time}
\usepackage{morefloats}%for number of float objects over 18 and less than 36.
\pagenumbering{arabic}
%\newlength{\MiniPageLeft}
%\newlength{\MiniPageRight}
%\newlength{\ThisFigureWidth}
\newlength{\MiniPageLeft}
\newlength{\MiniPageRight}
\newlength{\ThisFigureWidth}
%\newcommand{\slidetextsize}{\footnotesize}
%\setlength{\oddsidemargin}{0in}
\newcommand{\gives}{\Rightarrow}
\newcommand{\betahat}{\hat{\beta}}
\newcommand{\mubar}{\bar{\mu}}
\newcommand{\muhat}{\hat{\mu}}
\newcommand{\thetabar}{\bar{\theta}}
\newcommand{\thetahat}{\hat{\theta}}
\newcommand{\sigmahat}{\hat{\sigma}}
\newcommand{\ybar}{\bar{y}}
\newcommand{\fracs}[1]{\frac{1}{#1}} %simple fraction: 1 over #1
\newcommand{\ifff}{\Leftrightarrow}
\newcommand{\noo}{\nonumber\\}
\newcommand{\no}{\nonumber}
\newcommand{\inv}{^{-1}}
\newcommand{\segfun}[1]{\left\{\begin{array}{cc}#1\end{array}\right.}
\newcommand{\dev}[2]{\frac{\partial #1}{\partial #2}}
\newcommand{\matrx}[2]{\left[\begin{array}{#1}#2\end{array}\right]}
\newcommand{\beq}{\begin{eqnarray}}
\newcommand{\eeq}{\end{eqnarray}}
\newcommand{\bi}{\begin{itemize}}
\newcommand{\ei}{\end{itemize}}
\renewcommand{\i}{\item}
\newcommand{\norm}[1]{\left\|#1\right\|}
\newcommand{\be}{\begin{enumerate}}
\newcommand{\ee}{\end{enumerate}}
%Bold math
\newcommand{\by}{\textbf{y}}
\newcommand{\bY}{\textbf{Y}}
\newcommand{\br}{\textbf{r}}
\newcommand{\bzero}{\textbf{0}}
\newcommand{\bp}{\textbf{p}}
\newcommand{\btheta}{\bm \theta}
\newcommand{\bTheta}{\bm \Theta}
\newcommand{\bbeta}{\bm \beta}
\newcommand{\brho}{\bm \rho}
\newcommand{\bPsi}{\bm \Psi}
\newcommand{\bPhi}{\bm \Phi}
\newcommand{\bpi}{\bm \pi}
\newcommand{\bmu}{\bm \mu}
\newcommand{\bSigma}{\bm \Sigma}
\newcommand{\bolde}{\bm e}
\newcommand{\bepsilon}{\bm \epsilon}
\newcommand{\bdelta}{\bm \delta}
\slideCaption{\scriptsize $\cdots$}
\begin{document}
%%%%%%%%slide 0%%%%%%%TITLE PAGE%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{\Large Intro to Bayesian Computing Series (I)}
\author{\large IBC members present\\
\normalsize Yi He, William Wu, Cindy Chen, Chuan Zhou, Heidi, Angel An, \& Lily Wang}
\email{ Yi.He@Vanderbilt.Edu }
\institution{Department of Biostatistics \\
School of Medicine\\
Vanderbilt University\\
February 16, 2006
}
\maketitle
%%%%%%%%slide 1%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize A little history of statistics}{
\ptsize{10}
As Efron (2004) said,
\bi
\i 19th century is Bayesian statistics
\i 20th century is frequentist statistics
\i What's next? Possible Empirical Bayesian?
\i In the last two decades, the rapid development of computing facilities and the invention of Markov chain Monte Carlo (MCMC) methods have facilitated Bayesian analysis.
\i Bayesian analysis is now feasible and attracts more and more attention from scientists in various applications.
\ei
}
\end{slide}
%%%%%%%%slide 2%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Bayes' rule}{
\ptsize{10}
\bi
\i The essence of Bayesian analysis is to draw inference about unknown quantities or quantiles of interest from the posterior distribution $p(\btheta|\by)$, which combines prior beliefs and data information. Bayes' rule provides such a connection.
\beq\label{eq:Bayes.rule}
p(\btheta|\by)&=&\frac{p(\btheta)p(\by|\btheta)} {p(\by)}\noo
\mbox{posterior}&\propto& p(\btheta)p(\by|\btheta)\propto \mbox{prior}\times \mbox{data information}
\eeq
\i Why does this make sense? Our human brain is essentially a Bayesian machine.
\ei
}
\end{slide}
%%%%%%%%slide 3%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Non-informative prior}{
\ptsize{10}
Bayesian analysis requires prior information. Can I still use Bayesian analysis without ``prior'' information about $\btheta$? Yes.
\bi
\i Non-informative prior, vague prior, reference prior
\i Ways to construct non-informative prior
\bi
\i Intuitively, flat / almost flat over the parameter space. e.g., $X_i \sim N(\mu, \sigma^2), iid$ with $\sigma^2$ known. Then use prior $p(\mu) \propto 1$ or $p(\mu) \sim N(0, 10^6)$.
\i Jeffreys' prior, which is invariant under transformation, $p(\btheta) \propto [I(\btheta)]^{1/2}$ where $I(\btheta)$ is the expected Fisher information in the model.
\i Non-informative prior may be improper, in the sense that $\int p(\theta) d \theta=\infty$.
\ei
\ei
}
\end{slide}
%%%%%%%%slide 4%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Rats Data}{
\ptsize{10}
Data are obtained from WinBUGS (Spiegelhalter et al. 2002) example volume I (\texttt{http://www.mrc-bsu.cam.ac.uk/bugs}), originally from Gelfand et al. (1990).
\begin{figure}[htb]
\begin{center}
\includegraphics[height=2.3in,width=2.5in]{data/rats_data.ps}
\caption{Rats data in hierarchical normal model}
\label{fig:rats.data}
\end{center}
\end{figure}
}
\end{slide}
%%%%%%%%slide 5%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Random effects model}{
\ptsize{10}
\bi
\i The data suggest a growing pattern with age with a little downward curvature.
\beq\label{eq:rats.linear}
Y_{ij} &\sim& N(a_i + \beta trt_i + b_i(x_j - \bar{x}), \tau_0^{-1})\noo
a_i &\sim & N(\mu_a, \tau_a^{-1})\noo
b_i &\sim& N(\mu_b, \tau_b^{-1}),
\eeq
where $\bar{x} = 22$, the average of $x$, $trt_i$ is the group assignment for rat $i$, and $\tau_0, \tau_a, \tau_b$ are precisions (1/variance) for the corresponding normal distributions.
\i This model suggests that for each subject (i.e., fix random effects $a_i$ and $b_i$, and group $trt_i$), the growth curve is linear with noise precision $\tau_0$. The group effect can be captured by $\beta$.
\ei
}
\end{slide}
%%%%%%%%slide 6%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Hierarchical normal / normal model}{
\ptsize{10}
\bi
\i Hierarchical normal / normal model is analogous to mixed model, however in Bayesian world, there are no fixed effects because all parameters are treated as random with distributions.
\i The above model is not a fully Bayesian model, because it can be treated as a typical mixed model with fixed effects \texttt{intercept}, \texttt{day}, \texttt{trt} and random effects \texttt{intercept}, \texttt{day}.
\i The first equation in model (\ref{eq:rats.linear}) specifies the likelihood and the other two specify priors for $\bm a$ and $\bm b$ through another level of parameters $\mu_a, \mu_b, \tau_a$, and $\tau_b$.
\ei
}
\end{slide}
%%%%%%%%slide 7%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Likelihood, priors and hyperpriors}{
\ptsize{10}
\bi
\i Likelihood / data information: $Y_{ij} \sim N(a_i + \beta trt_i + b_i(x_j - \bar{x}), \tau_0^{-1})$.
\i Priors: $a_i \sim N(\mu_a, \tau_a^{-1})$, $b_i \sim N(\mu_b,\tau_b^{-1})$, non-informative priors are specified for $\tau_0$ and $\beta$: $\tau_0\sim \mbox{Gamma}(\epsilon, \epsilon)$, $\beta \sim N(0, 10^{6})$.
\i Vague hyper-priors: $\mu_a, \mu_b \sim N(0, 10^{6})$ and $\tau_a, \tau_b \sim $ Gamma$(\epsilon, \epsilon)$.
\i The fully Bayesian model (\ref{eq:rats.linear}) consists of three levels: data-based likelihood level $p(\by|\btheta)$, prior level $p(\btheta|\bm \psi)$, and hyperprior level $p(\bm \psi)$.
\i Complex models may involve more levels, but models with more than four levels are unusual and unhelpful.
\i Information contribution to posterior: Likelihood $>$ prior $>$ hyperprior.
\ei
}
\end{slide}
%%%%%%%%slide 8%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize BUGS program}{
\ptsize{8}
\footnotesize
\begin{verbatim}
model{
#likelihood p(Y|theta)
for( i in 1 : N ) {
for( j in 1 : T ) {
Y[i , j] ~ dnorm(mu[i , j],tau.0)
mu[i , j] <- a[i] + beta * trt[i] + b[i] * (x[j] - xbar)
}
#Prior p(theta|Psi)
a[i] ~ dnorm(mu.a, tau.a)
b[i] ~ dnorm(mu.b, tau.b)
}
#prior
tau.0 ~ dgamma(0.001,0.001)
beta ~ dnorm(0.0,1.0E-6)
#hyper-priors
mu.a ~ dnorm(0.0,1.0E-6)
mu.b ~ dnorm(0.0,1.0E-6)
tau.a ~ dgamma(0.001,0.001)
tau.b ~ dgamma(0.001,0.001)
#parameters of interest
sigma <- 1 / sqrt(tau.0) #error sd
w0[1] <- mu.a - xbar * mu.b #weight at birth for 1st group
w0[2] <- mu.a + beta - xbar * mu.b #weight at birth for 2nd group
}
\end{verbatim}
}
\end{slide}
%%%%%%%%slide 9%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize BUGS data}{
\ptsize{10}
\bi
\i List format created from R, but be careful about two issues:
\be
\i List data obtained from R do not have the required \texttt{.Data} keyword for BUGS; add this keyword manually.
\i BUGS reads a matrix in a different way from R. For example, suppose there is a matrix $M: 5\times 3$ in R. In order to use it in BUGS, follow this procedure:
\be
\i transpose $M$: \texttt{M <- t(M)};
\i dump $M$: \texttt{dput(M, "M.dat")};
\i open \texttt{M.dat}, add \texttt{.Data} keyword and change \texttt{.Dim = c(3,5)} to \texttt{.Dim = c(5,3)}.
\ee
\ee
\i List data example
\footnotesize
\begin{verbatim}
list(x = c(8.0, 15.0, 22.0, 29.0, 36.0), xbar = 22, N = 30, T = 5,
Y = structure(
.Data = c(151, 199, 246, 283, 320,
145, 199, 249, 293, 354,
147, 214, 263, 312, 328,
155, 200, 237, 272, 297,
.......................
.Dim = c(30,5)))
\end{verbatim}
\ei
}
\end{slide}
%%%%%%%%slide 10%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize BUGS data}{
\ptsize{10}
\bi
\i Table format
\begin{verbatim}
n[] x[]
47 0
148 18
119 8
END
\end{verbatim}
\ei
}
\end{slide}
%%%%%%%%slide 11%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Initialize MCMC}{
\ptsize{10}
\bi
\i BUGS may automatically generate initial values, but it is highly recommended to provide initial values for fixed effects. Good initial values potentially improve convergence.
\i For this model, the fixed effects are $\mu_a, \mu_b, \beta, \tau_0, \tau_a,$ and $\tau_b$. So it is recommended to initialize at least these parameters.
\i
\footnotesize
\begin{verbatim}
list(mu.a = 150, mu.b = 10, beta=0, tau.0 = 1, tau.a = 1, tau.b = 1)
\end{verbatim}\normalsize
\ei
}
\end{slide}
%%%%%%%%slide 12%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Procedure to run WinBUGS}{
\ptsize{10}
See live demonstration.
\be
\i Check code: select \texttt{Specification} from the \texttt{Model} menu. Highlight \texttt{list} in the code, and click \texttt{check model} button.
\i Load data: Then highlight \texttt{list} in the data code, and click \texttt{load data}.
\i Compile: click \texttt{compile} button and select the number of MCMC chains.
\i Initialize model: click \texttt{initialize} button.
\i Burn-in: Pull down \texttt{Model} menu and click \texttt{Update}.
\i Monitor samples: click \texttt{Samples}. Type parameters of interest and click \texttt{set} button.
\ee
}
\end{slide}
%%%%%%%%slide 13%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Procedure to run OpenBUGS using BRUGS package in R}{
\ptsize{9}
See live demonstration. \emph{The current \texttt{BRugs} package only works on Windows.}
\be
\i Create three text files namely \texttt{ratsmodel.txt}, \texttt{ratsdata.txt}, \texttt{ratsinits.txt} and save the three pieces of code in these files, respectively.
\i loading BRugs: \texttt{library(BRugs)}
\i Check code: \texttt{modelCheck("ratsmodel.txt") }
\i Load data: \texttt{modelData("ratsdata.txt")}
\i Compile: \texttt{modelCompile(numChains=2) }
\i Initialize model: \texttt{modelInits(rep("ratsinits.txt", 2)) }
\i Burn-in: \texttt{modelUpdate(1000) }
\i Monitor samples: \texttt{samplesSet(c("w0", "beta"))}
\i More samples: \texttt{modelUpdate(1000)}
\i Statistical inference and plots are also available (see BRugs package information).
\ee
}
\end{slide}
%%%%%%%%slide 14%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%
\begin{slide}{\normalsize Results and interpretation}{
\ptsize{10}
See live demonstration.
\bi
\i The posterior densities of these parameters can be estimated by the MCMC samples after convergence.
\i Since the 95\% CI of $\beta$ covers 0, there is no significant difference between these two groups at the .05 level.
\i As a conclusion, once we have the distribution of a parameter of interest, we completely know that parameter in statistical sense, so we can do whatever inference from it.
\ei
}
\end{slide}
\end{document}