確率モデルは面白い
7scientist
## データの準備
X <- c(-27.020,3.570,8.191,9.808,9.603,9.945,10.056)
sc7 <- list(N=NROW(X),X=X)
## モデルコンパイル
model <- stan_model("SevenScientist.stan")
## data{
## int<lower=0> N;
## real X[N];
## }
##
## parameters{
## real mu;
## real<lower=0> sig[N];
## }
##
## model{
## for(n in 1:N){
## //likelihood
## X[n] ~ normal(mu,sig[n]);
## //prior
## sig[n] ~ inv_gamma(0.0001,0.0001);
## }
## // prior
## mu ~ normal(0,1000);
## }
## 推定
fit <- sampling(model,sc7,iter=10000)
## Warning: There were 7154 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
## 表示
fit
## Inference for Stan model: SevenScientist.
## 4 chains, each with iter=10000; warmup=5000; thin=1;
## post-warmup draws per chain=5000, total post-warmup draws=20000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu 9.90 0.01 0.11 9.60 9.81 9.94 9.95 10.06 104 1.03
## sig[1] 154.21 13.63 906.95 17.49 32.33 51.92 107.89 576.71 4426 1.00
## sig[2] 34.45 6.60 439.63 2.94 5.80 10.01 20.97 153.57 4438 1.00
## sig[3] 7.66 0.82 53.58 0.77 1.55 2.79 6.32 25.70 4264 1.00
## sig[4] 0.43 0.04 3.28 0.00 0.06 0.15 0.29 1.97 7303 1.00
## sig[5] 2.38 0.78 62.10 0.03 0.25 0.41 0.75 6.07 6311 1.00
## sig[6] 0.52 0.09 8.72 0.00 0.00 0.08 0.23 1.99 9369 1.00
## sig[7] 1.13 0.31 34.71 0.00 0.08 0.19 0.48 3.62 12485 1.00
## lp__ -3.26 0.39 2.92 -9.71 -5.25 -2.94 -0.86 0.90 55 1.09
##
## Samples were drawn using NUTS(diag_e) at Mon Sep 3 19:35:11 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## 描画
plot(fit,pars=c("sig[1]","sig[2]","sig[3]","sig[4]",
"sig[5]","sig[6]","sig[7]"),show_density=T)
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)
打ち切りデータ
## データの準備
nfails <- 949
n <- 50 # Number of questions
datasasoon <- list(nF=nfails,N=n)
## モデルコンパイル
sasoon <- stan_model("censored.stan")
## data{
## int<lower=0> nF; // number of failes
## int<lower=0> N; // number of questions
## }
##
## parameters{
## real<lower=0.25,upper=1> theta;
## }
##
## model{
## 30 ~ binomial(N,theta);
## target += nF * (log(binomial_cdf(25,N,theta) - binomial_cdf(14,N,theta)));
## }
fit <- sampling(sasoon,datasasoon)
fit
## Inference for Stan model: censored.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## theta 0.40 0.00 0.00 0.39 0.40 0.40 0.4 0.41 1228
## lp__ -152.25 0.02 0.69 -154.19 -152.42 -151.98 -151.8 -151.75 1291
## Rhat
## theta 1
## lp__ 1
##
## Samples were drawn using NUTS(diag_e) at Mon Sep 3 19:35:15 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
fit %>% as.array %>%
bayesplot::mcmc_dens(pars="theta") + xlim(0,1)
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
二項分布を確認しておこう
二項分布の例
theta <- 0.4
Ntrial <- 50
size <- 10000
data.frame(x=rbinom(size,Ntrial,theta)) %>%
ggplot(aes(x=x))+geom_histogram(binwidth = 0.5)
今回の例を乱数で作ってみる
### 仮にtheta=0.4とする
N <- 10000
data.frame(X=rbinom(N,50,0.4)) %>%
table() %>% data.frame() %>%
rename(tbl=1) %>% mutate(tbl=as.numeric(levels(tbl)[tbl])) %>%
right_join(.,data.frame(tbl=1:50)) %>%
mutate(Freq=replace_na(Freq,0)) %>%
mutate(Cum = cumsum(Freq)/N) %>%
mutate(COL = ifelse(tbl<15,1,ifelse(tbl<25,2,3))) %>%
ggplot(aes(x=tbl,y=Cum,fill=as.factor(COL))) +
geom_bar(stat="identity",alpha=0.5) +
xlab("")+ylab("")+ theme(legend.position = "none")
## Joining, by = "tbl"
変化点の検出
c <- scan("changepointdata.txt")
n <- length(c)
t <- 1:n
datapoints <- list(C=c,N=n,Time=t)
ChangeDetection <- stan_model("ChangeDetection.stan")
## data{
## int<lower=0> N;
## int<lower=0> Time[N];
## real C[N];
## }
##
## parameters{
## vector<lower=0>[2] mu;
## real<lower=0> sigma;
## real<lower=0,upper=N> tau;
## }
##
##
## model{
## //prior
## mu[1] ~ normal(0,1000);
## mu[2] ~ normal(0,1000);
## sigma ~ cauchy(0,5);
##
## //likelihood
## for(n in 1:N){
## if(Time[n]> tau){
## C[n] ~ normal(mu[2],sigma);
## } else {
## C[n] ~ normal(mu[1],sigma);
## }
## }
##
## tau ~ uniform(0,N);
## }
fit <- sampling(ChangeDetection,datapoints)
## Warning: There were 3994 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
print(fit)
## Inference for Stan model: ChangeDetection.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5%
## mu[1] 37.88 0.03 0.27 37.37 37.70 37.88 38.06 38.41
## mu[2] 30.60 0.02 0.30 30.00 30.39 30.60 30.81 31.18
## sigma 6.84 0.01 0.13 6.58 6.75 6.84 6.93 7.10
## tau 732.08 0.12 1.93 729.08 731.08 731.70 733.01 737.12
## lp__ -2841.63 0.08 1.47 -2845.05 -2842.39 -2841.43 -2840.58 -2839.47
## n_eff Rhat
## mu[1] 109 1.03
## mu[2] 177 1.02
## sigma 93 1.01
## tau 241 1.01
## lp__ 333 1.01
##
## Samples were drawn using NUTS(diag_e) at Mon Sep 3 19:40:18 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
可視化
df <- transform(c)
Ms <- rstan::get_posterior_mean(fit,pars="mu")[,5]
point <- round(apply(as.matrix(rstan::extract(fit,pars="tau")$tau),2,median))
df$Mu <- c(rep(Ms[1],point),rep(Ms[2],n-point))
df %>% dplyr::mutate(num=row_number()) %>% ggplot(aes(x=num,y=X_data))+geom_line(alpha=0.5)+
geom_point(aes(y=Mu),color="blue")
飛行機を再捕獲する
x <- 10 # number of captures
k <- 4 # number of recaptures from n
n <- 5 # size of second sample
tmax <- 50 # maximum population size
datastan <- list(X=x,N=n,K=k,TMax=tmax)
planeModel <- stan_model("plane.stan")
## data{
## int<lower=0> X; //第一標本サイズ
## int<lower=0> N; //第二標本サイズ
## int<lower=0,upper=N> K; //再捕獲した数
## int<lower=X> TMax; // ありえそうな最大数
## }
##
## transformed data{
## int<lower=X> tmin; //少なくともこれぐらいはいる
## tmin = X + N - K;
## }
##
## parameters{
## }
##
## transformed parameters{
## vector[TMax] lp; //最大数までの尤度
## for(t in 1:TMax){
## if(t < tmin){
## // 最低限以下はあり得ないので尤度を負の無限大にする
## lp[t] = log(1.0/TMax) + negative_infinity();
## }else{
## // 最大値まで均等にありそうな超幾何分布 HM(K|N,X,台数)
## lp[t] = log(1.0/TMax) + hypergeometric_lpmf(K|N,X,t-X);
## }
## }
## }
##
## model{
## target += log_sum_exp(lp);
## }
##
## generated quantities{
## int<lower=tmin,upper=TMax> t;
## simplex[TMax] tp;
## tp = softmax(lp);
## t = categorical_rng(tp);
## }
fit <- sampling(planeModel,datastan,algorithm="Fixed_param")
fit %>% as.array %>% bayesplot::mcmc_hist(pars="t",binwidth=1)