#Program 8
#
# This program simulates the generation of data under a logistic regression model where there is
# one predictor variable and two parameters (i.e., eta = beta0 + beta1*x).
# The data are then analysed using a standard logistic regression, and it is recorded whether
# at least one estimated standard error is greater than 10 (which is taken to be an
# indication that separation has occurred).  This is an attempt to estimate the proportion  
# of 'experiments' for which separation occurs

# nsim is the number of simulated data sets to be produced and analysed, after which the
# probability is estimated of obtaining a data set that displays separation

#The required input consists of a seed for the simulations, the vector containing the values of
#beta0 and beta1,a vector containing the values of x at the three support points, a 
#vector containing the sample sizes at the three support points, and the value of nsim.

seed <- 12345
betavec <- c(0,1)
xvec <- c(0,0.5,1)
nvec <- c(4,4,4)
nsims <- 1000

s <- length(xvec)
pvec <- rep(0,s)

f <- function(x)
{
f <- c(1,x)
}

for (i in 1:s)
{
pvec[i] <- 1/(exp(-f(xvec[i])%*%betavec)+1)
}

data <- matrix(0,nsims,s)
set.seed(seed)
for(i in 1:s)
{
data[,i] <- rbinom(nsims,nvec[i],pvec[i])
}

count <- 0
for (i in 1:nsims)
{
 yvec <- data[i,]
 fail <- nvec - yvec
 model <- glm(cbind(yvec,fail)~xvec,family="binomial")
 output <- summary.glm(model)$coefficients
 check <- sum(output[,2]>10)
 count <- count + (check>0)
}
cat("The proportion of samples with at least one s.e. greater than 10 was",count/nsims,"\n")