#Program 8
#
# This program simulates the generation of data under a logistic regression model where there is
# one predictor variable and two parameters (i.e., eta = beta0 + beta1*x).
# The data are then analysed using a standard logistic regression, and it is recorded whether
# at least one estimated standard error is greater than 10 (which is taken to be an
# indication that separation has occurred). This is an attempt to estimate the proportion
# of 'experiments' for which separation occurs
# nsim is the number of simulated data sets to be produced and analysed, after which the
# probability is estimated of obtaining a data set that displays separation
#The required input consists of a seed for the simulations, the vector containing the values of
#beta0 and beta1,a vector containing the values of x at the three support points, a
#vector containing the sample sizes at the three support points, and the value of nsim.
seed <- 12345
betavec <- c(0,1)
xvec <- c(0,0.5,1)
nvec <- c(4,4,4)
nsims <- 1000
s <- length(xvec)
pvec <- rep(0,s)
f <- function(x)
{
f <- c(1,x)
}
for (i in 1:s)
{
pvec[i] <- 1/(exp(-f(xvec[i])%*%betavec)+1)
}
data <- matrix(0,nsims,s)
set.seed(seed)
for(i in 1:s)
{
data[,i] <- rbinom(nsims,nvec[i],pvec[i])
}
count <- 0
for (i in 1:nsims)
{
yvec <- data[i,]
fail <- nvec - yvec
model <- glm(cbind(yvec,fail)~xvec,family="binomial")
output <- summary.glm(model)$coefficients
check <- sum(output[,2]>10)
count <- count + (check>0)
}
cat("The proportion of samples with at least one s.e. greater than 10 was",count/nsims,"\n")