# Compare a one-unit sigmoid neural network (fit with keras) against a
# classical logistic regression (glm) on the South African heart disease
# data, reporting a 2x2 confusion table for each on a held-out test set.

# First column of the file is a row-name index.
heart <- read.csv(
  "https://web.stanford.edu/~hastie/ElemStatLearn/datasets/SAheart.data",
  row.names = 1
)

# Since R 4.0, read.csv leaves famhist as character ("Present"/"Absent");
# as.numeric() on character would give all NA, so map through a factor
# (Absent -> 1, Present -> 2). We need only numbers for keras.
heart$famhist <- as.numeric(factor(heart$famhist))

# Move from data frame to a plain numeric array for keras.
heart <- array(unlist(heart), dim = dim(heart))

n <- dim(heart)[1]
p <- dim(heart)[2]

set.seed(1)  # make the train/test split reproducible
test.id <- sample(n, n / 3)
x_train <- heart[-test.id, -p]
y_train <- heart[-test.id, p]   # last column is the binary response (chd)
x_test <- heart[test.id, -p]
y_test <- heart[test.id, p]

# keras needs a Python backend on the machine (e.g. Anaconda 3.x).
library(keras)
# keras::install_keras()  # run this only the first time, then keep commented

batch_size <- 32
epochs <- 1000

# A single dense sigmoid unit on the raw inputs is exactly logistic
# regression, so the NN and glm fits below are directly comparable.
# (No rm(model) needed: assignment replaces any previous model, and rm()
# would error in a fresh session where model does not yet exist.)
model <- keras_model_sequential()
model %>%
  layer_dense(units = 1, activation = "sigmoid",
              input_shape = c(dim(x_train)[2]))

# Example with two layers:
# model %>%
#   layer_dense(units = 3, activation = "sigmoid",
#               input_shape = c(dim(x_train)[2])) %>%
#   layer_dense(units = 1, activation = "sigmoid")

summary(model)

model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(),
  metrics = c("accuracy")
)

model %>% fit(
  x_train, y_train,
  batch_size = batch_size,
  epochs = epochs,
  verbose = 1,
  validation_data = list(x_test, y_test)
)

# predict_proba() was removed from recent keras releases; with a sigmoid
# output layer, predict() already returns class-1 probabilities.
phat_NN_1_sig <- predict(model, x_test, verbose = 0)
summary(phat_NN_1_sig)
table(phat_NN_1_sig > 0.5, y_test)  # 2x2 confusion table

# Classical logistic regression on the same split, for comparison.
mod.lr <- glm(y ~ ., data = data.frame(x = x_train, y = y_train),
              family = binomial)
phat_logit <- predict.glm(mod.lr,
                          newdata = data.frame(x = x_test, y = y_test),
                          type = "response")
table(phat_logit > 0.5, y_test)  # 2x2 confusion table