# Compare a one-unit sigmoid neural network (fit with keras) against a
# classical logistic regression (glm) on the South African heart disease
# data, reporting a 2x2 confusion table for each on a held-out test set.

# First column of the file is a row-name index.
heart <- read.csv(
  "https://web.stanford.edu/~hastie/ElemStatLearn/datasets/SAheart.data",
  row.names = 1
)

# Since R 4.0, read.csv leaves famhist as character ("Present"/"Absent");
# as.numeric() on character would give all NA, so map through a factor
# (Absent -> 1, Present -> 2). We need only numbers for keras.
heart$famhist <- as.numeric(factor(heart$famhist))

# Move from data frame to a plain numeric array for keras.
heart <- array(unlist(heart), dim = dim(heart))

n <- dim(heart)[1]
p <- dim(heart)[2]

set.seed(1)  # make the train/test split reproducible
test.id <- sample(n, n / 3)
x_train <- heart[-test.id, -p]
y_train <- heart[-test.id, p]   # last column is the binary response (chd)
x_test <- heart[test.id, -p]
y_test <- heart[test.id, p]

# keras needs a Python backend on the machine (e.g. Anaconda 3.x).
library(keras)
# keras::install_keras()  # run this only the first time, then keep commented

batch_size <- 32
epochs <- 1000

# A single dense sigmoid unit on the raw inputs is exactly logistic
# regression, so the NN and glm fits below are directly comparable.
# (No rm(model) needed: assignment replaces any previous model, and rm()
# would error in a fresh session where model does not yet exist.)
model <- keras_model_sequential()
model %>%
  layer_dense(units = 1, activation = "sigmoid",
              input_shape = c(dim(x_train)[2]))

# Example with two layers:
# model %>%
#   layer_dense(units = 3, activation = "sigmoid",
#               input_shape = c(dim(x_train)[2])) %>%
#   layer_dense(units = 1, activation = "sigmoid")

summary(model)

model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(),
  metrics = c("accuracy")
)

model %>% fit(
  x_train, y_train,
  batch_size = batch_size,
  epochs = epochs,
  verbose = 1,
  validation_data = list(x_test, y_test)
)

# predict_proba() was removed from recent keras releases; with a sigmoid
# output layer, predict() already returns class-1 probabilities.
phat_NN_1_sig <- predict(model, x_test, verbose = 0)
summary(phat_NN_1_sig)
table(phat_NN_1_sig > 0.5, y_test)  # 2x2 confusion table

# Classical logistic regression on the same split, for comparison.
mod.lr <- glm(y ~ ., data = data.frame(x = x_train, y = y_train),
              family = binomial)
phat_logit <- predict.glm(mod.lr,
                          newdata = data.frame(x = x_test, y = y_test),
                          type = "response")
table(phat_logit > 0.5, y_test)  # 2x2 confusion table