R에서 머신러닝

머신러닝을 배울 때, 가장 중요한 도구는 tensorflow이다. Rstudio에서는 tensorflow를 더 쉽게 쓸 수 있도록 keras를 비롯한 패키지를 지속적으로 업데이트 하고 있다.

https://tensorflow.rstudio.com/

Quickstart에서 제공하는 가장 간단한 예를 아래와 같이 실행해 보았다.

# Tensorflow

library(tensorflow)
library(keras)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## √ ggplot2 3.3.0     √ purrr   0.3.4
## √ tibble  3.0.1     √ dplyr   0.8.5
## √ tidyr   1.0.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

mnist <- dataset_mnist()
mnist$train$x <- mnist$train$x/255
mnist$test$x <- mnist$test$x/255

model <- keras_model_sequential() %>% 
  layer_flatten(input_shape = c(28, 28)) %>% 
  layer_dense(units = 128, activation = "relu") %>% 
  layer_dropout(0.2) %>% 
  layer_dense(10, activation = "softmax")
summary(model)

## Model: "sequential"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## flatten (Flatten)                   (None, 784)                     0           
## ________________________________________________________________________________
## dense (Dense)                       (None, 128)                     100480      
## ________________________________________________________________________________
## dropout (Dropout)                   (None, 128)                     0           
## ________________________________________________________________________________
## dense_1 (Dense)                     (None, 10)                      1290        
## ================================================================================
## Total params: 101,770
## Trainable params: 101,770
## Non-trainable params: 0
## ________________________________________________________________________________

model %>% 
  compile(
    loss = "sparse_categorical_crossentropy",
    optimizer = "adam",
    metrics = "accuracy"
  )

model %>% 
  fit(
    x = mnist$train$x, y = mnist$train$y,
    epochs = 5,
    validation_split = 0.3,
    verbose = 2
  )

model %>% 
  evaluate(mnist$test$x, mnist$test$y, verbose = 0)

## $loss
## [1] 0.08423539
## 
## $accuracy
## [1] 0.9753

save_model_tf(object = model, filepath = "model")

reloaded_model <- load_model_tf("model")
all.equal(predict(model, mnist$test$x), predict(reloaded_model, mnist$test$x))

## [1] TRUE

2020/04/24