통계 이야기

빅데이터 분석 ; 신경망 알고리즘 ; 뉴런

창이 2021. 7. 22.
728x90
반응형

데이터 불러오기

# Read the cereal data set from the current working directory.
data <- read.csv(file = "cereal.csv")

변수가 너무 많아 일부 변수만 선택

# Check the dimensions and the column types of the raw data.
dim(data)
str(data)
# dim() output: 77 16

'data.frame': 77 obs. of 16 variables:
$ name : chr "100% Bran" "100% Natural Bran" "All-Bran" "All-Bran with Extra Fiber" ...
$ mfr : chr "N" "Q" "K" "K" ...
$ type : chr "C" "C" "C" "C" ...
$ calories: int 70 120 70 50 110 110 110 130 90 90 ...
$ protein : int 4 3 4 4 2 2 2 3 2 3 ...
$ fat : int 1 5 1 0 2 2 0 2 1 0 ...
$ sodium : int 130 15 260 140 200 180 125 210 200 210 ...
$ fiber : num 10 2 9 14 1 1.5 1 2 4 5 ...
$ carbo : num 5 8 7 8 14 10.5 11 18 15 13 ...
$ sugars : int 6 8 5 0 8 10 14 8 6 5 ...
$ potass : int 280 135 320 330 -1 70 30 100 125 190 ...
$ vitamins: int 25 0 25 25 25 25 25 25 25 25 ...
$ shelf : int 3 3 3 3 3 1 2 3 1 3 ...
$ weight : num 1 1 1 1 1 1 1 1.33 1 1 ...
$ cups : num 0.33 1 0.33 0.5 0.75 0.75 1 0.75 0.67 0.67 ...
$ rating : num 68.4 34 59.4 93.7 34.4 ...

# Keep columns 2, 4 and 5 only (mfr, calories and protein in the str()
# listing for this data set); column 3 (type) is dropped.
data <- data[c(2L, 4L, 5L)]

조정된 데이터셋 확인 및 탐색

# Confirm which columns remain after the selection.
str(data)

'data.frame': 77 obs. of 3 variables:
$ mfr : chr "N" "Q" "K" "K" ...
$ calories: int 70 120 70 50 110 110 110 130 90 90 ...
$ protein : int 4 3 4 4 2 2 2 3 2 3 ...

# Count the missing values across the whole data frame.
sum(is.na(data))

[1] 0

# Draw the distribution of each retained variable on a 2 x 2 grid.
# The columns are evaluated inside `data` via with(): no attach() call is
# visible, so bare `mfr`, `calories` and `protein` would not be found.
# with() also keeps the default titles (e.g. "Histogram of calories").
old_par <- par(mfrow = c(2, 2))
with(data, {
  barplot(table(mfr))
  hist(calories)
  hist(protein)
})
# Restore the previous layout so later plots are not squeezed into the grid.
par(old_par)

관심 있는 제조사 선택

# Replace each manufacturer label by the numeric code of its factor level
# (factor levels are the sorted unique labels).
data$mfr <- as.numeric(factor(data$mfr))
str(data)

'data.frame': 77 obs. of 3 variables:
$ mfr : num 4 6 3 3 7 2 3 2 7 5 ...
$ calories: int 70 120 70 50 110 110 110 130 90 90 ...
$ protein : int 4 3 4 4 2 2 2 3 2 3 ...

library(dplyr)
# Keep only the rows whose manufacturer code is 2, 3 or 5.
data <- filter(data, mfr %in% c(2, 3, 5))

train 데이터와 test 데이터 나누기

# Treat the manufacturer code as a categorical response.
data$mfr <- as.factor(data$mfr)
# Standardise the predictors (every column but the first) and re-attach the
# response column.
c.scaled <- cbind(scale(data[-1]), data[1])
set.seed(1000)
# Draw the training rows at random. The original snippet used `index` without
# ever defining it; a 70/30 split is assumed here -- adjust if needed.
n_obs <- nrow(c.scaled)
index <- sample(seq_len(n_obs), size = floor(0.7 * n_obs))
train <- c.scaled[index, ]
test <- c.scaled[-index, ]

library(nnet)

신경망 알고리즘 (은닉층 1개, 은닉 노드 2개)

# Fit a single-hidden-layer network; `size = 2` is the number of hidden
# units and `decay` is the weight-decay penalty.
model.nnet <- nnet(mfr ~ ., data = train, size = 2, decay = 5e-04)

# Predict class labels for the held-out rows. The original snippet used
# `pre` in table() without ever computing it.
pre <- predict(model.nnet, newdata = test, type = "class")

# Cross-tabulate the true classes against the predicted classes.
actual <- test$mfr
table(actual, pre)

library(neuralnet)
# Fit a network with two hidden layers of three units each. The original
# text had `)plot(c_model)` on a single line, which does not parse.
c_model <- neuralnet(mfr ~ ., data = train, hidden = c(3, 3))
# Draw the fitted network.
plot(c_model)

728x90
반응형

댓글

추천 글