# BAE(Certification)/[작업형2] 정형데이터 분석
# [작업형2] 입학여부 확률 구하기
# 코딩베어
# 2023. 6. 20. 17:32
# 728x90
# Predict Chance.of.Admit for the rows of x_test (regression task) and build
# an id/target result table. Two candidate models (random forest and linear
# regression) are compared by RMSE on a 20% hold-out split of the training
# data; the one with the lower hold-out RMSE produces the final predictions.

library(caret)
library(randomForest)

# ---- Load data ----
x_train <- read.csv(
  "C:/BigDataCertificationCourses-main/3rd/t2-2-X_train.csv",
  header = TRUE
)
x_test <- read.csv(
  "C:/BigDataCertificationCourses-main/3rd/t2-2-X_test.csv",
  header = TRUE
)
y_train <- read.csv(
  "C:/BigDataCertificationCourses-main/3rd/t2-2-Y_train.csv",
  header = TRUE
)
summary(x_train)
summary(x_test)
summary(y_train)

# ---- Combine predictors and target ----
# merge() joins on the columns the two frames share; with no shared column it
# would silently return a cross product, so fail early in that case.
# NOTE(review): the shared key is presumably Serial.No. -- confirm.
stopifnot(length(intersect(names(x_train), names(y_train))) > 0)
df.train <- merge(x_train, y_train)
stopifnot("Chance.of.Admit" %in% names(df.train))
summary(df.train)

# ---- Hold-out split ----
# Fixed seed (arbitrary value) so the split, the forest and therefore the
# RMSE comparison are reproducible between runs.
set.seed(2023)

# Stratify the 80/20 split on the outcome, not on the first column.
idx <- caret::createDataPartition(
  df.train[["Chance.of.Admit"]],
  times = 1,
  p = 0.8
)
train <- df.train[idx$Resample1, ]
test <- df.train[-idx$Resample1, ]

# ---- Fit candidate models ----
# Serial.No. is used below as the submission id, i.e. it is treated as a row
# identifier; keep it out of the predictors picked up by `~ .`. setdiff() is
# a no-op if the column is absent.
id_col <- "Serial.No."
train_fit <- train[, setdiff(names(train), id_col), drop = FALSE]

md.rf <- randomForest(Chance.of.Admit ~ ., data = train_fit)
md.lm <- lm(Chance.of.Admit ~ ., data = train_fit)

# ---- Evaluate on the hold-out rows ----
pred.rf <- predict(md.rf, newdata = test)
pred.lm <- predict(md.lm, newdata = test)

# caret::RMSE(pred, obs); caret is already loaded, so no extra package is
# needed for the metric.
rmse <- caret::RMSE(pred.rf, test[["Chance.of.Admit"]])
rmse
rmse2 <- caret::RMSE(pred.lm, test[["Chance.of.Admit"]])
rmse2

# ---- Final prediction ----
# Use whichever model has the lower hold-out RMSE (ties go to the random
# forest) instead of assuming the winner.
md.best <- if (rmse <= rmse2) md.rf else md.lm
pred <- predict(md.best, newdata = x_test)

# Result table: one row per test record, columns "id" and "target".
final_data <- data.frame(
  id = x_test[[id_col]],
  target = unname(pred),
  row.names = NULL
)
final_data
# 728x90