# 728x90 (leftover ad-banner size marker from the web page this script was copied from; not R code)
# Load the training features, the test features and the training labels.
# NOTE(review): the paths are absolute Windows paths; adjust them when running
# on another machine.
x_train <- read.csv(
  "C:/BigDataCertificationCourses-main/3rd/t2-2-X_train.csv",
  header = TRUE
)
x_test <- read.csv(
  "C:/BigDataCertificationCourses-main/3rd/t2-2-X_test.csv",
  header = TRUE
)
y_train <- read.csv(
  "C:/BigDataCertificationCourses-main/3rd/t2-2-Y_train.csv",
  header = TRUE
)
summary(x_train)
summary(x_test)
summary(y_train)

# Join features and labels on the columns they share. merge() silently
# returns a Cartesian product when there is no common column, so fail loudly
# instead of training on a meaningless table.
shared_cols <- intersect(names(x_train), names(y_train))
stopifnot(length(shared_cols) > 0)
df.train <- merge(x_train, y_train, by = shared_cols)
summary(df.train)

library(caret)

# Split the labelled data 80/20 into a fitting set and a hold-out set.
# A fixed seed keeps the split, and the model comparison that depends on it,
# reproducible between runs.
set.seed(1234)

# Stratify on the outcome. The first column of a merged data frame is a join
# key, so partitioning on df.train[, 1] balanced the split on the identifier
# rather than on the value being predicted.
idx <- caret::createDataPartition(
  df.train[["Chance.of.Admit"]],
  times = 1,
  p = 0.8
)
train <- df.train[idx$Resample1, ]
test <- df.train[-idx$Resample1, ]

library(randomForest)

# Build the model formula explicitly so the row identifier is not used as a
# predictor: `Chance.of.Admit ~ .` would include every other column of
# `train`, identifier included.
# NOTE(review): "Serial.No." is the id column read from x_test later in this
# script; it is presumably also the key column in `train` - confirm.
# setdiff() simply ignores it if it is absent.
outcome_col <- "Chance.of.Admit"
id_col <- "Serial.No."
predictor_cols <- setdiff(names(train), c(outcome_col, id_col))
model_formula <- reformulate(predictor_cols, response = outcome_col)

# Candidate 1: random forest regression.
md.rf <- randomForest(model_formula, data = train)

# Candidate 2: ordinary least-squares linear regression.
md.lm <- lm(model_formula, data = train)

# Check model accuracy on the held-out split.
pred.rf <- predict(md.rf, newdata = test)
pred.lm <- predict(md.lm, newdata = test)

# Root-mean-squared error between observed and predicted values.
# Neither caret nor randomForest exports a function called `rmse`, so the
# metric is computed with base R instead of relying on an unattached package.
calc_rmse <- function(actual, predicted) {
  sqrt(mean((actual - predicted)^2))
}

rmse <- calc_rmse(test$Chance.of.Admit, pred.rf)
rmse
rmse2 <- calc_rmse(test$Chance.of.Admit, pred.lm)
rmse2
# The original author reports a better (lower) RMSE for the random forest,
# so it is used as the final prediction model below. Re-check rmse vs rmse2
# after any change to the data or the split.

# Score the unlabeled test features with the random forest model.
pred <- predict(md.rf, newdata = x_test)

# Store the predictions: a two-column matrix pairing each row's serial number
# ("id") with its predicted value ("target").
final_data <- cbind(id = x_test$Serial.No., target = pred)
final_data
# 728x90 (leftover ad-banner size marker from the web page this script was copied from; not R code)