728x90
  • 자동차 가격을 예측해주세요!
  • 예측할 값(y): price
  • 평가: RMSE (Root Mean Squared Error)
  • data: train.csv, test.csv
  • 제출 형식: submission.csv 파일을 아래와 같은 형식(수치형)으로 제출
    (id는 test 데이터의 index이며, 아래 예시처럼 0부터 시작함)
id,price
0,11000
1,20500
2,19610
...    
1616,11995
# Load the competition data. `train.df` holds the labelled rows (the target
# column is `price`); `test.df` holds the rows that must be scored.
#
# `stringsAsFactors = TRUE` is spelled out because the read.csv() default
# changed in R 4.0.0 (text columns now stay character). Making it explicit
# gives the same column types on every R version, with categorical predictors
# arriving as factors.
# NOTE(review): factor levels of train.df and test.df are derived
# independently from each file; confirm they agree before predicting on
# test.df.
train.df <- read.csv(
  "C:/BigDataCertificationCourses-main/5th/train.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
test.df <- read.csv(
  "C:/BigDataCertificationCourses-main/5th/test.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

# Quick inspection: per-column summary and the first ten rows.
summary(train.df)
head(train.df, 10)

library(caret)

# Fix the RNG seed so the hold-out split, and every RMSE computed from it,
# is identical on each run. The seed value itself is arbitrary.
set.seed(2022)

# Despite its name, `test_idx` holds the rows that go INTO the training set:
# createDataPartition() returns a list whose element `Resample1` contains the
# indices of the sampled 80%, drawn within the groups of `model`.
test_idx <- createDataPartition(train.df$model, p = 0.8)

# 80% of the labelled data is used for fitting ...
train <- train.df[test_idx$Resample1, ]
# ... and the remaining 20% is held out for validation.
test <- train.df[-test_idx$Resample1, ]

library(randomForest)
library(rpart)

# Fit three candidate regressors for `price`, each using every other column
# of `train` as a predictor. The fitting order is kept as-is.

# Random forest with the package defaults.
md.rf <- randomForest(price ~ ., data = train)

# Linear regression; step() then decides which independent variables the
# linear model keeps.
md.lm <- step(lm(price ~ ., data = train))

# Single regression tree.
md.rpart <- rpart(price ~ ., data = train)

# Predict the held-out rows in `test` with each candidate model.
pred.rf <- predict(object = md.rf, newdata = test)
pred.lm <- predict(object = md.lm, newdata = test)
pred.rpart <- predict(object = md.rpart, newdata = test)

# RMSE of each model against the known prices (lower is better). Each value
# is auto-printed because the expressions are evaluated at top level.
ModelMetrics::rmse(test[["price"]], pred.rf)
ModelMetrics::rmse(test[["price"]], pred.lm)
ModelMetrics::rmse(test[["price"]], pred.rpart)

# Score the real test set with the random forest.
pred <- predict(md.rf, newdata = test.df)

# Build the submission in the required `id,price` layout:
# - ids are the 0-based row positions of test.df (R row names start at 1,
#   while the expected file starts at id 0);
# - a data.frame keeps both columns numeric, whereas cbind() with row names
#   coerces everything into a character matrix.
final <- data.frame(
  id = seq_len(nrow(test.df)) - 1L,
  price = round(unname(pred))
)

# Write the file into the working directory. Row names and quoting are
# disabled so the header line reads exactly `id,price`.
write.csv(final, "submission.csv", row.names = FALSE, quote = FALSE)

# Sanity check of the first rows instead of dumping the whole table.
head(final, 10)
728x90