Random Forest in R: Recommending a College Based on Rank and Branch Choice
R Notebook: College and Rank
This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Read the rank/college/branch table. stringsAsFactors = TRUE keeps College and
# Branch as factors on R >= 4.0.0 (where the default became FALSE); the
# classification forest fitted later needs a factor response.
ds <- read.csv("collegedatainput.csv", stringsAsFactors = TRUE)
head(ds)
Rank College Branch
1 1 IITB CSE
2 3 IITB CSE
3 10 IITB CSE
4 20 IITB CSE
5 30 IITB CSE
6 40 IITB CSE
str(ds)
'data.frame': 47 obs. of 3 variables:
$ Rank : int 1 3 10 20 30 40 50 260 270 280 ...
$ College: Factor w/ 3 levels "IITB","IITD",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Branch : Factor w/ 3 levels "CSE","EEE","MECH": 1 1 1 1 1 1 1 2 2 2 ...
ds
Rank College Branch
1 1 IITB CSE
2 3 IITB CSE
3 10 IITB CSE
4 20 IITB CSE
5 30 IITB CSE
6 40 IITB CSE
7 50 IITB CSE
8 260 IITB EEE
9 270 IITB EEE
10 280 IITB EEE
11 290 IITB EEE
12 300 IITB EEE
13 360 IITB MECH
14 370 IITB MECH
15 380 IITB MECH
16 390 IITB MECH
17 400 IITB MECH
18 60 IITM CSE
19 70 IITM CSE
20 80 IITM CSE
21 90 IITM CSE
22 100 IITM CSE
23 210 IITM MECH
24 220 IITM MECH
25 230 IITM MECH
26 240 IITM MECH
27 250 IITM MECH
28 310 IITM EEE
29 320 IITM EEE
30 330 IITM EEE
31 340 IITM EEE
32 350 IITM EEE
33 110 IITD CSE
34 120 IITD CSE
35 130 IITD CSE
36 140 IITD CSE
37 150 IITD CSE
38 160 IITD MECH
39 170 IITD MECH
40 180 IITD MECH
41 190 IITD MECH
42 200 IITD MECH
43 410 IITD EEE
44 420 IITD EEE
45 430 IITD EEE
46 440 IITD EEE
47 450 IITD EEE
# Random forest implementation used for the classifier.
library(randomForest)

# 75% of the rows (rounded down) go to training; the rest are held out.
nrow(ds)
smp_size <- floor(nrow(ds) * 0.75)

# Seed the RNG so the partition and the forest are reproducible.
set.seed(123)
train_ind <- sample.int(nrow(ds), size = smp_size)
train_ds <- ds[train_ind, ]
test_ds <- ds[-train_ind, ]

# Classify College from every other column (Rank and Branch).
colleg_rf <- randomForest(
  College ~ .,
  data = train_ds,
  ntree = 100,
  proximity = TRUE
)
colleg_rf
Call:
randomForest(formula = College ~ ., data = train_ds, ntree = 100, proximity = TRUE)
Type of random forest: classification
Number of trees: 100
No. of variables tried at each split: 1
OOB estimate of error rate: 20%
Confusion matrix:
IITB IITD IITM class.error
IITB 13 0 0 0.0000000
IITD 3 7 1 0.3636364
IITM 2 1 8 0.2727273
# With no newdata, predict() returns the out-of-bag predictions for the
# training rows, so this table is the transpose of the confusion matrix above.
table(predict(colleg_rf),train_ds$College)
IITB IITD IITM
IITB 13 3 2
IITD 0 7 1
IITM 0 1 8
# Plot the variable importance of the fitted forest.
varImpPlot(colleg_rf)

# Score the held-out rows and report how many there are.
collegePred <- predict(colleg_rf, newdata = test_ds)
nrow(test_ds)
[1] 12
# Cross-tabulate held-out predictions (rows) against the actual colleges
# (columns).
table(collegePred, test_ds$College)
collegePred IITB IITD IITM
IITB 4 0 3
IITD 0 3 0
IITM 0 1 1
head(test_ds)
Rank College Branch
5 30 IITB CSE
7 50 IITB CSE
9 270 IITB EEE
13 360 IITB MECH
26 240 IITM MECH
30 330 IITM EEE
# Build a one-row query by hand: rank 100, CSE branch.
testinput <- data.frame(Rank = 100, Branch = "CSE")
testinput
Rank Branch
1 100 CSE
str(testinput)
'data.frame': 1 obs. of 2 variables:
$ Rank : num 100
$ Branch: Factor w/ 1 level "CSE": 1
# Read the query rows. stringsAsFactors = TRUE keeps Branch a factor on
# R >= 4.0.0 (where the default became FALSE), which the level-alignment step
# that follows relies on.
testin <- read.csv("TestIn.csv", stringsAsFactors = TRUE)
str(testin)
'data.frame': 2 obs. of 2 variables:
$ Rank : int 100 310
$ Branch: Factor w/ 1 level "CSE": 1 1
nrow(testin)
[1] 2
# Re-encode Branch against the training levels by label. Assigning
# levels(x) <- value renames the existing levels by position, which is only
# correct when the test levels happen to be a prefix of the training levels
# (e.g. a file containing only "MECH" would be silently relabelled "CSE").
# factor() matches on the label instead and also works when Branch was read
# as character; branches not seen in training become NA.
testin$Branch <- factor(testin$Branch, levels = levels(train_ds$Branch))
str(testin)
'data.frame': 2 obs. of 2 variables:
$ Rank : int 100 310
$ Branch: Factor w/ 3 levels "CSE","EEE","MECH": 1 1
testin
Rank Branch
1 100 CSE
2 310 CSE
# Predicted college (class label) for each row read from TestIn.csv.
predict(colleg_rf, newdata = testin, type = "response")
1 2
IITM IITD
Levels: IITB IITD IITM
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
# Read the rank/college/branch table. stringsAsFactors = TRUE keeps College and
# Branch as factors on R >= 4.0.0 (where the default became FALSE); the
# classification forest fitted later needs a factor response.
ds <- read.csv("collegedatainput.csv", stringsAsFactors = TRUE)
head(ds)
Rank College Branch
1 1 IITB CSE
2 3 IITB CSE
3 10 IITB CSE
4 20 IITB CSE
5 30 IITB CSE
6 40 IITB CSE
str(ds)
'data.frame': 47 obs. of 3 variables:
$ Rank : int 1 3 10 20 30 40 50 260 270 280 ...
$ College: Factor w/ 3 levels "IITB","IITD",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Branch : Factor w/ 3 levels "CSE","EEE","MECH": 1 1 1 1 1 1 1 2 2 2 ...
ds
Rank College Branch
1 1 IITB CSE
2 3 IITB CSE
3 10 IITB CSE
4 20 IITB CSE
5 30 IITB CSE
6 40 IITB CSE
7 50 IITB CSE
8 260 IITB EEE
9 270 IITB EEE
10 280 IITB EEE
11 290 IITB EEE
12 300 IITB EEE
13 360 IITB MECH
14 370 IITB MECH
15 380 IITB MECH
16 390 IITB MECH
17 400 IITB MECH
18 60 IITM CSE
19 70 IITM CSE
20 80 IITM CSE
21 90 IITM CSE
22 100 IITM CSE
23 210 IITM MECH
24 220 IITM MECH
25 230 IITM MECH
26 240 IITM MECH
27 250 IITM MECH
28 310 IITM EEE
29 320 IITM EEE
30 330 IITM EEE
31 340 IITM EEE
32 350 IITM EEE
33 110 IITD CSE
34 120 IITD CSE
35 130 IITD CSE
36 140 IITD CSE
37 150 IITD CSE
38 160 IITD MECH
39 170 IITD MECH
40 180 IITD MECH
41 190 IITD MECH
42 200 IITD MECH
43 410 IITD EEE
44 420 IITD EEE
45 430 IITD EEE
46 440 IITD EEE
47 450 IITD EEE
# Random forest implementation used for the classifier.
library(randomForest)

# 75% of the rows (rounded down) go to training; the rest are held out.
nrow(ds)
smp_size <- floor(nrow(ds) * 0.75)

# Seed the RNG so the partition and the forest are reproducible.
set.seed(123)
train_ind <- sample.int(nrow(ds), size = smp_size)
train_ds <- ds[train_ind, ]
test_ds <- ds[-train_ind, ]

# Classify College from every other column (Rank and Branch).
colleg_rf <- randomForest(
  College ~ .,
  data = train_ds,
  ntree = 100,
  proximity = TRUE
)
colleg_rf
Call:
randomForest(formula = College ~ ., data = train_ds, ntree = 100, proximity = TRUE)
Type of random forest: classification
Number of trees: 100
No. of variables tried at each split: 1
OOB estimate of error rate: 20%
Confusion matrix:
IITB IITD IITM class.error
IITB 13 0 0 0.0000000
IITD 3 7 1 0.3636364
IITM 2 1 8 0.2727273
# With no newdata, predict() returns the out-of-bag predictions for the
# training rows, so this table is the transpose of the confusion matrix above.
table(predict(colleg_rf),train_ds$College)
IITB IITD IITM
IITB 13 3 2
IITD 0 7 1
IITM 0 1 8
# Plot the variable importance of the fitted forest.
varImpPlot(colleg_rf)

# Score the held-out rows and report how many there are.
collegePred <- predict(colleg_rf, newdata = test_ds)
nrow(test_ds)
[1] 12
# Cross-tabulate held-out predictions (rows) against the actual colleges
# (columns).
table(collegePred, test_ds$College)
collegePred IITB IITD IITM
IITB 4 0 3
IITD 0 3 0
IITM 0 1 1
head(test_ds)
Rank College Branch
5 30 IITB CSE
7 50 IITB CSE
9 270 IITB EEE
13 360 IITB MECH
26 240 IITM MECH
30 330 IITM EEE
# Build a one-row query by hand: rank 100, CSE branch.
testinput <- data.frame(Rank = 100, Branch = "CSE")
testinput
Rank Branch
1 100 CSE
str(testinput)
'data.frame': 1 obs. of 2 variables:
$ Rank : num 100
$ Branch: Factor w/ 1 level "CSE": 1
# Read the query rows. stringsAsFactors = TRUE keeps Branch a factor on
# R >= 4.0.0 (where the default became FALSE), which the level-alignment step
# that follows relies on.
testin <- read.csv("TestIn.csv", stringsAsFactors = TRUE)
str(testin)
'data.frame': 2 obs. of 2 variables:
$ Rank : int 100 310
$ Branch: Factor w/ 1 level "CSE": 1 1
nrow(testin)
[1] 2
# Re-encode Branch against the training levels by label. Assigning
# levels(x) <- value renames the existing levels by position, which is only
# correct when the test levels happen to be a prefix of the training levels
# (e.g. a file containing only "MECH" would be silently relabelled "CSE").
# factor() matches on the label instead and also works when Branch was read
# as character; branches not seen in training become NA.
testin$Branch <- factor(testin$Branch, levels = levels(train_ds$Branch))
str(testin)
'data.frame': 2 obs. of 2 variables:
$ Rank : int 100 310
$ Branch: Factor w/ 3 levels "CSE","EEE","MECH": 1 1
testin
Rank Branch
1 100 CSE
2 310 CSE
# Predicted college (class label) for each row read from TestIn.csv.
predict(colleg_rf, newdata = testin, type = "response")
1 2
IITM IITD
Levels: IITB IITD IITM
Comments
Post a Comment