scikit-learn Naive Bayes Jupyter Notebook
In [8]:
import urllib
import urllib.request

import numpy as np
import pandas as pd
import sklearn
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the legacy module
    from sklearn.cross_validation import train_test_split
In [9]:
from sklearn import metrics
from sklearn.metrics import accuracy_score
Naive Bayes
Use Naive Bayes to predict whether an e-mail is spam.
In [12]:
# Download the Spambase data file from the UCI repository and parse the
# comma-separated rows into a NumPy array.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data"
# The context manager closes the HTTP response as soon as parsing finishes,
# instead of leaving the connection open for the lifetime of the kernel.
with urllib.request.urlopen(url) as raw_data:
    dataset = np.loadtxt(raw_data, delimiter=",")
print(dataset.shape)
# Last expression of the cell: display the first row as the cell output.
dataset[0]
Out[12]:
In [17]:
# Predictors: the first 48 columns of every row.
# NOTE(review): presumably these are the word-frequency attributes of
# Spambase -- confirm against the data set description.
# Target: the last column of every row.
X, y = dataset[:, :48], dataset[:, -1]
# Sanity check: show the first sample's predictors, then its label.
for first_value in (X[0], y[0]):
    print(first_value)
In [18]:
# Hold out 33% of the rows for testing; the fixed random_state makes the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=17,
)
In [19]:
# Bernoulli Naive Bayes works on binary features, so the inputs are
# thresholded first. `binarize` is a numeric cut-off, not an on/off flag:
# feature values strictly above it become 1, all others become 0.
# The previous `binarize=True` was silently coerced to this same 1.0
# threshold; spelling it as a float makes the cut-off explicit.
# NOTE(review): 1.0 is kept so predictions are unchanged -- confirm it is the
# intended cut-off (scikit-learn's default is 0.0).
BernNB = BernoulliNB(binarize=1.0)
In [20]:
# Train on the training split; `fit` returns the estimator itself, so the
# fitted model's configuration can be printed in the same statement.
print(BernNB.fit(X_train, y_train))
In [22]:
# Score the fitted model on the test split: predict labels for X_test and
# report the fraction that match the true labels.
y_pred = BernNB.predict(X_test)
y_expect = y_test  # ground-truth labels for the same rows
print(accuracy_score(y_true=y_expect, y_pred=y_pred))
Comments
Post a Comment