# Naive Bayes
#
# Data-preparation step: load the dataset, separate the feature columns from
# the class label, and hold out part of the samples as a test set.

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `train_test_split` now lives in `sklearn.model_selection`.  The
# fallback keeps the script working on installations older than 0.18.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Importing the dataset
dataset = pd.read_csv('clusterincluster.csv')

# Independent variables (features).  In `iloc[rows, cols]` the first `:`
# selects every row; the list after the comma selects columns by position
# (indices start at 0), here columns 0 and 1.  A slice such as `:-1` could be
# used instead to take every column except the last one, which holds the class.
columns = dataset.iloc[:, [0, 1]].values

# Dependent variable (class label), taken from the column at index 2.
# Indexing with the list `[2]` keeps the result two-dimensional, shape (n, 1).
label = dataset.iloc[:, [2]].values

# Splitting the dataset into the training set and the test set.
# test_size=0.2 holds out 20% of the samples: with 10 observations, 2 go to
# the test set and 8 to the training set.  random_state=0 fixes the shuffle
# so the split is reproducible between runs.
columns_train, columns_test, label_train, label_test = train_test_split(
    columns, label, test_size=0.2, random_state=0
)